... | ... |
@@ -148,10 +148,12 @@ recoup <- function( |
148 | 148 |
"of arguments or remove any of the above with the\nremoveData ", |
149 | 149 |
"function.\n") |
150 | 150 |
prevCallParams <- input$callopts |
151 |
+ refRanges <- input$refRanges |
|
151 | 152 |
input <- input$data |
152 | 153 |
} |
153 | 154 |
else |
154 |
- prevCallParams <- NULL |
|
155 |
+ prevCallParams <- refRanges <- NULL |
|
156 |
+ |
|
155 | 157 |
if (!is.list(input) && file.exists(input)) |
156 | 158 |
input <- readConfig(input) |
157 | 159 |
checkInput(input) |
... | ... |
@@ -409,42 +411,91 @@ recoup <- function( |
409 | 411 |
############################################################################ |
410 | 412 |
|
411 | 413 |
# Continue with actual work |
412 |
- # Annotation case #1: provided strictly as a BED-like data frame with loci |
|
413 |
- if (is.data.frame(genome)) { |
|
414 |
- if (type=="rnaseq") |
|
415 |
- stop("When type=\"rnaseq\", only the usage of a supported or user-", |
|
416 |
- "imported organism from GTF file is allowed!") |
|
417 |
- rownames(genome) <- as.character(genome[,4]) |
|
418 |
- genomeRanges <- makeGRangesFromDataFrame( |
|
419 |
- df=genome, |
|
420 |
- keep.extra.columns=TRUE |
|
421 |
- ) |
|
414 |
+ # If annotation already here and not removed from the input object because |
|
415 |
+ # of change in region, flank or type, no need to reload it and thus, recoup |
|
416 |
+ # object become even more mobile and portable as localDb is not needed. |
|
417 |
+ if (!is.null(refRanges)) { |
|
418 |
+ genomeRanges <- refRanges$genomeRanges |
|
419 |
+ helperRanges <- refRanges$helperRanges |
|
422 | 420 |
} |
423 |
- # Annotation case #2: provided strictly as a BED-like file with loci |
|
424 |
- else if (!is.list(genome) && !is.data.frame(genome) |
|
425 |
- && is.character(genome)) { |
|
426 |
- if (file.exists(genome)) { |
|
421 |
+ else { |
|
422 |
+ # Annotation case #1: provided as a BED-like data frame with loci |
|
423 |
+ if (is.data.frame(genome)) { |
|
427 | 424 |
if (type=="rnaseq") |
428 | 425 |
stop("When type=\"rnaseq\", only the usage of a supported or ", |
429 | 426 |
"user-imported organism from GTF file is allowed!") |
430 |
- genome <- read.delim(genome) |
|
431 | 427 |
rownames(genome) <- as.character(genome[,4]) |
432 | 428 |
genomeRanges <- makeGRangesFromDataFrame( |
433 | 429 |
df=genome, |
434 | 430 |
keep.extra.columns=TRUE |
435 | 431 |
) |
436 |
- # Need to assigne Seqinfo... |
|
437 |
- sf <- .chromInfoToSeqInfoDf(.chromInfoFromBAM(input[[1]]$file), |
|
438 |
- asSeqinfo=TRUE) |
|
439 |
- seqlevels(genomeRanges) <- seqlevels(sf) |
|
440 |
- seqinfo(genomeRanges) <- sf |
|
432 |
+ helperRanges <- NULL |
|
441 | 433 |
} |
442 |
- # Annotation case #3: use local database or automatically on-the-fly |
|
443 |
- else { |
|
444 |
- if (type=="chipseq") { |
|
445 |
- if (region == "utr3") |
|
434 |
+ # Annotation case #2: provided strictly as a BED-like file with loci |
|
435 |
+ else if (!is.list(genome) && !is.data.frame(genome) |
|
436 |
+ && is.character(genome)) { |
|
437 |
+ if (file.exists(genome)) { |
|
438 |
+ if (type=="rnaseq") |
|
439 |
+ stop("When type=\"rnaseq\", only the usage of a supported ", |
|
440 |
+ "or user-imported organism from GTF file is allowed!") |
|
441 |
+ genome <- read.delim(genome) |
|
442 |
+ rownames(genome) <- as.character(genome[,4]) |
|
443 |
+ genomeRanges <- makeGRangesFromDataFrame( |
|
444 |
+ df=genome, |
|
445 |
+ keep.extra.columns=TRUE |
|
446 |
+ ) |
|
447 |
+ # Need to assigne Seqinfo... |
|
448 |
+ sf <- .chromInfoToSeqInfoDf(.chromInfoFromBAM(input[[1]]$file), |
|
449 |
+ asSeqinfo=TRUE) |
|
450 |
+ seqlevels(genomeRanges) <- seqlevels(sf) |
|
451 |
+ seqinfo(genomeRanges) <- sf |
|
452 |
+ |
|
453 |
+ helperRanges <- NULL |
|
454 |
+ } |
|
455 |
+ # Annotation case #3: use local database or automatically on-the-fly |
|
456 |
+ else { |
|
457 |
+ if (type=="chipseq") { |
|
458 |
+ if (region == "utr3") |
|
459 |
+ genomeRanges <- tryCatch(loadAnnotation(genome,refdb, |
|
460 |
+ type="utr",version=version,summarized=TRUE, |
|
461 |
+ db=localDb,rc=rc), |
|
462 |
+ error=function(e) { |
|
463 |
+ tryCatch({ |
|
464 |
+ gtfFile <- genome$gtf |
|
465 |
+ metadata <- genome |
|
466 |
+ metadata$gtf <- NULL |
|
467 |
+ genomeRanges <- importCustomAnnotation( |
|
468 |
+ gtfFile,metadata,"utr") |
|
469 |
+ },error=function(e) { |
|
470 |
+ message("Error ",e) |
|
471 |
+ stop("Please provide an existing organism ", |
|
472 |
+ "or a list with annotation metadata ", |
|
473 |
+ "and GTF file!") |
|
474 |
+ },finally="") |
|
475 |
+ },finally="") |
|
476 |
+ else |
|
477 |
+ genomeRanges <- tryCatch(loadAnnotation(genome,refdb, |
|
478 |
+ type="gene",version=version,db=localDb,rc=rc), |
|
479 |
+ error=function(e) { |
|
480 |
+ tryCatch({ |
|
481 |
+ gtfFile <- genome$gtf |
|
482 |
+ metadata <- genome |
|
483 |
+ metadata$gtf <- NULL |
|
484 |
+ geneData <- importCustomAnnotation(gtfFile, |
|
485 |
+ metadata,"gene") |
|
486 |
+ },error=function(e) { |
|
487 |
+ message("Error ",e) |
|
488 |
+ stop("Please provide an existing organism ", |
|
489 |
+ "or a list with annotation metadata ", |
|
490 |
+ "and GTF file!") |
|
491 |
+ },finally="") |
|
492 |
+ },finally="") |
|
493 |
+ |
|
494 |
+ helperRanges <- NULL |
|
495 |
+ } |
|
496 |
+ else if (type=="rnaseq") { |
|
446 | 497 |
genomeRanges <- tryCatch(loadAnnotation(genome,refdb, |
447 |
- type="utr",version=version,summarized=TRUE,db=localDb, |
|
498 |
+ type="exon",version=version,summarized=TRUE,db=localDb, |
|
448 | 499 |
rc=rc), |
449 | 500 |
error=function(e) { |
450 | 501 |
tryCatch({ |
... | ... |
@@ -452,16 +503,15 @@ recoup <- function( |
452 | 503 |
metadata <- genome |
453 | 504 |
metadata$gtf <- NULL |
454 | 505 |
genomeRanges <- importCustomAnnotation(gtfFile, |
455 |
- metadata,"utr") |
|
506 |
+ metadata,"exon") |
|
456 | 507 |
},error=function(e) { |
457 | 508 |
message("Error ",e) |
458 | 509 |
stop("Please provide an existing organism or ", |
459 |
- "a list with annotation metadata and GTF ", |
|
460 |
- "file!") |
|
510 |
+ "a list with annotation metadata and ", |
|
511 |
+ "GTF file!") |
|
461 | 512 |
},finally="") |
462 | 513 |
},finally="") |
463 |
- else |
|
464 |
- genomeRanges <- tryCatch(loadAnnotation(genome,refdb, |
|
514 |
+ helperRanges <- tryCatch(loadAnnotation(genome,refdb, |
|
465 | 515 |
type="gene",version=version,db=localDb,rc=rc), |
466 | 516 |
error=function(e) { |
467 | 517 |
tryCatch({ |
... | ... |
@@ -477,54 +527,21 @@ recoup <- function( |
477 | 527 |
"file!") |
478 | 528 |
},finally="") |
479 | 529 |
},finally="") |
480 |
- |
|
481 |
- helperRanges <- NULL |
|
482 |
- } |
|
483 |
- else if (type=="rnaseq") { |
|
484 |
- genomeRanges <- tryCatch(loadAnnotation(genome,refdb, |
|
485 |
- type="exon",version=version,summarized=TRUE,db=localDb, |
|
486 |
- rc=rc), |
|
487 |
- error=function(e) { |
|
488 |
- tryCatch({ |
|
489 |
- gtfFile <- genome$gtf |
|
490 |
- metadata <- genome |
|
491 |
- metadata$gtf <- NULL |
|
492 |
- genomeRanges <- importCustomAnnotation(gtfFile, |
|
493 |
- metadata,"exon") |
|
494 |
- },error=function(e) { |
|
495 |
- message("Error ",e) |
|
496 |
- stop("Please provide an existing organism or ", |
|
497 |
- "a list with annotation metadata and GTF ", |
|
498 |
- "file!") |
|
499 |
- },finally="") |
|
500 |
- },finally="") |
|
501 |
- helperRanges <- tryCatch(loadAnnotation(genome,refdb, |
|
502 |
- type="gene",version=version,db=localDb,rc=rc), |
|
503 |
- error=function(e) { |
|
504 |
- tryCatch({ |
|
505 |
- gtfFile <- genome$gtf |
|
506 |
- metadata <- genome |
|
507 |
- metadata$gtf <- NULL |
|
508 |
- geneData <- importCustomAnnotation(gtfFile, |
|
509 |
- metadata,"gene") |
|
510 |
- },error=function(e) { |
|
511 |
- message("Error ",e) |
|
512 |
- stop("Please provide an existing organism or ", |
|
513 |
- "a list with annotation metadata and GTF ", |
|
514 |
- "file!") |
|
515 |
- },finally="") |
|
516 |
- },finally="") |
|
517 |
- if (!is(genomeRanges,"GRangesList")) |
|
518 |
- genomeRanges <- split(genomeRanges,genomeRanges$gene_id) |
|
519 |
- helperRanges <- helperRanges[names(genomeRanges)] |
|
530 |
+ if (!is(genomeRanges,"GRangesList")) |
|
531 |
+ genomeRanges <- split(genomeRanges,genomeRanges$gene_id) |
|
532 |
+ helperRanges <- helperRanges[names(genomeRanges)] |
|
533 |
+ } |
|
520 | 534 |
} |
521 | 535 |
} |
536 |
+ |
|
537 |
+ # For saving... |
|
538 |
+ refRanges <- list(genomeRanges=genomeRanges,helperRanges=helperRanges) |
|
522 | 539 |
} |
523 | 540 |
|
524 | 541 |
# After genome read, check if we have a valid custom orderer |
525 | 542 |
if (!is.null(orderBy$custom)) { |
526 | 543 |
if (length(orderBy$custom) != length(genomeRanges)) { |
527 |
- warning("The custom orderer provide with orderBy parameter does ", |
|
544 |
+ warning("The custom orderer provided with orderBy parameter does ", |
|
528 | 545 |
"not have length equal to the number\nof elements in the ", |
529 | 546 |
"interrogated genomic regions and will be ignored!", |
530 | 547 |
immediate.=TRUE) |
... | ... |
@@ -666,7 +683,7 @@ recoup <- function( |
666 | 683 |
else if (onFlankFail == "trim") |
667 | 684 |
warning("Off bound regions detected after flanking! Offending ", |
668 | 685 |
"reference regions will be trimmed\n and the flanking regions ", |
669 |
- "will be merged with thre main...",immediate.=TRUE) |
|
686 |
+ "will be merged with the main...",immediate.=TRUE) |
|
670 | 687 |
} |
671 | 688 |
|
672 | 689 |
# Here we must write code for the reading and normalization of bam files |
... | ... |
@@ -675,7 +692,6 @@ recoup <- function( |
675 | 692 |
if (!onTheFly) |
676 | 693 |
input <- preprocessRanges(input,preprocessParams,genome,bamRanges, |
677 | 694 |
bamParams=bamParams,rc=rc) |
678 |
- |
|
679 | 695 |
# At this point we must apply the fraction parameter if <1. We choose this |
680 | 696 |
# point in order not to restrict later usage of the read ranges and since it |
681 | 697 |
# does not take much time to load into memory. In addition, this operation |
... | ... |
@@ -723,17 +739,7 @@ recoup <- function( |
723 | 739 |
#helperRanges <- helperRanges[keeph] |
724 | 740 |
#mainRanges <- mainRanges[names(helperRanges)] |
725 | 741 |
helperRanges <- .subsetGRangesByChrs(helperRanges,chrs) |
726 |
- mainRanges <- .subsetGRangesByChrs(mainRanges,chrs) |
|
727 |
- |
|
728 |
- #################################################################### |
|
729 |
- ## There must be an R bug with `lengths` here as although it runs in |
|
730 |
- ## Rcmd, it does not pass package building or vignette kniting... |
|
731 |
- ## But for the time being it seems that it is not needed as the name |
|
732 |
- ## filtering works |
|
733 |
- #lens <- which(lengths(genomeRanges)==0) |
|
734 |
- #if (length(lens)>0) |
|
735 |
- # genomeRanges[lens] <- NULL |
|
736 |
- #################################################################### |
|
742 |
+ mainRanges <- .subsetGRangesByChrs(mainRanges,chrs) |
|
737 | 743 |
} |
738 | 744 |
} |
739 | 745 |
|
... | ... |
@@ -745,8 +751,10 @@ recoup <- function( |
745 | 751 |
# If normalization method is linear, we must adjust the coverages |
746 | 752 |
# TODO: Check for onTheFly in the beginning |
747 | 753 |
if (preprocessParams$normalize == "linear" && !onTheFly) { |
754 |
+ message("Calculating normalization factors...") |
|
748 | 755 |
linFac <- calcLinearFactors(input,preprocessParams) |
749 | 756 |
for (n in names(input)) { |
757 |
+ message(" normalizing ",n) |
|
750 | 758 |
if (linFac[n]==1) |
751 | 759 |
next |
752 | 760 |
if (signal == "coverage") |
... | ... |
@@ -812,12 +820,13 @@ recoup <- function( |
812 | 820 |
# Coverages and profiles calculated... Now depending on plot option, we go |
813 | 821 |
# further or return the enriched input object for saving |
814 | 822 |
if (!plotParams$profile && !plotParams$heatmap && !plotParams$correlation) { |
815 |
- recoupObj <- toOutput(input,design,saveParams,callParams=callParams) |
|
823 |
+ recoupObj <- toOutput(input,design,saveParams,callParams=callParams, |
|
824 |
+ refRanges=refRanges) |
|
816 | 825 |
return(recoupObj) |
817 | 826 |
} |
818 | 827 |
else { |
819 | 828 |
recoupObj <- toOutput(input,design,list(ranges=TRUE,coverage=TRUE, |
820 |
- profile=TRUE),callParams=callParams) |
|
829 |
+ profile=TRUE),callParams=callParams,refRanges=refRanges) |
|
821 | 830 |
} |
822 | 831 |
|
823 | 832 |
## Our plot objects |
... | ... |
@@ -961,7 +970,8 @@ recoup <- function( |
961 | 970 |
} |
962 | 971 |
|
963 | 972 |
# Overwrite final object so as to return it |
964 |
- recoupObj <- toOutput(input,design,saveParams,recoupPlots,callParams) |
|
973 |
+ recoupObj <- toOutput(input,design,saveParams,recoupPlots,callParams, |
|
974 |
+ refRanges) |
|
965 | 975 |
|
966 | 976 |
# Make any plots asked |
967 | 977 |
if (plotParams$plot) { |
... | ... |
@@ -1009,7 +1019,7 @@ applySelectors <- function(ranges,selector,rc=NULL) { |
1009 | 1019 |
} |
1010 | 1020 |
|
1011 | 1021 |
toOutput <- function(input,design=NULL,saveParams,plotObjs=NULL, |
1012 |
- callParams=NULL) { |
|
1022 |
+ callParams=NULL,refRanges=NULL) { |
|
1013 | 1023 |
if (!saveParams$ranges) |
1014 | 1024 |
input <- removeData(input,"ranges") |
1015 | 1025 |
if (!saveParams$coverage) |
... | ... |
@@ -1027,6 +1037,7 @@ toOutput <- function(input,design=NULL,saveParams,plotObjs=NULL, |
1027 | 1037 |
data=input, |
1028 | 1038 |
design=design, |
1029 | 1039 |
plots=plots, |
1030 |
- callopts=callParams |
|
1040 |
+ callopts=callParams, |
|
1041 |
+ refRanges=refRanges |
|
1031 | 1042 |
)) |
1032 | 1043 |
} |
... | ... |
@@ -216,10 +216,11 @@ recoup <- function( |
216 | 216 |
# If type is rnaseq, the only allowed genomes are the ones supported by |
217 | 217 |
# recoup for the time being. In the future, a custom RNA-Seq genome may be |
218 | 218 |
# constructed from a GFF or like... |
219 |
- if (type=="rnaseq" && !.userOrg(genome,localDb) |
|
220 |
- && !(genome %in% getSupportedOrganisms())) |
|
221 |
- stop("When type is \"rnaseq\", only the supported genomes are allowed!") |
|
222 |
- |
|
219 |
+ if (type=="rnaseq" && is.character(localDb) && file.exists(localDb) |
|
220 |
+ && !.userOrg(genome,localDb) && !(genome %in% getSupportedOrganisms())) |
|
221 |
+ stop("When type is \"rnaseq\", only the supported or user imported ", |
|
222 |
+ "genomes are allowed!") |
|
223 |
+ |
|
223 | 224 |
# annotation must be a list to be fed to buildCustomAnnotation |
224 | 225 |
if (is.list(genome) && !is.data.frame(genome)) { |
225 | 226 |
# members are checked by buildCustomAnnotation if required |
... | ... |
@@ -10,6 +10,7 @@ recoup <- function( |
10 | 10 |
version="auto", |
11 | 11 |
refdb=c("ensembl","ucsc","refseq"), |
12 | 12 |
flank=c(2000,2000), |
13 |
+ onFlankFail=c("drop","trim"), |
|
13 | 14 |
fraction=1, |
14 | 15 |
orderBy=list( |
15 | 16 |
what=c("none","suma","sumn","maxa","maxn","avga","avgn","hcn"), |
... | ... |
@@ -58,6 +59,7 @@ recoup <- function( |
58 | 59 |
corrSmoothPar=ifelse(is.null(design),0.1,0.5), |
59 | 60 |
singleFacet=c("none","wrap","grid"), |
60 | 61 |
multiFacet=c("wrap","grid"), |
62 |
+ singleFacetDirection=c("horizontal","vertical"), |
|
61 | 63 |
conf=TRUE, |
62 | 64 |
device=c("x11","png","jpg","tiff","bmp","pdf","ps"), |
63 | 65 |
outputDir=".", |
... | ... |
@@ -163,15 +165,25 @@ recoup <- function( |
163 | 165 |
region <- tolower(region[1]) |
164 | 166 |
refdb <- tolower(refdb[1]) |
165 | 167 |
type <- tolower(type[1]) |
168 |
+ onFlankFail <- tolower(onFlankFail[1]) |
|
166 | 169 |
signal <- tolower(signal[1]) |
167 | 170 |
if (!is.null(design) && !is.data.frame(design)) |
168 | 171 |
checkFileArgs("design",design) |
169 | 172 |
if (!is.data.frame(genome) && !is.list(genome) && file.exists(genome)) |
170 | 173 |
checkFileArgs("genome",genome) |
171 |
- else if (is.character(genome)) |
|
172 |
- checkTextArgs("genome",genome,getSupportedOrganisms(),multiarg=FALSE) |
|
173 |
- checkTextArgs("refdb",refdb,getSupportedRefDbs(),multiarg=FALSE) |
|
174 |
+ else if (is.character(genome)) { |
|
175 |
+ if (is.character(localDb) && file.exists(localDb)) { |
|
176 |
+ if (!.userOrg(genome,localDb)) |
|
177 |
+ checkTextArgs("genome",genome,getSupportedOrganisms(), |
|
178 |
+ multiarg=FALSE) |
|
179 |
+ } |
|
180 |
+ } |
|
181 |
+ if (is.character(localDb) && file.exists(localDb)) { |
|
182 |
+ if (!.userRefdb(refdb,localDb)) |
|
183 |
+ checkTextArgs("refdb",refdb,getSupportedRefDbs(),multiarg=FALSE) |
|
184 |
+ } |
|
174 | 185 |
checkTextArgs("type",type,c("chipseq","rnaseq"),multiarg=FALSE) |
186 |
+ checkTextArgs("onFlankFail",onFlankFail,c("drop","trim"),multiarg=FALSE) |
|
175 | 187 |
checkTextArgs("signal",signal,c("coverage","rpm"),multiarg=FALSE) |
176 | 188 |
checkNumArgs("fraction",fraction,"numeric",c(0,1),"botheq") |
177 | 189 |
if (any(flank<0)) |
... | ... |
@@ -204,7 +216,8 @@ recoup <- function( |
204 | 216 |
# If type is rnaseq, the only allowed genomes are the ones supported by |
205 | 217 |
# recoup for the time being. In the future, a custom RNA-Seq genome may be |
206 | 218 |
# constructed from a GFF or like... |
207 |
- if (type=="rnaseq" && !(genome %in% getSupportedOrganisms())) |
|
219 |
+ if (type=="rnaseq" && !.userOrg(genome,localDb) |
|
220 |
+ && !(genome %in% getSupportedOrganisms())) |
|
208 | 221 |
stop("When type is \"rnaseq\", only the supported genomes are allowed!") |
209 | 222 |
|
210 | 223 |
# annotation must be a list to be fed to buildCustomAnnotation |
... | ... |
@@ -291,6 +304,8 @@ recoup <- function( |
291 | 304 |
region=if (is.null(thisCall$region)) prevCallParams$region else |
292 | 305 |
region, |
293 | 306 |
type=if (is.null(thisCall$type)) prevCallParams$type else type, |
307 |
+ onFlankFail=if (is.null(thisCall$onFlankFail)) |
|
308 |
+ prevCallParams$onFlankFail else onFlankFail, |
|
294 | 309 |
signal=if (is.null(thisCall$signal)) prevCallParams$signal |
295 | 310 |
else signal, |
296 | 311 |
genome=if (is.null(thisCall$genome)) prevCallParams$genome else |
... | ... |
@@ -335,6 +350,7 @@ recoup <- function( |
335 | 350 |
callParams <- list( |
336 | 351 |
region=region, |
337 | 352 |
type=type, |
353 |
+ onFlankFail=onFlankFail, |
|
338 | 354 |
signal=signal, |
339 | 355 |
genome=genome, |
340 | 356 |
version=version, |
... | ... |
@@ -366,7 +382,8 @@ recoup <- function( |
366 | 382 |
# Redefine all final arguments for this call |
367 | 383 |
region <- callParams$region |
368 | 384 |
type <- callParams$type |
369 |
- singnal <- callParams$signal |
|
385 |
+ onFlankFail <- callParams$onFlankFail |
|
386 |
+ signal <- callParams$signal |
|
370 | 387 |
genome <- callParams$genome |
371 | 388 |
version <- callParams$version |
372 | 389 |
refdb <- callParams$refdb |
... | ... |
@@ -517,7 +534,7 @@ recoup <- function( |
517 | 534 |
# Read and check design compatibilities. Check if k-means is requested and |
518 | 535 |
# message accordingly. If k-means is requested it will be added to the |
519 | 536 |
# design data frame |
520 |
- hasDesign <- FALSE |
|
537 |
+ #hasDesign <- FALSE |
|
521 | 538 |
if (!is.null(design)) { |
522 | 539 |
if (!is.data.frame(design)) |
523 | 540 |
design <- read.delim(design,row.names=1) |
... | ... |
@@ -617,7 +634,7 @@ recoup <- function( |
617 | 634 |
# Here we must determine the final ranges to pass to preprocessRanges and |
618 | 635 |
# then work with these later on |
619 | 636 |
intermRanges <- getMainRanges(genomeRanges,helperRanges=helperRanges,type, |
620 |
- region,flank,rc=rc) |
|
637 |
+ region,flank,onFlankFail,rc=rc) |
|
621 | 638 |
# It is very important that all the Ranges have Seqinfo objects attached as |
622 | 639 |
# they have to be trimmed for potential extensions (due to flanking regions) |
623 | 640 |
# beyond chromosome lengths. The built-in annotations do provide this option |
... | ... |
@@ -625,12 +642,32 @@ recoup <- function( |
625 | 642 |
# interest should be provided too. Or if not provided, the user will be |
626 | 643 |
# responsible for any crash. |
627 | 644 |
# TODO: When input ranges are custom, genome should be given to make Seqinfo |
628 |
- if (type == "chipseq") # Otherwise, throw error with GRangesList |
|
629 |
- mainRanges <- trim(intermRanges$mainRanges) |
|
630 |
- else if (type == "rnaseq") |
|
631 |
- mainRanges <- intermRanges$mainRanges |
|
632 |
- bamRanges <- trim(intermRanges$bamRanges) |
|
633 |
- |
|
645 |
+ |
|
646 |
+ # Should not be needed anymore with the new onFlankFail option |
|
647 |
+ #if (type == "chipseq") # Otherwise, throw error with GRangesList |
|
648 |
+ # mainRanges <- trim(intermRanges$mainRanges) |
|
649 |
+ #else if (type == "rnaseq") |
|
650 |
+ # mainRanges <- intermRanges$mainRanges |
|
651 |
+ #bamRanges <- trim(intermRanges$bamRanges) |
|
652 |
+ |
|
653 |
+ mainRanges <- intermRanges$mainRanges |
|
654 |
+ bamRanges <- intermRanges$bamRanges |
|
655 |
+ hadOffBound <- intermRanges$offBound |
|
656 |
+ # We must also correct the design in case of drop |
|
657 |
+ if (!is.null(design) && onFlankFail == "drop" && hadOffBound) |
|
658 |
+ design <- design[names(mainRanges),,drop=FALSE] |
|
659 |
+ # Say something if off bound detected |
|
660 |
+ if (hadOffBound) { |
|
661 |
+ if (onFlankFail == "drop") |
|
662 |
+ warning("Off bound regions detected after flanking! Offending ", |
|
663 |
+ "regions will be dropped from the reference\n and the design if ", |
|
664 |
+ "provided...",immediate.=TRUE) |
|
665 |
+ else if (onFlankFail == "trim") |
|
666 |
+ warning("Off bound regions detected after flanking! Offending ", |
|
667 |
+ "reference regions will be trimmed\n and the flanking regions ", |
|
668 |
+ "will be merged with thre main...",immediate.=TRUE) |
|
669 |
+ } |
|
670 |
+ |
|
634 | 671 |
# Here we must write code for the reading and normalization of bam files |
635 | 672 |
# The preprocessRanges function looks if there is a valid (not null) ranges |
636 | 673 |
# field in input |
... | ... |
@@ -648,7 +685,7 @@ recoup <- function( |
648 | 685 |
#set.seed(preprocessParams$seed) |
649 | 686 |
refIndex <- sort(sample(length(mainRanges),newSize)) |
650 | 687 |
mainRanges <- mainRanges[refIndex] |
651 |
- for (i in 1:length(input)) { |
|
688 |
+ for (i in seq_len(length(input))) { |
|
652 | 689 |
if (!is.null(input[[i]]$ranges)) { |
653 | 690 |
newSize <- round(fraction*length(input[[i]]$ranges)) |
654 | 691 |
#set.seed(preprocessParams$seed) |
... | ... |
@@ -745,13 +782,17 @@ recoup <- function( |
745 | 782 |
if (signal == "coverage") { # Otherwise, matrix calculate by rpm |
746 | 783 |
if (binParams$chunking) { |
747 | 784 |
if (type=="chipseq") |
748 |
- binParams$chunks <- split(1:length(genomeRanges), |
|
785 |
+ binParams$chunks <- split(seq_len(length(genomeRanges)), |
|
749 | 786 |
as.character(seqnames(genomeRanges))) |
750 | 787 |
else if (type=="rnaseq") |
751 |
- binParams$chunks <- split(1:length(helperRanges), |
|
788 |
+ binParams$chunks <- split(seq_len(length(helperRanges)), |
|
752 | 789 |
as.character(seqnames(helperRanges))) |
753 | 790 |
} |
754 |
- input <- profileMatrix(input,flank,binParams,rc) |
|
791 |
+ # hadOffBounds must be passed on input at some point |
|
792 |
+ fe0 <- FALSE |
|
793 |
+ if (onFlankFail == "trim" && hadOffBound) |
|
794 |
+ fe0 <- TRUE |
|
795 |
+ input <- profileMatrix(input,flank,binParams,rc,.feNoSplit=fe0) |
|
755 | 796 |
} |
756 | 797 |
|
757 | 798 |
# In some strange glimpses, we are getting very few NaNs in profile matrix |
... | ... |
@@ -756,8 +756,11 @@ recoup <- function( |
756 | 756 |
|
757 | 757 |
# In some strange glimpses, we are getting very few NaNs in profile matrix |
758 | 758 |
# which I was unable to reproduce on a gene by gene basis. If no NaNs are |
759 |
- # detected, no action is performed in the input object. |
|
760 |
- input <- imputeProfile(input,method="simple",rc) |
|
759 |
+ # detected, no action is performed in the input object. Also, these NaNs |
|
760 |
+ # seem to appear only on zero count gene profiles, so it's safe to impute |
|
761 |
+ # by zero. |
|
762 |
+ #input <- imputeProfile(input,method="simple",rc) |
|
763 |
+ input <- imputeZero(input) |
|
761 | 764 |
|
762 | 765 |
# Perform the k-means clustering if requested and append to design (which |
763 | 766 |
# has been checked, if we are allowed to do so) |
... | ... |
@@ -415,6 +415,11 @@ recoup <- function( |
415 | 415 |
df=genome, |
416 | 416 |
keep.extra.columns=TRUE |
417 | 417 |
) |
418 |
+ # Need to assigne Seqinfo... |
|
419 |
+ sf <- .chromInfoToSeqInfoDf(.chromInfoFromBAM(input[[1]]$file), |
|
420 |
+ asSeqinfo=TRUE) |
|
421 |
+ seqlevels(genomeRanges) <- seqlevels(sf) |
|
422 |
+ seqinfo(genomeRanges) <- sf |
|
418 | 423 |
} |
419 | 424 |
# Annotation case #3: use local database or automatically on-the-fly |
420 | 425 |
else { |
... | ... |
@@ -752,7 +757,7 @@ recoup <- function( |
752 | 757 |
# In some strange glimpses, we are getting very few NaNs in profile matrix |
753 | 758 |
# which I was unable to reproduce on a gene by gene basis. If no NaNs are |
754 | 759 |
# detected, no action is performed in the input object. |
755 |
- input <- imputeProfile(input,rc) |
|
760 |
+ input <- imputeProfile(input,method="simple",rc) |
|
756 | 761 |
|
757 | 762 |
# Perform the k-means clustering if requested and append to design (which |
758 | 763 |
# has been checked, if we are allowed to do so) |
... | ... |
@@ -212,7 +212,7 @@ recoup <- function( |
212 | 212 |
# members are checked by buildCustomAnnotation if required |
213 | 213 |
# We only need to check that the gtfFile exists here |
214 | 214 |
if (!("gtf" %in% names(genome))) |
215 |
- stopp("A gtf field must be provided with an existing GTF file ", |
|
215 |
+ stop("A gtf field must be provided with an existing GTF file ", |
|
216 | 216 |
"when providing a list with custom annotation elements!") |
217 | 217 |
if ("gtf" %in% names(genome) && is.character(genome$gtf) |
218 | 218 |
&& !file.exists(genome$gtf)) |
... | ... |
@@ -479,8 +479,8 @@ recoup <- function( |
479 | 479 |
type="gene",version=version,db=localDb,rc=rc), |
480 | 480 |
error=function(e) { |
481 | 481 |
tryCatch({ |
482 |
- gtfFile <- annotation$gtf |
|
483 |
- metadata <- annotation |
|
482 |
+ gtfFile <- genome$gtf |
|
483 |
+ metadata <- genome |
|
484 | 484 |
metadata$gtf <- NULL |
485 | 485 |
geneData <- importCustomAnnotation(gtfFile, |
486 | 486 |
metadata,"gene") |
... | ... |
@@ -749,6 +749,11 @@ recoup <- function( |
749 | 749 |
input <- profileMatrix(input,flank,binParams,rc) |
750 | 750 |
} |
751 | 751 |
|
752 |
+ # In some strange glimpses, we are getting very few NaNs in profile matrix |
|
753 |
+ # which I was unable to reproduce on a gene by gene basis. If no NaNs are |
|
754 |
+ # detected, no action is performed in the input object. |
|
755 |
+ input <- imputeProfile(input,rc) |
|
756 |
+ |
|
752 | 757 |
# Perform the k-means clustering if requested and append to design (which |
753 | 758 |
# has been checked, if we are allowed to do so) |
754 | 759 |
if (kmParams$k>0) |
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
recoup <- function( |
2 | 2 |
input, |
3 | 3 |
design=NULL, |
4 |
- region=c("genebody","tss","tes","custom"), |
|
4 |
+ region=c("genebody","tss","tes","utr3","custom"), |
|
5 | 5 |
type=c("chipseq","rnaseq"), |
6 | 6 |
signal=c("coverage","rpm"), |
7 | 7 |
genome=c("hg18","hg19","hg38","mm9","mm10","rn5","rn6","dm3","dm6", |
... | ... |
@@ -126,9 +126,16 @@ recoup <- function( |
126 | 126 |
#complexHeatmapParams=getDefaultListArgs("complexHeatmapParams"), |
127 | 127 |
bamParams=NULL, |
128 | 128 |
onTheFly=FALSE, # Directly from BAM w/o Ranges storing, also N/A with BED, |
129 |
- localDbHome=file.path(path.expand("~"),".recoup"), |
|
129 |
+ #localDb=file.path(path.expand("~"),".recoup"), |
|
130 |
+ localDb=file.path(system.file(package="recoup"),"annotation.sqlite"), |
|
130 | 131 |
rc=NULL |
131 | 132 |
) { |
133 |
+ # Simple check to throw error if user us using the old databasing system |
|
134 |
+ if (any(dir.exists(file.path(dirname(localDb),refdb)))) |
|
135 |
+ stop("Possible old recoup database system detected. Please rebuild ", |
|
136 |
+ "using the new system or download a pre-built database. See also ", |
|
137 |
+ "the vignettes.") |
|
138 |
+ |
|
132 | 139 |
############################################################################ |
133 | 140 |
# Begin of simple parameter checking for a new or a restored object |
134 | 141 |
if (is.list(input) && !is.null(input$data)) { # Refeeding recoup object |
... | ... |
@@ -147,7 +154,7 @@ recoup <- function( |
147 | 154 |
input <- readConfig(input) |
148 | 155 |
checkInput(input) |
149 | 156 |
if (is.null(names(input))) |
150 |
- names(input) <- sapply(input,function(x) return(x$id)) |
|
157 |
+ names(input) <- vapply(input,function(x) return(x$id),character(1)) |
|
151 | 158 |
|
152 | 159 |
# Check if there are any mispelled or invalid parameters and throw a warning |
153 | 160 |
checkMainArgs(as.list(match.call())) |
... | ... |
@@ -159,7 +166,7 @@ recoup <- function( |
159 | 166 |
signal <- tolower(signal[1]) |
160 | 167 |
if (!is.null(design) && !is.data.frame(design)) |
161 | 168 |
checkFileArgs("design",design) |
162 |
- if (!is.data.frame(genome) && file.exists(genome)) |
|
169 |
+ if (!is.data.frame(genome) && !is.list(genome) && file.exists(genome)) |
|
163 | 170 |
checkFileArgs("genome",genome) |
164 | 171 |
else if (is.character(genome)) |
165 | 172 |
checkTextArgs("genome",genome,getSupportedOrganisms(),multiarg=FALSE) |
... | ... |
@@ -200,6 +207,33 @@ recoup <- function( |
200 | 207 |
if (type=="rnaseq" && !(genome %in% getSupportedOrganisms())) |
201 | 208 |
stop("When type is \"rnaseq\", only the supported genomes are allowed!") |
202 | 209 |
|
210 |
+ # annotation must be a list to be fed to buildCustomAnnotation |
|
211 |
+ if (is.list(genome) && !is.data.frame(genome)) { |
|
212 |
+ # members are checked by buildCustomAnnotation if required |
|
213 |
+ # We only need to check that the gtfFile exists here |
|
214 |
+ if (!("gtf" %in% names(genome))) |
|
215 |
+ stopp("A gtf field must be provided with an existing GTF file ", |
|
216 |
+ "when providing a list with custom annotation elements!") |
|
217 |
+ if ("gtf" %in% names(genome) && is.character(genome$gtf) |
|
218 |
+ && !file.exists(genome$gtf)) |
|
219 |
+ stop("An existing GTF file must be provided when providing a ", |
|
220 |
+ "list with custom annotation elements!") |
|
221 |
+ } |
|
222 |
+ |
|
223 |
+ # Check what is going on with genome, first check if file, then localDb |
|
224 |
+ if (!is.data.frame(genome) && !is.list(genome) && is.character(genome) |
|
225 |
+ && !file.exists(genome) && is.character(localDb) |
|
226 |
+ && file.exists(localDb)) { |
|
227 |
+ if (!.userOrg(genome,localDb)) |
|
228 |
+ checkTextArgs("genome",genome,c("hg18","hg19","hg38","mm9","mm10", |
|
229 |
+ "rn5","rn6","dm3","dm6","danrer7","danrer10","pantro4", |
|
230 |
+ "pantro5","susscr3","susscr11","ecucab2","tair10"), |
|
231 |
+ multiarg=FALSE) |
|
232 |
+ if (!.userRefdb(refdb,localDb)) |
|
233 |
+ checkTextArgs("refdb",refdb,c("ensembl","ucsc","refseq"), |
|
234 |
+ multiarg=FALSE) |
|
235 |
+ } |
|
236 |
+ |
|
203 | 237 |
# and list arguments |
204 | 238 |
orderByDefault <- getDefaultListArgs("orderBy") |
205 | 239 |
binParamsDefault <- getDefaultListArgs("binParams") |
... | ... |
@@ -232,12 +266,12 @@ recoup <- function( |
232 | 266 |
strandedParams <- validateListArgs("strandedParams",strandedParams) |
233 | 267 |
|
234 | 268 |
if (is.null(plotParams$outputBase)) |
235 |
- plotParams$outputBase <- paste(sapply(input,function(x) return(x$id)), |
|
236 |
- collapse="-") |
|
269 |
+ plotParams$outputBase <- paste(vapply(input,function(x) return(x$id), |
|
270 |
+ character(1)),collapse="-") |
|
237 | 271 |
|
238 |
- if (any(sapply(input,function(x) return(tolower(x$format)=="bed"))) |
|
239 |
- && !(preprocessParams$bedGenome %in% c("hg18","hg19","hg38","mm9", |
|
240 |
- "mm10","rn5","dm3","danrer7","pantro4","susscr3"))) |
|
272 |
+ bbb <- vapply(input,function(x) return(tolower(x$format)=="bed"),logical(1)) |
|
273 |
+ if (any(bbb) && !(preprocessParams$bedGenome %in% c("hg18","hg19","hg38", |
|
274 |
+ "mm9","mm10","rn5","dm3","danrer7","pantro4","susscr3"))) |
|
241 | 275 |
stop("When short read files are in BED format, either the genome ", |
242 | 276 |
"parameter should be one\nof the supported organisms, or ", |
243 | 277 |
"preprocessParams$bedGenome must be specified as one of them.") |
... | ... |
@@ -291,8 +325,8 @@ recoup <- function( |
291 | 325 |
# prevCallParams$bamParams,bamParams), ## Unused |
292 | 326 |
onTheFly=if (is.null(thisCall$onTheFly)) prevCallParams$onTheFly |
293 | 327 |
else onTheFly, |
294 |
- localDbHome=if (is.null(thisCall$localDbHome)) |
|
295 |
- prevCallParams$localDbHome else localDbHome, |
|
328 |
+ localDb=if (is.null(thisCall$localDb)) |
|
329 |
+ prevCallParams$localDb else localDb, |
|
296 | 330 |
rc=if (is.null(thisCall$rc)) prevCallParams$rc else rc, |
297 | 331 |
customIsBase=NULL # Additional placeholder |
298 | 332 |
) |
... | ... |
@@ -319,7 +353,7 @@ recoup <- function( |
319 | 353 |
complexHeatmapParams=complexHeatmapParams, |
320 | 354 |
bamParams=bamParams, |
321 | 355 |
onTheFly=onTheFly, |
322 |
- localDbHome=localDbHome, |
|
356 |
+ localDb=localDb, |
|
323 | 357 |
rc=rc, |
324 | 358 |
customIsBase=NULL # Additional placeholder |
325 | 359 |
) |
... | ... |
@@ -350,152 +384,127 @@ recoup <- function( |
350 | 384 |
complexHeatmapParams <- callParams$complexHeatmapParams |
351 | 385 |
bamParams <- callParams$bamParams |
352 | 386 |
onTheFly <- callParams$onTheFly |
353 |
- localDbHome <- callParams$localDbHome |
|
387 |
+ localDb <- callParams$localDb |
|
354 | 388 |
rc <- callParams$rc |
355 | 389 |
# End of complex parameter storage procedure and parameter recall from a |
356 | 390 |
# previous call |
357 | 391 |
############################################################################ |
358 | 392 |
|
359 | 393 |
# Continue with actual work |
360 |
- if (!is.data.frame(genome)) { |
|
394 |
+ # Annotation case #1: provided strictly as a BED-like data frame with loci |
|
395 |
+ if (is.data.frame(genome)) { |
|
396 |
+ if (type=="rnaseq") |
|
397 |
+ stop("When type=\"rnaseq\", only the usage of a supported or user-", |
|
398 |
+ "imported organism from GTF file is allowed!") |
|
399 |
+ rownames(genome) <- as.character(genome[,4]) |
|
400 |
+ genomeRanges <- makeGRangesFromDataFrame( |
|
401 |
+ df=genome, |
|
402 |
+ keep.extra.columns=TRUE |
|
403 |
+ ) |
|
404 |
+ } |
|
405 |
+ # Annotation case #2: provided strictly as a BED-like file with loci |
|
406 |
+ else if (!is.list(genome) && !is.data.frame(genome) |
|
407 |
+ && is.character(genome)) { |
|
361 | 408 |
if (file.exists(genome)) { |
362 |
- genome <- read.delim(genome) # Must be bed like |
|
409 |
+ if (type=="rnaseq") |
|
410 |
+ stop("When type=\"rnaseq\", only the usage of a supported or ", |
|
411 |
+ "user-imported organism from GTF file is allowed!") |
|
412 |
+ genome <- read.delim(genome) |
|
363 | 413 |
rownames(genome) <- as.character(genome[,4]) |
364 | 414 |
genomeRanges <- makeGRangesFromDataFrame( |
365 | 415 |
df=genome, |
366 | 416 |
keep.extra.columns=TRUE |
367 | 417 |
) |
368 | 418 |
} |
419 |
+ # Annotation case #3: use local database or automatically on-the-fly |
|
369 | 420 |
else { |
370 |
- # Check if local storage has been set |
|
371 |
- storageHome <- file.path(localDbHome,refdb,genome) |
|
372 |
- if (dir.exists(storageHome)) { |
|
373 |
- # Some compatibility with previous annoation stores |
|
374 |
- if ("gene.rda" %in% dir(storageHome)) { # Older version |
|
375 |
- warning("Older annotation storage detected! Please ", |
|
376 |
- "rebuild your annotation storage\nusing the ", |
|
377 |
- "buildAnnotationStore function so as to have the\n", |
|
378 |
- "ability of versioning genomic coordinate annotations.", |
|
379 |
- "\nIn the future, older versions may become unusable.", |
|
380 |
- "\n",immediate.=TRUE) |
|
381 |
- storageHomeVersion <- storageHome |
|
382 |
- } |
|
383 |
- else { # Newer version |
|
384 |
- # Decide on version |
|
385 |
- if (version != "auto") { |
|
386 |
- storageHomeVersion <- file.path(storageHome,version) |
|
387 |
- if (!dir.exists(storageHomeVersion)) { |
|
388 |
- warning("The annotation directory ", |
|
389 |
- storageHomeVersion, |
|
390 |
- " does not seem to exist! Have you run ", |
|
391 |
- "buildAnnotationStorage? Will use newest ", |
|
392 |
- "existing version.",immediate.=TRUE) |
|
393 |
- version <- "auto" |
|
394 |
- } |
|
395 |
- } |
|
396 |
- if (version == "auto") { |
|
397 |
- vers <- suppressWarnings(as.numeric(dir(storageHome))) |
|
398 |
- if (any(is.na(vers))) { |
|
399 |
- of <- vers[which(is.na(vers))] |
|
400 |
- stop("Corrupted annotation storage directory ", |
|
401 |
- "contents! ->",of,"<- Either remove offending ", |
|
402 |
- "files/directories or rebuild.") |
|
403 |
- } |
|
404 |
- vers <- sort(vers,decreasing=TRUE) |
|
405 |
- version <- vers[1] |
|
406 |
- storageHomeVersion <- file.path(storageHome,version) |
|
407 |
- } |
|
408 |
- } |
|
409 |
- |
|
410 |
- if (type=="chipseq") { |
|
411 |
- g <- load(file.path(storageHomeVersion,"gene.rda")) |
|
412 |
- genomeRanges <- gene |
|
413 |
- helperRanges <- NULL |
|
414 |
- } |
|
415 |
- else if (type=="rnaseq") { |
|
416 |
- # Load the helper ranges anyway |
|
417 |
- g <- load(file.path(storageHomeVersion,"gene.rda")) |
|
418 |
- helperRanges <- gene |
|
419 |
- if (all(flank==0)) { |
|
420 |
- g <- load(file.path(storageHomeVersion, |
|
421 |
- "summarized_exon.rda")) |
|
422 |
- genomeRanges <- sexon |
|
423 |
- } |
|
424 |
- else if (all(flank==500)) { |
|
425 |
- g <- load(file.path(storageHomeVersion, |
|
426 |
- "summarized_exon_F500.rda")) |
|
427 |
- genomeRanges <- flankedSexon |
|
428 |
- } |
|
429 |
- else if (all(flank==1000)) { |
|
430 |
- g <- load(file.path(storageHomeVersion, |
|
431 |
- "summarized_exon_F1000.rda")) |
|
432 |
- genomeRanges <- flankedSexon |
|
433 |
- } |
|
434 |
- else if (all(flank==2000)) { |
|
435 |
- g <- load(file.path(storageHomeVersion, |
|
436 |
- "summarized_exon_F2000.rda")) |
|
437 |
- genomeRanges <- flankedSexon |
|
438 |
- } |
|
439 |
- else if (all(flank==5000)) { |
|
440 |
- g <- load(file.path(storageHomeVersion, |
|
441 |
- "summarized_exon_F5000.rda")) |
|
442 |
- genomeRanges <- flankedSexon |
|
443 |
- } |
|
444 |
- else { |
|
445 |
- warning("When using recoup in RNA-Seq mode, it is ", |
|
446 |
- "much faster to use a precalculated set of ", |
|
447 |
- "regions and their flanks (0, 500, 1000, 2000 and ", |
|
448 |
- "5000 bps supported) and subset this one for a ", |
|
449 |
- "custom set of genes. Otherwise calculations may ", |
|
450 |
- "take too long to complete.",immediate.=TRUE) |
|
451 |
- genomeRanges <- |
|
452 |
- getMainRnaRangesOnTheFly(helperRanges,flank,rc=rc) |
|
453 |
- } |
|
454 |
- } |
|
421 |
+ if (type=="chipseq") { |
|
422 |
+ if (region == "utr3") |
|
423 |
+ genomeRanges <- tryCatch(loadAnnotation(genome,refdb, |
|
424 |
+ type="utr",version=version,summarized=TRUE,db=localDb, |
|
425 |
+ rc=rc), |
|
426 |
+ error=function(e) { |
|
427 |
+ tryCatch({ |
|
428 |
+ gtfFile <- genome$gtf |
|
429 |
+ metadata <- genome |
|
430 |
+ metadata$gtf <- NULL |
|
431 |
+ genomeRanges <- importCustomAnnotation(gtfFile, |
|
432 |
+ metadata,"utr") |
|
433 |
+ },error=function(e) { |
|
434 |
+ message("Error ",e) |
|
435 |
+ stop("Please provide an existing organism or ", |
|
436 |
+ "a list with annotation metadata and GTF ", |
|
437 |
+ "file!") |
|
438 |
+ },finally="") |
|
439 |
+ },finally="") |
|
440 |
+ else |
|
441 |
+ genomeRanges <- tryCatch(loadAnnotation(genome,refdb, |
|
442 |
+ type="gene",version=version,db=localDb,rc=rc), |
|
443 |
+ error=function(e) { |
|
444 |
+ tryCatch({ |
|
445 |
+ gtfFile <- genome$gtf |
|
446 |
+ metadata <- genome |
|
447 |
+ metadata$gtf <- NULL |
|
448 |
+ geneData <- importCustomAnnotation(gtfFile, |
|
449 |
+ metadata,"gene") |
|
450 |
+ },error=function(e) { |
|
451 |
+ message("Error ",e) |
|
452 |
+ stop("Please provide an existing organism or ", |
|
453 |
+ "a list with annotation metadata and GTF ", |
|
454 |
+ "file!") |
|
455 |
+ },finally="") |
|
456 |
+ },finally="") |
|
457 |
+ |
|
458 |
+ helperRanges <- NULL |
|
455 | 459 |
} |
456 |
- else { # On the fly |
|
457 |
- message("Getting annotation on the fly for ",genome," from ", |
|
458 |
- refdb) |
|
459 |
- if (type=="chipseq") { |
|
460 |
- genome <- getAnnotation(genome,"gene",refdb=refdb,rc=rc) |
|
461 |
- helperRanges <- NULL |
|
462 |
- } |
|
463 |
- else if (type=="rnaseq") { |
|
464 |
- helper <- getAnnotation(genome,"gene",refdb=refdb,rc=rc) |
|
465 |
- helperRanges <- makeGRangesFromDataFrame( |
|
466 |
- df=helper, |
|
467 |
- keep.extra.columns=TRUE, |
|
468 |
- seqnames.field="chromosome" |
|
469 |
- ) |
|
470 |
- names(helperRanges) <- as.character(helperRanges$gene_id) |
|
471 |
- genome <- getAnnotation(genome,"exon",refdb=refdb,rc=rc) |
|
472 |
- annGr <- makeGRangesFromDataFrame( |
|
473 |
- df=genome, |
|
474 |
- keep.extra.columns=TRUE, |
|
475 |
- seqnames.field="chromosome" |
|
476 |
- ) |
|
477 |
- message("Merging exons") |
|
478 |
- annGr <- reduceExons(annGr,rc=rc) |
|
479 |
- names(annGr) <- as.character(annGr$exon_id) |
|
480 |
- genomeRanges <- split(annGr,annGr$gene_id) |
|
481 |
- } |
|
460 |
+ else if (type=="rnaseq") { |
|
461 |
+ genomeRanges <- tryCatch(loadAnnotation(genome,refdb, |
|
462 |
+ type="exon",version=version,summarized=TRUE,db=localDb, |
|
463 |
+ rc=rc), |
|
464 |
+ error=function(e) { |
|
465 |
+ tryCatch({ |
|
466 |
+ gtfFile <- genome$gtf |
|
467 |
+ metadata <- genome |
|
468 |
+ metadata$gtf <- NULL |
|
469 |
+ genomeRanges <- importCustomAnnotation(gtfFile, |
|
470 |
+ metadata,"exon") |
|
471 |
+ },error=function(e) { |
|
472 |
+ message("Error ",e) |
|
473 |
+ stop("Please provide an existing organism or ", |
|
474 |
+ "a list with annotation metadata and GTF ", |
|
475 |
+ "file!") |
|
476 |
+ },finally="") |
|
477 |
+ },finally="") |
|
478 |
+ helperRanges <- tryCatch(loadAnnotation(genome,refdb, |
|
479 |
+ type="gene",version=version,db=localDb,rc=rc), |
|
480 |
+ error=function(e) { |
|
481 |
+ tryCatch({ |
|
482 |
+ gtfFile <- annotation$gtf |
|
483 |
+ metadata <- annotation |
|
484 |
+ metadata$gtf <- NULL |
|
485 |
+ geneData <- importCustomAnnotation(gtfFile, |
|
486 |
+ metadata,"gene") |
|
487 |
+ },error=function(e) { |
|
488 |
+ message("Error ",e) |
|
489 |
+ stop("Please provide an existing organism or ", |
|
490 |
+ "a list with annotation metadata and GTF ", |
|
491 |
+ "file!") |
|
492 |
+ },finally="") |
|
493 |
+ },finally="") |
|
494 |
+ if (!is(genomeRanges,"GRangesList")) |
|
495 |
+ genomeRanges <- split(genomeRanges,genomeRanges$gene_id) |
|
496 |
+ helperRanges <- helperRanges[names(genomeRanges)] |
|
482 | 497 |
} |
483 | 498 |
} |
484 | 499 |
} |
485 |
- else { |
|
486 |
- rownames(genome) <- as.character(genome[,4]) |
|
487 |
- genomeRanges <- makeGRangesFromDataFrame( |
|
488 |
- df=genome, |
|
489 |
- keep.extra.columns=TRUE |
|
490 |
- ) |
|
491 |
- } |
|
492 | 500 |
|
493 | 501 |
# After genome read, check if we have a valid custom orderer |
494 | 502 |
if (!is.null(orderBy$custom)) { |
495 |
- if (length(orderBy$custom) != nrow(genome)) { |
|
503 |
+ if (length(orderBy$custom) != length(genomeRanges)) { |
|
496 | 504 |
warning("The custom orderer provide with orderBy parameter does ", |
497 |
- "not have length equal to the number of elements in genome ", |
|
498 |
- "argument and will be ignored!",immediate.=TRUE) |
|
505 |
+ "not have length equal to the number\nof elements in the ", |
|
506 |
+ "interrogated genomic regions and will be ignored!", |
|
507 |
+ immediate.=TRUE) |
|
499 | 508 |
orderBy$custom <- NULL |
500 | 509 |
} |
501 | 510 |
} |
... | ... |
@@ -550,8 +559,8 @@ recoup <- function( |
550 | 559 |
helperRanges[rownames(design)] |
551 | 560 |
},error=function(e) { |
552 | 561 |
stop("Unexpected error occured! Are you sure that element ", |
553 |
- "(row) names in the design file are of the same type as ", |
|
554 |
- "the genome file?") |
|
562 |
+ "(row) names in the design file are of the same type ", |
|
563 |
+ "as the genome file?") |
|
555 | 564 |
},finally={}) |
556 | 565 |
} |
557 | 566 |
# ...but maybe the names are incompatible |
... | ... |
@@ -577,7 +586,8 @@ recoup <- function( |
577 | 586 |
} |
578 | 587 |
|
579 | 588 |
# Now we must follow two paths according to region type, genebody and custom |
580 |
- # areas with equal/unequal widths, or tss, tes and 1-width custom areas. |
|
589 |
+ # areas with equal/unequal widths and utr3 or tss, tes and 1-width custom |
|
590 |
+ # areas. |
|
581 | 591 |
callParams$customIsBase <- FALSE |
582 | 592 |
if (region=="custom" && all(width(genomeRanges)==1)) { |
583 | 593 |
if (all(flank==0)) { |
... | ... |
@@ -612,20 +622,23 @@ recoup <- function( |
612 | 622 |
# TODO: When input ranges are custom, genome should be given to make Seqinfo |
613 | 623 |
if (type == "chipseq") # Otherwise, throw error with GRangesList |
614 | 624 |
mainRanges <- trim(intermRanges$mainRanges) |
625 |
+ else if (type == "rnaseq") |
|
626 |
+ mainRanges <- intermRanges$mainRanges |
|
615 | 627 |
bamRanges <- trim(intermRanges$bamRanges) |
616 | 628 |
|
617 | 629 |
# Here we must write code for the reading and normalization of bam files |
618 | 630 |
# The preprocessRanges function looks if there is a valid (not null) ranges |
619 | 631 |
# field in input |
620 |
- #if (!onTheFly) |
|
621 |
- #input <- preprocessRanges(input,preprocessParams,bamParams=bamParams,rc=rc) |
|
622 |
- input <- preprocessRanges(input,preprocessParams,genome,bamRanges, |
|
623 |
- bamParams=bamParams,rc=rc) |
|
632 |
+ if (!onTheFly) |
|
633 |
+ input <- preprocessRanges(input,preprocessParams,genome,bamRanges, |
|
634 |
+ bamParams=bamParams,rc=rc) |
|
624 | 635 |
|
625 | 636 |
# At this point we must apply the fraction parameter if <1. We choose this |
626 | 637 |
# point in order not to restrict later usage of the read ranges and since it |
627 |
- # does not take much time to load into memory. |
|
628 |
- if (fraction<1) { |
|
638 |
+ # does not take much time to load into memory. In addition, this operation |
|
639 |
+ # cannot be applied when streaming BAMs. In that case, user must subsample |
|
640 |
+ # outside recoup. |
|
641 |
+ if (!onTheFly && fraction<1) { |
|
629 | 642 |
newSize <- round(fraction*length(mainRanges)) |
630 | 643 |
#set.seed(preprocessParams$seed) |
631 | 644 |
refIndex <- sort(sample(length(mainRanges),newSize)) |
... | ... |
@@ -644,70 +657,51 @@ recoup <- function( |
644 | 657 |
} |
645 | 658 |
} |
646 | 659 |
|
647 |
- # Remove unwanted seqnames from reference ranges |
|
648 |
- chrs <- unique(unlist(lapply(input,function(x) { |
|
649 |
- if (x$format %in% c("bam","bed")) |
|
650 |
- return(as.character(runValue(seqnames(x$ranges)))) |
|
651 |
- else if (x$format=="bigwig") { |
|
652 |
- if (!requireNamespace("rtracklayer")) |
|
653 |
- stop("R package rtracklayer is required to read and import ", |
|
654 |
- "BigWig files!") |
|
655 |
- return(as.character(seqnames(seqinfo(BigWigFile(x$file))))) |
|
660 |
+ # Remove unwanted seqnames from reference ranges (if not on the fly) and |
|
661 |
+ # properly subset ranges if not all chromosomes are provided with BAM |
|
662 |
+ if (!onTheFly) { |
|
663 |
+ chrs <- unique(unlist(lapply(input,function(x) { |
|
664 |
+ if (x$format %in% c("bam","bed")) |
|
665 |
+ return(as.character(runValue(seqnames(x$ranges)))) |
|
666 |
+ else if (x$format=="bigwig") { |
|
667 |
+ if (!requireNamespace("rtracklayer")) |
|
668 |
+ stop("R package rtracklayer is required to read and ", |
|
669 |
+ "import BigWig files!") |
|
670 |
+ return(as.character(seqnames(seqinfo(BigWigFile(x$file))))) |
|
671 |
+ } |
|
672 |
+ }))) |
|
673 |
+ if (type=="chipseq") { |
|
674 |
+ #keep <- which(as.character(seqnames(mainRanges)) %in% chrs) |
|
675 |
+ #mainRanges <- mainRanges[keep] |
|
676 |
+ mainRanges <- .subsetGRangesByChrs(mainRanges,chrs) |
|
677 |
+ } |
|
678 |
+ else if (type=="rnaseq") { |
|
679 |
+ #keeph <- which(as.character(seqnames(helperRanges)) %in% chrs) |
|
680 |
+ #helperRanges <- helperRanges[keeph] |
|
681 |
+ #mainRanges <- mainRanges[names(helperRanges)] |
|
682 |
+ helperRanges <- .subsetGRangesByChrs(helperRanges,chrs) |
|
683 |
+ mainRanges <- .subsetGRangesByChrs(mainRanges,chrs) |
|
684 |
+ |
|
685 |
+ #################################################################### |
|
686 |
+ ## There must be an R bug with `lengths` here as although it runs in |
|
687 |
+ ## Rcmd, it does not pass package building or vignette kniting... |
|
688 |
+ ## But for the time being it seems that it is not needed as the name |
|
689 |
+ ## filtering works |
|
690 |
+ #lens <- which(lengths(genomeRanges)==0) |
|
691 |
+ #if (length(lens)>0) |
|
692 |
+ # genomeRanges[lens] <- NULL |
|
693 |
+ #################################################################### |
|
656 | 694 |
} |
657 |
- }))) |
|
658 |
- if (type=="chipseq") { |
|
659 |
- keep <- which(as.character(seqnames(mainRanges)) %in% chrs) |
|
660 |
- mainRanges <- mainRanges[keep] |
|
661 |
- } |
|
662 |
- else if (type=="rnaseq") { |
|
663 |
- keeph <- which(as.character(seqnames(helperRanges)) %in% chrs) |
|
664 |
- helperRanges <- helperRanges[keeph] |
|
665 |
- mainRanges <- mainRanges[names(helperRanges)] |
|
666 |
- ######################################################################## |
|
667 |
- ## There must be an R bug with `lengths` here as although it runs in |
|
668 |
- ## Rcmd, it does not pass package building or vignette kniting... But |
|
669 |
- ## for the time being it seems that it is not needed as the name |
|
670 |
- ## filtering works |
|
671 |
- #lens <- which(lengths(genomeRanges)==0) |
|
672 |
- #if (length(lens)>0) |
|
673 |
- # genomeRanges[lens] <- NULL |
|
674 |
- ######################################################################## |
|
675 | 695 |
} |
676 | 696 |
|
677 |
- #if (type=="chipseq") |
|
678 |
- # input <- coverageRef(input,genomeRanges,region,flank,strandedParams, |
|
679 |
- # rc=rc)#,bamParams) |
|
680 |
- #else if (type=="rnaseq") |
|
681 |
- # input <- coverageRnaRef(input,genomeRanges,helperRanges,flank, |
|
682 |
- # strandedParams,rc=rc)#,bamParams) |
|
683 |
- #if (type=="chipseq") |
|
684 |
- # input <- coverageRef(input,mainRanges,strandedParams,rc=rc) |
|
685 |
- #else if (type=="rnaseq") #{ |
|
686 |
- #if (flank[1]==0 && flank[2]==0) |
|
687 |
- # mainRnaRanges <- genomeRanges |
|
688 |
- #else { |
|
689 |
- # mainRnaRanges <- tryCatch( |
|
690 |
- # loadMainRnaRanges(genome,refdb,flank), |
|
691 |
- # error=function(e) { # Insanely slow fallback switch |
|
692 |
- # warning("The requested flanking regions are not ", |
|
693 |
- # "compatible with any of the precalculated ones. They ", |
|
694 |
- # "will be calculated on the fly which is a lenghty ", |
|
695 |
- # "process. If you wish to use precalculated flanking ", |
|
696 |
- # "regions, stop the execution and adjust the flanking ", |
|
697 |
- # "parameter.",immediate.=TRUE) |
|
698 |
- # getMainRnaRangesOnTheFly(helperRanges,flank,rc=rc) |
|
699 |
- # },finally="" |
|
700 |
- # ) |
|
701 |
- #} |
|
702 |
- #input <- coverageRnaRef(input,mainRanges,strandedParams,rc=rc) |
|
703 |
- #} |
|
704 | 697 |
if(signal == "coverage") |
705 | 698 |
input <- coverageRef(input,mainRanges,strandedParams,rc=rc) |
706 | 699 |
else if (signal == "rpm") |
707 | 700 |
input <- rpMatrix(input,mainRanges,flank,binParams,strandedParams,rc=rc) |
708 | 701 |
|
709 | 702 |
# If normalization method is linear, we must adjust the coverages |
710 |
- if (preprocessParams$normalize == "linear") { |
|
703 |
+ # TODO: Check for onTheFly in the beginning |
|
704 |
+ if (preprocessParams$normalize == "linear" && !onTheFly) { |
|
711 | 705 |
linFac <- calcLinearFactors(input,preprocessParams) |
712 | 706 |
for (n in names(input)) { |
713 | 707 |
if (linFac[n]==1) |
... | ... |
@@ -724,13 +718,12 @@ recoup <- function( |
724 | 718 |
} |
725 | 719 |
} |
726 | 720 |
|
727 |
- # Now we must summarize and create the matrices. If genebody or unequal |
|
728 |
- # custom lengths, bin is a must, else we leave to user |
|
721 |
+ # Now we must summarize and create the matrices. If genebody, utr3 or |
|
722 |
+ # unequal custom lengths, bin is a must, else we leave to user |
|
729 | 723 |
mustBin <- FALSE |
730 |
- if (region=="genebody") |
|
724 |
+ if (region=="genebody" || region=="utr3") |
|
731 | 725 |
mustBin <- TRUE |
732 | 726 |
if (region=="custom") { |
733 |
- #w <- width(genomeRanges) |
|
734 | 727 |
w <- width(mainRanges) |
735 | 728 |
if (any(w!=w[1])) |
736 | 729 |
mustBin <- TRUE |
... | ... |
@@ -738,8 +731,8 @@ recoup <- function( |
738 | 731 |
if (mustBin) { |
739 | 732 |
if (binParams$regionBinSize==0) { |
740 | 733 |
warning("Central region bin size not set for a region that must ", |
741 |
- "be binned! Setting to 1000...",immediate.=TRUE) |
|
742 |
- binParams$regionBinSize <- 1000 |
|
734 |
+ "be binned! Setting to 200...",immediate.=TRUE) |
|
735 |
+ binParams$regionBinSize <- 200 |
|
743 | 736 |
} |
744 | 737 |
} |
745 | 738 |
|
... | ... |
@@ -763,7 +756,7 @@ recoup <- function( |
763 | 756 |
|
764 | 757 |
# Coverages and profiles calculated... Now depending on plot option, we go |
765 | 758 |
# further or return the enriched input object for saving |
766 |
- if (!plotParams$profile && !plotParams$heatmap) { |
|
759 |
+ if (!plotParams$profile && !plotParams$heatmap && !plotParams$correlation) { |
|
767 | 760 |
recoupObj <- toOutput(input,design,saveParams,callParams=callParams) |
768 | 761 |
return(recoupObj) |
769 | 762 |
} |
1 | 1 |
old mode 100644 |
2 | 2 |
new mode 100755 |
... | ... |
@@ -3,6 +3,7 @@ recoup <- function( |
3 | 3 |
design=NULL, |
4 | 4 |
region=c("genebody","tss","tes","custom"), |
5 | 5 |
type=c("chipseq","rnaseq"), |
6 |
+ signal=c("coverage","rpm"), |
|
6 | 7 |
genome=c("hg18","hg19","hg38","mm9","mm10","rn5","rn6","dm3","dm6", |
7 | 8 |
"danrer7","danrer10","pantro4","pantro5","susscr3","susscr11", |
8 | 9 |
"ecucab2","tair10"), |
... | ... |
@@ -21,9 +22,11 @@ recoup <- function( |
21 | 22 |
regionBinSize=0, |
22 | 23 |
sumStat=c("mean","median"), |
23 | 24 |
interpolation=c("auto","spline","linear","neighborhood"), |
25 |
+ binType=c("variable","fixed"), |
|
24 | 26 |
forceHeatmapBinning=TRUE, |
25 | 27 |
forcedBinSize=c(50,200), |
26 |
- chunking=FALSE |
|
28 |
+ chunking=FALSE#, |
|
29 |
+ #seed=42 |
|
27 | 30 |
), |
28 | 31 |
#binParams=getDefaultListArgs("binParams"), |
29 | 32 |
selector=NULL, |
... | ... |
@@ -37,8 +40,8 @@ recoup <- function( |
37 | 40 |
spliceRemoveQ=0.75, |
38 | 41 |
#bedGenome=ifelse(genome %in% c("hg18","hg19","hg38","mm9","mm10","rn5", |
39 | 42 |
# "dm3","danrer7","pantro4","susscr3"),genome,NA), |
40 |
- bedGenome=NA, |
|
41 |
- seed=42 |
|
43 |
+ bedGenome=NA#, |
|
44 |
+ #seed=42 |
|
42 | 45 |
), |
43 | 46 |
#preprocessParams=getDefaultListArgs("preprocessParams"), |
44 | 47 |
plotParams=list( |
... | ... |
@@ -75,8 +78,8 @@ recoup <- function( |
75 | 78 |
nstart=20, |
76 | 79 |
algorithm=c("Hartigan-Wong","Lloyd","Forgy","MacQueen"), |
77 | 80 |
iterMax=20, |
78 |
- reference=NULL, |
|
79 |
- seed=42 |
|
81 |
+ reference=NULL#, |
|
82 |
+ #seed=42 |
|
80 | 83 |
), |
81 | 84 |
#kmParams=getDefaultListArgs("kmParams"), |
82 | 85 |
strandedParams=list( |
... | ... |
@@ -93,7 +96,7 @@ recoup <- function( |
93 | 96 |
strip.text.x=element_text(size=10,face="bold"), |
94 | 97 |
strip.text.y=element_text(size=10,face="bold"), |
95 | 98 |
legend.position="bottom", |
96 |
- panel.margin=grid::unit(1,"lines") |
|
99 |
+ panel.spacing=grid::unit(1,"lines") |
|
97 | 100 |
), |
98 | 101 |
#ggplotParams=getDefaultListArgs("ggplotParams"), |
99 | 102 |
complexHeatmapParams=list( |
... | ... |
@@ -153,6 +156,7 @@ recoup <- function( |
153 | 156 |
region <- tolower(region[1]) |
154 | 157 |
refdb <- tolower(refdb[1]) |
155 | 158 |
type <- tolower(type[1]) |
159 |
+ signal <- tolower(signal[1]) |
|
156 | 160 |
if (!is.null(design) && !is.data.frame(design)) |
157 | 161 |
checkFileArgs("design",design) |
158 | 162 |
if (!is.data.frame(genome) && file.exists(genome)) |
... | ... |
@@ -161,6 +165,7 @@ recoup <- function( |
161 | 165 |
checkTextArgs("genome",genome,getSupportedOrganisms(),multiarg=FALSE) |
162 | 166 |
checkTextArgs("refdb",refdb,getSupportedRefDbs(),multiarg=FALSE) |
163 | 167 |
checkTextArgs("type",type,c("chipseq","rnaseq"),multiarg=FALSE) |
168 |
+ checkTextArgs("signal",signal,c("coverage","rpm"),multiarg=FALSE) |
|
164 | 169 |
checkNumArgs("fraction",fraction,"numeric",c(0,1),"botheq") |
165 | 170 |
if (any(flank<0)) |
166 | 171 |
stop("The minimum flanking allowed is 0 bp") |
... | ... |
@@ -189,6 +194,12 @@ recoup <- function( |
189 | 194 |
preprocessParams$fragLen <- NA |
190 | 195 |
} |
191 | 196 |
|
197 |
+ # If type is rnaseq, the only allowed genomes are the ones supported by |
|
198 |
+ # recoup for the time being. In the future, a custom RNA-Seq genome may be |
|
199 |
+ # constructed from a GFF or like... |
|
200 |
+ if (type=="rnaseq" && !(genome %in% getSupportedOrganisms())) |
|
201 |
+ stop("When type is \"rnaseq\", only the supported genomes are allowed!") |
|
202 |
+ |
|
192 | 203 |
# and list arguments |
193 | 204 |
orderByDefault <- getDefaultListArgs("orderBy") |
194 | 205 |
binParamsDefault <- getDefaultListArgs("binParams") |
... | ... |
@@ -246,6 +257,8 @@ recoup <- function( |
246 | 257 |
region=if (is.null(thisCall$region)) prevCallParams$region else |
247 | 258 |
region, |
248 | 259 |
type=if (is.null(thisCall$type)) prevCallParams$type else type, |
260 |
+ signal=if (is.null(thisCall$signal)) prevCallParams$signal |
|
261 |
+ else signal, |
|
249 | 262 |
genome=if (is.null(thisCall$genome)) prevCallParams$genome else |
250 | 263 |
genome, |
251 | 264 |
refdb=if (is.null(thisCall$refdb)) prevCallParams$refdb else |
... | ... |
@@ -288,6 +301,7 @@ recoup <- function( |
288 | 301 |
callParams <- list( |
289 | 302 |
region=region, |
290 | 303 |
type=type, |
304 |
+ signal=signal, |
|
291 | 305 |
genome=genome, |
292 | 306 |
version=version, |
293 | 307 |
refdb=refdb, |
... | ... |
@@ -316,27 +330,28 @@ recoup <- function( |
316 | 330 |
input <- decideChanges(input,callParams,prevCallParams) |
317 | 331 |
|
318 | 332 |
# Redefine all final arguments for this call |
319 |
- region=callParams$region |
|
320 |
- type=callParams$type |
|
321 |
- genome=callParams$genome |
|
322 |
- version=callParams$version |
|
323 |
- refdb=callParams$refdb |
|
324 |
- flank=callParams$flank |
|
325 |
- fraction=callParams$fraction |
|
326 |
- orderBy=callParams$orderBy |
|
327 |
- binParams=callParams$binParams |
|
328 |
- selector=callParams$selector |
|
329 |
- preprocessParams=callParams$preprocessParams |
|
330 |
- plotParams=callParams$plotParams |
|
331 |
- saveParams=callParams$saveParams |
|
332 |
- kmParams=callParams$kmParams |
|
333 |
- strandedParams=callParams$strandedParams |
|
334 |
- ggplotParams=callParams$ggplotParams |
|
335 |
- complexHeatmapParams=callParams$complexHeatmapParams |
|
336 |
- bamParams=callParams$bamParams |
|
337 |
- onTheFly=callParams$onTheFly |
|
338 |
- localDbHome=callParams$localDbHome |
|
339 |
- rc=callParams$rc |
|
333 |
+ region <- callParams$region |
|
334 |
+ type <- callParams$type |
|
335 |
+ singnal <- callParams$signal |
|
336 |
+ genome <- callParams$genome |
|
337 |
+ version <- callParams$version |
|
338 |
+ refdb <- callParams$refdb |
|
339 |
+ flank <- callParams$flank |
|
340 |
+ fraction <- callParams$fraction |
|
341 |
+ orderBy <- callParams$orderBy |
|
342 |
+ binParams <- callParams$binParams |
|
343 |
+ selector <- callParams$selector |
|
344 |
+ preprocessParams <- callParams$preprocessParams |
|
345 |
+ plotParams <- callParams$plotParams |
|
346 |
+ saveParams <- callParams$saveParams |
|
347 |
+ kmParams <- callParams$kmParams |
|
348 |
+ strandedParams <- callParams$strandedParams |
|
349 |
+ ggplotParams <- callParams$ggplotParams |
|
350 |
+ complexHeatmapParams <- callParams$complexHeatmapParams |
|
351 |
+ bamParams <- callParams$bamParams |
|
352 |
+ onTheFly <- callParams$onTheFly |
|
353 |
+ localDbHome <- callParams$localDbHome |
|
354 |
+ rc <- callParams$rc |
|
340 | 355 |
# End of complex parameter storage procedure and parameter recall from a |
341 | 356 |
# previous call |
342 | 357 |
############################################################################ |
... | ... |
@@ -358,11 +373,11 @@ recoup <- function( |
358 | 373 |
# Some compatibility with previous annoation stores |
359 | 374 |
if ("gene.rda" %in% dir(storageHome)) { # Older version |
360 | 375 |
warning("Older annotation storage detected! Please ", |
361 |
- "rebuild your annotation storage using the ", |
|
362 |
- "buildAnnotationStore function\nso as to have the ", |
|
376 |
+ "rebuild your annotation storage\nusing the ", |
|
377 |
+ "buildAnnotationStore function so as to have the\n", |
|
363 | 378 |
"ability of versioning genomic coordinate annotations.", |
364 | 379 |
"\nIn the future, older versions may become unusable.", |
365 |
- immediate.=TRUE) |
|
380 |
+ "\n",immediate.=TRUE) |
|
366 | 381 |
storageHomeVersion <- storageHome |
367 | 382 |
} |
368 | 383 |
else { # Newer version |
... | ... |
@@ -423,7 +438,7 @@ recoup <- function( |
423 | 438 |
} |
424 | 439 |
else if (all(flank==5000)) { |
425 | 440 |
g <- load(file.path(storageHomeVersion, |
426 |
- "summarized_exon_F2000.rda")) |
|
441 |
+ "summarized_exon_F5000.rda")) |
|
427 | 442 |
genomeRanges <- flankedSexon |
428 | 443 |
} |
429 | 444 |
else { |
... | ... |
@@ -588,8 +603,16 @@ recoup <- function( |
588 | 603 |
# then work with these later on |
589 | 604 |
intermRanges <- getMainRanges(genomeRanges,helperRanges=helperRanges,type, |
590 | 605 |
region,flank,rc=rc) |
591 |
- mainRanges <- intermRanges$mainRanges |
|
592 |
- bamRanges <- intermRanges$bamRanges |
|
606 |
+ # It is very important that all the Ranges have Seqinfo objects attached as |
|
607 |
+ # they have to be trimmed for potential extensions (due to flanking regions) |
|
608 |
+ # beyond chromosome lengths. The built-in annotations do provide this option |
|
609 |
+ # otherwise when a custom genome/areas is/are provided, the genome of |
|
610 |
+ # interest should be provided too. Or if not provided, the user will be |
|
611 |
+ # responsible for any crash. |
|
612 |
+ # TODO: When input ranges are custom, genome should be given to make Seqinfo |
|
613 |
+ if (type == "chipseq") # Otherwise, throw error with GRangesList |
|
614 |
+ mainRanges <- trim(intermRanges$mainRanges) |
|
615 |
+ bamRanges <- trim(intermRanges$bamRanges) |
|
593 | 616 |
|
594 | 617 |
# Here we must write code for the reading and normalization of bam files |
595 | 618 |
# The preprocessRanges function looks if there is a valid (not null) ranges |
... | ... |
@@ -602,41 +625,15 @@ recoup <- function( |
602 | 625 |
# At this point we must apply the fraction parameter if <1. We choose this |
603 | 626 |
# point in order not to restrict later usage of the read ranges and since it |
604 | 627 |
# does not take much time to load into memory. |
605 |
- #if (fraction<1) { |
|
606 |
- # newSize <- round(fraction*length(genomeRanges)) |
|
607 |
- # set.seed(preprocessParams$seed) |
|
608 |
- # refIndex <- sort(sample(length(genomeRanges),newSize)) |
|
609 |
- # genomeRanges <- genomeRanges[refIndex] |
|
610 |
- # if (type=="rnaseq") |
|
611 |
- # helperRanges <- helperRanges[refIndex] |
|
612 |
- # for (i in 1:length(input)) { |
|
613 |
- # if (!is.null(input[[i]]$ranges)) { |
|
614 |
- # newSize <- round(fraction*length(input[[i]]$ranges)) |
|
615 |
- # set.seed(preprocessParams$seed) |
|
616 |
- # fracIndex <- sort(sample(length(input[[i]]$ranges),newSize)) |
|
617 |
- # input[[i]]$ranges <- input[[i]]$ranges[fracIndex] |
|
618 |
- # } |
|
619 |
- # if (!is.null(input[[i]]$coverage)) { |
|
620 |
- # #if (!is.null(input[[i]]$coverage$center)) { |
|
621 |
- # # input[[i]]$coverage$center <- |
|
622 |
- # # input[[i]]$coverage$center[refIndex] |
|
623 |
- # #} |
|
624 |
- # #else |
|
625 |
- # input[[i]]$coverage <- input[[i]]$coverage[refIndex] |
|
626 |
- # } |
|
627 |
- # if (!is.null(input[[i]]$profile)) |
|
628 |
- # input[[i]]$profile <- input[[i]]$profile[refIndex,] |
|
629 |
- # } |
|
630 |
- #} |
|
631 | 628 |
if (fraction<1) { |
632 | 629 |
newSize <- round(fraction*length(mainRanges)) |
633 |
- set.seed(preprocessParams$seed) |
|
630 |
+ #set.seed(preprocessParams$seed) |
|
634 | 631 |
refIndex <- sort(sample(length(mainRanges),newSize)) |
635 | 632 |
mainRanges <- mainRanges[refIndex] |
636 | 633 |
for (i in 1:length(input)) { |
637 | 634 |
if (!is.null(input[[i]]$ranges)) { |
638 | 635 |
newSize <- round(fraction*length(input[[i]]$ranges)) |
639 |
- set.seed(preprocessParams$seed) |
|
636 |
+ #set.seed(preprocessParams$seed) |
|
640 | 637 |
fracIndex <- sort(sample(length(input[[i]]$ranges),newSize)) |
641 | 638 |
input[[i]]$ranges <- input[[i]]$ranges[fracIndex] |
642 | 639 |
} |
... | ... |
@@ -658,24 +655,6 @@ recoup <- function( |
658 | 655 |
return(as.character(seqnames(seqinfo(BigWigFile(x$file))))) |
659 | 656 |
} |
660 | 657 |
}))) |
661 |
- #if (type=="chipseq") { |
|
662 |
- # keep <- which(as.character(seqnames(genomeRanges)) %in% chrs) |
|
663 |
- # genomeRanges <- genomeRanges[keep] |
|
664 |
- #} |
|
665 |
- #else if (type=="rnaseq") { |
|
666 |
- # keeph <- which(as.character(seqnames(helperRanges)) %in% chrs) |
|
667 |
- # helperRanges <- helperRanges[keeph] |
|
668 |
- # genomeRanges <- genomeRanges[names(helperRanges)] |
|
669 |
- # ######################################################################## |
|
670 |
- # ## There must be an R bug with `lengths` here as although it runs in |
|
671 |
- # ## Rcmd, it does not pass package building or vignette kniting... But |
|
672 |
- # ## for the time being it seems that it is not needed as the name |
|
673 |
- # ## filtering works |
|
674 |
- # #lens <- which(lengths(genomeRanges)==0) |
|
675 |
- # #if (length(lens)>0) |
|
676 |
- # # genomeRanges[lens] <- NULL |
|
677 |
- # ######################################################################## |
|
678 |
- #} |
|
679 | 658 |
if (type=="chipseq") { |
680 | 659 |
keep <- which(as.character(seqnames(mainRanges)) %in% chrs) |
681 | 660 |
mainRanges <- mainRanges[keep] |
... | ... |
@@ -701,9 +680,9 @@ recoup <- function( |
701 | 680 |
#else if (type=="rnaseq") |
702 | 681 |
# input <- coverageRnaRef(input,genomeRanges,helperRanges,flank, |
703 | 682 |
# strandedParams,rc=rc)#,bamParams) |
704 |
- if (type=="chipseq") |
|
705 |
- input <- coverageRef(input,mainRanges,strandedParams,rc=rc) |
|
706 |
- else if (type=="rnaseq") #{ |
|
683 |
+ #if (type=="chipseq") |
|
684 |
+ # input <- coverageRef(input,mainRanges,strandedParams,rc=rc) |
|
685 |
+ #else if (type=="rnaseq") #{ |
|
707 | 686 |
#if (flank[1]==0 && flank[2]==0) |
708 | 687 |
# mainRnaRanges <- genomeRanges |
709 | 688 |
#else { |
... | ... |
@@ -720,26 +699,28 @@ recoup <- function( |
720 | 699 |
# },finally="" |
721 | 700 |
# ) |
722 | 701 |
#} |
723 |
- input <- coverageRnaRef(input,mainRanges,strandedParams,rc=rc) |
|
702 |
+ #input <- coverageRnaRef(input,mainRanges,strandedParams,rc=rc) |
|
724 | 703 |
#} |
704 |
+ if(signal == "coverage") |
|
705 |
+ input <- coverageRef(input,mainRanges,strandedParams,rc=rc) |
|
706 |
+ else if (signal == "rpm") |
|
707 |
+ input <- rpMatrix(input,mainRanges,flank,binParams,strandedParams,rc=rc) |
|
708 |
+ |
|
725 | 709 |
# If normalization method is linear, we must adjust the coverages |
726 |
- if (preprocessParams$normalize=="linear") { |
|
710 |
+ if (preprocessParams$normalize == "linear") { |
|
727 | 711 |
linFac <- calcLinearFactors(input,preprocessParams) |
728 | 712 |
for (n in names(input)) { |
729 | 713 |
if (linFac[n]==1) |
730 | 714 |
next |
731 |
- #if (is.null(input[[n]]$coverage$center)) |
|
732 |
- input[[n]]$coverage <- cmclapply(input[[n]]$coverage, |
|
715 |
+ if (signal == "coverage") |
|
716 |
+ input[[n]]$coverage <- cmclapply(input[[n]]$coverage, |
|
733 | 717 |
function(x,f) { |
734 | 718 |
return(x*f) |
735 |
- },linFac[n] |
|
736 |
- ) |
|
737 |
- #else { |
|
738 |
- # input[[n]]$coverage$center <- |
|
739 |
- # cmclapply(input[[n]]$coverage$center,function(x,f) { |
|
740 |
- # return(x*f) |
|
741 |
- # },linFac[n]) |
|
742 |
- #} |
|
719 |
+ },linFac[n]) |
|
720 |
+ else if (signal == "rpm") |
|
721 |
+ input[[n]]$profile <- apply(input[[n]]$profile,1,function(x,f) { |
|
722 |
+ return(x*f) |
|
723 |
+ },linFac[n]) |
|
743 | 724 |
} |
744 | 725 |
} |
745 | 726 |
|
... | ... |
@@ -763,15 +744,17 @@ recoup <- function( |
763 | 744 |
} |
764 | 745 |
|
765 | 746 |
# Chunking? |
766 |
- if (binParams$chunking) { |
|
767 |
- if (type=="chipseq") |
|
768 |
- binParams$chunks <- split(1:length(genomeRanges), |
|
769 |
- as.character(seqnames(genomeRanges))) |
|
770 |
- else if (type=="rnaseq") |
|
771 |
- binParams$chunks <- split(1:length(helperRanges), |
|
772 |
- as.character(seqnames(helperRanges))) |
|
747 |
+ if (signal == "coverage") { # Otherwise, matrix calculate by rpm |
|
748 |
+ if (binParams$chunking) { |
|
749 |
+ if (type=="chipseq") |
|
750 |
+ binParams$chunks <- split(1:length(genomeRanges), |
|
751 |
+ as.character(seqnames(genomeRanges))) |
|
752 |
+ else if (type=="rnaseq") |
|
753 |
+ binParams$chunks <- split(1:length(helperRanges), |
|
754 |
+ as.character(seqnames(helperRanges))) |
|
755 |
+ } |
|
756 |
+ input <- profileMatrix(input,flank,binParams,rc) |
|
773 | 757 |
} |
774 |
- input <- profileMatrix(input,flank,binParams,rc) |
|
775 | 758 |
|
776 | 759 |
# Perform the k-means clustering if requested and append to design (which |
777 | 760 |
# has been checked, if we are allowed to do so) |
... | ... |
@@ -3,8 +3,10 @@ recoup <- function( |
3 | 3 |
design=NULL, |
4 | 4 |
region=c("genebody","tss","tes","custom"), |
5 | 5 |
type=c("chipseq","rnaseq"), |
6 |
- genome=c("hg18","hg19","hg38","mm9","mm10","rn5","dm3","danrer7","pantro4", |
|
7 |
- "susscr3"), |
|
6 |
+ genome=c("hg18","hg19","hg38","mm9","mm10","rn5","rn6","dm3","dm6", |
|
7 |
+ "danrer7","danrer10","pantro4","pantro5","susscr3","susscr11", |
|
8 |
+ "ecucab2","tair10"), |
|
9 |
+ version="auto", |
|
8 | 10 |
refdb=c("ensembl","ucsc","refseq"), |
9 | 11 |
flank=c(2000,2000), |
10 | 12 |
fraction=1, |
... | ... |
@@ -27,8 +29,8 @@ recoup <- function( |
27 | 29 |
selector=NULL, |
28 | 30 |
#selector=getDefaultListArgs("selector"), |
29 | 31 |
preprocessParams=list( |
30 |
- fragLen=NA, |
|
31 |
- cleanLevel=c(0,1,2,3), |
|
32 |
+ fragLen=NA, |
|
33 |
+ cleanLevel=c(0,1,2,3), |
|
32 | 34 |
normalize=c("none","linear","downsample","sampleto"), |
33 | 35 |
sampleTo=1e+6, |
34 | 36 |
spliceAction=c("split","keep","remove"), |
... | ... |
@@ -156,16 +158,22 @@ recoup <- function( |
156 | 158 |
if (!is.data.frame(genome) && file.exists(genome)) |
157 | 159 |
checkFileArgs("genome",genome) |
158 | 160 |
else if (is.character(genome)) |
159 |
- checkTextArgs("genome",genome,c("hg18","hg19","hg38","mm9","mm10","rn5", |
|
160 |
- "dm3","danrer7","pantro4","susscr3","tair10"),multiarg=FALSE) |
|
161 |
- checkTextArgs("refdb",refdb,c("ensembl","ucsc","refseq"),multiarg=FALSE) |
|
161 |
+ checkTextArgs("genome",genome,getSupportedOrganisms(),multiarg=FALSE) |
|
162 |
+ checkTextArgs("refdb",refdb,getSupportedRefDbs(),multiarg=FALSE) |
|
162 | 163 |
checkTextArgs("type",type,c("chipseq","rnaseq"),multiarg=FALSE) |
163 | 164 |
checkNumArgs("fraction",fraction,"numeric",c(0,1),"botheq") |
164 | 165 |
if (any(flank<0)) |
165 | 166 |
stop("The minimum flanking allowed is 0 bp") |
166 | 167 |
if (any(flank>50000)) |
167 | 168 |
stop("The maximum flanking allowed is 50000 bp") |
168 |
- |
|
169 |
+ # Check the version argument |
|
170 |
+ version <- version[1] |
|
171 |
+ if (is.character(version)) { |
|
172 |
+ version <- tolower(version) |
|
173 |
+ checkTextArgs("version",version,c("auto"),multiarg=FALSE) |
|
174 |
+ } |
|
175 |
+ else |
|
176 |
+ checkNumArgs("version",version,"numeric") |
|
169 | 177 |
# If type is rnaseq, only genebody plots are valid |
170 | 178 |
if (type=="rnaseq" && region!="genebody") { |
171 | 179 |
warning("When type is \"rnaseq\", plots can be created only on ", |
... | ... |
@@ -175,11 +183,11 @@ recoup <- function( |
175 | 183 |
|
176 | 184 |
# If type is rnaseq, read extension is illegal because of potential splicing |
177 | 185 |
if (type=="rnaseq" && !is.null(preprocessParams$fragLen) |
178 |
- && !is.na(preprocessParams$fragLen)) { |
|
179 |
- warning("When type is \"rnaseq\", read extension/reduction should not ", |
|
186 |
+ && !is.na(preprocessParams$fragLen)) { |
|
187 |
+ warning("When type is \"rnaseq\", read extension/reduction should not ", |
|
180 | 188 |
"happen because of potential splicing! Ignoring...",immediate.=TRUE) |
181 |
- preprocessParams$fragLen <- NA |
|
182 |
- } |
|
189 |
+ preprocessParams$fragLen <- NA |
|
190 |
+ } |
|
183 | 191 |
|
184 | 192 |
# and list arguments |
185 | 193 |
orderByDefault <- getDefaultListArgs("orderBy") |
... | ... |
@@ -242,6 +250,8 @@ recoup <- function( |
242 | 250 |
genome, |
243 | 251 |
refdb=if (is.null(thisCall$refdb)) prevCallParams$refdb else |
244 | 252 |
refdb, |
253 |
+ version=if (is.null(thisCall$version)) prevCallParams$version else |
|
254 |
+ version, |
|
245 | 255 |
flank=if (is.null(thisCall$flank)) prevCallParams$flank else flank, |
246 | 256 |
fraction=if (is.null(thisCall$fraction)) prevCallParams$fraction |
247 | 257 |
else fraction, |
... | ... |
@@ -279,6 +289,7 @@ recoup <- function( |
279 | 289 |
region=region, |
280 | 290 |
type=type, |
281 | 291 |
genome=genome, |
292 |
+ version=version, |
|
282 | 293 |
refdb=refdb, |
283 | 294 |
flank=flank, |
284 | 295 |
fraction=fraction, |
... | ... |
@@ -308,6 +319,7 @@ recoup <- function( |
308 | 319 |
region=callParams$region |
309 | 320 |
type=callParams$type |
310 | 321 |
genome=callParams$genome |
322 |
+ version=callParams$version |
|
311 | 323 |
refdb=callParams$refdb |
312 | 324 |
flank=callParams$flank |
313 | 325 |
fraction=callParams$fraction |
... | ... |
@@ -341,38 +353,76 @@ recoup <- function( |
341 | 353 |
} |
342 | 354 |
else { |
343 | 355 |
# Check if local storage has been set |
344 |
- if (dir.exists(file.path(localDbHome,refdb,genome))) { |
|
356 |
+ storageHome <- file.path(localDbHome,refdb,genome) |
|
357 |
+ if (dir.exists(storageHome)) { |
|
358 |
+ # Some compatibility with previous annoation stores |
|
359 |
+ if ("gene.rda" %in% dir(storageHome)) { # Older version |
|
360 |
+ warning("Older annotation storage detected! Please ", |
|
361 |
+ "rebuild your annotation storage using the ", |
|
362 |
+ "buildAnnotationStore function\nso as to have the ", |
|
363 |
+ "ability of versioning genomic coordinate annotations.", |
|
364 |
+ "\nIn the future, older versions may become unusable.", |
|
365 |
+ immediate.=TRUE) |
|
366 |
+ storageHomeVersion <- storageHome |
|
367 |
+ } |
|
368 |
+ else { # Newer version |
|
369 |
+ # Decide on version |
|
370 |
+ if (version != "auto") { |
|
371 |
+ storageHomeVersion <- file.path(storageHome,version) |
|
372 |
+ if (!dir.exists(storageHomeVersion)) { |
|
373 |
+ warning("The annotation directory ", |
|
374 |
+ storageHomeVersion, |
|
375 |
+ " does not seem to exist! Have you run ", |
|
376 |
+ "buildAnnotationStorage? Will use newest ", |
|
377 |
+ "existing version.",immediate.=TRUE) |
|
378 |
+ version <- "auto" |
|
379 |
+ } |
|
380 |
+ } |
|
381 |
+ if (version == "auto") { |
|
382 |
+ vers <- suppressWarnings(as.numeric(dir(storageHome))) |
|
383 |
+ if (any(is.na(vers))) { |
|
384 |
+ of <- vers[which(is.na(vers))] |
|
385 |
+ stop("Corrupted annotation storage directory ", |
|
386 |
+ "contents! ->",of,"<- Either remove offending ", |
|
387 |
+ "files/directories or rebuild.") |
|
388 |
+ } |
|
389 |
+ vers <- sort(vers,decreasing=TRUE) |
|
390 |
+ version <- vers[1] |
|
391 |
+ storageHomeVersion <- file.path(storageHome,version) |
|
392 |
+ } |
|
393 |
+ } |
|
394 |
+ |
|
345 | 395 |
if (type=="chipseq") { |
346 |
- g <- load(file.path(localDbHome,refdb,genome,"gene.rda")) |
|
396 |
+ g <- load(file.path(storageHomeVersion,"gene.rda")) |
|
347 | 397 |
genomeRanges <- gene |
348 | 398 |
helperRanges <- NULL |
349 | 399 |
} |
350 | 400 |
else if (type=="rnaseq") { |
351 | 401 |
# Load the helper ranges anyway |
352 |
- g <- load(file.path(localDbHome,refdb,genome,"gene.rda")) |
|
402 |
+ g <- load(file.path(storageHomeVersion,"gene.rda")) |
|
353 | 403 |
helperRanges <- gene |
354 | 404 |
if (all(flank==0)) { |
355 |
- g <- load(file.path(localDbHome,refdb,genome, |
|
405 |
+ g <- load(file.path(storageHomeVersion, |
|
356 | 406 |
"summarized_exon.rda")) |
357 | 407 |
genomeRanges <- sexon |
358 | 408 |
} |
359 | 409 |
else if (all(flank==500)) { |
360 |
- g <- load(file.path(localDbHome,refdb,genome, |
|
410 |
+ g <- load(file.path(storageHomeVersion, |
|
361 | 411 |
"summarized_exon_F500.rda")) |
362 | 412 |
genomeRanges <- flankedSexon |
363 | 413 |
} |
364 | 414 |
else if (all(flank==1000)) { |
365 |
- g <- load(file.path(localDbHome,refdb,genome, |
|
415 |
+ g <- load(file.path(storageHomeVersion, |
|
366 | 416 |
"summarized_exon_F1000.rda")) |
367 | 417 |
genomeRanges <- flankedSexon |
368 | 418 |
} |
369 | 419 |
else if (all(flank==2000)) { |
370 |
- g <- load(file.path(localDbHome,refdb,genome, |
|
420 |
+ g <- load(file.path(storageHomeVersion, |
|
371 | 421 |
"summarized_exon_F2000.rda")) |
372 | 422 |
genomeRanges <- flankedSexon |
373 | 423 |
} |
374 | 424 |
else if (all(flank==5000)) { |
375 |
- g <- load(file.path(localDbHome,refdb,genome, |
|
425 |
+ g <- load(file.path(storageHomeVersion, |
|
376 | 426 |
"summarized_exon_F2000.rda")) |
377 | 427 |
genomeRanges <- flankedSexon |
378 | 428 |
} |
... | ... |
@@ -404,15 +454,15 @@ recoup <- function( |
404 | 454 |
) |
405 | 455 |
names(helperRanges) <- as.character(helperRanges$gene_id) |
406 | 456 |
genome <- getAnnotation(genome,"exon",refdb=refdb,rc=rc) |
407 |
- ann.gr <- makeGRangesFromDataFrame( |
|
457 |
+ annGr <- makeGRangesFromDataFrame( |
|
408 | 458 |
df=genome, |
409 | 459 |
keep.extra.columns=TRUE, |
410 | 460 |
seqnames.field="chromosome" |
411 | 461 |
) |
412 | 462 |
message("Merging exons") |
413 |
- ann.gr <- reduceExons(ann.gr,rc=rc) |
|
414 |
- names(ann.gr) <- as.character(ann.gr$exon_id) |
|
415 |
- genomeRanges <- split(ann.gr,ann.gr$gene_id) |
|
463 |
+ annGr <- reduceExons(annGr,rc=rc) |
|
464 |
+ names(annGr) <- as.character(annGr$exon_id) |
|
465 |
+ genomeRanges <- split(annGr,annGr$gene_id) |
|
416 | 466 |
} |
417 | 467 |
} |
418 | 468 |
} |
... | ... |
@@ -644,7 +694,7 @@ recoup <- function( |
644 | 694 |
# genomeRanges[lens] <- NULL |
645 | 695 |
######################################################################## |
646 | 696 |
} |
647 |
- |
|
697 |
+ |
|
648 | 698 |
#if (type=="chipseq") |
649 | 699 |
# input <- coverageRef(input,genomeRanges,region,flank,strandedParams, |
650 | 700 |
# rc=rc)#,bamParams) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/recoup@128121 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -20,12 +20,15 @@ recoup <- function( |
20 | 20 |
sumStat=c("mean","median"), |
21 | 21 |
interpolation=c("auto","spline","linear","neighborhood"), |
22 | 22 |
forceHeatmapBinning=TRUE, |
23 |
- forcedBinSize=c(50,200) |
|
23 |
+ forcedBinSize=c(50,200), |
|
24 |
+ chunking=FALSE |
|
24 | 25 |
), |
25 | 26 |
#binParams=getDefaultListArgs("binParams"), |
26 | 27 |
selector=NULL, |
27 | 28 |
#selector=getDefaultListArgs("selector"), |
28 | 29 |
preprocessParams=list( |
30 |
+ fragLen=NA, |
|
31 |
+ cleanLevel=c(0,1,2,3), |
|
29 | 32 |
normalize=c("none","linear","downsample","sampleto"), |
30 | 33 |
sampleTo=1e+6, |
31 | 34 |
spliceAction=c("split","keep","remove"), |
... | ... |
@@ -170,6 +173,14 @@ recoup <- function( |
170 | 173 |
region <- "genebody" |
171 | 174 |
} |
172 | 175 |
|
176 |
+ # If type is rnaseq, read extension is illegal because of potential splicing |
|
177 |
+ if (type=="rnaseq" && !is.null(preprocessParams$fragLen) |
|
178 |
+ && !is.na(preprocessParams$fragLen)) { |
|
179 |
+ warning("When type is \"rnaseq\", read extension/reduction should not ", |
|
180 |
+ "happen because of potential splicing! Ignoring...",immediate.=TRUE) |
|
181 |
+ preprocessParams$fragLen <- NA |
|
182 |
+ } |
|
183 |
+ |
|
173 | 184 |
# and list arguments |
174 | 185 |
orderByDefault <- getDefaultListArgs("orderBy") |
175 | 186 |
binParamsDefault <- getDefaultListArgs("binParams") |
... | ... |
@@ -334,20 +345,56 @@ recoup <- function( |
334 | 345 |
if (type=="chipseq") { |
335 | 346 |
g <- load(file.path(localDbHome,refdb,genome,"gene.rda")) |
336 | 347 |
genomeRanges <- gene |
348 |
+ helperRanges <- NULL |
|
337 | 349 |
} |
338 | 350 |
else if (type=="rnaseq") { |
339 |
- g <- load(file.path(localDbHome,refdb,genome, |
|
340 |
- "summarized_exon.rda")) |
|
341 |
- genomeRanges <- sexon |
|
351 |
+ # Load the helper ranges anyway |
|
342 | 352 |
g <- load(file.path(localDbHome,refdb,genome,"gene.rda")) |
343 | 353 |
helperRanges <- gene |
354 |
+ if (all(flank==0)) { |
|
355 |
+ g <- load(file.path(localDbHome,refdb,genome, |
|
356 |
+ "summarized_exon.rda")) |
|
357 |
+ genomeRanges <- sexon |
|
358 |
+ } |
|
359 |
+ else if (all(flank==500)) { |
|
360 |
+ g <- load(file.path(localDbHome,refdb,genome, |
|
361 |
+ "summarized_exon_F500.rda")) |
|
362 |
+ genomeRanges <- flankedSexon |
|
363 |
+ } |
|
364 |
+ else if (all(flank==1000)) { |
|
365 |
+ g <- load(file.path(localDbHome,refdb,genome, |
|
366 |
+ "summarized_exon_F1000.rda")) |
|
367 |
+ genomeRanges <- flankedSexon |
|
368 |
+ } |
|
369 |
+ else if (all(flank==2000)) { |
|
370 |
+ g <- load(file.path(localDbHome,refdb,genome, |
|
371 |
+ "summarized_exon_F2000.rda")) |
|
372 |
+ genomeRanges <- flankedSexon |
|
373 |
+ } |
|
374 |
+ else if (all(flank==5000)) { |
|
375 |
+ g <- load(file.path(localDbHome,refdb,genome, |
|
376 |
+ "summarized_exon_F2000.rda")) |
|
377 |
+ genomeRanges <- flankedSexon |
|
378 |
+ } |
|
379 |
+ else { |
|
380 |
+ warning("When using recoup in RNA-Seq mode, it is ", |
|
381 |
+ "much faster to use a precalculated set of ", |
|
382 |
+ "regions and their flanks (0, 500, 1000, 2000 and ", |
|
383 |
+ "5000 bps supported) and subset this one for a ", |
|
384 |
+ "custom set of genes. Otherwise calculations may ", |
|
385 |
+ "take too long to complete.",immediate.=TRUE) |
|
386 |
+ genomeRanges <- |
|
387 |
+ getMainRnaRangesOnTheFly(helperRanges,flank,rc=rc) |
|
388 |
+ } |
|
344 | 389 |
} |
345 | 390 |
} |
346 | 391 |
else { # On the fly |
347 | 392 |
message("Getting annotation on the fly for ",genome," from ", |
348 | 393 |
refdb) |
349 |
- if (type=="chipseq") |
|
394 |
+ if (type=="chipseq") { |
|
350 | 395 |
genome <- getAnnotation(genome,"gene",refdb=refdb,rc=rc) |
396 |
+ helperRanges <- NULL |
|
397 |
+ } |
|
351 | 398 |
else if (type=="rnaseq") { |
352 | 399 |
helper <- getAnnotation(genome,"gene",refdb=refdb,rc=rc) |
353 | 400 |
helperRanges <- makeGRangesFromDataFrame( |
... | ... |
@@ -464,22 +511,78 @@ recoup <- function( |
464 | 511 |
} |
465 | 512 |
} |
466 | 513 |
|
514 |
+ # Now we must follow two paths according to region type, genebody and custom |
|
515 |
+ # areas with equal/unequal widths, or tss, tes and 1-width custom areas. |
|
516 |
+ callParams$customIsBase <- FALSE |
|
517 |
+ if (region=="custom" && all(width(genomeRanges)==1)) { |
|
518 |
+ if (all(flank==0)) { |
|
519 |
+ warning("Flanking cannot be zero bp in both directions when the ", |
|
520 |
+ "reference region is only 1bp! Setting to default ", |
|
521 |
+ "(2000,2000)...",immediate.=TRUE) |
|
522 |
+ flank <- c(2000,2000) |
|
523 |
+ callParams$flank <- flank |
|
524 |
+ } |
|
525 |
+ callParams$customIsBase <- TRUE |
|
526 |
+ } |
|
527 |
+ if (region %in% c("tss","tes")) { |
|
528 |
+ if (all(flank==0)) { |
|
529 |
+ warning("Flanking cannot be zero bp in both directions when the ", |
|
530 |
+ "reference region is \"tss\" or \"tes\"! Setting to default ", |
|
531 |
+ "(2000,2000)...", immediate.=TRUE) |
|
532 |
+ flank <- c(2000,2000) |
|
533 |
+ callParams$flank <- flank |
|
534 |
+ } |
|
535 |
+ } |
|
536 |
+ |
|
537 |
+ # Here we must determine the final ranges to pass to preprocessRanges and |
|
538 |
+ # then work with these later on |
|
539 |
+ intermRanges <- getMainRanges(genomeRanges,helperRanges=helperRanges,type, |
|
540 |
+ region,flank,rc=rc) |
|
541 |
+ mainRanges <- intermRanges$mainRanges |
|
542 |
+ bamRanges <- intermRanges$bamRanges |
|
543 |
+ |
|
467 | 544 |
# Here we must write code for the reading and normalization of bam files |
468 | 545 |
# The preprocessRanges function looks if there is a valid (not null) ranges |
469 | 546 |
# field in input |
470 | 547 |
#if (!onTheFly) |
471 |
- input <- preprocessRanges(input,preprocessParams,bamParams=bamParams,rc=rc) |
|
548 |
+ #input <- preprocessRanges(input,preprocessParams,bamParams=bamParams,rc=rc) |
|
549 |
+ input <- preprocessRanges(input,preprocessParams,genome,bamRanges, |
|
550 |
+ bamParams=bamParams,rc=rc) |
|
472 | 551 |
|
473 | 552 |
# At this point we must apply the fraction parameter if <1. We choose this |
474 | 553 |
# point in order not to restrict later usage of the read ranges and since it |
475 | 554 |
# does not take much time to load into memory. |
555 |
+ #if (fraction<1) { |
|
556 |
+ # newSize <- round(fraction*length(genomeRanges)) |
|
557 |
+ # set.seed(preprocessParams$seed) |
|
558 |
+ # refIndex <- sort(sample(length(genomeRanges),newSize)) |
|
559 |
+ # genomeRanges <- genomeRanges[refIndex] |
|
560 |
+ # if (type=="rnaseq") |
|
561 |
+ # helperRanges <- helperRanges[refIndex] |
|
562 |
+ # for (i in 1:length(input)) { |
|
563 |
+ # if (!is.null(input[[i]]$ranges)) { |
|
564 |
+ # newSize <- round(fraction*length(input[[i]]$ranges)) |
|
565 |
+ # set.seed(preprocessParams$seed) |
|
566 |
+ # fracIndex <- sort(sample(length(input[[i]]$ranges),newSize)) |
|
567 |
+ # input[[i]]$ranges <- input[[i]]$ranges[fracIndex] |
|
568 |
+ # } |
|
569 |
+ # if (!is.null(input[[i]]$coverage)) { |
|
570 |
+ # #if (!is.null(input[[i]]$coverage$center)) { |
|
571 |
+ # # input[[i]]$coverage$center <- |
|
572 |
+ # # input[[i]]$coverage$center[refIndex] |
|
573 |
+ # #} |
|
574 |
+ # #else |
|
575 |
+ # input[[i]]$coverage <- input[[i]]$coverage[refIndex] |
|
576 |
+ # } |
|
577 |
+ # if (!is.null(input[[i]]$profile)) |
|
578 |
+ # input[[i]]$profile <- input[[i]]$profile[refIndex,] |
|
579 |
+ # } |
|
580 |
+ #} |
|
476 | 581 |
if (fraction<1) { |
477 |
- newSize <- round(fraction*length(genomeRanges)) |
|
582 |
+ newSize <- round(fraction*length(mainRanges)) |
|
478 | 583 |
set.seed(preprocessParams$seed) |
479 |
- refIndex <- sort(sample(length(genomeRanges),newSize)) |
|
480 |
- genomeRanges <- genomeRanges[refIndex] |
|
481 |
- if (type=="rnaseq") |
|
482 |
- helperRanges <- helperRanges[refIndex] |
|
584 |
+ refIndex <- sort(sample(length(mainRanges),newSize)) |
|
585 |
+ mainRanges <- mainRanges[refIndex] |
|
483 | 586 |
for (i in 1:length(input)) { |
484 | 587 |
if (!is.null(input[[i]]$ranges)) { |
485 | 588 |
newSize <- round(fraction*length(input[[i]]$ranges)) |
... | ... |
@@ -487,14 +590,8 @@ recoup <- function( |
487 | 590 |
fracIndex <- sort(sample(length(input[[i]]$ranges),newSize)) |
488 | 591 |
input[[i]]$ranges <- input[[i]]$ranges[fracIndex] |
489 | 592 |
} |
490 |
- if (!is.null(input[[i]]$coverage)) { |
|
491 |
- #if (!is.null(input[[i]]$coverage$center)) { |
|
492 |
- # input[[i]]$coverage$center <- |
|
493 |
- # input[[i]]$coverage$center[refIndex] |
|
494 |
- #} |
|
495 |
- #else |
|
593 |
+ if (!is.null(input[[i]]$coverage)) |
|
496 | 594 |
input[[i]]$coverage <- input[[i]]$coverage[refIndex] |
497 |
- } |
|
498 | 595 |
if (!is.null(input[[i]]$profile)) |
499 | 596 |
input[[i]]$profile <- input[[i]]$profile[refIndex,] |
500 | 597 |
} |
... | ... |
@@ -504,17 +601,39 @@ recoup <- function( |
504 | 601 |
chrs <- unique(unlist(lapply(input,function(x) { |
505 | 602 |
if (x$format %in% c("bam","bed")) |
506 | 603 |
return(as.character(runValue(seqnames(x$ranges)))) |
507 |
- else if (x$format=="bigwig") |
|
604 |
+ else if (x$format=="bigwig") { |
|
605 |
+ if (!requireNamespace("rtracklayer")) |
|
606 |
+ stop("R package rtracklayer is required to read and import ", |
|
607 |
+ "BigWig files!") |
|
508 | 608 |
return(as.character(seqnames(seqinfo(BigWigFile(x$file))))) |
609 |
+ } |
|
509 | 610 |
}))) |
611 |
+ #if (type=="chipseq") { |
|
612 |
+ # keep <- which(as.character(seqnames(genomeRanges)) %in% chrs) |
|
613 |
+ # genomeRanges <- genomeRanges[keep] |
|
614 |
+ #} |
|
615 |
+ #else if (type=="rnaseq") { |
|
616 |
+ # keeph <- which(as.character(seqnames(helperRanges)) %in% chrs) |
|
617 |
+ # helperRanges <- helperRanges[keeph] |
|
618 |
+ # genomeRanges <- genomeRanges[names(helperRanges)] |
|
619 |
+ # ######################################################################## |
|
620 |
+ # ## There must be an R bug with `lengths` here as although it runs in |
|
621 |
+ # ## Rcmd, it does not pass package building or vignette kniting... But |
|
622 |
+ # ## for the time being it seems that it is not needed as the name |
|
623 |
+ # ## filtering works |
|
624 |
+ # #lens <- which(lengths(genomeRanges)==0) |
|
625 |
+ # #if (length(lens)>0) |
|
626 |
+ # # genomeRanges[lens] <- NULL |
|
627 |
+ # ######################################################################## |
|
628 |
+ #} |
|
510 | 629 |
if (type=="chipseq") { |
511 |
- keep <- which(as.character(seqnames(genomeRanges)) %in% chrs) |
|
512 |
- genomeRanges <- genomeRanges[keep] |
|
630 |
+ keep <- which(as.character(seqnames(mainRanges)) %in% chrs) |
|
631 |
+ mainRanges <- mainRanges[keep] |
|
513 | 632 |
} |
514 | 633 |
else if (type=="rnaseq") { |
515 | 634 |
keeph <- which(as.character(seqnames(helperRanges)) %in% chrs) |
516 | 635 |
helperRanges <- helperRanges[keeph] |
517 |
- genomeRanges <- genomeRanges[names(helperRanges)] |
|
636 |
+ mainRanges <- mainRanges[names(helperRanges)] |
|
518 | 637 |
######################################################################## |
519 | 638 |
## There must be an R bug with `lengths` here as although it runs in |
520 | 639 |
## Rcmd, it does not pass package building or vignette kniting... But |
... | ... |
@@ -525,36 +644,34 @@ recoup <- function( |
525 | 644 |
# genomeRanges[lens] <- NULL |
526 | 645 |
######################################################################## |
527 | 646 |
} |
528 |
- |
|
529 |
- # Now we must follow two paths according to region type, genebody and custom |
|
530 |
- # areas with equal/unequal widths, or tss, tes and 1-width custom areas |
|
531 |
- callParams$customIsBase <- FALSE |
|
532 |
- if (region=="custom" && all(width(genomeRanges)==1)) { |
|
533 |
- if (all(flank==0)) { |
|
534 |
- warning("Flanking cannot be zero bp in both directions when the ", |
|
535 |
- "reference region is only 1bp! Setting to default ", |
|
536 |
- "(2000,2000)...",immediate.=TRUE) |
|
537 |
- flank <- c(2000,2000) |
|
538 |
- callParams$flank <- flank |
|
539 |
- } |
|
540 |
- callParams$customIsBase <- TRUE |
|
541 |
- } |
|
542 |
- if (region %in% c("tss","tes")) { |
|
543 |
- if (all(flank==0)) { |
|
544 |
- warning("Flanking cannot be zero bp in both directions when the ", |
|
545 |
- "reference region is \"tss\" or \"tes\"! Setting to default ", |
|
546 |
- "(2000,2000)...", immediate.=TRUE) |
|
547 |
- flank <- c(2000,2000) |
|
548 |
- callParams$flank <- flank |
|
549 |
- } |
|
550 |
- } |
|
647 |
+ |
|
648 |
+ #if (type=="chipseq") |
|
649 |
+ # input <- coverageRef(input,genomeRanges,region,flank,strandedParams, |
|
650 |
+ # rc=rc)#,bamParams) |
|
651 |
+ #else if (type=="rnaseq") |
|
652 |
+ # input <- coverageRnaRef(input,genomeRanges,helperRanges,flank, |
|
653 |
+ # strandedParams,rc=rc)#,bamParams) |
|
551 | 654 |
if (type=="chipseq") |
552 |
- input <- coverageRef(input,genomeRanges,region,flank,strandedParams, |
|
553 |
- rc=rc)#,bamParams) |
|
554 |
- else if (type=="rnaseq") |
|
555 |
- input <- coverageRnaRef(input,genomeRanges,helperRanges,flank, |
|
556 |
- strandedParams,rc=rc)#,bamParams) |
|
557 |
- |
|
655 |
+ input <- coverageRef(input,mainRanges,strandedParams,rc=rc) |
|
656 |
+ else if (type=="rnaseq") #{ |
|
657 |
+ #if (flank[1]==0 && flank[2]==0) |
|
658 |
+ # mainRnaRanges <- genomeRanges |
|
659 |
+ #else { |
|
660 |
+ # mainRnaRanges <- tryCatch( |
|
661 |
+ # loadMainRnaRanges(genome,refdb,flank), |
|
662 |
+ # error=function(e) { # Insanely slow fallback switch |
|
663 |
+ # warning("The requested flanking regions are not ", |
|
664 |
+ # "compatible with any of the precalculated ones. They ", |
|
665 |
+ # "will be calculated on the fly which is a lenghty ", |
|
666 |
+ # "process. If you wish to use precalculated flanking ", |
|
667 |
+ # "regions, stop the execution and adjust the flanking ", |
|
668 |
+ # "parameter.",immediate.=TRUE) |
|
669 |
+ # getMainRnaRangesOnTheFly(helperRanges,flank,rc=rc) |
|
670 |
+ # },finally="" |
|
671 |
+ # ) |
|
672 |
+ #} |
|
673 |
+ input <- coverageRnaRef(input,mainRanges,strandedParams,rc=rc) |
|
674 |
+ #} |
|
558 | 675 |
# If normalization method is linear, we must adjust the coverages |
559 | 676 |
if (preprocessParams$normalize=="linear") { |
560 | 677 |
linFac <- calcLinearFactors(input,preprocessParams) |
... | ... |
@@ -582,11 +699,11 @@ recoup <- function( |
582 | 699 |
if (region=="genebody") |
583 | 700 |
mustBin <- TRUE |
584 | 701 |
if (region=="custom") { |
585 |
- w <- width(genomeRanges) |
|
702 |
+ #w <- width(genomeRanges) |
|
703 |
+ w <- width(mainRanges) |
|
586 | 704 |
if (any(w!=w[1])) |
587 | 705 |
mustBin <- TRUE |
588 | 706 |
} |
589 |
- |
|
590 | 707 |
if (mustBin) { |
591 | 708 |
if (binParams$regionBinSize==0) { |
592 | 709 |
warning("Central region bin size not set for a region that must ", |
... | ... |
@@ -594,6 +711,16 @@ recoup <- function( |
594 | 711 |
binParams$regionBinSize <- 1000 |
595 | 712 |
} |
596 | 713 |
} |
714 |
+ |
|
715 |
+ # Chunking? |
|
716 |
+ if (binParams$chunking) { |
|
717 |
+ if (type=="chipseq") |
|
718 |
+ binParams$chunks <- split(1:length(genomeRanges), |
|
719 |
+ as.character(seqnames(genomeRanges))) |
|
720 |
+ else if (type=="rnaseq") |
|
721 |
+ binParams$chunks <- split(1:length(helperRanges), |
|
722 |
+ as.character(seqnames(helperRanges))) |
|
723 |
+ } |
|
597 | 724 |
input <- profileMatrix(input,flank,binParams,rc) |
598 | 725 |
|
599 | 726 |
# Perform the k-means clustering if requested and append to design (which |
... | ... |
@@ -700,7 +827,7 @@ recoup <- function( |
700 | 827 |
# binSize=binParams$forcedBinSize[1], |
701 | 828 |
# stat=binParams$sumStat,rc=rc) |
702 | 829 |
right <- binCoverageMatrix( |
703 |
- input[[n]]$coverage,binSize=forcedBinSize[1], |
|
830 |
+ input[[n]]$coverage,binSize=binParams$forcedBinSize[1], |
|
704 | 831 |
stat=binParams$sumStat, |
705 | 832 |
interpolation=binParams$interpolation,flank=flank, |
706 | 833 |
where="downstream",rc=rc |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/recoup@115628 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -18,7 +18,6 @@ recoup <- function( |
18 | 18 |
flankBinSize=0, |
19 | 19 |
regionBinSize=0, |
20 | 20 |
sumStat=c("mean","median"), |
21 |
- smooth=TRUE, |
|
22 | 21 |
interpolation=c("auto","spline","linear","neighborhood"), |
23 | 22 |
forceHeatmapBinning=TRUE, |
24 | 23 |
forcedBinSize=c(50,200) |
... | ... |
@@ -46,6 +45,8 @@ recoup <- function( |
46 | 45 |
heatmapScale=c("common","each"), |
47 | 46 |
heatmapFactor=1, |
48 | 47 |
corrScale=c("normalized","each"), |
48 |
+ sumStat=c("mean","median"), |
|
49 |
+ smooth=TRUE, |
|
49 | 50 |
corrSmoothPar=ifelse(is.null(design),0.1,0.5), |
50 | 51 |
singleFacet=c("none","wrap","grid"), |
51 | 52 |
multiFacet=c("wrap","grid"), |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/recoup@115380 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -31,8 +31,9 @@ recoup <- function( |
31 | 31 |
sampleTo=1e+6, |
32 | 32 |
spliceAction=c("split","keep","remove"), |
33 | 33 |
spliceRemoveQ=0.75, |
34 |
- bedGenome=ifelse(genome %in% c("hg18","hg19","hg38","mm9","mm10","rn5", |
|
35 |
- "dm3","danrer7","pantro4","susscr3"),genome,NA), |
|
34 |
+ #bedGenome=ifelse(genome %in% c("hg18","hg19","hg38","mm9","mm10","rn5", |
|
35 |
+ # "dm3","danrer7","pantro4","susscr3"),genome,NA), |
|
36 |
+ bedGenome=NA, |
|
36 | 37 |
seed=42 |
37 | 38 |
), |
38 | 39 |
#preprocessParams=getDefaultListArgs("preprocessParams"), |
... | ... |
@@ -470,7 +471,7 @@ recoup <- function( |
470 | 471 |
|
471 | 472 |
# At this point we must apply the fraction parameter if <1. We choose this |
472 | 473 |
# point in order not to restrict later usage of the read ranges and since it |
473 |
- # does not take much time to load into memory |
|
474 |
+ # does not take much time to load into memory. |
|
474 | 475 |
if (fraction<1) { |
475 | 476 |
newSize <- round(fraction*length(genomeRanges)) |
476 | 477 |
set.seed(preprocessParams$seed) |
... | ... |
@@ -479,10 +480,12 @@ recoup <- function( |
479 | 480 |
if (type=="rnaseq") |
480 | 481 |
helperRanges <- helperRanges[refIndex] |
481 | 482 |
for (i in 1:length(input)) { |
482 |
- newSize <- round(fraction*length(input[[i]]$ranges)) |
|
483 |
- set.seed(preprocessParams$seed) |
|
484 |
- fracIndex <- sort(sample(length(input[[i]]$ranges),newSize)) |
|
485 |
- input[[i]]$ranges <- input[[i]]$ranges[fracIndex] |
|
483 |
+ if (!is.null(input[[i]]$ranges)) { |
|
484 |
+ newSize <- round(fraction*length(input[[i]]$ranges)) |
|
485 |
+ set.seed(preprocessParams$seed) |
|
486 |
+ fracIndex <- sort(sample(length(input[[i]]$ranges),newSize)) |
|
487 |
+ input[[i]]$ranges <- input[[i]]$ranges[fracIndex] |
|
488 |
+ } |
|
486 | 489 |
if (!is.null(input[[i]]$coverage)) { |
487 | 490 |
#if (!is.null(input[[i]]$coverage$center)) { |
488 | 491 |
# input[[i]]$coverage$center <- |
... | ... |
@@ -498,7 +501,10 @@ recoup <- function( |
498 | 501 |
|
499 | 502 |
# Remove unwanted seqnames from reference ranges |
500 | 503 |
chrs <- unique(unlist(lapply(input,function(x) { |
501 |
- return(as.character(runValue(seqnames(x$ranges)))) |
|
504 |
+ if (x$format %in% c("bam","bed")) |
|
505 |
+ return(as.character(runValue(seqnames(x$ranges)))) |
|
506 |
+ else if (x$format=="bigwig") |
|
507 |
+ return(as.character(seqnames(seqinfo(BigWigFile(x$file))))) |
|
502 | 508 |
}))) |
503 | 509 |
if (type=="chipseq") { |
504 | 510 |
keep <- which(as.character(seqnames(genomeRanges)) %in% chrs) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/recoup@115200 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,817 @@ |
1 |
+recoup <- function( |
|
2 |
+ input, |
|
3 |
+ design=NULL, |
|
4 |
+ region=c("genebody","tss","tes","custom"), |
|
5 |
+ type=c("chipseq","rnaseq"), |
|
6 |
+ genome=c("hg18","hg19","hg38","mm9","mm10","rn5","dm3","danrer7","pantro4", |
|
7 |
+ "susscr3"), |
|
8 |
+ refdb=c("ensembl","ucsc","refseq"), |
|
9 |
+ flank=c(2000,2000), |
|
10 |
+ fraction=1, |
|
11 |
+ orderBy=list( |
|
12 |
+ what=c("none","suma","sumn","maxa","maxn","avga","avgn","hcn"), |
|
13 |
+ order=c("descending","ascending"), |
|
14 |
+ custom=NULL |
|
15 |
+ ), |
|
16 |
+ #orderBy=getDefaultListArgs("orderBy"), |
|
17 |
+ binParams=list( |
|
18 |
+ flankBinSize=0, |
|
19 |
+ regionBinSize=0, |
|
20 |
+ sumStat=c("mean","median"), |
|
21 |
+ smooth=TRUE, |
|
22 |
+ interpolation=c("auto","spline","linear","neighborhood"), |
|
23 |
+ forceHeatmapBinning=TRUE, |
|
24 |
+ forcedBinSize=c(50,200) |
|
25 |
+ ), |
|
26 |
+ #binParams=getDefaultListArgs("binParams"), |
|
27 |
+ selector=NULL, |
|
28 |
+ #selector=getDefaultListArgs("selector"), |
|
29 |
+ preprocessParams=list( |
|
30 |
+ normalize=c("none","linear","downsample","sampleto"), |
|
31 |
+ sampleTo=1e+6, |
|
32 |
+ spliceAction=c("split","keep","remove"), |
|
33 |
+ spliceRemoveQ=0.75, |
|
34 |
+ bedGenome=ifelse(genome %in% c("hg18","hg19","hg38","mm9","mm10","rn5", |
|
35 |
+ "dm3","danrer7","pantro4","susscr3"),genome,NA), |
|
36 |
+ seed=42 |
|
37 |
+ ), |
|
38 |
+ #preprocessParams=getDefaultListArgs("preprocessParams"), |
|
39 |
+ plotParams=list( |
|
40 |
+ plot=TRUE, |
|
41 |
+ profile=TRUE, |
|
42 |
+ heatmap=TRUE, |
|
43 |
+ correlation=TRUE, |
|
44 |
+ signalScale=c("natural","log2"), |
|
45 |
+ heatmapScale=c("common","each"), |
|
46 |
+ heatmapFactor=1, |
|
47 |
+ corrScale=c("normalized","each"), |
|
48 |
+ corrSmoothPar=ifelse(is.null(design),0.1,0.5), |
|
49 |
+ singleFacet=c("none","wrap","grid"), |
|
50 |
+ multiFacet=c("wrap","grid"), |
|
51 |
+ conf=TRUE, |
|
52 |
+ device=c("x11","png","jpg","tiff","bmp","pdf","ps"), |
|
53 |
+ outputDir=".", |
|
54 |
+ outputBase=NULL |
|
55 |
+ ), |
|
56 |
+ #plotParams=getDefaultListArgs("plotParams",design), |
|
57 |
+ saveParams=list( |
|
58 |
+ ranges=TRUE, |
|
59 |
+ coverage=TRUE, |
|
60 |
+ profile=TRUE, |
|
61 |
+ profilePlot=TRUE, |
|
62 |
+ heatmapPlot=TRUE, |
|
63 |
+ correlationPlot=TRUE |
|
64 |
+ ), |
|
65 |
+ #saveParams=getDefaultListArgs("saveParams"), |
|
66 |
+ kmParams=list( |
|
67 |
+ k=0, # Do not perform kmeans |
|
68 |
+ nstart=20, |
|
69 |
+ algorithm=c("Hartigan-Wong","Lloyd","Forgy","MacQueen"), |
|
70 |
+ iterMax=20, |
|
71 |
+ reference=NULL, |
|
72 |
+ seed=42 |
|
73 |
+ ), |
|
74 |
+ #kmParams=getDefaultListArgs("kmParams"), |
|
75 |
+ strandedParams=list( |
|
76 |
+ strand=NULL, |
|
77 |
+ ignoreStrand=TRUE |
|
78 |
+ ), |
|
79 |
+ #strandedParams=getDefaultListArgs("strandedParams"), |
|
80 |
+ ggplotParams=list( |
|
81 |
+ title=element_text(size=12), |
|
82 |
+ axis.title.x=element_text(size=10,face="bold"), |
|
83 |
+ axis.title.y=element_text(size=10,face="bold"), |
|
84 |
+ axis.text.x=element_text(size=9,face="bold"), |
|
85 |
+ axis.text.y=element_text(size=10,face="bold"), |
|
86 |
+ strip.text.x=element_text(size=10,face="bold"), |
|
87 |
+ strip.text.y=element_text(size=10,face="bold"), |
|
88 |
+ legend.position="bottom", |
|
89 |
+ panel.margin=grid::unit(1,"lines") |
|
90 |
+ ), |
|
91 |
+ #ggplotParams=getDefaultListArgs("ggplotParams"), |
|
92 |
+ complexHeatmapParams=list( |
|
93 |
+ main=list( |
|
94 |
+ cluster_rows=ifelse(length(grep("hc",orderBy$what))>0,TRUE,FALSE), |
|
95 |
+ cluster_columns=FALSE, |
|
96 |
+ column_title_gp=grid::gpar(fontsize=10,font=2), |
|
97 |
+ show_row_names=FALSE, |
|
98 |
+ show_column_names=FALSE, |
|
99 |
+ heatmap_legend_param=list( |
|
100 |
+ color_bar="continuous" |
|
101 |
+ ) |
|
102 |
+ ), |
|
103 |
+ group=list( |
|
104 |
+ cluster_rows=ifelse(length(grep("hc",orderBy$what))>0,TRUE,FALSE), |
|
105 |
+ cluster_columns=FALSE, |
|
106 |
+ column_title_gp=grid::gpar(fontsize=10,font=2), |
|
107 |
+ show_row_names=FALSE, |
|
108 |
+ show_column_names=FALSE, |
|
109 |
+ row_title_gp=grid::gpar(fontsize=8,font=2), |
|
110 |
+ gap=unit(5,"mm"), |
|
111 |
+ heatmap_legend_param=list( |
|
112 |
+ color_bar="continuous" |
|
113 |
+ ) |
|
114 |
+ ) |
|
115 |
+ ), |
|
116 |
+ #complexHeatmapParams=getDefaultListArgs("complexHeatmapParams"), |
|
117 |
+ bamParams=NULL, |
|
118 |
+ onTheFly=FALSE, # Directly from BAM w/o Ranges storing, also N/A with BED, |
|
119 |
+ localDbHome=file.path(path.expand("~"),".recoup"), |
|
120 |
+ rc=NULL |
|
121 |
+) { |
|
122 |
+ ############################################################################ |
|
123 |
+ # Begin of simple parameter checking for a new or a restored object |
|
124 |
+ if (is.list(input) && !is.null(input$data)) { # Refeeding recoup object |
|
125 |
+ message("Detected a previous recoup output as input. Any existing ", |
|
126 |
+ "data requiring\nlengthy calculations (short reads, coverages and ", |
|
127 |
+ "profile matrices will be\nreused depending on other parameters. ", |
|
128 |
+ "If you want a complete recalculation,\neither input a fresh list ", |
|
129 |
+ "of arguments or remove any of the above with the\nremoveData ", |
|
130 |
+ "function.\n") |
|
131 |
+ prevCallParams <- input$callopts |
|
132 |
+ input <- input$data |
|
133 |
+ } |
|
134 |
+ else |
|
135 |
+ prevCallParams <- NULL |
|
136 |
+ if (!is.list(input) && file.exists(input)) |
|
137 |
+ input <- readConfig(input) |
|
138 |
+ checkInput(input) |
|
139 |
+ if (is.null(names(input))) |
|
140 |
+ names(input) <- sapply(input,function(x) return(x$id)) |
|
141 |
+ |
|
142 |
+ # Check if there are any mispelled or invalid parameters and throw a warning |
|
143 |
+ checkMainArgs(as.list(match.call())) |
|
144 |
+ |
|
145 |
+ # Check rest of arguments |
|
146 |
+ region <- tolower(region[1]) |
|
147 |
+ refdb <- tolower(refdb[1]) |
|
148 |
+ type <- tolower(type[1]) |
|
149 |
+ if (!is.null(design) && !is.data.frame(design)) |
|
150 |
+ checkFileArgs("design",design) |
|
151 |
+ if (!is.data.frame(genome) && file.exists(genome)) |
|
152 |
+ checkFileArgs("genome",genome) |
|
153 |
+ else if (is.character(genome)) |
|
154 |
+ checkTextArgs("genome",genome,c("hg18","hg19","hg38","mm9","mm10","rn5", |
|
155 |
+ "dm3","danrer7","pantro4","susscr3","tair10"),multiarg=FALSE) |
|
156 |
+ checkTextArgs("refdb",refdb,c("ensembl","ucsc","refseq"),multiarg=FALSE) |
|
157 |
+ checkTextArgs("type",type,c("chipseq","rnaseq"),multiarg=FALSE) |
|
158 |
+ checkNumArgs("fraction",fraction,"numeric",c(0,1),"botheq") |
|
159 |
+ if (any(flank<0)) |
|
160 |
+ stop("The minimum flanking allowed is 0 bp") |
|
161 |
+ if (any(flank>50000)) |
|
162 |
+ stop("The maximum flanking allowed is 50000 bp") |
|
163 |
+ |
|
164 |
+ # If type is rnaseq, only genebody plots are valid |
|
165 |
+ if (type=="rnaseq" && region!="genebody") { |
|
166 |
+ warning("When type is \"rnaseq\", plots can be created only on ", |
|
167 |
+ "genebodies! Switching to genebody regions...",immediate.=TRUE) |
|
168 |
+ region <- "genebody" |
|
169 |
+ } |
|
170 |
+ |
|
171 |
+ # and list arguments |
|
172 |
+ orderByDefault <- getDefaultListArgs("orderBy") |
|
173 |
+ binParamsDefault <- getDefaultListArgs("binParams") |
|
174 |
+ selectorDefault <- getDefaultListArgs("selector") |
|
175 |
+ preprocessParamsDefault <- getDefaultListArgs("preprocessParams", |
|
176 |
+ genome=genome) |
|
177 |
+ plotParamsDefault <- getDefaultListArgs("plotParams",design=design) |
|
178 |
+ saveParamsDefault <- getDefaultListArgs("saveParams") |
|
179 |
+ kmParamsDefault <- getDefaultListArgs("kmParams") |
|
180 |
+ strandedParamsDefault <- getDefaultListArgs("strandedParams") |
|
181 |
+ |
|
182 |
+ orderBy <- setArg(orderByDefault,orderBy) |
|
183 |
+ binParams <- setArg(binParamsDefault,binParams) |
|
184 |
+ if (!is.null(selector)) |
|
185 |
+ selector <- setArg(selectorDefault,selector) |
|
186 |
+ preprocessParams <- setArg(preprocessParamsDefault,preprocessParams) |
|
187 |
+ plotParams <- setArg(plotParamsDefault,plotParams) |
|
188 |
+ saveParams <- setArg(saveParamsDefault,saveParams) |
|
189 |
+ kmParams <- setArg(kmParamsDefault,kmParams) |
|
190 |
+ strandedParams <- setArg(strandedParamsDefault,strandedParams) |
|
191 |
+ |
|
192 |
+ orderBy <- validateListArgs("orderBy",orderBy) |
|
193 |
+ binParams <- validateListArgs("binParams",binParams) |
|
194 |
+ if (!is.null(selector)) |
|
195 |
+ selector <- validateListArgs("selector",selector) |
|
196 |
+ preprocessParams <- validateListArgs("preprocessParams",preprocessParams) |
|
197 |
+ plotParams <- validateListArgs("plotParams",plotParams) |
|
198 |
+ saveParams <- validateListArgs("saveParams",saveParams) |
|
199 |
+ kmParams <- validateListArgs("kmParams",kmParams) |
|
200 |
+ strandedParams <- validateListArgs("strandedParams",strandedParams) |
|
201 |
+ |
|
202 |
+ if (is.null(plotParams$outputBase)) |
|
203 |
+ plotParams$outputBase <- paste(sapply(input,function(x) return(x$id)), |
|
204 |
+ collapse="-") |
|
205 |
+ |
|
206 |
+ if (any(sapply(input,function(x) return(tolower(x$format)=="bed"))) |
|
207 |
+ && !(preprocessParams$bedGenome %in% c("hg18","hg19","hg38","mm9", |
|
208 |
+ "mm10","rn5","dm3","danrer7","pantro4","susscr3"))) |
|
209 |
+ stop("When short read files are in BED format, either the genome ", |
|
210 |
+ "parameter should be one\nof the supported organisms, or ", |
|
211 |
+ "preprocessParams$bedGenome must be specified as one of them.") |
|
212 |
+ |
|
213 |
+ # End of simple parameter checking for a new or a restored object |
|
214 |
+ ############################################################################ |
|
215 |
+ |
|
216 |
+ ############################################################################ |
|
217 |
+ # Begin of complex parameter storage procedure and parameter recall from a |
|
218 |
+ # previous call |
|
219 |
+ |
|
220 |
+ # Store all parameters to an obect for later reference to a next call, after |
|
221 |
+ # checking if something has changed (e.g. using setr)... |
|
222 |
+ thisCall <- as.list(match.call())[-1] |
|
223 |
+ if (!is.null(prevCallParams)) { |
|
224 |
+ callParams <- list( |
|
225 |
+ region=if (is.null(thisCall$region)) prevCallParams$region else |
|
226 |
+ region, |
|
227 |
+ type=if (is.null(thisCall$type)) prevCallParams$type else type, |
|
228 |
+ genome=if (is.null(thisCall$genome)) prevCallParams$genome else |
|
229 |
+ genome, |
|
230 |
+ refdb=if (is.null(thisCall$refdb)) prevCallParams$refdb else |
|
231 |
+ refdb, |
|
232 |
+ flank=if (is.null(thisCall$flank)) prevCallParams$flank else flank, |
|
233 |
+ fraction=if (is.null(thisCall$fraction)) prevCallParams$fraction |
|
234 |
+ else fraction, |
|
235 |
+ orderBy=if (is.null(thisCall$orderBy)) prevCallParams$orderBy else |
|
236 |
+ orderBy, |
|
237 |
+ binParams=if (is.null(thisCall$binParams)) prevCallParams$binParams |
|
238 |
+ else binParams, |
|
239 |
+ selector=selector, # selector is a special case |
|
240 |
+ preprocessParams=if (is.null(thisCall$preprocessParams)) |
|
241 |
+ prevCallParams$preprocessParams else preprocessParams, |
|
242 |
+ plotParams=if (is.null(thisCall$plotParams)) |
|
243 |
+ prevCallParams$plotParams else plotParams, |
|
244 |
+ saveParams=if (is.null(thisCall$saveParams)) |
|
245 |
+ prevCallParams$saveParams else saveParams, |
|
246 |
+ kmParams=if (is.null(thisCall$kmParams)) |
|
247 |
+ prevCallParams$kmParams else kmParams, |
|
248 |
+ strandedParams=if (is.null(thisCall$strandedParams)) |
|
249 |
+ prevCallParams$strandedParams else strandedParams, |
|
250 |
+ ggplotParams=if (is.null(thisCall$ggplotParams)) |
|
251 |
+ prevCallParams$ggplotParams else ggplotParams, |
|
252 |
+ complexHeatmapParams=if (is.null(thisCall$complexHeatmapParams)) |
|
253 |
+ prevCallParams$complexHeatmapParams else complexHeatmapParams, |
|
254 |
+ #bamParams=if (is.null(thisCall$bamParams), |
|
255 |
+ # prevCallParams$bamParams,bamParams), ## Unused |
|
256 |
+ onTheFly=if (is.null(thisCall$onTheFly)) prevCallParams$onTheFly |
|
257 |
+ else onTheFly, |
|
258 |
+ localDbHome=if (is.null(thisCall$localDbHome)) |
|
259 |
+ prevCallParams$localDbHome else localDbHome, |
|
260 |
+ rc=if (is.null(thisCall$rc)) prevCallParams$rc else rc, |
|
261 |
+ customIsBase=NULL # Additional placeholder |
|
262 |
+ ) |
|
263 |
+ } |
|
264 |
+ else { |
|
265 |
+ callParams <- list( |
|
266 |
+ region=region, |
|
267 |
+ type=type, |
|
268 |
+ genome=genome, |
|
269 |
+ refdb=refdb, |
|
270 |
+ flank=flank, |
|
271 |
+ fraction=fraction, |
|
272 |
+ orderBy=orderBy, |
|
273 |
+ binParams=binParams, |
|
274 |
+ selector=selector, |
|
275 |
+ preprocessParams=preprocessParams, |
|
276 |
+ plotParams=plotParams, |
|
277 |
+ saveParams=saveParams, |
|
278 |
+ kmParams=kmParams, |
|
279 |
+ strandedParams=strandedParams, |
|
280 |
+ ggplotParams=ggplotParams, |
|
281 |
+ complexHeatmapParams=complexHeatmapParams, |
|
282 |
+ bamParams=bamParams, |
|
283 |
+ onTheFly=onTheFly, |
|
284 |
+ localDbHome=localDbHome, |
|
285 |
+ rc=rc, |
|
286 |
+ customIsBase=NULL # Additional placeholder |
|
287 |
+ ) |
|
288 |
+ } |
|
289 |
+ |
|
290 |
+ # ...and check if there has been a previous call and decide on big |
|
291 |
+ # recalculations... |
|
292 |
+ input <- decideChanges(input,callParams,prevCallParams) |
|
293 |
+ |
|
294 |
+ # Redefine all final arguments for this call |
|
295 |
+ region=callParams$region |
|
296 |
+ type=callParams$type |
|
297 |
+ genome=callParams$genome |
|
298 |
+ refdb=callParams$refdb |
|
299 |
+ flank=callParams$flank |
|
300 |
+ fraction=callParams$fraction |
|
301 |
+ orderBy=callParams$orderBy |
|
302 |
+ binParams=callParams$binParams |
|
303 |
+ selector=callParams$selector |
|
304 |
+ preprocessParams=callParams$preprocessParams |
|
305 |
+ plotParams=callParams$plotParams |
|
306 |
+ saveParams=callParams$saveParams |
|
307 |
+ kmParams=callParams$kmParams |
|
308 |
+ strandedParams=callParams$strandedParams |
|
309 |
+ ggplotParams=callParams$ggplotParams |
|
310 |
+ complexHeatmapParams=callParams$complexHeatmapParams |
|
311 |
+ bamParams=callParams$bamParams |
|
312 |
+ onTheFly=callParams$onTheFly |
|
313 |
+ localDbHome=callParams$localDbHome |
|
314 |
+ rc=callParams$rc |
|
315 |
+ # End of complex parameter storage procedure and parameter recall from a |
|
316 |
+ # previous call |
|
317 |
+ ############################################################################ |
|
318 |
+ |
|
319 |
+ # Continue with actual work |
|
320 |
+ if (!is.data.frame(genome)) { |
|
321 |
+ if (file.exists(genome)) { |
|
322 |
+ genome <- read.delim(genome) # Must be bed like |
|
323 |
+ rownames(genome) <- as.character(genome[,4]) |
|
324 |
+ genomeRanges <- makeGRangesFromDataFrame( |
|
325 |
+ df=genome, |
|
326 |
+ keep.extra.columns=TRUE |
|
327 |
+ ) |
|
328 |
+ } |
|
329 |
+ else { |
|
330 |
+ # Check if local storage has been set |
|
331 |
+ if (dir.exists(file.path(localDbHome,refdb,genome))) { |
|
332 |
+ if (type=="chipseq") { |
|
333 |
+ g <- load(file.path(localDbHome,refdb,genome,"gene.rda")) |
|
334 |
+ genomeRanges <- gene |
|
335 |
+ } |
|
336 |
+ else if (type=="rnaseq") { |
|
337 |
+ g <- load(file.path(localDbHome,refdb,genome, |
|
338 |
+ "summarized_exon.rda")) |
|
339 |
+ genomeRanges <- sexon |
|
340 |
+ g <- load(file.path(localDbHome,refdb,genome,"gene.rda")) |
|
341 |
+ helperRanges <- gene |
|
342 |
+ } |
|
343 |
+ } |
|
344 |
+ else { # On the fly |
|
345 |
+ message("Getting annotation on the fly for ",genome," from ", |
|
346 |
+ refdb) |
|
347 |
+ if (type=="chipseq") |
|
348 |
+ genome <- getAnnotation(genome,"gene",refdb=refdb,rc=rc) |
|
349 |
+ else if (type=="rnaseq") { |
|
350 |
+ helper <- getAnnotation(genome,"gene",refdb=refdb,rc=rc) |
|
351 |
+ helperRanges <- makeGRangesFromDataFrame( |
|
352 |
+ df=helper, |
|
353 |
+ keep.extra.columns=TRUE, |
|
354 |
+ seqnames.field="chromosome" |
|
355 |
+ ) |
|
356 |
+ names(helperRanges) <- as.character(helperRanges$gene_id) |
|
357 |
+ genome <- getAnnotation(genome,"exon",refdb=refdb,rc=rc) |
|
358 |
+ ann.gr <- makeGRangesFromDataFrame( |
|
359 |
+ df=genome, |
|
360 |
+ keep.extra.columns=TRUE, |
|
361 |
+ seqnames.field="chromosome" |
|
362 |
+ ) |
|
363 |
+ message("Merging exons") |
|
364 |
+ ann.gr <- reduceExons(ann.gr,rc=rc) |
|
365 |
+ names(ann.gr) <- as.character(ann.gr$exon_id) |
|
366 |
+ genomeRanges <- split(ann.gr,ann.gr$gene_id) |
|
367 |
+ } |
|
368 |
+ } |
|
369 |
+ } |
|
370 |
+ } |
|
371 |
+ else { |
|
372 |
+ rownames(genome) <- as.character(genome[,4]) |
|
373 |
+ genomeRanges <- makeGRangesFromDataFrame( |
|
374 |
+ df=genome, |
|
375 |
+ keep.extra.columns=TRUE |
|
376 |
+ ) |
|
377 |
+ } |
|
378 |
+ |
|
379 |
+ # After genome read, check if we have a valid custom orderer |
|
380 |
+ if (!is.null(orderBy$custom)) { |
|
381 |
+ if (length(orderBy$custom) != nrow(genome)) { |
|
382 |
+ warning("The custom orderer provide with orderBy parameter does ", |
|
383 |
+ "not have length equal to the number of elements in genome ", |
|
384 |
+ "argument and will be ignored!",immediate.=TRUE) |
|
385 |
+ orderBy$custom <- NULL |
|
386 |
+ } |
|
387 |
+ } |
|
388 |
+ |
|
389 |
+ # Read and check design compatibilities. Check if k-means is requested and |
|
390 |
+ # message accordingly. If k-means is requested it will be added to the |
|
391 |
+ # design data frame |
|
392 |
+ hasDesign <- FALSE |
|
393 |
+ if (!is.null(design)) { |
|
394 |
+ if (!is.data.frame(design)) |
|
395 |
+ design <- read.delim(design,row.names=1) |
|
396 |
+ nfac <- ncol(design) |
|
397 |
+ if (length(input)>1 && nfac>2) |
|
398 |
+ stop("When more than one files are provided for coverage ", |
|
399 |
+ "generation, the maximum number of allowed design factors is 2") |
|
400 |
+ if (length(input)>1 && nfac>1 && kmParams$k>0) |
|
401 |
+ stop("When more than one files are provided for coverage ", |
|
402 |
+ "generation and k-means clustering is also requested, the ", |
|
403 |
+ "maximum number of allowed design factors is 1") |
|
404 |
+ if (length(input)==1 && nfac>3) |
|
405 |
+ stop("The maximum number of allowed design factors is 3") |
|
406 |
+ if (length(input)==1 && nfac>2 && kmParams$k>0) |
|
407 |
+ stop("The maximum number of allowed design factors when k-means ", |
|
408 |
+ "clustering is requested is 2") |
|
409 |
+ if (length(input)==1 && nfac>2 && plotParams$singleFacet!="none") |
|
410 |
+ stop("The maximum number of allowed design factors whith one ", |
|
411 |
+ "sample but wishing a gridded profile layout is 2") |
|
412 |
+ if (length(input)==1 && nfac>1 && kmParams$k>0 |
|
413 |
+ && plotParams$singleFacet!="none") |
|
414 |
+ stop("The maximum number of allowed design factors whith one ", |
|
415 |
+ "sample but wishing for k-means clustering and gridded ", |
|
416 |
+ "profile layout is 1") |
|
417 |
+ # Reduce the genomeRanges according to design or the other way |
|
418 |
+ if (nrow(design)>length(genomeRanges)) |
|
419 |
+ design <- tryCatch({ |
|
420 |
+ design[names(genomeRanges),,drop=FALSE] |
|
421 |
+ },error=function(e) { |
|
422 |
+ stop("Unexpected error occured! Are you sure that element ", |
|
423 |
+ "(row) names in the design file are of the same type as ", |
|
424 |
+ "the genome file?") |
|
425 |
+ },finally={}) |
|
426 |
+ else if (nrow(design)<=length(genomeRanges)) { |
|
427 |
+ genomeRanges <- tryCatch({ |
|
428 |
+ genomeRanges[rownames(design)] |
|
429 |
+ },error=function(e) { |
|
430 |
+ stop("Unexpected error occured! Are you sure that element ", |
|
431 |
+ "(row) names in the design file are of the same type as ", |
|
432 |
+ "the genome file?") |
|
433 |
+ },finally={}) |
|
434 |
+ if (type=="rnaseq") |
|
435 |
+ helperRanges <- tryCatch({ |
|
436 |
+ helperRanges[rownames(design)] |
|
437 |
+ },error=function(e) { |
|
438 |
+ stop("Unexpected error occured! Are you sure that element ", |
|
439 |
+ "(row) names in the design file are of the same type as ", |
|
440 |
+ "the genome file?") |
|
441 |
+ },finally={}) |
|
442 |
+ } |
|
443 |
+ # ...but maybe the names are incompatible |
|
444 |
+ if (length(genomeRanges)==0) |
|
445 |
+ stop("No ranges left after using the identifiers provided with ", |
|
446 |
+ "the design file. Are you sure the identifiers between the ", |
|
447 |
+ "two files are compatible?") |
|
448 |
+ if (nrow(design)==0) |
|
449 |
+ stop("No design elements left after using the identifiers ", |
|
450 |
+ "provided with the genome file. Are you sure the identifiers ", |
|
451 |
+ "between the two files are compatible?") |
|
452 |
+ } |
|
453 |
+ |
|
454 |
+ # Apply the rest of the filters if any to reduce later computational burden |
|
455 |
+ if (!is.null(selector)) { |
|
456 |
+ if (type=="chipseq") |
|
457 |
+ genomeRanges <- applySelectors(genomeRanges,selector,rc=rc) |
|
458 |
+ if (type=="rnaseq") { |
|
459 |
+ helperRanges <- applySelectors(helperRanges,selector) |
|
460 |
+ # Filter and align names if we have helperRanges around |
|
461 |
+ genomeRanges <- genomeRanges[names(helperRanges)] |
|
462 |
+ } |
|
463 |
+ } |
|
464 |
+ |
|
465 |
+ # Here we must write code for the reading and normalization of bam files |
|
466 |
+ # The preprocessRanges function looks if there is a valid (not null) ranges |
|
467 |
+ # field in input |
|
468 |
+ #if (!onTheFly) |
|
469 |
+ input <- preprocessRanges(input,preprocessParams,bamParams=bamParams,rc=rc) |
|
470 |
+ |
|
471 |
+ # At this point we must apply the fraction parameter if <1. We choose this |
|
472 |
+ # point in order not to restrict later usage of the read ranges and since it |
|
473 |
+ # does not take much time to load into memory |
|
474 |
+ if (fraction<1) { |
|
475 |
+ newSize <- round(fraction*length(genomeRanges)) |
|
476 |
+ set.seed(preprocessParams$seed) |
|
477 |
+ refIndex <- sort(sample(length(genomeRanges),newSize)) |
|
478 |
+ genomeRanges <- genomeRanges[refIndex] |
|
479 |
+ if (type=="rnaseq") |
|
480 |
+ helperRanges <- helperRanges[refIndex] |
|
481 |
+ for (i in 1:length(input)) { |
|
482 |
+ newSize <- round(fraction*length(input[[i]]$ranges)) |
|
483 |
+ set.seed(preprocessParams$seed) |
|
484 |
+ fracIndex <- sort(sample(length(input[[i]]$ranges),newSize)) |
|
485 |
+ input[[i]]$ranges <- input[[i]]$ranges[fracIndex] |
|
486 |
+ if (!is.null(input[[i]]$coverage)) { |
|
487 |
+ #if (!is.null(input[[i]]$coverage$center)) { |
|
488 |
+ # input[[i]]$coverage$center <- |
|
489 |
+ # input[[i]]$coverage$center[refIndex] |
|
490 |
+ #} |
|
491 |
+ #else |
|
492 |
+ input[[i]]$coverage <- input[[i]]$coverage[refIndex] |
|
493 |
+ } |
|
494 |
+ if (!is.null(input[[i]]$profile)) |
|
495 |
+ input[[i]]$profile <- input[[i]]$profile[refIndex,] |
|
496 |
+ } |
|
497 |
+ } |
|
498 |
+ |
|
499 |
+ # Remove unwanted seqnames from reference ranges |
|
500 |
+ chrs <- unique(unlist(lapply(input,function(x) { |
|
501 |
+ return(as.character(runValue(seqnames(x$ranges)))) |
|
502 |
+ }))) |
|
503 |
+ if (type=="chipseq") { |
|
504 |
+ keep <- which(as.character(seqnames(genomeRanges)) %in% chrs) |
|
505 |
+ genomeRanges <- genomeRanges[keep] |
|
506 |
+ } |
|
507 |
+ else if (type=="rnaseq") { |
|
508 |
+ keeph <- which(as.character(seqnames(helperRanges)) %in% chrs) |
|
509 |
+ helperRanges <- helperRanges[keeph] |
|
510 |
+ genomeRanges <- genomeRanges[names(helperRanges)] |
|
511 |
+ ######################################################################## |
|
512 |
+ ## There must be an R bug with `lengths` here as although it runs in |
|
513 |
+ ## Rcmd, it does not pass package building or vignette kniting... But |
|
514 |
+ ## for the time being it seems that it is not needed as the name |
|
515 |
+ ## filtering works |
|
516 |
+ #lens <- which(lengths(genomeRanges)==0) |
|
517 |
+ #if (length(lens)>0) |
|
518 |
+ # genomeRanges[lens] <- NULL |
|
519 |
+ ######################################################################## |
|
520 |
+ } |
|
521 |
+ |
|
522 |
+ # Now we must follow two paths according to region type, genebody and custom |
|
523 |
+ # areas with equal/unequal widths, or tss, tes and 1-width custom areas |
|
524 |
+ callParams$customIsBase <- FALSE |
|
525 |
+ if (region=="custom" && all(width(genomeRanges)==1)) { |
|
526 |
+ if (all(flank==0)) { |
|
527 |
+ warning("Flanking cannot be zero bp in both directions when the ", |
|
528 |
+ "reference region is only 1bp! Setting to default ", |
|
529 |
+ "(2000,2000)...",immediate.=TRUE) |
|
530 |
+ flank <- c(2000,2000) |
|
531 |
+ callParams$flank <- flank |
|
532 |
+ } |
|
533 |
+ callParams$customIsBase <- TRUE |
|
534 |
+ } |
|
535 |
+ if (region %in% c("tss","tes")) { |
|
536 |
+ if (all(flank==0)) { |
|
537 |
+ warning("Flanking cannot be zero bp in both directions when the ", |
|
538 |
+ "reference region is \"tss\" or \"tes\"! Setting to default ", |
|
539 |
+ "(2000,2000)...", immediate.=TRUE) |
|
540 |
+ flank <- c(2000,2000) |
|
541 |
+ callParams$flank <- flank |
|
542 |
+ } |
|
543 |
+ } |
|
544 |
+ if (type=="chipseq") |
|
545 |
+ input <- coverageRef(input,genomeRanges,region,flank,strandedParams, |
|
546 |
+ rc=rc)#,bamParams) |
|
547 |
+ else if (type=="rnaseq") |
|
548 |
+ input <- coverageRnaRef(input,genomeRanges,helperRanges,flank, |
|
549 |
+ strandedParams,rc=rc)#,bamParams) |
|
550 |
+ |
|
551 |
+ # If normalization method is linear, we must adjust the coverages |
|
552 |
+ if (preprocessParams$normalize=="linear") { |
|
553 |
+ linFac <- calcLinearFactors(input,preprocessParams) |
|
554 |
+ for (n in names(input)) { |
|
555 |
+ if (linFac[n]==1) |
|
556 |
+ next |
|
557 |
+ #if (is.null(input[[n]]$coverage$center)) |
|
558 |
+ input[[n]]$coverage <- cmclapply(input[[n]]$coverage, |
|
559 |
+ function(x,f) { |
|
560 |
+ return(x*f) |
|
561 |
+ },linFac[n] |
|
562 |
+ ) |
|
563 |
+ #else { |
|
564 |
+ # input[[n]]$coverage$center <- |
|
565 |
+ # cmclapply(input[[n]]$coverage$center,function(x,f) { |
|
566 |
+ # return(x*f) |
|
567 |
+ # },linFac[n]) |
|
568 |
+ #} |
|
569 |
+ } |
|
570 |
+ } |
|
571 |
+ |
|
572 |
+ # Now we must summarize and create the matrices. If genebody or unequal |
|
573 |
+ # custom lengths, bin is a must, else we leave to user |
|
574 |
+ mustBin <- FALSE |
|
575 |
+ if (region=="genebody") |
|
576 |
+ mustBin <- TRUE |
|
577 |
+ if (region=="custom") { |
|
578 |
+ w <- width(genomeRanges) |
|
579 |
+ if (any(w!=w[1])) |
|
580 |
+ mustBin <- TRUE |
|
581 |
+ } |
|
582 |
+ |
|
583 |
+ if (mustBin) { |
|
584 |
+ if (binParams$regionBinSize==0) { |
|
585 |
+ warning("Central region bin size not set for a region that must ", |
|
586 |
+ "be binned! Setting to 1000...",immediate.=TRUE) |
|
587 |
+ binParams$regionBinSize <- 1000 |
|
588 |
+ } |
|
589 |
+ } |
|
590 |
+ input <- profileMatrix(input,flank,binParams,rc) |
|
591 |
+ |
|
592 |
+ # Perform the k-means clustering if requested and append to design (which |
|
593 |
+ # has been checked, if we are allowed to do so) |
|
594 |
+ if (kmParams$k>0) |
|
595 |
+ design <- kmeansDesign(input,design,kmParams) |
|
596 |
+ |
|
597 |
+ # Coverages and profiles calculated... Now depending on plot option, we go |
|
598 |
+ # further or return the enriched input object for saving |
|
599 |
+ if (!plotParams$profile && !plotParams$heatmap) { |
|
600 |
+ recoupObj <- toOutput(input,design,saveParams,callParams=callParams) |
|
601 |
+ return(recoupObj) |
|
602 |
+ } |
|
603 |
+ else { |
|
604 |
+ recoupObj <- toOutput(input,design,list(ranges=TRUE,coverage=TRUE, |
|
605 |
+ profile=TRUE),callParams=callParams) |
|
606 |
+ } |
|
607 |
+ |
|
608 |
+ ## Our plot objects |
|
609 |
+ recoupPlots <- list() |
|
610 |
+ |
|
611 |
+ # We must pass the matrices to plotting function |
|
612 |
+ if (plotParams$profile) { |
|
613 |
+ message("Constructing genomic coverage profile curve(s)") |
|
614 |
+ #theProfile <- recoupProfile(recoupObj,rc=rc) |
|
615 |
+ recoupObj <- recoupProfile(recoupObj,rc=rc) |
|
616 |
+ theProfile <- getr(recoupObj,"profile") |
|
617 |
+ recoupPlots$profilePlot <- theProfile |
|
618 |
+ } |
|
619 |
+ |
|
620 |
+ # Some default binning MUST be applied for the heatmap... Otherwise it could |
|
621 |
+ # take insanely long time and space to draw/store |
|
622 |
+ if (plotParams$heatmap) { |
|
623 |
+ # Inform the user about enforced binning (or not) |
|
624 |
+ if (region %in% c("tss","tes") || callParams$customIsBase) { |
|
625 |
+ if (binParams$regionBinSize==0 && binParams$forceHeatmapBinning) { |
|
626 |
+ message("The resolution of the requested profiles will be ", |
|
627 |
+ "lowered to avoid\nincreased computation time and/or ", |
|
628 |
+ "storage space for heatmap profiles...") |
|
629 |
+ |
|
630 |
+ } |
|
631 |
+ else if (binParams$regionBinSize==0 |
|
632 |
+ && !binParams$forceHeatmapBinning) |
|
633 |
+ warning("forceHeatmapBinning is turned off for high ", |
|
634 |
+ "resolution plotting. Be prepared for\nlong computational ", |
|
635 |
+ "times and big figures!",immediate.=TRUE) |
|
636 |
+ } |
|
637 |
+ else { |
|
638 |
+ if ((binParams$regionBinSize==0 || binParams$flankBinSize==0) |
|
639 |
+ && binParams$forceHeatmapBinning) { |
|
640 |
+ message("The resolution of the requested profiles will be ", |
|
641 |
+ "lowered to avoid\nincreased computation time and/or ", |
|
642 |
+ "storage space for heatmap profiles...") |
|
643 |
+ |
|
644 |
+ } |
|
645 |
+ else if ((binParams$regionBinSize==0 || binParams$flankBinSize==0) |
|
646 |
+ && !binParams$forceHeatmapBinning) |
|
647 |
+ warning("forceHeatmapBinning is turned off for high ", |
|
648 |
+ "resolution plotting. Be prepared for\nlong computational ", |
|
649 |
+ "times and big figures!",immediate.=TRUE) |
|
650 |
+ } |
|
651 |
+ |
|
652 |
+ if (binParams$forceHeatmapBinning |
|
653 |
+ && (binParams$regionBinSize==0 || binParams$flankBinSize==0)) { |
|
654 |
+ helpInput <- recoupObj$data |
|
655 |
+ if (region %in% c("tss","tes") || callParams$customIsBase) { |
|
656 |
+ for (n in names(helpInput)) { |
|
657 |
+ message("Calculating ",region," profile for ", |
|
658 |
+ helpInput[[n]]$name) |
|
659 |
+ helpInput[[n]]$profile <- |
|
660 |
+ binCoverageMatrix(helpInput[[n]]$coverage, |
|
661 |
+ binSize=binParams$forcedBinSize[2], |
|
662 |
+ stat=binParams$sumStat,rc=rc) |
|
663 |
+ helpInput[[n]]$profile <- helpInput[[n]]$profile |
|
664 |
+ } |
|
665 |
+ } |
|
666 |
+ else { |
|
667 |
+ for (n in names(helpInput)) { |
|
668 |
+ message("Calculating ",region," profile for ", |
|
669 |
+ helpInput[[n]]$name) |
|
670 |
+ message(" center") |
|
671 |
+ #center <- binCoverageMatrix(helpInput[[n]]$coverage$center, |
|
672 |
+ # binSize=binParams$forcedBinSize[2], |
|
673 |
+ # stat=binParams$sumStat,rc=rc) |
|
674 |
+ center <- binCoverageMatrix( |
|
675 |
+ input[[n]]$coverage,binSize=binParams$forcedBinSize[2], |
|
676 |
+ stat=binParams$sumStat, |
|
677 |
+ interpolation=binParams$interpolation, |
|
678 |
+ flank=flank,where="center",rc=rc |
|
679 |
+ ) |
|
680 |
+ message(" upstream") |
|
681 |
+ #left <- binCoverageMatrix(helpInput[[n]]$coverage$upstream, |
|
682 |
+ # binSize=binParams$forcedBinSize[1], |
|
683 |
+ # stat=binParams$sumStat,rc=rc) |
|
684 |
+ left <- binCoverageMatrix( |
|
685 |
+ input[[n]]$coverage,binSize=binParams$forcedBinSize[1], |
|
686 |
+ stat=binParams$sumStat, |
|
687 |
+ interpolation=binParams$interpolation,flank=flank, |
|
688 |
+ where="upstream",rc=rc |
|
689 |
+ ) |
|
690 |
+ message(" downstream") |
|
691 |
+ #right <- binCoverageMatrix( |
|
692 |
+ # helpInput[[n]]$coverage$downstream, |
|
693 |
+ # binSize=binParams$forcedBinSize[1], |
|
694 |
+ # stat=binParams$sumStat,rc=rc) |
|
695 |
+ right <- binCoverageMatrix( |
|
696 |
+ input[[n]]$coverage,binSize=forcedBinSize[1], |
|
697 |
+ stat=binParams$sumStat, |
|
698 |
+ interpolation=binParams$interpolation,flank=flank, |
|
699 |
+ where="downstream",rc=rc |
|
700 |
+ ) |
|
701 |
+ helpInput[[n]]$profile <- cbind(left,center,right) |
|
702 |
+ rownames(helpInput[[n]]$profile) <- |
|
703 |
+ names(input[[n]]$coverage) |
|
704 |
+ helpInput[[n]]$profile <- helpInput[[n]]$profile |
|
705 |
+ } |
|
706 |
+ } |
|
707 |
+ } |
|
708 |
+ else |
|
709 |
+ helpInput <- recoupObj$data |
|
710 |
+ |
|
711 |
+ helpObj <- recoupObj |
|
712 |
+ helpObj$data <- helpInput |
|
713 |
+ message("Constructing genomic coverage heatmap(s)") |
|
714 |
+ #theHeatmap <- recoupHeatmap(helpObj,rc=rc) |
|
715 |
+ helpObj <- recoupHeatmap(helpObj,rc=rc) |
|
716 |
+ theHeatmap <- getr(helpObj,"heatmap") |
|
717 |
+ recoupObj <- setr(recoupObj,"heatmap",theHeatmap) |
|
718 |
+ recoupPlots$heatmapPlot <- theHeatmap |
|
719 |
+ |
|
720 |
+ # Derive the main heatmap in case of hierarchical clustering |
|
721 |
+ mainh <- 1 |
|
722 |
+ if (length(grep("hc",orderBy$what))>0) { |
|
723 |
+ nc <- nchar(orderBy$what) |
|
724 |
+ mh <- suppressWarnings(as.numeric(substr(orderBy$what,nc,nc))) |
|
725 |
+ if (is.na(mh)) |
|
726 |
+ warning("Reference profile for hierarchical clustering order ", |
|
727 |
+ "not recognized! Using the 1st...",immediate.=TRUE) |
|
728 |
+ else if (mh > length(input)) { |
|
729 |
+ warning("Reference profile (",mh,") for hierarchical ", |
|
730 |
+ "clustering order does not exist (the input has only ", |
|
731 |
+ length(input)," sources! Using the last...", |
|
732 |
+ immediate.=TRUE) |
|
733 |
+ mainh <- length(input) |
|
734 |
+ } |
|
735 |
+ else |
|
736 |
+ mainh <- mh |
|
737 |
+ } |
|
738 |
+ } |
|
739 |
+ |
|
740 |
+ # We must pass the matrices to plotting function |
|
741 |
+ if (plotParams$correlation) { |
|
742 |
+ message("Constructing coverage correlation profile curve(s)") |
|
743 |
+ recoupObj <- recoupCorrelation(recoupObj,rc=rc) |
|
744 |
+ theCorrelation <- getr(recoupObj,"correlation") |
|
745 |
+ recoupPlots$correlationPlot <- theCorrelation |
|
746 |
+ } |
|
747 |
+ |
|
748 |
+ # Overwrite final object so as to return it |
|
749 |
+ recoupObj <- toOutput(input,design,saveParams,recoupPlots,callParams) |
|
750 |
+ |
|
751 |
+ # Make any plots asked |
|
752 |
+ if (plotParams$plot) { |
|
753 |
+ what <- character(0) |
|
754 |
+ if (plotParams$profile) |
|
755 |
+ what <- c(what,"profile") |
|
756 |
+ if (plotParams$heatmap) |
|
757 |
+ what <- c(what,"heatmap") |
|
758 |
+ if (plotParams$correlation) |
|
759 |
+ what <- c(what,"correlation") |
|
760 |
+ if (length(what)>0) |
|
761 |
+ recoupPlot(recoupObj,what,plotParams$device,plotParams$outputDir, |
|
762 |
+ plotParams$outputBase,mainh) |
|
763 |
+ } |
|
764 |
+ |
|
765 |
+ # Return the enriched input object according to save options |
|
766 |
+ return(recoupObj) |
|
767 |
+} |
|
768 |
+ |
|
769 |
+applySelectors <- function(ranges,selector,rc=NULL) { |
|
770 |
+ if (!is.null(selector$id)) { |
|
771 |
+ ranges <- ranges[selector$ids] |
|
772 |
+ if (length(ranges)==0) |
|
773 |
+ stop("No ranges left after using the identifiers provided with ", |
|
774 |
+ "the selector field. Are you sure the identifiers between the ", |
|
775 |
+ "two files are compatible?") |
|
776 |
+ } |
|
777 |
+ if (!is.null(selector$biotype) && !is.null(ranges$biotype)) { |
|
778 |
+ good <- which(ranges$biotype %in% selector$biotype) |
|
779 |
+ ranges <- ranges[good] |
|
780 |
+ if (length(ranges)==0) |
|
781 |
+ stop("No ranges left after using the biotypes provided with the ", |
|
782 |
+ "selector field. Are you sure the identifiers between the two ", |
|
783 |
+ "files are compatible?") |
|
784 |
+ } |
|
785 |
+ if (!is.null(selector$exonType) && !is.null(ranges$exon_type)) { |
|
786 |
+ good <- which(ranges$exonType %in% selector$exonType) |
|
787 |
+ ranges <- ranges[good] |
|
788 |
+ if (length(ranges)==0) |
|
789 |
+ stop("No ranges left after using the exon types provided with ", |
|
790 |
+ "the selector field. Are you sure the identifiers between the ", |
|
791 |
+ "two files are compatible?") |
|
792 |
+ } |
|
793 |
+ return(ranges) |
|
794 |
+} |
|
795 |
+ |
|
796 |
+toOutput <- function(input,design=NULL,saveParams,plotObjs=NULL, |
|
797 |
+ callParams=NULL) { |
|
798 |
+ if (!saveParams$ranges) |
|
799 |
+ input <- removeData(input,"ranges") |
|
800 |
+ if (!saveParams$coverage) |
|
801 |
+ input <- removeData(input,"coverage") |
|
802 |
+ if (!saveParams$profile) |
|
803 |
+ input <- removeData(input,"profile") |
|
804 |
+ plots <- list(profile=NULL,heatmap=NULL,correlation=NULL) |
|
805 |
+ if (!is.null(plotObjs) && saveParams$profilePlot) |
|
806 |
+ plots$profile <- plotObjs$profilePlot |
|
807 |
+ if (!is.null(plotObjs) && saveParams$heatmapPlot) |
|
808 |
+ plots$heatmap <- plotObjs$heatmapPlot |
|
809 |
+ if (!is.null(plotObjs) && saveParams$correlationPlot) |
|
810 |
+ plots$correlation <- plotObjs$correlationPlot |
|
811 |
+ return(list( |
|
812 |
+ data=input, |
|
813 |
+ design=design, |
|
814 |
+ plots=plots, |
|
815 |
+ callopts=callParams |
|
816 |
+ )) |
|
817 |
+} |