git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45083 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -413,3 +413,19 @@ then readIDAT() should work. Thanks to Pierre Cherel who reported this error. |
413 | 413 |
|
414 | 414 |
2010-03-05 M. Ritchie - committed version 1.5.29 |
415 | 415 |
* crlmmIlluminaV2() now exported. Added man page crlmmIlluminaV2.Rd |
416 |
+ |
|
417 |
+2010-03-07 R. Scharpf committed version 1.5.30 |
|
418 |
+ |
|
419 |
+- one can use ff in conjunction with affy platforms 5.0 and 6.0 |
|
420 |
+ |
|
421 |
+- more s4-style code |
|
422 |
+ |
|
423 |
+- preprocessing / genotyping is basically the same set of commands with either illumina/affy platforms (though illumina-users may have to play with some of the options for reading idat files |
|
424 |
+ |
|
425 |
+- if ff package is loaded, the assayData elements are ff objects |
|
426 |
+ |
|
427 |
+- the classes all inherit from 'CrlmmContainer' that contains an additional slot 'options' and 'genomeAnnotation'. options is a list with the default arguments to snprma, crlmm, etc, as well as a few global settings such as 'verbose' and 'seed'. I added the genomeAnnotation slot simply because I want to be able to use ff-objects for the feature-level data. Maybe with setClassUnion we could avoid adding the genomeAnnotation slot (and use featureData instead), but I didn't have much success with this. |
|
428 |
+ |
|
429 |
+- the batchSize argument will run the genotyping (crlmmGT) in batches to reduce the RAM. The default is batches of size 1000. |
|
430 |
+ |
|
431 |
+- The crlmm.Rd file contains an example with / without ff for Affymetrix data. |
... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
Package: crlmm |
2 | 2 |
Type: Package |
3 | 3 |
Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays. |
4 |
-Version: 1.5.29 |
|
5 |
-Date: 2010-03-05 |
|
4 |
+Version: 1.5.30 |
|
5 |
+Date: 2010-03-07 |
|
6 | 6 |
Author: Rafael A Irizarry, Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au> |
7 | 7 |
Maintainer: Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
8 | 8 |
Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 5.0, 6.0, and Illumina platforms |
... | ... |
@@ -19,16 +19,21 @@ Imports: affyio (>= 1.15.2), |
19 | 19 |
splines, |
20 | 20 |
mvtnorm, |
21 | 21 |
ellipse, |
22 |
- SNPchip |
|
23 |
-Enhances: ff |
|
22 |
+ SNPchip,ff |
|
24 | 23 |
Suggests: hapmapsnp5, |
25 | 24 |
hapmapsnp6, |
26 | 25 |
genomewidesnp5Crlmm (>= 1.0.2), |
27 | 26 |
genomewidesnp6Crlmm (>= 1.0.2), |
28 | 27 |
snpMatrix, |
29 | 28 |
metaArray |
30 |
-Collate: AllGenerics.R |
|
29 |
+Collate: AllClasses.R |
|
30 |
+ AllGenerics.R |
|
31 |
+ methods-AffymetrixAlleleSet.R |
|
32 |
+ methods-IlluminaAlleleSet.R |
|
33 |
+ methods-CrlmmContainer.R |
|
31 | 34 |
methods-CNSet.R |
35 |
+ methods-AlleleSet.R |
|
36 |
+ methods-CallSet.R |
|
32 | 37 |
methods-eSet.R |
33 | 38 |
methods-SnpSuperSet.R |
34 | 39 |
cnrma-functions.R |
... | ... |
@@ -8,11 +8,12 @@ importClassesFrom(Biobase, AnnotatedDataFrame, AssayData, eSet, SnpSet, |
8 | 8 |
|
9 | 9 |
importMethodsFrom(Biobase, annotation, "annotation<-", |
10 | 10 |
annotatedDataFrameFrom, assayData, "assayData<-", |
11 |
+ snpCallProbability, |
|
11 | 12 |
combine, dims, experimentData, "experimentData<-", |
12 | 13 |
fData, featureData, "featureData<-", featureNames, |
13 | 14 |
fvarMetadata, fvarLabels, pData, phenoData, |
14 | 15 |
"phenoData<-", protocolData, "protocolData<-", |
15 |
- pubMedIds, rowMedians, sampleNames, storageMode, |
|
16 |
+ pubMedIds, rowMedians, sampleNames, snpCall, storageMode, |
|
16 | 17 |
"storageMode<-", updateObject, varLabels) |
17 | 18 |
|
18 | 19 |
importFrom(Biobase, assayDataElement, assayDataElementNames, |
... | ... |
@@ -20,11 +21,16 @@ importFrom(Biobase, assayDataElement, assayDataElementNames, |
20 | 21 |
validMsg) |
21 | 22 |
|
22 | 23 |
## oligoClasses |
23 |
-importClassesFrom(oligoClasses, SnpSuperSet, AlleleSet) |
|
24 |
+##importClassesFrom(oligoClasses, SnpSuperSet, AlleleSet, CNSet) |
|
24 | 25 |
|
25 |
-importMethodsFrom(oligoClasses, allele, calls, "calls<-", confs, |
|
26 |
- "confs<-", cnConfidence, "cnConfidence<-", isSnp, |
|
27 |
- chromosome, position, CA, "CA<-", CB, "CB<-", A, B) |
|
26 |
+##S3 method ffdf and class ffdf |
|
27 |
+importFrom(ff, ffdf, ff, as.ff, as.ffdf) |
|
28 |
+ |
|
29 |
+ |
|
30 |
+ |
|
31 |
+##importMethodsFrom(oligoClasses, allele, calls, "calls<-", confs, |
|
32 |
+## "confs<-", cnConfidence, "cnConfidence<-", isSnp, |
|
33 |
+## chromosome, position, CA, "CA<-", CB, "CB<-", A, B) |
|
28 | 34 |
|
29 | 35 |
importFrom(oligoClasses, chromosome2integer, celfileDate, list.celfiles, |
30 | 36 |
copyNumber) |
... | ... |
@@ -60,8 +66,24 @@ importFrom(mvtnorm, dmvnorm) |
60 | 66 |
## ellipse |
61 | 67 |
importFrom(ellipse, ellipse) |
62 | 68 |
|
63 |
-exportMethods(copyNumber) |
|
64 |
-export(cnOptions, crlmm, crlmmIllumina, crlmmIlluminaV2, crlmmCopynumber, ellipse, readIdatFiles, snprma, getParam) |
|
69 |
+ |
|
70 |
+##exportClasses(FFSet) |
|
71 |
+exportMethods(annotatedDataFrameFrom, |
|
72 |
+ copyNumber, initialize, |
|
73 |
+ ##show, "$", "[[", "[", |
|
74 |
+ genomeAnnotation, |
|
75 |
+ lM, |
|
76 |
+ CA, |
|
77 |
+ CB, |
|
78 |
+ A, |
|
79 |
+ B, |
|
80 |
+ snpCall, |
|
81 |
+ confs, |
|
82 |
+ chromosome, |
|
83 |
+ position, |
|
84 |
+ isSnp) |
|
85 |
+export(crlmmOptions, crlmm, crlmmCopynumber, ellipse, readIdatFiles, snprma, getParam, validCdfNames) |
|
86 |
+ |
|
65 | 87 |
|
66 | 88 |
|
67 | 89 |
############# |
... | ... |
@@ -69,10 +91,5 @@ export(cnOptions, crlmm, crlmmIllumina, crlmmIlluminaV2, crlmmCopynumber, ellips |
69 | 91 |
############# |
70 | 92 |
|
71 | 93 |
##export everything that does not start with a . |
72 |
-##exportPattern("^[^\\.]") |
|
94 |
+exportPattern("^[^\\.]") |
|
73 | 95 |
|
74 |
-##export(thresholdModelParams, computeCopynumber.CNSet, nuphiAllele, coefs, biasAdjNP, |
|
75 |
-## nonpolymorphic.poe, crlmmWrapper, |
|
76 |
-## loadIlluminaCallSet, loadIlluminaRG, loadIlluminaCnrma, |
|
77 |
-## cnrma, |
|
78 |
-## crlmmGT, oneBatch) |
79 | 96 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,74 @@ |
1 |
+setOldClass("ffdf") |
|
2 |
+setOldClass("ff_matrix") |
|
3 |
+setClassUnion("matrix_or_ff", c("matrix", "ff_matrix")) |
|
4 |
+setClassUnion("list_or_ffdf", c("list", "ffdf")) |
|
5 |
+setClass("CrlmmContainer", contains="eSet", |
|
6 |
+ representation(options="list", |
|
7 |
+ genomeAnnotation="ANY", |
|
8 |
+ "VIRTUAL")) |
|
9 |
+ |
|
10 |
+setMethod("show", "CrlmmContainer", function(object){ |
|
11 |
+ callNextMethod(object) |
|
12 |
+ cat("options: \n") |
|
13 |
+ print(names(crlmmOptions(object))) |
|
14 |
+ cat("\n") |
|
15 |
+ cat("genomeAnnotation:", nrow(genomeAnnotation(object)), " rows, ", ncol(genomeAnnotation(object)), " columns\n") |
|
16 |
+ print(genomeAnnotation(object)[1:5, ]) |
|
17 |
+ cat("\n") |
|
18 |
+}) |
|
19 |
+setClass("AlleleSet", contains="CrlmmContainer") |
|
20 |
+setClass("CallSet", contains="AlleleSet") |
|
21 |
+setClass("CNSet", contains="CallSet", |
|
22 |
+ representation(lM="list_or_ffdf")) |
|
23 |
+ |
|
24 |
+setClass("IlluminaRGSet", contains="CrlmmContainer") |
|
25 |
+setClass("IlluminaXYSet", contains="CrlmmContainer") |
|
26 |
+ |
|
27 |
+setClass("AffymetrixAlleleSet", contains="AlleleSet") ##AffymetrixAlleleSet |
|
28 |
+setClass("IlluminaAlleleSet", contains="AlleleSet") |
|
29 |
+##setClass("AffymetrixBigData", contains="AffymetrixAlleleSet") |
|
30 |
+##setClass("AffymetrixSmallData", contains="AffymetrixAlleleSet") |
|
31 |
+##setClass("IlluminaSmallData", contains="IlluminaAlleleSet") |
|
32 |
+##setClass("IlluminaBigData", contains="IlluminaAlleleSet") |
|
33 |
+##setMethod("initialize", "AffymetrixBigData", function(.Object, annotation){ |
|
34 |
+## .Object <- callNextMethod(.Object) |
|
35 |
+## if(!missing(annotation)) annotation(.Object) <- annotation |
|
36 |
+## .Object |
|
37 |
+##}) |
|
38 |
+##setClass("AffymetrixCallSet", contains="CallSet") |
|
39 |
+##setClass("IlluminaCallSet", contains="CallSet") |
|
40 |
+setMethod("initialize", "AlleleSet", function(.Object, alleleA=new("matrix"), alleleB=new("matrix"), ...){ |
|
41 |
+ .Object <- callNextMethod(.Object, alleleA=alleleA, alleleB=alleleB, ...) |
|
42 |
+ storageMode(.Object) <- "environment" |
|
43 |
+ .Object |
|
44 |
+}) |
|
45 |
+setMethod("initialize", "CallSet", function(.Object, call=new("matrix"), callProbability=new("matrix"), ...){ |
|
46 |
+ .Object <- callNextMethod(.Object, call=call, callProbability=callProbability, ...) |
|
47 |
+ storageMode(.Object) <- "environment" |
|
48 |
+ .Object |
|
49 |
+}) |
|
50 |
+setMethod("initialize", "CNSet", function(.Object, CA=new("matrix"), CB=new("matrix"), lM=new("list"), ...){ |
|
51 |
+ .Object <- callNextMethod(.Object, CA=CA, CB=CB, lM=lM,...) |
|
52 |
+ storageMode(.Object) <- "environment" |
|
53 |
+ .Object |
|
54 |
+}) |
|
55 |
+setValidity("AlleleSet", function(object) { |
|
56 |
+ assayDataValidMembers(assayData(object), c("alleleA", "alleleB")) |
|
57 |
+}) |
|
58 |
+setValidity("IlluminaRGSet", function(object) { |
|
59 |
+ assayDataValidMembers(assayData(object), c("R", "G", "zero")) |
|
60 |
+}) |
|
61 |
+setValidity("IlluminaXYSet", function(object) { |
|
62 |
+ assayDataValidMembers(assayData(object), c("X", "Y", "zero")) |
|
63 |
+}) |
|
64 |
+ |
|
65 |
+setValidity("CallSet", function(object) { |
|
66 |
+ assayDataValidMembers(assayData(object), c("alleleA", "alleleB", "call", "callProbability")) |
|
67 |
+}) |
|
68 |
+setValidity("CNSet", function(object) { |
|
69 |
+ assayDataValidMembers(assayData(object), c("alleleA", "alleleB", "call", "callProbability", "CA", "CB")) |
|
70 |
+}) |
|
71 |
+ |
|
72 |
+ |
|
73 |
+ |
|
74 |
+ |
... | ... |
@@ -1,12 +1,57 @@ |
1 |
-##setGeneric("A<-", function(object, value) standardGeneric("A<-")) |
|
2 |
-##setGeneric("B<-", function(object, value) standardGeneric("B<-")) |
|
1 |
+setGeneric("A<-", function(object, value) standardGeneric("A<-")) |
|
2 |
+setGeneric("B<-", function(object, value) standardGeneric("B<-")) |
|
3 | 3 |
|
4 | 4 |
setGeneric("getParam", function(object, name, ...) standardGeneric("getParam")) |
5 | 5 |
setGeneric("cnIndex", function(object) standardGeneric("cnIndex")) |
6 | 6 |
setGeneric("cnNames", function(object) standardGeneric("cnNames")) |
7 |
-setGeneric("computeCopynumber", function(object, cnOptions) standardGeneric("computeCopynumber")) |
|
7 |
+setGeneric("confs", function(object) standardGeneric("confs")) |
|
8 |
+setGeneric("computeCopynumber", function(object) standardGeneric("computeCopynumber")) |
|
9 |
+setGeneric("crlmm", function(object, ...) standardGeneric("crlmm")) |
|
10 |
+setGeneric("crlmmOptions", function(object) standardGeneric("crlmmOptions")) |
|
11 |
+setGeneric("crlmmOptions<-", function(object, value) standardGeneric("crlmmOptions<-")) |
|
12 |
+setGeneric("construct", function(object, filenames) standardGeneric("construct")) |
|
13 |
+ |
|
14 |
+setGeneric("getOptions", function(object) standardGeneric("getOptions")) |
|
15 |
+ |
|
16 |
+##setGeneric("getA", function(object) standardGeneric("getA")) |
|
17 |
+##setGeneric("getB", function(object) standardGeneric("getB")) |
|
18 |
+##setGeneric("getG", function(object) standardGeneric("getG")) |
|
19 |
+##setGeneric("getR", function(object) standardGeneric("getR")) |
|
20 |
+##setGeneric("getZero", function(object) standardGeneric("getZero")) |
|
21 |
+##setGeneric("getCalls", function(object) standardGeneric("getCalls")) |
|
22 |
+##setGeneric("getConfs", function(object) standardGeneric("getConfs")) |
|
23 |
+##setGeneric("getCA", function(object) standardGeneric("getCA")) |
|
24 |
+##setGeneric("getCB", function(object) standardGeneric("getCB")) |
|
25 |
+setGeneric("getPhenoData", function(object) standardGeneric("getPhenoData")) |
|
26 |
+setGeneric("getFeatureData", function(object) standardGeneric("getFeatureData")) |
|
27 |
+setGeneric("getProtocolData", function(object, filenames) standardGeneric("getProtocolData")) |
|
28 |
+setGeneric("getGenomeAnnotation", function(object, ...) standardGeneric("getGenomeAnnotation")) |
|
29 |
+setGeneric("getLinearModelParam", function(object, ...) standardGeneric("getLinearModelParam")) |
|
30 |
+ |
|
31 |
+##setGeneric("initializeStorage", function(object) standardGeneric("initializeStorage")) |
|
32 |
+setGeneric("prediction", function(x, ...) standardGeneric("prediction")) |
|
33 |
+setGeneric("genomeAnnotation", function(object) standardGeneric("genomeAnnotation")) |
|
34 |
+setGeneric("genomeAnnotation<-", function(object,value) standardGeneric("genomeAnnotation<-")) |
|
35 |
+setGeneric("lM", function(object) standardGeneric("lM")) |
|
36 |
+setGeneric("lM<-", function(object, value) standardGeneric("lM<-")) |
|
37 |
+ |
|
38 |
+##setGeneric("nFeatures", function(object) standardGeneric("nFeatures")) |
|
39 |
+ |
|
8 | 40 |
setGeneric("pr", function(object, name, batch, value) standardGeneric("pr")) |
41 |
+setGeneric("rma", function(object) standardGeneric("rma")) |
|
42 |
+setGeneric("snprma", function(object, ...) standardGeneric("snprma")) |
|
9 | 43 |
setGeneric("snpIndex", function(object) standardGeneric("snpIndex")) |
10 | 44 |
setGeneric("snpNames", function(object) standardGeneric("snpNames")) |
11 | 45 |
##setGeneric("splitByChromosome", function(object, ...) standardGeneric("splitByChromosome")) |
12 | 46 |
|
47 |
+setGeneric("R", function(object) standardGeneric("R")) |
|
48 |
+setGeneric("G", function(object) standardGeneric("G")) |
|
49 |
+setGeneric("Z", function(object) standardGeneric("Z")) |
|
50 |
+setGeneric("X", function(object) standardGeneric("X")) |
|
51 |
+setGeneric("Y", function(object) standardGeneric("Y")) |
|
52 |
+setMethod("R", "IlluminaRGSet", function(object) assayDataElement(object, "R")) |
|
53 |
+setMethod("G", "IlluminaRGSet", function(object) assayDataElement(object, "G")) |
|
54 |
+setMethod("Z", "IlluminaRGSet", function(object) assayDataElement(object, "zero")) |
|
55 |
+setMethod("X", "IlluminaXYSet", function(object) assayDataElement(object, "X")) |
|
56 |
+setMethod("Y", "IlluminaXYSet", function(object) assayDataElement(object, "Y")) |
|
57 |
+setMethod("Z", "IlluminaXYSet", function(object) assayDataElement(object, "zero")) |
... | ... |
@@ -153,366 +153,272 @@ predictGender <- function(res, cdfName="genomewidesnp6", SNRMin=5){ |
153 | 153 |
return(gender) |
154 | 154 |
} |
155 | 155 |
|
156 |
-##crlmmCopynumber <- function(filenames, cnOptions, object, ...){ |
|
157 |
-## if(!missing(object)){ |
|
158 |
-## stopifnot(class(object) == "CNSet") |
|
159 |
-## createIntermediateObjects <- FALSE |
|
160 |
-## } else { |
|
161 |
-## createIntermediateObjects <- TRUE |
|
162 |
-## ## 33G for 1239 files |
|
163 |
-## crlmmResults <- crlmmWrapper(filenames, cnOptions, ...) |
|
164 |
-## snprmaResult <- crlmmResults[["snprmaResult"]] |
|
165 |
-## cnrmaResult <- crlmmResults[["cnrmaResult"]] |
|
166 |
-## callSet <- crlmmResults[["callSet"]] |
|
167 |
-## rm(crlmmResults); gc() |
|
168 |
-## annotation(callSet) <- cnOptions[["cdfName"]] |
|
169 |
-## stopifnot(identical(featureNames(callSet), snprmaResult[["gns"]])) |
|
170 |
-## path <- system.file("extdata", package=paste(annotation(callSet), "Crlmm", sep="")) |
|
156 |
+ |
|
157 |
+##initializeFFObjects <- function(filenames, cnOptions){ |
|
158 |
+## outdir <- cnOptions[["outdir"]] |
|
159 |
+## cdfName <- cnOptions[["cdfName"]] |
|
160 |
+## AFile <- cnOptions[["AFile"]] |
|
161 |
+## BFile <- cnOptions[["BFile"]] |
|
162 |
+## callsFile <- cnOptions[["callsFile"]] |
|
163 |
+## confsFile <- cnOptions[["confsFile"]] |
|
164 |
+## snprmaFile <- cnOptions[["snprmaFile"]] |
|
165 |
+## cnrmaFile <- cnOptions[["cnrmaFile"]] |
|
166 |
+## CAFile <- cnOptions[["CAFile"]] |
|
167 |
+## CBFile <- cnOptions[["CBFile"]] |
|
168 |
+## load.it <- cnOptions[["load.it"]] |
|
169 |
+## fileExists <- list(A=file.exists(AFile), |
|
170 |
+## B=file.exists(BFile), |
|
171 |
+## calls=file.exists(callsFile), |
|
172 |
+## confs=file.exists(confsFile), |
|
173 |
+## CA=file.exists(CAFile), |
|
174 |
+## CB=file.exists(CBFile)) |
|
175 |
+## allExists <- all(unlist(fileExists)) |
|
176 |
+## ##if files already exist, check that the files have the appropriate dimension |
|
177 |
+## if(allExists){ |
|
178 |
+## load(AFile) |
|
179 |
+## open(A) |
|
180 |
+## sns <- dimnames(A)[[2]] |
|
181 |
+## if(!identical(sns, basename(filenames)) | !load.it){ |
|
182 |
+## ## if not of the same dimension, clean up |
|
183 |
+## message("Sample names in previously saved objects differ from the filenames. Removing previously saved objects.") |
|
184 |
+## delete(A); gc() |
|
185 |
+## unlink(AFile) |
|
186 |
+## load(BFile); delete(B); unlink(BFile) |
|
187 |
+## unlink(snprmaFile) |
|
188 |
+## unlink(cnrmaFile) |
|
189 |
+## if(file.exists(file.path(outdir, "cnParams.rda"))){ |
|
190 |
+## load(file.path(outdir, "cnParams.rda")) |
|
191 |
+## delete(cnParams); gc() |
|
192 |
+## unlink(file.path(outdir, "cnParams.rda")) |
|
193 |
+## } |
|
194 |
+## load(callsFile); delete(calls); unlink(callsFile) |
|
195 |
+## load(confsFile); delete(confs); unlink(confsFile) |
|
196 |
+## load(CAFile); delete(CA); unlink(CAFile) |
|
197 |
+## load(CBFile); delete(CB); unlink(CBFile) |
|
198 |
+## allExists <- FALSE |
|
199 |
+## } |
|
200 |
+## } |
|
201 |
+## if(!allExists) { |
|
202 |
+## message("Initializing ff objects for A, B, confs, calls, CA, and CB.") |
|
203 |
+## dns <- .dimnames(filenames, cnOptions[["cdfName"]], cnOptions[["verbose"]]) |
|
204 |
+## fns <- dns[[1]] |
|
205 |
+## } |
|
206 |
+## if(!file.exists(AFile)) {A <- initializeBigMatrix(dns); save(A, file=AFile); close(A)} |
|
207 |
+## if(!file.exists(BFile)) {B <- initializeBigMatrix(dns); save(B, file=BFile); close(B)} |
|
208 |
+## if(!file.exists(confsFile)) {confs <- initializeBigMatrix(dns); save(confs, file=confsFile); close(confs)} |
|
209 |
+## if(!file.exists(callsFile)) {calls <- initializeBigMatrix(dns); save(calls, file=callsFile); close(calls)} |
|
210 |
+## if(!file.exists(CAFile)) {CA <- initializeBigMatrix(dns); save(CA, file=CAFile); close(CA)} |
|
211 |
+## if(!file.exists(CBFile)) {CB <- initializeBigMatrix(dns); save(CB, file=CBFile); close(CB)} |
|
212 |
+## featureDataFile <- file.path(outdir, "featureDataFF.rda") |
|
213 |
+## if(!file.exists(featureDataFile)){ |
|
214 |
+## path <- system.file("extdata", package=paste(cnOptions[["cdfName"]], "Crlmm", sep="")) |
|
171 | 215 |
## load(file.path(path, "snpProbes.rda")) |
172 | 216 |
## snpProbes <- get("snpProbes") |
173 | 217 |
## load(file.path(path, "cnProbes.rda")) |
174 |
-## cnProbes <- get("cnProbes") |
|
175 |
-## k <- grep("chr", colnames(snpProbes)) |
|
176 |
-## if(length(k) < 1) stop("chr or chromosome not in colnames(snpProbes)") |
|
218 |
+## cnProbes <- get("cnProbes") |
|
219 |
+## message("Initializing featureDataFF.") |
|
220 |
+## fvarlabels <- c("chromosome", "position", "isSnp") |
|
221 |
+## M <- matrix(NA, length(fns), 3, dimnames=list(fns, fvarlabels)) |
|
222 |
+## index <- match(rownames(snpProbes), rownames(M)) #only snp probes in M get assigned position |
|
223 |
+## M[index, "position"] <- snpProbes[, grep("pos", colnames(snpProbes))] |
|
224 |
+## M[index, "chromosome"] <- snpProbes[, grep("chr", colnames(snpProbes))] |
|
225 |
+## M[index, "isSnp"] <- 1L |
|
226 |
+## index <- match(rownames(cnProbes), rownames(M)) #only snp probes in M get assigned position |
|
227 |
+## M[index, "position"] <- cnProbes[, grep("pos", colnames(cnProbes))] |
|
228 |
+## M[index, "chromosome"] <- cnProbes[, grep("chr", colnames(cnProbes))] |
|
229 |
+## M[index, "isSnp"] <- 0L |
|
230 |
+## featureDataFF <- ff(M, dim=c(nrow(M), ncol(M)), |
|
231 |
+## vmode="integer", finalizer="close", |
|
232 |
+## overwrite=TRUE, |
|
233 |
+## dimnames=list(fns, fvarlabels)) |
|
234 |
+## save(featureDataFF, file=file.path(outdir, "featureDataFF.rda")) |
|
235 |
+## close(featureDataFF) |
|
236 |
+## rm(M, cnProbes, snpProbes, featureDataFF); gc() |
|
177 | 237 |
## } |
238 |
+## ## parameters file |
|
239 |
+## parameterFile <- file.path(outdir, "cnParams.rda") |
|
240 |
+## if(!file.exists(parameterFile)) { |
|
241 |
+## message("Initializing parameters file") |
|
242 |
+## batch <- cnOptions[["batch"]] |
|
243 |
+## dns.batch <- list(fns, unique(batch)) |
|
244 |
+## cnParams <- initializeParamObject(dns.batch) |
|
245 |
+## save(cnParams, file=file.path(outdir, "cnParams.rda")) |
|
246 |
+## close(cnParams) |
|
247 |
+## } |
|
248 |
+##} |
|
249 |
+ |
|
250 |
+##preprocessAndGenotype <- function(filenames, cnOptions, ...){ |
|
178 | 251 |
## set.seed(cnOptions[["seed"]]) ##for reproducibility |
179 |
-## cnFile <- cnOptions[["cnFile"]] |
|
180 |
-## chromosome <- cnOptions[["chromosome"]] |
|
181 |
-## SNRmin <- cnOptions[["SNRmin"]] |
|
252 |
+## protocolFile <- cnOptions[["protocolFile"]] |
|
253 |
+## cdfName <- cnOptions[["cdfName"]] |
|
254 |
+## verbose <- cnOptions[["verbose"]] |
|
255 |
+## if(file.exists(protocolFile)){ |
|
256 |
+## ## check that file is the same dimension |
|
257 |
+## load(protocolFile) |
|
258 |
+## if(!identical(sampleNames(protocoldata), basename(filenames))) |
|
259 |
+## unlink(protocolFile) |
|
260 |
+## } |
|
261 |
+## if(!file.exists(protocolFile)){ |
|
262 |
+## platform <- whichPlatform(paste(cdfName, "Crlmm", sep="")) |
|
263 |
+## if(platform=="affymetrix"){ |
|
264 |
+## if(verbose) message("Creating protocol file with scan dates for the affy arrays") |
|
265 |
+## scanDates <- data.frame(ScanDate=sapply(filenames, celfileDate)) |
|
266 |
+## rownames(scanDates) <- basename(rownames(scanDates)) |
|
267 |
+## protocoldata <- new("AnnotatedDataFrame", |
|
268 |
+## data=scanDates, |
|
269 |
+## varMetadata=data.frame(labelDescription=colnames(scanDates), |
|
270 |
+## row.names=colnames(scanDates))) |
|
271 |
+## save(protocoldata, file=protocolFile) |
|
272 |
+## } |
|
273 |
+## ## protocol file for illumina saved during the readIdatFile step |
|
274 |
+## } |
|
275 |
+## if(isPackageLoaded("ff")) initializeFFObjects(filenames, cnOptions) |
|
276 |
+## crlmmWrapper(filenames, cnOptions, ...) |
|
277 |
+## message("Checking for required files...") |
|
278 |
+## message(cnOptions[["AFile"]], ": ", file.exists(cnOptions[["AFile"]])) |
|
279 |
+## message(cnOptions[["BFile"]], ": ", file.exists(cnOptions[["BFile"]])) |
|
280 |
+## message(cnOptions[["callsFile"]], ": ", file.exists(cnOptions[["callsFile"]])) |
|
281 |
+## message(cnOptions[["confsFile"]], ": ", file.exists(cnOptions[["confsFile"]])) |
|
282 |
+## message(cnOptions[["snprmaFile"]], ": ", file.exists(cnOptions[["snprmaFile"]])) |
|
283 |
+## message(cnOptions[["protocolFile"]], ": ", file.exists(cnOptions[["protocolFile"]])) |
|
284 |
+##} |
|
285 |
+ |
|
286 |
+##crlmmCopynumber <- function(cnOptions, ...){ |
|
287 |
+##crlmmCopynumber <- function(object){ |
|
288 |
+## ops <- crlmmOptions(object) |
|
289 |
+## verbose <- ops$verbose |
|
290 |
+## calls <- snpCall(object) |
|
291 |
+## confs <- confs(object) |
|
292 |
+## fns <- featureNames(object) |
|
293 |
+## SNRmin <- ops$SNRMin |
|
294 |
+## batch <- object$batch |
|
295 |
+## whichBatch <- ops$cnOpts$whichBatch |
|
296 |
+## chromosome <- ops$cnOpts$chromosome |
|
297 |
+## MIN.SAMPLES <- ops$cnOpts$MIN.SAMPLES |
|
298 |
+## ##k <- grep("chr", colnames(snpProbes)) |
|
182 | 299 |
## for(CHR in chromosome){ |
300 |
+## ##annotated snp and cn probes |
|
301 |
+## ##snps <- rownames(snpProbes)[snpProbes[, k] == CHR] |
|
302 |
+## ##cns <- rownames(cnProbes)[cnProbes[, k] == CHR] |
|
303 |
+## ##where are the annotated snps in the fns file |
|
304 |
+## ##index.snps <- match(snps, fns) |
|
305 |
+## ##index.cn <- match(cns, fns) |
|
306 |
+## ##row.index <- c(index.snps, index.cn) |
|
183 | 307 |
## cat("Chromosome ", CHR, "\n") |
184 |
-## if(createIntermediateObjects){ |
|
185 |
-## snps <- rownames(snpProbes)[snpProbes[, k] == CHR] |
|
186 |
-## cnps <- rownames(cnProbes)[cnProbes[, k] == CHR] |
|
187 |
-## index.snps <- match(snps, featureNames(callSet)) |
|
188 |
-## index.nps <- match(cnps, rownames(cnrmaResult[["NP"]])) |
|
189 |
-## if(!is.null(cnrmaResult)){ |
|
190 |
-## npI <- cnrmaResult$NP[index.nps, ] |
|
191 |
-## } else npI <- NULL |
|
192 |
-## snpI <- list(A=snprmaResult$A[index.snps, ], |
|
193 |
-## B=snprmaResult$B[index.snps, ], |
|
194 |
-## sns=snprmaResult$sns, |
|
195 |
-## gns=snprmaResult$gns[index.snps], |
|
196 |
-## SNR=snprmaResult$SNR, |
|
197 |
-## SKW=snprmaResult$SKW, |
|
198 |
-## mixtureParams=snprmaResult$mixtureParams, |
|
199 |
-## cdfName=snprmaResult$cdfName) |
|
200 |
-## cnOptions[["batch"]] <- cnOptions[["batch"]][snpI[["SNR"]] >= SNRmin] |
|
201 |
-## cnSet <- combineIntensities(res=snpI, |
|
202 |
-## NP=npI, |
|
203 |
-## callSet=callSet[index.snps, ]) |
|
204 |
-## if(any(cnSet$SNR > SNRmin)){ |
|
205 |
-## message(paste("Excluding samples with SNR < ", SNRmin)) |
|
206 |
-## cnSet <- cnSet[, cnSet$SNR >= SNRmin] |
|
207 |
-## } |
|
208 |
-## rm(snpI, npI, snps, cnps, index.snps, index.nps); gc() |
|
209 |
-## pData(cnSet)$batch <- cnOptions[["batch"]] |
|
210 |
-## featureData(cnSet) <- lm.parameters(cnSet, cnOptions) |
|
211 |
-## } else { |
|
212 |
-## cnSet <- object |
|
213 |
-## } |
|
214 |
-## if(CHR != 24){ |
|
215 |
-## ## 60G for 1239 files |
|
216 |
-## cnSet <- computeCopynumber(cnSet, cnOptions) |
|
217 |
-## } else{ |
|
218 |
-## message("Copy number estimates not available for chromosome Y. Saving only the 'callSet' object for this chromosome") |
|
219 |
-## alleleSet <- cnSet |
|
220 |
-## save(alleleSet, file=file.path(cnOptions[["outdir"]], paste("alleleSet_", CHR, ".rda", sep=""))) |
|
221 |
-## rm(cnSet, alleleSet); gc() |
|
222 |
-## next() |
|
223 |
-## } |
|
224 |
-## if(length(chromosome) == 1){ |
|
225 |
-## if(cnOptions[["save.cnset"]]){ |
|
226 |
-## save(cnSet, file=file.path(paste(cnFile, "_", CHR, ".rda", sep=""))) |
|
227 |
-## ##save(cnSet, file=file.path(cnOptions[["outdir"]], paste("cnSet_", CHR, ".rda", sep=""))) |
|
308 |
+## for(i in whichBatch){ |
|
309 |
+## PLATE <- unique(batch)[i] |
|
310 |
+## message("Plate: ", PLATE) |
|
311 |
+## sample.index <- which(batch==PLATE) |
|
312 |
+## if(length(sample.index) < MIN.SAMPLES) { |
|
313 |
+## warning("Plate ", PLATE, " has fewer than 10 samples. Skipping this plate.") |
|
314 |
+## next() |
|
228 | 315 |
## } |
229 |
-## } |
|
230 |
-## if(length(chromosome) > 1){ |
|
231 |
-## message(paste("Saving ", file.path(cnOptions[["outdir"]], paste(cnFile, "_", CHR, ".rda", sep="")))) |
|
232 |
-## save(cnSet, file=file.path(paste(cnFile, "_", CHR, ".rda", sep=""))) |
|
233 |
-## ##save(cnSet, file=file.path(cnOptions[["outdir"]], paste("cnSet_", CHR, ".rda", sep=""))) |
|
234 |
-## rm(cnSet); gc() |
|
235 |
-## } else { |
|
236 |
-## return(cnSet) |
|
237 |
-## } |
|
238 |
-## } |
|
239 |
-## saved.objects <- list.files(cnOptions[["outdir"]], pattern=cnFile, full.names=TRUE) |
|
240 |
-## return(saved.objects) |
|
316 |
+## ##cnOptions[["batch"]] <- cnOptions[["batch"]][snpI[["SNR"]] >= SNRmin] |
|
317 |
+#### if(isPackageLoaded("ff")){ |
|
318 |
+#### ca <- as.matrix(CA[row.index, sample.index]) |
|
319 |
+#### cb <- as.matrix(CB[row.index, sample.index]) |
|
320 |
+#### } else{ |
|
321 |
+#### dns <- dimnames(A[row.index, sample.index]) |
|
322 |
+#### cb <- ca <- matrix(NA, nr=length(row.index), nc=length(sample.index), dimnames=dns) |
|
323 |
+#### } |
|
324 |
+#### cnSet <- new("CNSet", |
|
325 |
+#### alleleA=as.matrix(A[row.index, sample.index]), |
|
326 |
+#### alleleB=as.matrix(B[row.index, sample.index]), |
|
327 |
+#### call=as.matrix(calls[row.index, sample.index]), |
|
328 |
+#### callProbability=as.matrix(confs[row.index, sample.index]), |
|
329 |
+#### CA=ca, |
|
330 |
+#### CB=cb, |
|
331 |
+#### featureData=annotatedDataFrameFrom(as.matrix(A[row.index, sample.index]), byrow=TRUE), |
|
332 |
+#### phenoData=pD[sample.index, ], |
|
333 |
+#### protocolData=protocoldata[sample.index, ]) |
|
334 |
+## ##Verify this is correct |
|
335 |
+#### annotation(cnSet) <- cnOptions[["cdfName"]] |
|
336 |
+#### featureNames(cnSet) <- fns[row.index] |
|
337 |
+## ##add chromosome, position, isSnp |
|
338 |
+#### cnSet <- annotate(cnSet) |
|
339 |
+#### if(any(cnSet$SNR > SNRmin)){ |
|
340 |
+#### if(CHR == chromosome[1]) message(paste("Excluding samples with SNR < ", SNRmin)) |
|
341 |
+#### cnSet <- cnSet[, cnSet$SNR >= SNRmin] |
|
342 |
+#### } |
|
343 |
+#### featureData(cnSet) <- lm.parameters(cnSet, cnOptions) |
|
344 |
+## if(CHR > 23) next() |
|
345 |
+## cnSet <- computeCopynumber(object[chromosome(object) == CHR, sample.index]) |
|
346 |
+#### if(!isPackageLoaded("ff") & i == whichBatch[1]) cnParams <- initializeParamObject(list(featureNames(cnSet), unique(cnOptions[["batch"]])[whichBatch])) |
|
347 |
+#### if(!isPackageLoaded("ff")) { |
|
348 |
+#### row.index <- 1:nrow(cnSet) |
|
349 |
+#### } else { |
|
350 |
+#### ##Warning message: |
|
351 |
+#### ##In d[[1]] * d[[2]] : NAs produced by integer overflow |
|
352 |
+#### CA[row.index, sample.index] <- cnSet@assayData[["CA"]] |
|
353 |
+#### CB[row.index, sample.index] <- cnSet@assayData[["CB"]] |
|
354 |
+#### } |
|
355 |
+#### cnParams <- updateParams(cnParams, cnSet, row.index, batch=unique(batch)[i]) |
|
356 |
+## ## keep only chromosome, position, and isSnp |
|
357 |
+#### featureData(cnSet) <- featureData(cnSet)[, 1:3] |
|
358 |
+#### if(!isPackageLoaded("ff")){ |
|
359 |
+#### save(cnSet, file=paste(cnFile, "_", PLATE, "_", CHR, ".rda", sep="")) |
|
360 |
+#### save(cnParams, file=paste(outdir, "cnParams_", PLATE, "_", CHR, ".rda", sep="")) |
|
361 |
+#### } |
|
362 |
+## } ## end of batch loop |
|
363 |
+## } ## end of chromosome loop |
|
364 |
+#### if(isPackageLoaded("ff")) { |
|
365 |
+#### close(cnParams) |
|
366 |
+#### close(A); close(B) |
|
367 |
+#### close(CA); close(CB) |
|
368 |
+#### save(CA, file=CAFile) |
|
369 |
+#### save(CB, file=CBFile) |
|
370 |
+#### close(calls); close(confs) |
|
371 |
+#### return() |
|
372 |
+#### } |
|
373 |
+## return(cnSet) |
|
241 | 374 |
##} |
242 | 375 |
|
243 | 376 |
|
244 | 377 |
|
245 |
-##crlmmCopynumber <- function(filenames, cnOptions, object, ...){ |
|
246 |
-## cnOpts <- cnOptions |
|
247 |
-## batchSize <- cnOptions[["crlmmBatchSize"]] |
|
248 |
-## batch <- cnOptions[["batch"]] |
|
249 |
-## outdir <- cnOptions[["outdir"]] |
|
250 |
-## ##if necessary, split plates in separate batches |
|
251 |
-## if(length(filenames) > batchSize){ |
|
252 |
-## L <- length(table(batch)) |
|
253 |
-## message("Processing the files in") |
|
254 |
-## batchList <- split(names(table(batch)), rep(1:L, each=10, length.out=L)) |
|
255 |
-## for(i in seq(along=batchList)){ |
|
256 |
-## index <- as.character(batch) %in% as.character(batchList[[i]]) |
|
257 |
-## fns <- filenames[index] |
|
258 |
-## cnOpts[["batch"]] <- cnOptions[["batch"]][index] |
|
259 |
-## cnOpts[["AFile"]] <- paste(outdir, "/", paste(cnOptions[["AFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
260 |
-## cnOpts[["BFile"]] <- paste(outdir, "/", paste(cnOptions[["BFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
261 |
-## cnOpts[["CAFile"]] <- paste(outdir, "/", paste(cnOptions[["CAFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
262 |
-##g cnOpts[["CBFile"]] <- paste(outdir, "/", paste(cnOptions[["CBFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
263 |
-## cnOpts[["callsFile"]] <- paste(outdir, "/", paste(cnOptions[["callsFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
264 |
-## cnOpts[["confsFile"]] <- paste(outdir, "/", paste(cnOptions[["confsFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
265 |
-## cnOpts[["snprmaFile"]] <- paste(outdir, "/", paste(cnOptions[["snprmaFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
266 |
-## cnOpts[["cnrmaFile"]] <- paste(outdir, "/", paste(cnOptions[["cnrmaFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
267 |
-## cnOpts[["cnFile"]] <- file.path(outdir, paste(cnOptions[["cnFile"]], i, sep="_crlmmBatch")) |
|
268 |
-## cnOpts[["rgFile"]] <- file.path(outdir, paste(cnOptions[["rgFile"]], i, sep="_crlmmBatch")) |
|
269 |
-## crlmmCopynumber.crlmmBatch(fns, cnOpts, object, ...) |
|
270 |
-## } |
|
271 |
-## } else { |
|
272 |
-## cnOpts[["AFile"]] <- paste(outdir, "/", cnOptions[["AFile"]], ".rda", sep="") |
|
273 |
-## cnOpts[["BFile"]] <- paste(outdir, "/", cnOptions[["BFile"]], ".rda", sep="") |
|
274 |
-## cnOpts[["callsFile"]] <- paste(outdir, "/", cnOptions[["callsFile"]], ".rda", sep="") |
|
275 |
-## cnOpts[["confsFile"]] <- paste(outdir, "/", cnOptions[["confsFile"]], ".rda", sep="") |
|
276 |
-## cnOpts[["snprmaFile"]] <- paste(outdir, "/",cnOptions[["snprmaFile"]], ".rda", sep="") |
|
277 |
-## cnOpts[["cnrmaFile"]] <- paste(outdir, "/", cnOptions[["cnrmaFile"]], ".rda", sep="") |
|
278 |
-## cnOpts[["cnFile"]] <- file.path(outdir, cnOptions[["cnFile"]]) |
|
279 |
-## cnOpts[["rgFile"]] <- file.path(outdir, cnOptions[["rgFile"]]) |
|
280 |
-## crlmmCopynumber.crlmmBatch(filenames, cnOpts, object, ...) |
|
281 |
-## } |
|
282 |
-##} |
|
283 | 378 |
|
284 | 379 |
|
285 |
-crlmmCopynumber <- function(filenames, cnOptions, object, ...){ |
|
286 |
- set.seed(cnOptions[["seed"]]) ##for reproducibility |
|
287 |
- if(!missing(object)){ |
|
288 |
- stopifnot(class(object) == "CNSet") |
|
289 |
- createIntermediateObjects <- FALSE |
|
290 |
- } else { |
|
291 |
- createIntermediateObjects <- TRUE |
|
292 |
- ## 33G for 1239 files |
|
293 |
- cnOptions[["save.it"]] <- TRUE |
|
294 |
- ##crlmmResults <- crlmmWrapper(filenames, cnOptions, ...) |
|
295 |
- crlmmWrapper(filenames, cnOptions, ...) |
|
296 |
- } |
|
297 |
- verbose <- cnOptions[["verbose"]] |
|
298 |
- load.it <- cnOptions[["load.it"]] |
|
299 |
- outdir <- cnOptions[["outdir"]] |
|
300 |
- snprmaFile <- cnOptions[["snprmaFile"]] |
|
301 |
- cnFile <- cnOptions[["cnFile"]] |
|
302 |
- cnrmaFile <- cnOptions[["cnrmaFile"]] |
|
303 |
- snprmaFile <- cnOptions[["snprmaFile"]] |
|
304 |
- callsFile <- cnOptions[["callsFile"]] |
|
305 |
- confsFile <- cnOptions[["confsFile"]] |
|
306 |
- AFile <- cnOptions[["AFile"]] |
|
307 |
- BFile <- cnOptions[["BFile"]] |
|
308 |
- CAFile <- cnOptions[["CAFile"]] |
|
309 |
- CBFile <- cnOptions[["CBFile"]] |
|
310 |
- path <- system.file("extdata", package=paste(cnOptions[["cdfName"]], "Crlmm", sep="")) |
|
311 |
- load(file.path(path, "snpProbes.rda")) |
|
312 |
- snpProbes <- get("snpProbes") |
|
313 |
- load(file.path(path, "cnProbes.rda")) |
|
314 |
- cnProbes <- get("cnProbes") |
|
315 |
- k <- grep("chr", colnames(snpProbes)) |
|
316 |
- if(verbose) message("Loading quantile-normalized intensities...") |
|
317 |
- ##cwd <- getwd() |
|
318 |
- ##setwd(dirname(snprmaFile[1])) |
|
319 |
- load(snprmaFile) |
|
320 |
- res <- get("res") |
|
321 |
- load(AFile) |
|
322 |
- if(isPackageLoaded("ff")) open(A) |
|
323 |
- load(BFile) |
|
324 |
- if(isPackageLoaded("ff")) open(B) |
|
325 |
- message("Loading genotype calls...") |
|
326 |
- load(callsFile) |
|
327 |
- calls <- get("calls") |
|
328 |
- if(isPackageLoaded("ff")) open(calls) |
|
329 |
- load(confsFile) |
|
330 |
- confs <- get("confs") |
|
331 |
- if(isPackageLoaded("ff")) open(confs) |
|
332 |
- ##fns <- rownames(A) |
|
333 |
- fns <- c(res[["gns"]], res[["cnnames"]]) |
|
334 |
- if(length(fns) != nrow(A)) stop("check featurenames. fns should be the rownames of A") |
|
335 |
- nr <- nrow(A); nc <- ncol(A) |
|
336 |
- if(isPackageLoaded("ff")){ |
|
337 |
- if(file.exists(CAFile)){ |
|
338 |
- load(CAFile) |
|
339 |
- if(is.null(dim(CA)) | !all(dim(CA) == dim(A))) { |
|
340 |
- unlink(CAFile) |
|
341 |
- CA <- initializeBigMatrix(nr, nc) |
|
342 |
- } |
|
343 |
- } else CA <- initializeBigMatrix(nr, nc) |
|
344 |
- if(file.exists(CBFile)){ |
|
345 |
- load(CBFile) |
|
346 |
- if(is.null(dim(CB)) | !all(dim(CB) == dim(A))) { |
|
347 |
- CB <- initializeBigMatrix(nr, nc) |
|
348 |
- } |
|
349 |
- } else CB <- initializeBigMatrix(nr, nc) |
|
350 |
- open(CA) |
|
351 |
- open(CB) |
|
352 |
- } |
|
353 |
- ##cnFile <- cnOptions[["cnFile"]] |
|
354 |
- chromosome <- cnOptions[["chromosome"]] |
|
355 |
- SNRmin <- cnOptions[["SNRmin"]] |
|
356 |
- ll <- 1 |
|
357 |
- scanDates <- data.frame(ScanDate=sapply(filenames, celfileDate)) |
|
358 |
- rownames(scanDates) <- basename(rownames(scanDates)) |
|
359 |
- protocoldata <- new("AnnotatedDataFrame", |
|
360 |
- data=scanDates, |
|
361 |
- varMetadata=data.frame(labelDescription=colnames(scanDates), |
|
362 |
- row.names=colnames(scanDates))) |
|
363 |
- ##load(file.path(cnOptions[["outdir"]], "sampleStats.rda")) |
|
364 |
- sampleStats <- data.frame(SKW=res$SKW, |
|
365 |
- SNR=res$SNR, |
|
366 |
- mixtureParams=res$mixtureParams, |
|
367 |
- gender=res$gender, |
|
368 |
- batch=cnOptions[["batch"]]) |
|
369 |
- ##sampleStats, batch=cnOptions[["batch"]]) |
|
370 |
- pD <- new("AnnotatedDataFrame", |
|
371 |
- data=sampleStats, |
|
372 |
- varMetadata=data.frame(labelDescription=colnames(sampleStats))) |
|
373 |
- batch <- cnOptions[["batch"]] |
|
374 |
- if(isPackageLoaded("ff")){ |
|
375 |
-## featureDataFF <- ffdf(position=ff(rep(0L, length(fns)), vmode="integer", finalizer="close"), |
|
376 |
-## chromosome=ff(rep(0L, length(fns)), vmode="integer", finalizer="close"), |
|
377 |
-## isSnp=ff(rep(0L, length(fns)), vmode="integer", finalizer="close"), |
|
378 |
-## row.names=fns) |
|
379 |
-## fD <- vector("list", length(chromosome)) |
|
380 |
- featureDataFF <- ff(dim=c(nrow(A), 17*length(unique(batch))+3), vmode="double", finalizer="close", |
|
381 |
- overwrite=TRUE) |
|
382 |
- } |
|
383 |
- for(CHR in chromosome){ |
|
384 |
- snps <- rownames(snpProbes)[snpProbes[, k] == CHR] |
|
385 |
- cns <- rownames(cnProbes)[cnProbes[, k] == CHR] |
|
386 |
- index.snps <- match(snps, fns) |
|
387 |
- index.cn <- match(cns, fns) |
|
388 |
- row.index <- c(index.snps, index.cn) |
|
389 |
- cat("Chromosome ", CHR, "\n") |
|
390 |
- if(isPackageLoaded("ff")){ |
|
391 |
- fD.batch <- vector("list", length(unique(batch))) |
|
392 |
- } |
|
393 |
- for(i in seq(along=unique(batch))){ |
|
394 |
- PLATE <- unique(batch)[i] |
|
395 |
- message("Plate: ", PLATE) |
|
396 |
- sample.index <- which(batch==PLATE) |
|
397 |
- ##cnOptions[["batch"]] <- cnOptions[["batch"]][snpI[["SNR"]] >= SNRmin] |
|
398 |
- if(isPackageLoaded("ff")){ |
|
399 |
- ca <- CA[row.index, sample.index] |
|
400 |
- cb <- CB[row.index, sample.index] |
|
401 |
- } else{ |
|
402 |
- ##dns <- dimnames(A[row.index, sample.index]) |
|
403 |
- cb <- ca <- matrix(NA, nr=length(row.index), nc=length(sample.index))##, dimnames=dns) |
|
404 |
- } |
|
405 |
- cnSet <- new("CNSet", |
|
406 |
- alleleA=A[row.index, sample.index], |
|
407 |
- alleleB=B[row.index, sample.index], |
|
408 |
- call=calls[row.index, sample.index], |
|
409 |
- callProbability=confs[row.index, sample.index], |
|
410 |
- CA=ca, |
|
411 |
- CB=cb, |
|
412 |
- featureData=annotatedDataFrameFrom(A[row.index, sample.index], byrow=TRUE), |
|
413 |
- phenoData=pD[sample.index, ], |
|
414 |
- protocolData=protocoldata[sample.index, ]) |
|
415 |
- ##Verify this is correct |
|
416 |
- annotation(cnSet) <- cnOptions[["cdfName"]] |
|
417 |
- featureNames(cnSet) <- fns[row.index] |
|
418 |
- ##add chromosome, position, isSnp |
|
419 |
- cnSet <- annotate(cnSet) |
|
420 |
-## if(any(cnSet$SNR > SNRmin)){ |
|
421 |
-## if(CHR == chromosome[1]) message(paste("Excluding samples with SNR < ", SNRmin)) |
|
422 |
-## cnSet <- cnSet[, cnSet$SNR >= SNRmin] |
|
423 |
-## } |
|
424 |
- featureData(cnSet) <- lm.parameters(cnSet, cnOptions) |
|
425 |
- if(CHR < 24){ |
|
426 |
- cnSet <- computeCopynumber(cnSet, cnOptions) |
|
427 |
- if(isPackageLoaded("ff")){ |
|
428 |
- if(i == 1) { ## first batch |
|
429 |
- featureDataFF[row.index, 1:20] <- as.matrix(fData(cnSet)) |
|
430 |
- fcol <- 20 |
|
431 |
- } else { |
|
432 |
- featureDataFF[row.index, (fcol+1):(fcol+17)] |
|
433 |
- fcol <- fcol+17 |
|
434 |
- ##remove redundant columns |
|
435 |
- } |
|
436 |
- CA[row.index, sample.index] <- cnSet@assayData[["CA"]] |
|
437 |
- CB[row.index, sample.index] <- cnSet@assayData[["CB"]] |
|
438 |
- next() ## next batch |
|
439 |
- } else { |
|
440 |
- save(cnSet, file=paste(cnFile, "_", PLATE, "_", CHR, ".rda", sep="")) |
|
441 |
- } |
|
442 |
- } else break() ## break out of the batch loop. Go to next chromosome |
|
443 |
- ## if(length(chromosome) == 1 & length(unique(batch)) == 1){ |
|
444 |
- ## if(cnOptions[["save.cnset"]] & !isLoadedPackage("ff")) |
|
445 |
- ## save(cnSet, file=paste(cnFile, "_", PLATE, "_", CHR, ".rda", sep="")) |
|
446 |
- ## return(cnSet) |
|
447 |
- ## } else { |
|
448 |
- ## ##either multiple chromosome or multiple batches...save to file if ff is not loaded |
|
449 |
- ## if(!isPackageLoaded("ff")){ |
|
450 |
- ## save(cnSet, file=paste(cnFile, "_", PLATE, "_", CHR, ".rda", sep="")) |
|
451 |
- ## } |
|
452 |
- ## rm(cnSet); gc() |
|
453 |
- ## } |
|
454 |
- } ## end of batch loop |
|
455 |
- } ## end of chromosome loop |
|
456 |
- if(isPackageLoaded("ff")) { |
|
457 |
- close(A); close(B) |
|
458 |
- close(featureDataFF) |
|
459 |
- close(CA); close(CB) |
|
460 |
- save(featureDataFF, file=file.path(outdir, "featureDataFF.rda")) |
|
461 |
- save(CA, file=CAFile) |
|
462 |
- save(CB, file=CBFile) |
|
463 |
- close(calls); close(confs) |
|
464 |
- ffSet <- new("FFSet", |
|
465 |
- A=A, |
|
466 |
- B=B, |
|
467 |
- snpCall=calls, |
|
468 |
- callProbability=confs, |
|
469 |
- CA=CA, |
|
470 |
- CB=CB) |
|
471 |
- return(ffSet) |
|
472 |
- } |
|
473 |
-} |
|
474 | 380 |
|
475 | 381 |
|
476 |
-loadIlluminaRG <- function(rgFile, crlmmFile, load.it, save.it,...){ |
|
477 |
-## if(missing(rgFile)){ |
|
478 |
-## ##stop("must specify 'rgFile'.") |
|
479 |
-## rgFile <- file.path(dirname(crlmmFile), "rgFile.rda") |
|
480 |
-## message("rgFile not specified. Using ", rgFile) |
|
382 |
+##loadIlluminaRG <- function(rgFile, crlmmFile, load.it, save.it,...){ |
|
383 |
+#### if(missing(rgFile)){ |
|
384 |
+#### ##stop("must specify 'rgFile'.") |
|
385 |
+#### rgFile <- file.path(dirname(crlmmFile), "rgFile.rda") |
|
386 |
+#### message("rgFile not specified. Using ", rgFile) |
|
387 |
+#### } |
|
388 |
+## if(!load.it){ |
|
389 |
+## RG <- readIdatFiles(...) |
|
390 |
+## if(save.it) save(RG, file=rgFile) |
|
481 | 391 |
## } |
482 |
- if(!load.it){ |
|
483 |
- RG <- readIdatFiles(...) |
|
484 |
- if(save.it) save(RG, file=rgFile) |
|
485 |
- } |
|
486 |
- if(load.it & !file.exists(rgFile)){ |
|
487 |
- message("load.it is TRUE, but rgFile not present. Attempting to read the idatFiles.") |
|
488 |
- RG <- readIdatFiles(...) |
|
489 |
- if(save.it) save(RG, file=rgFile) |
|
490 |
- } |
|
491 |
- if(load.it & file.exists(rgFile)){ |
|
492 |
- message("Loading RG file") |
|
493 |
- load(rgFile) |
|
494 |
- RG <- get("RG") |
|
495 |
- } |
|
496 |
- return(RG) |
|
497 |
-} |
|
498 |
- |
|
499 |
-loadIlluminaCallSet <- function(crlmmFile, snprmaFile, RG, load.it, save.it, cdfName){ |
|
500 |
- if(!file.exists(crlmmFile) | !load.it){ |
|
501 |
- callSet <- crlmmIllumina(RG=RG, |
|
502 |
- cdfName=cdfName, |
|
503 |
- sns=sampleNames(RG), |
|
504 |
- returnParams=TRUE, |
|
505 |
- save.it=TRUE, |
|
506 |
- intensityFile=snprmaFile) |
|
507 |
- if(save.it) save(callSet, file=crlmmFile) |
|
508 |
- } else { |
|
509 |
- message("Loading ", crlmmFile, "...") |
|
510 |
- load(crlmmFile) |
|
511 |
- callSet <- get("callSet") |
|
512 |
- } |
|
513 |
- protocolData(callSet) <- protocolData(RG) |
|
514 |
- return(callSet) |
|
515 |
-} |
|
392 |
+## if(load.it & !file.exists(rgFile)){ |
|
393 |
+## message("load.it is TRUE, but rgFile not present. Attempting to read the idatFiles.") |
|
394 |
+## RG <- readIdatFiles(...) |
|
395 |
+## if(save.it) save(RG, file=rgFile) |
|
396 |
+## } |
|
397 |
+## if(load.it & file.exists(rgFile)){ |
|
398 |
+## message("Loading RG file") |
|
399 |
+## load(rgFile) |
|
400 |
+## RG <- get("RG") |
|
401 |
+## } |
|
402 |
+## return(RG) |
|
403 |
+##} |
|
404 |
+## |
|
405 |
+##loadIlluminaCallSet <- function(crlmmFile, snprmaFile, RG, load.it, save.it, cdfName){ |
|
406 |
+## if(!file.exists(crlmmFile) | !load.it){ |
|
407 |
+## callSet <- crlmmIllumina(RG=RG, |
|
408 |
+## cdfName=cdfName, |
|
409 |
+## sns=sampleNames(RG), |
|
410 |
+## returnParams=TRUE, |
|
411 |
+## save.it=TRUE, |
|
412 |
+## intensityFile=snprmaFile) |
|
413 |
+## if(save.it) save(callSet, file=crlmmFile) |
|
414 |
+## } else { |
|
415 |
+## message("Loading ", crlmmFile, "...") |
|
416 |
+## load(crlmmFile) |
|
417 |
+## callSet <- get("callSet") |
|
418 |
+## } |
|
419 |
+## protocolData(callSet) <- protocolData(RG) |
|
420 |
+## return(callSet) |
|
421 |
+##} |
|
516 | 422 |
|
517 | 423 |
|
518 | 424 |
##loadAffyCallSet <- function(filenames, confsFile, callsFile, snprmaFile, load.it, save.it, cdfName){ |
... | ... |
@@ -544,109 +450,121 @@ loadIlluminaCallSet <- function(crlmmFile, snprmaFile, RG, load.it, save.it, cdf |
544 | 450 |
## return(cnrmaResult) |
545 | 451 |
##} |
546 | 452 |
|
547 |
-loadIlluminaCnrma <- function(){ |
|
548 |
- if(exists("cnAB")){ |
|
549 |
- np.A <- as.integer(cnAB$A) |
|
550 |
- np.B <- as.integer(cnAB$B) |
|
551 |
- np <- ifelse(np.A > np.B, np.A, np.B) |
|
552 |
- np <- matrix(np, nrow(cnAB$A), ncol(cnAB$A)) |
|
553 |
- rownames(np) <- cnAB$gns |
|
554 |
- colnames(np) <- cnAB$sns |
|
555 |
- cnAB$NP <- np |
|
556 |
- ##sampleNames(callSet) <- res$sns |
|
557 |
- sampleNames(callSet) <- cnAB$sns |
|
558 |
- cnrmaResult <- get("cnAB") |
|
559 |
- } else cnrmaResult <- NULL |
|
560 |
- return(cnrmaResult) |
|
561 |
-} |
|
562 |
- |
|
563 |
-crlmmWrapper <- function(filenames, cnOptions, ...){ |
|
564 |
- crlmmBatchSize <- cnOptions[["crlmmBatchSize"]] |
|
565 |
- cdfName <- cnOptions[["cdfName"]] |
|
566 |
- load.it <- cnOptions[["load.it"]] |
|
567 |
- save.it <- cnOptions[["save.it"]] |
|
568 |
- callsFile <- cnOptions[["callsFile"]] |
|
569 |
- confsFile <- cnOptions[["confsFile"]] |
|
570 |
- AFile=cnOptions[["AFile"]] |
|
571 |
- BFile=cnOptions[["BFile"]] |
|
572 |
- snprmaFile=cnOptions[["snprmaFile"]] |
|
573 |
- cnrmaFile=cnOptions[["cnrmaFile"]] |
|
574 |
- rgFile=cnOptions[["rgFile"]] |
|
575 |
- outdir <- cnOptions[["outdir"]] |
|
576 |
- if(missing(cdfName)) stop("cdfName is missing -- a valid cdfName is required. See crlmm:::validCdfNames()") |
|
577 |
- platform <- whichPlatform(cdfName) |
|
578 |
- if(!(platform %in% c("affymetrix", "illumina"))){ |
|
579 |
- stop("Only 'affymetrix' and 'illumina' platforms are supported at this time.") |
|
580 |
- } else { |
|
581 |
- message("Checking whether annotation package for the ", platform, " platform is available") |
|
582 |
- if(!isValidCdfName(cdfName)){ |
|
583 |
- cat("FALSE\n") |
|
584 |
- stop(cdfName, " is not a valid entry. See crlmm:::validCdfNames(platform)") |
|
585 |
- } else cat("TRUE\n") |
|
586 |
- } |
|
587 |
- if(platform == "illumina") RG <- loadIlluminaRG(rgFile, callsFile, load.it, save.it) |
|
588 |
- if(!(file.exists(dirname(callsFile)))) stop(dirname(callsFile), " does not exist.") |
|
589 |
- if(!(file.exists(dirname(snprmaFile)))) stop(dirname(snprmaFile), " does not exist.") |
|
590 |
-## if(load.it){ |
|
591 |
-## if(!file.exists(callsFile)){ |
|
592 |
-## message("load.it is TRUE, but ", callsFile, " does not exist. Rerunning the genotype calling algorithm") |
|
593 |
-## ##load.it <- FALSE |
|
594 |
-## } |
|
453 |
+##loadIlluminaCnrma <- function(){ |
|
454 |
+## if(exists("cnAB")){ |
|
455 |
+## np.A <- as.integer(cnAB$A) |
|
456 |
+## np.B <- as.integer(cnAB$B) |
|
457 |
+## np <- ifelse(np.A > np.B, np.A, np.B) |
|
458 |
+## np <- matrix(np, nrow(cnAB$A), ncol(cnAB$A)) |
|
459 |
+## rownames(np) <- cnAB$gns |
|
460 |
+## colnames(np) <- cnAB$sns |
|
461 |
+## cnAB$NP <- np |
|
462 |
+## ##sampleNames(callSet) <- res$sns |
|
463 |
+## sampleNames(callSet) <- cnAB$sns |
|
464 |
+## cnrmaResult <- get("cnAB") |
|
465 |
+## } else cnrmaResult <- NULL |
|
466 |
+## return(cnrmaResult) |
|
467 |
+##} |
|
468 |
+## |
|
469 |
+##crlmmWrapper <- function(filenames, cnOptions, ...){ |
|
470 |
+## crlmmBatchSize <- cnOptions[["crlmmBatchSize"]] |
|
471 |
+## cdfName <- cnOptions[["cdfName"]] |
|
472 |
+## load.it <- cnOptions[["load.it"]] |
|
473 |
+## save.it <- cnOptions[["save.it"]] |
|
474 |
+## callsFile <- cnOptions[["callsFile"]] |
|
475 |
+## confsFile <- cnOptions[["confsFile"]] |
|
476 |
+## AFile=cnOptions[["AFile"]] |
|
477 |
+## BFile=cnOptions[["BFile"]] |
|
478 |
+## snprmaFile=cnOptions[["snprmaFile"]] |
|
479 |
+## cnrmaFile=cnOptions[["cnrmaFile"]] |
|
480 |
+## rgFile=cnOptions[["rgFile"]] |
|
481 |
+## protocolFile <- cnOptions[["protocolFile"]] |
|
482 |
+## outdir <- cnOptions[["outdir"]] |
|
483 |
+## if(missing(cdfName)) stop("cdfName is missing -- a valid cdfName is required. See crlmm:::validCdfNames()") |
|
484 |
+## platform <- whichPlatform(cdfName) |
|
485 |
+## if(!(platform %in% c("affymetrix", "illumina"))){ |
|
486 |
+## stop("Only 'affymetrix' and 'illumina' platforms are supported at this time.") |
|
487 |
+## } else { |
|
488 |
+## if(!isValidCdfName(cdfName)){ |
|
489 |
+## stop(cdfName, " is not a valid entry. See crlmm:::validCdfNames(platform)") |
|
490 |
+## } else message("Using the annotation package ", cdfName, " for this ", platform, " platform") |
|
595 | 491 |
## } |
596 |
- if(platform == "affymetrix"){ |
|
597 |
-## if(!file.exists(callsFile) | !load.it){ |
|
598 |
- crlmm(filenames=filenames, |
|
599 |
- cdfName=cdfName, |
|
600 |
- save.it=TRUE, |
|
601 |
- load.it=load.it, |
|
602 |
- snprmaFile=snprmaFile, |
|
603 |
- callsFile=callsFile, |
|
604 |
- confsFile=confsFile, |
|
605 |
- AFile=AFile, |
|
606 |
- BFile=BFile, |
|
607 |
- crlmmBatchSize=crlmmBatchSize) |
|
608 |
-## } |
|
609 |
- } |
|
610 |
- gc() |
|
611 |
- if(platform == "illumina") { |
|
612 |
- browser() |
|
613 |
- callSet <- loadIlluminaCallSet(callsFile, snprmaFile, RG, load.it, save.it, cdfName) |
|
614 |
- } |
|
615 |
- if(platform == "affymetrix"){ |
|
616 |
- if(!file.exists(cnrmaFile) | !load.it){ |
|
617 |
- message("Quantile normalizing the copy number probes...") |
|
618 |
- ## updates A matrix and saves cnrmaFile |
|
619 |
- cnrma(filenames=filenames, cdfName=cdfName, outdir=outdir, cnrmaFile=cnrmaFile, AFile=AFile) |
|
620 |
- } |
|
621 |
- } |
|
622 |
-## if(!is.null(cnrmaResult)){ |
|
623 |
-## for(CHR in chromosome){ |
|
624 |
-## cat(CHR, " ") |
|
625 |
-## cnps <- rownames(cnProbes)[cnProbes[, k] == CHR] |
|
626 |
-## index.nps <- match(cnps, rownames(cnrmaResult[["NP"]])) |
|
627 |
-## NP <- cnrmaResult$NP[index.nps, ] |
|
628 |
-## save(NP, file=file.path(tmpdir, paste("NP_", CHR, ".rda", sep=""))) |
|
629 |
-## rm(NP); gc() |
|
492 |
+## if(platform == "illumina") { |
|
493 |
+## if(!file.exists(rgFile)){ |
|
494 |
+## if(load.it) message(rgFile, " does not exist and you chose to load.it. Re-reading the R and G intensities from the IDAT files") |
|
495 |
+## sampleSheet <- cnOptions$sampleSheet |
|
496 |
+## ids <- cnOptions$ids |
|
497 |
+## arrayInfoColNames <- cnOptions$arrayInfoColNames |
|
498 |
+## highDensity <- cnOptions$highDensity |
|
499 |
+## ##this is either an NChannelSet object, or a list of pointers to the ff objects |
|
500 |
+## RG <- readIdatFiles(sampleSheet=sampleSheet, |
|
501 |
+## arrayNames=basename(filenames), |
|
502 |
+## ids=ids, |
|
503 |
+## path=dirname(filenames), |
|
504 |
+## highDensity=highDensity, |
|
505 |
+## fileExt=cnOptions$fileExt[1:2], |
|
506 |
+## sep=cnOptions$fileExt[[3]], |
|
507 |
+## saveDate=FALSE, ## I do this earlier |
|
508 |
+## verbose=cnOptions[["verbose"]], |
|
509 |
+## protocolFile=protocolFile) |
|
510 |
+## if(save.it) save(RG, file=rgFile) |
|
511 |
+## ##RG <- loadIlluminaRG(rgFile, callsFile, load.it, save.it) |
|
512 |
+## } else{ |
|
513 |
+## if(!isPackageLoaded("ff")) {load(rgFile); RG <- get("RG")} |
|
630 | 514 |
## } |
631 | 515 |
## } |
632 |
- if(!save.it){ |
|
633 |
- message("Cleaning up") |
|
634 |
- unlink(snprmaFile); unlink(cnrmaFile) |
|
635 |
- } |
|
636 |
-} |
|
637 |
- |
|
638 |
- |
|
516 |
+## if(!(file.exists(dirname(callsFile)))) stop(dirname(callsFile), " does not exist.") |
|
517 |
+## if(!(file.exists(dirname(snprmaFile)))) stop(dirname(snprmaFile), " does not exist.") |
|
518 |
+## if(platform == "affymetrix"){ |
|
519 |
+## crlmm(filenames=filenames, |
|
520 |
+## cdfName=cdfName, |
|
521 |
+## save.it=TRUE, |
|
522 |
+## load.it=load.it, |
|
523 |
+## snprmaFile=snprmaFile, |
|
524 |
+## callsFile=callsFile, |
|
525 |
+## confsFile=confsFile, |
|
526 |
+## AFile=AFile, |
|
527 |
+## BFile=BFile, |
|
528 |
+## crlmmBatchSize=crlmmBatchSize, |
|
529 |
+## SNRMin=cnOptions[["SNRMin"]]) |
|
530 |
+## } |
|
531 |
+## gc() |
|
532 |
+## if(platform == "illumina") { |
|
533 |
+## callSet <- crlmmIllumina(RG=RG, |
|
534 |
+## cdfName=cdfName, |
|
535 |
+## sns=sampleNames(RG), |
|
536 |
+## returnParams=TRUE, |
|
537 |
+## save.it=TRUE, |
|
538 |
+## snprmaFile=snprmaFile, |
|
539 |
+## callsFile=callsFile, |
|
540 |
+## confsFile=confsFile, |
|
541 |
+## AFile=AFile, |
|
542 |
+## BFile=BFile) |
|
543 |
+## ##callSet <- loadIlluminaCallSet(callsFile, snprmaFile, RG, load.it, save.it, cdfName) |
|
544 |
+## } |
|
545 |
+## if(platform == "affymetrix"){ |
|
546 |
+## if(!file.exists(cnrmaFile) | !load.it){ |
|
547 |
+## message("Quantile normalizing the copy number probes...") |
|
548 |
+## ## updates A matrix and saves cnrmaFile |
|
549 |
+## cnrma(filenames=filenames, cdfName=cdfName, outdir=outdir, verbose=cnOptions[["verbose"]], cnrmaFile=cnrmaFile, AFile=AFile, snprmaFile=snprmaFile) |
|
550 |
+## } |
|
551 |
+## } |
|
552 |
+#### if(!is.null(cnrmaResult)){ |
|
553 |
+#### for(CHR in chromosome){ |
|
554 |
+#### cat(CHR, " ") |
|
555 |
+#### cnps <- rownames(cnProbes)[cnProbes[, k] == CHR] |
|
556 |
+#### index.nps <- match(cnps, rownames(cnrmaResult[["NP"]])) |
|
557 |
+#### NP <- cnrmaResult$NP[index.nps, ] |
|
558 |
+#### save(NP, file=file.path(tmpdir, paste("NP_", CHR, ".rda", sep=""))) |
|
559 |
+#### rm(NP); gc() |
|
560 |
+#### } |
|
561 |
+#### } |
|
562 |
+## if(!save.it){ |
|
563 |
+## message("Cleaning up") |
|
564 |
+## unlink(snprmaFile); unlink(cnrmaFile) |
|
565 |
+## } |
|
566 |
+##} |
|
639 | 567 |
|
640 |
-whichPlatform <- function(cdfName){ |
|
641 |
- index <- grep("genomewidesnp", cdfName) |
|
642 |
- if(length(index) > 0){ |
|
643 |
- platform <- "affymetrix" |
|
644 |
- } else{ |
|
645 |
- index <- grep("human", cdfName) |
|
646 |
- platform <- "illumina" |
|
647 |
- } |
|
648 |
- return(platform) |
|
649 |
-} |
|
650 | 568 |
|
651 | 569 |
|
652 | 570 |
# steps: quantile normalize hapmap: create 1m_reference_cn.rda object |
... | ... |
@@ -693,25 +611,25 @@ whichPlatform <- function(cdfName){ |
693 | 611 |
## return(res3) |
694 | 612 |
##} |
695 | 613 |
|
696 |
-cnrma <- function(filenames, cdfName, sns, seed=1, verbose=FALSE, outdir, cnrmaFile, AFile){ |
|
614 |
+cnrma <- function(object, filenames){ |
|
615 |
+ ops <- crlmmOptions(object) |
|
616 |
+ cdfName <- annotation(object) |
|
617 |
+ seed <- ops$seed |
|
618 |
+ verbose <- ops$verbose |
|
619 |
+ ##cnrmaFile <- ops$cnrmaFile |
|
620 |
+ A <- A(object) |
|
697 | 621 |
if(missing(cdfName)) stop("must specify cdfName") |
698 | 622 |
pkgname <- getCrlmmAnnotationName(cdfName) |
699 | 623 |
require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available") |
700 |
- if (missing(sns)) sns <- basename(filenames) |
|
624 |
+ sns <- basename(filenames) |
|
701 | 625 |
loader("npProbesFid.rda", .crlmmPkgEnv, pkgname) |
702 | 626 |
fid <- getVarInEnv("npProbesFid") |
703 | 627 |
set.seed(seed) |
704 | 628 |
idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything |
705 | 629 |
SKW <- vector("numeric", length(filenames)) |
706 |
- load(AFile) |
|
707 |
- if(isPackageLoaded("ff")) open(A) |
|
708 |
- path <- system.file("extdata", package=pkgname) |
|
709 |
- load(file.path(path, "cnProbes.rda")) |
|
710 |
- cnProbes <- get("cnProbes") |
|
711 |
- cnps <- rownames(cnProbes) |
|
712 |
- cnps <- cnps[cnps %in% rownames(A)] |
|
713 |
- index <- match(cnps, rownames(A), nomatch=0) |
|
714 |
- index <- index[index != 0] |
|
630 |
+ index <- match(names(fid), featureNames(object)) |
|
631 |
+ stopifnot(identical(featureNames(object)[index], names(fid))) |
|
632 |
+ if(length(index) < 1) stop("None of the names for the nonpolymorphic probes in the annotation package match the names stored in the snprmaFile.") |
|
715 | 633 |
if(verbose){ |
716 | 634 |
message("Processing ", length(filenames), " files.") |
717 | 635 |
if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3) |
... | ... |
@@ -723,26 +641,21 @@ cnrma <- function(filenames, cdfName, sns, seed=1, verbose=FALSE, outdir, cnrmaF |
723 | 641 |
loader("5.0_reference_cn.rda", .crlmmPkgEnv, pkgname) |
724 | 642 |
} |
725 | 643 |
reference <- getVarInEnv("reference") |
726 |
- ##if(!is.matrix(reference)) stop("target distribution for quantile normalization not available.") |
|
727 | 644 |
for(i in seq(along=filenames)){ |
728 | 645 |
y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid]) |
729 | 646 |
x <- log2(y[idx2]) |
730 | 647 |
SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3) |
731 | 648 |
rm(x); gc() |
732 | 649 |
A[index, i] <- as.integer(normalize.quantiles.use.target(y, target=reference)) |
733 |
- ##NP[, i] <- as.integer(normalize.quantiles.use.target(y, target=reference)) |
|
734 | 650 |
if (verbose) |
735 | 651 |
if (getRversion() > '2.7.0') setTxtProgressBar(pb, i) |
736 | 652 |
else cat(".") |
737 | 653 |
rm(y); gc() |
738 | 654 |
} |
739 |
- message("Done") |
|
740 |
- cnrmaResults <- list(SKW=SKW) |
|
741 |
- save(cnrmaResults, file=cnrmaFile) |
|
742 |
- if(isPackageLoaded("ff")) close(A) |
|
743 |
- save(A, file=AFile) |
|
744 |
- rm(list=ls()); gc() |
|
745 |
- return() |
|
655 |
+ cat("\nDone\n") |
|
656 |
+ pData(object)$SKW_nonpolymorphic <- SKW |
|
657 |
+ object@assayData[["alleleA"]] <- A |
|
658 |
+ return(object) |
|
746 | 659 |
} |
747 | 660 |
|
748 | 661 |
getFlags <- function(object, PHI.THR){ |
... | ... |
@@ -760,7 +673,7 @@ getFlags <- function(object, PHI.THR){ |
760 | 673 |
} |
761 | 674 |
|
762 | 675 |
|
763 |
-instantiateObjects <- function(object, cnOptions){ |
|
676 |
+instantiateObjects <- function(object){ |
|
764 | 677 |
Ns <- matrix(NA, nrow(object), 5) |
765 | 678 |
colnames(Ns) <- c("A", "B", "AA", "AB", "BB") |
766 | 679 |
vA <- vB <- muB <- muA <- Ns |
... | ... |
@@ -787,149 +700,38 @@ thresholdCopynumber <- function(object){ |
787 | 700 |
return(object) |
788 | 701 |
} |
789 | 702 |
|
790 |
-##preprocessOptions <- function(callsFile="snpsetObject.rda", |
|
791 |
-## snprmaFile="normalizedIntensities.rda", |
|
792 |
-## rgFile="rgFile.rda"){ |
|
793 |
-## |
|
794 |
-##} |
|
795 |
- |
|
796 |
-cnOptions <- function(outdir="./", |
|
797 |
- cdfName, |
|
798 |
- AFile="A.rda", |
|
799 |
- BFile="B.rda", |
|
800 |
- CAFile="CA.rda", |
|
801 |
- CBFile="CB.rda", |
|
802 |
- callsFile="genotypes.rda", |
|
803 |
- rgFile="rgFile.rda", |
|
804 |
- cnFile="cnSet", |
|
805 |
- cnrmaFile="cn_rmaResult.rda", |
|
806 |
- ##npBinary="NP.bin", ##name of file.backed.big.matrix |
|
807 |
- ##npDesc="NP.desc", |
|
808 |
- snprmaFile="snp_rmaResult.rda", |
|
809 |
- ##callsFile="calls.desc", |
|
810 |
- ##confsFile="confsFile.desc", |
|
811 |
- confsFile="confs.rda", |
|
812 |
- save.it=TRUE, |
|
813 |
- save.cnset=TRUE, |
|
814 |
- load.it=TRUE, |
|
815 |
- MIN.OBS=3, |
|
816 |
- MIN.SAMPLES=10, |
|
817 |
- batch=NULL, |
|
818 |
- DF.PRIOR=50, |
|
819 |
- bias.adj=FALSE, |
|
820 |
- prior.prob=rep(1/4, 4), |
|
821 |
- SNRmin=4, |
|
822 |
- chromosome=1:24, |
|
823 |
- seed=123, |
|
824 |
- verbose=TRUE, |
|
825 |
- GT.CONF.THR=0.99, |
|
826 |
- PHI.THR=2^6,##used in nonpolymorphic fxn for training |
|
827 |
- nHOM.THR=5, ##used in nonpolymorphic fxn for training |
|
828 |
- MIN.NU=2^3, |
|
829 |
- MIN.PHI=2^3, |
|
830 |
- THR.NU.PHI=TRUE, |
|
831 |
- thresholdCopynumber=TRUE, |
|
832 |
- unlink=TRUE, |
|
833 |
- use.poe=FALSE, |
|
834 |
- crlmmBatchSize=1000, |
|
835 |
- ...){ |
|
836 |
-## if(isPackageLoaded("ff")){ |
|
837 |
-## AFile=paste(AFile, "ff", sep="_") |
|
838 |
-## BFile=paste(BFile, "ff", sep="_") |
|
839 |
-## confsFile=paste(confsFile, "ff", sep="_") |
|
840 |
-## callsFile=paste(callsFile, "ff", sep="_") |
|
841 |
-## cnFile=paste(cnFile, "ff", sep="") |
|
842 |
-## } |
|
843 |
- if(length(batch) > 200) warning("This job may require a lot of RAM. Consider using the ff package in conjuction with crlmm, as described in the copynumber_ff vignette...") |
|
844 |
- if(use.poe) require(metaArray) |
|
845 |
- if(missing(cdfName)) stop("must specify cdfName") |
|
846 |
- if(!file.exists(outdir)){ |
|
847 |
- message(outdir, " does not exist. Trying to create it.") |
|
848 |
- dir.create(outdir, recursive=TRUE) |
|
849 |
- } |
|
850 |
- stopifnot(isValidCdfName(cdfName)) |
|
851 |
-## if(hiddenMarkovModel){ |
|
852 |
-## hmmOpts <- hmmOptions(...) |
|
853 |
-## } |
|
854 |
-## if(missing(snprmaFile)){ |
|
855 |
-## snprmaFile <- file.path(outdir, "normalizedIntensities.rda") |
|
856 |
-## } |
|
857 |
- if(is.null(batch)) |
|
858 |
- stop("must specify batch -- should be the same length as the number of files") |
|
859 |
- list(outdir=outdir, |
|
860 |
- cdfName=cdfName, |
|
861 |
- callsFile=file.path(outdir, callsFile), |
|
862 |
- confsFile=file.path(outdir, confsFile), |
|
863 |
- AFile=file.path(outdir, AFile), |
|
864 |
- BFile=file.path(outdir, BFile), |
|
865 |
- CAFile=file.path(outdir, CAFile), |
|
866 |
- CBFile=file.path(outdir, CBFile), |
|
867 |
- snprmaFile=file.path(outdir, snprmaFile), |
|
868 |
- cnrmaFile=file.path(outdir, cnrmaFile), |
|
869 |
- rgFile=file.path(outdir, rgFile), |
|
870 |
- cnFile=file.path(outdir, cnFile), |
|
871 |
- save.it=save.it, |
|
872 |
- save.cnset=save.cnset, |
|
873 |
- load.it=load.it, |
|
874 |
- MIN.OBS=MIN.OBS, |
|
875 |
- MIN.SAMPLES=MIN.SAMPLES, |
|
876 |
- batch=batch, |
|
877 |
- DF.PRIOR=DF.PRIOR, |
|
878 |
- GT.CONF.THR=GT.CONF.THR, |
|
879 |
- prior.prob=prior.prob, |
|
880 |
- bias.adj=bias.adj, |
|
881 |
- SNRmin=SNRmin, |
|
882 |
- chromosome=chromosome, |
|
883 |
- seed=seed, |
|
884 |
- verbose=verbose, |
|
885 |
- PHI.THR=PHI.THR, |
|
886 |
- nHOM.THR=nHOM.THR, |
|
887 |
- MIN.NU=MIN.NU, |
|
888 |
- MIN.PHI=MIN.PHI, |
|
889 |
- THR.NU.PHI=THR.NU.PHI, |
|
890 |
- thresholdCopynumber=thresholdCopynumber, |
|
891 |
- unlink=unlink, |
|
892 |
- use.poe=use.poe, |
|
893 |
- crlmmBatchSize=crlmmBatchSize) |
|
894 |
-## use.bigmemory=use.bigmemory) |
|
895 |
-## hiddenMarkovModel=hiddenMarkovModel, |
|
896 |
-## circularBinarySegmentation=circularBinarySegmentation, |
|
897 |
-## cbsOpts=cbsOpts, |
|
898 |
-## hmmOpts=hmmOpts) ##remove SnpSuperSet object |
|
899 |
-} |
|
900 |
- |
|
901 | 703 |
##linear model parameters |
902 |
-lm.parameters <- function(object, cnOptions){ |
|
903 |
- fD <- fData(object) |
|
904 |
- batch <- object$batch |
|
905 |
- uplate <- unique(batch) |
|
906 |
- parameterNames <- c(paste("tau2A", uplate, sep="_"), |
|
907 |
- paste("tau2B", uplate, sep="_"), |
|
908 |
- paste("sig2A", uplate, sep="_"), |
|
909 |
- paste("sig2B", uplate, sep="_"), |
|
910 |
- paste("nuA", uplate, sep="_"), |
|
911 |
- paste("nuA.se", uplate, sep="_"), |
|
912 |
- paste("nuB", uplate, sep="_"), |
|
913 |
- paste("nuB.se", uplate, sep="_"), |
|
914 |
- paste("phiA", uplate, sep="_"), |
|
915 |
- paste("phiA.se", uplate, sep="_"), |
|
916 |
- paste("phiB", uplate, sep="_"), |
|
917 |
- paste("phiB.se", uplate, sep="_"), |
|
918 |
- paste("phiAX", uplate, sep="_"), |
|
919 |
- paste("phiBX", uplate, sep="_"), |
|
920 |
- paste("corr", uplate, sep="_"), |
|
921 |
- paste("corrA.BB", uplate, sep="_"), |
|
922 |
- paste("corrB.AA", uplate, sep="_")) |
|
923 |
- pMatrix <- data.frame(matrix(numeric(0), |
|
924 |
- nrow(object), |
|
925 |
- length(parameterNames)), |
|
926 |
- row.names=featureNames(object)) |
|
927 |
- colnames(pMatrix) <- parameterNames |
|
928 |
- fD2 <- cbind(fD, pMatrix) |
|
929 |
- new("AnnotatedDataFrame", data=fD2, |
|
930 |
- varMetadata=data.frame(labelDescription=colnames(fD2), |
|
931 |
- row.names=colnames(fD2))) |
|
932 |
-} |
|
704 |
+##lm.parameters <- function(object, cnOptions){ |
|
705 |
+## fD <- fData(object) |
|
706 |
+## batch <- object$batch |
|
707 |
+## uplate <- unique(batch) |
|
708 |
+## parameterNames <- c(paste("tau2A", uplate, sep="_"), |
|
709 |
+## paste("tau2B", uplate, sep="_"), |
|
710 |
+## paste("sig2A", uplate, sep="_"), |
|
711 |
+## paste("sig2B", uplate, sep="_"), |
|
712 |
+## paste("nuA", uplate, sep="_"), |
|
713 |
+## paste("nuA.se", uplate, sep="_"), |
|
714 |
+## paste("nuB", uplate, sep="_"), |
|
715 |
+## paste("nuB.se", uplate, sep="_"), |
|
716 |
+## paste("phiA", uplate, sep="_"), |
|
717 |
+## paste("phiA.se", uplate, sep="_"), |
|
718 |
+## paste("phiB", uplate, sep="_"), |
|
719 |
+## paste("phiB.se", uplate, sep="_"), |
|
720 |
+## paste("phiAX", uplate, sep="_"), |
|
721 |
+## paste("phiBX", uplate, sep="_"), |
|
722 |
+## paste("corr", uplate, sep="_"), |
|
723 |
+## paste("corrA.BB", uplate, sep="_"), |
|
724 |
+## paste("corrB.AA", uplate, sep="_")) |
|
725 |
+## pMatrix <- data.frame(matrix(numeric(0), |
|
726 |
+## nrow(object), |
|
727 |
+## length(parameterNames)), |
|
728 |
+## row.names=featureNames(object)) |
|
729 |
+## colnames(pMatrix) <- parameterNames |
|
730 |
+## fD2 <- cbind(fD, pMatrix) |
|
731 |
+## new("AnnotatedDataFrame", data=fD2, |
|
732 |
+## varMetadata=data.frame(labelDescription=colnames(fD2), |
|
733 |
+## row.names=colnames(fD2))) |
|
734 |
+##} |
|
933 | 735 |
|
934 | 736 |
nonpolymorphic <- function(object, cnOptions, tmp.objects){ |
935 | 737 |
chromosome <- cnOptions[["chromosome"]] |
... | ... |
@@ -937,6 +739,7 @@ nonpolymorphic <- function(object, cnOptions, tmp.objects){ |
937 | 739 |
CHR <- unique(chromosome(object)) |
938 | 740 |
verbose <- cnOptions[["verbose"]] |
939 | 741 |
if(CHR != chromosome[1]) verbose <- FALSE |
742 |
+ if(batch != unique(cnOptions[["batch"]])[1]) verbose <- FALSE |
|
940 | 743 |
goodSnps <- function(object, PHI.THR, tmp.objects, nAA.THR, nBB.THR){ |
941 | 744 |
Ns <- tmp.objects[["Ns"]] |
942 | 745 |
##Ns <- get("Ns", envir) |
... | ... |
@@ -1215,12 +1018,12 @@ withinGenotypeMoments <- function(object, cnOptions, tmp.objects){ |
1215 | 1018 |
return(tmp.objects) |
1216 | 1019 |
} |
1217 | 1020 |
|
1218 |
- |
|
1219 | 1021 |
oneBatch <- function(object, cnOptions, tmp.objects){ |
1220 | 1022 |
muA <- tmp.objects[["muA"]] |
1221 | 1023 |
muB < |