git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45126 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -433,3 +433,9 @@ then readIDAT() should work. Thanks to Pierre Cherel who reported this error. |
433 | 433 |
2010-03-08 M. Ritchie committed version 1.5.31 |
434 | 434 |
* removed a few unnecessary lines of code from crlmm-illumina.R (zero not needed as argument for preprocessInfinium2() and storageMode should not be "lockedEnvironment" in RGtoXY()) |
435 | 435 |
* added "humanomni1quadv1b" to validCdfName() in utils.R |
436 |
+ |
|
437 |
+2010-03-08 R.Scharpf committed version 1.5.32 |
|
438 |
+ |
|
439 |
+** This is a roll back to version 1.5.24 |
|
440 |
+ |
|
441 |
+ |
... | ... |
@@ -1,39 +1,17 @@ |
1 | 1 |
Package: crlmm |
2 | 2 |
Type: Package |
3 | 3 |
Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays. |
4 |
-Version: 1.5.31 |
|
5 |
-Date: 2010-03-08 |
|
6 |
-Author: Rafael A Irizarry, Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au> |
|
7 |
-Maintainer: Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
|
4 |
+Version: 1.5.32 |
|
5 |
+Date: 2010-02-05 |
|
6 |
+Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au> |
|
7 |
+Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
|
8 | 8 |
Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 5.0, 6.0, and Illumina platforms |
9 | 9 |
License: Artistic-2.0 |
10 |
-Depends: R (>= 2.11.0), |
|
11 |
- methods, |
|
12 |
- Biobase (>= 2.7.2), |
|
13 |
- oligoClasses (>= 1.9.28) |
|
14 |
-Imports: affyio (>= 1.15.2), |
|
15 |
- preprocessCore, |
|
16 |
- utils, |
|
17 |
- stats, |
|
18 |
- genefilter, |
|
19 |
- splines, |
|
20 |
- mvtnorm, |
|
21 |
- ellipse, |
|
22 |
- SNPchip,ff |
|
23 |
-Suggests: hapmapsnp5, |
|
24 |
- hapmapsnp6, |
|
25 |
- genomewidesnp5Crlmm (>= 1.0.2), |
|
26 |
- genomewidesnp6Crlmm (>= 1.0.2), |
|
27 |
- snpMatrix, |
|
28 |
- metaArray |
|
29 |
-Collate: AllClasses.R |
|
30 |
- AllGenerics.R |
|
31 |
- methods-AffymetrixAlleleSet.R |
|
32 |
- methods-IlluminaAlleleSet.R |
|
33 |
- methods-CrlmmContainer.R |
|
10 |
+Depends: methods, Biobase (>= 2.7.2), R (>= 2.11.0), oligoClasses (>= 1.9.21) |
|
11 |
+Imports: affyio (>= 1.15.2), preprocessCore, utils, stats, genefilter, splines, mvtnorm, ellipse, SNPchip |
|
12 |
+Suggests: hapmapsnp5, hapmapsnp6, genomewidesnp5Crlmm (>= 1.0.2),genomewidesnp6Crlmm (>= 1.0.2), snpMatrix |
|
13 |
+Collate: AllGenerics.R |
|
34 | 14 |
methods-CNSet.R |
35 |
- methods-AlleleSet.R |
|
36 |
- methods-CallSet.R |
|
37 | 15 |
methods-eSet.R |
38 | 16 |
methods-SnpSuperSet.R |
39 | 17 |
cnrma-functions.R |
... | ... |
@@ -1,4 +1,3 @@ |
1 |
-## crlmm NAMESPACE |
|
2 | 1 |
useDynLib("crlmm", .registration=TRUE) |
3 | 2 |
|
4 | 3 |
## Biobase |
... | ... |
@@ -8,12 +7,11 @@ importClassesFrom(Biobase, AnnotatedDataFrame, AssayData, eSet, SnpSet, |
8 | 7 |
|
9 | 8 |
importMethodsFrom(Biobase, annotation, "annotation<-", |
10 | 9 |
annotatedDataFrameFrom, assayData, "assayData<-", |
11 |
- snpCallProbability, |
|
12 | 10 |
combine, dims, experimentData, "experimentData<-", |
13 | 11 |
fData, featureData, "featureData<-", featureNames, |
14 | 12 |
fvarMetadata, fvarLabels, pData, phenoData, |
15 | 13 |
"phenoData<-", protocolData, "protocolData<-", |
16 |
- pubMedIds, rowMedians, sampleNames, snpCall, storageMode, |
|
14 |
+ pubMedIds, rowMedians, sampleNames, storageMode, |
|
17 | 15 |
"storageMode<-", updateObject, varLabels) |
18 | 16 |
|
19 | 17 |
importFrom(Biobase, assayDataElement, assayDataElementNames, |
... | ... |
@@ -21,75 +19,44 @@ importFrom(Biobase, assayDataElement, assayDataElementNames, |
21 | 19 |
validMsg) |
22 | 20 |
|
23 | 21 |
## oligoClasses |
24 |
-##importClassesFrom(oligoClasses, SnpSuperSet, AlleleSet, CNSet) |
|
22 |
+importClassesFrom(oligoClasses, SnpSuperSet, AlleleSet) |
|
25 | 23 |
|
26 |
-##S3 method ffdf and class ffdf |
|
27 |
-importFrom(ff, ffdf, ff, as.ff, as.ffdf) |
|
24 |
+importMethodsFrom(oligoClasses, allele, calls, "calls<-", confs, |
|
25 |
+ "confs<-", cnConfidence, "cnConfidence<-", |
|
26 |
+ isSnp, chromosome, position, CA, "CA<-", CB, "CB<-") |
|
28 | 27 |
|
29 | 28 |
|
29 |
+importFrom(oligoClasses, chromosome2integer, celfileDate, list.celfiles) |
|
30 | 30 |
|
31 |
-##importMethodsFrom(oligoClasses, allele, calls, "calls<-", confs, |
|
32 |
-## "confs<-", cnConfidence, "cnConfidence<-", isSnp, |
|
33 |
-## chromosome, position, CA, "CA<-", CB, "CB<-", A, B) |
|
31 |
+importFrom(oligoClasses, copyNumber) |
|
34 | 32 |
|
35 |
-importFrom(oligoClasses, chromosome2integer, celfileDate, list.celfiles, |
|
36 |
- copyNumber) |
|
37 |
- |
|
38 |
-## graphics |
|
39 | 33 |
importFrom(graphics, abline, axis, layout, legend, mtext, par, plot, |
40 | 34 |
polygon, rect, segments, text, points, boxplot) |
41 | 35 |
|
42 |
-## grDevices |
|
43 | 36 |
importFrom(grDevices, grey) |
44 | 37 |
|
45 |
-## affyio |
|
46 | 38 |
importFrom(affyio, read.celfile.header, read.celfile) |
47 | 39 |
|
48 |
-## preprocessCore |
|
49 |
-importFrom(preprocessCore, normalize.quantiles.use.target, |
|
50 |
- normalize.quantiles) |
|
40 |
+importFrom(preprocessCore, normalize.quantiles.use.target, normalize.quantiles) |
|
51 | 41 |
|
52 |
-## utils |
|
53 |
-importFrom(utils, data, packageDescription, setTxtProgressBar, |
|
54 |
- txtProgressBar) |
|
42 |
+importFrom(utils, data, packageDescription, setTxtProgressBar, txtProgressBar) |
|
55 | 43 |
|
56 |
-## stats |
|
57 |
-importFrom(stats, coef, cov, dnorm, kmeans, lm, mad, median, quantile, |
|
58 |
- sd, update) |
|
44 |
+importFrom(stats, coef, cov, dnorm, kmeans, lm, mad, median, quantile, sd, update) |
|
59 | 45 |
|
60 |
-## genefilter |
|
61 | 46 |
importFrom(genefilter, rowSds) |
62 | 47 |
|
63 |
-## mvtnorm |
|
64 | 48 |
importFrom(mvtnorm, dmvnorm) |
65 | 49 |
|
66 |
-## ellipse |
|
67 | 50 |
importFrom(ellipse, ellipse) |
68 | 51 |
|
69 |
- |
|
70 |
-##exportClasses(FFSet) |
|
71 |
-exportMethods(annotatedDataFrameFrom, |
|
72 |
- copyNumber, initialize, |
|
73 |
- ##show, "$", "[[", "[", |
|
74 |
- genomeAnnotation, |
|
75 |
- lM, |
|
76 |
- CA, |
|
77 |
- CB, |
|
78 |
- A, |
|
79 |
- B, |
|
80 |
- snpCall, |
|
81 |
- confs, |
|
82 |
- chromosome, |
|
83 |
- position, |
|
84 |
- isSnp) |
|
85 |
-export(crlmmOptions, crlmm, crlmmCopynumber, ellipse, readIdatFiles, snprma, getParam, validCdfNames) |
|
86 |
- |
|
87 |
- |
|
88 |
- |
|
89 |
-############# |
|
90 |
-## TO REMOVE |
|
91 |
-############# |
|
92 |
- |
|
93 |
-##export everything that does not start with a . |
|
94 |
-exportPattern("^[^\\.]") |
|
95 |
- |
|
52 |
+exportMethods(A, B, copyNumber) |
|
53 |
+export(cnOptions, crlmm, crlmmIllumina, crlmmCopynumber, ellipse, readIdatFiles, snprma, getParam) |
|
54 |
+ |
|
55 |
+##export(##viterbi.CNSet, ##move to VanillaICE |
|
56 |
+## combineIntensities, |
|
57 |
+## whichPlatform, |
|
58 |
+## isValidCdfName, |
|
59 |
+## crlmmWrapper, |
|
60 |
+## withinGenotypeMoments, |
|
61 |
+## locationAndScale, getParam.SnpSuperSet) |
|
62 |
+export(thresholdModelParams, computeCopynumber.CNSet, nuphiAllele, coefs, biasAdjNP) |
... | ... |
@@ -1,17 +1,3 @@ |
1 |
- ************************************************** |
|
2 |
- * * |
|
3 |
- * 1.5 SERIES NEWS * |
|
4 |
- * * |
|
5 |
- ************************************************** |
|
6 |
- |
|
7 |
-USER VISIBLE CHANGES |
|
8 |
- |
|
9 |
-NEW FEATURES |
|
10 |
- |
|
11 |
- o Support to large datasets via ff package. The user |
|
12 |
- must load the ff package manually to benefit from |
|
13 |
- this. |
|
14 |
- |
|
15 | 1 |
************************************************** |
16 | 2 |
* * |
17 | 3 |
* 1.3 SERIES NEWS * |
18 | 4 |
deleted file mode 100644 |
... | ... |
@@ -1,74 +0,0 @@ |
1 |
-setOldClass("ffdf") |
|
2 |
-setOldClass("ff_matrix") |
|
3 |
-setClassUnion("matrix_or_ff", c("matrix", "ff_matrix")) |
|
4 |
-setClassUnion("list_or_ffdf", c("list", "ffdf")) |
|
5 |
-setClass("CrlmmContainer", contains="eSet", |
|
6 |
- representation(options="list", |
|
7 |
- genomeAnnotation="ANY", |
|
8 |
- "VIRTUAL")) |
|
9 |
- |
|
10 |
-setMethod("show", "CrlmmContainer", function(object){ |
|
11 |
- callNextMethod(object) |
|
12 |
- cat("options: \n") |
|
13 |
- print(names(crlmmOptions(object))) |
|
14 |
- cat("\n") |
|
15 |
- cat("genomeAnnotation:", nrow(genomeAnnotation(object)), " rows, ", ncol(genomeAnnotation(object)), " columns\n") |
|
16 |
- print(genomeAnnotation(object)[1:5, ]) |
|
17 |
- cat("\n") |
|
18 |
-}) |
|
19 |
-setClass("AlleleSet", contains="CrlmmContainer") |
|
20 |
-setClass("CallSet", contains="AlleleSet") |
|
21 |
-setClass("CNSet", contains="CallSet", |
|
22 |
- representation(lM="list_or_ffdf")) |
|
23 |
- |
|
24 |
-setClass("IlluminaRGSet", contains="CrlmmContainer") |
|
25 |
-setClass("IlluminaXYSet", contains="CrlmmContainer") |
|
26 |
- |
|
27 |
-setClass("AffymetrixAlleleSet", contains="AlleleSet") ##AffymetrixAlleleSet |
|
28 |
-setClass("IlluminaAlleleSet", contains="AlleleSet") |
|
29 |
-##setClass("AffymetrixBigData", contains="AffymetrixAlleleSet") |
|
30 |
-##setClass("AffymetrixSmallData", contains="AffymetrixAlleleSet") |
|
31 |
-##setClass("IlluminaSmallData", contains="IlluminaAlleleSet") |
|
32 |
-##setClass("IlluminaBigData", contains="IlluminaAlleleSet") |
|
33 |
-##setMethod("initialize", "AffymetrixBigData", function(.Object, annotation){ |
|
34 |
-## .Object <- callNextMethod(.Object) |
|
35 |
-## if(!missing(annotation)) annotation(.Object) <- annotation |
|
36 |
-## .Object |
|
37 |
-##}) |
|
38 |
-##setClass("AffymetrixCallSet", contains="CallSet") |
|
39 |
-##setClass("IlluminaCallSet", contains="CallSet") |
|
40 |
-setMethod("initialize", "AlleleSet", function(.Object, alleleA=new("matrix"), alleleB=new("matrix"), ...){ |
|
41 |
- .Object <- callNextMethod(.Object, alleleA=alleleA, alleleB=alleleB, ...) |
|
42 |
- storageMode(.Object) <- "environment" |
|
43 |
- .Object |
|
44 |
-}) |
|
45 |
-setMethod("initialize", "CallSet", function(.Object, call=new("matrix"), callProbability=new("matrix"), ...){ |
|
46 |
- .Object <- callNextMethod(.Object, call=call, callProbability=callProbability, ...) |
|
47 |
- storageMode(.Object) <- "environment" |
|
48 |
- .Object |
|
49 |
-}) |
|
50 |
-setMethod("initialize", "CNSet", function(.Object, CA=new("matrix"), CB=new("matrix"), lM=new("list"), ...){ |
|
51 |
- .Object <- callNextMethod(.Object, CA=CA, CB=CB, lM=lM,...) |
|
52 |
- storageMode(.Object) <- "environment" |
|
53 |
- .Object |
|
54 |
-}) |
|
55 |
-setValidity("AlleleSet", function(object) { |
|
56 |
- assayDataValidMembers(assayData(object), c("alleleA", "alleleB")) |
|
57 |
-}) |
|
58 |
-setValidity("IlluminaRGSet", function(object) { |
|
59 |
- assayDataValidMembers(assayData(object), c("R", "G", "zero")) |
|
60 |
-}) |
|
61 |
-setValidity("IlluminaXYSet", function(object) { |
|
62 |
- assayDataValidMembers(assayData(object), c("X", "Y", "zero")) |
|
63 |
-}) |
|
64 |
- |
|
65 |
-setValidity("CallSet", function(object) { |
|
66 |
- assayDataValidMembers(assayData(object), c("alleleA", "alleleB", "call", "callProbability")) |
|
67 |
-}) |
|
68 |
-setValidity("CNSet", function(object) { |
|
69 |
- assayDataValidMembers(assayData(object), c("alleleA", "alleleB", "call", "callProbability", "CA", "CB")) |
|
70 |
-}) |
|
71 |
- |
|
72 |
- |
|
73 |
- |
|
74 |
- |
... | ... |
@@ -1,57 +1,14 @@ |
1 |
-setGeneric("A<-", function(object, value) standardGeneric("A<-")) |
|
2 |
-setGeneric("B<-", function(object, value) standardGeneric("B<-")) |
|
1 |
+setGeneric("A", function(object) standardGeneric("A")) |
|
2 |
+setGeneric("B", function(object) standardGeneric("B")) |
|
3 |
+##setGeneric("A<-", function(object, value) standardGeneric("A<-")) |
|
4 |
+##setGeneric("B<-", function(object, value) standardGeneric("B<-")) |
|
3 | 5 |
|
4 |
-setGeneric("getParam", function(object, name, ...) standardGeneric("getParam")) |
|
6 |
+setGeneric("getParam", function(object, name, batch) standardGeneric("getParam")) |
|
5 | 7 |
setGeneric("cnIndex", function(object) standardGeneric("cnIndex")) |
6 | 8 |
setGeneric("cnNames", function(object) standardGeneric("cnNames")) |
7 |
-setGeneric("confs", function(object) standardGeneric("confs")) |
|
8 |
-setGeneric("computeCopynumber", function(object) standardGeneric("computeCopynumber")) |
|
9 |
-setGeneric("crlmm", function(object, ...) standardGeneric("crlmm")) |
|
10 |
-setGeneric("crlmmOptions", function(object) standardGeneric("crlmmOptions")) |
|
11 |
-setGeneric("crlmmOptions<-", function(object, value) standardGeneric("crlmmOptions<-")) |
|
12 |
-setGeneric("construct", function(object, filenames) standardGeneric("construct")) |
|
13 |
- |
|
14 |
-setGeneric("getOptions", function(object) standardGeneric("getOptions")) |
|
15 |
- |
|
16 |
-##setGeneric("getA", function(object) standardGeneric("getA")) |
|
17 |
-##setGeneric("getB", function(object) standardGeneric("getB")) |
|
18 |
-##setGeneric("getG", function(object) standardGeneric("getG")) |
|
19 |
-##setGeneric("getR", function(object) standardGeneric("getR")) |
|
20 |
-##setGeneric("getZero", function(object) standardGeneric("getZero")) |
|
21 |
-##setGeneric("getCalls", function(object) standardGeneric("getCalls")) |
|
22 |
-##setGeneric("getConfs", function(object) standardGeneric("getConfs")) |
|
23 |
-##setGeneric("getCA", function(object) standardGeneric("getCA")) |
|
24 |
-##setGeneric("getCB", function(object) standardGeneric("getCB")) |
|
25 |
-setGeneric("getPhenoData", function(object) standardGeneric("getPhenoData")) |
|
26 |
-setGeneric("getFeatureData", function(object) standardGeneric("getFeatureData")) |
|
27 |
-setGeneric("getProtocolData", function(object, filenames) standardGeneric("getProtocolData")) |
|
28 |
-setGeneric("getGenomeAnnotation", function(object, ...) standardGeneric("getGenomeAnnotation")) |
|
29 |
-setGeneric("getLinearModelParam", function(object, ...) standardGeneric("getLinearModelParam")) |
|
30 |
- |
|
31 |
-##setGeneric("initializeStorage", function(object) standardGeneric("initializeStorage")) |
|
32 |
-setGeneric("prediction", function(x, ...) standardGeneric("prediction")) |
|
33 |
-setGeneric("genomeAnnotation", function(object) standardGeneric("genomeAnnotation")) |
|
34 |
-setGeneric("genomeAnnotation<-", function(object,value) standardGeneric("genomeAnnotation<-")) |
|
35 |
-setGeneric("lM", function(object) standardGeneric("lM")) |
|
36 |
-setGeneric("lM<-", function(object, value) standardGeneric("lM<-")) |
|
37 |
- |
|
38 |
-##setGeneric("nFeatures", function(object) standardGeneric("nFeatures")) |
|
39 |
- |
|
9 |
+setGeneric("computeCopynumber", function(object, cnOptions) standardGeneric("computeCopynumber")) |
|
40 | 10 |
setGeneric("pr", function(object, name, batch, value) standardGeneric("pr")) |
41 |
-setGeneric("rma", function(object) standardGeneric("rma")) |
|
42 |
-setGeneric("snprma", function(object, ...) standardGeneric("snprma")) |
|
43 | 11 |
setGeneric("snpIndex", function(object) standardGeneric("snpIndex")) |
44 | 12 |
setGeneric("snpNames", function(object) standardGeneric("snpNames")) |
45 | 13 |
##setGeneric("splitByChromosome", function(object, ...) standardGeneric("splitByChromosome")) |
46 | 14 |
|
47 |
-setGeneric("R", function(object) standardGeneric("R")) |
|
48 |
-setGeneric("G", function(object) standardGeneric("G")) |
|
49 |
-setGeneric("Z", function(object) standardGeneric("Z")) |
|
50 |
-setGeneric("X", function(object) standardGeneric("X")) |
|
51 |
-setGeneric("Y", function(object) standardGeneric("Y")) |
|
52 |
-setMethod("R", "IlluminaRGSet", function(object) assayDataElement(object, "R")) |
|
53 |
-setMethod("G", "IlluminaRGSet", function(object) assayDataElement(object, "G")) |
|
54 |
-setMethod("Z", "IlluminaRGSet", function(object) assayDataElement(object, "zero")) |
|
55 |
-setMethod("X", "IlluminaXYSet", function(object) assayDataElement(object, "X")) |
|
56 |
-setMethod("Y", "IlluminaXYSet", function(object) assayDataElement(object, "Y")) |
|
57 |
-setMethod("Z", "IlluminaXYSet", function(object) assayDataElement(object, "zero")) |
... | ... |
@@ -153,483 +153,337 @@ predictGender <- function(res, cdfName="genomewidesnp6", SNRMin=5){ |
153 | 153 |
return(gender) |
154 | 154 |
} |
155 | 155 |
|
156 |
- |
|
157 |
-##initializeFFObjects <- function(filenames, cnOptions){ |
|
158 |
-## outdir <- cnOptions[["outdir"]] |
|
159 |
-## cdfName <- cnOptions[["cdfName"]] |
|
160 |
-## AFile <- cnOptions[["AFile"]] |
|
161 |
-## BFile <- cnOptions[["BFile"]] |
|
162 |
-## callsFile <- cnOptions[["callsFile"]] |
|
163 |
-## confsFile <- cnOptions[["confsFile"]] |
|
164 |
-## snprmaFile <- cnOptions[["snprmaFile"]] |
|
165 |
-## cnrmaFile <- cnOptions[["cnrmaFile"]] |
|
166 |
-## CAFile <- cnOptions[["CAFile"]] |
|
167 |
-## CBFile <- cnOptions[["CBFile"]] |
|
168 |
-## load.it <- cnOptions[["load.it"]] |
|
169 |
-## fileExists <- list(A=file.exists(AFile), |
|
170 |
-## B=file.exists(BFile), |
|
171 |
-## calls=file.exists(callsFile), |
|
172 |
-## confs=file.exists(confsFile), |
|
173 |
-## CA=file.exists(CAFile), |
|
174 |
-## CB=file.exists(CBFile)) |
|
175 |
-## allExists <- all(unlist(fileExists)) |
|
176 |
-## ##if files already exist, check that the files have the appropriate dimension |
|
177 |
-## if(allExists){ |
|
178 |
-## load(AFile) |
|
179 |
-## open(A) |
|
180 |
-## sns <- dimnames(A)[[2]] |
|
181 |
-## if(!identical(sns, basename(filenames)) | !load.it){ |
|
182 |
-## ## if not of the same dimension, clean up |
|
183 |
-## message("Sample names in previously saved objects differ from the filenames. Removing previously saved objects.") |
|
184 |
-## delete(A); gc() |
|
185 |
-## unlink(AFile) |
|
186 |
-## load(BFile); delete(B); unlink(BFile) |
|
187 |
-## unlink(snprmaFile) |
|
188 |
-## unlink(cnrmaFile) |
|
189 |
-## if(file.exists(file.path(outdir, "cnParams.rda"))){ |
|
190 |
-## load(file.path(outdir, "cnParams.rda")) |
|
191 |
-## delete(cnParams); gc() |
|
192 |
-## unlink(file.path(outdir, "cnParams.rda")) |
|
193 |
-## } |
|
194 |
-## load(callsFile); delete(calls); unlink(callsFile) |
|
195 |
-## load(confsFile); delete(confs); unlink(confsFile) |
|
196 |
-## load(CAFile); delete(CA); unlink(CAFile) |
|
197 |
-## load(CBFile); delete(CB); unlink(CBFile) |
|
198 |
-## allExists <- FALSE |
|
199 |
-## } |
|
200 |
-## } |
|
201 |
-## if(!allExists) { |
|
202 |
-## message("Initializing ff objects for A, B, confs, calls, CA, and CB.") |
|
203 |
-## dns <- .dimnames(filenames, cnOptions[["cdfName"]], cnOptions[["verbose"]]) |
|
204 |
-## fns <- dns[[1]] |
|
205 |
-## } |
|
206 |
-## if(!file.exists(AFile)) {A <- initializeBigMatrix(dns); save(A, file=AFile); close(A)} |
|
207 |
-## if(!file.exists(BFile)) {B <- initializeBigMatrix(dns); save(B, file=BFile); close(B)} |
|
208 |
-## if(!file.exists(confsFile)) {confs <- initializeBigMatrix(dns); save(confs, file=confsFile); close(confs)} |
|
209 |
-## if(!file.exists(callsFile)) {calls <- initializeBigMatrix(dns); save(calls, file=callsFile); close(calls)} |
|
210 |
-## if(!file.exists(CAFile)) {CA <- initializeBigMatrix(dns); save(CA, file=CAFile); close(CA)} |
|
211 |
-## if(!file.exists(CBFile)) {CB <- initializeBigMatrix(dns); save(CB, file=CBFile); close(CB)} |
|
212 |
-## featureDataFile <- file.path(outdir, "featureDataFF.rda") |
|
213 |
-## if(!file.exists(featureDataFile)){ |
|
214 |
-## path <- system.file("extdata", package=paste(cnOptions[["cdfName"]], "Crlmm", sep="")) |
|
215 |
-## load(file.path(path, "snpProbes.rda")) |
|
216 |
-## snpProbes <- get("snpProbes") |
|
217 |
-## load(file.path(path, "cnProbes.rda")) |
|
218 |
-## cnProbes <- get("cnProbes") |
|
219 |
-## message("Initializing featureDataFF.") |
|
220 |
-## fvarlabels <- c("chromosome", "position", "isSnp") |
|
221 |
-## M <- matrix(NA, length(fns), 3, dimnames=list(fns, fvarlabels)) |
|
222 |
-## index <- match(rownames(snpProbes), rownames(M)) #only snp probes in M get assigned position |
|
223 |
-## M[index, "position"] <- snpProbes[, grep("pos", colnames(snpProbes))] |
|
224 |
-## M[index, "chromosome"] <- snpProbes[, grep("chr", colnames(snpProbes))] |
|
225 |
-## M[index, "isSnp"] <- 1L |
|
226 |
-## index <- match(rownames(cnProbes), rownames(M)) #only snp probes in M get assigned position |
|
227 |
-## M[index, "position"] <- cnProbes[, grep("pos", colnames(cnProbes))] |
|
228 |
-## M[index, "chromosome"] <- cnProbes[, grep("chr", colnames(cnProbes))] |
|
229 |
-## M[index, "isSnp"] <- 0L |
|
230 |
-## featureDataFF <- ff(M, dim=c(nrow(M), ncol(M)), |
|
231 |
-## vmode="integer", finalizer="close", |
|
232 |
-## overwrite=TRUE, |
|
233 |
-## dimnames=list(fns, fvarlabels)) |
|
234 |
-## save(featureDataFF, file=file.path(outdir, "featureDataFF.rda")) |
|
235 |
-## close(featureDataFF) |
|
236 |
-## rm(M, cnProbes, snpProbes, featureDataFF); gc() |
|
237 |
-## } |
|
238 |
-## ## parameters file |
|
239 |
-## parameterFile <- file.path(outdir, "cnParams.rda") |
|
240 |
-## if(!file.exists(parameterFile)) { |
|
241 |
-## message("Initializing parameters file") |
|
242 |
-## batch <- cnOptions[["batch"]] |
|
243 |
-## dns.batch <- list(fns, unique(batch)) |
|
244 |
-## cnParams <- initializeParamObject(dns.batch) |
|
245 |
-## save(cnParams, file=file.path(outdir, "cnParams.rda")) |
|
246 |
-## close(cnParams) |
|
247 |
-## } |
|
248 |
-##} |
|
249 |
- |
|
250 |
-##preprocessAndGenotype <- function(filenames, cnOptions, ...){ |
|
251 |
-## set.seed(cnOptions[["seed"]]) ##for reproducibility |
|
252 |
-## protocolFile <- cnOptions[["protocolFile"]] |
|
253 |
-## cdfName <- cnOptions[["cdfName"]] |
|
254 |
-## verbose <- cnOptions[["verbose"]] |
|
255 |
-## if(file.exists(protocolFile)){ |
|
256 |
-## ## check that file is the same dimension |
|
257 |
-## load(protocolFile) |
|
258 |
-## if(!identical(sampleNames(protocoldata), basename(filenames))) |
|
259 |
-## unlink(protocolFile) |
|
260 |
-## } |
|
261 |
-## if(!file.exists(protocolFile)){ |
|
262 |
-## platform <- whichPlatform(paste(cdfName, "Crlmm", sep="")) |
|
263 |
-## if(platform=="affymetrix"){ |
|
264 |
-## if(verbose) message("Creating protocol file with scan dates for the affy arrays") |
|
265 |
-## scanDates <- data.frame(ScanDate=sapply(filenames, celfileDate)) |
|
266 |
-## rownames(scanDates) <- basename(rownames(scanDates)) |
|
267 |
-## protocoldata <- new("AnnotatedDataFrame", |
|
268 |
-## data=scanDates, |
|
269 |
-## varMetadata=data.frame(labelDescription=colnames(scanDates), |
|
270 |
-## row.names=colnames(scanDates))) |
|
271 |
-## save(protocoldata, file=protocolFile) |
|
272 |
-## } |
|
273 |
-## ## protocol file for illumina saved during the readIdatFile step |
|
274 |
-## } |
|
275 |
-## if(isPackageLoaded("ff")) initializeFFObjects(filenames, cnOptions) |
|
276 |
-## crlmmWrapper(filenames, cnOptions, ...) |
|
277 |
-## message("Checking for required files...") |
|
278 |
-## message(cnOptions[["AFile"]], ": ", file.exists(cnOptions[["AFile"]])) |
|
279 |
-## message(cnOptions[["BFile"]], ": ", file.exists(cnOptions[["BFile"]])) |
|
280 |
-## message(cnOptions[["callsFile"]], ": ", file.exists(cnOptions[["callsFile"]])) |
|
281 |
-## message(cnOptions[["confsFile"]], ": ", file.exists(cnOptions[["confsFile"]])) |
|
282 |
-## message(cnOptions[["snprmaFile"]], ": ", file.exists(cnOptions[["snprmaFile"]])) |
|
283 |
-## message(cnOptions[["protocolFile"]], ": ", file.exists(cnOptions[["protocolFile"]])) |
|
284 |
-##} |
|
156 |
+combineIntensities <- function(res, NP=NULL, callSet){ |
|
157 |
+ rownames(res$B) <- rownames(res$A) <- res$gns |
|
158 |
+ colnames(res$B) <- colnames(res$A) <- res$sns |
|
159 |
+ if(!is.null(NP)){ |
|
160 |
+ blank <- matrix(NA, nrow(NP), ncol(NP)) |
|
161 |
+ dimnames(blank) <- dimnames(NP) |
|
162 |
+ A <- rbind(res$A, NP) |
|
163 |
+ B <- rbind(res$B, blank) |
|
164 |
+ } else { |
|
165 |
+ A <- res$A |
|
166 |
+ B <- res$B |
|
167 |
+ } |
|
168 |
+ dimnames(B) <- dimnames(A) |
|
169 |
+ index.snps <- match(res$gns, rownames(A)) |
|
170 |
+ callsConfs <- calls <- matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A)) |
|
285 | 171 |
|
286 |
-##crlmmCopynumber <- function(cnOptions, ...){ |
|
287 |
-##crlmmCopynumber <- function(object){ |
|
288 |
-## ops <- crlmmOptions(object) |
|
289 |
-## verbose <- ops$verbose |
|
290 |
-## calls <- snpCall(object) |
|
291 |
-## confs <- confs(object) |
|
292 |
-## fns <- featureNames(object) |
|
293 |
-## SNRmin <- ops$SNRMin |
|
294 |
-## batch <- object$batch |
|
295 |
-## whichBatch <- ops$cnOpts$whichBatch |
|
296 |
-## chromosome <- ops$cnOpts$chromosome |
|
297 |
-## MIN.SAMPLES <- ops$cnOpts$MIN.SAMPLES |
|
298 |
-## ##k <- grep("chr", colnames(snpProbes)) |
|
299 |
-## for(CHR in chromosome){ |
|
300 |
-## ##annotated snp and cn probes |
|
301 |
-## ##snps <- rownames(snpProbes)[snpProbes[, k] == CHR] |
|
302 |
-## ##cns <- rownames(cnProbes)[cnProbes[, k] == CHR] |
|
303 |
-## ##where are the annotated snps in the fns file |
|
304 |
-## ##index.snps <- match(snps, fns) |
|
305 |
-## ##index.cn <- match(cns, fns) |
|
306 |
-## ##row.index <- c(index.snps, index.cn) |
|
307 |
-## cat("Chromosome ", CHR, "\n") |
|
308 |
-## for(i in whichBatch){ |
|
309 |
-## PLATE <- unique(batch)[i] |
|
310 |
-## message("Plate: ", PLATE) |
|
311 |
-## sample.index <- which(batch==PLATE) |
|
312 |
-## if(length(sample.index) < MIN.SAMPLES) { |
|
313 |
-## warning("Plate ", PLATE, " has fewer than 10 samples. Skipping this plate.") |
|
314 |
-## next() |
|
315 |
-## } |
|
316 |
-## ##cnOptions[["batch"]] <- cnOptions[["batch"]][snpI[["SNR"]] >= SNRmin] |
|
317 |
-#### if(isPackageLoaded("ff")){ |
|
318 |
-#### ca <- as.matrix(CA[row.index, sample.index]) |
|
319 |
-#### cb <- as.matrix(CB[row.index, sample.index]) |
|
320 |
-#### } else{ |
|
321 |
-#### dns <- dimnames(A[row.index, sample.index]) |
|
322 |
-#### cb <- ca <- matrix(NA, nr=length(row.index), nc=length(sample.index), dimnames=dns) |
|
323 |
-#### } |
|
324 |
-#### cnSet <- new("CNSet", |
|
325 |
-#### alleleA=as.matrix(A[row.index, sample.index]), |
|
326 |
-#### alleleB=as.matrix(B[row.index, sample.index]), |
|
327 |
-#### call=as.matrix(calls[row.index, sample.index]), |
|
328 |
-#### callProbability=as.matrix(confs[row.index, sample.index]), |
|
329 |
-#### CA=ca, |
|
330 |
-#### CB=cb, |
|
331 |
-#### featureData=annotatedDataFrameFrom(as.matrix(A[row.index, sample.index]), byrow=TRUE), |
|
332 |
-#### phenoData=pD[sample.index, ], |
|
333 |
-#### protocolData=protocoldata[sample.index, ]) |
|
334 |
-## ##Verify this is correct |
|
335 |
-#### annotation(cnSet) <- cnOptions[["cdfName"]] |
|
336 |
-#### featureNames(cnSet) <- fns[row.index] |
|
337 |
-## ##add chromosome, position, isSnp |
|
338 |
-#### cnSet <- annotate(cnSet) |
|
339 |
-#### if(any(cnSet$SNR > SNRmin)){ |
|
340 |
-#### if(CHR == chromosome[1]) message(paste("Excluding samples with SNR < ", SNRmin)) |
|
341 |
-#### cnSet <- cnSet[, cnSet$SNR >= SNRmin] |
|
342 |
-#### } |
|
343 |
-#### featureData(cnSet) <- lm.parameters(cnSet, cnOptions) |
|
344 |
-## if(CHR > 23) next() |
|
345 |
-## cnSet <- computeCopynumber(object[chromosome(object) == CHR, sample.index]) |
|
346 |
-#### if(!isPackageLoaded("ff") & i == whichBatch[1]) cnParams <- initializeParamObject(list(featureNames(cnSet), unique(cnOptions[["batch"]])[whichBatch])) |
|
347 |
-#### if(!isPackageLoaded("ff")) { |
|
348 |
-#### row.index <- 1:nrow(cnSet) |
|
349 |
-#### } else { |
|
350 |
-#### ##Warning message: |
|
351 |
-#### ##In d[[1]] * d[[2]] : NAs produced by integer overflow |
|
352 |
-#### CA[row.index, sample.index] <- cnSet@assayData[["CA"]] |
|
353 |
-#### CB[row.index, sample.index] <- cnSet@assayData[["CB"]] |
|
354 |
-#### } |
|
355 |
-#### cnParams <- updateParams(cnParams, cnSet, row.index, batch=unique(batch)[i]) |
|
356 |
-## ## keep only chromosome, position, and isSnp |
|
357 |
-#### featureData(cnSet) <- featureData(cnSet)[, 1:3] |
|
358 |
-#### if(!isPackageLoaded("ff")){ |
|
359 |
-#### save(cnSet, file=paste(cnFile, "_", PLATE, "_", CHR, ".rda", sep="")) |
|
360 |
-#### save(cnParams, file=paste(outdir, "cnParams_", PLATE, "_", CHR, ".rda", sep="")) |
|
361 |
-#### } |
|
362 |
-## } ## end of batch loop |
|
363 |
-## } ## end of chromosome loop |
|
364 |
-#### if(isPackageLoaded("ff")) { |
|
365 |
-#### close(cnParams) |
|
366 |
-#### close(A); close(B) |
|
367 |
-#### close(CA); close(CB) |
|
368 |
-#### save(CA, file=CAFile) |
|
369 |
-#### save(CB, file=CBFile) |
|
370 |
-#### close(calls); close(confs) |
|
371 |
-#### return() |
|
372 |
-#### } |
|
373 |
-## return(cnSet) |
|
374 |
-##} |
|
375 |
- |
|
376 |
- |
|
377 |
- |
|
172 |
+ calls[index.snps, ] <- calls(callSet) |
|
173 |
+ callsConfs[index.snps, ] <- assayData(callSet)[["callProbability"]] |
|
174 |
+ fd <- data.frame(matrix(NA, nrow(calls), length(fvarLabels(callSet)))) |
|
175 |
+ fd[index.snps, ] <- fData(callSet) |
|
176 |
+ rownames(fd) <- rownames(A) |
|
177 |
+ colnames(fd) <- fvarLabels(callSet) |
|
178 |
+ fD <- new("AnnotatedDataFrame", |
|
179 |
+ data=data.frame(fd), |
|
180 |
+ varMetadata=data.frame(labelDescription=colnames(fd), row.names=colnames(fd))) |
|
181 |
+ superSet <- new("CNSet", |
|
182 |
+ CA=matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A)), |
|
183 |
+ CB=matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A)), |
|
184 |
+ alleleA=A, |
|
185 |
+ alleleB=B, |
|
186 |
+ call=calls, |
|
187 |
+ callProbability=callsConfs, |
|
188 |
+ featureData=fD, |
|
189 |
+ phenoData=phenoData(callSet), |
|
190 |
+ experimentData=experimentData(callSet), |
|
191 |
+ protocolData=protocolData(callSet), |
|
192 |
+ annotation=annotation(callSet)) |
|
193 |
+ return(superSet) |
|
194 |
+} |
|
378 | 195 |
|
196 |
+harmonizeDimnamesTo <- function(object1, object2){ |
|
197 |
+ #object2 should be a subset of object 1 |
|
198 |
+ object2 <- object2[featureNames(object2) %in% featureNames(object1), ] |
|
199 |
+ object1 <- object1[match(featureNames(object2), featureNames(object1)), ] |
|
200 |
+ object1 <- object1[, match(sampleNames(object2), sampleNames(object1))] |
|
201 |
+ stopifnot(all.equal(featureNames(object1), featureNames(object2))) |
|
202 |
+ stopifnot(all.equal(sampleNames(object1), sampleNames(object2))) |
|
203 |
+ return(object1) |
|
204 |
+} |
|
379 | 205 |
|
206 |
+crlmmCopynumber <- function(filenames, cnOptions, object, ...){ |
|
207 |
+ if(!missing(object)){ |
|
208 |
+ stopifnot(class(object) == "CNSet") |
|
209 |
+ createIntermediateObjects <- FALSE |
|
210 |
+ } else { |
|
211 |
+ createIntermediateObjects <- TRUE |
|
212 |
+ crlmmResults <- crlmmWrapper(filenames, cnOptions, ...) |
|
213 |
+ snprmaResult <- crlmmResults[["snprmaResult"]] |
|
214 |
+ cnrmaResult <- crlmmResults[["cnrmaResult"]] |
|
215 |
+ callSet <- crlmmResults[["callSet"]] |
|
216 |
+ rm(crlmmResults); gc() |
|
217 |
+ annotation(callSet) <- cnOptions[["cdfName"]] |
|
218 |
+ stopifnot(identical(featureNames(callSet), snprmaResult[["gns"]])) |
|
219 |
+ path <- system.file("extdata", package=paste(annotation(callSet), "Crlmm", sep="")) |
|
220 |
+ load(file.path(path, "snpProbes.rda")) |
|
221 |
+ snpProbes <- get("snpProbes") |
|
222 |
+ load(file.path(path, "cnProbes.rda")) |
|
223 |
+ cnProbes <- get("cnProbes") |
|
224 |
+ k <- grep("chr", colnames(snpProbes)) |
|
225 |
+ if(length(k) < 1) stop("chr or chromosome not in colnames(snpProbes)") |
|
226 |
+ } |
|
227 |
+ set.seed(cnOptions[["seed"]]) ##for reproducibility |
|
228 |
+ chromosome <- cnOptions[["chromosome"]] |
|
229 |
+ SNRmin <- cnOptions[["SNRmin"]] |
|
230 |
+ for(CHR in chromosome){ |
|
231 |
+ cat("Chromosome ", CHR, "\n") |
|
232 |
+ if(createIntermediateObjects){ |
|
233 |
+ snps <- rownames(snpProbes)[snpProbes[, k] == CHR] |
|
234 |
+ cnps <- rownames(cnProbes)[cnProbes[, k] == CHR] |
|
235 |
+ index.snps <- match(snps, featureNames(callSet)) |
|
236 |
+ index.nps <- match(cnps, rownames(cnrmaResult[["NP"]])) |
|
237 |
+ if(!is.null(cnrmaResult)){ |
|
238 |
+ npI <- cnrmaResult$NP[index.nps, ] |
|
239 |
+ } else npI <- NULL |
|
240 |
+ snpI <- list(A=snprmaResult$A[index.snps, ], |
|
241 |
+ B=snprmaResult$B[index.snps, ], |
|
242 |
+ sns=snprmaResult$sns, |
|
243 |
+ gns=snprmaResult$gns[index.snps], |
|
244 |
+ SNR=snprmaResult$SNR, |
|
245 |
+ SKW=snprmaResult$SKW, |
|
246 |
+ mixtureParams=snprmaResult$mixtureParams, |
|
247 |
+ cdfName=snprmaResult$cdfName) |
|
248 |
+ cnOptions[["batch"]] <- cnOptions[["batch"]][snpI[["SNR"]] >= SNRmin] |
|
249 |
+ cnSet <- combineIntensities(res=snpI, |
|
250 |
+ NP=npI, |
|
251 |
+ callSet=callSet[index.snps, ]) |
|
252 |
+ if(any(cnSet$SNR > SNRmin)){ |
|
253 |
+ message(paste("Excluding samples with SNR < ", SNRmin)) |
|
254 |
+ cnSet <- cnSet[, cnSet$SNR >= SNRmin] |
|
255 |
+ } |
|
256 |
+ rm(snpI, npI, snps, cnps, index.snps, index.nps); gc() |
|
257 |
+ pData(cnSet)$batch <- cnOptions[["batch"]] |
|
258 |
+ featureData(cnSet) <- lm.parameters(cnSet, cnOptions) |
|
259 |
+ } else { |
|
260 |
+ cnSet <- object |
|
261 |
+ } |
|
262 |
+ if(CHR != 24){ |
|
263 |
+ cnSet <- computeCopynumber(cnSet, cnOptions) |
|
264 |
+ } else{ |
|
265 |
+ message("Copy number estimates not available for chromosome Y. Saving only the 'callSet' object for this chromosome") |
|
266 |
+ alleleSet <- cnSet |
|
267 |
+ save(alleleSet, file=file.path(cnOptions[["outdir"]], paste("alleleSet_", CHR, ".rda", sep=""))) |
|
268 |
+ rm(cnSet, alleleSet); gc() |
|
269 |
+ next() |
|
270 |
+ } |
|
271 |
+ if(length(chromosome) == 1){ |
|
272 |
+ if(cnOptions[["save.cnset"]]){ |
|
273 |
+ save(cnSet, file=file.path(cnOptions[["outdir"]], paste("cnSet_", CHR, ".rda", sep=""))) |
|
274 |
+ } |
|
275 |
+ } |
|
276 |
+ if(length(chromosome) > 1){ |
|
277 |
+ save(cnSet, file=file.path(cnOptions[["outdir"]], paste("cnSet_", CHR, ".rda", sep=""))) |
|
278 |
+ rm(cnSet); gc() |
|
279 |
+ } else { |
|
280 |
+ return(cnSet) |
|
281 |
+ } |
|
282 |
+ } |
|
283 |
+ saved.objects <- list.files(cnOptions[["outdir"]], pattern="cnSet", full.names=TRUE) |
|
284 |
+ return(saved.objects) |
|
285 |
+} |
|
380 | 286 |
|
381 | 287 |
|
382 |
-##loadIlluminaRG <- function(rgFile, crlmmFile, load.it, save.it,...){ |
|
383 |
-#### if(missing(rgFile)){ |
|
384 |
-#### ##stop("must specify 'rgFile'.") |
|
385 |
-#### rgFile <- file.path(dirname(crlmmFile), "rgFile.rda") |
|
386 |
-#### message("rgFile not specified. Using ", rgFile) |
|
387 |
-#### } |
|
388 |
-## if(!load.it){ |
|
389 |
-## RG <- readIdatFiles(...) |
|
390 |
-## if(save.it) save(RG, file=rgFile) |
|
391 |
-## } |
|
392 |
-## if(load.it & !file.exists(rgFile)){ |
|
393 |
-## message("load.it is TRUE, but rgFile not present. Attempting to read the idatFiles.") |
|
394 |
-## RG <- readIdatFiles(...) |
|
395 |
-## if(save.it) save(RG, file=rgFile) |
|
396 |
-## } |
|
397 |
-## if(load.it & file.exists(rgFile)){ |
|
398 |
-## message("Loading RG file") |
|
399 |
-## load(rgFile) |
|
400 |
-## RG <- get("RG") |
|
401 |
-## } |
|
402 |
-## return(RG) |
|
403 |
-##} |
|
404 |
-## |
|
405 |
-##loadIlluminaCallSet <- function(crlmmFile, snprmaFile, RG, load.it, save.it, cdfName){ |
|
406 |
-## if(!file.exists(crlmmFile) | !load.it){ |
|
407 |
-## callSet <- crlmmIllumina(RG=RG, |
|
408 |
-## cdfName=cdfName, |
|
409 |
-## sns=sampleNames(RG), |
|
410 |
-## returnParams=TRUE, |
|
411 |
-## save.it=TRUE, |
|
412 |
-## intensityFile=snprmaFile) |
|
413 |
-## if(save.it) save(callSet, file=crlmmFile) |
|
414 |
-## } else { |
|
415 |
-## message("Loading ", crlmmFile, "...") |
|
416 |
-## load(crlmmFile) |
|
417 |
-## callSet <- get("callSet") |
|
418 |
-## } |
|
419 |
-## protocolData(callSet) <- protocolData(RG) |
|
420 |
-## return(callSet) |
|
421 |
-##} |
|
422 | 288 |
|
289 |
+crlmmWrapper <- function(filenames, cnOptions, ...){ |
|
290 |
+ cdfName <- cnOptions[["cdfName"]] |
|
291 |
+ load.it <- cnOptions[["load.it"]] |
|
292 |
+ save.it <- cnOptions[["save.it"]] |
|
293 |
+ splitByChr <- cnOptions[["splitByChr"]] |
|
294 |
+ crlmmFile <- cnOptions[["crlmmFile"]] |
|
295 |
+ intensityFile=cnOptions[["intensityFile"]] |
|
296 |
+ rgFile=cnOptions[["rgFile"]] |
|
297 |
+ ##use.ff=cnOptions[["use.ff"]] |
|
298 |
+ outdir <- cnOptions[["outdir"]] |
|
299 |
+ if(missing(cdfName)) stop("cdfName is missing -- a valid cdfName is required. See crlmm:::validCdfNames()") |
|
300 |
+ platform <- whichPlatform(cdfName) |
|
301 |
+ if(!(platform %in% c("affymetrix", "illumina"))){ |
|
302 |
+ stop("Only 'affymetrix' and 'illumina' platforms are supported at this time.") |
|
303 |
+ } else { |
|
304 |
+ message("Checking whether annotation package for the ", platform, " platform is available") |
|
305 |
+ if(!isValidCdfName(cdfName)){ |
|
306 |
+ cat("FALSE\n") |
|
307 |
+ stop(cdfName, " is not a valid entry. See crlmm:::validCdfNames(platform)") |
|
308 |
+ } else cat("TRUE\n") |
|
309 |
+ } |
|
310 |
+ if(missing(intensityFile)) stop("must specify 'intensityFile'.") |
|
311 |
+ if(missing(crlmmFile)) stop("must specify 'crlmmFile'.") |
|
312 |
+ if(platform == "illumina"){ |
|
313 |
+ if(missing(rgFile)){ |
|
314 |
+ ##stop("must specify 'rgFile'.") |
|
315 |
+ rgFile <- file.path(dirname(crlmmFile), "rgFile.rda") |
|
316 |
+ message("rgFile not specified. Using ", rgFile) |
|
317 |
+ } |
|
318 |
+ if(!load.it){ |
|
319 |
+ RG <- readIdatFiles(...) |
|
320 |
+ if(save.it) save(RG, file=rgFile) |
|
321 |
+ } |
|
322 |
+ if(load.it & !file.exists(rgFile)){ |
|
323 |
+ message("load.it is TRUE, but rgFile not present. Attempting to read the idatFiles.") |
|
324 |
+ RG <- readIdatFiles(...) |
|
325 |
+ if(save.it) save(RG, file=rgFile) |
|
326 |
+ } |
|
327 |
+ if(load.it & file.exists(rgFile)){ |
|
328 |
+ message("Loading RG file") |
|
329 |
+ load(rgFile) |
|
330 |
+ RG <- get("RG") |
|
331 |
+ } |
|
332 |
+ } |
|
333 |
+ if(!(file.exists(dirname(crlmmFile)))) stop(dirname(crlmmFile), " does not exist.") |
|
334 |
+ if(!(file.exists(dirname(intensityFile)))) stop(dirname(intensityFile), " does not exist.") |
|
335 |
+ ##--------------------------------------------------------------------------- |
|
336 |
+ ## FIX |
|
337 |
+ outfiles <- file.path(dirname(crlmmFile), paste("crlmmSetList_", 1:24, ".rda", sep="")) |
|
338 |
+ if(load.it & all(file.exists(outfiles))){ |
|
339 |
+ load(outfiles[1]) |
|
340 |
+ crlmmSetList <- get("crlmmSetList") |
|
341 |
+ if(!all(sampleNames(crlmmSetList) == basename(filenames))){ |
|
342 |
+ stop("load.it is TRUE, but sampleNames(crlmmSetList != basename(filenames))") |
|
343 |
+ } else{ |
|
344 |
+ return("load.it is TRUE and 'crlmmSetList_<CHR>.rda' objects found. Nothing to do...") |
|
345 |
+ } |
|
346 |
+ } |
|
347 |
+ if(load.it){ |
|
348 |
+ if(!file.exists(crlmmFile)){ |
|
349 |
+ message("load.it is TRUE, but ", crlmmFile, " does not exist. Rerunning the genotype calling algorithm") |
|
350 |
+ load.it <- FALSE |
|
351 |
+ } |
|
352 |
+ } |
|
353 |
+ if(platform == "affymetrix"){ |
|
354 |
+ if(!file.exists(crlmmFile) | !load.it){ |
|
355 |
+ callSet <- crlmm(filenames=filenames, |
|
356 |
+ cdfName=cdfName, |
|
357 |
+ save.it=TRUE, |
|
358 |
+ load.it=load.it, |
|
359 |
+ intensityFile=intensityFile) |
|
360 |
+ message("Quantile normalizing the copy number probes...") |
|
361 |
+ cnrmaResult <- cnrma(filenames=filenames, cdfName=cdfName, outdir=outdir) |
|
362 |
+ if(save.it){ |
|
363 |
+ message("Saving callSet and cnrmaResult to", crlmmFile) |
|
364 |
+ save(callSet, cnrmaResult, file=crlmmFile) |
|
365 |
+ } |
|
366 |
+ } else { |
|
367 |
+ message("Loading ", crlmmFile, "...") |
|
368 |
+ load(intensityFile) |
|
369 |
+ load(crlmmFile) |
|
370 |
+ callSet <- get("callSet") |
|
371 |
+ cnrmaResult <- get("cnrmaResult") |
|
372 |
+ } |
|
373 |
+ scanDates <- data.frame(ScanDate=sapply(filenames, celfileDate)) |
|
374 |
+ protocolData(callSet) <- new("AnnotatedDataFrame", |
|
375 |
+ data=scanDates, |
|
376 |
+ varMetadata=data.frame(labelDescription=colnames(scanDates), |
|
377 |
+ row.names=colnames(scanDates))) |
|
378 |
+ } |
|
379 |
+ if(platform == "illumina"){ |
|
380 |
+ if(!file.exists(crlmmFile) | !load.it){ |
|
381 |
+ callSet <- crlmmIllumina(RG=RG, |
|
382 |
+ cdfName=cdfName, |
|
383 |
+ sns=sampleNames(RG), |
|
384 |
+ returnParams=TRUE, |
|
385 |
+ save.it=TRUE, |
|
386 |
+ intensityFile=intensityFile) |
|
387 |
+ if(save.it) save(callSet, file=crlmmFile) |
|
388 |
+ } else { |
|
389 |
+ message("Loading ", crlmmFile, "...") |
|
390 |
+ load(crlmmFile) |
|
391 |
+ callSet <- get("callSet") |
|
392 |
+ } |
|
393 |
+ protocolData(callSet) <- protocolData(RG) |
|
394 |
+ } |
|
395 |
+ if(platform=="affymetrix") { |
|
396 |
+ protocolData(callSet)[["ScanDate"]] <- as.character(celDates(filenames)) |
|
397 |
+ sampleNames(protocolData(callSet)) <- sampleNames(callSet) |
|
398 |
+ } |
|
399 |
+ load(intensityFile) |
|
400 |
+ snprmaResult <- get("res") |
|
401 |
+ if(platform=="illumina"){ |
|
402 |
+ if(exists("cnAB")){ |
|
403 |
+ np.A <- as.integer(cnAB$A) |
|
404 |
+ np.B <- as.integer(cnAB$B) |
|
405 |
+ np <- ifelse(np.A > np.B, np.A, np.B) |
|
406 |
+ np <- matrix(np, nrow(cnAB$A), ncol(cnAB$A)) |
|
407 |
+ rownames(np) <- cnAB$gns |
|
408 |
+ colnames(np) <- cnAB$sns |
|
409 |
+ cnAB$NP <- np |
|
410 |
+ ##sampleNames(callSet) <- res$sns |
|
411 |
+ sampleNames(callSet) <- cnAB$sns |
|
412 |
+ cnrmaResult <- get("cnAB") |
|
413 |
+ } else cnrmaResult <- NULL |
|
414 |
+ } |
|
415 |
+ if(platform=="affymetrix"){ |
|
416 |
+ if(exists("cnrmaResult")){ |
|
417 |
+ cnrmaResult <- get("cnrmaResult") |
|
418 |
+ } else cnrmaResult <- NULL |
|
419 |
+ } |
|
420 |
+ crlmmResults <- list(snprmaResult=snprmaResult, |
|
421 |
+ cnrmaResult=cnrmaResult, |
|
422 |
+ callSet=callSet) |
|
423 | 423 |
|
424 |
-##loadAffyCallSet <- function(filenames, confsFile, callsFile, snprmaFile, load.it, save.it, cdfName){ |
|
425 |
-## |
|
426 |
-#### if(save.it){ |
|
427 |
-#### message("Saving callSet to", callsFile) |
|
428 |
-#### ##save(callSet, cnrmaResult, file=callsFile) |
|
429 |
-#### save(callSet, file=callsFile) |
|
430 |
-#### } |
|
431 |
-#### } else { |
|
432 |
-#### message("Loading ", callsFile, "...") |
|
433 |
-#### ##load(snprmaFile) |
|
434 |
-#### load(callsFile) |
|
435 |
-#### callSet <- get("callSet") |
|
436 |
-#### ##cnrmaResult <- get("cnrmaResult") |
|
437 |
-#### } |
|
438 |
-## |
|
439 |
-##} |
|
440 |
-## if(platform=="affymetrix") { |
|
441 |
-## protocolData(callSet)[["ScanDate"]] <- as.character(celDates(filenames)) |
|
442 |
-## sampleNames(protocolData(callSet)) <- sampleNames(callSet) |
|
443 |
-## } |
|
424 |
+ if(!save.it){ |
|
425 |
+ message("Cleaning up") |
|
426 |
+ unlink(intensityFile) |
|
427 |
+ } |
|
428 |
+ return(crlmmResults) |
|
429 |
+} |
|
444 | 430 |
|
445 |
-##loadAffyCnrma <- function(filenames, cnrmaFile, cdfName, outdir, load.it, save.it, use.bigmemory=FALSE){ |
|
446 |
-## if(!file.exists(cnrmaFile) | !load.it){ |
|
447 |
-## message("Quantile normalizing the copy number probes...") |
|
448 |
-## cnrmaResult <- cnrma2(filenames=filenames, cdfName=cdfName, outdir=outdir, cnrmaFile=cnrmaFile) |
|
449 |
-## } else cnrmaResult <- attach.big.matrix("NP.desc", outdir) |
|
450 |
-## return(cnrmaResult) |
|
451 |
-##} |
|
431 |
+validCdfNames <- function(){ |
|
432 |
+ c("genomewidesnp6", |
|
433 |
+ "genomewidesnp5", |
|
434 |
+ "human370v1c", |
|
435 |
+ "human370quadv3c", |
|
436 |
+ "human550v3b", |
|
437 |
+ "human650v3a", |
|
438 |
+ "human610quadv1b", |
|
439 |
+ "human660quadv1a", |
|
440 |
+ "human1mduov3b") |
|
441 |
+} |
|
452 | 442 |
|
453 |
-##loadIlluminaCnrma <- function(){ |
|
454 |
-## if(exists("cnAB")){ |
|
455 |
-## np.A <- as.integer(cnAB$A) |
|
456 |
-## np.B <- as.integer(cnAB$B) |
|
457 |
-## np <- ifelse(np.A > np.B, np.A, np.B) |
|
458 |
-## np <- matrix(np, nrow(cnAB$A), ncol(cnAB$A)) |
|
459 |
-## rownames(np) <- cnAB$gns |
|
460 |
-## colnames(np) <- cnAB$sns |
|
461 |
-## cnAB$NP <- np |
|
462 |
-## ##sampleNames(callSet) <- res$sns |
|
463 |
-## sampleNames(callSet) <- cnAB$sns |
|
464 |
-## cnrmaResult <- get("cnAB") |
|
465 |
-## } else cnrmaResult <- NULL |
|
466 |
-## return(cnrmaResult) |
|
467 |
-##} |
|
468 |
-## |
|
469 |
-##crlmmWrapper <- function(filenames, cnOptions, ...){ |
|
470 |
-## crlmmBatchSize <- cnOptions[["crlmmBatchSize"]] |
|
471 |
-## cdfName <- cnOptions[["cdfName"]] |
|
472 |
-## load.it <- cnOptions[["load.it"]] |
|
473 |
-## save.it <- cnOptions[["save.it"]] |
|
474 |
-## callsFile <- cnOptions[["callsFile"]] |
|
475 |
-## confsFile <- cnOptions[["confsFile"]] |
|
476 |
-## AFile=cnOptions[["AFile"]] |
|
477 |
-## BFile=cnOptions[["BFile"]] |
|
478 |
-## snprmaFile=cnOptions[["snprmaFile"]] |
|
479 |
-## cnrmaFile=cnOptions[["cnrmaFile"]] |
|
480 |
-## rgFile=cnOptions[["rgFile"]] |
|
481 |
-## protocolFile <- cnOptions[["protocolFile"]] |
|
482 |
-## outdir <- cnOptions[["outdir"]] |
|
483 |
-## if(missing(cdfName)) stop("cdfName is missing -- a valid cdfName is required. See crlmm:::validCdfNames()") |
|
484 |
-## platform <- whichPlatform(cdfName) |
|
485 |
-## if(!(platform %in% c("affymetrix", "illumina"))){ |
|
486 |
-## stop("Only 'affymetrix' and 'illumina' platforms are supported at this time.") |
|
487 |
-## } else { |
|
488 |
-## if(!isValidCdfName(cdfName)){ |
|
489 |
-## stop(cdfName, " is not a valid entry. See crlmm:::validCdfNames(platform)") |
|
490 |
-## } else message("Using the annotation package ", cdfName, " for this ", platform, " platform") |
|
491 |
-## } |
|
492 |
-## if(platform == "illumina") { |
|
493 |
-## if(!file.exists(rgFile)){ |
|
494 |
-## if(load.it) message(rgFile, " does not exist and you chose to load.it. Re-reading the R and G intensities from the IDAT files") |
|
495 |
-## sampleSheet <- cnOptions$sampleSheet |
|
496 |
-## ids <- cnOptions$ids |
|
497 |
-## arrayInfoColNames <- cnOptions$arrayInfoColNames |
|
498 |
-## highDensity <- cnOptions$highDensity |
|
499 |
-## ##this is either an NChannelSet object, or a list of pointers to the ff objects |
|
500 |
-## RG <- readIdatFiles(sampleSheet=sampleSheet, |
|
501 |
-## arrayNames=basename(filenames), |
|
502 |
-## ids=ids, |
|
503 |
-## path=dirname(filenames), |
|
504 |
-## highDensity=highDensity, |
|
505 |
-## fileExt=cnOptions$fileExt[1:2], |
|
506 |
-## sep=cnOptions$fileExt[[3]], |
|
507 |
-## saveDate=FALSE, ## I do this earlier |
|
508 |
-## verbose=cnOptions[["verbose"]], |
|
509 |
-## protocolFile=protocolFile) |
|
510 |
-## if(save.it) save(RG, file=rgFile) |
|
511 |
-## ##RG <- loadIlluminaRG(rgFile, callsFile, load.it, save.it) |
|
512 |
-## } else{ |
|
513 |
-## if(!isPackageLoaded("ff")) {load(rgFile); RG <- get("RG")} |
|
514 |
-## } |
|
515 |
-## } |
|
516 |
-## if(!(file.exists(dirname(callsFile)))) stop(dirname(callsFile), " does not exist.") |
|
517 |
-## if(!(file.exists(dirname(snprmaFile)))) stop(dirname(snprmaFile), " does not exist.") |
|
518 |
-## if(platform == "affymetrix"){ |
|
519 |
-## crlmm(filenames=filenames, |
|
520 |
-## cdfName=cdfName, |
|
521 |
-## save.it=TRUE, |
|
522 |
-## load.it=load.it, |
|
523 |
-## snprmaFile=snprmaFile, |
|
524 |
-## callsFile=callsFile, |
|
525 |
-## confsFile=confsFile, |
|
526 |
-## AFile=AFile, |
|
527 |
-## BFile=BFile, |
|
528 |
-## crlmmBatchSize=crlmmBatchSize, |
|
529 |
-## SNRMin=cnOptions[["SNRMin"]]) |
|
530 |
-## } |
|
531 |
-## gc() |
|
532 |
-## if(platform == "illumina") { |
|
533 |
-## callSet <- crlmmIllumina(RG=RG, |
|
534 |
-## cdfName=cdfName, |
|
535 |
-## sns=sampleNames(RG), |
|
536 |
-## returnParams=TRUE, |
|
537 |
-## save.it=TRUE, |
|
538 |
-## snprmaFile=snprmaFile, |
|
539 |
-## callsFile=callsFile, |
|
540 |
-## confsFile=confsFile, |
|
541 |
-## AFile=AFile, |
|
542 |
-## BFile=BFile) |
|
543 |
-## ##callSet <- loadIlluminaCallSet(callsFile, snprmaFile, RG, load.it, save.it, cdfName) |
|
544 |
-## } |
|
545 |
-## if(platform == "affymetrix"){ |
|
546 |
-## if(!file.exists(cnrmaFile) | !load.it){ |
|
547 |
-## message("Quantile normalizing the copy number probes...") |
|
548 |
-## ## updates A matrix and saves cnrmaFile |
|
549 |
-## cnrma(filenames=filenames, cdfName=cdfName, outdir=outdir, verbose=cnOptions[["verbose"]], cnrmaFile=cnrmaFile, AFile=AFile, snprmaFile=snprmaFile) |
|
550 |
-## } |
|
551 |
-## } |
|
552 |
-#### if(!is.null(cnrmaResult)){ |
|
553 |
-#### for(CHR in chromosome){ |
|
554 |
-#### cat(CHR, " ") |
|
555 |
-#### cnps <- rownames(cnProbes)[cnProbes[, k] == CHR] |
|
556 |
-#### index.nps <- match(cnps, rownames(cnrmaResult[["NP"]])) |
|
557 |
-#### NP <- cnrmaResult$NP[index.nps, ] |
|
558 |
-#### save(NP, file=file.path(tmpdir, paste("NP_", CHR, ".rda", sep=""))) |
|
559 |
-#### rm(NP); gc() |
|
560 |
-#### } |
|
561 |
-#### } |
|
562 |
-## if(!save.it){ |
|
563 |
-## message("Cleaning up") |
|
564 |
-## unlink(snprmaFile); unlink(cnrmaFile) |
|
565 |
-## } |
|
566 |
-##} |
|
443 |
+isValidCdfName <- function(cdfName){ |
|
444 |
+ chipList <- validCdfNames() |
|
445 |
+ result <- cdfName %in% chipList |
|
446 |
+ if(!(result)){ |
|
447 |
+ warning("cdfName must be one of the following: ", |
|
448 |
+ chipList) |
|
449 |
+ } |
|
450 |
+ return(result) |
|
451 |
+} |
|
567 | 452 |
|
453 |
+whichPlatform <- function(cdfName){ |
|
454 |
+ index <- grep("genomewidesnp", cdfName) |
|
455 |
+ if(length(index) > 0){ |
|
456 |
+ platform <- "affymetrix" |
|
457 |
+ } else{ |
|
458 |
+ index <- grep("human", cdfName) |
|
459 |
+ platform <- "illumina" |
|
460 |
+ } |
|
461 |
+ return(platform) |
|
462 |
+} |
|
568 | 463 |
|
569 | 464 |
|
570 | 465 |
# steps: quantile normalize hapmap: create 1m_reference_cn.rda object |
571 |
-##cnrma <- function(filenames, cdfName, sns, seed=1, verbose=FALSE, outdir){ |
|
572 |
-## if(missing(cdfName)) stop("must specify cdfName") |
|
573 |
-## pkgname <- getCrlmmAnnotationName(cdfName) |
|
574 |
-## require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available") |
|
575 |
-## if (missing(sns)) sns <- basename(filenames) |
|
576 |
-## loader("npProbesFid.rda", .crlmmPkgEnv, pkgname) |
|
577 |
-## fid <- getVarInEnv("npProbesFid") |
|
578 |
-## set.seed(seed) |
|
579 |
-## idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything |
|
580 |
-## SKW <- vector("numeric", length(filenames)) |
|
581 |
-## |
|
582 |
-## |
|
583 |
-## NP <- matrix(NA, length(fid), length(filenames)) |
|
584 |
-## verbose <- TRUE |
|
585 |
-## if(verbose){ |
|
586 |
-## message("Processing ", length(filenames), " files.") |
|
587 |
-## if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3) |
|
588 |
-## } |
|
589 |
-## if(cdfName=="genomewidesnp6"){ |
|
590 |
-## loader("1m_reference_cn.rda", .crlmmPkgEnv, pkgname) |
|
591 |
-## } |
|
592 |
-## if(cdfName=="genomewidesnp5"){ |
|
593 |
-## loader("5.0_reference_cn.rda", .crlmmPkgEnv, pkgname) |
|
594 |
-## } |
|
595 |
-## reference <- getVarInEnv("reference") |
|
596 |
-## ##if(!is.matrix(reference)) stop("target distribution for quantile normalization not available.") |
|
597 |
-## for(i in seq(along=filenames)){ |
|
598 |
-## y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid]) |
|
599 |
-## x <- log2(y[idx2]) |
|
600 |
-## SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3) |
|
601 |
-## rm(x) |
|
602 |
-## NP[, i] <- as.integer(normalize.quantiles.use.target(y, target=reference)) |
|
603 |
-## if (verbose) |
|
604 |
-## if (getRversion() > '2.7.0') setTxtProgressBar(pb, i) |
|
605 |
-## else cat(".") |
|
606 |
-## } |
|
607 |
-## dimnames(NP) <- list(names(fid), sns) |
|
608 |
-## ##dimnames(NP) <- list(map[, "man_fsetid"], sns) |
|
609 |
-## res3 <- list(NP=NP, SKW=SKW) |
|
610 |
-## cat("\n") |
|
611 |
-## return(res3) |
|
612 |
-##} |
|
613 |
- |
|
614 |
-cnrma <- function(object, filenames){ |
|
615 |
- ops <- crlmmOptions(object) |
|
616 |
- cdfName <- annotation(object) |
|
617 |
- seed <- ops$seed |
|
618 |
- verbose <- ops$verbose |
|
619 |
- ##cnrmaFile <- ops$cnrmaFile |
|
620 |
- A <- A(object) |
|
466 |
+cnrma <- function(filenames, cdfName, sns, seed=1, verbose=FALSE, outdir){ |
|
621 | 467 |
if(missing(cdfName)) stop("must specify cdfName") |
622 | 468 |
pkgname <- getCrlmmAnnotationName(cdfName) |
623 | 469 |
require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available") |
624 |
- sns <- basename(filenames) |
|
470 |
+ if (missing(sns)) sns <- basename(filenames) |
|
625 | 471 |
loader("npProbesFid.rda", .crlmmPkgEnv, pkgname) |
626 | 472 |
fid <- getVarInEnv("npProbesFid") |
627 | 473 |
set.seed(seed) |
628 | 474 |
idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything |
629 | 475 |
SKW <- vector("numeric", length(filenames)) |
630 |
- index <- match(names(fid), featureNames(object)) |
|
631 |
- stopifnot(identical(featureNames(object)[index], names(fid))) |
|
632 |
- if(length(index) < 1) stop("None of the names for the nonpolymorphic probes in the annotation package match the names stored in the snprmaFile.") |
|
476 |
+## if(bigmemory){ |
|
477 |
+## NP <- filebacked.big.matrix(length(pnsa), length(filenames), |
|
478 |
+## type="integer", |
|
479 |
+## init=as.integer(0), |
|
480 |
+## backingpath=outdir, |
|
481 |
+## backingfile="NP.bin", |
|
482 |
+## descriptorfile="NP.desc") |
|
483 |
+## } else{ |
|
484 |
+ NP <- matrix(NA, length(fid), length(filenames)) |
|
485 |
+## } |
|
486 |
+ verbose <- TRUE |
|
633 | 487 |
if(verbose){ |
634 | 488 |
message("Processing ", length(filenames), " files.") |
635 | 489 |
if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3) |
... | ... |
@@ -641,21 +495,22 @@ cnrma <- function(object, filenames){ |
641 | 495 |
loader("5.0_reference_cn.rda", .crlmmPkgEnv, pkgname) |
642 | 496 |
} |
643 | 497 |
reference <- getVarInEnv("reference") |
498 |
+ ##if(!is.matrix(reference)) stop("target distribution for quantile normalization not available.") |
|
644 | 499 |
for(i in seq(along=filenames)){ |
645 | 500 |
y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid]) |
646 | 501 |
x <- log2(y[idx2]) |
647 | 502 |
SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3) |
648 |
- rm(x); gc() |
|
649 |
- A[index, i] <- as.integer(normalize.quantiles.use.target(y, target=reference)) |
|
503 |
+ rm(x) |
|
504 |
+ NP[, i] <- as.integer(normalize.quantiles.use.target(y, target=reference)) |
|
650 | 505 |
if (verbose) |
651 | 506 |
if (getRversion() > '2.7.0') setTxtProgressBar(pb, i) |
652 | 507 |
else cat(".") |
653 |
- rm(y); gc() |
|
654 | 508 |
} |
655 |
- cat("\nDone\n") |
|
656 |
- pData(object)$SKW_nonpolymorphic <- SKW |
|
657 |
- object@assayData[["alleleA"]] <- A |
|
658 |
- return(object) |
|
509 |
+ dimnames(NP) <- list(names(fid), sns) |
|
510 |
+ ##dimnames(NP) <- list(map[, "man_fsetid"], sns) |
|
511 |
+ res3 <- list(NP=NP, SKW=SKW) |
|
512 |
+ cat("\n") |
|
513 |
+ return(res3) |
|
659 | 514 |
} |
660 | 515 |
|
661 | 516 |
getFlags <- function(object, PHI.THR){ |
... | ... |
@@ -673,7 +528,7 @@ getFlags <- function(object, PHI.THR){ |
673 | 528 |
} |
674 | 529 |
|
675 | 530 |
|
676 |
-instantiateObjects <- function(object){ |
|
531 |
+instantiateObjects <- function(object, cnOptions){ |
|
677 | 532 |
Ns <- matrix(NA, nrow(object), 5) |
678 | 533 |
colnames(Ns) <- c("A", "B", "AA", "AB", "BB") |
679 | 534 |
vA <- vB <- muB <- muA <- Ns |
... | ... |
@@ -700,46 +555,130 @@ thresholdCopynumber <- function(object){ |
700 | 555 |
return(object) |
701 | 556 |
} |
702 | 557 |
|
703 |
-##linear model parameters |
|
704 |
-##lm.parameters <- function(object, cnOptions){ |
|
705 |
-## fD <- fData(object) |
|
706 |
-## batch <- object$batch |
|
707 |
-## uplate <- unique(batch) |
|
708 |
-## parameterNames <- c(paste("tau2A", uplate, sep="_"), |
|
709 |
-## paste("tau2B", uplate, sep="_"), |
|
710 |
-## paste("sig2A", uplate, sep="_"), |
|
711 |
-## paste("sig2B", uplate, sep="_"), |
|
712 |
-## paste("nuA", uplate, sep="_"), |
|
713 |
-## paste("nuA.se", uplate, sep="_"), |
|
714 |
-## paste("nuB", uplate, sep="_"), |
|
715 |
-## paste("nuB.se", uplate, sep="_"), |
|
716 |
-## paste("phiA", uplate, sep="_"), |
|
717 |
-## paste("phiA.se", uplate, sep="_"), |
|
718 |
-## paste("phiB", uplate, sep="_"), |
|
719 |
-## paste("phiB.se", uplate, sep="_"), |
|
720 |
-## paste("phiAX", uplate, sep="_"), |
|
721 |
-## paste("phiBX", uplate, sep="_"), |
|
722 |
-## paste("corr", uplate, sep="_"), |
|
723 |
-## paste("corrA.BB", uplate, sep="_"), |
|
724 |
-## paste("corrB.AA", uplate, sep="_")) |
|
725 |
-## pMatrix <- data.frame(matrix(numeric(0), |
|
726 |
-## nrow(object), |
|
727 |
-## length(parameterNames)), |
|
728 |
-## row.names=featureNames(object)) |
|
729 |
-## colnames(pMatrix) <- parameterNames |
|
730 |
-## fD2 <- cbind(fD, pMatrix) |
|
731 |
-## new("AnnotatedDataFrame", data=fD2, |
|
732 |
-## varMetadata=data.frame(labelDescription=colnames(fD2), |
|
733 |
-## row.names=colnames(fD2))) |
|
558 |
+##preprocessOptions <- function(crlmmFile="snpsetObject.rda", |
|
559 |
+## intensityFile="normalizedIntensities.rda", |
|
560 |
+## rgFile="rgFile.rda"){ |
|
561 |
+## |
|
734 | 562 |
##} |
735 | 563 |
|
564 |
+cnOptions <- function(outdir="./", |
|
565 |
+ cdfName, |
|
566 |
+ crlmmFile, |
|
567 |
+ intensityFile, |
|
568 |
+ rgFile="rgFile.rda", |
|
569 |
+ save.it=TRUE, |
|
570 |
+ save.cnset=TRUE, |
|
571 |
+ load.it=TRUE, |
|
572 |
+ splitByChr=TRUE, |
|
573 |
+ MIN.OBS=3, |
|
574 |
+ MIN.SAMPLES=10, |
|
575 |
+ batch=NULL, |
|
576 |
+ DF.PRIOR=50, |
|
577 |
+ bias.adj=FALSE, |
|
578 |
+ prior.prob=rep(1/4, 4), |
|
579 |
+ SNRmin=4, |
|
580 |
+ chromosome=1:24, |
|
581 |
+ seed=123, |
|
582 |
+ verbose=TRUE, |
|
583 |
+ GT.CONF.THR=0.99, |
|
584 |
+ PHI.THR=2^6,##used in nonpolymorphic fxn for training |
|
585 |
+ nHOM.THR=5, ##used in nonpolymorphic fxn for training |
|
586 |
+ MIN.NU=2^3, |
|
587 |
+ MIN.PHI=2^3, |
|
588 |
+ THR.NU.PHI=TRUE, |
|
589 |
+ thresholdCopynumber=TRUE, |
|
590 |
+ unlink=TRUE, |
|
591 |
+ ##hiddenMarkovModel=FALSE, |
|
592 |
+ ##circularBinarySegmentation=FALSE, |
|
593 |
+## cbsOpts=NULL, |
|
594 |
+ ##hmmOpts=NULL, |
|
595 |
+ ...){ |
|
596 |
+ if(missing(cdfName)) stop("must specify cdfName") |
|
597 |
+ if(!file.exists(outdir)){ |
|
598 |
+ message(outdir, " does not exist. Trying to create it.") |
|
599 |
+ dir.create(outdir, recursive=TRUE) |
|
600 |
+ } |
|
601 |
+ stopifnot(isValidCdfName(cdfName)) |
|
602 |
+## if(hiddenMarkovModel){ |
|
603 |
+## hmmOpts <- hmmOptions(...) |
|
604 |
+## } |
|
605 |
+ if(missing(crlmmFile)){ |
|
606 |
+ crlmmFile <- file.path(outdir, "snpsetObject.rda") |
|
607 |
+ } |
|
608 |
+ if(missing(intensityFile)){ |
|
609 |
+ intensityFile <- file.path(outdir, "normalizedIntensities.rda") |
|
610 |
+ } |
|
611 |
+ if(is.null(batch)) |
|
612 |
+ stop("must specify batch -- should be the same length as the number of files") |
|
613 |
+ list(outdir=outdir, |
|
614 |
+ cdfName=cdfName, |
|
615 |
+ crlmmFile=crlmmFile, |
|
616 |
+ intensityFile=intensityFile, |
|
617 |
+ rgFile=file.path(outdir, rgFile), |
|
618 |
+ save.it=save.it, |
|
619 |
+ save.cnset=save.cnset, |
|
620 |
+ load.it=load.it, |
|
621 |
+ splitByChr=splitByChr, |
|
622 |
+ MIN.OBS=MIN.OBS, |
|
623 |
+ MIN.SAMPLES=MIN.SAMPLES, |
|
624 |
+ batch=batch, |
|
625 |
+ DF.PRIOR=DF.PRIOR, |
|
626 |
+ GT.CONF.THR=GT.CONF.THR, |
|
627 |
+ prior.prob=prior.prob, |
|
628 |
+ bias.adj=bias.adj, |
|
629 |
+ SNRmin=SNRmin, |
|
630 |
+ chromosome=chromosome, |
|
631 |
+ seed=seed, |
|
632 |
+ verbose=verbose, |
|
633 |
+ PHI.THR=PHI.THR, |
|
634 |
+ nHOM.THR=nHOM.THR, |
|
635 |
+ MIN.NU=MIN.NU, |
|
636 |
+ MIN.PHI=MIN.PHI, |
|
637 |
+ THR.NU.PHI=THR.NU.PHI, |
|
638 |
+ thresholdCopynumber=thresholdCopynumber, |
|
639 |
+ unlink=unlink) |
|
640 |
+## hiddenMarkovModel=hiddenMarkovModel, |
|
641 |
+## circularBinarySegmentation=circularBinarySegmentation, |
|
642 |
+## cbsOpts=cbsOpts, |
|
643 |
+## hmmOpts=hmmOpts) ##remove SnpSuperSet object |
|
644 |
+} |
|
645 |
+ |
|
646 |
+##linear model parameters |
|
647 |
+lm.parameters <- function(object, cnOptions){ |
|
648 |
+ fD <- fData(object) |
|
649 |
+ batch <- object$batch |
|
650 |
+ uplate <- unique(batch) |
|
651 |
+ parameterNames <- c(paste("tau2A", uplate, sep="_"), |
|
652 |
+ paste("tau2B", uplate, sep="_"), |
|
653 |
+ paste("sig2A", uplate, sep="_"), |
|
654 |
+ paste("sig2B", uplate, sep="_"), |
|
655 |
+ paste("nuA", uplate, sep="_"), |
|
656 |
+ paste("nuA.se", uplate, sep="_"), |
|
657 |
+ paste("nuB", uplate, sep="_"), |
|
658 |
+ paste("nuB.se", uplate, sep="_"), |
|
659 |
+ paste("phiA", uplate, sep="_"), |
|
660 |
+ paste("phiA.se", uplate, sep="_"), |
|
661 |
+ paste("phiB", uplate, sep="_"), |
|
662 |
+ paste("phiB.se", uplate, sep="_"), |
|
663 |
+ paste("phiAX", uplate, sep="_"), |
|
664 |
+ paste("phiBX", uplate, sep="_"), |
|
665 |
+ paste("corr", uplate, sep="_"), |
|
666 |
+ paste("corrA.BB", uplate, sep="_"), |
|
667 |
+ paste("corrB.AA", uplate, sep="_")) |
|
668 |
+ pMatrix <- data.frame(matrix(numeric(0), |
|
669 |
+ nrow(object), |
|
670 |
+ length(parameterNames)), |
|
671 |
+ row.names=featureNames(object)) |
|
672 |
+ colnames(pMatrix) <- parameterNames |
|
673 |
+ fD2 <- cbind(fD, pMatrix) |
|
674 |
+ new("AnnotatedDataFrame", data=fD2, |
|
675 |
+ varMetadata=data.frame(labelDescription=colnames(fD2), |
|
676 |
+ row.names=colnames(fD2))) |
|
677 |
+} |
|
678 |
+ |
|
736 | 679 |
nonpolymorphic <- function(object, cnOptions, tmp.objects){ |
737 |
- chromosome <- cnOptions[["chromosome"]] |
|
738 | 680 |
batch <- unique(object$batch) |
739 | 681 |
CHR <- unique(chromosome(object)) |
740 |
- verbose <- cnOptions[["verbose"]] |
|
741 |
- if(CHR != chromosome[1]) verbose <- FALSE |
|
742 |
- if(batch != unique(cnOptions[["batch"]])[1]) verbose <- FALSE |
|
743 | 682 |
goodSnps <- function(object, PHI.THR, tmp.objects, nAA.THR, nBB.THR){ |
744 | 683 |
Ns <- tmp.objects[["Ns"]] |
745 | 684 |
##Ns <- get("Ns", envir) |
... | ... |
@@ -865,9 +804,10 @@ nonpolymorphic <- function(object, cnOptions, tmp.objects){ |
865 | 804 |
|
866 | 805 |
THR.NU.PHI <- cnOptions$THR.NU.PHI |
867 | 806 |
if(THR.NU.PHI){ |
807 |
+ verbose <- cnOptions$verbose |
|
868 | 808 |
##Assign values to object |
869 | 809 |
object <- pr(object, "nuA", batch, nuA) |
870 |
- object <- pr(object, "phiA", batch, phiA) |
|
810 |
+ object <- pr(object, "phiA", batch, phiA) |
|
871 | 811 |
if(verbose) message("Thresholding nu and phi") |
872 | 812 |
object <- thresholdModelParams(object, cnOptions) |
873 | 813 |
} else { |
... | ... |
@@ -885,6 +825,7 @@ nonpolymorphic <- function(object, cnOptions, tmp.objects){ |
885 | 825 |
|
886 | 826 |
THR.NU.PHI <- cnOptions$THR.NU.PHI |
887 | 827 |
if(THR.NU.PHI){ |
828 |
+ verbose <- cnOptions$verbose |
|
888 | 829 |
##Assign values to object |
889 | 830 |
object <- pr(object, "nuA", batch, nuA) |
890 | 831 |
object <- pr(object, "phiA", batch, phiA) |
... | ... |
@@ -905,27 +846,6 @@ nonpolymorphic <- function(object, cnOptions, tmp.objects){ |
905 | 846 |
return(object) |
906 | 847 |
} |
907 | 848 |
|
908 |
-nonpolymorphic.poe <- function(object, cnOptions, tmp.object){ |
|
909 |
- require(metaArray) |
|
910 |
- nps <- log2(A(object)[!isSnp(object), ]) |
|
911 |
- nps <- (nps-rowMedians(nps))/rowMAD(nps) |
|
912 |
- runAvg <- apply(nps, 2, myfilter, filter=rep(1/10, 10)) |
|
913 |
- rownames(runAvg) <- featureNames(object)[!isSnp(object)] |
|
914 |
- rm.nas <- rowSums(is.na(runAvg)) == 0 |
|
915 |
- runAvg <- runAvg[rm.nas, ] |
|
916 |
- |
|
917 |
- poe.scale <- poe.em(runAvg, cl=rep(0, ncol(nps)))$data |
|
918 |
- pinegg <- piposg <- poe.scale |
|
919 |
- piposg[piposg < 0] <- 0 |
|
920 |
- pinegg[pinegg > 0] <- 0 |
|
921 |
- pinegg <- pinegg*-1 |
|
922 |
- pm.em <- 1*pinegg + 2*(1-pinegg-piposg) + 3*piposg |
|
923 |
- rownames(pm.em) <- rownames(runAvg) |
|
924 |
- CA(object)[match(rownames(pm.em), featureNames(object)), ] <- pm.em |
|
925 |
- ##CA(object)[!isSnp(object), ] <- pm.em |
|
926 |
- return(object) |
|
927 |
-} |
|
928 |
- |
|
929 | 849 |
##sufficient statistics on the intensity scale |
930 | 850 |
withinGenotypeMoments <- function(object, cnOptions, tmp.objects){ |
931 | 851 |
normal <- tmp.objects[["normal"]] |
... | ... |
@@ -1018,12 +938,12 @@ withinGenotypeMoments <- function(object, cnOptions, tmp.objects){ |
1018 | 938 |
return(tmp.objects) |
1019 | 939 |
} |