git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@44778 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -385,3 +385,19 @@ is stored in the assayData slot. |
385 | 385 |
|
386 | 386 |
2010-02-07 B. Carvalho - committed version 1.5.25 |
387 | 387 |
* Updated requirement of affyio to support Win64 |
388 |
+ |
|
389 |
+2010-02-20 R. Scharpf - committed version 1.5.25 |
|
390 |
+ |
|
391 |
+** begin adding support for ff |
|
392 |
+ |
|
393 |
+** added use.poe option for nonpolymorphic loci |
|
394 |
+ |
|
395 |
+** crlmm does not return anything, but saves the following objects: |
|
396 |
+ - genotypes.rda |
|
397 |
+ - confs.rda |
|
398 |
+ - snp_rmaResult.rda (gns, sns, SNR, SKW, mixtureParams, cdfName) |
|
399 |
+ - A.rda |
|
400 |
+ - B.rda |
|
401 |
+ - cn_rmaResult.rda (SKW) |
|
402 |
+ |
|
403 |
+ |
... | ... |
@@ -1,17 +1,17 @@ |
1 | 1 |
Package: crlmm |
2 | 2 |
Type: Package |
3 | 3 |
Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays. |
4 |
-Version: 1.5.24 |
|
5 |
-Date: 2010-02-05 |
|
4 |
+Version: 1.5.25 |
|
5 |
+Date: 2010-02-20 |
|
6 | 6 |
Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au> |
7 | 7 |
Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
8 | 8 |
Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 5.0, 6.0, and Illumina platforms |
9 | 9 |
License: Artistic-2.0 |
10 |
-Depends: methods, Biobase (>= 2.7.2), R (>= 2.11.0), oligoClasses (>= 1.9.21) |
|
10 |
+Depends: R (>= 2.11.0), methods, Biobase (>= 2.7.2), oligoClasses (>= 1.9.28) |
|
11 | 11 |
Imports: affyio (>= 1.15.2), preprocessCore, utils, stats, genefilter, splines, mvtnorm, ellipse, SNPchip |
12 |
-Suggests: hapmapsnp5, hapmapsnp6, genomewidesnp5Crlmm (>= 1.0.2),genomewidesnp6Crlmm (>= 1.0.2), snpMatrix |
|
12 |
+Enhances: ff |
|
13 |
+Suggests: hapmapsnp5, hapmapsnp6, genomewidesnp5Crlmm (>= 1.0.2),genomewidesnp6Crlmm (>= 1.0.2), snpMatrix, metaArray |
|
13 | 14 |
Collate: AllGenerics.R |
14 |
- methods-AlleleSet.R |
|
15 | 15 |
methods-CNSet.R |
16 | 16 |
methods-eSet.R |
17 | 17 |
methods-SnpSuperSet.R |
... | ... |
@@ -1,3 +1,4 @@ |
1 |
+## crlmm NAMESPACE |
|
1 | 2 |
useDynLib("crlmm", .registration=TRUE) |
2 | 3 |
|
3 | 4 |
## Biobase |
... | ... |
@@ -23,7 +24,7 @@ importClassesFrom(oligoClasses, SnpSuperSet, AlleleSet) |
23 | 24 |
|
24 | 25 |
importMethodsFrom(oligoClasses, allele, calls, "calls<-", confs, |
25 | 26 |
"confs<-", cnConfidence, "cnConfidence<-", |
26 |
- isSnp, chromosome, position, CA, "CA<-", CB, "CB<-") |
|
27 |
+ isSnp, chromosome, position, CA, "CA<-", CB, "CB<-", A, B) |
|
27 | 28 |
|
28 | 29 |
|
29 | 30 |
importFrom(oligoClasses, chromosome2integer, celfileDate, list.celfiles) |
... | ... |
@@ -49,14 +50,14 @@ importFrom(mvtnorm, dmvnorm) |
49 | 50 |
|
50 | 51 |
importFrom(ellipse, ellipse) |
51 | 52 |
|
52 |
-exportMethods(A, B, copyNumber) |
|
53 |
+exportMethods(copyNumber) |
|
53 | 54 |
export(cnOptions, crlmm, crlmmIllumina, crlmmCopynumber, ellipse, readIdatFiles, snprma, getParam) |
54 | 55 |
|
55 |
-##export(##viterbi.CNSet, ##move to VanillaICE |
|
56 |
-## combineIntensities, |
|
57 |
-## whichPlatform, |
|
58 |
-## isValidCdfName, |
|
59 |
-## crlmmWrapper, |
|
60 |
-## withinGenotypeMoments, |
|
61 |
-## locationAndScale, getParam.SnpSuperSet) |
|
62 |
-export(thresholdModelParams, computeCopynumber.CNSet, nuphiAllele, coefs, biasAdjNP) |
|
56 |
+##export everything that does not start with a . |
|
57 |
+##exportPattern("^[^\\.]") |
|
58 |
+ |
|
59 |
+##export(thresholdModelParams, computeCopynumber.CNSet, nuphiAllele, coefs, biasAdjNP, |
|
60 |
+## nonpolymorphic.poe, crlmmWrapper, |
|
61 |
+## loadIlluminaCallSet, loadIlluminaRG, loadIlluminaCnrma, |
|
62 |
+## cnrma, |
|
63 |
+## crlmmGT, oneBatch) |
... | ... |
@@ -1,9 +1,7 @@ |
1 |
-setGeneric("A", function(object) standardGeneric("A")) |
|
2 |
-setGeneric("B", function(object) standardGeneric("B")) |
|
3 | 1 |
##setGeneric("A<-", function(object, value) standardGeneric("A<-")) |
4 | 2 |
##setGeneric("B<-", function(object, value) standardGeneric("B<-")) |
5 | 3 |
|
6 |
-setGeneric("getParam", function(object, name, batch) standardGeneric("getParam")) |
|
4 |
+setGeneric("getParam", function(object, name, ...) standardGeneric("getParam")) |
|
7 | 5 |
setGeneric("cnIndex", function(object) standardGeneric("cnIndex")) |
8 | 6 |
setGeneric("cnNames", function(object) standardGeneric("cnNames")) |
9 | 7 |
setGeneric("computeCopynumber", function(object, cnOptions) standardGeneric("computeCopynumber")) |
... | ... |
@@ -153,148 +153,387 @@ predictGender <- function(res, cdfName="genomewidesnp6", SNRMin=5){ |
153 | 153 |
return(gender) |
154 | 154 |
} |
155 | 155 |
|
156 |
-combineIntensities <- function(res, NP=NULL, callSet){ |
|
157 |
- rownames(res$B) <- rownames(res$A) <- res$gns |
|
158 |
- colnames(res$B) <- colnames(res$A) <- res$sns |
|
159 |
- if(!is.null(NP)){ |
|
160 |
- blank <- matrix(NA, nrow(NP), ncol(NP)) |
|
161 |
- dimnames(blank) <- dimnames(NP) |
|
162 |
- A <- rbind(res$A, NP) |
|
163 |
- B <- rbind(res$B, blank) |
|
156 |
+##crlmmCopynumber <- function(filenames, cnOptions, object, ...){ |
|
157 |
+## if(!missing(object)){ |
|
158 |
+## stopifnot(class(object) == "CNSet") |
|
159 |
+## createIntermediateObjects <- FALSE |
|
160 |
+## } else { |
|
161 |
+## createIntermediateObjects <- TRUE |
|
162 |
+## ## 33G for 1239 files |
|
163 |
+## crlmmResults <- crlmmWrapper(filenames, cnOptions, ...) |
|
164 |
+## snprmaResult <- crlmmResults[["snprmaResult"]] |
|
165 |
+## cnrmaResult <- crlmmResults[["cnrmaResult"]] |
|
166 |
+## callSet <- crlmmResults[["callSet"]] |
|
167 |
+## rm(crlmmResults); gc() |
|
168 |
+## annotation(callSet) <- cnOptions[["cdfName"]] |
|
169 |
+## stopifnot(identical(featureNames(callSet), snprmaResult[["gns"]])) |
|
170 |
+## path <- system.file("extdata", package=paste(annotation(callSet), "Crlmm", sep="")) |
|
171 |
+## load(file.path(path, "snpProbes.rda")) |
|
172 |
+## snpProbes <- get("snpProbes") |
|
173 |
+## load(file.path(path, "cnProbes.rda")) |
|
174 |
+## cnProbes <- get("cnProbes") |
|
175 |
+## k <- grep("chr", colnames(snpProbes)) |
|
176 |
+## if(length(k) < 1) stop("chr or chromosome not in colnames(snpProbes)") |
|
177 |
+## } |
|
178 |
+## set.seed(cnOptions[["seed"]]) ##for reproducibility |
|
179 |
+## cnFile <- cnOptions[["cnFile"]] |
|
180 |
+## chromosome <- cnOptions[["chromosome"]] |
|
181 |
+## SNRmin <- cnOptions[["SNRmin"]] |
|
182 |
+## for(CHR in chromosome){ |
|
183 |
+## cat("Chromosome ", CHR, "\n") |
|
184 |
+## if(createIntermediateObjects){ |
|
185 |
+## snps <- rownames(snpProbes)[snpProbes[, k] == CHR] |
|
186 |
+## cnps <- rownames(cnProbes)[cnProbes[, k] == CHR] |
|
187 |
+## index.snps <- match(snps, featureNames(callSet)) |
|
188 |
+## index.nps <- match(cnps, rownames(cnrmaResult[["NP"]])) |
|
189 |
+## if(!is.null(cnrmaResult)){ |
|
190 |
+## npI <- cnrmaResult$NP[index.nps, ] |
|
191 |
+## } else npI <- NULL |
|
192 |
+## snpI <- list(A=snprmaResult$A[index.snps, ], |
|
193 |
+## B=snprmaResult$B[index.snps, ], |
|
194 |
+## sns=snprmaResult$sns, |
|
195 |
+## gns=snprmaResult$gns[index.snps], |
|
196 |
+## SNR=snprmaResult$SNR, |
|
197 |
+## SKW=snprmaResult$SKW, |
|
198 |
+## mixtureParams=snprmaResult$mixtureParams, |
|
199 |
+## cdfName=snprmaResult$cdfName) |
|
200 |
+## cnOptions[["batch"]] <- cnOptions[["batch"]][snpI[["SNR"]] >= SNRmin] |
|
201 |
+## cnSet <- combineIntensities(res=snpI, |
|
202 |
+## NP=npI, |
|
203 |
+## callSet=callSet[index.snps, ]) |
|
204 |
+## if(any(cnSet$SNR > SNRmin)){ |
|
205 |
+## message(paste("Excluding samples with SNR < ", SNRmin)) |
|
206 |
+## cnSet <- cnSet[, cnSet$SNR >= SNRmin] |
|
207 |
+## } |
|
208 |
+## rm(snpI, npI, snps, cnps, index.snps, index.nps); gc() |
|
209 |
+## pData(cnSet)$batch <- cnOptions[["batch"]] |
|
210 |
+## featureData(cnSet) <- lm.parameters(cnSet, cnOptions) |
|
211 |
+## } else { |
|
212 |
+## cnSet <- object |
|
213 |
+## } |
|
214 |
+## if(CHR != 24){ |
|
215 |
+## ## 60G for 1239 files |
|
216 |
+## cnSet <- computeCopynumber(cnSet, cnOptions) |
|
217 |
+## } else{ |
|
218 |
+## message("Copy number estimates not available for chromosome Y. Saving only the 'callSet' object for this chromosome") |
|
219 |
+## alleleSet <- cnSet |
|
220 |
+## save(alleleSet, file=file.path(cnOptions[["outdir"]], paste("alleleSet_", CHR, ".rda", sep=""))) |
|
221 |
+## rm(cnSet, alleleSet); gc() |
|
222 |
+## next() |
|
223 |
+## } |
|
224 |
+## if(length(chromosome) == 1){ |
|
225 |
+## if(cnOptions[["save.cnset"]]){ |
|
226 |
+## save(cnSet, file=file.path(paste(cnFile, "_", CHR, ".rda", sep=""))) |
|
227 |
+## ##save(cnSet, file=file.path(cnOptions[["outdir"]], paste("cnSet_", CHR, ".rda", sep=""))) |
|
228 |
+## } |
|
229 |
+## } |
|
230 |
+## if(length(chromosome) > 1){ |
|
231 |
+## message(paste("Saving ", file.path(cnOptions[["outdir"]], paste(cnFile, "_", CHR, ".rda", sep="")))) |
|
232 |
+## save(cnSet, file=file.path(paste(cnFile, "_", CHR, ".rda", sep=""))) |
|
233 |
+## ##save(cnSet, file=file.path(cnOptions[["outdir"]], paste("cnSet_", CHR, ".rda", sep=""))) |
|
234 |
+## rm(cnSet); gc() |
|
235 |
+## } else { |
|
236 |
+## return(cnSet) |
|
237 |
+## } |
|
238 |
+## } |
|
239 |
+## saved.objects <- list.files(cnOptions[["outdir"]], pattern=cnFile, full.names=TRUE) |
|
240 |
+## return(saved.objects) |
|
241 |
+##} |
|
242 |
+ |
|
243 |
+ |
|
244 |
+ |
|
245 |
+crlmmCopynumber <- function(filenames, cnOptions, object, ...){ |
|
246 |
+ cnOpts <- cnOptions |
|
247 |
+ batchSize <- cnOptions[["crlmmBatchSize"]] |
|
248 |
+ batch <- cnOptions[["batch"]] |
|
249 |
+ outdir <- cnOptions[["outdir"]] |
|
250 |
+ ##if necessary, split plates in separate batches |
|
251 |
+ if(length(filenames) > batchSize){ |
|
252 |
+ L <- length(table(batch)) |
|
253 |
+ message("Processing the files in") |
|
254 |
+ batchList <- split(names(table(batch)), rep(1:L, each=10, length.out=L)) |
|
255 |
+ for(i in seq(along=batchList)){ |
|
256 |
+ index <- as.character(batch) %in% as.character(batchList[[i]]) |
|
257 |
+ fns <- filenames[index] |
|
258 |
+ cnOpts[["batch"]] <- cnOptions[["batch"]][index] |
|
259 |
+ cnOpts[["AFile"]] <- paste(outdir, "/", paste(cnOptions[["AFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
260 |
+ cnOpts[["BFile"]] <- paste(outdir, "/", paste(cnOptions[["BFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
261 |
+ cnOpts[["callsFile"]] <- paste(outdir, "/", paste(cnOptions[["callsFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
262 |
+ cnOpts[["confsFile"]] <- paste(outdir, "/", paste(cnOptions[["confsFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
263 |
+ cnOpts[["snprmaFile"]] <- paste(outdir, "/", paste(cnOptions[["snprmaFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
264 |
+ cnOpts[["cnrmaFile"]] <- paste(outdir, "/", paste(cnOptions[["cnrmaFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
265 |
+ cnOpts[["cnFile"]] <- file.path(outdir, paste(cnOptions[["cnFile"]], i, sep="_crlmmBatch")) |
|
266 |
+ cnOpts[["rgFile"]] <- file.path(outdir, paste(cnOptions[["rgFile"]], i, sep="_crlmmBatch")) |
|
267 |
+ crlmmCopynumber.crlmmBatch(fns, cnOpts, object, ...) |
|
268 |
+ } |
|
164 | 269 |
} else { |
165 |
- A <- res$A |
|
166 |
- B <- res$B |
|
270 |
+ cnOpts[["AFile"]] <- paste(outdir, "/", cnOptions[["AFile"]], ".rda", sep="") |
|
271 |
+ cnOpts[["BFile"]] <- paste(outdir, "/", cnOptions[["BFile"]], ".rda", sep="") |
|
272 |
+ cnOpts[["callsFile"]] <- paste(outdir, "/", cnOptions[["callsFile"]], ".rda", sep="") |
|
273 |
+ cnOpts[["confsFile"]] <- paste(outdir, "/", cnOptions[["confsFile"]], ".rda", sep="") |
|
274 |
+ cnOpts[["snprmaFile"]] <- paste(outdir, "/",cnOptions[["snprmaFile"]], ".rda", sep="") |
|
275 |
+ cnOpts[["cnrmaFile"]] <- paste(outdir, "/", cnOptions[["cnrmaFile"]], ".rda", sep="") |
|
276 |
+ cnOpts[["cnFile"]] <- file.path(outdir, cnOptions[["cnFile"]]) |
|
277 |
+ cnOpts[["rgFile"]] <- file.path(outdir, cnOptions[["rgFile"]]) |
|
278 |
+ crlmmCopynumber.crlmmBatch(filenames, cnOpts, object, ...) |
|
167 | 279 |
} |
168 |
- dimnames(B) <- dimnames(A) |
|
169 |
- index.snps <- match(res$gns, rownames(A)) |
|
170 |
- callsConfs <- calls <- matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A)) |
|
171 |
- |
|
172 |
- calls[index.snps, ] <- calls(callSet) |
|
173 |
- callsConfs[index.snps, ] <- assayData(callSet)[["callProbability"]] |
|
174 |
- fd <- data.frame(matrix(NA, nrow(calls), length(fvarLabels(callSet)))) |
|
175 |
- fd[index.snps, ] <- fData(callSet) |
|
176 |
- rownames(fd) <- rownames(A) |
|
177 |
- colnames(fd) <- fvarLabels(callSet) |
|
178 |
- fD <- new("AnnotatedDataFrame", |
|
179 |
- data=data.frame(fd), |
|
180 |
- varMetadata=data.frame(labelDescription=colnames(fd), row.names=colnames(fd))) |
|
181 |
- superSet <- new("CNSet", |
|
182 |
- CA=matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A)), |
|
183 |
- CB=matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A)), |
|
184 |
- alleleA=A, |
|
185 |
- alleleB=B, |
|
186 |
- call=calls, |
|
187 |
- callProbability=callsConfs, |
|
188 |
- featureData=fD, |
|
189 |
- phenoData=phenoData(callSet), |
|
190 |
- experimentData=experimentData(callSet), |
|
191 |
- protocolData=protocolData(callSet), |
|
192 |
- annotation=annotation(callSet)) |
|
193 |
- return(superSet) |
|
194 | 280 |
} |
195 | 281 |
|
196 |
-harmonizeDimnamesTo <- function(object1, object2){ |
|
197 |
- #object2 should be a subset of object 1 |
|
198 |
- object2 <- object2[featureNames(object2) %in% featureNames(object1), ] |
|
199 |
- object1 <- object1[match(featureNames(object2), featureNames(object1)), ] |
|
200 |
- object1 <- object1[, match(sampleNames(object2), sampleNames(object1))] |
|
201 |
- stopifnot(all.equal(featureNames(object1), featureNames(object2))) |
|
202 |
- stopifnot(all.equal(sampleNames(object1), sampleNames(object2))) |
|
203 |
- return(object1) |
|
204 |
-} |
|
205 | 282 |
|
206 |
-crlmmCopynumber <- function(filenames, cnOptions, object, ...){ |
|
283 |
+crlmmCopynumber.crlmmBatch <- function(filenames, cnOptions, object, ...){ |
|
284 |
+ set.seed(cnOptions[["seed"]]) ##for reproducibility |
|
207 | 285 |
if(!missing(object)){ |
208 | 286 |
stopifnot(class(object) == "CNSet") |
209 | 287 |
createIntermediateObjects <- FALSE |
210 | 288 |
} else { |
211 | 289 |
createIntermediateObjects <- TRUE |
212 |
- crlmmResults <- crlmmWrapper(filenames, cnOptions, ...) |
|
213 |
- snprmaResult <- crlmmResults[["snprmaResult"]] |
|
214 |
- cnrmaResult <- crlmmResults[["cnrmaResult"]] |
|
215 |
- callSet <- crlmmResults[["callSet"]] |
|
216 |
- rm(crlmmResults); gc() |
|
217 |
- annotation(callSet) <- cnOptions[["cdfName"]] |
|
218 |
- stopifnot(identical(featureNames(callSet), snprmaResult[["gns"]])) |
|
219 |
- path <- system.file("extdata", package=paste(annotation(callSet), "Crlmm", sep="")) |
|
220 |
- load(file.path(path, "snpProbes.rda")) |
|
221 |
- snpProbes <- get("snpProbes") |
|
222 |
- load(file.path(path, "cnProbes.rda")) |
|
223 |
- cnProbes <- get("cnProbes") |
|
224 |
- k <- grep("chr", colnames(snpProbes)) |
|
225 |
- if(length(k) < 1) stop("chr or chromosome not in colnames(snpProbes)") |
|
290 |
+ ## 33G for 1239 files |
|
291 |
+ cnOptions[["save.it"]] <- TRUE |
|
292 |
+ ##crlmmResults <- crlmmWrapper(filenames, cnOptions, ...) |
|
293 |
+ crlmmWrapper(filenames, cnOptions, ...) |
|
226 | 294 |
} |
227 |
- set.seed(cnOptions[["seed"]]) ##for reproducibility |
|
295 |
+ load.it <- cnOptions[["load.it"]] |
|
296 |
+ outdir <- cnOptions[["outdir"]] |
|
297 |
+ snprmaFile <- cnOptions[["snprmaFile"]] |
|
298 |
+ cnFile <- cnOptions[["cnFile"]] |
|
299 |
+ cnrmaFile <- cnOptions[["cnrmaFile"]] |
|
300 |
+ callsFile <- cnOptions[["callsFile"]] |
|
301 |
+ confsFile <- cnOptions[["confsFile"]] |
|
302 |
+ AFile <- cnOptions[["AFile"]] |
|
303 |
+ BFile <- cnOptions[["BFile"]] |
|
304 |
+ path <- system.file("extdata", package=paste(cnOptions[["cdfName"]], "Crlmm", sep="")) |
|
305 |
+ load(file.path(path, "snpProbes.rda")) |
|
306 |
+ snpProbes <- get("snpProbes") |
|
307 |
+ load(file.path(path, "cnProbes.rda")) |
|
308 |
+ cnProbes <- get("cnProbes") |
|
309 |
+ k <- grep("chr", colnames(snpProbes)) |
|
310 |
+ message("Attaching quantile-normalized intensities...") |
|
311 |
+ ##cwd <- getwd() |
|
312 |
+ ##setwd(dirname(snprmaFile[1])) |
|
313 |
+ load(AFile) |
|
314 |
+ if(isPackageLoaded("ff")) open(A) |
|
315 |
+ load(BFile) |
|
316 |
+ if(isPackageLoaded("ff")) open(B) |
|
317 |
+ message("Loading genotype calls...") |
|
318 |
+ load(callsFile) |
|
319 |
+ calls <- get("calls") |
|
320 |
+ if(isPackageLoaded("ff")) open(calls) |
|
321 |
+ load(confsFile) |
|
322 |
+ confs <- get("confs") |
|
323 |
+ if(isPackageLoaded("ff")) open(confs) |
|
324 |
+ fns <- rownames(A) |
|
325 |
+ nr <- nrow(A); nc <- ncol(A) |
|
326 |
+ if(isPackageLoaded("ff")){ |
|
327 |
+ ##initialize CA, CB |
|
328 |
+ if(!file.exists(file.path(outdir, "CA.rda"))){ |
|
329 |
+ CA <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dimnames(A)) |
|
330 |
+ |
|
331 |
+ } else{ |
|
332 |
+ load(file.path(outdir, "CA.rda")) |
|
333 |
+ if(!all(dim(CA) == dim(A))) |
|
334 |
+ CA <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dimnames(A), overwrite=TRUE) |
|
335 |
+ open(CA) |
|
336 |
+ } |
|
337 |
+ if(!file.exists(file.path(outdir, "CB.rda"))){ |
|
338 |
+ CB <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dimnames(A)) |
|
339 |
+ } else{ |
|
340 |
+ load(file.path(outdir, "CB.rda")) |
|
341 |
+ if(!all(dim(CB) == dim(A))) |
|
342 |
+ CB <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dimnames(A), overwrite=TRUE) |
|
343 |
+ open(CB) |
|
344 |
+ } |
|
345 |
+ } |
|
346 |
+ ##cnFile <- cnOptions[["cnFile"]] |
|
228 | 347 |
chromosome <- cnOptions[["chromosome"]] |
229 | 348 |
SNRmin <- cnOptions[["SNRmin"]] |
349 |
+ ll <- 1 |
|
350 |
+ scanDates <- data.frame(ScanDate=sapply(filenames, celfileDate)) |
|
351 |
+ rownames(scanDates) <- basename(rownames(scanDates)) |
|
352 |
+ protocoldata <- new("AnnotatedDataFrame", |
|
353 |
+ data=scanDates, |
|
354 |
+ varMetadata=data.frame(labelDescription=colnames(scanDates), |
|
355 |
+ row.names=colnames(scanDates))) |
|
356 |
+ load(file.path(cnOptions[["outdir"]], "sampleStats.rda")) |
|
357 |
+ sampleStats <- data.frame(sampleStats, batch=cnOptions[["batch"]]) |
|
358 |
+ pD <- new("AnnotatedDataFrame", |
|
359 |
+ data=sampleStats, |
|
360 |
+ varMetadata=data.frame(labelDescription=colnames(sampleStats))) |
|
361 |
+ batch <- cnOptions[["batch"]] |
|
230 | 362 |
for(CHR in chromosome){ |
363 |
+ snps <- rownames(snpProbes)[snpProbes[, k] == CHR] |
|
364 |
+ cns <- rownames(cnProbes)[cnProbes[, k] == CHR] |
|
365 |
+ index.snps <- match(snps, rownames(A)) |
|
366 |
+ index.cn <- match(cns, rownames(A)) |
|
367 |
+ row.index <- c(index.snps, index.cn) |
|
231 | 368 |
cat("Chromosome ", CHR, "\n") |
232 |
- if(createIntermediateObjects){ |
|
233 |
- snps <- rownames(snpProbes)[snpProbes[, k] == CHR] |
|
234 |
- cnps <- rownames(cnProbes)[cnProbes[, k] == CHR] |
|
235 |
- index.snps <- match(snps, featureNames(callSet)) |
|
236 |
- index.nps <- match(cnps, rownames(cnrmaResult[["NP"]])) |
|
237 |
- if(!is.null(cnrmaResult)){ |
|
238 |
- npI <- cnrmaResult$NP[index.nps, ] |
|
239 |
- } else npI <- NULL |
|
240 |
- snpI <- list(A=snprmaResult$A[index.snps, ], |
|
241 |
- B=snprmaResult$B[index.snps, ], |
|
242 |
- sns=snprmaResult$sns, |
|
243 |
- gns=snprmaResult$gns[index.snps], |
|
244 |
- SNR=snprmaResult$SNR, |
|
245 |
- SKW=snprmaResult$SKW, |
|
246 |
- mixtureParams=snprmaResult$mixtureParams, |
|
247 |
- cdfName=snprmaResult$cdfName) |
|
248 |
- cnOptions[["batch"]] <- cnOptions[["batch"]][snpI[["SNR"]] >= SNRmin] |
|
249 |
- cnSet <- combineIntensities(res=snpI, |
|
250 |
- NP=npI, |
|
251 |
- callSet=callSet[index.snps, ]) |
|
369 |
+ for(i in seq(along=unique(batch))){ |
|
370 |
+ PLATE <- unique(batch)[i] |
|
371 |
+ sample.index <- which(batch==PLATE) |
|
372 |
+ ##cnOptions[["batch"]] <- cnOptions[["batch"]][snpI[["SNR"]] >= SNRmin] |
|
373 |
+ if(isPackageLoaded("ff")){ |
|
374 |
+ ca <- CA[row.index, sample.index] |
|
375 |
+ cb <- CB[row.index, sample.index] |
|
376 |
+ } else{ |
|
377 |
+ dns <- dimnames(A[row.index, sample.index]) |
|
378 |
+ cb <- ca <- matrix(NA, nr=length(row.index), nc=length(sample.index), dimnames=dns) |
|
379 |
+ } |
|
380 |
+ cnSet <- new("CNSet", |
|
381 |
+ alleleA=A[row.index, sample.index], |
|
382 |
+ alleleB=B[row.index, sample.index], |
|
383 |
+ call=calls[row.index, sample.index], |
|
384 |
+ callProbability=confs[row.index, sample.index], |
|
385 |
+ CA=ca, |
|
386 |
+ CB=cb, |
|
387 |
+ featureData=annotatedDataFrameFrom(A[row.index, sample.index], byrow=TRUE), |
|
388 |
+ phenoData=pD[sample.index, ], |
|
389 |
+ protocolData=protocoldata[sample.index, ], |
|
390 |
+ annotation=cnOptions[["cdfName"]]) |
|
252 | 391 |
if(any(cnSet$SNR > SNRmin)){ |
253 | 392 |
message(paste("Excluding samples with SNR < ", SNRmin)) |
254 | 393 |
cnSet <- cnSet[, cnSet$SNR >= SNRmin] |
255 | 394 |
} |
256 |
- rm(snpI, npI, snps, cnps, index.snps, index.nps); gc() |
|
257 |
- pData(cnSet)$batch <- cnOptions[["batch"]] |
|
395 |
+ ##rm(snpI, NP, gtSet); gc() |
|
258 | 396 |
featureData(cnSet) <- lm.parameters(cnSet, cnOptions) |
259 |
- } else { |
|
260 |
- cnSet <- object |
|
261 |
- } |
|
262 |
- if(CHR != 24){ |
|
397 |
+ } |
|
398 |
+ if(CHR != 24){ |
|
399 |
+ ## 60G for 1239 files |
|
263 | 400 |
cnSet <- computeCopynumber(cnSet, cnOptions) |
401 |
+ if(isPackageLoaded("ff")){ |
|
402 |
+ CA[row.index, sample.index] <- cnSet@assayData[["CA"]] |
|
403 |
+ CB[row.index, sample.index] <- cnSet@assayData[["CB"]] |
|
404 |
+ } |
|
264 | 405 |
} else{ |
265 | 406 |
message("Copy number estimates not available for chromosome Y. Saving only the 'callSet' object for this chromosome") |
266 | 407 |
alleleSet <- cnSet |
267 |
- save(alleleSet, file=file.path(cnOptions[["outdir"]], paste("alleleSet_", CHR, ".rda", sep=""))) |
|
268 |
- rm(cnSet, alleleSet); gc() |
|
408 |
+ rm(cnSet) ; gc() |
|
409 |
+ save(alleleSet, file=file.path(cnOptions[["outdir"]], paste("alleleSet_", PLATE, "_", CHR, ".rda", sep=""))) |
|
410 |
+ rm(alleleSet); gc() |
|
269 | 411 |
next() |
270 | 412 |
} |
271 | 413 |
if(length(chromosome) == 1){ |
272 | 414 |
if(cnOptions[["save.cnset"]]){ |
273 |
- save(cnSet, file=file.path(cnOptions[["outdir"]], paste("cnSet_", CHR, ".rda", sep=""))) |
|
415 |
+ save(cnSet, file=paste(cnFile, "_", PLATE, "_", CHR, ".rda", sep="")) |
|
274 | 416 |
} |
275 | 417 |
} |
276 | 418 |
if(length(chromosome) > 1){ |
277 |
- save(cnSet, file=file.path(cnOptions[["outdir"]], paste("cnSet_", CHR, ".rda", sep=""))) |
|
419 |
+ save(cnSet, file=paste(cnFile, "_", PLATE, "_", CHR, ".rda", sep="")) |
|
420 |
+ ##save(cnSet, file=file.path(cnOptions[["outdir"]], paste("cnSet_", CHR, ".rda", sep=""))) |
|
278 | 421 |
rm(cnSet); gc() |
279 | 422 |
} else { |
423 |
+ if(isPackageLoaded("ff")) { |
|
424 |
+ close(CA); close(CB) |
|
425 |
+ save(CA, file=file.path(cnOptions[["outdir"]], "CA.rda")) |
|
426 |
+ save(CB, file=file.path(cnOptions[["outdir"]], "CB.rda")) |
|
427 |
+ } |
|
280 | 428 |
return(cnSet) |
281 | 429 |
} |
282 | 430 |
} |
283 |
- saved.objects <- list.files(cnOptions[["outdir"]], pattern="cnSet", full.names=TRUE) |
|
284 |
- return(saved.objects) |
|
431 |
+ if(isPackageLoaded("ff")) { |
|
432 |
+ close(CA); close(CB) |
|
433 |
+ save(CA, file=file.path(cnOptions[["outdir"]], "CA.rda")) |
|
434 |
+ save(CB, file=file.path(cnOptions[["outdir"]], "CB.rda")) |
|
435 |
+ } |
|
285 | 436 |
} |
286 | 437 |
|
287 | 438 |
|
439 |
+loadIlluminaRG <- function(rgFile, crlmmFile, load.it, save.it,...){ |
|
440 |
+## if(missing(rgFile)){ |
|
441 |
+## ##stop("must specify 'rgFile'.") |
|
442 |
+## rgFile <- file.path(dirname(crlmmFile), "rgFile.rda") |
|
443 |
+## message("rgFile not specified. Using ", rgFile) |
|
444 |
+## } |
|
445 |
+ if(!load.it){ |
|
446 |
+ RG <- readIdatFiles(...) |
|
447 |
+ if(save.it) save(RG, file=rgFile) |
|
448 |
+ } |
|
449 |
+ if(load.it & !file.exists(rgFile)){ |
|
450 |
+ message("load.it is TRUE, but rgFile not present. Attempting to read the idatFiles.") |
|
451 |
+ RG <- readIdatFiles(...) |
|
452 |
+ if(save.it) save(RG, file=rgFile) |
|
453 |
+ } |
|
454 |
+ if(load.it & file.exists(rgFile)){ |
|
455 |
+ message("Loading RG file") |
|
456 |
+ load(rgFile) |
|
457 |
+ RG <- get("RG") |
|
458 |
+ } |
|
459 |
+ return(RG) |
|
460 |
+} |
|
461 |
+ |
|
462 |
+loadIlluminaCallSet <- function(crlmmFile, snprmaFile, RG, load.it, save.it, cdfName){ |
|
463 |
+ if(!file.exists(crlmmFile) | !load.it){ |
|
464 |
+ callSet <- crlmmIllumina(RG=RG, |
|
465 |
+ cdfName=cdfName, |
|
466 |
+ sns=sampleNames(RG), |
|
467 |
+ returnParams=TRUE, |
|
468 |
+ save.it=TRUE, |
|
469 |
+ intensityFile=snprmaFile) |
|
470 |
+ if(save.it) save(callSet, file=crlmmFile) |
|
471 |
+ } else { |
|
472 |
+ message("Loading ", crlmmFile, "...") |
|
473 |
+ load(crlmmFile) |
|
474 |
+ callSet <- get("callSet") |
|
475 |
+ } |
|
476 |
+ protocolData(callSet) <- protocolData(RG) |
|
477 |
+ return(callSet) |
|
478 |
+} |
|
479 |
+ |
|
480 |
+ |
|
481 |
+##loadAffyCallSet <- function(filenames, confsFile, callsFile, snprmaFile, load.it, save.it, cdfName){ |
|
482 |
+## |
|
483 |
+#### if(save.it){ |
|
484 |
+#### message("Saving callSet to", callsFile) |
|
485 |
+#### ##save(callSet, cnrmaResult, file=callsFile) |
|
486 |
+#### save(callSet, file=callsFile) |
|
487 |
+#### } |
|
488 |
+#### } else { |
|
489 |
+#### message("Loading ", callsFile, "...") |
|
490 |
+#### ##load(snprmaFile) |
|
491 |
+#### load(callsFile) |
|
492 |
+#### callSet <- get("callSet") |
|
493 |
+#### ##cnrmaResult <- get("cnrmaResult") |
|
494 |
+#### } |
|
495 |
+## |
|
496 |
+##} |
|
497 |
+## if(platform=="affymetrix") { |
|
498 |
+## protocolData(callSet)[["ScanDate"]] <- as.character(celDates(filenames)) |
|
499 |
+## sampleNames(protocolData(callSet)) <- sampleNames(callSet) |
|
500 |
+## } |
|
501 |
+ |
|
502 |
+##loadAffyCnrma <- function(filenames, cnrmaFile, cdfName, outdir, load.it, save.it, use.bigmemory=FALSE){ |
|
503 |
+## if(!file.exists(cnrmaFile) | !load.it){ |
|
504 |
+## message("Quantile normalizing the copy number probes...") |
|
505 |
+## cnrmaResult <- cnrma2(filenames=filenames, cdfName=cdfName, outdir=outdir, cnrmaFile=cnrmaFile) |
|
506 |
+## } else cnrmaResult <- attach.big.matrix("NP.desc", outdir) |
|
507 |
+## return(cnrmaResult) |
|
508 |
+##} |
|
509 |
+ |
|
510 |
+loadIlluminaCnrma <- function(){ |
|
511 |
+ if(exists("cnAB")){ |
|
512 |
+ np.A <- as.integer(cnAB$A) |
|
513 |
+ np.B <- as.integer(cnAB$B) |
|
514 |
+ np <- ifelse(np.A > np.B, np.A, np.B) |
|
515 |
+ np <- matrix(np, nrow(cnAB$A), ncol(cnAB$A)) |
|
516 |
+ rownames(np) <- cnAB$gns |
|
517 |
+ colnames(np) <- cnAB$sns |
|
518 |
+ cnAB$NP <- np |
|
519 |
+ ##sampleNames(callSet) <- res$sns |
|
520 |
+ sampleNames(callSet) <- cnAB$sns |
|
521 |
+ cnrmaResult <- get("cnAB") |
|
522 |
+ } else cnrmaResult <- NULL |
|
523 |
+ return(cnrmaResult) |
|
524 |
+} |
|
288 | 525 |
|
289 | 526 |
crlmmWrapper <- function(filenames, cnOptions, ...){ |
290 | 527 |
cdfName <- cnOptions[["cdfName"]] |
291 | 528 |
load.it <- cnOptions[["load.it"]] |
292 | 529 |
save.it <- cnOptions[["save.it"]] |
293 |
- splitByChr <- cnOptions[["splitByChr"]] |
|
294 |
- crlmmFile <- cnOptions[["crlmmFile"]] |
|
295 |
- intensityFile=cnOptions[["intensityFile"]] |
|
530 |
+ callsFile <- cnOptions[["callsFile"]] |
|
531 |
+ confsFile <- cnOptions[["confsFile"]] |
|
532 |
+ AFile=cnOptions[["AFile"]] |
|
533 |
+ BFile=cnOptions[["BFile"]] |
|
534 |
+ snprmaFile=cnOptions[["snprmaFile"]] |
|
535 |
+ cnrmaFile=cnOptions[["cnrmaFile"]] |
|
296 | 536 |
rgFile=cnOptions[["rgFile"]] |
297 |
- ##use.ff=cnOptions[["use.ff"]] |
|
298 | 537 |
outdir <- cnOptions[["outdir"]] |
299 | 538 |
if(missing(cdfName)) stop("cdfName is missing -- a valid cdfName is required. See crlmm:::validCdfNames()") |
300 | 539 |
platform <- whichPlatform(cdfName) |
... | ... |
@@ -307,148 +546,57 @@ crlmmWrapper <- function(filenames, cnOptions, ...){ |
307 | 546 |
stop(cdfName, " is not a valid entry. See crlmm:::validCdfNames(platform)") |
308 | 547 |
} else cat("TRUE\n") |
309 | 548 |
} |
310 |
- if(missing(intensityFile)) stop("must specify 'intensityFile'.") |
|
311 |
- if(missing(crlmmFile)) stop("must specify 'crlmmFile'.") |
|
312 |
- if(platform == "illumina"){ |
|
313 |
- if(missing(rgFile)){ |
|
314 |
- ##stop("must specify 'rgFile'.") |
|
315 |
- rgFile <- file.path(dirname(crlmmFile), "rgFile.rda") |
|
316 |
- message("rgFile not specified. Using ", rgFile) |
|
317 |
- } |
|
318 |
- if(!load.it){ |
|
319 |
- RG <- readIdatFiles(...) |
|
320 |
- if(save.it) save(RG, file=rgFile) |
|
321 |
- } |
|
322 |
- if(load.it & !file.exists(rgFile)){ |
|
323 |
- message("load.it is TRUE, but rgFile not present. Attempting to read the idatFiles.") |
|
324 |
- RG <- readIdatFiles(...) |
|
325 |
- if(save.it) save(RG, file=rgFile) |
|
326 |
- } |
|
327 |
- if(load.it & file.exists(rgFile)){ |
|
328 |
- message("Loading RG file") |
|
329 |
- load(rgFile) |
|
330 |
- RG <- get("RG") |
|
331 |
- } |
|
332 |
- } |
|
333 |
- if(!(file.exists(dirname(crlmmFile)))) stop(dirname(crlmmFile), " does not exist.") |
|
334 |
- if(!(file.exists(dirname(intensityFile)))) stop(dirname(intensityFile), " does not exist.") |
|
335 |
- ##--------------------------------------------------------------------------- |
|
336 |
- ## FIX |
|
337 |
- outfiles <- file.path(dirname(crlmmFile), paste("crlmmSetList_", 1:24, ".rda", sep="")) |
|
338 |
- if(load.it & all(file.exists(outfiles))){ |
|
339 |
- load(outfiles[1]) |
|
340 |
- crlmmSetList <- get("crlmmSetList") |
|
341 |
- if(!all(sampleNames(crlmmSetList) == basename(filenames))){ |
|
342 |
- stop("load.it is TRUE, but sampleNames(crlmmSetList != basename(filenames))") |
|
343 |
- } else{ |
|
344 |
- return("load.it is TRUE and 'crlmmSetList_<CHR>.rda' objects found. Nothing to do...") |
|
345 |
- } |
|
346 |
- } |
|
549 |
+ if(platform == "illumina") RG <- loadIlluminaRG(rgFile, callsFile, load.it, save.it) |
|
550 |
+ if(!(file.exists(dirname(callsFile)))) stop(dirname(callsFile), " does not exist.") |
|
551 |
+ if(!(file.exists(dirname(snprmaFile[1])))) stop(dirname(snprmaFile[1]), " does not exist.") |
|
347 | 552 |
if(load.it){ |
348 |
- if(!file.exists(crlmmFile)){ |
|
349 |
- message("load.it is TRUE, but ", crlmmFile, " does not exist. Rerunning the genotype calling algorithm") |
|
350 |
- load.it <- FALSE |
|
553 |
+ if(!file.exists(callsFile)){ |
|
554 |
+ message("load.it is TRUE, but ", callsFile, " does not exist. Rerunning the genotype calling algorithm") |
|
555 |
+ ##load.it <- FALSE |
|
351 | 556 |
} |
352 | 557 |
} |
353 | 558 |
if(platform == "affymetrix"){ |
354 |
- if(!file.exists(crlmmFile) | !load.it){ |
|
355 |
- callSet <- crlmm(filenames=filenames, |
|
356 |
- cdfName=cdfName, |
|
357 |
- save.it=TRUE, |
|
358 |
- load.it=load.it, |
|
359 |
- intensityFile=intensityFile) |
|
360 |
- message("Quantile normalizing the copy number probes...") |
|
361 |
- cnrmaResult <- cnrma(filenames=filenames, cdfName=cdfName, outdir=outdir) |
|
362 |
- if(save.it){ |
|
363 |
- message("Saving callSet and cnrmaResult to", crlmmFile) |
|
364 |
- save(callSet, cnrmaResult, file=crlmmFile) |
|
365 |
- } |
|
366 |
- } else { |
|
367 |
- message("Loading ", crlmmFile, "...") |
|
368 |
- load(intensityFile) |
|
369 |
- load(crlmmFile) |
|
370 |
- callSet <- get("callSet") |
|
371 |
- cnrmaResult <- get("cnrmaResult") |
|
372 |
- } |
|
373 |
- scanDates <- data.frame(ScanDate=sapply(filenames, celfileDate)) |
|
374 |
- protocolData(callSet) <- new("AnnotatedDataFrame", |
|
375 |
- data=scanDates, |
|
376 |
- varMetadata=data.frame(labelDescription=colnames(scanDates), |
|
377 |
- row.names=colnames(scanDates))) |
|
378 |
- } |
|
379 |
- if(platform == "illumina"){ |
|
380 |
- if(!file.exists(crlmmFile) | !load.it){ |
|
381 |
- callSet <- crlmmIllumina(RG=RG, |
|
382 |
- cdfName=cdfName, |
|
383 |
- sns=sampleNames(RG), |
|
384 |
- returnParams=TRUE, |
|
385 |
- save.it=TRUE, |
|
386 |
- intensityFile=intensityFile) |
|
387 |
- if(save.it) save(callSet, file=crlmmFile) |
|
388 |
- } else { |
|
389 |
- message("Loading ", crlmmFile, "...") |
|
390 |
- load(crlmmFile) |
|
391 |
- callSet <- get("callSet") |
|
559 |
+ if(!file.exists(callsFile) | !load.it){ |
|
560 |
+ crlmm(filenames=filenames, |
|
561 |
+ cdfName=cdfName, |
|
562 |
+ save.it=TRUE, |
|
563 |
+ load.it=load.it, |
|
564 |
+ intensityFile=snprmaFile, |
|
565 |
+ callsFile=callsFile, |
|
566 |
+ confsFile=confsFile, |
|
567 |
+ AFile=AFile, |
|
568 |
+ BFile=BFile) |
|
392 | 569 |
} |
393 |
- protocolData(callSet) <- protocolData(RG) |
|
394 | 570 |
} |
395 |
- if(platform=="affymetrix") { |
|
396 |
- protocolData(callSet)[["ScanDate"]] <- as.character(celDates(filenames)) |
|
397 |
- sampleNames(protocolData(callSet)) <- sampleNames(callSet) |
|
398 |
- } |
|
399 |
- load(intensityFile) |
|
400 |
- snprmaResult <- get("res") |
|
401 |
- if(platform=="illumina"){ |
|
402 |
- if(exists("cnAB")){ |
|
403 |
- np.A <- as.integer(cnAB$A) |
|
404 |
- np.B <- as.integer(cnAB$B) |
|
405 |
- np <- ifelse(np.A > np.B, np.A, np.B) |
|
406 |
- np <- matrix(np, nrow(cnAB$A), ncol(cnAB$A)) |
|
407 |
- rownames(np) <- cnAB$gns |
|
408 |
- colnames(np) <- cnAB$sns |
|
409 |
- cnAB$NP <- np |
|
410 |
- ##sampleNames(callSet) <- res$sns |
|
411 |
- sampleNames(callSet) <- cnAB$sns |
|
412 |
- cnrmaResult <- get("cnAB") |
|
413 |
- } else cnrmaResult <- NULL |
|
571 |
+ gc() |
|
572 |
+ if(platform == "illumina") { |
|
573 |
+ browser() |
|
574 |
+ callSet <- loadIlluminaCallSet(callsFile, snprmaFile, RG, load.it, save.it, cdfName) |
|
414 | 575 |
} |
415 |
- if(platform=="affymetrix"){ |
|
416 |
- if(exists("cnrmaResult")){ |
|
417 |
- cnrmaResult <- get("cnrmaResult") |
|
418 |
- } else cnrmaResult <- NULL |
|
576 |
+ if(platform == "affymetrix"){ |
|
577 |
+ if(!file.exists(cnrmaFile) | !load.it){ |
|
578 |
+ message("Quantile normalizing the copy number probes...") |
|
579 |
+ ## updates A matrix and saves cnrmaFile |
|
580 |
+ cnrma(filenames=filenames, cdfName=cdfName, outdir=outdir, cnrmaFile=cnrmaFile, AFile=AFile) |
|
581 |
+ } |
|
419 | 582 |
} |
420 |
- crlmmResults <- list(snprmaResult=snprmaResult, |
|
421 |
- cnrmaResult=cnrmaResult, |
|
422 |
- callSet=callSet) |
|
423 |
- |
|
583 |
+## if(!is.null(cnrmaResult)){ |
|
584 |
+## for(CHR in chromosome){ |
|
585 |
+## cat(CHR, " ") |
|
586 |
+## cnps <- rownames(cnProbes)[cnProbes[, k] == CHR] |
|
587 |
+## index.nps <- match(cnps, rownames(cnrmaResult[["NP"]])) |
|
588 |
+## NP <- cnrmaResult$NP[index.nps, ] |
|
589 |
+## save(NP, file=file.path(tmpdir, paste("NP_", CHR, ".rda", sep=""))) |
|
590 |
+## rm(NP); gc() |
|
591 |
+## } |
|
592 |
+## } |
|
424 | 593 |
if(!save.it){ |
425 | 594 |
message("Cleaning up") |
426 |
- unlink(intensityFile) |
|
595 |
+ unlink(snprmaFile); unlink(cnrmaFile) |
|
427 | 596 |
} |
428 |
- return(crlmmResults) |
|
429 | 597 |
} |
430 | 598 |
|
431 |
-validCdfNames <- function(){ |
|
432 |
- c("genomewidesnp6", |
|
433 |
- "genomewidesnp5", |
|
434 |
- "human370v1c", |
|
435 |
- "human370quadv3c", |
|
436 |
- "human550v3b", |
|
437 |
- "human650v3a", |
|
438 |
- "human610quadv1b", |
|
439 |
- "human660quadv1a", |
|
440 |
- "human1mduov3b") |
|
441 |
-} |
|
442 | 599 |
|
443 |
-isValidCdfName <- function(cdfName){ |
|
444 |
- chipList <- validCdfNames() |
|
445 |
- result <- cdfName %in% chipList |
|
446 |
- if(!(result)){ |
|
447 |
- warning("cdfName must be one of the following: ", |
|
448 |
- chipList) |
|
449 |
- } |
|
450 |
- return(result) |
|
451 |
-} |
|
452 | 600 |
|
453 | 601 |
whichPlatform <- function(cdfName){ |
454 | 602 |
index <- grep("genomewidesnp", cdfName) |
... | ... |
@@ -463,7 +611,50 @@ whichPlatform <- function(cdfName){ |
463 | 611 |
|
464 | 612 |
|
465 | 613 |
# steps: quantile normalize hapmap: create 1m_reference_cn.rda object |
466 |
-cnrma <- function(filenames, cdfName, sns, seed=1, verbose=FALSE, outdir){ |
|
614 |
+##cnrma <- function(filenames, cdfName, sns, seed=1, verbose=FALSE, outdir){ |
|
615 |
+## if(missing(cdfName)) stop("must specify cdfName") |
|
616 |
+## pkgname <- getCrlmmAnnotationName(cdfName) |
|
617 |
+## require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available") |
|
618 |
+## if (missing(sns)) sns <- basename(filenames) |
|
619 |
+## loader("npProbesFid.rda", .crlmmPkgEnv, pkgname) |
|
620 |
+## fid <- getVarInEnv("npProbesFid") |
|
621 |
+## set.seed(seed) |
|
622 |
+## idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything |
|
623 |
+## SKW <- vector("numeric", length(filenames)) |
|
624 |
+## |
|
625 |
+## |
|
626 |
+## NP <- matrix(NA, length(fid), length(filenames)) |
|
627 |
+## verbose <- TRUE |
|
628 |
+## if(verbose){ |
|
629 |
+## message("Processing ", length(filenames), " files.") |
|
630 |
+## if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3) |
|
631 |
+## } |
|
632 |
+## if(cdfName=="genomewidesnp6"){ |
|
633 |
+## loader("1m_reference_cn.rda", .crlmmPkgEnv, pkgname) |
|
634 |
+## } |
|
635 |
+## if(cdfName=="genomewidesnp5"){ |
|
636 |
+## loader("5.0_reference_cn.rda", .crlmmPkgEnv, pkgname) |
|
637 |
+## } |
|
638 |
+## reference <- getVarInEnv("reference") |
|
639 |
+## ##if(!is.matrix(reference)) stop("target distribution for quantile normalization not available.") |
|
640 |
+## for(i in seq(along=filenames)){ |
|
641 |
+## y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid]) |
|
642 |
+## x <- log2(y[idx2]) |
|
643 |
+## SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3) |
|
644 |
+## rm(x) |
|
645 |
+## NP[, i] <- as.integer(normalize.quantiles.use.target(y, target=reference)) |
|
646 |
+## if (verbose) |
|
647 |
+## if (getRversion() > '2.7.0') setTxtProgressBar(pb, i) |
|
648 |
+## else cat(".") |
|
649 |
+## } |
|
650 |
+## dimnames(NP) <- list(names(fid), sns) |
|
651 |
+## ##dimnames(NP) <- list(map[, "man_fsetid"], sns) |
|
652 |
+## res3 <- list(NP=NP, SKW=SKW) |
|
653 |
+## cat("\n") |
|
654 |
+## return(res3) |
|
655 |
+##} |
|
656 |
+ |
|
657 |
+cnrma <- function(filenames, cdfName, sns, seed=1, verbose=FALSE, outdir, cnrmaFile, AFile){ |
|
467 | 658 |
if(missing(cdfName)) stop("must specify cdfName") |
468 | 659 |
pkgname <- getCrlmmAnnotationName(cdfName) |
469 | 660 |
require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available") |
... | ... |
@@ -473,17 +664,15 @@ cnrma <- function(filenames, cdfName, sns, seed=1, verbose=FALSE, outdir){ |
473 | 664 |
set.seed(seed) |
474 | 665 |
idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything |
475 | 666 |
SKW <- vector("numeric", length(filenames)) |
476 |
-## if(bigmemory){ |
|
477 |
-## NP <- filebacked.big.matrix(length(pnsa), length(filenames), |
|
478 |
-## type="integer", |
|
479 |
-## init=as.integer(0), |
|
480 |
-## backingpath=outdir, |
|
481 |
-## backingfile="NP.bin", |
|
482 |
-## descriptorfile="NP.desc") |
|
483 |
-## } else{ |
|
484 |
- NP <- matrix(NA, length(fid), length(filenames)) |
|
485 |
-## } |
|
486 |
- verbose <- TRUE |
|
667 |
+ load(AFile) |
|
668 |
+ if(isPackageLoaded("ff")) open(A) |
|
669 |
+ path <- system.file("extdata", package=pkgname) |
|
670 |
+ load(file.path(path, "cnProbes.rda")) |
|
671 |
+ cnProbes <- get("cnProbes") |
|
672 |
+ cnps <- rownames(cnProbes) |
|
673 |
+ cnps <- cnps[cnps %in% rownames(A)] |
|
674 |
+ index <- match(cnps, rownames(A), nomatch=0) |
|
675 |
+ index <- index[index != 0] |
|
487 | 676 |
if(verbose){ |
488 | 677 |
message("Processing ", length(filenames), " files.") |
489 | 678 |
if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3) |
... | ... |
@@ -500,17 +689,21 @@ cnrma <- function(filenames, cdfName, sns, seed=1, verbose=FALSE, outdir){ |
500 | 689 |
y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid]) |
501 | 690 |
x <- log2(y[idx2]) |
502 | 691 |
SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3) |
503 |
- rm(x) |
|
504 |
- NP[, i] <- as.integer(normalize.quantiles.use.target(y, target=reference)) |
|
692 |
+ rm(x); gc() |
|
693 |
+ A[index, i] <- as.integer(normalize.quantiles.use.target(y, target=reference)) |
|
694 |
+ ##NP[, i] <- as.integer(normalize.quantiles.use.target(y, target=reference)) |
|
505 | 695 |
if (verbose) |
506 | 696 |
if (getRversion() > '2.7.0') setTxtProgressBar(pb, i) |
507 | 697 |
else cat(".") |
698 |
+ rm(y); gc() |
|
508 | 699 |
} |
509 |
- dimnames(NP) <- list(names(fid), sns) |
|
510 |
- ##dimnames(NP) <- list(map[, "man_fsetid"], sns) |
|
511 |
- res3 <- list(NP=NP, SKW=SKW) |
|
512 |
- cat("\n") |
|
513 |
- return(res3) |
|
700 |
+ message("Done") |
|
701 |
+ cnrmaResults <- list(SKW=SKW) |
|
702 |
+ save(cnrmaResults, file=cnrmaFile) |
|
703 |
+ if(isPackageLoaded("ff")) close(A) |
|
704 |
+ save(A, file=AFile) |
|
705 |
+ rm(list=ls()); gc() |
|
706 |
+ return() |
|
514 | 707 |
} |
515 | 708 |
|
516 | 709 |
getFlags <- function(object, PHI.THR){ |
... | ... |
@@ -555,21 +748,29 @@ thresholdCopynumber <- function(object){ |
555 | 748 |
return(object) |
556 | 749 |
} |
557 | 750 |
|
558 |
-##preprocessOptions <- function(crlmmFile="snpsetObject.rda", |
|
559 |
-## intensityFile="normalizedIntensities.rda", |
|
751 |
+##preprocessOptions <- function(callsFile="snpsetObject.rda", |
|
752 |
+## snprmaFile="normalizedIntensities.rda", |
|
560 | 753 |
## rgFile="rgFile.rda"){ |
561 | 754 |
## |
562 | 755 |
##} |
563 | 756 |
|
564 | 757 |
cnOptions <- function(outdir="./", |
565 | 758 |
cdfName, |
566 |
- crlmmFile, |
|
567 |
- intensityFile, |
|
568 |
- rgFile="rgFile.rda", |
|
759 |
+ AFile="A", |
|
760 |
+ BFile="B", |
|
761 |
+ callsFile="genotypes", |
|
762 |
+ rgFile="rgFile", |
|
763 |
+ cnFile="cnSet", |
|
764 |
+ cnrmaFile="cn_rmaResult", |
|
765 |
+ ##npBinary="NP.bin", ##name of file.backed.big.matrix |
|
766 |
+ ##npDesc="NP.desc", |
|
767 |
+ snprmaFile="snp_rmaResult", |
|
768 |
+ ##callsFile="calls.desc", |
|
769 |
+ ##confsFile="confsFile.desc", |
|
770 |
+ confsFile="confsFile", |
|
569 | 771 |
save.it=TRUE, |
570 | 772 |
save.cnset=TRUE, |
571 | 773 |
load.it=TRUE, |
572 |
- splitByChr=TRUE, |
|
573 | 774 |
MIN.OBS=3, |
574 | 775 |
MIN.SAMPLES=10, |
575 | 776 |
batch=NULL, |
... | ... |
@@ -588,11 +789,17 @@ cnOptions <- function(outdir="./", |
588 | 789 |
THR.NU.PHI=TRUE, |
589 | 790 |
thresholdCopynumber=TRUE, |
590 | 791 |
unlink=TRUE, |
591 |
- ##hiddenMarkovModel=FALSE, |
|
592 |
- ##circularBinarySegmentation=FALSE, |
|
593 |
-## cbsOpts=NULL, |
|
594 |
- ##hmmOpts=NULL, |
|
792 |
+ use.poe=FALSE, |
|
793 |
+ crlmmBatchSize=1100, ## this is about right for Affy 6.0 platform 1.8e6 * 1100 < 2^31 |
|
595 | 794 |
...){ |
795 |
+ if(isPackageLoaded("ff")){ |
|
796 |
+ AFile=paste(AFile, "ff", sep="_") |
|
797 |
+ BFile=paste(BFile, "ff", sep="_") |
|
798 |
+ confsFile=paste(confsFile, "ff", sep="_") |
|
799 |
+ callsFile=paste(callsFile, "ff", sep="_") |
|
800 |
+ cnFile=paste(cnFile, "ff", sep="") |
|
801 |
+ } |
|
802 |
+ if(use.poe) require(metaArray) |
|
596 | 803 |
if(missing(cdfName)) stop("must specify cdfName") |
597 | 804 |
if(!file.exists(outdir)){ |
598 | 805 |
message(outdir, " does not exist. Trying to create it.") |
... | ... |
@@ -602,23 +809,24 @@ cnOptions <- function(outdir="./", |
602 | 809 |
## if(hiddenMarkovModel){ |
603 | 810 |
## hmmOpts <- hmmOptions(...) |
604 | 811 |
## } |
605 |
- if(missing(crlmmFile)){ |
|
606 |
- crlmmFile <- file.path(outdir, "snpsetObject.rda") |
|
607 |
- } |
|
608 |
- if(missing(intensityFile)){ |
|
609 |
- intensityFile <- file.path(outdir, "normalizedIntensities.rda") |
|
610 |
- } |
|
812 |
+## if(missing(snprmaFile)){ |
|
813 |
+## snprmaFile <- file.path(outdir, "normalizedIntensities.rda") |
|
814 |
+## } |
|
611 | 815 |
if(is.null(batch)) |
612 | 816 |
stop("must specify batch -- should be the same length as the number of files") |
613 | 817 |
list(outdir=outdir, |
614 | 818 |
cdfName=cdfName, |
615 |
- crlmmFile=crlmmFile, |
|
616 |
- intensityFile=intensityFile, |
|
617 |
- rgFile=file.path(outdir, rgFile), |
|
819 |
+ callsFile=callsFile, |
|
820 |
+ confsFile=confsFile, |
|
821 |
+ AFile=AFile, |
|
822 |
+ BFile=BFile, |
|
823 |
+ snprmaFile=snprmaFile, |
|
824 |
+ cnrmaFile=cnrmaFile, |
|
825 |
+ rgFile=rgFile, |
|
826 |
+ cnFile=cnFile, |
|
618 | 827 |
save.it=save.it, |
619 | 828 |
save.cnset=save.cnset, |
620 | 829 |
load.it=load.it, |
621 |
- splitByChr=splitByChr, |
|
622 | 830 |
MIN.OBS=MIN.OBS, |
623 | 831 |
MIN.SAMPLES=MIN.SAMPLES, |
624 | 832 |
batch=batch, |
... | ... |
@@ -636,7 +844,10 @@ cnOptions <- function(outdir="./", |
636 | 844 |
MIN.PHI=MIN.PHI, |
637 | 845 |
THR.NU.PHI=THR.NU.PHI, |
638 | 846 |
thresholdCopynumber=thresholdCopynumber, |
639 |
- unlink=unlink) |
|
847 |
+ unlink=unlink, |
|
848 |
+ use.poe=use.poe, |
|
849 |
+ crlmmBatchSize=crlmmBatchSize) |
|
850 |
+## use.bigmemory=use.bigmemory) |
|
640 | 851 |
## hiddenMarkovModel=hiddenMarkovModel, |
641 | 852 |
## circularBinarySegmentation=circularBinarySegmentation, |
642 | 853 |
## cbsOpts=cbsOpts, |
... | ... |
@@ -846,6 +1057,27 @@ nonpolymorphic <- function(object, cnOptions, tmp.objects){ |
846 | 1057 |
return(object) |
847 | 1058 |
} |
848 | 1059 |
|
1060 |
+nonpolymorphic.poe <- function(object, cnOptions, tmp.object){ |
|
1061 |
+ require(metaArray) |
|
1062 |
+ nps <- log2(A(object)[!isSnp(object), ]) |
|
1063 |
+ nps <- (nps-rowMedians(nps))/rowMAD(nps) |
|
1064 |
+ runAvg <- apply(nps, 2, myfilter, filter=rep(1/10, 10)) |
|
1065 |
+ rownames(runAvg) <- featureNames(object)[!isSnp(object)] |
|
1066 |
+ rm.nas <- rowSums(is.na(runAvg)) == 0 |
|
1067 |
+ runAvg <- runAvg[rm.nas, ] |
|
1068 |
+ |
|
1069 |
+ poe.scale <- poe.em(runAvg, cl=rep(0, ncol(nps)))$data |
|
1070 |
+ pinegg <- piposg <- poe.scale |
|
1071 |
+ piposg[piposg < 0] <- 0 |
|
1072 |
+ pinegg[pinegg > 0] <- 0 |
|
1073 |
+ pinegg <- pinegg*-1 |
|
1074 |
+ pm.em <- 1*pinegg + 2*(1-pinegg-piposg) + 3*piposg |
|
1075 |
+ rownames(pm.em) <- rownames(runAvg) |
|
1076 |
+ CA(object)[match(rownames(pm.em), featureNames(object)), ] <- pm.em |
|
1077 |
+ ##CA(object)[!isSnp(object), ] <- pm.em |
|
1078 |
+ return(object) |
|
1079 |
+} |
|
1080 |
+ |
|
849 | 1081 |
##sufficient statistics on the intensity scale |
850 | 1082 |
withinGenotypeMoments <- function(object, cnOptions, tmp.objects){ |
851 | 1083 |
normal <- tmp.objects[["normal"]] |
... | ... |
@@ -977,15 +1209,18 @@ oneBatch <- function(object, cnOptions, tmp.objects){ |
977 | 1209 |
muA[index[[j]], j+2] <- mus[, 1] |
978 | 1210 |
muB[index[[j]], j+2] <- mus[, 2] |
979 | 1211 |
} |
980 |
- nobsA <- Ns[, "A"] > 10 |
|
981 |
- nobsB <- Ns[, "B"] > 10 |
|
982 |
- notMissing <- !(is.na(muA[, "A"]) | is.na(muA[, "B"]) | is.na(muB[, "A"]) | is.na(muB[, "B"])) |
|
983 |
- complete <- list() |
|
984 |
- complete[[1]] <- which(correct.orderA & correct.orderB & nobsA & notMissing) ##be selective here |
|
985 |
- complete[[2]] <- which(correct.orderA & correct.orderB & nobsB & notMissing) ##be selective here |
|
986 |
- size <- min(5000, length(complete[[1]])) |
|
987 |
- if(size == 5000) complete <- lapply(complete, function(x) sample(x, size)) |
|
1212 |
+ |
|
988 | 1213 |
if(CHR == 23){ |
1214 |
+ nobsA <- Ns[, "A"] > MIN.OBS |
|
1215 |
+ nobsB <- Ns[, "B"] > MIN.OBS |
|
1216 |
+ notMissing <- !(is.na(muA[, "A"]) | is.na(muA[, "B"]) | is.na(muB[, "A"]) | is.na(muB[, "B"])) |
|
1217 |
+ complete <- list() |
|
1218 |
+ complete[[1]] <- which(correct.orderA & correct.orderB & nobsA & notMissing) ##be selective here |
|
1219 |
+ complete[[2]] <- which(correct.orderA & correct.orderB & nobsB & notMissing) ##be selective here |
|
1220 |
+ if(length(complete[[1]]) < 1 | length(complete[[2]]) < 1) stop("Too few observations to estimate the center for 'A' and 'B' clusters on chrom X") |
|
1221 |
+ size <- min(5000, length(complete[[1]])) |
|
1222 |
+ if(size == 5000) complete <- lapply(complete, function(x) sample(x, size)) |
|
1223 |
+ |
|
989 | 1224 |
index <- list() |
990 | 1225 |
index[[1]] <- which(Ns[, "A"] == 0) |
991 | 1226 |
index[[2]] <- which(Ns[, "B"] == 0) |
... | ... |
@@ -1011,7 +1246,6 @@ oneBatch <- function(object, cnOptions, tmp.objects){ |
1011 | 1246 |
## snpflags <- envir[["snpflags"]] |
1012 | 1247 |
snpflags <- index[[1]] | index[[2]] | index[[3]] |
1013 | 1248 |
## snpflags[, p] <- index[[1]] | index[[2]] | index[[3]] |
1014 |
- |
|
1015 | 1249 |
##--------------------------------------------------------------------------- |
1016 | 1250 |
## Two genotype clusters not observed -- would sequence help? (didn't seem to) |
1017 | 1251 |
## 1. extract index of complete data |
... | ... |
@@ -1318,6 +1552,8 @@ posteriorProbability.snps <- function(object, cnOptions, tmp.objects=list()){ |
1318 | 1552 |
return(list(tmp.objects, posteriorProb)) |
1319 | 1553 |
} |
1320 | 1554 |
|
1555 |
+ |
|
1556 |
+ |
|
1321 | 1557 |
biasAdj <- function(object, cnOptions, tmp.objects){ |
1322 | 1558 |
gender <- object$gender |
1323 | 1559 |
CHR <- unique(chromosome(object)) |
... | ... |
@@ -1502,6 +1738,7 @@ computeCopynumber.CNSet <- function(object, cnOptions){ |
1502 | 1738 |
if(length(batch) != ncol(object)) stop("Batch must be the same length as the number of samples") |
1503 | 1739 |
MIN.SAMPLES <- cnOptions$MIN.SAMPLES |
1504 | 1740 |
verbose <- cnOptions$verbose |
1741 |
+ use.poe <- cnOptions[["use.poe"]] |
|
1505 | 1742 |
for(i in seq(along=unique(batch))){ |
1506 | 1743 |
PLATE <- unique(batch)[i] |
1507 | 1744 |
if(sum(batch == PLATE) < MIN.SAMPLES) { |
... | ... |
@@ -1518,7 +1755,8 @@ computeCopynumber.CNSet <- function(object, cnOptions){ |
1518 | 1755 |
} |
1519 | 1756 |
if(bias.adj){ |
1520 | 1757 |
if(verbose) message("Dropping samples with low posterior prob. of normal copy number (samples dropped is locus-specific)") |
1521 |
- tmp.objects <- biasAdjNP(object.batch, cnOptions, tmp.objects) |
|
1758 |
+ if(!use.poe) |
|
1759 |
+ tmp.objects <- biasAdjNP(object.batch, cnOptions, tmp.objects) |
|
1522 | 1760 |
tmp.objects <- biasAdj(object.batch, cnOptions, tmp.objects) |
1523 | 1761 |
if(verbose) message("Recomputing location and scale parameters") |
1524 | 1762 |
} |
... | ... |
@@ -1542,8 +1780,12 @@ computeCopynumber.CNSet <- function(object, cnOptions){ |
1542 | 1780 |
if(verbose) message("\nAllele specific copy number") |
1543 | 1781 |
object.batch <- polymorphic(object.batch, cnOptions, tmp.objects) |
1544 | 1782 |
if(any(!isSnp(object))){ ## there are nonpolymorphic probes |
1545 |
- if(verbose) message("\nCopy number for nonpolymorphic probes...") |
|
1546 |
- object.batch <- nonpolymorphic(object.batch, cnOptions, tmp.objects) |
|
1783 |
+ if(verbose) message("\nCopy number for nonpolymorphic probes...") |
|
1784 |
+ if(!use.poe){ |
|
1785 |
+ object.batch <- nonpolymorphic(object.batch, cnOptions, tmp.objects) |
|
1786 |
+ } else { |
|
1787 |
+ object.batch <- nonpolymorphic.poe(object.batch, cnOptions, tmp.objects) |
|
1788 |
+ } |
|
1547 | 1789 |
} |
1548 | 1790 |
##--------------------------------------------------------------------------- |
1549 | 1791 |
##Note: the replacement method multiples by 100 |
... | ... |
@@ -1577,9 +1819,117 @@ computeCopynumber.CNSet <- function(object, cnOptions){ |
1577 | 1819 |
return(object) |
1578 | 1820 |
} |
1579 | 1821 |
|
1822 |
+## cite metaArray: Choi/Ghosh |
|
1823 |
+crlmm_poe <- function(mat, cl, threshold=0.00001, every=100,use.mad=TRUE) { |
|
1824 |
+ mat <- as.matrix(mat) |
|
1825 |
+ nc <- ncol(mat) |
|
1826 |
+ nr <- nrow(mat) |
|
1827 |
+ if(all(is.null(cl))) cl <- rep(0,dim(mat)[2]) |
|
1828 |
+ cat("Number of Samples:", nc, "\n") |
|
1829 |
+ cat("Number of Genes:", nr, "\n") |
|
1830 |
+ cat("This model assumes that the samples are centered and scaled.\n") |
|
1831 |
+ new.mat <- matrix(0,nr,nc) |
|
1832 |
+ med.expr <- apply(mat,1,median,na.rm=TRUE) |
|
1833 |
+ new.mat <- sweep(mat,1,med.expr) |
|
1834 |
+ for(i in 1:nr) { |
|
1835 |
+ if(sum(is.na(as.numeric(mat[i,]))) > .25 * nc ) stop("More than 25% missing values for gene", i, "\n") |
|
1836 |
+ zvec <- crlmm_fit.em(as.numeric(mat[i,]), cl, threshold=threshold, use.mad=use.mad) |
|
1837 |
+ new.mat[i,] <- zvec$expr |
|
1838 |
+ if(i%%every==0) cat(i, "genes fitted\n") |
|
1839 |
+ } |
|
1840 |
+ rownames(new.mat) <- rownames(mat) |
|
1841 |
+ colnames(new.mat) <- colnames(mat) |
|
1842 |
+ return(list(data=new.mat)) |
|
1843 |
+} |
|
1580 | 1844 |
|
1581 |
- |
|
1582 |
- |
|
1583 |
- |
|
1584 |
- |
|
1585 |
- |
|
1845 |
+crlmm_fit.em <- function(x, cl, threshold=1e-6, use.mad=TRUE){ |
|
1846 |
+ sup <- sum(cl==0) > 0 && sum(cl==1) > 0 |
|
1847 |
+ x <- as.numeric(x) |
|
1848 |
+ len <- length(x) |
|
1849 |
+ z <- rep(0,length(x)) |
|
1850 |
+ log.lik <- 1000 |
|
1851 |
+ lik.rec <- NULL |
|
1852 |
+ num.iter <- 0 |
|
1853 |
+ a <- min(x,na.rm=TRUE); b <- max(x,na.rm=TRUE) |
|
1854 |
+ err <- err.old <- 1 |
|
1855 |
+ if(!sup) { |
|
1856 |
+ z <- find.init(x) |
|
1857 |
+ Pi <- mean(z) |
|
1858 |
+ mu <- sum((1-z)*x) / sum(1-z) |
|
1859 |
+ sigmasq <- sum((1-z)*(x-mu)^2) / sum(1-z) |
|
1860 |
+ tt <- len |
|
1861 |
+ while(err > threshold) { |
|
1862 |
+ num.iter <- num.iter + 1 |
|
1863 |
+ log.lik.old <- log.lik |
|
1864 |
+ err.old <- err |
|
1865 |
+ ## E Step |
|
1866 |
+ est.u <- dunif(x,a,b) |
|
1867 |
+ est.u.p <- Pi * est.u |
|
1868 |
+ est.n <- dnorm(x,mu,sqrt(sigmasq)) |
|
1869 |
+ est.n.p <- (1-Pi) * est.n |
|
1870 |
+ z <- est.u.p / (est.n.p + est.u.p) |
|
1871 |
+ |
|
1872 |
+ if(any(is.na(z))) stop("NA occurred in imputation\n") |
|
1873 |
+ ## M Step |
|
1874 |
+ mu <- sum((1-z)*x) / sum(1-z) |
|
1875 |
+ sigmasq <- sum((1-z)*((x-mu)^2)) / sum(1-z) |
|
1876 |
+ Pi <- sum(z) / len |
|
1877 |
+ sgn.z <- ifelse(x < mu, -1, 1) |
|
1878 |
+ |
|
1879 |
+ ## Likelihood |
|
1880 |
+ est.u <- dunif(x,a,b) |
|
1881 |
+ est.u.p <- Pi * est.u |
|
1882 |
+ est.n <- dnorm(x,mu,sqrt(sigmasq)) |
|
1883 |
+ est.n.p <- (1-Pi) * est.n |
|
1884 |
+ log.lik <- sum(log(est.u.p + est.n.p)) |
|
1885 |
+ err <- abs(log.lik.old - log.lik) |
|
1886 |
+ if(num.iter != 1) lik.rec[num.iter-1] <- log.lik |
|
1887 |
+ } |
|
1888 |
+ } else { |
|
1889 |
+ tt <- sum(cl==0) |
|
1890 |
+ z[cl==0] <- runif(tt,0,1) |
|
1891 |
+ Pi <- mean(z[cl==0]) |
|
1892 |
+ mu <- sum((1-z)*x) / sum(1-z) |
|
1893 |
+ sigmasq <- sum((1-z)*(x-mu)^2) / sum(1-z) |
|
1894 |
+ |
|
1895 |
+ while(err > threshold) { |
|
1896 |
+ num.iter <- num.iter + 1 |
|
1897 |
+ log.lik.old <- log.lik |
|
1898 |
+ err.old <- err |
|
1899 |
+ |
|
1900 |
+ ## E Step |
|
1901 |
+ est.u <- dunif(x,a,b) |
|
1902 |
+ est.u.p <- Pi * est.u |
|
1903 |
+ est.n <- dnorm(x,mu,sqrt(sigmasq)) |
|
1904 |
+ est.n.p <- (1-Pi) * est.n |
|
1905 |
+ z <- est.u.p / (est.n.p + est.u.p) |
|
1906 |
+ z[cl==1] <- 0 |
|
1907 |
+ if(any(is.na(z))) stop("NA occurred in imputation\n") |
|
1908 |
+ ## M Step |
|
1909 |
+ mu <- sum((1-z)*x) / sum(1-z) |
|
1910 |
+ sigmasq <- sum((1-z)*((x-mu)^2)) / sum(1-z) |
|
1911 |
+ Pi <- mean(z[cl==0]) |
|
1912 |
+ sgn.z <- ifelse(x < mu, -1, 1) |
|
1913 |
+ |
|
1914 |
+ ## Likelihood |
|
1915 |
+ est.u <- dunif(x,a,b) |
|
1916 |
+ est.u.p <- Pi * est.u |
|
1917 |
+ est.n <- dnorm(x,mu,sqrt(sigmasq)) |
|
1918 |
+ est.n.p <- (1-Pi) * est.n |
|
1919 |
+ log.lik <- sum(log(est.u.p[cl==0] + est.n.p[cl==0])) + sum(log(est.n[cl==1])) |
|
1920 |
+ err <- abs(log.lik.old - log.lik) |
|
1921 |
+ if(num.iter != 1) lik.rec[num.iter-1] <- log.lik |
|
1922 |
+ } |
|
1923 |
+ } |
|
1924 |
+ est.u.p <- Pi * dunif(x,a,b) |
|
1925 |
+ est.n.p <- (1-Pi) * dnorm(x,mu,sqrt(sigmasq)) |
|
1926 |
+ est.u.mu <- Pi * dunif(mu,a,b) |
|
1927 |
+ est.n.mu <- (1-Pi) * dnorm(mu,mu,sqrt(sigmasq)) |
|
1928 |
+ z0 <- est.u.p / (est.n.p + est.u.p) |
|
1929 |
+ zmu <- est.u.mu / (est.u.mu + est.n.mu) |
|
1930 |
+ sgn.z0 <- ifelse(x < mu, -1, 1) |
|
1931 |
+ #loc <- (max(lik.rec) != lik.rec[length(lik.rec)]) |
|
1932 |
+ expr <- rep(0, len) |
|
1933 |
+ expr <- (z0 - zmu) * sgn.z0 |
|
1934 |
+ return(list(expr=expr, a=a, b=b, sigmasq=sigmasq, mu=mu, Pi=Pi, lik.rec=lik.rec)) |
|
1935 |
+} |
... | ... |
@@ -1,48 +1,103 @@ |
1 | 1 |
crlmm <- function(filenames, row.names=TRUE, col.names=TRUE, |
2 | 2 |
probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
3 |
- save.it=FALSE, load.it=FALSE, intensityFile, |
|
3 |
+ save.it=FALSE, load.it=FALSE, intensityFile, callsFile, confsFile, |
|
4 |
+ AFile, BFile, |
|
4 | 5 |
mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
5 | 6 |
cdfName, sns, recallMin=10, recallRegMin=1000, |
6 |
- returnParams=FALSE, badSNP=.7){ |
|
7 |
- if ((load.it | save.it) & missing(intensityFile)) |
|
8 |
- stop("'intensityFile' is missing, and you chose either load.it or save.it") |
|
9 |
- if (missing(sns)) sns <- basename(filenames) |
|
10 |
- if (!missing(intensityFile)) |
|
11 |
- if (load.it & !file.exists(intensityFile)){ |
|
12 |
- load.it <- FALSE |
|
13 |
- message("File ", intensityFile, " does not exist.") |
|
14 |
- message("Not loading it, but running SNPRMA from scratch.") |
|
15 |
- } |
|
16 |
- if (!load.it){ |
|
17 |
- res <- snprma(filenames, fitMixture=TRUE, |
|
18 |
- mixtureSampleSize=mixtureSampleSize, verbose=verbose, |
|
19 |
- eps=eps, cdfName=cdfName, sns=sns) |
|
20 |
- if(save.it){ |
|
21 |
- t0 <- proc.time() |
|
22 |
- save(res, file=intensityFile) |
|
23 |
- t0 <- proc.time()-t0 |
|
24 |
- if (verbose) message("Used ", t0[3], " seconds to save ", intensityFile, ".") |
|
25 |
- } |
|
26 |
- }else{ |
|
27 |
- if (verbose) message("Loading ", intensityFile, ".") |
|
28 |
- obj <- load(intensityFile) |
|
29 |
- if (verbose) message("Done.") |
|
30 |
- if (obj != "res") |
|
31 |
- stop("Object in ", intensityFile, " seems to be invalid.") |
|
32 |
- } |
|
33 |
- if(row.names) row.names=res$gns else row.names=NULL |
|
34 |
- if(col.names) col.names=res$sns else col.names=NULL |
|
35 |
- res2 <- crlmmGT(res[["A"]], res[["B"]], res[["SNR"]], |
|
36 |
- res[["mixtureParams"]], res[["cdfName"]], |
|
37 |
- gender=gender, row.names=row.names, |
|
38 |
- col.names=col.names, recallMin=recallMin, |
|
39 |
- recallRegMin=1000, SNRMin=SNRMin, |
|
40 |
- returnParams=returnParams, badSNP=badSNP, |
|
41 |
- verbose=verbose) |
|
42 |
- |
|
43 |
- res2[["SNR"]] <- res[["SNR"]] |
|
44 |
- res2[["SKW"]] <- res[["SKW"]] |
|
45 |
- return(list2SnpSet(res2, returnParams=returnParams)) |
|
7 |
+ returnParams=FALSE, badSNP=0.7){ |
|
8 |
+ if ((load.it | save.it) & missing(intensityFile)) |
|
9 |
+ stop("'intensityFile' is missing, and you chose either load.it or save.it") |
|
10 |
+ if (missing(sns)) sns <- basename(filenames) |
|
11 |
+ if (!missing(intensityFile)) |
|
12 |
+ if (load.it & !file.exists(intensityFile)){ |
|
13 |
+ load.it <- FALSE |
|
14 |
+ message("File ", intensityFile, " does not exist.") |
|
15 |
+ message("Not loading it, but running SNPRMA from scratch.") |
|
16 |
+ } |
|
17 |
+ if (!load.it){ |
|
18 |
+ res <- snprma(filenames, |
|
19 |
+ fitMixture=TRUE, |
|
20 |
+ mixtureSampleSize=mixtureSampleSize, |
|
21 |
+ verbose=verbose, |
|
22 |
+ eps=eps, |
|
23 |
+ cdfName=cdfName, |
|
24 |
+ sns=sns, |
|
25 |
+ AFile=AFile, |
|
26 |
+ BFile=BFile) |
|
27 |
+ save(res, file=intensityFile)##file.path(dirname(intensityFile), "res.rda")) |
|
28 |
+ } else { |
|
29 |
+ message("Loading quantile normalized intensities...") |
|
30 |
+ load(intensityFile) |
|
31 |
+ res <- get("res") |
|
32 |
+ } |
|
33 |
+ load(AFile) |
|
34 |
+ load(BFile) |
|
35 |
+ if(isPackageLoaded("ff")) {open(A); open(B)} |
|
36 |
+ if(row.names) row.names <- res$gns |
|
37 |
+ if(col.names) col.names <- res$sns |
|
38 |
+## }else{ |
|
39 |
+## if (verbose) message("Loading ", intensityFile, ".") |
|
40 |
+## obj <- load(intensityFile) |
|
41 |
+## if (verbose) message("Done.") |
|
42 |
+## if (obj != "res") |
|
43 |
+## stop("Object in ", intensityFile, " seems to be invalid.") |
|
44 |
+## } |
|
45 |
+ ##path <- system.file("extdata", package=paste(cdfName, "Crlmm", sep="")) |
|
46 |
+ ##load(file.path(path, "snpProbes.rda")) |
|
47 |
+ gns <- res[["gns"]] |
|
48 |
+ sns <- colnames(A) |
|
49 |
+ ## |
|
50 |
+ ##Ensures that if ff package is loaded, ordinary matrices are still passed to crlmmGT |
|
51 |
+ aMatrix <- A[1:length(gns), ] |
|
52 |
+ bMatrix <- B[1:length(gns), ] |
|
53 |
+ if(isPackageLoaded("ff")) close(B) |
|
54 |
+ rm(B); gc() |
|
55 |
+ ## res2 <- crlmmGT(res[["A"]], res[["B"]], res[["SNR"]], |
|
56 |
+ res2 <- crlmmGT(aMatrix, bMatrix, res[["SNR"]], |
|
57 |
+ res[["mixtureParams"]], res[["cdfName"]], |
|
58 |
+ gender=gender, row.names=row.names, |
|
59 |
+ col.names=col.names, recallMin=recallMin, |
|
60 |
+ recallRegMin=1000, SNRMin=SNRMin, |
|
61 |
+ returnParams=returnParams, badSNP=badSNP, |
|
62 |
+ verbose=verbose) |
|
63 |
+ rm(aMatrix, bMatrix); gc() |
|
64 |
+ res2[["SNR"]] <- res[["SNR"]] |
|
65 |
+ res2[["SKW"]] <- res[["SKW"]] |
|
66 |
+ if(isPackageLoaded("ff")){ |
|
67 |
+ if(file.exists(callsFile)) unlink(callsFile) |
|
68 |
+ calls <- ff(dim=c(nrow(A), ncol(A)), |
|
69 |
+ vmode="integer", |
|
70 |
+ finalizer="close", |
|
71 |
+ dimnames=dimnames(A), |
|
72 |
+ overwrite=TRUE) |
|
73 |
+ } else{ |
|
74 |
+ calls <- matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A)) |
|
75 |
+ } |
|
76 |
+ calls[1:length(gns), ] <- res2[["calls"]] |
|
77 |
+ if(isPackageLoaded("ff")) close(calls) |
|
78 |
+ save(calls, file=callsFile) |
|
79 |
+ rm(calls); gc() |
|
80 |
+ if(isPackageLoaded("ff")){ |
|
81 |
+ if(file.exists(confsFile)) unlink(confsFile) |
|
82 |
+ confs <- ff(dim=c(nrow(A), ncol(A)), |
|
83 |
+ vmode="integer", |
|
84 |
+ finalizer="close", |
|
85 |
+ dimnames=dimnames(A), |
|
86 |
+ overwrite=TRUE) |
|
87 |
+ } else{ |
|
88 |
+ confs <- matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A)) |
|
89 |
+ } |
|
90 |
+ confs[1:length(gns), ] <- res2[["confs"]] |
|
91 |
+ if(isPackageLoaded("ff")) close(confs) |
|
92 |
+ save(confs, file=confsFile) |
|
93 |
+ rm(confs); gc() |
|
94 |
+ sampleStats <- data.frame(SNR=res2[["SNR"]], |
|
95 |
+ SKW=res2[["SKW"]], |
|
96 |
+ gender=res2[["gender"]]) |
|
97 |
+ save(sampleStats, file=file.path(dirname(confsFile), "sampleStats.rda")) |
|
98 |
+ if(isPackageLoaded("ff")){ close(A)} |
|
99 |
+ rm(list=ls()); gc() |
|
100 |
+ return() |
|
46 | 101 |
} |
47 | 102 |
|
48 | 103 |
crlmmGT <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
... | ... |
@@ -50,13 +105,10 @@ crlmmGT <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
50 | 105 |
SNRMin=5, recallMin=10, recallRegMin=1000, |
51 | 106 |
gender=NULL, desctrucitve=FALSE, verbose=TRUE, |
52 | 107 |
returnParams=FALSE, badSNP=.7){ |
53 |
- |
|
54 | 108 |
keepIndex <- which(SNR>SNRMin) |
55 | 109 |
if(length(keepIndex)==0) stop("No arrays above quality threshold!") |
56 |
- |
|
57 | 110 |
NC <- ncol(A) |
58 | 111 |
NR <- nrow(A) |
59 |
- |
|
60 | 112 |
pkgname <- getCrlmmAnnotationName(cdfName) |
61 | 113 |
if(!require(pkgname, character.only=TRUE, quietly=!verbose)){ |
62 | 114 |
suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="") |
... | ... |
@@ -67,7 +119,6 @@ crlmmGT <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
67 | 119 |
stop("Package ", pkgname, " could not be found.") |
68 | 120 |
rm(suggCall, msg) |
69 | 121 |
} |
70 |
- |
|
71 | 122 |
if(verbose) message("Loading annotations.") |
72 | 123 |
loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname) |
73 | 124 |
loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname) |
... | ... |
@@ -92,7 +143,6 @@ crlmmGT <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
92 | 143 |
gender <- kmeans(XMedian, c(min(XMedian[SNR>SNRMin]), max(XMedian[SNR>SNRMin])))[["cluster"]] |
93 | 144 |
} |
94 | 145 |
} |
95 |
- |
|
96 | 146 |
Indexes <- list(autosomeIndex, XIndex, YIndex) |
97 | 147 |
cIndexes <- list(keepIndex, |
98 | 148 |
keepIndex[which(gender[keepIndex]==2)], |
99 | 149 |
deleted file mode 100644 |
... | ... |
@@ -1,13 +0,0 @@ |
1 |
-setMethod("A", "AlleleSet", function(object) allele(object, "A")) |
|
2 |
-setMethod("B", "AlleleSet", function(object) allele(object, "B")) |
|
3 |
-##setReplaceMethod("A", signature(object="AlleleSet", value="matrix"), |
|
4 |
-## function(object, value){ |
|
5 |
-## assayDataElementReplace(object, "senseThetaA", value) |
|
6 |
-## }) |
|
7 |
-##setReplaceMethod("B", signature(object="AlleleSet", value="matrix"), |
|
8 |
-## function(object, value){ |
|
9 |