git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@44783 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
Package: crlmm |
2 | 2 |
Type: Package |
3 | 3 |
Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays. |
4 |
-Version: 1.5.25 |
|
5 |
-Date: 2010-02-20 |
|
4 |
+Version: 1.5.26 |
|
5 |
+Date: 2010-02-21 |
|
6 | 6 |
Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au> |
7 | 7 |
Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
8 | 8 |
Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 5.0, 6.0, and Illumina platforms |
... | ... |
@@ -11,7 +11,8 @@ Depends: R (>= 2.11.0), methods, Biobase (>= 2.7.2), oligoClasses (>= 1.9.28) |
11 | 11 |
Imports: affyio (>= 1.15.2), preprocessCore, utils, stats, genefilter, splines, mvtnorm, ellipse, SNPchip |
12 | 12 |
Enhances: ff |
13 | 13 |
Suggests: hapmapsnp5, hapmapsnp6, genomewidesnp5Crlmm (>= 1.0.2),genomewidesnp6Crlmm (>= 1.0.2), snpMatrix, metaArray |
14 |
-Collate: AllGenerics.R |
|
14 |
+Collate: AllClasses.R |
|
15 |
+ AllGenerics.R |
|
15 | 16 |
methods-CNSet.R |
16 | 17 |
methods-eSet.R |
17 | 18 |
methods-SnpSuperSet.R |
... | ... |
@@ -242,45 +242,47 @@ predictGender <- function(res, cdfName="genomewidesnp6", SNRMin=5){ |
242 | 242 |
|
243 | 243 |
|
244 | 244 |
|
245 |
-crlmmCopynumber <- function(filenames, cnOptions, object, ...){ |
|
246 |
- cnOpts <- cnOptions |
|
247 |
- batchSize <- cnOptions[["crlmmBatchSize"]] |
|
248 |
- batch <- cnOptions[["batch"]] |
|
249 |
- outdir <- cnOptions[["outdir"]] |
|
250 |
- ##if necessary, split plates in separate batches |
|
251 |
- if(length(filenames) > batchSize){ |
|
252 |
- L <- length(table(batch)) |
|
253 |
- message("Processing the files in") |
|
254 |
- batchList <- split(names(table(batch)), rep(1:L, each=10, length.out=L)) |
|
255 |
- for(i in seq(along=batchList)){ |
|
256 |
- index <- as.character(batch) %in% as.character(batchList[[i]]) |
|
257 |
- fns <- filenames[index] |
|
258 |
- cnOpts[["batch"]] <- cnOptions[["batch"]][index] |
|
259 |
- cnOpts[["AFile"]] <- paste(outdir, "/", paste(cnOptions[["AFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
260 |
- cnOpts[["BFile"]] <- paste(outdir, "/", paste(cnOptions[["BFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
261 |
- cnOpts[["callsFile"]] <- paste(outdir, "/", paste(cnOptions[["callsFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
262 |
- cnOpts[["confsFile"]] <- paste(outdir, "/", paste(cnOptions[["confsFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
263 |
- cnOpts[["snprmaFile"]] <- paste(outdir, "/", paste(cnOptions[["snprmaFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
264 |
- cnOpts[["cnrmaFile"]] <- paste(outdir, "/", paste(cnOptions[["cnrmaFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
265 |
- cnOpts[["cnFile"]] <- file.path(outdir, paste(cnOptions[["cnFile"]], i, sep="_crlmmBatch")) |
|
266 |
- cnOpts[["rgFile"]] <- file.path(outdir, paste(cnOptions[["rgFile"]], i, sep="_crlmmBatch")) |
|
267 |
- crlmmCopynumber.crlmmBatch(fns, cnOpts, object, ...) |
|
268 |
- } |
|
269 |
- } else { |
|
270 |
- cnOpts[["AFile"]] <- paste(outdir, "/", cnOptions[["AFile"]], ".rda", sep="") |
|
271 |
- cnOpts[["BFile"]] <- paste(outdir, "/", cnOptions[["BFile"]], ".rda", sep="") |
|
272 |
- cnOpts[["callsFile"]] <- paste(outdir, "/", cnOptions[["callsFile"]], ".rda", sep="") |
|
273 |
- cnOpts[["confsFile"]] <- paste(outdir, "/", cnOptions[["confsFile"]], ".rda", sep="") |
|
274 |
- cnOpts[["snprmaFile"]] <- paste(outdir, "/",cnOptions[["snprmaFile"]], ".rda", sep="") |
|
275 |
- cnOpts[["cnrmaFile"]] <- paste(outdir, "/", cnOptions[["cnrmaFile"]], ".rda", sep="") |
|
276 |
- cnOpts[["cnFile"]] <- file.path(outdir, cnOptions[["cnFile"]]) |
|
277 |
- cnOpts[["rgFile"]] <- file.path(outdir, cnOptions[["rgFile"]]) |
|
278 |
- crlmmCopynumber.crlmmBatch(filenames, cnOpts, object, ...) |
|
279 |
- } |
|
280 |
-} |
|
245 |
+##crlmmCopynumber <- function(filenames, cnOptions, object, ...){ |
|
246 |
+## cnOpts <- cnOptions |
|
247 |
+## batchSize <- cnOptions[["crlmmBatchSize"]] |
|
248 |
+## batch <- cnOptions[["batch"]] |
|
249 |
+## outdir <- cnOptions[["outdir"]] |
|
250 |
+## ##if necessary, split plates in separate batches |
|
251 |
+## if(length(filenames) > batchSize){ |
|
252 |
+## L <- length(table(batch)) |
|
253 |
+## message("Processing the files in") |
|
254 |
+## batchList <- split(names(table(batch)), rep(1:L, each=10, length.out=L)) |
|
255 |
+## for(i in seq(along=batchList)){ |
|
256 |
+## index <- as.character(batch) %in% as.character(batchList[[i]]) |
|
257 |
+## fns <- filenames[index] |
|
258 |
+## cnOpts[["batch"]] <- cnOptions[["batch"]][index] |
|
259 |
+## cnOpts[["AFile"]] <- paste(outdir, "/", paste(cnOptions[["AFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
260 |
+## cnOpts[["BFile"]] <- paste(outdir, "/", paste(cnOptions[["BFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
261 |
+## cnOpts[["CAFile"]] <- paste(outdir, "/", paste(cnOptions[["CAFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
262 |
+##g cnOpts[["CBFile"]] <- paste(outdir, "/", paste(cnOptions[["CBFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
263 |
+## cnOpts[["callsFile"]] <- paste(outdir, "/", paste(cnOptions[["callsFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
264 |
+## cnOpts[["confsFile"]] <- paste(outdir, "/", paste(cnOptions[["confsFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
265 |
+## cnOpts[["snprmaFile"]] <- paste(outdir, "/", paste(cnOptions[["snprmaFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
266 |
+## cnOpts[["cnrmaFile"]] <- paste(outdir, "/", paste(cnOptions[["cnrmaFile"]], i, sep="_crlmmBatch"), ".rda", sep="") |
|
267 |
+## cnOpts[["cnFile"]] <- file.path(outdir, paste(cnOptions[["cnFile"]], i, sep="_crlmmBatch")) |
|
268 |
+## cnOpts[["rgFile"]] <- file.path(outdir, paste(cnOptions[["rgFile"]], i, sep="_crlmmBatch")) |
|
269 |
+## crlmmCopynumber.crlmmBatch(fns, cnOpts, object, ...) |
|
270 |
+## } |
|
271 |
+## } else { |
|
272 |
+## cnOpts[["AFile"]] <- paste(outdir, "/", cnOptions[["AFile"]], ".rda", sep="") |
|
273 |
+## cnOpts[["BFile"]] <- paste(outdir, "/", cnOptions[["BFile"]], ".rda", sep="") |
|
274 |
+## cnOpts[["callsFile"]] <- paste(outdir, "/", cnOptions[["callsFile"]], ".rda", sep="") |
|
275 |
+## cnOpts[["confsFile"]] <- paste(outdir, "/", cnOptions[["confsFile"]], ".rda", sep="") |
|
276 |
+## cnOpts[["snprmaFile"]] <- paste(outdir, "/",cnOptions[["snprmaFile"]], ".rda", sep="") |
|
277 |
+## cnOpts[["cnrmaFile"]] <- paste(outdir, "/", cnOptions[["cnrmaFile"]], ".rda", sep="") |
|
278 |
+## cnOpts[["cnFile"]] <- file.path(outdir, cnOptions[["cnFile"]]) |
|
279 |
+## cnOpts[["rgFile"]] <- file.path(outdir, cnOptions[["rgFile"]]) |
|
280 |
+## crlmmCopynumber.crlmmBatch(filenames, cnOpts, object, ...) |
|
281 |
+## } |
|
282 |
+##} |
|
281 | 283 |
|
282 | 284 |
|
283 |
-crlmmCopynumber.crlmmBatch <- function(filenames, cnOptions, object, ...){ |
|
285 |
+crlmmCopynumber <- function(filenames, cnOptions, object, ...){ |
|
284 | 286 |
set.seed(cnOptions[["seed"]]) ##for reproducibility |
285 | 287 |
if(!missing(object)){ |
286 | 288 |
stopifnot(class(object) == "CNSet") |
... | ... |
@@ -292,24 +294,30 @@ crlmmCopynumber.crlmmBatch <- function(filenames, cnOptions, object, ...){ |
292 | 294 |
##crlmmResults <- crlmmWrapper(filenames, cnOptions, ...) |
293 | 295 |
crlmmWrapper(filenames, cnOptions, ...) |
294 | 296 |
} |
297 |
+ verbose <- cnOptions[["verbose"]] |
|
295 | 298 |
load.it <- cnOptions[["load.it"]] |
296 | 299 |
outdir <- cnOptions[["outdir"]] |
297 | 300 |
snprmaFile <- cnOptions[["snprmaFile"]] |
298 | 301 |
cnFile <- cnOptions[["cnFile"]] |
299 | 302 |
cnrmaFile <- cnOptions[["cnrmaFile"]] |
303 |
+ snprmaFile <- cnOptions[["snprmaFile"]] |
|
300 | 304 |
callsFile <- cnOptions[["callsFile"]] |
301 | 305 |
confsFile <- cnOptions[["confsFile"]] |
302 | 306 |
AFile <- cnOptions[["AFile"]] |
303 | 307 |
BFile <- cnOptions[["BFile"]] |
308 |
+ CAFile <- cnOptions[["CAFile"]] |
|
309 |
+ CBFile <- cnOptions[["CBFile"]] |
|
304 | 310 |
path <- system.file("extdata", package=paste(cnOptions[["cdfName"]], "Crlmm", sep="")) |
305 | 311 |
load(file.path(path, "snpProbes.rda")) |
306 | 312 |
snpProbes <- get("snpProbes") |
307 | 313 |
load(file.path(path, "cnProbes.rda")) |
308 | 314 |
cnProbes <- get("cnProbes") |
309 | 315 |
k <- grep("chr", colnames(snpProbes)) |
310 |
- message("Attaching quantile-normalized intensities...") |
|
316 |
+ if(verbose) message("Loading quantile-normalized intensities...") |
|
311 | 317 |
##cwd <- getwd() |
312 | 318 |
##setwd(dirname(snprmaFile[1])) |
319 |
+ load(snprmaFile) |
|
320 |
+ res <- get("res") |
|
313 | 321 |
load(AFile) |
314 | 322 |
if(isPackageLoaded("ff")) open(A) |
315 | 323 |
load(BFile) |
... | ... |
@@ -321,28 +329,27 @@ crlmmCopynumber.crlmmBatch <- function(filenames, cnOptions, object, ...){ |
321 | 329 |
load(confsFile) |
322 | 330 |
confs <- get("confs") |
323 | 331 |
if(isPackageLoaded("ff")) open(confs) |
324 |
- fns <- rownames(A) |
|
332 |
+ ##fns <- rownames(A) |
|
333 |
+ fns <- c(res[["gns"]], res[["cnnames"]]) |
|
334 |
+ if(length(fns) != nrow(A)) stop("check featurenames. fns should be the rownames of A") |
|
325 | 335 |
nr <- nrow(A); nc <- ncol(A) |
326 | 336 |
if(isPackageLoaded("ff")){ |
327 |
- ##initialize CA, CB |
|
328 |
- if(!file.exists(file.path(outdir, "CA.rda"))){ |
|
329 |
- CA <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dimnames(A)) |
|
330 |
- |
|
331 |
- } else{ |
|
332 |
- load(file.path(outdir, "CA.rda")) |
|
333 |
- if(!all(dim(CA) == dim(A))) |
|
334 |
- CA <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dimnames(A), overwrite=TRUE) |
|
335 |
- open(CA) |
|
336 |
- } |
|
337 |
- if(!file.exists(file.path(outdir, "CB.rda"))){ |
|
338 |
- CB <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dimnames(A)) |
|
339 |
- } else{ |
|
340 |
- load(file.path(outdir, "CB.rda")) |
|
341 |
- if(!all(dim(CB) == dim(A))) |
|
342 |
- CB <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dimnames(A), overwrite=TRUE) |
|
343 |
- open(CB) |
|
344 |
- } |
|
345 |
- } |
|
337 |
+ if(file.exists(CAFile)){ |
|
338 |
+ load(CAFile) |
|
339 |
+ if(is.null(dim(CA)) | !all(dim(CA) == dim(A))) { |
|
340 |
+ unlink(CAFile) |
|
341 |
+ CA <- initializeBigMatrix(nr, nc) |
|
342 |
+ } |
|
343 |
+ } else CA <- initializeBigMatrix(nr, nc) |
|
344 |
+ if(file.exists(CBFile)){ |
|
345 |
+ load(CBFile) |
|
346 |
+ if(is.null(dim(CB)) | !all(dim(CB) == dim(A))) { |
|
347 |
+ CB <- initializeBigMatrix(nr, nc) |
|
348 |
+ } |
|
349 |
+ } else CB <- initializeBigMatrix(nr, nc) |
|
350 |
+ open(CA) |
|
351 |
+ open(CB) |
|
352 |
+ } |
|
346 | 353 |
##cnFile <- cnOptions[["cnFile"]] |
347 | 354 |
chromosome <- cnOptions[["chromosome"]] |
348 | 355 |
SNRmin <- cnOptions[["SNRmin"]] |
... | ... |
@@ -353,29 +360,47 @@ crlmmCopynumber.crlmmBatch <- function(filenames, cnOptions, object, ...){ |
353 | 360 |
data=scanDates, |
354 | 361 |
varMetadata=data.frame(labelDescription=colnames(scanDates), |
355 | 362 |
row.names=colnames(scanDates))) |
356 |
- load(file.path(cnOptions[["outdir"]], "sampleStats.rda")) |
|
357 |
- sampleStats <- data.frame(sampleStats, batch=cnOptions[["batch"]]) |
|
363 |
+ ##load(file.path(cnOptions[["outdir"]], "sampleStats.rda")) |
|
364 |
+ sampleStats <- data.frame(SKW=res$SKW, |
|
365 |
+ SNR=res$SNR, |
|
366 |
+ mixtureParams=res$mixtureParams, |
|
367 |
+ gender=res$gender, |
|
368 |
+ batch=cnOptions[["batch"]]) |
|
369 |
+ ##sampleStats, batch=cnOptions[["batch"]]) |
|
358 | 370 |
pD <- new("AnnotatedDataFrame", |
359 | 371 |
data=sampleStats, |
360 | 372 |
varMetadata=data.frame(labelDescription=colnames(sampleStats))) |
361 | 373 |
batch <- cnOptions[["batch"]] |
374 |
+ if(isPackageLoaded("ff")){ |
|
375 |
+## featureDataFF <- ffdf(position=ff(rep(0L, length(fns)), vmode="integer", finalizer="close"), |
|
376 |
+## chromosome=ff(rep(0L, length(fns)), vmode="integer", finalizer="close"), |
|
377 |
+## isSnp=ff(rep(0L, length(fns)), vmode="integer", finalizer="close"), |
|
378 |
+## row.names=fns) |
|
379 |
+## fD <- vector("list", length(chromosome)) |
|
380 |
+ featureDataFF <- ff(dim=c(nrow(A), 17*length(unique(batch))+3), vmode="double", finalizer="close", |
|
381 |
+ overwrite=TRUE) |
|
382 |
+ } |
|
362 | 383 |
for(CHR in chromosome){ |
363 | 384 |
snps <- rownames(snpProbes)[snpProbes[, k] == CHR] |
364 | 385 |
cns <- rownames(cnProbes)[cnProbes[, k] == CHR] |
365 |
- index.snps <- match(snps, rownames(A)) |
|
366 |
- index.cn <- match(cns, rownames(A)) |
|
386 |
+ index.snps <- match(snps, fns) |
|
387 |
+ index.cn <- match(cns, fns) |
|
367 | 388 |
row.index <- c(index.snps, index.cn) |
368 | 389 |
cat("Chromosome ", CHR, "\n") |
390 |
+ if(isPackageLoaded("ff")){ |
|
391 |
+ fD.batch <- vector("list", length(unique(batch))) |
|
392 |
+ } |
|
369 | 393 |
for(i in seq(along=unique(batch))){ |
370 | 394 |
PLATE <- unique(batch)[i] |
395 |
+ message("Plate: ", PLATE) |
|
371 | 396 |
sample.index <- which(batch==PLATE) |
372 | 397 |
##cnOptions[["batch"]] <- cnOptions[["batch"]][snpI[["SNR"]] >= SNRmin] |
373 | 398 |
if(isPackageLoaded("ff")){ |
374 | 399 |
ca <- CA[row.index, sample.index] |
375 | 400 |
cb <- CB[row.index, sample.index] |
376 | 401 |
} else{ |
377 |
- dns <- dimnames(A[row.index, sample.index]) |
|
378 |
- cb <- ca <- matrix(NA, nr=length(row.index), nc=length(sample.index), dimnames=dns) |
|
402 |
+ ##dns <- dimnames(A[row.index, sample.index]) |
|
403 |
+ cb <- ca <- matrix(NA, nr=length(row.index), nc=length(sample.index))##, dimnames=dns) |
|
379 | 404 |
} |
380 | 405 |
cnSet <- new("CNSet", |
381 | 406 |
alleleA=A[row.index, sample.index], |
... | ... |
@@ -386,52 +411,64 @@ crlmmCopynumber.crlmmBatch <- function(filenames, cnOptions, object, ...){ |
386 | 411 |
CB=cb, |
387 | 412 |
featureData=annotatedDataFrameFrom(A[row.index, sample.index], byrow=TRUE), |
388 | 413 |
phenoData=pD[sample.index, ], |
389 |
- protocolData=protocoldata[sample.index, ], |
|
390 |
- annotation=cnOptions[["cdfName"]]) |
|
391 |
- if(any(cnSet$SNR > SNRmin)){ |
|
392 |
- message(paste("Excluding samples with SNR < ", SNRmin)) |
|
393 |
- cnSet <- cnSet[, cnSet$SNR >= SNRmin] |
|
394 |
- } |
|
395 |
- ##rm(snpI, NP, gtSet); gc() |
|
414 |
+ protocolData=protocoldata[sample.index, ]) |
|
415 |
+ ##Verify this is correct |
|
416 |
+ annotation(cnSet) <- cnOptions[["cdfName"]] |
|
417 |
+ featureNames(cnSet) <- fns[row.index] |
|
418 |
+ ##add chromosome, position, isSnp |
|
419 |
+ cnSet <- annotate(cnSet) |
|
420 |
+## if(any(cnSet$SNR > SNRmin)){ |
|
421 |
+## if(CHR == chromosome[1]) message(paste("Excluding samples with SNR < ", SNRmin)) |
|
422 |
+## cnSet <- cnSet[, cnSet$SNR >= SNRmin] |
|
423 |
+## } |
|
396 | 424 |
featureData(cnSet) <- lm.parameters(cnSet, cnOptions) |
397 |
- } |
|
398 |
- if(CHR != 24){ |
|
399 |
- ## 60G for 1239 files |
|
400 |
- cnSet <- computeCopynumber(cnSet, cnOptions) |
|
401 |
- if(isPackageLoaded("ff")){ |
|
402 |
- CA[row.index, sample.index] <- cnSet@assayData[["CA"]] |
|
403 |
- CB[row.index, sample.index] <- cnSet@assayData[["CB"]] |
|
404 |
- } |
|
405 |
- } else{ |
|
406 |
- message("Copy number estimates not available for chromosome Y. Saving only the 'callSet' object for this chromosome") |
|
407 |
- alleleSet <- cnSet |
|
408 |
- rm(cnSet) ; gc() |
|
409 |
- save(alleleSet, file=file.path(cnOptions[["outdir"]], paste("alleleSet_", PLATE, "_", CHR, ".rda", sep=""))) |
|
410 |
- rm(alleleSet); gc() |
|
411 |
- next() |
|
412 |
- } |
|
413 |
- if(length(chromosome) == 1){ |
|
414 |
- if(cnOptions[["save.cnset"]]){ |
|
415 |
- save(cnSet, file=paste(cnFile, "_", PLATE, "_", CHR, ".rda", sep="")) |
|
416 |
- } |
|
417 |
- } |
|
418 |
- if(length(chromosome) > 1){ |
|
419 |
- save(cnSet, file=paste(cnFile, "_", PLATE, "_", CHR, ".rda", sep="")) |
|
420 |
- ##save(cnSet, file=file.path(cnOptions[["outdir"]], paste("cnSet_", CHR, ".rda", sep=""))) |
|
421 |
- rm(cnSet); gc() |
|
422 |
- } else { |
|
423 |
- if(isPackageLoaded("ff")) { |
|
424 |
- close(CA); close(CB) |
|
425 |
- save(CA, file=file.path(cnOptions[["outdir"]], "CA.rda")) |
|
426 |
- save(CB, file=file.path(cnOptions[["outdir"]], "CB.rda")) |
|
427 |
- } |
|
428 |
- return(cnSet) |
|
429 |
- } |
|
430 |
- } |
|
425 |
+ if(CHR < 24){ |
|
426 |
+ cnSet <- computeCopynumber(cnSet, cnOptions) |
|
427 |
+ if(isPackageLoaded("ff")){ |
|
428 |
+ if(i == 1) { ## first batch |
|
429 |
+ featureDataFF[row.index, 1:20] <- as.matrix(fData(cnSet)) |
|
430 |
+ fcol <- 20 |
|
431 |
+ } else { |
|
432 |
+ featureDataFF[row.index, (fcol+1):(fcol+17)] |
|
433 |
+ fcol <- fcol+17 |
|
434 |
+ ##remove redundant columns |
|
435 |
+ } |
|
436 |
+ CA[row.index, sample.index] <- cnSet@assayData[["CA"]] |
|
437 |
+ CB[row.index, sample.index] <- cnSet@assayData[["CB"]] |
|
438 |
+ next() ## next batch |
|
439 |
+ } else { |
|
440 |
+ save(cnSet, file=paste(cnFile, "_", PLATE, "_", CHR, ".rda", sep="")) |
|
441 |
+ } |
|
442 |
+ } else break() ## break out of the batch loop. Go to next chromosome |
|
443 |
+ ## if(length(chromosome) == 1 & length(unique(batch)) == 1){ |
|
444 |
+ ## if(cnOptions[["save.cnset"]] & !isLoadedPackage("ff")) |
|
445 |
+ ## save(cnSet, file=paste(cnFile, "_", PLATE, "_", CHR, ".rda", sep="")) |
|
446 |
+ ## return(cnSet) |
|
447 |
+ ## } else { |
|
448 |
+ ## ##either multiple chromosome or multiple batches...save to file if ff is not loaded |
|
449 |
+ ## if(!isPackageLoaded("ff")){ |
|
450 |
+ ## save(cnSet, file=paste(cnFile, "_", PLATE, "_", CHR, ".rda", sep="")) |
|
451 |
+ ## } |
|
452 |
+ ## rm(cnSet); gc() |
|
453 |
+ ## } |
|
454 |
+ } ## end of batch loop |
|
455 |
+ } ## end of chromosome loop |
|
431 | 456 |
if(isPackageLoaded("ff")) { |
457 |
+ close(A); close(B) |
|
458 |
+ close(featureDataFF) |
|
432 | 459 |
close(CA); close(CB) |
433 |
- save(CA, file=file.path(cnOptions[["outdir"]], "CA.rda")) |
|
434 |
- save(CB, file=file.path(cnOptions[["outdir"]], "CB.rda")) |
|
460 |
+ save(featureDataFF, file=file.path(outdir, "featureDataFF.rda")) |
|
461 |
+ save(CA, file=CAFile) |
|
462 |
+ save(CB, file=CBFile) |
|
463 |
+ close(calls); close(confs) |
|
464 |
+ ffSet <- new("FFSet", |
|
465 |
+ A=A, |
|
466 |
+ B=B, |
|
467 |
+ snpCall=calls, |
|
468 |
+ callProbability=confs, |
|
469 |
+ CA=CA, |
|
470 |
+ CB=CB) |
|
471 |
+ return(ffSet) |
|
435 | 472 |
} |
436 | 473 |
} |
437 | 474 |
|
... | ... |
@@ -524,6 +561,7 @@ loadIlluminaCnrma <- function(){ |
524 | 561 |
} |
525 | 562 |
|
526 | 563 |
crlmmWrapper <- function(filenames, cnOptions, ...){ |
564 |
+ crlmmBatchSize <- cnOptions[["crlmmBatchSize"]] |
|
527 | 565 |
cdfName <- cnOptions[["cdfName"]] |
528 | 566 |
load.it <- cnOptions[["load.it"]] |
529 | 567 |
save.it <- cnOptions[["save.it"]] |
... | ... |
@@ -548,25 +586,26 @@ crlmmWrapper <- function(filenames, cnOptions, ...){ |
548 | 586 |
} |
549 | 587 |
if(platform == "illumina") RG <- loadIlluminaRG(rgFile, callsFile, load.it, save.it) |
550 | 588 |
if(!(file.exists(dirname(callsFile)))) stop(dirname(callsFile), " does not exist.") |
551 |
- if(!(file.exists(dirname(snprmaFile[1])))) stop(dirname(snprmaFile[1]), " does not exist.") |
|
552 |
- if(load.it){ |
|
553 |
- if(!file.exists(callsFile)){ |
|
554 |
- message("load.it is TRUE, but ", callsFile, " does not exist. Rerunning the genotype calling algorithm") |
|
555 |
- ##load.it <- FALSE |
|
556 |
- } |
|
557 |
- } |
|
589 |
+ if(!(file.exists(dirname(snprmaFile)))) stop(dirname(snprmaFile), " does not exist.") |
|
590 |
+## if(load.it){ |
|
591 |
+## if(!file.exists(callsFile)){ |
|
592 |
+## message("load.it is TRUE, but ", callsFile, " does not exist. Rerunning the genotype calling algorithm") |
|
593 |
+## ##load.it <- FALSE |
|
594 |
+## } |
|
595 |
+## } |
|
558 | 596 |
if(platform == "affymetrix"){ |
559 |
- if(!file.exists(callsFile) | !load.it){ |
|
597 |
+## if(!file.exists(callsFile) | !load.it){ |
|
560 | 598 |
crlmm(filenames=filenames, |
561 | 599 |
cdfName=cdfName, |
562 | 600 |
save.it=TRUE, |
563 | 601 |
load.it=load.it, |
564 |
- intensityFile=snprmaFile, |
|
602 |
+ snprmaFile=snprmaFile, |
|
565 | 603 |
callsFile=callsFile, |
566 | 604 |
confsFile=confsFile, |
567 | 605 |
AFile=AFile, |
568 |
- BFile=BFile) |
|
569 |
- } |
|
606 |
+ BFile=BFile, |
|
607 |
+ crlmmBatchSize=crlmmBatchSize) |
|
608 |
+## } |
|
570 | 609 |
} |
571 | 610 |
gc() |
572 | 611 |
if(platform == "illumina") { |
... | ... |
@@ -756,18 +795,20 @@ thresholdCopynumber <- function(object){ |
756 | 795 |
|
757 | 796 |
cnOptions <- function(outdir="./", |
758 | 797 |
cdfName, |
759 |
- AFile="A", |
|
760 |
- BFile="B", |
|
761 |
- callsFile="genotypes", |
|
762 |
- rgFile="rgFile", |
|
798 |
+ AFile="A.rda", |
|
799 |
+ BFile="B.rda", |
|
800 |
+ CAFile="CA.rda", |
|
801 |
+ CBFile="CB.rda", |
|
802 |
+ callsFile="genotypes.rda", |
|
803 |
+ rgFile="rgFile.rda", |
|
763 | 804 |
cnFile="cnSet", |
764 |
- cnrmaFile="cn_rmaResult", |
|
805 |
+ cnrmaFile="cn_rmaResult.rda", |
|
765 | 806 |
##npBinary="NP.bin", ##name of file.backed.big.matrix |
766 | 807 |
##npDesc="NP.desc", |
767 |
- snprmaFile="snp_rmaResult", |
|
808 |
+ snprmaFile="snp_rmaResult.rda", |
|
768 | 809 |
##callsFile="calls.desc", |
769 | 810 |
##confsFile="confsFile.desc", |
770 |
- confsFile="confsFile", |
|
811 |
+ confsFile="confs.rda", |
|
771 | 812 |
save.it=TRUE, |
772 | 813 |
save.cnset=TRUE, |
773 | 814 |
load.it=TRUE, |
... | ... |
@@ -790,15 +831,16 @@ cnOptions <- function(outdir="./", |
790 | 831 |
thresholdCopynumber=TRUE, |
791 | 832 |
unlink=TRUE, |
792 | 833 |
use.poe=FALSE, |
793 |
- crlmmBatchSize=1100, ## this is about right for Affy 6.0 platform 1.8e6 * 1100 < 2^31 |
|
834 |
+ crlmmBatchSize=1000, |
|
794 | 835 |
...){ |
795 |
- if(isPackageLoaded("ff")){ |
|
796 |
- AFile=paste(AFile, "ff", sep="_") |
|
797 |
- BFile=paste(BFile, "ff", sep="_") |
|
798 |
- confsFile=paste(confsFile, "ff", sep="_") |
|
799 |
- callsFile=paste(callsFile, "ff", sep="_") |
|
800 |
- cnFile=paste(cnFile, "ff", sep="") |
|
801 |
- } |
|
836 |
+## if(isPackageLoaded("ff")){ |
|
837 |
+## AFile=paste(AFile, "ff", sep="_") |
|
838 |
+## BFile=paste(BFile, "ff", sep="_") |
|
839 |
+## confsFile=paste(confsFile, "ff", sep="_") |
|
840 |
+## callsFile=paste(callsFile, "ff", sep="_") |
|
841 |
+## cnFile=paste(cnFile, "ff", sep="") |
|
842 |
+## } |
|
843 |
+ if(length(batch) > 200) warning("This job may require a lot of RAM. Consider using the ff package in conjuction with crlmm, as described in the copynumber_ff vignette...") |
|
802 | 844 |
if(use.poe) require(metaArray) |
803 | 845 |
if(missing(cdfName)) stop("must specify cdfName") |
804 | 846 |
if(!file.exists(outdir)){ |
... | ... |
@@ -816,14 +858,16 @@ cnOptions <- function(outdir="./", |
816 | 858 |
stop("must specify batch -- should be the same length as the number of files") |
817 | 859 |
list(outdir=outdir, |
818 | 860 |
cdfName=cdfName, |
819 |
- callsFile=callsFile, |
|
820 |
- confsFile=confsFile, |
|
821 |
- AFile=AFile, |
|
822 |
- BFile=BFile, |
|
823 |
- snprmaFile=snprmaFile, |
|
824 |
- cnrmaFile=cnrmaFile, |
|
825 |
- rgFile=rgFile, |
|
826 |
- cnFile=cnFile, |
|
861 |
+ callsFile=file.path(outdir, callsFile), |
|
862 |
+ confsFile=file.path(outdir, confsFile), |
|
863 |
+ AFile=file.path(outdir, AFile), |
|
864 |
+ BFile=file.path(outdir, BFile), |
|
865 |
+ CAFile=file.path(outdir, CAFile), |
|
866 |
+ CBFile=file.path(outdir, CBFile), |
|
867 |
+ snprmaFile=file.path(outdir, snprmaFile), |
|
868 |
+ cnrmaFile=file.path(outdir, cnrmaFile), |
|
869 |
+ rgFile=file.path(outdir, rgFile), |
|
870 |
+ cnFile=file.path(outdir, cnFile), |
|
827 | 871 |
save.it=save.it, |
828 | 872 |
save.cnset=save.cnset, |
829 | 873 |
load.it=load.it, |
... | ... |
@@ -888,8 +932,11 @@ lm.parameters <- function(object, cnOptions){ |
888 | 932 |
} |
889 | 933 |
|
890 | 934 |
nonpolymorphic <- function(object, cnOptions, tmp.objects){ |
935 |
+ chromosome <- cnOptions[["chromosome"]] |
|
891 | 936 |
batch <- unique(object$batch) |
892 | 937 |
CHR <- unique(chromosome(object)) |
938 |
+ verbose <- cnOptions[["verbose"]] |
|
939 |
+ if(CHR != chromosome[1]) verbose <- FALSE |
|
893 | 940 |
goodSnps <- function(object, PHI.THR, tmp.objects, nAA.THR, nBB.THR){ |
894 | 941 |
Ns <- tmp.objects[["Ns"]] |
895 | 942 |
##Ns <- get("Ns", envir) |
... | ... |
@@ -1015,10 +1062,9 @@ nonpolymorphic <- function(object, cnOptions, tmp.objects){ |
1015 | 1062 |
|
1016 | 1063 |
THR.NU.PHI <- cnOptions$THR.NU.PHI |
1017 | 1064 |
if(THR.NU.PHI){ |
1018 |
- verbose <- cnOptions$verbose |
|
1019 | 1065 |
##Assign values to object |
1020 | 1066 |
object <- pr(object, "nuA", batch, nuA) |
1021 |
- object <- pr(object, "phiA", batch, phiA) |
|
1067 |
+ object <- pr(object, "phiA", batch, phiA) |
|
1022 | 1068 |
if(verbose) message("Thresholding nu and phi") |
1023 | 1069 |
object <- thresholdModelParams(object, cnOptions) |
1024 | 1070 |
} else { |
... | ... |
@@ -1036,7 +1082,6 @@ nonpolymorphic <- function(object, cnOptions, tmp.objects){ |
1036 | 1082 |
|
1037 | 1083 |
THR.NU.PHI <- cnOptions$THR.NU.PHI |
1038 | 1084 |
if(THR.NU.PHI){ |
1039 |
- verbose <- cnOptions$verbose |
|
1040 | 1085 |
##Assign values to object |
1041 | 1086 |
object <- pr(object, "nuA", batch, nuA) |
1042 | 1087 |
object <- pr(object, "phiA", batch, phiA) |
... | ... |
@@ -1738,6 +1783,7 @@ computeCopynumber.CNSet <- function(object, cnOptions){ |
1738 | 1783 |
if(length(batch) != ncol(object)) stop("Batch must be the same length as the number of samples") |
1739 | 1784 |
MIN.SAMPLES <- cnOptions$MIN.SAMPLES |
1740 | 1785 |
verbose <- cnOptions$verbose |
1786 |
+ if(CHR != cnOptions[["chromosome"]][1]) verbose <- FALSE |
|
1741 | 1787 |
use.poe <- cnOptions[["use.poe"]] |
1742 | 1788 |
for(i in seq(along=unique(batch))){ |
1743 | 1789 |
PLATE <- unique(batch)[i] |
... | ... |
@@ -1754,11 +1800,11 @@ computeCopynumber.CNSet <- function(object, cnOptions){ |
1754 | 1800 |
cnOptions$bias.adj <- bias.adj <- FALSE |
1755 | 1801 |
} |
1756 | 1802 |
if(bias.adj){ |
1757 |
- if(verbose) message("Dropping samples with low posterior prob. of normal copy number (samples dropped is locus-specific)") |
|
1803 |
+ if(verbose & i == 1) message("Dropping samples with low posterior prob. of normal copy number (samples dropped is locus-specific)") |
|
1758 | 1804 |
if(!use.poe) |
1759 | 1805 |
tmp.objects <- biasAdjNP(object.batch, cnOptions, tmp.objects) |
1760 | 1806 |
tmp.objects <- biasAdj(object.batch, cnOptions, tmp.objects) |
1761 |
- if(verbose) message("Recomputing location and scale parameters") |
|
1807 |
+ if(verbose & i == 1) message("Recomputing location and scale parameters") |
|
1762 | 1808 |
} |
1763 | 1809 |
##update tmp.objects |
1764 | 1810 |
tmp.objects <- withinGenotypeMoments(object.batch, |
... | ... |
@@ -1773,14 +1819,13 @@ computeCopynumber.CNSet <- function(object, cnOptions){ |
1773 | 1819 |
##nuA=getParam(object.batch, "nuA", PLATE) |
1774 | 1820 |
THR.NU.PHI <- cnOptions$THR.NU.PHI |
1775 | 1821 |
if(THR.NU.PHI){ |
1776 |
- verbose <- cnOptions$verbose |
|
1777 |
- if(verbose) message("Thresholding nu and phi") |
|
1822 |
+ if(verbose & i == 1) message("Thresholding nu and phi") |
|
1778 | 1823 |
object.batch <- thresholdModelParams(object.batch, cnOptions) |
1779 | 1824 |
} |
1780 |
- if(verbose) message("\nAllele specific copy number") |
|
1825 |
+ if(verbose & i == 1) message("\nAllele specific copy number") |
|
1781 | 1826 |
object.batch <- polymorphic(object.batch, cnOptions, tmp.objects) |
1782 | 1827 |
if(any(!isSnp(object))){ ## there are nonpolymorphic probes |
1783 |
- if(verbose) message("\nCopy number for nonpolymorphic probes...") |
|
1828 |
+ if(verbose & i == 1) message("\nCopy number for nonpolymorphic probes...") |
|
1784 | 1829 |
if(!use.poe){ |
1785 | 1830 |
object.batch <- nonpolymorphic(object.batch, cnOptions, tmp.objects) |
1786 | 1831 |
} else { |
... | ... |
@@ -1812,7 +1857,7 @@ computeCopynumber.CNSet <- function(object, cnOptions){ |
1812 | 1857 |
object <- pr(object, "corrB.AA", PLATE, getParam(object.batch, "corrB.AA", PLATE)) |
1813 | 1858 |
rm(object.batch, tmp.objects); gc(); |
1814 | 1859 |
} |
1815 |
- object <- object[order(chromosome(object), position(object)), ] |
|
1860 |
+ ##object <- object[order(chromosome(object), position(object)), ] |
|
1816 | 1861 |
if(cnOptions[["thresholdCopynumber"]]){ |
1817 | 1862 |
object <- thresholdCopynumber(object) |
1818 | 1863 |
} |
... | ... |
@@ -1,19 +1,23 @@ |
1 | 1 |
crlmm <- function(filenames, row.names=TRUE, col.names=TRUE, |
2 | 2 |
probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
3 |
- save.it=FALSE, load.it=FALSE, intensityFile, callsFile, confsFile, |
|
4 |
- AFile, BFile, |
|
3 |
+ save.it=FALSE, load.it=FALSE, |
|
4 |
+ snprmaFile="snp_rmaResult.rda", |
|
5 |
+ callsFile="genotypes.rda", |
|
6 |
+ confsFile="confs.rda", |
|
7 |
+ AFile="A.rda", |
|
8 |
+ BFile="B.rda", |
|
5 | 9 |
mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
6 | 10 |
cdfName, sns, recallMin=10, recallRegMin=1000, |
7 |
- returnParams=FALSE, badSNP=0.7){ |
|
8 |
- if ((load.it | save.it) & missing(intensityFile)) |
|
9 |
- stop("'intensityFile' is missing, and you chose either load.it or save.it") |
|
11 |
+ returnParams=FALSE, badSNP=0.7, |
|
12 |
+ crlmmBatchSize=1000){ |
|
13 |
+ BS <- crlmmBatchSize |
|
14 |
+ if(load.it & file.exists(snprmaFile) & file.exists(callsFile)) return() ##nothing to do |
|
15 |
+ if (load.it & !file.exists(snprmaFile)){ |
|
16 |
+ ##stop("'intensityFile' is missing, and you chose either load.it or save.it") |
|
17 |
+ message("'snprmaFile' does not exist and you chose to load.it. Rerunning snprma...") |
|
18 |
+ load.it <- FALSE |
|
19 |
+ } |
|
10 | 20 |
if (missing(sns)) sns <- basename(filenames) |
11 |
- if (!missing(intensityFile)) |
|
12 |
- if (load.it & !file.exists(intensityFile)){ |
|
13 |
- load.it <- FALSE |
|
14 |
- message("File ", intensityFile, " does not exist.") |
|
15 |
- message("Not loading it, but running SNPRMA from scratch.") |
|
16 |
- } |
|
17 | 21 |
if (!load.it){ |
18 | 22 |
res <- snprma(filenames, |
19 | 23 |
fitMixture=TRUE, |
... | ... |
@@ -24,78 +28,96 @@ crlmm <- function(filenames, row.names=TRUE, col.names=TRUE, |
24 | 28 |
sns=sns, |
25 | 29 |
AFile=AFile, |
26 | 30 |
BFile=BFile) |
27 |
- save(res, file=intensityFile)##file.path(dirname(intensityFile), "res.rda")) |
|
31 |
+ save(res, file=snprmaFile)##file.path(dirname(snprmaFile), "res.rda")) |
|
28 | 32 |
} else { |
29 |
- message("Loading quantile normalized intensities...") |
|
30 |
- load(intensityFile) |
|
33 |
+ message("Loading snprmaFile...") |
|
34 |
+ load(snprmaFile) |
|
31 | 35 |
res <- get("res") |
32 | 36 |
} |
37 |
+ SKW <- res[["SKW"]] |
|
38 |
+ SNR <- res[["SNR"]] |
|
33 | 39 |
load(AFile) |
34 | 40 |
load(BFile) |
35 | 41 |
if(isPackageLoaded("ff")) {open(A); open(B)} |
36 | 42 |
if(row.names) row.names <- res$gns |
37 | 43 |
if(col.names) col.names <- res$sns |
38 | 44 |
## }else{ |
39 |
-## if (verbose) message("Loading ", intensityFile, ".") |
|
40 |
-## obj <- load(intensityFile) |
|
45 |
+## if (verbose) message("Loading ", snprmaFile, ".") |
|
46 |
+## obj <- load(snprmaFile) |
|
41 | 47 |
## if (verbose) message("Done.") |
42 | 48 |
## if (obj != "res") |
43 |
-## stop("Object in ", intensityFile, " seems to be invalid.") |
|
49 |
+## stop("Object in ", snprmaFile, " seems to be invalid.") |
|
44 | 50 |
## } |
45 | 51 |
##path <- system.file("extdata", package=paste(cdfName, "Crlmm", sep="")) |
46 | 52 |
##load(file.path(path, "snpProbes.rda")) |
47 | 53 |
gns <- res[["gns"]] |
48 | 54 |
sns <- colnames(A) |
55 |
+ nc <- ncol(A) |
|
49 | 56 |
## |
50 | 57 |
##Ensures that if ff package is loaded, ordinary matrices are still passed to crlmmGT |
51 |
- aMatrix <- A[1:length(gns), ] |
|
52 |
- bMatrix <- B[1:length(gns), ] |
|
53 |
- if(isPackageLoaded("ff")) close(B) |
|
54 |
- rm(B); gc() |
|
55 |
- ## res2 <- crlmmGT(res[["A"]], res[["B"]], res[["SNR"]], |
|
56 |
- res2 <- crlmmGT(aMatrix, bMatrix, res[["SNR"]], |
|
57 |
- res[["mixtureParams"]], res[["cdfName"]], |
|
58 |
- gender=gender, row.names=row.names, |
|
59 |
- col.names=col.names, recallMin=recallMin, |
|
60 |
- recallRegMin=1000, SNRMin=SNRMin, |
|
61 |
- returnParams=returnParams, badSNP=badSNP, |
|
62 |
- verbose=verbose) |
|
63 |
- rm(aMatrix, bMatrix); gc() |
|
64 |
- res2[["SNR"]] <- res[["SNR"]] |
|
65 |
- res2[["SKW"]] <- res[["SKW"]] |
|
58 |
+ if(nc > BS){ |
|
59 |
+ ## genotype the samples in batches |
|
60 |
+ |
|
61 |
+ ##number crlmm batches |
|
62 |
+ N <- ceiling(nc/BS) |
|
63 |
+ ##samples per batch |
|
64 |
+ S <- ceiling(nc/N) |
|
65 |
+ colindex <- split(1:nc, rep(1:nc, each=S, length.out=nc)) |
|
66 |
+ } else { |
|
67 |
+ colindex <- list(1:nc) |
|
68 |
+ } |
|
69 |
+ if(length(colindex) > 1) |
|
70 |
+ message("Calling genotypes in batches of size ", length(colindex[[1]]), " to reduce required RAM") |
|
66 | 71 |
if(isPackageLoaded("ff")){ |
67 |
- if(file.exists(callsFile)) unlink(callsFile) |
|
68 |
- calls <- ff(dim=c(nrow(A), ncol(A)), |
|
69 |
- vmode="integer", |
|
70 |
- finalizer="close", |
|
71 |
- dimnames=dimnames(A), |
|
72 |
- overwrite=TRUE) |
|
72 |
+ confs <- initializeBigMatrix(nrow(A), ncol(A)) |
|
73 |
+ calls <- initializeBigMatrix(nrow(A), ncol(A)) |
|
73 | 74 |
} else{ |
74 |
- calls <- matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A)) |
|
75 |
+ confs <- matrix(NA, nrow(A), ncol(A)) |
|
76 |
+ calls <- matrix(NA, nrow(A), ncol(A)) |
|
77 |
+ } |
|
78 |
+ sex <- vector("list", length(colindex)) |
|
79 |
+ for(i in seq(along=colindex)){ |
|
80 |
+ aMatrix <- A[1:length(gns), colindex[[i]]] |
|
81 |
+ bMatrix <- B[1:length(gns), colindex[[i]]] |
|
82 |
+ snr <- res[["SNR"]][colindex[[i]]] |
|
83 |
+ mix <- res[["mixtureParameters"]][, colindex[[i]]] |
|
84 |
+ ## res2 <- crlmmGT(res[["A"]], res[["B"]], res[["SNR"]], |
|
85 |
+ res2 <- tryCatch(crlmmGT(aMatrix, |
|
86 |
+ bMatrix, |
|
87 |
+ snr, |
|
88 |
+ mix, |
|
89 |
+ res[["cdfName"]], |
|
90 |
+ gender=gender, |
|
91 |
+ row.names=row.names, |
|
92 |
+ col.names=col.names[colindex[[i]]], |
|
93 |
+ recallMin=recallMin, |
|
94 |
+ recallRegMin=1000, |
|
95 |
+ SNRMin=SNRMin, |
|
96 |
+ returnParams=returnParams, |
|
97 |
+ badSNP=badSNP, |
|
98 |
+ verbose=verbose), error=function(e) NULL) |
|
99 |
+ if(is.null(res2)) { |
|
100 |
+ unlink(callsFile) |
|
101 |
+ unlink(confsFile) |
|
102 |
+ stop("error in call to crlmmGT") |
|
103 |
+ } |
|
104 |
+ rm(aMatrix, bMatrix, snr, mix); gc() |
|
105 |
+ calls[1:length(gns), colindex[[i]]] <- res2[["calls"]] |
|
106 |
+ confs[1:length(gns), colindex[[i]]] <- res2[["confs"]] |
|
107 |
+ sex[[i]] <- res2[["gender"]] |
|
108 |
+ rm(res2); gc() |
|
75 | 109 |
} |
76 |
- calls[1:length(gns), ] <- res2[["calls"]] |
|
110 |
+ if(isPackageLoaded("ff")) close(B) |
|
111 |
+ rm(B); gc() |
|
77 | 112 |
if(isPackageLoaded("ff")) close(calls) |
78 | 113 |
save(calls, file=callsFile) |
79 | 114 |
rm(calls); gc() |
80 |
- if(isPackageLoaded("ff")){ |
|
81 |
- if(file.exists(confsFile)) unlink(confsFile) |
|
82 |
- confs <- ff(dim=c(nrow(A), ncol(A)), |
|
83 |
- vmode="integer", |
|
84 |
- finalizer="close", |
|
85 |
- dimnames=dimnames(A), |
|
86 |
- overwrite=TRUE) |
|
87 |
- } else{ |
|
88 |
- confs <- matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A)) |
|
89 |
- } |
|
90 |
- confs[1:length(gns), ] <- res2[["confs"]] |
|
91 | 115 |
if(isPackageLoaded("ff")) close(confs) |
92 | 116 |
save(confs, file=confsFile) |
93 | 117 |
rm(confs); gc() |
94 |
- sampleStats <- data.frame(SNR=res2[["SNR"]], |
|
95 |
- SKW=res2[["SKW"]], |
|
96 |
- gender=res2[["gender"]]) |
|
97 |
- save(sampleStats, file=file.path(dirname(confsFile), "sampleStats.rda")) |
|
98 |
- if(isPackageLoaded("ff")){ close(A)} |
|
118 |
+ res$gender <- unlist(sex) |
|
119 |
+ save(res, file=snprmaFile) |
|
120 |
+ if(isPackageLoaded("ff")) close(A) |
|
99 | 121 |
rm(list=ls()); gc() |
100 | 122 |
return() |
101 | 123 |
} |
... | ... |
@@ -50,13 +50,13 @@ snprma <- function(filenames, mixtureSampleSize=10^5, |
50 | 50 |
cnProbes <- get("cnProbes") |
51 | 51 |
nr <- length(pnsa) + nrow(cnProbes) |
52 | 52 |
nc <- length(filenames) |
53 |
- dns <- list(c(gns, rownames(cnProbes)), sns) |
|
53 |
+ dns <- list(c(gns, rownames(cnProbes)), sns) |
|
54 | 54 |
if(isPackageLoaded("ff")){ |
55 | 55 |
message("ff package is loaded. Creating ff objects for storing normalized and summarized intensities") |
56 |
- A <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dns, |
|
57 |
- overwrite=TRUE) |
|
58 |
- B <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dns, |
|
59 |
- overwrite=TRUE) |
|
56 |
+ A <- initializeBigMatrix(nr, nc) |
|
57 |
+ B <- initializeBigMatrix(nr, nc) |
|
58 |
+ open(A) |
|
59 |
+ open(B) |
|
60 | 60 |
} else { |
61 | 61 |
A <- matrix(0L, nr, nc, dimnames=dns) |
62 | 62 |
B <- matrix(0L, nr, nc, dimnames=dns) |
... | ... |
@@ -98,7 +98,8 @@ snprma <- function(filenames, mixtureSampleSize=10^5, |
98 | 98 |
if (!fitMixture) SNR <- mixtureParams <- NA |
99 | 99 |
save(A, file=AFile) |
100 | 100 |
save(B, file=BFile) |
101 |
- return(list(sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)) |
|
101 |
+ return(list(sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName, |
|
102 |
+ cnnames=rownames(cnProbes))) |
|
102 | 103 |
} |
103 | 104 |
|
104 | 105 |
fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=10, verbose=FALSE){ |
... | ... |
@@ -183,4 +183,39 @@ isValidCdfName <- function(cdfName){ |
183 | 183 |
return(result) |
184 | 184 |
} |
185 | 185 |
|
186 |
+initializeBigMatrix <- function(nr, nc, batch, vmode="integer"){ |
|
187 |
+ if(prod(nr, nc) > 2^31){ |
|
188 |
+ ##Need multiple matrices |
|
189 |
+ ## -- use ffdf |
|
190 |
+ |
|
191 |
+ ## How many samples per ff object |
|
192 |
+ S <- floor(2^31/nr - 1) |
|
193 |
+ |
|
194 |
+ ## How many ff objects |
|
195 |
+ L <- ceiling(nc/S) |
|
196 |
+ |
|
197 |
+ results <- vector("list", L) |
|
198 |
+ ##resultsB <- vector("list", L) |
|
199 |
+ for(i in 1:(L-1)){ ## the Lth object may have fewer than nc columns |
|
200 |
+ results[[i]] <- ff(dim=c(nr, S), |
|
201 |
+ vmode=vmode, |
|
202 |
+ finalizer="close", |
|
203 |
+ overwrite=TRUE) |
|
204 |
+ } |
|
205 |
+ ##the Lth element |
|
206 |
+ leftOver <- nc - ((L-1)*S) |
|
207 |
+ results[[L]] <- ff(dim=c(nr, leftOver), |
|
208 |
+ vmode=vmode, |
|
209 |
+ finalizer="close", |
|
210 |
+ overwrite=TRUE) |
|
211 |
+ resultsff <- do.call(ffdf, results) |
|
212 |
+ } else { |
|
213 |
+ resultsff <- ff(dim=c(nr, nc), |
|
214 |
+ vmode=vmode, |
|
215 |
+ finalizer="close", |
|
216 |
+ overwrite=TRUE) |
|
217 |
+ } |
|
218 |
+ return(resultsff) |
|
219 |
+} |
|
220 |
+ |
|
186 | 221 |
|
187 | 222 |
deleted file mode 100644 |
... | ... |
@@ -1,24 +0,0 @@ |
1 |
-\name{AlleleSet-methods} |
|
2 |
-\docType{methods} |
|
3 |
- |
|
4 |
-\title{Indicator for polymorphic probes} |
|
5 |
-\description{ |
|
6 |
- |
|
7 |
- This functions uses the annotation slot of the AlleleSet object to load |
|
8 |
- the corresponding annotation package and determine whether each probe |
|
9 |
- in the object interrogates a polymorphic or nonpolymorphic allele. For |
|
10 |
- instance, in the Affy 6.0 platform roughly 900,000 of the 1.8 million |
|
11 |
- markers are for polymorphic alleles. |
|
12 |
- |
|
13 |
-} |
|
14 |
-\usage{ |
|
15 |
-A(object) |
|
16 |
-B(object) |
|
17 |
-} |
|
18 |
-\arguments{ |
|
19 |
- \item{object}{AlleleSet object} |
|
20 |
-} |
|
21 |
-\value{ |
|
22 |
- matrix of normalized intensities |
|
23 |
-} |
|
24 |
-\keyword{manip} |
... | ... |
@@ -8,14 +8,16 @@ |
8 | 8 |
} |
9 | 9 |
\usage{ |
10 | 10 |
cnOptions(outdir = "./", cdfName, |
11 |
- AFile="A", |
|
12 |
- BFile="B", |
|
13 |
- callsFile="genotypes", |
|
11 |
+ AFile="A.rda", |
|
12 |
+ BFile="B.rda", |
|
13 |
+ CAFile="CA.rda", |
|
14 |
+ CBFile="CB.rda", |
|
15 |
+ callsFile="genotypes.rda", |
|
14 | 16 |
rgFile="rgFile.rda", |
15 | 17 |
cnFile="cnSet", |
16 | 18 |
cnrmaFile="cn_rmaResult.rda", |
17 | 19 |
snprmaFile="snp_rmaResult.rda", |
18 |
- confsFile="confsFile", |
|
20 |
+ confsFile="confsFile.rda", |
|
19 | 21 |
save.it = TRUE, |
20 | 22 |
save.cnset = TRUE, |
21 | 23 |
load.it = TRUE, |
... | ... |
@@ -25,7 +27,8 @@ |
25 | 27 |
GT.CONF.THR = 0.99, PHI.THR = 2^6, nHOM.THR = 5, |
26 | 28 |
MIN.NU = 2^3, MIN.PHI = 2^3, |
27 | 29 |
THR.NU.PHI = TRUE, thresholdCopynumber = TRUE, unlink = TRUE, |
28 |
-xo use.poe=FALSE, crlmmBatchSize=1100, ...) |
|
30 |
+ use.poe=FALSE, |
|
31 |
+ crlmmBatchSize=1000, ...) |
|
29 | 32 |
} |
30 | 33 |
\arguments{ |
31 | 34 |
\item{outdir}{ |
... | ... |
@@ -45,6 +48,21 @@ xo use.poe=FALSE, crlmmBatchSize=1100, ...) |
45 | 48 |
|
46 | 49 |
File name for quantile normalized intensities for the B allele is <outdir>/BFile<.rda> |
47 | 50 |
|
51 |
+} |
|
52 |
+ |
|
53 |
+ \item{CAFile}{ |
|
54 |
+ |
|
55 |
+ File name for A allele copy number is <outdir>/AFile<.rda>. Copy |
|
56 |
+ number at nonpolymorphic loci is stored in this file. |
|
57 |
+ |
|
58 |
+} |
|
59 |
+ |
|
60 |
+ \item{CBFile}{ |
|
61 |
+ |
|
62 |
+ File name for B allele copy number is <outdir>/AFile<.rda>. The 'B |
|
63 |
+ allele' copy number at nonpolymorphic loci is zero (copy number for |
|
64 |
+ nonpolymorphic loci is stored in the 'A allele'). |
|
65 |
+ |
|
48 | 66 |
} |
49 | 67 |
|
50 | 68 |
\item{callsFile}{ |
... | ... |
@@ -288,12 +306,12 @@ xo use.poe=FALSE, crlmmBatchSize=1100, ...) |
288 | 306 |
|
289 | 307 |
\item{crlmmBatchSize}{ |
290 | 308 |
|
291 |
- 'integer'. The maximum number of samples that can be process |
|
292 |
- in a single batch. When using the ff package, the maximum |
|
293 |
- number of elements in a matrix is 2^31. For Affy 6.0, the |
|
294 |
- limit is therefore approximately 1100 samples (1.8e6 * 1100 < 2^31). |
|
309 |
+ 'integer'. The maximum number of samples to be genotyped in a |
|
310 |
+ batch. This is used to keep the RAM at manageable levels. |
|
311 |
+ |
|
295 | 312 |
} |
296 | 313 |
|
314 |
+ |
|
297 | 315 |
\item{\dots}{ |
298 | 316 |
|
299 | 317 |
Additional arguments are passed to \code{readIdatFiles} (Illumina |
... | ... |
@@ -10,11 +10,11 @@ |
10 | 10 |
\usage{ |
11 | 11 |
crlmm(filenames, row.names=TRUE, col.names=TRUE, |
12 | 12 |
probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, |
13 |
- gender=NULL, save.it=FALSE, load.it=FALSE, intensityFile, callsFile, |
|
13 |
+ gender=NULL, save.it=FALSE, load.it=FALSE, snprmaFile, callsFile, |
|
14 | 14 |
confsFile, AFile, BFile, |
15 | 15 |
mixtureSampleSize=10^5, |
16 | 16 |
eps=0.1, verbose=TRUE, cdfName, sns, recallMin=10, |
17 |
- recallRegMin=1000, returnParams=FALSE, badSNP=0.7) |
|
17 |
+ recallRegMin=1000, returnParams=FALSE, badSNP=0.7, crlmmBatchSize=1000) |
|
18 | 18 |
} |
19 | 19 |
|
20 | 20 |
\arguments{ |
... | ... |
@@ -29,8 +29,7 @@ crlmm(filenames, row.names=TRUE, col.names=TRUE, |
29 | 29 |
defining sex. (1 - male; 2 - female)} |
30 | 30 |
\item{save.it}{'logical'. Save preprocessed data?} |
31 | 31 |
\item{load.it}{'logical'. Load preprocessed data to speed up analysis?} |
32 |
- \item{intensityFile}{'character' with filename to be saved/loaded - |
|
33 |
- results from snprma function.} |
|
32 |
+ \item{snprmaFile}{'character' with filename to be saved/loaded. Statistics saved in this file include the skew (SKW), signal to noise ratio (SNR), mixture model parameters (mixtureParams), and gender. } |
|
34 | 33 |
\item{callsFile}{'character' string -- filename to be saved/loaded for biallelic genotype calls.} |
35 | 34 |
\item{confsFile}{'character' string -- filename to be saved/loaded for confidence scores of the biallelic genotype calls.} |
36 | 35 |
\item{AFile}{'character' string -- filename to be saved/loaded for quantile normalized intensities of the A allele} |
... | ... |
@@ -45,6 +44,7 @@ crlmm(filenames, row.names=TRUE, col.names=TRUE, |
45 | 44 |
\item{recallRegMin}{Minimum number of SNP's for regression.} |
46 | 45 |
\item{returnParams}{'logical'. Return recalibrated parameters.} |
47 | 46 |
\item{badSNP}{'numeric'. Threshold to flag as bad SNP (affects batchQC)} |
47 |
+ \item{crlmmBatchSize}{'integer', indicating the number of samples to be genotyped in a batch by crlmm. The purpose of this argument is to keep the RAM at manageable levels.} |
|
48 | 48 |
} |
49 | 49 |
\value{ |
50 | 50 |
|
... | ... |
@@ -52,9 +52,13 @@ crlmm(filenames, row.names=TRUE, col.names=TRUE, |
52 | 52 |
|
53 | 53 |
- AFile |
54 | 54 |
- BFile |
55 |
+ - CAFile |
|
56 |
+ - CBFile |
|
55 | 57 |
- callsFile |
56 | 58 |
- confsFile |
57 |
- - intensityFile |
|
59 |
+ - snprmaFile |
|
60 |
+ |
|
61 |
+ See the \code{cnOptions} function for the default filenames. |
|
58 | 62 |
|
59 | 63 |
} |
60 | 64 |
|
... | ... |
@@ -76,7 +80,13 @@ if (require(genomewidesnp5Crlmm) & require(hapmapsnp5)){ |
76 | 80 |
## the filenames with full path... |
77 | 81 |
## very useful when genotyping samples not in the working directory |
78 | 82 |
cels <- list.celfiles(path, full.names=TRUE) |
79 |
- (crlmmOutput <- crlmm(cels, cdfName="genomewidesnp5")) |
|
83 |
+ crlmm(cels, cdfName="genomewidesnp5") |
|
84 |
+ ## clean |
|
85 |
+ unlink("A.rda") |
|
86 |
+ unlink("B.rda") |
|
87 |
+ unlink("snp_rmaResult.rda") |
|
88 |
+ unlink("calls.rda") |
|
89 |
+ unlink("confs.rda") |
|
80 | 90 |
} |
81 | 91 |
} |
82 | 92 |
\seealso{\code{\link{cnOptions}}} |