git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@46584 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -516,4 +516,6 @@ function (which expects ff objects and supports parallel processing) |
516 | 516 |
2010-04-16 B Carvalho committed version 1.5.49 |
517 | 517 |
** cosmetics - looking for cause of memory spike |
518 | 518 |
|
519 |
+2010-04-24 B Carvalho committed version 1.7.1 |
|
520 |
+** fixed bug in gender prediction that cause a spike in memory usage |
|
519 | 521 |
|
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
Package: crlmm |
2 | 2 |
Type: Package |
3 | 3 |
Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays. |
4 |
-Version: 1.7.0 |
|
4 |
+Version: 1.7.1 |
|
5 | 5 |
Date: 2010-04-16 |
6 | 6 |
Author: Rafael A Irizarry, Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au> |
7 | 7 |
Maintainer: Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
... | ... |
@@ -316,6 +316,27 @@ crlmm2 <- function(filenames, row.names=TRUE, col.names=TRUE, |
316 | 316 |
return(list2SnpSet(res2, returnParams=returnParams)) |
317 | 317 |
} |
318 | 318 |
|
319 |
+predictGender <- function(cols, theA, theB, XIndex){ |
|
320 |
+ n <- length(cols) |
|
321 |
+ med <- numeric(n) |
|
322 |
+ if (n > 0){ |
|
323 |
+ open(theA) |
|
324 |
+ open(theB) |
|
325 |
+ for (i in 1:n){ |
|
326 |
+ vA <- log2(theA[XIndex, cols[i]]) |
|
327 |
+ vB <- log2(theB[XIndex, cols[i]]) |
|
328 |
+ med[i] <- median(vA+vB)/2 |
|
329 |
+ rm(vA, vB) |
|
330 |
+ } |
|
331 |
+ close(theA) |
|
332 |
+ close(theB) |
|
333 |
+ rm(theA, theB) |
|
334 |
+ } |
|
335 |
+ rm(n) |
|
336 |
+ gc(verbose=FALSE) |
|
337 |
+ med |
|
338 |
+} |
|
339 |
+ |
|
319 | 340 |
crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
320 | 341 |
col.names=NULL, probs=c(1/3, 1/3, 1/3), DF=6, |
321 | 342 |
SNRMin=5, recallMin=10, recallRegMin=1000, |
... | ... |
@@ -337,8 +358,8 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
337 | 358 |
stopifnot(require(pkgname, character.only=TRUE, quietly=!verbose)) |
338 | 359 |
|
339 | 360 |
if(verbose) message("Loading annotations.") |
340 |
- loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname) |
|
341 |
- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname) |
|
361 |
+ obj1 <- loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname) |
|
362 |
+ obj2 <- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname) |
|
342 | 363 |
## this is toget rid of the 'no visible binding' notes |
343 | 364 |
## variable definitions |
344 | 365 |
XIndex <- getVarInEnv("XIndex") |
... | ... |
@@ -348,12 +369,16 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
348 | 369 |
theKnots <- getVarInEnv("theKnots") |
349 | 370 |
regionInfo <- getVarInEnv("regionInfo") |
350 | 371 |
params <- getVarInEnv("params") |
372 |
+ rm(list=c(obj1, obj2), envir=.crlmmPkgEnv) |
|
373 |
+ rm(obj1, obj2) |
|
351 | 374 |
|
352 | 375 |
## IF gender not provide, we predict |
353 | 376 |
## FIXME: XIndex may be greater than ocProbesets() |
354 | 377 |
if(is.null(gender)){ |
355 | 378 |
if(verbose) message("Determining gender.") |
356 |
- XMedian <- apply(log2(A[XIndex,, drop=FALSE])+log2(B[XIndex,, drop=FALSE]), 2, median)/2 |
|
379 |
+## XMedian <- apply(log2(A[XIndex,, drop=FALSE])+log2(B[XIndex,, drop=FALSE]), 2, median)/2 |
|
380 |
+ XMedian <- ocLapply(splitIndicesByNode(1:NC), predictGender, theA=A, theB=B, XIndex=XIndex, neededPkgs="crlmm") |
|
381 |
+ XMedian <- unlist(XMedian) |
|
357 | 382 |
if(sum(SNR[] > SNRMin)==1){ |
358 | 383 |
gender <- which.min(c(abs(XMedian-8.9), abs(XMedian-9.5))) |
359 | 384 |
}else{ |
... | ... |
@@ -403,8 +428,8 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
403 | 428 |
sapply(cIndexes, length), SMEDIAN, |
404 | 429 |
theKnots, mixtureParams[], DF, probs, 0.025) |
405 | 430 |
|
406 |
- last <- rSnps[2] |
|
407 |
- rm(snps, rSnps, IndexesBatch, tmpA, tmpB) |
|
431 |
+ rm(snps, rSnps, IndexesBatch, tmpA, tmpB, last) |
|
432 |
+ gc(verbose=FALSE) |
|
408 | 433 |
close(A) |
409 | 434 |
close(B) |
410 | 435 |
close(mixtureParams) |
... | ... |
@@ -425,7 +450,7 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
425 | 450 |
newparams[["centers"]] <- do.call("rbind", lapply(newparamsBatch, "[[", 1)) |
426 | 451 |
newparams[["scales"]] <- do.call("rbind", lapply(newparamsBatch, "[[", 2)) |
427 | 452 |
newparams[["N"]] <- do.call("rbind", lapply(newparamsBatch, "[[", 3)) |
428 |
- rm(newparamsBatch) |
|
453 |
+ rm(newparamsBatch); gc(verbose=FALSE) |
|
429 | 454 |
if(verbose) message("Done.") |
430 | 455 |
if(verbose) message("Estimating recalibration parameters.") |
431 | 456 |
d <- newparams[["centers"]] - params$centers |
... | ... |
@@ -460,6 +485,7 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
460 | 485 |
mu <- newparams[["centers"]][i, ] |
461 | 486 |
j <- which(is.na(mu)) |
462 | 487 |
newparams[["centers"]][i, j] <- c(1, mu[-j], prod(mu[-j]))%*%regParams[, j] |
488 |
+ rm(mu, j) |
|
463 | 489 |
} |
464 | 490 |
|
465 | 491 |
##remaing NAs are made like originals |
... | ... |
@@ -534,8 +560,8 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
534 | 560 |
which(regionInfo[snps, 1])) |
535 | 561 |
A[snps,] <- tmpA |
536 | 562 |
B[snps,] <- tmpB |
537 |
- last <- rSnps[2] |
|
538 |
- rm(tmpA, tmpB, snps, rSnps, IndexesBatch, ImNull) |
|
563 |
+ rm(tmpA, tmpB, snps, rSnps, IndexesBatch, ImNull, last) |
|
564 |
+ gc(verbose=FALSE) |
|
539 | 565 |
close(A) |
540 | 566 |
close(B) |
541 | 567 |
close(mixtureParams) |
... | ... |
@@ -142,10 +142,12 @@ snprma2 <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, |
142 | 142 |
stopifnot(require(pkgname, character.only=TRUE, quietly=!verbose)) |
143 | 143 |
|
144 | 144 |
if(verbose) message("Loading annotations and mixture model parameters.") |
145 |
- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname) |
|
145 |
+ obj <- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname) |
|
146 | 146 |
pnsa <- getVarInEnv("pnsa") |
147 | 147 |
pnsb <- getVarInEnv("pnsb") |
148 | 148 |
gns <- getVarInEnv("gns") |
149 |
+ rm(list=obj, envir=.crlmmPkgEnv) |
|
150 |
+ rm(obj) |
|
149 | 151 |
|
150 | 152 |
##We will read each cel file, summarize, and run EM one by one |
151 | 153 |
##We will save parameters of EM to use later |
... | ... |
@@ -173,6 +175,11 @@ snprma2 <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, |
173 | 175 |
mixtureParams=mixtureParams, eps=eps, seed=seed, |
174 | 176 |
mixtureSampleSize=mixtureSampleSize, pkgname=pkgname, |
175 | 177 |
neededPkgs=c("crlmm", pkgname)) |
178 |
+ close(mixtureParams) |
|
179 |
+ close(SNR) |
|
180 |
+ close(SKW) |
|
181 |
+ close(A) |
|
182 |
+ close(B) |
|
176 | 183 |
|
177 | 184 |
list(A=A, B=B, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName) |
178 | 185 |
} |
... | ... |
@@ -182,9 +189,9 @@ processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR, |
182 | 189 |
mixtureParams, eps, seed, mixtureSampleSize, |
183 | 190 |
pkgname){ |
184 | 191 |
|
185 |
- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname) |
|
186 |
- loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname) |
|
187 |
- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname) |
|
192 |
+ obj1 <- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname) |
|
193 |
+ obj2 <- loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname) |
|
194 |
+ obj3 <- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname) |
|
188 | 195 |
autosomeIndex <- getVarInEnv("autosomeIndex") |
189 | 196 |
pnsa <- getVarInEnv("pnsa") |
190 | 197 |
pnsb <- getVarInEnv("pnsb") |
... | ... |
@@ -195,6 +202,8 @@ processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR, |
195 | 202 |
SMEDIAN <- getVarInEnv("SMEDIAN") |
196 | 203 |
theKnots <- getVarInEnv("theKnots") |
197 | 204 |
gns <- getVarInEnv("gns") |
205 |
+ rm(list=c(obj1, obj2, obj3), envir=.crlmmPkgEnv) |
|
206 |
+ rm(obj1, obj2, obj3) |
|
198 | 207 |
|
199 | 208 |
## for mixture |
200 | 209 |
set.seed(seed) |
... | ... |
@@ -222,8 +231,10 @@ processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR, |
222 | 231 |
S <- (log2(A[idx,k])+log2(B[idx, k]))/2 - SMEDIAN |
223 | 232 |
M <- log2(A[idx, k])-log2(B[idx, k]) |
224 | 233 |
tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps) |
234 |
+ rm(S, M) |
|
225 | 235 |
mixtureParams[, k] <- tmp[["coef"]] |
226 | 236 |
SNR[k] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2) |
237 |
+ rm(tmp) |
|
227 | 238 |
} else { |
228 | 239 |
mixtureParams[, k] <- NA |
229 | 240 |
SNR[k] <- NA |
... | ... |
@@ -234,5 +245,7 @@ processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR, |
234 | 245 |
close(SKW) |
235 | 246 |
close(mixtureParams) |
236 | 247 |
close(SNR) |
248 |
+ rm(list=ls()) |
|
249 |
+ gc(verbose=FALSE) |
|
237 | 250 |
TRUE |
238 | 251 |
} |