Browse code

Fixed problem with memory management

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@46584 bc3139a8-67e5-0310-9ffc-ced21a209358

Benilton Carvalho authored on 27/04/2010 14:40:30
Showing 4 changed files

... ...
@@ -516,4 +516,6 @@ function (which expects ff objects and supports parallel processing)
516 516
 2010-04-16 B Carvalho committed version 1.5.49
517 517
 ** cosmetics - looking for cause of memory spike
518 518
 
519
+2010-04-24 B Carvalho committed version 1.7.1
520
+** fixed bug in gender prediction that cause a spike in memory usage
519 521
 
... ...
@@ -1,7 +1,7 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.7.0
4
+Version: 1.7.1
5 5
 Date: 2010-04-16
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>
7 7
 Maintainer: Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
... ...
@@ -316,6 +316,27 @@ crlmm2 <- function(filenames, row.names=TRUE, col.names=TRUE,
316 316
   return(list2SnpSet(res2, returnParams=returnParams))
317 317
 }
318 318
 
319
+predictGender <- function(cols, theA, theB, XIndex){
320
+  n <- length(cols)
321
+  med <- numeric(n)
322
+  if (n > 0){
323
+    open(theA)
324
+    open(theB)
325
+    for (i in 1:n){
326
+      vA <- log2(theA[XIndex, cols[i]])
327
+      vB <- log2(theB[XIndex, cols[i]])
328
+      med[i] <- median(vA+vB)/2
329
+      rm(vA, vB)
330
+    }
331
+    close(theA)
332
+    close(theB)
333
+    rm(theA, theB)
334
+  }
335
+  rm(n)
336
+  gc(verbose=FALSE)
337
+  med
338
+}
339
+
319 340
 crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL,
320 341
                      col.names=NULL, probs=c(1/3, 1/3, 1/3), DF=6,
321 342
                      SNRMin=5, recallMin=10, recallRegMin=1000,
... ...
@@ -337,8 +358,8 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL,
337 358
   stopifnot(require(pkgname, character.only=TRUE, quietly=!verbose))
338 359
 
339 360
   if(verbose) message("Loading annotations.")
340
-  loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
341
-  loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
361
+  obj1 <- loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
362
+  obj2 <- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
342 363
   ## this is toget rid of the 'no visible binding' notes
343 364
   ## variable definitions
344 365
   XIndex <- getVarInEnv("XIndex")
... ...
@@ -348,12 +369,16 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL,
348 369
   theKnots <- getVarInEnv("theKnots")
349 370
   regionInfo <- getVarInEnv("regionInfo")
350 371
   params <- getVarInEnv("params")
372
+  rm(list=c(obj1, obj2), envir=.crlmmPkgEnv)
373
+  rm(obj1, obj2)
351 374
   
352 375
   ## IF gender not provide, we predict
353 376
   ## FIXME: XIndex may be greater than ocProbesets()
354 377
   if(is.null(gender)){
355 378
     if(verbose) message("Determining gender.")
356
-    XMedian <- apply(log2(A[XIndex,, drop=FALSE])+log2(B[XIndex,, drop=FALSE]), 2, median)/2
379
+##    XMedian <- apply(log2(A[XIndex,, drop=FALSE])+log2(B[XIndex,, drop=FALSE]), 2, median)/2
380
+    XMedian <- ocLapply(splitIndicesByNode(1:NC), predictGender, theA=A, theB=B, XIndex=XIndex, neededPkgs="crlmm")
381
+    XMedian <- unlist(XMedian)
357 382
     if(sum(SNR[] > SNRMin)==1){
358 383
       gender <- which.min(c(abs(XMedian-8.9), abs(XMedian-9.5)))
359 384
     }else{
... ...
@@ -403,8 +428,8 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL,
403 428
                         sapply(cIndexes, length), SMEDIAN,
404 429
                         theKnots, mixtureParams[], DF, probs, 0.025)
405 430
     
406
-    last <- rSnps[2]
407
-    rm(snps, rSnps, IndexesBatch, tmpA, tmpB)
431
+    rm(snps, rSnps, IndexesBatch, tmpA, tmpB, last)
432
+    gc(verbose=FALSE)
408 433
     close(A)
409 434
     close(B)
410 435
     close(mixtureParams)
... ...
@@ -425,7 +450,7 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL,
425 450
   newparams[["centers"]] <- do.call("rbind", lapply(newparamsBatch, "[[", 1))
426 451
   newparams[["scales"]] <- do.call("rbind", lapply(newparamsBatch, "[[", 2))
427 452
   newparams[["N"]] <- do.call("rbind", lapply(newparamsBatch, "[[", 3))
428
-  rm(newparamsBatch)
453
+  rm(newparamsBatch); gc(verbose=FALSE)
429 454
   if(verbose) message("Done.")
430 455
   if(verbose) message("Estimating recalibration parameters.")
431 456
   d <- newparams[["centers"]] - params$centers
... ...
@@ -460,6 +485,7 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL,
460 485
       mu <- newparams[["centers"]][i, ]
461 486
       j <- which(is.na(mu))
462 487
       newparams[["centers"]][i, j] <- c(1, mu[-j], prod(mu[-j]))%*%regParams[, j]
488
+      rm(mu, j)
463 489
     }
464 490
     
465 491
     ##remaing NAs are made like originals
... ...
@@ -534,8 +560,8 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL,
534 560
                             which(regionInfo[snps, 1]))
535 561
     A[snps,] <- tmpA
536 562
     B[snps,] <- tmpB
537
-    last <- rSnps[2]
538
-    rm(tmpA, tmpB, snps, rSnps, IndexesBatch, ImNull)
563
+    rm(tmpA, tmpB, snps, rSnps, IndexesBatch, ImNull, last)
564
+    gc(verbose=FALSE)
539 565
     close(A)
540 566
     close(B)
541 567
     close(mixtureParams)
... ...
@@ -142,10 +142,12 @@ snprma2 <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE,
142 142
   stopifnot(require(pkgname, character.only=TRUE, quietly=!verbose))
143 143
   
144 144
   if(verbose) message("Loading annotations and mixture model parameters.")
145
-  loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
145
+  obj <- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
146 146
   pnsa <- getVarInEnv("pnsa")
147 147
   pnsb <- getVarInEnv("pnsb")
148 148
   gns <- getVarInEnv("gns")
149
+  rm(list=obj, envir=.crlmmPkgEnv)
150
+  rm(obj)
149 151
   
150 152
   ##We will read each cel file, summarize, and run EM one by one
151 153
   ##We will save parameters of EM to use later
... ...
@@ -173,6 +175,11 @@ snprma2 <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE,
173 175
            mixtureParams=mixtureParams, eps=eps, seed=seed,
174 176
            mixtureSampleSize=mixtureSampleSize, pkgname=pkgname,
175 177
            neededPkgs=c("crlmm", pkgname))
178
+  close(mixtureParams)
179
+  close(SNR)
180
+  close(SKW)
181
+  close(A)
182
+  close(B)
176 183
   
177 184
   list(A=A, B=B, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)
178 185
 }
... ...
@@ -182,9 +189,9 @@ processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR,
182 189
                        mixtureParams, eps, seed, mixtureSampleSize,
183 190
                        pkgname){
184 191
   
185
-  loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
186
-  loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
187
-  loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
192
+  obj1 <- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
193
+  obj2 <- loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
194
+  obj3 <- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
188 195
   autosomeIndex <- getVarInEnv("autosomeIndex")
189 196
   pnsa <- getVarInEnv("pnsa")
190 197
   pnsb <- getVarInEnv("pnsb")
... ...
@@ -195,6 +202,8 @@ processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR,
195 202
   SMEDIAN <- getVarInEnv("SMEDIAN")
196 203
   theKnots <- getVarInEnv("theKnots")
197 204
   gns <- getVarInEnv("gns")
205
+  rm(list=c(obj1, obj2, obj3), envir=.crlmmPkgEnv)
206
+  rm(obj1, obj2, obj3)
198 207
 
199 208
   ## for mixture
200 209
   set.seed(seed)
... ...
@@ -222,8 +231,10 @@ processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR,
222 231
       S <- (log2(A[idx,k])+log2(B[idx, k]))/2 - SMEDIAN
223 232
       M <- log2(A[idx, k])-log2(B[idx, k])
224 233
       tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
234
+      rm(S, M)
225 235
       mixtureParams[, k] <- tmp[["coef"]]
226 236
       SNR[k] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
237
+      rm(tmp)
227 238
     } else {
228 239
       mixtureParams[, k] <- NA
229 240
       SNR[k] <- NA
... ...
@@ -234,5 +245,7 @@ processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR,
234 245
   close(SKW)
235 246
   close(mixtureParams)
236 247
   close(SNR)
248
+  rm(list=ls())
249
+  gc(verbose=FALSE)
237 250
   TRUE
238 251
 }