Browse code

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@49990 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 07/10/2010 07:22:57
Showing4 changed files

... ...
@@ -561,6 +561,7 @@ function (which expects ff objects and supports parallel processing)
561 561
 ** copy number A and B intensities now stored in callSet from genotype.Illumina()
562 562
 
563 563
 
564
-2010-10-06 M. Ritchie 1.7.18
564
+2010-10-06 M. Ritchie 1.7.19
565 565
 ** new internal function processIDAT which uses ocLapply() to parallelize pre-processing of Illumina data
566 566
 ** changes to genotype.Illumina()
567
+** updated vignettes - crlmmIllumina.Rnw and crlmmIllumina.pdf
... ...
@@ -510,6 +510,9 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
510 510
 
511 511
   is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
512 512
   if(is.lds) {
513
+    close(RG@assayData$G)
514
+    close(RG@assayData$R)
515
+    close(RG@assayData$zero)    
513 516
     close(XY@assayData$X)
514 517
     close(XY@assayData$Y)
515 518
     close(XY@assayData$zero)
... ...
@@ -722,6 +725,9 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
722 725
 #    if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".")
723 726
 #  }
724 727
   if(is.lds) {
728
+    close(XY@assayData$X)
729
+    close(XY@assayData$Y)
730
+    close(XY@assayData$zero)    
725 731
     close(A)
726 732
     close(B)
727 733
     close(zero)
... ...
@@ -888,6 +894,12 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
888 894
 
889 895
   res2[["SNR"]] <- res[["SNR"]]
890 896
   res2[["SKW"]] <- res[["SKW"]]
897
+#  if(is.lds) {
898
+#    delete(res[["A"]])
899
+#    delete(res[["B"]])
900
+#    delete(res[["SNR"]])
901
+#    delete(res[["mixtureParams"]])
902
+#  }
891 903
   rm(res); gc()
892 904
   return(list2SnpSet(res2, returnParams=returnParams))
893 905
 }
... ...
@@ -917,6 +929,9 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
917 929
 
918 930
   if(missing(cdfName)) stop("must specify cdfName")
919 931
   if(!isValidCdfName(cdfName)) stop("cdfName not valid.  see validCdfNames")
932
+  
933
+  is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
934
+
920 935
 #  if(missing(sns)) sns <- basename(arrayNames)
921 936
 
922 937
 #  if (save.rg & missing(rgFile))
... ...
@@ -944,25 +959,33 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
944 959
 #	save(RG, file=rgFile)
945 960
 
946 961
     XY = RGtoXY(RG, chipType=cdfName)
962
+    if(is.lds) {
963
+      open(RG@assayData$R); open(RG@assayData$G); open(RG@assayData$zero)
964
+      delete(RG@assayData$R); delete(RG@assayData$G); delete(RG@assayData$zero)
965
+    }
947 966
     rm(RG); gc()
967
+  
948 968
     if (missing(sns)) { sns = sampleNames(XY) #subsns = sampleNames(XY)
949 969
     } # else subsns = sns[j]
950 970
     res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
951 971
                                seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #,
952 972
 #                               save.it=save.it, snpFile=snpFile, cnFile=cnFile)
953 973
 
954
-    is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
955
-
956
-    if(is.lds) {
957
-      open(res[["A"]])
958
-      open(res[["B"]])
959
-      open(res[["SNR"]])
960
-      open(res[["mixtureParams"]])
961
-    }
974
+#    if(is.lds) {
975
+#      open(res[["A"]])
976
+#      open(res[["B"]])
977
+#      open(res[["SNR"]])
978
+#      open(res[["mixtureParams"]])
979
+#    }
962 980
   
963 981
 #    fD = featureData(XY)
964 982
 #    phenD = XY@phenoData
965 983
 #    protD = XY@protocolData
984
+
985
+    if(is.lds) {
986
+      open(XY@assayData$X); open(XY@assayData$Y); open(XY@assayData$zero)
987
+      delete(XY@assayData$X); delete(XY@assayData$Y); delete(XY@assayData$zero)
988
+    }
966 989
     rm(XY); gc()
967 990
 #    if(k == 1){
968 991
 #    if(verbose) message("Initializing container for alleleA, alleleB, call, callProbability")
... ...
@@ -1063,9 +1086,16 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
1063 1086
 #    callSet$gender <- tmp$gender
1064 1087
 #    rm(tmp); gc()
1065 1088
 #    return(callSet)
1089
+  if(is.lds) {
1090
+     open(res[["SNR"]]); open(res[["SKW"]])
1066 1091
 
1092
+  }
1067 1093
   res2[["SNR"]] <- res[["SNR"]]
1068 1094
   res2[["SKW"]] <- res[["SKW"]]
1095
+#  if(is.lds) {
1096
+#    delete(res[["A"]]); delete(res[["B"]])
1097
+#    delete(res[["SKW"]]); delete(res[["SNR"]]); delete(res[["mixtureParams"]])
1098
+#  }
1069 1099
   rm(res); gc()
1070 1100
   return(list2SnpSet(res2, returnParams=returnParams))
1071 1101
 #    tmp <- crlmmGT(A=as.matrix(A(callSet)[snp.index,]), # j]),
... ...
@@ -1353,6 +1383,8 @@ genotype.Illumina <- function(sampleSheet=NULL,
1353 1383
           bb = ocProbesets()*length(sns)*8
1354 1384
 	  ffrowapply(tmpA[i1:i2, ] <- A(callSet)[snp.index,][i1:i2, ], X=A(callSet)[snp.index,], BATCHBYTES=bb)
1355 1385
 	  ffrowapply(tmpB[i1:i2, ] <- B(callSet)[snp.index,][i1:i2, ], X=B(callSet)[snp.index,], BATCHBYTES=bb)
1386
+          close(A(callSet))
1387
+          close(B(callSet))
1356 1388
           close(tmpA)
1357 1389
           close(tmpB)
1358 1390
         } else {
1359 1391
Binary files a/inst/doc/crlmmIllumina.pdf and b/inst/doc/crlmmIllumina.pdf differ
... ...
@@ -45,11 +45,11 @@ which have been analyzed using Illumina's 370k Duo BeadChips.
45 45
 This data is available in the \Rpackage{hapmap370k} package.  
46 46
 Additional chip-specific model parameters and basic SNP annotation 
47 47
 information used by CRLMM is stored in the \Rpackage{human370v1cCrlmm} package.
48
-These packages can be installed in the usual way using the \Rfunction{biocLite} function.
48
+The required packages can be installed in the usual way using the \Rfunction{biocLite} function.
49 49
 
50 50
 <<<echo=TRUE, results=hide, eval=FALSE>>=
51 51
 source("http://www.bioconductor.org/biocLite.R")
52
-biocLite(c("hapmap370k", "human370v1cCrlmm"))
52
+biocLite(c("crlmm", "hapmap370k", "human370v1cCrlmm"))
53 53
 @ 
54 54
 
55 55
 \section{Reading in data} 
... ...
@@ -80,6 +80,8 @@ RG = readIdatFiles(samples, path=data.dir, arrayInfoColNames=list(barcode=NULL,
80 80
 
81 81
 Reading in this data takes approximately 90 seconds and peak memory usage 
82 82
 was 1.2 GB of RAM on our linux system.
83
+If memory is limiting, load the \Rpackage{ff} package and run the same command.
84
+When this package is available, the objects are stored using disk rather then RAM.
83 85
 The \Robject{RG} object is an \Rclass{NChannelSet} which stores the 
84 86
 Red and Green intensities, the number of beads and standard errors for 
85 87
 each bead-type.