Browse code

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@44484 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 04/02/2010 04:09:27
Showing 5 changed files

... ...
@@ -363,9 +363,17 @@ is decoded and scanned
363 363
 
364 364
  * Added 'humanomni1quadv1b" to chip types allowed in RGtoXY()
365 365
 
366
-
367 366
 2010-01-20 R. Scharpf - committed version 1.5.21
368 367
  * Added CopyNumberVariants to DESCRIPTION
369 368
 
370 369
 2010-01-20 R. Scharpf - committed version 1.5.22
371 370
  * restored inst/doc/crlmmDownstream.Rnw
371
+
372
+2010-02-04 M. Ritchie - committed version 1.5.23
373
+ * crlmmIllumina now exported in NAMESPACE (removed at some point)
374
+ * new function crlmmIlluminaV2() which reads in .idats and genotypes 
375
+in the one function to reduce memory usage (not exported as yet)
376
+ * readIdatFiles() modified to no longer store number of beads and beads SE
377
+to save memory.  Instead, 'zero', which indicates which SNPs have zero beads 
378
+is stored in the assayData slot.
379
+ * now store 'zero' with copy number AB intensities in preprocessInfinium2()
... ...
@@ -1,8 +1,8 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.5.22
5
-Date: 2010-01-20
4
+Version: 1.5.23
5
+Date: 2010-02-04
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>
7 7
 Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
8 8
 Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 5.0, 6.0, and Illumina platforms
... ...
@@ -50,7 +50,7 @@ importFrom(mvtnorm, dmvnorm)
50 50
 importFrom(ellipse, ellipse)
51 51
 
52 52
 exportMethods(A, B, copyNumber)
53
-export(cnOptions, crlmm, crlmmCopynumber, ellipse, readIdatFiles, snprma, getParam) 
53
+export(cnOptions, crlmm, crlmmIllumina, crlmmCopynumber, ellipse, readIdatFiles, snprma, getParam) 
54 54
 
55 55
 ##export(##viterbi.CNSet, ##move to VanillaICE
56 56
 ##	combineIntensities,
... ...
@@ -119,10 +119,11 @@ readIdatFiles <- function(sampleSheet=NULL,
119 119
 		       RG <- new("NChannelSet",
120 120
 				 R=tmpmat,
121 121
 				 G=tmpmat,
122
-				 Rnb=tmpmat,
123
-				 Gnb=tmpmat,
124
-				 Rse=tmpmat,
125
-				 Gse=tmpmat,
122
+				 zero=tmpmat,
123
+#				 Rnb=tmpmat,
124
+#				 Gnb=tmpmat,
125
+#				 Rse=tmpmat,
126
+#				 Gse=tmpmat,
126 127
 				 annotation=headerInfo$Manifest[1],
127 128
 				 phenoData=pd,
128 129
 				 storage.mode="environment")
... ...
@@ -132,14 +133,16 @@ readIdatFiles <- function(sampleSheet=NULL,
132 133
 	       if(length(ids)==length(idsG)) {
133 134
 		       if(sum(ids==idsG)==nprobes) {
134 135
 			       RG@assayData$G[,i] = G$Quants[, "Mean"]
135
-			       RG@assayData$Gnb[,i] = G$Quants[, "NBeads"]
136
-			       RG@assayData$Gse[,i] = G$Quants[, "SD"]
136
+				   zeroG = G$Quants[, "NBeads"]==0
137
+#			       RG@assayData$Gnb[,i] = G$Quants[, "NBeads"]
138
+#			       RG@assayData$Gse[,i] = G$Quants[, "SD"]
137 139
 		       }
138 140
 	       } else {
139 141
 		       indG = match(ids, idsG)
140 142
 		       RG@assayData$G[,i] = G$Quants[indG, "Mean"]
141
-		       RG@assayData$Gnb[,i] = G$Quants[indG, "NBeads"]
142
-		       RG@assayData$Gse[,i] = G$Quants[indG, "SD"]
143
+			   zeroG = G$Quants[indG, "NBeads"]==0
144
+#		       RG@assayData$Gnb[,i] = G$Quants[indG, "NBeads"]
145
+#		       RG@assayData$Gse[,i] = G$Quants[indG, "SD"]
143 146
 	       }
144 147
 	       rm(G)
145 148
 	       gc()
... ...
@@ -151,16 +154,19 @@ readIdatFiles <- function(sampleSheet=NULL,
151 154
 	       if(length(ids)==length(idsG)) {   
152 155
 		       if(sum(ids==idsR)==nprobes) {
153 156
 			       RG@assayData$R[,i] = R$Quants[ ,"Mean"]
154
-			       RG@assayData$Rnb[,i] = R$Quants[ ,"NBeads"]
155
-			       RG@assayData$Rse[,i] = R$Quants[ ,"SD"]
157
+				   zeroR = R$Quants[ ,"NBeads"]==0
158
+#			       RG@assayData$Rnb[,i] = R$Quants[ ,"NBeads"]
159
+#			       RG@assayData$Rse[,i] = R$Quants[ ,"SD"]
156 160
 		       }
157 161
 	       } else {
158 162
 		       indR = match(ids, idsR)
159 163
 		       RG@assayData$R[,i] = R$Quants[indR, "Mean"]
160
-		       RG@assayData$Rnb[,i] = R$Quants[indR, "NBeads"]
161
-		       RG@assayData$Rse[,i] = R$Quants[indR, "SD"]
164
+			   zeroR = R$Quants[indR, "NBeads"]==0
165
+#		       RG@assayData$Rnb[,i] = R$Quants[indR, "NBeads"]
166
+#		       RG@assayData$Rse[,i] = R$Quants[indR, "SD"]
162 167
 	       }
163
-	       rm(R)
168
+		   RG@assayData$zero[,i] = zeroG | zeroR
169
+	       rm(R, zeroG, zeroR)
164 170
 	       gc()
165 171
        }
166 172
        if(saveDate) {
... ...
@@ -482,7 +488,7 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
482 488
   tmpmat = matrix(0, nsnps, narrays)
483 489
   rownames(tmpmat) = ids
484 490
   colnames(tmpmat) = sampleNames(RG)
485
-  XY = new("NChannelSet", X=tmpmat, Y=tmpmat, Xnb=tmpmat, Ynb=tmpmat, Xse=tmpmat, Yse=tmpmat, zero=tmpmat,
491
+  XY = new("NChannelSet", X=tmpmat, Y=tmpmat, zero=tmpmat, # Xnb=tmpmat, Ynb=tmpmat, Xse=tmpmat, Yse=tmpmat, zero=tmpmat,
486 492
                  annotation=chipType, phenoData=RG@phenoData, protocolData=RG@protocolData, storage.mode="environment")
487 493
   rm(tmpmat)
488 494
   gc()
... ...
@@ -490,10 +496,11 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
490 496
   # First sort out Infinium II SNPs, X -> R (allele A)  and Y -> G (allele B) from the same probe
491 497
   XY@assayData$X[!is.na(aord),] = exprs(channel(RG, "R"))[aord[!is.na(aord)],] # mostly red
492 498
   XY@assayData$Y[!is.na(aord),] = exprs(channel(RG, "G"))[aord[!is.na(aord)],] # mostly green
493
-  XY@assayData$Xnb[!is.na(aord),] = exprs(channel(RG, "Rnb"))[aord[!is.na(aord)],]
494
-  XY@assayData$Ynb[!is.na(aord),] = exprs(channel(RG, "Gnb"))[aord[!is.na(aord)],]
495
-  XY@assayData$Xse[!is.na(aord),] = exprs(channel(RG, "Rse"))[aord[!is.na(aord)],]
496
-  XY@assayData$Yse[!is.na(aord),] = exprs(channel(RG, "Gse"))[aord[!is.na(aord)],]
499
+  XY@assayData$zero[!is.na(aord),] = exprs(channel(RG, "zero"))[aord[!is.na(aord)],] # mostly green
500
+#  XY@assayData$Xnb[!is.na(aord),] = exprs(channel(RG, "Rnb"))[aord[!is.na(aord)],]
501
+#  XY@assayData$Ynb[!is.na(aord),] = exprs(channel(RG, "Gnb"))[aord[!is.na(aord)],]
502
+#  XY@assayData$Xse[!is.na(aord),] = exprs(channel(RG, "Rse"))[aord[!is.na(aord)],]
503
+#  XY@assayData$Yse[!is.na(aord),] = exprs(channel(RG, "Gse"))[aord[!is.na(aord)],]
497 504
   gc()
498 505
   
499 506
   ## Warning - not 100% sure that the code below is correct - could be more complicated than this
... ...
@@ -521,12 +528,13 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
521 528
   #  For now zero out Infinium I probes
522 529
   XY@assayData$X[infI,] = 0
523 530
   XY@assayData$Y[infI,] = 0
524
-  XY@assayData$Xnb[infI,] = 0
525
-  XY@assayData$Ynb[infI,] = 0
526
-  XY@assayData$Xse[infI,] = 0
527
-  XY@assayData$Yse[infI,] = 0
531
+  XY@assayData$zero[infI,] = 0  
532
+#  XY@assayData$Xnb[infI,] = 0
533
+#  XY@assayData$Ynb[infI,] = 0
534
+#  XY@assayData$Xse[infI,] = 0
535
+#  XY@assayData$Yse[infI,] = 0
528 536
 
529
-  XY@assayData$zero[XY@assayData$X==0 | XY@assayData$Y==0] = 1
537
+#  XY@assayData$zero[XY@assayData$X==0 | XY@assayData$Y==0] = 1
530 538
   gc()
531 539
 
532 540
 #  storageMode(XY) = "lockedEnvironment"
... ...
@@ -644,7 +652,15 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
644 652
   narrays = ncol(XY)
645 653
   A <- matrix(as.integer(exprs(channel(XY, "X"))[npIndex,]), nprobes, narrays)
646 654
   B <- matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays)
647
-  cnAB = list(A=A, B=B, sns=sns, gns=names(npIndex), cdfName=cdfName)
655
+
656
+  # new lines below - useful to keep track of zeroed out probes
657
+  zero <- matrix(as.integer(exprs(channel(XY, "zero"))[npIndex,]), nprobes, narrays) 
658
+
659
+  colnames(A) <- colnames(B) <- colnames(zero) <- sns
660
+  rownames(A) <- rownames(B) <- rownames(zero) <- names(npIndex)
661
+  
662
+  cnAB = list(A=A, B=B, zero=zero, sns=sns, gns=names(npIndex), cdfName=cdfName)
663
+  rm(A, B, zero)
648 664
   
649 665
   # next process snp probes
650 666
   snpIndex = getVarInEnv("snpProbesFid")
... ...
@@ -768,3 +784,64 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
768 784
   res2[["SKW"]] <- res[["SKW"]]
769 785
   return(list2SnpSet(res2, returnParams=returnParams)) # return(res2)
770 786
 }
787
+
788
+## MR: Below is a more memory efficient version of crlmmIllumina() which 
789
+## reads in the .idats and genotypes in the one function and removes objects 
790
+## after they have been used
791
+crlmmIlluminaV2 = function(sampleSheet=NULL,
792
+			  arrayNames=NULL,
793
+			  ids=NULL,
794
+			  path=".",
795
+			  arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
796
+			  highDensity=FALSE,
797
+			  sep="_",
798
+			  fileExt=list(green="Grn.idat", red="Red.idat"),
799
+			  saveDate=FALSE,
800
+              save.rg=FALSE,
801
+              rgFile,
802
+			  stripNorm=TRUE,
803
+			  useTarget=TRUE,
804
+          	  row.names=TRUE, 
805
+			  col.names=TRUE,
806
+			  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
807
+              seed=1, save.ab=FALSE, abFile,
808
+              mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
809
+              cdfName, sns, recallMin=10, recallRegMin=1000,
810
+              returnParams=FALSE, badSNP=.7) {
811
+			  
812
+  if (save.rg & missing(rgFile))
813
+    stop("'rgFile' is missing, and you chose save.rg")
814
+  if (save.ab & missing(abFile))
815
+    stop("'abFile' is missing, and you chose save.ab")
816
+				  
817
+  RG = readIdatFiles(sampleSheet=sampleSheet, arrayNames=arrayNames,
818
+                       ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
819
+                       highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate)
820
+  if(save.rg)
821
+	save(RG, file=rgFile)
822
+
823
+  XY = RGtoXY(RG, chipType=cdfName)
824
+  rm(RG)
825
+  gc()
826
+  if (missing(sns)) sns = sampleNames(XY)
827
+    
828
+  res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
829
+                        seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget,
830
+                        save.it=save.ab, intensityFile=abFile)
831
+  rm(XY)
832
+  gc()
833
+  if(row.names) row.names=res$gns else row.names=NULL
834
+  if(col.names) col.names=res$sns else col.names=NULL
835
+
836
+  res2 = crlmmGT(res[["A"]], res[["B"]], res[["SNR"]],
837
+                  res[["mixtureParams"]], res[["cdfName"]],
838
+                  gender=gender, row.names=row.names,
839
+                  col.names=col.names, recallMin=recallMin,
840
+                  recallRegMin=1000, SNRMin=SNRMin,
841
+                  returnParams=returnParams, badSNP=badSNP,
842
+                  verbose=verbose)
843
+  
844
+  res2[["SNR"]] = res[["SNR"]]
845
+  res2[["SKW"]] = res[["SKW"]]
846
+  return(list2SnpSet(res2, returnParams=returnParams))
847
+}
... ...
@@ -46,8 +46,7 @@ readIdatFiles(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path="",
46 46
 }
47 47
 
48 48
 \details{
49
-The summarised Cy3 (G) and Cy5 (R) intensity, number of beads that
50
-were used in each channel and standard errors (all on the orginal scale)
49
+The summarised Cy3 (G) and Cy5 (R) intensities (on the orginal scale)
51 50
 are read in from the .idat files.
52 51
 
53 52
 Where available, a \code{sampleSheet} data.frame, in the same format
... ...
@@ -59,9 +58,8 @@ Thanks to Keith Baggerly who provided the code to read in the binary .idat files
59 58
 }
60 59
 
61 60
 \value{
62
-  NChannelSet with intensity data (\code{R}, \code{G}), number of beads
63
-  (\code{Rnb}, \code{Gnb}) and standard errors (\code{Rse}, \code{Gse})
64
-  for each bead type.
61
+  NChannelSet with intensity data (\code{R}, \code{G}), and indicator 
62
+  for SNPs with 0 beads (\code{zero}) for each bead type.
65 63
 }
66 64
 
67 65
 \author{Matt Ritchie}