... | ... |
@@ -363,9 +363,17 @@ is decoded and scanned |
363 | 363 |
|
364 | 364 |
* Added 'humanomni1quadv1b" to chip types allowed in RGtoXY() |
365 | 365 |
|
366 |
- |
|
367 | 366 |
2010-01-20 R. Scharpf - committed version 1.5.21 |
368 | 367 |
* Added CopyNumberVariants to DESCRIPTION |
369 | 368 |
|
370 | 369 |
2010-01-20 R. Scharpf - committed version 1.5.22 |
371 | 370 |
* restored inst/doc/crlmmDownstream.Rnw |
371 |
+ |
|
372 |
+2010-02-04 M. Ritchie - committed version 1.5.23 |
|
373 |
+ * crlmmIllumina now exported in NAMESPACE (removed at some point) |
|
374 |
+ * new function crlmmIlluminaV2() which reads in .idats and genotypes |
|
375 |
+in the one function to reduce memory usage (not exported as yet) |
|
376 |
+ * readIdatFiles() modified to no longer store number of beads and beads SE |
|
377 |
+to save memory. Instead, 'zero', which indicates which SNPs have zero beads |
|
378 |
+is stored in the assayData slot. |
|
379 |
+ * now store 'zero' with copy number AB intensities in preprocessInfinium2() |
... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
Package: crlmm |
2 | 2 |
Type: Package |
3 | 3 |
Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays. |
4 |
-Version: 1.5.22 |
|
5 |
-Date: 2010-01-20 |
|
4 |
+Version: 1.5.23 |
|
5 |
+Date: 2010-02-04 |
|
6 | 6 |
Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au> |
7 | 7 |
Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
8 | 8 |
Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 5.0, 6.0, and Illumina platforms |
... | ... |
@@ -50,7 +50,7 @@ importFrom(mvtnorm, dmvnorm) |
50 | 50 |
importFrom(ellipse, ellipse) |
51 | 51 |
|
52 | 52 |
exportMethods(A, B, copyNumber) |
53 |
-export(cnOptions, crlmm, crlmmCopynumber, ellipse, readIdatFiles, snprma, getParam) |
|
53 |
+export(cnOptions, crlmm, crlmmIllumina, crlmmCopynumber, ellipse, readIdatFiles, snprma, getParam) |
|
54 | 54 |
|
55 | 55 |
##export(##viterbi.CNSet, ##move to VanillaICE |
56 | 56 |
## combineIntensities, |
... | ... |
@@ -119,10 +119,11 @@ readIdatFiles <- function(sampleSheet=NULL, |
119 | 119 |
RG <- new("NChannelSet", |
120 | 120 |
R=tmpmat, |
121 | 121 |
G=tmpmat, |
122 |
- Rnb=tmpmat, |
|
123 |
- Gnb=tmpmat, |
|
124 |
- Rse=tmpmat, |
|
125 |
- Gse=tmpmat, |
|
122 |
+ zero=tmpmat, |
|
123 |
+# Rnb=tmpmat, |
|
124 |
+# Gnb=tmpmat, |
|
125 |
+# Rse=tmpmat, |
|
126 |
+# Gse=tmpmat, |
|
126 | 127 |
annotation=headerInfo$Manifest[1], |
127 | 128 |
phenoData=pd, |
128 | 129 |
storage.mode="environment") |
... | ... |
@@ -132,14 +133,16 @@ readIdatFiles <- function(sampleSheet=NULL, |
132 | 133 |
if(length(ids)==length(idsG)) { |
133 | 134 |
if(sum(ids==idsG)==nprobes) { |
134 | 135 |
RG@assayData$G[,i] = G$Quants[, "Mean"] |
135 |
- RG@assayData$Gnb[,i] = G$Quants[, "NBeads"] |
|
136 |
- RG@assayData$Gse[,i] = G$Quants[, "SD"] |
|
136 |
+ zeroG = G$Quants[, "NBeads"]==0 |
|
137 |
+# RG@assayData$Gnb[,i] = G$Quants[, "NBeads"] |
|
138 |
+# RG@assayData$Gse[,i] = G$Quants[, "SD"] |
|
137 | 139 |
} |
138 | 140 |
} else { |
139 | 141 |
indG = match(ids, idsG) |
140 | 142 |
RG@assayData$G[,i] = G$Quants[indG, "Mean"] |
141 |
- RG@assayData$Gnb[,i] = G$Quants[indG, "NBeads"] |
|
142 |
- RG@assayData$Gse[,i] = G$Quants[indG, "SD"] |
|
143 |
+ zeroG = G$Quants[indG, "NBeads"]==0 |
|
144 |
+# RG@assayData$Gnb[,i] = G$Quants[indG, "NBeads"] |
|
145 |
+# RG@assayData$Gse[,i] = G$Quants[indG, "SD"] |
|
143 | 146 |
} |
144 | 147 |
rm(G) |
145 | 148 |
gc() |
... | ... |
@@ -151,16 +154,19 @@ readIdatFiles <- function(sampleSheet=NULL, |
151 | 154 |
if(length(ids)==length(idsG)) { |
152 | 155 |
if(sum(ids==idsR)==nprobes) { |
153 | 156 |
RG@assayData$R[,i] = R$Quants[ ,"Mean"] |
154 |
- RG@assayData$Rnb[,i] = R$Quants[ ,"NBeads"] |
|
155 |
- RG@assayData$Rse[,i] = R$Quants[ ,"SD"] |
|
157 |
+ zeroR = R$Quants[ ,"NBeads"]==0 |
|
158 |
+# RG@assayData$Rnb[,i] = R$Quants[ ,"NBeads"] |
|
159 |
+# RG@assayData$Rse[,i] = R$Quants[ ,"SD"] |
|
156 | 160 |
} |
157 | 161 |
} else { |
158 | 162 |
indR = match(ids, idsR) |
159 | 163 |
RG@assayData$R[,i] = R$Quants[indR, "Mean"] |
160 |
- RG@assayData$Rnb[,i] = R$Quants[indR, "NBeads"] |
|
161 |
- RG@assayData$Rse[,i] = R$Quants[indR, "SD"] |
|
164 |
+ zeroR = R$Quants[indR, "NBeads"]==0 |
|
165 |
+# RG@assayData$Rnb[,i] = R$Quants[indR, "NBeads"] |
|
166 |
+# RG@assayData$Rse[,i] = R$Quants[indR, "SD"] |
|
162 | 167 |
} |
163 |
- rm(R) |
|
168 |
+ RG@assayData$zero[,i] = zeroG | zeroR |
|
169 |
+ rm(R, zeroG, zeroR) |
|
164 | 170 |
gc() |
165 | 171 |
} |
166 | 172 |
if(saveDate) { |
... | ... |
@@ -482,7 +488,7 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
482 | 488 |
tmpmat = matrix(0, nsnps, narrays) |
483 | 489 |
rownames(tmpmat) = ids |
484 | 490 |
colnames(tmpmat) = sampleNames(RG) |
485 |
- XY = new("NChannelSet", X=tmpmat, Y=tmpmat, Xnb=tmpmat, Ynb=tmpmat, Xse=tmpmat, Yse=tmpmat, zero=tmpmat, |
|
491 |
+ XY = new("NChannelSet", X=tmpmat, Y=tmpmat, zero=tmpmat, # Xnb=tmpmat, Ynb=tmpmat, Xse=tmpmat, Yse=tmpmat, zero=tmpmat, |
|
486 | 492 |
annotation=chipType, phenoData=RG@phenoData, protocolData=RG@protocolData, storage.mode="environment") |
487 | 493 |
rm(tmpmat) |
488 | 494 |
gc() |
... | ... |
@@ -490,10 +496,11 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
490 | 496 |
# First sort out Infinium II SNPs, X -> R (allele A) and Y -> G (allele B) from the same probe |
491 | 497 |
XY@assayData$X[!is.na(aord),] = exprs(channel(RG, "R"))[aord[!is.na(aord)],] # mostly red |
492 | 498 |
XY@assayData$Y[!is.na(aord),] = exprs(channel(RG, "G"))[aord[!is.na(aord)],] # mostly green |
493 |
- XY@assayData$Xnb[!is.na(aord),] = exprs(channel(RG, "Rnb"))[aord[!is.na(aord)],] |
|
494 |
- XY@assayData$Ynb[!is.na(aord),] = exprs(channel(RG, "Gnb"))[aord[!is.na(aord)],] |
|
495 |
- XY@assayData$Xse[!is.na(aord),] = exprs(channel(RG, "Rse"))[aord[!is.na(aord)],] |
|
496 |
- XY@assayData$Yse[!is.na(aord),] = exprs(channel(RG, "Gse"))[aord[!is.na(aord)],] |
|
499 |
+ XY@assayData$zero[!is.na(aord),] = exprs(channel(RG, "zero"))[aord[!is.na(aord)],] # mostly green |
|
500 |
+# XY@assayData$Xnb[!is.na(aord),] = exprs(channel(RG, "Rnb"))[aord[!is.na(aord)],] |
|
501 |
+# XY@assayData$Ynb[!is.na(aord),] = exprs(channel(RG, "Gnb"))[aord[!is.na(aord)],] |
|
502 |
+# XY@assayData$Xse[!is.na(aord),] = exprs(channel(RG, "Rse"))[aord[!is.na(aord)],] |
|
503 |
+# XY@assayData$Yse[!is.na(aord),] = exprs(channel(RG, "Gse"))[aord[!is.na(aord)],] |
|
497 | 504 |
gc() |
498 | 505 |
|
499 | 506 |
## Warning - not 100% sure that the code below is correct - could be more complicated than this |
... | ... |
@@ -521,12 +528,13 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
521 | 528 |
# For now zero out Infinium I probes |
522 | 529 |
XY@assayData$X[infI,] = 0 |
523 | 530 |
XY@assayData$Y[infI,] = 0 |
524 |
- XY@assayData$Xnb[infI,] = 0 |
|
525 |
- XY@assayData$Ynb[infI,] = 0 |
|
526 |
- XY@assayData$Xse[infI,] = 0 |
|
527 |
- XY@assayData$Yse[infI,] = 0 |
|
531 |
+ XY@assayData$zero[infI,] = 0 |
|
532 |
+# XY@assayData$Xnb[infI,] = 0 |
|
533 |
+# XY@assayData$Ynb[infI,] = 0 |
|
534 |
+# XY@assayData$Xse[infI,] = 0 |
|
535 |
+# XY@assayData$Yse[infI,] = 0 |
|
528 | 536 |
|
529 |
- XY@assayData$zero[XY@assayData$X==0 | XY@assayData$Y==0] = 1 |
|
537 |
+# XY@assayData$zero[XY@assayData$X==0 | XY@assayData$Y==0] = 1 |
|
530 | 538 |
gc() |
531 | 539 |
|
532 | 540 |
# storageMode(XY) = "lockedEnvironment" |
... | ... |
@@ -644,7 +652,15 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
644 | 652 |
narrays = ncol(XY) |
645 | 653 |
A <- matrix(as.integer(exprs(channel(XY, "X"))[npIndex,]), nprobes, narrays) |
646 | 654 |
B <- matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays) |
647 |
- cnAB = list(A=A, B=B, sns=sns, gns=names(npIndex), cdfName=cdfName) |
|
655 |
+ |
|
656 |
+ # new lines below - useful to keep track of zeroed out probes |
|
657 |
+ zero <- matrix(as.integer(exprs(channel(XY, "zero"))[npIndex,]), nprobes, narrays) |
|
658 |
+ |
|
659 |
+ colnames(A) <- colnames(B) <- colnames(zero) <- sns |
|
660 |
+ rownames(A) <- rownames(B) <- rownames(zero) <- names(npIndex) |
|
661 |
+ |
|
662 |
+ cnAB = list(A=A, B=B, zero=zero, sns=sns, gns=names(npIndex), cdfName=cdfName) |
|
663 |
+ rm(A, B, zero) |
|
648 | 664 |
|
649 | 665 |
# next process snp probes |
650 | 666 |
snpIndex = getVarInEnv("snpProbesFid") |
... | ... |
@@ -768,3 +784,64 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
768 | 784 |
res2[["SKW"]] <- res[["SKW"]] |
769 | 785 |
return(list2SnpSet(res2, returnParams=returnParams)) # return(res2) |
770 | 786 |
} |
787 |
+ |
|
788 |
+## MR: Below is a more memory efficient version of crlmmIllumina() which |
|
789 |
+## reads in the .idats and genotypes in the one function and removes objects |
|
790 |
+## after they have been used |
|
791 |
+crlmmIlluminaV2 = function(sampleSheet=NULL, |
|
792 |
+ arrayNames=NULL, |
|
793 |
+ ids=NULL, |
|
794 |
+ path=".", |
|
795 |
+ arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"), |
|
796 |
+ highDensity=FALSE, |
|
797 |
+ sep="_", |
|
798 |
+ fileExt=list(green="Grn.idat", red="Red.idat"), |
|
799 |
+ saveDate=FALSE, |
|
800 |
+ save.rg=FALSE, |
|
801 |
+ rgFile, |
|
802 |
+ stripNorm=TRUE, |
|
803 |
+ useTarget=TRUE, |
|
804 |
+ row.names=TRUE, |
|
805 |
+ col.names=TRUE, |
|
806 |
+ probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
|
807 |
+ seed=1, save.ab=FALSE, abFile, |
|
808 |
+ mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
|
809 |
+ cdfName, sns, recallMin=10, recallRegMin=1000, |
|
810 |
+ returnParams=FALSE, badSNP=.7) { |
|
811 |
+ |
|
812 |
+ if (save.rg & missing(rgFile)) |
|
813 |
+ stop("'rgFile' is missing, and you chose save.rg") |
|
814 |
+ if (save.ab & missing(abFile)) |
|
815 |
+ stop("'abFile' is missing, and you chose save.ab") |
|
816 |
+ |
|
817 |
+ RG = readIdatFiles(sampleSheet=sampleSheet, arrayNames=arrayNames, |
|
818 |
+ ids=ids, path=path, arrayInfoColNames=arrayInfoColNames, |
|
819 |
+ highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate) |
|
820 |
+ if(save.rg) |
|
821 |
+ save(RG, file=rgFile) |
|
822 |
+ |
|
823 |
+ XY = RGtoXY(RG, chipType=cdfName) |
|
824 |
+ rm(RG) |
|
825 |
+ gc() |
|
826 |
+ if (missing(sns)) sns = sampleNames(XY) |
|
827 |
+ |
|
828 |
+ res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose, |
|
829 |
+ seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget, |
|
830 |
+ save.it=save.ab, intensityFile=abFile) |
|
831 |
+ rm(XY) |
|
832 |
+ gc() |
|
833 |
+ if(row.names) row.names=res$gns else row.names=NULL |
|
834 |
+ if(col.names) col.names=res$sns else col.names=NULL |
|
835 |
+ |
|
836 |
+ res2 = crlmmGT(res[["A"]], res[["B"]], res[["SNR"]], |
|
837 |
+ res[["mixtureParams"]], res[["cdfName"]], |
|
838 |
+ gender=gender, row.names=row.names, |
|
839 |
+ col.names=col.names, recallMin=recallMin, |
|
840 |
+ recallRegMin=1000, SNRMin=SNRMin, |
|
841 |
+ returnParams=returnParams, badSNP=badSNP, |
|
842 |
+ verbose=verbose) |
|
843 |
+ |
|
844 |
+ res2[["SNR"]] = res[["SNR"]] |
|
845 |
+ res2[["SKW"]] = res[["SKW"]] |
|
846 |
+ return(list2SnpSet(res2, returnParams=returnParams)) |
|
847 |
+} |
... | ... |
@@ -46,8 +46,7 @@ readIdatFiles(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path="", |
46 | 46 |
} |
47 | 47 |
|
48 | 48 |
\details{ |
49 |
-The summarised Cy3 (G) and Cy5 (R) intensity, number of beads that |
|
50 |
-were used in each channel and standard errors (all on the orginal scale) |
|
49 |
+The summarised Cy3 (G) and Cy5 (R) intensities (on the orginal scale) |
|
51 | 50 |
are read in from the .idat files. |
52 | 51 |
|
53 | 52 |
Where available, a \code{sampleSheet} data.frame, in the same format |
... | ... |
@@ -59,9 +58,8 @@ Thanks to Keith Baggerly who provided the code to read in the binary .idat files |
59 | 58 |
} |
60 | 59 |
|
61 | 60 |
\value{ |
62 |
- NChannelSet with intensity data (\code{R}, \code{G}), number of beads |
|
63 |
- (\code{Rnb}, \code{Gnb}) and standard errors (\code{Rse}, \code{Gse}) |
|
64 |
- for each bead type. |
|
61 |
+ NChannelSet with intensity data (\code{R}, \code{G}), and indicator |
|
62 |
+ for SNPs with 0 beads (\code{zero}) for each bead type. |
|
65 | 63 |
} |
66 | 64 |
|
67 | 65 |
\author{Matt Ritchie} |