git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@54170 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -54,6 +54,12 @@ importClassesFrom(oligoClasses, ff_matrix, ffdf) |
54 | 54 |
## Important to export these classes |
55 | 55 |
##exportClasses(ff_or_matrix, ff_matrix, ffdf) |
56 | 56 |
##exportClasses(ff_or_matrix) |
57 |
+##--------------------------------------------------------------------------- |
|
58 |
+## lattice imports |
|
59 |
+##--------------------------------------------------------------------------- |
|
60 |
+##import(panel.number, panel.grid, panel.xyplot, lpoints, lsegments, lrect, ltext) |
|
61 |
+ |
|
62 |
+ |
|
57 | 63 |
exportMethods(lines) |
58 | 64 |
exportMethods(CA, CB) |
59 | 65 |
export(crlmm, |
... | ... |
@@ -143,6 +143,7 @@ genotype <- function(filenames, |
143 | 143 |
mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, length(filenames), "double") |
144 | 144 |
SNR <- initializeBigVector("crlmmSNR-", length(filenames), "double") |
145 | 145 |
SKW <- initializeBigVector("crlmmSKW-", length(filenames), "double") |
146 |
+ genderff <- initializeBigVector("gender", length(filenames), "integer") |
|
146 | 147 |
featureData <- getFeatureData(cdfName, copynumber=TRUE) |
147 | 148 |
nr <- nrow(featureData); nc <- length(sns) |
148 | 149 |
A <- initializeBigMatrix("crlmmA-", nr, length(filenames), "integer") |
... | ... |
@@ -182,15 +183,17 @@ genotype <- function(filenames, |
182 | 183 |
protocolData <- getProtocolData.Affy(filenames) |
183 | 184 |
rownames(pData(protocolData)) <- sns |
184 | 185 |
protocolData(cnSet) <- protocolData |
185 |
- ##protocolData(cnSet) <- protocolData |
|
186 |
- pd <- data.frame(matrix(NA, nc, 3), row.names=sns) |
|
187 |
- colnames(pd)=c("SKW", "SNR", "gender") |
|
188 |
- phenoData(cnSet) <- new("AnnotatedDataFrame", data=pd) |
|
186 |
+ ##pd <- data.frame(matrix(NA, nc, 3), row.names=sns) |
|
187 |
+ ##colnames(pd)=c("SKW", "SNR", "gender") |
|
188 |
+ ##phenoData(cnSet) <- new("AnnotatedDataFrame", data=pd) |
|
189 |
+ cnSet$SKW <- SKW |
|
190 |
+ cnSet$SNR <- SNR |
|
191 |
+ cnSet$gender <- genderff |
|
189 | 192 |
stopifnot(validObject(cnSet)) |
190 | 193 |
snp.I <- isSnp(cnSet) |
191 | 194 |
snp.index <- which(snp.I) |
192 |
- pData(cnSet)$SKW <- SKW |
|
193 |
- pData(cnSet)$SNR <- SNR |
|
195 |
+ ##pData(cnSet)$SKW <- SKW |
|
196 |
+ ##pData(cnSet)$SNR <- SNR |
|
194 | 197 |
np.index <- which(!snp.I) |
195 | 198 |
if(verbose) message("Normalizing nonpolymorphic markers") |
196 | 199 |
FUN <- ifelse(is.lds, "cnrma2", "cnrma") |
... | ... |
@@ -203,22 +206,24 @@ genotype <- function(filenames, |
203 | 206 |
seed=seed, |
204 | 207 |
verbose=verbose) |
205 | 208 |
tmp <- crlmmGT2(A=calls(cnSet), |
206 |
- B=snpCallProbability(cnSet), |
|
207 |
- SNR=SNR, |
|
208 |
- mixtureParams=mixtureParams, |
|
209 |
- cdfName=cdfName, |
|
210 |
- row.names=NULL, |
|
211 |
- col.names=sampleNames(cnSet), |
|
212 |
- SNRMin=SNRMin, |
|
213 |
- recallMin=recallMin, |
|
214 |
- recallRegMin=recallRegMin, |
|
215 |
- gender=gender, |
|
216 |
- verbose=verbose, |
|
217 |
- returnParams=returnParams, |
|
218 |
- badSNP=badSNP, |
|
219 |
- snp.names=featureNames(cnSet)[snp.index]) |
|
209 |
+ B=snpCallProbability(cnSet), |
|
210 |
+ SNR=SNR, |
|
211 |
+ mixtureParams=mixtureParams, |
|
212 |
+ cdfName=cdfName, |
|
213 |
+ row.names=NULL, |
|
214 |
+ col.names=sampleNames(cnSet), |
|
215 |
+ SNRMin=SNRMin, |
|
216 |
+ recallMin=recallMin, |
|
217 |
+ recallRegMin=recallRegMin, |
|
218 |
+ gender=gender, |
|
219 |
+ verbose=verbose, |
|
220 |
+ returnParams=returnParams, |
|
221 |
+ badSNP=badSNP, |
|
222 |
+ snp.names=featureNames(cnSet)[snp.index]) |
|
220 | 223 |
if(verbose) message("Genotyping finished. Updating container with genotype calls and confidence scores.") |
221 |
- cnSet$gender <- tmp[["gender"]] |
|
224 |
+ open(cnSet$gender) |
|
225 |
+ cnSet$gender[,] <- tmp[["gender"]] |
|
226 |
+ close(cnSet$gender) |
|
222 | 227 |
return(cnSet) |
223 | 228 |
} |
224 | 229 |
|
... | ... |
@@ -1180,8 +1180,8 @@ genotypeInf <- function(cnSet, mixtureParams, probs=rep(1/3,3), |
1180 | 1180 |
badSNP=0.7, |
1181 | 1181 |
gender=NULL, |
1182 | 1182 |
DF=6){ |
1183 |
-## is.snp = isSnp(cnSet) |
|
1184 |
-## snp.index = which(is.snp) |
|
1183 |
+ is.snp = isSnp(cnSet) |
|
1184 |
+ snp.index = which(is.snp) |
|
1185 | 1185 |
## narrays = ncol(cnSet) |
1186 | 1186 |
## open(A(cnSet)) |
1187 | 1187 |
## open(B(cnSet)) |
... | ... |
@@ -57,42 +57,42 @@ linesCNSet <- function(x, y, batch, copynumber, x.axis="A", grid=FALSE, ...){ |
57 | 57 |
} |
58 | 58 |
|
59 | 59 |
|
60 |
-cnPanel <- function(x, y, ..., pch.cols, gt, cbs.segs, hmm.segs=NULL, shades, subscripts, add.ideogram=TRUE){ |
|
61 |
- ##if(panel.number() == 2) browser() |
|
62 |
- add.ideogram <- add.ideogram[[panel.number()]] |
|
63 |
- ##cbs.segs <- cbs.segs[[panel.number()]] |
|
64 |
- draw.hmm.states <- ifelse(panel.number() <= length(hmm.segs), TRUE, FALSE) |
|
65 |
- panel.grid(h=6, v=10) |
|
66 |
- which.hom <- which(gt[subscripts] == 1 | gt[subscripts]==3) |
|
67 |
- which.het <- which(gt[subscripts] == 2) |
|
68 |
- panel.xyplot(x, y, col="grey60", ...) |
|
69 |
- lpoints(x[which.hom], y[which.hom], col=pch.cols[1], ...) |
|
70 |
- lpoints(x[which.het], y[which.het], col=pch.cols[2], ...) |
|
71 |
- lsegments(x0=start(cbs.segs)/1e6, x1=end(cbs.segs)/1e6, |
|
72 |
- y0=cbs.segs$seg.mean, |
|
73 |
- y1=cbs.segs$seg.mean, ...) |
|
74 |
- if(draw.hmm.states){ |
|
75 |
- hmm.segs <- hmm.segs[order(width(hmm.segs), decreasing=TRUE), ] |
|
76 |
- lrect(xleft=start(hmm.segs)/1e6, |
|
77 |
- xright=end(hmm.segs)/1e6, |
|
78 |
- ybottom=-0.4, |
|
79 |
- ytop=0, |
|
80 |
- border=shades[hmm.segs$state], |
|
81 |
- col=shades[hmm.segs$state]) |
|
82 |
- } |
|
83 |
- ltext(130, 5, paste("MAD:", round(mad(y, na.rm=TRUE), 2))) |
|
84 |
- if(add.ideogram){ |
|
85 |
- pathto <- system.file("hg18", package="SNPchip") |
|
86 |
- cytoband <- read.table(file.path(pathto, "cytoBand.txt"), as.is=TRUE) |
|
87 |
- cytoband$V2 <- cytoband$V2/1e6 |
|
88 |
- cytoband$V3 <- cytoband$V3/1e6 |
|
89 |
- colnames(cytoband) <- c("chrom", "start", "end", "name", "gieStain") |
|
90 |
- plotCytoband(unique(hmm.segs$chrom), |
|
91 |
- cytoband=cytoband, |
|
92 |
- cytoband.ycoords=c(5.6, 5.9), |
|
93 |
- new=FALSE, |
|
94 |
- label.cytoband=FALSE, |
|
95 |
- build="hg18", |
|
96 |
- use.lattice=TRUE) |
|
97 |
- } |
|
98 |
-} |
|
60 |
+##cnPanel <- function(x, y, ..., pch.cols, gt, cbs.segs, hmm.segs=NULL, shades, subscripts, add.ideogram=TRUE){ |
|
61 |
+## ##if(panel.number() == 2) browser() |
|
62 |
+## add.ideogram <- add.ideogram[[panel.number()]] |
|
63 |
+## ##cbs.segs <- cbs.segs[[panel.number()]] |
|
64 |
+## draw.hmm.states <- ifelse(panel.number() <= length(hmm.segs), TRUE, FALSE) |
|
65 |
+## panel.grid(h=6, v=10) |
|
66 |
+## which.hom <- which(gt[subscripts] == 1 | gt[subscripts]==3) |
|
67 |
+## which.het <- which(gt[subscripts] == 2) |
|
68 |
+## panel.xyplot(x, y, col="grey60", ...) |
|
69 |
+## lpoints(x[which.hom], y[which.hom], col=pch.cols[1], ...) |
|
70 |
+## lpoints(x[which.het], y[which.het], col=pch.cols[2], ...) |
|
71 |
+## lsegments(x0=start(cbs.segs)/1e6, x1=end(cbs.segs)/1e6, |
|
72 |
+## y0=cbs.segs$seg.mean, |
|
73 |
+## y1=cbs.segs$seg.mean, ...) |
|
74 |
+## if(draw.hmm.states){ |
|
75 |
+## hmm.segs <- hmm.segs[order(width(hmm.segs), decreasing=TRUE), ] |
|
76 |
+## lrect(xleft=start(hmm.segs)/1e6, |
|
77 |
+## xright=end(hmm.segs)/1e6, |
|
78 |
+## ybottom=-0.4, |
|
79 |
+## ytop=0, |
|
80 |
+## border=shades[hmm.segs$state], |
|
81 |
+## col=shades[hmm.segs$state]) |
|
82 |
+## } |
|
83 |
+## ltext(130, 5, paste("MAD:", round(mad(y, na.rm=TRUE), 2))) |
|
84 |
+## if(add.ideogram){ |
|
85 |
+## pathto <- system.file("hg18", package="SNPchip") |
|
86 |
+## cytoband <- read.table(file.path(pathto, "cytoBand.txt"), as.is=TRUE) |
|
87 |
+## cytoband$V2 <- cytoband$V2/1e6 |
|
88 |
+## cytoband$V3 <- cytoband$V3/1e6 |
|
89 |
+## colnames(cytoband) <- c("chrom", "start", "end", "name", "gieStain") |
|
90 |
+## plotCytoband(unique(hmm.segs$chrom), |
|
91 |
+## cytoband=cytoband, |
|
92 |
+## cytoband.ycoords=c(5.6, 5.9), |
|
93 |
+## new=FALSE, |
|
94 |
+## label.cytoband=FALSE, |
|
95 |
+## build="hg18", |
|
96 |
+## use.lattice=TRUE) |
|
97 |
+## } |
|
98 |
+##} |
... | ... |
@@ -1,8 +1,13 @@ |
1 |
-All: copynumber crlmmDownstream clean |
|
1 |
+All: copynumber illumina_copynumber crlmmDownstream clean |
|
2 | 2 |
|
3 | 3 |
copynumber: copynumber.tex |
4 |
- cp -p ../scripts/copynumber.pdf . |
|
5 |
- cp -p ../scripts/illumina_copynumber.pdf . |
|
4 |
+# cp -p ../scripts/copynumber.pdf . |
|
5 |
+# cp -p ../scripts/illumina_copynumber.pdf . |
|
6 |
+ cp ../scripts/copynumber.pdf . |
|
7 |
+ cp ../scripts/illumina_copynumber.pdf . |
|
8 |
+ |
|
9 |
+illumina_copynumber: illumina_copynumber.tex |
|
10 |
+ cp ../scripts/illumina_copynumber.pdf . |
|
6 | 11 |
|
7 | 12 |
crlmmDownstream: crlmmDownstream.tex |
8 | 13 |
cp -p ../scripts/crlmmDownstream.pdf . |
... | ... |
@@ -36,7 +36,7 @@ |
36 | 36 |
to the copy number vignette. |
37 | 37 |
\end{abstract} |
38 | 38 |
|
39 |
-\section{Infrastructure} |
|
39 |
+\section{Setup} |
|
40 | 40 |
|
41 | 41 |
<<crlmm, results=hide, echo=FALSE>>= |
42 | 42 |
library(crlmm) |
... | ... |
@@ -44,29 +44,16 @@ options(width=70) |
44 | 44 |
options(continue=" ") |
45 | 45 |
@ |
46 | 46 |
|
47 |
-\textbf{Supported platforms:} The supported Infinium platforms are |
|
48 |
-those for which a corresponding annotation package is available. The |
|
49 |
-annotation packages contain information on the markers, such as |
|
50 |
-physical position and chromosome, as well as pre-computed parameters |
|
51 |
-estimated from HapMap used during the preprocessing and genotyping |
|
52 |
-steps. Currently supported Infinium platforms are listed in the |
|
53 |
-following code chunk. |
|
54 |
- |
|
55 |
-<<supportedPlatforms>>= |
|
56 |
-pkgs <- annotationPackages() |
|
57 |
-crlmm.pkgs <- pkgs[grep("Crlmm", pkgs)] |
|
58 |
-crlmm.pkgs[grep("human", crlmm.pkgs)] |
|
59 |
-@ |
|
60 |
- |
|
61 |
-\textbf{Large data:} In order to reduce \crlmm{}'s memory footprint, |
|
62 |
-we require the \Rpackage{ff} for copy number estimation. The |
|
63 |
-\Rpackage{ff} package provides infrastructure for accessing and |
|
64 |
-writing data to disk instead of keeping data in memory. Each element |
|
65 |
-of the \verb+assayData+ and \verb+batchStatistics+ slot of the |
|
66 |
-\Rclass{CNSet} class are \Robject{ff} objects. \Robject{ff} objects |
|
67 |
-in the \R{} workspace contain pointers to several files with the |
|
68 |
-\verb+.ff+ extension. The location of where the data is stored on |
|
69 |
-disk is specified by use of the \Rfunction{ldPath} function. |
|
47 |
+%\textbf{Supported platforms:} The supported Infinium platforms are |
|
48 |
+%those for which a corresponding annotation package is available. The |
|
49 |
+%annotation packages contain information on the markers, such as |
|
50 |
+%physical position and chromosome, as well as pre-computed parameters |
|
51 |
+%estimated from HapMap used during the preprocessing and genotyping |
|
52 |
+%steps. Currently supported Infinium platforms are listed in the |
|
53 |
+%following code chunk. |
|
54 |
+The following codechunk declares a directory for saving \Robject{ff} |
|
55 |
+files that will contain the normalized intensities and the genotype |
|
56 |
+calls. |
|
70 | 57 |
|
71 | 58 |
<<ldpath,results=hide>>= |
72 | 59 |
library(ff) |
... | ... |
@@ -78,41 +65,23 @@ ldPath(outdir) |
78 | 65 |
dir.create(outdir, recursive=TRUE, showWarnings=FALSE) |
79 | 66 |
@ |
80 | 67 |
|
81 |
-Users should not move or rename this directory. If only output files |
|
82 |
-are stored in \verb+outdir+, one can either remove the entire |
|
83 |
-directory prior to rerunning the analysis or all of the '.ff' files. |
|
84 |
-Otherwise, one would accumulate a large number of '.ff' files on disk |
|
85 |
-that are no longer in use. |
|
86 |
- |
|
87 |
-As none of the functions for preprocessing, genotyping, or copy number |
|
88 |
-estimation simultaneously require all samples and all probes in |
|
89 |
-memory, memory-usage by \crlmm{} can be fine tuned by reading in and |
|
90 |
-processing subsets of the markers and/or samples. The functions |
|
91 |
-\Rfunction{ocSamples} and \Rfunction{ocProbesets} in the |
|
92 |
-\Rpackage{oligoClasses} package can be used to declare how many |
|
93 |
-markers and samples to read at once. In general, specifying smaller |
|
94 |
-values should reduce the RAM required for a particular job, but would |
|
95 |
-be expected to have an increased run-time. In the following |
|
96 |
-code-chunk, we declare that \crlmm{} should process 150,000 markers at |
|
97 |
-a time (when possible) and 500 samples at a time. (As our dataset in |
|
98 |
-this vignette only contains 43 samples, the \Rfunction{ocSamples} |
|
99 |
-option would not have any effect.) One can view the current settings |
|
100 |
-for these commands, by typing the functions without an argument. |
|
101 |
- |
|
102 |
-<<ram>>= |
|
103 |
-ocSamples() |
|
104 |
-ocProbesets() |
|
105 |
-ocProbesets(150e3) |
|
106 |
-ocSamples(500) |
|
107 |
-ocSamples() |
|
108 |
-ocProbesets() |
|
109 |
-@ |
|
68 |
+We will also store cached computations in the directory \verb+outdir+. |
|
110 | 69 |
|
111 | 70 |
<<cacheSweave, echo=FALSE, results=hide>>= |
112 | 71 |
library(cacheSweave) |
113 | 72 |
setCacheDir(outdir) |
114 | 73 |
@ |
115 | 74 |
|
75 |
+We declare that \crlmm{} should process 150,000 markers at a time |
|
76 |
+(when possible) and/or 500 samples at a time. As our example dataset |
|
77 |
+in this vignette contains fewer than 500 samples, all samples will be |
|
78 |
+processed simultaneously. |
|
79 |
+ |
|
80 |
+<<ram>>= |
|
81 |
+ocProbesets(150e3) |
|
82 |
+ocSamples(500) |
|
83 |
+@ |
|
84 |
+ |
|
116 | 85 |
\textbf{Limitations:} There is no minimum number of samples required |
117 | 86 |
for preprocessing and genotyping. However, for copy number estimation |
118 | 87 |
the \crlmm{} package currently requires at least 10 samples per batch. |
... | ... |
@@ -134,7 +103,7 @@ platforms, several steps are required. |
134 | 103 |
We begin by specifying the path containing the raw IDAT files for a |
135 | 104 |
set of samples from the Infinium 370k platform. |
136 | 105 |
|
137 |
-<libraries>>= |
|
106 |
+<<datadir>>= |
|
138 | 107 |
datadir <- "/thumper/ctsa/snpmicroarray/illumina/IDATS/370k" |
139 | 108 |
@ |
140 | 109 |
|