no longer uses ffrowapply. Updates assayData elements of callSet column by column
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@50807 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -118,16 +118,13 @@ genotype <- function(filenames, |
118 | 118 |
sns=sns, |
119 | 119 |
verbose=verbose, |
120 | 120 |
batch=batch) |
121 |
- if(is.lds) open(callSet) |
|
122 |
- mixtureParams <- matrix(NA, 4, length(filenames)) |
|
123 |
- is.snp <- isSnp(callSet) |
|
124 |
- snp.index <- which(is.snp) |
|
125 | 121 |
FUN <- ifelse(is.lds, "snprma2", "snprma") |
126 | 122 |
snprmaFxn <- function(FUN,...){ |
127 | 123 |
switch(FUN, |
128 | 124 |
snprma=snprma(...), |
129 | 125 |
snprma2=snprma2(...)) |
130 | 126 |
} |
127 |
+ |
|
131 | 128 |
snprmaRes <- snprmaFxn(FUN, |
132 | 129 |
filenames=filenames, |
133 | 130 |
mixtureSampleSize=mixtureSampleSize, |
... | ... |
@@ -137,6 +134,15 @@ genotype <- function(filenames, |
137 | 134 |
seed=seed, |
138 | 135 |
cdfName=cdfName, |
139 | 136 |
sns=sns) |
137 |
+ gns <- snprmaRes[["gns"]] |
|
138 |
+ snp.I <- isSnp(callSet) |
|
139 |
+ is.snp <- which(snp.I) |
|
140 |
+ snp.index <- match(featureNames(callSet)[is.snp], gns) |
|
141 |
+ stopifnot(identical(featureNames(callSet)[is.snp], gns[snp.index])) |
|
142 |
+## is.snp <- isSnp(callSet) |
|
143 |
+## snp.index <- which(is.snp) |
|
144 |
+ if(is.lds) open(callSet) |
|
145 |
+ mixtureParams <- matrix(NA, 4, length(filenames)) |
|
140 | 146 |
##message("Saving snprmaRes file") |
141 | 147 |
##save(snprmaRes, file=file.path(outdir, "snprmaRes.rda")) |
142 | 148 |
if(verbose) message("Finished preprocessing.") |
... | ... |
@@ -147,17 +153,21 @@ genotype <- function(filenames, |
147 | 153 |
open(snprmaRes[["mixtureParams"]]) |
148 | 154 |
##bb <- getOption("ffbatchbytes") |
149 | 155 |
message("Writing normalized intensities to callSet") |
150 |
- ffrowapply(A(callSet)[i1:i2, ] <- snprmaRes[["A"]][i1:i2, ], X=snprmaRes[["A"]])##, BATCHBYTES=bb) |
|
151 |
- ffrowapply(B(callSet)[i1:i2, ] <- snprmaRes[["B"]][i1:i2, ], X=snprmaRes[["B"]])##, BATCHBYTES=bb) |
|
156 |
+ for(j in 1:ncol(callSet)){ |
|
157 |
+ A(callSet)[is.snp, j] <- snprmaRes[["A"]][snp.index, j] |
|
158 |
+ B(callSet)[is.snp, j] <- snprmaRes[["B"]][snp.index, j] |
|
159 |
+ } |
|
160 |
+ ##ffrowapply(A(callSet)[i1:i2, ] <- snprmaRes[["A"]][i1:i2, ], X=snprmaRes[["A"]])##, BATCHBYTES=bb) |
|
161 |
+ ##ffrowapply(B(callSet)[i1:i2, ] <- snprmaRes[["B"]][i1:i2, ], X=snprmaRes[["B"]])##, BATCHBYTES=bb) |
|
152 | 162 |
pData(callSet)$SKW <- snprmaRes[["SKW"]] |
153 | 163 |
pData(callSet)$SNR <- snprmaRes[["SNR"]] |
154 | 164 |
} else{ |
155 |
- A(callSet)[snp.index, ] <- snprmaRes[["A"]] |
|
156 |
- B(callSet)[snp.index, ] <- snprmaRes[["B"]] |
|
165 |
+ A(callSet)[is.snp, ] <- snprmaRes[["A"]][snp.index, ] |
|
166 |
+ B(callSet)[is.snp, ] <- snprmaRes[["B"]][snp.index, ] |
|
157 | 167 |
pData(callSet)$SKW <- snprmaRes[["SKW"]] |
158 | 168 |
pData(callSet)$SNR <- snprmaRes[["SNR"]] |
159 | 169 |
} |
160 |
- np.index <- which(!is.snp) |
|
170 |
+ np.index <- which(!snp.I) |
|
161 | 171 |
FUN <- ifelse(is.lds, "cnrma2", "cnrma") |
162 | 172 |
## main purpose is to update 'alleleA' |
163 | 173 |
cnrmaFxn <- function(FUN,...){ |
... | ... |
@@ -203,11 +213,15 @@ genotype <- function(filenames, |
203 | 213 |
if(is.lds){ |
204 | 214 |
open(tmp[["calls"]]) |
205 | 215 |
open(tmp[["confs"]]) |
206 |
- ffrowapply(snpCall(callSet)[i1:i2, ] <- tmp[["calls"]][i1:i2, ], X=tmp[["calls"]])#, BATCHBYTES=bb) |
|
207 |
- ffrowapply(snpCallProbability(callSet)[i1:i2, ] <- tmp[["confs"]][i1:i2, ], X=tmp[["confs"]])#, BATCHBYTES=bb) |
|
216 |
+ for(j in 1:ncol(callSet)){ |
|
217 |
+ snpCall(callSet)[is.snp, j] <- tmp[["calls"]][snp.index, j] |
|
218 |
+ snpCallProbability(callSet)[is.snp, j] <- tmp[["confs"]][snp.index, j] |
|
219 |
+ } |
|
220 |
+ ## ffrowapply(snpCall(callSet)[i1:i2, ] <- tmp[["calls"]][i1:i2, ], X=tmp[["calls"]])#, BATCHBYTES=bb) |
|
221 |
+ ## ffrowapply(snpCallProbability(callSet)[i1:i2, ] <- tmp[["confs"]][i1:i2, ], X=tmp[["confs"]])#, BATCHBYTES=bb) |
|
208 | 222 |
} else { |
209 |
- calls(callSet)[snp.index, ] <- tmp[["calls"]] |
|
210 |
- snpCallProbability(callSet)[snp.index, ] <- tmp[["confs"]] |
|
223 |
+ calls(callSet)[snp.index, ] <- tmp[["calls"]][snp.index, ] |
|
224 |
+ snpCallProbability(callSet)[snp.index, ] <- tmp[["confs"]][snp.index, ] |
|
211 | 225 |
} |
212 | 226 |
message("Finished updating. Cleaning up.") |
213 | 227 |
callSet$gender <- tmp$gender |
... | ... |
@@ -816,9 +830,9 @@ fit.lm3 <- function(strata, |
816 | 830 |
nuA[nuA < MIN.NU] <- MIN.NU |
817 | 831 |
nuB[nuB < MIN.NU] <- MIN.NU |
818 | 832 |
phiA[phiA < MIN.PHI] <- MIN.PHI |
819 |
- phiA2[phiA2 < MIN.PHI] <- MIN.PHI |
|
833 |
+ phiA2[phiA2 < 1] <- 1 |
|
820 | 834 |
phiB[phiB < MIN.PHI] <- MIN.PHI |
821 |
- phiB2[phiB2 < MIN.PHI] <- MIN.PHI |
|
835 |
+ phiB2[phiB2 < 1] <- 1 |
|
822 | 836 |
} |
823 | 837 |
nuA(object)[marker.index, ] <- nuA |
824 | 838 |
nuB(object)[marker.index, ] <- nuB |
... | ... |
@@ -958,7 +972,6 @@ cnrma <- function(A, filenames, row.names, verbose=TRUE, seed=1, cdfName, sns){ |
958 | 972 |
if(verbose) message("Loading annotations for nonpolymorphic probes") |
959 | 973 |
loader("npProbesFid.rda", .crlmmPkgEnv, pkgname) |
960 | 974 |
fid <- getVarInEnv("npProbesFid") |
961 |
- |
|
962 | 975 |
if(cdfName=="genomewidesnp6"){ |
963 | 976 |
loader("1m_reference_cn.rda", .crlmmPkgEnv, pkgname) |
964 | 977 |
} |
... | ... |
@@ -1798,7 +1811,7 @@ crlmmCopynumber <- function(object, |
1798 | 1811 |
samplesPerBatch <- table(as.character(batch(object))) |
1799 | 1812 |
if(any(samplesPerBatch < MIN.SAMPLES)){ |
1800 | 1813 |
warning("The following batches have fewer than ", MIN.SAMPLES, ":") |
1801 |
- message(paste(samplesPerBatch[samplesPerBatch < MIN.SAMPLES], collapse=", ")) |
|
1814 |
+ message(paste(names(samplesPerBatch)[samplesPerBatch < MIN.SAMPLES], collapse=", ")) |
|
1802 | 1815 |
message("Not estimating copy number for the above batches") |
1803 | 1816 |
} |
1804 | 1817 |
mylabel <- function(marker.type){ |
... | ... |
@@ -364,14 +364,16 @@ C3 <- function(object, allele, marker.index, batch.index, sample.index){ |
364 | 364 |
phiA <- phiA(object)[marker.index, l] |
365 | 365 |
IA <- A(object)[marker.index, jj] |
366 | 366 |
IB <- B(object)[marker.index, jj] |
367 |
- phistar <- phiB2/phiA |
|
368 |
- tmp <- (IB - nuB - phistar*IA + phistar*nuA)/phiB |
|
369 |
- CB <- tmp/(1-phistar*phiA2/phiB) |
|
367 |
+ ##phistar <- phiB2/phiA |
|
368 |
+ ##tmp <- (IB - nuB - phistar*IA + phistar*nuA)/phiB |
|
369 |
+ CB <- 1/(1-phiA2*phiB2/(phiA*phiB)) * 1/phiB * (IA-nuB-phiB2/phiA*(IA-nuA)) |
|
370 |
+ ##CB <- tmp/(1-phistar*phiA2/phiB) |
|
370 | 371 |
if(allele == "B"){ |
371 | 372 |
acn[[k]] <- CB |
372 | 373 |
} |
373 | 374 |
if(allele == "A"){ |
374 |
- acn[[k]] <- (IA-nuA-phiA2*CB)/phiA |
|
375 |
+ ca <- (IA-nuA-phiA2*CB)/phiA |
|
376 |
+ acn[[k]] <- ca |
|
375 | 377 |
} |
376 | 378 |
if(allele == "AandB"){ |
377 | 379 |
CA <- tmp/(1-phistar*phiA2/phiB) |
... | ... |
@@ -25,7 +25,7 @@ testingFF: testingFF.Rnw |
25 | 25 |
qsub -m e -r y -cwd -l mem_free=12G,h_vmem=16G testingFF.R.sh |
26 | 26 |
|
27 | 27 |
affy: copynumber.Rnw |
28 |
- echo "Stangle(\"copynumber.Rnw\")" | R --no-save --no-restore; |
|
28 |
+ echo "Stangle(\"copynumber.Rnw\")" | R-devel --no-save --no-restore; |
|
29 | 29 |
cat ~/bin/cluster.template | perl -pe "s/Rprog/copynumber.R/" > copynumber.R.sh |
30 | 30 |
qsub -m e -r y -cwd -l mem_free=12G,h_vmem=16G copynumber.R.sh |
31 | 31 |
|
... | ... |
@@ -52,7 +52,7 @@ separately) that are listed below. |
52 | 52 |
|
53 | 53 |
<<annotationPackages>>= |
54 | 54 |
pkgs <- annotationPackages() |
55 |
-pkgs <- pgks[grep("Crlmm", pkgs)] |
|
55 |
+pkgs <- pkgs[grep("Crlmm", pkgs)] |
|
56 | 56 |
pkgs |
57 | 57 |
@ |
58 | 58 |
|
... | ... |
@@ -157,6 +157,7 @@ be run interactively. |
157 | 157 |
|
158 | 158 |
<<LDS_genotype>>= |
159 | 159 |
if(!file.exists(file.path(outdir, "cnSet.rda"))){ |
160 |
+ ##trace(genotype, browser) |
|
160 | 161 |
gtSet <- checkExists("gtSet", |
161 | 162 |
.path=outdir, |
162 | 163 |
.FUN=genotype, |
... | ... |
@@ -190,6 +191,9 @@ The \Rfunction{crlmmCopynumber} performs the following steps: |
190 | 191 |
<<LDS_copynumber>>= |
191 | 192 |
GT.CONF.THR <- 0.90 |
192 | 193 |
cnSet <- checkExists("cnSet", .path=outdir, .FUN=crlmmCopynumber, object=gtSet, GT.CONF.THR=GT.CONF.THR) |
194 |
+invisible(open(cnSet)) |
|
195 |
+q("no") |
|
196 |
+stop("here") |
|
193 | 197 |
@ |
194 | 198 |
|
195 | 199 |
In an effort to reduce I/O, the \Rpackage{crlmmCopynumber} function no |
... | ... |
@@ -243,15 +247,21 @@ ct2 <- totalCopynumber(cnSet, i=marker.index, j=1:5) |
243 | 247 |
stopifnot(all.equal(ct, ct2)) |
244 | 248 |
@ |
245 | 249 |
|
246 |
-TODO: FIX estimation for nonpolymorphic markers on X |
|
250 |
+Nonpolymorphic markers on X: |
|
247 | 251 |
|
248 |
-<<nonpolymorphicX, eval=FALSE>>= |
|
252 |
+<<nonpolymorphicX>>= |
|
249 | 253 |
## nu and phi are not estimated appropriately for nonpolymorphic X markers. |
250 |
-X.markers <- which(!isSnp(cnSet) & chromosome(cnSet) == 23) |
|
251 |
-cn.M <- CA(cnSet, i=X.markers, j=which(cnSet$gender==1)) |
|
252 |
-cn.F <- CA(cnSet, i=X.markers, j=which(cnSet$gender==2)) |
|
253 |
-phi(cnSet, "A")[X.markers[1:10]] |
|
254 |
-boxplot(data.frame(cbind(cn.M, cn.F)), pch=".") |
|
254 |
+library(RColorBrewer) |
|
255 |
+cols <- brewer.pal(8, "Paired")[3:4] |
|
256 |
+cols <- rep(cols, each=5) |
|
257 |
+##set.seed(123) |
|
258 |
+X.markers <- sample(which(!isSnp(cnSet) & chromosome(cnSet) == 23), 20e3) |
|
259 |
+cn.M <- CA(cnSet, i=X.markers, j=sample(which(cnSet$gender==1), 5)) |
|
260 |
+cn.F <- CA(cnSet, i=X.markers, j=sample(which(cnSet$gender==2), 5)) |
|
261 |
+##phi(cnSet, "A")[X.markers[1:10]] |
|
262 |
+boxplot(data.frame(cbind(cn.M, cn.F)), pch=".", xaxt="n", main="nonpolymorphic markers on X", |
|
263 |
+ col=cols) |
|
264 |
+legend("topleft", fill=unique(cols), legend=c("Male", "Female")) |
|
255 | 265 |
@ |
256 | 266 |
|
257 | 267 |
|
... | ... |
@@ -261,17 +271,94 @@ Polymorphic markers, X chromosome: |
261 | 271 |
\centering |
262 | 272 |
<<polymorphicX, fig=TRUE, width=8, height=4>>= |
263 | 273 |
## copy number estimates on X for SNPs are biased towards small values. |
264 |
-X.markers <- which(isSnp(cnSet) & chromosome(cnSet) == 23) |
|
265 |
-ca.M <- CA(cnSet, i=X.markers, j=which(cnSet$gender==1)) |
|
266 |
-cb.M <- CB(cnSet, i=X.markers, j=which(cnSet$gender==1)) |
|
267 |
-ca.F <- CA(cnSet, i=X.markers, j=which(cnSet$gender==2)) |
|
268 |
-cb.F <- CB(cnSet, i=X.markers, j=which(cnSet$gender==2)) |
|
274 |
+X.markers <- sample(which(isSnp(cnSet) & chromosome(cnSet) == 23), 25e3) |
|
275 |
+ |
|
276 |
+##genomewidesnp6Crlmm is correct. FeatureData is incorrect. |
|
277 |
+index <- match(featureNames(cnSet), rownames(snpProbes)) |
|
278 |
+sum(snpProbes[index, "chrom"] != chromosome(cnSet)) |
|
279 |
+ |
|
280 |
+M <- sample(which(cnSet$gender==1), 5) |
|
281 |
+F <- sample(which(cnSet$gender==2), 5) |
|
282 |
+ |
|
283 |
+fns <- list.files("~/hapmap_metadata", full.names=T) |
|
284 |
+dat <- read.delim(fns[2]) |
|
285 |
+sns <- substr(sampleNames(cnSet), 1, 7) |
|
286 |
+dat <- dat[match(sns, dat$IID), ] |
|
287 |
+stopifnot(all.equal(as.character(dat$IID), sns)) |
|
288 |
+identical(cnSet$gender, dat$sex) |
|
289 |
+ |
|
290 |
+cols <- brewer.pal(8, "Paired")[3:4] |
|
291 |
+cols <- cols[cnSet$gender] |
|
292 |
+ia <- A(cnSet)[X.markers, ]##c(M, F)] |
|
293 |
+ib <- B(cnSet)[X.markers, ]##c(M, F)] |
|
294 |
+i.total <- ia+ib |
|
295 |
+boxplot(data.frame(log2(i.total)), pch=".", col=cols) |
|
296 |
+legend("topleft", fill=unique(cols), legend=c("Male", "Female")) |
|
297 |
+ |
|
298 |
+nus <- nuA(cnSet)[X.markers, ] |
|
299 |
+phiss <- phiA(cnSet)[X.markers, ] |
|
300 |
+ |
|
301 |
+ |
|
302 |
+xSet <- cnSet[X.markers, ] |
|
303 |
+a <- as.matrix(A(xSet)) |
|
304 |
+b <- as.matrix(B(xSet)) |
|
305 |
+gt <- as.matrix(calls(xSet)) |
|
306 |
+nuA <- nu(xSet, "A") |
|
307 |
+phA <- phi(xSet, "A") |
|
308 |
+col <- brewer.pal(7, "Accent")[c(1, 4, 7)] |
|
309 |
+fns.xset <- featureNames(xSet) |
|
310 |
+ |
|
311 |
+path <- system.file("extdata", package="genomewidesnp6Crlmm") |
|
312 |
+load(file.path(path, "snpProbes.rda")) |
|
313 |
+index <- match(fns.xset, rownames(snpProbes)) |
|
314 |
+all(snpProbes[index, "chrom"]==23) |
|
315 |
+ |
|
316 |
+pkg <- "pd.genomewidesnp.6" |
|
317 |
+library(pd.genomewidesnp.6) |
|
318 |
+conn <- db(get(pkg)) |
|
319 |
+sql <- "SELECT man_fsetid, chrom, physical_pos FROM featureSet" |
|
320 |
+snps <- dbGetQuery(conn, sql) |
|
321 |
+index <- match(fns.xset, snps$man_fsetid) |
|
322 |
+all(snps$chrom[index] == "X") |
|
323 |
+ |
|
324 |
+## are we writing intensities to the wrong location?? |
|
325 |
+ |
|
326 |
+ |
|
327 |
+ |
|
328 |
+################################################### |
|
329 |
+### chunk number 25: ABscatterplots |
|
330 |
+################################################### |
|
331 |
+#line 599 "manuscript.Rnw" |
|
332 |
+lA <- log2(a) |
|
333 |
+lB <- log2(b) |
|
334 |
+cols <- c("blue", "black", "red") |
|
335 |
+par(las=1, mfrow=c(4,4), mar=rep(0.5,4), oma=c(4,4, 4,4)) |
|
336 |
+for(i in 1:16){ |
|
337 |
+ plot(lB[i, ], lA[i, ], col="grey50", bg=col[gt[i, ]], xaxt="n", yaxt="n", pch=21, cex=0.8, |
|
338 |
+ xlim=c(6.5, 12.5), ylim=c(6.5, 12.5), xlab="", ylab="") |
|
339 |
+ for(CN in 1:3) lines(xSet, i, "GIGAS", CN, col=cols[CN], lwd=2, x.axis="B") |
|
340 |
+} |
|
341 |
+mtext(expression(log[2](I[B])), 1, outer=TRUE) |
|
342 |
+par(las=3) |
|
343 |
+mtext(expression(log[2](I[A])), 2, outer=TRUE) |
|
344 |
+ |
|
345 |
+ |
|
346 |
+ |
|
347 |
+boxplot(data.frame(ia+ib), pch=".", col=cols) |
|
348 |
+trace(ACN, browser) |
|
349 |
+trace(C3, browser) |
|
350 |
+ca.M <- CA(cnSet, i=X.markers, j=M) |
|
351 |
+cb.M <- CB(cnSet, i=X.markers, j=M) |
|
352 |
+ca.F <- CA(cnSet, i=X.markers, j=F) |
|
353 |
+cb.F <- CB(cnSet, i=X.markers, j=F) |
|
269 | 354 |
cn.M <- ca.M+cb.M |
270 | 355 |
cn.F <- ca.F+cb.F |
271 |
-boxplot(data.frame(cbind(cn.M, cn.F)), pch=".", outline=FALSE, col="grey60") |
|
356 |
+tmp <- totalCopynumber(cnSet, i=X.markers, j=c(M, F)) |
|
357 |
+boxplot(data.frame(cbind(cn.M, cn.F)), pch=".", outline=FALSE, col=cols, xaxt="n") |
|
358 |
+boxplot(data.frame(tmp), pch=".", outline=FALSE, col=cols, xaxt="n") |
|
272 | 359 |
abline(h=c(1,2)) |
273 | 360 |
## alternatively |
274 |
-cn.F2 <- totalCopynumber(cnSet, i=X.markers, j=which(cnSet$gender==2)) |
|
361 |
+cn.F2 <- totalCopynumber(cnSet, i=X.markers, j=F) |
|
275 | 362 |
stopifnot(all.equal(cn.F, cn.F2)) |
276 | 363 |
@ |
277 | 364 |
\caption{Copy number estimates for polymorphic markers on chromosome X} |
... | ... |
@@ -588,6 +675,13 @@ are drawbacks to this approach, including variance estimates that are |
588 | 675 |
overly optimistic. More direct approaches for outlier detection and |
589 | 676 |
removal may be explored in the future. |
590 | 677 |
|
678 |
+Copy number estimates for other chromosomes, such as mitochondrial and |
|
679 |
+chromosome Y, are not currently available in \crlmm{}. |
|
680 |
+ |
|
681 |
+<<echo=FALSE>>= |
|
682 |
+invisible(close(cnSet)) |
|
683 |
+@ |
|
684 |
+ |
|
591 | 685 |
|
592 | 686 |
\section{Session information} |
593 | 687 |
<<sessionInfo, results=tex>>= |