This fixes a bug that occurs when CA() or CB() for a random subset of
samples with mixed batches was accessed -- the order of samples
returned was incorrect. The dimnames of the resulting object are
returned as well.
copynumber.pdf vignette also updated
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@50889 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -311,18 +311,19 @@ setReplaceMethod("flags", signature=signature(object="CNSet", value="ff_matrix") |
311 | 311 |
## autosome NPs |
312 | 312 |
## chromosome X NPs for women |
313 | 313 |
C1 <- function(object, marker.index, batch.index, sample.index){ |
314 |
- acn <- vector("list", length(batch.index)) |
|
314 |
+## acn <- vector("list", length(batch.index)) |
|
315 |
+ acn <- matrix(NA, nrow=length(marker.index), ncol=length(sample.index)) |
|
315 | 316 |
for(k in seq_along(batch.index)){ |
316 | 317 |
l <- batch.index[k] |
317 | 318 |
jj <- sample.index[as.character(batch(object))[sample.index] == batchNames(object)[l]] |
318 | 319 |
bg <- nuA(object)[marker.index, l] |
319 | 320 |
slope <- phiA(object)[marker.index, l] |
320 | 321 |
I <- A(object)[marker.index, jj] |
321 |
- acn[[k]] <- 1/slope * (I - bg) |
|
322 |
+ acn[, match(jj, sample.index)] <- 1/slope * (I - bg) |
|
322 | 323 |
} |
323 |
- if(length(acn) > 1){ |
|
324 |
- acn <- do.call("cbind", acn) |
|
325 |
- } else acn <- acn[[1]] |
|
324 |
+## if(length(acn) > 1){ |
|
325 |
+## acn <- do.call("cbind", acn) |
|
326 |
+## } else acn <- acn[[1]] |
|
326 | 327 |
return(as.matrix(acn)) |
327 | 328 |
} |
328 | 329 |
|
... | ... |
@@ -330,7 +331,7 @@ C1 <- function(object, marker.index, batch.index, sample.index){ |
330 | 331 |
## autosome SNPs |
331 | 332 |
## chromosome X for male nonpolymorphic markers |
332 | 333 |
C2 <- function(object, marker.index, batch.index, sample.index, NP.X=FALSE){ |
333 |
- acn <- vector("list", length(batch.index)) |
|
334 |
+ acn <- matrix(NA, nrow=length(marker.index), ncol=length(sample.index)) |
|
334 | 335 |
for(k in seq_along(batch.index)){ |
335 | 336 |
l <- batch.index[k] |
336 | 337 |
jj <- sample.index[as.character(batch(object))[sample.index] == batchNames(object)[l]] |
... | ... |
@@ -339,17 +340,18 @@ C2 <- function(object, marker.index, batch.index, sample.index, NP.X=FALSE){ |
339 | 340 |
if(!NP.X){ |
340 | 341 |
I <- B(object)[marker.index, jj] |
341 | 342 |
} else I <- A(object)[marker.index, jj] |
342 |
- acn[[k]] <- 1/slope * (I - bg) |
|
343 |
+ acn[, match(jj, sample.index)] <- 1/slope * (I - bg) |
|
343 | 344 |
} |
344 |
- if(length(acn) > 1){ |
|
345 |
- acn <- do.call("cbind", acn) |
|
346 |
- } else acn <- acn[[1]] |
|
345 |
+## if(length(acn) > 1){ |
|
346 |
+## acn <- do.call("cbind", acn) |
|
347 |
+## } else acn <- acn[[1]] |
|
347 | 348 |
return(as.matrix(acn)) |
348 | 349 |
} |
349 | 350 |
|
350 | 351 |
## Chromosome X SNPs |
351 | 352 |
C3 <- function(object, allele, marker.index, batch.index, sample.index){ |
352 |
- acn <- vector("list", length(batch.index)) |
|
353 |
+## acn <- vector("list", length(batch.index)) |
|
354 |
+ acn <- matrix(NA, nrow=length(marker.index), ncol=length(sample.index)) |
|
353 | 355 |
for(k in seq_along(batch.index)){ |
354 | 356 |
l <- batch.index[k] |
355 | 357 |
##j <- which(as.character(batch(object))[sample.index] == batchNames(object)[l]) |
... | ... |
@@ -368,10 +370,16 @@ C3 <- function(object, allele, marker.index, batch.index, sample.index){ |
368 | 370 |
##CB <- 1/(1-phiA2*phiB2/(phiA*phiB)) * 1/phiB * (IA-nuB-phiB2/phiA*(IA-nuA)) |
369 | 371 |
CA <- (IA-nuA-phiA2*CB)/phiA |
370 | 372 |
if(allele == "B"){ |
371 |
- acn[[k]] <- CB |
|
373 |
+ acn[, match(jj, sample.index)] <- CB |
|
374 |
+ ##acn[[k]] <- CB |
|
372 | 375 |
} |
373 | 376 |
if(allele == "A"){ |
374 |
- acn[[k]] <- CA |
|
377 |
+ acn[, match(jj, sample.index)] <- (IA-nuA-phiA2*CB)/phiA |
|
378 |
+ } |
|
379 |
+ if(allele == "AandB"){ |
|
380 |
+ CA <- tmp/(1-phistar*phiA2/phiB) |
|
381 |
+ CB <- (IA-nuA-phiA2*CB)/phiA |
|
382 |
+ acn[, match(jj, sample.index)] <- (IA-nuA-phiA2*CB)/phiA |
|
375 | 383 |
} |
376 | 384 |
## if(allele=="AandB") |
377 | 385 |
## CA <- tmp/(1-phistar*phiA2/phiB) |
... | ... |
@@ -379,9 +387,9 @@ C3 <- function(object, allele, marker.index, batch.index, sample.index){ |
379 | 387 |
## acn[[k]] <- CA+CB |
380 | 388 |
## } |
381 | 389 |
} |
382 |
- if(length(acn) > 1){ |
|
383 |
- acn <- do.call("cbind", acn) |
|
384 |
- } else acn <- acn[[1]] |
|
390 |
+## if(length(acn) > 1){ |
|
391 |
+## acn <- do.call("cbind", acn) |
|
392 |
+## } else acn <- acn[[1]] |
|
385 | 393 |
return(as.matrix(acn)) |
386 | 394 |
} |
387 | 395 |
|
... | ... |
@@ -408,6 +416,7 @@ ACN <- function(object, allele, i , j){ |
408 | 416 |
## Define batch.index and sample.index |
409 | 417 |
if(!missing.j) { |
410 | 418 |
batches <- unique(as.character(batch(object))[j]) |
419 |
+ ##batches <- as.character(batch(object)[j]) |
|
411 | 420 |
batch.index <- match(batches, batchNames(object)) |
412 | 421 |
} else { |
413 | 422 |
batch.index <- seq_along(batchNames(object)) |
... | ... |
@@ -416,6 +425,8 @@ ACN <- function(object, allele, i , j){ |
416 | 425 |
nr <- length(i) |
417 | 426 |
nc <- length(j) |
418 | 427 |
acn <- matrix(NA, nr, nc) |
428 |
+ dimnames(acn) <- list(featureNames(object)[i], |
|
429 |
+ sampleNames(object)[j]) |
|
419 | 430 |
if(allele == "A"){ |
420 | 431 |
if(is.ff){ |
421 | 432 |
open(nuA(object)) |
... | ... |
@@ -452,7 +463,7 @@ ACN <- function(object, allele, i , j){ |
452 | 463 |
close(B(object)) |
453 | 464 |
} |
454 | 465 |
} |
455 |
- if(any(!is.snp)){ ## nonpolymorphic X needs to be fixed |
|
466 |
+ if(any(!is.snp)){ |
|
456 | 467 |
marker.index <- i[is.X & !is.snp] |
457 | 468 |
acn.index <- which(is.X & !is.snp) |
458 | 469 |
acn[acn.index, ] <- NA |
... | ... |
@@ -492,10 +503,14 @@ ACN <- function(object, allele, i , j){ |
492 | 503 |
open(phiB(object)) |
493 | 504 |
open(B(object)) |
494 | 505 |
} |
506 |
+ if(any(!is.snp)){ |
|
507 |
+ acn.index <- which(!is.snp) |
|
508 |
+ acn[acn.index, ] <- 0 |
|
509 |
+ } |
|
495 | 510 |
if(any(is.auto)){ |
496 | 511 |
auto.index <- which(is.auto & is.snp) |
497 | 512 |
if(length(auto.index) > 0){ |
498 |
- marker.index <- i[is.auto] |
|
513 |
+ marker.index <- i[auto.index] |
|
499 | 514 |
acn[auto.index, ] <- C2(object, marker.index, batch.index, j) |
500 | 515 |
} |
501 | 516 |
} |
... | ... |
@@ -554,16 +569,6 @@ setMethod("totalCopynumber", signature=signature(object="CNSet"), |
554 | 569 |
function(object, ...){ |
555 | 570 |
ca <- CA(object, ...) |
556 | 571 |
cb <- CB(object, ...) |
557 |
-## is.snp <- isSnp(object) |
|
558 |
-## dotArgs <- list(...) |
|
559 |
-## if("i" %in% names(dotArgs)){ |
|
560 |
-## i <- dotArgs[["i"]] |
|
561 |
-## np.index <- which(!is.snp[i]) |
|
562 |
-## if(length(np.index) > 0) cb[np.index, ] <- 0 |
|
563 |
-## } else { |
|
564 |
-## np.index <- which(!is.snp) |
|
565 |
-## if(length(np.index) > 0) cb[np.index, ] <- 0 |
|
566 |
-## } |
|
567 | 572 |
return(ca+cb) |
568 | 573 |
}) |
569 | 574 |
|
... | ... |
@@ -164,12 +164,6 @@ if(!file.exists(file.path(outdir, "cnSet.rda"))){ |
164 | 164 |
filenames=celFiles, |
165 | 165 |
cdfName=cdfName, |
166 | 166 |
batch=batch) |
167 |
- ## try normalizing np probes on chr X |
|
168 |
-## callSet <- gtSet |
|
169 |
-## filenames <- celFiles |
|
170 |
-## snp.I <- isSnp(callSet) |
|
171 |
-## is.snp <- which(snp.I) |
|
172 |
-## np.index <- which(!is.snp) |
|
173 | 167 |
} |
174 | 168 |
@ |
175 | 169 |
|
... | ... |
@@ -194,9 +188,7 @@ The \Rfunction{crlmmCopynumber} performs the following steps: |
194 | 188 |
displayed during the processing. |
195 | 189 |
|
196 | 190 |
<<LDS_copynumber>>= |
197 |
-GT.CONF.THR <- 0.90 |
|
198 |
-cnSet <- checkExists("cnSet", .path=outdir, .FUN=crlmmCopynumber, object=gtSet, GT.CONF.THR=GT.CONF.THR) |
|
199 |
-invisible(open(cnSet)) |
|
191 |
+cnSet <- checkExists("cnSet", .path=outdir, .FUN=crlmmCopynumber, object=gtSet) |
|
200 | 192 |
@ |
201 | 193 |
|
202 | 194 |
In an effort to reduce I/O, the \Rpackage{crlmmCopynumber} function no |
... | ... |
@@ -251,31 +243,26 @@ stopifnot(all.equal(ct, ct2)) |
251 | 243 |
@ |
252 | 244 |
|
253 | 245 |
|
254 |
-Nonpolymorphic markers on chromosome X: |
|
255 |
- |
|
256 |
-\begin{figure} |
|
246 |
+\begin{figure}[t!] |
|
247 |
+ Nonpolymorphic markers on chromosome X: |
|
257 | 248 |
\centering |
258 |
-<<nonpolymorphicX, fig=TRUE>>= |
|
249 |
+<<nonpolymorphicX, fig=TRUE, width=8, height=4>>= |
|
259 | 250 |
npx.index <- which(chromosome(cnSet)==23 & !isSnp(cnSet)) |
260 | 251 |
M <- sample(which(cnSet$gender==1), 5) |
261 | 252 |
F <- sample(which(cnSet$gender==2), 5) |
262 |
-##a <- A(cnSet)[npx.index, M] |
|
263 |
-##nus <- nuA(cnSet)[npx.index, ] |
|
264 |
-## nu and phi are not estimated |
|
265 | 253 |
cn.M <- CA(cnSet, i=npx.index, j=M) |
266 | 254 |
cn.F <- CA(cnSet, i=npx.index, j=F) |
267 |
-boxplot(data.frame(cbind(cn.M, cn.F)), pch=".") |
|
255 |
+boxplot(data.frame(cbind(cn.M, cn.F)), pch=".", col="grey60", outline=FALSE) |
|
268 | 256 |
@ |
269 |
-\caption{Copy number estimates for nonpolymorphic markers on |
|
270 |
- chromosome X. We assume that the median copy number (across |
|
271 |
- samples) at any given nonpolymorphic marker on chromosome X is 1 for |
|
272 |
- males and 2 for females.} |
|
257 |
+\caption{Copy number estimates for nonpolymorphic loci on chromosome |
|
258 |
+ X (5 men, 5 women). crlmm assumes that the median copy number |
|
259 |
+ across samples at a given marker on X is 1 for men and 2 for women.} |
|
273 | 260 |
\end{figure} |
274 | 261 |
|
275 |
-Polymorphic markers, X chromosome: |
|
276 | 262 |
|
277 |
-\begin{figure} |
|
278 |
- \centering |
|
263 |
+ |
|
264 |
+\begin{figure}[t!] |
|
265 |
+ \centering |
|
279 | 266 |
<<polymorphicX, fig=TRUE, width=8, height=4>>= |
280 | 267 |
## copy number estimates on X for SNPs are biased towards small values. |
281 | 268 |
X.markers <- which(isSnp(cnSet) & chromosome(cnSet) == 23) |
... | ... |
@@ -288,15 +275,12 @@ cn.F <- ca.F+cb.F |
288 | 275 |
boxplot(data.frame(cbind(cn.M, cn.F)), pch=".", outline=FALSE, col=cols, xaxt="n", ylim=c(0,5)) |
289 | 276 |
legend("topleft", fill=unique(cols), legend=c("Male", "Female")) |
290 | 277 |
abline(h=c(1,2)) |
291 |
-## alternatively |
|
292 |
-cn.MF1 <- cbind(cn.M, cn.F) |
|
293 |
-cn.MF2 <- totalCopynumber(cnSet, i=X.markers, j=c(M,F)) |
|
294 |
-##stopifnot(all.equal(cn.MF1[!is.na(cn.MF1)], cn.MF2[!is.na(cn.MF2)])) |
|
278 |
+cn2 <- totalCopynumber(cnSet, i=X.markers, j=c(M, F)) |
|
279 |
+stopifnot(all.equal(cbind(cn.M, cn.F), cn2)) |
|
295 | 280 |
@ |
296 | 281 |
\caption{Copy number estimates for polymorphic markers on chromosome |
297 |
- X. We assume that the median copy number (across samples) at any |
|
298 |
- given polymorphic marker on chromosome X is 1 for males and 2 for |
|
299 |
- females.} |
|
282 |
+ X. crlmm assumes that the median copy number across samples at a |
|
283 |
+ given marker on X is 1 for men and 2 for women.} |
|
300 | 284 |
\end{figure} |
301 | 285 |
|
302 | 286 |
Accessors for physical position and chromosome are also provided. In |
... | ... |
@@ -415,12 +399,11 @@ invisible(open(B(cnSet))) |
415 | 399 |
b.snps <- (B(cnSet))[snp.index, ] |
416 | 400 |
@ |
417 | 401 |
|
418 |
-Note that NAs are recorded in the 'B' assay data element for |
|
419 |
-nonpolymorphic loci: |
|
402 |
+Note that NA's are stored in the slot for normalized 'B' allele |
|
403 |
+intensities: |
|
420 | 404 |
|
421 | 405 |
<<B.NAs>>= |
422 |
-all(is.na((B(cnSet))[np.index, ])) |
|
423 |
-invisible(close(B(cnSet))) |
|
406 |
+all(is.na(B(cnSet)[np.index, ])) |
|
424 | 407 |
@ |
425 | 408 |
|
426 | 409 |
<<clean, echo=FALSE>>= |
... | ... |
@@ -481,12 +464,9 @@ plot(x, cn, pch=".", |
481 | 464 |
cex=2, xaxt="n", col="grey60", ylim=c(0,6), |
482 | 465 |
ylab="copy number", xlab="physical position (Mb)", |
483 | 466 |
main=paste(sampleNames(cnSet)[1], ", CHR: 1")) |
484 |
-##np.index <- which(!isSnp(cnSet)[marker.index]) |
|
485 |
-##points(x[np.index], cn[np.index], |
|
486 |
-## pch=".", cex=2, col="lightblue") |
|
487 | 467 |
axis(1, at=pretty(x), labels=pretty(x)/1e6) |
488 | 468 |
require(SNPchip) |
489 |
-invisible(plotCytoband(1, new=FALSE, cytoband.ycoords=c(5.3, 5.9), label.cytoband=FALSE)) |
|
469 |
+invisible(plotCytoband(1, new=FALSE, cytoband.ycoords=c(5.5, 6), label.cytoband=FALSE)) |
|
490 | 470 |
@ |
491 | 471 |
|
492 | 472 |
\begin{figure} |
... | ... |
@@ -1 +1,2 @@ |
1 |
-rsync -avuzb --exclude '.git*' -e ssh ~/madman/Rpacks/git/crlmm/ enigma2.jhsph.edu:~/madman/Rpacks/release/crlmm |
|
1 |
+rsync -avuzb --exclude '~*' --exclude '.git*' -e ssh ~/madman/Rpacks/git/crlmm/ enigma2.jhsph.edu:~/madman/Rpacks/release/crlmm |
|
2 |
+rsync -avuzb --exclude '.git*' -e ssh enigma2.jhsph.edu:~/madman/Rpacks/release/crlmm/inst/scripts/copynumber.pdf ~/madman/Rpacks/git/crlmm/inst/scripts/copynumber.pdf |