git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@37748 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -8,4 +8,5 @@ importFrom(preprocessCore, normalize.quantiles.use.target) |
8 | 8 |
importFrom(utils, data, packageDescription, setTxtProgressBar, txtProgressBar) |
9 | 9 |
importFrom(stats, coef, cov, dnorm, kmeans, lm, mad, median, quantile, sd) |
10 | 10 |
importFrom(genefilter, rowSds) |
11 |
+importFrom(mvtnorm, dmvnorm) |
|
11 | 12 |
|
... | ... |
@@ -75,8 +75,6 @@ nuphiAllele <- function(allele, Ystar, W, envir, p){ |
75 | 75 |
assign("phi.se", phi.se, envir=parent.frame(1)) |
76 | 76 |
} |
77 | 77 |
|
78 |
- |
|
79 |
- |
|
80 | 78 |
celDatesFrom <- function(celfiles, path=""){ |
81 | 79 |
celdates <- vector("character", length(celfiles)) |
82 | 80 |
celtimes <- vector("character", length(celfiles)) |
... | ... |
@@ -92,7 +90,6 @@ celDatesFrom <- function(celfiles, path=""){ |
92 | 90 |
return(celdts) |
93 | 91 |
} |
94 | 92 |
|
95 |
- |
|
96 | 93 |
cnrma <- function(filenames, sns, cdfName, seed=1, verbose=FALSE){ |
97 | 94 |
## require(preprocessCore) || stop("Package preprocessCore not available") |
98 | 95 |
if (missing(sns)) sns <- basename(filenames) |
... | ... |
@@ -236,6 +233,8 @@ instantiateObjects <- function(calls, NP, plate, envir, chrom){ |
236 | 233 |
assign("CB", CB, envir=envir) |
237 | 234 |
} |
238 | 235 |
|
236 |
+ |
|
237 |
+ |
|
239 | 238 |
computeCopynumber <- function(A, |
240 | 239 |
B, |
241 | 240 |
calls, |
... | ... |
@@ -243,10 +242,17 @@ computeCopynumber <- function(A, |
243 | 242 |
NP, |
244 | 243 |
plate, |
245 | 244 |
fit.variance=NULL, |
246 |
- MIN.OBS=3, |
|
245 |
+ MIN.OBS=1, |
|
247 | 246 |
envir, |
248 |
- chrom, P, DF.PRIOR=50, CONF.THR=0.99, trim=0, upperTail=TRUE, ...){ |
|
247 |
+ chrom, P, DF.PRIOR=50, CONF.THR=0.99, |
|
248 |
+ trim=0, upperTail=TRUE, |
|
249 |
+ bias.adj=FALSE, |
|
250 |
+ priorProb, ...){ |
|
249 | 251 |
if(length(ls(envir)) == 0) instantiateObjects(calls, NP, plate, envir, chrom) |
252 |
+ if(bias.adj){ |
|
253 |
+ ##assign uniform priors for total copy number states |
|
254 |
+ if(missing(priorProb)) priorProb <- rep(1/4, 4) |
|
255 |
+ } |
|
250 | 256 |
##will be updating these objects |
251 | 257 |
uplate <- get("uplate", envir) |
252 | 258 |
message("Sufficient statistics") |
... | ... |
@@ -261,7 +267,8 @@ computeCopynumber <- function(A, |
261 | 267 |
envir=envir, |
262 | 268 |
MIN.OBS=MIN.OBS, |
263 | 269 |
DF.PRIOR=DF.PRIOR, |
264 |
- trim=trim, upperTail=upperTail) |
|
270 |
+ trim=trim, upperTail=upperTail, |
|
271 |
+ bias.adj=bias.adj) |
|
265 | 272 |
} |
266 | 273 |
message("\nEstimating coefficients") |
267 | 274 |
for(p in P){ |
... | ... |
@@ -395,7 +402,7 @@ nonpolymorphic <- function(plateIndex, NP, envir){ |
395 | 402 |
## firstPass.NP |
396 | 403 |
} |
397 | 404 |
|
398 |
-oneBatch <- function(plateIndex, G, A, B, MIN.OBS=3, DF.PRIOR, envir, trim, upperTail, ...){ |
|
405 |
+oneBatch <- function(plateIndex, G, A, B, MIN.OBS=3, DF.PRIOR, envir, trim, upperTail, bias.adj=FALSE, priorProb, ...){ |
|
399 | 406 |
p <- plateIndex |
400 | 407 |
plate <- get("plate", envir) |
401 | 408 |
AA <- G == 1 |
... | ... |
@@ -405,27 +412,30 @@ oneBatch <- function(plateIndex, G, A, B, MIN.OBS=3, DF.PRIOR, envir, trim, uppe |
405 | 412 |
Ns[, p, "AA"] <- rowSums(AA) |
406 | 413 |
Ns[, p, "AB"] <- rowSums(AB) |
407 | 414 |
Ns[, p, "BB"] <- rowSums(BB) |
415 |
+ assign("Ns", Ns, envir) |
|
408 | 416 |
AA[AA == FALSE] <- NA |
409 | 417 |
AB[AB == FALSE] <- NA |
410 | 418 |
BB[BB == FALSE] <- NA |
411 |
- Ip <- array(NA, dim=c(nrow(G), ncol(G), 3, 2)) |
|
412 |
- dimnames(Ip) <- list(rownames(G), colnames(G), c("AA", "AB", "BB"), c("A", "B")) |
|
413 |
- Ip[, , "AA", "A"] <- AA*A |
|
414 |
- Ip[, , "AB", "A"] <- AB*A |
|
415 |
- Ip[, , "BB", "A"] <- BB*A |
|
416 |
- Ip[, , "AA", "B"] <- AA*B |
|
417 |
- Ip[, , "AB", "B"] <- AB*B |
|
418 |
- Ip[, , "BB", "B"] <- BB*B |
|
419 | 419 |
##--------------------------------------------------------------------------- |
420 | 420 |
## Sufficient statistics (plate-specific) |
421 | 421 |
##--------------------------------------------------------------------------- |
422 |
- ##These matrices instantiated in the calling function are updated |
|
422 |
+ AA.A <- AA*A |
|
423 |
+ AB.A <- AB*A |
|
424 |
+ BB.A <- BB*A |
|
425 |
+ AA.B <- AA*B |
|
426 |
+ AB.B <- AB*B |
|
427 |
+ BB.B <- BB*B |
|
428 |
+ locationAndScale(p=p, AA.A=AA.A, AB.A=AB.A, BB.A=BB.A, |
|
429 |
+ AA.B=AA.B, AB.B=AB.B, BB.B=BB.B, |
|
430 |
+ envir=envir, DF.PRIOR=DF.PRIOR) |
|
423 | 431 |
muA.AA <- get("muA.AA", envir) |
424 | 432 |
muA.AB <- get("muA.AB", envir) |
425 | 433 |
muA.BB <- get("muA.BB", envir) |
426 | 434 |
muB.AA <- get("muB.AA", envir) |
427 | 435 |
muB.AB <- get("muB.AB", envir) |
428 | 436 |
muB.BB <- get("muB.BB", envir) |
437 |
+ sigmaA <- get("sigmaA", envir) |
|
438 |
+ sigmaB <- get("sigmaB", envir) |
|
429 | 439 |
tau2A <- get("tau2A", envir) |
430 | 440 |
tau2B <- get("tau2B", envir) |
431 | 441 |
sig2A <- get("sig2A", envir) |
... | ... |
@@ -433,13 +443,48 @@ oneBatch <- function(plateIndex, G, A, B, MIN.OBS=3, DF.PRIOR, envir, trim, uppe |
433 | 443 |
corr <- get("corr", envir) |
434 | 444 |
corrA.BB <- get("corrA.BB", envir) ## B allele |
435 | 445 |
corrB.AA <- get("corrB.AA", envir) |
436 |
- |
|
437 |
- AA.A <- AA*A |
|
438 |
- AB.A <- AB*A |
|
439 |
- BB.A <- BB*A |
|
440 |
- AA.B <- AA*B |
|
441 |
- AB.B <- AB*B |
|
442 |
- BB.B <- BB*B |
|
446 |
+ if(bias.adj){ |
|
447 |
+ ##First check that nu and phi are available |
|
448 |
+ nuA <- get("nuA", envir) |
|
449 |
+ if(all(is.na(nuA))) { |
|
450 |
+ message("Background and signal coefficients have not yet been estimated -- can not do bias correction yet") |
|
451 |
+ message("Must run computeCopynumber a second time with bias.adj=TRUE to do the adjustment") |
|
452 |
+ } else { |
|
453 |
+ message("running bias adjustment") |
|
454 |
+ normal <- biasAdj(A=A, B=B, plateIndex=p, envir=envir, priorProb=priorProb) |
|
455 |
+ normal[normal == FALSE] <- NA |
|
456 |
+ AA.A <- AA.A*normal |
|
457 |
+ AB.A <- AB.A*normal |
|
458 |
+ BB.A <- BB.A*normal |
|
459 |
+ AA.B <- AA.B*normal |
|
460 |
+ AB.B <- AB.B*normal |
|
461 |
+ BB.B <- BB.B*normal |
|
462 |
+ message("Recomputing location and scale parameters") |
|
463 |
+ locationAndScale(p=p, AA.A=AA.A, AB.A=AB.A, BB.A=BB.A, |
|
464 |
+ AA.B=AA.B, AB.B=AB.B, BB.B=BB.B, |
|
465 |
+ envir=envir, DF.PRIOR=DF.PRIOR) |
|
466 |
+ muA.AA <- get("muA.AA", envir) |
|
467 |
+ muA.AB <- get("muA.AB", envir) |
|
468 |
+ muA.BB <- get("muA.BB", envir) |
|
469 |
+ muB.AA <- get("muB.AA", envir) |
|
470 |
+ muB.AB <- get("muB.AB", envir) |
|
471 |
+ muB.BB <- get("muB.BB", envir) |
|
472 |
+ tau2A <- get("tau2A", envir) |
|
473 |
+ tau2B <- get("tau2B", envir) |
|
474 |
+ sig2A <- get("sig2A", envir) |
|
475 |
+ sig2B <- get("sig2B", envir) |
|
476 |
+ sigmaA <- get("sigmaA", envir) |
|
477 |
+ sigmaB <- get("sigmaB", envir) |
|
478 |
+ corr <- get("corr", envir) |
|
479 |
+ corrA.BB <- get("corrA.BB", envir) ## B allele |
|
480 |
+ corrB.AA <- get("corrB.AA", envir) |
|
481 |
+ Ns.unadj <- Ns |
|
482 |
+ assign("Ns.unadj", Ns.unadj, envir) |
|
483 |
+ Ns[, p, "AA"] <- rowSums(!is.na(AA.A)) |
|
484 |
+ Ns[, p, "AB"] <- rowSums(!is.na(AB.A)) |
|
485 |
+ Ns[, p, "BB"] <- rowSums(!is.na(BB.A)) |
|
486 |
+ } |
|
487 |
+ } |
|
443 | 488 |
if(trim > 0){ |
444 | 489 |
##rowMedians is not robust enough when a variant is common |
445 | 490 |
##Try a trimmed rowMedian, trimming only one tail (must specify which) |
... | ... |
@@ -474,114 +519,13 @@ oneBatch <- function(plateIndex, G, A, B, MIN.OBS=3, DF.PRIOR, envir, trim, uppe |
474 | 519 |
Ns[, p, "AB"] <- rowSums(!is.na(AB.A)) |
475 | 520 |
Ns[, p, "BB"] <- rowSums(!is.na(BB.A)) |
476 | 521 |
} |
477 |
- muA.AA[, p] <- rowMedians(AA.A, na.rm=TRUE) |
|
478 |
- muA.AB[, p] <- rowMedians(AB.A, na.rm=TRUE) |
|
479 |
- muA.BB[, p] <- rowMedians(BB.A, na.rm=TRUE) |
|
480 |
- muB.AA[, p] <- rowMedians(AA.B, na.rm=TRUE) |
|
481 |
- muB.AB[, p] <- rowMedians(AB.B, na.rm=TRUE) |
|
482 |
- muB.BB[, p] <- rowMedians(BB.B, na.rm=TRUE) |
|
483 |
- sigmaA <- matrix(NA, nrow(AA), 3) |
|
484 |
- sigmaB <- matrix(NA, nrow(AA), 3) |
|
485 |
- colnames(sigmaB) <- colnames(sigmaA) <- c("AA", "AB", "BB") |
|
486 |
- |
|
487 |
- |
|
488 |
- sigmaA[, "AA"] <- rowMAD(AA.A, na.rm=TRUE) |
|
489 |
- sigmaA[, "AB"] <- rowMAD(AB.A, na.rm=TRUE) |
|
490 |
- sigmaA[, "BB"] <- rowMAD(BB.A, na.rm=TRUE) |
|
491 |
- sigmaB[, "AA"] <- rowMAD(AA.B, na.rm=TRUE) |
|
492 |
- sigmaB[, "AB"] <- rowMAD(AB.B, na.rm=TRUE) |
|
493 |
- sigmaB[, "BB"] <- rowMAD(BB.B, na.rm=TRUE) |
|
494 |
- |
|
495 |
-## sigmaA[, "AA"] <- rowSds(AA.A, na.rm=TRUE) |
|
496 |
-## sigmaA[, "AB"] <- rowSds(AB.A, na.rm=TRUE) |
|
497 |
-## sigmaA[, "BB"] <- rowSds(BB*A, na.rm=TRUE) |
|
498 |
-## sigmaB[, "AA"] <- rowSds(AA*B, na.rm=TRUE) |
|
499 |
-## sigmaB[, "AB"] <- rowSds(AB*B, na.rm=TRUE) |
|
500 |
-## sigmaB[, "BB"] <- rowSds(BB*B, na.rm=TRUE) |
|
501 |
- ##shrink |
|
502 |
- DF <- Ns[, p, ]-1 |
|
503 |
- DF[DF < 1] <- 1 |
|
504 |
- medsA <- apply(sigmaA, 2, "median", na.rm=TRUE) |
|
505 |
- medsB <- apply(sigmaB, 2, "median", na.rm=TRUE) |
|
506 |
- for(m in 1:3){ |
|
507 |
- sigmaA[, m] <- (sigmaA[, m]*DF[, m] + medsA[m]*DF.PRIOR)/(DF.PRIOR+DF[, m]) |
|
508 |
- sigmaA[is.na(sigmaA[, m]), m] <- medsA[m] |
|
509 |
- sigmaB[, m] <- (sigmaB[, m]*DF[, m] + medsB[m]*DF.PRIOR)/(DF.PRIOR+DF[, m]) |
|
510 |
- sigmaB[is.na(sigmaB[, m]), m] <- medsB[m] |
|
511 |
- } |
|
512 |
- index.AA <- which(Ns[, p, "AA"] >= 2) |
|
513 |
- index.AB <- which(Ns[, p, "AB"] >= 2) |
|
514 |
- index.BB <- which(Ns[, p, "BB"] >= 2) |
|
515 |
- |
|
516 |
- x <- Ip[index.BB, , "BB", "A"] |
|
517 |
- ##tau2A[index.BB, p] <- rowVars(x, na.rm=TRUE)##var(log(IA)| BB) |
|
518 |
- tau2A[index.BB, p] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2 |
|
519 |
- DF <- Ns[, p, "BB"]-1 |
|
520 |
- DF[DF < 1] <- 1 |
|
521 |
- med <- median(tau2A[, p], na.rm=TRUE) |
|
522 |
- tau2A[, p] <- (tau2A[, p] * DF + med * DF.PRIOR)/(DF.PRIOR + DF) |
|
523 |
- tau2A[is.na(tau2A[, p]), p] <- med |
|
524 |
- |
|
525 |
- x <- Ip[index.BB, , "BB", "B"] |
|
526 |
-## sig2B[index.BB, p] <- rowVars(log2(x), na.rm=TRUE) ##var(log(IB)|BB) |
|
527 |
- sig2B[index.BB, p] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2 |
|
528 |
- ##system.time(tmp <- rowCovs(x, x, na.rm=TRUE)) ##var(log(IB)|BB) |
|
529 |
- ##system.time(tmp2 <- rowVars(log2(x), na.rm=TRUE)) |
|
530 |
- med <- median(sig2B[, p], na.rm=TRUE) |
|
531 |
- sig2B[, p] <- (sig2B[, p] * DF + med * DF.PRIOR)/(DF.PRIOR + DF) |
|
532 |
- sig2B[is.na(sig2B[, p]), p] <- med |
|
533 |
- |
|
534 |
- x <- Ip[index.AA, , "AA", "B"] |
|
535 |
- ##tau2B[index.AA, p] <- rowCovs(x, x, na.rm=TRUE)##var(log(IB)| AA) |
|
536 |
- tau2B[index.AA, p] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2 |
|
537 |
- DF <- Ns[, p, "AA"]-1 |
|
538 |
- DF[DF < 1] <- 1 |
|
539 |
- med <- median(tau2B[, p], na.rm=TRUE) |
|
540 |
- tau2B[, p] <- (tau2B[, p] * DF + med * DF.PRIOR)/(DF.PRIOR + DF) |
|
541 |
- tau2B[is.na(tau2B[, p]), p] <- med |
|
542 |
- |
|
543 |
- x <- Ip[index.AA, , "AA", "A"] |
|
544 |
- ##sig2A[index.AA, p] <- rowVars(log2(x), na.rm=TRUE)##var(log(IA)|AA) |
|
545 |
- sig2A[index.AA, p] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2##var(log(IA)|AA) |
|
546 |
- ##sig2A[index.AA, p] <- rowCovs(x, x, na.rm=TRUE) ##var(log(IA)|AA) |
|
547 |
- med <- median(sig2A[, p], na.rm=TRUE) |
|
548 |
- sig2A[, p] <- (sig2A[, p]*DF + med * DF.PRIOR)/(DF.PRIOR + DF) |
|
549 |
- sig2A[is.na(sig2A[, p]), p] <- med |
|
550 |
- |
|
551 |
- ##estimate the correlation where there is at least 1 or more copies (AB genotypes) |
|
552 |
- if(length(index.AB) > 0){ ##all homozygous is possible |
|
553 |
- x <- Ip[index.AB, , "AB", "A"] |
|
554 |
- y <- Ip[index.AB, , "AB", "B"] |
|
555 |
- corr[index.AB, p] <- rowCors(x, y, na.rm=TRUE) |
|
556 |
- corr[corr < 0] <- 0 |
|
557 |
- DF <- Ns[, p, "AB"]-1 |
|
558 |
- DF[DF<1] <- 1 |
|
559 |
- med <- median(corr[, p], na.rm=TRUE) |
|
560 |
- corr[, p] <- (corr[, p]*DF + med * DF.PRIOR)/(DF.PRIOR + DF) |
|
561 |
- corr[is.na(corr[, p]), p] <- med |
|
562 |
- } |
|
563 |
- ##estimate the correlation of the background errors and AA, BB genotypes |
|
564 |
- backgroundB <- Ip[index.AA, , "AA", "B"] |
|
565 |
- signalA <- Ip[index.AA, , "AA", "A"] |
|
566 |
- corrB.AA[index.AA, p] <- rowCors(backgroundB, signalA, na.rm=TRUE) |
|
567 |
- DF <- Ns[, p, "AA"]-1 |
|
568 |
- DF[DF < 1] <- 1 |
|
569 |
- med <- median(corrB.AA[, p], na.rm=TRUE) |
|
570 |
- corrB.AA[, p] <- (corrB.AA[, p]*DF + med*DF.PRIOR)/(DF.PRIOR + DF) |
|
571 |
- corrB.AA[is.na(corrB.AA[, p]), p] <- med |
|
572 |
- |
|
573 |
- backgroundA <- Ip[index.BB, , "BB", "A"] |
|
574 |
- signalB <- Ip[index.BB, , "BB", "B"] |
|
575 |
- corrA.BB[index.BB, p] <- rowCors(backgroundA, signalB, na.rm=TRUE) |
|
576 |
- DF <- Ns[, p, "BB"]-1 |
|
577 |
- DF[DF < 1] <- 1 |
|
578 |
- med <- median(corrA.BB[, p], na.rm=TRUE) |
|
579 |
- corrA.BB[, p] <- (corrA.BB[, p]*DF + med*DF.PRIOR)/(DF.PRIOR + DF) |
|
580 |
- corrA.BB[is.na(corrA.BB[, p]), p] <- med |
|
581 |
- |
|
582 | 522 |
##--------------------------------------------------------------------------- |
583 | 523 |
## Predict sufficient statistics for unobserved genotypes (plate-specific) |
584 | 524 |
##--------------------------------------------------------------------------- |
525 |
+ index.AA <- which(Ns[, p, "AA"] >= 3) |
|
526 |
+ index.AB <- which(Ns[, p, "AB"] >= 3) |
|
527 |
+ index.BB <- which(Ns[, p, "BB"] >= 3) |
|
528 |
+ |
|
585 | 529 |
correct.orderA <- muA.AA[, p] > muA.BB[, p] |
586 | 530 |
correct.orderB <- muB.BB[, p] > muB.AA[, p] |
587 | 531 |
if(length(index.AB) > 0){ |
... | ... |
@@ -654,9 +598,9 @@ oneBatch <- function(plateIndex, G, A, B, MIN.OBS=3, DF.PRIOR, envir, trim, uppe |
654 | 598 |
dev.off() |
655 | 599 |
} |
656 | 600 |
##missing two genotypes |
657 |
- noAA <- Ns[, p, "AA"] <= 2 |
|
658 |
- noAB <- Ns[, p, "AB"] <= 2 |
|
659 |
- noBB <- Ns[, p, "BB"] <= 2 |
|
601 |
+ noAA <- Ns[, p, "AA"] < MIN.OBS |
|
602 |
+ noAB <- Ns[, p, "AB"] < MIN.OBS |
|
603 |
+ noBB <- Ns[, p, "BB"] < MIN.OBS |
|
660 | 604 |
##--------------------------------------------------------------------------- |
661 | 605 |
## Two genotype clusters not observed -- would sequence help? (didn't seem that helpful) |
662 | 606 |
## 1 extract index of complete data |
... | ... |
@@ -717,8 +661,6 @@ oneBatch <- function(plateIndex, G, A, B, MIN.OBS=3, DF.PRIOR, envir, trim, uppe |
717 | 661 |
dn.Ns <- dimnames(Ns) |
718 | 662 |
Ns <- array(as.integer(Ns), dim=dim(Ns)) |
719 | 663 |
dimnames(Ns)[[3]] <- dn.Ns[[3]] |
720 |
- |
|
721 |
- assign("Ns", Ns, envir) |
|
722 | 664 |
assign("muA.AA", muA.AA, envir) |
723 | 665 |
assign("muA.AB", muA.AB, envir) |
724 | 666 |
assign("muA.BB", muA.BB, envir) |
... | ... |
@@ -733,12 +675,148 @@ oneBatch <- function(plateIndex, G, A, B, MIN.OBS=3, DF.PRIOR, envir, trim, uppe |
733 | 675 |
assign("sig2B", sig2B, envir) |
734 | 676 |
assign("corr", corr, envir) |
735 | 677 |
assign("corrB.AA", corrB.AA, envir) |
736 |
- assign("corrA.BB", corrA.BB, envir) |
|
678 |
+ assign("corrA.BB", corrA.BB, envir) |
|
679 |
+ assign("Ns", Ns, envir) |
|
737 | 680 |
plates.completed <- get("plates.completed", envir) |
738 | 681 |
plates.completed[p] <- TRUE |
739 | 682 |
assign("plates.completed", plates.completed, envir) |
740 | 683 |
} |
741 | 684 |
|
685 |
+locationAndScale <- function(p, AA.A, AB.A, BB.A, |
|
686 |
+ AA.B, AB.B, BB.B, envir, |
|
687 |
+ DF.PRIOR){ |
|
688 |
+ muA.AA <- get("muA.AA", envir) |
|
689 |
+ muA.AB <- get("muA.AB", envir) |
|
690 |
+ muA.BB <- get("muA.BB", envir) |
|
691 |
+ muB.AA <- get("muB.AA", envir) |
|
692 |
+ muB.AB <- get("muB.AB", envir) |
|
693 |
+ muB.BB <- get("muB.BB", envir) |
|
694 |
+ tau2A <- get("tau2A", envir) |
|
695 |
+ tau2B <- get("tau2B", envir) |
|
696 |
+ sig2A <- get("sig2A", envir) |
|
697 |
+ sig2B <- get("sig2B", envir) |
|
698 |
+ corr <- get("corr", envir) |
|
699 |
+ corrA.BB <- get("corrA.BB", envir) ## B allele |
|
700 |
+ corrB.AA <- get("corrB.AA", envir) |
|
701 |
+ Ns <- get("Ns", envir) |
|
702 |
+ muA.AA[, p] <- rowMedians(AA.A, na.rm=TRUE) |
|
703 |
+ muA.AB[, p] <- rowMedians(AB.A, na.rm=TRUE) |
|
704 |
+ muA.BB[, p] <- rowMedians(BB.A, na.rm=TRUE) |
|
705 |
+ muB.AA[, p] <- rowMedians(AA.B, na.rm=TRUE) |
|
706 |
+ muB.AB[, p] <- rowMedians(AB.B, na.rm=TRUE) |
|
707 |
+ muB.BB[, p] <- rowMedians(BB.B, na.rm=TRUE) |
|
708 |
+ sigmaA <- matrix(NA, nrow(AA.A), 3) |
|
709 |
+ sigmaB <- matrix(NA, nrow(AA.A), 3) |
|
710 |
+ colnames(sigmaB) <- colnames(sigmaA) <- c("AA", "AB", "BB") |
|
711 |
+ sigmaA[, "AA"] <- rowMAD(AA.A, na.rm=TRUE) |
|
712 |
+ sigmaA[, "AB"] <- rowMAD(AB.A, na.rm=TRUE) |
|
713 |
+ sigmaA[, "BB"] <- rowMAD(BB.A, na.rm=TRUE) |
|
714 |
+ sigmaB[, "AA"] <- rowMAD(AA.B, na.rm=TRUE) |
|
715 |
+ sigmaB[, "AB"] <- rowMAD(AB.B, na.rm=TRUE) |
|
716 |
+ sigmaB[, "BB"] <- rowMAD(BB.B, na.rm=TRUE) |
|
717 |
+ |
|
718 |
+ ##shrink |
|
719 |
+ DF <- Ns[, p, ]-1 |
|
720 |
+ DF[DF < 1] <- 1 |
|
721 |
+ medsA <- apply(sigmaA, 2, "median", na.rm=TRUE) |
|
722 |
+ medsB <- apply(sigmaB, 2, "median", na.rm=TRUE) |
|
723 |
+ for(m in 1:3){ |
|
724 |
+ sigmaA[, m] <- (sigmaA[, m]*DF[, m] + medsA[m]*DF.PRIOR)/(DF.PRIOR+DF[, m]) |
|
725 |
+ sigmaA[is.na(sigmaA[, m]), m] <- medsA[m] |
|
726 |
+ sigmaB[, m] <- (sigmaB[, m]*DF[, m] + medsB[m]*DF.PRIOR)/(DF.PRIOR+DF[, m]) |
|
727 |
+ sigmaB[is.na(sigmaB[, m]), m] <- medsB[m] |
|
728 |
+ } |
|
729 |
+ index.AA <- which(Ns[, p, "AA"] >= 2) |
|
730 |
+ index.AB <- which(Ns[, p, "AB"] >= 2) |
|
731 |
+ index.BB <- which(Ns[, p, "BB"] >= 2) |
|
732 |
+ |
|
733 |
+ x <- BB.A[index.BB, ] |
|
734 |
+ ##x <- Ip[index.BB, , "BB", "A"] |
|
735 |
+ ##tau2A[index.BB, p] <- rowVars(x, na.rm=TRUE)##var(log(IA)| BB) |
|
736 |
+ tau2A[index.BB, p] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2 |
|
737 |
+ DF <- Ns[, p, "BB"]-1 |
|
738 |
+ DF[DF < 1] <- 1 |
|
739 |
+ med <- median(tau2A[, p], na.rm=TRUE) |
|
740 |
+ tau2A[, p] <- (tau2A[, p] * DF + med * DF.PRIOR)/(DF.PRIOR + DF) |
|
741 |
+ tau2A[is.na(tau2A[, p]), p] <- med |
|
742 |
+ |
|
743 |
+ ##x <- Ip[index.BB, , "BB", "B"] |
|
744 |
+ x <- BB.B[index.BB, ] |
|
745 |
+ sig2B[index.BB, p] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2 |
|
746 |
+ med <- median(sig2B[, p], na.rm=TRUE) |
|
747 |
+ sig2B[, p] <- (sig2B[, p] * DF + med * DF.PRIOR)/(DF.PRIOR + DF) |
|
748 |
+ sig2B[is.na(sig2B[, p]), p] <- med |
|
749 |
+ |
|
750 |
+ ##x <- Ip[index.AA, , "AA", "B"] |
|
751 |
+ x <- AA.B[index.AA, ] |
|
752 |
+ tau2B[index.AA, p] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2 |
|
753 |
+ DF <- Ns[, p, "AA"]-1 |
|
754 |
+ DF[DF < 1] <- 1 |
|
755 |
+ med <- median(tau2B[, p], na.rm=TRUE) |
|
756 |
+ tau2B[, p] <- (tau2B[, p] * DF + med * DF.PRIOR)/(DF.PRIOR + DF) |
|
757 |
+ tau2B[is.na(tau2B[, p]), p] <- med |
|
758 |
+ |
|
759 |
+ ##x <- Ip[index.AA, , "AA", "A"] |
|
760 |
+ x <- AA.A[index.AA, ] |
|
761 |
+ sig2A[index.AA, p] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2##var(log(IA)|AA) |
|
762 |
+ med <- median(sig2A[, p], na.rm=TRUE) |
|
763 |
+ sig2A[, p] <- (sig2A[, p]*DF + med * DF.PRIOR)/(DF.PRIOR + DF) |
|
764 |
+ sig2A[is.na(sig2A[, p]), p] <- med |
|
765 |
+ |
|
766 |
+ ##estimate the correlation where there is at least 1 or more copies (AB genotypes) |
|
767 |
+ if(length(index.AB) > 0){ ##all homozygous is possible |
|
768 |
+ x <- AB.A[index.AB, ] |
|
769 |
+ y <- AB.B[index.AB, ] |
|
770 |
+ ##x <- Ip[index.AB, , "AB", "A"] |
|
771 |
+ ##y <- Ip[index.AB, , "AB", "B"] |
|
772 |
+ corr[index.AB, p] <- rowCors(x, y, na.rm=TRUE) |
|
773 |
+ corr[corr < 0] <- 0 |
|
774 |
+ DF <- Ns[, p, "AB"]-1 |
|
775 |
+ DF[DF<1] <- 1 |
|
776 |
+ med <- median(corr[, p], na.rm=TRUE) |
|
777 |
+ corr[, p] <- (corr[, p]*DF + med * DF.PRIOR)/(DF.PRIOR + DF) |
|
778 |
+ corr[is.na(corr[, p]), p] <- med |
|
779 |
+ } |
|
780 |
+ ##estimate the correlation of the background errors and AA, BB genotypes |
|
781 |
+ ##backgroundB <- Ip[index.AA, , "AA", "B"] |
|
782 |
+ backgroundB <- AA.B[index.AA, ] |
|
783 |
+ signalA <- AA.A[index.AA, ] |
|
784 |
+ ##signalA <- Ip[index.AA, , "AA", "A"] |
|
785 |
+ corrB.AA[index.AA, p] <- rowCors(backgroundB, signalA, na.rm=TRUE) |
|
786 |
+ DF <- Ns[, p, "AA"]-1 |
|
787 |
+ DF[DF < 1] <- 1 |
|
788 |
+ med <- median(corrB.AA[, p], na.rm=TRUE) |
|
789 |
+ corrB.AA[, p] <- (corrB.AA[, p]*DF + med*DF.PRIOR)/(DF.PRIOR + DF) |
|
790 |
+ corrB.AA[is.na(corrB.AA[, p]), p] <- med |
|
791 |
+ |
|
792 |
+ ##backgroundA <- Ip[index.BB, , "BB", "A"] |
|
793 |
+ backgroundA <- BB.A[index.BB, ] |
|
794 |
+ signalB <- BB.B[index.BB, ] |
|
795 |
+ ##signalB <- Ip[index.BB, , "BB", "B"] |
|
796 |
+ corrA.BB[index.BB, p] <- rowCors(backgroundA, signalB, na.rm=TRUE) |
|
797 |
+ DF <- Ns[, p, "BB"]-1 |
|
798 |
+ DF[DF < 1] <- 1 |
|
799 |
+ med <- median(corrA.BB[, p], na.rm=TRUE) |
|
800 |
+ corrA.BB[, p] <- (corrA.BB[, p]*DF + med*DF.PRIOR)/(DF.PRIOR + DF) |
|
801 |
+ corrA.BB[is.na(corrA.BB[, p]), p] <- med |
|
802 |
+ |
|
803 |
+ assign("muA.AA", muA.AA, envir) |
|
804 |
+ assign("muA.AB", muA.AB, envir) |
|
805 |
+ assign("muA.BB", muA.BB, envir) |
|
806 |
+ assign("muB.AA", muB.AA, envir) |
|
807 |
+ assign("muB.AB", muB.AB, envir) |
|
808 |
+ assign("muB.BB", muB.BB, envir) |
|
809 |
+ assign("sigmaA", sigmaA, envir) |
|
810 |
+ assign("sigmaB", sigmaB, envir) |
|
811 |
+ assign("tau2A", tau2A, envir) |
|
812 |
+ assign("tau2B", tau2B, envir) |
|
813 |
+ assign("sig2A", sig2A, envir) |
|
814 |
+ assign("sig2B", sig2B, envir) |
|
815 |
+ assign("corr", corr, envir) |
|
816 |
+ assign("corrB.AA", corrB.AA, envir) |
|
817 |
+ assign("corrA.BB", corrA.BB, envir) |
|
818 |
+} |
|
819 |
+ |
|
742 | 820 |
coefs <- function(plateIndex, conf, MIN.OBS=3, envir, CONF.THR=0.99){ |
743 | 821 |
p <- plateIndex |
744 | 822 |
##if(p == 5) browser() |
... | ... |
@@ -967,3 +1045,86 @@ polymorphic <- function(plateIndex, A, B, envir){ |
967 | 1045 |
## nonpolymorphic probes |
968 | 1046 |
##--------------------------------------------------------------------------- |
969 | 1047 |
} |
1048 |
+ |
|
1049 |
+ |
|
1050 |
+biasAdj <- function(A, B, plateIndex, envir, priorProb){ |
|
1051 |
+ sig2A <- get("sig2A", envir) |
|
1052 |
+ sig2B <- get("sig2B", envir) |
|
1053 |
+ tau2A <- get("tau2A", envir) |
|
1054 |
+ tau2B <- get("tau2B", envir) |
|
1055 |
+ corrA.BB <- get("corrA.BB", envir) |
|
1056 |
+ corrB.AA <- get("corrB.AA", envir) |
|
1057 |
+ corr <- get("corr", envir) |
|
1058 |
+ nuA <- get("nuA", envir) |
|
1059 |
+ nuB <- get("nuB", envir) |
|
1060 |
+ phiA <- get("phiA", envir) |
|
1061 |
+ phiB <- get("phiB", envir) |
|
1062 |
+ p <- plateIndex |
|
1063 |
+ plate <- get("plate", envir) |
|
1064 |
+ if(missing(priorProb)) priorProb <- rep(1/4, 4) ##uniform |
|
1065 |
+ emit <- array(NA, dim=c(nrow(A), ncol(A), 10))##SNPs x sample x 'truth' |
|
1066 |
+ m <- 1##snp counter |
|
1067 |
+ for(i in 1:nrow(A)){ |
|
1068 |
+ if(i %% 100 == 0) cat(".") |
|
1069 |
+ counter <- 1##state counter |
|
1070 |
+ for(CT in 0:3){ |
|
1071 |
+ for(CA in 0:CT){ |
|
1072 |
+ CB <- CT-CA |
|
1073 |
+ A.scale <- sqrt(tau2A[i, p]*(CA==0) + sig2A[i, p]*(CA > 0)) |
|
1074 |
+ B.scale <- sqrt(tau2B[i, p]*(CB==0) + sig2B[i, p]*(CB > 0)) |
|
1075 |
+ scale <- c(A.scale, B.scale) |
|
1076 |
+ if(CA == 0 & CB == 0) rho <- 0 |
|
1077 |
+ if(CA == 0 & CB > 0) rho <- corrA.BB[i, p] |
|
1078 |
+ if(CA > 0 & CB == 0) rho <- corrB.AA[i, p] |
|
1079 |
+ if(CA > 0 & CB > 0) rho <- corr[i, p] |
|
1080 |
+ means <- c(log2(nuA[i, p]+CA*phiA[i, p]), log2(nuB[i, p]+CB*phiB[i, p])) |
|
1081 |
+ covs <- rho*A.scale*B.scale |
|
1082 |
+ ##ensure positive definite |
|
1083 |
+ ##Sigma <- as.matrix(nearPD(matrix(c(A.scale^2, covs, |
|
1084 |
+ ##covs, B.scale^2), 2, 2))[[1]]) |
|
1085 |
+ Sigma <- matrix(c(A.scale^2, covs, covs, B.scale^2), 2,2) |
|
1086 |
+ X <- log2(cbind(A[i, ], B[i, ])) |
|
1087 |
+ tmp <- dmvnorm(X, mean=means, sigma=Sigma) |
|
1088 |
+ emit[m, , counter] <- tmp |
|
1089 |
+ counter <- counter+1 |
|
1090 |
+ } |
|
1091 |
+ } |
|
1092 |
+ m <- m+1 |
|
1093 |
+ } |
|
1094 |
+ homDel <- priorProb[1]*emit[, , 1] |
|
1095 |
+ hemDel <- priorProb[2]*emit[, , c(2, 3)] # + priorProb[3]*emit[, c(4, 5, 6)] + priorProb[4]*emit[, c(7:10)] |
|
1096 |
+ norm <- priorProb[3]*emit[, , 4:6] |
|
1097 |
+ amp <- priorProb[4]*emit[, , 7:10] |
|
1098 |
+ ##sum over the different combinations within each copy number state |
|
1099 |
+ hemDel <- apply(hemDel, c(1,2), sum) |
|
1100 |
+ norm <- apply(norm, c(1, 2), sum) |
|
1101 |
+ amp <- apply(amp, c(1,2), sum) |
|
1102 |
+ |
|
1103 |
+ tmp <- array(NA, dim=c(nrow(A), ncol(A), 4)) |
|
1104 |
+ tmp[, , 1] <- homDel |
|
1105 |
+ tmp[, , 2] <- hemDel |
|
1106 |
+ tmp[, , 3] <- norm |
|
1107 |
+ tmp[, , 4] <- amp |
|
1108 |
+ tmp2 <- apply(tmp, c(1, 2), function(x) order(x, decreasing=TRUE)[1]) |
|
1109 |
+ ##Adjust for SNPs that have less than 80% in an altered state |
|
1110 |
+ ##flag the remainder? |
|
1111 |
+ tmp3 <- tmp2 != 3 |
|
1112 |
+ propAlt <- rowMeans(tmp3)##prop normal |
|
1113 |
+ ii <- propAlt < 0.75 |
|
1114 |
+ ##only exclude observations from one tail, depending on |
|
1115 |
+ ##whether more are up or down |
|
1116 |
+ ##(should probably iterate) |
|
1117 |
+ moreup <- rowSums(tmp2 > 3) > rowSums(tmp2 < 3) |
|
1118 |
+ notUp <- tmp2[ii & moreup, ] <= 3 |
|
1119 |
+ notDown <- tmp2[ii & !moreup, ] >= 3 |
|
1120 |
+ |
|
1121 |
+ normal <- matrix(TRUE, nrow(A), ncol(A)) |
|
1122 |
+ normal[ii & moreup, ] <- notUp |
|
1123 |
+ normal[ii & !moreup, ] <- notDown |
|
1124 |
+ flagAltered <- which(propAlt > 0.5) |
|
1125 |
+ assign("flagAltered", flagAltered, envir) |
|
1126 |
+## tmp3 <- tmp2[ii, ] == 3 |
|
1127 |
+## tmp4 <- matrix(TRUE, nrow(A), ncol(A)) |
|
1128 |
+## tmp4[ii, ] <- tmp3 |
|
1129 |
+ return(normal) |
|
1130 |
+} |
... | ... |
@@ -78,24 +78,23 @@ if(!exists("cnrmaResult")){ |
78 | 78 |
@ |
79 | 79 |
|
80 | 80 |
\subsection{Copy number} |
81 |
+<<chr>>= |
|
82 |
+CHR <- 21 |
|
83 |
+@ |
|
81 | 84 |
|
82 | 85 |
Copy number can be assessed one chromosome at a time. Here we specify |
83 |
-chromosome 15 and and load a list of indices to subset the data. The |
|
86 |
+chromosome \Sexpr{CHR} and and load a list of indices to subset the data. The |
|
84 | 87 |
first element in the list correspond to indices of polymorphic probes |
85 |
-on chromosome 15; the second element corresponds to indices of |
|
86 |
-nonpolymorphic probes on chromosome 15. |
|
88 |
+on chromosome \Sexpr{CHR}; the second element corresponds to indices of |
|
89 |
+nonpolymorphic probes on chromosome \Sexpr{21}. |
|
87 | 90 |
|
88 | 91 |
<<chromosomeIndex>>= |
89 |
-CHR <- 21 |
|
90 | 92 |
CHR_INDEX <- paste(CHR, "index", sep="") |
91 | 93 |
data(list=CHR_INDEX, package="genomewidesnp6Crlmm") |
92 | 94 |
str(index) |
93 | 95 |
@ |
94 | 96 |
|
95 |
-Next we load 3 files that were saved from the preprocessing step and |
|
96 |
-then subset these lists using the above indices to extract the |
|
97 |
-preprocessed intensities and genotypes needed for estimating copy |
|
98 |
-number. Specifically, we require 6 items: |
|
97 |
+We require 6 items for copy number estimation: |
|
99 | 98 |
|
100 | 99 |
\begin{itemize} |
101 | 100 |
\item quantile-normalized A intensities (I1 x J) |
... | ... |
@@ -9,7 +9,9 @@ |
9 | 9 |
|
10 | 10 |
} |
11 | 11 |
\usage{ |
12 |
-computeCopynumber(A, B, calls, conf, NP, plate, fit.variance = NULL, MIN.OBS = 3, envir, chrom, P, DF.PRIOR = 50, CONF.THR = 0.99, trim = 0, upperTail = TRUE, ...) |
|
12 |
+computeCopynumber(A, B, calls, conf, NP, plate, fit.variance = NULL, |
|
13 |
+MIN.OBS = 1, envir, chrom, P, DF.PRIOR = 50, CONF.THR = 0.99, trim = 0, |
|
14 |
+upperTail = TRUE, bias.adj=FALSE, priorProb, ...) |
|
13 | 15 |
} |
14 | 16 |
%- maybe also 'usage' for other objects documented here. |
15 | 17 |
\arguments{ |
... | ... |
@@ -34,7 +36,14 @@ computeCopynumber(A, B, calls, conf, NP, plate, fit.variance = NULL, MIN.OBS = 3 |
34 | 36 |
\item{trim}{Temporary option: a number between 0 and 1. Lops off a percentage of one tail of the |
35 | 37 |
intensities for the A and B alleles} |
36 | 38 |
\item{upperTail}{Logical: if TRUE and trim = p, quantiles greater than |
37 |
- 1-p are replaced with NAs} |
|
39 |
+ 1-p are replaced with NAs} |
|
40 |
+ \item{bias.adj}{Logical: whether to adjust the location and scale |
|
41 |
+ parameters to account for biases due to common copy number |
|
42 |
+ variants. This is a SNP-specific adjustment. Parameters for |
|
43 |
+ background and slope must have already been estimated.} |
|
44 |
+ \item{priorProb}{Numerical vector of length 4. The prior probability |
|
45 |
+ of each copy number state (0, 1, 2, 3, and 4). The default is a |
|
46 |
+ uniform prior. Ignored if bias.adj=FALSE} |
|
38 | 47 |
\item{\dots}{Currently ignored} |
39 | 48 |
} |
40 | 49 |
|