...
|
...
|
@@ -196,9 +196,10 @@ instantiateObjects <- function(calls, NP, plate, envir, chrom){
|
196
|
196
|
assign("plates.completed", plates.completed, envir)
|
197
|
197
|
|
198
|
198
|
fit.variance <- NULL
|
199
|
|
- snpflags <- NULL
|
|
199
|
+ npflags <- snpflags <- NULL
|
200
|
200
|
assign("fit.variance", fit.variance, envir=envir)
|
201
|
201
|
assign("snpflags", snpflags, envir=envir)
|
|
202
|
+ assign("npflags", npflags, envir=envir)
|
202
|
203
|
|
203
|
204
|
assign("Ns", Ns, envir=envir)
|
204
|
205
|
assign("uplate", uplate, envir=envir)
|
...
|
...
|
@@ -261,6 +262,8 @@ computeCopynumber <- function(A,
|
261
|
262
|
G=calls[, plate==uplate[p]],
|
262
|
263
|
A=A[, plate==uplate[p]],
|
263
|
264
|
B=B[, plate==uplate[p]],
|
|
265
|
+ conf=conf[, plate==uplate[p]],
|
|
266
|
+ CONF.THR=CONF.THR,
|
264
|
267
|
envir=envir,
|
265
|
268
|
MIN.OBS=MIN.OBS,
|
266
|
269
|
DF.PRIOR=DF.PRIOR,
|
...
|
...
|
@@ -271,7 +274,7 @@ computeCopynumber <- function(A,
|
271
|
274
|
for(p in P){
|
272
|
275
|
cat(".")
|
273
|
276
|
coefs(plateIndex=p, conf=conf[, plate==uplate[p]],
|
274
|
|
- envir=envir, CONF.THR=CONF.THR)
|
|
277
|
+ envir=envir, CONF.THR=CONF.THR, MIN.OBS=MIN.OBS)
|
275
|
278
|
}
|
276
|
279
|
message("\nAllele specific copy number")
|
277
|
280
|
for(p in P){
|
...
|
...
|
@@ -288,6 +291,12 @@ computeCopynumber <- function(A,
|
288
|
291
|
NP=NP[, plate==uplate[p]],
|
289
|
292
|
envir=envir)
|
290
|
293
|
}
|
|
294
|
+## snpflags <- get("snpflags", envir)
|
|
295
|
+## npflags <- get("npflags", envir)
|
|
296
|
+## flags <- sapply(snpflags, length)
|
|
297
|
+## flags.np <- sapply(npflags, length)
|
|
298
|
+## if(any(flags > 0) | any(flags.np > 0))
|
|
299
|
+## warning("some SNPs were flagged -- possible NAs. Check the indices in snpflags and npflags")
|
291
|
300
|
}
|
292
|
301
|
|
293
|
302
|
nonpolymorphic <- function(plateIndex, NP, envir){
|
...
|
...
|
@@ -365,7 +374,13 @@ nonpolymorphic <- function(plateIndex, NP, envir){
|
365
|
374
|
indexA <- which(rowSums(is.na(tmpA)) > 0)
|
366
|
375
|
indexB <- which(rowSums(is.na(tmpB)) > 0)
|
367
|
376
|
index <- union(indexA, indexB)
|
368
|
|
- if(length(index) > 0) browser()
|
|
377
|
+ if(length(index) > 0){
|
|
378
|
+ npflags <- get("npflags", envir)
|
|
379
|
+ ##warning(paste(length(index), "indices have NAs for the copy number estimates"))
|
|
380
|
+ npflags[[p]] <- unique(c(npflags[[p]], index))
|
|
381
|
+ assign("npflags", npflags, envir)
|
|
382
|
+ }
|
|
383
|
+ ##if(length(index) > 0) browser()
|
369
|
384
|
|
370
|
385
|
##---------------------------------------------------------------------------
|
371
|
386
|
## this part could stand improvement
|
...
|
...
|
@@ -399,7 +414,7 @@ nonpolymorphic <- function(plateIndex, NP, envir){
|
399
|
414
|
## firstPass.NP
|
400
|
415
|
}
|
401
|
416
|
|
402
|
|
-oneBatch <- function(plateIndex, G, A, B, MIN.OBS=3, DF.PRIOR, envir, trim, upperTail, bias.adj=FALSE, priorProb, ...){
|
|
417
|
+oneBatch <- function(plateIndex, G, A, B, conf, CONF.THR=0.99, MIN.OBS=3, DF.PRIOR, envir, trim, upperTail, bias.adj=FALSE, priorProb, ...){
|
403
|
418
|
p <- plateIndex
|
404
|
419
|
plate <- get("plate", envir)
|
405
|
420
|
AA <- G == 1
|
...
|
...
|
@@ -482,76 +497,70 @@ oneBatch <- function(plateIndex, G, A, B, MIN.OBS=3, DF.PRIOR, envir, trim, uppe
|
482
|
497
|
Ns[, p, "BB"] <- rowSums(!is.na(BB.A))
|
483
|
498
|
}
|
484
|
499
|
}
|
485
|
|
- if(trim > 0){
|
486
|
|
- ##rowMedians is not robust enough when a variant is common
|
487
|
|
- ##Try a trimmed rowMedian, trimming only one tail (must specify which)
|
488
|
|
- ## - for genotypes with 3 or more observations, exclude the upper X%
|
489
|
|
- ## - one way to do this is to replace these observations with NAs
|
490
|
|
- ## e.g, exclude round(Ns * X%, 0) observations
|
491
|
|
- ## - for genotypes with fewer than 10 observations,
|
492
|
|
- ## - recalculate rowMedians
|
493
|
|
- replaceWithNAs <- function(x, trim, upperTail){
|
494
|
|
- ##put NA's last if trimming the upperTail
|
495
|
|
- if(upperTail) decreasing <- TRUE else decreasing <- FALSE
|
496
|
|
- NN <- round(sum(!is.na(x)) * trim, 0)
|
497
|
|
- ix <- order(x, decreasing=decreasing, na.last=TRUE)[1:NN]
|
498
|
|
- x[ix] <- NA
|
499
|
|
- return(x)
|
500
|
|
- }
|
501
|
|
- ##which rows should be trimmed
|
502
|
|
- rowsToTrim <- which(round(Ns[, p, "AA"] * trim, 0) > 0)
|
503
|
|
- ##replace values in the tail of A with NAs
|
504
|
|
- AA.A[rowsToTrim, ] <- t(apply(AA.A[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
|
505
|
|
- AA.B[rowsToTrim, ] <- t(apply(AA.B[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
|
506
|
|
- rowsToTrim <- which(round(Ns[, p, "AB"] * trim, 0) > 0)
|
507
|
|
- AB.A[rowsToTrim, ] <- t(apply(AB.A[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
|
508
|
|
- AB.B[rowsToTrim, ] <- t(apply(AB.B[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
|
509
|
|
- rowsToTrim <- which(round(Ns[, p, "BB"] * trim, 0) > 0)
|
510
|
|
- BB.A[rowsToTrim, ] <- t(apply(BB.A[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
|
511
|
|
- BB.B[rowsToTrim, ] <- t(apply(BB.B[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
|
512
|
|
-
|
513
|
|
- ##Should probably recompute the Ns -- change the
|
514
|
|
- ##degrees of freedom
|
515
|
|
- Ns[, p, "AA"] <- rowSums(!is.na(AA.A))
|
516
|
|
- Ns[, p, "AB"] <- rowSums(!is.na(AB.A))
|
517
|
|
- Ns[, p, "BB"] <- rowSums(!is.na(BB.A))
|
518
|
|
- }
|
|
500
|
+## if(trim > 0){
|
|
501
|
+## ##rowMedians is not robust enough when a variant is common
|
|
502
|
+## ##Try a trimmed rowMedian, trimming only one tail (must specify which)
|
|
503
|
+## ## - for genotypes with 3 or more observations, exclude the upper X%
|
|
504
|
+## ## - one way to do this is to replace these observations with NAs
|
|
505
|
+## ## e.g, exclude round(Ns * X%, 0) observations
|
|
506
|
+## ## - for genotypes with fewer than 10 observations,
|
|
507
|
+## ## - recalculate rowMedians
|
|
508
|
+## replaceWithNAs <- function(x, trim, upperTail){
|
|
509
|
+## ##put NA's last if trimming the upperTail
|
|
510
|
+## if(upperTail) decreasing <- TRUE else decreasing <- FALSE
|
|
511
|
+## NN <- round(sum(!is.na(x)) * trim, 0)
|
|
512
|
+## ix <- order(x, decreasing=decreasing, na.last=TRUE)[1:NN]
|
|
513
|
+## x[ix] <- NA
|
|
514
|
+## return(x)
|
|
515
|
+## }
|
|
516
|
+## ##which rows should be trimmed
|
|
517
|
+## rowsToTrim <- which(round(Ns[, p, "AA"] * trim, 0) > 0)
|
|
518
|
+## ##replace values in the tail of A with NAs
|
|
519
|
+## AA.A[rowsToTrim, ] <- t(apply(AA.A[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
|
|
520
|
+## AA.B[rowsToTrim, ] <- t(apply(AA.B[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
|
|
521
|
+## rowsToTrim <- which(round(Ns[, p, "AB"] * trim, 0) > 0)
|
|
522
|
+## AB.A[rowsToTrim, ] <- t(apply(AB.A[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
|
|
523
|
+## AB.B[rowsToTrim, ] <- t(apply(AB.B[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
|
|
524
|
+## rowsToTrim <- which(round(Ns[, p, "BB"] * trim, 0) > 0)
|
|
525
|
+## BB.A[rowsToTrim, ] <- t(apply(BB.A[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
|
|
526
|
+## BB.B[rowsToTrim, ] <- t(apply(BB.B[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
|
|
527
|
+##
|
|
528
|
+## ##Should probably recompute the Ns -- change the
|
|
529
|
+## ##degrees of freedom
|
|
530
|
+## Ns[, p, "AA"] <- rowSums(!is.na(AA.A))
|
|
531
|
+## Ns[, p, "AB"] <- rowSums(!is.na(AB.A))
|
|
532
|
+## Ns[, p, "BB"] <- rowSums(!is.na(BB.A))
|
|
533
|
+## }
|
519
|
534
|
##---------------------------------------------------------------------------
|
520
|
535
|
## Predict sufficient statistics for unobserved genotypes (plate-specific)
|
521
|
536
|
##---------------------------------------------------------------------------
|
522
|
|
- index.AA <- which(Ns[, p, "AA"] >= 3)
|
523
|
|
- index.AB <- which(Ns[, p, "AB"] >= 3)
|
524
|
|
- index.BB <- which(Ns[, p, "BB"] >= 3)
|
525
|
|
-
|
|
537
|
+ highConf <- 1-exp(-conf/1000)
|
|
538
|
+ highConf <- highConf > CONF.THR
|
|
539
|
+ confInd <- rowMeans(highConf) > CONF.THR
|
|
540
|
+ NN <- Ns
|
|
541
|
+ NN[, p, "AA"] <- rowSums(AA & highConf, na.rm=TRUE) ##how many AA were called with high confidence
|
|
542
|
+ NN[, p, "AB"] <- rowSums(AB & highConf, na.rm=TRUE)
|
|
543
|
+ NN[, p, "BB"] <- rowSums(BB & highConf, na.rm=TRUE)
|
|
544
|
+ index.AA <- which(NN[, p, "AA"] >= 3)
|
|
545
|
+ index.AB <- which(NN[, p, "AB"] >= 3)
|
|
546
|
+ index.BB <- which(NN[, p, "BB"] >= 3)
|
526
|
547
|
correct.orderA <- muA.AA[, p] > muA.BB[, p]
|
527
|
548
|
correct.orderB <- muB.BB[, p] > muB.AA[, p]
|
528
|
549
|
if(length(index.AB) > 0){
|
529
|
|
- nobs <- rowSums(Ns[, p, ] >= MIN.OBS) == 3
|
530
|
|
- } else nobs <- rowSums(Ns[, p, c(1,3)] >= MIN.OBS) == 2
|
|
550
|
+ nobs <- rowSums(NN[, p, ] >= MIN.OBS) == 3
|
|
551
|
+ } else nobs <- rowSums(NN[, p, c(1,3)] >= MIN.OBS) == 2
|
531
|
552
|
index.complete <- which(correct.orderA & correct.orderB & nobs) ##be selective here
|
532
|
553
|
size <- min(5000, length(index.complete))
|
533
|
554
|
if(size == 5000) index.complete <- sample(index.complete, 5000)
|
534
|
555
|
if(length(index.complete) < 200){
|
535
|
|
- warning("too few snps pass my criteria for predicting the sufficient statistics")
|
536
|
|
- ##message("Plate has too few samples to call many genotypes...can we use prior information to predict")
|
537
|
|
- browser()
|
538
|
|
- }
|
539
|
|
- index.AA <- which(Ns[, p, "AA"] == 0 & Ns[, p, "AB"] >= MIN.OBS)
|
540
|
|
- index.AB <- which(Ns[, p, "AB"] == 0 & (Ns[, p, "AA"] >= MIN.OBS & Ns[, p, "BB"] >= MIN.OBS))
|
541
|
|
- index.BB <- which(Ns[, p, "BB"] == 0 & Ns[, p, "AB"] >= MIN.OBS)
|
542
|
|
- if(FALSE){
|
543
|
|
- png("figures/predictedMusBB%02d.png", width=800, height=500)
|
544
|
|
- par(mfrow=c(1, 2), las=1, mar=c(3, 1, 1, 1), oma=c(1, 2, 1, 1), pty="s")
|
545
|
|
- plot(log(muA.BB[index.complete, 1]), log(muB.BB[index.complete, 1]), pch=".", ylim=c(5,12), xlim=c(5,12),
|
546
|
|
- xlab="A", ylab="B", main="good")
|
547
|
|
- points(log(muA.AB[index.complete, 1]), log(muB.AB[index.complete, 1]), pch=".")
|
548
|
|
- points(log(muA.AA[index.complete, 1]), log(muB.AA[index.complete, 1]), pch=".")
|
549
|
|
- plot(log(muA.BB[index.BB, 1]), log(muB.BB[index.BB, 1]), pch=".", ylim=c(5,12), xlim=c(5,12),
|
550
|
|
- xlab="A", ylab="B", main="missing BB")
|
551
|
|
- points(log(muA.AB[index.BB, 1]), log(muB.AB[index.BB, 1]), pch=".")
|
552
|
|
- points(log(muA.AA[index.BB, 1]), log(muB.AA[index.BB, 1]), pch=".")
|
|
556
|
+ warning("fewer than 200 snps pass criteria for predicting the sufficient statistics")
|
|
557
|
+ stop()
|
553
|
558
|
}
|
|
559
|
+ index.AA <- which(NN[, p, "AA"] == 0 & (NN[, p, "AB"] >= MIN.OBS & NN[, p, "BB"] >= MIN.OBS))
|
|
560
|
+ index.AB <- which(NN[, p, "AB"] == 0 & (NN[, p, "AA"] >= MIN.OBS & NN[, p, "BB"] >= MIN.OBS))
|
|
561
|
+ index.BB <- which(NN[, p, "BB"] == 0 & (NN[, p, "AB"] >= MIN.OBS & NN[, p, "AA"] >= MIN.OBS))
|
554
|
562
|
if(length(index.AA) > 0){
|
|
563
|
+ ##Predict mean for AA genotypes
|
555
|
564
|
X.AA <- cbind(1, muA.AB[index.complete, p], muA.BB[index.complete, p],
|
556
|
565
|
muB.AB[index.complete, p], muB.BB[index.complete, p])
|
557
|
566
|
Y <- cbind(muA.AA[index.complete, p], muB.AA[index.complete, p])
|
...
|
...
|
@@ -565,6 +574,7 @@ oneBatch <- function(plateIndex, G, A, B, MIN.OBS=3, DF.PRIOR, envir, trim, uppe
|
565
|
574
|
muB.AA[index.AA, p] <- musAA[, 2]
|
566
|
575
|
}
|
567
|
576
|
if(length(index.AB) > 0){
|
|
577
|
+ ##Predict mean for AB genotypes
|
568
|
578
|
X.AB <- cbind(1, muA.AA[index.complete, p], muA.BB[index.complete, p],
|
569
|
579
|
muB.AA[index.complete, p], muB.BB[index.complete, p])
|
570
|
580
|
Y <- cbind(muA.AB[index.complete, p], muB.AB[index.complete, p])
|
...
|
...
|
@@ -578,6 +588,7 @@ oneBatch <- function(plateIndex, G, A, B, MIN.OBS=3, DF.PRIOR, envir, trim, uppe
|
578
|
588
|
muB.AB[index.AB, p] <- musAB[, 2]
|
579
|
589
|
}
|
580
|
590
|
if(length(index.BB) > 0){
|
|
591
|
+ ##Predict mean for BB genotypes
|
581
|
592
|
X.BB <- cbind(1, muA.AA[index.complete, p], muA.AB[index.complete, p],
|
582
|
593
|
muB.AA[index.complete, p], muB.AB[index.complete, p])
|
583
|
594
|
Y <- cbind(muA.BB[index.complete, p], muB.BB[index.complete, p])
|
...
|
...
|
@@ -590,14 +601,13 @@ oneBatch <- function(plateIndex, G, A, B, MIN.OBS=3, DF.PRIOR, envir, trim, uppe
|
590
|
601
|
muA.BB[index.BB, p] <- musBB[, 1]
|
591
|
602
|
muB.BB[index.BB, p] <- musBB[, 2]
|
592
|
603
|
}
|
593
|
|
- if(FALSE){
|
594
|
|
- points(log(muA.BB[index.BB, 1]), log(muB.BB[index.BB, 1]), pch=".", col="blue")
|
595
|
|
- dev.off()
|
596
|
|
- }
|
597
|
604
|
##missing two genotypes
|
598
|
|
- noAA <- Ns[, p, "AA"] < MIN.OBS
|
599
|
|
- noAB <- Ns[, p, "AB"] < MIN.OBS
|
600
|
|
- noBB <- Ns[, p, "BB"] < MIN.OBS
|
|
605
|
+# index.AA <- which(NN[, p, "AA"] == 0 & (NN[, p, "AB"] < MIN.OBS | NN[, p, "BB"] < MIN.OBS))
|
|
606
|
+# index.AB <- which(NN[, p, "AB"] == 0 & (NN[, p, "AA"] < MIN.OBS | NN[, p, "BB"] < MIN.OBS))
|
|
607
|
+# index.BB <- which(NN[, p, "BB"] == 0 & (NN[, p, "AB"] >= MIN.OBS | NN[, p, "AA"] >= MIN.OBS))
|
|
608
|
+ noAA <- NN[, p, "AA"] < MIN.OBS
|
|
609
|
+ noAB <- NN[, p, "AB"] < MIN.OBS
|
|
610
|
+ noBB <- NN[, p, "BB"] < MIN.OBS
|
601
|
611
|
##---------------------------------------------------------------------------
|
602
|
612
|
## Two genotype clusters not observed -- would sequence help? (didn't seem that helpful)
|
603
|
613
|
## 1 extract index of complete data
|
...
|
...
|
@@ -605,8 +615,7 @@ oneBatch <- function(plateIndex, G, A, B, MIN.OBS=3, DF.PRIOR, envir, trim, uppe
|
605
|
615
|
## 3 Predict mu1*, mu3* for missing genotypes
|
606
|
616
|
##---------------------------------------------------------------------------
|
607
|
617
|
if(sum(noAA & noAB) > 0){
|
608
|
|
- ##predict other cluster centers using only the observed genotypes
|
609
|
|
- ##predict AA:
|
|
618
|
+ ##predict AA and AB centers
|
610
|
619
|
X <- cbind(1, muA.BB[index.complete, p], muB.BB[index.complete, p])
|
611
|
620
|
Y <- cbind(muA.AA[index.complete, p],
|
612
|
621
|
muB.AA[index.complete, p],
|
...
|
...
|
@@ -622,16 +631,8 @@ oneBatch <- function(plateIndex, G, A, B, MIN.OBS=3, DF.PRIOR, envir, trim, uppe
|
622
|
631
|
muA.AB[noAA & noAB, p] <- mus[, 3]
|
623
|
632
|
muB.AB[noAA & noAB, p] <- mus[, 4]
|
624
|
633
|
}
|
625
|
|
- if(FALSE){
|
626
|
|
- i <- which(noAA & noAB)
|
627
|
|
- par(mfrow=c(1, 1), las=1, mar=c(3, 3, 1, 1), pty="s")
|
628
|
|
- plot(log(muA.BB[i, 1]), log(muB.BB[i, 1]), pch=".", ylim=c(5,12), xlim=c(5,12),
|
629
|
|
- xlab="A", ylab="B", main="missing BB")
|
630
|
|
- points(log(muA.AB[i, 1]), log(muB.AB[i, 1]), pch=".", col="blue")
|
631
|
|
- points(log(muA.AA[i, 1]), log(muB.AA[i, 1]), pch=".", col="red")
|
632
|
|
- }
|
633
|
634
|
if(sum(noBB & noAB) > 0){
|
634
|
|
- ##predict other cluster centers using only the observed genotypes
|
|
635
|
+ ##predict AB and BB centers
|
635
|
636
|
X <- cbind(1, muA.AA[index.complete, p], muB.AA[index.complete, p])
|
636
|
637
|
Y <- cbind(muA.AB[index.complete, p],
|
637
|
638
|
muB.AB[index.complete, p],
|
...
|
...
|
@@ -646,18 +647,29 @@ oneBatch <- function(plateIndex, G, A, B, MIN.OBS=3, DF.PRIOR, envir, trim, uppe
|
646
|
647
|
muB.AB[noBB & noAB, p] <- mus[, 2]
|
647
|
648
|
muA.BB[noBB & noAB, p] <- mus[, 3]
|
648
|
649
|
muB.BB[noBB & noAB, p] <- mus[, 4]
|
649
|
|
- if(FALSE){
|
650
|
|
- i <- which(noBB & noAB)
|
651
|
|
- par(mfrow=c(1, 1), las=1, mar=c(3, 3, 1, 1), pty="s")
|
652
|
|
- plot(log(muA.BB[i, 1]), log(muB.BB[i, 1]), pch=".", ylim=c(5,12), xlim=c(5,12),
|
653
|
|
- xlab="A", ylab="B", main="missing BB")
|
654
|
|
- points(log(muA.AB[i, 1]), log(muB.AB[i, 1]), pch=".", col="blue")
|
655
|
|
- points(log(muA.AA[i, 1]), log(muB.AA[i, 1]), pch=".", col="red")
|
656
|
|
- }
|
|
650
|
+ }
|
|
651
|
+ if(sum(noAA & noBB) > 0){
|
|
652
|
+ ##predict AA and BB centers
|
|
653
|
+ X <- cbind(1, muA.AB[index.complete, p], muB.AB[index.complete, p])
|
|
654
|
+ Y <- cbind(muA.AA[index.complete, p],
|
|
655
|
+ muB.AA[index.complete, p],
|
|
656
|
+ muA.BB[index.complete, p], #muA.AB[index.complete, p],
|
|
657
|
+ muB.BB[index.complete, p])#, muB.AB[index.complete, p])
|
|
658
|
+ XtY <- crossprod(X, Y)
|
|
659
|
+ betahat <- solve(crossprod(X), XtY)
|
|
660
|
+ X <- cbind(1, muA.AB[noAA & noBB, p], muB.AB[noAA & noBB, p])
|
|
661
|
+ mus <- X %*% betahat
|
|
662
|
+ mus[mus <= 100] <- 100
|
|
663
|
+ muA.AA[noAA & noBB, p] <- mus[, 1]
|
|
664
|
+ muB.AA[noAA & noBB, p] <- mus[, 2]
|
|
665
|
+ muA.BB[noAA & noBB, p] <- mus[, 3]
|
|
666
|
+ muB.BB[noAA & noBB, p] <- mus[, 4]
|
657
|
667
|
}
|
658
|
668
|
dn.Ns <- dimnames(Ns)
|
659
|
669
|
Ns <- array(as.integer(Ns), dim=dim(Ns))
|
660
|
670
|
dimnames(Ns)[[3]] <- dn.Ns[[3]]
|
|
671
|
+ if(any(is.na(muA.AA) | any(is.na(muA.AB)) | any(is.na(muA.BB))))
|
|
672
|
+ warning("Some SNPs do not have any genotype calls above the confidence threshold. Check the indices in snpflags and npflags.")
|
661
|
673
|
assign("muA.AA", muA.AA, envir)
|
662
|
674
|
assign("muA.AB", muA.AB, envir)
|
663
|
675
|
assign("muA.BB", muA.BB, envir)
|
...
|
...
|
@@ -842,14 +854,24 @@ coefs <- function(plateIndex, conf, MIN.OBS=3, envir, CONF.THR=0.99){
|
842
|
854
|
IA <- cbind(muA.AA[, p], muA.AB[, p], muA.BB[, p])
|
843
|
855
|
IB <- cbind(muB.AA[, p], muB.AB[, p], muB.BB[, p])
|
844
|
856
|
NOHET <- mean(Ns[, p, 2], na.rm=TRUE) < 0.05
|
|
857
|
+ ##---------------------------------------------------------------------------
|
845
|
858
|
##predict missing variance (do only once)
|
846
|
|
- is.complete <- rowSums(Ns[, p, ] >= 1) == 3
|
847
|
|
- if(NOHET) is.complete <- rowSums(Ns[, p, c(1,3)] >= MIN.OBS) == 2
|
848
|
|
- correct.orderA <- muA.AA[, p] > muA.BB[, p]
|
849
|
|
- correct.orderB <- muB.BB[, p] > muB.AA[, p]
|
850
|
|
- highConf <- 1-exp(-conf/1000)
|
851
|
|
- confInd <- rowMeans(highConf) > CONF.THR
|
852
|
|
- keep <- confInd & is.complete & correct.orderA & correct.orderB
|
|
859
|
+ ##---------------------------------------------------------------------------
|
|
860
|
+ ##should consider replacing Ns with Ns & high conf
|
|
861
|
+## highConf <- 1-exp(-conf/1000)
|
|
862
|
+## highConf <- highConf > CONF.THR
|
|
863
|
+## NN <- Ns
|
|
864
|
+## NN[, p, "AA"] <- rowSums(AA & highConf, na.rm=TRUE) ##how many AA were called with high confidence
|
|
865
|
+## NN[, p, "AB"] <- rowSums(AB & highConf, na.rm=TRUE)
|
|
866
|
+## NN[, p, "BB"] <- rowSums(BB & highConf, na.rm=TRUE)
|
|
867
|
+
|
|
868
|
+## is.complete <- rowSums(Ns[, p, ] >= 3) == 3 ##Be selective
|
|
869
|
+## if(NOHET) is.complete <- rowSums(NN[, p, c(1,3)] >= 3) == 2
|
|
870
|
+## correct.orderA <- muA.AA[, p] > muA.BB[, p]
|
|
871
|
+## correct.orderB <- muB.BB[, p] > muB.AA[, p]
|
|
872
|
+## highConf <- 1-exp(-conf/1000)
|
|
873
|
+## confInd <- rowMeans(highConf) > CONF.THR
|
|
874
|
+## keep <- confInd & is.complete & correct.orderA & correct.orderB
|
853
|
875
|
## if(is.null(fit.variance)){
|
854
|
876
|
## log.sigma2 <- as.numeric(log2(sigma2A[keep, ]))
|
855
|
877
|
## log.mus <- as.numeric(log2(IA[keep, ]))
|
...
|
...
|
@@ -990,7 +1012,12 @@ polymorphic <- function(plateIndex, A, B, envir){
|
990
|
1012
|
indexA <- which(rowSums(is.na(tmpA)) > 0)
|
991
|
1013
|
indexB <- which(rowSums(is.na(tmpB)) > 0)
|
992
|
1014
|
index <- union(indexA, indexB)
|
993
|
|
- if(length(index) > 0) browser()
|
|
1015
|
+ if(length(index) > 0){
|
|
1016
|
+ snpflags <- get("snpflags", envir)
|
|
1017
|
+ ##warning(paste(length(index), "indices have NAs for the copy number estimates"))
|
|
1018
|
+ snpflags[[p]] <- unique(c(snpflags[[p]], index))
|
|
1019
|
+ assign("snpflags", snpflags, envir)
|
|
1020
|
+ }
|
994
|
1021
|
##---------------------------------------------------------------------------
|
995
|
1022
|
## Estimate var(CA), var(CB), var(CA+CB)
|
996
|
1023
|
##---------------------------------------------------------------------------
|