Browse code

require that the confidence scores exceed a threshold for computing within genotype means. SNPs not meeting this threshold are flagged and a warning message is produced

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@37806 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 07/03/2009 11:31:53
Showing 3 changed files

... ...
@@ -1,7 +1,7 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling via CRLMM Algorithm
4
-Version: 1.0.51
4
+Version: 1.0.52
5 5
 Date: 2008-12-28
6 6
 Author: Rafael A Irizarry
7 7
 Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>
... ...
@@ -8,5 +8,5 @@ importFrom(preprocessCore, normalize.quantiles.use.target)
8 8
 importFrom(utils, data, packageDescription, setTxtProgressBar, txtProgressBar)
9 9
 importFrom(stats, coef, cov, dnorm, kmeans, lm, mad, median, quantile, sd)
10 10
 importFrom(genefilter, rowSds)
11
-importFrom(mvtnorm, dmvnorm)
11
+##importFrom(mvtnorm, dmvnorm)
12 12
 
... ...
@@ -196,9 +196,10 @@ instantiateObjects <- function(calls, NP, plate, envir, chrom){
196 196
 	assign("plates.completed", plates.completed, envir)
197 197
 	
198 198
 	fit.variance <- NULL
199
-	snpflags <- NULL
199
+	npflags <- snpflags <- NULL
200 200
 	assign("fit.variance", fit.variance, envir=envir)
201 201
 	assign("snpflags", snpflags, envir=envir)
202
+	assign("npflags", npflags, envir=envir)
202 203
 	
203 204
 	assign("Ns", Ns, envir=envir)		
204 205
 	assign("uplate", uplate, envir=envir)	
... ...
@@ -261,6 +262,8 @@ computeCopynumber <- function(A,
261 262
 			 G=calls[, plate==uplate[p]],
262 263
 			 A=A[, plate==uplate[p]],
263 264
 			 B=B[, plate==uplate[p]],
265
+			 conf=conf[, plate==uplate[p]],
266
+			 CONF.THR=CONF.THR,
264 267
 			 envir=envir,
265 268
 			 MIN.OBS=MIN.OBS,
266 269
 			 DF.PRIOR=DF.PRIOR,
... ...
@@ -271,7 +274,7 @@ computeCopynumber <- function(A,
271 274
 	for(p in P){
272 275
 		cat(".")
273 276
 		coefs(plateIndex=p, conf=conf[, plate==uplate[p]],
274
-		      envir=envir, CONF.THR=CONF.THR)
277
+		      envir=envir, CONF.THR=CONF.THR, MIN.OBS=MIN.OBS)
275 278
 	}
276 279
 	message("\nAllele specific copy number")	
277 280
 	for(p in P){
... ...
@@ -288,6 +291,12 @@ computeCopynumber <- function(A,
288 291
 			       NP=NP[, plate==uplate[p]],
289 292
 			       envir=envir)
290 293
 	}
294
+##	snpflags <- get("snpflags", envir)
295
+##	npflags <- get("npflags", envir)
296
+##	flags <- sapply(snpflags, length)
297
+##	flags.np <- sapply(npflags, length)
298
+##	if(any(flags > 0) | any(flags.np > 0))
299
+##		warning("some SNPs were flagged -- possible NAs.  Check the indices in snpflags and npflags")
291 300
 }
292 301
 
293 302
 nonpolymorphic <- function(plateIndex, NP, envir){
... ...
@@ -365,7 +374,13 @@ nonpolymorphic <- function(plateIndex, NP, envir){
365 374
 	indexA <- which(rowSums(is.na(tmpA)) > 0)
366 375
 	indexB <- which(rowSums(is.na(tmpB)) > 0)
367 376
 	index <- union(indexA, indexB)
368
-	if(length(index) > 0) browser()
377
+	if(length(index) > 0){
378
+		npflags <- get("npflags", envir)
379
+		##warning(paste(length(index), "indices have NAs for the copy number estimates"))		
380
+		npflags[[p]] <- unique(c(npflags[[p]], index))
381
+		assign("npflags", npflags, envir)
382
+	}
383
+	##if(length(index) > 0) browser()
369 384
 	
370 385
 	##---------------------------------------------------------------------------
371 386
 	## this part could stand improvement
... ...
@@ -399,7 +414,7 @@ nonpolymorphic <- function(plateIndex, NP, envir){
399 414
 ##	firstPass.NP
400 415
 }
401 416
 
402
-oneBatch <- function(plateIndex, G, A, B, MIN.OBS=3, DF.PRIOR, envir, trim, upperTail, bias.adj=FALSE, priorProb, ...){
417
+oneBatch <- function(plateIndex, G, A, B, conf, CONF.THR=0.99, MIN.OBS=3, DF.PRIOR, envir, trim, upperTail, bias.adj=FALSE, priorProb, ...){
403 418
 	p <- plateIndex
404 419
 	plate <- get("plate", envir)
405 420
 	AA <- G == 1
... ...
@@ -482,76 +497,70 @@ oneBatch <- function(plateIndex, G, A, B, MIN.OBS=3, DF.PRIOR, envir, trim, uppe
482 497
 			Ns[, p, "BB"] <- rowSums(!is.na(BB.A))			
483 498
 		}
484 499
 	}
485
-	if(trim > 0){
486
-		##rowMedians is not robust enough when a variant is common
487
-		##Try a trimmed rowMedian, trimming only one tail (must specify which)
488
-		##  - for genotypes with 3 or more observations, exclude the upper X% 
489
-		##        - one way to do this is to replace these observations with NAs
490
-		##         e.g, exclude round(Ns * X%, 0) observations
491
-		##  - for genotypes with fewer than 10 observations,
492
-		##  - recalculate rowMedians
493
-		replaceWithNAs <- function(x, trim, upperTail){
494
-			##put NA's last if trimming the upperTail
495
-			if(upperTail) decreasing <- TRUE else decreasing <- FALSE
496
-			NN <- round(sum(!is.na(x)) * trim, 0)
497
-			ix <- order(x, decreasing=decreasing, na.last=TRUE)[1:NN]
498
-			x[ix] <- NA
499
-			return(x)
500
-		}
501
-		##which rows should be trimmed
502
-		rowsToTrim <- which(round(Ns[, p, "AA"] * trim, 0) > 0)
503
-		##replace values in the tail of A with NAs
504
-		AA.A[rowsToTrim, ] <- t(apply(AA.A[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
505
-		AA.B[rowsToTrim, ] <- t(apply(AA.B[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
506
-		rowsToTrim <- which(round(Ns[, p, "AB"] * trim, 0) > 0)
507
-		AB.A[rowsToTrim, ] <- t(apply(AB.A[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
508
-		AB.B[rowsToTrim, ] <- t(apply(AB.B[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
509
-		rowsToTrim <- which(round(Ns[, p, "BB"] * trim, 0) > 0)
510
-		BB.A[rowsToTrim, ] <- t(apply(BB.A[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
511
-		BB.B[rowsToTrim, ] <- t(apply(BB.B[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
512
-
513
-		##Should probably recompute the Ns -- change the
514
-		##degrees of freedom
515
-		Ns[, p, "AA"] <- rowSums(!is.na(AA.A))
516
-		Ns[, p, "AB"] <- rowSums(!is.na(AB.A))
517
-		Ns[, p, "BB"] <- rowSums(!is.na(BB.A))		
518
-	}
500
+##	if(trim > 0){
501
+##		##rowMedians is not robust enough when a variant is common
502
+##		##Try a trimmed rowMedian, trimming only one tail (must specify which)
503
+##		##  - for genotypes with 3 or more observations, exclude the upper X% 
504
+##		##        - one way to do this is to replace these observations with NAs
505
+##		##         e.g, exclude round(Ns * X%, 0) observations
506
+##		##  - for genotypes with fewer than 10 observations,
507
+##		##  - recalculate rowMedians
508
+##		replaceWithNAs <- function(x, trim, upperTail){
509
+##			##put NA's last if trimming the upperTail
510
+##			if(upperTail) decreasing <- TRUE else decreasing <- FALSE
511
+##			NN <- round(sum(!is.na(x)) * trim, 0)
512
+##			ix <- order(x, decreasing=decreasing, na.last=TRUE)[1:NN]
513
+##			x[ix] <- NA
514
+##			return(x)
515
+##		}
516
+##		##which rows should be trimmed
517
+##		rowsToTrim <- which(round(Ns[, p, "AA"] * trim, 0) > 0)
518
+##		##replace values in the tail of A with NAs
519
+##		AA.A[rowsToTrim, ] <- t(apply(AA.A[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
520
+##		AA.B[rowsToTrim, ] <- t(apply(AA.B[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
521
+##		rowsToTrim <- which(round(Ns[, p, "AB"] * trim, 0) > 0)
522
+##		AB.A[rowsToTrim, ] <- t(apply(AB.A[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
523
+##		AB.B[rowsToTrim, ] <- t(apply(AB.B[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
524
+##		rowsToTrim <- which(round(Ns[, p, "BB"] * trim, 0) > 0)
525
+##		BB.A[rowsToTrim, ] <- t(apply(BB.A[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
526
+##		BB.B[rowsToTrim, ] <- t(apply(BB.B[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
527
+##
528
+##		##Should probably recompute the Ns -- change the
529
+##		##degrees of freedom
530
+##		Ns[, p, "AA"] <- rowSums(!is.na(AA.A))
531
+##		Ns[, p, "AB"] <- rowSums(!is.na(AB.A))
532
+##		Ns[, p, "BB"] <- rowSums(!is.na(BB.A))		
533
+##	}
519 534
 	##---------------------------------------------------------------------------
520 535
 	## Predict sufficient statistics for unobserved genotypes (plate-specific)
521 536
 	##---------------------------------------------------------------------------
522
-	index.AA <- which(Ns[, p, "AA"] >= 3)
523
-	index.AB <- which(Ns[, p, "AB"] >= 3)
524
-	index.BB <- which(Ns[, p, "BB"] >= 3)
525
-	
537
+	highConf <- 1-exp(-conf/1000)
538
+	highConf <- highConf > CONF.THR
539
+	confInd <- rowMeans(highConf) > CONF.THR
540
+	NN <- Ns
541
+	NN[, p, "AA"] <- rowSums(AA & highConf, na.rm=TRUE) ##how many AA were called with high confidence
542
+	NN[, p, "AB"] <- rowSums(AB & highConf, na.rm=TRUE)
543
+	NN[, p, "BB"] <- rowSums(BB & highConf, na.rm=TRUE)
544
+	index.AA <- which(NN[, p, "AA"] >= 3)
545
+	index.AB <- which(NN[, p, "AB"] >= 3)
546
+	index.BB <- which(NN[, p, "BB"] >= 3)
526 547
 	correct.orderA <- muA.AA[, p] > muA.BB[, p]
527 548
 	correct.orderB <- muB.BB[, p] > muB.AA[, p]
528 549
 	if(length(index.AB) > 0){
529
-		nobs <- rowSums(Ns[, p, ] >= MIN.OBS) == 3
530
-	} else nobs <- rowSums(Ns[, p, c(1,3)] >= MIN.OBS) == 2
550
+		nobs <- rowSums(NN[, p, ] >= MIN.OBS) == 3
551
+	} else nobs <- rowSums(NN[, p, c(1,3)] >= MIN.OBS) == 2
531 552
 	index.complete <- which(correct.orderA & correct.orderB & nobs) ##be selective here
532 553
 	size <- min(5000, length(index.complete))
533 554
 	if(size == 5000) index.complete <- sample(index.complete, 5000)
534 555
 	if(length(index.complete) < 200){
535
-		warning("too few snps pass my criteria for predicting the sufficient statistics")
536
-		##message("Plate has too few samples to call many genotypes...can we use prior information to predict")
537
-		browser()
538
-	}
539
-	index.AA <- which(Ns[, p, "AA"] == 0 & Ns[, p, "AB"] >= MIN.OBS)
540
-	index.AB <- which(Ns[, p, "AB"] == 0 & (Ns[, p, "AA"] >= MIN.OBS & Ns[, p, "BB"] >= MIN.OBS))
541
-	index.BB <- which(Ns[, p, "BB"] == 0 & Ns[, p, "AB"] >= MIN.OBS)
542
-	if(FALSE){
543
-		png("figures/predictedMusBB%02d.png", width=800, height=500)
544
-		par(mfrow=c(1, 2), las=1, mar=c(3, 1, 1, 1), oma=c(1, 2, 1, 1), pty="s")				
545
-		plot(log(muA.BB[index.complete, 1]), log(muB.BB[index.complete, 1]), pch=".", ylim=c(5,12), xlim=c(5,12),
546
-		     xlab="A", ylab="B", main="good")
547
-		points(log(muA.AB[index.complete, 1]), log(muB.AB[index.complete, 1]), pch=".")
548
-		points(log(muA.AA[index.complete, 1]), log(muB.AA[index.complete, 1]), pch=".")
549
-		plot(log(muA.BB[index.BB, 1]), log(muB.BB[index.BB, 1]), pch=".", ylim=c(5,12), xlim=c(5,12),
550
-		     xlab="A", ylab="B", main="missing BB")
551
-		points(log(muA.AB[index.BB, 1]), log(muB.AB[index.BB, 1]), pch=".")
552
-		points(log(muA.AA[index.BB, 1]), log(muB.AA[index.BB, 1]), pch=".")				
556
+		warning("fewer than 200 snps pass criteria for predicting the sufficient statistics")
557
+		stop()
553 558
 	}
559
+	index.AA <- which(NN[, p, "AA"] == 0 & (NN[, p, "AB"] >= MIN.OBS & NN[, p, "BB"] >= MIN.OBS))
560
+	index.AB <- which(NN[, p, "AB"] == 0 & (NN[, p, "AA"] >= MIN.OBS & NN[, p, "BB"] >= MIN.OBS))
561
+	index.BB <- which(NN[, p, "BB"] == 0 & (NN[, p, "AB"] >= MIN.OBS & NN[, p, "AA"] >= MIN.OBS))
554 562
 	if(length(index.AA) > 0){
563
+		##Predict mean for AA genotypes
555 564
 		X.AA <- cbind(1, muA.AB[index.complete, p], muA.BB[index.complete, p],
556 565
 			      muB.AB[index.complete, p], muB.BB[index.complete, p])
557 566
 		Y <- cbind(muA.AA[index.complete, p], muB.AA[index.complete, p])
... ...
@@ -565,6 +574,7 @@ oneBatch <- function(plateIndex, G, A, B, MIN.OBS=3, DF.PRIOR, envir, trim, uppe
565 574
 		muB.AA[index.AA, p] <- musAA[, 2]
566 575
 	}
567 576
 	if(length(index.AB) > 0){
577
+		##Predict mean for AB genotypes
568 578
 		X.AB <- cbind(1, muA.AA[index.complete, p], muA.BB[index.complete, p],
569 579
 			      muB.AA[index.complete, p], muB.BB[index.complete, p])
570 580
 		Y <- cbind(muA.AB[index.complete, p], muB.AB[index.complete, p])
... ...
@@ -578,6 +588,7 @@ oneBatch <- function(plateIndex, G, A, B, MIN.OBS=3, DF.PRIOR, envir, trim, uppe
578 588
 		muB.AB[index.AB, p] <- musAB[, 2]				
579 589
 	}
580 590
 	if(length(index.BB) > 0){
591
+		##Predict mean for BB genotypes
581 592
 		X.BB <- cbind(1, muA.AA[index.complete, p], muA.AB[index.complete, p],
582 593
 			      muB.AA[index.complete, p], muB.AB[index.complete, p])
583 594
 		Y <- cbind(muA.BB[index.complete, p], muB.BB[index.complete, p])
... ...
@@ -590,14 +601,13 @@ oneBatch <- function(plateIndex, G, A, B, MIN.OBS=3, DF.PRIOR, envir, trim, uppe
590 601
 		muA.BB[index.BB, p] <- musBB[, 1]
591 602
 		muB.BB[index.BB, p] <- musBB[, 2]								
592 603
 	}
593
-	if(FALSE){
594
-		points(log(muA.BB[index.BB, 1]), log(muB.BB[index.BB, 1]), pch=".", col="blue")
595
-		dev.off()
596
-	}
597 604
 	##missing two genotypes
598
-	noAA <- Ns[, p, "AA"] < MIN.OBS
599
-	noAB <- Ns[, p, "AB"] < MIN.OBS
600
-	noBB <- Ns[, p, "BB"] < MIN.OBS
605
+#	index.AA <- which(NN[, p, "AA"] == 0 & (NN[, p, "AB"] < MIN.OBS | NN[, p, "BB"] < MIN.OBS))
606
+#	index.AB <- which(NN[, p, "AB"] == 0 & (NN[, p, "AA"] < MIN.OBS | NN[, p, "BB"] < MIN.OBS))
607
+#	index.BB <- which(NN[, p, "BB"] == 0 & (NN[, p, "AB"] >= MIN.OBS | NN[, p, "AA"] >= MIN.OBS))	
608
+	noAA <- NN[, p, "AA"] < MIN.OBS
609
+	noAB <- NN[, p, "AB"] < MIN.OBS
610
+	noBB <- NN[, p, "BB"] < MIN.OBS
601 611
 	##---------------------------------------------------------------------------
602 612
 	## Two genotype clusters not observed -- would sequence help? (didn't seem that helpful)
603 613
 	## 1 extract index of complete data
... ...
@@ -605,8 +615,7 @@ oneBatch <- function(plateIndex, G, A, B, MIN.OBS=3, DF.PRIOR, envir, trim, uppe
605 615
 	## 3 Predict mu1*, mu3* for missing genotypes
606 616
 	##---------------------------------------------------------------------------			
607 617
 	if(sum(noAA & noAB) > 0){
608
-		##predict other cluster centers using only the observed genotypes
609
-		##predict AA:
618
+		##predict AA and AB centers
610 619
 		X <- cbind(1, muA.BB[index.complete, p], muB.BB[index.complete, p])
611 620
 		Y <- cbind(muA.AA[index.complete, p],
612 621
 			   muB.AA[index.complete, p],
... ...
@@ -622,16 +631,8 @@ oneBatch <- function(plateIndex, G, A, B, MIN.OBS=3, DF.PRIOR, envir, trim, uppe
622 631
 		muA.AB[noAA & noAB, p] <- mus[, 3]
623 632
 		muB.AB[noAA & noAB, p] <- mus[, 4]				
624 633
 	}
625
-	if(FALSE){
626
-		i <- which(noAA & noAB)
627
-		par(mfrow=c(1, 1), las=1, mar=c(3, 3, 1, 1), pty="s")
628
-		plot(log(muA.BB[i, 1]), log(muB.BB[i, 1]), pch=".", ylim=c(5,12), xlim=c(5,12),
629
-		     xlab="A", ylab="B", main="missing BB")
630
-		points(log(muA.AB[i, 1]), log(muB.AB[i, 1]), pch=".", col="blue")
631
-		points(log(muA.AA[i, 1]), log(muB.AA[i, 1]), pch=".", col="red")
632
-	}
633 634
 	if(sum(noBB & noAB) > 0){
634
-		##predict other cluster centers using only the observed genotypes
635
+		##predict AB and BB centers
635 636
 		X <- cbind(1, muA.AA[index.complete, p], muB.AA[index.complete, p])
636 637
 		Y <- cbind(muA.AB[index.complete, p],
637 638
 			   muB.AB[index.complete, p],
... ...
@@ -646,18 +647,29 @@ oneBatch <- function(plateIndex, G, A, B, MIN.OBS=3, DF.PRIOR, envir, trim, uppe
646 647
 		muB.AB[noBB & noAB, p] <- mus[, 2]								
647 648
 		muA.BB[noBB & noAB, p] <- mus[, 3]
648 649
 		muB.BB[noBB & noAB, p] <- mus[, 4]				
649
-		if(FALSE){
650
-			i <- which(noBB & noAB)
651
-			par(mfrow=c(1, 1), las=1, mar=c(3, 3, 1, 1), pty="s")
652
-			plot(log(muA.BB[i, 1]), log(muB.BB[i, 1]), pch=".", ylim=c(5,12), xlim=c(5,12),
653
-			     xlab="A", ylab="B", main="missing BB")
654
-			points(log(muA.AB[i, 1]), log(muB.AB[i, 1]), pch=".", col="blue")
655
-			points(log(muA.AA[i, 1]), log(muB.AA[i, 1]), pch=".", col="red")
656
-		}				
650
+	}
651
+	if(sum(noAA & noBB) > 0){
652
+		##predict AA and BB centers
653
+		X <- cbind(1, muA.AB[index.complete, p], muB.AB[index.complete, p])
654
+		Y <- cbind(muA.AA[index.complete, p],
655
+			   muB.AA[index.complete, p],
656
+			   muA.BB[index.complete, p], #muA.AB[index.complete, p],
657
+			   muB.BB[index.complete, p])#, muB.AB[index.complete, p])
658
+		XtY <- crossprod(X, Y)
659
+		betahat <- solve(crossprod(X), XtY)
660
+		X <- cbind(1, muA.AB[noAA & noBB, p], muB.AB[noAA & noBB, p])
661
+		mus <- X %*% betahat
662
+		mus[mus <= 100] <- 100
663
+		muA.AA[noAA & noBB, p] <- mus[, 1]
664
+		muB.AA[noAA & noBB, p] <- mus[, 2]								
665
+		muA.BB[noAA & noBB, p] <- mus[, 3]
666
+		muB.BB[noAA & noBB, p] <- mus[, 4]				
657 667
 	}
658 668
 	dn.Ns <- dimnames(Ns)
659 669
 	Ns <- array(as.integer(Ns), dim=dim(Ns))
660 670
 	dimnames(Ns)[[3]] <- dn.Ns[[3]]
671
+	if(any(is.na(muA.AA) | any(is.na(muA.AB)) | any(is.na(muA.BB))))
672
+		warning("Some SNPs do not have any genotype calls above the confidence threshold. Check the indices in snpflags and npflags.")
661 673
 	assign("muA.AA", muA.AA, envir)
662 674
 	assign("muA.AB", muA.AB, envir)
663 675
 	assign("muA.BB", muA.BB, envir)
... ...
@@ -842,14 +854,24 @@ coefs <- function(plateIndex, conf, MIN.OBS=3, envir, CONF.THR=0.99){
842 854
 	IA <- cbind(muA.AA[, p], muA.AB[, p], muA.BB[, p])
843 855
 	IB <- cbind(muB.AA[, p], muB.AB[, p], muB.BB[, p])
844 856
 	NOHET <- mean(Ns[, p, 2], na.rm=TRUE) < 0.05
857
+	##---------------------------------------------------------------------------
845 858
 	##predict missing variance (do only once)
846
-	is.complete <- rowSums(Ns[, p, ] >= 1) == 3
847
-	if(NOHET) is.complete <- rowSums(Ns[, p, c(1,3)] >= MIN.OBS) == 2		
848
-	correct.orderA <- muA.AA[, p] > muA.BB[, p]
849
-	correct.orderB <- muB.BB[, p] > muB.AA[, p]
850
-	highConf <- 1-exp(-conf/1000)
851
-	confInd <- rowMeans(highConf) > CONF.THR
852
-	keep <- confInd & is.complete & correct.orderA & correct.orderB
859
+	##---------------------------------------------------------------------------
860
+	##should consider replacing Ns with Ns & high conf
861
+##	highConf <- 1-exp(-conf/1000)
862
+##	highConf <- highConf > CONF.THR
863
+##	NN <- Ns
864
+##	NN[, p, "AA"] <- rowSums(AA & highConf, na.rm=TRUE) ##how many AA were called with high confidence
865
+##	NN[, p, "AB"] <- rowSums(AB & highConf, na.rm=TRUE)
866
+##	NN[, p, "BB"] <- rowSums(BB & highConf, na.rm=TRUE)
867
+	
868
+##	is.complete <- rowSums(Ns[, p, ] >= 3) == 3 ##Be selective
869
+##	if(NOHET) is.complete <- rowSums(NN[, p, c(1,3)] >= 3) == 2		
870
+##	correct.orderA <- muA.AA[, p] > muA.BB[, p]
871
+##	correct.orderB <- muB.BB[, p] > muB.AA[, p]
872
+##	highConf <- 1-exp(-conf/1000)
873
+##	confInd <- rowMeans(highConf) > CONF.THR
874
+##	keep <- confInd & is.complete & correct.orderA & correct.orderB
853 875
 ##	if(is.null(fit.variance)){
854 876
 ##		log.sigma2 <- as.numeric(log2(sigma2A[keep, ]))
855 877
 ##		log.mus <- as.numeric(log2(IA[keep, ]))
... ...
@@ -990,7 +1012,12 @@ polymorphic <- function(plateIndex, A, B, envir){
990 1012
 	indexA <- which(rowSums(is.na(tmpA)) > 0)
991 1013
 	indexB <- which(rowSums(is.na(tmpB)) > 0)
992 1014
 	index <- union(indexA, indexB)
993
-	if(length(index) > 0) browser()	
1015
+	if(length(index) > 0){
1016
+		snpflags <- get("snpflags", envir)
1017
+		##warning(paste(length(index), "indices have NAs for the copy number estimates"))		
1018
+		snpflags[[p]] <- unique(c(snpflags[[p]], index))
1019
+		assign("snpflags", snpflags, envir)
1020
+	}
994 1021
 	##---------------------------------------------------------------------------
995 1022
 	## Estimate var(CA), var(CB), var(CA+CB)
996 1023
 	##---------------------------------------------------------------------------