Browse code

commented all of the old crlmmCopynumber code. Starting to move crlmmCopynumberLD.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@48944 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 21/08/2010 02:48:35
Showing 2 changed files

... ...
@@ -101,93 +101,93 @@ construct <- function(filenames, cdfName, copynumber=FALSE,
101 101
 	return(callSet)
102 102
 }
103 103
 
104
-genotype <- function(filenames,
105
-		     cdfName,
106
-		     batch,
107
-		     mixtureSampleSize=10^5,
108
-		     eps=0.1,
109
-		     verbose=TRUE,
110
-		     seed=1,
111
-		     sns,
112
-		     copynumber=TRUE,
113
-		     probs=rep(1/3, 3),
114
-		     DF=6,
115
-		     SNRMin=5,
116
-		     recallMin=10,
117
-		     recallRegMin=1000,
118
-		     gender=NULL,
119
-		     returnParams=TRUE,
120
-		     badSNP=0.7){
121
-	if(missing(cdfName)) stop("must specify cdfName")
122
-	if(!isValidCdfName(cdfName)) stop("cdfName not valid.  see validCdfNames")
123
-	if(missing(sns)) sns <- basename(filenames)
124
-	## callSet contains potentially very big matrices
125
-	## More big matrices are created within snprma, that will then be removed.
126
-	callSet <- construct(filenames=filenames,
127
-			     cdfName=cdfName,
128
-			     copynumber=copynumber,
129
-			     sns=sns,
130
-			     verbose=verbose,
131
-			     batch=batch)
132
-	##lM(callSet) <- initializeParamObject(list(featureNames(callSet), unique(protocolData(callSet)$batch)))
133
-	mixtureParams <- matrix(NA, 4, length(filenames))
134
-	snp.index <- which(isSnp(callSet)==1)
135
-	sample.strata <- splitIndicesByLength(1:ncol(callSet), ocSamples())
136
-	iter <- 1
137
-##	for(j in batches){
138
-	j <- unlist(sample.strata)
139
-		if(verbose) message("Batch ", iter, " of ", length(sample.strata))
140
-		snprmaRes <- snprma(filenames=filenames[j],
141
-				    mixtureSampleSize=mixtureSampleSize,
142
-				    fitMixture=TRUE,
143
-				    eps=eps,
144
-				    verbose=verbose,
145
-				    seed=seed,
146
-				    cdfName=cdfName,
147
-				    sns=sns[j])
148
-		stopifnot(identical(featureNames(callSet)[snp.index], snprmaRes$gns))
149
-		pData(callSet)$SKW[j] <- snprmaRes$SKW
150
-		pData(callSet)$SNR[j] <- snprmaRes$SNR
151
-		suppressWarnings(A(callSet)[snp.index, j] <- snprmaRes[["A"]])
152
-		suppressWarnings(B(callSet)[snp.index, j] <- snprmaRes[["B"]])
153
-		mixtureParams[, j] <- snprmaRes$mixtureParams
154
-		rm(snprmaRes); ##gc()
155
-		if(copynumber){
156
-			np.index <- which(isSnp(callSet) == 0)
157
-			cnrmaRes <- cnrma(filenames=filenames[j],
158
-					  cdfName=cdfName,
159
-					  row.names=featureNames(callSet)[np.index],				  
160
-					  sns=sns[j],
161
-					  seed=seed,
162
-					  verbose=verbose)
163
-			stopifnot(identical(featureNames(callSet)[np.index], rownames(cnrmaRes)))
164
-			A(callSet)[np.index, j] <- cnrmaRes
165
-			rm(cnrmaRes); ##gc()
166
-		}
167
-		## as.matrix needed when ffdf is used
168
-		tmp <- crlmmGT(A=as.matrix(A(callSet)[snp.index, j]),
169
-			       B=as.matrix(B(callSet)[snp.index, j]),
170
-			       SNR=callSet$SNR[j],
171
-			       mixtureParams=mixtureParams[, j],
172
-			       cdfName=annotation(callSet),
173
-			       row.names=featureNames(callSet)[snp.index],
174
-			       col.names=sampleNames(callSet)[j],
175
-			       probs=probs,
176
-			       DF=DF,
177
-			       SNRMin=SNRMin,
178
-			       recallMin=recallMin,
179
-			       recallRegMin=recallRegMin,
180
-			       gender=gender,
181
-			       verbose=verbose,
182
-			       returnParams=returnParams,
183
-			       badSNP=badSNP)
184
-		snpCall(callSet)[snp.index, j] <- tmp[["calls"]]
185
-		snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]]
186
-		callSet$gender[j] <- tmp$gender
187
-		iter <- iter+1
188
-##	}
189
-	return(callSet)
190
-}
104
+##genotype <- function(filenames,
105
+##		     cdfName,
106
+##		     batch,
107
+##		     mixtureSampleSize=10^5,
108
+##		     eps=0.1,
109
+##		     verbose=TRUE,
110
+##		     seed=1,
111
+##		     sns,
112
+##		     copynumber=TRUE,
113
+##		     probs=rep(1/3, 3),
114
+##		     DF=6,
115
+##		     SNRMin=5,
116
+##		     recallMin=10,
117
+##		     recallRegMin=1000,
118
+##		     gender=NULL,
119
+##		     returnParams=TRUE,
120
+##		     badSNP=0.7){
121
+##	if(missing(cdfName)) stop("must specify cdfName")
122
+##	if(!isValidCdfName(cdfName)) stop("cdfName not valid.  see validCdfNames")
123
+##	if(missing(sns)) sns <- basename(filenames)
124
+##	## callSet contains potentially very big matrices
125
+##	## More big matrices are created within snprma, that will then be removed.
126
+##	callSet <- construct(filenames=filenames,
127
+##			     cdfName=cdfName,
128
+##			     copynumber=copynumber,
129
+##			     sns=sns,
130
+##			     verbose=verbose,
131
+##			     batch=batch)
132
+##	##lM(callSet) <- initializeParamObject(list(featureNames(callSet), unique(protocolData(callSet)$batch)))
133
+##	mixtureParams <- matrix(NA, 4, length(filenames))
134
+##	snp.index <- which(isSnp(callSet)==1)
135
+##	sample.strata <- splitIndicesByLength(1:ncol(callSet), ocSamples())
136
+##	iter <- 1
137
+####	for(j in batches){
138
+##	j <- unlist(sample.strata)
139
+##		if(verbose) message("Batch ", iter, " of ", length(sample.strata))
140
+##		snprmaRes <- snprma(filenames=filenames[j],
141
+##				    mixtureSampleSize=mixtureSampleSize,
142
+##				    fitMixture=TRUE,
143
+##				    eps=eps,
144
+##				    verbose=verbose,
145
+##				    seed=seed,
146
+##				    cdfName=cdfName,
147
+##				    sns=sns[j])
148
+##		stopifnot(identical(featureNames(callSet)[snp.index], snprmaRes$gns))
149
+##		pData(callSet)$SKW[j] <- snprmaRes$SKW
150
+##		pData(callSet)$SNR[j] <- snprmaRes$SNR
151
+##		suppressWarnings(A(callSet)[snp.index, j] <- snprmaRes[["A"]])
152
+##		suppressWarnings(B(callSet)[snp.index, j] <- snprmaRes[["B"]])
153
+##		mixtureParams[, j] <- snprmaRes$mixtureParams
154
+##		rm(snprmaRes); ##gc()
155
+##		if(copynumber){
156
+##			np.index <- which(isSnp(callSet) == 0)
157
+##			cnrmaRes <- cnrma(filenames=filenames[j],
158
+##					  cdfName=cdfName,
159
+##					  row.names=featureNames(callSet)[np.index],				  
160
+##					  sns=sns[j],
161
+##					  seed=seed,
162
+##					  verbose=verbose)
163
+##			stopifnot(identical(featureNames(callSet)[np.index], rownames(cnrmaRes)))
164
+##			A(callSet)[np.index, j] <- cnrmaRes
165
+##			rm(cnrmaRes); ##gc()
166
+##		}
167
+##		## as.matrix needed when ffdf is used
168
+##		tmp <- crlmmGT(A=as.matrix(A(callSet)[snp.index, j]),
169
+##			       B=as.matrix(B(callSet)[snp.index, j]),
170
+##			       SNR=callSet$SNR[j],
171
+##			       mixtureParams=mixtureParams[, j],
172
+##			       cdfName=annotation(callSet),
173
+##			       row.names=featureNames(callSet)[snp.index],
174
+##			       col.names=sampleNames(callSet)[j],
175
+##			       probs=probs,
176
+##			       DF=DF,
177
+##			       SNRMin=SNRMin,
178
+##			       recallMin=recallMin,
179
+##			       recallRegMin=recallRegMin,
180
+##			       gender=gender,
181
+##			       verbose=verbose,
182
+##			       returnParams=returnParams,
183
+##			       badSNP=badSNP)
184
+##		snpCall(callSet)[snp.index, j] <- tmp[["calls"]]
185
+##		snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]]
186
+##		callSet$gender[j] <- tmp$gender
187
+##		iter <- iter+1
188
+####	}
189
+##	return(callSet)
190
+##}
191 191
 
192 192
 
193 193
 
... ...
@@ -437,71 +437,71 @@ fit.wls <- function(allele, Ystar, W, Ns, autosome=TRUE){
437 437
 ##}
438 438
 
439 439
 
440
-nuphiAllele <- function(object, allele, Ystar, W, tmp.objects, cnOptions){
441
-	I <- isSnp(object)
442
-	Ystar <- Ystar[I, ]
443
-	rownames(Ystar) <- featureNames(object)[isSnp(object)]
444
-	complete <- rowSums(is.na(W)) == 0 & I
445
-	W <- W[I, ]
446
-	if(any(!is.finite(W))){## | any(!is.finite(V))){
447
-		i <- which(rowSums(!is.finite(W)) > 0)
448
-		stop("Possible zeros in the within-genotype estimates of the spread (vA, vB). ")
449
-	}	
450
-	Ns <- tmp.objects[["Ns"]]	
451
-	Ns <- Ns[I, ]
452
-	CHR <- unique(chromosome(object))
453
-	batch <- unique(batch(object))
454
-	nuA <- getParam(object, "nuA", batch)
455
-	nuB <- getParam(object, "nuB", batch)
456
-	phiA <- getParam(object, "phiA", batch)
457
-	phiB <- getParam(object, "phiB", batch)
458
-	if(CHR==23){
459
-		phiAX <- getParam(object, "phiAX", batch)
460
-		phiBX <- getParam(object, "phiBX", batch)		
461
-	}
462
-	NOHET <- mean(Ns[, "AB"], na.rm=TRUE) < 0.05
463
-	if(missing(allele)) stop("must specify allele")
464
-	if(CHR == 23){
465
-		if(length(grep("AB", colnames(W))) > 0){
466
-			if(allele == "A") X <- cbind(1, c(1, 0, 2, 1, 0), c(0, 1, 0, 1, 2))
467
-			if(allele == "B") X <- cbind(1, c(0, 1, 0, 1, 2), c(1, 0, 2, 1, 0))
468
-		} else{
469
-		if(allele == "A") X <- cbind(1, c(1, 0, 2, 0), c(0, 1, 0, 2))
470
-			if(allele == "B") X <- cbind(1, c(0, 1, 0, 2), c(1, 0, 2, 0))
471
-		}
472
-		betahat <- matrix(NA, 3, nrow(Ystar))
473
-	} else {##autosome
474
-		if(allele == "A") X <- cbind(1, 2:0) else X <- cbind(1, 0:2)
475
-		if(NOHET) X <- X[-2, ] ##more than 1 X chromosome, but all homozygous
476
-		betahat <- matrix(NA, 2, nrow(Ystar))
477
-	}
478
-	ww <- rep(1, ncol(Ystar))
479
-	II <- which(rowSums(is.nan(Ystar)) == 0)
480
-	for(i in II){
481
-		betahat[, i] <- dqrlsWrapper(W[i, ] * X, Ystar[i, ], ww)
482
-	}
483
-	if(allele == "A"){
484
-		nuA[complete] <- betahat[1, ]
485
-		phiA[complete] <- betahat[2, ]
486
-		object <- pr(object, "nuA", batch, nuA)
487
-		object <- pr(object, "phiA", batch, phiA)
488
-		if(CHR == 23){
489
-			phiAX[complete] <- betahat[3, ]
490
-			object <- pr(object, "phiAX", batch, phiAX)			
491
-		}
492
-	}
493
-	if(allele=="B"){
494
-		nuB[complete] <- betahat[1, ]
495
-		phiB[complete] <- betahat[2, ]
496
-		object <- pr(object, "nuB", batch, nuB)
497
-		object <- pr(object, "phiB", batch, phiB)		
498
-		if(CHR == 23){
499
-			phiBX[complete] <- betahat[3, ]
500
-			object <- pr(object, "phiBX", batch, phiBX)
501
-		}
502
-	}
503
-	return(object)
504
-}
440
+##nuphiAllele <- function(object, allele, Ystar, W, tmp.objects, cnOptions){
441
+##	I <- isSnp(object)
442
+##	Ystar <- Ystar[I, ]
443
+##	rownames(Ystar) <- featureNames(object)[isSnp(object)]
444
+##	complete <- rowSums(is.na(W)) == 0 & I
445
+##	W <- W[I, ]
446
+##	if(any(!is.finite(W))){## | any(!is.finite(V))){
447
+##		i <- which(rowSums(!is.finite(W)) > 0)
448
+##		stop("Possible zeros in the within-genotype estimates of the spread (vA, vB). ")
449
+##	}	
450
+##	Ns <- tmp.objects[["Ns"]]	
451
+##	Ns <- Ns[I, ]
452
+##	CHR <- unique(chromosome(object))
453
+##	batch <- unique(batch(object))
454
+##	nuA <- getParam(object, "nuA", batch)
455
+##	nuB <- getParam(object, "nuB", batch)
456
+##	phiA <- getParam(object, "phiA", batch)
457
+##	phiB <- getParam(object, "phiB", batch)
458
+##	if(CHR==23){
459
+##		phiAX <- getParam(object, "phiAX", batch)
460
+##		phiBX <- getParam(object, "phiBX", batch)		
461
+##	}
462
+##	NOHET <- mean(Ns[, "AB"], na.rm=TRUE) < 0.05
463
+##	if(missing(allele)) stop("must specify allele")
464
+##	if(CHR == 23){
465
+##		if(length(grep("AB", colnames(W))) > 0){
466
+##			if(allele == "A") X <- cbind(1, c(1, 0, 2, 1, 0), c(0, 1, 0, 1, 2))
467
+##			if(allele == "B") X <- cbind(1, c(0, 1, 0, 1, 2), c(1, 0, 2, 1, 0))
468
+##		} else{
469
+##		if(allele == "A") X <- cbind(1, c(1, 0, 2, 0), c(0, 1, 0, 2))
470
+##			if(allele == "B") X <- cbind(1, c(0, 1, 0, 2), c(1, 0, 2, 0))
471
+##		}
472
+##		betahat <- matrix(NA, 3, nrow(Ystar))
473
+##	} else {##autosome
474
+##		if(allele == "A") X <- cbind(1, 2:0) else X <- cbind(1, 0:2)
475
+##		if(NOHET) X <- X[-2, ] ##more than 1 X chromosome, but all homozygous
476
+##		betahat <- matrix(NA, 2, nrow(Ystar))
477
+##	}
478
+##	ww <- rep(1, ncol(Ystar))
479
+##	II <- which(rowSums(is.nan(Ystar)) == 0)
480
+##	for(i in II){
481
+##		betahat[, i] <- dqrlsWrapper(W[i, ] * X, Ystar[i, ], ww)
482
+##	}
483
+##	if(allele == "A"){
484
+##		nuA[complete] <- betahat[1, ]
485
+##		phiA[complete] <- betahat[2, ]
486
+##		object <- pr(object, "nuA", batch, nuA)
487
+##		object <- pr(object, "phiA", batch, phiA)
488
+##		if(CHR == 23){
489
+##			phiAX[complete] <- betahat[3, ]
490
+##			object <- pr(object, "phiAX", batch, phiAX)			
491
+##		}
492
+##	}
493
+##	if(allele=="B"){
494
+##		nuB[complete] <- betahat[1, ]
495
+##		phiB[complete] <- betahat[2, ]
496
+##		object <- pr(object, "nuB", batch, nuB)
497
+##		object <- pr(object, "phiB", batch, phiB)		
498
+##		if(CHR == 23){
499
+##			phiBX[complete] <- betahat[3, ]
500
+##			object <- pr(object, "phiBX", batch, phiBX)
501
+##		}
502
+##	}
503
+##	return(object)
504
+##}
505 505
 
506 506
 
507 507
 
... ...
@@ -532,114 +532,114 @@ predictGender <- function(res, cdfName="genomewidesnp6", SNRMin=5){
532 532
 
533 533
 
534 534
 ##replace with release/crlmm/R/cnrma-functions
535
-crlmmCopynumber <- function(object,
536
-			    chromosome=1:23,
537
-			    which.batches,
538
-			    MIN.SAMPLES=10,
539
-			    SNRMin=5,
540
-			    MIN.OBS=3,
541
-			    DF.PRIOR=50,
542
-			    bias.adj=FALSE,
543
-			    prior.prob=rep(1/4,4),
544
-			    seed=1,
545
-			    verbose=TRUE,
546
-			    GT.CONF.THR=0.99,
547
-			    PHI.THR=2^6,
548
-			    nHOM.THR=5,
549
-			    MIN.NU=2^3,
550
-			    MIN.PHI=2^3,
551
-			    THR.NU.PHI=TRUE,
552
-			    thresholdCopynumber=TRUE,
553
-			    weighted.lm=TRUE){
554
-	ffIsLoaded <- class(calls(object))[[1]] == "ff"
555
-	if(ffIsLoaded){
556
-		open(object)
557
-		open(object$SKW)
558
-		open(object$SNR)
559
-	}
560
-	stopifnot("batch" %in% varLabels(protocolData(object)))
561
-	stopifnot("chromosome" %in% fvarLabels(object))
562
-	stopifnot("position" %in% fvarLabels(object))
563
-	stopifnot("isSnp" %in% fvarLabels(object))
564
-	##batch <- object$batch
565
-	batch <- batch(object)
566
-	if(ffIsLoaded) {
567
-		open(object$SNR)
568
-		SNR <- object$SNR[, ]
569
-	} else SNR <-  object$SNR
570
-	batches <- split((1:ncol(object))[SNR > SNRMin], batch[SNR > SNRMin])
571
-	if(any(sapply(batches, length) < MIN.SAMPLES)) message("Excluding batches with fewer than ", MIN.SAMPLES, " samples")
572
-	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
573
-	if(missing(which.batches)) which.batches <- seq(along=batches)
574
-	for(i in chromosome){
575
-		if(verbose) cat("Chromosome ", i, "\n")
576
-		if(i >= 24) next()
577
-		ii <- which.batches[1]
578
-		for(j in batches[which.batches]){
579
-			if(verbose) message("Batch ", ii, " of ", length(which.batches))
580
-			row.index <- which(chromosome(object) == i)
581
-			##Note that ffdf assayDataElements are data.frames after subsetting(not matrices)
582
-##			ca <- as.matrix(CA(object)[row.index, j])
583
-##			cb <- as.matrix(CB(object)[row.index, j])
584
-##			dimnames(ca) <- dimnames(cb) <- list(featureNames(object)[row.index], sampleNames(object)[j])
585
-			tmp <- new("CNSet",
586
-				   call=as.matrix(calls(object)[row.index, j]),
587
-				   callProbability=as.matrix(snpCallProbability(object)[row.index, j]),
588
-				   alleleA=as.matrix(A(object)[row.index, j]),
589
-				   alleleB=as.matrix(B(object)[row.index, j]),
590
-##				   CA=ca, CB=cb,
591
-				   phenoData=phenoData(object)[j, ],
592
-				   annotation=annotation(object))
593
-			featureData(tmp) <- addFeatureAnnotation(tmp)
594
-			featureData(tmp) <- lm.parameters(tmp, batch=unique(batch[j]))
595
-			tmp$batch <- batch(object)[j]
596
-			tmp <- computeCopynumber(tmp,
597
-						 MIN.OBS=MIN.OBS,
598
-						 DF.PRIOR=DF.PRIOR,
599
-						 bias.adj=bias.adj,
600
-						 prior.prob=prior.prob,
601
-						 seed=seed,
602
-						 verbose=verbose,
603
-						 GT.CONF.THR=GT.CONF.THR,
604
-						 PHI.THR=PHI.THR,
605
-						 nHOM.THR=nHOM.THR,
606
-						 MIN.NU=MIN.NU,
607
-						 MIN.PHI=MIN.PHI,
608
-						 THR.NU.PHI=THR.NU.PHI,
609
-						 thresholdCopynumber=thresholdCopynumber)
610
-			fData(tmp) <- fData(tmp)[, -(1:3)]
611
-##			CA(tmp) <- matrix(as.integer(CA(tmp)*100), nrow=nrow(tmp), ncol=ncol(tmp),
612
-##					  dimnames=list(featureNames(tmp), sampleNames(tmp)))
613
-##			CB(tmp) <- matrix(as.integer(CB(tmp)*100), nrow=nrow(tmp), ncol=ncol(tmp),
614
-##					  dimnames=list(featureNames(tmp), sampleNames(tmp)))
615
-##			CA(object)[row.index, j] <- CA(tmp)
616
-##			CB(object)[row.index, j] <- CB(tmp)
617
-			##ad-hocery
618
-			batchName <- unique(batch(object)[j])
619
-			fvarLabels(tmp)[15:17] <- paste(c("corrAB", "corrBB", "corrAA"), batchName, sep=".")
620
-			fvarLabels(tmp)[13:14] <- paste(c("phiPrimeA", "phiPrimeB"), batchName, sep=".")
621
-			fvarLabels(tmp) <- gsub("_", ".", fvarLabels(tmp))
622
-			##fvarLabels(tmp) <- gsub("\\.[1-9]", "", fvarLabels(tmp))
623
-			if(ffIsLoaded){
624
-				physical <- get("physical")
625
-				fData(tmp) <- fData(tmp)[, fvarLabels(tmp) %in% names(physical(lM(object)))]
626
-				jj <- match(fvarLabels(tmp), names(lM(object)))
627
-				lM(object)[row.index, jj] <- fData(tmp)
628
-			} else {
629
-				nms <- paste(names(lM(object)), batchName, sep=".")
630
-				fData(tmp) <- fData(tmp)[, fvarLabels(tmp) %in% nms]
631
-				for(k in seq_along(fvarLabels(tmp))){
632
-					kk <- match(fvarLabels(tmp)[k], paste(names(lM(object)), batchName, sep="."))
633
-					column <- match(batchName, colnames(lM(object)[[k]]))
634
-					lM(object)[[k]][row.index, column] <- fData(tmp)[, k]
635
-				}
636
-			}			
637
-			rm(tmp); ##gc()
638
-			ii <- ii+1
639
-		}
640
-	}
641
-	return(object)
642
-}
535
+##crlmmCopynumber <- function(object,
536
+##			    chromosome=1:23,
537
+##			    which.batches,
538
+##			    MIN.SAMPLES=10,
539
+##			    SNRMin=5,
540
+##			    MIN.OBS=3,
541
+##			    DF.PRIOR=50,
542
+##			    bias.adj=FALSE,
543
+##			    prior.prob=rep(1/4,4),
544
+##			    seed=1,
545
+##			    verbose=TRUE,
546
+##			    GT.CONF.THR=0.99,
547
+##			    PHI.THR=2^6,
548
+##			    nHOM.THR=5,
549
+##			    MIN.NU=2^3,
550
+##			    MIN.PHI=2^3,
551
+##			    THR.NU.PHI=TRUE,
552
+##			    thresholdCopynumber=TRUE,
553
+##			    weighted.lm=TRUE){
554
+##	ffIsLoaded <- class(calls(object))[[1]] == "ff"
555
+##	if(ffIsLoaded){
556
+##		open(object)
557
+##		open(object$SKW)
558
+##		open(object$SNR)
559
+##	}
560
+##	stopifnot("batch" %in% varLabels(protocolData(object)))
561
+##	stopifnot("chromosome" %in% fvarLabels(object))
562
+##	stopifnot("position" %in% fvarLabels(object))
563
+##	stopifnot("isSnp" %in% fvarLabels(object))
564
+##	##batch <- object$batch
565
+##	batch <- batch(object)
566
+##	if(ffIsLoaded) {
567
+##		open(object$SNR)
568
+##		SNR <- object$SNR[, ]
569
+##	} else SNR <-  object$SNR
570
+##	batches <- split((1:ncol(object))[SNR > SNRMin], batch[SNR > SNRMin])
571
+##	if(any(sapply(batches, length) < MIN.SAMPLES)) message("Excluding batches with fewer than ", MIN.SAMPLES, " samples")
572
+##	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
573
+##	if(missing(which.batches)) which.batches <- seq(along=batches)
574
+##	for(i in chromosome){
575
+##		if(verbose) cat("Chromosome ", i, "\n")
576
+##		if(i >= 24) next()
577
+##		ii <- which.batches[1]
578
+##		for(j in batches[which.batches]){
579
+##			if(verbose) message("Batch ", ii, " of ", length(which.batches))
580
+##			row.index <- which(chromosome(object) == i)
581
+##			##Note that ffdf assayDataElements are data.frames after subsetting(not matrices)
582
+####			ca <- as.matrix(CA(object)[row.index, j])
583
+####			cb <- as.matrix(CB(object)[row.index, j])
584
+####			dimnames(ca) <- dimnames(cb) <- list(featureNames(object)[row.index], sampleNames(object)[j])
585
+##			tmp <- new("CNSet",
586
+##				   call=as.matrix(calls(object)[row.index, j]),
587
+##				   callProbability=as.matrix(snpCallProbability(object)[row.index, j]),
588
+##				   alleleA=as.matrix(A(object)[row.index, j]),
589
+##				   alleleB=as.matrix(B(object)[row.index, j]),
590
+####				   CA=ca, CB=cb,
591
+##				   phenoData=phenoData(object)[j, ],
592
+##				   annotation=annotation(object))
593
+##			featureData(tmp) <- addFeatureAnnotation(tmp)
594
+##			featureData(tmp) <- lm.parameters(tmp, batch=unique(batch[j]))
595
+##			tmp$batch <- batch(object)[j]
596
+##			tmp <- computeCopynumber(tmp,
597
+##						 MIN.OBS=MIN.OBS,
598
+##						 DF.PRIOR=DF.PRIOR,
599
+##						 bias.adj=bias.adj,
600
+##						 prior.prob=prior.prob,
601
+##						 seed=seed,
602
+##						 verbose=verbose,
603
+##						 GT.CONF.THR=GT.CONF.THR,
604
+##						 PHI.THR=PHI.THR,
605
+##						 nHOM.THR=nHOM.THR,
606
+##						 MIN.NU=MIN.NU,
607
+##						 MIN.PHI=MIN.PHI,
608
+##						 THR.NU.PHI=THR.NU.PHI,
609
+##						 thresholdCopynumber=thresholdCopynumber)
610
+##			fData(tmp) <- fData(tmp)[, -(1:3)]
611
+####			CA(tmp) <- matrix(as.integer(CA(tmp)*100), nrow=nrow(tmp), ncol=ncol(tmp),
612
+####					  dimnames=list(featureNames(tmp), sampleNames(tmp)))
613
+####			CB(tmp) <- matrix(as.integer(CB(tmp)*100), nrow=nrow(tmp), ncol=ncol(tmp),
614
+####					  dimnames=list(featureNames(tmp), sampleNames(tmp)))
615
+####			CA(object)[row.index, j] <- CA(tmp)
616
+####			CB(object)[row.index, j] <- CB(tmp)
617
+##			##ad-hocery
618
+##			batchName <- unique(batch(object)[j])
619
+##			fvarLabels(tmp)[15:17] <- paste(c("corrAB", "corrBB", "corrAA"), batchName, sep=".")
620
+##			fvarLabels(tmp)[13:14] <- paste(c("phiPrimeA", "phiPrimeB"), batchName, sep=".")
621
+##			fvarLabels(tmp) <- gsub("_", ".", fvarLabels(tmp))
622
+##			##fvarLabels(tmp) <- gsub("\\.[1-9]", "", fvarLabels(tmp))
623
+##			if(ffIsLoaded){
624
+##				physical <- get("physical")
625
+##				fData(tmp) <- fData(tmp)[, fvarLabels(tmp) %in% names(physical(lM(object)))]
626
+##				jj <- match(fvarLabels(tmp), names(lM(object)))
627
+##				lM(object)[row.index, jj] <- fData(tmp)
628
+##			} else {
629
+##				nms <- paste(names(lM(object)), batchName, sep=".")
630
+##				fData(tmp) <- fData(tmp)[, fvarLabels(tmp) %in% nms]
631
+##				for(k in seq_along(fvarLabels(tmp))){
632
+##					kk <- match(fvarLabels(tmp)[k], paste(names(lM(object)), batchName, sep="."))
633
+##					column <- match(batchName, colnames(lM(object)[[k]]))
634
+##					lM(object)[[k]][row.index, column] <- fData(tmp)[, k]
635
+##				}
636
+##			}			
637
+##			rm(tmp); ##gc()
638
+##			ii <- ii+1
639
+##		}
640
+##	}
641
+##	return(object)
642
+##}
643 643
 
644 644
 crlmmCopynumberLD <- function(object,
645 645
 			     which.batches,
... ...
@@ -1571,47 +1571,47 @@ processCEL2 <- function(i, filenames, row.names, A, SKW, seed, cdfName, pkgname)
1571 1571
 
1572 1572
 
1573 1573
 # steps: quantile normalize hapmap: create 1m_reference_cn.rda object
1574
-cnrma <- function(filenames, cdfName, row.names, sns, seed=1, verbose=FALSE){
1575
-	if(missing(cdfName)) stop("must specify cdfName")
1576
-	pkgname <- getCrlmmAnnotationName(cdfName)
1577
-	require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available")
1578
-	if (missing(sns)) sns <- basename(filenames)
1579
-        loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
1580
-	fid <- getVarInEnv("npProbesFid")
1581
-	fid <- fid[match(row.names, names(fid))]
1582
-	set.seed(seed)
1583
-	idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything
1584
-	SKW <- vector("numeric", length(filenames))
1585
-	NP <- matrix(NA, length(fid), length(filenames))
1586
-	verbose <- TRUE
1587
-	if(verbose){
1588
-		message("Processing ", length(filenames), " files.")
1589
-		if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3)
1590
-	}
1591
-	if(cdfName=="genomewidesnp6"){
1592
-		loader("1m_reference_cn.rda", .crlmmPkgEnv, pkgname)
1593
-	}
1594
-	if(cdfName=="genomewidesnp5"){
1595
-		loader("5.0_reference_cn.rda", .crlmmPkgEnv, pkgname)
1596
-	}
1597
-	reference <- getVarInEnv("reference")
1598
-	##if(!is.matrix(reference)) stop("target distribution for quantile normalization not available.")
1599
-	for(i in seq(along=filenames)){
1600
-		y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
1601
-		x <- log2(y[idx2])
1602
-		SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3)
1603
-		rm(x)
1604
-		NP[, i] <- as.integer(normalize.quantiles.use.target(y, target=reference))
1605
-		if (verbose)
1606
-			if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
1607
-			else cat(".")
1608
-	}
1609
-	dimnames(NP) <- list(names(fid), sns)
1610
-	##dimnames(NP) <- list(map[, "man_fsetid"], sns)
1611
-	##res3 <- list(NP=NP, SKW=SKW)
1612
-	cat("\n")
1613
-	return(NP)
1614
-}
1574
+##cnrma <- function(filenames, cdfName, row.names, sns, seed=1, verbose=FALSE){
1575
+##	if(missing(cdfName)) stop("must specify cdfName")
1576
+##	pkgname <- getCrlmmAnnotationName(cdfName)
1577
+##	require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available")
1578
+##	if (missing(sns)) sns <- basename(filenames)
1579
+##        loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
1580
+##	fid <- getVarInEnv("npProbesFid")
1581
+##	fid <- fid[match(row.names, names(fid))]
1582
+##	set.seed(seed)
1583
+##	idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything
1584
+##	SKW <- vector("numeric", length(filenames))
1585
+##	NP <- matrix(NA, length(fid), length(filenames))
1586
+##	verbose <- TRUE
1587
+##	if(verbose){
1588
+##		message("Processing ", length(filenames), " files.")
1589
+##		if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3)
1590
+##	}
1591
+##	if(cdfName=="genomewidesnp6"){
1592
+##		loader("1m_reference_cn.rda", .crlmmPkgEnv, pkgname)
1593
+##	}
1594
+##	if(cdfName=="genomewidesnp5"){
1595
+##		loader("5.0_reference_cn.rda", .crlmmPkgEnv, pkgname)
1596
+##	}
1597
+##	reference <- getVarInEnv("reference")
1598
+##	##if(!is.matrix(reference)) stop("target distribution for quantile normalization not available.")
1599
+##	for(i in seq(along=filenames)){
1600
+##		y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
1601
+##		x <- log2(y[idx2])
1602
+##		SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3)
1603
+##		rm(x)
1604
+##		NP[, i] <- as.integer(normalize.quantiles.use.target(y, target=reference))
1605
+##		if (verbose)
1606
+##			if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
1607
+##			else cat(".")
1608
+##	}
1609
+##	dimnames(NP) <- list(names(fid), sns)
1610
+##	##dimnames(NP) <- list(map[, "man_fsetid"], sns)
1611
+##	##res3 <- list(NP=NP, SKW=SKW)
1612
+##	cat("\n")
1613
+##	return(NP)
1614
+##}
1615 1615
 
1616 1616
 getFlags <- function(object, PHI.THR){
1617 1617
 	batch <- unique(object$batch)
... ...
@@ -1628,777 +1628,777 @@ getFlags <- function(object, PHI.THR){
1628 1628
 }
1629 1629
 
1630 1630
 
1631
-instantiateObjects <- function(object, cnOptions){
1632
-	Ns <- matrix(NA, nrow(object), 5)
1633
-	colnames(Ns) <- c("A", "B", "AA", "AB", "BB")
1634
-	vA <- vB <- muB <- muA <- Ns
1635
-	normal <- matrix(TRUE, nrow(object), ncol(object))
1636
-	dimnames(normal) <- list(featureNames(object), sampleNames(object))
1637
-	tmp.objects <- list(vA=vA,
1638
-			    vB=vB,
1639
-			    muB=muB,
1640
-			    muA=muA,
1641
-			    Ns=Ns,
1642
-			    normal=normal)
1643
-        return(tmp.objects)
1644
-}
1645
-
1646
-thresholdCopynumber <- function(object){
1647
-	ca <- CA(object)
1648
-	cb <- CB(object)
1649
-	ca[ca < 0.05] <- 0.05
1650
-	ca[ca > 5] <- 5
1651
-	cb[cb < 0.05] <- 0.05
1652
-	cb[cb > 5] <- 5
1653
-	CA(object) <- ca
1654
-	CB(object) <- cb
1655
-	return(object)
1656
-}
1657
-
1658
-##linear model parameters
1659
-lm.parameters <- function(object, batch){##cnOptions){
1660
-	fD <- fData(object)
1661
-	##batch <- object$batch
1662
-	uplate <- unique(batch)
1663
-	parameterNames <- c(paste("tau2A", uplate, sep="_"),
1664
-			    paste("tau2B", uplate, sep="_"),
1665
-			    paste("sig2A", uplate, sep="_"),
1666
-			    paste("sig2B", uplate, sep="_"),
1667
-			    paste("nuA", uplate, sep="_"),
1668
-			    paste("nuA.se", uplate, sep="_"),			    
1669
-			    paste("nuB", uplate, sep="_"),
1670
-			    paste("nuB.se", uplate, sep="_"),			    			    
1671
-			    paste("phiA", uplate, sep="_"),
1672
-			    paste("phiA.se", uplate, sep="_"),			    
1673
-			    paste("phiB", uplate, sep="_"),
1674
-			    paste("phiB.se", uplate, sep="_"),			    
1675
-			    paste("phiAX", uplate, sep="_"),
1676
-			    paste("phiBX", uplate, sep="_"),			    
1677
-			    paste("corr", uplate, sep="_"),
1678
-			    paste("corrA.BB", uplate, sep="_"),
1679
-			    paste("corrB.AA", uplate, sep="_"))
1680
-	pMatrix <- data.frame(matrix(numeric(0),
1681
-				     nrow(object),
1682
-				     length(parameterNames)),
1683
-				     row.names=featureNames(object))
1684
-	colnames(pMatrix) <- parameterNames
1685
-	fD2 <- cbind(fD, pMatrix)
1686
-	new("AnnotatedDataFrame", data=fD2,
1687
-	    varMetadata=data.frame(labelDescription=colnames(fD2),
1688
-	    row.names=colnames(fD2)))
1689
-}
1690
-
1691
-nonpolymorphic <- function(object, cnOptions, tmp.objects){
1692
-	batch <- unique(object$batch)
1693
-	CHR <- unique(chromosome(object))
1694
-	goodSnps <- function(object, PHI.THR, tmp.objects, nAA.THR, nBB.THR){
1695
-		Ns <- tmp.objects[["Ns"]]
1696
-		##Ns <- get("Ns", envir)
1697
-		flags <- getFlags(object, PHI.THR)
1698
-		fewAA <- Ns[, "AA"] < nAA.THR
1699
-		fewBB <- Ns[, "BB"] < nBB.THR
1700
-		flagsA <- flags | fewAA
1701
-		flagsB <- flags | fewBB
1702
-		flags <- list(A=flagsA, B=flagsB)
1703
-		return(flags)
1704
-	}
1705
-	nAA.THR <- cnOptions$nHOM.THR
1706
-	nBB.THR <- cnOptions$nHOM.THR
1707
-	PHI.THR <- cnOptions$PHI.THR
1708
-	snpflags <- goodSnps(object, PHI.THR, tmp.objects, nAA.THR, nBB.THR)
1709
-	flagsA <- snpflags$A
1710
-	flagsB <- snpflags$B
1711
-##	if(all(flagsA) | all(flagsB)) stop("all snps are flagged")
1712
-	nuA <- getParam(object, "nuA", batch)
1713
-	nuB <- getParam(object, "nuB", batch)
1714
-	phiA <- getParam(object, "phiA", batch)
1715
-	phiB <- getParam(object, "phiB", batch)	
1716
-	sns <- sampleNames(object)
1717
-	muA <- tmp.objects[["muA"]]
1718
-	muB <- tmp.objects[["muB"]]
1719
-	A <- A(object)
1720
-	B <- B(object)
1721
-##	CA <- CA(object)
1722
-##	CB <- CB(object)
1723
-	if(CHR == 23){
1724
-		phiAX <- getParam(object, "phiAX", batch)
1725
-		phiBX <- getParam(object, "phiBX", batch)
1726
-	}
1727
-	##---------------------------------------------------------------------------
1728
-	## Train on unflagged SNPs
1729
-	##---------------------------------------------------------------------------
1730
-	##Might be best to train using the X chromosome, since for the
1731
-	##X phi and nu have been adjusted for cross-hybridization
1732
-	##plateInd <- plate == uplate[p]
1733
-	##muA <- muA[!flagsA, p, c("A", "AA")]
1734
-	##muB <- muB[!flagsB, p, c("B", "BB")]
1735
-	muA <- muA[!flagsA, "AA"]
1736
-	muB <- muB[!flagsB, "BB"]
1737
-	X <- cbind(1, log2(c(muA, muB)))
1738
-	Y <- log2(c(phiA[!flagsA], phiB[!flagsB]))
1739
-	if(nrow(X) > 5000){
1740
-		ix <- sample(1:nrow(X), 5000)
1741
-	} else {
1742
-		ix <- 1:nrow(X)
1743
-	}
1744
-	betahat <- solve(crossprod(X[ix, ]), crossprod(X[ix, ], Y[ix]))
1745
-	normal <- tmp.objects[["normal"]][!isSnp(object), ]	
1746
-	if(CHR == 23){
1747
-		##normalNP <- envir[["normalNP"]]
1748
-		##normalNP <- normalNP[, plate==uplate[p]]
1749
-		##nuT <- envir[["nuT"]]
1750
-		##phiT <- envir[["phiT"]]
1751
-		
1752
-		##cnvs <- envir[["cnvs"]]
1753
-                ##loader("cnProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv)
1754
-                ##cnProbes <- get("cnProbes", envir=.crlmmPkgEnv)
1755
-		##cnProbes <- cnProbes[match(cnvs, rownames(cnProbes)), ]
1756
-
1757
-		##For build Hg18
1758
-		##http://genome.ucsc.edu/cgi-bin/hgGateway
1759
-		##pseudo-autosomal regions on X
1760
-		##chrX:1-2,709,520 and chrX:154584237-154913754, respectively
1761
-		##par:pseudo-autosomal regions
1762
-		pseudoAR <- position(object) < 2709520 | (position(object) > 154584237 & position(object) < 154913754)
1763
-		##pseudoAR <- cnProbes[, "position"] < 2709520 | (cnProbes[, "position"] > 154584237 & cnProbes[, "position"] < 154913754)
1764
-		##in case some of the cnProbes are not annotated
1765
-		pseudoAR[is.na(pseudoAR)] <- FALSE
1766
-		pseudoAR <- pseudoAR[!isSnp(object)]
1767
-		##gender <- envir[["gender"]]
1768
-		gender <- object$gender
1769
-		obj1 <- object[!isSnp(object), ]
1770
-		A.male <- A(obj1[, gender==1])
1771
-		mu1 <- rowMedians(A.male, na.rm=TRUE)
1772
-		##mu1 <- rowMedians(NP[, gender=="male"], na.rm=TRUE)
1773
-		##mu2 <- rowMedians(NP[, gender=="female"], na.rm=TRUE)
1774
-		A.female <- A(obj1[, gender==2])
1775
-		mu2 <- rowMedians(A.female, na.rm=TRUE)
1776
-		mus <- log2(cbind(mu1, mu2))
1777
-		X.men <- cbind(1, mus[, 1])
1778
-		X.fem <- cbind(1, mus[, 2])
1779
-		
1780
-		Yhat1 <- as.numeric(X.men %*% betahat)
1781
-		Yhat2 <- as.numeric(X.fem %*% betahat)
1782
-		phi1 <- 2^(Yhat1)
1783
-		phi2 <- 2^(Yhat2)
1784
-		nu1 <- 2^(mus[, 1]) - phi1
1785
-		nu2 <- 2^(mus[, 2]) - 2*phi2
1786
-
1787
-		if(any(pseudoAR)){
1788
-			nu1[pseudoAR] <- 2^(mus[pseudoAR, 1]) - 2*phi1[pseudoAR]
1789
-		}
1790
-##		CT1 <- 1/phi1*(A.male-nu1)
1791
-##		CT2 <- 1/phi2*(A.female-nu2)
1792
-##		CA <- CA(obj1)
1793
-##		CA[, gender==1] <- CT1
1794
-##		CA[, gender==2] <- CT2
1795
-##		CA(object)[!isSnp(object), ] <- CA
1796
-		##only using females to compute the variance
1797
-		##normalNP[, gender=="male"] <- NA
1798
-		normal[, gender==1] <- NA
1799
-		sig2A <- getParam(object, "sig2A", batch)
1800
-		normal.f <- normal[, object$gender==2]
1801
-		sig2A[!isSnp(object)] <- rowMAD(log2(A.female*normal.f), na.rm=TRUE)^2
1802
-		sig2A[!isSnp(object) & is.na(sig2A)] <- median(sig2A[!isSnp(object)], na.rm=TRUE)
1803
-		##sig2T[, p] <- rowMAD(log2(NP*normalNP), na.rm=TRUE)^2
1804
-		object <- pr(object, "sig2A", batch, sig2A)
1805
-
1806
-		nuA[!isSnp(object)] <- nu2
1807
-		phiA[!isSnp(object)] <- phi2
1808
-		
1809
-		THR.NU.PHI <- cnOptions$THR.NU.PHI
1810
-		if(THR.NU.PHI){
1811
-			verbose <- cnOptions$verbose
1812
-			##Assign values to object
1813
-			object <- pr(object, "nuA", batch, nuA)
1814
-			object <- pr(object, "phiA", batch, phiA)			
1815
-			##if(verbose) message("Thresholding nu and phi")
1816
-			object <- thresholdModelParams(object, cnOptions)
1817
-		} else {
1818
-			object <- pr(object, "nuA", batch, nuA)		
1819
-			object <- pr(object, "phiA", batch, phiA)
1820
-		}
1821
-	} else {
1822
-		A <- A(object)[!isSnp(object), ]
1823
-		mus <- rowMedians(A * normal, na.rm=TRUE)
1824
-		crosshyb <- max(median(muA) - median(mus), 0)
1825
-		X <- cbind(1, log2(mus+crosshyb))
1826
-		logPhiT <- X %*% betahat
1827
-		phiA[!isSnp(object)] <- 2^(logPhiT)
1828
-		nuA[!isSnp(object)] <- mus-2*phiA[!isSnp(object)]
1829
-
1830
-		THR.NU.PHI <- cnOptions$THR.NU.PHI
1831
-		if(THR.NU.PHI){
1832
-			verbose <- cnOptions$verbose
1833
-			##Assign values to object
1834
-			object <- pr(object, "nuA", batch, nuA)
1835
-			object <- pr(object, "phiA", batch, phiA)			
1836
-			##if(verbose) message("Thresholding nu and phi")
1837
-			object <- thresholdModelParams(object, cnOptions)
1838
-			##reassign values (now thresholded at MIN.NU and MIN.PHI
1839
-			nuA <- getParam(object, "nuA", batch)
1840
-			phiA <- getParam(object, "phiA", batch)
1841
-		}
1842
-		##CA(object)[!isSnp(object), ] <- 1/phiA[!isSnp(object)]*(A - nuA[!isSnp(object)])
1843
-		sig2A <- getParam(object, "sig2A", batch)
1844
-		sig2A[!isSnp(object)] <- rowMAD(log2(A*normal), na.rm=TRUE)^2
1845
-		object <- pr(object, "sig2A", batch, sig2A)
1846
-		##added
1847
-		object <- pr(object, "nuA", batch, nuA)
1848
-		object <- pr(object, "phiA", batch, phiA)
1849
-	}
1850
-	return(object)
1851
-}
1852
-
1631
+##instantiateObjects <- function(object, cnOptions){
1632
+##	Ns <- matrix(NA, nrow(object), 5)
1633
+##	colnames(Ns) <- c("A", "B", "AA", "AB", "BB")
1634
+##	vA <- vB <- muB <- muA <- Ns
1635
+##	normal <- matrix(TRUE, nrow(object), ncol(object))
1636
+##	dimnames(normal) <- list(featureNames(object), sampleNames(object))
1637
+##	tmp.objects <- list(vA=vA,
1638
+##			    vB=vB,
1639
+##			    muB=muB,
1640
+##			    muA=muA,
1641
+##			    Ns=Ns,
1642
+##			    normal=normal)
1643
+##        return(tmp.objects)
1644
+##}
1645
+##
1646
+##thresholdCopynumber <- function(object){
1647
+##	ca <- CA(object)
1648
+##	cb <- CB(object)
1649
+##	ca[ca < 0.05] <- 0.05
1650
+##	ca[ca > 5] <- 5
1651
+##	cb[cb < 0.05] <- 0.05
1652
+##	cb[cb > 5] <- 5
1653
+##	CA(object) <- ca
1654
+##	CB(object) <- cb
1655
+##	return(object)
1656
+##}
1657
+##
1658
+####linear model parameters
1659
+##lm.parameters <- function(object, batch){##cnOptions){
1660
+##	fD <- fData(object)
1661
+##	##batch <- object$batch
1662
+##	uplate <- unique(batch)
1663
+##	parameterNames <- c(paste("tau2A", uplate, sep="_"),
1664
+##			    paste("tau2B", uplate, sep="_"),
1665
+##			    paste("sig2A", uplate, sep="_"),
1666
+##			    paste("sig2B", uplate, sep="_"),
1667
+##			    paste("nuA", uplate, sep="_"),
1668
+##			    paste("nuA.se", uplate, sep="_"),			    
1669
+##			    paste("nuB", uplate, sep="_"),
1670
+##			    paste("nuB.se", uplate, sep="_"),			    			    
1671
+##			    paste("phiA", uplate, sep="_"),
1672
+##			    paste("phiA.se", uplate, sep="_"),			    
1673
+##			    paste("phiB", uplate, sep="_"),
1674
+##			    paste("phiB.se", uplate, sep="_"),			    
1675
+##			    paste("phiAX", uplate, sep="_"),
1676
+##			    paste("phiBX", uplate, sep="_"),			    
1677
+##			    paste("corr", uplate, sep="_"),
1678
+##			    paste("corrA.BB", uplate, sep="_"),
1679
+##			    paste("corrB.AA", uplate, sep="_"))
1680
+##	pMatrix <- data.frame(matrix(numeric(0),
1681
+##				     nrow(object),
1682
+##				     length(parameterNames)),
1683
+##				     row.names=featureNames(object))
1684
+##	colnames(pMatrix) <- parameterNames
1685
+##	fD2 <- cbind(fD, pMatrix)
1686
+##	new("AnnotatedDataFrame", data=fD2,
1687
+##	    varMetadata=data.frame(labelDescription=colnames(fD2),
1688
+##	    row.names=colnames(fD2)))
1689
+##}
1690
+##
1691
+##nonpolymorphic <- function(object, cnOptions, tmp.objects){
1692
+##	batch <- unique(object$batch)
1693
+##	CHR <- unique(chromosome(object))
1694
+##	goodSnps <- function(object, PHI.THR, tmp.objects, nAA.THR, nBB.THR){
1695
+##		Ns <- tmp.objects[["Ns"]]
1696
+##		##Ns <- get("Ns", envir)
1697
+##		flags <- getFlags(object, PHI.THR)
1698
+##		fewAA <- Ns[, "AA"] < nAA.THR
1699
+##		fewBB <- Ns[, "BB"] < nBB.THR
1700
+##		flagsA <- flags | fewAA
1701
+##		flagsB <- flags | fewBB
1702
+##		flags <- list(A=flagsA, B=flagsB)
1703
+##		return(flags)
1704
+##	}
1705
+##	nAA.THR <- cnOptions$nHOM.THR
1706
+##	nBB.THR <- cnOptions$nHOM.THR
1707
+##	PHI.THR <- cnOptions$PHI.THR
1708
+##	snpflags <- goodSnps(object, PHI.THR, tmp.objects, nAA.THR, nBB.THR)
1709
+##	flagsA <- snpflags$A
1710
+##	flagsB <- snpflags$B
1711
+####	if(all(flagsA) | all(flagsB)) stop("all snps are flagged")
1712
+##	nuA <- getParam(object, "nuA", batch)
1713
+##	nuB <- getParam(object, "nuB", batch)
1714
+##	phiA <- getParam(object, "phiA", batch)
1715
+##	phiB <- getParam(object, "phiB", batch)	
1716
+##	sns <- sampleNames(object)
1717
+##	muA <- tmp.objects[["muA"]]
1718
+##	muB <- tmp.objects[["muB"]]
1719
+##	A <- A(object)
1720
+##	B <- B(object)
1721
+####	CA <- CA(object)
1722
+####	CB <- CB(object)
1723
+##	if(CHR == 23){
1724
+##		phiAX <- getParam(object, "phiAX", batch)
1725
+##		phiBX <- getParam(object, "phiBX", batch)
1726
+##	}
1727
+##	##---------------------------------------------------------------------------
1728
+##	## Train on unflagged SNPs
1729
+##	##---------------------------------------------------------------------------
1730
+##	##Might be best to train using the X chromosome, since for the
1731
+##	##X phi and nu have been adjusted for cross-hybridization
1732
+##	##plateInd <- plate == uplate[p]
1733
+##	##muA <- muA[!flagsA, p, c("A", "AA")]
1734
+##	##muB <- muB[!flagsB, p, c("B", "BB")]
1735
+##	muA <- muA[!flagsA, "AA"]
1736
+##	muB <- muB[!flagsB, "BB"]
1737
+##	X <- cbind(1, log2(c(muA, muB)))
1738
+##	Y <- log2(c(phiA[!flagsA], phiB[!flagsB]))
1739
+##	if(nrow(X) > 5000){
1740
+##		ix <- sample(1:nrow(X), 5000)
1741
+##	} else {
1742
+##		ix <- 1:nrow(X)
1743
+##	}
1744
+##	betahat <- solve(crossprod(X[ix, ]), crossprod(X[ix, ], Y[ix]))
1745
+##	normal <- tmp.objects[["normal"]][!isSnp(object), ]	
1746
+##	if(CHR == 23){
1747
+##		##normalNP <- envir[["normalNP"]]
1748
+##		##normalNP <- normalNP[, plate==uplate[p]]
1749
+##		##nuT <- envir[["nuT"]]
1750
+##		##phiT <- envir[["phiT"]]
1751
+##		
1752
+##		##cnvs <- envir[["cnvs"]]
1753
+##                ##loader("cnProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv)
1754
+##                ##cnProbes <- get("cnProbes", envir=.crlmmPkgEnv)
1755
+##		##cnProbes <- cnProbes[match(cnvs, rownames(cnProbes)), ]
1756
+##
1757
+##		##For build Hg18
1758
+##		##http://genome.ucsc.edu/cgi-bin/hgGateway
1759
+##		##pseudo-autosomal regions on X
1760
+##		##chrX:1-2,709,520 and chrX:154584237-154913754, respectively
1761
+##		##par:pseudo-autosomal regions
1762
+##		pseudoAR <- position(object) < 2709520 | (position(object) > 154584237 & position(object) < 154913754)
1763
+##		##pseudoAR <- cnProbes[, "position"] < 2709520 | (cnProbes[, "position"] > 154584237 & cnProbes[, "position"] < 154913754)
1764
+##		##in case some of the cnProbes are not annotated
1765
+##		pseudoAR[is.na(pseudoAR)] <- FALSE
1766
+##		pseudoAR <- pseudoAR[!isSnp(object)]
1767
+##		##gender <- envir[["gender"]]
1768
+##		gender <- object$gender
1769
+##		obj1 <- object[!isSnp(object), ]
1770
+##		A.male <- A(obj1[, gender==1])
1771
+##		mu1 <- rowMedians(A.male, na.rm=TRUE)
1772
+##		##mu1 <- rowMedians(NP[, gender=="male"], na.rm=TRUE)
1773
+##		##mu2 <- rowMedians(NP[, gender=="female"], na.rm=TRUE)
1774
+##		A.female <- A(obj1[, gender==2])
1775
+##		mu2 <- rowMedians(A.female, na.rm=TRUE)
1776
+##		mus <- log2(cbind(mu1, mu2))
1777
+##		X.men <- cbind(1, mus[, 1])
1778
+##		X.fem <- cbind(1, mus[, 2])
1779
+##		
1780
+##		Yhat1 <- as.numeric(X.men %*% betahat)
1781
+##		Yhat2 <- as.numeric(X.fem %*% betahat)
1782
+##		phi1 <- 2^(Yhat1)
1783
+##		phi2 <- 2^(Yhat2)
1784
+##		nu1 <- 2^(mus[, 1]) - phi1
1785
+##		nu2 <- 2^(mus[, 2]) - 2*phi2
1786
+##
1787
+##		if(any(pseudoAR)){
1788
+##			nu1[pseudoAR] <- 2^(mus[pseudoAR, 1]) - 2*phi1[pseudoAR]
1789
+##		}
1790
+####		CT1 <- 1/phi1*(A.male-nu1)
1791
+####		CT2 <- 1/phi2*(A.female-nu2)
1792
+####		CA <- CA(obj1)
1793
+####		CA[, gender==1] <- CT1
1794
+####		CA[, gender==2] <- CT2
1795
+####		CA(object)[!isSnp(object), ] <- CA
1796
+##		##only using females to compute the variance
1797
+##		##normalNP[, gender=="male"] <- NA
1798
+##		normal[, gender==1] <- NA
1799
+##		sig2A <- getParam(object, "sig2A", batch)
1800
+##		normal.f <- normal[, object$gender==2]
1801
+##		sig2A[!isSnp(object)] <- rowMAD(log2(A.female*normal.f), na.rm=TRUE)^2
1802
+##		sig2A[!isSnp(object) & is.na(sig2A)] <- median(sig2A[!isSnp(object)], na.rm=TRUE)
1803
+##		##sig2T[, p] <- rowMAD(log2(NP*normalNP), na.rm=TRUE)^2
1804
+##		object <- pr(object, "sig2A", batch, sig2A)
1805
+##
1806
+##		nuA[!isSnp(object)] <- nu2
1807
+##		phiA[!isSnp(object)] <- phi2
1808
+##		
1809
+##		THR.NU.PHI <- cnOptions$THR.NU.PHI
1810
+##		if(THR.NU.PHI){
1811
+##			verbose <- cnOptions$verbose
1812
+##			##Assign values to object
1813
+##			object <- pr(object, "nuA", batch, nuA)
1814
+##			object <- pr(object, "phiA", batch, phiA)			
1815
+##			##if(verbose) message("Thresholding nu and phi")
1816
+##			object <- thresholdModelParams(object, cnOptions)
1817
+##		} else {
1818
+##			object <- pr(object, "nuA", batch, nuA)		
1819
+##			object <- pr(object, "phiA", batch, phiA)
1820
+##		}
1821
+##	} else {
1822
+##		A <- A(object)[!isSnp(object), ]
1823
+##		mus <- rowMedians(A * normal, na.rm=TRUE)
1824
+##		crosshyb <- max(median(muA) - median(mus), 0)
1825
+##		X <- cbind(1, log2(mus+crosshyb))
1826
+##		logPhiT <- X %*% betahat
1827
+##		phiA[!isSnp(object)] <- 2^(logPhiT)
1828
+##		nuA[!isSnp(object)] <- mus-2*phiA[!isSnp(object)]
1829
+##
1830
+##		THR.NU.PHI <- cnOptions$THR.NU.PHI
1831
+##		if(THR.NU.PHI){
1832
+##			verbose <- cnOptions$verbose
1833
+##			##Assign values to object
1834
+##			object <- pr(object, "nuA", batch, nuA)
1835
+##			object <- pr(object, "phiA", batch, phiA)			
1836
+##			##if(verbose) message("Thresholding nu and phi")
1837
+##			object <- thresholdModelParams(object, cnOptions)
1838
+##			##reassign values (now thresholded at MIN.NU and MIN.PHI
1839
+##			nuA <- getParam(object, "nuA", batch)
1840
+##			phiA <- getParam(object, "phiA", batch)
1841
+##		}
1842
+##		##CA(object)[!isSnp(object), ] <- 1/phiA[!isSnp(object)]*(A - nuA[!isSnp(object)])
1843
+##		sig2A <- getParam(object, "sig2A", batch)
1844
+##		sig2A[!isSnp(object)] <- rowMAD(log2(A*normal), na.rm=TRUE)^2
1845
+##		object <- pr(object, "sig2A", batch, sig2A)
1846
+##		##added
1847
+##		object <- pr(object, "nuA", batch, nuA)
1848
+##		object <- pr(object, "phiA", batch, phiA)
1849
+##	}
1850
+##	return(object)
1851
+##}
1852
+##
1853 1853
 ##sufficient statistics on the intensity scale
1854
-withinGenotypeMoments <- function(object, cnOptions, tmp.objects){
1855
-	normal <- tmp.objects[["normal"]]
1856
-	## muA, muB: robust estimates of the within-genotype center (intensity scale)
1857
-	muA <- tmp.objects[["muA"]]
1858
-	muB <- tmp.objects[["muB"]]
1859
-	## vA, vB: robust estimates of the within-genotype variance (intensity scale)
1860
-	vA <- tmp.objects[["vA"]]
1861
-	vB <- tmp.objects[["vB"]]
1862
-	Ns <- tmp.objects[["Ns"]]
1863
-	G <- snpCall(object) 
1864
-	GT.CONF.THR <- cnOptions$GT.CONF.THR
1865
-	CHR <- unique(chromosome(object))
1866
-	A <- A(object)
1867
-	B <- B(object)
1868
-##	highConf <- (1-exp(-confs(object)/1000)) > GT.CONF.THR
1869
-	xx <- snpCallProbability(object)
1870
-	highConf <- (1-exp(-xx/1000)) > GT.CONF.THR
1871
-	##highConf <- confs(object) > GT.CONF.THR
1872
-	##highConf <- highConf > GT.CONF.THR
1873
-	if(CHR == 23){
1874
-		gender <- object$gender
1875
-##		gender <- envir[["gender"]]
1876
-		IX <- matrix(gender, nrow(G), ncol(G), byrow=TRUE)
1877
-##		IX <- IX == "female"
1878
-		IX <- IX == 2  ##2=female, 1=male
1879
-	} else IX <- matrix(TRUE, nrow(G), ncol(G))
1880
-	index <- GT.B <- GT.A <- vector("list", 3)
1881
-	names(index) <- names(GT.B) <- names(GT.A) <- c("AA", "AB", "BB")
1882
-	##--------------------------------------------------
1883
-	##within-genotype sufficient statistics
1884
-	##--------------------------------------------------
1885
-	##GT.B <- GT.A <- list()
1886
-	snpIndicator <- matrix(isSnp(object), nrow(object), ncol(object)) ##RS: added
1887
-	for(j in 1:3){
1888
-		GT <- G==j & highConf & IX & snpIndicator
1889
-		GT <- GT * normal
1890
-		Ns[, j+2] <- rowSums(GT, na.rm=TRUE)				
1891
-		GT[GT == FALSE] <- NA
1892
-		GT.A[[j]] <- GT*A
1893
-		GT.B[[j]] <- GT*B
1894
-		index[[j]] <- which(Ns[, j+2] > 0 & isSnp(object)) ##RS: added		
1895
-		muA[, j+2] <- rowMedians(GT.A[[j]], na.rm=TRUE)
1896
-		muB[, j+2] <- rowMedians(GT.B[[j]], na.rm=TRUE)
1897
-		vA[, j+2] <- rowMAD(GT.A[[j]], na.rm=TRUE)
1898
-		vB[, j+2] <- rowMAD(GT.B[[j]], na.rm=TRUE)
1899
-
1900
-		##Shrink towards the typical variance
1901
-		DF <- Ns[, j+2]-1
1902
-		DF[DF < 1] <- 1
1903
-		v0A <- median(vA[, j+2], na.rm=TRUE)
1904
-		v0B <- median(vB[, j+2], na.rm=TRUE)
1905
-		if(v0A == 0) v0A <- NA
1906
-		if(v0B == 0) v0B <- NA
1907
-		DF.PRIOR <- cnOptions$DF.PRIOR
1908
-		vA[, j+2] <- (vA[, j+2]*DF + v0A*DF.PRIOR)/(DF.PRIOR+DF)
1909
-		vA[is.na(vA[, j+2]), j+2] <- v0A
1910
-		vB[, j+2] <- (vB[, j+2]*DF + v0B*DF.PRIOR)/(DF.PRIOR+DF)
1911
-		vB[is.na(vB[, j+2]), j+2] <- v0B
1912
-	}
1913
-	if(CHR == 23){
1914
-		k <- 1
1915
-		for(j in c(1,3)){
1916
-			GT <- G==j & highConf & !IX 
1917
-			Ns[, k] <- rowSums(GT)
1918
-			GT[GT == FALSE] <- NA
1919
-			muA[, k] <- rowMedians(GT*A, na.rm=TRUE)
1920
-			muB[, k] <- rowMedians(GT*B, na.rm=TRUE)
1921
-			vA[, k] <- rowMAD(GT*A, na.rm=TRUE)
1922
-			vB[, k] <- rowMAD(GT*B, na.rm=TRUE)
1923
-			
1924
-			DF <- Ns[, k]-1
1925
-			DF[DF < 1] <- 1
1926
-			v0A <- median(vA[, k], na.rm=TRUE)
1927
-			v0B <- median(vB[, k], na.rm=TRUE)
1928
-			vA[, k] <- (vA[, k]*DF + v0A*DF.PRIOR)/(DF.PRIOR+DF)
1929
-			vA[is.na(vA[, k]), k] <- v0A
1930
-			vB[, k] <- (vB[, k]*DF + v0B*DF.PRIOR)/(DF.PRIOR+DF)
1931
-			vB[is.na(vB[, k]), k] <- v0B			
1932
-			k <- k+1
1933
-		}
1934
-	}
1935
-	tmp.objects[["Ns"]] <- Ns
1936
-	tmp.objects[["vA"]] <- vA
1937
-	tmp.objects[["vB"]] <- vB
1938
-	tmp.objects[["muA"]] <- muA
1939
-	tmp.objects[["muB"]] <- muB
1940
-	tmp.objects$index <- index
1941
-	tmp.objects$GT.A <- GT.A
1942
-	tmp.objects$GT.B <- GT.B
1943
-	return(tmp.objects)
1944
-}
1945
-
1946
-imputeCenter <- function(muA, muB, index.complete, index.missing){
1947
-	index <- index.missing
1948
-	mnA <- muA
1949
-	mnB <- muB
1950
-	for(j in 1:3){
1951
-		if(length(index[[j]]) == 0) next()
1952
-		X <- cbind(1, mnA[index.complete,  -j, drop=FALSE], mnB[index.complete,  -j, drop=FALSE])
1953
-		Y <- cbind(mnA[index.complete, j], mnB[index.complete,  j])
1954
-		betahat <- solve(crossprod(X), crossprod(X,Y))
1955
-		X <- cbind(1, mnA[index[[j]],  -j, drop=FALSE],  mnB[index[[j]],  -j, drop=FALSE])
1956
-		mus <- X %*% betahat
1957
-		muA[index[[j]], j] <- mus[, 1]
1958
-		muB[index[[j]], j] <- mus[, 2]
1959
-	}
1960
-	list(muA, muB)
1961
-}
1962
-
1963
-imputeCenterX <- function(muA, muB, Ns, index.complete, MIN.OBS){
1964
-	index1 <- which(Ns[, 1] == 0 & Ns[, 3] > MIN.OBS)
1965
-	if(length(index1) > 0){
1966
-		X <- cbind(1, muA[index.complete[[1]], 3], muB[index.complete[[1]], 3])
1967
-		Y <- cbind(1, muA[index.complete[[1]], 1], muB[index.complete[[1]], 1])
1968
-		betahat <- solve(crossprod(X), crossprod(X,Y))
1969
-		##now with the incomplete SNPs
1970
-		X <- cbind(1, muA[index1, 3], muB[index1, 3])
1971
-		mus <- X %*% betahat
1972
-		muA[index1, 1] <- mus[, 2]
1973
-		muB[index1, 1] <- mus[, 3]
1974
-	}
1975
-	index1 <- which(Ns[, 3] == 0)
1976
-	if(length(index1) > 0){
1977
-		X <- cbind(1, muA[index.complete[[2]], 1], muB[index.complete[[2]], 1])
1978
-		Y <- cbind(1, muA[index.complete[[2]], 3], muB[index.complete[[2]], 3])
1979
-		betahat <- solve(crossprod(X), crossprod(X,Y))
1980
-		##now with the incomplete SNPs
1981
-		X <- cbind(1, muA[index1, 1], muB[index1, 1])
1982
-		mus <- X %*% betahat
1983
-		muA[index1, 3] <- mus[, 2]
1984
-		muB[index1, 3] <- mus[, 3]
1985
-	}
1986
-	list(muA, muB)
1987
-}
1854
+##withinGenotypeMoments <- function(object, cnOptions, tmp.objects){
1855
+##	normal <- tmp.objects[["normal"]]
1856
+##	## muA, muB: robust estimates of the within-genotype center (intensity scale)
1857
+##	muA <- tmp.objects[["muA"]]
1858
+##	muB <- tmp.objects[["muB"]]
1859
+##	## vA, vB: robust estimates of the within-genotype variance (intensity scale)
1860
+##	vA <- tmp.objects[["vA"]]
1861
+##	vB <- tmp.objects[["vB"]]
1862
+##	Ns <- tmp.objects[["Ns"]]
1863
+##	G <- snpCall(object) 
1864
+##	GT.CONF.THR <- cnOptions$GT.CONF.THR
1865
+##	CHR <- unique(chromosome(object))
1866
+##	A <- A(object)
1867
+##	B <- B(object)
1868
+####	highConf <- (1-exp(-confs(object)/1000)) > GT.CONF.THR
1869
+##	xx <- snpCallProbability(object)
1870
+##	highConf <- (1-exp(-xx/1000)) > GT.CONF.THR
1871
+##	##highConf <- confs(object) > GT.CONF.THR
1872
+##	##highConf <- highConf > GT.CONF.THR
1873
+##	if(CHR == 23){
1874
+##		gender <- object$gender
1875
+####		gender <- envir[["gender"]]
1876
+##		IX <- matrix(gender, nrow(G), ncol(G), byrow=TRUE)
1877
+####		IX <- IX == "female"
1878
+##		IX <- IX == 2  ##2=female, 1=male
1879
+##	} else IX <- matrix(TRUE, nrow(G), ncol(G))
1880
+##	index <- GT.B <- GT.A <- vector("list", 3)
1881
+##	names(index) <- names(GT.B) <- names(GT.A) <- c("AA", "AB", "BB")
1882
+##	##--------------------------------------------------
1883
+##	##within-genotype sufficient statistics
1884
+##	##--------------------------------------------------
1885
+##	##GT.B <- GT.A <- list()
1886
+##	snpIndicator <- matrix(isSnp(object), nrow(object), ncol(object)) ##RS: added
1887
+##	for(j in 1:3){
1888
+##		GT <- G==j & highConf & IX & snpIndicator
1889
+##		GT <- GT * normal
1890
+##		Ns[, j+2] <- rowSums(GT, na.rm=TRUE)				
1891
+##		GT[GT == FALSE] <- NA
1892
+##		GT.A[[j]] <- GT*A
1893
+##		GT.B[[j]] <- GT*B
1894
+##		index[[j]] <- which(Ns[, j+2] > 0 & isSnp(object)) ##RS: added		
1895
+##		muA[, j+2] <- rowMedians(GT.A[[j]], na.rm=TRUE)
1896
+##		muB[, j+2] <- rowMedians(GT.B[[j]], na.rm=TRUE)
1897
+##		vA[, j+2] <- rowMAD(GT.A[[j]], na.rm=TRUE)
1898
+##		vB[, j+2] <- rowMAD(GT.B[[j]], na.rm=TRUE)
1899
+##
1900
+##		##Shrink towards the typical variance
1901
+##		DF <- Ns[, j+2]-1
1902
+##		DF[DF < 1] <- 1
1903
+##		v0A <- median(vA[, j+2], na.rm=TRUE)
1904
+##		v0B <- median(vB[, j+2], na.rm=TRUE)
1905
+##		if(v0A == 0) v0A <- NA
1906
+##		if(v0B == 0) v0B <- NA
1907
+##		DF.PRIOR <- cnOptions$DF.PRIOR
1908
+##		vA[, j+2] <- (vA[, j+2]*DF + v0A*DF.PRIOR)/(DF.PRIOR+DF)
1909
+##		vA[is.na(vA[, j+2]), j+2] <- v0A
1910
+##		vB[, j+2] <- (vB[, j+2]*DF + v0B*DF.PRIOR)/(DF.PRIOR+DF)
1911
+##		vB[is.na(vB[, j+2]), j+2] <- v0B
1912
+##	}
1913
+##	if(CHR == 23){
1914
+##		k <- 1
1915
+##		for(j in c(1,3)){
1916
+##			GT <- G==j & highConf & !IX 
1917
+##			Ns[, k] <- rowSums(GT)
1918
+##			GT[GT == FALSE] <- NA
1919
+##			muA[, k] <- rowMedians(GT*A, na.rm=TRUE)
1920
+##			muB[, k] <- rowMedians(GT*B, na.rm=TRUE)
1921
+##			vA[, k] <- rowMAD(GT*A, na.rm=TRUE)
1922
+##			vB[, k] <- rowMAD(GT*B, na.rm=TRUE)
1923
+##			
1924
+##			DF <- Ns[, k]-1
1925
+##			DF[DF < 1] <- 1
1926
+##			v0A <- median(vA[, k], na.rm=TRUE)
1927
+##			v0B <- median(vB[, k], na.rm=TRUE)
1928
+##			vA[, k] <- (vA[, k]*DF + v0A*DF.PRIOR)/(DF.PRIOR+DF)
1929
+##			vA[is.na(vA[, k]), k] <- v0A
1930
+##			vB[, k] <- (vB[, k]*DF + v0B*DF.PRIOR)/(DF.PRIOR+DF)
1931
+##			vB[is.na(vB[, k]), k] <- v0B			
1932
+##			k <- k+1
1933
+##		}
1934
+##	}
1935
+##	tmp.objects[["Ns"]] <- Ns
1936
+##	tmp.objects[["vA"]] <- vA
1937
+##	tmp.objects[["vB"]] <- vB
1938
+##	tmp.objects[["muA"]] <- muA
1939
+##	tmp.objects[["muB"]] <- muB
1940
+##	tmp.objects$index <- index
1941
+##	tmp.objects$GT.A <- GT.A
1942
+##	tmp.objects$GT.B <- GT.B
1943
+##	return(tmp.objects)
1944
+##}
1988 1945
 
1989
-oneBatch <- function(object, cnOptions, tmp.objects){
1990
-	muA <- tmp.objects[["muA"]]
1991
-	muB <- tmp.objects[["muB"]]
1992
-	Ns <- tmp.objects[["Ns"]]
1993
-	CHR <- unique(chromosome(object))
1994
-	##---------------------------------------------------------------------------
1995
-	## Impute sufficient statistics for unobserved genotypes (plate-specific)
1996
-	##---------------------------------------------------------------------------
1997
-	MIN.OBS <- cnOptions$MIN.OBS
1998
-	index.AA <- which(Ns[, "AA"] >= MIN.OBS)
1999
-	index.AB <- which(Ns[, "AB"] >= MIN.OBS)
2000
-	index.BB <- which(Ns[, "BB"] >= MIN.OBS)
2001
-	correct.orderA <- muA[, "AA"] > muA[, "BB"]
2002
-	correct.orderB <- muB[, "BB"] > muB[, "AA"]
2003
-	##For chr X, this will ignore the males 
2004
-	nobs <- rowSums(Ns[, 3:5] >= MIN.OBS, na.rm=TRUE) == 3
2005
-	index.complete <- which(correct.orderA & correct.orderB & nobs) ##be selective here
2006
-	size <- min(5000, length(index.complete))
2007
-	if(size == 5000) index.complete <- sample(index.complete, 5000)
2008
-	if(length(index.complete) < 200){
2009
-		stop("fewer than 200 snps pass criteria for predicting the sufficient statistics")
2010
-	}
2011
-	index <- tmp.objects[["index"]]
2012
-	index[[1]] <- which(Ns[, "AA"] == 0 & (Ns[, "AB"] >= MIN.OBS & Ns[, "BB"] >= MIN.OBS))
2013
-	index[[2]] <- which(Ns[, "AB"] == 0 & (Ns[, "AA"] >= MIN.OBS & Ns[, "BB"] >= MIN.OBS))
2014
-	index[[3]] <- which(Ns[, "BB"] == 0 & (Ns[, "AB"] >= MIN.OBS & Ns[, "AA"] >= MIN.OBS))
2015
-	mnA <- muA[, 3:5]
2016
-	mnB <- muB[, 3:5]
2017
-	for(j in 1:3){
2018
-		if(length(index[[j]]) == 0) next()
2019
-		X <- cbind(1, mnA[index.complete,  -j, drop=FALSE], mnB[index.complete,  -j, drop=FALSE])
2020
-		Y <- cbind(mnA[index.complete, j], mnB[index.complete,  j])
2021
-		betahat <- solve(crossprod(X), crossprod(X,Y))
2022
-		X <- cbind(1, mnA[index[[j]],  -j, drop=FALSE],  mnB[index[[j]],  -j, drop=FALSE])
2023
-		mus <- X %*% betahat
2024
-		muA[index[[j]], j+2] <- mus[, 1]
2025
-		muB[index[[j]], j+2] <- mus[, 2]
2026
-	}
2027
-	nobsA <- Ns[, "A"] > MIN.OBS
2028
-	nobsB <- Ns[, "B"] > MIN.OBS
2029
-	notMissing <- !(is.na(muA[, "A"]) | is.na(muA[, "B"]) | is.na(muB[, "A"]) | is.na(muB[, "B"]))
2030
-	complete <- list()
2031
-	complete[[1]] <- which(correct.orderA & correct.orderB & nobsA & notMissing) ##be selective here
2032
-	complete[[2]] <- which(correct.orderA & correct.orderB & nobsB & notMissing) ##be selective here	
2033
-	size <- min(5000, length(complete[[1]]))
2034
-	if(size > 5000) complete <- lapply(complete, function(x) sample(x, size))
2035
-	if(CHR == 23){
2036
-		index <- list()
2037
-		index[[1]] <- which(Ns[, "A"] == 0)
2038
-		index[[2]] <- which(Ns[, "B"] == 0)
2039
-		cols <- 2:1
2040
-		for(j in 1:2){
2041
-			if(length(index[[j]]) == 0) next()
2042
-			X <- cbind(1, muA[complete[[j]], cols[j]], muB[complete[[j]], cols[j]])
2043
-			Y <- cbind(muA[complete[[j]], j], muB[complete[[j]], j])
2044
-			betahat <- solve(crossprod(X), crossprod(X,Y))
2045
-			X <- cbind(1, muA[index[[j]], cols[j]],  muB[index[[j]], cols[j]])
2046
-			mus <- X %*% betahat
2047
-			muA[index[[j]], j] <- mus[, 1]
2048
-			muB[index[[j]], j] <- mus[, 2]
2049
-		}
2050
-	}
2051
-	##missing two genotypes
2052
-	noAA <- Ns[, "AA"] < MIN.OBS
2053
-	noAB <- Ns[, "AB"] < MIN.OBS
2054
-	noBB <- Ns[, "BB"] < MIN.OBS
2055
-	index[[1]] <- noAA & noAB
2056
-	index[[2]] <- noBB & noAB
2057
-	index[[3]] <- noAA & noBB
2058
-##	snpflags <- envir[["snpflags"]]
2059
-	snpflags <- index[[1]] | index[[2]] | index[[3]]
2060
-##	snpflags[, p] <- index[[1]] | index[[2]] | index[[3]]
2061
-
2062
-	##---------------------------------------------------------------------------
2063
-	## Two genotype clusters not observed -- would sequence help? (didn't seem to)
2064
-	## 1. extract index of complete data
2065
-	## 2. Regress  mu_missing ~ sequence + mu_observed
2066
-	## 3. solve for nu assuming the median is 2
2067
-	##---------------------------------------------------------------------------
2068
-	cols <- c(3, 1, 2)
2069