Browse code

changes to computeCopynumber function. Exporting wrapper for extracting dates from the cel file header

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@38183 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 25/03/2009 13:12:49
Showing 9 changed files

... ...
@@ -28,3 +28,27 @@ BioC data packages
28 28
 
29 29
 * Loaded data in cnrma-functions.R to an environment and extracted from there
30 30
   so we can get rid of the NOTES complaining about 'no visible bindings'.
31
+
32
+2009-03-18 Rob Scharpf <rscharpf@jhsph.edu> - committed version 1.0.57
33
+* Added steps object to instantiateObjects to speed up debugging
34
+* Reformulated the regression for chromosome X
35
+   -  estimate cross-hybridization using chromosome X
36
+* Take into account pseudo-autosomal regions on chr X for copy number estimation
37
+* Replaced get() and assign() with '<-' operations to improve readability
38
+* Function to compute posterior means of copy number estimates
39
+
40
+2009-03-19 Rob Scharpf - committed version 1.0.59
41
+* Requires genomewidesnp6Crlmm version 1.0.1 or greater
42
+
43
+2009-03-25 Rob Scharpf - committed version 1.0.60
44
+* simplified some of the preliminary steps for the computeCopynumber function
45
+** crlmm output subset within the body of the function
46
+* modified vignette -- store results in an oligoSnpSet object (for now)
47
+* added function to extract the date from the celfile headers (celDates)
48
+
49
+
50
+
51
+
52
+
53
+
54
+
... ...
@@ -1,7 +1,7 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for SNP 5.0 and 6.0 arrays.
4
-Version: 1.0.59
4
+Version: 1.0.60
5 5
 Date: 2008-12-28
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>
7 7
 Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>
... ...
@@ -1,5 +1,5 @@
1 1
 useDynLib("crlmm", .registration=TRUE)
2
-export("crlmm", "list.celfiles", "computeCopynumber", "cnrma")
2
+export("crlmm", "list.celfiles", "computeCopynumber", "cnrma", "celDates")
3 3
 exportMethods("list2crlmmSet", "calls", "confs")
4 4
 exportClasses("crlmmSet")
5 5
 import(Biobase)
... ...
@@ -22,21 +22,34 @@ rowCors <- function(x, y, ...){
22 22
 	return(covar/(sd.x*sd.y))
23 23
 }
24 24
 
25
-nuphiAllele <- function(allele, Ystar, W, envir, p){
26
-	Ns <- get("Ns", envir)
27
-	NOHET <- mean(Ns[, p, 2], na.rm=TRUE) < 0.05
28
-	chrom <- get("chrom", envir)
25
+generateX <- function(w, X) as.numeric(diag(w) %*% X)
26
+generateIXTX <- function(x, nrow=3) {
27
+	X <- matrix(x, nrow=nrow)
28
+	XTX <- crossprod(X)
29
+	solve(XTX)
30
+}
31
+nuphiAllele <- function(p, allele, Ystar, W, envir){
32
+	Ns <- envir[["Ns"]]
33
+	CHR <- envir[["chrom"]]
34
+	nuA <- envir[["nuA"]]
35
+	nuB <- envir[["nuB"]]
36
+	nuA.se <- envir[["nuA.se"]]
37
+	nuB.se <- envir[["nuB.se"]]
38
+	phiA <- envir[["phiA"]]
39
+	phiB <- envir[["phiB"]]
40
+	phiA.se <- envir[["phiA.se"]]
41
+	phiB.se <- envir[["phiB.se"]]
42
+	if(CHR==23){
43
+		phiAx <- envir[["phiAx"]]
44
+		phiBx <- envir[["phiBx"]]
45
+	}
46
+	complete <- rowSums(is.na(W)) == 0
47
+	NOHET <- mean(Ns[, p, "AB"], na.rm=TRUE) < 0.05
29 48
 	if(missing(allele)) stop("must specify allele")
30
-	if(chrom == 23){
31
-		gender <- get("gender", envir)
32
-		if(allele == "A" & gender == "female")
33
-			X <- cbind(1, 2:0)
34
-		if(allele == "B" & gender == "female")
35
-			X <- cbind(1, 0:2)
36
-		if(allele == "A" & gender == "male")
37
-			X <- cbind(1, 1:0)
38
-		if(allele == "B" & gender == "male")
39
-			X <- cbind(1, 0:1)
49
+	if(CHR == 23){
50
+		gender <- envir[["gender"]]
51
+		if(allele == "A") X <- cbind(1, c(1, 0, 2, 1, 0), c(0, 1, 0, 1, 2))
52
+		if(allele == "B") X <- cbind(1, c(0, 1, 0, 1, 2), c(1, 0, 2, 1, 0))			
40 53
 	} else {##autosome
41 54
 		if(allele == "A") X <- cbind(1, 2:0) else X <- cbind(1, 0:2)
42 55
 		if(NOHET) X <- X[-2, ] ##more than 1 X chromosome, but all homozygous		
... ...
@@ -47,40 +60,57 @@ nuphiAllele <- function(allele, Ystar, W, envir, p){
47 60
 		stop("Inf values in W or V")
48 61
 	}
49 62
 	##How to quickly generate Xstar, Xstar = diag(W) %*% X
50
-	generateX <- function(w, X) as.numeric(diag(w) %*% X)
51
-	##Not instant
52 63
 	Xstar <- apply(W, 1, generateX, X)
53
-	generateIXTX <- function(x, nrow=3) {
54
-		X <- matrix(x, nrow=nrow)
55
-		XTX <- crossprod(X)
56
-		solve(XTX)
57
-	}
58
-	##a little slow
59 64
 	IXTX <- apply(Xstar, 2, generateIXTX, nrow=nrow(X))
60
-	betahat <- matrix(NA, 2, nrow(Ystar))
61
-	ses <- matrix(NA, 2, nrow(Ystar))
65
+	if(CHR == 23){
66
+		betahat <- matrix(NA, 3, nrow(Ystar))
67
+		ses <- matrix(NA, 3, nrow(Ystar))		
68
+	} else{
69
+		betahat <- matrix(NA, 2, nrow(Ystar))
70
+		ses <- matrix(NA, 2, nrow(Ystar))
71
+	}
62 72
 	for(i in 1:nrow(Ystar)){
63
-		betahat[, i] <- crossprod(matrix(IXTX[, i], 2, 2), crossprod(matrix(Xstar[, i], nrow=nrow(X)), Ystar[i, ]))
64
-		ssr <- sum((Ystar[i, ] - matrix(Xstar[, i], nrow(X), 2) %*% matrix(betahat[, i], 2, 1))^2)
65
-		ses[, i] <- sqrt(diag(matrix(IXTX[, i], 2, 2) * ssr))
73
+		betahat[, i] <- crossprod(matrix(IXTX[, i], ncol(X), ncol(X)), crossprod(matrix(Xstar[, i], nrow=nrow(X)), Ystar[i, ]))
74
+		ssr <- sum((Ystar[i, ] - matrix(Xstar[, i], nrow(X), ncol(X)) %*% matrix(betahat[, i], ncol(X), 1))^2)
75
+		ses[, i] <- sqrt(diag(matrix(IXTX[, i], ncol(X), ncol(X)) * ssr))
76
+	}
77
+	if(allele == "A"){
78
+		nuA[complete, p] <- betahat[1, ]
79
+		phiA[complete, p] <- betahat[2, ]
80
+		nuA.se[complete, p] <- ses[1, ]
81
+		phiA.se[complete, p] <- ses[2, ]
82
+		envir[["nuA"]] <- nuA
83
+		envir[["phiA"]] <- phiA
84
+		envir[["nuA.se"]] <- nuA.se
85
+		envir[["phiA.se"]] <- phiA.se
86
+		if(CHR == 23){
87
+			phiAx[complete, p] <- betahat[3, ]
88
+			envir[["phiAx"]] <- phiAx
89
+		}
90
+	}
91
+	if(allele=="B"){
92
+		nuB[complete, p] <- betahat[1, ]
93
+		phiB[complete, p] <- betahat[2, ]
94
+		nuB.se[complete, p] <- ses[1, ]
95
+		phiB.se[complete, p] <- ses[2, ]
96
+		envir[["nuB"]] <- nuB
97
+		envir[["phiB"]] <- phiB
98
+		envir[["nuB.se"]] <- nuB.se
99
+		envir[["phiB.se"]] <- phiB.se
100
+		if(CHR == 23){
101
+			phiBx[complete, p] <- betahat[3, ]
102
+			envir[["phiBx"]] <- phiBx
103
+		}
66 104
 	}
67
-	nu <- betahat[1, ]
68
-	phi <- betahat[2, ]
69
-	nu.se <- ses[1,]
70
-	phi.se <- ses[2,]
71
-	##dimnames(nu) <- dimnames(phi) <- dimnames(nu.ses) <- dimnames(phi.ses)
72
-	assign("nu", nu, envir=envir)
73
-	assign("phi", phi, envir=envir)
74
-	assign("nu.se", nu.se, envir=envir)
75
-	assign("phi.se", phi.se, envir=envir)	
76 105
 }
77 106
 
78
-celDatesFrom <- function(celfiles, path=""){
107
+celDates <- function(celfiles){
108
+	if(!all(file.exists(celfiles))) stop("1 or more cel file does not exist")
79 109
 	celdates <- vector("character", length(celfiles))
80 110
 	celtimes <- vector("character", length(celfiles))
81 111
 	for(i in seq(along=celfiles)){
82 112
 		if(i %% 100 == 0) cat(".")
83
-		tmp <- read.celfile.header(file.path(path, celfiles[i]), info="full")$DatHeader
113
+		tmp <- read.celfile.header(celfiles[i], info="full")$DatHeader
84 114
 		tmp <- strsplit(tmp, "\ +")
85 115
 		celdates[i] <- tmp[[1]][6]
86 116
 		celtimes[i] <- tmp[[1]][7]
... ...
@@ -91,14 +121,13 @@ celDatesFrom <- function(celfiles, path=""){
91 121
 }
92 122
 
93 123
 cnrma <- function(filenames, sns, cdfName, seed=1, verbose=FALSE){
94
-  ## BC: 03/14/09
95
-  ## getting pkgname from cdfName, in the future this might be useful
96
-  ## as the method might be implemented for other platforms
97
-  pkgname <- getCrlmmAnnotationName(cdfName)
98
-
124
+	## BC: 03/14/09
125
+	## getting pkgname from cdfName, in the future this might be useful
126
+	## as the method might be implemented for other platforms
127
+	pkgname <- getCrlmmAnnotationName(cdfName)
99 128
 	require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available")
100 129
 	if (missing(sns)) sns <- basename(filenames)
101
-  ## Loading data in .crlmmPkgEnv and extracting from there
130
+	## Loading data in .crlmmPkgEnv and extracting from there
102 131
 	data("npProbesFid", package=pkgname, envir=.crlmmPkgEnv)
103 132
 	fid <- getVarInEnv("npProbesFid")
104 133
 	gc()
... ...
@@ -113,7 +142,7 @@ cnrma <- function(filenames, sns, cdfName, seed=1, verbose=FALSE){
113 142
 	}
114 143
 	##load reference distribution obtained from hapmap
115 144
 	data(list="1m_reference_cn", package="genomewidesnp6Crlmm", envir=.crlmmPkgEnv)
116
-  reference <- getVarInEnv("reference")
145
+	reference <- getVarInEnv("reference")
117 146
 	for(i in seq(along=filenames)){
118 147
 		y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
119 148
 		x <- log2(y[idx2])
... ...
@@ -137,412 +166,471 @@ getFlags <- function(phi.thr, envir){
137 166
 	phiB <- get("phiB", envir)
138 167
 	
139 168
 	negativeNus <- nuA < 1 | nuB < 1
140
-	negativeNus <- negativeNus > 0
141
-
142 169
 	negativePhis <- phiA < phi.thr | phiB < phi.thr
143
-	negativePhis <- negativePhis > 0
144 170
 	negativeCoef <- negativeNus | negativePhis
145 171
 
146 172
 	notfinitePhi <- !is.finite(phiA) | !is.finite(phiB)
147
-	notfinitePhi <- notfinitePhi > 0
148
-	flags <- negativeCoef | notfinitePhi  
173
+	flags <- negativeCoef | notfinitePhi
174
+	return(flags)
149 175
 }
150 176
 
151 177
 goodSnps <- function(phi.thr, envir, fewAA=20, fewBB=20){
152 178
 	Ns <- get("Ns", envir)
153 179
 	flags <- getFlags(phi.thr=phi.thr, envir)
154
-	fewAA <- Ns[, , 1] < fewAA
155
-	fewBB <- Ns[, , 3] < fewBB
180
+	fewAA <- Ns[, , "AA"] < fewAA
181
+	fewBB <- Ns[, , "BB"] < fewBB
156 182
 	flagsA <- flags | fewAA
157 183
 	flagsB <- flags | fewBB
158 184
 	flags <- list(A=flagsA, B=flagsB)
159 185
 	return(flags)
160 186
 }
161 187
 
162
-instantiateObjects <- function(calls, NP, plate, envir, chrom){
163
-	if(missing(chrom)) stop("must specify chrom")
164
-	assign("chrom", chrom, envir)
188
+instantiateObjects <- function(calls, NP, plate, envir, chrom, A, B,
189
+			       gender, SNRmin=5, SNR){
190
+	envir[["chrom"]] <- chrom
191
+	CHR_INDEX <- paste(chrom, "index", sep="")
192
+	data(list=CHR_INDEX, package="genomewidesnp6Crlmm")	
193
+	A <- A[index[[1]], SNR > SNRmin]
194
+	B <- B[index[[1]], SNR > SNRmin]
195
+	calls <- calls[index[[1]], SNR > SNRmin]
196
+	conf <- conf[index[[1]], SNR > SNRmin]
197
+	NP <- NP[index[[2]], SNR > SNRmin]
198
+	plate <- plate[SNR > SNRmin]
199
+	uplate <- unique(plate)
200
+	SNR <- SNR[SNR > SNRmin]
201
+	
202
+	envir[["uplate"]] <- uplate
203
+	envir[["plate"]] <- plate	
204
+	envir[["NP"]] <- NP
205
+	envir[["A"]] <- A
206
+	envir[["B"]] <- B
207
+	envir[["calls"]] <- calls
208
+	envir[["conf"]] <- conf
165 209
 	snps <- rownames(calls)
166 210
 	cnvs <- rownames(NP)
167 211
 	sns <- basename(colnames(calls))
168 212
 	stopifnot(identical(colnames(calls), colnames(NP)))
169
-	assign("sns", sns, envir)	
170
-	assign("snps", snps, envir)
171
-	assign("cnvs", cnvs, envir)	
213
+	envir[["sns"]] <- sns
214
+	envir[["snps"]] <- snps
215
+	envir[["cnvs"]] <- cnvs
172 216
 
217
+	if(chrom == 23){
218
+		if(is.null(gender)){
219
+			message("Estimating gender")
220
+			XMedian <- apply(log2(A[, , drop=FALSE]) + log2(B[, , drop=FALSE]), 2, median)/2
221
+			gender <- kmeans(XMedian, c(min(XMedian[SNR>SNRmin]), max(XMedian[SNR>SNRmin])))[["cluster"]]			
222
+			##gender <- getGender(res)
223
+			gender[gender==2] <- "female"
224
+			gender[gender=="1"] <- "male"
225
+			envir[["gender"]] <- gender
226
+		} else envir[["gender"]] <- gender
227
+		phiAx <- matrix(NA, nrow(calls), length(uplate))
228
+		envir[["phiAx"]] <- phiAx
229
+		envir[["phiBx"]] <- phiAx
230
+	}
173 231
 	CA <- CB <- matrix(NA, nrow(calls), ncol(calls))
174
-	assign("plate", plate, envir)
175
-	uplate <- unique(plate)	
176
-	Ns <- array(NA, dim=c(nrow(calls), length(uplate), 3))
177
-	dimnames(Ns)[[3]] <- c("AA", "AB", "BB")
178
-	muA.AA <- matrix(NA, nrow=nrow(calls), length(uplate))
179
-	##Sigma.AA <- array(NA, dim=c(nrow(calls), 3, length(uplate)))
180
-	##dimnames(Sigma.AA)[2:3] <- list(c("varA", "varB", "cov"), uplate)
232
+	envir[["CA"]] <- CA
233
+	envir[["CB"]] <- CB
234
+	
235
+	Ns <- array(NA, dim=c(nrow(calls), length(uplate), 5))
236
+	dimnames(Ns)[[3]] <- c("A", "B", "AA", "AB", "BB")
237
+
238
+	envir[["Ns"]] <- envir[["muB"]] <- envir[["muA"]] <- Ns
239
+	envir[["vB"]] <- envir[["vA"]] <- Ns
181 240
 
182
-	NP.CT <- matrix(NA, nrow(NP), ncol(NP))
183
-	NP.sds <- matrix(NA, nrow(NP), ncol(NP))
184
-	assign("NP.CT", NP.CT, envir)
185
-	assign("NP.sds", NP.sds, envir)
186
-	nus <- matrix(NA, nrow(NP), length(uplate))
187
-	assign("nus", nus, envir=envir)  
188
-	assign("phis", nus, envir=envir)
241
+	CT.sds <- CT <- matrix(NA, nrow(NP), length(sns))
242
+	##NP.CT <- matrix(NA, nrow(NP), ncol(NP))
243
+	##NP.sds <- matrix(NA, nrow(NP), ncol(NP))
244
+	envir[["CT"]] <- CT
245
+	envir[["CT.sds"]] <- CT.sds
246
+
247
+	##assign("NP.CT", NP.CT, envir)
248
+	##assign("NP.sds", NP.sds, envir)
249
+	nuT <- matrix(NA, nrow(NP), length(uplate))
250
+	phiT <- nuT
251
+	envir[["nuT"]] <- nuT
252
+	envir[["phiT"]] <- phiT
253
+	##assign("nus", nus, envir=envir)  
254
+	##assign("phis", nus, envir=envir)
189 255
 
190 256
 	plates.completed <- rep(FALSE, length(uplate))
191
-	assign("plates.completed", plates.completed, envir)
192
-	
193
-	fit.variance <- NULL
194
-	assign("fit.variance", fit.variance, envir=envir)
195
-	npflags <- snpflags <- vector("list", length(uplate))	
196
-	assign("snpflags", snpflags, envir=envir)
197
-	assign("npflags", npflags, envir=envir)
198
-	
199
-	assign("Ns", Ns, envir=envir)		
200
-	assign("uplate", uplate, envir=envir)	
201
-	assign("muA.AA", muA.AA, envir=envir)
202
-	assign("muA.AB", muA.AA, envir=envir)
203
-	assign("muA.BB", muA.AA, envir=envir)
204
-	assign("muB.AA", muA.AA, envir=envir)
205
-	assign("muB.AB", muA.AA, envir=envir)
206
-	assign("muB.BB", muA.AA, envir=envir)
207
-	
208
-	assign("tau2A", muA.AA, envir=envir)
209
-	assign("tau2B", muA.AA, envir=envir)
210
-	assign("sig2A", muA.AA, envir=envir)
211
-	assign("sig2B", muA.AA, envir=envir)
212
-	assign("corr", muA.AA, envir=envir)
213
-	assign("corrA.BB", muA.AA, envir=envir)
214
-	assign("corrB.AA", muA.AA, envir=envir)			
215
-	
216
-	assign("nuA", muA.AA, envir=envir)
217
-	assign("nuB", muA.AA, envir=envir)
218
-	assign("phiA", muA.AA, envir=envir)
219
-	assign("phiB", muA.AA, envir=envir)
220
-	assign("nuA.se", muA.AA, envir=envir)
221
-	assign("nuB.se", muA.AA, envir=envir)
222
-	assign("phiA.se", muA.AA, envir=envir)
223
-	assign("phiB.se", muA.AA, envir=envir)
224
-	assign("sd.CT", CA, envir=envir)
225
-	assign("CA", CA, envir=envir)
226
-	assign("CB", CB, envir=envir)
257
+	envir[["plates.completed"]] <- plates.completed
258
+
259
+	steps <- rep(FALSE, 4)
260
+	names(steps) <- c("suffStats", "coef", "snp-cn", "np-cn")
261
+	envir[["steps"]] <- steps
262
+
263
+	snpflags <- matrix(FALSE, length(snps), length(uplate))
264
+	npflags <- matrix(FALSE, length(cnvs), length(uplate))
265
+	##assign("snpflags", snpflags, envir=envir)
266
+	##assign("npflags", npflags, envir=envir)
267
+	envir[["snpflags"]] <- snpflags
268
+	envir[["npflags"]] <- npflags
269
+
270
+	tau2A <- matrix(NA, nrow(calls), length(uplate))
271
+	envir[["tau2A"]] <- tau2A
272
+	envir[["tau2B"]] <- tau2A
273
+	envir[["sig2A"]] <- tau2A
274
+	envir[["sig2B"]] <- tau2A
275
+	sig2T <- matrix(NA, nrow(NP), ncol(NP))
276
+	envir[["sig2T"]] <- sig2T
277
+	envir[["corr"]] <- tau2A
278
+	envir[["corrA.BB"]] <- tau2A
279
+	envir[["corrB.AA"]] <- tau2A
280
+	envir[["nuB"]] <- envir[["nuA"]] <- tau2A
281
+	envir[["phiB"]] <- envir[["phiA"]] <- tau2A
282
+	envir[["nuB.se"]] <- envir[["nuA.se"]] <- tau2A
283
+	envir[["phiB.se"]] <- envir[["phiA.se"]] <- tau2A
284
+	normal <- matrix(TRUE, nrow(A), ncol(A))
285
+	normalNP <- matrix(TRUE, nrow(NP), ncol(NP))	
286
+	envir[["normal"]] <- normal
287
+	envir[["normalNP"]] <- normalNP
227 288
 }
228 289
 
229 290
 
230 291
 
231
-computeCopynumber <- function(A,
292
+computeCopynumber <- function(chrom,
293
+			      A,
232 294
 			      B,
233 295
 			      calls,
234 296
 			      conf,
235 297
 			      NP,
236 298
 			      plate,
237
-			      fit.variance=NULL,
238 299
 			      MIN.OBS=1,
239 300
 			      envir,
240
-			      chrom, P, DF.PRIOR=50, CONF.THR=0.99,
241
-			      trim=0, upperTail=TRUE,
301
+			      P,
302
+			      DF.PRIOR=50,
303
+			      CONF.THR=0.99,
242 304
 			      bias.adj=FALSE,
243
-			      priorProb, ...){
244
-	if(length(ls(envir)) == 0) instantiateObjects(calls, NP, plate, envir, chrom)
305
+			      priorProb,
306
+			      gender=NULL,
307
+			      SNR,
308
+			      SNRmin=5, seed=123, verbose=TRUE, ...){
309
+	set.seed(seed)
310
+	if(missing(chrom)) stop("must specify chromosome")
311
+	if(length(ls(envir)) == 0) {
312
+		instantiateObjects(calls=calls, NP=NP, plate=plate,
313
+				   envir=envir, chrom=chrom, A=A, B=B,
314
+				   gender=gender, SNR=SNR, SNRmin=SNRmin)
315
+	}
316
+	plate <- envir[["plate"]]
317
+	uplate <- envir[["uplate"]]	
318
+	calls <- envir[["calls"]]
319
+	A <- envir[["A"]]
320
+	B <- envir[["B"]]
321
+	conf <- envir[["conf"]]
322
+	NP <- envir[["NP"]]
245 323
 	if(bias.adj){
246 324
 		##assign uniform priors for total copy number states
247 325
 		if(missing(priorProb)) priorProb <- rep(1/4, 4)
326
+		envir[["steps"]] <- rep(FALSE, 4)
248 327
 	}
249 328
 	##will be updating these objects
250
-	uplate <- get("uplate", envir)
251 329
 	message("Sufficient statistics")
252 330
 	if(missing(P)) P <- seq(along=uplate)
253
-	for(p in P){
254
-		cat(".")
255
-		if(sum(plate == uplate[p]) < 10) next()
256
-		oneBatch(plateIndex=p,
257
-			 G=calls[, plate==uplate[p]],
258
-			 A=A[, plate==uplate[p]],
259
-			 B=B[, plate==uplate[p]],
260
-			 conf=conf[, plate==uplate[p]],
261
-			 CONF.THR=CONF.THR,
262
-			 envir=envir,
263
-			 MIN.OBS=MIN.OBS,
264
-			 DF.PRIOR=DF.PRIOR,
265
-			 trim=trim, upperTail=upperTail,
266
-			 bias.adj=bias.adj)
331
+	steps <- envir[["steps"]]
332
+	if(!steps[1]){
333
+		for(p in P){
334
+			cat(".")
335
+			if(sum(plate == uplate[p]) < 10) next()
336
+			J <- plate==uplate[p]
337
+			oneBatch(plateIndex=p,
338
+				 A=A[, J],
339
+				 B=B[, J],
340
+				 calls=calls[, J],
341
+				 conf=conf[, J],
342
+				 gender=NULL,
343
+				 NP[, J],
344
+				 plate[J],
345
+				 MIN.OBS=1,
346
+				 envir=envir,
347
+				 DF.PRIOR=DF.PRIOR,
348
+				 CONF.THR=CONF.THR,
349
+				 bias.adj=bias.adj,
350
+				 priorProb=priorProb,...)
351
+		}
352
+		steps[1] <- TRUE
353
+		envir[["steps"]] <- steps
267 354
 	}
268
-	message("\nEstimating coefficients")	
269
-	for(p in P){
270
-		cat(".")
271
-		coefs(plateIndex=p, conf=conf[, plate==uplate[p]],
272
-		      envir=envir, CONF.THR=CONF.THR, MIN.OBS=MIN.OBS)
355
+	if(!steps[2]){
356
+		message("\nEstimating coefficients")	
357
+		for(p in P){
358
+			cat(".")
359
+			coefs(plateIndex=p, conf=conf[, plate==uplate[p]],
360
+			      envir=envir, CONF.THR=CONF.THR, MIN.OBS=MIN.OBS)
361
+		}
362
+		steps[2] <- TRUE
363
+		envir[["steps"]] <- steps		
273 364
 	}
274
-	message("\nAllele specific copy number")	
275
-	for(p in P){
276
-		cat(".")
277
-		polymorphic(plateIndex=p,
278
-			    A=A[, plate==uplate[p]],
279
-			    B=B[, plate==uplate[p]],
280
-			    envir=envir)
365
+	if(!steps[3]){
366
+		message("\nAllele specific copy number")	
367
+		for(p in P){
368
+			cat(".")
369
+			polymorphic(plateIndex=p,
370
+				    A=A[, plate==uplate[p]],
371
+				    B=B[, plate==uplate[p]],
372
+				    envir=envir)
373
+		}
374
+		steps[3] <- TRUE
375
+		envir[["snp-cn"]] <- steps						
281 376
 	}
282
-	message("\nCopy number for nonpolymorphic probes...")	
283
-	for(p in P){
284
-		cat(".")
285
-		nonpolymorphic(plateIndex=p,
286
-			       NP=NP[, plate==uplate[p]],
287
-			       envir=envir)
377
+	if(!steps[4]){
378
+		message("\nCopy number for nonpolymorphic probes...")	
379
+		for(p in P){
380
+			cat(".")
381
+			nonpolymorphic(plateIndex=p,
382
+				       NP=NP[, plate==uplate[p]],
383
+				       envir=envir)
384
+		}
385
+		steps[4] <- TRUE
386
+		envir[["np-cn"]] <- steps
288 387
 	}
289
-##	snpflags <- get("snpflags", envir)
290
-##	npflags <- get("npflags", envir)
291
-##	flags <- sapply(snpflags, length)
292
-##	flags.np <- sapply(npflags, length)
293
-##	if(any(flags > 0) | any(flags.np > 0))
294
-##		warning("some SNPs were flagged -- possible NAs.  Check the indices in snpflags and npflags")
295 388
 }
296 389
 
297
-nonpolymorphic <- function(plateIndex, NP, envir){
390
+nonpolymorphic <- function(plateIndex, NP, envir, CONF.THR=0.99, DF.PRIOR=50){
298 391
 	p <- plateIndex
299
-	plate <- get("plate", envir)
300
-	uplate <- get("uplate", envir)	
301
-	plates.completed <- get("plates.completed", envir)
392
+	CHR <- envir[["chrom"]]
393
+	plate <- envir[["plate"]]
394
+	uplate <- envir[["uplate"]]
395
+	plates.completed <- envir[["plates.completed"]]
302 396
 	if(!plates.completed[p]) return()
303
-	##snpflags <- get("snpflags", envir)
304
-	##if(is.null(snpflags)){
305
-	snpflags <- goodSnps(phi.thr=10, envir=envir, fewAA=10, fewBB=10)
306
-	##assign("snpflags", snpflags, envir=envir)
397
+	snpflags <- goodSnps(phi.thr=2^6, envir=envir, fewAA=10, fewBB=10)
307 398
 	flagsA <- snpflags$A[, p]
308 399
 	flagsB <- snpflags$B[, p]
309 400
 	if(all(flagsA) | all(flagsB)) stop("all snps are flagged")
310
-	nuA <- get("nuA", envir)
311
-	nuB <- get("nuB", envir)
312
-	phiA <- get("phiA", envir)
313
-	phiB <- get("phiB", envir)
314
-	uplate <- get("uplate", envir)
315
-	sns <- get("sns", envir)
316
-	muA.AA <- get("muA.AA", envir)
317
-	muA.AB <- get("muA.AB", envir)
318
-	muA.BB <- get("muA.BB", envir)
319
-	muB.AA <- get("muB.AA", envir)
320
-	muB.AB <- get("muB.AB", envir)
321
-	muB.BB <- get("muB.BB", envir)
322
-	NP.CT <- get("NP.CT", envir)
323
-	nus <- get("nus", envir)
324
-	phis <- get("phis", envir)
325
-	NP.sds <- get("NP.sds", envir)
326
-	##fit.variance <- get("fit.variance", envir)
327
-	NP.CT <- get("NP.CT", envir)
401
+	nuA <- envir[["nuA"]][, p]
402
+	nuB <- envir[["nuB"]][, p]
403
+	phiA <- envir[["phiA"]][, p]
404
+	phiB <- envir[["phiB"]][, p]
405
+	uplate <- envir[["uplate"]]
406
+	sns <- envir[["sns"]]
407
+	muA <- envir[["muA"]][, p, ]
408
+	muB <- envir[["muB"]][, p, ]
409
+	nuT <- envir[["nuT"]]
410
+	phiT <- envir[["phiT"]]
411
+	sig2T <- envir[["sig2T"]]
412
+	A <- envir[["A"]][, plate==uplate[p]]
413
+	B <- envir[["B"]][, plate==uplate[p]]
414
+	CA <- envir[["A"]][, plate==uplate[p]]
415
+	CB <- envir[["B"]][, plate==uplate[p]]
416
+	if(CHR == 23){
417
+		phiAx <- envir[["phiAx"]][, p]
418
+		phiBx <- envir[["phiBx"]][, p]
419
+	}
328 420
 	##---------------------------------------------------------------------------
329 421
 	## Train on unflagged SNPs
330
-	##---------------------------------------------------------------------------		
422
+	##---------------------------------------------------------------------------
423
+	##Might be best to train using the X chromosome, since for the
424
+	##X phi and nu have been adjusted for cross-hybridization
331 425
 	plateInd <- plate == uplate[p]
332
-	muA.AAp <- muA.AA[!flagsA, p]
333
-	muB.BBp <- muB.BB[!flagsB, p]
334
-		
335
-	##From SNP data
336
-	X <- cbind(1, log(c(muA.AAp, muB.BBp)))
337
-	Y <- log(c(phiA[!flagsA, p], phiB[!flagsB, p]))
338
-	##Y.nu <- log(c(nuA[!snpflags$A, p], nuB[!snpflags$B, p]))
339
-	##Y <- cbind(Y.nu, Y.phi)
340
-	betahat <- solve(crossprod(X), crossprod(X, Y))
341
-	##Prediction
342
-	mus <- rowMedians(NP, na.rm=TRUE)
343
-	X <- cbind(1, log(mus))
344
-	Yhat <- as.numeric(X %*% betahat)
345
-	##NP.nus[, p] <- exp(Yhat[, 1])
346
-	##NP.phis[, p] <- exp(Yhat[, 2])
347
-	phi <- exp(Yhat)
348
-	nu <- mus - 2*phi
349
-	
350
-	phis[, p] <- as.integer(phi)
351
-	nus[, p] <- as.integer(nu)
352
-	##plot(NP.phis[, p], NP.nus[, p], pch=".")
353
-	CT <- 1/phi*(NP-nu)
354
-	tmp <- matrix(as.integer(CT*100), nrow(CT), ncol(CT))
355
-	NP.CT[, plateInd] <- tmp
356
-	if(FALSE){
357
-		##should be centered at 2
358
-		tmp <- rowMeans(NP.CT[, plateInd])
359
-		hist(tmp/100, breaks=200)
360
-	}
426
+	##muA <- muA[!flagsA, p, c("A", "AA")]
427
+	##muB <- muB[!flagsB, p, c("B", "BB")]
428
+	muA <- muA[!flagsA, "AA"]
429
+	muB <- muB[!flagsB, "BB"]
430
+	X <- cbind(1, log2(c(muA, muB)))
431
+	Y <- log2(c(phiA[!flagsA], phiB[!flagsB]))
432
+	if(nrow(X) > 5000) ix <- sample(1:nrow(X), 5000)
433
+	##X <- X[ix, ]
434
+	##Y <- Y[ix]
435
+	betahat <- solve(crossprod(X[ix, ]), crossprod(X[ix, ], Y[ix]))
436
+##	Yhat <- X%*%betahat
437
+##	phihat <- 2^Yhat
438
+##	nuhat <- 2^X[, 2] - 2*phihat
439
+##	nuAB <- c(nuA[!flagsA], nuB[!flagsB])
440
+##	plot(log2(nuhat), log2(nuAB), pch=".")
441
+##	plot(log2(nuhat)-log2(nuAB), pch=".")
442
+##	hist(log2(nuAB))
443
+	##plot(Y-Yhat, pch=".")
444
+	##plot(Y, Yhat, pch=".")
445
+	##calculate R2
446
+	if(CHR == 23){
447
+		cnvs <- envir[["cnvs"]]
448
+		data(cnProbes, package="genomewidesnp6Crlmm")
449
+		cnProbes <- cnProbes[match(cnvs, rownames(cnProbes)), ]
450
+		par <- cnProbes[, "position"] < 2709520 | (cnProbes[, "position"] > 154584237 & cnProbes[, "position"] < 154913754)
451
+		gender <- envir[["gender"]]
452
+		mu1 <- rowMedians(NP[, gender=="male"], na.rm=TRUE)
453
+		mu2 <- rowMedians(NP[, gender=="female"], na.rm=TRUE)
454
+		mus <- log(cbind(mu1, mu2))
455
+		X <- cbind(1, mus[, 1])
456
+		##For build Hg18
457
+		##http://genome.ucsc.edu/cgi-bin/hgGateway
458
+		##pseudo-autosomal regions on X
459
+		##chrX:1-2,709,520 and chrX:154584237-154913754, respectively
460
+		Yhat1 <- as.numeric(X %*% betahat)
461
+		X <- cbind(1, mus[, 2])		
462
+		Yhat2 <- as.numeric(X %*% betahat)
463
+		phi1 <- exp(Yhat1)
464
+		phi2 <- exp(Yhat2)
465
+		nu1 <- exp(mus[, 1]) - phi1
466
+		nu1[par] <- exp(mus[par, 1]) - 2*phi1[par]
467
+		nu2 <- exp(mus[, 2]) - 2*phi2
468
+		CT1 <- 1/phi1*(NP[, gender=="male"]-nu1)
469
+		CT2 <- 1/phi2*(NP[, gender=="female"]-nu2)
470
+		CT1 <- matrix(as.integer(100*CT1), nrow(CT1), ncol(CT1))
471
+		CT2 <- matrix(as.integer(100*CT2), nrow(CT2), ncol(CT2))
472
+		CT <- envir[["CT"]]
473
+		CT[, plate==uplate[p] & gender=="male"] <- CT1
474
+		CT[, plate==uplate[p] & gender=="female"] <- CT2
475
+		envir[["CT"]] <- CT
476
+	} else {
477
+		normalNP <- envir[["normalNP"]]
478
+		normalNP <- normalNP[, plate==uplate[p]]
479
+		mus <- rowMedians(NP * normalNP, na.rm=TRUE)
480
+		crosshyb <- median(muA) - median(mus)
481
+		X <- cbind(1, log2(mus+crosshyb))
482
+		##X <- cbind(1, log2(mus))
483
+		logPhiT <- X %*% betahat
484
+		phiT[, p] <- 2^(logPhiT)
485
+		nuT[, p] <- mus - 2*phiT[, p]
486
+		T <- 1/phiT[, p]*(NP - nuT[, p])
487
+		CT <- envir[["CT"]]
488
+		CT[, plate==uplate[p]] <- matrix(as.integer(100*T), nrow(T), ncol(T))
361 489
 
362
-	##---------------------------------------------------------------------------
363
-	## For NA SNPs, treat as nonpolymorphic
364
-	##---------------------------------------------------------------------------
365
-	CA <- get("CA", envir)
366
-	CB <- get("CB", envir)	
367
-	tmpA <- CA[, plate==uplate[p]]
368
-	tmpB <- CB[, plate==uplate[p]]	
369
-	indexA <- which(rowSums(is.na(tmpA)) > 0)
370
-	indexB <- which(rowSums(is.na(tmpB)) > 0)
371
-	index <- union(indexA, indexB)
372
-	if(length(index) > 0){
373
-		npflags <- get("npflags", envir)
374
-		##warning(paste(length(index), "indices have NAs for the copy number estimates"))		
375
-		npflags[[p]] <- unique(c(npflags[[p]], index))
376
-		assign("npflags", npflags, envir)
490
+		##Variance for prediction region
491
+		sig2T[, plate==uplate[[p]]] <- rowMAD(log2(NP*normalNP), na.rm=TRUE)^2	
492
+		envir[["sig2T"]] <- sig2T
493
+		envir[["CT"]] <- CT
494
+		envir[["phiT"]] <- phiT
495
+		envir[["nuT"]] <- nuT
377 496
 	}
378
-	##if(length(index) > 0) browser()
379
-	
380
-	##---------------------------------------------------------------------------
381
-	## this part could stand improvement
382
-	##  - estimate the uncertainty
383 497
 	##---------------------------------------------------------------------------
384
-	
498
+	## For NA SNPs, treat as nonpolymorphic
385 499
 	##---------------------------------------------------------------------------
386
-	## Estimate variance for copy number probes
387
-	## VAR(CT) = var(1/phi * (NP - nu))
388
-	##         = 1/phi^2 * VAR(NP)
389
-	##         = 1/phi^2 * f(mu) * sigma
390
-	## ** Phi is predicted from a regression using the row medians as predictors
391
-	##   -- this may underestimate the uncertainty
392
-	##---------------------------------------------------------------------------		
393
-##	log.mus <- log2(mus)
394
-##	log.var <- log2(rowVars(NP))
395
-##	f <- predict(fit.variance, newdata=data.frame(log.mus=log.mus))
396
-##	resid <- log.var - f
397
-##	sds <- 1/phi*sqrt(2^(predict(fit.variance, newdata=data.frame(log.mus=log.mus+resid))))
398
-##
399
-##	robustSD <- function(X) diff(quantile(X, probs=c(0.16, (1-0.16)), na.rm=TRUE))/2
400
-##	sample.sds <- apply(CT, 2, robustSD)
401
-##	sample.sds <- sample.sds/median(sample.sds, na.rm=TRUE)
402
-##	NP.sds[, plate==uplate[p]] <- sds %*% t(sample.sds)
403
-	assign("phis", phis, envir)
404
-	assign("nus", nus, envir)
405
-	assign("NP.CT", NP.CT, envir)
406
-##	assign("NP.sds", NP.sds, envir)
407
-##	firstPass.NP <- list(phis=phis, nus=nus, CT=NP.CT, sds=sds,
408
-##			     gns=gns, sns=sns, bns=bns)
409
-##	firstPass.NP
410 500
 }
411 501
 
412
-oneBatch <- function(plateIndex, G, A, B, conf, CONF.THR=0.99, MIN.OBS=3, DF.PRIOR, envir, trim, upperTail, bias.adj=FALSE, priorProb, ...){
413
-	p <- plateIndex
414
-	plate <- get("plate", envir)
415
-	Ns <- get("Ns", envir)	
502
+withinGenotypeMoments <- function(p, A, B, calls, conf, CONF.THR, DF.PRIOR, envir){
503
+	CHR <- envir[["chrom"]]
504
+	Ns <- envir[["Ns"]]
505
+	muA <- envir[["muA"]]
506
+	muB <- envir[["muB"]]
507
+	vA <- envir[["vA"]]
508
+	vB <- envir[["vB"]]
509
+	plate <- envir[["plate"]]
510
+	uplate <- envir[["uplate"]]
511
+	normal <- envir[["normal"]][, plate==uplate[p]]
512
+	G <- calls; rm(calls); gc()	
513
+	if(is.null(normal)) normal <- matrix(TRUE, nrow(G), ncol(G))
514
+
416 515
 	highConf <- 1-exp(-conf/1000)
417 516
 	highConf <- highConf > CONF.THR
418
-	AA <- G == 1 & highConf
419
-	AB <- G == 2 & highConf
420
-	BB <- G == 3 & highConf
421
-	Ns[, p, "AA"] <- rowSums(AA)
422
-	Ns[, p, "AB"] <- rowSums(AB)
423
-	Ns[, p, "BB"] <- rowSums(BB)
424
-	assign("Ns", Ns, envir)
425
-	AA[AA == FALSE] <- NA
426
-	AB[AB == FALSE] <- NA
427
-	BB[BB == FALSE] <- NA
428
-	##---------------------------------------------------------------------------
429
-	## Sufficient statistics (plate-specific)
430
-	##---------------------------------------------------------------------------
431
-	AA.A <- AA*A
432
-	AB.A <- AB*A
433
-	BB.A <- BB*A
434
-	AA.B <- AA*B
435
-	AB.B <- AB*B
436
-	BB.B <- BB*B
437
-	locationAndScale(p=p, AA.A=AA.A, AB.A=AB.A, BB.A=BB.A,
438
-			 AA.B=AA.B, AB.B=AB.B, BB.B=BB.B,
439
-			 envir=envir, DF.PRIOR=DF.PRIOR)
440
-	muA.AA <- get("muA.AA", envir)
441
-	muA.AB <- get("muA.AB", envir)
442
-	muA.BB <- get("muA.BB", envir)
443
-	muB.AA <- get("muB.AA", envir)
444
-	muB.AB <- get("muB.AB", envir)
445
-	muB.BB <- get("muB.BB", envir)
446
-	sigmaA <- get("sigmaA", envir)
447
-	sigmaB <- get("sigmaB", envir)
448
-	tau2A <- get("tau2A", envir)
449
-	tau2B <- get("tau2B", envir)
450
-	sig2A <- get("sig2A", envir)
451
-	sig2B <- get("sig2B", envir)
452
-	corr <- get("corr", envir)
453
-	corrA.BB <- get("corrA.BB", envir)  ## B allele
454
-	corrB.AA <- get("corrB.AA", envir)
517
+	if(CHR == 23){
518
+		gender <- envir[["gender"]]
519
+		IX <- matrix(gender, nrow(G), ncol(G), byrow=TRUE)
520
+		IX <- IX == "female"
521
+	} else IX <- matrix(TRUE, nrow(G), ncol(G))
522
+	
523
+	index <- GT.B <- GT.A <- vector("list", 3)
524
+	names(index) <- names(GT.B) <- names(GT.A) <- c("AA", "AB", "BB")
525
+	##--------------------------------------------------
526
+	##within-genotype sufficient statistics
527
+	##--------------------------------------------------
528
+	GT.B <- GT.A <- list()
529
+	for(j in 1:3){
530
+		GT <- G==j & highConf & IX & normal
531
+		Ns[, p, j+2] <- rowSums(GT, na.rm=TRUE)		
532
+		GT[GT == FALSE] <- NA
533
+		GT.A[[j]] <- GT*A
534
+		GT.B[[j]] <- GT*B
535
+		index[[j]] <- which(Ns[, p, j+2] > 0)
536
+		
537
+		muA[, p, j+2] <- rowMedians(GT.A[[j]], na.rm=TRUE)
538
+		muB[, p, j+2] <- rowMedians(GT.B[[j]], na.rm=TRUE)
539
+		vA[, p, j+2] <- rowMAD(GT.A[[j]], na.rm=TRUE)
540
+		vB[, p, j+2] <- rowMAD(GT.B[[j]], na.rm=TRUE)
541
+
542
+		##Shrink towards the typical variance
543
+		DF <- Ns[, p, j+2]-1
544
+		DF[DF < 1] <- 1
545
+		v0A <- median(vA[, p, j+2], na.rm=TRUE)
546
+		v0B <- median(vB[, p, j+2], na.rm=TRUE)
547
+		vA[, p, j+2] <- (vA[, p, j+2]*DF + v0A*DF.PRIOR)/(DF.PRIOR+DF)
548
+		vA[is.na(vA[, p, j+2]), p, j+2] <- v0A
549
+		vB[, p, j+2] <- (vB[, p, j+2]*DF + v0B*DF.PRIOR)/(DF.PRIOR+DF)
550
+		vB[is.na(vB[, p, j+2]), p, j+2] <- v0B
551
+	}
552
+	if(CHR == 23){
553
+		k <- 1
554
+		for(j in c(1,3)){
555
+			GT <- G==j & highConf & !IX 
556
+			Ns[, p, k] <- rowSums(GT)
557
+			GT[GT == FALSE] <- NA
558
+			muA[, p, k] <- rowMedians(GT*A, na.rm=TRUE)
559
+			muB[, p, k] <- rowMedians(GT*B, na.rm=TRUE)
560
+			vA[, p, k] <- rowMAD(GT*A, na.rm=TRUE)
561
+			vB[, p, k] <- rowMAD(GT*B, na.rm=TRUE)
562
+			
563
+			DF <- Ns[, p, k]-1
564
+			DF[DF < 1] <- 1
565
+			v0A <- median(vA[, p, k], na.rm=TRUE)
566
+			v0B <- median(vB[, p, k], na.rm=TRUE)
567
+			vA[, p, k] <- (vA[, p, k]*DF + v0A*DF.PRIOR)/(DF.PRIOR+DF)
568
+			vA[is.na(vA[, p, k]), p, k] <- v0A
569
+			vB[, p, k] <- (vB[, p, k]*DF + v0B*DF.PRIOR)/(DF.PRIOR+DF)
570
+			vB[is.na(vB[, p, k]), p, k] <- v0B			
571
+			k <- k+1
572
+		}
573
+	}
574
+	envir[["GT.A"]] <- GT.A
575
+	envir[["GT.B"]] <- GT.B
576
+	envir[["Ns"]] <- Ns
577
+	envir[["index"]] <- index
578
+	envir[["muA"]] <- muA
579
+	envir[["muB"]] <- muB
580
+	envir[["vA"]] <- vA
581
+	envir[["vB"]] <- vB
582
+}
583
+	
584
+
585
+oneBatch <- function(plateIndex,
586
+		     A,
587
+		     B,
588
+		     calls,
589
+		     conf,
590
+		     gender,
591
+		     NP,
592
+		     plate,
593
+		     MIN.OBS=1,
594
+		     envir,
595
+		     DF.PRIOR=50,
596
+		     CONF.THR=0.99,
597
+		     trim=0,
598
+		     bias.adj=FALSE, priorProb, ...){
599
+	p <- plateIndex
600
+	CHR <- envir[["chrom"]]
455 601
 	if(bias.adj){
456
-		##First check that nu and phi are available
457
-		nuA <- get("nuA", envir)
602
+		nuA <- envir[["nuA"]]
458 603
 		if(all(is.na(nuA))) {
459 604
 			message("Background and signal coefficients have not yet been estimated -- can not do bias correction yet")
460
-			message("Must run computeCopynumber a second time with bias.adj=TRUE to do the adjustment")
461
-		} else {
462
-			message("running bias adjustment")			
463
-			normal <- biasAdj(A=A, B=B, plateIndex=p, envir=envir, priorProb=priorProb)
464
-			normal[normal == FALSE] <- NA
465
-			AA.A <- AA.A*normal
466
-			AB.A <- AB.A*normal
467
-			BB.A <- BB.A*normal
468
-			AA.B <- AA.B*normal
469
-			AB.B <- AB.B*normal
470
-			BB.B <- BB.B*normal
471
-			message("Recomputing location and scale parameters")						
472
-			locationAndScale(p=p, AA.A=AA.A, AB.A=AB.A, BB.A=BB.A,
473
-					 AA.B=AA.B, AB.B=AB.B, BB.B=BB.B,
474
-					 envir=envir, DF.PRIOR=DF.PRIOR)
475
-			muA.AA <- get("muA.AA", envir)
476
-			muA.AB <- get("muA.AB", envir)
477
-			muA.BB <- get("muA.BB", envir)
478
-			muB.AA <- get("muB.AA", envir)
479
-			muB.AB <- get("muB.AB", envir)
480
-			muB.BB <- get("muB.BB", envir)
481
-			tau2A <- get("tau2A", envir)
482
-			tau2B <- get("tau2B", envir)
483
-			sig2A <- get("sig2A", envir)
484
-			sig2B <- get("sig2B", envir)
485
-			sigmaA <- get("sigmaA", envir)
486
-			sigmaB <- get("sigmaB", envir)
487
-			corr <- get("corr", envir)
488
-			corrA.BB <- get("corrA.BB", envir)  ## B allele
489
-			corrB.AA <- get("corrB.AA", envir)
490
-			Ns.unadj <- Ns
491
-			assign("Ns.unadj", Ns.unadj, envir)
492
-			Ns[, p, "AA"] <- rowSums(!is.na(AA.A))
493
-			Ns[, p, "AB"] <- rowSums(!is.na(AB.A))
494
-			Ns[, p, "BB"] <- rowSums(!is.na(BB.A))			
605
+			stop("Must run computeCopynumber a second time with bias.adj=TRUE to do the adjustment")
495 606
 		}
607
+		message("running bias adjustment")		
608
+		##adjustment for nonpolymorphic probes
609
+		biasAdjNP(plateIndex=p, envir=envir, priorProb=priorProb)
610
+		##adjustment for SNPs
611
+		biasAdj(plateIndex=p, envir=envir, priorProb=priorProb)
612
+		message("Recomputing location and scale parameters")		
496 613
 	}
497
-##	if(trim > 0){
498
-##		##rowMedians is not robust enough when a variant is common
499
-##		##Try a trimmed rowMedian, trimming only one tail (must specify which)
500
-##		##  - for genotypes with 3 or more observations, exclude the upper X% 
501
-##		##        - one way to do this is to replace these observations with NAs
502
-##		##         e.g, exclude round(Ns * X%, 0) observations
503
-##		##  - for genotypes with fewer than 10 observations,
504
-##		##  - recalculate rowMedians
505
-##		replaceWithNAs <- function(x, trim, upperTail){
506
-##			##put NA's last if trimming the upperTail
507
-##			if(upperTail) decreasing <- TRUE else decreasing <- FALSE
508
-##			NN <- round(sum(!is.na(x)) * trim, 0)
509
-##			ix <- order(x, decreasing=decreasing, na.last=TRUE)[1:NN]
510
-##			x[ix] <- NA
511
-##			return(x)
512
-##		}
513
-##		##which rows should be trimmed
514
-##		rowsToTrim <- which(round(Ns[, p, "AA"] * trim, 0) > 0)
515
-##		##replace values in the tail of A with NAs
516
-##		AA.A[rowsToTrim, ] <- t(apply(AA.A[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
517
-##		AA.B[rowsToTrim, ] <- t(apply(AA.B[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
518
-##		rowsToTrim <- which(round(Ns[, p, "AB"] * trim, 0) > 0)
519
-##		AB.A[rowsToTrim, ] <- t(apply(AB.A[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
520
-##		AB.B[rowsToTrim, ] <- t(apply(AB.B[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
521
-##		rowsToTrim <- which(round(Ns[, p, "BB"] * trim, 0) > 0)
522
-##		BB.A[rowsToTrim, ] <- t(apply(BB.A[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
523
-##		BB.B[rowsToTrim, ] <- t(apply(BB.B[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail))
524
-##
525
-##		##Should probably recompute the Ns -- change the
526
-##		##degrees of freedom
527
-##		Ns[, p, "AA"] <- rowSums(!is.na(AA.A))
528
-##		Ns[, p, "AB"] <- rowSums(!is.na(AB.A))
529
-##		Ns[, p, "BB"] <- rowSums(!is.na(BB.A))		
530
-##	}
614
+	withinGenotypeMoments(p=p, A=A, B=B, calls=calls, conf=conf, CONF.THR=CONF.THR,
615
+			      DF.PRIOR=DF.PRIOR, envir=envir)
616
+	GT.A <- envir[["GT.A"]]
617
+	GT.B <- envir[["GT.B"]]
618
+	index <- envir[["index"]]
619
+	locationAndScale(p=p, GT.A=GT.A, GT.B=GT.B, index=index, envir=envir, DF.PRIOR=DF.PRIOR)
620
+	muA <- envir[["muA"]]
621
+	muB <- envir[["muB"]]
622
+	Ns <- envir[["Ns"]]
623
+
531 624
 	##---------------------------------------------------------------------------
532 625
 	## Predict sufficient statistics for unobserved genotypes (plate-specific)
533 626
 	##---------------------------------------------------------------------------
534
-##	NN <- Ns
535
-##	NN[, p, "AA"] <- rowSums(AA & highConf, na.rm=TRUE) ##how many AA were called with high confidence
536
-##	NN[, p, "AB"] <- rowSums(AB & highConf, na.rm=TRUE)
537
-##	NN[, p, "BB"] <- rowSums(BB & highConf, na.rm=TRUE)
538 627
 	index.AA <- which(Ns[, p, "AA"] >= 3)
539 628
 	index.AB <- which(Ns[, p, "AB"] >= 3)
540 629
 	index.BB <- which(Ns[, p, "BB"] >= 3)
541
-	correct.orderA <- muA.AA[, p] > muA.BB[, p]
542
-	correct.orderB <- muB.BB[, p] > muB.AA[, p]
543
-	if(length(index.AB) > 0){
544
-		nobs <- rowSums(Ns[, p, ] >= MIN.OBS) == 3
545
-	} else nobs <- rowSums(Ns[, p, c(1,3)] >= MIN.OBS) == 2
630
+	correct.orderA <- muA[, p, "AA"] > muA[, p, "BB"]
631
+	correct.orderB <- muB[, p, "BB"] > muB[, p, "AA"]
632
+	##For chr X, this will ignore the males 
633
+	nobs <- rowSums(Ns[, p, 3:5] >= MIN.OBS, na.rm=TRUE) == 3
546 634
 	index.complete <- which(correct.orderA & correct.orderB & nobs) ##be selective here
547 635
 	size <- min(5000, length(index.complete))
548 636
 	if(size == 5000) index.complete <- sample(index.complete, 5000)
... ...
@@ -550,192 +638,112 @@ oneBatch <- function(plateIndex, G, A, B, conf, CONF.THR=0.99, MIN.OBS=3, DF.PRI
550 638
 		warning("fewer than 200 snps pass criteria for predicting the sufficient statistics")
551 639
 		stop()
552 640
 	}
553
-	index.AA <- which(Ns[, p, "AA"] == 0 & (Ns[, p, "AB"] >= MIN.OBS & Ns[, p, "BB"] >= MIN.OBS))
554
-	index.AB <- which(Ns[, p, "AB"] == 0 & (Ns[, p, "AA"] >= MIN.OBS & Ns[, p, "BB"] >= MIN.OBS))
555
-	index.BB <- which(Ns[, p, "BB"] == 0 & (Ns[, p, "AB"] >= MIN.OBS & Ns[, p, "AA"] >= MIN.OBS))
556
-	if(length(index.AA) > 0){
557
-		##Predict mean for AA genotypes
558
-		X.AA <- cbind(1, muA.AB[index.complete, p], muA.BB[index.complete, p],
559
-			      muB.AB[index.complete, p], muB.BB[index.complete, p])
560
-		Y <- cbind(muA.AA[index.complete, p], muB.AA[index.complete, p])
561
-		XtY <- crossprod(X.AA, Y)
562
-		betahat <- solve(crossprod(X.AA), XtY)
563
-		X.AA <- cbind(1, muA.AB[index.AA, p], muA.BB[index.AA, p],
564
-			      muB.AB[index.AA, p], muB.BB[index.AA, p])
565
-		musAA <- X.AA %*% betahat
566
-		musAA[musAA <= 100] <- 100
567
-		muA.AA[index.AA, p] <- musAA[, 1]
568
-		muB.AA[index.AA, p] <- musAA[, 2]
569
-	}
570
-	if(length(index.AB) > 0){
571
-		##Predict mean for AB genotypes
572
-		X.AB <- cbind(1, muA.AA[index.complete, p], muA.BB[index.complete, p],
573
-			      muB.AA[index.complete, p], muB.BB[index.complete, p])
574
-		Y <- cbind(muA.AB[index.complete, p], muB.AB[index.complete, p])
575
-		XtY <- crossprod(X.AB, Y)
576
-		betahat <- solve(crossprod(X.AB), XtY)
577
-		X.AB <- cbind(1, muA.AA[index.AB, p], muA.BB[index.AB, p],
578
-			      muB.AA[index.AB, p], muB.BB[index.AB, p])
579
-		musAB <- X.AB %*% betahat
580
-		musAB[musAB <= 100] <- 100					
581
-		muA.AB[index.AB, p] <- musAB[, 1]
582
-		muB.AB[index.AB, p] <- musAB[, 2]				
641
+	index[[1]] <- which(Ns[, p, "AA"] == 0 & (Ns[, p, "AB"] >= MIN.OBS & Ns[, p, "BB"] >= MIN.OBS))
642
+	index[[2]] <- which(Ns[, p, "AB"] == 0 & (Ns[, p, "AA"] >= MIN.OBS & Ns[, p, "BB"] >= MIN.OBS))
643
+	index[[3]] <- which(Ns[, p, "BB"] == 0 & (Ns[, p, "AB"] >= MIN.OBS & Ns[, p, "AA"] >= MIN.OBS))
644
+	mnA <- muA[, p, 3:5]
645
+	mnB <- muB[, p, 3:5]
646
+	for(j in 1:3){
647
+		if(length(index[[j]]) == 0) next()
648
+		X <- cbind(1, mnA[index.complete,  -j], mnB[index.complete,  -j])
649
+		Y <- cbind(mnA[index.complete, j], mnB[index.complete,  j])
650
+		betahat <- solve(crossprod(X), crossprod(X,Y))
651
+		X <- cbind(1, mnA[index[[j]],  -j],  mnB[index[[j]],  -j])
652
+		mus <- X %*% betahat
653
+		muA[index[[j]], p, j+2] <- mus[, 1]
654
+		muB[index[[j]], p, j+2] <- mus[, 2]
583 655
 	}
584
-	if(length(index.BB) > 0){
585
-		##Predict mean for BB genotypes
586
-		X.BB <- cbind(1, muA.AA[index.complete, p], muA.AB[index.complete, p],
587
-			      muB.AA[index.complete, p], muB.AB[index.complete, p])
588
-		Y <- cbind(muA.BB[index.complete, p], muB.BB[index.complete, p])
589
-		XtY <- crossprod(X.BB, Y)
590
-		betahat <- solve(crossprod(X.BB), XtY)
591
-		X.BB <- cbind(1, muA.AA[index.BB, p], muA.AB[index.BB, p],
592
-			      muB.AA[index.BB, p], muB.AB[index.BB, p])
593
-		musBB <- X.BB %*% betahat
594
-		musBB[musBB <= 100] <- 100				
595
-		muA.BB[index.BB, p] <- musBB[, 1]
596
-		muB.BB[index.BB, p] <- musBB[, 2]								
656
+	nobsA <- Ns[, p, "A"] > 10
657
+	nobsB <- Ns[, p, "B"] > 10
658
+	complete <- list()
659
+	complete[[1]] <- which(correct.orderA & correct.orderB & nobsA) ##be selective here
660
+	complete[[2]] <- which(correct.orderA & correct.orderB & nobsB) ##be selective here	
661
+	size <- min(5000, length(complete[[1]]))
662
+	if(size == 5000) complete <- lapply(complete, function(x) sample(x, size))
663
+	if(CHR == 23){
664
+		index[[1]] <- which(Ns[, p, "A"] == 0)
665
+		index[[2]] <- which(Ns[, p, "B"] == 0)
666
+		cols <- 2:1
667
+		for(j in 1:2){
668
+			if(length(index[[j]]) == 0) next()
669
+			X <- cbind(1, muA[complete[[j]], p, cols[j]], muB[complete[[j]], p, cols[j]])
670
+			Y <- cbind(muA[complete[[j]], p, j], muB[complete[[j]], p, j])
671
+			betahat <- solve(crossprod(X), crossprod(X,Y))
672
+			X <- cbind(1, muA[index[[j]], p, cols[j]],  muB[index[[j]], p, cols[j]])
673
+			mus <- X %*% betahat
674
+			muA[index[[j]], p, j] <- mus[, 1]
675
+			muB[index[[j]], p, j] <- mus[, 2]
676
+		}
597 677
 	}
598 678
 	##missing two genotypes
599
-#	index.AA <- which(NN[, p, "AA"] == 0 & (NN[, p, "AB"] < MIN.OBS | NN[, p, "BB"] < MIN.OBS))
600
-#	index.AB <- which(NN[, p, "AB"] == 0 & (NN[, p, "AA"] < MIN.OBS | NN[, p, "BB"] < MIN.OBS))
601
-#	index.BB <- which(NN[, p, "BB"] == 0 & (NN[, p, "AB"] >= MIN.OBS | NN[, p, "AA"] >= MIN.OBS))	
602 679
 	noAA <- Ns[, p, "AA"] < MIN.OBS
603 680
 	noAB <- Ns[, p, "AB"] < MIN.OBS
604 681
 	noBB <- Ns[, p, "BB"] < MIN.OBS
682
+	index[[1]] <- noAA & noAB
683
+	index[[2]] <- noBB & noAB
684
+	index[[3]] <- noAA & noBB
685
+	snpflags <- envir[["snpflags"]]
686
+	snpflags[, p] <- index[[1]] | index[[2]] | index[[3]]
687
+
605 688
 	##---------------------------------------------------------------------------
606 689
 	## Two genotype clusters not observed -- would sequence help? (didn't seem that helpful)
607 690
 	## 1 extract index of complete data
608 691
 	## 2 Regress  mu1,mu3 ~ sequence + mu2
609 692
 	## 3 Predict mu1*, mu3* for missing genotypes
610
-	##---------------------------------------------------------------------------			
611
-	if(sum(noAA & noAB) > 0){
612
-		##predict AA and AB centers
613
-		X <- cbind(1, muA.BB[index.complete, p], muB.BB[index.complete, p])
614
-		Y <- cbind(muA.AA[index.complete, p],
615
-			   muB.AA[index.complete, p],
616
-			   muA.AB[index.complete, p],
617
-			   muB.AB[index.complete, p])
618
-		XtY <- crossprod(X, Y)
619
-		betahat <- solve(crossprod(X), XtY)
620
-		X <- cbind(1, muA.BB[noAA & noAB, p], muB.BB[noAA & noAB, p])
621
-		mus <- X %*% betahat
622
-		mus[mus <= 100] <- 100				
623
-		muA.AA[noAA & noAB, p] <- mus[, 1]
624
-		muB.AA[noAA & noAB, p] <- mus[, 2]
625
-		muA.AB[noAA & noAB, p] <- mus[, 3]
626
-		muB.AB[noAA & noAB, p] <- mus[, 4]				
627
-	}
628
-	if(sum(noBB & noAB) > 0){
629
-		##predict AB and BB centers
630
-		X <- cbind(1, muA.AA[index.complete, p], muB.AA[index.complete, p])
631
-		Y <- cbind(muA.AB[index.complete, p],
632
-			   muB.AB[index.complete, p],
633
-			   muA.BB[index.complete, p], #muA.AB[index.complete, p],
634
-			   muB.BB[index.complete, p])#, muB.AB[index.complete, p])
635
-		XtY <- crossprod(X, Y)
636
-		betahat <- solve(crossprod(X), XtY)
637
-		X <- cbind(1, muA.AA[noBB & noAB, p], muB.AA[noBB & noAB, p])
638
-		mus <- X %*% betahat
639
-		mus[mus <= 100] <- 100
640
-		muA.AB[noBB & noAB, p] <- mus[, 1]
641
-		muB.AB[noBB & noAB, p] <- mus[, 2]								
642
-		muA.BB[noBB & noAB, p] <- mus[, 3]
643
-		muB.BB[noBB & noAB, p] <- mus[, 4]				
644
-	}
645
-	if(sum(noAA & noBB) > 0){
646
-		##predict AA and BB centers
647
-		X <- cbind(1, muA.AB[index.complete, p], muB.AB[index.complete, p])
648
-		Y <- cbind(muA.AA[index.complete, p],
649
-			   muB.AA[index.complete, p],
650
-			   muA.BB[index.complete, p], #muA.AB[index.complete, p],
651
-			   muB.BB[index.complete, p])#, muB.AB[index.complete, p])
652
-		XtY <- crossprod(X, Y)
653
-		betahat <- solve(crossprod(X), XtY)
654
-		X <- cbind(1, muA.AB[noAA & noBB, p], muB.AB[noAA & noBB, p])
693
+	##---------------------------------------------------------------------------
694
+	cols <- c(3, 1, 2)
695
+	for(j in 1:3){
696
+		if(sum(index[[1]]) == 0) next()
697
+		k <- cols[j]
698
+		X <- cbind(1, mnA[index.complete, k], mnB[index.complete, k])
699
+		Y <- cbind(mnA[index.complete,  -k],
700
+			   mnB[index.complete,  -k])
701
+		betahat <- solve(crossprod(X), crossprod(X,Y))
702
+		X <- cbind(1, mnA[index[[j]],  k], mnB[index[[j]],  k])
655 703
 		mus <- X %*% betahat
656
-		mus[mus <= 100] <- 100
657
-		muA.AA[noAA & noBB, p] <- mus[, 1]
658
-		muB.AA[noAA & noBB, p] <- mus[, 2]								
659
-		muA.BB[noAA & noBB, p] <- mus[, 3]
660
-		muB.BB[noAA & noBB, p] <- mus[, 4]				
704
+		muA[index[[j]], p, -c(1, 2, k+2)] <- mus[, 1:2]
705
+		muB[index[[j]], p, -c(1, 2, k+2)] <- mus[, 3:4]
661 706
 	}
707
+	
708
+	negA <- rowSums(muA[, p, ] < 0) > 0
709
+	negB <- rowSums(muB[, p, ] < 0) > 0	
710
+	snpflags[, p] <- snpflags[, p] | negA | negB | rowSums(is.na(muA[, p, 3:5]), na.rm=TRUE) > 0
711
+	envir[["snpflags"]] <- snpflags
662 712
 	dn.Ns <- dimnames(Ns)
663 713
 	Ns <- array(as.integer(Ns), dim=dim(Ns))
664 714
 	dimnames(Ns)[[3]] <- dn.Ns[[3]]
665
-	if(any(is.na(muA.AA) | any(is.na(muA.AB)) | any(is.na(muA.BB))))
666
-		warning("Some SNPs do not have any genotype calls above the confidence threshold. Check the indices in snpflags and npflags.")
667
-	assign("muA.AA", muA.AA, envir)
668
-	assign("muA.AB", muA.AB, envir)
669
-	assign("muA.BB", muA.BB, envir)
670
-	assign("muB.AA", muB.AA, envir)
671
-	assign("muB.AB", muB.AB, envir)
672
-	assign("muB.BB", muB.BB, envir)
673
-	assign("sigmaA", sigmaA, envir)
674
-	assign("sigmaB", sigmaB, envir)	
675
-	assign("tau2A", tau2A, envir)
676
-	assign("tau2B", tau2B, envir)
677
-	assign("sig2A", sig2A, envir)
678
-	assign("sig2B", sig2B, envir)
679
-	assign("corr", corr, envir)
680
-	assign("corrB.AA", corrB.AA, envir)
681
-	assign("corrA.BB", corrA.BB, envir)				
682
-	assign("Ns", Ns, envir)	
683
-	plates.completed <- get("plates.completed", envir)
715
+	envir[["Ns"]] <- Ns
716
+	envir[["muA"]] <- muA
717
+	envir[["muB"]] <- muB
718
+	plates.completed <- envir[["plates.completed"]]
684 719
 	plates.completed[p] <- TRUE
685
-	assign("plates.completed", plates.completed, envir)
720
+	envir[["plates.completed"]] <- plates.completed
686 721
 }
687 722
 
688
-locationAndScale <- function(p, AA.A, AB.A, BB.A,
689
-			     AA.B, AB.B, BB.B, envir,
690
-			     DF.PRIOR){
691
-	muA.AA <- get("muA.AA", envir)
692
-	muA.AB <- get("muA.AB", envir)
693
-	muA.BB <- get("muA.BB", envir)
694
-	muB.AA <- get("muB.AA", envir)
695
-	muB.AB <- get("muB.AB", envir)
696
-	muB.BB <- get("muB.BB", envir)
697
-	tau2A <- get("tau2A", envir)
698
-	tau2B <- get("tau2B", envir)
699
-	sig2A <- get("sig2A", envir)
700
-	sig2B <- get("sig2B", envir)
701
-	corr <- get("corr", envir)
702
-	corrA.BB <- get("corrA.BB", envir)  ## B allele
703
-	corrB.AA <- get("corrB.AA", envir)
704
-	Ns <- get("Ns", envir)	
705
-	muA.AA[, p] <- rowMedians(AA.A, na.rm=TRUE)
706
-	muA.AB[, p] <- rowMedians(AB.A, na.rm=TRUE)
707
-	muA.BB[, p] <- rowMedians(BB.A, na.rm=TRUE)
708
-	muB.AA[, p] <- rowMedians(AA.B, na.rm=TRUE)
709
-	muB.AB[, p] <- rowMedians(AB.B, na.rm=TRUE)
710
-	muB.BB[, p] <- rowMedians(BB.B, na.rm=TRUE)
711
-	sigmaA <- matrix(NA, nrow(AA.A), 3)
712
-	sigmaB <- matrix(NA, nrow(AA.A), 3)	
713
-	colnames(sigmaB) <- colnames(sigmaA) <- c("AA", "AB", "BB")
714
-	sigmaA[, "AA"] <- rowMAD(AA.A, na.rm=TRUE)
715
-	sigmaA[, "AB"] <- rowMAD(AB.A, na.rm=TRUE)
716
-	sigmaA[, "BB"] <- rowMAD(BB.A, na.rm=TRUE)
717
-	sigmaB[, "AA"] <- rowMAD(AA.B, na.rm=TRUE)
718
-	sigmaB[, "AB"] <- rowMAD(AB.B, na.rm=TRUE)
719
-	sigmaB[, "BB"] <- rowMAD(BB.B, na.rm=TRUE)
723
+locationAndScale <- function(p, GT.A, GT.B, index, envir, DF.PRIOR){
724
+	tau2A <- envir[["tau2A"]]
725
+	tau2B <- envir[["tau2B"]]
726
+	sig2A <- envir[["sig2A"]]
727
+	sig2B <- envir[["sig2B"]]
720 728
 
721
-	##shrink
722
-	DF <- Ns[, p, ]-1
723
-	DF[DF < 1] <- 1
724
-	medsA <- apply(sigmaA, 2, "median", na.rm=TRUE)
725
-	medsB <- apply(sigmaB, 2, "median", na.rm=TRUE)			
726
-	for(m in 1:3){
727
-		sigmaA[, m] <- (sigmaA[, m]*DF[, m] + medsA[m]*DF.PRIOR)/(DF.PRIOR+DF[, m])
728
-		sigmaA[is.na(sigmaA[, m]), m] <- medsA[m]
729
-		sigmaB[, m] <- (sigmaB[, m]*DF[, m] + medsB[m]*DF.PRIOR)/(DF.PRIOR+DF[, m])
730
-		sigmaB[is.na(sigmaB[, m]), m] <- medsB[m]		
731
-	}
732
-	index.AA <- which(Ns[, p, "AA"] >= 2)
733
-	index.AB <- which(Ns[, p, "AB"] >= 2)
734
-	index.BB <- which(Ns[, p, "BB"] >= 2)
729
+	corr <- envir[["corr"]]
730
+	corrA.BB <- envir[["corrA.BB"]]
731
+	corrB.AA <- envir[["corrB.AA"]]
732
+	Ns <- get("Ns", envir)
733
+	
734
+	index.AA <- index[[1]]
735
+	index.AB <- index[[2]]
736
+	index.BB <- index[[3]]
737
+	rm(index); gc()
735 738
 
739
+	AA.A <- GT.A[[1]]
740
+	AB.A <- GT.A[[2]]
741
+	BB.A <- GT.A[[3]]
742
+	
743
+	AA.B <- GT.B[[1]]
744
+	AB.B <- GT.B[[2]]
745
+	BB.B <- GT.B[[3]]	
736 746
 	x <- BB.A[index.BB, ]
737
-	##x <- Ip[index.BB, , "BB", "A"]
738
-	##tau2A[index.BB, p] <- rowVars(x, na.rm=TRUE)##var(log(IA)| BB)
739 747
 	tau2A[index.BB, p] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2
740 748
 	DF <- Ns[, p, "BB"]-1
741 749
 	DF[DF < 1] <- 1
... ...
@@ -743,14 +751,12 @@ locationAndScale <- function(p, AA.A, AB.A, BB.A,
743 751
 	tau2A[, p] <- (tau2A[, p] * DF  +  med * DF.PRIOR)/(DF.PRIOR + DF)
744 752
 	tau2A[is.na(tau2A[, p]), p] <- med
745 753
 	
746
-	##x <- Ip[index.BB, , "BB", "B"]
747 754
 	x <- BB.B[index.BB, ]
748 755
 	sig2B[index.BB, p] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2	
749 756
 	med <- median(sig2B[, p], na.rm=TRUE)
750 757
 	sig2B[, p] <- (sig2B[, p] * DF  +  med * DF.PRIOR)/(DF.PRIOR + DF)
751 758
 	sig2B[is.na(sig2B[, p]), p] <- med
752 759
 	
753
-	##x <- Ip[index.AA, , "AA", "B"]
754 760
 	x <- AA.B[index.AA, ]
755 761
 	tau2B[index.AA, p] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2		
756 762
 	DF <- Ns[, p, "AA"]-1
... ...
@@ -759,19 +765,15 @@ locationAndScale <- function(p, AA.A, AB.A, BB.A,
759 765
 	tau2B[, p] <- (tau2B[, p] * DF  +  med * DF.PRIOR)/(DF.PRIOR + DF)
760 766
 	tau2B[is.na(tau2B[, p]), p] <- med
761 767
 	
762
-	##x <- Ip[index.AA, , "AA", "A"]
763 768
 	x <- AA.A[index.AA, ]
764 769
 	sig2A[index.AA, p] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2##var(log(IA)|AA)	
765 770
 	med <- median(sig2A[, p], na.rm=TRUE)
766 771
 	sig2A[, p] <- (sig2A[, p]*DF  +  med * DF.PRIOR)/(DF.PRIOR + DF)
767 772
 	sig2A[is.na(sig2A[, p]), p] <- med	
768 773
 
769
-	##estimate the correlation where there is at least 1 or more copies (AB genotypes)
770 774
 	if(length(index.AB) > 0){ ##all homozygous is possible
771 775
 		x <- AB.A[index.AB, ]
772 776
 		y <- AB.B[index.AB, ]
773
-		##x <- Ip[index.AB, , "AB", "A"]
774
-		##y <- Ip[index.AB, , "AB", "B"]
775 777
 		corr[index.AB, p] <- rowCors(x, y, na.rm=TRUE)
776 778
 		corr[corr < 0] <- 0
777 779
 		DF <- Ns[, p, "AB"]-1
... ...
@@ -780,11 +782,8 @@ locationAndScale <- function(p, AA.A, AB.A, BB.A,
780 782
 		corr[, p] <- (corr[, p]*DF  +  med * DF.PRIOR)/(DF.PRIOR + DF)
781 783
 		corr[is.na(corr[, p]), p] <- med
782 784
 	}
783
-	##estimate the correlation of the background errors and AA, BB genotypes
784
-	##backgroundB <- Ip[index.AA, , "AA", "B"]
785 785
 	backgroundB <- AA.B[index.AA, ]
786 786
 	signalA <- AA.A[index.AA, ]
787
-	##signalA <- Ip[index.AA, , "AA", "A"]
788 787
 	corrB.AA[index.AA, p] <- rowCors(backgroundB, signalA, na.rm=TRUE)
789 788
 	DF <- Ns[, p, "AA"]-1
790 789
 	DF[DF < 1] <- 1
... ...
@@ -792,10 +791,8 @@ locationAndScale <- function(p, AA.A, AB.A, BB.A,
792 791
 	corrB.AA[, p] <- (corrB.AA[, p]*DF + med*DF.PRIOR)/(DF.PRIOR + DF)
793 792
 	corrB.AA[is.na(corrB.AA[, p]), p] <- med
794 793
 
795
-	##backgroundA <- Ip[index.BB, , "BB", "A"]
796 794
 	backgroundA <- BB.A[index.BB, ]
797 795
 	signalB <- BB.B[index.BB, ]
798
-	##signalB <- Ip[index.BB, , "BB", "B"]
799 796
 	corrA.BB[index.BB, p] <- rowCors(backgroundA, signalB, na.rm=TRUE)
800 797
 	DF <- Ns[, p, "BB"]-1
801 798
 	DF[DF < 1] <- 1
... ...
@@ -803,310 +800,183 @@ locationAndScale <- function(p, AA.A, AB.A, BB.A,
803 800
 	corrA.BB[, p] <- (corrA.BB[, p]*DF + med*DF.PRIOR)/(DF.PRIOR + DF)
804 801
 	corrA.BB[is.na(corrA.BB[, p]), p] <- med
805 802
 
806
-	assign("muA.AA", muA.AA, envir)
807
-	assign("muA.AB", muA.AB, envir)
808
-	assign("muA.BB", muA.BB, envir)
809
-	assign("muB.AA", muB.AA, envir)
810
-	assign("muB.AB", muB.AB, envir)
811
-	assign("muB.BB", muB.BB, envir)
812
-	assign("sigmaA", sigmaA, envir)
813
-	assign("sigmaB", sigmaB, envir)	
814
-	assign("tau2A", tau2A, envir)
815
-	assign("tau2B", tau2B, envir)
816
-	assign("sig2A", sig2A, envir)
817
-	assign("sig2B", sig2B, envir)
818
-	assign("corr", corr, envir)
819
-	assign("corrB.AA", corrB.AA, envir)
820
-	assign("corrA.BB", corrA.BB, envir)			
803
+	envir[["tau2A"]] <- tau2A
804
+	envir[["tau2B"]] <- tau2B
805
+	envir[["sig2A"]] <- sig2A
806
+	envir[["sig2B"]] <- sig2B
807
+	envir[["corr"]] <- corr
808
+	envir[["corrB.AA"]] <- corrB.AA
809
+	envir[["corrA.BB"]] <- corrA.BB	
821 810
 }
822 811
 
823 812
 coefs <- function(plateIndex, conf, MIN.OBS=3, envir, CONF.THR=0.99){
824 813
 	p <- plateIndex
825
-	plates.completed <- get("plates.completed", envir)
814
+	plates.completed <- envir[["plates.completed"]]
826 815
 	if(!plates.completed[p]) return()
827
-	plate <- get("plate", envir)
828
-	nuA <- get("nuA", envir)
829
-	nuB <- get("nuB", envir)
830
-	nuA.se <- get("nuA.se", envir)
831
-	nuB.se <- get("nuB.se", envir)
832
-	phiA <- get("phiA", envir)
833
-	phiB <- get("phiB", envir)
834
-	phiA.se <- get("phiA.se", envir)
835
-	phiB.se <- get("phiB.se", envir)
836
-	muA.AA <- get("muA.AA", envir)
837
-	muA.AB <- get("muA.AB", envir)
838
-	muA.BB <- get("muA.BB", envir)
839
-	muB.AA <- get("muB.AA", envir)
840
-	muB.AB <- get("muB.AB", envir)
841