Browse code

added cnrma-functions.R

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@43029 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 16/11/2009 11:50:13
Showing1 changed files

1 1
new file mode 100644
... ...
@@ -0,0 +1,2071 @@
1
+rowCovs <- function(x, y, ...){
2
+	notna <- !is.na(x)
3
+	N <- rowSums(notna)
4
+	x <- suppressWarnings(log2(x))
5
+	if(!missing(y)) y <- suppressWarnings(log2(y))
6
+	return(rowSums((x - rowMeans(x, ...)) * (y - rowMeans(y, ...)), ...)/(N-1))
7
+}
8
+
9
+rowMAD <- function(x, y, ...){
10
+	notna <- !is.na(x)
11
+	mad <- 1.4826*rowMedians(abs(x-rowMedians(x, ...)), ...)
12
+	return(mad)
13
+}
14
+
15
+rowCors <- function(x, y, ...){
16
+	N <- rowSums(!is.na(x))
17
+	x <- suppressWarnings(log2(x))
18
+	y <- suppressWarnings(log2(y))
19
+	sd.x <- rowSds(x, ...)
20
+	sd.y <- rowSds(y, ...)
21
+	covar <- rowSums((x - rowMeans(x, ...)) * (y - rowMeans(y, ...)), ...)/(N-1)
22
+	return(covar/(sd.x*sd.y))
23
+}
24
+
25
+generateX <- function(w, X) as.numeric(diag(w) %*% X)
26
+generateIXTX <- function(x, nrow=3) {
27
+	X <- matrix(x, nrow=nrow)
28
+	XTX <- crossprod(X)
29
+	solve(XTX)
30
+}
31
+
32
+
33
+nuphiAllele <- function(object, allele, Ystar, W, tmp.objects, cnOptions){
34
+	I <- isSnp(object)
35
+	object.snp <- object[I, ]
36
+	Ystar <- Ystar[I, ]
37
+	complete <- rowSums(is.na(W)) == 0 & I
38
+	W <- W[I, ]
39
+	if(any(!is.finite(W))){## | any(!is.finite(V))){
40
+		i <- which(rowSums(!is.finite(W)) > 0)
41
+		##browser()
42
+		stop("Possible zeros in the within-genotype estimates of the spread (vA, vB). ")
43
+	}	
44
+	Ns <- tmp.objects[["Ns"]]	
45
+	Ns <- Ns[I, ]
46
+	
47
+	CHR <- unique(chromosome(object))
48
+	batch <- unique(object$batch)
49
+	nuA <- getParam(object, "nuA", batch)
50
+	nuB <- getParam(object, "nuB", batch)
51
+	nuA.se <- getParam(object, "nuA.se", batch)
52
+	nuB.se <- getParam(object, "nuB.se", batch)
53
+	phiA <- getParam(object, "phiA", batch)
54
+	phiB <- getParam(object, "phiB", batch)
55
+	phiA.se <- getParam(object, "phiA.se", batch)
56
+	phiB.se <- getParam(object, "phiB.se", batch)	
57
+	if(CHR==23){
58
+		phiAX <- getParam(object, "phiAX", batch)
59
+		phiBX <- getParam(object, "phiBX", batch)		
60
+	}
61
+	NOHET <- mean(Ns[, "AB"], na.rm=TRUE) < 0.05
62
+	if(missing(allele)) stop("must specify allele")
63
+	if(CHR == 23){
64
+		##Design matrix for X chromosome depends on whether there was a sufficient number of AB genotypes
65
+		if(length(grep("AB", colnames(W))) > 0){
66
+			if(allele == "A") X <- cbind(1, c(1, 0, 2, 1, 0), c(0, 1, 0, 1, 2))
67
+			if(allele == "B") X <- cbind(1, c(0, 1, 0, 1, 2), c(1, 0, 2, 1, 0))
68
+		} else{
69
+			if(allele == "A") X <- cbind(1, c(1, 0, 2, 0), c(0, 1, 0, 2))
70
+			if(allele == "B") X <- cbind(1, c(0, 1, 0, 2), c(1, 0, 2, 0))
71
+		}
72
+	} else {##autosome
73
+		if(allele == "A") X <- cbind(1, 2:0) else X <- cbind(1, 0:2)
74
+		if(NOHET) X <- X[-2, ] ##more than 1 X chromosome, but all homozygous		
75
+	}
76
+
77
+	##How to quickly generate Xstar, Xstar = diag(W) %*% X
78
+	Xstar <- apply(W, 1, generateX, X)
79
+	IXTX <- apply(Xstar, 2, generateIXTX, nrow=nrow(X))
80
+	##as.numeric(diag(W[1, ]) %*% X)
81
+	if(CHR == 23){
82
+		betahat <- matrix(NA, 3, nrow(Ystar))
83
+		ses <- matrix(NA, 3, nrow(Ystar))		
84
+	} else{
85
+		betahat <- matrix(NA, 2, nrow(Ystar))
86
+		ses <- matrix(NA, 2, nrow(Ystar))
87
+	}
88
+	for(i in 1:nrow(Ystar)){
89
+		betahat[, i] <- crossprod(matrix(IXTX[, i], ncol(X), ncol(X)), crossprod(matrix(Xstar[, i], nrow=nrow(X)), Ystar[i, ]))
90
+		ssr <- sum((Ystar[i, ] - matrix(Xstar[, i], nrow(X), ncol(X)) %*% matrix(betahat[, i], ncol(X), 1))^2)
91
+		ses[, i] <- sqrt(diag(matrix(IXTX[, i], ncol(X), ncol(X)) * ssr))
92
+	}
93
+	if(allele == "A"){
94
+		nuA[complete] <- betahat[1, ]
95
+		phiA[complete] <- betahat[2, ]
96
+		nuA.se[complete] <- ses[1, ]
97
+		phiA.se[complete] <- ses[2, ]
98
+		object <- pr(object, "nuA", batch, nuA)
99
+		object <- pr(object, "nuA.se", batch, nuA.se)
100
+		object <- pr(object, "phiA", batch, phiA)
101
+		object <- pr(object, "phiA.se", batch, phiA.se)
102
+		if(CHR == 23){
103
+			phiAX[complete] <- betahat[3, ]
104
+			object <- pr(object, "phiAX", batch, phiAX)			
105
+		}
106
+	}
107
+	if(allele=="B"){
108
+		nuB[complete] <- betahat[1, ]
109
+		phiB[complete] <- betahat[2, ]
110
+		nuB.se[complete] <- ses[1, ]
111
+		phiB.se[complete] <- ses[2, ]
112
+		object <- pr(object, "nuB", batch, nuB)
113
+		object <- pr(object, "nuB.se", batch, nuB.se)
114
+		object <- pr(object, "phiB", batch, phiB)		
115
+		object <- pr(object, "phiB.se", batch, phiB.se)
116
+		if(CHR == 23){
117
+			phiBX[complete] <- betahat[3, ]
118
+			object <- pr(object, "phiBX", batch, phiBX)
119
+		}
120
+	}
121
+	THR.NU.PHI <- cnOptions$THR.NU.PHI
122
+	if(THR.NU.PHI){
123
+		verbose <- cnOptions$verbose
124
+		if(verbose) message("Thresholding nu and phi")
125
+		object <- thresholdModelParams(object, cnOptions)
126
+	}
127
+	return(object)
128
+}
129
+
130
+celDates <- function(celfiles){
131
+	if(!all(file.exists(celfiles))) stop("1 or more cel file does not exist")
132
+	celdates <- vector("character", length(celfiles))
133
+	celtimes <- vector("character", length(celfiles))
134
+	for(i in seq(along=celfiles)){
135
+		if(i %% 100 == 0) cat(".")
136
+		tmp <- read.celfile.header(celfiles[i], info="full")$DatHeader
137
+		tmp <- strsplit(tmp, "\ +")
138
+		celdates[i] <- tmp[[1]][6]
139
+		celtimes[i] <- tmp[[1]][7]
140
+	}
141
+	tmp <- paste(celdates, celtimes)
142
+	celdts <- strptime(tmp, "%m/%d/%y %H:%M:%S")
143
+	return(celdts)
144
+}
145
+
146
+predictGender <- function(res, cdfName="genomewidesnp6", SNRMin=5){
147
+	pkgname <- paste(cdfName, "Crlmm", sep="")
148
+	path <- system.file("extdata", package=pkgname)
149
+        loader("23index.rda", .crlmmPkgEnv, pkgname)
150
+	index <- getVarInEnv("index")
151
+	##load(file.path(path, "23index.rda"))
152
+	XIndex <- index[[1]]	
153
+	if(class(res) != "ABset"){
154
+		A <- res$A
155
+		B <- res$B
156
+	} else{
157
+		A <- res@assayData[["A"]]
158
+		B <- res@assayData[["B"]]
159
+	}
160
+	tmp <- which(rowSums(is.na(A)) > 0)
161
+	XMedian <- apply(log2(A[XIndex,, drop=FALSE])+log2(B[XIndex,, drop=FALSE]), 2, median, na.rm=TRUE)/2
162
+	SNR <- res$SNR
163
+	if(sum(SNR>SNRMin)==1){
164
+		gender <- which.min(c(abs(XMedian-8.9), abs(XMedian-9.5)))
165
+	}else{
166
+		gender <- kmeans(XMedian, c(min(XMedian[SNR>SNRMin]), max(XMedian[SNR>SNRMin])))[["cluster"]]
167
+	}
168
+	return(gender)
169
+}
170
+
171
+
172
+combineIntensities <- function(res, cnrmaResult, cdfName){
173
+	rownames(res$B) <- rownames(res$A) <- res$gns
174
+	colnames(res$B) <- colnames(res$A) <- res$sns
175
+	if(!is.null(cnrmaResult)){
176
+		NP <- cnrmaResult$NP
177
+		blank <- matrix(NA, nrow(NP), ncol(NP))
178
+		dimnames(blank) <- dimnames(NP)
179
+		A <- rbind(res$A, NP)
180
+		B <- rbind(res$B, blank)
181
+	} else {
182
+		A <- res$A
183
+		B <- res$B
184
+	}
185
+	ABset <- new("SnpQSet",
186
+		     senseThetaA=A,
187
+		     senseThetaB=B,
188
+		     annotation=cdfName)
189
+	return(ABset)
190
+}
191
+
192
+##harmonizeSnpSet <- function(callSet, ABset, cdfName){
193
+##	blank <- matrix(NA, length(cnNames(ABset, cdfName)), ncol(ABset))
194
+##	rownames(blank) <- cnNames(ABset, cdfName)
195
+##	colnames(blank) <- sampleNames(ABset)
196
+##	crlmmCalls <- rbind(calls(callSet), blank)
197
+##	crlmmConf <- rbind(confs(callSet), blank)
198
+##	fD <- as.matrix(fData(callSet))
199
+##	fD2 <- matrix(NA, nrow(blank), ncol(fD))
200
+##	rownames(fD2) <- rownames(blank)
201
+##	fD <- rbind(fD, fD2)
202
+##	aD <- assayDataNew("lockedEnvironment",
203
+##			   calls=crlmmCalls,
204
+##			   callProbability=crlmmConf)
205
+##	##Make callSet the same dimension as ABset
206
+##	fD <- new("AnnotatedDataFrame",
207
+##		  data=data.frame(fD),
208
+##		  varMetadata=fvarMetadata(callSet))
209
+##	callSet <- new("SnpSet",
210
+##			   call=crlmmCalls,
211
+##			   callProbability=crlmmConf,
212
+##			   featureData=fD,
213
+##			   phenoData=phenoData(callSet),
214
+##			   protocolData=protocolData(ABset),
215
+##			   annotation=annotation(ABset))
216
+##	stopifnot(all.equal(dimnames(callSet), dimnames(ABset)))
217
+##	callSet
218
+##}
219
+
220
+harmonizeDimnamesTo <- function(object1, object2){
221
+	#object2 should be a subset of object 1
222
+	object2 <- object2[featureNames(object2) %in% featureNames(object1), ]
223
+	object1 <- object1[match(featureNames(object2), featureNames(object1)), ]
224
+	object1 <- object1[, match(sampleNames(object2), sampleNames(object1))]
225
+	stopifnot(all.equal(featureNames(object1), featureNames(object2)))
226
+	stopifnot(all.equal(sampleNames(object1), sampleNames(object2)))
227
+	return(object1)
228
+}
229
+
230
+crlmmCopynumber <- function(filenames, cnOptions, ...){
231
+	crlmmWrapper(filenames, cnOptions, ...)
232
+}
233
+
234
+crlmmWrapper <- function(filenames, cnOptions, ...){
235
+	cdfName <- cnOptions[["cdfName"]]
236
+	load.it <- cnOptions[["load.it"]]
237
+	save.it <- cnOptions[["save.it"]]
238
+	splitByChr <- cnOptions[["splitByChr"]]
239
+	crlmmFile <- cnOptions[["crlmmFile"]]
240
+	intensityFile=cnOptions[["intensityFile"]]
241
+	rgFile=cnOptions[["rgFile"]]
242
+	use.ff=cnOptions[["use.ff"]]
243
+	outdir <- cnOptions[["outdir"]]
244
+	tmpdir <- cnOptions[["tmpdir"]]
245
+	
246
+	if(missing(cdfName)) stop("cdfName is missing -- a valid cdfName is required.  See crlmm:::validCdfNames()")
247
+	platform <- whichPlatform(cdfName)
248
+	if(!(platform %in% c("affymetrix", "illumina"))){
249
+		stop("Only 'affymetrix' and 'illumina' platforms are supported at this time.")
250
+	} else {
251
+		message("Checking whether annotation package for the ", platform, " platform is available")
252
+		if(!isValidCdfName(cdfName)){
253
+			cat("FALSE\n")
254
+			stop(cdfName, " is not a valid entry.  See crlmm:::validCdfNames(platform)")
255
+		} else cat("TRUE\n")
256
+	}
257
+	if(missing(intensityFile)) stop("must specify 'intensityFile'.")
258
+	if(missing(crlmmFile)) stop("must specify 'crlmmFile'.")
259
+	if(platform == "illumina"){
260
+		if(missing(rgFile)){
261
+			##stop("must specify 'rgFile'.")
262
+			rgFile <- file.path(dirname(crlmmFile), "rgFile.rda")
263
+			message("rgFile not specified.  Using ", rgFile)
264
+		}
265
+		if(!load.it){
266
+			RG <- readIdatFiles(...)
267
+			if(save.it) save(RG, file=rgFile)
268
+		}
269
+		if(load.it & !file.exists(rgFile)){
270
+			message("load.it is TRUE, bug rgFile not present.  Attempting to read the idatFiles.")
271
+			RG <- readIdatFiles(...)
272
+			if(save.it) save(RG, file=rgFile)
273
+		}
274
+		if(load.it & file.exists(rgFile)){
275
+			message("Loading RG file")
276
+			load(rgFile)
277
+			RG <- get("RG")
278
+		}
279
+	}
280
+	if(!(file.exists(dirname(crlmmFile)))) stop(dirname(crlmmFile), " does not exist.")
281
+	if(!(file.exists(dirname(intensityFile)))) stop(dirname(intensityFile), " does not exist.")
282
+
283
+	##---------------------------------------------------------------------------
284
+	## FIX
285
+	outfiles <- file.path(dirname(crlmmFile), paste("crlmmSetList_", 1:24, ".rda", sep=""))
286
+	if(load.it & all(file.exists(outfiles))){
287
+		load(outfiles[1])
288
+		crlmmSetList <- get("crlmmSetList")
289
+		if(!all(sampleNames(crlmmSetList) == basename(filenames))){
290
+			stop("load.it is TRUE, but sampleNames(crlmmSetList != basename(filenames))")
291
+		} else{
292
+			return("load.it is TRUE and 'crlmmSetList_<CHR>.rda' objects found. Nothing to do...")
293
+		}
294
+	}
295
+	if(load.it){
296
+		if(!file.exists(crlmmFile)){
297
+			message("load.it is TRUE, but ", crlmmFile, " does not exist.  Rerunning the genotype calling algorithm") 
298
+			load.it <- FALSE
299
+		}
300
+	}
301
+
302
+	if(platform == "affymetrix"){
303
+		if(!file.exists(crlmmFile) | !load.it){
304
+			callSet <- crlmm(filenames=filenames,
305
+					     cdfName=cdfName,
306
+					     save.it=TRUE,
307
+					     load.it=load.it,
308
+					     intensityFile=intensityFile)
309
+			message("Quantile normalizing the copy number probes...")		
310
+			cnrmaResult <- cnrma(filenames=filenames, cdfName=cdfName, outdir=outdir)
311
+			if(save.it){
312
+				message("Saving callSet and cnrmaResult to", crlmmFile)
313
+				save(callSet, cnrmaResult, file=crlmmFile)
314
+			}
315
+		} else {
316
+			message("Loading ", crlmmFile, "...")
317
+			load(intensityFile)				
318
+			load(crlmmFile)
319
+			callSet <- get("callSet")
320
+			cnrmaResult <- get("cnrmaResult")
321
+		}
322
+		scanDates <- data.frame(ScanDate=sapply(filenames, celfileDate))
323
+		protocolData(callSet) <- new("AnnotatedDataFrame",
324
+					     data=scanDates,
325
+					     varMetadata=data.frame(labelDescription=colnames(scanDates),
326
+					     row.names=colnames(scanDates)))
327
+	}
328
+	if(platform == "illumina"){
329
+		if(!file.exists(crlmmFile) | !load.it){		
330
+			callSet <- crlmmIllumina(RG=RG,
331
+						 cdfName=cdfName,
332
+						 sns=sampleNames(RG),
333
+						 returnParams=TRUE,
334
+						 save.it=TRUE,
335
+						 intensityFile=intensityFile)
336
+			if(save.it) save(callSet, file=crlmmFile)
337
+		} else {
338
+			message("Loading ", crlmmFile, "...")
339
+			load(crlmmFile)
340
+			callSet <- get("callSet")
341
+		}
342
+		protocolData(callSet) <- protocolData(RG)
343
+	}
344
+	load(intensityFile)
345
+	if(platform=="illumina"){
346
+		if(exists("cnAB")){
347
+			np.A <- as.integer(cnAB$A)
348
+			np.B <- as.integer(cnAB$B)
349
+			np <- ifelse(np.A > np.B, np.A, np.B)
350
+			np <- matrix(np, nrow(cnAB$A), ncol(cnAB$A))
351
+			rownames(np) <- cnAB$gns
352
+			colnames(np) <- cnAB$sns
353
+			cnAB$NP <- np
354
+			##sampleNames(callSet) <- res$sns
355
+			sampleNames(callSet) <- cnAB$sns
356
+			cnrmaResult <- get("cnAB")
357
+		} else cnrmaResult <- NULL
358
+	}
359
+	if(platform=="affymetrix"){
360
+		if(exists("cnrmaResult")){
361
+			cnrmaResult <- get("cnrmaResult")
362
+		} else cnrmaResult <- NULL
363
+	}
364
+	ABset <- combineIntensities(get("res"), cnrmaResult, cdfName)
365
+	if(platform=="affymetrix") {
366
+		protocolData(callSet)[["ScanDate"]] <- as.character(celDates(filenames))
367
+		sampleNames(protocolData(callSet)) <- sampleNames(callSet)
368
+	}
369
+	##callSet <- harmonizeSnpSet(callSet, ABset, cdfName)
370
+	##Make callSet the same dimension as ABset
371
+	if(nrow(callSet) != nrow(ABset)){
372
+		callsConfs <- calls <- matrix(NA, nrow(ABset), ncol(ABset))
373
+		dimnames(callsConfs) <- dimnames(calls) <- list(featureNames(ABset), sampleNames(ABset))
374
+		fd <- data.frame(matrix(NA, nrow(calls), length(fvarLabels(callSet))))
375
+		rownames(fd) <- featureNames(ABset)
376
+		colnames(fd) <- fvarLabels(callSet)
377
+		fD <- new("AnnotatedDataFrame",
378
+			  data=data.frame(fd),
379
+			  varMetadata=data.frame(labelDescription=colnames(fd), row.names=colnames(fd)))
380
+		callSet2 <- new("SnpSet",
381
+				call=calls,
382
+				callProbability=callsConfs,
383
+				featureData=fD,
384
+				phenoData=phenoData(callSet),
385
+				experimentData=experimentData(callSet),
386
+				protocolData=protocolData(callSet),
387
+				annotation=annotation(callSet))
388
+		## match the featureNames of the original callSet (snps only) to the ABset object
389
+		index <- match(featureNames(callSet), featureNames(ABset))
390
+		if(any(is.na(index))) stop("missing values in match")
391
+		## Next, assign the calls to the appropriate subset of the full callSet (includes NPs)
392
+		calls(callSet2)[index, ] <- calls(callSet)
393
+		confs(callSet2)[index, ] <- confs(callSet)
394
+		fData(callSet2)[index, ] <- fData(callSet)
395
+		callSet <- callSet2
396
+		rm(callsConfs, calls, callSet2, fd, fD); gc()
397
+	} else{
398
+		callSet <- callSet[match(featureNames(ABset), featureNames(callSet)), ]
399
+	}
400
+	stopifnot(all.equal(featureNames(callSet), featureNames(ABset)))
401
+	stopifnot(all.equal(sampleNames(callSet), sampleNames(ABset)))
402
+
403
+	## create object with all of the assay data elements
404
+	## add an indicator to featureData for whether it is a snp or a np probe
405
+	## add annotation
406
+	## how should we combine the phenoData?
407
+	pd1 <- phenoData(ABset)
408
+	pd2 <- phenoData(callSet)
409
+	pd <- cbind(pData(pd1), pData(pd2))
410
+	pD <- new("AnnotatedDataFrame", data=pd,
411
+		  varMetadata=data.frame(labelDescription=colnames(pd),
412
+		  row.names=colnames(pd)))
413
+	nr <- nrow(ABset); nc <- ncol(ABset)
414
+	if(!use.ff){
415
+		callSetPlus <- new("SnpCallSetPlus",
416
+				   senseThetaA=A(ABset),
417
+				   senseThetaB=B(ABset), 
418
+				   calls=calls(callSet), 
419
+				   callsConfidence=confs(callSet),
420
+				   phenoData=pD,
421
+				   featureData=featureData(callSet),
422
+				   annotation=annotation(ABset),
423
+				   experimentData=experimentData(callSet),
424
+				   protocolData=protocolData(callSet))
425
+
426
+	} else {
427
+		callSetPlus <- new("SnpCallSetPlusFF",
428
+				   senseThetaA=ff(as.integer(A(ABset)), dim=c(nr,nc), vmode="integer", dimnames=list(featureNames(ABset), sampleNames(ABset))),
429
+				   senseThetaB=ff(as.integer(B(ABset)), dim=c(nr, nc), vmode="integer", dimnames=list(featureNames(ABset), sampleNames(ABset))),
430
+				   calls=ff(as.integer(calls(callSet)), dim=c(nr, nc), vmode="integer", dimnames=list(featureNames(ABset), sampleNames(ABset))),
431
+				   callsConfidence=ff(as.integer(confs(callSet)), dim=c(nr, nc), vmode="integer", dimnames=list(featureNames(ABset), sampleNames(ABset))),
432
+				   phenoData=pD,
433
+				   featureData=featureData(callSet),
434
+				   annotation=annotation(ABset),
435
+				   experimentData=experimentData(callSet),
436
+				   protocolData=protocolData(callSet))
437
+	}
438
+	featureData(callSetPlus) <- addFeatureAnnotation(callSetPlus)
439
+	if(splitByChr){
440
+		saved.objects <- splitByChromosome(callSetPlus, cnOptions)
441
+		##callSetPlus <- list.files(outdir, pattern="", full.names=TRUE)
442
+		if(!save.it) unlink(intensityFile)
443
+		return(saved.objects)
444
+	} 
445
+	if(!save.it){
446
+		message("Cleaning up")
447
+		unlink(intensityFile)
448
+	}
449
+	return(callSetPlus)
450
+}
451
+
452
+validCdfNames <- function(){
453
+	c("genomewidesnp6",
454
+	  "genomewidesnp5",
455
+	  "human370v1c",
456
+	  "human370quadv3c",
457
+	  "human550v3b",
458
+	  "human650v3a",
459
+	  "human610quadv1b",
460
+	  "human660quadv1a",
461
+	  "human1mduov3b")
462
+}
463
+
464
+##validCdfNames <- function(platform){
465
+##	if(!missing(platform)){
466
+##		if(!platform %in% c("illumina", "affymetrix"))
467
+##			stop("only illumina and affymetrix platforms are supported.")
468
+##		if(platform=="illumina"){
469
+##			chipList = c("human1mv1c",             # 1M
470
+##			"human370v1c",            # 370CNV
471
+##			"human650v3a",            # 650Y
472
+##			"human610quadv1b",        # 610 quad
473
+##			"human660quadv1a",        # 660 quad
474
+##			"human370quadv3c",        # 370CNV quad
475
+##			"human550v3b",            # 550K
476
+##			"human1mduov3b")          # 1M Duo
477
+##		}
478
+##		if(platform=="affymetrix"){
479
+##			chipList=c("genomewidesnp6", "genomewidesnp5")
480
+##		}
481
+##	} else{
482
+##		chipList <- list()
483
+##		chipList$affymetrix <- c("genomewidesnp6","genomewidesnp5")
484
+##		chipList$illumina <- c("human370v1c",
485
+##				       "human370quadv3c",
486
+##				       "human550v3b",
487
+##				       "human650v3a",
488
+##				       "human610quadv1b",
489
+##				       "human660quadv1a",
490
+##				       "human1mduov3b")
491
+##	}
492
+##	return(chipList)
493
+##}
494
+isValidCdfName <- function(cdfName){
495
+	chipList <- validCdfNames()
496
+	result <- cdfName %in% chipList	
497
+	if(!(result)){
498
+		warning("cdfName must be one of the following: ",
499
+			chipList)
500
+	}
501
+	return(result)
502
+}
503
+
504
+whichPlatform <- function(cdfName){
505
+	index <- grep("genomewidesnp", cdfName)
506
+	if(length(index) > 0){
507
+		platform <- "affymetrix"
508
+	} else{
509
+		index <- grep("human", cdfName)
510
+		platform <- "illumina"
511
+	}
512
+	return(platform)
513
+}
514
+
515
+
516
+##isValidCdfName <- function(cdfName, platform){
517
+##	chipList <- validCdfNames(platform)
518
+##	if(!(cdfName %in% chipList)){
519
+##		warning("cdfName must be one of the following: ",
520
+##			chipList)
521
+##	}
522
+##	result <- cdfName %in% chipList
523
+##	return(result)
524
+##}
525
+	
526
+	
527
+	
528
+# steps: quantile normalize hapmap: create 1m_reference_cn.rda object
529
+cnrma <- function(filenames, cdfName, sns, seed=1, verbose=FALSE, outdir){
530
+	if(missing(cdfName)) stop("must specify cdfName")
531
+	pkgname <- getCrlmmAnnotationName(cdfName)
532
+	require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available")
533
+	if (missing(sns)) sns <- basename(filenames)
534
+        loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
535
+	fid <- getVarInEnv("npProbesFid")
536
+	set.seed(seed)
537
+	idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything
538
+	SKW <- vector("numeric", length(filenames))
539
+##	if(bigmemory){
540
+##		NP <- filebacked.big.matrix(length(pnsa), length(filenames),
541
+##					    type="integer",
542
+##					    init=as.integer(0),
543
+##					    backingpath=outdir,
544
+##					    backingfile="NP.bin",
545
+##					    descriptorfile="NP.desc")
546
+##	} else{
547
+		NP <- matrix(NA, length(fid), length(filenames))
548
+##	}
549
+	verbose <- TRUE
550
+	if(verbose){
551
+		message("Processing ", length(filenames), " files.")
552
+		if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3)
553
+	}
554
+	if(cdfName=="genomewidesnp6"){
555
+		loader("1m_reference_cn.rda", .crlmmPkgEnv, pkgname)
556
+	}
557
+	if(cdfName=="genomewidesnp5"){
558
+		loader("5.0_reference_cn.rda", .crlmmPkgEnv, pkgname)
559
+	}
560
+	reference <- getVarInEnv("reference")
561
+	##if(!is.matrix(reference)) stop("target distribution for quantile normalization not available.")
562
+	for(i in seq(along=filenames)){
563
+		y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
564
+		x <- log2(y[idx2])
565
+		SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3)
566
+		rm(x)
567
+		NP[, i] <- as.integer(normalize.quantiles.use.target(y, target=reference))
568
+		if (verbose)
569
+			if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
570
+			else cat(".")
571
+	}
572
+	dimnames(NP) <- list(names(fid), sns)
573
+	##dimnames(NP) <- list(map[, "man_fsetid"], sns)
574
+	res3 <- list(NP=NP, SKW=SKW)
575
+	cat("\n")
576
+	return(res3)
577
+}
578
+
579
+getFlags <- function(object, PHI.THR){
580
+	batch <- unique(object$batch)
581
+	nuA <- getParam(object, "nuA", batch)
582
+	nuB <- getParam(object, "nuB", batch)
583
+	phiA <- getParam(object, "phiA", batch)
584
+	phiB <- getParam(object, "phiB", batch)
585
+	negativeNus <- nuA < 1 | nuB < 1
586
+	negativePhis <- phiA < PHI.THR | phiB < PHI.THR
587
+	negativeCoef <- negativeNus | negativePhis
588
+	notfinitePhi <- !is.finite(phiA) | !is.finite(phiB)
589
+	flags <- negativeCoef | notfinitePhi
590
+	return(flags)
591
+}
592
+
593
+
594
+instantiateObjects <- function(object, cnOptions){
595
+	Ns <- matrix(NA, nrow(object), 5)
596
+	colnames(Ns) <- c("A", "B", "AA", "AB", "BB")
597
+	vA <- vB <- muB <- muA <- Ns
598
+	normal <- matrix(TRUE, nrow(object), ncol(object))
599
+	dimnames(normal) <- list(featureNames(object), sampleNames(object))
600
+	tmp.objects <- list(vA=vA,
601
+			    vB=vB,
602
+			    muB=muB,
603
+			    muA=muA,
604
+			    Ns=Ns,
605
+			    normal=normal)
606
+        return(tmp.objects)
607
+}
608
+
609
+##updateNuPhi <- function(crlmmSetList, cnSet){
610
+##	##TODO: remove the use of environments.
611
+##	cdfName <- annotation(crlmmSetList[[1]])
612
+##	##repopulate the environment
613
+##	crlmmSetList <- crlmmSetList[, match(sampleNames(cnSet), sampleNames(crlmmSetList))]
614
+##	crlmmSetList <- crlmmSetList[match(featureNames(cnSet), featureNames(crlmmSetList)), ]
615
+##	##envir <- getCopyNumberEnvironment(crlmmSetList, cnSet)
616
+##	Nset <- crlmmSetList[[1]]
617
+##	##indices of polymorphic loci
618
+##	index <- snpIndex(Nset)
619
+##	ABset <- Nset[index, ]
620
+##	##indices of nonpolymorphic loci
621
+##	NPset <- Nset[-index, ]
622
+##	##genotypes/confidences
623
+##	snpset <- crlmmSetList[[2]][index,]
624
+##	##previous version of compute copy number
625
+##	envir <- new.env()	
626
+##	message("Running bias adjustment...")
627
+####	.computeCopynumber(chrom=CHR,
628
+####			   A=A(ABset),
629
+####			   B=B(ABset),
630
+####			   calls=calls(snpset),
631
+####			   conf=confs(snpset),
632
+####			   NP=A(NPset),
633
+####			   plate=batch,
634
+####			   envir=envir,
635
+####			   SNR=ABset$SNR,
636
+####			   bias.adj=TRUE,
637
+####			   SNRmin=SNRmin,
638
+####			   ...)	
639
+##}
640
+
641
+##ist2locusSet <- function(envir, ABset, NPset, CHR, cdfName){
642
+##	if(missing(CHR)) stop("Must specify chromosome")
643
+##	pkgname <- paste(cdfName, "Crlmm", sep="")	
644
+##	path <- system.file("extdata", package=pkgname)
645
+##	loader("cnProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv)
646
+##	cnProbes <- get("cnProbes", envir=.crlmmPkgEnv)
647
+##	loader("snpProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv)
648
+##	snpProbes <- get("snpProbes", envir=.crlmmPkgEnv)	
649
+##	##require(oligoClasses) || stop("oligoClasses package not available")
650
+##	if(length(ls(envir)) == 0) stop("environment empty")
651
+##	batch <- envir[["plate"]]
652
+##	##SNP copy number	
653
+##	CA <- envir[["CA"]]
654
+##	dimnames(CA) <- list(envir[["snps"]], envir[["sns"]])	
655
+##	CB <- envir[["CB"]]
656
+##	dimnames(CB) <- dimnames(CA)
657
+##	##NP copy number
658
+##	if(!is.null(NPset)){
659
+##		CT <- envir[["CT"]]
660
+##		rownames(CT) <- envir[["cnvs"]]
661
+##		colnames(CT) <- envir[["sns"]]
662
+##		sig2T <- envir[["sig2T"]]
663
+##		rownames(sig2T) <- rownames(CT)
664
+##		nuT <- envir[["nuT"]]
665
+##		colnames(nuT) <- paste("nuT", unique(batch), sep="_")
666
+##		phiT <- envir[["phiT"]]
667
+##		colnames(phiT) <- paste("phiT", unique(batch), sep="_")
668
+##		naMatrix <- matrix(NA, nrow(CT), ncol(CT))
669
+##		dimnames(naMatrix) <- dimnames(CT)
670
+##	} else{
671
+##		sig2T <- nuT <- phiT <- naMatrix <- CT <- NULL
672
+##	}
673
+##	CA <- rbind(CA, CT)
674
+##	CB <- rbind(CB, naMatrix)	
675
+##
676
+##	##SNP parameters
677
+##	tau2A <- envir[["tau2A"]]
678
+##	colnames(tau2A) <- paste("tau2A", unique(batch), sep="_")
679
+##	tau2B <- envir[["tau2B"]]
680
+##	colnames(tau2B) <- paste("tau2B", unique(batch), sep="_")
681
+##	sig2A <- envir[["sig2A"]]
682
+##	colnames(sig2A) <- paste("sig2A", unique(batch), sep="_")
683
+##	sig2B <- envir[["sig2B"]]
684
+##	colnames(sig2B) <- paste("sig2B", unique(batch), sep="_")
685
+##	nuA <- envir[["nuA"]]
686
+##	colnames(nuA) <- paste("nuA", unique(batch), sep="_")
687
+##	nuB <- envir[["nuB"]]
688
+##	colnames(nuB) <- paste("nuB", unique(batch), sep="_")
689
+##	phiA <- envir[["phiA"]]
690
+##	colnames(phiA) <- paste("phiA", unique(batch), sep="_")
691
+##	phiB <- envir[["phiB"]]
692
+##	colnames(phiB) <- paste("phiB", unique(batch), sep="_")
693
+##	corr <- envir[["corr"]]
694
+##	colnames(corr) <- paste("corr", unique(batch), sep="_")
695
+##	corrA.BB <- envir[["corrA.BB"]]
696
+##	colnames(corrA.BB) <- paste("corrA.BB", unique(batch), sep="_")
697
+##	corrB.AA <- envir[["corrB.AA"]]
698
+##	colnames(corrB.AA) <- paste("corrB.AA", unique(batch), sep="_")
699
+##
700
+##
701
+##	##Combine SNP and NP parameters
702
+##	if(!is.null(NPset)){
703
+##		naMatrixParams <- matrix(NA, nrow(CT), length(unique(batch)))
704
+##		dimnames(naMatrixParams) <- list(rownames(CT), unique(batch))
705
+##	} else{
706
+##		naMatrixParams <- NULL
707
+##	}
708
+##	tau2A <- rbind(tau2A, naMatrixParams)
709
+##	tau2B <- rbind(tau2B, naMatrixParams)
710
+##	sig2A <- rbind(sig2A, sig2T)
711
+##	sig2B <- rbind(sig2B, naMatrixParams)
712
+##	corr <- rbind(corr, naMatrixParams)
713
+##	corrA.BB <- rbind(corrA.BB, naMatrixParams)
714
+##	corrB.AA <- rbind(corrB.AA, naMatrixParams)
715
+##	nuA <- rbind(nuA, nuT)
716
+##	phiA <- rbind(phiA, phiT)
717
+##	nuB <- rbind(nuB, naMatrixParams)
718
+##	phiB <- rbind(phiB, naMatrixParams)
719
+##	rownames(tau2A) <- rownames(tau2B) <- rownames(sig2A) <- rownames(sig2B) <- rownames(CA)
720
+##	rownames(corr) <- rownames(corrA.BB) <- rownames(corrB.AA) <- rownames(CA)
721
+##	rownames(nuA) <- rownames(phiA) <- rownames(nuB) <- rownames(phiB) <- rownames(CA)	
722
+##	##phenodata
723
+##	phenodata <- phenoData(ABset)
724
+##	phenodata <- phenodata[match(envir[["sns"]], sampleNames(phenodata)), ]
725
+##	if(!("batch" %in% varLabels(phenodata))) phenodata$batch <- envir[["plate"]]
726
+##
727
+##	##Feature Data
728
+##	position.snp <- snpProbes[match(envir[["snps"]], rownames(snpProbes)), "position"]
729
+##	names(position.snp) <- envir[["snps"]]
730
+##	if(!is.null(NPset)){
731
+##		position.np <- cnProbes[match(envir[["cnvs"]], rownames(cnProbes)), "position"]
732
+##		names(position.np) <- envir[["cnvs"]]
733
+##	} else position.np <- NULL
734
+##	position <- c(position.snp, position.np)
735
+##	if(!(identical(names(position), rownames(CA)))){
736
+##		position <- position[match(rownames(CA), names(position))]
737
+##	}
738
+##	if(sum(duplicated(names(position))) > 0){
739
+##		warning("Removing rows with NA identifiers...")
740
+##		##RS: fix this
741
+##		I <- which(!is.na(names(position)))
742
+##	}  else I <- seq(along=names(position))
743
+##	fd <- data.frame(cbind(CHR,
744
+##			       position[I],
745
+##			       tau2A[I,, drop=FALSE],
746
+##			       tau2B[I,, drop=FALSE],
747
+##			       sig2A[I,, drop=FALSE],
748
+##			       sig2B[I,, drop=FALSE],
749
+##			       nuA[I,, drop=FALSE],
750
+##			       nuB[I,, drop=FALSE],
751
+##			       phiA[I,, drop=FALSE],
752
+##			       phiB[I,, drop=FALSE],
753
+##			       corr[I,, drop=FALSE],
754
+##			       corrA.BB[I,, drop=FALSE],
755
+##			       corrB.AA[I,, drop=FALSE]))
756
+##	colnames(fd)[1:2] <- c("chromosome", "position")
757
+##	rownames(fd) <- rownames(CA)[I]
758
+##	fD <- new("AnnotatedDataFrame",
759
+##		  data=fd,
760
+##		  varMetadata=data.frame(labelDescription=colnames(fd)))
761
+##	placeholder <- matrix(NA, nrow(CA), ncol(CA))
762
+##	dimnames(placeholder) <- dimnames(CA)
763
+##	assayData <- assayDataNew("lockedEnvironment",
764
+##				  CA=placeholder[I, ],
765
+##				  CB=placeholder[I, ])
766
+##	cnset <- new("CopyNumberSet",
767
+##		      assayData=assayData,
768
+##		      featureData=fD,
769
+##		      phenoData=phenodata,
770
+##		      annotation=cdfName)
771
+##	CA(cnset) <- CA  ##replacement method converts to integer
772
+##	CB(cnset) <- CB  ##replacement method converts to integer
773
+##	cnset <- thresholdCopyNumberSet(cnset)
774
+##	return(cnset)
775
+##
776
+
777
+
778
+
779
+thresholdCopynumber <- function(object){
780
+	ca <- CA(object)
781
+	cb <- CB(object)
782
+	ca[ca < 0.05] <- 0.05
783
+	ca[ca > 5] <- 5
784
+	cb[cb < 0.05] <- 0.05
785
+	cb[cb > 5] <- 5
786
+	CA(object) <- ca
787
+	CB(object) <- cb
788
+	return(object)
789
+}
790
+
791
+##preprocessOptions <- function(crlmmFile="snpsetObject.rda",
792
+##			      intensityFile="normalizedIntensities.rda",
793
+##			      rgFile="rgFile.rda"){
794
+##
795
+##}
796
+
797
+cnOptions <- function(tmpdir=tempdir(),
798
+		      outdir="./",
799
+		      cdfName,
800
+		      crlmmFile="snpsetObject.rda",
801
+		      intensityFile="normalizedIntensities.rda",
802
+		      rgFile="rgFile.rda",
803
+		      save.it=TRUE,
804
+		      load.it=TRUE,
805
+		      splitByChr=TRUE,
806
+		      use.ff=FALSE,
807
+		      MIN.OBS=3,
808
+		      MIN.SAMPLES=10,
809
+		      batch=NULL,
810
+		      DF.PRIOR=50,
811
+		      bias.adj=FALSE,
812
+		      prior.prob=rep(1/4, 4),
813
+		      SNRmin=5,
814
+		      seed=123,
815
+		      verbose=TRUE,
816
+		      GT.CONF.THR=0.99,
817
+		      PHI.THR=2^6,##used in nonpolymorphic fxn for training
818
+		      nAA.THR=5, ##used in nonpolymorphic fxn for training
819
+		      nBB.THR=5, ##used in nonpolymorphic fxn for training
820
+		      MIN.NU=2^3,
821
+		      MIN.PHI=2^3,
822
+		      THR.NU.PHI=TRUE,
823
+		      thresholdCopynumber=TRUE,
824
+		      unlink=TRUE,
825
+		      hiddenMarkovModel=FALSE,
826
+		      circularBinarySegmentation=FALSE,
827
+		      cbsOpts=NULL,
828
+		      hmmOpts=NULL, ...){
829
+	if(missing(cdfName)) stop("must specify cdfName")
830
+	if(!file.exists(outdir)){
831
+		message(outdir, " does not exist.  Trying to create it.")
832
+		dir.create(outdir, recursive=TRUE)
833
+	}
834
+	stopifnot(isValidCdfName(cdfName))
835
+	if(hiddenMarkovModel){
836
+		hmmOpts <- hmmOptions(...)
837
+	}
838
+	if(is.null(batch))
839
+		stop("batch must have the same length as the number of samples")
840
+	list(tmpdir=tmpdir,
841
+	     outdir=outdir,
842
+	     cdfName=cdfName,
843
+	     crlmmFile=file.path(outdir, crlmmFile),
844
+	     intensityFile=file.path(outdir, intensityFile),
845
+	     rgFile=file.path(outdir, rgFile),
846
+	     save.it=save.it,
847
+	     load.it=load.it,
848
+	     splitByChr=splitByChr,
849
+	     use.ff=use.ff,
850
+	     MIN.OBS=MIN.OBS,
851
+	     MIN.SAMPLES=MIN.SAMPLES,
852
+	     batch=batch,
853
+	     DF.PRIOR=DF.PRIOR,
854
+	     GT.CONF.THR=GT.CONF.THR,
855
+	     prior.prob=prior.prob,
856
+	     bias.adj=bias.adj,
857
+	     SNRmin=SNRmin,
858
+	     seed=seed,
859
+	     verbose=verbose,
860
+	     PHI.THR=PHI.THR,
861
+	     nAA.THR=nAA.THR,
862
+	     nBB.THR=nBB.THR,
863
+	     MIN.NU=MIN.NU,
864
+	     MIN.PHI=MIN.PHI,
865
+	     THR.NU.PHI=THR.NU.PHI,
866
+	     unlink=unlink,
867
+	     hiddenMarkovModel=hiddenMarkovModel,
868
+	     circularBinarySegmentation=circularBinarySegmentation,
869
+	     cbsOpts=cbsOpts,
870
+	     hmmOpts=hmmOpts) ##remove SnpCallSetPlus object
871
+}
872
+
873
+##linear model parameters
874
+lm.parameters <- function(object, cnOptions){
875
+	fD <- fData(object)
876
+	batch <- object$batch
877
+	uplate <- unique(batch)
878
+	parameterNames <- c(paste("tau2A", uplate, sep="_"),
879
+			    paste("tau2B", uplate, sep="_"),
880
+			    paste("sig2A", uplate, sep="_"),
881
+			    paste("sig2B", uplate, sep="_"),
882
+			    paste("nuA", uplate, sep="_"),
883
+			    paste("nuA.se", uplate, sep="_"),			    
884
+			    paste("nuB", uplate, sep="_"),
885
+			    paste("nuB.se", uplate, sep="_"),			    			    
886
+			    paste("phiA", uplate, sep="_"),
887
+			    paste("phiA.se", uplate, sep="_"),			    
888
+			    paste("phiB", uplate, sep="_"),
889
+			    paste("phiB.se", uplate, sep="_"),			    
890
+			    paste("phiAX", uplate, sep="_"),
891
+			    paste("phiBX", uplate, sep="_"),			    
892
+			    paste("corr", uplate, sep="_"),
893
+			    paste("corrA.BB", uplate, sep="_"),
894
+			    paste("corrB.AA", uplate, sep="_"))
895
+	pMatrix <- data.frame(matrix(numeric(0),
896
+				     nrow(object),
897
+				     length(parameterNames)),
898
+				     row.names=featureNames(object))
899
+	colnames(pMatrix) <- parameterNames
900
+	fD2 <- cbind(fD, pMatrix)
901
+	new("AnnotatedDataFrame", data=fD2,
902
+	    varMetadata=data.frame(labelDescription=colnames(fD2),
903
+	    row.names=colnames(fD2)))
904
+}
905
+
906
+nonpolymorphic <- function(object, cnOptions, tmp.objects){
907
+	batch <- unique(object$batch)
908
+	CHR <- unique(chromosome(object))
909
+	goodSnps <- function(object, PHI.THR, tmp.objects, nAA.THR, nBB.THR){
910
+		Ns <- tmp.objects[["Ns"]]
911
+		##Ns <- get("Ns", envir)
912
+		flags <- getFlags(object, PHI.THR)
913
+		fewAA <- Ns[, "AA"] < nAA.THR
914
+		fewBB <- Ns[, "BB"] < nBB.THR
915
+		flagsA <- flags | fewAA
916
+		flagsB <- flags | fewBB
917
+		flags <- list(A=flagsA, B=flagsB)
918
+		return(flags)
919
+	}
920
+	nAA.THR <- cnOptions$nAA.THR
921
+	nBB.THR <- cnOptions$nBB.THR
922
+	PHI.THR <- cnOptions$PHI.THR
923
+	snpflags <- goodSnps(object, PHI.THR, tmp.objects, nAA.THR, nBB.THR)
924
+	flagsA <- snpflags$A
925
+	flagsB <- snpflags$B
926
+##	if(all(flagsA) | all(flagsB)) stop("all snps are flagged")
927
+	nuA <- getParam(object, "nuA", batch)
928
+	nuB <- getParam(object, "nuB", batch)
929
+	phiA <- getParam(object, "phiA", batch)
930
+	phiB <- getParam(object, "phiB", batch)	
931
+	sns <- sampleNames(object)
932
+	muA <- tmp.objects[["muA"]]
933
+	muB <- tmp.objects[["muB"]]
934
+	A <- A(object)
935
+	B <- B(object)
936
+	CA <- CA(object)
937
+	CB <- CB(object)
938
+	if(CHR == 23){
939
+		phiAX <- getParam(object, "phiAX", batch)
940
+		phiBX <- getParam(object, "phiBX", batch)
941
+	}
942
+	##---------------------------------------------------------------------------
943
+	## Train on unflagged SNPs
944
+	##---------------------------------------------------------------------------
945
+	##Might be best to train using the X chromosome, since for the
946
+	##X phi and nu have been adjusted for cross-hybridization
947
+	##plateInd <- plate == uplate[p]
948
+	##muA <- muA[!flagsA, p, c("A", "AA")]
949
+	##muB <- muB[!flagsB, p, c("B", "BB")]
950
+	muA <- muA[!flagsA, "AA"]
951
+	muB <- muB[!flagsB, "BB"]
952
+	X <- cbind(1, log2(c(muA, muB)))
953
+	Y <- log2(c(phiA[!flagsA], phiB[!flagsB]))
954
+	if(nrow(X) > 5000){
955
+		ix <- sample(1:nrow(X), 5000)
956
+	} else {
957
+		ix <- 1:nrow(X)
958
+	}
959
+	betahat <- solve(crossprod(X[ix, ]), crossprod(X[ix, ], Y[ix]))
960
+	normal <- tmp.objects[["normal"]][!isSnp(object), ]	
961
+	if(CHR == 23){
962
+		##normalNP <- envir[["normalNP"]]
963
+		##normalNP <- normalNP[, plate==uplate[p]]
964
+		##nuT <- envir[["nuT"]]
965
+		##phiT <- envir[["phiT"]]
966
+		
967
+		##cnvs <- envir[["cnvs"]]
968
+                ##loader("cnProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv)
969
+                ##cnProbes <- get("cnProbes", envir=.crlmmPkgEnv)
970
+		##cnProbes <- cnProbes[match(cnvs, rownames(cnProbes)), ]
971
+
972
+		##For build Hg18
973
+		##http://genome.ucsc.edu/cgi-bin/hgGateway
974
+		##pseudo-autosomal regions on X
975
+		##chrX:1-2,709,520 and chrX:154584237-154913754, respectively
976
+		##par:pseudo-autosomal regions
977
+		pseudoAR <- position(object) < 2709520 | (position(object) > 154584237 & position(object) < 154913754)
978
+		##pseudoAR <- cnProbes[, "position"] < 2709520 | (cnProbes[, "position"] > 154584237 & cnProbes[, "position"] < 154913754)
979
+		##in case some of the cnProbes are not annotated
980
+		pseudoAR[is.na(pseudoAR)] <- FALSE
981
+		pseudoAR <- pseudoAR[!isSnp(object)]
982
+		##gender <- envir[["gender"]]
983
+		gender <- object$gender
984
+		obj1 <- object[!isSnp(object), ]
985
+		A.male <- A(obj1[, gender==1])
986
+		mu1 <- rowMedians(A.male, na.rm=TRUE)
987
+		##mu1 <- rowMedians(NP[, gender=="male"], na.rm=TRUE)
988
+		##mu2 <- rowMedians(NP[, gender=="female"], na.rm=TRUE)
989
+		A.female <- A(obj1[, gender==2])
990
+		mu2 <- rowMedians(A.female, na.rm=TRUE)
991
+		mus <- log2(cbind(mu1, mu2))
992
+		X.men <- cbind(1, mus[, 1])
993
+		X.fem <- cbind(1, mus[, 2])
994
+		
995
+		Yhat1 <- as.numeric(X.men %*% betahat)
996
+		Yhat2 <- as.numeric(X.fem %*% betahat)
997
+		phi1 <- 2^(Yhat1)
998
+		phi2 <- 2^(Yhat2)
999
+		nu1 <- 2^(mus[, 1]) - phi1
1000
+		nu2 <- 2^(mus[, 2]) - 2*phi2
1001
+		if(any(pseudoAR)){
1002
+			nu1[pseudoAR] <- 2^(mus[pseudoAR, 1]) - 2*phi1[pseudoAR]
1003
+		}
1004
+		CT1 <- 1/phi1*(A.male-nu1)
1005
+		CT2 <- 1/phi2*(A.female-nu2)
1006
+		##CT2 <- 1/phi2*(NP[, gender=="female"]-nu2)
1007
+		##CT1 <- matrix(as.integer(100*CT1), nrow(CT1), ncol(CT1))
1008
+		##CT2 <- matrix(as.integer(100*CT2), nrow(CT2), ncol(CT2))
1009
+		##CT <- envir[["CT"]]
1010
+		CA <- CA(obj1)
1011
+		CA[, gender==1] <- CT1
1012
+		CA[, gender==2] <- CT2
1013
+		CA(object)[!isSnp(object), ] <- CA
1014
+		##CT[, plate==uplate[p] & gender=="male"] <- CT1
1015
+		##CT[, plate==uplate[p] & gender=="female"] <- CT2
1016
+		##envir[["CT"]] <- CT
1017
+
1018
+		##only using females to compute the variance
1019
+		##normalNP[, gender=="male"] <- NA
1020
+		normal[, gender==1] <- NA
1021
+		sig2A <- getParam(object, "sig2A", batch)
1022
+		normal.f <- normal[, object$gender==2]
1023
+		sig2A[!isSnp(object)] <- rowMAD(log2(A.female*normal.f), na.rm=TRUE)^2
1024
+		##sig2T[, p] <- rowMAD(log2(NP*normalNP), na.rm=TRUE)^2
1025
+		object <- pr(object, "sig2A", batch, sig2A)
1026
+
1027
+		nuA[!isSnp(object)] <- nu2
1028
+		object <- pr(object, "nuA", batch, nuA)		
1029
+		##nuT[, p] <- nu2
1030
+		phiA[!isSnp(object)] <- phi2
1031
+		object <- pr(object, "phiA", batch, phiA)
1032
+		
1033
+	} else {
1034
+		A <- A(object)[!isSnp(object), ]
1035
+		mus <- rowMedians(A * normal, na.rm=TRUE)
1036
+		crosshyb <- max(median(muA) - median(mus), 0)
1037
+		X <- cbind(1, log2(mus+crosshyb))
1038
+		logPhiT <- X %*% betahat
1039
+		phiA[!isSnp(object)] <- 2^(logPhiT)
1040
+		nuA[!isSnp(object)] <- mus-2*phiA[!isSnp(object)]
1041
+		CA(object)[!isSnp(object), ] <- 1/phiA[!isSnp(object)]*(A - nuA[!isSnp(object)])
1042
+		sig2A <- getParam(object, "sig2A", batch)
1043
+		sig2A[!isSnp(object)] <- rowMAD(log2(A*normal), na.rm=TRUE)^2
1044
+		object <- pr(object, "sig2A", batch, sig2A)
1045
+		##added
1046
+		object <- pr(object, "nuA", batch, nuA)
1047
+		object <- pr(object, "phiA", batch, phiA)
1048
+	}
1049
+	return(object)
1050
+}
1051
+
1052
+##sufficient statistics on the intensity scale
1053
+withinGenotypeMoments <- function(object, cnOptions, tmp.objects){
1054
+	normal <- tmp.objects[["normal"]]
1055
+
1056
+	## muA, muB: robust estimates of the within-genotype center (intensity scale)
1057
+	muA <- tmp.objects[["muA"]]
1058
+	muB <- tmp.objects[["muB"]]
1059
+	## vA, vB: robust estimates of the within-genotype variance (intensity scale)
1060
+	vA <- tmp.objects[["vA"]]
1061
+	vB <- tmp.objects[["vB"]]
1062
+	Ns <- tmp.objects[["Ns"]]
1063
+	G <- calls(object) 
1064
+	GT.CONF.THR <- cnOptions$GT.CONF.THR
1065
+	CHR <- unique(chromosome(object))
1066
+
1067
+	A <- A(object)
1068
+	B <- B(object)
1069
+	highConf <- (1-exp(-callsConfidence(object)/1000)) > GT.CONF.THR
1070
+	##highConf <- highConf > GT.CONF.THR
1071
+	if(CHR == 23){
1072
+		gender <- object$gender
1073
+##		gender <- envir[["gender"]]
1074
+		IX <- matrix(gender, nrow(G), ncol(G), byrow=TRUE)
1075
+##		IX <- IX == "female"
1076
+		IX <- IX == 2  ##2=female, 1=male
1077
+	} else IX <- matrix(TRUE, nrow(G), ncol(G))
1078
+	index <- GT.B <- GT.A <- vector("list", 3)
1079
+	names(index) <- names(GT.B) <- names(GT.A) <- c("AA", "AB", "BB")
1080
+	##--------------------------------------------------
1081
+	##within-genotype sufficient statistics
1082
+	##--------------------------------------------------
1083
+	##GT.B <- GT.A <- list()
1084
+	snpIndicator <- matrix(isSnp(object), nrow(object), ncol(object)) ##RS: added
1085
+	for(j in 1:3){
1086
+		GT <- G==j & highConf & IX & snpIndicator
1087
+		GT <- GT * normal
1088
+		Ns[, j+2] <- rowSums(GT, na.rm=TRUE)				
1089
+		GT[GT == FALSE] <- NA
1090
+		GT.A[[j]] <- GT*A
1091
+		GT.B[[j]] <- GT*B
1092
+		index[[j]] <- which(Ns[, j+2] > 0 & isSnp(object)) ##RS: added		
1093
+		muA[, j+2] <- rowMedians(GT.A[[j]], na.rm=TRUE)
1094
+		muB[, j+2] <- rowMedians(GT.B[[j]], na.rm=TRUE)
1095
+		vA[, j+2] <- rowMAD(GT.A[[j]], na.rm=TRUE)
1096
+		vB[, j+2] <- rowMAD(GT.B[[j]], na.rm=TRUE)
1097
+
1098
+		##Shrink towards the typical variance
1099
+		DF <- Ns[, j+2]-1
1100
+		DF[DF < 1] <- 1
1101
+		v0A <- median(vA[, j+2], na.rm=TRUE)
1102
+		v0B <- median(vB[, j+2], na.rm=TRUE)
1103
+		if(v0A == 0) v0A <- NA
1104
+		if(v0B == 0) v0B <- NA
1105
+		DF.PRIOR <- cnOptions$DF.PRIOR
1106
+		vA[, j+2] <- (vA[, j+2]*DF + v0A*DF.PRIOR)/(DF.PRIOR+DF)
1107
+		vA[is.na(vA[, j+2]), j+2] <- v0A
1108
+		vB[, j+2] <- (vB[, j+2]*DF + v0B*DF.PRIOR)/(DF.PRIOR+DF)
1109
+		vB[is.na(vB[, j+2]), j+2] <- v0B
1110
+	}
1111
+	if(CHR == 23){
1112
+		k <- 1
1113
+		for(j in c(1,3)){
1114
+			GT <- G==j & highConf & !IX 
1115
+			Ns[, k] <- rowSums(GT)
1116
+			GT[GT == FALSE] <- NA
1117
+			muA[, k] <- rowMedians(GT*A, na.rm=TRUE)
1118
+			muB[, k] <- rowMedians(GT*B, na.rm=TRUE)
1119
+			vA[, k] <- rowMAD(GT*A, na.rm=TRUE)
1120
+			vB[, k] <- rowMAD(GT*B, na.rm=TRUE)
1121
+			
1122
+			DF <- Ns[, k]-1
1123
+			DF[DF < 1] <- 1
1124
+			v0A <- median(vA[, k], na.rm=TRUE)
1125
+			v0B <- median(vB[, k], na.rm=TRUE)
1126
+			vA[, k] <- (vA[, k]*DF + v0A*DF.PRIOR)/(DF.PRIOR+DF)
1127
+			vA[is.na(vA[, k]), k] <- v0A
1128
+			vB[, k] <- (vB[, k]*DF + v0B*DF.PRIOR)/(DF.PRIOR+DF)
1129
+			vB[is.na(vB[, k]), k] <- v0B			
1130
+			k <- k+1
1131
+		}
1132
+	}
1133
+	tmp.objects[["Ns"]] <- Ns
1134
+	tmp.objects[["vA"]] <- vA
1135
+	tmp.objects[["vB"]] <- vB
1136
+	tmp.objects[["muA"]] <- muA
1137
+	tmp.objects[["muB"]] <- muB
1138
+	tmp.objects$index <- index
1139
+	tmp.objects$GT.A <- GT.A
1140
+	tmp.objects$GT.B <- GT.B
1141
+	return(tmp.objects)
1142
+}
1143
+
1144
+
1145
+oneBatch <- function(object, cnOptions, tmp.objects){
1146
+	muA <- tmp.objects[["muA"]]
1147
+	muB <- tmp.objects[["muB"]]
1148
+	Ns <- tmp.objects[["Ns"]]
1149
+	CHR <- unique(chromosome(object))
1150
+	##---------------------------------------------------------------------------
1151
+	## Impute sufficient statistics for unobserved genotypes (plate-specific)
1152
+	##---------------------------------------------------------------------------
1153
+	MIN.OBS <- cnOptions$MIN.OBS
1154
+	index.AA <- which(Ns[, "AA"] >= MIN.OBS)
1155
+	index.AB <- which(Ns[, "AB"] >= MIN.OBS)
1156
+	index.BB <- which(Ns[, "BB"] >= MIN.OBS)
1157
+	correct.orderA <- muA[, "AA"] > muA[, "BB"]
1158
+	correct.orderB <- muB[, "BB"] > muB[, "AA"]
1159
+	##For chr X, this will ignore the males 
1160
+	nobs <- rowSums(Ns[, 3:5] >= MIN.OBS, na.rm=TRUE) == 3
1161
+	index.complete <- which(correct.orderA & correct.orderB & nobs) ##be selective here
1162
+	size <- min(5000, length(index.complete))
1163
+	if(size == 5000) index.complete <- sample(index.complete, 5000)
1164
+	if(length(index.complete) < 200){
1165
+		stop("fewer than 200 snps pass criteria for predicting the sufficient statistics")
1166
+	}
1167
+	index <- tmp.objects[["index"]]
1168
+	index[[1]] <- which(Ns[, "AA"] == 0 & (Ns[, "AB"] >= MIN.OBS & Ns[, "BB"] >= MIN.OBS))
1169
+	index[[2]] <- which(Ns[, "AB"] == 0 & (Ns[, "AA"] >= MIN.OBS & Ns[, "BB"] >= MIN.OBS))
1170
+	index[[3]] <- which(Ns[, "BB"] == 0 & (Ns[, "AB"] >= MIN.OBS & Ns[, "AA"] >= MIN.OBS))
1171
+	mnA <- muA[, 3:5]
1172
+	mnB <- muB[, 3:5]
1173
+	for(j in 1:3){
1174
+		if(length(index[[j]]) == 0) next()
1175
+		X <- cbind(1, mnA[index.complete,  -j, drop=FALSE], mnB[index.complete,  -j, drop=FALSE])
1176
+		Y <- cbind(mnA[index.complete, j], mnB[index.complete,  j])
1177
+		betahat <- solve(crossprod(X), crossprod(X,Y))
1178
+		X <- cbind(1, mnA[index[[j]],  -j, drop=FALSE],  mnB[index[[j]],  -j, drop=FALSE])
1179
+		mus <- X %*% betahat
1180
+		muA[index[[j]], j+2] <- mus[, 1]
1181
+		muB[index[[j]], j+2] <- mus[, 2]
1182
+	}
1183
+	nobsA <- Ns[, "A"] > 10
1184
+	nobsB <- Ns[, "B"] > 10
1185
+	notMissing <- !(is.na(muA[, "A"]) | is.na(muA[, "B"]) | is.na(muB[, "A"]) | is.na(muB[, "B"]))
1186
+	complete <- list()
1187
+	complete[[1]] <- which(correct.orderA & correct.orderB & nobsA & notMissing) ##be selective here
1188
+	complete[[2]] <- which(correct.orderA & correct.orderB & nobsB & notMissing) ##be selective here	
1189
+	size <- min(5000, length(complete[[1]]))
1190
+	if(size == 5000) complete <- lapply(complete, function(x) sample(x, size))
1191
+	if(CHR == 23){
1192
+		index <- list()
1193
+		index[[1]] <- which(Ns[, "A"] == 0)
1194
+		index[[2]] <- which(Ns[, "B"] == 0)
1195
+		cols <- 2:1
1196
+		for(j in 1:2){
1197
+			if(length(index[[j]]) == 0) next()
1198
+			X <- cbind(1, muA[complete[[j]], cols[j]], muB[complete[[j]], cols[j]])
1199
+			Y <- cbind(muA[complete[[j]], j], muB[complete[[j]], j])
1200
+			betahat <- solve(crossprod(X), crossprod(X,Y))
1201
+			X <- cbind(1, muA[index[[j]], cols[j]],  muB[index[[j]], cols[j]])
1202
+			mus <- X %*% betahat
1203
+			muA[index[[j]], j] <- mus[, 1]
1204
+			muB[index[[j]], j] <- mus[, 2]
1205
+		}
1206
+	}
1207
+	##missing two genotypes
1208
+	noAA <- Ns[, "AA"] < MIN.OBS
1209
+	noAB <- Ns[, "AB"] < MIN.OBS
1210
+	noBB <- Ns[, "BB"] < MIN.OBS
1211
+	index[[1]] <- noAA & noAB
1212
+	index[[2]] <- noBB & noAB
1213
+	index[[3]] <- noAA & noBB
1214
+##	snpflags <- envir[["snpflags"]]
1215
+	snpflags <- index[[1]] | index[[2]] | index[[3]]
1216
+##	snpflags[, p] <- index[[1]] | index[[2]] | index[[3]]
1217
+
1218
+	##---------------------------------------------------------------------------
1219
+	## Two genotype clusters not observed -- would sequence help? (didn't seem to)
1220
+	## 1. extract index of complete data
1221
+	## 2. Regress  mu_missing ~ sequence + mu_observed
1222
+	## 3. solve for nu assuming the median is 2
1223
+	##---------------------------------------------------------------------------
1224
+	cols <- c(3, 1, 2)
1225
+	for(j in 1:3){
1226
+		if(sum(index[[j]]) == 0) next()
1227
+		k <- cols[j]
1228
+		X <- cbind(1, mnA[index.complete, k], mnB[index.complete, k])
1229
+		Y <- cbind(mnA[index.complete,  -k],
1230
+			   mnB[index.complete,  -k])
1231
+		betahat <- solve(crossprod(X), crossprod(X,Y))
1232
+		X <- cbind(1, mnA[index[[j]],  k], mnB[index[[j]],  k])
1233
+		mus <- X %*% betahat
1234
+		muA[index[[j]], -c(1, 2, k+2)] <- mus[, 1:2]
1235
+		muB[index[[j]], -c(1, 2, k+2)] <- mus[, 3:4]
1236
+	}
1237
+	negA <- rowSums(muA < 0) > 0
1238
+	negB <- rowSums(muB < 0) > 0	
1239
+	snpflags <- snpflags| negA | negB | rowSums(is.na(muA[, 3:5]), na.rm=TRUE) > 0
1240
+	tmp.objects$snpflags <- snpflags
1241
+	tmp.objects[["muA"]] <- muA
1242
+	tmp.objects[["muB"]] <- muB
1243
+	tmp.objects[["snpflags"]] <- snpflags
1244
+	return(tmp.objects)
1245
+}
1246
+
1247
+##Estimate tau2, sigma2, and correlation (updates the object)
1248
+locationAndScale <- function(object, cnOptions, tmp.objects){
1249
+	Ns <- tmp.objects[["Ns"]]
1250
+	index <- tmp.objects[["index"]]
1251
+	
1252
+	index.AA <- index[[1]]
1253
+	index.AB <- index[[2]]
1254
+	index.BB <- index[[3]]
1255
+	rm(index); gc()
1256
+
1257
+	GT.A <- tmp.objects[["GT.A"]]
1258
+	GT.B <- tmp.objects[["GT.B"]]
1259
+	AA.A <- GT.A[[1]]
1260
+	AB.A <- GT.A[[2]]
1261
+	BB.A <- GT.A[[3]]
1262
+	
1263
+	AA.B <- GT.B[[1]]
1264
+	AB.B <- GT.B[[2]]
1265
+	BB.B <- GT.B[[3]]
1266
+	
1267
+	x <- BB.A[index.BB, ]
1268
+	batch <- unique(object$batch)
1269
+	DF.PRIOR <- cnOptions$DF.PRIOR
1270
+
1271
+	tau2A <- getParam(object, "tau2A", batch)
1272
+	tau2A[index.BB] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2	
1273
+	DF <- Ns[, "BB"]-1
1274
+	DF[DF < 1] <- 1
1275
+	med <- median(tau2A, na.rm=TRUE)
1276
+	tau2A <- (tau2A * DF  +  med * DF.PRIOR)/(DF.PRIOR + DF)
1277
+	tau2A[is.na(tau2A) & isSnp(object)] <- med
1278
+	object <- pr(object, "tau2A", batch, tau2A)
1279
+
1280
+	sig2B <- getParam(object, "sig2B", batch)	
1281
+	x <- BB.B[index.BB, ]
1282
+	sig2B[index.BB] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2	
1283
+	med <- median(sig2B, na.rm=TRUE)
1284
+	sig2B <- (sig2B * DF  +  med * DF.PRIOR)/(DF.PRIOR + DF)
1285
+	sig2B[is.na(sig2B) & isSnp(object)] <- med
1286
+	object <- pr(object, "sig2B", batch, sig2B)	
1287
+
1288
+	tau2B <- getParam(object, "tau2B", batch)	
1289
+	x <- AA.B[index.AA, ]
1290
+	tau2B[index.AA] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2		
1291
+	DF <- Ns[, "AA"]-1
1292
+	DF[DF < 1] <- 1
1293
+	med <- median(tau2B, na.rm=TRUE)
1294
+	tau2B <- (tau2B * DF  +  med * DF.PRIOR)/(DF.PRIOR + DF)
1295
+	tau2B[is.na(tau2B) & isSnp(object)] <- med
1296
+	object <- pr(object, "tau2B", batch, tau2B)
1297
+
1298
+	sig2A <- getParam(object, "sig2A", batch)	
1299
+	x <- AA.A[index.AA, ]
1300
+	sig2A[index.AA] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2##var(log(IA)|AA)	
1301
+	med <- median(sig2A, na.rm=TRUE)
1302
+	sig2A <- (sig2A * DF  +  med * DF.PRIOR)/(DF.PRIOR + DF)
1303
+	sig2A[is.na(sig2A) & isSnp(object)] <- med
1304
+	object <- pr(object, "sig2A", batch, sig2A)
1305
+
1306
+	if(length(index.AB) > 0){ ##all homozygous is possible
1307
+		corr <- getParam(object, "corr", batch)
1308
+		x <- AB.A[index.AB, ]
1309
+		y <- AB.B[index.AB, ]
1310
+		corr[index.AB] <- rowCors(x, y, na.rm=TRUE)
1311
+		corr[corr < 0] <- 0
1312
+		DF <- Ns[, "AB"]-1
1313
+		DF[DF<1] <- 1
1314
+		med <- median(corr, na.rm=TRUE)
1315
+		corr <- (corr*DF  +  med * DF.PRIOR)/(DF.PRIOR + DF)
1316
+		corr[is.na(corr) & isSnp(object)] <- med
1317
+		object <- pr(object, "corr", batch, corr)		
1318
+	}
1319
+	corrB.AA <- getParam(object, "corrB.AA", batch)
1320
+	backgroundB <- AA.B[index.AA, ]
1321
+	signalA <- AA.A[index.AA, ]
1322
+	corrB.AA[index.AA] <- rowCors(backgroundB, signalA, na.rm=TRUE)
1323
+	DF <- Ns[, "AA"]-1
1324
+	DF[DF < 1] <- 1
1325
+	med <- median(corrB.AA, na.rm=TRUE)
1326
+	corrB.AA <- (corrB.AA*DF + med*DF.PRIOR)/(DF.PRIOR + DF)
1327
+	corrB.AA[is.na(corrB.AA) & isSnp(object)] <- med
1328
+	object <- pr(object, "corrB.AA", batch, corrB.AA)
1329
+
1330
+	corrA.BB <- getParam(object, "corrA.BB", batch)	
1331
+	backgroundA <- BB.A[index.BB, ]
1332
+	signalB <- BB.B[index.BB, ]
1333
+	corrA.BB[index.BB] <- rowCors(backgroundA, signalB, na.rm=TRUE)
1334
+	DF <- Ns[, "BB"]-1
1335
+	DF[DF < 1] <- 1
1336
+	med <- median(corrA.BB, na.rm=TRUE)
1337
+	corrA.BB <- (corrA.BB*DF + med*DF.PRIOR)/(DF.PRIOR + DF)
1338
+	corrA.BB[is.na(corrA.BB) & isSnp(object)] <- med
1339
+	object <- pr(object, "corrA.BB", batch, corrA.BB)
1340
+	return(object)
1341
+}
1342
+
1343
+##coefs <- function(plateIndex, conf, MIN.OBS=3, envir, CONF.THR=0.99){
1344
+coefs <- function(object, cnOptions, tmp.objects){
1345
+##	p <- plateIndex
1346
+	batch <- unique(object$batch)
1347
+##	plates.completed <- envir[["plates.completed"]]
1348
+##	if(!plates.completed[p]) return()
1349
+##	CHR <- envir[["chrom"]]
1350
+	CHR <- unique(chromosome(object))
1351
+##	plate <- envir[["plate"]]
1352
+	muA <- tmp.objects[["muA"]]
1353
+	muB <- tmp.objects[["muB"]]
1354
+	vA <- tmp.objects[["vA"]]
1355
+	vB <- tmp.objects[["vB"]]
1356
+	Ns <- tmp.objects[["Ns"]]
1357
+##	uplate <- tmp.objects[["uplate"]]
1358
+	if(CHR != 23){
1359
+		IA <- muA[, 3:5]
1360
+		IB <- muB[, 3:5]
1361
+		vA <- vA[, 3:5]
1362
+		vB <- vB[, 3:5]
1363
+		Np <- Ns[, 3:5]
1364
+	} else {
1365
+		NOHET <- is.na(median(vA[, "AB"], na.rm=TRUE))
1366
+		if(NOHET){
1367
+			IA <- muA[, -4]
1368
+			IB <- muB[, -4]
1369
+			vA <- vA[, -4]
1370
+			vB <- vB[, -4]
1371
+			Np <- Ns[, -4] 
1372
+		} else{
1373
+			IA <- muA
1374
+			IB <- muB
1375
+			vA <- vA
1376
+			vB <- vB
1377
+			Np <- Ns
1378
+		}
1379
+		
1380
+	}
1381
+	Np[Np < 1] <- 1
1382
+	vA2 <- vA^2/Np
1383
+	vB2 <- vB^2/Np
1384
+	wA <- sqrt(1/vA2)
1385
+	wB <- sqrt(1/vB2)
1386
+	YA <- IA*wA
1387
+	YB <- IB*wB
1388
+	##update lm.coefficients stored in object
1389
+	object <- nuphiAllele(object, allele="A", Ystar=YA, W=wA, tmp.objects=tmp.objects, cnOptions=cnOptions)
1390
+	object <- nuphiAllele(object, allele="B", Ystar=YB, W=wB, tmp.objects=tmp.objects, cnOptions=cnOptions)	
1391
+	##---------------------------------------------------------------------------
1392
+	##Estimate crosshyb using X chromosome and sequence information
1393
+	##---------------------------------------------------------------------------
1394
+	##browser()
1395
+	####data(sequences, package="genomewidesnp6Crlmm")
1396
+	##snpflags <- envir[["snpflags"]]
1397
+	##muA <- envir[["muA"]][, p, 3:5]
1398
+	##muB <- envir[["muB"]][, p, 3:5]
1399
+	##Y <- envir[["phiAx"]]
1400
+	##load("sequences.rda")
1401
+	##seqA <- sequences[, "A", ][, 1]
1402
+	##seqA <- seqA[match(snps, names(seqA))]
1403
+	##X <- cbind(1, sequenceDesignMatrix(seqA))
1404
+	##X <- cbind(X, nuA[, p], phiA[, p], nuB[, p], phiB[, p])
1405
+	##missing <- rowSums(is.na(X)) > 0
1406
+	##betahat <- solve(crossprod(X[!missing, ]), crossprod(X[!missing, ], Y[!missing]))
1407
+	return(object)
1408
+}
1409
+
1410
+polymorphic <- function(object, cnOptions, tmp.objects){
1411
+	batch <- unique(object$batch)
1412
+	CHR <- unique(chromosome(object))
1413
+	vA <- tmp.objects[["vA"]]
1414
+	vB <- tmp.objects[["vB"]]
1415
+	Ns <- tmp.objects[["Ns"]]
1416
+	
1417
+	nuA <- getParam(object, "nuA", batch)
1418
+	nuB <- getParam(object, "nuB", batch)
1419
+	nuA.se <- getParam(object, "nuA.se", batch)
1420
+	nuB.se <- getParam(object, "nuB.se", batch)
1421
+
1422
+	phiA <- getParam(object, "phiA", batch)
1423
+	phiB <- getParam(object, "phiB", batch)
1424
+	phiA.se <- getParam(object, "phiA.se", batch)
1425
+	phiB.se <- getParam(object, "phiB.se", batch)
1426
+	A <- A(object)
1427
+	B <- B(object)
1428
+
1429
+	NOHET <- mean(Ns[, "AB"], na.rm=TRUE) < 0.05
1430
+	##---------------------------------------------------------------------------
1431
+	## Estimate CA, CB
1432
+	##---------------------------------------------------------------------------
1433
+	if(CHR == 23){
1434
+		phiAX <- getParam(object, "phiAX", batch)  ##nonspecific hybridization coef
1435
+		phiBX <- getParam(object, "phiBX", batch)  ##nonspecific hybridization coef			
1436
+		phistar <- phiBX/phiA  
1437
+		tmp <- (B-nuB - phistar*A + phistar*nuA)/phiB
1438
+		copyB <- tmp/(1-phistar*phiAX/phiB)
1439
+		copyA <- (A-nuA-phiAX*copyB)/phiA
1440
+		CB(object) <- copyB  ## multiplies by 100 and converts to integer 
1441
+		CA(object) <- copyA
1442
+	} else{
1443
+		CA(object) <- matrix((1/phiA*(A-nuA)), nrow(A), ncol(A))
1444
+		CB(object) <- matrix((1/phiB*(B-nuB)), nrow(B), ncol(B))
1445
+
1446
+	}
1447
+	return(object)
1448
+}
1449
+
1450
+biasAdj <- function(object, cnOptions, tmp.objects){
1451
+	I <- isSnp(object)
1452
+	gender <- object$gender
1453
+	CHR <- unique(chromosome(object))
1454
+	batch <- unique(object$batch)
1455
+	if(CHR == 23){
1456
+		phiAX <- getParam(object, "phiAX", batch)[I]
1457
+		phiBX <- getParam(object, "phiBX", batch)[I]
1458
+	}
1459
+	A <- A(object)[I, ]
1460
+	B <- B(object)[I, ]
1461
+	sig2A <- getParam(object, "sig2A", batch)[I]
1462
+	sig2B <- getParam(object, "sig2B", batch)[I]
1463
+	tau2A <- getParam(object, "tau2A", batch)[I]
1464
+	tau2B <- getParam(object, "tau2B", batch)[I]	
1465
+	corrA.BB <- getParam(object, "corrA.BB", batch)[I]
1466
+	corrB.AA <- getParam(object, "corrB.AA", batch)[I]
1467
+	corr <- getParam(object, "corr", batch)[I]		
1468
+	nuA <- getParam(object, "nuA", batch)[I]
1469
+	nuB <- getParam(object, "nuB", batch)[I]	
1470
+	phiA <- getParam(object, "phiA", batch)[I]
1471
+	phiB <- getParam(object, "phiB", batch)[I]
1472
+	normal <- tmp.objects[["normal"]][I, ]
1473
+	prior.prob <- cnOptions$prior.prob
1474
+	emit <- array(NA, dim=c(nrow(A), ncol(A), 10))##SNPs x sample x 'truth'	
1475
+	lA <- log2(A)
1476
+	lB <- log2(B)	
1477
+	X <- cbind(lA, lB)
1478
+	counter <- 1##state counter								
1479
+	for(CT in 0:3){
1480
+		for(CA in 0:CT){
1481
+			cat(".")
1482
+			CB <- CT-CA
1483
+			A.scale <- sqrt(tau2A*(CA==0) + sig2A*(CA > 0))
1484
+			B.scale <- sqrt(tau2B*(CA==0) + sig2B*(CA > 0))
1485
+			if(CA == 0 & CB == 0) rho <- 0
1486
+			if(CA == 0 & CB > 0) rho <- corrA.BB
1487
+			if(CA > 0 & CB == 0) rho <- corrB.AA
1488
+			if(CA > 0 & CB > 0) rho <- corr
1489
+			if(CHR == 23){
1490
+				##means <- cbind(suppressWarnings(log2(nuA[, p]+CA*phiA[, p] + CB*phiAx[, p])), suppressWarnings(log2(nuB[, p]+CB*phiB[, p] + CA*phiBx[, p])))
1491
+				meanA <- suppressWarnings(log2(nuA+CA*phiA + CB*phiAX))
1492
+				meanB <- suppressWarnings(log2(nuB+CB*phiB + CA*phiBX))				
1493
+			} else{
1494
+				##means <- cbind(suppressWarnings(log2(nuA+CA*phiA)), suppressWarnings(log2(nuB+CB*phiB)))
1495
+				meanA <- suppressWarnings(log2(nuA+CA*phiA))
1496
+				meanB <- suppressWarnings(log2(nuB+CB*phiB))
1497
+				covs <- rho*A.scale*B.scale
1498
+				A.scale2 <- A.scale^2
1499
+				B.scale2 <- B.scale^2
1500
+			}
1501
+			Q.x.y <- 1/(1-rho^2)*(((lA - meanA)/A.scale)^2 + ((lB - meanB)/B.scale)^2 - 2*rho*((lA - meanA)*(lB - meanB))/(A.scale*B.scale))
1502
+			f.x.y <- 1/(2*pi*A.scale*B.scale*sqrt(1-rho^2))*exp(-0.5*Q.x.y)
1503
+			emit[, , counter] <- f.x.y
1504
+			counter <- counter+1
1505
+		}
1506
+	}
1507
+	priorProb <- cnOptions$prior.prob
1508
+	homDel <- priorProb[1]*emit[, , 1]
1509
+	hemDel <- priorProb[2]*emit[, , c(2, 3)] # + priorProb[3]*emit[, c(4, 5, 6)] + priorProb[4]*emit[, c(7:10)]
1510
+	norm <- priorProb[3]*emit[, , 4:6]
1511
+	amp <- priorProb[4]*emit[, , 7:10]
1512
+	##sum over the different combinations within each copy number state
1513
+	hemDel <- hemDel[, , 1] + hemDel[, , 2]
1514
+	norm <- norm[, , 1] + norm[, , 2] + norm[, , 3]
1515
+	amp <- amp[, , 1] + amp[, , 2] + amp[ , , 3] + amp[, , 4]
1516
+	total <- homDel + hemDel + norm + amp
1517
+	homDel <- homDel/total
1518
+	hemDel <- hemDel/total
1519
+	norm <- norm/total
1520
+	amp <- amp/total
1521
+	tmp.objects$posteriorProb <- list(hemDel=hemDel, norm=norm, amp=amp)
1522
+	##envir[["posteriorProb"]] <- list(hemDel=hemDel, norm=norm, amp=amp)
1523
+	posteriorProb <- array(NA, dim=c(nrow(A), ncol(A), 4))
1524
+	posteriorProb[, , 1] <- homDel
1525
+	posteriorProb[, , 2] <- hemDel
1526
+	posteriorProb[, , 3] <- norm
1527
+	posteriorProb[, , 4] <- amp
1528
+	mostLikelyState <- apply(posteriorProb, c(1, 2), function(x) order(x, decreasing=TRUE)[1])
1529
+	if(CHR == 23){
1530
+		##so state index 3 is the most likely state for men and women
1531
+		mostLikelyState[, gender==1] <- mostLikelyState[, gender==1] + 1
1532
+	}
1533
+	proportionSamplesAltered <- rowMeans(mostLikelyState != 3)
1534
+	ii <- proportionSamplesAltered < 0.8 & proportionSamplesAltered > 0.01
1535
+	##  only exclude observations from one tail, depending on
1536
+	##  whether more are up or down
1537
+	moreup <- rowSums(mostLikelyState > 3) > rowSums(mostLikelyState < 3) ##3 is normal
1538
+	NORM <- matrix(FALSE, nrow(A), ncol(A))
1539
+	NORM[proportionSamplesAltered > 0.8, ] <- FALSE
1540
+	ratioUp <- posteriorProb[, , 4]/posteriorProb[, , 3]
1541
+	NORM[ii & moreup, ] <- ratioUp[moreup & ii] < 1  ##normal more likely
1542
+	ratioDown <- posteriorProb[, , 2]/posteriorProb[, , 3]
1543
+	NORM[ii & !moreup, ] <- ratioDown[!moreup & ii] < 1  ##normal more likely
1544
+	normal <- NORM*normal
1545
+	tmp <- tmp.objects[["normal"]]
1546
+	tmp[I, ] <- normal
1547
+	tmp.objects[["normal"]] <- tmp
1548
+	return(tmp.objects)
1549
+}
1550
+
1551
+
1552
+##biasAdjNP <- function(plateIndex, envir, priorProb){
1553
+biasAdjNP <- function(object, cnOptions, tmp.objects){
1554
+	batch <- unique(object$batch)
1555
+	normalNP <- tmp.objects[["normal"]][!isSnp(object), ]
1556
+	CHR <- unique(chromosome(object))
1557
+	A <- A(object)[!isSnp(object), ]
1558
+	sig2A <- getParam(object, "sig2A", batch)
1559
+	gender <- object$gender
1560
+	##Assume that on the log-scale, that the background variance is the same...
1561
+	tau2A <- sig2A
1562
+	nuA <- getParam(object, "nuA", batch)
1563
+	phiA <- getParam(object, "phiA", batch)
1564
+	prior.prob <- cnOptions$prior.prob
1565
+	emit <- array(NA, dim=c(nrow(A), ncol(A), 4))##SNPs x sample x 'truth'	
1566
+	lT <- log2(A)
1567
+	I <- isSnp(object)
1568
+	counter <- 1 ##state counter
1569
+	for(CT in 0:3){
1570
+		sds <- sqrt(tau2A[I]*(CT==0) + sig2A[I]*(CT > 0))
1571
+		means <- suppressWarnings(log2(nuA[I]+CT*phiA[I]))
1572
+		tmp <- dnorm(lT, mean=means, sd=sds)
1573
+		emit[, , counter] <- tmp
1574
+		counter <- counter+1
1575
+	}
1576
+	mostLikelyState <- apply(emit, c(1, 2), function(x) order(x, decreasing=TRUE)[1])
1577
+	if(CHR == 23){
1578
+		## the state index for male on chromosome 23  is 2
1579
+		## add 1 so that the state index is 3 for 'normal' state
1580
+		mostLikelyState[, gender=="male"] <- mostLikelyState[, gender==1] + 1
1581
+	}
1582
+	tmp3 <- mostLikelyState != 3
1583
+	##Those near 1 have NaNs for nu and phi.  this occurs by NaNs in the muA[,, "A"] or muA[, , "B"] for X chromosome
1584
+	proportionSamplesAltered <- rowMeans(tmp3)##prop normal
1585
+	ii <- proportionSamplesAltered < 0.75
1586
+	moreup <- rowSums(mostLikelyState > 3) > rowSums(mostLikelyState < 3)
1587
+	notUp <-  mostLikelyState[ii & moreup, ] <= 3
1588
+	notDown <- mostLikelyState[ii & !moreup, ] >= 3
1589
+	NORM <- matrix(TRUE, nrow(A), ncol(A))
1590
+	NORM[ii & moreup, ] <- notUp
1591
+	NORM[ii & !moreup, ] <- notDown
1592
+	normalNP <- normalNP*NORM
1593
+
1594
+	##flagAltered <- which(proportionSamplesAltered > 0.5)
1595
+	##envir[["flagAlteredNP"]] <- flagAltered
1596
+	normal <- tmp.objects[["normal"]]
1597
+	normal[!isSnp(object), ] <- normalNP
1598
+	tmp.objects[["normal"]] <- normal
1599
+	return(tmp.objects)
1600
+}
1601
+
1602
+
1603
+getParams <- function(object, batch){
1604
+	batch <- unique(object$batch)
1605
+	if(length(batch) > 1) stop("batch variable not unique")		
1606
+	nuA <- as.numeric(fData(object)[, match(paste("nuA", batch, sep="_"), fvarLabels(object))])
1607
+	nuB <- as.numeric(fData(object)[, match(paste("nuB", batch, sep="_"), fvarLabels(object))])	
1608
+	phiA <- as.numeric(fData(object)[, match(paste("phiA", batch, sep="_"), fvarLabels(object))])
1609
+	phiB <- as.numeric(fData(object)[, match(paste("phiB", batch, sep="_"), fvarLabels(object))])	
1610
+	tau2A <- as.numeric(fData(object)[, match(paste("tau2A", batch, sep="_"), fvarLabels(object))])
1611
+	tau2B <- as.numeric(fData(object)[, match(paste("tau2B", batch, sep="_"), fvarLabels(object))])
1612
+	sig2A <- as.numeric(fData(object)[, match(paste("sig2A", batch, sep="_"), fvarLabels(object))])
1613
+	sig2B <- as.numeric(fData(object)[, match(paste("sig2B", batch, sep="_"), fvarLabels(object))])
1614
+	corrA.BB <- as.numeric(fData(object)[, match(paste("corrA.BB", batch, sep="_"), fvarLabels(object))])
1615
+	corrB.AA <- as.numeric(fData(object)[, match(paste("corrB.AA", batch, sep="_"), fvarLabels(object))])
1616
+	corr <- as.numeric(fData(object)[, match(paste("corr", batch, sep="_"), fvarLabels(object))])
1617
+	params <- list(nuA=nuA,
1618
+		       nuB=nuB,
1619
+		       phiA=phiA,
1620
+		       phiB=phiB,
1621
+		       tau2A=tau2A,
1622
+		       tau2B=tau2B,
1623
+		       sig2A=sig2A,
1624
+		       sig2B=sig2B,
1625
+		       corrA.BB=corrA.BB,
1626
+		       corrB.AA=corrB.AA,
1627
+		       corr=corr)
1628
+	return(params)
1629
+}
1630
+
1631
+hmmOptions <- function(copynumberStates=0:4,
1632
+		       EMIT.THR=-10,
1633
+		       scaleSds=TRUE,
1634
+		       verbose=TRUE,
1635
+		       log.initialP,
1636
+		       normalIndex=3,
1637
+		       normal2altered=0.01,
1638
+		       altered2normal=1,
1639
+		       altered2altered=0.001,
1640
+		       TAUP=1e8,
1641
+		       save.it=TRUE,
1642
+		       MIN.MARKERS=5){  ## whether the save the emission probabilities
1643
+	if(missing(log.initialP)) log.initialP <- log(rep(1/length(copynumberStates), length(copynumberStates)))
1644
+	list(copynumberStates=copynumberStates,
1645
+	     EMIT.THR=EMIT.THR,
1646
+	     scaleSds=scaleSds,
1647
+	     verbose=verbose,
1648
+	     log.initialP=log.initialP,
1649
+	     normalIndex=normalIndex,
1650
+	     normal2altered=normal2altered,
1651
+	     altered2normal=altered2normal,
1652
+	     altered2altered=altered2altered,
1653
+	     TAUP=TAUP,
1654
+	     save.it=save.it,
1655
+	     MIN.MARKERS=MIN.MARKERS)
1656
+}
1657
+
1658
+computeHmm.CrlmmSet <- function(object, cnOptions){
1659
+	hmmOptions <- cnOptions[["hmmOpts"]]
1660
+	object <- object[order(chromosome(object), position(object)), ]
1661
+	##emission <- hmmOptions[["emission"]]
1662
+	chrom <- unique(chromosome(object))
1663
+	tPr <- transitionProbability(chromosome=chromosome(object),
1664
+				     position=position(object),
1665
+				     TAUP=hmmOptions[["TAUP"]])
1666
+
1667
+	emission <- computeEmission(object, hmmOptions)
1668
+##	if(cnOptions[["save.it"]])
1669
+##		save(emission,
1670
+##		     file=file.path(cnOptions[["outdir"]], paste("emission_", chrom, ".rda", sep="")))
1671
+	hmmPredictions <- viterbi.CrlmmSet(object,
1672
+					   hmmOptions=hmmOptions,
1673
+					   emissionPr=emission,
1674
+					   transitionPr=tPr[, "transitionPr"],
1675
+					   chromosomeArm=tPr[, "arm"])
1676
+	segments <- breaks(x=hmmPredictions,
1677
+			   states=hmmOptions[["copynumberStates"]],
1678
+			   position=position(object),
1679
+			   chromosome=chromosome(object))
1680
+	##
1681
+	object <- new("SegmentSet",
1682
+		      CA=object@assayData[["CA"]],  ## keep as an integer
1683
+		      CB=object@assayData[["CB"]],  ## keep as an integer
1684
+		      senseThetaA=A(object),
1685
+		      senseThetaB=B(object),
1686
+		      calls=calls(object),
1687
+		      callsConfidence=callsConfidence(object),
1688
+		      featureData=featureData(object),
1689
+		      phenoData=phenoData(object),
1690
+		      protocolData=protocolData(object),
1691
+		      experimentData=experimentData(object),
1692
+		      annotation=annotation(object),
1693
+		      segmentData=segments,
1694
+		      emissionPr=emission)
1695
+
1696
+}
1697
+
1698
+viterbi.CrlmmSet <- function(object, hmmOptions, emissionPr, transitionPr, chromosomeArm){
1699
+	viterbi(emission=emissionPr,
1700
+		tau=transitionPr,
1701
+		initialStateProbs=hmmOptions[["log.initialP"]],
1702
+		arm=chromosomeArm,
1703
+		normalIndex=hmmOptions[["normalIndex"]],
1704
+		normal2altered=hmmOptions[["normal2altered"]],
1705
+		altered2normal=hmmOptions[["altered2normal"]],
1706
+		altered2altered=hmmOptions[["altered2altered"]])
1707
+}
1708
+
1709
+
1710
+
1711
+
1712
+
1713
+thresholdModelParams <- function(object, cnOptions){
1714
+	MIN.NU <- cnOptions$MIN.NU
1715
+	MIN.PHI <- cnOptions$MIN.PHI
1716
+	batch <- unique(object$batch)
1717
+	nuA <- getParam(object, "nuA", batch)
1718
+	nuA[nuA < MIN.NU] <- MIN.NU
1719
+	nuB <- getParam(object, "nuB", batch)
1720
+	nuB[nuB < MIN.NU] <- MIN.NU
1721
+	phiA <- getParam(object, "phiA", batch)
1722
+	phiA[phiA < MIN.PHI] <- MIN.PHI
1723
+	phiB <- getParam(object, "phiB", batch)
1724
+	phiB[phiB < MIN.PHI] <- MIN.PHI
1725
+	phiAX <- as.numeric(getParam(object, "phiAX", batch))
1726
+	phiAX[phiAX < MIN.PHI] <- MIN.PHI	
1727
+	phiBX <- as.numeric(getParam(object, "phiBX", batch))
1728
+	phiBX[phiBX < MIN.PHI] <- MIN.PHI	
1729
+
1730
+	object <- pr(object, "nuA", batch, nuA)
1731
+	object <- pr(object, "nuB", batch, nuB)
1732
+	object <- pr(object, "phiA", batch, phiA)
1733
+	object <- pr(object, "phiB", batch, phiB)
1734
+	object <- pr(object, "phiAX", batch, phiAX)
1735
+	object <- pr(object, "phiBX", batch, phiBX)	
1736
+	return(object)
1737
+}
1738
+
1739
+
1740
+computeEmission.CrlmmSet <- function(object, hmmOptions){
1741
+	EMIT.THR <- hmmOptions[["EMIT.THR"]]
1742
+	cnStates <- hmmOptions[["copynumberStates"]]
1743
+	object <- object[order(chromosome(object), position(object)), ]
1744
+	if(any(diff(position(object)) < 0)) stop("must be ordered by chromosome and physical position")
1745
+	emissionProbs <- array(NA, dim=c(nrow(object), ncol(object), length(hmmOptions[["copynumberStates"]])))
1746
+	dimnames(emissionProbs) <- list(featureNames(object),
1747
+					sampleNames(object),
1748
+					paste("copy.number_", hmmOptions[["copynumberStates"]], sep=""))	
1749
+	batch <- object$batch
1750
+	for(i in seq(along=unique(batch))){
1751
+		emissionProbs[, batch == unique(batch)[i], ] <- getEmission(object[, batch==unique(batch)[i]], hmmOptions)
1752
+	}
1753
+	if(EMIT.THR > -Inf){  ## truncate emission probabilities for outliers
1754
+		emissionProbs[emissionProbs < EMIT.THR] <- EMIT.THR
1755
+	}
1756
+	emissionProbs
1757
+}
1758
+
1759
+getEmission <- function(object, hmmOptions){
1760
+	emissionProbs <- array(NA, dim=c(nrow(object),
1761
+				   ncol(object), length(hmmOptions[["copynumberStates"]])))
1762
+	emit.snps <- getEmission.snps(object[isSnp(object), ], hmmOptions)
1763
+	emit.nps <- getEmission.nps(object[!isSnp(object), ], hmmOptions)
1764
+	emissionProbs[isSnp(object), , ] <- emit.snps
1765
+	emissionProbs[!isSnp(object), , ] <- emit.nps
1766
+	emissionProbs
1767
+}
1768
+
1769
+getEmission.nps <- function(object, hmmOptions){
1770
+	##****************************************************
1771
+	##	                                             *
1772
+	##  Emission probabilities for nonpolymorphic probes *
1773
+	##	                                             *
1774
+	##****************************************************
1775
+	batch <- unique(object$batch)
1776
+	scaleSds <- hmmOptions[["scaleSds"]]
1777
+	cnStates <- hmmOptions[["copynumberStates"]]
1778
+	verbose <- hmmOptions[["verbose"]]
1779
+	if(verbose) message("Computing emission probabilities for nonpolymorphic probes.")
1780
+	if(scaleSds){
1781
+		a <- CA(object)
1782
+		sds.a <- apply(a, 2, sd, na.rm=TRUE)
1783
+		sds.a <- log2(sds.a/median(sds.a))
1784
+		sds.a <- matrix(sds.a, nrow(object), ncol(object), byrow=TRUE)
1785
+	} else sds.a <- matrix(0, nrow(object), ncol(object))	
1786
+	emissionProbs <- array(NA, dim=c(nrow(object),
1787
+				   ncol(object), length(cnStates)))
1788
+	nuA <- getParam(object, "nuA", batch)
1789
+	phiA <- getParam(object, "phiA", batch)
1790
+	sig2A <- getParam(object, "sig2A", batch)
1791
+	##tau2A <- getParam(object, "tau2A", batch)
1792
+	##Assume that on the log-scale, that the background variance is the same...
1793
+	##tau2A <- sig2A	
1794
+	a <- as.numeric(log2(A(object)))
1795
+	for(k in seq(along=cnStates)){
1796
+		CT <- cnStates[k]
1797
+		mus.matrix=matrix(log2(nuA + CT*phiA), nrow(object), ncol(object))
1798
+		mus <- as.numeric(matrix(log2(nuA + CT*phiA), nrow(object), ncol(object)))
1799
+		sds.matrix <- matrix(sqrt(sig2A), nrow(object), ncol(object))
1800
+		sds.matrix <- sds.matrix + sds.a
1801
+		sds <- as.numeric(sds.matrix)
1802
+		suppressWarnings(tmp <- matrix(dnorm(a, mean=mus, sd=sds), nrow(object), ncol(object)))
1803
+		emissionProbs[, , k] <- log(tmp)
1804
+	}
1805
+	emissionProbs
1806
+}
1807
+	
1808
+
1809
+getEmission.snps <- function(object, hmmOptions){
1810
+	batch <- unique(object$batch)
1811
+	if(length(batch) > 1) stop("batch variable not unique")
1812
+	scaleSds <- hmmOptions[["scaleSds"]]
1813
+	cnStates <- hmmOptions[["copynumberStates"]]
1814
+	verbose <- hmmOptions[["verbose"]]
1815
+	if(scaleSds){
1816
+		a <- CA(object)
1817
+		b <- CB(object)
1818
+		sds.a <- apply(a, 2, sd, na.rm=TRUE)
1819
+		sds.b <- apply(b, 2, sd, na.rm=TRUE)	
1820
+		sds.a <- log2(sds.a/median(sds.a))
1821
+		sds.b <- log2(sds.b/median(sds.b))
1822
+		sds.a <- matrix(sds.a, nrow(object), ncol(object), byrow=TRUE)
1823
+		sds.b <- matrix(sds.b, nrow(object), ncol(object), byrow=TRUE)
1824
+	} else sds.a <- sds.b <- matrix(0, nrow(object), ncol(object))
1825
+	emissionProbs <- array(NA, dim=c(nrow(object),
1826
+				   ncol(object), length(cnStates)))
1827
+	corr <- getParam(object, "corr", batch)
1828
+	corrA.BB <- getParam(object, "corrA.BB", batch)
1829
+	corrB.AA <- getParam(object, "corrB.AA", batch)
1830
+	nuA <- getParam(object, "nuA", batch)
1831
+	nuB <- getParam(object, "nuB", batch)
1832
+	phiA <- getParam(object, "phiA", batch)
1833
+	phiB <- getParam(object, "phiB", batch)
1834
+	sig2A <- getParam(object, "sig2A", batch)
1835
+	sig2B <- getParam(object, "sig2B", batch)
1836
+	tau2A <- getParam(object, "tau2A", batch)
1837
+	tau2B <- getParam(object, "tau2B", batch)	
1838
+	a <- as.numeric(log2(A(object)))
1839
+	b <- as.numeric(log2(B(object)))
1840
+	
1841
+	for(k in seq(along=cnStates)){
1842
+		T <- cnStates[k]
1843
+		f.x.y <- matrix(0, sum(nrow(object)), ncol(object))
1844
+		for(copyA in 0:T){
1845
+			copyB <- T-copyA
1846
+			sigmaA <- sqrt(tau2A*(copyA==0) + sig2A*(copyA > 0))
1847
+			sigmaB <- sqrt(tau2B*(copyB==0) + sig2B*(copyB > 0))
1848
+			if(copyA == 0 & copyB > 0) r <- corrA.BB
1849
+			if(copyA > 0 & copyB == 0) r <- corrB.AA
1850
+			if(copyA > 0 & copyB > 0) r <- corr
1851
+			if(copyA == 0 & copyB == 0) r <- 0
1852
+			muA <- log2(nuA+copyA*phiA)
1853
+			muB <- log2(nuB+copyB*phiB)
1854
+
1855
+			sigmaA <- matrix(sigmaA, nrow=length(sigmaA), ncol=ncol(object), byrow=FALSE)
1856
+			sigmaB <- matrix(sigmaB, nrow=length(sigmaB), ncol=ncol(object), byrow=FALSE)
1857
+			## scale the variances by a sample-specific estimate of the variances
1858
+			## var(I_A, ijp) = sigma_A_ip * sigma_A_jp			
1859
+			sigmaA <- sigmaA+sds.a
1860
+			sigmaB <- sigmaB+sds.b
1861
+			meanA <- as.numeric(matrix(muA, nrow(object), ncol(object)))
1862
+			meanB <- as.numeric(matrix(muB, nrow(object), ncol(object)))			
1863
+			rho <- as.numeric(matrix(r, nrow(object), ncol(object)))
1864
+			sd.A <- as.numeric(matrix(sigmaA, nrow(object), ncol(object)))
1865
+			sd.B <- as.numeric(matrix(sigmaB, nrow(object), ncol(object)))
1866
+			Q.x.y <- 1/(1-rho^2)*(((a - meanA)/sd.A)^2 + ((b - meanB)/sd.B)^2 - 2*rho*((a - meanA)*(b - meanB))/(sd.A*sd.B))
1867
+			## For CN states > 1, assume that any of the possible genotypes are equally likely a priori...just take the sum
1868
+			## For instance, for state copy number 2 there are three combinations: AA, AB, BB
1869
+			##   -- two of the three combinations should be near zero.
1870
+			## TODO: copy-neutral LOH would put near-zero mass on both copyA > 0, copyB > 0
1871
+			f.x.y <- f.x.y + matrix(1/(2*pi*sd.A*sd.B*sqrt(1-rho^2))*exp(-0.5*Q.x.y), nrow(object), ncol(object))
1872
+		}
1873
+		emissionProbs[, , k] <- log(f.x.y)
1874
+	}
1875
+	emissionProbs
1876
+}
1877
+
1878
+setMethod("update", "character", function(object, ...){
1879
+	crlmmFile <- object
1880
+	for(i in seq(along=crlmmFile)){
1881
+		cat("Processing ", crlmmFile[i], "...\n")
1882
+		load(crlmmFile[i])
1883
+		crlmmSetList <- get("crlmmSetList")
1884
+		if(length(crlmmSetList) == 3) next()  ##copy number object already present. 
1885
+		if(!"chromosome" %in% fvarLabels(crlmmSetList[[1]])){
1886
+			featureData(crlmmSetList[[1]]) <- addFeatureAnnotation(crlmmSetList)
1887
+		} 
1888
+		CHR <- unique(chromosome(crlmmSetList[[1]]))
1889
+		if(length(CHR) > 1) stop("More than one chromosome in the object. This method requires one chromosome at a time.")		
1890
+		if(CHR==24){
1891
+			message("skipping chromosome 24")
1892
+			next()
1893
+		}
1894
+		cat("----------------------------------------------------------------------------\n")
1895
+		cat("-        Estimating copy number for chromosome", CHR, "\n")
1896
+		cat("----------------------------------------------------------------------------\n")		
1897
+		crlmmSetList <- update(crlmmSetList, CHR=CHR, ...)
1898
+		save(crlmmSetList, file=crlmmFile[i])
1899
+		rm(crlmmSetList); gc();
1900
+	}
1901
+})
1902
+
1903
+
1904
+addFeatureAnnotation.SnpCallSetPlus <- function(object, ...){
1905
+	##if(missing(CHR)) stop("Must specificy chromosome")
1906
+	cdfName <- annotation(object)
1907
+	pkgname <- paste(cdfName, "Crlmm", sep="")	
1908
+	path <- system.file("extdata", package=pkgname)
1909
+	loader("cnProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv)
1910
+	cnProbes <- get("cnProbes", envir=.crlmmPkgEnv)
1911
+	loader("snpProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv)
1912
+	snpProbes <- get("snpProbes", envir=.crlmmPkgEnv)	
1913
+
1914
+	##Feature Data
1915
+	isSnp <- rep(as.integer(0), nrow(object))
1916
+	isSnp[snpIndex(object)] <- as.integer(1)
1917
+	names(isSnp) <- featureNames(object)
1918
+##	snps <- featureNames(object)[snpIndex(object)]
1919
+##	nps <- featureNames(object)[cnIndex(object)]
1920
+	snps <- featureNames(object)[isSnp == 1]
1921
+	nps <- featureNames(object)[isSnp == 0]
1922
+	position.snp <- snpProbes[match(snps, rownames(snpProbes)), "position"]
1923
+	names(position.snp) <- snps
1924
+	position.np <- cnProbes[match(nps, rownames(cnProbes)), "position"]
1925
+	names(position.np) <- nps
1926
+
1927
+	J <- grep("chr", colnames(snpProbes))
1928
+	chr.snp <- snpProbes[match(snps, rownames(snpProbes)), J]
1929
+	chr.np <- cnProbes[match(nps, rownames(cnProbes)), J]	
1930
+	
1931
+	position <- c(position.snp, position.np)
1932
+	chrom <- c(chr.snp, chr.np)
1933
+
1934
+	##We may not have annotation for all of the snps
1935
+	if(!all(featureNames(object) %in% names(position))){
1936
+		message("Dropping loci for which physical position  is not available.")
1937
+		object <- object[featureNames(object) %in% names(position), ]
1938
+	}
1939
+	ix <- match(featureNames(object), names(position))
1940
+	position <- position[ix]
1941
+	chrom <- chrom[ix]
1942
+	##require(SNPchip)