Browse code

begin adding support for ff. added an option to use poe

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@44778 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 20/02/2010 23:39:45
Showing19 changed files

... ...
@@ -385,3 +385,19 @@ is stored in the assayData slot.
385 385
 
386 386
 2010-02-07 B. Carvalho - committed version 1.5.25
387 387
  * Updated requirement of affyio to support Win64
388
+
389
+2010-02-20 R. Scharpf - committed version 1.5.25
390
+
391
+** begin adding support for ff
392
+
393
+** added use.poe option for nonpolymorphic loci
394
+
395
+** crlmm does not return anything, but saves the following objects:
396
+      - genotypes.rda
397
+      - confs.rda
398
+      - snp_rmaResult.rda  (gns, sns, SNR, SKW, mixtureParams, cdfName)
399
+      - A.rda
400
+      - B.rda
401
+      - cn_rmaResult.rda (SKW)
402
+
403
+
... ...
@@ -1,17 +1,17 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.5.24
5
-Date: 2010-02-05
4
+Version: 1.5.25
5
+Date: 2010-02-20
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>
7 7
 Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
8 8
 Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 5.0, 6.0, and Illumina platforms
9 9
 License: Artistic-2.0
10
-Depends: methods, Biobase (>= 2.7.2), R (>= 2.11.0), oligoClasses (>= 1.9.21)
10
+Depends: R (>= 2.11.0), methods, Biobase (>= 2.7.2), oligoClasses (>= 1.9.28)
11 11
 Imports: affyio (>= 1.15.2), preprocessCore, utils, stats, genefilter, splines, mvtnorm, ellipse, SNPchip
12
-Suggests: hapmapsnp5, hapmapsnp6, genomewidesnp5Crlmm (>= 1.0.2),genomewidesnp6Crlmm (>= 1.0.2), snpMatrix
12
+Enhances: ff
13
+Suggests: hapmapsnp5, hapmapsnp6, genomewidesnp5Crlmm (>= 1.0.2),genomewidesnp6Crlmm (>= 1.0.2), snpMatrix, metaArray
13 14
 Collate: AllGenerics.R
14
-         methods-AlleleSet.R
15 15
 	 methods-CNSet.R
16 16
 	 methods-eSet.R
17 17
          methods-SnpSuperSet.R
... ...
@@ -1,3 +1,4 @@
1
+## crlmm NAMESPACE
1 2
 useDynLib("crlmm", .registration=TRUE)
2 3
 
3 4
 ## Biobase
... ...
@@ -23,7 +24,7 @@ importClassesFrom(oligoClasses, SnpSuperSet, AlleleSet)
23 24
 
24 25
 importMethodsFrom(oligoClasses, allele, calls, "calls<-", confs,
25 26
 		  "confs<-", cnConfidence, "cnConfidence<-", 
26
-		  isSnp, chromosome, position, CA, "CA<-", CB, "CB<-")
27
+		  isSnp, chromosome, position, CA, "CA<-", CB, "CB<-", A, B)
27 28
 
28 29
 
29 30
 importFrom(oligoClasses, chromosome2integer, celfileDate, list.celfiles)
... ...
@@ -49,14 +50,14 @@ importFrom(mvtnorm, dmvnorm)
49 50
 
50 51
 importFrom(ellipse, ellipse)
51 52
 
52
-exportMethods(A, B, copyNumber)
53
+exportMethods(copyNumber)
53 54
 export(cnOptions, crlmm, crlmmIllumina, crlmmCopynumber, ellipse, readIdatFiles, snprma, getParam) 
54 55
 
55
-##export(##viterbi.CNSet, ##move to VanillaICE
56
-##	combineIntensities,
57
-##	whichPlatform,
58
-##	isValidCdfName,
59
-##	crlmmWrapper,
60
-##        withinGenotypeMoments,  
61
-##	locationAndScale, getParam.SnpSuperSet)
62
-export(thresholdModelParams, computeCopynumber.CNSet, nuphiAllele, coefs, biasAdjNP)
56
+##export everything that does not start with a .
57
+##exportPattern("^[^\\.]")
58
+
59
+##export(thresholdModelParams, computeCopynumber.CNSet, nuphiAllele, coefs, biasAdjNP, 
60
+##       nonpolymorphic.poe, crlmmWrapper,
61
+##       loadIlluminaCallSet, loadIlluminaRG, loadIlluminaCnrma,
62
+##       cnrma, 
63
+##       crlmmGT, oneBatch)
... ...
@@ -1,9 +1,7 @@
1
-setGeneric("A", function(object) standardGeneric("A"))
2
-setGeneric("B", function(object) standardGeneric("B"))
3 1
 ##setGeneric("A<-", function(object, value) standardGeneric("A<-"))
4 2
 ##setGeneric("B<-", function(object, value) standardGeneric("B<-"))
5 3
 
6
-setGeneric("getParam", function(object, name, batch) standardGeneric("getParam"))
4
+setGeneric("getParam", function(object, name, ...) standardGeneric("getParam"))
7 5
 setGeneric("cnIndex", function(object) standardGeneric("cnIndex"))
8 6
 setGeneric("cnNames", function(object) standardGeneric("cnNames"))
9 7
 setGeneric("computeCopynumber", function(object, cnOptions) standardGeneric("computeCopynumber"))
... ...
@@ -153,148 +153,387 @@ predictGender <- function(res, cdfName="genomewidesnp6", SNRMin=5){
153 153
 	return(gender)
154 154
 }
155 155
 
156
-combineIntensities <- function(res, NP=NULL, callSet){
157
-	rownames(res$B) <- rownames(res$A) <- res$gns
158
-	colnames(res$B) <- colnames(res$A) <- res$sns
159
-	if(!is.null(NP)){
160
-		blank <- matrix(NA, nrow(NP), ncol(NP))
161
-		dimnames(blank) <- dimnames(NP)
162
-		A <- rbind(res$A, NP)
163
-		B <- rbind(res$B, blank)
156
+##crlmmCopynumber <- function(filenames, cnOptions, object, ...){
157
+##	if(!missing(object)){
158
+##		stopifnot(class(object) == "CNSet")
159
+##		createIntermediateObjects <- FALSE
160
+##	} else {
161
+##		createIntermediateObjects <- TRUE
162
+##		## 33G for 1239 files
163
+##		crlmmResults <- crlmmWrapper(filenames, cnOptions, ...)
164
+##		snprmaResult <- crlmmResults[["snprmaResult"]]
165
+##		cnrmaResult <- crlmmResults[["cnrmaResult"]]
166
+##		callSet <- crlmmResults[["callSet"]]
167
+##		rm(crlmmResults); gc()
168
+##		annotation(callSet) <- cnOptions[["cdfName"]]
169
+##		stopifnot(identical(featureNames(callSet), snprmaResult[["gns"]]))
170
+##		path <- system.file("extdata", package=paste(annotation(callSet), "Crlmm", sep=""))	
171
+##		load(file.path(path, "snpProbes.rda"))
172
+##		snpProbes <- get("snpProbes")
173
+##		load(file.path(path, "cnProbes.rda"))
174
+##		cnProbes <- get("cnProbes")	
175
+##		k <- grep("chr", colnames(snpProbes))
176
+##		if(length(k) < 1) stop("chr or chromosome not in colnames(snpProbes)")
177
+##	}
178
+##	set.seed(cnOptions[["seed"]])  ##for reproducibility
179
+##	cnFile <- cnOptions[["cnFile"]]
180
+##	chromosome <- cnOptions[["chromosome"]]
181
+##	SNRmin <- cnOptions[["SNRmin"]]
182
+##	for(CHR in chromosome){
183
+##		cat("Chromosome ", CHR, "\n")
184
+##		if(createIntermediateObjects){
185
+##			snps <- rownames(snpProbes)[snpProbes[, k] == CHR]
186
+##			cnps <- rownames(cnProbes)[cnProbes[, k] == CHR]
187
+##			index.snps <- match(snps, featureNames(callSet))
188
+##			index.nps <- match(cnps, rownames(cnrmaResult[["NP"]]))
189
+##			if(!is.null(cnrmaResult)){
190
+##				npI <- cnrmaResult$NP[index.nps, ]
191
+##			} else npI <- NULL
192
+##			snpI <- list(A=snprmaResult$A[index.snps, ],
193
+##				     B=snprmaResult$B[index.snps, ],
194
+##				     sns=snprmaResult$sns,
195
+##				     gns=snprmaResult$gns[index.snps],
196
+##				     SNR=snprmaResult$SNR,
197
+##				     SKW=snprmaResult$SKW,
198
+##				     mixtureParams=snprmaResult$mixtureParams,
199
+##				     cdfName=snprmaResult$cdfName)
200
+##			cnOptions[["batch"]] <- cnOptions[["batch"]][snpI[["SNR"]]  >= SNRmin]
201
+##			cnSet <- combineIntensities(res=snpI,
202
+##						    NP=npI,
203
+##						    callSet=callSet[index.snps, ])
204
+##			if(any(cnSet$SNR > SNRmin)){
205
+##				message(paste("Excluding samples with SNR < ", SNRmin))
206
+##				cnSet <- cnSet[, cnSet$SNR >= SNRmin]
207
+##			}
208
+##			rm(snpI, npI, snps, cnps, index.snps, index.nps); gc()
209
+##			pData(cnSet)$batch <- cnOptions[["batch"]]
210
+##			featureData(cnSet) <- lm.parameters(cnSet, cnOptions)
211
+##		} else {
212
+##			cnSet <- object
213
+##		}
214
+##		if(CHR != 24){
215
+##			## 60G for 1239 files
216
+##			cnSet <- computeCopynumber(cnSet, cnOptions)
217
+##		} else{
218
+##			message("Copy number estimates not available for chromosome Y.  Saving only the 'callSet' object for this chromosome")
219
+##			alleleSet <- cnSet
220
+##			save(alleleSet, file=file.path(cnOptions[["outdir"]], paste("alleleSet_", CHR, ".rda", sep="")))
221
+##			rm(cnSet, alleleSet); gc()
222
+##			next()
223
+##		}
224
+##		if(length(chromosome) == 1){
225
+##			if(cnOptions[["save.cnset"]]){
226
+##				save(cnSet, file=file.path(paste(cnFile, "_", CHR, ".rda", sep="")))				
227
+##				##save(cnSet, file=file.path(cnOptions[["outdir"]], paste("cnSet_", CHR, ".rda", sep="")))
228
+##			}
229
+##		}
230
+##		if(length(chromosome) > 1){
231
+##			message(paste("Saving ", file.path(cnOptions[["outdir"]], paste(cnFile, "_", CHR, ".rda", sep=""))))
232
+##			save(cnSet, file=file.path(paste(cnFile, "_", CHR, ".rda", sep="")))				
233
+##			##save(cnSet, file=file.path(cnOptions[["outdir"]], paste("cnSet_", CHR, ".rda", sep="")))
234
+##			rm(cnSet); gc()
235
+##		} else {
236
+##			return(cnSet)
237
+##		}
238
+##	}
239
+##	saved.objects <- list.files(cnOptions[["outdir"]], pattern=cnFile, full.names=TRUE)		
240
+##	return(saved.objects)
241
+##}
242
+
243
+
244
+
245
+crlmmCopynumber <- function(filenames, cnOptions, object, ...){
246
+	cnOpts <- cnOptions
247
+	batchSize <- cnOptions[["crlmmBatchSize"]]
248
+	batch <- cnOptions[["batch"]]
249
+	outdir <- cnOptions[["outdir"]]
250
+	##if necessary, split plates in separate batches
251
+	if(length(filenames) > batchSize){
252
+		L <- length(table(batch))
253
+		message("Processing the files in")
254
+		batchList <- split(names(table(batch)), rep(1:L, each=10, length.out=L))
255
+		for(i in seq(along=batchList)){
256
+			index <- as.character(batch) %in% as.character(batchList[[i]])
257
+			fns <- filenames[index]
258
+			cnOpts[["batch"]] <- cnOptions[["batch"]][index]
259
+			cnOpts[["AFile"]] <- paste(outdir, "/", paste(cnOptions[["AFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
260
+			cnOpts[["BFile"]] <- paste(outdir, "/", paste(cnOptions[["BFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
261
+			cnOpts[["callsFile"]] <- paste(outdir, "/", paste(cnOptions[["callsFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
262
+			cnOpts[["confsFile"]] <- paste(outdir, "/", paste(cnOptions[["confsFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
263
+			cnOpts[["snprmaFile"]] <- paste(outdir, "/", paste(cnOptions[["snprmaFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
264
+			cnOpts[["cnrmaFile"]] <- paste(outdir, "/", paste(cnOptions[["cnrmaFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
265
+			cnOpts[["cnFile"]] <- file.path(outdir, paste(cnOptions[["cnFile"]], i, sep="_crlmmBatch"))
266
+			cnOpts[["rgFile"]] <- file.path(outdir, paste(cnOptions[["rgFile"]], i, sep="_crlmmBatch"))
267
+			crlmmCopynumber.crlmmBatch(fns, cnOpts, object, ...)
268
+		}
164 269
 	} else {
165
-		A <- res$A
166
-		B <- res$B
270
+		cnOpts[["AFile"]] <- paste(outdir, "/", cnOptions[["AFile"]], ".rda", sep="")
271
+		cnOpts[["BFile"]] <- paste(outdir, "/", cnOptions[["BFile"]], ".rda", sep="")
272
+		cnOpts[["callsFile"]] <- paste(outdir, "/", cnOptions[["callsFile"]], ".rda", sep="")
273
+		cnOpts[["confsFile"]] <- paste(outdir, "/", cnOptions[["confsFile"]], ".rda", sep="")
274
+		cnOpts[["snprmaFile"]] <- paste(outdir, "/",cnOptions[["snprmaFile"]], ".rda", sep="")
275
+		cnOpts[["cnrmaFile"]] <- paste(outdir, "/", cnOptions[["cnrmaFile"]], ".rda", sep="")
276
+		cnOpts[["cnFile"]] <- file.path(outdir, cnOptions[["cnFile"]])
277
+		cnOpts[["rgFile"]] <- file.path(outdir, cnOptions[["rgFile"]])
278
+		crlmmCopynumber.crlmmBatch(filenames, cnOpts, object, ...)
167 279
 	}
168
-	dimnames(B) <- dimnames(A)
169
-	index.snps <- match(res$gns, rownames(A))
170
-	callsConfs <- calls <- matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A))
171
-	
172
-	calls[index.snps, ] <- calls(callSet)
173
-	callsConfs[index.snps, ] <- assayData(callSet)[["callProbability"]]
174
-	fd <- data.frame(matrix(NA, nrow(calls), length(fvarLabels(callSet))))
175
-	fd[index.snps, ] <- fData(callSet)
176
-	rownames(fd) <- rownames(A)
177
-	colnames(fd) <- fvarLabels(callSet)
178
-	fD <- new("AnnotatedDataFrame",
179
-		  data=data.frame(fd),
180
-		  varMetadata=data.frame(labelDescription=colnames(fd), row.names=colnames(fd)))
181
-	superSet <- new("CNSet",
182
-			CA=matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A)),
183
-			CB=matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A)),
184
-			alleleA=A,
185
-			alleleB=B,
186
-			call=calls,
187
-			callProbability=callsConfs,
188
-			featureData=fD,
189
-			phenoData=phenoData(callSet),
190
-			experimentData=experimentData(callSet),
191
-			protocolData=protocolData(callSet),
192
-			annotation=annotation(callSet))
193
-	return(superSet)
194 280
 }
195 281
 
196
-harmonizeDimnamesTo <- function(object1, object2){
197
-	#object2 should be a subset of object 1
198
-	object2 <- object2[featureNames(object2) %in% featureNames(object1), ]
199
-	object1 <- object1[match(featureNames(object2), featureNames(object1)), ]
200
-	object1 <- object1[, match(sampleNames(object2), sampleNames(object1))]
201
-	stopifnot(all.equal(featureNames(object1), featureNames(object2)))
202
-	stopifnot(all.equal(sampleNames(object1), sampleNames(object2)))
203
-	return(object1)
204
-}
205 282
 
206
-crlmmCopynumber <- function(filenames, cnOptions, object, ...){
283
+crlmmCopynumber.crlmmBatch <- function(filenames, cnOptions, object, ...){
284
+	set.seed(cnOptions[["seed"]])  ##for reproducibility
207 285
 	if(!missing(object)){
208 286
 		stopifnot(class(object) == "CNSet")
209 287
 		createIntermediateObjects <- FALSE
210 288
 	} else {
211 289
 		createIntermediateObjects <- TRUE
212
-		crlmmResults <- crlmmWrapper(filenames, cnOptions, ...)
213
-		snprmaResult <- crlmmResults[["snprmaResult"]]
214
-		cnrmaResult <- crlmmResults[["cnrmaResult"]]
215
-		callSet <- crlmmResults[["callSet"]]
216
-		rm(crlmmResults); gc()
217
-		annotation(callSet) <- cnOptions[["cdfName"]]
218
-		stopifnot(identical(featureNames(callSet), snprmaResult[["gns"]]))
219
-		path <- system.file("extdata", package=paste(annotation(callSet), "Crlmm", sep=""))	
220
-		load(file.path(path, "snpProbes.rda"))
221
-		snpProbes <- get("snpProbes")
222
-		load(file.path(path, "cnProbes.rda"))
223
-		cnProbes <- get("cnProbes")	
224
-		k <- grep("chr", colnames(snpProbes))
225
-		if(length(k) < 1) stop("chr or chromosome not in colnames(snpProbes)")
290
+		## 33G for 1239 files
291
+		cnOptions[["save.it"]] <- TRUE
292
+		##crlmmResults <- crlmmWrapper(filenames, cnOptions, ...)
293
+		crlmmWrapper(filenames, cnOptions, ...)
226 294
 	}
227
-	set.seed(cnOptions[["seed"]])  ##for reproducibility
295
+	load.it <- cnOptions[["load.it"]]
296
+	outdir <- cnOptions[["outdir"]]
297
+	snprmaFile <- cnOptions[["snprmaFile"]]
298
+	cnFile <- cnOptions[["cnFile"]]
299
+	cnrmaFile <- cnOptions[["cnrmaFile"]]
300
+	callsFile <- cnOptions[["callsFile"]]
301
+	confsFile <- cnOptions[["confsFile"]]
302
+	AFile <- cnOptions[["AFile"]]
303
+	BFile <- cnOptions[["BFile"]]
304
+	path <- system.file("extdata", package=paste(cnOptions[["cdfName"]], "Crlmm", sep=""))
305
+	load(file.path(path, "snpProbes.rda"))
306
+	snpProbes <- get("snpProbes")
307
+	load(file.path(path, "cnProbes.rda"))
308
+	cnProbes <- get("cnProbes")	
309
+	k <- grep("chr", colnames(snpProbes))
310
+	message("Attaching quantile-normalized intensities...")
311
+	##cwd <- getwd()
312
+	##setwd(dirname(snprmaFile[1]))
313
+	load(AFile)
314
+	if(isPackageLoaded("ff")) open(A)
315
+	load(BFile)
316
+	if(isPackageLoaded("ff")) open(B)	
317
+	message("Loading genotype calls...")	
318
+	load(callsFile)
319
+	calls <- get("calls")
320
+	if(isPackageLoaded("ff")) open(calls)	
321
+	load(confsFile)
322
+	confs <- get("confs")
323
+	if(isPackageLoaded("ff")) open(confs)	
324
+	fns <- rownames(A)
325
+	nr <- nrow(A); nc <- ncol(A)
326
+	if(isPackageLoaded("ff")){
327
+		##initialize CA, CB
328
+		if(!file.exists(file.path(outdir, "CA.rda"))){
329
+			CA <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dimnames(A))
330
+
331
+		} else{
332
+			load(file.path(outdir, "CA.rda"))
333
+			if(!all(dim(CA) == dim(A)))
334
+				CA <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dimnames(A), overwrite=TRUE)				
335
+			open(CA)
336
+		}
337
+		if(!file.exists(file.path(outdir, "CB.rda"))){		
338
+			CB <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dimnames(A))
339
+		} else{
340
+			load(file.path(outdir, "CB.rda"))
341
+			if(!all(dim(CB) == dim(A)))
342
+				CB <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dimnames(A), overwrite=TRUE)							
343
+			open(CB)
344
+		}
345
+	}
346
+	##cnFile <- cnOptions[["cnFile"]]
228 347
 	chromosome <- cnOptions[["chromosome"]]
229 348
 	SNRmin <- cnOptions[["SNRmin"]]
349
+	ll <- 1
350
+	scanDates <- data.frame(ScanDate=sapply(filenames, celfileDate))
351
+	rownames(scanDates) <- basename(rownames(scanDates))
352
+	protocoldata <- new("AnnotatedDataFrame",
353
+			    data=scanDates,
354
+			    varMetadata=data.frame(labelDescription=colnames(scanDates),
355
+			    row.names=colnames(scanDates)))
356
+	load(file.path(cnOptions[["outdir"]], "sampleStats.rda"))
357
+	sampleStats <- data.frame(sampleStats, batch=cnOptions[["batch"]])
358
+	pD <- new("AnnotatedDataFrame",
359
+		  data=sampleStats,
360
+		  varMetadata=data.frame(labelDescription=colnames(sampleStats)))
361
+	batch <- cnOptions[["batch"]]
230 362
 	for(CHR in chromosome){
363
+		snps <- rownames(snpProbes)[snpProbes[, k] == CHR]
364
+		cns <- rownames(cnProbes)[cnProbes[, k] == CHR]		
365
+		index.snps <- match(snps, rownames(A))
366
+		index.cn <- match(cns, rownames(A))
367
+		row.index <- c(index.snps, index.cn)
231 368
 		cat("Chromosome ", CHR, "\n")
232
-		if(createIntermediateObjects){
233
-			snps <- rownames(snpProbes)[snpProbes[, k] == CHR]
234
-			cnps <- rownames(cnProbes)[cnProbes[, k] == CHR]
235
-			index.snps <- match(snps, featureNames(callSet))
236
-			index.nps <- match(cnps, rownames(cnrmaResult[["NP"]]))
237
-			if(!is.null(cnrmaResult)){
238
-				npI <- cnrmaResult$NP[index.nps, ]
239
-			} else npI <- NULL
240
-			snpI <- list(A=snprmaResult$A[index.snps, ],
241
-				     B=snprmaResult$B[index.snps, ],
242
-				     sns=snprmaResult$sns,
243
-				     gns=snprmaResult$gns[index.snps],
244
-				     SNR=snprmaResult$SNR,
245
-				     SKW=snprmaResult$SKW,
246
-				     mixtureParams=snprmaResult$mixtureParams,
247
-				     cdfName=snprmaResult$cdfName)
248
-			cnOptions[["batch"]] <- cnOptions[["batch"]][snpI[["SNR"]]  >= SNRmin]
249
-			cnSet <- combineIntensities(res=snpI,
250
-						    NP=npI,
251
-						    callSet=callSet[index.snps, ])
369
+		for(i in seq(along=unique(batch))){
370
+			PLATE <- unique(batch)[i]
371
+			sample.index <- which(batch==PLATE)
372
+			##cnOptions[["batch"]] <- cnOptions[["batch"]][snpI[["SNR"]]  >= SNRmin]
373
+			if(isPackageLoaded("ff")){
374
+				ca <- CA[row.index, sample.index]
375
+				cb <- CB[row.index, sample.index]
376
+			} else{
377
+				dns <- dimnames(A[row.index, sample.index])
378
+				cb <- ca <- matrix(NA, nr=length(row.index), nc=length(sample.index), dimnames=dns)
379
+			}
380
+			cnSet <- new("CNSet",
381
+				     alleleA=A[row.index, sample.index],
382
+				     alleleB=B[row.index, sample.index],
383
+				     call=calls[row.index, sample.index],
384
+				     callProbability=confs[row.index, sample.index],
385
+				     CA=ca,
386
+				     CB=cb,
387
+				     featureData=annotatedDataFrameFrom(A[row.index, sample.index], byrow=TRUE),
388
+				     phenoData=pD[sample.index, ],
389
+				     protocolData=protocoldata[sample.index, ],
390
+				     annotation=cnOptions[["cdfName"]])			
252 391
 			if(any(cnSet$SNR > SNRmin)){
253 392
 				message(paste("Excluding samples with SNR < ", SNRmin))
254 393
 				cnSet <- cnSet[, cnSet$SNR >= SNRmin]
255 394
 			}
256
-			rm(snpI, npI, snps, cnps, index.snps, index.nps); gc()
257
-			pData(cnSet)$batch <- cnOptions[["batch"]]
395
+			##rm(snpI, NP, gtSet); gc()
258 396
 			featureData(cnSet) <- lm.parameters(cnSet, cnOptions)
259
-		} else {
260
-			cnSet <- object
261
-		}
262
-		if(CHR != 24){		
397
+		} 
398
+		if(CHR != 24){
399
+			## 60G for 1239 files
263 400
 			cnSet <- computeCopynumber(cnSet, cnOptions)
401
+			if(isPackageLoaded("ff")){
402
+				CA[row.index, sample.index] <- cnSet@assayData[["CA"]]
403
+				CB[row.index, sample.index] <- cnSet@assayData[["CB"]]
404
+			}
264 405
 		} else{
265 406
 			message("Copy number estimates not available for chromosome Y.  Saving only the 'callSet' object for this chromosome")
266 407
 			alleleSet <- cnSet
267
-			save(alleleSet, file=file.path(cnOptions[["outdir"]], paste("alleleSet_", CHR, ".rda", sep="")))
268
-			rm(cnSet, alleleSet); gc()
408
+			rm(cnSet) ; gc()
409
+			save(alleleSet, file=file.path(cnOptions[["outdir"]], paste("alleleSet_", PLATE, "_", CHR, ".rda", sep="")))
410
+			rm(alleleSet); gc()
269 411
 			next()
270 412
 		}
271 413
 		if(length(chromosome) == 1){
272 414
 			if(cnOptions[["save.cnset"]]){
273
-				save(cnSet, file=file.path(cnOptions[["outdir"]], paste("cnSet_", CHR, ".rda", sep="")))
415
+				save(cnSet, file=paste(cnFile, "_", PLATE, "_", CHR, ".rda", sep=""))				
274 416
 			}
275 417
 		}
276 418
 		if(length(chromosome) > 1){
277
-			save(cnSet, file=file.path(cnOptions[["outdir"]], paste("cnSet_", CHR, ".rda", sep="")))
419
+			save(cnSet, file=paste(cnFile, "_", PLATE, "_", CHR, ".rda", sep=""))
420
+			##save(cnSet, file=file.path(cnOptions[["outdir"]], paste("cnSet_", CHR, ".rda", sep="")))
278 421
 			rm(cnSet); gc()
279 422
 		} else {
423
+			if(isPackageLoaded("ff")) {
424
+				close(CA); close(CB)
425
+				save(CA, file=file.path(cnOptions[["outdir"]], "CA.rda"))
426
+				save(CB, file=file.path(cnOptions[["outdir"]], "CB.rda"))		
427
+			}			
280 428
 			return(cnSet)
281 429
 		}
282 430
 	}
283
-	saved.objects <- list.files(cnOptions[["outdir"]], pattern="cnSet", full.names=TRUE)		
284
-	return(saved.objects)
431
+	if(isPackageLoaded("ff")) {
432
+		close(CA); close(CB)
433
+		save(CA, file=file.path(cnOptions[["outdir"]], "CA.rda"))
434
+		save(CB, file=file.path(cnOptions[["outdir"]], "CB.rda"))		
435
+	}
285 436
 }
286 437
 
287 438
 
439
+loadIlluminaRG <- function(rgFile, crlmmFile, load.it, save.it,...){
440
+##	if(missing(rgFile)){
441
+##		##stop("must specify 'rgFile'.")
442
+##		rgFile <- file.path(dirname(crlmmFile), "rgFile.rda")
443
+##		message("rgFile not specified.  Using ", rgFile)
444
+##	}
445
+	if(!load.it){
446
+		RG <- readIdatFiles(...)
447
+		if(save.it) save(RG, file=rgFile)
448
+	}
449
+	if(load.it & !file.exists(rgFile)){
450
+		message("load.it is TRUE, but rgFile not present.  Attempting to read the idatFiles.")
451
+		RG <- readIdatFiles(...)
452
+		if(save.it) save(RG, file=rgFile)
453
+	}
454
+	if(load.it & file.exists(rgFile)){
455
+		message("Loading RG file")
456
+		load(rgFile)
457
+		RG <- get("RG")
458
+	}
459
+	return(RG)
460
+}
461
+
462
+loadIlluminaCallSet <- function(crlmmFile, snprmaFile, RG, load.it, save.it, cdfName){
463
+	if(!file.exists(crlmmFile) | !load.it){		
464
+		callSet <- crlmmIllumina(RG=RG,
465
+					 cdfName=cdfName,
466
+					 sns=sampleNames(RG),
467
+					 returnParams=TRUE,
468
+					 save.it=TRUE,
469
+					 intensityFile=snprmaFile)
470
+		if(save.it) save(callSet, file=crlmmFile)
471
+	} else {
472
+		message("Loading ", crlmmFile, "...")
473
+		load(crlmmFile)
474
+		callSet <- get("callSet")
475
+	}
476
+	protocolData(callSet) <- protocolData(RG)
477
+	return(callSet)
478
+}
479
+
480
+
481
+##loadAffyCallSet <- function(filenames, confsFile, callsFile, snprmaFile, load.it, save.it,  cdfName){
482
+##
483
+####		if(save.it){
484
+####			message("Saving callSet to", callsFile)
485
+####			##save(callSet, cnrmaResult, file=callsFile)
486
+####			save(callSet, file=callsFile)
487
+####		}
488
+####	} else {
489
+####		message("Loading ", callsFile, "...")
490
+####		##load(snprmaFile)				
491
+####		load(callsFile)
492
+####		callSet <- get("callSet")
493
+####		##cnrmaResult <- get("cnrmaResult")
494
+####	}
495
+##
496
+##}
497
+##	if(platform=="affymetrix") {
498
+##		protocolData(callSet)[["ScanDate"]] <- as.character(celDates(filenames))
499
+##		sampleNames(protocolData(callSet)) <- sampleNames(callSet)
500
+##	}
501
+
502
+##loadAffyCnrma <- function(filenames, cnrmaFile, cdfName, outdir, load.it, save.it, use.bigmemory=FALSE){
503
+##	if(!file.exists(cnrmaFile) | !load.it){	
504
+##		message("Quantile normalizing the copy number probes...")
505
+##		cnrmaResult <- cnrma2(filenames=filenames, cdfName=cdfName, outdir=outdir, cnrmaFile=cnrmaFile)
506
+##	} else cnrmaResult <- attach.big.matrix("NP.desc", outdir)
507
+##	return(cnrmaResult)
508
+##}
509
+
510
+loadIlluminaCnrma <- function(){
511
+	if(exists("cnAB")){
512
+		np.A <- as.integer(cnAB$A)
513
+		np.B <- as.integer(cnAB$B)
514
+		np <- ifelse(np.A > np.B, np.A, np.B)
515
+		np <- matrix(np, nrow(cnAB$A), ncol(cnAB$A))
516
+		rownames(np) <- cnAB$gns
517
+		colnames(np) <- cnAB$sns
518
+		cnAB$NP <- np
519
+		##sampleNames(callSet) <- res$sns
520
+		sampleNames(callSet) <- cnAB$sns
521
+		cnrmaResult <- get("cnAB")
522
+	} else cnrmaResult <- NULL
523
+	return(cnrmaResult)
524
+}
288 525
 
289 526
 crlmmWrapper <- function(filenames, cnOptions, ...){
290 527
 	cdfName <- cnOptions[["cdfName"]]
291 528
 	load.it <- cnOptions[["load.it"]]
292 529
 	save.it <- cnOptions[["save.it"]]
293
-	splitByChr <- cnOptions[["splitByChr"]]
294
-	crlmmFile <- cnOptions[["crlmmFile"]]
295
-	intensityFile=cnOptions[["intensityFile"]]
530
+	callsFile <- cnOptions[["callsFile"]]
531
+	confsFile <- cnOptions[["confsFile"]]
532
+	AFile=cnOptions[["AFile"]]
533
+	BFile=cnOptions[["BFile"]]	
534
+	snprmaFile=cnOptions[["snprmaFile"]]
535
+	cnrmaFile=cnOptions[["cnrmaFile"]]
296 536
 	rgFile=cnOptions[["rgFile"]]
297
-	##use.ff=cnOptions[["use.ff"]]
298 537
 	outdir <- cnOptions[["outdir"]]
299 538
 	if(missing(cdfName)) stop("cdfName is missing -- a valid cdfName is required.  See crlmm:::validCdfNames()")
300 539
 	platform <- whichPlatform(cdfName)
... ...
@@ -307,148 +546,57 @@ crlmmWrapper <- function(filenames, cnOptions, ...){
307 546
 			stop(cdfName, " is not a valid entry.  See crlmm:::validCdfNames(platform)")
308 547
 		} else cat("TRUE\n")
309 548
 	}
310
-	if(missing(intensityFile)) stop("must specify 'intensityFile'.")
311
-	if(missing(crlmmFile)) stop("must specify 'crlmmFile'.")
312
-	if(platform == "illumina"){
313
-		if(missing(rgFile)){
314
-			##stop("must specify 'rgFile'.")
315
-			rgFile <- file.path(dirname(crlmmFile), "rgFile.rda")
316
-			message("rgFile not specified.  Using ", rgFile)
317
-		}
318
-		if(!load.it){
319
-			RG <- readIdatFiles(...)
320
-			if(save.it) save(RG, file=rgFile)
321
-		}
322
-		if(load.it & !file.exists(rgFile)){
323
-			message("load.it is TRUE, but rgFile not present.  Attempting to read the idatFiles.")
324
-			RG <- readIdatFiles(...)
325
-			if(save.it) save(RG, file=rgFile)
326
-		}
327
-		if(load.it & file.exists(rgFile)){
328
-			message("Loading RG file")
329
-			load(rgFile)
330
-			RG <- get("RG")
331
-		}
332
-	}
333
-	if(!(file.exists(dirname(crlmmFile)))) stop(dirname(crlmmFile), " does not exist.")
334
-	if(!(file.exists(dirname(intensityFile)))) stop(dirname(intensityFile), " does not exist.")
335
-	##---------------------------------------------------------------------------
336
-	## FIX
337
-	outfiles <- file.path(dirname(crlmmFile), paste("crlmmSetList_", 1:24, ".rda", sep=""))
338
-	if(load.it & all(file.exists(outfiles))){
339
-		load(outfiles[1])
340
-		crlmmSetList <- get("crlmmSetList")
341
-		if(!all(sampleNames(crlmmSetList) == basename(filenames))){
342
-			stop("load.it is TRUE, but sampleNames(crlmmSetList != basename(filenames))")
343
-		} else{
344
-			return("load.it is TRUE and 'crlmmSetList_<CHR>.rda' objects found. Nothing to do...")
345
-		}
346
-	}
549
+	if(platform == "illumina") RG <- loadIlluminaRG(rgFile, callsFile, load.it, save.it)
550
+	if(!(file.exists(dirname(callsFile)))) stop(dirname(callsFile), " does not exist.")
551
+	if(!(file.exists(dirname(snprmaFile[1])))) stop(dirname(snprmaFile[1]), " does not exist.")
347 552
 	if(load.it){
348
-		if(!file.exists(crlmmFile)){
349
-			message("load.it is TRUE, but ", crlmmFile, " does not exist.  Rerunning the genotype calling algorithm") 
350
-			load.it <- FALSE
553
+		if(!file.exists(callsFile)){
554
+			message("load.it is TRUE, but ", callsFile, " does not exist.  Rerunning the genotype calling algorithm") 
555
+			##load.it <- FALSE
351 556
 		}
352 557
 	}
353 558
 	if(platform == "affymetrix"){
354
-		if(!file.exists(crlmmFile) | !load.it){
355
-			callSet <- crlmm(filenames=filenames,
356
-					     cdfName=cdfName,
357
-					     save.it=TRUE,
358
-					     load.it=load.it,
359
-					     intensityFile=intensityFile)
360
-			message("Quantile normalizing the copy number probes...")		
361
-			cnrmaResult <- cnrma(filenames=filenames, cdfName=cdfName, outdir=outdir)
362
-			if(save.it){
363
-				message("Saving callSet and cnrmaResult to", crlmmFile)
364
-				save(callSet, cnrmaResult, file=crlmmFile)
365
-			}
366
-		} else {
367
-			message("Loading ", crlmmFile, "...")
368
-			load(intensityFile)				
369
-			load(crlmmFile)
370
-			callSet <- get("callSet")
371
-			cnrmaResult <- get("cnrmaResult")
372
-		}
373
-		scanDates <- data.frame(ScanDate=sapply(filenames, celfileDate))
374
-		protocolData(callSet) <- new("AnnotatedDataFrame",
375
-					     data=scanDates,
376
-					     varMetadata=data.frame(labelDescription=colnames(scanDates),
377
-					     row.names=colnames(scanDates)))
378
-	}
379
-	if(platform == "illumina"){
380
-		if(!file.exists(crlmmFile) | !load.it){		
381
-			callSet <- crlmmIllumina(RG=RG,
382
-						 cdfName=cdfName,
383
-						 sns=sampleNames(RG),
384
-						 returnParams=TRUE,
385
-						 save.it=TRUE,
386
-						 intensityFile=intensityFile)
387
-			if(save.it) save(callSet, file=crlmmFile)
388
-		} else {
389
-			message("Loading ", crlmmFile, "...")
390
-			load(crlmmFile)
391
-			callSet <- get("callSet")
559
+		if(!file.exists(callsFile) | !load.it){
560
+			crlmm(filenames=filenames,
561
+			      cdfName=cdfName,
562
+			      save.it=TRUE,
563
+			      load.it=load.it,
564
+			      intensityFile=snprmaFile,
565
+			      callsFile=callsFile,
566
+			      confsFile=confsFile,
567
+			      AFile=AFile,
568
+			      BFile=BFile)
392 569
 		}
393
-		protocolData(callSet) <- protocolData(RG)
394 570
 	}
395
-	if(platform=="affymetrix") {
396
-		protocolData(callSet)[["ScanDate"]] <- as.character(celDates(filenames))
397
-		sampleNames(protocolData(callSet)) <- sampleNames(callSet)
398
-	}	
399
-	load(intensityFile)
400
-	snprmaResult <- get("res")
401
-	if(platform=="illumina"){
402
-		if(exists("cnAB")){
403
-			np.A <- as.integer(cnAB$A)
404
-			np.B <- as.integer(cnAB$B)
405
-			np <- ifelse(np.A > np.B, np.A, np.B)
406
-			np <- matrix(np, nrow(cnAB$A), ncol(cnAB$A))
407
-			rownames(np) <- cnAB$gns
408
-			colnames(np) <- cnAB$sns
409
-			cnAB$NP <- np
410
-			##sampleNames(callSet) <- res$sns
411
-			sampleNames(callSet) <- cnAB$sns
412
-			cnrmaResult <- get("cnAB")
413
-		} else cnrmaResult <- NULL
571
+	gc()
572
+	if(platform == "illumina") {
573
+		browser()
574
+		callSet <- loadIlluminaCallSet(callsFile, snprmaFile, RG, load.it, save.it, cdfName)
414 575
 	}
415
-	if(platform=="affymetrix"){
416
-		if(exists("cnrmaResult")){
417
-			cnrmaResult <- get("cnrmaResult")
418
-		} else cnrmaResult <- NULL
576
+	if(platform == "affymetrix"){
577
+		if(!file.exists(cnrmaFile) | !load.it){	
578
+			message("Quantile normalizing the copy number probes...")
579
+			## updates A matrix and saves cnrmaFile
580
+			cnrma(filenames=filenames, cdfName=cdfName, outdir=outdir, cnrmaFile=cnrmaFile, AFile=AFile)
581
+		} 
419 582
 	}
420
-	crlmmResults <- list(snprmaResult=snprmaResult,
421
-			     cnrmaResult=cnrmaResult,
422
-			     callSet=callSet)
423
-
583
+##	if(!is.null(cnrmaResult)){
584
+##		for(CHR in chromosome){
585
+##			cat(CHR, " ")
586
+##			cnps <- rownames(cnProbes)[cnProbes[, k] == CHR]
587
+##			index.nps <- match(cnps, rownames(cnrmaResult[["NP"]]))
588
+##			NP <- cnrmaResult$NP[index.nps, ]
589
+##			save(NP, file=file.path(tmpdir, paste("NP_", CHR, ".rda", sep="")))
590
+##			rm(NP); gc()
591
+##		}
592
+##	}
424 593
 	if(!save.it){
425 594
 		message("Cleaning up")		
426
-		unlink(intensityFile)
595
+		unlink(snprmaFile); unlink(cnrmaFile)
427 596
 	}
428
-	return(crlmmResults)
429 597
 }
430 598
 
431
-validCdfNames <- function(){
432
-	c("genomewidesnp6",
433
-	  "genomewidesnp5",
434
-	  "human370v1c",
435
-	  "human370quadv3c",
436
-	  "human550v3b",
437
-	  "human650v3a",
438
-	  "human610quadv1b",
439
-	  "human660quadv1a",
440
-	  "human1mduov3b")
441
-}
442 599
 
443
-isValidCdfName <- function(cdfName){
444
-	chipList <- validCdfNames()
445
-	result <- cdfName %in% chipList	
446
-	if(!(result)){
447
-		warning("cdfName must be one of the following: ",
448
-			chipList)
449
-	}
450
-	return(result)
451
-}
452 600
 
453 601
 whichPlatform <- function(cdfName){
454 602
 	index <- grep("genomewidesnp", cdfName)
... ...
@@ -463,7 +611,50 @@ whichPlatform <- function(cdfName){
463 611
 
464 612
 
465 613
 # steps: quantile normalize hapmap: create 1m_reference_cn.rda object
466
-cnrma <- function(filenames, cdfName, sns, seed=1, verbose=FALSE, outdir){
614
+##cnrma <- function(filenames, cdfName, sns, seed=1, verbose=FALSE, outdir){
615
+##	if(missing(cdfName)) stop("must specify cdfName")
616
+##	pkgname <- getCrlmmAnnotationName(cdfName)
617
+##	require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available")
618
+##	if (missing(sns)) sns <- basename(filenames)
619
+##        loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
620
+##	fid <- getVarInEnv("npProbesFid")
621
+##	set.seed(seed)
622
+##	idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything
623
+##	SKW <- vector("numeric", length(filenames))
624
+##
625
+##	
626
+##	NP <- matrix(NA, length(fid), length(filenames))
627
+##	verbose <- TRUE
628
+##	if(verbose){
629
+##		message("Processing ", length(filenames), " files.")
630
+##		if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3)
631
+##	}
632
+##	if(cdfName=="genomewidesnp6"){
633
+##		loader("1m_reference_cn.rda", .crlmmPkgEnv, pkgname)
634
+##	}
635
+##	if(cdfName=="genomewidesnp5"){
636
+##		loader("5.0_reference_cn.rda", .crlmmPkgEnv, pkgname)
637
+##	}
638
+##	reference <- getVarInEnv("reference")
639
+##	##if(!is.matrix(reference)) stop("target distribution for quantile normalization not available.")
640
+##	for(i in seq(along=filenames)){
641
+##		y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
642
+##		x <- log2(y[idx2])
643
+##		SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3)
644
+##		rm(x)
645
+##		NP[, i] <- as.integer(normalize.quantiles.use.target(y, target=reference))
646
+##		if (verbose)
647
+##			if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
648
+##			else cat(".")
649
+##	}
650
+##	dimnames(NP) <- list(names(fid), sns)
651
+##	##dimnames(NP) <- list(map[, "man_fsetid"], sns)
652
+##	res3 <- list(NP=NP, SKW=SKW)
653
+##	cat("\n")
654
+##	return(res3)
655
+##}
656
+
657
+cnrma <- function(filenames, cdfName, sns, seed=1, verbose=FALSE, outdir, cnrmaFile, AFile){
467 658
 	if(missing(cdfName)) stop("must specify cdfName")
468 659
 	pkgname <- getCrlmmAnnotationName(cdfName)
469 660
 	require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available")
... ...
@@ -473,17 +664,15 @@ cnrma <- function(filenames, cdfName, sns, seed=1, verbose=FALSE, outdir){
473 664
 	set.seed(seed)
474 665
 	idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything
475 666
 	SKW <- vector("numeric", length(filenames))
476
-##	if(bigmemory){
477
-##		NP <- filebacked.big.matrix(length(pnsa), length(filenames),
478
-##					    type="integer",
479
-##					    init=as.integer(0),
480
-##					    backingpath=outdir,
481
-##					    backingfile="NP.bin",
482
-##					    descriptorfile="NP.desc")
483
-##	} else{
484
-		NP <- matrix(NA, length(fid), length(filenames))
485
-##	}
486
-	verbose <- TRUE
667
+	load(AFile)
668
+	if(isPackageLoaded("ff")) open(A)
669
+	path <- system.file("extdata", package=pkgname)
670
+	load(file.path(path, "cnProbes.rda"))
671
+	cnProbes <- get("cnProbes")
672
+	cnps <- rownames(cnProbes)
673
+	cnps <- cnps[cnps %in% rownames(A)]
674
+	index <- match(cnps, rownames(A), nomatch=0)
675
+	index <- index[index != 0]
487 676
 	if(verbose){
488 677
 		message("Processing ", length(filenames), " files.")
489 678
 		if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3)
... ...
@@ -500,17 +689,21 @@ cnrma <- function(filenames, cdfName, sns, seed=1, verbose=FALSE, outdir){
500 689
 		y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
501 690
 		x <- log2(y[idx2])
502 691
 		SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3)
503
-		rm(x)
504
-		NP[, i] <- as.integer(normalize.quantiles.use.target(y, target=reference))
692
+		rm(x); gc()
693
+		A[index, i] <- as.integer(normalize.quantiles.use.target(y, target=reference))
694
+		##NP[, i] <- as.integer(normalize.quantiles.use.target(y, target=reference))
505 695
 		if (verbose)
506 696
 			if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
507 697
 			else cat(".")
698
+		rm(y); gc()
508 699
 	}
509
-	dimnames(NP) <- list(names(fid), sns)
510
-	##dimnames(NP) <- list(map[, "man_fsetid"], sns)
511
-	res3 <- list(NP=NP, SKW=SKW)
512
-	cat("\n")
513
-	return(res3)
700
+	message("Done")
701
+	cnrmaResults <- list(SKW=SKW)
702
+	save(cnrmaResults, file=cnrmaFile)
703
+	if(isPackageLoaded("ff")) close(A)
704
+	save(A, file=AFile)
705
+	rm(list=ls()); gc()
706
+	return()
514 707
 }
515 708
 
516 709
 getFlags <- function(object, PHI.THR){
... ...
@@ -555,21 +748,29 @@ thresholdCopynumber <- function(object){
555 748
 	return(object)
556 749
 }
557 750
 
558
-##preprocessOptions <- function(crlmmFile="snpsetObject.rda",
559
-##			      intensityFile="normalizedIntensities.rda",
751
+##preprocessOptions <- function(callsFile="snpsetObject.rda",
752
+##			      snprmaFile="normalizedIntensities.rda",
560 753
 ##			      rgFile="rgFile.rda"){
561 754
 ##
562 755
 ##}
563 756
 
564 757
 cnOptions <- function(outdir="./",
565 758
 		      cdfName,
566
-		      crlmmFile,
567
-		      intensityFile,
568
-		      rgFile="rgFile.rda",
759
+		      AFile="A",
760
+		      BFile="B",
761
+		      callsFile="genotypes",
762
+		      rgFile="rgFile",
763
+		      cnFile="cnSet",
764
+		      cnrmaFile="cn_rmaResult",
765
+		      ##npBinary="NP.bin",  ##name of file.backed.big.matrix
766
+		      ##npDesc="NP.desc",
767
+		      snprmaFile="snp_rmaResult",
768
+		      ##callsFile="calls.desc",
769
+		      ##confsFile="confsFile.desc",
770
+		      confsFile="confsFile",
569 771
 		      save.it=TRUE,
570 772
 		      save.cnset=TRUE,
571 773
 		      load.it=TRUE,
572
-		      splitByChr=TRUE,
573 774
 		      MIN.OBS=3,
574 775
 		      MIN.SAMPLES=10,
575 776
 		      batch=NULL,
... ...
@@ -588,11 +789,17 @@ cnOptions <- function(outdir="./",
588 789
 		      THR.NU.PHI=TRUE,
589 790
 		      thresholdCopynumber=TRUE,
590 791
 		      unlink=TRUE,
591
-		      ##hiddenMarkovModel=FALSE,
592
-		      ##circularBinarySegmentation=FALSE,
593
-##		      cbsOpts=NULL,
594
-		      ##hmmOpts=NULL,
792
+		      use.poe=FALSE,
793
+		      crlmmBatchSize=1100, ## this is about right for Affy 6.0 platform 1.8e6 * 1100 < 2^31
595 794
 		      ...){
795
+	if(isPackageLoaded("ff")){
796
+		AFile=paste(AFile, "ff", sep="_")
797
+		BFile=paste(BFile, "ff", sep="_")
798
+		confsFile=paste(confsFile, "ff", sep="_")
799
+		callsFile=paste(callsFile, "ff", sep="_")
800
+		cnFile=paste(cnFile, "ff", sep="")
801
+	}
802
+	if(use.poe) require(metaArray)
596 803
 	if(missing(cdfName)) stop("must specify cdfName")
597 804
 	if(!file.exists(outdir)){
598 805
 		message(outdir, " does not exist.  Trying to create it.")
... ...
@@ -602,23 +809,24 @@ cnOptions <- function(outdir="./",
602 809
 ##	if(hiddenMarkovModel){
603 810
 ##		hmmOpts <- hmmOptions(...)
604 811
 ##	}
605
-	if(missing(crlmmFile)){
606
-		crlmmFile <- file.path(outdir, "snpsetObject.rda")
607
-	}
608
-	if(missing(intensityFile)){
609
-		intensityFile <- file.path(outdir, "normalizedIntensities.rda")
610
-	}
812
+##	if(missing(snprmaFile)){
813
+##		snprmaFile <- file.path(outdir, "normalizedIntensities.rda")
814
+##	}
611 815
 	if(is.null(batch))
612 816
 		stop("must specify batch -- should be the same length as the number of files")
613 817
 	list(outdir=outdir,
614 818
 	     cdfName=cdfName,
615
-	     crlmmFile=crlmmFile,
616
-	     intensityFile=intensityFile,
617
-	     rgFile=file.path(outdir, rgFile),
819
+	     callsFile=callsFile,
820
+	     confsFile=confsFile,
821
+	     AFile=AFile,
822
+	     BFile=BFile,
823
+	     snprmaFile=snprmaFile,
824
+	     cnrmaFile=cnrmaFile,
825
+	     rgFile=rgFile,
826
+	     cnFile=cnFile,
618 827
 	     save.it=save.it,
619 828
 	     save.cnset=save.cnset,
620 829
 	     load.it=load.it,
621
-	     splitByChr=splitByChr,
622 830
 	     MIN.OBS=MIN.OBS,
623 831
 	     MIN.SAMPLES=MIN.SAMPLES,
624 832
 	     batch=batch,
... ...
@@ -636,7 +844,10 @@ cnOptions <- function(outdir="./",
636 844
 	     MIN.PHI=MIN.PHI,
637 845
 	     THR.NU.PHI=THR.NU.PHI,
638 846
 	     thresholdCopynumber=thresholdCopynumber,
639
-	     unlink=unlink)
847
+	     unlink=unlink,
848
+	     use.poe=use.poe,
849
+	     crlmmBatchSize=crlmmBatchSize)
850
+##	     use.bigmemory=use.bigmemory)
640 851
 ##	     hiddenMarkovModel=hiddenMarkovModel,
641 852
 ##	     circularBinarySegmentation=circularBinarySegmentation,
642 853
 ##	     cbsOpts=cbsOpts,
... ...
@@ -846,6 +1057,27 @@ nonpolymorphic <- function(object, cnOptions, tmp.objects){
846 1057
 	return(object)
847 1058
 }
848 1059
 
1060
+nonpolymorphic.poe <- function(object, cnOptions, tmp.object){
1061
+	require(metaArray)
1062
+	nps <- log2(A(object)[!isSnp(object), ])
1063
+	nps <- (nps-rowMedians(nps))/rowMAD(nps)
1064
+	runAvg <- apply(nps, 2, myfilter, filter=rep(1/10, 10))
1065
+	rownames(runAvg) <- featureNames(object)[!isSnp(object)]
1066
+	rm.nas <- rowSums(is.na(runAvg)) == 0
1067
+	runAvg <- runAvg[rm.nas, ]
1068
+	
1069
+	poe.scale <- poe.em(runAvg, cl=rep(0, ncol(nps)))$data
1070
+	pinegg <- piposg <- poe.scale
1071
+	piposg[piposg < 0] <- 0
1072
+	pinegg[pinegg > 0] <- 0
1073
+	pinegg <- pinegg*-1
1074
+	pm.em <- 1*pinegg + 2*(1-pinegg-piposg) + 3*piposg
1075
+	rownames(pm.em) <- rownames(runAvg)
1076
+	CA(object)[match(rownames(pm.em), featureNames(object)), ] <- pm.em
1077
+	##CA(object)[!isSnp(object), ] <- pm.em
1078
+	return(object)
1079
+}
1080
+
849 1081
 ##sufficient statistics on the intensity scale
850 1082
 withinGenotypeMoments <- function(object, cnOptions, tmp.objects){
851 1083
 	normal <- tmp.objects[["normal"]]
... ...
@@ -977,15 +1209,18 @@ oneBatch <- function(object, cnOptions, tmp.objects){
977 1209
 		muA[index[[j]], j+2] <- mus[, 1]
978 1210
 		muB[index[[j]], j+2] <- mus[, 2]
979 1211
 	}
980
-	nobsA <- Ns[, "A"] > 10
981
-	nobsB <- Ns[, "B"] > 10
982
-	notMissing <- !(is.na(muA[, "A"]) | is.na(muA[, "B"]) | is.na(muB[, "A"]) | is.na(muB[, "B"]))
983
-	complete <- list()
984
-	complete[[1]] <- which(correct.orderA & correct.orderB & nobsA & notMissing) ##be selective here
985
-	complete[[2]] <- which(correct.orderA & correct.orderB & nobsB & notMissing) ##be selective here	
986
-	size <- min(5000, length(complete[[1]]))
987
-	if(size == 5000) complete <- lapply(complete, function(x) sample(x, size))
1212
+
988 1213
 	if(CHR == 23){
1214
+		nobsA <- Ns[, "A"] > MIN.OBS
1215
+		nobsB <- Ns[, "B"] > MIN.OBS
1216
+		notMissing <- !(is.na(muA[, "A"]) | is.na(muA[, "B"]) | is.na(muB[, "A"]) | is.na(muB[, "B"]))
1217
+		complete <- list()
1218
+		complete[[1]] <- which(correct.orderA & correct.orderB & nobsA & notMissing) ##be selective here
1219
+		complete[[2]] <- which(correct.orderA & correct.orderB & nobsB & notMissing) ##be selective here
1220
+		if(length(complete[[1]]) < 1 | length(complete[[2]]) < 1) stop("Too few observations to estimate the center for 'A' and 'B'  clusters on chrom X")		
1221
+		size <- min(5000, length(complete[[1]]))
1222
+		if(size == 5000) complete <- lapply(complete, function(x) sample(x, size))
1223
+	
989 1224
 		index <- list()
990 1225
 		index[[1]] <- which(Ns[, "A"] == 0)
991 1226
 		index[[2]] <- which(Ns[, "B"] == 0)
... ...
@@ -1011,7 +1246,6 @@ oneBatch <- function(object, cnOptions, tmp.objects){
1011 1246
 ##	snpflags <- envir[["snpflags"]]
1012 1247
 	snpflags <- index[[1]] | index[[2]] | index[[3]]
1013 1248
 ##	snpflags[, p] <- index[[1]] | index[[2]] | index[[3]]
1014
-
1015 1249
 	##---------------------------------------------------------------------------
1016 1250
 	## Two genotype clusters not observed -- would sequence help? (didn't seem to)
1017 1251
 	## 1. extract index of complete data
... ...
@@ -1318,6 +1552,8 @@ posteriorProbability.snps <- function(object, cnOptions, tmp.objects=list()){
1318 1552
 	return(list(tmp.objects, posteriorProb))
1319 1553
 }
1320 1554
 
1555
+
1556
+
1321 1557
 biasAdj <- function(object, cnOptions, tmp.objects){
1322 1558
 	gender <- object$gender
1323 1559
 	CHR <- unique(chromosome(object))
... ...
@@ -1502,6 +1738,7 @@ computeCopynumber.CNSet <- function(object, cnOptions){
1502 1738
 	if(length(batch) != ncol(object)) stop("Batch must be the same length as the number of samples")
1503 1739
 	MIN.SAMPLES <- cnOptions$MIN.SAMPLES
1504 1740
 	verbose <- cnOptions$verbose
1741
+	use.poe <- cnOptions[["use.poe"]]
1505 1742
 	for(i in seq(along=unique(batch))){
1506 1743
 		PLATE <- unique(batch)[i]
1507 1744
 		if(sum(batch == PLATE) < MIN.SAMPLES) {
... ...
@@ -1518,7 +1755,8 @@ computeCopynumber.CNSet <- function(object, cnOptions){
1518 1755
 		}
1519 1756
 		if(bias.adj){
1520 1757
 			if(verbose) message("Dropping samples with low posterior prob. of normal copy number (samples dropped is locus-specific)")
1521
-			tmp.objects <- biasAdjNP(object.batch, cnOptions, tmp.objects)
1758
+			if(!use.poe)
1759
+				tmp.objects <- biasAdjNP(object.batch, cnOptions, tmp.objects)
1522 1760
 			tmp.objects <- biasAdj(object.batch, cnOptions, tmp.objects)
1523 1761
 			if(verbose) message("Recomputing location and scale parameters")
1524 1762
 		}
... ...
@@ -1542,8 +1780,12 @@ computeCopynumber.CNSet <- function(object, cnOptions){
1542 1780
 		if(verbose) message("\nAllele specific copy number")	
1543 1781
 		object.batch <- polymorphic(object.batch, cnOptions, tmp.objects)
1544 1782
 		if(any(!isSnp(object))){ ## there are nonpolymorphic probes
1545
-			if(verbose) message("\nCopy number for nonpolymorphic probes...")	
1546
-			object.batch <- nonpolymorphic(object.batch, cnOptions, tmp.objects)
1783
+			if(verbose) message("\nCopy number for nonpolymorphic probes...")
1784
+			if(!use.poe){
1785
+				object.batch <- nonpolymorphic(object.batch, cnOptions, tmp.objects)
1786
+			} else {
1787
+				object.batch <- nonpolymorphic.poe(object.batch, cnOptions, tmp.objects)
1788
+			}
1547 1789
 		}
1548 1790
 		##---------------------------------------------------------------------------
1549 1791
 		##Note: the replacement method multiples by 100
... ...
@@ -1577,9 +1819,117 @@ computeCopynumber.CNSet <- function(object, cnOptions){
1577 1819
 	return(object)
1578 1820
 }
1579 1821
 
1822
+## cite metaArray: Choi/Ghosh
1823
+crlmm_poe <- function(mat, cl, threshold=0.00001, every=100,use.mad=TRUE) {
1824
+  mat <- as.matrix(mat)
1825
+  nc <- ncol(mat)
1826
+  nr <- nrow(mat)
1827
+  if(all(is.null(cl))) cl <- rep(0,dim(mat)[2])
1828
+  cat("Number of Samples:", nc, "\n")
1829
+  cat("Number of Genes:", nr, "\n")
1830
+  cat("This model assumes that the samples are centered and scaled.\n")
1831
+  new.mat <- matrix(0,nr,nc)
1832
+  med.expr <- apply(mat,1,median,na.rm=TRUE)
1833
+  new.mat <- sweep(mat,1,med.expr)
1834
+  for(i in 1:nr) {
1835
+    if(sum(is.na(as.numeric(mat[i,]))) > .25 * nc ) stop("More than 25% missing values for gene", i, "\n")
1836
+    zvec <- crlmm_fit.em(as.numeric(mat[i,]), cl, threshold=threshold, use.mad=use.mad) 
1837
+    new.mat[i,] <- zvec$expr
1838
+    if(i%%every==0) cat(i, "genes fitted\n")   
1839
+  }
1840
+  rownames(new.mat) <- rownames(mat)
1841
+  colnames(new.mat) <- colnames(mat)
1842
+  return(list(data=new.mat))
1843
+}
1580 1844
 
1581
-
1582
-
1583
-
1584
-
1585
-
1845
+crlmm_fit.em <- function(x, cl, threshold=1e-6, use.mad=TRUE){
1846
+	sup <- sum(cl==0) > 0 && sum(cl==1) > 0
1847
+	x <- as.numeric(x)
1848
+	len <- length(x)
1849
+	z <- rep(0,length(x))
1850
+	log.lik <- 1000
1851
+	lik.rec <- NULL
1852
+	num.iter <- 0
1853
+	a <- min(x,na.rm=TRUE); b <- max(x,na.rm=TRUE)
1854
+	err <- err.old <- 1
1855
+	if(!sup) {
1856
+		z <- find.init(x)
1857
+		Pi <- mean(z)
1858
+		mu <- sum((1-z)*x) / sum(1-z)
1859
+		sigmasq <- sum((1-z)*(x-mu)^2) / sum(1-z)
1860
+		tt <- len
1861
+		while(err > threshold) {
1862
+			num.iter <- num.iter + 1
1863
+			log.lik.old <- log.lik
1864
+			err.old <- err
1865
+			## E Step
1866
+			est.u <- dunif(x,a,b)
1867
+			est.u.p <- Pi * est.u
1868
+			est.n <- dnorm(x,mu,sqrt(sigmasq))
1869
+			est.n.p <- (1-Pi) * est.n
1870
+			z <- est.u.p / (est.n.p + est.u.p)
1871
+
1872
+			if(any(is.na(z))) stop("NA occurred in imputation\n")
1873
+			## M Step
1874
+			mu <- sum((1-z)*x) / sum(1-z)
1875
+			sigmasq <- sum((1-z)*((x-mu)^2)) / sum(1-z)
1876
+			Pi <- sum(z) / len    
1877
+			sgn.z <- ifelse(x < mu, -1, 1)
1878
+
1879
+			## Likelihood
1880
+			est.u <- dunif(x,a,b)
1881
+			est.u.p <- Pi * est.u
1882
+			est.n <- dnorm(x,mu,sqrt(sigmasq))
1883
+			est.n.p <- (1-Pi) * est.n   
1884
+			log.lik <- sum(log(est.u.p + est.n.p))
1885
+			err <- abs(log.lik.old - log.lik)
1886
+			if(num.iter != 1) lik.rec[num.iter-1] <- log.lik
1887
+		}    
1888
+	}  else {
1889
+		tt <- sum(cl==0)
1890
+		z[cl==0] <- runif(tt,0,1)
1891
+		Pi <- mean(z[cl==0])
1892
+		mu <- sum((1-z)*x) / sum(1-z)
1893
+		sigmasq <- sum((1-z)*(x-mu)^2) / sum(1-z)
1894
+
1895
+		while(err > threshold) {
1896
+			num.iter <- num.iter + 1
1897
+			log.lik.old <- log.lik
1898
+			err.old <- err
1899
+
1900
+			## E Step
1901
+			est.u <- dunif(x,a,b)
1902
+			est.u.p <- Pi * est.u
1903
+			est.n <- dnorm(x,mu,sqrt(sigmasq))
1904
+			est.n.p <- (1-Pi) * est.n
1905
+			z <- est.u.p / (est.n.p + est.u.p)
1906
+			z[cl==1] <- 0
1907
+			if(any(is.na(z))) stop("NA occurred in imputation\n")
1908
+			## M Step
1909
+			mu <- sum((1-z)*x) / sum(1-z)
1910
+			sigmasq <- sum((1-z)*((x-mu)^2)) / sum(1-z)
1911
+			Pi <- mean(z[cl==0])    
1912
+			sgn.z <- ifelse(x < mu, -1, 1)
1913
+
1914
+			## Likelihood
1915
+			est.u <- dunif(x,a,b)
1916
+			est.u.p <- Pi * est.u
1917
+			est.n <- dnorm(x,mu,sqrt(sigmasq))
1918
+			est.n.p <- (1-Pi) * est.n   
1919
+			log.lik <- sum(log(est.u.p[cl==0] + est.n.p[cl==0])) + sum(log(est.n[cl==1]))
1920
+			err <- abs(log.lik.old - log.lik)
1921
+			if(num.iter != 1) lik.rec[num.iter-1] <- log.lik
1922
+		}    
1923
+	}
1924
+	est.u.p <- Pi * dunif(x,a,b) 
1925
+	est.n.p <- (1-Pi) * dnorm(x,mu,sqrt(sigmasq))
1926
+	est.u.mu <- Pi * dunif(mu,a,b)
1927
+	est.n.mu <- (1-Pi) * dnorm(mu,mu,sqrt(sigmasq))
1928
+	z0 <- est.u.p / (est.n.p + est.u.p)
1929
+	zmu <- est.u.mu / (est.u.mu + est.n.mu)
1930
+	sgn.z0 <- ifelse(x < mu, -1, 1) 
1931
+					#loc <- (max(lik.rec) != lik.rec[length(lik.rec)])
1932
+	expr <- rep(0, len)
1933
+	expr <- (z0 - zmu) * sgn.z0
1934
+	return(list(expr=expr, a=a, b=b, sigmasq=sigmasq, mu=mu, Pi=Pi, lik.rec=lik.rec))
1935
+}
... ...
@@ -1,48 +1,103 @@
1 1
 crlmm <- function(filenames, row.names=TRUE, col.names=TRUE,
2 2
                   probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
3
-                  save.it=FALSE, load.it=FALSE, intensityFile,
3
+                  save.it=FALSE, load.it=FALSE, intensityFile, callsFile, confsFile,
4
+		  AFile, BFile,
4 5
                   mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
5 6
                   cdfName, sns, recallMin=10, recallRegMin=1000,
6
-                  returnParams=FALSE, badSNP=.7){
7
-  if ((load.it | save.it) & missing(intensityFile))
8
-	  stop("'intensityFile' is missing, and you chose either load.it or save.it")
9
-  if (missing(sns)) sns <- basename(filenames)
10
-  if (!missing(intensityFile))
11
-    if (load.it & !file.exists(intensityFile)){
12
-      load.it <- FALSE
13
-      message("File ", intensityFile, " does not exist.")
14
-      message("Not loading it, but running SNPRMA from scratch.")
15
-    }
16
-  if (!load.it){
17
-    res <- snprma(filenames, fitMixture=TRUE,
18
-                  mixtureSampleSize=mixtureSampleSize, verbose=verbose,
19
-                  eps=eps, cdfName=cdfName, sns=sns)
20
-    if(save.it){
21
-      t0 <- proc.time()
22
-      save(res, file=intensityFile)
23
-      t0 <- proc.time()-t0
24
-      if (verbose) message("Used ", t0[3], " seconds to save ", intensityFile, ".")
25
-    }
26
-  }else{
27
-    if (verbose) message("Loading ", intensityFile, ".")
28
-    obj <- load(intensityFile)
29
-    if (verbose) message("Done.")
30
-    if (obj != "res")
31
-      stop("Object in ", intensityFile, " seems to be invalid.")
32
-  }
33
-  if(row.names) row.names=res$gns else row.names=NULL
34
-  if(col.names) col.names=res$sns else col.names=NULL
35
-  res2 <- crlmmGT(res[["A"]], res[["B"]], res[["SNR"]],
36
-                  res[["mixtureParams"]], res[["cdfName"]],
37
-                  gender=gender, row.names=row.names,
38
-                  col.names=col.names, recallMin=recallMin,
39
-                  recallRegMin=1000, SNRMin=SNRMin,
40
-                  returnParams=returnParams, badSNP=badSNP,
41
-                  verbose=verbose)
42
-
43
-  res2[["SNR"]] <- res[["SNR"]]
44
-  res2[["SKW"]] <- res[["SKW"]]
45
-  return(list2SnpSet(res2, returnParams=returnParams))
7
+                  returnParams=FALSE, badSNP=0.7){
8
+	if ((load.it | save.it) & missing(intensityFile))
9
+		stop("'intensityFile' is missing, and you chose either load.it or save.it")
10
+	if (missing(sns)) sns <- basename(filenames)
11
+	if (!missing(intensityFile))
12
+		if (load.it & !file.exists(intensityFile)){
13
+			load.it <- FALSE
14
+			message("File ", intensityFile, " does not exist.")
15
+			message("Not loading it, but running SNPRMA from scratch.")
16
+		}
17
+	if (!load.it){
18
+		res <- snprma(filenames,
19
+			      fitMixture=TRUE,
20
+			      mixtureSampleSize=mixtureSampleSize,
21
+			      verbose=verbose,
22
+			      eps=eps,
23
+			      cdfName=cdfName,
24
+			      sns=sns,
25
+			      AFile=AFile,
26
+			      BFile=BFile)
27
+		save(res, file=intensityFile)##file.path(dirname(intensityFile), "res.rda"))
28
+	} else {
29
+		message("Loading quantile normalized intensities...")
30
+		load(intensityFile)
31
+		res <- get("res")
32
+	}
33
+	load(AFile)
34
+	load(BFile)
35
+	if(isPackageLoaded("ff")) {open(A); open(B)}
36
+	if(row.names) row.names <- res$gns
37
+	if(col.names) col.names <- res$sns	
38
+##	}else{
39
+##		if (verbose) message("Loading ", intensityFile, ".")
40
+##		obj <- load(intensityFile)
41
+##		if (verbose) message("Done.")
42
+##		if (obj != "res")
43
+##			stop("Object in ", intensityFile, " seems to be invalid.")
44
+##	}
45
+	##path <- system.file("extdata", package=paste(cdfName, "Crlmm", sep=""))
46
+	##load(file.path(path, "snpProbes.rda"))
47
+	gns <- res[["gns"]]
48
+	sns <- colnames(A)
49
+	##
50
+	##Ensures that if ff package is loaded, ordinary matrices are still passed to crlmmGT
51
+	aMatrix <- A[1:length(gns), ]
52
+	bMatrix <- B[1:length(gns), ]
53
+	if(isPackageLoaded("ff")) close(B)
54
+	rm(B); gc()
55
+	##	res2 <- crlmmGT(res[["A"]], res[["B"]], res[["SNR"]],
56
+	res2 <- crlmmGT(aMatrix, bMatrix, res[["SNR"]],
57
+			res[["mixtureParams"]], res[["cdfName"]],
58
+			gender=gender, row.names=row.names,
59
+			col.names=col.names, recallMin=recallMin,
60
+			recallRegMin=1000, SNRMin=SNRMin,
61
+			returnParams=returnParams, badSNP=badSNP,
62
+			verbose=verbose)
63
+	rm(aMatrix, bMatrix); gc()
64
+	res2[["SNR"]] <- res[["SNR"]]
65
+	res2[["SKW"]] <- res[["SKW"]]
66
+	if(isPackageLoaded("ff")){
67
+		if(file.exists(callsFile)) unlink(callsFile)
68
+		calls <- ff(dim=c(nrow(A), ncol(A)),
69
+			    vmode="integer",
70
+			    finalizer="close",
71
+			    dimnames=dimnames(A),
72
+			    overwrite=TRUE)
73
+	} else{
74
+		calls <- matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A))
75
+	}
76
+	calls[1:length(gns), ] <- res2[["calls"]]
77
+	if(isPackageLoaded("ff")) close(calls)
78
+	save(calls, file=callsFile)
79
+	rm(calls); gc()
80
+	if(isPackageLoaded("ff")){
81
+		if(file.exists(confsFile)) unlink(confsFile)		
82
+		confs <- ff(dim=c(nrow(A), ncol(A)),
83
+			    vmode="integer",
84
+			    finalizer="close",
85
+			    dimnames=dimnames(A),
86
+			    overwrite=TRUE)
87
+	} else{
88
+		confs <- matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A))
89
+	}
90
+	confs[1:length(gns), ] <- res2[["confs"]]
91
+	if(isPackageLoaded("ff")) close(confs)
92
+	save(confs, file=confsFile)
93
+	rm(confs); gc()
94
+	sampleStats <- data.frame(SNR=res2[["SNR"]],
95
+				  SKW=res2[["SKW"]],
96
+				  gender=res2[["gender"]])
97
+	save(sampleStats, file=file.path(dirname(confsFile), "sampleStats.rda"))
98
+	if(isPackageLoaded("ff")){ close(A)}
99
+	rm(list=ls()); gc()
100
+	return()
46 101
 }
47 102
 
48 103
 crlmmGT <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL,
... ...
@@ -50,13 +105,10 @@ crlmmGT <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL,
50 105
                     SNRMin=5, recallMin=10, recallRegMin=1000,
51 106
                     gender=NULL, desctrucitve=FALSE, verbose=TRUE,
52 107
                     returnParams=FALSE, badSNP=.7){
53
-  
54 108
   keepIndex <- which(SNR>SNRMin)
55 109
   if(length(keepIndex)==0) stop("No arrays above quality threshold!")
56
-  
57 110
   NC <- ncol(A)
58 111
   NR <- nrow(A)
59
-  
60 112
   pkgname <- getCrlmmAnnotationName(cdfName)
61 113
   if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
62 114
     suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
... ...
@@ -67,7 +119,6 @@ crlmmGT <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL,
67 119
     stop("Package ", pkgname, " could not be found.")
68 120
     rm(suggCall, msg)
69 121
   }
70
-
71 122
   if(verbose) message("Loading annotations.")
72 123
   loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
73 124
   loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
... ...
@@ -92,7 +143,6 @@ crlmmGT <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL,
92 143
       gender <- kmeans(XMedian, c(min(XMedian[SNR>SNRMin]), max(XMedian[SNR>SNRMin])))[["cluster"]]
93 144
     }
94 145
   }
95
-  
96 146
   Indexes <- list(autosomeIndex, XIndex, YIndex)
97 147
   cIndexes <- list(keepIndex, 
98 148
                    keepIndex[which(gender[keepIndex]==2)], 
99 149
deleted file mode 100644
... ...
@@ -1,13 +0,0 @@
1
-setMethod("A", "AlleleSet", function(object) allele(object, "A"))
2
-setMethod("B", "AlleleSet", function(object) allele(object, "B"))
3
-##setReplaceMethod("A", signature(object="AlleleSet", value="matrix"),
4
-##		 function(object, value){
5
-##			 assayDataElementReplace(object, "senseThetaA", value)			
6
-##		 })
7
-##setReplaceMethod("B", signature(object="AlleleSet", value="matrix"),
8
-##		 function(object, value){
9
-##			 assayDataElementReplace(object, "senseThetaB", value)			
10
-##		 })
11
-
12
-
13
-
... ...
@@ -109,6 +109,16 @@ setMethod("copyNumber", "CNSet", function(object){
109 109
 	CN
110 110
 })
111 111
 
112
+setMethod("plot", signature("CNSet"), function(x, y, add=FALSE, ...){
113
+	A <- log2(A(x))
114
+	B <- log2(B(x))
115
+	if(!add){
116
+		plot(A, B, ...)
117
+	} else{
118
+		points(A, B, ...)
119
+	}
120
+})
121
+
112 122
 
113 123
 setMethod("ellipse", "CNSet", function(x, copynumber, batch, ...){
114 124
 	ellipse.CNSet(x, copynumber, batch, ...)
... ...
@@ -142,22 +142,17 @@
142 142
 ##})
143 143
 
144 144
 getParam.SnpSuperSet <- function(object, name, batch){
145
-		  label <- paste(name, batch, sep="_")
146
-		  colindex <- grep(label, fvarLabels(object))
147
-		  if(length(colindex) == 1){
148
-			  param <- fData(object)[, colindex]
149
-		  }
150
-		  if(length(colindex) < 1){
151
-			  param <- NULL
152
-		  }
153
-		  if(is.na(colindex)){
154
-			  stop(paste(label, " not found in object"))
155
-		  }
156
-		  if(length(colindex) > 1){
157
-			  stop(paste(label, " not unique"))
158
-		  }
159
-		  return(param)
160
-	  }
145
+	if(missing(batch)) label <- name
146
+	else  label <- paste(name, batch, sep="_")
147
+	colindex <- grep(label, fvarLabels(object))
148
+	if(length(colindex) >= 1){
149
+		param <- fData(object)[, colindex]
150
+	} else param <- NULL
151
+	if(any(is.na(colindex))){
152
+		stop(paste(label, " not found in object"))
153
+	}
154
+	return(param)
155
+}
161 156
 
162 157
 
163 158
 
... ...
@@ -35,14 +35,9 @@ setMethod("chromosome", "eSet", function(object) fData(object)$chromosome)
35 35
 setMethod("position", "eSet", function(object) fData(object)$position)
36 36
 
37 37
 setMethod("getParam", signature(object="eSet",
38
-				name="character",
39
-				batch="ANY"),
40
-	  function(object, name, batch){
41
-		  if(length(batch) > 1){
42
-			  warning("batch argument to getParam should have length 1.  Only using the first")
43
-			  batch <- batch[1]
44
-		  }
45
-		  getParam.SnpSuperSet(object, name, batch)
38
+				name="character"),
39
+	  function(object, name, ...){
40
+		  getParam.SnpSuperSet(object, name, ...)
46 41
 })
47 42
 
48 43
 ##setMethod("combine", signature=signature(x="eSet", y="eSet"),
... ...
@@ -1,85 +1,104 @@
1
-snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, eps=0.1, verbose=TRUE, seed=1, cdfName, sns){
2
-  if (missing(sns)) sns <- basename(filenames)
3
-  ##ADD CHECK TO SEE IF LOADED
4
-  if (missing(cdfName))
5
-    cdfName <- read.celfile.header(filenames[1])$cdfName
6
-  ##  stuffDir <- changeToCrlmmAnnotationName(cdfName)
7
-  pkgname <- getCrlmmAnnotationName(cdfName)
8
-  if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
9
-    suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
10
-    msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
11
-    message(strwrap(msg))
12
-    stop("Package ", pkgname, " could not be found.")
13
-    rm(suggCall, msg)
14
-  }
15
-  if(verbose) message("Loading annotations and mixture model parameters.")
16
-  loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
17
-  loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
18
-  loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
19
-  autosomeIndex <- getVarInEnv("autosomeIndex")
20
-  pnsa <- getVarInEnv("pnsa")
21
-  pnsb <- getVarInEnv("pnsb")
22
-  fid <- getVarInEnv("fid")
23
-  reference <- getVarInEnv("reference")
24
-  aIndex <- getVarInEnv("aIndex")
25
-  bIndex <- getVarInEnv("bIndex")
26
-  SMEDIAN <- getVarInEnv("SMEDIAN")
27
-  theKnots <- getVarInEnv("theKnots")
28
-  gns <- getVarInEnv("gns")
1
+snprma <- function(filenames, mixtureSampleSize=10^5,
2
+		   fitMixture=FALSE, eps=0.1, verbose=TRUE,
3
+		   seed=1, cdfName, sns, AFile="A.rda", BFile="B.rda"){
4
+	if (missing(sns)) sns <- basename(filenames)
5
+	##ADD CHECK TO SEE IF LOADED
6
+	if (missing(cdfName))
7
+		cdfName <- read.celfile.header(filenames[1])$cdfName
8
+	##  stuffDir <- changeToCrlmmAnnotationName(cdfName)
9
+	pkgname <- getCrlmmAnnotationName(cdfName)
10
+	if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
11
+		suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
12
+		msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
13
+		message(strwrap(msg))
14
+		stop("Package ", pkgname, " could not be found.")
15
+		rm(suggCall, msg)
16
+	}
17
+	if(verbose) message("Loading annotations and mixture model parameters.")
18
+	loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
19
+	loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
20
+	loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
21
+	autosomeIndex <- getVarInEnv("autosomeIndex")
22
+	pnsa <- getVarInEnv("pnsa")
23
+	pnsb <- getVarInEnv("pnsb")
24
+	fid <- getVarInEnv("fid")
25
+	reference <- getVarInEnv("reference")
26
+	aIndex <- getVarInEnv("aIndex")
27
+	bIndex <- getVarInEnv("bIndex")
28
+	SMEDIAN <- getVarInEnv("SMEDIAN")
29
+	theKnots <- getVarInEnv("theKnots")
30
+	gns <- getVarInEnv("gns")
29 31
   
30
-  ##We will read each cel file, summarize, and run EM one by one
31
-  ##We will save parameters of EM to use later
32
-  mixtureParams <- matrix(0, 4, length(filenames))
33
-  SNR <- vector("numeric", length(filenames))
34
-  SKW <- vector("numeric", length(filenames))
32
+	##We will read each cel file, summarize, and run EM one by one
33
+	##We will save parameters of EM to use later
34
+	mixtureParams <- matrix(0, 4, length(filenames))
35
+	SNR <- vector("numeric", length(filenames))
36
+	SKW <- vector("numeric", length(filenames))
35 37
   
36
-  ## This is the sample for the fitting of splines
37
-  ## BC: I like better the idea of the user passing the seed,
38
-  ##     because this might intefere with other analyses
39
-  ##     (like what happened to GCRMA)
40
-  set.seed(seed)
41
-  idx <- sort(sample(autosomeIndex, mixtureSampleSize))
42
-  ##S will hold (A+B)/2 and M will hold A-B
43
-  ##NOTE: We actually dont need to save S. Only for pics etc...
44
-  ##f is the correction. we save to avoid recomputing
45
-  A <- matrix(as.integer(0), length(pnsa), length(filenames))
46
-  B <- matrix(as.integer(0), length(pnsb), length(filenames))
38
+	## This is the sample for the fitting of splines
39
+	## BC: I like better the idea of the user passing the seed,
40
+	##     because this might intefere with other analyses
41
+	##     (like what happened to GCRMA)
42
+	set.seed(seed)
43
+	idx <- sort(sample(autosomeIndex, mixtureSampleSize))
44
+	##S will hold (A+B)/2 and M will hold A-B
45
+	##NOTE: We actually dont need to save S. Only for pics etc...
46
+	##f is the correction. we save to avoid recomputing
47 47
   
48
-  if(verbose){
49
-    message("Processing ", length(filenames), " files.")
50
-    if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3)
51
-  }
52
-  ##We start looping throug cel files
53
-  idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything
54
-  for(i in seq(along=filenames)){
55
-    y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
56
-    x <- log2(y[idx2])
57
-    SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3)
58
-    rm(x)
59
-    y <- normalize.quantiles.use.target(y, target=reference)
60
-    A[, i] <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa)
61
-    B[, i] <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb)
62
-    ##Now to fit the EM
63
-    if(fitMixture){
64
-      S <- (log2(A[idx, i])+log2(B[idx, i]))/2 - SMEDIAN
65
-      M <- log2(A[idx, i])-log2(B[idx, i])
66
-      
67
-      ##we need to test the choice of eps.. it is not the max diff between funcs
68
-      tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
69
-      
70
-      mixtureParams[, i] <- tmp[["coef"]]
71
-      SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
72
-    }
73
-    if (verbose)
74
-      if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
75
-      else cat(".")
76
-  }
77
-  if (verbose)
78
-    if (getRversion() > '2.7.0') close(pb)
79
-    else cat("\n")
80
-  if (!fitMixture) SNR <- mixtureParams <- NA
81
-  ## gns comes from preprocStuff.rda
82
-  list(A=A, B=B, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)
48
+	path <- system.file("extdata", package=paste(cdfName, "Crlmm", sep=""))
49
+	load(file.path(path, "cnProbes.rda"))
50
+	cnProbes <- get("cnProbes")
51
+	nr <- length(pnsa) + nrow(cnProbes)
52
+	nc <- length(filenames)
53
+	dns <- list(c(gns, rownames(cnProbes)), sns)  
54
+	if(isPackageLoaded("ff")){
55
+		message("ff package is loaded.  Creating ff objects for storing normalized and summarized intensities")
56
+		A <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dns,
57
+			overwrite=TRUE)
58
+		B <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dns,
59
+			overwrite=TRUE)
60
+	}  else {
61
+		A <- matrix(0L, nr, nc, dimnames=dns)
62
+		B <- matrix(0L, nr, nc, dimnames=dns)
63
+	}
64
+	if(verbose){
65
+		message("Processing ", length(filenames), " files.")
66
+		if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3)
67
+	}
68
+	##We start looping through cel files
69
+	idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything
70
+	message("Quantile normalizing to HapMap target distribution")
71
+	for(i in seq(along=filenames)){
72
+		y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
73
+		x <- log2(y[idx2])
74
+		SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3)
75
+		rm(x)
76
+		y <- normalize.quantiles.use.target(y, target=reference)
77
+		A[1:length(pnsa), i] <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa)
78
+		B[1:length(pnsa), i] <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb)
79
+		##Now to fit the EM
80
+		if(fitMixture){
81
+			S <- (log2(A[idx, i])+log2(B[idx, i]))/2 - SMEDIAN
82
+			M <- log2(A[idx, i])-log2(B[idx, i])
83
+			
84
+			##we need to test the choice of eps.. it is not the max diff between funcs
85
+			tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
86
+			
87
+			mixtureParams[, i] <- tmp[["coef"]]
88
+			SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
89
+		}
90
+		if (verbose)
91
+			if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
92
+			else cat(".")
93
+	}
94
+	if(isPackageLoaded("ff")) { close(A); close(B)}
95
+	if (verbose)
96
+		if (getRversion() > '2.7.0') close(pb)
97
+		else cat("\n")
98
+	if (!fitMixture) SNR <- mixtureParams <- NA
99
+	save(A, file=AFile)
100
+	save(B, file=BFile)
101
+	return(list(sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName))
83 102
 }
84 103
 
85 104
 fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=10, verbose=FALSE){
... ...
@@ -155,6 +155,32 @@ celDates <- function(celfiles){
155 155
 	return(celdts)
156 156
 }
157 157
 
158
+isPackageLoaded <- function(pkg){
159
+	stopifnot(is.character(pkg))
160
+	pkg <- paste("package:", pkg, sep="")
161
+	pkg %in% search()
162
+}
163
+
164
+validCdfNames <- function(){
165
+	c("genomewidesnp6",
166
+	  "genomewidesnp5",
167
+	  "human370v1c",
168
+	  "human370quadv3c",
169
+	  "human550v3b",
170
+	  "human650v3a",
171
+	  "human610quadv1b",
172
+	  "human660quadv1a",
173
+	  "human1mduov3b")
174
+}
158 175
 
176
+isValidCdfName <- function(cdfName){
177
+	chipList <- validCdfNames()
178
+	result <- cdfName %in% chipList	
179
+	if(!(result)){
180
+		warning("cdfName must be one of the following: ",
181
+			chipList)
182
+	}
183
+	return(result)
184
+}
159 185
 
160 186
 
161 187
Binary files a/inst/scripts/copynumber.pdf and b/inst/scripts/copynumber.pdf differ
... ...
@@ -1,13 +1,6 @@
1 1
 \name{AlleleSet-methods}
2 2
 \docType{methods}
3
-\alias{A}
4
-\alias{A,AlleleSet-method}
5
-\alias{A<-}
6
-\alias{A<-,AlleleSet,matrix-method}
7
-\alias{B}
8
-\alias{B,AlleleSet-method}
9
-\alias{B<-}
10
-\alias{B<-,AlleleSet,matrix-method}
3
+
11 4
 \title{Indicator for polymorphic probes}
12 5
 \description{
13 6
   
... ...
@@ -7,7 +7,25 @@
7 7
 	This function returns all the user-modifiable arguments to the crlmm copy number function.
8 8
 }
9 9
 \usage{
10
-    cnOptions(outdir = "./", cdfName, crlmmFile = "snpsetObject.rda", intensityFile = "normalizedIntensities.rda", rgFile = "rgFile.rda", save.it = TRUE, save.cnset = TRUE, load.it = TRUE, splitByChr = TRUE, MIN.OBS = 3, MIN.SAMPLES = 10, batch = NULL, DF.PRIOR = 50, bias.adj = FALSE, prior.prob = rep(1/4, 4), SNRmin = 4, chromosome = 1:24, seed = 123, verbose = TRUE, GT.CONF.THR = 0.99, PHI.THR = 2^6, nHOM.THR = 5, MIN.NU = 2^3, MIN.PHI = 2^3, THR.NU.PHI = TRUE, thresholdCopynumber = TRUE, unlink = TRUE, ...)
10
+    cnOptions(outdir = "./", cdfName, 
11
+              AFile="A",
12
+	      BFile="B",
13
+	      callsFile="genotypes",
14
+	      rgFile="rgFile.rda",
15
+	      cnFile="cnSet",
16
+	      cnrmaFile="cn_rmaResult.rda",
17
+	      snprmaFile="snp_rmaResult.rda",
18
+	      confsFile="confsFile",
19
+	      save.it = TRUE, 
20
+	      save.cnset = TRUE, 
21
+	      load.it = TRUE, 
22
+	      MIN.OBS = 3, MIN.SAMPLES = 10, batch = NULL, DF.PRIOR = 50, 
23
+	      bias.adj = FALSE, prior.prob = rep(1/4, 4), 
24
+	      SNRmin = 4, chromosome = 1:24, seed = 123, verbose = TRUE, 
25
+	      GT.CONF.THR = 0.99, PHI.THR = 2^6, nHOM.THR = 5, 
26
+	      MIN.NU = 2^3, MIN.PHI = 2^3, 
27
+	      THR.NU.PHI = TRUE, thresholdCopynumber = TRUE, unlink = TRUE, 
28
+xo	      use.poe=FALSE, crlmmBatchSize=1100, ...)
11 29
 }
12 30
 \arguments{
13 31
   \item{outdir}{
... ...
@@ -16,53 +34,71 @@
16 34
   \item{cdfName}{
17 35
         Character string indicating array-type.  See crlmm:::validCdfNames().
18 36
 }
19
-  \item{crlmmFile}{
20 37
 
21
-        When \code{save.it} is \code{TRUE}, the output from the crlmm
22
-        genotyping will be saved to \code{<outdir>/<crlmmFile>}.  This
23
-        object should not be directly loaded by the user.  When
24
-        \code{load.it} is \code{TRUE}, the function
25
-        \code{crlmmCopynumber} will load this object from
26
-        \code{outdir} without rerunning the quantile normalization and
27
-        genotyping steps. 
38
+  \item{AFile}{
39
+
40
+  File name for quantile normalized intensities for the A allele is <outdir>/AFile<.rda>
28 41
 
29 42
 }
30 43
 
31
-  \item{intensityFile}{
44
+  \item{BFile}{
45
+
46
+  File name for quantile normalized intensities for the B allele is <outdir>/BFile<.rda>
47
+
48
+}
49
+
50
+  \item{callsFile}{
51
+
52
+  File name for storing biallelic genotypes from the crlmm algorithm is <outdir>/genotypes<.rda>
32 53
 
33
-        When \code{save.it} is \code{TRUE}, the allele summaries of
34
-        the quantile normalized intensities at polymorphic loci will
35
-        be saved to \code{<outdir>/<intensityFile>}.  This object is
36
-        not intended to be loaded directly by the user.  When
37
-        \code{load.it} is \code{TRUE}, the function
38
-        \code{crlmmCopynumber} will load this object from
39
-        \code{outdir} without rerunning the quantile normalization and
40
-        genotyping steps.
41
-       
42 54
 }
43 55
 
44 56
   \item{rgFile}{
57
+
58
+  Applicable for the Illumina platform only.
59
+
60
+  File name for storing the normalized R and G summaries is <outdir>/rgFile<.rda>
61
+
62
+}
63
+
64
+  \item{cnFile}{
65
+
66
+  Filename for CNSet object is <outdir>/cnSet_<batch>_<CHR><.rda>
67
+       
68
+}
69
+
70
+\item{cnrmaFile}{
71
+
72
+  Filename for summary statistics from the quantile normalization of
73
+  the nonpolymorphic probes is <outdir>/cnrmaFile<.rda>.  Currently,
74
+  this file contains only the skew and is not used elswhere. This file
75
+  is not meant to be loaded by the user.
76
+
77
+}
78
+
79
+\item{snprmaFile}{
80
+
81
+  Filename for summary statistics from the quantile normalization of
82
+  the A and B alleles is <outdir>/snprmaFile<.rda>.  Currently, this
83
+  file contains skew, signal to noise ratio, mixture model parameter,
84
+  the platform ids for the SNPs, and the sample names.  This file is
85
+  not meant to be loaded by the user.
86
+
87
+}
88
+
89
+\item{confsFile}{
90
+
91
+	The crlmm confidence scores for the biallelic genotype calls
92
+	are stored in <outdir>/consFile<.rda>.  
45 93
 	
46
-	For Affymetrix platforms, \code{rgFile} is ignored.  When
47
-        \code{save.it} is \code{TRUE}, the R and G summaries of the
48
-        quantile normalized intensities for illumina platforms is
49
-        saved to \code{<outdir>/<rgFile>}.  This object is not
50
-        intended to be loaded directly by the user.  When
51
-        \code{load.it} is \code{TRUE}, the function
52
-        \code{crlmmCopynumber} will load this object from
53
-        \code{outdir} without rerunning the quantile normalization and
54
-        genotyping steps.
94
+	This file is not meant to be loaded by the user.
55 95
 
56 96
 }
57 97
 
98
+
58 99
   \item{save.it}{
59 100
   
60
-  When \code{TRUE}, intermediate files containing quantile-normalized
61
-  intensities and genotyping results are saved to \code{outdir}.
62
-  Saving these objects can save time if the copy number estimation is
63
-  repeated.  Intermediate files for the Affymetrix platform are
64
-  \code{intensityFile} and \code{crlmmFile}; for the Illumina
65
-  platform, \code{rgfile} is an intermediate file.
101
+  Whether to save intermediate files.  Not currently implemented.
66 102
   
67 103
 }
68 104
 
... ...
@@ -82,13 +118,6 @@
82 118
   If \code{TRUE}, intermediate files are loaded from \code{outdir}.
83 119
   See \code{rgFile}, \code{intensityFile}, \code{crlmmFile}.
84 120
 
85
-}
86
-
87
-  \item{splitByChr}{
88
-  
89
-  If \code{TRUE}, results are saved for each chromosome.  In general,
90
-  this should always be \code{TRUE}.
91
-
92 121
 }
93 122
 
94 123
   \item{MIN.OBS}{
... ...
@@ -247,6 +276,24 @@
247 276
 
248 277
 }
249 278
 
279
+\item{use.poe}{
280
+	
281
+	Under development.  Whether to use the EM implementation of
282
+	POE (unsupervised) in the R package metaArray to estimate copy
283
+	number at nonpolymorphic loci.  A supervised POE could be used
284
+	in, for instance, a case-control study.  Otherwise, we assume
285
+	the marginal mode is copy number 2 (normal).
286
+
287
+}
288
+
289
+\item{crlmmBatchSize}{
290
+
291
+	'integer'.  The maximum number of samples that can be process
292
+	in a single batch.  When using the ff package, the maximum
293
+	number of elements in a matrix is 2^31.  For Affy 6.0, the
294
+	limit is therefore approximately 1100 samples (1.8e6 * 1100 < 2^31).
295
+}
296
+
250 297
   \item{\dots}{
251 298
 
252 299
   Additional arguments are passed to \code{readIdatFiles} (Illumina
253 300
new file mode 100644
... ...
@@ -0,0 +1,20 @@
1
+\name{copyNumber-methods}
2
+\docType{methods}
3
+\alias{copyNumber-methods}
4
+\alias{copyNumber,CNSet-method}
5
+\title{Computes the total copy number}
6
+\description{
7
+
8
+	This function calculates the total copy number by adding the
9
+	copy number for the two alleles at polymorphic loci, and
10
+	returning the copy number for one allele at nonpolymorphic
11
+	loci.
12
+
13
+}
14
+\section{Methods}{
15
+\describe{
16
+\item{\code{signature(object = "CNSet")}}{Calculates CA + CB, where CA is the copy number for allele A and CB is the copy number for allele B}
17
+}}
18
+\seealso{\code{\linkS4class{oligoClasses}{CNSet}}}
19
+\keyword{methods}
20
+
... ...
@@ -10,8 +10,9 @@
10 10
 \usage{
11 11
 crlmm(filenames, row.names=TRUE, col.names=TRUE,
12 12
       probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5,
13
-      gender=NULL, save.it=FALSE, load.it=FALSE,
14
-      intensityFile, mixtureSampleSize=10^5,
13
+      gender=NULL, save.it=FALSE, load.it=FALSE, intensityFile, callsFile,
14
+      confsFile, AFile, BFile,
15
+      mixtureSampleSize=10^5,
15 16
       eps=0.1, verbose=TRUE, cdfName, sns, recallMin=10,
16 17
       recallRegMin=1000, returnParams=FALSE, badSNP=0.7)
17 18
 }
... ...
@@ -29,7 +30,11 @@ crlmm(filenames, row.names=TRUE, col.names=TRUE,
29 30
   \item{save.it}{'logical'. Save preprocessed data?}
30 31
   \item{load.it}{'logical'. Load preprocessed data to speed up analysis?}
31 32
   \item{intensityFile}{'character' with filename to be saved/loaded -
32
-    preprocessed data.}
33
+    results from snprma function.}
34
+  \item{callsFile}{'character' string -- filename to be saved/loaded for biallelic genotype calls.}
35
+  \item{confsFile}{'character' string -- filename to be saved/loaded for confidence scores of the biallelic genotype calls.}
36
+  \item{AFile}{'character' string -- filename to be saved/loaded for quantile normalized intensities of the A allele}
37
+  \item{BFile}{'character' string -- filename to be saved/loaded for quantile normalized intensities of the B allele}
33 38
   \item{mixtureSampleSize}{Number of SNP's to be used with the mixture model.}
34 39
   \item{eps}{Minimum change for mixture model.}
35 40
   \item{verbose}{'logical'.}
... ...
@@ -42,13 +47,17 @@ crlmm(filenames, row.names=TRUE, col.names=TRUE,
42 47
   \item{badSNP}{'numeric'. Threshold to flag as bad SNP (affects batchQC)}
43 48
 }
44 49
 \value{
45
-  A \code{SnpSet} object.
46
-  \item{calls}{Genotype calls (1 - AA, 2 - AB, 3 - BB)}
47
-  \item{confs}{Confidence scores 'round(-1000*log2(1-p))'}
48
-  \item{SNPQC}{SNP Quality Scores}
49
-  \item{batchQC}{Batch Quality Score}
50
-  \item{params}{Recalibrated parameters}
50
+	
51
+	Nothing is returned.  The following files are created as a result of this function:
52
+
53
+	- AFile
54
+	- BFile
55
+	- callsFile
56
+	- confsFile
57
+	- intensityFile
58
+	
51 59
 }
60
+
52 61
 \references{
53 62
   Carvalho B, Bengtsson H, Speed TP, Irizarry RA. Exploration,
54 63
   normalization, and genotype calls of high-density oligonucleotide SNP
... ...
@@ -67,8 +76,9 @@ if (require(genomewidesnp5Crlmm) & require(hapmapsnp5)){
67 76
   ## the filenames with full path...
68 77
   ## very useful when genotyping samples not in the working directory
69 78
   cels <- list.celfiles(path, full.names=TRUE)
70
-  (crlmmOutput <- crlmm(cels))
79
+  (crlmmOutput <- crlmm(cels, cdfName="genomewidesnp5"))
71 80
 }
72 81
 }
82
+\seealso{\code{\link{cnOptions}}}
73 83
 \keyword{classif}
74 84
 
75 85
new file mode 100644
... ...
@@ -0,0 +1,23 @@
1
+\name{ellipse-methods}
2
+\docType{methods}
3
+\alias{ellipse-methods}