Browse code

more ff support

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@44783 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 22/02/2010 03:25:51
Showing 9 changed files

... ...
@@ -400,4 +400,8 @@ is stored in the assayData slot.
400 400
       - B.rda
401 401
       - cn_rmaResult.rda (SKW)
402 402
 
403
+2010-02-21 R. Scharpf - committed version 1.5.26
404
+ 
405
+** additional support for ff in crlmm and computeCopynumber functions
406
+
403 407
 
... ...
@@ -1,8 +1,8 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.5.25
5
-Date: 2010-02-20
4
+Version: 1.5.26
5
+Date: 2010-02-21
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>
7 7
 Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
8 8
 Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 5.0, 6.0, and Illumina platforms
... ...
@@ -11,7 +11,8 @@ Depends: R (>= 2.11.0), methods, Biobase (>= 2.7.2), oligoClasses (>= 1.9.28)
11 11
 Imports: affyio (>= 1.15.2), preprocessCore, utils, stats, genefilter, splines, mvtnorm, ellipse, SNPchip
12 12
 Enhances: ff
13 13
 Suggests: hapmapsnp5, hapmapsnp6, genomewidesnp5Crlmm (>= 1.0.2),genomewidesnp6Crlmm (>= 1.0.2), snpMatrix, metaArray
14
-Collate: AllGenerics.R
14
+Collate: AllClasses.R
15
+	 AllGenerics.R
15 16
 	 methods-CNSet.R
16 17
 	 methods-eSet.R
17 18
          methods-SnpSuperSet.R
... ...
@@ -242,45 +242,47 @@ predictGender <- function(res, cdfName="genomewidesnp6", SNRMin=5){
242 242
 
243 243
 
244 244
 
245
-crlmmCopynumber <- function(filenames, cnOptions, object, ...){
246
-	cnOpts <- cnOptions
247
-	batchSize <- cnOptions[["crlmmBatchSize"]]
248
-	batch <- cnOptions[["batch"]]
249
-	outdir <- cnOptions[["outdir"]]
250
-	##if necessary, split plates in separate batches
251
-	if(length(filenames) > batchSize){
252
-		L <- length(table(batch))
253
-		message("Processing the files in")
254
-		batchList <- split(names(table(batch)), rep(1:L, each=10, length.out=L))
255
-		for(i in seq(along=batchList)){
256
-			index <- as.character(batch) %in% as.character(batchList[[i]])
257
-			fns <- filenames[index]
258
-			cnOpts[["batch"]] <- cnOptions[["batch"]][index]
259
-			cnOpts[["AFile"]] <- paste(outdir, "/", paste(cnOptions[["AFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
260
-			cnOpts[["BFile"]] <- paste(outdir, "/", paste(cnOptions[["BFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
261
-			cnOpts[["callsFile"]] <- paste(outdir, "/", paste(cnOptions[["callsFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
262
-			cnOpts[["confsFile"]] <- paste(outdir, "/", paste(cnOptions[["confsFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
263
-			cnOpts[["snprmaFile"]] <- paste(outdir, "/", paste(cnOptions[["snprmaFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
264
-			cnOpts[["cnrmaFile"]] <- paste(outdir, "/", paste(cnOptions[["cnrmaFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
265
-			cnOpts[["cnFile"]] <- file.path(outdir, paste(cnOptions[["cnFile"]], i, sep="_crlmmBatch"))
266
-			cnOpts[["rgFile"]] <- file.path(outdir, paste(cnOptions[["rgFile"]], i, sep="_crlmmBatch"))
267
-			crlmmCopynumber.crlmmBatch(fns, cnOpts, object, ...)
268
-		}
269
-	} else {
270
-		cnOpts[["AFile"]] <- paste(outdir, "/", cnOptions[["AFile"]], ".rda", sep="")
271
-		cnOpts[["BFile"]] <- paste(outdir, "/", cnOptions[["BFile"]], ".rda", sep="")
272
-		cnOpts[["callsFile"]] <- paste(outdir, "/", cnOptions[["callsFile"]], ".rda", sep="")
273
-		cnOpts[["confsFile"]] <- paste(outdir, "/", cnOptions[["confsFile"]], ".rda", sep="")
274
-		cnOpts[["snprmaFile"]] <- paste(outdir, "/",cnOptions[["snprmaFile"]], ".rda", sep="")
275
-		cnOpts[["cnrmaFile"]] <- paste(outdir, "/", cnOptions[["cnrmaFile"]], ".rda", sep="")
276
-		cnOpts[["cnFile"]] <- file.path(outdir, cnOptions[["cnFile"]])
277
-		cnOpts[["rgFile"]] <- file.path(outdir, cnOptions[["rgFile"]])
278
-		crlmmCopynumber.crlmmBatch(filenames, cnOpts, object, ...)
279
-	}
280
-}
245
+##crlmmCopynumber <- function(filenames, cnOptions, object, ...){
246
+##	cnOpts <- cnOptions
247
+##	batchSize <- cnOptions[["crlmmBatchSize"]]
248
+##	batch <- cnOptions[["batch"]]
249
+##	outdir <- cnOptions[["outdir"]]
250
+##	##if necessary, split plates in separate batches
251
+##	if(length(filenames) > batchSize){
252
+##		L <- length(table(batch))
253
+##		message("Processing the files in")
254
+##		batchList <- split(names(table(batch)), rep(1:L, each=10, length.out=L))
255
+##		for(i in seq(along=batchList)){
256
+##			index <- as.character(batch) %in% as.character(batchList[[i]])
257
+##			fns <- filenames[index]
258
+##			cnOpts[["batch"]] <- cnOptions[["batch"]][index]
259
+##			cnOpts[["AFile"]] <- paste(outdir, "/", paste(cnOptions[["AFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
260
+##			cnOpts[["BFile"]] <- paste(outdir, "/", paste(cnOptions[["BFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
261
+##			cnOpts[["CAFile"]] <- paste(outdir, "/", paste(cnOptions[["CAFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
262
+##g			cnOpts[["CBFile"]] <- paste(outdir, "/", paste(cnOptions[["CBFile"]], i, sep="_crlmmBatch"), ".rda", sep="")			
263
+##			cnOpts[["callsFile"]] <- paste(outdir, "/", paste(cnOptions[["callsFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
264
+##			cnOpts[["confsFile"]] <- paste(outdir, "/", paste(cnOptions[["confsFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
265
+##			cnOpts[["snprmaFile"]] <- paste(outdir, "/", paste(cnOptions[["snprmaFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
266
+##			cnOpts[["cnrmaFile"]] <- paste(outdir, "/", paste(cnOptions[["cnrmaFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
267
+##			cnOpts[["cnFile"]] <- file.path(outdir, paste(cnOptions[["cnFile"]], i, sep="_crlmmBatch"))
268
+##			cnOpts[["rgFile"]] <- file.path(outdir, paste(cnOptions[["rgFile"]], i, sep="_crlmmBatch"))
269
+##			crlmmCopynumber.crlmmBatch(fns, cnOpts, object, ...)
270
+##		}
271
+##	} else {
272
+##		cnOpts[["AFile"]] <- paste(outdir, "/", cnOptions[["AFile"]], ".rda", sep="")
273
+##		cnOpts[["BFile"]] <- paste(outdir, "/", cnOptions[["BFile"]], ".rda", sep="")
274
+##		cnOpts[["callsFile"]] <- paste(outdir, "/", cnOptions[["callsFile"]], ".rda", sep="")
275
+##		cnOpts[["confsFile"]] <- paste(outdir, "/", cnOptions[["confsFile"]], ".rda", sep="")
276
+##		cnOpts[["snprmaFile"]] <- paste(outdir, "/",cnOptions[["snprmaFile"]], ".rda", sep="")
277
+##		cnOpts[["cnrmaFile"]] <- paste(outdir, "/", cnOptions[["cnrmaFile"]], ".rda", sep="")
278
+##		cnOpts[["cnFile"]] <- file.path(outdir, cnOptions[["cnFile"]])
279
+##		cnOpts[["rgFile"]] <- file.path(outdir, cnOptions[["rgFile"]])
280
+##		crlmmCopynumber.crlmmBatch(filenames, cnOpts, object, ...)
281
+##	}
282
+##}
281 283
 
282 284
 
283
-crlmmCopynumber.crlmmBatch <- function(filenames, cnOptions, object, ...){
285
+crlmmCopynumber <- function(filenames, cnOptions, object, ...){
284 286
 	set.seed(cnOptions[["seed"]])  ##for reproducibility
285 287
 	if(!missing(object)){
286 288
 		stopifnot(class(object) == "CNSet")
... ...
@@ -292,24 +294,30 @@ crlmmCopynumber.crlmmBatch <- function(filenames, cnOptions, object, ...){
292 294
 		##crlmmResults <- crlmmWrapper(filenames, cnOptions, ...)
293 295
 		crlmmWrapper(filenames, cnOptions, ...)
294 296
 	}
297
+	verbose <- cnOptions[["verbose"]]
295 298
 	load.it <- cnOptions[["load.it"]]
296 299
 	outdir <- cnOptions[["outdir"]]
297 300
 	snprmaFile <- cnOptions[["snprmaFile"]]
298 301
 	cnFile <- cnOptions[["cnFile"]]
299 302
 	cnrmaFile <- cnOptions[["cnrmaFile"]]
303
+	snprmaFile <- cnOptions[["snprmaFile"]]
300 304
 	callsFile <- cnOptions[["callsFile"]]
301 305
 	confsFile <- cnOptions[["confsFile"]]
302 306
 	AFile <- cnOptions[["AFile"]]
303 307
 	BFile <- cnOptions[["BFile"]]
308
+	CAFile <- cnOptions[["CAFile"]]
309
+	CBFile <- cnOptions[["CBFile"]]
304 310
 	path <- system.file("extdata", package=paste(cnOptions[["cdfName"]], "Crlmm", sep=""))
305 311
 	load(file.path(path, "snpProbes.rda"))
306 312
 	snpProbes <- get("snpProbes")
307 313
 	load(file.path(path, "cnProbes.rda"))
308 314
 	cnProbes <- get("cnProbes")	
309 315
 	k <- grep("chr", colnames(snpProbes))
310
-	message("Attaching quantile-normalized intensities...")
316
+	if(verbose) message("Loading quantile-normalized intensities...")
311 317
 	##cwd <- getwd()
312 318
 	##setwd(dirname(snprmaFile[1]))
319
+	load(snprmaFile)
320
+	res <- get("res")
313 321
 	load(AFile)
314 322
 	if(isPackageLoaded("ff")) open(A)
315 323
 	load(BFile)
... ...
@@ -321,28 +329,27 @@ crlmmCopynumber.crlmmBatch <- function(filenames, cnOptions, object, ...){
321 329
 	load(confsFile)
322 330
 	confs <- get("confs")
323 331
 	if(isPackageLoaded("ff")) open(confs)	
324
-	fns <- rownames(A)
332
+	##fns <- rownames(A)
333
+	fns <- c(res[["gns"]], res[["cnnames"]])
334
+	if(length(fns) != nrow(A)) stop("check featurenames. fns should be the rownames of A")
325 335
 	nr <- nrow(A); nc <- ncol(A)
326 336
 	if(isPackageLoaded("ff")){
327
-		##initialize CA, CB
328
-		if(!file.exists(file.path(outdir, "CA.rda"))){
329
-			CA <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dimnames(A))
330
-
331
-		} else{
332
-			load(file.path(outdir, "CA.rda"))
333
-			if(!all(dim(CA) == dim(A)))
334
-				CA <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dimnames(A), overwrite=TRUE)				
335
-			open(CA)
336
-		}
337
-		if(!file.exists(file.path(outdir, "CB.rda"))){		
338
-			CB <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dimnames(A))
339
-		} else{
340
-			load(file.path(outdir, "CB.rda"))
341
-			if(!all(dim(CB) == dim(A)))
342
-				CB <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dimnames(A), overwrite=TRUE)							
343
-			open(CB)
344
-		}
345
-	}
337
+		if(file.exists(CAFile)){
338
+			load(CAFile)
339
+			if(is.null(dim(CA)) | !all(dim(CA) == dim(A))) {
340
+				unlink(CAFile)
341
+				CA <- initializeBigMatrix(nr, nc)
342
+			}
343
+		} else CA <- initializeBigMatrix(nr, nc)
344
+		if(file.exists(CBFile)){
345
+			load(CBFile)
346
+			if(is.null(dim(CB)) | !all(dim(CB) == dim(A))) {			     
347
+				CB <- initializeBigMatrix(nr, nc)
348
+			}
349
+		} else CB <- initializeBigMatrix(nr, nc)
350
+		open(CA)
351
+		open(CB)
352
+	} 
346 353
 	##cnFile <- cnOptions[["cnFile"]]
347 354
 	chromosome <- cnOptions[["chromosome"]]
348 355
 	SNRmin <- cnOptions[["SNRmin"]]
... ...
@@ -353,29 +360,47 @@ crlmmCopynumber.crlmmBatch <- function(filenames, cnOptions, object, ...){
353 360
 			    data=scanDates,
354 361
 			    varMetadata=data.frame(labelDescription=colnames(scanDates),
355 362
 			    row.names=colnames(scanDates)))
356
-	load(file.path(cnOptions[["outdir"]], "sampleStats.rda"))
357
-	sampleStats <- data.frame(sampleStats, batch=cnOptions[["batch"]])
363
+	##load(file.path(cnOptions[["outdir"]], "sampleStats.rda"))
364
+	sampleStats <- data.frame(SKW=res$SKW,
365
+				  SNR=res$SNR,
366
+				  mixtureParams=res$mixtureParams,
367
+				  gender=res$gender,
368
+				  batch=cnOptions[["batch"]])
369
+	##sampleStats, batch=cnOptions[["batch"]])
358 370
 	pD <- new("AnnotatedDataFrame",
359 371
 		  data=sampleStats,
360 372
 		  varMetadata=data.frame(labelDescription=colnames(sampleStats)))
361 373
 	batch <- cnOptions[["batch"]]
374
+	if(isPackageLoaded("ff")){
375
+##		featureDataFF <- ffdf(position=ff(rep(0L, length(fns)), vmode="integer", finalizer="close"),
376
+##				      chromosome=ff(rep(0L, length(fns)), vmode="integer", finalizer="close"),
377
+##				      isSnp=ff(rep(0L, length(fns)), vmode="integer", finalizer="close"),
378
+##				      row.names=fns)
379
+##		fD <- vector("list", length(chromosome))
380
+		featureDataFF <- ff(dim=c(nrow(A), 17*length(unique(batch))+3), vmode="double", finalizer="close",
381
+				    overwrite=TRUE)
382
+	}
362 383
 	for(CHR in chromosome){
363 384
 		snps <- rownames(snpProbes)[snpProbes[, k] == CHR]
364 385
 		cns <- rownames(cnProbes)[cnProbes[, k] == CHR]		
365
-		index.snps <- match(snps, rownames(A))
366
-		index.cn <- match(cns, rownames(A))
386
+		index.snps <- match(snps, fns)
387
+		index.cn <- match(cns, fns)
367 388
 		row.index <- c(index.snps, index.cn)
368 389
 		cat("Chromosome ", CHR, "\n")
390
+		if(isPackageLoaded("ff")){		
391
+			fD.batch <- vector("list", length(unique(batch)))
392
+		}
369 393
 		for(i in seq(along=unique(batch))){
370 394
 			PLATE <- unique(batch)[i]
395
+			message("Plate: ", PLATE)
371 396
 			sample.index <- which(batch==PLATE)
372 397
 			##cnOptions[["batch"]] <- cnOptions[["batch"]][snpI[["SNR"]]  >= SNRmin]
373 398
 			if(isPackageLoaded("ff")){
374 399
 				ca <- CA[row.index, sample.index]
375 400
 				cb <- CB[row.index, sample.index]
376 401
 			} else{
377
-				dns <- dimnames(A[row.index, sample.index])
378
-				cb <- ca <- matrix(NA, nr=length(row.index), nc=length(sample.index), dimnames=dns)
402
+				##dns <- dimnames(A[row.index, sample.index])
403
+				cb <- ca <- matrix(NA, nr=length(row.index), nc=length(sample.index))##, dimnames=dns)
379 404
 			}
380 405
 			cnSet <- new("CNSet",
381 406
 				     alleleA=A[row.index, sample.index],
... ...
@@ -386,52 +411,64 @@ crlmmCopynumber.crlmmBatch <- function(filenames, cnOptions, object, ...){
386 411
 				     CB=cb,
387 412
 				     featureData=annotatedDataFrameFrom(A[row.index, sample.index], byrow=TRUE),
388 413
 				     phenoData=pD[sample.index, ],
389
-				     protocolData=protocoldata[sample.index, ],
390
-				     annotation=cnOptions[["cdfName"]])			
391
-			if(any(cnSet$SNR > SNRmin)){
392
-				message(paste("Excluding samples with SNR < ", SNRmin))
393
-				cnSet <- cnSet[, cnSet$SNR >= SNRmin]
394
-			}
395
-			##rm(snpI, NP, gtSet); gc()
414
+				     protocolData=protocoldata[sample.index, ])
415
+			##Verify this is correct
416
+			annotation(cnSet) <- cnOptions[["cdfName"]]
417
+			featureNames(cnSet) <- fns[row.index]
418
+			##add chromosome, position, isSnp
419
+			cnSet <- annotate(cnSet)
420
+##			if(any(cnSet$SNR > SNRmin)){
421
+##				if(CHR == chromosome[1]) message(paste("Excluding samples with SNR < ", SNRmin))
422
+##				cnSet <- cnSet[, cnSet$SNR >= SNRmin]
423
+##			}
396 424
 			featureData(cnSet) <- lm.parameters(cnSet, cnOptions)
397
-		} 
398
-		if(CHR != 24){
399
-			## 60G for 1239 files
400
-			cnSet <- computeCopynumber(cnSet, cnOptions)
401
-			if(isPackageLoaded("ff")){
402
-				CA[row.index, sample.index] <- cnSet@assayData[["CA"]]
403
-				CB[row.index, sample.index] <- cnSet@assayData[["CB"]]
404
-			}
405
-		} else{
406
-			message("Copy number estimates not available for chromosome Y.  Saving only the 'callSet' object for this chromosome")
407
-			alleleSet <- cnSet
408
-			rm(cnSet) ; gc()
409
-			save(alleleSet, file=file.path(cnOptions[["outdir"]], paste("alleleSet_", PLATE, "_", CHR, ".rda", sep="")))
410
-			rm(alleleSet); gc()
411
-			next()
412
-		}
413
-		if(length(chromosome) == 1){
414
-			if(cnOptions[["save.cnset"]]){
415
-				save(cnSet, file=paste(cnFile, "_", PLATE, "_", CHR, ".rda", sep=""))				
416
-			}
417
-		}
418
-		if(length(chromosome) > 1){
419
-			save(cnSet, file=paste(cnFile, "_", PLATE, "_", CHR, ".rda", sep=""))
420
-			##save(cnSet, file=file.path(cnOptions[["outdir"]], paste("cnSet_", CHR, ".rda", sep="")))
421
-			rm(cnSet); gc()
422
-		} else {
423
-			if(isPackageLoaded("ff")) {
424
-				close(CA); close(CB)
425
-				save(CA, file=file.path(cnOptions[["outdir"]], "CA.rda"))
426
-				save(CB, file=file.path(cnOptions[["outdir"]], "CB.rda"))		
427
-			}			
428
-			return(cnSet)
429
-		}
430
-	}
425
+			if(CHR < 24){
426
+				cnSet <- computeCopynumber(cnSet, cnOptions)
427
+				if(isPackageLoaded("ff")){
428
+					if(i == 1) {  ## first batch
429
+						featureDataFF[row.index, 1:20] <- as.matrix(fData(cnSet))
430
+						fcol <- 20
431
+					} else {
432
+						featureDataFF[row.index, (fcol+1):(fcol+17)]
433
+						fcol <- fcol+17
434
+						##remove redundant columns
435
+					}
436
+					CA[row.index, sample.index] <- cnSet@assayData[["CA"]]
437
+					CB[row.index, sample.index] <- cnSet@assayData[["CB"]]
438
+					next() ## next batch
439
+				} else {
440
+					save(cnSet, file=paste(cnFile, "_", PLATE, "_", CHR, ".rda", sep=""))
441
+				}
442
+			} else break()  ## break out of the batch loop.  Go to next chromosome
443
+			##		if(length(chromosome) == 1 & length(unique(batch)) == 1){
444
+			##			if(cnOptions[["save.cnset"]] & !isLoadedPackage("ff"))
445
+			##				save(cnSet, file=paste(cnFile, "_", PLATE, "_", CHR, ".rda", sep=""))
446
+			##			return(cnSet)
447
+			##		} else {
448
+			##			##either multiple chromosome or multiple batches...save to file if ff is not loaded
449
+			##			if(!isPackageLoaded("ff")){
450
+			##				save(cnSet, file=paste(cnFile, "_", PLATE, "_", CHR, ".rda", sep=""))
451
+			##			}
452
+			##			rm(cnSet); gc()
453
+			##	}
454
+		} ## end of batch loop
455
+	} ## end of chromosome loop
431 456
 	if(isPackageLoaded("ff")) {
457
+		close(A); close(B)
458
+		close(featureDataFF)
432 459
 		close(CA); close(CB)
433
-		save(CA, file=file.path(cnOptions[["outdir"]], "CA.rda"))
434
-		save(CB, file=file.path(cnOptions[["outdir"]], "CB.rda"))		
460
+		save(featureDataFF, file=file.path(outdir, "featureDataFF.rda"))
461
+		save(CA, file=CAFile)
462
+		save(CB, file=CBFile)
463
+		close(calls); close(confs)
464
+		ffSet <- new("FFSet",
465
+			     A=A,
466
+			     B=B,
467
+			     snpCall=calls,
468
+			     callProbability=confs,
469
+			     CA=CA,
470
+			     CB=CB)
471
+		return(ffSet)
435 472
 	}
436 473
 }
437 474
 
... ...
@@ -524,6 +561,7 @@ loadIlluminaCnrma <- function(){
524 561
 }
525 562
 
526 563
 crlmmWrapper <- function(filenames, cnOptions, ...){
564
+	crlmmBatchSize <- cnOptions[["crlmmBatchSize"]]
527 565
 	cdfName <- cnOptions[["cdfName"]]
528 566
 	load.it <- cnOptions[["load.it"]]
529 567
 	save.it <- cnOptions[["save.it"]]
... ...
@@ -548,25 +586,26 @@ crlmmWrapper <- function(filenames, cnOptions, ...){
548 586
 	}
549 587
 	if(platform == "illumina") RG <- loadIlluminaRG(rgFile, callsFile, load.it, save.it)
550 588
 	if(!(file.exists(dirname(callsFile)))) stop(dirname(callsFile), " does not exist.")
551
-	if(!(file.exists(dirname(snprmaFile[1])))) stop(dirname(snprmaFile[1]), " does not exist.")
552
-	if(load.it){
553
-		if(!file.exists(callsFile)){
554
-			message("load.it is TRUE, but ", callsFile, " does not exist.  Rerunning the genotype calling algorithm") 
555
-			##load.it <- FALSE
556
-		}
557
-	}
589
+	if(!(file.exists(dirname(snprmaFile)))) stop(dirname(snprmaFile), " does not exist.")
590
+##	if(load.it){
591
+##		if(!file.exists(callsFile)){
592
+##			message("load.it is TRUE, but ", callsFile, " does not exist.  Rerunning the genotype calling algorithm") 
593
+##			##load.it <- FALSE
594
+##		}
595
+##	}
558 596
 	if(platform == "affymetrix"){
559
-		if(!file.exists(callsFile) | !load.it){
597
+##		if(!file.exists(callsFile) | !load.it){
560 598
 			crlmm(filenames=filenames,
561 599
 			      cdfName=cdfName,
562 600
 			      save.it=TRUE,
563 601
 			      load.it=load.it,
564
-			      intensityFile=snprmaFile,
602
+			      snprmaFile=snprmaFile,
565 603
 			      callsFile=callsFile,
566 604
 			      confsFile=confsFile,
567 605
 			      AFile=AFile,
568
-			      BFile=BFile)
569
-		}
606
+			      BFile=BFile,
607
+			      crlmmBatchSize=crlmmBatchSize)
608
+##		}
570 609
 	}
571 610
 	gc()
572 611
 	if(platform == "illumina") {
... ...
@@ -756,18 +795,20 @@ thresholdCopynumber <- function(object){
756 795
 
757 796
 cnOptions <- function(outdir="./",
758 797
 		      cdfName,
759
-		      AFile="A",
760
-		      BFile="B",
761
-		      callsFile="genotypes",
762
-		      rgFile="rgFile",
798
+		      AFile="A.rda",
799
+		      BFile="B.rda",
800
+		      CAFile="CA.rda",
801
+		      CBFile="CB.rda",
802
+		      callsFile="genotypes.rda",
803
+		      rgFile="rgFile.rda",
763 804
 		      cnFile="cnSet",
764
-		      cnrmaFile="cn_rmaResult",
805
+		      cnrmaFile="cn_rmaResult.rda",
765 806
 		      ##npBinary="NP.bin",  ##name of file.backed.big.matrix
766 807
 		      ##npDesc="NP.desc",
767
-		      snprmaFile="snp_rmaResult",
808
+		      snprmaFile="snp_rmaResult.rda",
768 809
 		      ##callsFile="calls.desc",
769 810
 		      ##confsFile="confsFile.desc",
770
-		      confsFile="confsFile",
811
+		      confsFile="confs.rda",
771 812
 		      save.it=TRUE,
772 813
 		      save.cnset=TRUE,
773 814
 		      load.it=TRUE,
... ...
@@ -790,15 +831,16 @@ cnOptions <- function(outdir="./",
790 831
 		      thresholdCopynumber=TRUE,
791 832
 		      unlink=TRUE,
792 833
 		      use.poe=FALSE,
793
-		      crlmmBatchSize=1100, ## this is about right for Affy 6.0 platform 1.8e6 * 1100 < 2^31
834
+		      crlmmBatchSize=1000,
794 835
 		      ...){
795
-	if(isPackageLoaded("ff")){
796
-		AFile=paste(AFile, "ff", sep="_")
797
-		BFile=paste(BFile, "ff", sep="_")
798
-		confsFile=paste(confsFile, "ff", sep="_")
799
-		callsFile=paste(callsFile, "ff", sep="_")
800
-		cnFile=paste(cnFile, "ff", sep="")
801
-	}
836
+##	if(isPackageLoaded("ff")){
837
+##		AFile=paste(AFile, "ff", sep="_")
838
+##		BFile=paste(BFile, "ff", sep="_")
839
+##		confsFile=paste(confsFile, "ff", sep="_")
840
+##		callsFile=paste(callsFile, "ff", sep="_")
841
+##		cnFile=paste(cnFile, "ff", sep="")
842
+##	}
843
+	if(length(batch) > 200) warning("This job may require a lot of RAM. Consider using the ff package in conjuction with crlmm, as described in the copynumber_ff vignette...")
802 844
 	if(use.poe) require(metaArray)
803 845
 	if(missing(cdfName)) stop("must specify cdfName")
804 846
 	if(!file.exists(outdir)){
... ...
@@ -816,14 +858,16 @@ cnOptions <- function(outdir="./",
816 858
 		stop("must specify batch -- should be the same length as the number of files")
817 859
 	list(outdir=outdir,
818 860
 	     cdfName=cdfName,
819
-	     callsFile=callsFile,
820
-	     confsFile=confsFile,
821
-	     AFile=AFile,
822
-	     BFile=BFile,
823
-	     snprmaFile=snprmaFile,
824
-	     cnrmaFile=cnrmaFile,
825
-	     rgFile=rgFile,
826
-	     cnFile=cnFile,
861
+	     callsFile=file.path(outdir, callsFile),
862
+	     confsFile=file.path(outdir, confsFile),
863
+	     AFile=file.path(outdir, AFile),
864
+	     BFile=file.path(outdir, BFile),
865
+	     CAFile=file.path(outdir, CAFile),
866
+	     CBFile=file.path(outdir, CBFile),
867
+	     snprmaFile=file.path(outdir, snprmaFile),
868
+	     cnrmaFile=file.path(outdir, cnrmaFile),
869
+	     rgFile=file.path(outdir, rgFile),
870
+	     cnFile=file.path(outdir, cnFile),
827 871
 	     save.it=save.it,
828 872
 	     save.cnset=save.cnset,
829 873
 	     load.it=load.it,
... ...
@@ -888,8 +932,11 @@ lm.parameters <- function(object, cnOptions){
888 932
 }
889 933
 
890 934
 nonpolymorphic <- function(object, cnOptions, tmp.objects){
935
+	chromosome <- cnOptions[["chromosome"]]
891 936
 	batch <- unique(object$batch)
892 937
 	CHR <- unique(chromosome(object))
938
+	verbose <- cnOptions[["verbose"]]
939
+	if(CHR != chromosome[1]) verbose <- FALSE
893 940
 	goodSnps <- function(object, PHI.THR, tmp.objects, nAA.THR, nBB.THR){
894 941
 		Ns <- tmp.objects[["Ns"]]
895 942
 		##Ns <- get("Ns", envir)
... ...
@@ -1015,10 +1062,9 @@ nonpolymorphic <- function(object, cnOptions, tmp.objects){
1015 1062
 		
1016 1063
 		THR.NU.PHI <- cnOptions$THR.NU.PHI
1017 1064
 		if(THR.NU.PHI){
1018
-			verbose <- cnOptions$verbose
1019 1065
 			##Assign values to object
1020 1066
 			object <- pr(object, "nuA", batch, nuA)
1021
-			object <- pr(object, "phiA", batch, phiA)			
1067
+			object <- pr(object, "phiA", batch, phiA)
1022 1068
 			if(verbose) message("Thresholding nu and phi")
1023 1069
 			object <- thresholdModelParams(object, cnOptions)
1024 1070
 		} else {
... ...
@@ -1036,7 +1082,6 @@ nonpolymorphic <- function(object, cnOptions, tmp.objects){
1036 1082
 
1037 1083
 		THR.NU.PHI <- cnOptions$THR.NU.PHI
1038 1084
 		if(THR.NU.PHI){
1039
-			verbose <- cnOptions$verbose
1040 1085
 			##Assign values to object
1041 1086
 			object <- pr(object, "nuA", batch, nuA)
1042 1087
 			object <- pr(object, "phiA", batch, phiA)			
... ...
@@ -1738,6 +1783,7 @@ computeCopynumber.CNSet <- function(object, cnOptions){
1738 1783
 	if(length(batch) != ncol(object)) stop("Batch must be the same length as the number of samples")
1739 1784
 	MIN.SAMPLES <- cnOptions$MIN.SAMPLES
1740 1785
 	verbose <- cnOptions$verbose
1786
+	if(CHR != cnOptions[["chromosome"]][1]) verbose <- FALSE
1741 1787
 	use.poe <- cnOptions[["use.poe"]]
1742 1788
 	for(i in seq(along=unique(batch))){
1743 1789
 		PLATE <- unique(batch)[i]
... ...
@@ -1754,11 +1800,11 @@ computeCopynumber.CNSet <- function(object, cnOptions){
1754 1800
 			cnOptions$bias.adj <- bias.adj <- FALSE
1755 1801
 		}
1756 1802
 		if(bias.adj){
1757
-			if(verbose) message("Dropping samples with low posterior prob. of normal copy number (samples dropped is locus-specific)")
1803
+			if(verbose & i == 1) message("Dropping samples with low posterior prob. of normal copy number (samples dropped is locus-specific)")
1758 1804
 			if(!use.poe)
1759 1805
 				tmp.objects <- biasAdjNP(object.batch, cnOptions, tmp.objects)
1760 1806
 			tmp.objects <- biasAdj(object.batch, cnOptions, tmp.objects)
1761
-			if(verbose) message("Recomputing location and scale parameters")
1807
+			if(verbose & i == 1) message("Recomputing location and scale parameters")
1762 1808
 		}
1763 1809
 		##update tmp.objects
1764 1810
 		tmp.objects <- withinGenotypeMoments(object.batch,
... ...
@@ -1773,14 +1819,13 @@ computeCopynumber.CNSet <- function(object, cnOptions){
1773 1819
 		##nuA=getParam(object.batch, "nuA", PLATE)
1774 1820
 		THR.NU.PHI <- cnOptions$THR.NU.PHI
1775 1821
 		if(THR.NU.PHI){
1776
-			verbose <- cnOptions$verbose
1777
-			if(verbose) message("Thresholding nu and phi")
1822
+			if(verbose & i == 1) message("Thresholding nu and phi")
1778 1823
 			object.batch <- thresholdModelParams(object.batch, cnOptions)
1779 1824
 		}		
1780
-		if(verbose) message("\nAllele specific copy number")	
1825
+		if(verbose & i == 1) message("\nAllele specific copy number")	
1781 1826
 		object.batch <- polymorphic(object.batch, cnOptions, tmp.objects)
1782 1827
 		if(any(!isSnp(object))){ ## there are nonpolymorphic probes
1783
-			if(verbose) message("\nCopy number for nonpolymorphic probes...")
1828
+			if(verbose & i == 1) message("\nCopy number for nonpolymorphic probes...")
1784 1829
 			if(!use.poe){
1785 1830
 				object.batch <- nonpolymorphic(object.batch, cnOptions, tmp.objects)
1786 1831
 			} else {
... ...
@@ -1812,7 +1857,7 @@ computeCopynumber.CNSet <- function(object, cnOptions){
1812 1857
 		object <- pr(object, "corrB.AA", PLATE, getParam(object.batch, "corrB.AA", PLATE))		
1813 1858
 		rm(object.batch, tmp.objects); gc();
1814 1859
 	}
1815
-	object <- object[order(chromosome(object), position(object)), ]
1860
+	##object <- object[order(chromosome(object), position(object)), ]
1816 1861
 	if(cnOptions[["thresholdCopynumber"]]){
1817 1862
 		object <- thresholdCopynumber(object)
1818 1863
 	}
... ...
@@ -1,19 +1,23 @@
1 1
 crlmm <- function(filenames, row.names=TRUE, col.names=TRUE,
2 2
                   probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
3
-                  save.it=FALSE, load.it=FALSE, intensityFile, callsFile, confsFile,
4
-		  AFile, BFile,
3
+                  save.it=FALSE, load.it=FALSE,
4
+		  snprmaFile="snp_rmaResult.rda",
5
+		  callsFile="genotypes.rda",
6
+		  confsFile="confs.rda",
7
+		  AFile="A.rda",
8
+		  BFile="B.rda",
5 9
                   mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
6 10
                   cdfName, sns, recallMin=10, recallRegMin=1000,
7
-                  returnParams=FALSE, badSNP=0.7){
8
-	if ((load.it | save.it) & missing(intensityFile))
9
-		stop("'intensityFile' is missing, and you chose either load.it or save.it")
11
+                  returnParams=FALSE, badSNP=0.7,
12
+		  crlmmBatchSize=1000){
13
+	BS <- crlmmBatchSize
14
+	if(load.it & file.exists(snprmaFile) & file.exists(callsFile)) return() ##nothing to do
15
+	if (load.it & !file.exists(snprmaFile)){
16
+		##stop("'intensityFile' is missing, and you chose either load.it or save.it")
17
+		message("'snprmaFile' does not exist and you chose to load.it.  Rerunning snprma...")
18
+		load.it <- FALSE
19
+	} 
10 20
 	if (missing(sns)) sns <- basename(filenames)
11
-	if (!missing(intensityFile))
12
-		if (load.it & !file.exists(intensityFile)){
13
-			load.it <- FALSE
14
-			message("File ", intensityFile, " does not exist.")
15
-			message("Not loading it, but running SNPRMA from scratch.")
16
-		}
17 21
 	if (!load.it){
18 22
 		res <- snprma(filenames,
19 23
 			      fitMixture=TRUE,
... ...
@@ -24,78 +28,96 @@ crlmm <- function(filenames, row.names=TRUE, col.names=TRUE,
24 28
 			      sns=sns,
25 29
 			      AFile=AFile,
26 30
 			      BFile=BFile)
27
-		save(res, file=intensityFile)##file.path(dirname(intensityFile), "res.rda"))
31
+		save(res, file=snprmaFile)##file.path(dirname(snprmaFile), "res.rda"))
28 32
 	} else {
29
-		message("Loading quantile normalized intensities...")
30
-		load(intensityFile)
33
+		message("Loading snprmaFile...")
34
+		load(snprmaFile)
31 35
 		res <- get("res")
32 36
 	}
37
+	SKW <- res[["SKW"]]
38
+	SNR <- res[["SNR"]]	
33 39
 	load(AFile)
34 40
 	load(BFile)
35 41
 	if(isPackageLoaded("ff")) {open(A); open(B)}
36 42
 	if(row.names) row.names <- res$gns
37 43
 	if(col.names) col.names <- res$sns	
38 44
 ##	}else{
39
-##		if (verbose) message("Loading ", intensityFile, ".")
40
-##		obj <- load(intensityFile)
45
+##		if (verbose) message("Loading ", snprmaFile, ".")
46
+##		obj <- load(snprmaFile)
41 47
 ##		if (verbose) message("Done.")
42 48
 ##		if (obj != "res")
43
-##			stop("Object in ", intensityFile, " seems to be invalid.")
49
+##			stop("Object in ", snprmaFile, " seems to be invalid.")
44 50
 ##	}
45 51
 	##path <- system.file("extdata", package=paste(cdfName, "Crlmm", sep=""))
46 52
 	##load(file.path(path, "snpProbes.rda"))
47 53
 	gns <- res[["gns"]]
48 54
 	sns <- colnames(A)
55
+	nc <- ncol(A)	
49 56
 	##
50 57
 	##Ensures that if ff package is loaded, ordinary matrices are still passed to crlmmGT
51
-	aMatrix <- A[1:length(gns), ]
52
-	bMatrix <- B[1:length(gns), ]
53
-	if(isPackageLoaded("ff")) close(B)
54
-	rm(B); gc()
55
-	##	res2 <- crlmmGT(res[["A"]], res[["B"]], res[["SNR"]],
56
-	res2 <- crlmmGT(aMatrix, bMatrix, res[["SNR"]],
57
-			res[["mixtureParams"]], res[["cdfName"]],
58
-			gender=gender, row.names=row.names,
59
-			col.names=col.names, recallMin=recallMin,
60
-			recallRegMin=1000, SNRMin=SNRMin,
61
-			returnParams=returnParams, badSNP=badSNP,
62
-			verbose=verbose)
63
-	rm(aMatrix, bMatrix); gc()
64
-	res2[["SNR"]] <- res[["SNR"]]
65
-	res2[["SKW"]] <- res[["SKW"]]
58
+	if(nc > BS){
59
+		## genotype the samples in batches
60
+
61
+		##number crlmm batches
62
+		N <- ceiling(nc/BS)
63
+		##samples per batch
64
+		S <- ceiling(nc/N)
65
+		colindex <- split(1:nc, rep(1:nc, each=S, length.out=nc))
66
+	} else {
67
+		colindex <- list(1:nc)
68
+	}
69
+	if(length(colindex) > 1)
70
+		message("Calling genotypes in batches of size ", length(colindex[[1]]), " to reduce required RAM")	
66 71
 	if(isPackageLoaded("ff")){
67
-		if(file.exists(callsFile)) unlink(callsFile)
68
-		calls <- ff(dim=c(nrow(A), ncol(A)),
69
-			    vmode="integer",
70
-			    finalizer="close",
71
-			    dimnames=dimnames(A),
72
-			    overwrite=TRUE)
72
+		confs <- initializeBigMatrix(nrow(A), ncol(A))
73
+		calls <- initializeBigMatrix(nrow(A), ncol(A))
73 74
 	} else{
74
-		calls <- matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A))
75
+		confs <- matrix(NA, nrow(A), ncol(A))
76
+		calls <- matrix(NA, nrow(A), ncol(A))
77
+	}	
78
+	sex <- vector("list", length(colindex))
79
+	for(i in seq(along=colindex)){
80
+		aMatrix <- A[1:length(gns), colindex[[i]]]
81
+		bMatrix <- B[1:length(gns), colindex[[i]]]
82
+		snr <- res[["SNR"]][colindex[[i]]]
83
+		mix <- res[["mixtureParameters"]][, colindex[[i]]]
84
+		##	res2 <- crlmmGT(res[["A"]], res[["B"]], res[["SNR"]],
85
+		res2 <- tryCatch(crlmmGT(aMatrix,
86
+					 bMatrix,
87
+					 snr,
88
+					 mix,
89
+					 res[["cdfName"]],
90
+					 gender=gender,
91
+					 row.names=row.names,
92
+					 col.names=col.names[colindex[[i]]],
93
+					 recallMin=recallMin,
94
+					 recallRegMin=1000,
95
+					 SNRMin=SNRMin,
96
+					 returnParams=returnParams,
97
+					 badSNP=badSNP,
98
+					 verbose=verbose), error=function(e) NULL)
99
+		if(is.null(res2)) {
100
+			unlink(callsFile)
101
+			unlink(confsFile)
102
+			stop("error in call to crlmmGT")
103
+		}
104
+		rm(aMatrix, bMatrix, snr, mix); gc()
105
+		calls[1:length(gns), colindex[[i]]] <- res2[["calls"]]
106
+		confs[1:length(gns), colindex[[i]]] <- res2[["confs"]]
107
+		sex[[i]] <- res2[["gender"]]
108
+		rm(res2); gc()
75 109
 	}
76
-	calls[1:length(gns), ] <- res2[["calls"]]
110
+	if(isPackageLoaded("ff")) close(B)
111
+	rm(B); gc()
77 112
 	if(isPackageLoaded("ff")) close(calls)
78 113
 	save(calls, file=callsFile)
79 114
 	rm(calls); gc()
80
-	if(isPackageLoaded("ff")){
81
-		if(file.exists(confsFile)) unlink(confsFile)		
82
-		confs <- ff(dim=c(nrow(A), ncol(A)),
83
-			    vmode="integer",
84
-			    finalizer="close",
85
-			    dimnames=dimnames(A),
86
-			    overwrite=TRUE)
87
-	} else{
88
-		confs <- matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A))
89
-	}
90
-	confs[1:length(gns), ] <- res2[["confs"]]
91 115
 	if(isPackageLoaded("ff")) close(confs)
92 116
 	save(confs, file=confsFile)
93 117
 	rm(confs); gc()
94
-	sampleStats <- data.frame(SNR=res2[["SNR"]],
95
-				  SKW=res2[["SKW"]],
96
-				  gender=res2[["gender"]])
97
-	save(sampleStats, file=file.path(dirname(confsFile), "sampleStats.rda"))
98
-	if(isPackageLoaded("ff")){ close(A)}
118
+	res$gender <- unlist(sex)
119
+	save(res, file=snprmaFile)
120
+	if(isPackageLoaded("ff")) close(A)
99 121
 	rm(list=ls()); gc()
100 122
 	return()
101 123
 }
... ...
@@ -50,13 +50,13 @@ snprma <- function(filenames, mixtureSampleSize=10^5,
50 50
 	cnProbes <- get("cnProbes")
51 51
 	nr <- length(pnsa) + nrow(cnProbes)
52 52
 	nc <- length(filenames)
53
-	dns <- list(c(gns, rownames(cnProbes)), sns)  
53
+	dns <- list(c(gns, rownames(cnProbes)), sns)
54 54
 	if(isPackageLoaded("ff")){
55 55
 		message("ff package is loaded.  Creating ff objects for storing normalized and summarized intensities")
56
-		A <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dns,
57
-			overwrite=TRUE)
58
-		B <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dns,
59
-			overwrite=TRUE)
56
+		A <- initializeBigMatrix(nr, nc)
57
+		B <- initializeBigMatrix(nr, nc)
58
+		open(A)
59
+		open(B)
60 60
 	}  else {
61 61
 		A <- matrix(0L, nr, nc, dimnames=dns)
62 62
 		B <- matrix(0L, nr, nc, dimnames=dns)
... ...
@@ -98,7 +98,8 @@ snprma <- function(filenames, mixtureSampleSize=10^5,
98 98
 	if (!fitMixture) SNR <- mixtureParams <- NA
99 99
 	save(A, file=AFile)
100 100
 	save(B, file=BFile)
101
-	return(list(sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName))
101
+	return(list(sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName,
102
+		    cnnames=rownames(cnProbes)))
102 103
 }
103 104
 
104 105
 fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=10, verbose=FALSE){
... ...
@@ -183,4 +183,39 @@ isValidCdfName <- function(cdfName){
183 183
 	return(result)
184 184
 }
185 185
 
186
+initializeBigMatrix <- function(nr, nc, batch, vmode="integer"){
187
+	if(prod(nr, nc) > 2^31){
188
+		##Need multiple matrices
189
+		## -- use ffdf
190
+			
191
+		## How many samples per ff object
192
+		S <- floor(2^31/nr - 1)
193
+
194
+		## How many ff objects
195
+		L <- ceiling(nc/S)
196
+
197
+		results <- vector("list", L)
198
+		##resultsB <- vector("list", L)			
199
+		for(i in 1:(L-1)){  ## the Lth object may have fewer than nc columns
200
+			results[[i]] <- ff(dim=c(nr, S),
201
+					   vmode=vmode,
202
+					   finalizer="close",
203
+					   overwrite=TRUE)
204
+		}
205
+		##the Lth element
206
+		leftOver <- nc - ((L-1)*S)
207
+		results[[L]] <- ff(dim=c(nr, leftOver),
208
+				   vmode=vmode,
209
+				   finalizer="close",
210
+				   overwrite=TRUE)
211
+		resultsff <- do.call(ffdf, results)
212
+	} else {
213
+		resultsff <- ff(dim=c(nr, nc),
214
+				vmode=vmode,
215
+				finalizer="close",
216
+				overwrite=TRUE)
217
+	}
218
+	return(resultsff)
219
+}
220
+
186 221
 
187 222
deleted file mode 100644
... ...
@@ -1,24 +0,0 @@
1
-\name{AlleleSet-methods}
2
-\docType{methods}
3
-
4
-\title{Indicator for polymorphic probes}
5
-\description{
6
-  
7
-  This functions uses the annotation slot of the AlleleSet object to load
8
-  the corresponding annotation package and determine whether each probe
9
-  in the object interrogates a polymorphic or nonpolymorphic allele. For
10
-  instance, in the Affy 6.0 platform roughly 900,000 of the 1.8 million
11
-  markers are for polymorphic alleles.
12
-  
13
-}
14
-\usage{
15
-A(object)
16
-B(object)
17
-}
18
-\arguments{
19
-  \item{object}{AlleleSet object}
20
-}
21
-\value{
22
-	matrix of normalized intensities
23
-}
24
-\keyword{manip}
... ...
@@ -8,14 +8,16 @@
8 8
 }
9 9
 \usage{
10 10
     cnOptions(outdir = "./", cdfName, 
11
-              AFile="A",
12
-	      BFile="B",
13
-	      callsFile="genotypes",
11
+              AFile="A.rda",
12
+	      BFile="B.rda",
13
+	      CAFile="CA.rda",
14
+	      CBFile="CB.rda",
15
+	      callsFile="genotypes.rda",
14 16
 	      rgFile="rgFile.rda",
15 17
 	      cnFile="cnSet",
16 18
 	      cnrmaFile="cn_rmaResult.rda",
17 19
 	      snprmaFile="snp_rmaResult.rda",
18
-	      confsFile="confsFile",
20
+	      confsFile="confsFile.rda",
19 21
 	      save.it = TRUE, 
20 22
 	      save.cnset = TRUE, 
21 23
 	      load.it = TRUE, 
... ...
@@ -25,7 +27,8 @@
25 27
 	      GT.CONF.THR = 0.99, PHI.THR = 2^6, nHOM.THR = 5, 
26 28
 	      MIN.NU = 2^3, MIN.PHI = 2^3, 
27 29
 	      THR.NU.PHI = TRUE, thresholdCopynumber = TRUE, unlink = TRUE, 
28
-xo	      use.poe=FALSE, crlmmBatchSize=1100, ...)
30
+	      use.poe=FALSE, 
31
+	      crlmmBatchSize=1000, ...)
29 32
 }
30 33
 \arguments{
31 34
   \item{outdir}{
... ...
@@ -45,6 +48,21 @@ xo	      use.poe=FALSE, crlmmBatchSize=1100, ...)
45 48
 
46 49
   File name for quantile normalized intensities for the B allele is <outdir>/BFile<.rda>
47 50
 
51
+}
52
+
53
+  \item{CAFile}{
54
+
55
+  File name for A allele copy number is <outdir>/AFile<.rda>.  Copy
56
+  number at nonpolymorphic loci is stored in this file.
57
+
58
+}
59
+
60
+  \item{CBFile}{
61
+
62
+  File name for B allele copy number is <outdir>/AFile<.rda>.  The 'B
63
+  allele' copy number at nonpolymorphic loci is zero (copy number for
64
+  nonpolymorphic loci is stored in the 'A allele').
65
+
48 66
 }
49 67
 
50 68
   \item{callsFile}{
... ...
@@ -288,12 +306,12 @@ xo	      use.poe=FALSE, crlmmBatchSize=1100, ...)
288 306
 
289 307
 \item{crlmmBatchSize}{
290 308
 
291
-	'integer'.  The maximum number of samples that can be process
292
-	in a single batch.  When using the ff package, the maximum
293
-	number of elements in a matrix is 2^31.  For Affy 6.0, the
294
-	limit is therefore approximately 1100 samples (1.8e6 * 1100 < 2^31).
309
+	'integer'.  The maximum number of samples to be genotyped in a
310
+	batch.  This is used to keep the RAM at manageable levels.
311
+
295 312
 }
296 313
 
314
+
297 315
   \item{\dots}{
298 316
 
299 317
   Additional arguments are passed to \code{readIdatFiles} (Illumina
... ...
@@ -10,11 +10,11 @@
10 10
 \usage{
11 11
 crlmm(filenames, row.names=TRUE, col.names=TRUE,
12 12
       probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5,
13
-      gender=NULL, save.it=FALSE, load.it=FALSE, intensityFile, callsFile,
13
+      gender=NULL, save.it=FALSE, load.it=FALSE, snprmaFile, callsFile,
14 14
       confsFile, AFile, BFile,
15 15
       mixtureSampleSize=10^5,
16 16
       eps=0.1, verbose=TRUE, cdfName, sns, recallMin=10,
17
-      recallRegMin=1000, returnParams=FALSE, badSNP=0.7)
17
+      recallRegMin=1000, returnParams=FALSE, badSNP=0.7, crlmmBatchSize=1000)
18 18
 }
19 19
 
20 20
 \arguments{
... ...
@@ -29,8 +29,7 @@ crlmm(filenames, row.names=TRUE, col.names=TRUE,
29 29
     defining sex. (1 - male; 2 - female)}
30 30
   \item{save.it}{'logical'. Save preprocessed data?}
31 31
   \item{load.it}{'logical'. Load preprocessed data to speed up analysis?}
32
-  \item{intensityFile}{'character' with filename to be saved/loaded -
33
-    results from snprma function.}
32
+  \item{snprmaFile}{'character' with filename to be saved/loaded.  Statistics saved in this file include the skew (SKW), signal to noise ratio (SNR), mixture model parameters (mixtureParams), and gender.  }
34 33
   \item{callsFile}{'character' string -- filename to be saved/loaded for biallelic genotype calls.}
35 34
   \item{confsFile}{'character' string -- filename to be saved/loaded for confidence scores of the biallelic genotype calls.}
36 35
   \item{AFile}{'character' string -- filename to be saved/loaded for quantile normalized intensities of the A allele}
... ...
@@ -45,6 +44,7 @@ crlmm(filenames, row.names=TRUE, col.names=TRUE,
45 44
   \item{recallRegMin}{Minimum number of SNP's for regression.}
46 45
   \item{returnParams}{'logical'. Return recalibrated parameters.}
47 46
   \item{badSNP}{'numeric'. Threshold to flag as bad SNP (affects batchQC)}
47
+  \item{crlmmBatchSize}{'integer', indicating the number of samples to be genotyped in a batch by crlmm.  The purpose of this argument is to keep the RAM at manageable levels.}
48 48
 }
49 49
 \value{
50 50
 	
... ...
@@ -52,9 +52,13 @@ crlmm(filenames, row.names=TRUE, col.names=TRUE,
52 52
 
53 53
 	- AFile
54 54
 	- BFile
55
+	- CAFile
56
+	- CBFile
55 57
 	- callsFile
56 58
 	- confsFile
57
-	- intensityFile
59
+	- snprmaFile
60
+
61
+	See the \code{cnOptions} function for the default filenames.
58 62
 	
59 63
 }
60 64
 
... ...
@@ -76,7 +80,13 @@ if (require(genomewidesnp5Crlmm) & require(hapmapsnp5)){
76 80
   ## the filenames with full path...
77 81
   ## very useful when genotyping samples not in the working directory
78 82
   cels <- list.celfiles(path, full.names=TRUE)
79
-  (crlmmOutput <- crlmm(cels, cdfName="genomewidesnp5"))
83
+  crlmm(cels, cdfName="genomewidesnp5")
84
+  ## clean
85
+  unlink("A.rda")
86
+  unlink("B.rda")
87
+  unlink("snp_rmaResult.rda")
88
+  unlink("calls.rda")
89
+  unlink("confs.rda")
80 90
 }
81 91
 }
82 92
 \seealso{\code{\link{cnOptions}}}