Browse code

testing crlmmIlluminaRS in cnrma-functions

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45221 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 15/03/2010 01:43:23
Showing 6 changed files

... ...
@@ -416,19 +416,29 @@ then readIDAT() should work. Thanks to Pierre Cherel who reported this error.
416 416
 
417 417
 2010-03-07 R. Scharpf committed version 1.5.30
418 418
 
419
-- one can use ff in conjunction with affy platforms 5.0 and 6.0
420
-
421
-- more s4-style code
422
-
423
-- preprocessing / genotyping is basically the same set of commands with either illumina/affy platforms (though illumina-users may have to play with some of the options for reading idat files
424
-
425
-- if ff package is loaded, the assayData elements are ff objects
426
-
427
-- the classes all inherit from 'CrlmmContainer' that contains an additional slot 'options' and 'genomeAnnotation'.   options is a list with the default arguments to snprma, crlmm, etc, as well as a few global settings such as 'verbose' and 'seed'.  I added the genomeAnnotation slot simply because I want to be able to use ff-objects for the feature-level data.   Maybe with setClassUnion we could avoid adding the genomeAnnotation slot (and use featureData instead), but I didn't have much success with this.
428
-
429
-- the batchSize argument will run the genotyping (crlmmGT) in batches to reduce the RAM.  The default is batches of size 1000.
430
-
431
-- The crlmm.Rd file contains an example with / without ff for Affymetrix data.
419
+       - one can use ff in conjunction with affy platforms 5.0 and 6.0
420
+       - preprocessing / genotyping is basically the same set of
421
+        commands with either illumina/affy platforms (though
422
+        illumina-users may have to play with some of the options for
423
+        reading idat files
424
+       - if ff package is loaded, the assayData elements are ff objects
425
+
426
+       - the classes all inherit from 'CrlmmContainer' that contains
427
+         an additional slot 'options' and 'genomeAnnotation'.  options
428
+         is a list with the default arguments to snprma, crlmm, etc,
429
+         as well as a few global settings such as 'verbose' and
430
+         'seed'.  I added the genomeAnnotation slot simply because I
431
+         want to be able to use ff-objects for the feature-level data.
432
+         Maybe with setClassUnion we could avoid adding the
433
+         genomeAnnotation slot (and use featureData instead), but I
434
+         didn't have much success with this.
435
+
436
+	 - the batchSize argument will run the genotyping (crlmmGT) in
437
+           batches to reduce the RAM.  The default is batches of size
438
+           1000.
439
+
440
+	 - The crlmm.Rd file contains an example with / without ff
441
+             for Affymetrix data.
432 442
 
433 443
 2010-03-08 M. Ritchie committed version 1.5.31
434 444
  * removed a few unnecessary lines of code from crlmm-illumina.R (zero not needed as argument for preprocessInfinium2() and storageMode should not be "lockedEnvironment" in RGtoXY()) 
... ...
@@ -436,7 +446,7 @@ then readIDAT() should work. Thanks to Pierre Cherel who reported this error.
436 446
 
437 447
 2010-03-08 R.Scharpf committed version 1.5.32
438 448
 
439
-**	   This is a roll back to version 1.5.24 
449
+**	   Rolled back to version 1.5.24 
440 450
 	   
441 451
 2010-03-08 R.Scharpf committed version 1.5.33
442 452
 
... ...
@@ -447,3 +457,13 @@ then readIDAT() should work. Thanks to Pierre Cherel who reported this error.
447 457
 **	  updated DESCRIPTION to import ff
448 458
 **        added AllClasses.R with defitions for ff-derived classes
449 459
 **        temporarily exporting everything in the NAMESPACE
460
+
461
+2010-03-11 R.Scharpf committed version 1.5.35
462
+
463
+**        added genotype() in cnrma-functions -- for preprocessing and
464
+          genotyping affy
465
+
466
+2010-03-14 R.Scharpf committed version 1.5.36
467
+
468
+**   added crlmmIlluminaRS to cnrma-functions.R (testing)
469
+
... ...
@@ -34,7 +34,7 @@ Collate: AllGenerics.R
34 34
          cnrma-functions.R
35 35
          crlmm-functions.R
36 36
          crlmm-illumina.R
37
-         snprma-functions.R
37
+	 snprma-functions.R
38 38
          utils.R
39 39
          zzz.R
40 40
 LazyLoad: yes
... ...
@@ -85,6 +85,7 @@ setMethod("open", "AlleleSet", function(con, ...){
85 85
 	for(i in 1:L) open(eval(substitute(assayData(object)[[NAME]], list(NAME=names[i]))))
86 86
 	return()
87 87
 })
88
+
88 89
 setReplaceMethod("calls", "SnpSuperSet", function(object, value) assayDataElementReplace(object, "call", value))
89 90
 setReplaceMethod("confs", "SnpSuperSet", function(object, value) assayDataElementReplace(object, "callProbability", value))
90 91
 setMethod("confs", "SnpSuperSet", function(object) assayDataElement(object, "callProbability"))
... ...
@@ -175,6 +176,176 @@ genotype <- function(filenames, cdfName, mixtureSampleSize=10^5,
175 176
 
176 177
 ##---------------------------------------------------------------------------
177 178
 ##---------------------------------------------------------------------------
179
+## For Illumina
180
+##---------------------------------------------------------------------------
181
+##---------------------------------------------------------------------------
182
+constructRG <- function(filenames, cdfName, sns, verbose, fileExt, sep){
183
+	if(verbose)	message("reading first idat file to extract feature data")
184
+	grnfile <- paste(filenames[1], fileExt$green, sep=sep)
185
+	if(!file.exists(grnfile)){
186
+                stop(paste(grnfile, " does not exist. Check fileExt argument"))
187
+        }
188
+        G <- readIDAT(grnfile)
189
+        idsG = rownames(G$Quants)
190
+        nr <- length(idsG)
191
+	fD <- new("AnnotatedDataFrame", data=data.frame(row.names=idsG))##, varMetadata=data.frame(labelDescript
192
+	nr <- nrow(fD)
193
+	dns <- list(featureNames(fD), basename(filenames))
194
+	RG <- new("NChannelSet",
195
+		  R=initializeBigMatrix(name="R", nr=nr, nc=length(filenames)),
196
+		  G=initializeBigMatrix(name="G", nr=nr, nc=length(filenames)),
197
+		  zero=initializeBigMatrix(name="zero", nr=nr, nc=length(filenames)),
198
+		  featureData=fD,
199
+		  annotation=cdfName)
200
+	pD <- data.frame(matrix(NA, length(sampleNames(RG)), 12), row.names=sampleNames(RG))
201
+	colnames(pD) <- c("Index","HapMap.Name","Name","ID",
202
+			  "Gender", "Plate", "Well", "Group", "Parent1",
203
+			  "Parent2","Replicate","SentrixPosition")
204
+	phenoData(RG) <- new("AnnotatedDataFrame", data=pD)
205
+	pD <- data.frame(matrix(NA, length(sampleNames(RG)), 1), row.names=sampleNames(RG))
206
+	colnames(pD) <- "ScanDate"
207
+	protocolData(RG) <- new("AnnotatedDataFrame", data=pD)
208
+	sampleNames(RG) <- basename(filenames)
209
+	storageMode(RG) <- "environment"
210
+	RG##featureData=ops$illuminaOpts[["featureData"]])
211
+}
212
+crlmmIlluminaRS <- function(sampleSheet=NULL,
213
+			    arrayNames=NULL,
214
+			    ids=NULL,
215
+			    path=".",
216
+			    arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
217
+			    highDensity=FALSE,
218
+			    sep="_",
219
+			    fileExt=list(green="Grn.idat", red="Red.idat"),
220
+			    stripNorm=TRUE,
221
+			    useTarget=TRUE,
222
+			    row.names=TRUE, 
223
+			    col.names=TRUE,
224
+			    probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
225
+			    seed=1, save.ab=FALSE, snpFile, cnFile,
226
+			    mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
227
+			    cdfName, sns, recallMin=10, recallRegMin=1000,
228
+			    returnParams=FALSE, badSNP=.7) {
229
+	if(missing(cdfName)) stop("must specify cdfName")
230
+	if(!isValidCdfName(cdfName)) stop("cdfName not valid.  see validCdfNames")
231
+	if(missing(sns)) sns <- basename(arrayNames)
232
+	RG <- constructRG(filenames=arrayNames,
233
+			  cdfName=cdfName,
234
+			  sns=sns,
235
+			  verbose=verbose,
236
+			  fileExt=fileExt,
237
+			  sep=sep)	
238
+	batches <- splitIndicesByLength(seq(along=arrayNames), ocSamples())
239
+	for(j in batches){
240
+		tmp <- readIdatFiles(sampleSheet=sampleSheet[j, ],
241
+				     arrayNames=arrayNames[j],
242
+				     ids=ids,
243
+				     path=path,
244
+				     arrayInfoColNames=arrayInfoColNames,
245
+				     highDensity=highDensity,
246
+				     sep=sep,
247
+				     fileExt=fileExt,
248
+				     saveDate=TRUE)
249
+		assayData(RG)$R[, j] <- assayData(tmp)$R
250
+		assayData(RG)$G[, j] <- assayData(tmp)$G
251
+		assayData(RG)$zero[, j] <- assayData(tmp)$zero
252
+		pData(RG)[j, ] <- pData(tmp)
253
+		annotation(RG) <- annotation(tmp)
254
+		pData(protocolData(RG))[j, ] <- pData(protocolData(tmp))
255
+		rm(tmp); gc()
256
+	}
257
+	k <- 1
258
+	for(j in batches){
259
+		##MR: Might want to to nonpolymorphic markers in a
260
+		##separate step and make that optional
261
+		tmp <- RGtoXY(RG[, j], chipType=cdfName)
262
+		if(k == 1){
263
+			##initialize XYSet
264
+			nc <- ncol(tmp)
265
+			nr <- nrow(tmp)
266
+			XY <- new("NChannelSet",
267
+				  X=initializeBigMatrix(name="X", nr, nc),
268
+				  Y=initializeBigMatrix(name="Y", nr, nc),
269
+				  zero=initializeBigMatrix(name="zero", nr, nc),
270
+				  experimentData=experimentData(RG),
271
+				  phenoData=phenoData(RG),
272
+				  protocolData=protocolData(RG),
273
+				  annotation=annotation(RG))
274
+		}
275
+		storageMode(XY) <- "environment"
276
+		assayData(XY)$X[, j] <- assayData(tmp)$X
277
+		assayData(XY)$Y[, j] <- assayData(tmp)$Y
278
+		assayData(XY)$zero[, j] <- assayData(tmp)$zero
279
+		k <- k+1
280
+		rm(tmp); gc()
281
+	}
282
+	annotation(XY) <- cdfName
283
+	mixtureParams <- matrix(NA, 4, length(filenames))
284
+	k <- 1
285
+	for(j in batches){
286
+		res <- preprocessInfinium2(XY[, j],
287
+					   mixtureSampleSize=mixtureSampleSize,
288
+					   fitMixture=TRUE,
289
+					   verbose=verbose,
290
+					   seed=seed,
291
+					   eps=eps,
292
+					   cdfName=cdfName,
293
+					   sns=sns[j],
294
+					   stripNorm=stripNorm,
295
+					   useTarget=useTarget)
296
+		if(k==1){
297
+			## MR: number of rows should be number of SNPs + number of nonpolymorphic markers.
298
+			##  Here, I'm just using the # of rows returned from the above function
299
+			callSet <- new("SnpSuperSet",
300
+				       alleleA=initializeBigMatrix(name="A", nr=nrow(res[[1]]), nc=ncol(XY)),
301
+				       alleleB=initializeBigMatrix(name="B", nr=nrow(res[[2]]), nc=ncol(XY)),
302
+				       call=initializeBigMatrix(name="call", nr=nrow(res[[1]]), nc=ncol(XY)),
303
+				       callProbability=initializeBigMatrix(name="callPr", nr=nrow(res[[1]]), nc=ncol(XY)),
304
+				       protocolData=protocolData(XY),
305
+				       experimentData=experimentData(XY),
306
+				       phenoData=phenoData(XY),
307
+				       annotation=annotation(XY))
308
+			featureNames(callSet) <- res[["gns"]]
309
+			sampleNames(callSet) <- sns
310
+		}
311
+		## MR: we need to define a snp.index
312
+		A(callSet)[, j] <- res[["A"]]
313
+		B(callSet)[, j] <- res[["B"]]
314
+		pData(callSet)$SKW[j] <- res$SKW
315
+		pData(callSet)$SNR[j] <- res$SNR
316
+		mixtureParams[, j] <- res$mixtureParams
317
+		rm(res); gc()
318
+	}
319
+	for(j in batches){
320
+		##MR:  edit snp.index
321
+		snp.index <- 1:nrow(callSet)
322
+		tmp <- crlmmGT(A=A(callSet)[snp.index, j],
323
+			       B=B(callSet)[snp.index, j],
324
+			       SNR=callSet$SNR[j],
325
+			       mixtureParams=mixtureParams[, j],
326
+			       cdfName=annotation(callSet),
327
+			       row.names=featureNames(callSet)[snp.index],
328
+			       col.names=sampleNames(callSet)[j],
329
+			       probs=probs,
330
+			       DF=DF,
331
+			       SNRMin=SNRMin,
332
+			       recallMin=recallMin,
333
+			       recallRegMin=recallRegMin,
334
+			       gender=gender,
335
+			       verbose=verbose,
336
+			       returnParams=returnParams,
337
+			       badSNP=badSNP)
338
+		calls(callSet)[snp.index, j] <- tmp[["calls"]]
339
+		## many zeros here (?)
340
+		confs(callSet)[snp.index, j] <- tmp[["confs"]]
341
+		callSet$gender[j] <- tmp$gender
342
+		rm(tmp); gc()
343
+	}
344
+	return(callSet)
345
+}
346
+##---------------------------------------------------------------------------
347
+##---------------------------------------------------------------------------
348
+
178 349
 rowCovs <- function(x, y, ...){
179 350
 	notna <- !is.na(x)
180 351
 	N <- rowSums(notna)
... ...
@@ -330,46 +501,46 @@ predictGender <- function(res, cdfName="genomewidesnp6", SNRMin=5){
330 501
 	return(gender)
331 502
 }
332 503
 
333
-combineIntensities <- function(res, NP=NULL, callSet){
334
-	rownames(res$B) <- rownames(res$A) <- res$gns
335
-	colnames(res$B) <- colnames(res$A) <- res$sns
336
-	if(!is.null(NP)){
337
-		blank <- matrix(NA, nrow(NP), ncol(NP))
338
-		dimnames(blank) <- dimnames(NP)
339
-		A <- rbind(res$A, NP)
340
-		B <- rbind(res$B, blank)
341
-	} else {
342
-		A <- res$A
343
-		B <- res$B
344
-	}
345
-	dimnames(B) <- dimnames(A)
346
-	index.snps <- match(res$gns, rownames(A))
347
-	callsConfs <- calls <- matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A))
348
-	
349
-	calls[index.snps, ] <- calls(callSet)
350
-	callsConfs[index.snps, ] <- assayData(callSet)[["callProbability"]]
351
-	fd <- data.frame(matrix(NA, nrow(calls), length(fvarLabels(callSet))))
352
-	fd[index.snps, ] <- fData(callSet)
353
-	rownames(fd) <- rownames(A)
354
-	colnames(fd) <- fvarLabels(callSet)
355
-	fD <- new("AnnotatedDataFrame",
356
-		  data=data.frame(fd),
357
-		  varMetadata=data.frame(labelDescription=colnames(fd), row.names=colnames(fd)))
358
-	superSet <- new("CNSet",
359
-			CA=matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A)),
360
-			CB=matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A)),
361
-			alleleA=A,
362
-			alleleB=B,
363
-			call=calls,
364
-			callProbability=callsConfs,
365
-			featureData=fD,
366
-			phenoData=phenoData(callSet),
367
-			experimentData=experimentData(callSet),
368
-			protocolData=protocolData(callSet),
369
-			annotation=annotation(callSet))
370
-	return(superSet)
371
-}
372
-
504
+##combineIntensities <- function(res, NP=NULL, callSet){
505
+##	rownames(res$B) <- rownames(res$A) <- res$gns
506
+##	colnames(res$B) <- colnames(res$A) <- res$sns
507
+##	if(!is.null(NP)){
508
+##		blank <- matrix(NA, nrow(NP), ncol(NP))
509
+##		dimnames(blank) <- dimnames(NP)
510
+##		A <- rbind(res$A, NP)
511
+##		B <- rbind(res$B, blank)
512
+##	} else {
513
+##		A <- res$A
514
+##		B <- res$B
515
+##	}
516
+##	dimnames(B) <- dimnames(A)
517
+##	index.snps <- match(res$gns, rownames(A))
518
+##	callsConfs <- calls <- matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A))
519
+##	
520
+##	calls[index.snps, ] <- calls(callSet)
521
+##	callsConfs[index.snps, ] <- assayData(callSet)[["callProbability"]]
522
+##	fd <- data.frame(matrix(NA, nrow(calls), length(fvarLabels(callSet))))
523
+##	fd[index.snps, ] <- fData(callSet)
524
+##	rownames(fd) <- rownames(A)
525
+##	colnames(fd) <- fvarLabels(callSet)
526
+##	fD <- new("AnnotatedDataFrame",
527
+##		  data=data.frame(fd),
528
+##		  varMetadata=data.frame(labelDescription=colnames(fd), row.names=colnames(fd)))
529
+##	superSet <- new("CNSet",
530
+##			CA=matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A)),
531
+##			CB=matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A)),
532
+##			alleleA=A,
533
+##			alleleB=B,
534
+##			call=calls,
535
+##			callProbability=callsConfs,
536
+##			featureData=fD,
537
+##			phenoData=phenoData(callSet),
538
+##			experimentData=experimentData(callSet),
539
+##			protocolData=protocolData(callSet),
540
+##			annotation=annotation(callSet))
541
+##	return(superSet)
542
+##}
543
+##
373 544
 harmonizeDimnamesTo <- function(object1, object2){
374 545
 	#object2 should be a subset of object 1
375 546
 	object2 <- object2[featureNames(object2) %in% featureNames(object1), ]
... ...
@@ -382,10 +553,8 @@ harmonizeDimnamesTo <- function(object1, object2){
382 553
 
383 554
 
384 555
 
385
-crlmmCopynumber <- function(object, cnOptions, ...){
386
-	chromosome <- cnOptions[["chromosome"]]
556
+crlmmCopynumber <- function(object, cnOptions, chromosome=1:23, MIN.SAMPLES=10, SNRMin=5, ...){
387 557
 	which.batch <- cnOptions[["whichbatch"]]
388
-	SNRMin <- cnOptions[["SNRMin"]]
389 558
 	cnSet <- new("CNSetLM",
390 559
 		     alleleA=A(object),
391 560
 		     alleleB=B(object),
... ...
@@ -397,6 +566,7 @@ crlmmCopynumber <- function(object, cnOptions, ...){
397 566
 		     featureData=featureData(object),
398 567
 		     experimentData=experimentData(object),
399 568
 		     phenoData=phenoData(object))
569
+	lM(cnSet) <- initializeParamObject(list(featureNames(cnSet), unique(cnSet$batch)))
400 570
 	rm(object); gc()
401 571
 	if(any(cnSet$SNR < SNRMin)){
402 572
 		message("Excluding ", sum(cnSet$SNR < SNRMin), " samples with SNR below ", SNRMin)
... ...
@@ -407,13 +577,23 @@ crlmmCopynumber <- function(object, cnOptions, ...){
407 577
 	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
408 578
 	for(i in chromosome){
409 579
 		cat("Chromosome ", i, "\n")
580
+		if(i >= 24) next()
410 581
 		for(j in batches){
411
-			if(i >= 24) next()
412 582
 			row.index <- which(chromosome(cnSet) == i)
413
-			tmp <- computeCopynumber(cnSet[row.index, j], cnOptions)
414
-			##message("Copy number estimates not available for chromosome Y.  Saving only the 'callSet' object for this chromosome")
583
+			tmp <- cnSet[row.index, j]
584
+			featureData(tmp) <- lm.parameters(tmp)
585
+			tmp <- computeCopynumber(tmp, cnOptions)
586
+			fData(tmp) <- fData(tmp)[, -(1:3)]
587
+			CA(cnSet)[row.index, j] <- tmp@assayData[["CA"]]
588
+			CB(cnSet)[row.index, j] <- tmp@assayData[["CB"]]
589
+			labels.asis <- fvarLabels(tmp)
590
+			labels.asis <- gsub("_", ".", labels.asis)
591
+			k <- match(labels.asis, colnames(lM(cnSet)))
592
+			lM(cnSet)[row.index, k] <- fData(tmp)
593
+			rm(tmp); gc()
415 594
 		}
416 595
 	}
596
+	return(cnSet)
417 597
 }
418 598
 
419 599
 
... ...
@@ -679,30 +859,11 @@ thresholdCopynumber <- function(object){
679 859
 	return(object)
680 860
 }
681 861
 
682
-##preprocessOptions <- function(crlmmFile="snpsetObject.rda",
683
-##			      intensityFile="normalizedIntensities.rda",
684
-##			      rgFile="rgFile.rda"){
685
-##
686
-##}
687
-
688 862
 cnOptions <- function(
689
-##		      outdir="./",
690
-##		      cdfName,
691
-##		      crlmmFile,
692
-##		      intensityFile,
693
-##		      rgFile="rgFile.rda",
694
-##		      save.it=TRUE,
695
-##		      save.cnset=TRUE,
696
-##		      load.it=TRUE,
697
-##		      splitByChr=TRUE,
698 863
 		      MIN.OBS=3,
699
-		      MIN.SAMPLES=10,
700
-		      batch=NULL,
701 864
 		      DF.PRIOR=50,
702 865
 		      bias.adj=FALSE,
703 866
 		      prior.prob=rep(1/4, 4),
704
-		      SNRmin=4,
705
-		      chromosome=1:24,
706 867
 		      seed=123,
707 868
 		      verbose=TRUE,
708 869
 		      GT.CONF.THR=0.99,
... ...
@@ -711,49 +872,13 @@ cnOptions <- function(
711 872
 		      MIN.NU=2^3,
712 873
 		      MIN.PHI=2^3,
713 874
 		      THR.NU.PHI=TRUE,
714
-		      thresholdCopynumber=TRUE,
715
-		      unlink=TRUE,
716
-		      ##hiddenMarkovModel=FALSE,
717
-		      ##circularBinarySegmentation=FALSE,
718
-##		      cbsOpts=NULL,
719
-		      ##hmmOpts=NULL,
720
-		      ...){
721
-##	if(missing(cdfName)) stop("must specify cdfName")
722
-##	if(!file.exists(outdir)){
723
-##		message(outdir, " does not exist.  Trying to create it.")
724
-##		dir.create(outdir, recursive=TRUE)
725
-##	}
726
-##	stopifnot(isValidCdfName(cdfName))
727
-####	if(hiddenMarkovModel){
728
-####		hmmOpts <- hmmOptions(...)
729
-####	}
730
-##	if(missing(crlmmFile)){
731
-##		crlmmFile <- file.path(outdir, "snpsetObject.rda")
732
-##	}
733
-##	if(missing(intensityFile)){
734
-##		intensityFile <- file.path(outdir, "normalizedIntensities.rda")
735
-##	}
736
-##	if(is.null(batch))
737
-##		stop("must specify batch -- should be the same length as the number of files")
875
+		      thresholdCopynumber=TRUE){
738 876
 	list(
739
-##	     outdir=outdir,
740
-##	     cdfName=cdfName,
741
-##	     crlmmFile=crlmmFile,
742
-##	     intensityFile=intensityFile,
743
-##	     rgFile=file.path(outdir, rgFile),
744
-##	     save.it=save.it,
745
-##	     save.cnset=save.cnset,
746
-##	     load.it=load.it,
747
-##	     splitByChr=splitByChr,
748 877
 	     MIN.OBS=MIN.OBS,
749
-	     MIN.SAMPLES=MIN.SAMPLES,
750
-	     batch=batch,
751 878
 	     DF.PRIOR=DF.PRIOR,
752 879
 	     GT.CONF.THR=GT.CONF.THR,
753
-	     prior.prob=prior.prob,
754 880
 	     bias.adj=bias.adj,
755
-	     SNRmin=SNRmin,
756
-	     chromosome=chromosome,
881
+	     prior.prob=prior.prob,
757 882
 	     seed=seed,
758 883
 	     verbose=verbose,
759 884
 	     PHI.THR=PHI.THR,
... ...
@@ -761,12 +886,7 @@ cnOptions <- function(
761 886
 	     MIN.NU=MIN.NU,
762 887
 	     MIN.PHI=MIN.PHI,
763 888
 	     THR.NU.PHI=THR.NU.PHI,
764
-	     thresholdCopynumber=thresholdCopynumber,
765
-	     unlink=unlink)
766
-##	     hiddenMarkovModel=hiddenMarkovModel,
767
-##	     circularBinarySegmentation=circularBinarySegmentation,
768
-##	     cbsOpts=cbsOpts,
769
-##	     hmmOpts=hmmOpts) ##remove SnpSuperSet object
889
+	     thresholdCopynumber=thresholdCopynumber)
770 890
 }
771 891
 
772 892
 ##linear model parameters
... ...
@@ -989,7 +1109,9 @@ withinGenotypeMoments <- function(object, cnOptions, tmp.objects){
989 1109
 	A <- A(object)
990 1110
 	B <- B(object)
991 1111
 ##	highConf <- (1-exp(-confs(object)/1000)) > GT.CONF.THR
992
-	highConf <- confs(object) > GT.CONF.THR
1112
+	xx <- assayData(object)$callProbability
1113
+	highConf <- (1-exp(-xx/1000)) > GT.CONF.THR
1114
+	##highConf <- confs(object) > GT.CONF.THR
993 1115
 	##highConf <- highConf > GT.CONF.THR
994 1116
 	if(CHR == 23){
995 1117
 		gender <- object$gender
... ...
@@ -1169,9 +1291,9 @@ oneBatch <- function(object, cnOptions, tmp.objects){
1169 1291
 
1170 1292
 ##Estimate tau2, sigma2, and correlation (updates the object)
1171 1293
 locationAndScale <- function(object, cnOptions, tmp.objects){
1294
+	DF.PRIOR <- cnOptions$DF.PRIOR
1172 1295
 	Ns <- tmp.objects[["Ns"]]
1173 1296
 	index <- tmp.objects[["index"]]
1174
-	
1175 1297
 	index.AA <- index[[1]]
1176 1298
 	index.AB <- index[[2]]
1177 1299
 	index.BB <- index[[3]]
... ...
@@ -1186,11 +1308,8 @@ locationAndScale <- function(object, cnOptions, tmp.objects){
1186 1308
 	AA.B <- GT.B[[1]]
1187 1309
 	AB.B <- GT.B[[2]]
1188 1310
 	BB.B <- GT.B[[3]]
1189
-	
1190 1311
 	x <- BB.A[index.BB, ]
1191 1312
 	batch <- unique(object$batch)
1192
-	DF.PRIOR <- cnOptions$DF.PRIOR
1193
-
1194 1313
 	tau2A <- getParam(object, "tau2A", batch)
1195 1314
 	tau2A[index.BB] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2	
1196 1315
 	DF <- Ns[, "BB"]-1
... ...
@@ -1602,101 +1721,88 @@ thresholdModelParams <- function(object, cnOptions){
1602 1721
 	return(object)
1603 1722
 }
1604 1723
 
1605
-computeCopynumber.SnpSuperSet <- function(object, cnOptions){
1606
-##	use.ff <- cnOptions[["use.ff"]]
1607
-##	if(!use.ff){
1608
-##		object <- as(object, "CrlmmSet")
1609
-##	} else	object <- as(object, "CrlmmSetFF")
1610
-	bias.adj <- cnOptions[["bias.adj"]]
1611
-	##must be FALSE to initialize parameters
1612
-	cnOptions[["bias.adj"]] <- FALSE
1613
-	## Add linear model parameters to the CrlmmSet object
1614
-	featureData(object) <- lm.parameters(object, cnOptions)
1615
-	if(!isValidCdfName(annotation(object))) stop(annotation(object), " not supported.")
1616
-	object <- as(object, "CNSet")
1617
-	object <- computeCopynumber.CNSet(object, cnOptions)
1618
-	if(bias.adj==TRUE){## run a second time
1619
-		object <- computeCopynumber.CNSet(object, cnOptions)
1620
-	}
1621
-	return(object)
1622
-}
1724
+##computeCopynumber.SnpSuperSet <- function(object, cnOptions){
1725
+####	use.ff <- cnOptions[["use.ff"]]
1726
+####	if(!use.ff){
1727
+####		object <- as(object, "CrlmmSet")
1728
+####	} else	object <- as(object, "CrlmmSetFF")
1729
+##	bias.adj <- cnOptions[["bias.adj"]]
1730
+##	##must be FALSE to initialize parameters
1731
+##	cnOptions[["bias.adj"]] <- FALSE
1732
+##	## Add linear model parameters to the CrlmmSet object
1733
+##	featureData(object) <- lm.parameters(object, cnOptions)
1734
+##	if(!isValidCdfName(annotation(object))) stop(annotation(object), " not supported.")
1735
+##	object <- as(object, "CNSet")
1736
+##	object <- computeCopynumber.CNSet(object, cnOptions)
1737
+##	if(bias.adj==TRUE){## run a second time
1738
+##		object <- computeCopynumber.CNSet(object, cnOptions)
1739
+##	}
1740
+##	return(object)
1741
+##}
1623 1742
 
1624 1743
 
1625 1744
 computeCopynumber.CNSet <- function(object, cnOptions){
1626
-	CHR <- unique(chromosome(object))
1627
-	batch <- object$batch
1628
-	if(length(batch) != ncol(object)) stop("Batch must be the same length as the number of samples")
1629
-	MIN.SAMPLES <- cnOptions$MIN.SAMPLES
1745
+	PLATE <- unique(object$batch)
1630 1746
 	verbose <- cnOptions$verbose
1631
-	for(i in seq(along=unique(batch))){
1632
-		PLATE <- unique(batch)[i]
1633
-		if(sum(batch == PLATE) < MIN.SAMPLES) {
1634
-			message("Skipping plate ", PLATE)
1635
-			next()
1636
-		}		
1637
-		object.batch <- object[, batch==PLATE]
1638
-		tmp.objects <- instantiateObjects(object.batch,
1639
-						  cnOptions)
1640
-		bias.adj <- cnOptions$bias.adj
1641
-		if(bias.adj & ncol(object) <= 15){
1642
-			warning(paste("bias.adj is TRUE, but too few samples to perform this step"))
1643
-			cnOptions$bias.adj <- bias.adj <- FALSE
1644
-		}
1645
-		if(bias.adj){
1646
-			if(verbose) message("Dropping samples with low posterior prob. of normal copy number (samples dropped is locus-specific)")
1647
-			tmp.objects <- biasAdjNP(object.batch, cnOptions, tmp.objects)
1648
-			tmp.objects <- biasAdj(object.batch, cnOptions, tmp.objects)
1649
-			if(verbose) message("Recomputing location and scale parameters")
1650
-		}
1651
-		##update tmp.objects
1652
-		tmp.objects <- withinGenotypeMoments(object.batch,
1653
-						     cnOptions=cnOptions,
1654
-						     tmp.objects=tmp.objects)
1655
-		object.batch <- locationAndScale(object.batch, cnOptions, tmp.objects)
1656
-		tmp.objects <- oneBatch(object.batch,
1657
-					cnOptions=cnOptions,
1658
-					tmp.objects=tmp.objects)
1659
-		##coefs calls nuphiAllele.
1660
-		object.batch <- coefs(object.batch, cnOptions, tmp.objects)
1661
-		##nuA=getParam(object.batch, "nuA", PLATE)
1662
-		THR.NU.PHI <- cnOptions$THR.NU.PHI
1663
-		if(THR.NU.PHI){
1664
-			verbose <- cnOptions$verbose
1665
-			if(verbose) message("Thresholding nu and phi")
1666
-			object.batch <- thresholdModelParams(object.batch, cnOptions)
1667
-		}		
1668
-		if(verbose) message("\nAllele specific copy number")	
1669
-		object.batch <- polymorphic(object.batch, cnOptions, tmp.objects)
1670
-		if(any(!isSnp(object))){ ## there are nonpolymorphic probes
1671
-			if(verbose) message("\nCopy number for nonpolymorphic probes...")	
1672
-			object.batch <- nonpolymorphic(object.batch, cnOptions, tmp.objects)
1673
-		}
1674
-		##---------------------------------------------------------------------------
1675
-		##Note: the replacement method multiples by 100
1676
-		CA(object)[, batch==PLATE] <- CA(object.batch)
1677
-		CB(object)[, batch==PLATE] <- CB(object.batch)
1678
-		##---------------------------------------------------------------------------
1679
-		##update-the plate-specific parameters for copy number
1680
-		object <- pr(object, "nuA", PLATE, getParam(object.batch, "nuA", PLATE))
1681
-		object <- pr(object, "nuA.se", PLATE, getParam(object.batch, "nuA.se", PLATE))
1682
-		object <- pr(object, "nuB", PLATE, getParam(object.batch, "nuB", PLATE))
1683
-		object <- pr(object, "nuB.se", PLATE, getParam(object.batch, "nuB.se", PLATE))
1684
-		object <- pr(object, "phiA", PLATE, getParam(object.batch, "phiA", PLATE))
1685
-		object <- pr(object, "phiA.se", PLATE, getParam(object.batch, "phiA.se", PLATE))
1686
-		object <- pr(object, "phiB", PLATE, getParam(object.batch, "phiB", PLATE))
1687
-		object <- pr(object, "phiB.se", PLATE, getParam(object.batch, "phiB.se", PLATE))
1688
-		object <- pr(object, "tau2A", PLATE, getParam(object.batch, "tau2A", PLATE))
1689
-		object <- pr(object, "tau2B", PLATE, getParam(object.batch, "tau2B", PLATE))				
1690
-		object <- pr(object, "sig2A", PLATE, getParam(object.batch, "sig2A", PLATE))
1691
-		object <- pr(object, "sig2B", PLATE, getParam(object.batch, "sig2B", PLATE))		
1692
-		object <- pr(object, "phiAX", PLATE, as.numeric(getParam(object.batch, "phiAX", PLATE)))
1693
-		object <- pr(object, "phiBX", PLATE, as.numeric(getParam(object.batch, "phiBX", PLATE)))
1694
-		object <- pr(object, "corr", PLATE, getParam(object.batch, "corr", PLATE))
1695
-		object <- pr(object, "corrA.BB", PLATE, getParam(object.batch, "corrA.BB", PLATE))
1696
-		object <- pr(object, "corrB.AA", PLATE, getParam(object.batch, "corrB.AA", PLATE))		
1697
-		rm(object.batch, tmp.objects); gc();
1747
+	tmp.objects <- instantiateObjects(object, cnOptions)
1748
+	bias.adj <- cnOptions$bias.adj
1749
+	if(bias.adj & ncol(object) <= 15){
1750
+		warning(paste("bias.adj is TRUE, but too few samples to perform this step"))
1751
+		cnOptions$bias.adj <- bias.adj <- FALSE
1752
+	}
1753
+	if(bias.adj){
1754
+		if(verbose) message("Dropping samples with low posterior prob. of normal copy number (samples dropped is locus-specific)")
1755
+		tmp.objects <- biasAdjNP(object, cnOptions, tmp.objects)
1756
+		tmp.objects <- biasAdj(object, cnOptions, tmp.objects)
1757
+		if(verbose) message("Recomputing location and scale parameters")
1758
+	}
1759
+	##update tmp.objects
1760
+	tmp.objects <- withinGenotypeMoments(object,
1761
+					     cnOptions=cnOptions,
1762
+					     tmp.objects=tmp.objects)
1763
+	object <- locationAndScale(object, cnOptions, tmp.objects)
1764
+	tmp.objects <- oneBatch(object,
1765
+				cnOptions=cnOptions,
1766
+				tmp.objects=tmp.objects)
1767
+	##coefs calls nuphiAllele.
1768
+	object <- coefs(object, cnOptions, tmp.objects)
1769
+	##nuA=getParam(object, "nuA", PLATE)
1770
+	THR.NU.PHI <- cnOptions$THR.NU.PHI
1771
+	if(THR.NU.PHI){
1772
+		verbose <- cnOptions$verbose
1773
+		if(verbose) message("Thresholding nu and phi")
1774
+		object <- thresholdModelParams(object, cnOptions)
1775
+	}		
1776
+	if(verbose) message("\nAllele specific copy number")	
1777
+	object <- polymorphic(object, cnOptions, tmp.objects)
1778
+	if(any(!isSnp(object))){ ## there are nonpolymorphic probes
1779
+		if(verbose) message("\nCopy number for nonpolymorphic probes...")	
1780
+		object <- nonpolymorphic(object, cnOptions, tmp.objects)
1698 1781
 	}
1699
-	object <- object[order(chromosome(object), position(object)), ]
1782
+	##---------------------------------------------------------------------------
1783
+	##Note: the replacement method multiples by 100
1784
+##	CA(object)[, batch==PLATE] <- CA(object)
1785
+##	CB(object)[, batch==PLATE] <- CB(object)
1786
+	##---------------------------------------------------------------------------
1787
+	##update-the plate-specific parameters for copy number
1788
+	object <- pr(object, "nuA", PLATE, getParam(object, "nuA", PLATE))
1789
+	object <- pr(object, "nuA.se", PLATE, getParam(object, "nuA.se", PLATE))
1790
+	object <- pr(object, "nuB", PLATE, getParam(object, "nuB", PLATE))
1791
+	object <- pr(object, "nuB.se", PLATE, getParam(object, "nuB.se", PLATE))
1792
+	object <- pr(object, "phiA", PLATE, getParam(object, "phiA", PLATE))
1793
+	object <- pr(object, "phiA.se", PLATE, getParam(object, "phiA.se", PLATE))
1794
+	object <- pr(object, "phiB", PLATE, getParam(object, "phiB", PLATE))
1795
+	object <- pr(object, "phiB.se", PLATE, getParam(object, "phiB.se", PLATE))
1796
+	object <- pr(object, "tau2A", PLATE, getParam(object, "tau2A", PLATE))
1797
+	object <- pr(object, "tau2B", PLATE, getParam(object, "tau2B", PLATE))				
1798
+	object <- pr(object, "sig2A", PLATE, getParam(object, "sig2A", PLATE))
1799
+	object <- pr(object, "sig2B", PLATE, getParam(object, "sig2B", PLATE))		
1800
+	object <- pr(object, "phiAX", PLATE, as.numeric(getParam(object, "phiAX", PLATE)))
1801
+	object <- pr(object, "phiBX", PLATE, as.numeric(getParam(object, "phiBX", PLATE)))
1802
+	object <- pr(object, "corr", PLATE, getParam(object, "corr", PLATE))
1803
+	object <- pr(object, "corrA.BB", PLATE, getParam(object, "corrA.BB", PLATE))
1804
+	object <- pr(object, "corrB.AA", PLATE, getParam(object, "corrB.AA", PLATE))
1805
+	##object <- object[order(chromosome(object), position(object)), ]
1700 1806
 	if(cnOptions[["thresholdCopynumber"]]){
1701 1807
 		object <- thresholdCopynumber(object)
1702 1808
 	}
... ...
@@ -684,7 +684,6 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
684 684
   ##     because this might intefere with other analyses
685 685
   ##     (like what happened to GCRMA)
686 686
   set.seed(seed)
687
-
688 687
   idx <- sort(sample(autosomeIndex, mixtureSampleSize))
689 688
   
690 689
   ##S will hold (A+B)/2 and M will hold A-B
... ...
@@ -800,7 +799,7 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
800 799
 ## reads in the .idats and genotypes in the one function and removes objects 
801 800
 ## after they have been used
802 801
 crlmmIlluminaV2 = function(sampleSheet=NULL,
803
-			  arrayNames=NULL,
802
+                          arrayNames=NULL,
804 803
 			  ids=NULL,
805 804
 			  path=".",
806 805
 			  arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
... ...
@@ -127,3 +127,5 @@ fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=1
127 127
  return(list(coef= -fit1, medF1=medF1, sigma1=sigmas[1], sigma2=sigmas[2]))
128 128
 }
129 129
 
130
+
131
+
... ...
@@ -183,6 +183,29 @@ isPackageLoaded <- function(pkg){
183 183
 	pkg %in% search()
184 184
 }
185 185
 
186
+paramNames <- function(){
187
+	c("tau2A", "tau2B", "sig2A", "sig2B",
188
+	  "nuA", "nuA.se", "nuB",  "nuB.se", "phiA", "phiB", "phiAX", "phiBX",
189
+	  "phiA.se", "phiB.se", "corr", "corrA.BB", "corrB.AA")
190
+}
191
+
192
+initializeParamObject <- function(dimnames){
193
+	nr <- length(dimnames[[1]])
194
+	nc <- length(dimnames[[2]])		
195
+	ll <- vector("list", 17)
196
+	name <- paramNames()
197
+	if(isPackageLoaded("ff")){
198
+		for(i in 1:17) ll[[i]] <- ff(vmode="double", dim=c(nr, nc), pattern=file.path(ldPath(), name[i]), dimnames=dimnames, overwrite=TRUE)
199
+		names(ll) <- paramNames()
200
+		ll <- do.call(ffdf, ll)
201
+	} else {
202
+		for(i in 1:17) ll[[i]] <- matrix(NA, nr, nc, dimnames=dimnames)
203
+		names(ll) <- paramNames()
204
+	}
205
+	return(ll)
206
+}
207
+
208
+
186 209
 initializeBigMatrix <- function(name, nr, nc, vmode="integer"){
187 210
 	if(isPackageLoaded("ff")){
188 211
 		if(prod(nr, nc) > 2^31){
... ...
@@ -221,3 +244,17 @@ initializeBigMatrix <- function(name, nr, nc, vmode="integer"){
221 244
 }
222 245
 setMethod("annotatedDataFrameFrom", "ff_matrix", Biobase:::annotatedDataFrameFromMatrix)
223 246
 setMethod("annotatedDataFrameFrom", "ffdf", Biobase:::annotatedDataFrameFromMatrix)
247
+
248
+
249
+##annotatedDataFrameFromFF <- function (object, byrow = FALSE, ...){
250
+##    dims <- dim(object)
251
+##    if (is.null(dims) || all(dims == 0)) 
252
+##        annotatedDataFrameFrom(NULL, byrow = byrow, ...)
253
+##    else {
254
+##        N <- if (byrow)  dims[1]       else dims[2]
255
+##        nms <- if (byrow) rownames(object)  else colnames(object)
256
+##        data <- data.frame(numeric(N), row.names = nms)[, FALSE]
257
+##        dimLabels <- if (byrow) c("featureNames", "featureColumns")  else c("sampleNames", "sampleColumns")
258
+##        new("AnnotatedDataFrame", data = data, dimLabels = dimLabels)
259
+##    }
260
+##}