Browse code

updates to genotype, crlmmIlluminaRS, and crlmmCopynumber

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45497 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 25/03/2010 13:01:51
Showing 12 changed files

... ...
@@ -478,9 +478,10 @@ then readIDAT() should work. Thanks to Pierre Cherel who reported this error.
478 478
 ** a few updates to initializeBigMatrix
479 479
 ** show, [ defined for CNSetLM
480 480
 
481
-2010-03-18 R.Scharpf committed version 1.5.38
481
+2010-03-25 R.Scharpf committed version 1.5.41
482 482
 
483 483
 ** import snpCall, snpCallProbability, snpCall<-, snpCallProbability<-
484 484
    from Biobase
485
-** updates to genotype and crlmmIlluminaRS
485
+** updates to genotype, crlmmIlluminaRS, crlmmCopynumber
486 486
 ** class union of ff_matrix, matrix, and ffdf
487
+
... ...
@@ -1,7 +1,7 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.5.39
4
+Version: 1.5.41
5 5
 Date: 2010-02-05
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>
7 7
 Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
... ...
@@ -54,8 +54,19 @@ importFrom(mvtnorm, dmvnorm)
54 54
 
55 55
 importFrom(ellipse, ellipse)
56 56
 
57
-exportMethods(copyNumber)
58
-export(crlmm, crlmmIllumina, crlmmCopynumber, ellipse, readIdatFiles, snprma, getParam) 
59
-export(genotype, crlmmIlluminaRS)
57
+exportClasses(CNSetLM)
58
+exportMethods(copyNumber, open, "[", show, lM, "lM<-")
59
+export(crlmm, 
60
+	      crlmmCopynumber, 
61
+	      crlmmIllumina, 
62
+	      crlmmIlluminaRS, 
63
+	      ellipse, 
64
+	      genotype, 
65
+	      getParam, 
66
+	      readIdatFiles, 
67
+	      snprma) 
68
+
69
+export(initializeBigMatrix, initializeParamObject)
70
+
60 71
 
61 72
 
... ...
@@ -4,7 +4,7 @@ setOldClass("ff_matrix")
4 4
 setClassUnion("list_or_ffdf", c("list", "ffdf"))
5 5
 setClassUnion("ff_or_matrix", c("ff_matrix", "matrix", "ffdf"))
6 6
 setClass("CNSetLM", contains="CNSet", representation(lM="list_or_ffdf"))
7
-setMethod("initialize", "CNSetLM", function(.Object, CA=new("matrix"), CB=new("matrix"), lM=new("list"), ...){
8
-	.Object <- callNextMethod(.Object, CA=CA, CB=CB, lM=lM, ...)
9
-	.Object
7
+setMethod("initialize", "CNSetLM", function(.Object, lM=new("list"), ...){
8
+	.Object@lM <- lM
9
+	.Object <- callNextMethod(.Object, ...)
10 10
 })
... ...
@@ -1,6 +1,3 @@
1
-##setGeneric("A<-", function(object, value) standardGeneric("A<-"))
2
-##setGeneric("B<-", function(object, value) standardGeneric("B<-"))
3
-
4 1
 setGeneric("getParam", function(object, name, batch) standardGeneric("getParam"))
5 2
 setGeneric("cnIndex", function(object) standardGeneric("cnIndex"))
6 3
 setGeneric("cnNames", function(object) standardGeneric("cnNames"))
... ...
@@ -8,5 +5,6 @@ setGeneric("computeCopynumber", function(object, ...) standardGeneric("computeCo
8 5
 setGeneric("pr", function(object, name, batch, value) standardGeneric("pr"))
9 6
 setGeneric("snpIndex", function(object) standardGeneric("snpIndex"))
10 7
 setGeneric("snpNames", function(object) standardGeneric("snpNames"))
11
-##setGeneric("splitByChromosome", function(object, ...) standardGeneric("splitByChromosome"))
8
+setGeneric("lM", function(object) standardGeneric("lM"))
9
+setGeneric("lM<-", function(object, value) standardGeneric("lM<-"))
12 10
 
... ...
@@ -88,7 +88,6 @@ setMethod("open", "AlleleSet", function(con, ...){
88 88
 	for(i in 1:L) open(eval(substitute(assayData(object)[[NAME]], list(NAME=names[i]))))
89 89
 	return()
90 90
 })
91
-
92 91
 ##setReplaceMethod("calls", "SnpSuperSet", function(object, value) assayDataElementReplace(object, "call", value))
93 92
 ##setReplaceMethod("confs", "SnpSuperSet", function(object, value) assayDataElementReplace(object, "callProbability", value))
94 93
 ##setMethod("confs", "SnpSuperSet", function(object) assayDataElement(object, "callProbability"))
... ...
@@ -284,9 +283,9 @@ crlmmIlluminaRS <- function(sampleSheet=NULL,
284 283
 				     sep=sep,
285 284
 				     fileExt=fileExt,
286 285
 				     saveDate=TRUE)
287
-		XY <- RGtoXY(RG, chipType=cdfName)
288
-		rm(RG); gc()
289
-		res <- preprocessInfinium2(XY,
286
+		RG <- RGtoXY(RG, chipType=cdfName)
287
+		protocolData <- protocolData(RG)
288
+		res <- preprocessInfinium2(RG,
290 289
 					   mixtureSampleSize=mixtureSampleSize,
291 290
 					   fitMixture=TRUE,
292 291
 					   verbose=verbose,
... ...
@@ -296,6 +295,7 @@ crlmmIlluminaRS <- function(sampleSheet=NULL,
296 295
 					   sns=sns[j],
297 296
 					   stripNorm=stripNorm,
298 297
 					   useTarget=useTarget)
298
+		rm(RG); gc()
299 299
 		## MR: number of rows should be number of SNPs + number of nonpolymorphic markers.
300 300
 		##  Here, I'm just using the # of rows returned from the above function
301 301
 		if(k == 1){
... ...
@@ -305,22 +305,19 @@ crlmmIlluminaRS <- function(sampleSheet=NULL,
305 305
 				       alleleB=initializeBigMatrix(name="B", nr=nrow(res[[1]]), nc=length(sns)),
306 306
 				       call=initializeBigMatrix(name="call", nr=nrow(res[[1]]), nc=length(sns)),
307 307
 				       callProbability=initializeBigMatrix(name="callPr", nr=nrow(res[[1]]), nc=length(sns)),
308
-				       experimentData=experimentData(XY),
309
-				       annotation=annotation(XY))
308
+				       annotation=cdfName)
310 309
 			sampleNames(callSet) <- sns
311 310
 			phenoData(callSet) <- getPhenoData(sampleSheet=sampleSheet,
312 311
 							   arrayNames=sns,
313 312
 							   arrayInfoColNames=arrayInfoColNames)
314
-			pD <- data.frame(matrix(NA, length(sns), 1),
315
-					 row.names=sns)
313
+			pD <- data.frame(matrix(NA, length(sns), 1), row.names=sns)
316 314
 			colnames(pD) <- "ScanDate"
317 315
 			protocolData(callSet) <- new("AnnotatedDataFrame", data=pD)
318
-			pData(protocolData(callSet))[j, ] <- pData(protocolData(XY))
316
+			pData(protocolData(callSet))[j, ] <- pData(protocolData)
319 317
 			featureNames(callSet) <- res[["gns"]]
320 318
 			pData(callSet)$SKW <- rep(NA, length(sns))
321 319
 			pData(callSet)$SNR <- rep(NA, length(sns))
322 320
 			pData(callSet)$gender <- rep(NA, length(sns))
323
-			##sampleNames(callSet) <- sns
324 321
 		}
325 322
 		if(k > 1 & nrow(res[[1]]) != nrow(callSet)){
326 323
 			##RS: I don't understand why the IDATS for the
... ...
@@ -334,7 +331,6 @@ crlmmIlluminaRS <- function(sampleSheet=NULL,
334 331
 		suppressWarnings(B(callSet)[snp.index, j] <- res[["B"]])
335 332
 		pData(callSet)$SKW[j] <- res$SKW
336 333
 		pData(callSet)$SNR[j] <- res$SNR
337
-		##mixtureParams[, j] <- res$mixtureParams
338 334
 		mixtureParams <- res$mixtureParams
339 335
 		rm(res); gc()
340 336
 		##MR:  edit snp.index
... ...
@@ -575,8 +571,8 @@ harmonizeDimnamesTo <- function(object1, object2){
575 571
 
576 572
 
577 573
 crlmmCopynumber <- function(object,
578
-			    batch,
579 574
 			    chromosome=1:23,
575
+			    which.batches,
580 576
 			    MIN.SAMPLES=10,
581 577
 			    SNRMin=5,
582 578
 			    MIN.OBS=3,
... ...
@@ -592,36 +588,38 @@ crlmmCopynumber <- function(object,
592 588
 			    MIN.PHI=2^3,
593 589
 			    THR.NU.PHI=TRUE,
594 590
 			    thresholdCopynumber=TRUE){
595
-	which.batch <- cnOptions[["whichbatch"]]
596
-	cnSet <- new("CNSetLM",
597
-		     alleleA=A(object),
598
-		     alleleB=B(object),
599
-		     call=snpCall(object),
600
-		     callProbability=snpCallProbability(object),
601
-		     CA=initializeBigMatrix("CA", nrow(object), ncol(object)),
602
-		     CB=initializeBigMatrix("CB", nrow(object), ncol(object)),
603
-		     annotation=annotation(object),
604
-		     featureData=featureData(object),
605
-		     experimentData=experimentData(object),
606
-		     phenoData=phenoData(object))
607
-	lM(cnSet) <- initializeParamObject(list(featureNames(cnSet), unique(cnSet$batch)))
608
-	rm(object); gc()
609
-	if(any(cnSet$SNR < SNRMin)){
610
-		message("Excluding ", sum(cnSet$SNR < SNRMin), " samples with SNR below ", SNRMin)
611
-		cnSet <- cnSet[, cnSet$SNR > SNRMin]
612
-	}
613
-	batches <- split(1:ncol(cnSet), cnSet$batch)
591
+	stopifnot("batch" %in% varLabels(object))
592
+	stopifnot("chromosome" %in% fvarLabels(object))
593
+	stopifnot("position" %in% fvarLabels(object))
594
+	stopifnot("isSnp" %in% fvarLabels(object))
595
+	batch <- object$batch
596
+	batches <- split((1:ncol(object))[object$SNR > SNRMin], batch[object$SNR > SNRMin])
614 597
 	if(any(sapply(batches, length) < MIN.SAMPLES)) message("Excluding batches with fewer than ", MIN.SAMPLES, " samples")
615 598
 	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
599
+	if(missing(which.batches)) which.batches <- seq(along=batches)
616 600
 	for(i in chromosome){
617
-		cat("Chromosome ", i, "\n")
601
+		if(verbose) cat("Chromosome ", i, "\n")
618 602
 		if(i >= 24) next()
619
-		for(j in batches){
620
-			row.index <- which(chromosome(cnSet) == i)
621
-			tmp <- cnSet[row.index, j]
622
-			featureData(tmp) <- lm.parameters(tmp)
603
+		ii <- which.batches[1]
604
+		for(j in batches[which.batches]){
605
+			if(verbose) message("Batch ", ii, " of ", length(which.batches))
606
+			row.index <- which(chromosome(object) == i)
607
+			##Note that ffdf assayDataElements are data.frames after subsetting(not matrices)
608
+			ca <- as.matrix(CA(object)[row.index, j])
609
+			cb <- as.matrix(CB(object)[row.index, j])
610
+			dimnames(ca) <- dimnames(cb) <- list(featureNames(object)[row.index], sampleNames(object)[j])
611
+			tmp <- new("CNSet",
612
+				   call=as.matrix(calls(object)[row.index, j]),
613
+				   callProbability=as.matrix(snpCallProbability(object)[row.index, j]),
614
+				   alleleA=as.matrix(A(object)[row.index, j]),
615
+				   alleleB=as.matrix(B(object)[row.index, j]),
616
+				   CA=ca, CB=cb,
617
+				   phenoData=phenoData(object)[j, ],
618
+				   annotation=annotation(object))
619
+			featureData(tmp) <- addFeatureAnnotation(tmp)
620
+			featureData(tmp) <- lm.parameters(tmp, batch=unique(batch[j]))
621
+			tmp$batch <- batch[j]
623 622
 			tmp <- computeCopynumber(tmp,
624
-						 SNRMin=SNRMin,
625 623
 						 MIN.OBS=MIN.OBS,
626 624
 						 DF.PRIOR=DF.PRIOR,
627 625
 						 bias.adj=bias.adj,
... ...
@@ -633,18 +631,24 @@ crlmmCopynumber <- function(object,
633 631
 						 nHOM.THR=nHOM.THR,
634 632
 						 MIN.NU=MIN.NU,
635 633
 						 MIN.PHI=MIN.PHI,
634
+						 THR.NU.PHI=THR.NU.PHI,
636 635
 						 thresholdCopynumber=thresholdCopynumber)
637 636
 			fData(tmp) <- fData(tmp)[, -(1:3)]
638
-			CA(cnSet)[row.index, j] <- tmp@assayData[["CA"]]
639
-			CB(cnSet)[row.index, j] <- tmp@assayData[["CB"]]
637
+			CA(tmp) <- matrix(as.integer(CA(tmp)*100), nrow=nrow(tmp), ncol=ncol(tmp),
638
+					  dimnames=list(featureNames(tmp), sampleNames(tmp)))
639
+			CB(tmp) <- matrix(as.integer(CB(tmp)*100), nrow=nrow(tmp), ncol=ncol(tmp),
640
+					  dimnames=list(featureNames(tmp), sampleNames(tmp)))
641
+			CA(object)[row.index, j] <- CA(tmp)
642
+			CB(object)[row.index, j] <- CB(tmp)
640 643
 			labels.asis <- fvarLabels(tmp)
641
-			labels.asis <- gsub("_", ".", labels.asis)
642
-			k <- match(labels.asis, colnames(lM(cnSet)))
643
-			lM(cnSet)[row.index, k] <- fData(tmp)
644
+			labels.asis <- gsub(paste("_", unique(tmp$batch), sep=""), paste(".", ii, sep=""), labels.asis)
645
+			k <- match(labels.asis, colnames(lM(object)))
646
+			lM(object)[row.index, k] <- fData(tmp)
644 647
 			rm(tmp); gc()
648
+			ii <- ii+1
645 649
 		}
646 650
 	}
647
-	return(cnSet)
651
+	return(object)
648 652
 }
649 653
 
650 654
 
... ...
@@ -941,9 +945,9 @@ cnOptions <- function(
941 945
 }
942 946
 
943 947
 ##linear model parameters
944
-lm.parameters <- function(object, cnOptions){
948
+lm.parameters <- function(object, batch){##cnOptions){
945 949
 	fD <- fData(object)
946
-	batch <- object$batch
950
+	##batch <- object$batch
947 951
 	uplate <- unique(batch)
948 952
 	parameterNames <- c(paste("tau2A", uplate, sep="_"),
949 953
 			    paste("tau2B", uplate, sep="_"),
... ...
@@ -1105,7 +1109,7 @@ nonpolymorphic <- function(object, cnOptions, tmp.objects){
1105 1109
 			##Assign values to object
1106 1110
 			object <- pr(object, "nuA", batch, nuA)
1107 1111
 			object <- pr(object, "phiA", batch, phiA)			
1108
-			if(verbose) message("Thresholding nu and phi")
1112
+			##if(verbose) message("Thresholding nu and phi")
1109 1113
 			object <- thresholdModelParams(object, cnOptions)
1110 1114
 		} else {
1111 1115
 			object <- pr(object, "nuA", batch, nuA)		
... ...
@@ -1126,7 +1130,7 @@ nonpolymorphic <- function(object, cnOptions, tmp.objects){
1126 1130
 			##Assign values to object
1127 1131
 			object <- pr(object, "nuA", batch, nuA)
1128 1132
 			object <- pr(object, "phiA", batch, phiA)			
1129
-			if(verbose) message("Thresholding nu and phi")
1133
+			##if(verbose) message("Thresholding nu and phi")
1130 1134
 			object <- thresholdModelParams(object, cnOptions)
1131 1135
 			##reassign values (now thresholded at MIN.NU and MIN.PHI
1132 1136
 			nuA <- getParam(object, "nuA", batch)
... ...
@@ -1153,10 +1157,9 @@ withinGenotypeMoments <- function(object, cnOptions, tmp.objects){
1153 1157
 	vA <- tmp.objects[["vA"]]
1154 1158
 	vB <- tmp.objects[["vB"]]
1155 1159
 	Ns <- tmp.objects[["Ns"]]
1156
-	G <- snpCallProbability(object) 
1160
+	G <- snpCall(object) 
1157 1161
 	GT.CONF.THR <- cnOptions$GT.CONF.THR
1158 1162
 	CHR <- unique(chromosome(object))
1159
-
1160 1163
 	A <- A(object)
1161 1164
 	B <- B(object)
1162 1165
 ##	highConf <- (1-exp(-confs(object)/1000)) > GT.CONF.THR
... ...
@@ -1276,14 +1279,14 @@ oneBatch <- function(object, cnOptions, tmp.objects){
1276 1279
 		muA[index[[j]], j+2] <- mus[, 1]
1277 1280
 		muB[index[[j]], j+2] <- mus[, 2]
1278 1281
 	}
1279
-	nobsA <- Ns[, "A"] > 10
1280
-	nobsB <- Ns[, "B"] > 10
1282
+	nobsA <- Ns[, "A"] > MIN.OBS
1283
+	nobsB <- Ns[, "B"] > MIN.OBS
1281 1284
 	notMissing <- !(is.na(muA[, "A"]) | is.na(muA[, "B"]) | is.na(muB[, "A"]) | is.na(muB[, "B"]))
1282 1285
 	complete <- list()
1283 1286
 	complete[[1]] <- which(correct.orderA & correct.orderB & nobsA & notMissing) ##be selective here
1284 1287
 	complete[[2]] <- which(correct.orderA & correct.orderB & nobsB & notMissing) ##be selective here	
1285 1288
 	size <- min(5000, length(complete[[1]]))
1286
-	if(size == 5000) complete <- lapply(complete, function(x) sample(x, size))
1289
+	if(size > 5000) complete <- lapply(complete, function(x) sample(x, size))
1287 1290
 	if(CHR == 23){
1288 1291
 		index <- list()
1289 1292
 		index[[1]] <- which(Ns[, "A"] == 0)
... ...
@@ -1821,13 +1824,13 @@ computeCopynumber.CNSet <- function(object, cnOptions){
1821 1824
 	THR.NU.PHI <- cnOptions$THR.NU.PHI
1822 1825
 	if(THR.NU.PHI){
1823 1826
 		verbose <- cnOptions$verbose
1824
-		if(verbose) message("Thresholding nu and phi")
1827
+		##if(verbose) message("Thresholding nu and phi")
1825 1828
 		object <- thresholdModelParams(object, cnOptions)
1826 1829
 	}		
1827
-	if(verbose) message("\nAllele specific copy number")	
1830
+	##if(verbose) message("\nAllele specific copy number")	
1828 1831
 	object <- polymorphic(object, cnOptions, tmp.objects)
1829 1832
 	if(any(!isSnp(object))){ ## there are nonpolymorphic probes
1830
-		if(verbose) message("\nCopy number for nonpolymorphic probes...")	
1833
+		##if(verbose) message("\nCopy number for nonpolymorphic probes...")	
1831 1834
 		object <- nonpolymorphic(object, cnOptions, tmp.objects)
1832 1835
 	}
1833 1836
 	##---------------------------------------------------------------------------
... ...
@@ -14,32 +14,28 @@ setMethod("[", "CNSetLM", function(x, i, j, ..., drop=FALSE){
14 14
 	}
15 15
 	x
16 16
 })
17
-setGeneric("lM", function(object) standardGeneric("lM"))
18
-setGeneric("lM<-", function(object, value) standardGeneric("lM<-"))
19 17
 setMethod("lM", "CNSetLM", function(object) object@lM)
20
-##setMethod("linearModelParam", "AffymetrixCNSet", function(object) object@linearModelParam)
21 18
 setReplaceMethod("lM", c("CNSetLM", "list_or_ffdf"), function(object, value){
22 19
 	object@lM <- value
23 20
 	object
24 21
 })
25 22
 
26
-##setAs("SnpSuperSet", "CNSet",
27
-##      function(from, to){
28
-##	      CA <- CB <- matrix(NA, nrow(from), ncol(from))
29
-##	      dimnames(CA) <- dimnames(CB) <- list(featureNames(from), sampleNames(from))		  
30
-##	      new("CNSet",
31
-##		  call=calls(from),
32
-##		  callProbability=assayData(from)[["callProbability"]],  ##confs(from) returns 1-exp(-x/1000)
33
-##		  alleleA=A(from),
34
-##		  alleleB=B(from),
35
-##		  CA=CA,
36
-##		  CB=CB,
37
-##		  phenoData=phenoData(from),
38
-##		  experimentData=experimentData(from),
39
-##		  annotation=annotation(from),
40
-##		  protocolData=protocolData(from),
41
-##		  featureData=featureData(from))
42
-##      })
23
+setAs("SnpSuperSet", "CNSetLM", function(from, to){
24
+	stopifnot("batch" %in% varLabels(from))
25
+	cnSet <- new("CNSetLM",
26
+		     alleleA=A(from),
27
+		     alleleB=B(from),
28
+		     call=snpCall(from),
29
+		     callProbability=snpCallProbability(from),
30
+		     CA=initializeBigMatrix("CA", nrow(from), ncol(from)),
31
+		     CB=initializeBigMatrix("CB", nrow(from), ncol(from)),
32
+		     annotation=annotation(from),
33
+		     featureData=featureData(from),
34
+		     experimentData=experimentData(from),
35
+		     phenoData=phenoData(from))
36
+	lM(cnSet) <- initializeParamObject(list(featureNames(cnSet), unique(from$batch)))
37
+	return(cnSet)
38
+})
43 39
 
44 40
 setMethod("computeCopynumber", "CNSet",
45 41
 	  function(object,
... ...
@@ -54,24 +50,24 @@ setMethod("computeCopynumber", "CNSet",
54 50
 		   nHOM.THR,
55 51
 		   MIN.NU,
56 52
 		   MIN.PHI,
53
+		   THR.NU.PHI,
57 54
 		   thresholdCopynumber){
58 55
 	## to do the bias adjustment, initial estimates of the parameters are needed
59 56
 	##  The initial estimates are gotten by running computeCopynumber with cnOptions[["bias.adj"]]=FALSE
60
-
61 57
 		  cnOptions <- list(
62
-				    DF.PRIOR=DF.PRIOR,
63 58
 				    MIN.OBS=MIN.OBS,
64
-				    GT.CONF.THR=GT.CONF.THR,
59
+				    DF.PRIOR=DF.PRIOR,
65 60
 				    bias.adj=bias.adj,
66 61
 				    prior.prob=prior.prob,
67 62
 				    seed=seed,
68 63
 				    verbose=verbose,
64
+				    GT.CONF.THR=GT.CONF.THR,
69 65
 				    PHI.THR=PHI.THR,
70 66
 				    nHOM.THR=nHOM.THR,
71 67
 				    MIN.NU=MIN.NU,
72 68
 				    MIN.PHI=MIN.PHI,
73 69
 				    THR.NU.PHI=THR.NU.PHI,
74
-				    thresholdCopynumber=thresholdCopynumber)		  
70
+				    thresholdCopynumber=thresholdCopynumber)
75 71
 	bias.adj <- cnOptions[["bias.adj"]]
76 72
 	if(bias.adj & all(is.na(CA(object)))){
77 73
 		cnOptions[["bias.adj"]] <- FALSE
... ...
@@ -85,72 +81,6 @@ setMethod("computeCopynumber", "CNSet",
85 81
 	object
86 82
 })
87 83
 
88
-##setMethod("computeCopynumber", "character", function(object, cnOptions){
89
-##	crlmmFile <- object
90
-##	isCNSet <- length(grep("cnSet", crlmmFile[1])) > 0
91
-##	for(i in seq(along=crlmmFile)){
92
-##		cat("Processing ", crlmmFile[i], "...\n")
93
-##		load(crlmmFile[i])
94
-##		if(isCNSet){
95
-##			object <- get("cnSet")
96
-##		} else {
97
-##			object <- get("callSetPlus")
98
-##		}
99
-##		CHR <- unique(chromosome(object))
100
-##		##if(length(CHR) > 1) stop("More than one chromosome in the object. This method requires one chromosome at a time.")		
101
-##		if(all(CHR==24)){
102
-##			message("skipping chromosome 24")
103
-##			next()
104
-##		}
105
-##		cat("----------------------------------------------------------------------------\n")
106
-##		cat("-        Estimating copy number for chromosome", CHR, "\n")
107
-##		cat("----------------------------------------------------------------------------\n")
108
-##		cnSet <- computeCopynumber(object, cnOptions)
109
-##		save(cnSet, file=file.path(dirname(crlmmFile), paste("cnSet_", CHR, ".rda", sep="")))
110
-##		if(!isCNSet) if(cnOptions[["unlink"]]) unlink(crlmmFile[i])
111
-##		rm(object, cnSet); gc();
112
-##	}	
113
-##})
114
-
115
-
116
-
117
-
118
-
119
-##setMethod("computeHmm", "SnpSuperSet", function(object, hmmOptions){
120
-##	cnSet <- computeCopynumber(object, hmmOptions)
121
-##	computeHmm(cnSet, hmmOptions)
122
-##})
123
-
124
-## Genotype everything to get callSetPlus objects
125
-## Go from callSets to Segments sets, writing only the segment set to file
126
-## Safe, but very inefficient. Writes the quantile normalized data to file several times...
127
-##setMethod("computeHmm", "character", function(object, hmmOptions){
128
-##	outdir <- cnOptions[["outdir"]]
129
-##	hmmOptions <- hmmOptions[["hmmOpts"]]
130
-##	filenames <- object
131
-##	for(i in seq(along=filenames)){
132
-##		chrom <- gsub(".rda", "", strsplit(filenames[i], "_")[[1]][[2]])
133
-##		if(hmmOptions[["verbose"]])
134
-##			message("Fitting HMM to chromosome ", chrom)
135
-##		if(file.exists(filenames[i])){
136
-##			message("Loading ", filenames[i])
137
-##			load(filenames[i])
138
-##			cnSet <- get("cnSet")
139
-##		} else {
140
-##			stop("File ", filenames[i], " does not exist.")
141
-##		}
142
-##		hmmOptions$emission <- computeEmission(filenames[i], hmmOptions)
143
-##		cnSet <- computeHmm(cnSet, hmmOptions)
144
-##		##MIN.MARKERS <- hmmOptions[["MIN.MARKERS"]]
145
-##		##segmentSet <- segments[segments$nprobes >= MIN.MARKERS, ]
146
-##		message("Saving ", file.path(outdir, paste("cnSet_", chrom, ".rda", sep="")))
147
-##		save(cnSet,
148
-##		     file=file.path(outdir, paste("cnSet_", chrom, ".rda", sep="")))
149
-##		unlink(file.path(outdir, paste("cnSet_", chrom, ".rda", sep="")))
150
-##	}
151
-##	fns <- list.files(outdir, pattern="cnSet", full.names=TRUE)
152
-##	return(fns)	
153
-##})
154 84
 
155 85
 setMethod("copyNumber", "CNSet", function(object){
156 86
 	I <- isSnp(object)
... ...
@@ -195,7 +195,7 @@ initializeParamObject <- function(dimnames){
195 195
 	ll <- vector("list", 17)
196 196
 	name <- paramNames()
197 197
 	if(isPackageLoaded("ff")){
198
-		for(i in 1:17) ll[[i]] <- ff(vmode="double", dim=c(nr, nc), pattern=file.path(ldPath(), name[i]), dimnames=dimnames, overwrite=TRUE)
198
+		for(i in 1:17) ll[[i]] <- createFF(name=name[i], dim=c(nr, nc), vmode="double")            ##ff(vmode="double", dim=c(nr, nc), pattern=file.path(ldPath(), name[i]), dimnames=dimnames, overwrite=TRUE)
199 199
 		names(ll) <- paramNames()
200 200
 		ll <- do.call(ffdf, ll)
201 201
 	} else {
202 202
deleted file mode 100644
... ...
@@ -1,31 +0,0 @@
1
-\name{AlleleSet-methods}
2
-\docType{methods}
3
-\alias{A}
4
-\alias{A,AlleleSet-method}
5
-\alias{A<-}
6
-\alias{A<-,AlleleSet,matrix-method}
7
-\alias{B}
8
-\alias{B,AlleleSet-method}
9
-\alias{B<-}
10
-\alias{B<-,AlleleSet,matrix-method}
11
-\title{Indicator for polymorphic probes}
12
-\description{
13
-  
14
-  This functions uses the annotation slot of the AlleleSet object to load
15
-  the corresponding annotation package and determine whether each probe
16
-  in the object interrogates a polymorphic or nonpolymorphic allele. For
17
-  instance, in the Affy 6.0 platform roughly 900,000 of the 1.8 million
18
-  markers are for polymorphic alleles.
19
-  
20
-}
21
-\usage{
22
-A(object)
23
-B(object)
24
-}
25
-\arguments{
26
-  \item{object}{AlleleSet object}
27
-}
28
-\value{
29
-	matrix of normalized intensities
30
-}
31
-\keyword{manip}
32 0
deleted file mode 100644
... ...
@@ -1,298 +0,0 @@
1
-\name{cnOptions}
2
-\alias{cnOptions}
3
-\title{
4
-	Options for copy number estimation
5
-}
6
-\description{
7
-	This function returns all the user-modifiable arguments to the crlmm copy number function.
8
-}
9
-\usage{
10
-    cnOptions(outdir = "./", cdfName, crlmmFile = "snpsetObject.rda", intensityFile = "normalizedIntensities.rda", rgFile = "rgFile.rda", save.it = TRUE, save.cnset = TRUE, load.it = TRUE, splitByChr = TRUE, MIN.OBS = 3, MIN.SAMPLES = 10, batch = NULL, DF.PRIOR = 50, bias.adj = FALSE, prior.prob = rep(1/4, 4), SNRmin = 4, chromosome = 1:24, seed = 123, verbose = TRUE, GT.CONF.THR = 0.99, PHI.THR = 2^6, nHOM.THR = 5, MIN.NU = 2^3, MIN.PHI = 2^3, THR.NU.PHI = TRUE, thresholdCopynumber = TRUE, unlink = TRUE, ...)
11
-}
12
-\arguments{
13
-  \item{outdir}{
14
-	Path to store output from genotyping / copy number algorithms
15
-}
16
-  \item{cdfName}{
17
-        Character string indicating array-type.  See crlmm:::validCdfNames().
18
-}
19
-  \item{crlmmFile}{
20
-
21
-        When \code{save.it} is \code{TRUE}, the output from the crlmm
22
-        genotyping will be saved to \code{<outdir>/<crlmmFile>}.  This
23
-        object should not be directly loaded by the user.  When
24
-        \code{load.it} is \code{TRUE}, the function
25
-        \code{crlmmCopynumber} will load this object from
26
-        \code{outdir} without rerunning the quantile normalization and
27
-        genotyping steps. 
28
-
29
-}
30
-
31
-  \item{intensityFile}{
32
-
33
-        When \code{save.it} is \code{TRUE}, the allele summaries of
34
-        the quantile normalized intensities at polymorphic loci will
35
-        be saved to \code{<outdir>/<intensityFile>}.  This object is
36
-        not intended to be loaded directly by the user.  When
37
-        \code{load.it} is \code{TRUE}, the function
38
-        \code{crlmmCopynumber} will load this object from
39
-        \code{outdir} without rerunning the quantile normalization and
40
-        genotyping steps.
41
-       
42
-}
43
-
44
-  \item{rgFile}{
45
-	
46
-	For Affymetrix platforms, \code{rgFile} is ignored.  When
47
-        \code{save.it} is \code{TRUE}, the R and G summaries of the
48
-        quantile normalized intensities for illumina platforms is
49
-        saved to \code{<outdir>/<rgFile>}.  This object is not
50
-        intended to be loaded directly by the user.  When
51
-        \code{load.it} is \code{TRUE}, the function
52
-        \code{crlmmCopynumber} will load this object from
53
-        \code{outdir} without rerunning the quantile normalization and
54
-        genotyping steps.
55
-
56
-}
57
-
58
-  \item{save.it}{
59
-  
60
-  When \code{TRUE}, intermediate files containing quantile-normalized
61
-  intensities and genotyping results are saved to \code{outdir}.
62
-  Saving these objects can save time if the copy number estimation is
63
-  repeated.  Intermediate files for the Affymetrix platform are
64
-  \code{intensityFile} and \code{crlmmFile}; for the Illumina
65
-  platform, \code{rgfile} is an intermediate file.
66
-  
67
-}
68
-
69
-  \item{save.cnset}{
70
-
71
-  This argument is ignored for the processing of more than one
72
-  chromosome (e.g., \code{length(chromosome) > 1}).  If \code{TRUE},
73
-  results from the copy number estimation for each chromosome is saved
74
-  to a file.  The format of the saved file is:
75
-
76
-  \code{<outdir>/cnSet_<chromosome>.rda}
77
-
78
-}
79
-
80
-  \item{load.it}{
81
-
82
-  If \code{TRUE}, intermediate files are loaded from \code{outdir}.
83
-  See \code{rgFile}, \code{intensityFile}, \code{crlmmFile}.
84
-
85
-}
86
-
87
-  \item{splitByChr}{
88
-  
89
-  If \code{TRUE}, results are saved for each chromosome.  In general,
90
-  this should always be \code{TRUE}.
91
-
92
-}
93
-
94
-  \item{MIN.OBS}{
95
-
96
-  For genotypes with fewer than \code{MIN.OBS}, the within-genotype
97
-  median is imputed from the observed within-genotype mediants at that
98
-  loci.  The parameters used in the regression are estimated from
99
-  polymorphic loci where the genotypes AA, AB, and BB each have a
100
-  frequency greater than \code{MIN.OBS}.
101
-  
102
-}
103
-
104
-  \item{MIN.SAMPLES}{
105
-
106
-  The minimum number of samples required for each batch. Batches
107
-  wither fewer than \code{MIN.SAMPLES} are skipped.
108
-
109
-}
110
-
111
-  \item{batch}{
112
-
113
-  Character string or factor denoting the batch for each file to be
114
-  processed.  The length of this argument should be the same as the
115
-  number of files. The batch covariate is a surrogate for experimental
116
-  conditions that change over calendar time. Typically, batch can be
117
-  denoted by the 96 well chemistry plate or the month / year.  
118
-
119
-}
120
-
121
-  \item{DF.PRIOR}{
122
-
123
-  The 2 x 2 covariance matrix of the background and signal variances
124
-  is estimated from the data at each locus.  This matrix is then
125
-  smoothed towards a common matrix estimated from all of the loci.
126
-  DF.PRIOR controls the amount of smoothing towards the common matrix,
127
-  with higher values corresponding to greater smoothing.  Currently,
128
-  DF.PRIOR is not estimated from the data.  Future versions may
129
-  estimate DF.PRIOR empirically.
130
-
131
-}
132
-
133
-  \item{bias.adj}{
134
-
135
-  If \code{TRUE}, initial estimates of the linear model are updated
136
-  after excluding samples that have a low posterior probability of
137
-  normal copy number.  Excluding samples that have a low posterior
138
-  probability can be helpful at loci in which a substantial fraction
139
-  of the samples have a copy number alteration.  For additional
140
-  information, see Scharpf et al., 2009.
141
-
142
-}
143
-
144
-  \item{prior.prob}{
145
-
146
-  A numerical vector providing prior probabilities for copy number
147
-  states corresponding to homozygous deletion, hemizygous deletion,
148
-  normal copy number, and amplification, respectively.
149
-
150
-}
151
-
152
-  \item{SNRmin}{
153
-
154
-  The signal to noise ratio (SNR) estimated during the CRLMM
155
-  genotyping is a summary measure of sample quality based on the
156
-  separation of the genotype clusters.  Smaller values of the SNR
157
-  correspond to samples of lower quality.  Samples are excluded from
158
-  the copy number estimation step if SNR values are less than this
159
-  value. A SNR less than 5 for the Affymetrix platform generally
160
-  corresponds to low quality.  For the Illumina platform, we have
161
-  observed samples with poor quality with SNR < 32.  More specific
162
-  recommendations for Illumina are being evaluated.
163
-
164
-}
165
-
166
-  \item{chromosome}{
167
-
168
-  The chromosome(s) to estimate copy number.  Valid entries are 1-23,
169
-  where 23 corresponds to chromosome X.  Quantile normalization and
170
-  genotyping are performed for all SNPs and nonpolymorphic features,
171
-  irrespective of the \code{chromosome} argument.
172
-
173
-}
174
-
175
-  \item{seed}{
176
-
177
-  Seed for random number generation (integer).  Used only for reproducibility.
178
-
179
-}
180
-
181
-  \item{verbose}{
182
-
183
-  If \code{TRUE}, verbose output.
184
-
185
-}
186
-
187
-  \item{GT.CONF.THR}{
188
-
189
-    Confidence threshold for genotype calls (0, 1).  Calls with
190
-    confidence scores below this theshold are not used to estimate the
191
-    within-genotype medians.
192
-
193
-}
194
-
195
-  \item{PHI.THR}{
196
-
197
-    SNPs with slopes (phi values) below this value are flagged.
198
-    Flagged SNPs are not used in a regression to impute background and
199
-    slope coefficients at nonpolymorphic loci.
200
-
201
-}
202
-
203
-  \item{nHOM.THR}{
204
-
205
-  If fewer than \code{nHOM.THR} homozygous genotypes (AA or BB) are
206
-    observed, the SNPs is flagged.  Flagged SNPs are not used in a
207
-    regression to impute background and slope coefficients at
208
-    nonpolymorphic loci.
209
-
210
-}
211
-
212
-  \item{MIN.NU}{
213
-
214
-  Minimum value for the estimate of background in the linear model.
215
-  Negative values are permissible from the estimation, but not
216
-  plausible.  Ignored if \code{THR.NU.PHI} is \code{FALSE}.
217
-
218
-}
219
-
220
-  \item{MIN.PHI}{
221
-
222
-  Minimum value for the estimate of signal (phi) in the linear model.
223
-  Negative values are permissible from the estimation, but not
224
-  plausible.   Ignored if \code{THR.NU.PHI} is \code{FALSE}.
225
-
226
-}
227
-
228
-  \item{THR.NU.PHI}{
229
-
230
-  If \code{THR.NU.PHI} is \code{FALSE}, \code{MIN.NU} and
231
-  \code{MIN.PHI} are ignored.
232
-
233
-}
234
-
235
-  \item{thresholdCopynumber}{
236
-
237
-  If \code{TRUE}, allele-specific number estimates are truncated.
238
-  Values less than 0.05 are assigned the value 0.05; values exceeding
239
-  5 are assigned the value 5.  
240
-
241
-}
242
-
243
-  \item{unlink}{
244
-
245
-  Whether to remove intermediate files storing the quantile normalized
246
-  intensities.  Ignored if \code{save.it} is \code{FALSE}.
247
-
248
-}
249
-
250
-  \item{\dots}{
251
-
252
-  Additional arguments are passed to \code{readIdatFiles} (Illumina
253
-  platforms only).
254
-
255
-}
256
-}
257
-\details{
258
-
259
-	The minimum required arguments when calling this function are
260
-	\code{cdfName} and \code{batch}.  The user will generally want
261
-	to assign a valid path to \code{outdir} that specifies where
262
-	the intermediate files and processed data are saved.
263
-	
264
-}
265
-
266
-\value{
267
-
268
-	In general, nothing is returned and all results are saved to
269
-	\code{outdir}.
270
-
271
-}
272
-
273
-
274
-\references{
275
-
276
-	RB Scharpf, I Ruczinski, B Carvalho, B Doan, A Chakravarti,
277
-	and R Irizarry (2009), A multilevel model to address batch
278
-	effects in copy number estimation using SNP arrays (Technical
279
-	Report).
280
-
281
-}
282
-
283
-\author{  R. Scharpf}
284
-
285
-
286
-\seealso{
287
-}
288
-\examples{
289
-require(hapmapsnp6)
290
-path <- system.file("celFiles", package="hapmapsnp6")
291
-celfiles <- list.celfiles(path)
292
-## the different populations were run in different batches.  For the
293
-##  files in this package, the batch is indicated by the 13th character
294
-##  in the string
295
-batch <- substr(celfiles, 13, 13)
296
-}
297
-\keyword{manip}
298
-
299 0
new file mode 100644
... ...
@@ -0,0 +1,127 @@
1
+\name{crlmmCopynumber}
2
+\alias{crlmmCopynumber}
3
+\title{Locus- and allele-specific estimation of copy number}
4
+\description{
5
+}
6
+\usage{
7
+crlmmCopynumber(object, batch, chromosome = 1:23, MIN.SAMPLES = 10, SNRMin = 5, MIN.OBS = 3, DF.PRIOR = 50, bias.adj = FALSE, prior.prob = rep(1/4, 4), seed = 1, verbose = TRUE, GT.CONF.THR = 0.99, PHI.THR = 2^6, nHOM.THR = 5, MIN.NU = 2^3, MIN.PHI = 2^3, THR.NU.PHI = TRUE, thresholdCopynumber = TRUE)
8
+}
9
+\arguments{
10
+  \item{object}{object of class \code{SnpSuperSet}.
11
+}
12
+  \item{batch}{ Character vector with length equal to the number of
13
+  samples.  Used to adjust for batch effects.  Chemistry plate or
14
+  date often work well.  See examples.
15
+}
16
+ \item{chromosome}{Numeric vector indicating which chromosomes to
17
+ process (length <= 23). For chromosome X, use 23. A copy number
18
+ method for chromosome Y is not yet available.
19
+}
20
+  \item{MIN.SAMPLES}{ 'Integer'.  The minimum number of samples in a
21
+  batch.  Batches with fewer than MIN.SAMPLES are skipped.
22
+}
23
+  \item{SNRMin}{ Samples with low signal to noise ratios are
24
+  excluded.  
25
+}
26
+  \item{MIN.OBS}{ 
27
+
28
+  For genotypes with fewer than \code{MIN.OBS}, the within-genotype
29
+  median is imputed from the observed genotypes.  For example, assume
30
+  that at a given SNP genotypes AA and AB were observed and BB is an
31
+  unobserved genotype.  For SNPs in which all 3 genotypes were
32
+  observed, we fit the model E(mean_BB) = beta0 + beta1*mean_AA +
33
+  beta2*mean_AB, obtaining estimates of beta0, beta1, and beta2.  The
34
+  imputed mean at the SNP with unobserved BB is then beta0hat +
35
+  beta1hat * mean_AA + beta2hat * mean_AB.
36
+
37
+}
38
+  \item{DF.PRIOR}{
39
+
40
+  The 2 x 2 covariance matrix of the background and signal variances
41
+  is estimated from the data at each locus.  This matrix is then
42
+  smoothed towards a common matrix estimated from all of the loci.
43
+  DF.PRIOR controls the amount of smoothing towards the common matrix,
44
+  with higher values corresponding to greater smoothing.  Currently,
45
+  DF.PRIOR is not estimated from the data.  Future versions may
46
+  estimate DF.PRIOR empirically.
47
+
48
+}
49
+  \item{bias.adj}{ 
50
+
51
+  If \code{TRUE}, initial estimates of the linear model are updated
52
+  after excluding samples that have a low posterior probability of
53
+  normal copy number.  Excluding samples that have a low posterior
54
+  probability can be helpful at loci in which a substantial fraction
55
+  of the samples have a copy number alteration.  For additional
56
+  information, see Scharpf et al., 2009.
57
+
58
+}
59
+  \item{prior.prob}{
60
+
61
+  A numerical vector providing prior probabilities for copy number
62
+  states corresponding to homozygous deletion, hemizygous deletion,
63
+  normal copy number, and amplification, respectively.
64
+
65
+}
66
+  \item{seed}{ Seed for sampling.
67
+}
68
+  \item{verbose}{ Logical. 
69
+}
70
+
71
+  \item{GT.CONF.THR}{ 
72
+
73
+    Confidence threshold for genotype calls (0, 1).  Calls with
74
+    confidence scores below this threshold are not used to estimate the
75
+    within-genotype medians.
76
+
77
+}
78
+
79
+  \item{PHI.THR}{ 
80
+    SNPs with slopes (phi values) below this value are flagged.
81
+    Flagged SNPs are not used in a regression to impute background and
82
+    slope coefficients at nonpolymorphic loci.
83
+}
84
+
85
+  \item{nHOM.THR}{ 
86
+
87
+  If fewer than \code{nHOM.THR} homozygous genotypes (AA or BB) are
88
+    observed, the SNP is flagged.  Flagged SNPs are not used in a
89
+    regression to impute background and slope coefficients at
90
+    nonpolymorphic loci.
91
+
92
+}
93
+
94
+  \item{MIN.NU}{ numeric. Minimum threshold for background. Ignored if \code{THR.NU.PHI} is \code{FALSE}.
95
+}
96
+  \item{MIN.PHI}{numeric. Minimum threshold for slope. Ignored if \code{THR.NU.PHI} is \code{FALSE}.
97
+}
98
+  \item{THR.NU.PHI}{
99
+  If \code{THR.NU.PHI} is \code{FALSE}, \code{MIN.NU} and
100
+  \code{MIN.PHI} are ignored.
101
+}
102
+  \item{thresholdCopynumber}{
103
+  If \code{TRUE}, allele-specific copy number estimates are truncated.
104
+  Values less than 0.05 are assigned the value 0.05; values exceeding
105
+  5 are assigned the value 5.  
106
+}
107
+}
108
+\details{
109
+}
110
+\value{
111
+}
112
+\references{
113
+}
114
+\author{R. Scharpf}
115
+\note{}
116
+\seealso{}
117
+\examples{
118
+## data(example.callSet)
119
+## cnSet <- crlmmCopynumber(example.callSet)
120
+## total copy number
121
+## cn <- copyNumber(cnSet)
122
+## allele-specific copy number
123
+## ca <- CA(cnSet) ## A dosage
124
+## cb <- CB(cnSet) ## B dosage
125
+}
126
+% Add one or more standard keywords, see file 'KEYWORDS' in the
127
+% R documentation directory.
... ...
@@ -7,26 +7,27 @@
7 7
 	Preprocessing and genotyping of Affymetrix arrays.	
8 8
 }
9 9
 \usage{
10
-genotype(filenames, cdfName, mixtureSampleSize = 10^5, fitMixture = TRUE, eps = 0.1, verbose = TRUE, seed = 1, sns, copynumber = FALSE, probs = rep(1/3, 3), DF = 6, SNRMin = 5, recallMin = 10, recallRegMin = 1000, gender = NULL, returnParams = TRUE, badSNP = 0.7)
10
+genotype(filenames, cdfName, mixtureSampleSize = 10^5, eps = 0.1, verbose = TRUE, seed = 1, sns, copynumber = FALSE, probs = rep(1/3, 3), DF = 6, SNRMin = 5, recallMin = 10, recallRegMin = 1000, gender = NULL, returnParams = TRUE, badSNP = 0.7)
11 11
 }
12 12
 \arguments{
13 13
   \item{filenames}{ complete path to CEL files}
14 14
   \item{cdfName}{  annotation package  (see also \code{validCdfNames})}
15
-  \item{mixtureSampleSize}{}
16
-  \item{fitMixture}{}
17
-  \item{eps}{}
15
+  \item{mixtureSampleSize}{    Sample size to be used when fitting the mixture model.}
16
+ \item{eps}{   Stopping criterion.}
18 17
   \item{verbose}{  Logical.  Whether to print descriptive messages during processing.}
19
-  \item{seed}{  Integer. Useful for reproducibility}
18
+  \item{seed}{ Seed to be used when sampling. Useful for reproducibility}
20 19
   \item{sns}{The sample identifiers.  If missing, the default sample names are \code{basename(filenames)}}
21 20
   \item{copynumber}{ Whether to quantile normalize the nonpolymorphic probes.  If TRUE, the quantile normalized intensities for nonpolymorphic markers are included in the 'A' matrix.}
22
-  \item{probs}{}
23
-  \item{DF}{}
24
-  \item{SNRMin}{}
25
-  \item{recallMin}{ }
26
-  \item{recallRegMin}{}
27
-  \item{gender}{  integer (  male = 1, female =2 ) or missing.  If missing, the gender is predicted.}
28
-  \item{returnParams}{}
29
-  \item{badSNP}{}
21
+  \item{probs}{'numeric' vector with priors for AA, AB and BB.}
22
+  \item{DF}{'integer' with number of degrees of freedom to use with t-distribution.}
23
+  \item{SNRMin}{'numeric' scalar defining the minimum SNR used to filter
24
+  out samples.}
25
+  \item{recallMin}{Minimum number of samples for recalibration. }
26
+  \item{recallRegMin}{Minimum number of SNPs for regression.}
27
+  \item{gender}{  integer vector (  male = 1, female =2 ) or missing,
28
+  with same length as filenames.  If missing, the gender is predicted.}
29
+  \item{returnParams}{'logical'. Return recalibrated parameters from crlmm.}
30
+  \item{badSNP}{'numeric'. Threshold to flag as bad SNP (affects batchQC)}
30 31
 }
31 32
 \details{
32 33
 }
... ...
@@ -44,10 +45,15 @@ genotype(filenames, cdfName, mixtureSampleSize = 10^5, fitMixture = TRUE, eps =
44 45
 
45 46
 }
46 47
 \author{R. Scharpf}
47
-\note{}
48
+\note{For large datasets, load the 'ff' package prior to genotyping --
49
+this will greatly reduce the RAM required for big jobs.  See
50
+\code{ldPath} and \code{ocSamples}.}
48 51
 
49 52
 \seealso{
50
-	\code{\link{snprma}}, \code{\link{crlmm}}, \code{\link{validCdfNames}}
53
+	\code{\link{snprma}}, \code{\link{crlmm}},
54
+	\code{\link{validCdfNames}},
55
+	\code{\link[oligoClasses]{ocSamples}},
56
+	\code{\link[oligoClasses]{ldOpts}}
51 57
 }
52 58
 \examples{
53 59
 if (require(genomewidesnp5Crlmm) & require(hapmapsnp5)){
... ...
@@ -55,7 +61,7 @@ if (require(genomewidesnp5Crlmm) & require(hapmapsnp5)){
55 61
   ## the filenames with full path...
56 62
   ## very useful when genotyping samples not in the working directory
57 63
   cels <- list.celfiles(path, full.names=TRUE)
58
-  (crlmmOutput <- genotype(cels))
64
+  (crlmmOutput <- genotype(cels, cdfName="genomewidesnp5"))
59 65
 }
60 66
 }
61 67
 \keyword{ classif }