Browse code

several updates for ff. new classes for affy/illumina processing. More s4-style code

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45083 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 08/03/2010 04:46:55
Showing29 changed files

... ...
@@ -413,3 +413,19 @@ then readIDAT() should work. Thanks to Pierre Cherel who reported this error.
413 413
 
414 414
 2010-03-05 M. Ritchie - committed version 1.5.29
415 415
  * crlmmIlluminaV2() now exported.  Added man page crlmmIlluminaV2.Rd
416
+
417
+2010-03-07 R. Scharpf committed version 1.5.30
418
+
419
+- one can use ff in conjunction with affy platforms 5.0 and 6.0
420
+
421
+- more s4-style code
422
+
423
+- preprocessing / genotyping is basically the same set of commands with either illumina/affy platforms (though illumina-users may have to play with some of the options for reading idat files
424
+
425
+- if ff package is loaded, the assayData elements are ff objects
426
+
427
+- the classes all inherit from 'CrlmmContainer' that contains an additional slot 'options' and 'genomeAnnotation'.   options is a list with the default arguments to snprma, crlmm, etc, as well as a few global settings such as 'verbose' and 'seed'.  I added the genomeAnnotation slot simply because I want to be able to use ff-objects for the feature-level data.   Maybe with setClassUnion we could avoid adding the genomeAnnotation slot (and use featureData instead), but I didn't have much success with this.
428
+
429
+- the batchSize argument will run the genotyping (crlmmGT) in batches to reduce the RAM.  The default is batches of size 1000.
430
+
431
+- The crlmm.Rd file contains an example with / without ff for Affymetrix data.
... ...
@@ -1,8 +1,8 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.5.29
5
-Date: 2010-03-05
4
+Version: 1.5.30
5
+Date: 2010-03-07
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>
7 7
 Maintainer: Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
8 8
 Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 5.0, 6.0, and Illumina platforms
... ...
@@ -19,16 +19,21 @@ Imports: affyio (>= 1.15.2),
19 19
          splines,
20 20
          mvtnorm,
21 21
          ellipse,
22
-         SNPchip
23
-Enhances: ff
22
+         SNPchip,ff
24 23
 Suggests: hapmapsnp5,
25 24
           hapmapsnp6,
26 25
           genomewidesnp5Crlmm (>= 1.0.2),
27 26
           genomewidesnp6Crlmm (>= 1.0.2),
28 27
           snpMatrix,
29 28
           metaArray
30
-Collate: AllGenerics.R
29
+Collate: AllClasses.R
30
+	 AllGenerics.R
31
+	 methods-AffymetrixAlleleSet.R
32
+	 methods-IlluminaAlleleSet.R
33
+	 methods-CrlmmContainer.R
31 34
 	 methods-CNSet.R
35
+	 methods-AlleleSet.R
36
+	 methods-CallSet.R
32 37
 	 methods-eSet.R
33 38
          methods-SnpSuperSet.R
34 39
          cnrma-functions.R
... ...
@@ -8,11 +8,12 @@ importClassesFrom(Biobase, AnnotatedDataFrame, AssayData, eSet, SnpSet,
8 8
 
9 9
 importMethodsFrom(Biobase, annotation, "annotation<-",
10 10
                   annotatedDataFrameFrom, assayData, "assayData<-",
11
+		  snpCallProbability,
11 12
                   combine, dims, experimentData, "experimentData<-",
12 13
                   fData, featureData, "featureData<-", featureNames,
13 14
                   fvarMetadata, fvarLabels, pData, phenoData,
14 15
                   "phenoData<-", protocolData, "protocolData<-",
15
-                  pubMedIds, rowMedians, sampleNames, storageMode,
16
+                  pubMedIds, rowMedians, sampleNames, snpCall, storageMode,
16 17
                   "storageMode<-", updateObject, varLabels)
17 18
 
18 19
 importFrom(Biobase, assayDataElement, assayDataElementNames,
... ...
@@ -20,11 +21,16 @@ importFrom(Biobase, assayDataElement, assayDataElementNames,
20 21
            validMsg)
21 22
 
22 23
 ## oligoClasses
23
-importClassesFrom(oligoClasses, SnpSuperSet, AlleleSet)
24
+##importClassesFrom(oligoClasses, SnpSuperSet, AlleleSet, CNSet)
24 25
 
25
-importMethodsFrom(oligoClasses, allele, calls, "calls<-", confs,
26
-		  "confs<-", cnConfidence, "cnConfidence<-", isSnp,
27
-		  chromosome, position, CA, "CA<-", CB, "CB<-", A, B)
26
+##S3 method ffdf and class ffdf
27
+importFrom(ff, ffdf, ff, as.ff, as.ffdf)
28
+
29
+
30
+
31
+##importMethodsFrom(oligoClasses, allele, calls, "calls<-", confs,
32
+##		  "confs<-", cnConfidence, "cnConfidence<-", isSnp,
33
+##		  chromosome, position, CA, "CA<-", CB, "CB<-", A, B)
28 34
 
29 35
 importFrom(oligoClasses, chromosome2integer, celfileDate, list.celfiles,
30 36
            copyNumber)
... ...
@@ -60,8 +66,24 @@ importFrom(mvtnorm, dmvnorm)
60 66
 ## ellipse
61 67
 importFrom(ellipse, ellipse)
62 68
 
63
-exportMethods(copyNumber)
64
-export(cnOptions, crlmm, crlmmIllumina, crlmmIlluminaV2, crlmmCopynumber, ellipse, readIdatFiles, snprma, getParam) 
69
+
70
+##exportClasses(FFSet)
71
+exportMethods(annotatedDataFrameFrom,
72
+	copyNumber, initialize, 
73
+              ##show, "$", "[[", "[", 
74
+	      genomeAnnotation,
75
+	      lM,
76
+	      CA,
77
+	      CB,
78
+	      A,
79
+	      B,
80
+	      snpCall,
81
+	      confs,
82
+	      chromosome,
83
+	      position,
84
+	      isSnp)
85
+export(crlmmOptions, crlmm, crlmmCopynumber, ellipse, readIdatFiles, snprma, getParam, validCdfNames)
86
+
65 87
 
66 88
 
67 89
 #############
... ...
@@ -69,10 +91,5 @@ export(cnOptions, crlmm, crlmmIllumina, crlmmIlluminaV2, crlmmCopynumber, ellips
69 91
 #############
70 92
 
71 93
 ##export everything that does not start with a .
72
-##exportPattern("^[^\\.]")
94
+exportPattern("^[^\\.]")
73 95
 
74
-##export(thresholdModelParams, computeCopynumber.CNSet, nuphiAllele, coefs, biasAdjNP, 
75
-##       nonpolymorphic.poe, crlmmWrapper,
76
-##       loadIlluminaCallSet, loadIlluminaRG, loadIlluminaCnrma,
77
-##       cnrma, 
78
-##       crlmmGT, oneBatch)
79 96
new file mode 100644
... ...
@@ -0,0 +1,74 @@
1
+setOldClass("ffdf")
2
+setOldClass("ff_matrix")
3
+setClassUnion("matrix_or_ff", c("matrix", "ff_matrix"))
4
+setClassUnion("list_or_ffdf", c("list", "ffdf"))
5
+setClass("CrlmmContainer", contains="eSet",
6
+	 representation(options="list",
7
+			genomeAnnotation="ANY",
8
+			"VIRTUAL"))
9
+
10
+setMethod("show", "CrlmmContainer", function(object){
11
+	callNextMethod(object)
12
+	cat("options: \n")
13
+	print(names(crlmmOptions(object)))
14
+	cat("\n")
15
+	cat("genomeAnnotation:", nrow(genomeAnnotation(object)), " rows, ", ncol(genomeAnnotation(object)), " columns\n")
16
+	print(genomeAnnotation(object)[1:5, ])
17
+	cat("\n")
18
+})
19
+setClass("AlleleSet", contains="CrlmmContainer")
20
+setClass("CallSet", contains="AlleleSet")
21
+setClass("CNSet", contains="CallSet",
22
+	 representation(lM="list_or_ffdf"))
23
+
24
+setClass("IlluminaRGSet", contains="CrlmmContainer")
25
+setClass("IlluminaXYSet", contains="CrlmmContainer")
26
+
27
+setClass("AffymetrixAlleleSet", contains="AlleleSet")  ##AffymetrixAlleleSet
28
+setClass("IlluminaAlleleSet", contains="AlleleSet")
29
+##setClass("AffymetrixBigData", contains="AffymetrixAlleleSet")
30
+##setClass("AffymetrixSmallData", contains="AffymetrixAlleleSet")
31
+##setClass("IlluminaSmallData", contains="IlluminaAlleleSet")
32
+##setClass("IlluminaBigData", contains="IlluminaAlleleSet")
33
+##setMethod("initialize", "AffymetrixBigData", function(.Object, annotation){
34
+##	.Object <- callNextMethod(.Object)
35
+##	if(!missing(annotation)) annotation(.Object) <- annotation
36
+##	.Object
37
+##})
38
+##setClass("AffymetrixCallSet", contains="CallSet")
39
+##setClass("IlluminaCallSet", contains="CallSet")
40
+setMethod("initialize", "AlleleSet", function(.Object, alleleA=new("matrix"), alleleB=new("matrix"), ...){
41
+	.Object <- callNextMethod(.Object, alleleA=alleleA, alleleB=alleleB, ...)
42
+	storageMode(.Object) <- "environment"
43
+	.Object
44
+})
45
+setMethod("initialize", "CallSet", function(.Object, call=new("matrix"), callProbability=new("matrix"), ...){
46
+	.Object <- callNextMethod(.Object, call=call, callProbability=callProbability, ...)
47
+	storageMode(.Object) <- "environment"
48
+	.Object
49
+})
50
+setMethod("initialize", "CNSet", function(.Object, CA=new("matrix"), CB=new("matrix"), lM=new("list"), ...){
51
+	.Object <- callNextMethod(.Object, CA=CA, CB=CB, lM=lM,...)
52
+	storageMode(.Object) <- "environment"
53
+	.Object
54
+})
55
+setValidity("AlleleSet", function(object) {
56
+	assayDataValidMembers(assayData(object), c("alleleA", "alleleB"))
57
+})
58
+setValidity("IlluminaRGSet", function(object) {
59
+	assayDataValidMembers(assayData(object), c("R", "G", "zero"))
60
+})
61
+setValidity("IlluminaXYSet", function(object) {
62
+	assayDataValidMembers(assayData(object), c("X", "Y", "zero"))
63
+})
64
+
65
+setValidity("CallSet", function(object) {
66
+	assayDataValidMembers(assayData(object), c("alleleA", "alleleB", "call", "callProbability"))
67
+})
68
+setValidity("CNSet", function(object) {
69
+	assayDataValidMembers(assayData(object), c("alleleA", "alleleB", "call", "callProbability", "CA", "CB"))
70
+})
71
+	 
72
+
73
+
74
+
... ...
@@ -1,12 +1,57 @@
1
-##setGeneric("A<-", function(object, value) standardGeneric("A<-"))
2
-##setGeneric("B<-", function(object, value) standardGeneric("B<-"))
1
+setGeneric("A<-", function(object, value) standardGeneric("A<-"))
2
+setGeneric("B<-", function(object, value) standardGeneric("B<-"))
3 3
 
4 4
 setGeneric("getParam", function(object, name, ...) standardGeneric("getParam"))
5 5
 setGeneric("cnIndex", function(object) standardGeneric("cnIndex"))
6 6
 setGeneric("cnNames", function(object) standardGeneric("cnNames"))
7
-setGeneric("computeCopynumber", function(object, cnOptions) standardGeneric("computeCopynumber"))
7
+setGeneric("confs", function(object) standardGeneric("confs"))
8
+setGeneric("computeCopynumber", function(object) standardGeneric("computeCopynumber"))
9
+setGeneric("crlmm", function(object, ...) standardGeneric("crlmm"))
10
+setGeneric("crlmmOptions", function(object) standardGeneric("crlmmOptions"))
11
+setGeneric("crlmmOptions<-", function(object, value) standardGeneric("crlmmOptions<-"))
12
+setGeneric("construct", function(object, filenames) standardGeneric("construct"))
13
+
14
+setGeneric("getOptions", function(object) standardGeneric("getOptions"))
15
+
16
+##setGeneric("getA", function(object) standardGeneric("getA"))
17
+##setGeneric("getB", function(object) standardGeneric("getB"))
18
+##setGeneric("getG", function(object) standardGeneric("getG"))
19
+##setGeneric("getR", function(object) standardGeneric("getR"))
20
+##setGeneric("getZero", function(object) standardGeneric("getZero"))
21
+##setGeneric("getCalls", function(object) standardGeneric("getCalls"))
22
+##setGeneric("getConfs", function(object) standardGeneric("getConfs"))
23
+##setGeneric("getCA", function(object) standardGeneric("getCA"))
24
+##setGeneric("getCB", function(object) standardGeneric("getCB"))
25
+setGeneric("getPhenoData", function(object) standardGeneric("getPhenoData"))
26
+setGeneric("getFeatureData", function(object) standardGeneric("getFeatureData"))
27
+setGeneric("getProtocolData", function(object, filenames) standardGeneric("getProtocolData"))
28
+setGeneric("getGenomeAnnotation", function(object, ...) standardGeneric("getGenomeAnnotation"))
29
+setGeneric("getLinearModelParam", function(object, ...) standardGeneric("getLinearModelParam"))
30
+
31
+##setGeneric("initializeStorage", function(object) standardGeneric("initializeStorage")) 
32
+setGeneric("prediction", function(x, ...) standardGeneric("prediction")) 
33
+setGeneric("genomeAnnotation", function(object) standardGeneric("genomeAnnotation"))
34
+setGeneric("genomeAnnotation<-", function(object,value) standardGeneric("genomeAnnotation<-"))
35
+setGeneric("lM", function(object) standardGeneric("lM"))
36
+setGeneric("lM<-", function(object, value) standardGeneric("lM<-"))
37
+
38
+##setGeneric("nFeatures", function(object) standardGeneric("nFeatures"))
39
+
8 40
 setGeneric("pr", function(object, name, batch, value) standardGeneric("pr"))
41
+setGeneric("rma", function(object) standardGeneric("rma"))
42
+setGeneric("snprma", function(object, ...) standardGeneric("snprma")) 
9 43
 setGeneric("snpIndex", function(object) standardGeneric("snpIndex"))
10 44
 setGeneric("snpNames", function(object) standardGeneric("snpNames"))
11 45
 ##setGeneric("splitByChromosome", function(object, ...) standardGeneric("splitByChromosome"))
12 46
 
47
+setGeneric("R", function(object) standardGeneric("R"))
48
+setGeneric("G", function(object) standardGeneric("G"))
49
+setGeneric("Z", function(object) standardGeneric("Z"))
50
+setGeneric("X", function(object) standardGeneric("X"))
51
+setGeneric("Y", function(object) standardGeneric("Y"))
52
+setMethod("R", "IlluminaRGSet", function(object) assayDataElement(object, "R"))
53
+setMethod("G", "IlluminaRGSet", function(object) assayDataElement(object, "G"))
54
+setMethod("Z", "IlluminaRGSet", function(object) assayDataElement(object, "zero"))
55
+setMethod("X", "IlluminaXYSet", function(object) assayDataElement(object, "X"))
56
+setMethod("Y", "IlluminaXYSet", function(object) assayDataElement(object, "Y"))
57
+setMethod("Z", "IlluminaXYSet", function(object) assayDataElement(object, "zero"))
... ...
@@ -153,366 +153,272 @@ predictGender <- function(res, cdfName="genomewidesnp6", SNRMin=5){
153 153
 	return(gender)
154 154
 }
155 155
 
156
-##crlmmCopynumber <- function(filenames, cnOptions, object, ...){
157
-##	if(!missing(object)){
158
-##		stopifnot(class(object) == "CNSet")
159
-##		createIntermediateObjects <- FALSE
160
-##	} else {
161
-##		createIntermediateObjects <- TRUE
162
-##		## 33G for 1239 files
163
-##		crlmmResults <- crlmmWrapper(filenames, cnOptions, ...)
164
-##		snprmaResult <- crlmmResults[["snprmaResult"]]
165
-##		cnrmaResult <- crlmmResults[["cnrmaResult"]]
166
-##		callSet <- crlmmResults[["callSet"]]
167
-##		rm(crlmmResults); gc()
168
-##		annotation(callSet) <- cnOptions[["cdfName"]]
169
-##		stopifnot(identical(featureNames(callSet), snprmaResult[["gns"]]))
170
-##		path <- system.file("extdata", package=paste(annotation(callSet), "Crlmm", sep=""))	
156
+
157
+##initializeFFObjects <- function(filenames, cnOptions){
158
+##	outdir <- cnOptions[["outdir"]]
159
+##	cdfName <- cnOptions[["cdfName"]]
160
+##	AFile <- cnOptions[["AFile"]]
161
+##	BFile <- cnOptions[["BFile"]]
162
+##	callsFile <- cnOptions[["callsFile"]]
163
+##	confsFile <- cnOptions[["confsFile"]]
164
+##	snprmaFile <- cnOptions[["snprmaFile"]]
165
+##	cnrmaFile <- cnOptions[["cnrmaFile"]]
166
+##	CAFile <- cnOptions[["CAFile"]]
167
+##	CBFile <- cnOptions[["CBFile"]]
168
+##	load.it <- cnOptions[["load.it"]]
169
+##	fileExists <- list(A=file.exists(AFile),
170
+##			   B=file.exists(BFile),
171
+##			   calls=file.exists(callsFile),
172
+##			   confs=file.exists(confsFile),
173
+##			   CA=file.exists(CAFile),
174
+##			   CB=file.exists(CBFile))
175
+##	allExists <- all(unlist(fileExists))
176
+##	##if files already exist, check that the files have the appropriate dimension
177
+##	if(allExists){
178
+##		load(AFile)
179
+##		open(A)
180
+##		sns <- dimnames(A)[[2]]
181
+##		if(!identical(sns, basename(filenames)) | !load.it){
182
+##		## if not of the same dimension, clean up
183
+##			message("Sample names in previously saved objects differ from the filenames. Removing previously saved objects.")
184
+##			delete(A); gc()
185
+##			unlink(AFile)
186
+##			load(BFile); delete(B); unlink(BFile)
187
+##			unlink(snprmaFile)
188
+##			unlink(cnrmaFile)
189
+##			if(file.exists(file.path(outdir, "cnParams.rda"))){
190
+##				load(file.path(outdir, "cnParams.rda"))
191
+##				delete(cnParams); gc()
192
+##				unlink(file.path(outdir, "cnParams.rda"))
193
+##			}
194
+##			load(callsFile); delete(calls); unlink(callsFile)
195
+##			load(confsFile); delete(confs); unlink(confsFile)
196
+##			load(CAFile); delete(CA); unlink(CAFile)
197
+##			load(CBFile); delete(CB); unlink(CBFile)
198
+##			allExists <- FALSE
199
+##		} 
200
+##	}
201
+##	if(!allExists) 	{
202
+##		message("Initializing ff objects for A, B, confs, calls, CA, and CB.")
203
+##		dns <- .dimnames(filenames, cnOptions[["cdfName"]], cnOptions[["verbose"]])
204
+##		fns <- dns[[1]]
205
+##	}
206
+##	if(!file.exists(AFile)) {A <- initializeBigMatrix(dns); save(A, file=AFile); close(A)}
207
+##	if(!file.exists(BFile)) {B <- initializeBigMatrix(dns); save(B, file=BFile); close(B)}
208
+##	if(!file.exists(confsFile)) {confs <- initializeBigMatrix(dns); save(confs, file=confsFile); close(confs)}
209
+##	if(!file.exists(callsFile)) {calls <- initializeBigMatrix(dns); save(calls, file=callsFile); close(calls)}
210
+##	if(!file.exists(CAFile)) {CA <- initializeBigMatrix(dns); save(CA, file=CAFile); close(CA)}
211
+##	if(!file.exists(CBFile)) {CB <- initializeBigMatrix(dns); save(CB, file=CBFile); close(CB)}
212
+##	featureDataFile <- file.path(outdir, "featureDataFF.rda")
213
+##	if(!file.exists(featureDataFile)){
214
+##		path <- system.file("extdata", package=paste(cnOptions[["cdfName"]], "Crlmm", sep=""))
171 215
 ##		load(file.path(path, "snpProbes.rda"))
172 216
 ##		snpProbes <- get("snpProbes")
173 217
 ##		load(file.path(path, "cnProbes.rda"))
174
-##		cnProbes <- get("cnProbes")	
175
-##		k <- grep("chr", colnames(snpProbes))
176
-##		if(length(k) < 1) stop("chr or chromosome not in colnames(snpProbes)")
218
+##		cnProbes <- get("cnProbes")
219
+##		message("Initializing featureDataFF.")
220
+##		fvarlabels <- c("chromosome", "position",   "isSnp")
221
+##		M <- matrix(NA, length(fns), 3, dimnames=list(fns, fvarlabels))
222
+##		index <- match(rownames(snpProbes), rownames(M)) #only snp probes in M get assigned position
223
+##		M[index, "position"] <- snpProbes[, grep("pos", colnames(snpProbes))]
224
+##		M[index, "chromosome"] <- snpProbes[, grep("chr", colnames(snpProbes))]
225
+##		M[index, "isSnp"] <- 1L		
226
+##		index <- match(rownames(cnProbes), rownames(M)) #only snp probes in M get assigned position
227
+##		M[index, "position"] <- cnProbes[, grep("pos", colnames(cnProbes))]
228
+##		M[index, "chromosome"] <- cnProbes[, grep("chr", colnames(cnProbes))]
229
+##		M[index, "isSnp"] <- 0L
230
+##		featureDataFF <- ff(M, dim=c(nrow(M), ncol(M)),
231
+##				    vmode="integer", finalizer="close",
232
+##				    overwrite=TRUE,
233
+##				    dimnames=list(fns, fvarlabels))
234
+##		save(featureDataFF, file=file.path(outdir, "featureDataFF.rda"))
235
+##		close(featureDataFF)
236
+##		rm(M, cnProbes, snpProbes, featureDataFF); gc()
177 237
 ##	}
238
+##	## parameters file
239
+##	parameterFile <- file.path(outdir, "cnParams.rda")
240
+##	if(!file.exists(parameterFile)) {
241
+##		message("Initializing parameters file")
242
+##		batch <- cnOptions[["batch"]]
243
+##		dns.batch <- list(fns, unique(batch)) 
244
+##		cnParams <- initializeParamObject(dns.batch)
245
+##		save(cnParams, file=file.path(outdir, "cnParams.rda"))
246
+##		close(cnParams)
247
+##	}
248
+##}
249
+
250
+##preprocessAndGenotype <- function(filenames, cnOptions, ...){
178 251
 ##	set.seed(cnOptions[["seed"]])  ##for reproducibility
179
-##	cnFile <- cnOptions[["cnFile"]]
180
-##	chromosome <- cnOptions[["chromosome"]]
181
-##	SNRmin <- cnOptions[["SNRmin"]]
252
+##	protocolFile <- cnOptions[["protocolFile"]]
253
+##	cdfName <- cnOptions[["cdfName"]]
254
+##	verbose <- cnOptions[["verbose"]]
255
+##	if(file.exists(protocolFile)){
256
+##		## check that file is the same dimension
257
+##		load(protocolFile)
258
+##		if(!identical(sampleNames(protocoldata), basename(filenames)))
259
+##			unlink(protocolFile)
260
+##	}
261
+##	if(!file.exists(protocolFile)){
262
+##		platform <- whichPlatform(paste(cdfName, "Crlmm", sep=""))
263
+##		if(platform=="affymetrix"){
264
+##			if(verbose) message("Creating protocol file with scan dates for the affy arrays")
265
+##			scanDates <- data.frame(ScanDate=sapply(filenames, celfileDate))
266
+##			rownames(scanDates) <- basename(rownames(scanDates))
267
+##			protocoldata <- new("AnnotatedDataFrame",
268
+##					    data=scanDates,
269
+##					    varMetadata=data.frame(labelDescription=colnames(scanDates),
270
+##					    row.names=colnames(scanDates)))
271
+##			save(protocoldata, file=protocolFile)
272
+##		}
273
+##		## protocol file for illumina saved during the readIdatFile step
274
+##	} 
275
+##	if(isPackageLoaded("ff")) initializeFFObjects(filenames, cnOptions)
276
+##	crlmmWrapper(filenames, cnOptions, ...)
277
+##	message("Checking for required files...")
278
+##	message(cnOptions[["AFile"]], ": ", file.exists(cnOptions[["AFile"]]))
279
+##	message(cnOptions[["BFile"]], ": ", file.exists(cnOptions[["BFile"]]))
280
+##	message(cnOptions[["callsFile"]], ": ", file.exists(cnOptions[["callsFile"]]))
281
+##	message(cnOptions[["confsFile"]], ": ", file.exists(cnOptions[["confsFile"]]))
282
+##	message(cnOptions[["snprmaFile"]], ": ", file.exists(cnOptions[["snprmaFile"]]))
283
+##	message(cnOptions[["protocolFile"]], ": ", file.exists(cnOptions[["protocolFile"]]))
284
+##}
285
+	
286
+##crlmmCopynumber <- function(cnOptions, ...){
287
+##crlmmCopynumber <- function(object){
288
+##	ops <- crlmmOptions(object)
289
+##	verbose <- ops$verbose
290
+##	calls <- snpCall(object)
291
+##	confs <- confs(object)
292
+##	fns <- featureNames(object)
293
+##	SNRmin <- ops$SNRMin
294
+##	batch <- object$batch
295
+##	whichBatch <- ops$cnOpts$whichBatch
296
+##	chromosome <- ops$cnOpts$chromosome
297
+##	MIN.SAMPLES <- ops$cnOpts$MIN.SAMPLES
298
+##	##k <- grep("chr", colnames(snpProbes))
182 299
 ##	for(CHR in chromosome){
300
+##		##annotated snp and cn probes
301
+##		##snps <- rownames(snpProbes)[snpProbes[, k] == CHR]
302
+##		##cns <- rownames(cnProbes)[cnProbes[, k] == CHR]
303
+##		##where are the annotated snps in the fns file
304
+##		##index.snps <- match(snps, fns)
305
+##		##index.cn <- match(cns, fns)
306
+##		##row.index <- c(index.snps, index.cn)
183 307
 ##		cat("Chromosome ", CHR, "\n")
184
-##		if(createIntermediateObjects){
185
-##			snps <- rownames(snpProbes)[snpProbes[, k] == CHR]
186
-##			cnps <- rownames(cnProbes)[cnProbes[, k] == CHR]
187
-##			index.snps <- match(snps, featureNames(callSet))
188
-##			index.nps <- match(cnps, rownames(cnrmaResult[["NP"]]))
189
-##			if(!is.null(cnrmaResult)){
190
-##				npI <- cnrmaResult$NP[index.nps, ]
191
-##			} else npI <- NULL
192
-##			snpI <- list(A=snprmaResult$A[index.snps, ],
193
-##				     B=snprmaResult$B[index.snps, ],
194
-##				     sns=snprmaResult$sns,
195
-##				     gns=snprmaResult$gns[index.snps],
196
-##				     SNR=snprmaResult$SNR,
197
-##				     SKW=snprmaResult$SKW,
198
-##				     mixtureParams=snprmaResult$mixtureParams,
199
-##				     cdfName=snprmaResult$cdfName)
200
-##			cnOptions[["batch"]] <- cnOptions[["batch"]][snpI[["SNR"]]  >= SNRmin]
201
-##			cnSet <- combineIntensities(res=snpI,
202
-##						    NP=npI,
203
-##						    callSet=callSet[index.snps, ])
204
-##			if(any(cnSet$SNR > SNRmin)){
205
-##				message(paste("Excluding samples with SNR < ", SNRmin))
206
-##				cnSet <- cnSet[, cnSet$SNR >= SNRmin]
207
-##			}
208
-##			rm(snpI, npI, snps, cnps, index.snps, index.nps); gc()
209
-##			pData(cnSet)$batch <- cnOptions[["batch"]]
210
-##			featureData(cnSet) <- lm.parameters(cnSet, cnOptions)
211
-##		} else {
212
-##			cnSet <- object
213
-##		}
214
-##		if(CHR != 24){
215
-##			## 60G for 1239 files
216
-##			cnSet <- computeCopynumber(cnSet, cnOptions)
217
-##		} else{
218
-##			message("Copy number estimates not available for chromosome Y.  Saving only the 'callSet' object for this chromosome")
219
-##			alleleSet <- cnSet
220
-##			save(alleleSet, file=file.path(cnOptions[["outdir"]], paste("alleleSet_", CHR, ".rda", sep="")))
221
-##			rm(cnSet, alleleSet); gc()
222
-##			next()
223
-##		}
224
-##		if(length(chromosome) == 1){
225
-##			if(cnOptions[["save.cnset"]]){
226
-##				save(cnSet, file=file.path(paste(cnFile, "_", CHR, ".rda", sep="")))				
227
-##				##save(cnSet, file=file.path(cnOptions[["outdir"]], paste("cnSet_", CHR, ".rda", sep="")))
308
+##		for(i in whichBatch){
309
+##			PLATE <- unique(batch)[i]
310
+##			message("Plate: ", PLATE)
311
+##			sample.index <- which(batch==PLATE)
312
+##			if(length(sample.index) < MIN.SAMPLES) {
313
+##				warning("Plate ", PLATE, " has fewer than 10 samples.  Skipping this plate.")
314
+##				next()
228 315
 ##			}
229
-##		}
230
-##		if(length(chromosome) > 1){
231
-##			message(paste("Saving ", file.path(cnOptions[["outdir"]], paste(cnFile, "_", CHR, ".rda", sep=""))))
232
-##			save(cnSet, file=file.path(paste(cnFile, "_", CHR, ".rda", sep="")))				
233
-##			##save(cnSet, file=file.path(cnOptions[["outdir"]], paste("cnSet_", CHR, ".rda", sep="")))
234
-##			rm(cnSet); gc()
235
-##		} else {
236
-##			return(cnSet)
237
-##		}
238
-##	}
239
-##	saved.objects <- list.files(cnOptions[["outdir"]], pattern=cnFile, full.names=TRUE)		
240
-##	return(saved.objects)
316
+##			##cnOptions[["batch"]] <- cnOptions[["batch"]][snpI[["SNR"]]  >= SNRmin]
317
+####			if(isPackageLoaded("ff")){
318
+####				ca <- as.matrix(CA[row.index, sample.index])
319
+####				cb <- as.matrix(CB[row.index, sample.index])
320
+####			} else{
321
+####				dns <- dimnames(A[row.index, sample.index])
322
+####				cb <- ca <- matrix(NA, nr=length(row.index), nc=length(sample.index), dimnames=dns)
323
+####			}
324
+####			cnSet <- new("CNSet",
325
+####				     alleleA=as.matrix(A[row.index, sample.index]),
326
+####				     alleleB=as.matrix(B[row.index, sample.index]),
327
+####				     call=as.matrix(calls[row.index, sample.index]),
328
+####				     callProbability=as.matrix(confs[row.index, sample.index]),
329
+####				     CA=ca,
330
+####				     CB=cb,
331
+####				     featureData=annotatedDataFrameFrom(as.matrix(A[row.index, sample.index]), byrow=TRUE),
332
+####				     phenoData=pD[sample.index, ],
333
+####				     protocolData=protocoldata[sample.index, ])
334
+##			##Verify this is correct
335
+####			annotation(cnSet) <- cnOptions[["cdfName"]]
336
+####			featureNames(cnSet) <- fns[row.index]
337
+##			##add chromosome, position, isSnp
338
+####			cnSet <- annotate(cnSet)
339
+####			if(any(cnSet$SNR > SNRmin)){
340
+####				if(CHR == chromosome[1]) message(paste("Excluding samples with SNR < ", SNRmin))
341
+####				cnSet <- cnSet[, cnSet$SNR >= SNRmin]
342
+####			}
343
+####			featureData(cnSet) <- lm.parameters(cnSet, cnOptions)
344
+##			if(CHR > 23) next()
345
+##			cnSet <- computeCopynumber(object[chromosome(object) == CHR, sample.index])
346
+####			if(!isPackageLoaded("ff") & i == whichBatch[1]) cnParams <- initializeParamObject(list(featureNames(cnSet), unique(cnOptions[["batch"]])[whichBatch]))
347
+####			if(!isPackageLoaded("ff")) {
348
+####				row.index <- 1:nrow(cnSet)
349
+####			} else {
350
+####				##Warning message:
351
+####				##In d[[1]] * d[[2]] : NAs produced by integer overflow
352
+####				CA[row.index, sample.index] <- cnSet@assayData[["CA"]]
353
+####				CB[row.index, sample.index] <- cnSet@assayData[["CB"]]
354
+####			}
355
+####			cnParams <- updateParams(cnParams, cnSet, row.index, batch=unique(batch)[i])
356
+##			## keep only chromosome, position, and isSnp
357
+####			featureData(cnSet) <- featureData(cnSet)[, 1:3]
358
+####			if(!isPackageLoaded("ff")){
359
+####				save(cnSet, file=paste(cnFile, "_", PLATE, "_", CHR, ".rda", sep=""))
360
+####				save(cnParams, file=paste(outdir, "cnParams_", PLATE, "_", CHR, ".rda", sep=""))
361
+####			}
362
+##		} ## end of batch loop
363
+##	} ## end of chromosome loop
364
+####	if(isPackageLoaded("ff")) {
365
+####		close(cnParams)
366
+####		close(A); close(B)		
367
+####		close(CA); close(CB)
368
+####		save(CA, file=CAFile)
369
+####		save(CB, file=CBFile)
370
+####		close(calls); close(confs)
371
+####		return()
372
+####	}
373
+##	return(cnSet)
241 374
 ##}
242 375
 
243 376
 
244 377
 
245
-##crlmmCopynumber <- function(filenames, cnOptions, object, ...){
246
-##	cnOpts <- cnOptions
247
-##	batchSize <- cnOptions[["crlmmBatchSize"]]
248
-##	batch <- cnOptions[["batch"]]
249
-##	outdir <- cnOptions[["outdir"]]
250
-##	##if necessary, split plates in separate batches
251
-##	if(length(filenames) > batchSize){
252
-##		L <- length(table(batch))
253
-##		message("Processing the files in")
254
-##		batchList <- split(names(table(batch)), rep(1:L, each=10, length.out=L))
255
-##		for(i in seq(along=batchList)){
256
-##			index <- as.character(batch) %in% as.character(batchList[[i]])
257
-##			fns <- filenames[index]
258
-##			cnOpts[["batch"]] <- cnOptions[["batch"]][index]
259
-##			cnOpts[["AFile"]] <- paste(outdir, "/", paste(cnOptions[["AFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
260
-##			cnOpts[["BFile"]] <- paste(outdir, "/", paste(cnOptions[["BFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
261
-##			cnOpts[["CAFile"]] <- paste(outdir, "/", paste(cnOptions[["CAFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
262
-##g			cnOpts[["CBFile"]] <- paste(outdir, "/", paste(cnOptions[["CBFile"]], i, sep="_crlmmBatch"), ".rda", sep="")			
263
-##			cnOpts[["callsFile"]] <- paste(outdir, "/", paste(cnOptions[["callsFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
264
-##			cnOpts[["confsFile"]] <- paste(outdir, "/", paste(cnOptions[["confsFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
265
-##			cnOpts[["snprmaFile"]] <- paste(outdir, "/", paste(cnOptions[["snprmaFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
266
-##			cnOpts[["cnrmaFile"]] <- paste(outdir, "/", paste(cnOptions[["cnrmaFile"]], i, sep="_crlmmBatch"), ".rda", sep="")
267
-##			cnOpts[["cnFile"]] <- file.path(outdir, paste(cnOptions[["cnFile"]], i, sep="_crlmmBatch"))
268
-##			cnOpts[["rgFile"]] <- file.path(outdir, paste(cnOptions[["rgFile"]], i, sep="_crlmmBatch"))
269
-##			crlmmCopynumber.crlmmBatch(fns, cnOpts, object, ...)
270
-##		}
271
-##	} else {
272
-##		cnOpts[["AFile"]] <- paste(outdir, "/", cnOptions[["AFile"]], ".rda", sep="")
273
-##		cnOpts[["BFile"]] <- paste(outdir, "/", cnOptions[["BFile"]], ".rda", sep="")
274
-##		cnOpts[["callsFile"]] <- paste(outdir, "/", cnOptions[["callsFile"]], ".rda", sep="")
275
-##		cnOpts[["confsFile"]] <- paste(outdir, "/", cnOptions[["confsFile"]], ".rda", sep="")
276
-##		cnOpts[["snprmaFile"]] <- paste(outdir, "/",cnOptions[["snprmaFile"]], ".rda", sep="")
277
-##		cnOpts[["cnrmaFile"]] <- paste(outdir, "/", cnOptions[["cnrmaFile"]], ".rda", sep="")
278
-##		cnOpts[["cnFile"]] <- file.path(outdir, cnOptions[["cnFile"]])
279
-##		cnOpts[["rgFile"]] <- file.path(outdir, cnOptions[["rgFile"]])
280
-##		crlmmCopynumber.crlmmBatch(filenames, cnOpts, object, ...)
281
-##	}
282
-##}
283 378
 
284 379
 
285
-crlmmCopynumber <- function(filenames, cnOptions, object, ...){
286
-	set.seed(cnOptions[["seed"]])  ##for reproducibility
287
-	if(!missing(object)){
288
-		stopifnot(class(object) == "CNSet")
289
-		createIntermediateObjects <- FALSE
290
-	} else {
291
-		createIntermediateObjects <- TRUE
292
-		## 33G for 1239 files
293
-		cnOptions[["save.it"]] <- TRUE
294
-		##crlmmResults <- crlmmWrapper(filenames, cnOptions, ...)
295
-		crlmmWrapper(filenames, cnOptions, ...)
296
-	}
297
-	verbose <- cnOptions[["verbose"]]
298
-	load.it <- cnOptions[["load.it"]]
299
-	outdir <- cnOptions[["outdir"]]
300
-	snprmaFile <- cnOptions[["snprmaFile"]]
301
-	cnFile <- cnOptions[["cnFile"]]
302
-	cnrmaFile <- cnOptions[["cnrmaFile"]]
303
-	snprmaFile <- cnOptions[["snprmaFile"]]
304
-	callsFile <- cnOptions[["callsFile"]]
305
-	confsFile <- cnOptions[["confsFile"]]
306
-	AFile <- cnOptions[["AFile"]]
307
-	BFile <- cnOptions[["BFile"]]
308
-	CAFile <- cnOptions[["CAFile"]]
309
-	CBFile <- cnOptions[["CBFile"]]
310
-	path <- system.file("extdata", package=paste(cnOptions[["cdfName"]], "Crlmm", sep=""))
311
-	load(file.path(path, "snpProbes.rda"))
312
-	snpProbes <- get("snpProbes")
313
-	load(file.path(path, "cnProbes.rda"))
314
-	cnProbes <- get("cnProbes")	
315
-	k <- grep("chr", colnames(snpProbes))
316
-	if(verbose) message("Loading quantile-normalized intensities...")
317
-	##cwd <- getwd()
318
-	##setwd(dirname(snprmaFile[1]))
319
-	load(snprmaFile)
320
-	res <- get("res")
321
-	load(AFile)
322
-	if(isPackageLoaded("ff")) open(A)
323
-	load(BFile)
324
-	if(isPackageLoaded("ff")) open(B)	
325
-	message("Loading genotype calls...")	
326
-	load(callsFile)
327
-	calls <- get("calls")
328
-	if(isPackageLoaded("ff")) open(calls)	
329
-	load(confsFile)
330
-	confs <- get("confs")
331
-	if(isPackageLoaded("ff")) open(confs)	
332
-	##fns <- rownames(A)
333
-	fns <- c(res[["gns"]], res[["cnnames"]])
334
-	if(length(fns) != nrow(A)) stop("check featurenames. fns should be the rownames of A")
335
-	nr <- nrow(A); nc <- ncol(A)
336
-	if(isPackageLoaded("ff")){
337
-		if(file.exists(CAFile)){
338
-			load(CAFile)
339
-			if(is.null(dim(CA)) | !all(dim(CA) == dim(A))) {
340
-				unlink(CAFile)
341
-				CA <- initializeBigMatrix(nr, nc)
342
-			}
343
-		} else CA <- initializeBigMatrix(nr, nc)
344
-		if(file.exists(CBFile)){
345
-			load(CBFile)
346
-			if(is.null(dim(CB)) | !all(dim(CB) == dim(A))) {			     
347
-				CB <- initializeBigMatrix(nr, nc)
348
-			}
349
-		} else CB <- initializeBigMatrix(nr, nc)
350
-		open(CA)
351
-		open(CB)
352
-	} 
353
-	##cnFile <- cnOptions[["cnFile"]]
354
-	chromosome <- cnOptions[["chromosome"]]
355
-	SNRmin <- cnOptions[["SNRmin"]]
356
-	ll <- 1
357
-	scanDates <- data.frame(ScanDate=sapply(filenames, celfileDate))
358
-	rownames(scanDates) <- basename(rownames(scanDates))
359
-	protocoldata <- new("AnnotatedDataFrame",
360
-			    data=scanDates,
361
-			    varMetadata=data.frame(labelDescription=colnames(scanDates),
362
-			    row.names=colnames(scanDates)))
363
-	##load(file.path(cnOptions[["outdir"]], "sampleStats.rda"))
364
-	sampleStats <- data.frame(SKW=res$SKW,
365
-				  SNR=res$SNR,
366
-				  mixtureParams=res$mixtureParams,
367
-				  gender=res$gender,
368
-				  batch=cnOptions[["batch"]])
369
-	##sampleStats, batch=cnOptions[["batch"]])
370
-	pD <- new("AnnotatedDataFrame",
371
-		  data=sampleStats,
372
-		  varMetadata=data.frame(labelDescription=colnames(sampleStats)))
373
-	batch <- cnOptions[["batch"]]
374
-	if(isPackageLoaded("ff")){
375
-##		featureDataFF <- ffdf(position=ff(rep(0L, length(fns)), vmode="integer", finalizer="close"),
376
-##				      chromosome=ff(rep(0L, length(fns)), vmode="integer", finalizer="close"),
377
-##				      isSnp=ff(rep(0L, length(fns)), vmode="integer", finalizer="close"),
378
-##				      row.names=fns)
379
-##		fD <- vector("list", length(chromosome))
380
-		featureDataFF <- ff(dim=c(nrow(A), 17*length(unique(batch))+3), vmode="double", finalizer="close",
381
-				    overwrite=TRUE)
382
-	}
383
-	for(CHR in chromosome){
384
-		snps <- rownames(snpProbes)[snpProbes[, k] == CHR]
385
-		cns <- rownames(cnProbes)[cnProbes[, k] == CHR]		
386
-		index.snps <- match(snps, fns)
387
-		index.cn <- match(cns, fns)
388
-		row.index <- c(index.snps, index.cn)
389
-		cat("Chromosome ", CHR, "\n")
390
-		if(isPackageLoaded("ff")){		
391
-			fD.batch <- vector("list", length(unique(batch)))
392
-		}
393
-		for(i in seq(along=unique(batch))){
394
-			PLATE <- unique(batch)[i]
395
-			message("Plate: ", PLATE)
396
-			sample.index <- which(batch==PLATE)
397
-			##cnOptions[["batch"]] <- cnOptions[["batch"]][snpI[["SNR"]]  >= SNRmin]
398
-			if(isPackageLoaded("ff")){
399
-				ca <- CA[row.index, sample.index]
400
-				cb <- CB[row.index, sample.index]
401
-			} else{
402
-				##dns <- dimnames(A[row.index, sample.index])
403
-				cb <- ca <- matrix(NA, nr=length(row.index), nc=length(sample.index))##, dimnames=dns)
404
-			}
405
-			cnSet <- new("CNSet",
406
-				     alleleA=A[row.index, sample.index],
407
-				     alleleB=B[row.index, sample.index],
408
-				     call=calls[row.index, sample.index],
409
-				     callProbability=confs[row.index, sample.index],
410
-				     CA=ca,
411
-				     CB=cb,
412
-				     featureData=annotatedDataFrameFrom(A[row.index, sample.index], byrow=TRUE),
413
-				     phenoData=pD[sample.index, ],
414
-				     protocolData=protocoldata[sample.index, ])
415
-			##Verify this is correct
416
-			annotation(cnSet) <- cnOptions[["cdfName"]]
417
-			featureNames(cnSet) <- fns[row.index]
418
-			##add chromosome, position, isSnp
419
-			cnSet <- annotate(cnSet)
420
-##			if(any(cnSet$SNR > SNRmin)){
421
-##				if(CHR == chromosome[1]) message(paste("Excluding samples with SNR < ", SNRmin))
422
-##				cnSet <- cnSet[, cnSet$SNR >= SNRmin]
423
-##			}
424
-			featureData(cnSet) <- lm.parameters(cnSet, cnOptions)
425
-			if(CHR < 24){
426
-				cnSet <- computeCopynumber(cnSet, cnOptions)
427
-				if(isPackageLoaded("ff")){
428
-					if(i == 1) {  ## first batch
429
-						featureDataFF[row.index, 1:20] <- as.matrix(fData(cnSet))
430
-						fcol <- 20
431
-					} else {
432
-						featureDataFF[row.index, (fcol+1):(fcol+17)]
433
-						fcol <- fcol+17
434
-						##remove redundant columns
435
-					}
436
-					CA[row.index, sample.index] <- cnSet@assayData[["CA"]]
437
-					CB[row.index, sample.index] <- cnSet@assayData[["CB"]]
438
-					next() ## next batch
439
-				} else {
440
-					save(cnSet, file=paste(cnFile, "_", PLATE, "_", CHR, ".rda", sep=""))
441
-				}
442
-			} else break()  ## break out of the batch loop.  Go to next chromosome
443
-			##		if(length(chromosome) == 1 & length(unique(batch)) == 1){
444
-			##			if(cnOptions[["save.cnset"]] & !isLoadedPackage("ff"))
445
-			##				save(cnSet, file=paste(cnFile, "_", PLATE, "_", CHR, ".rda", sep=""))
446
-			##			return(cnSet)
447
-			##		} else {
448
-			##			##either multiple chromosome or multiple batches...save to file if ff is not loaded
449
-			##			if(!isPackageLoaded("ff")){
450
-			##				save(cnSet, file=paste(cnFile, "_", PLATE, "_", CHR, ".rda", sep=""))
451
-			##			}
452
-			##			rm(cnSet); gc()
453
-			##	}
454
-		} ## end of batch loop
455
-	} ## end of chromosome loop
456
-	if(isPackageLoaded("ff")) {
457
-		close(A); close(B)
458
-		close(featureDataFF)
459
-		close(CA); close(CB)
460
-		save(featureDataFF, file=file.path(outdir, "featureDataFF.rda"))
461
-		save(CA, file=CAFile)
462
-		save(CB, file=CBFile)
463
-		close(calls); close(confs)
464
-		ffSet <- new("FFSet",
465
-			     A=A,
466
-			     B=B,
467
-			     snpCall=calls,
468
-			     callProbability=confs,
469
-			     CA=CA,
470
-			     CB=CB)
471
-		return(ffSet)
472
-	}
473
-}
474 380
 
475 381
 
476
-loadIlluminaRG <- function(rgFile, crlmmFile, load.it, save.it,...){
477
-##	if(missing(rgFile)){
478
-##		##stop("must specify 'rgFile'.")
479
-##		rgFile <- file.path(dirname(crlmmFile), "rgFile.rda")
480
-##		message("rgFile not specified.  Using ", rgFile)
382
+##loadIlluminaRG <- function(rgFile, crlmmFile, load.it, save.it,...){
383
+####	if(missing(rgFile)){
384
+####		##stop("must specify 'rgFile'.")
385
+####		rgFile <- file.path(dirname(crlmmFile), "rgFile.rda")
386
+####		message("rgFile not specified.  Using ", rgFile)
387
+####	}
388
+##	if(!load.it){
389
+##		RG <- readIdatFiles(...)
390
+##		if(save.it) save(RG, file=rgFile)
481 391
 ##	}
482
-	if(!load.it){
483
-		RG <- readIdatFiles(...)
484
-		if(save.it) save(RG, file=rgFile)
485
-	}
486
-	if(load.it & !file.exists(rgFile)){
487
-		message("load.it is TRUE, but rgFile not present.  Attempting to read the idatFiles.")
488
-		RG <- readIdatFiles(...)
489
-		if(save.it) save(RG, file=rgFile)
490
-	}
491
-	if(load.it & file.exists(rgFile)){
492
-		message("Loading RG file")
493
-		load(rgFile)
494
-		RG <- get("RG")
495
-	}
496
-	return(RG)
497
-}
498
-
499
-loadIlluminaCallSet <- function(crlmmFile, snprmaFile, RG, load.it, save.it, cdfName){
500
-	if(!file.exists(crlmmFile) | !load.it){		
501
-		callSet <- crlmmIllumina(RG=RG,
502
-					 cdfName=cdfName,
503
-					 sns=sampleNames(RG),
504
-					 returnParams=TRUE,
505
-					 save.it=TRUE,
506
-					 intensityFile=snprmaFile)
507
-		if(save.it) save(callSet, file=crlmmFile)
508
-	} else {
509
-		message("Loading ", crlmmFile, "...")
510
-		load(crlmmFile)
511
-		callSet <- get("callSet")
512
-	}
513
-	protocolData(callSet) <- protocolData(RG)
514
-	return(callSet)
515
-}
392
+##	if(load.it & !file.exists(rgFile)){
393
+##		message("load.it is TRUE, but rgFile not present.  Attempting to read the idatFiles.")
394
+##		RG <- readIdatFiles(...)
395
+##		if(save.it) save(RG, file=rgFile)
396
+##	}
397
+##	if(load.it & file.exists(rgFile)){
398
+##		message("Loading RG file")
399
+##		load(rgFile)
400
+##		RG <- get("RG")
401
+##	}
402
+##	return(RG)
403
+##}
404
+##
405
+##loadIlluminaCallSet <- function(crlmmFile, snprmaFile, RG, load.it, save.it, cdfName){
406
+##	if(!file.exists(crlmmFile) | !load.it){		
407
+##		callSet <- crlmmIllumina(RG=RG,
408
+##					 cdfName=cdfName,
409
+##					 sns=sampleNames(RG),
410
+##					 returnParams=TRUE,
411
+##					 save.it=TRUE,
412
+##					 intensityFile=snprmaFile)
413
+##		if(save.it) save(callSet, file=crlmmFile)
414
+##	} else {
415
+##		message("Loading ", crlmmFile, "...")
416
+##		load(crlmmFile)
417
+##		callSet <- get("callSet")
418
+##	}
419
+##	protocolData(callSet) <- protocolData(RG)
420
+##	return(callSet)
421
+##}
516 422
 
517 423
 
518 424
 ##loadAffyCallSet <- function(filenames, confsFile, callsFile, snprmaFile, load.it, save.it,  cdfName){
... ...
@@ -544,109 +450,121 @@ loadIlluminaCallSet <- function(crlmmFile, snprmaFile, RG, load.it, save.it, cdf
544 450
 ##	return(cnrmaResult)
545 451
 ##}
546 452
 
547
-loadIlluminaCnrma <- function(){
548
-	if(exists("cnAB")){
549
-		np.A <- as.integer(cnAB$A)
550
-		np.B <- as.integer(cnAB$B)
551
-		np <- ifelse(np.A > np.B, np.A, np.B)
552
-		np <- matrix(np, nrow(cnAB$A), ncol(cnAB$A))
553
-		rownames(np) <- cnAB$gns
554
-		colnames(np) <- cnAB$sns
555
-		cnAB$NP <- np
556
-		##sampleNames(callSet) <- res$sns
557
-		sampleNames(callSet) <- cnAB$sns
558
-		cnrmaResult <- get("cnAB")
559
-	} else cnrmaResult <- NULL
560
-	return(cnrmaResult)
561
-}
562
-
563
-crlmmWrapper <- function(filenames, cnOptions, ...){
564
-	crlmmBatchSize <- cnOptions[["crlmmBatchSize"]]
565
-	cdfName <- cnOptions[["cdfName"]]
566
-	load.it <- cnOptions[["load.it"]]
567
-	save.it <- cnOptions[["save.it"]]
568
-	callsFile <- cnOptions[["callsFile"]]
569
-	confsFile <- cnOptions[["confsFile"]]
570
-	AFile=cnOptions[["AFile"]]
571
-	BFile=cnOptions[["BFile"]]	
572
-	snprmaFile=cnOptions[["snprmaFile"]]
573
-	cnrmaFile=cnOptions[["cnrmaFile"]]
574
-	rgFile=cnOptions[["rgFile"]]
575
-	outdir <- cnOptions[["outdir"]]
576
-	if(missing(cdfName)) stop("cdfName is missing -- a valid cdfName is required.  See crlmm:::validCdfNames()")
577
-	platform <- whichPlatform(cdfName)
578
-	if(!(platform %in% c("affymetrix", "illumina"))){
579
-		stop("Only 'affymetrix' and 'illumina' platforms are supported at this time.")
580
-	} else {
581
-		message("Checking whether annotation package for the ", platform, " platform is available")
582
-		if(!isValidCdfName(cdfName)){
583
-			cat("FALSE\n")
584
-			stop(cdfName, " is not a valid entry.  See crlmm:::validCdfNames(platform)")
585
-		} else cat("TRUE\n")
586
-	}
587
-	if(platform == "illumina") RG <- loadIlluminaRG(rgFile, callsFile, load.it, save.it)
588
-	if(!(file.exists(dirname(callsFile)))) stop(dirname(callsFile), " does not exist.")
589
-	if(!(file.exists(dirname(snprmaFile)))) stop(dirname(snprmaFile), " does not exist.")
590
-##	if(load.it){
591
-##		if(!file.exists(callsFile)){
592
-##			message("load.it is TRUE, but ", callsFile, " does not exist.  Rerunning the genotype calling algorithm") 
593
-##			##load.it <- FALSE
594
-##		}
453
+##loadIlluminaCnrma <- function(){
454
+##	if(exists("cnAB")){
455
+##		np.A <- as.integer(cnAB$A)
456
+##		np.B <- as.integer(cnAB$B)
457
+##		np <- ifelse(np.A > np.B, np.A, np.B)
458
+##		np <- matrix(np, nrow(cnAB$A), ncol(cnAB$A))
459
+##		rownames(np) <- cnAB$gns
460
+##		colnames(np) <- cnAB$sns
461
+##		cnAB$NP <- np
462
+##		##sampleNames(callSet) <- res$sns
463
+##		sampleNames(callSet) <- cnAB$sns
464
+##		cnrmaResult <- get("cnAB")
465
+##	} else cnrmaResult <- NULL
466
+##	return(cnrmaResult)
467
+##}
468
+##
469
+##crlmmWrapper <- function(filenames, cnOptions, ...){
470
+##	crlmmBatchSize <- cnOptions[["crlmmBatchSize"]]
471
+##	cdfName <- cnOptions[["cdfName"]]
472
+##	load.it <- cnOptions[["load.it"]]
473
+##	save.it <- cnOptions[["save.it"]]
474
+##	callsFile <- cnOptions[["callsFile"]]
475
+##	confsFile <- cnOptions[["confsFile"]]
476
+##	AFile=cnOptions[["AFile"]]
477
+##	BFile=cnOptions[["BFile"]]	
478
+##	snprmaFile=cnOptions[["snprmaFile"]]
479
+##	cnrmaFile=cnOptions[["cnrmaFile"]]
480
+##	rgFile=cnOptions[["rgFile"]]
481
+##	protocolFile <- cnOptions[["protocolFile"]]
482
+##	outdir <- cnOptions[["outdir"]]
483
+##	if(missing(cdfName)) stop("cdfName is missing -- a valid cdfName is required.  See crlmm:::validCdfNames()")
484
+##	platform <- whichPlatform(cdfName)
485
+##	if(!(platform %in% c("affymetrix", "illumina"))){
486
+##		stop("Only 'affymetrix' and 'illumina' platforms are supported at this time.")
487
+##	} else {
488
+##		if(!isValidCdfName(cdfName)){
489
+##			stop(cdfName, " is not a valid entry.  See crlmm:::validCdfNames(platform)")
490
+##		} else  message("Using the annotation package ", cdfName, " for this ", platform, " platform")
595 491
 ##	}
596
-	if(platform == "affymetrix"){
597
-##		if(!file.exists(callsFile) | !load.it){
598
-			crlmm(filenames=filenames,
599
-			      cdfName=cdfName,
600
-			      save.it=TRUE,
601
-			      load.it=load.it,
602
-			      snprmaFile=snprmaFile,
603
-			      callsFile=callsFile,
604
-			      confsFile=confsFile,
605
-			      AFile=AFile,
606
-			      BFile=BFile,
607
-			      crlmmBatchSize=crlmmBatchSize)
608
-##		}
609
-	}
610
-	gc()
611
-	if(platform == "illumina") {
612
-		browser()
613
-		callSet <- loadIlluminaCallSet(callsFile, snprmaFile, RG, load.it, save.it, cdfName)
614
-	}
615
-	if(platform == "affymetrix"){
616
-		if(!file.exists(cnrmaFile) | !load.it){	
617
-			message("Quantile normalizing the copy number probes...")
618
-			## updates A matrix and saves cnrmaFile
619
-			cnrma(filenames=filenames, cdfName=cdfName, outdir=outdir, cnrmaFile=cnrmaFile, AFile=AFile)
620
-		} 
621
-	}
622
-##	if(!is.null(cnrmaResult)){
623
-##		for(CHR in chromosome){
624
-##			cat(CHR, " ")
625
-##			cnps <- rownames(cnProbes)[cnProbes[, k] == CHR]
626
-##			index.nps <- match(cnps, rownames(cnrmaResult[["NP"]]))
627
-##			NP <- cnrmaResult$NP[index.nps, ]
628
-##			save(NP, file=file.path(tmpdir, paste("NP_", CHR, ".rda", sep="")))
629
-##			rm(NP); gc()
492
+##	if(platform == "illumina") {
493
+##		if(!file.exists(rgFile)){
494
+##			if(load.it) message(rgFile, " does not exist and you chose to load.it.  Re-reading the R and G intensities from the IDAT files")
495
+##			sampleSheet <- cnOptions$sampleSheet
496
+##			ids <- cnOptions$ids
497
+##			arrayInfoColNames <- cnOptions$arrayInfoColNames
498
+##			highDensity <- cnOptions$highDensity
499
+##			##this is either an NChannelSet object, or a list of pointers to the ff objects
500
+##			RG <- readIdatFiles(sampleSheet=sampleSheet,
501
+##					    arrayNames=basename(filenames),
502
+##					    ids=ids,
503
+##					    path=dirname(filenames),
504
+##					    highDensity=highDensity,
505
+##					    fileExt=cnOptions$fileExt[1:2],
506
+##					    sep=cnOptions$fileExt[[3]],
507
+##					    saveDate=FALSE,  ## I do this earlier
508
+##					    verbose=cnOptions[["verbose"]],
509
+##					    protocolFile=protocolFile)
510
+##			if(save.it) save(RG, file=rgFile)
511
+##			##RG <- loadIlluminaRG(rgFile, callsFile, load.it, save.it)
512
+##		} else{
513
+##			if(!isPackageLoaded("ff")) {load(rgFile); RG <- get("RG")}
630 514
 ##		}
631 515
 ##	}
632
-	if(!save.it){
633
-		message("Cleaning up")		
634
-		unlink(snprmaFile); unlink(cnrmaFile)
635
-	}
636
-}
637
-
638
-
516
+##	if(!(file.exists(dirname(callsFile)))) stop(dirname(callsFile), " does not exist.")
517
+##	if(!(file.exists(dirname(snprmaFile)))) stop(dirname(snprmaFile), " does not exist.")
518
+##	if(platform == "affymetrix"){
519
+##		crlmm(filenames=filenames,
520
+##		      cdfName=cdfName,
521
+##		      save.it=TRUE,
522
+##		      load.it=load.it,
523
+##		      snprmaFile=snprmaFile,
524
+##		      callsFile=callsFile,
525
+##		      confsFile=confsFile,
526
+##		      AFile=AFile,
527
+##		      BFile=BFile,
528
+##		      crlmmBatchSize=crlmmBatchSize,
529
+##		      SNRMin=cnOptions[["SNRMin"]])
530
+##	}
531
+##	gc()
532
+##	if(platform == "illumina") {
533
+##		callSet <- crlmmIllumina(RG=RG,
534
+##					 cdfName=cdfName,
535
+##					 sns=sampleNames(RG),
536
+##					 returnParams=TRUE,
537
+##					 save.it=TRUE,
538
+##					 snprmaFile=snprmaFile,
539
+##					 callsFile=callsFile,
540
+##					 confsFile=confsFile,
541
+##					 AFile=AFile,
542
+##					 BFile=BFile)
543
+##		##callSet <- loadIlluminaCallSet(callsFile, snprmaFile, RG, load.it, save.it, cdfName)
544
+##	}
545
+##	if(platform == "affymetrix"){
546
+##		if(!file.exists(cnrmaFile) | !load.it){	
547
+##			message("Quantile normalizing the copy number probes...")
548
+##			## updates A matrix and saves cnrmaFile
549
+##			cnrma(filenames=filenames, cdfName=cdfName, outdir=outdir, verbose=cnOptions[["verbose"]], cnrmaFile=cnrmaFile, AFile=AFile, snprmaFile=snprmaFile)
550
+##		} 
551
+##	}
552
+####	if(!is.null(cnrmaResult)){
553
+####		for(CHR in chromosome){
554
+####			cat(CHR, " ")
555
+####			cnps <- rownames(cnProbes)[cnProbes[, k] == CHR]
556
+####			index.nps <- match(cnps, rownames(cnrmaResult[["NP"]]))
557
+####			NP <- cnrmaResult$NP[index.nps, ]
558
+####			save(NP, file=file.path(tmpdir, paste("NP_", CHR, ".rda", sep="")))
559
+####			rm(NP); gc()
560
+####		}
561
+####	}
562
+##	if(!save.it){
563
+##		message("Cleaning up")		
564
+##		unlink(snprmaFile); unlink(cnrmaFile)
565
+##	}
566
+##}
639 567
 
640
-whichPlatform <- function(cdfName){
641
-	index <- grep("genomewidesnp", cdfName)
642
-	if(length(index) > 0){
643
-		platform <- "affymetrix"
644
-	} else{
645
-		index <- grep("human", cdfName)
646
-		platform <- "illumina"
647
-	}
648
-	return(platform)
649
-}
650 568
 
651 569
 
652 570
 # steps: quantile normalize hapmap: create 1m_reference_cn.rda object
... ...
@@ -693,25 +611,25 @@ whichPlatform <- function(cdfName){
693 611
 ##	return(res3)
694 612
 ##}
695 613
 
696
-cnrma <- function(filenames, cdfName, sns, seed=1, verbose=FALSE, outdir, cnrmaFile, AFile){
614
+cnrma <- function(object, filenames){
615
+	ops <- crlmmOptions(object)
616
+	cdfName <- annotation(object)
617
+	seed <- ops$seed
618
+	verbose <- ops$verbose
619
+	##cnrmaFile <- ops$cnrmaFile
620
+	A <- A(object)
697 621
 	if(missing(cdfName)) stop("must specify cdfName")
698 622
 	pkgname <- getCrlmmAnnotationName(cdfName)
699 623
 	require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available")
700
-	if (missing(sns)) sns <- basename(filenames)
624
+	sns <- basename(filenames)
701 625
         loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
702 626
 	fid <- getVarInEnv("npProbesFid")
703 627
 	set.seed(seed)
704 628
 	idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything
705 629
 	SKW <- vector("numeric", length(filenames))
706
-	load(AFile)
707
-	if(isPackageLoaded("ff")) open(A)
708
-	path <- system.file("extdata", package=pkgname)
709
-	load(file.path(path, "cnProbes.rda"))
710
-	cnProbes <- get("cnProbes")
711
-	cnps <- rownames(cnProbes)
712
-	cnps <- cnps[cnps %in% rownames(A)]
713
-	index <- match(cnps, rownames(A), nomatch=0)
714
-	index <- index[index != 0]
630
+	index <- match(names(fid), featureNames(object))
631
+	stopifnot(identical(featureNames(object)[index], names(fid)))
632
+	if(length(index) < 1) stop("None of the names for the nonpolymorphic probes in the annotation package match the names stored in the snprmaFile.")
715 633
 	if(verbose){
716 634
 		message("Processing ", length(filenames), " files.")
717 635
 		if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3)
... ...
@@ -723,26 +641,21 @@ cnrma <- function(filenames, cdfName, sns, seed=1, verbose=FALSE, outdir, cnrmaF
723 641
 		loader("5.0_reference_cn.rda", .crlmmPkgEnv, pkgname)
724 642
 	}
725 643
 	reference <- getVarInEnv("reference")
726
-	##if(!is.matrix(reference)) stop("target distribution for quantile normalization not available.")
727 644
 	for(i in seq(along=filenames)){
728 645
 		y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
729 646
 		x <- log2(y[idx2])
730 647
 		SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3)
731 648
 		rm(x); gc()
732 649
 		A[index, i] <- as.integer(normalize.quantiles.use.target(y, target=reference))
733
-		##NP[, i] <- as.integer(normalize.quantiles.use.target(y, target=reference))
734 650
 		if (verbose)
735 651
 			if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
736 652
 			else cat(".")
737 653
 		rm(y); gc()
738 654
 	}
739
-	message("Done")
740
-	cnrmaResults <- list(SKW=SKW)
741
-	save(cnrmaResults, file=cnrmaFile)
742
-	if(isPackageLoaded("ff")) close(A)
743
-	save(A, file=AFile)
744
-	rm(list=ls()); gc()
745
-	return()
655
+	cat("\nDone\n")
656
+	pData(object)$SKW_nonpolymorphic <- SKW
657
+	object@assayData[["alleleA"]] <- A
658
+	return(object)
746 659
 }
747 660
 
748 661
 getFlags <- function(object, PHI.THR){
... ...
@@ -760,7 +673,7 @@ getFlags <- function(object, PHI.THR){
760 673
 }
761 674
 
762 675
 
763
-instantiateObjects <- function(object, cnOptions){
676
+instantiateObjects <- function(object){
764 677
 	Ns <- matrix(NA, nrow(object), 5)
765 678
 	colnames(Ns) <- c("A", "B", "AA", "AB", "BB")
766 679
 	vA <- vB <- muB <- muA <- Ns
... ...
@@ -787,149 +700,38 @@ thresholdCopynumber <- function(object){
787 700
 	return(object)
788 701
 }
789 702
 
790
-##preprocessOptions <- function(callsFile="snpsetObject.rda",
791
-##			      snprmaFile="normalizedIntensities.rda",
792
-##			      rgFile="rgFile.rda"){
793
-##
794
-##}
795
-
796
-cnOptions <- function(outdir="./",
797
-		      cdfName,
798
-		      AFile="A.rda",
799
-		      BFile="B.rda",
800
-		      CAFile="CA.rda",
801
-		      CBFile="CB.rda",
802
-		      callsFile="genotypes.rda",
803
-		      rgFile="rgFile.rda",
804
-		      cnFile="cnSet",
805
-		      cnrmaFile="cn_rmaResult.rda",
806
-		      ##npBinary="NP.bin",  ##name of file.backed.big.matrix
807
-		      ##npDesc="NP.desc",
808
-		      snprmaFile="snp_rmaResult.rda",
809
-		      ##callsFile="calls.desc",
810
-		      ##confsFile="confsFile.desc",
811
-		      confsFile="confs.rda",
812
-		      save.it=TRUE,
813
-		      save.cnset=TRUE,
814
-		      load.it=TRUE,
815
-		      MIN.OBS=3,
816
-		      MIN.SAMPLES=10,
817
-		      batch=NULL,
818
-		      DF.PRIOR=50,
819
-		      bias.adj=FALSE,
820
-		      prior.prob=rep(1/4, 4),
821
-		      SNRmin=4,
822
-		      chromosome=1:24,
823
-		      seed=123,
824
-		      verbose=TRUE,
825
-		      GT.CONF.THR=0.99,
826
-		      PHI.THR=2^6,##used in nonpolymorphic fxn for training
827
-		      nHOM.THR=5, ##used in nonpolymorphic fxn for training
828
-		      MIN.NU=2^3,
829
-		      MIN.PHI=2^3,
830
-		      THR.NU.PHI=TRUE,
831
-		      thresholdCopynumber=TRUE,
832
-		      unlink=TRUE,
833
-		      use.poe=FALSE,
834
-		      crlmmBatchSize=1000,
835
-		      ...){
836
-##	if(isPackageLoaded("ff")){
837
-##		AFile=paste(AFile, "ff", sep="_")
838
-##		BFile=paste(BFile, "ff", sep="_")
839
-##		confsFile=paste(confsFile, "ff", sep="_")
840
-##		callsFile=paste(callsFile, "ff", sep="_")
841
-##		cnFile=paste(cnFile, "ff", sep="")
842
-##	}
843
-	if(length(batch) > 200) warning("This job may require a lot of RAM. Consider using the ff package in conjuction with crlmm, as described in the copynumber_ff vignette...")
844
-	if(use.poe) require(metaArray)
845
-	if(missing(cdfName)) stop("must specify cdfName")
846
-	if(!file.exists(outdir)){
847
-		message(outdir, " does not exist.  Trying to create it.")
848
-		dir.create(outdir, recursive=TRUE)
849
-	}
850
-	stopifnot(isValidCdfName(cdfName))
851
-##	if(hiddenMarkovModel){
852
-##		hmmOpts <- hmmOptions(...)
853
-##	}
854
-##	if(missing(snprmaFile)){
855
-##		snprmaFile <- file.path(outdir, "normalizedIntensities.rda")
856
-##	}
857
-	if(is.null(batch))
858
-		stop("must specify batch -- should be the same length as the number of files")
859
-	list(outdir=outdir,
860
-	     cdfName=cdfName,
861
-	     callsFile=file.path(outdir, callsFile),
862
-	     confsFile=file.path(outdir, confsFile),
863
-	     AFile=file.path(outdir, AFile),
864
-	     BFile=file.path(outdir, BFile),
865
-	     CAFile=file.path(outdir, CAFile),
866
-	     CBFile=file.path(outdir, CBFile),
867
-	     snprmaFile=file.path(outdir, snprmaFile),
868
-	     cnrmaFile=file.path(outdir, cnrmaFile),
869
-	     rgFile=file.path(outdir, rgFile),
870
-	     cnFile=file.path(outdir, cnFile),
871
-	     save.it=save.it,
872
-	     save.cnset=save.cnset,
873
-	     load.it=load.it,
874
-	     MIN.OBS=MIN.OBS,
875
-	     MIN.SAMPLES=MIN.SAMPLES,
876
-	     batch=batch,
877
-	     DF.PRIOR=DF.PRIOR,
878
-	     GT.CONF.THR=GT.CONF.THR,
879
-	     prior.prob=prior.prob,
880
-	     bias.adj=bias.adj,
881
-	     SNRmin=SNRmin,
882
-	     chromosome=chromosome,
883
-	     seed=seed,
884
-	     verbose=verbose,
885
-	     PHI.THR=PHI.THR,
886
-	     nHOM.THR=nHOM.THR,
887
-	     MIN.NU=MIN.NU,
888
-	     MIN.PHI=MIN.PHI,
889
-	     THR.NU.PHI=THR.NU.PHI,
890
-	     thresholdCopynumber=thresholdCopynumber,
891
-	     unlink=unlink,
892
-	     use.poe=use.poe,
893
-	     crlmmBatchSize=crlmmBatchSize)
894
-##	     use.bigmemory=use.bigmemory)
895
-##	     hiddenMarkovModel=hiddenMarkovModel,
896
-##	     circularBinarySegmentation=circularBinarySegmentation,
897
-##	     cbsOpts=cbsOpts,
898
-##	     hmmOpts=hmmOpts) ##remove SnpSuperSet object
899
-}
900
-
901 703
 ##linear model parameters
902
-lm.parameters <- function(object, cnOptions){
903
-	fD <- fData(object)
904
-	batch <- object$batch
905
-	uplate <- unique(batch)
906
-	parameterNames <- c(paste("tau2A", uplate, sep="_"),
907
-			    paste("tau2B", uplate, sep="_"),
908
-			    paste("sig2A", uplate, sep="_"),
909
-			    paste("sig2B", uplate, sep="_"),
910
-			    paste("nuA", uplate, sep="_"),
911
-			    paste("nuA.se", uplate, sep="_"),			    
912
-			    paste("nuB", uplate, sep="_"),
913
-			    paste("nuB.se", uplate, sep="_"),			    			    
914
-			    paste("phiA", uplate, sep="_"),
915
-			    paste("phiA.se", uplate, sep="_"),			    
916
-			    paste("phiB", uplate, sep="_"),
917
-			    paste("phiB.se", uplate, sep="_"),			    
918
-			    paste("phiAX", uplate, sep="_"),
919
-			    paste("phiBX", uplate, sep="_"),			    
920
-			    paste("corr", uplate, sep="_"),
921
-			    paste("corrA.BB", uplate, sep="_"),
922
-			    paste("corrB.AA", uplate, sep="_"))
923
-	pMatrix <- data.frame(matrix(numeric(0),
924
-				     nrow(object),
925
-				     length(parameterNames)),
926
-				     row.names=featureNames(object))
927
-	colnames(pMatrix) <- parameterNames
928
-	fD2 <- cbind(fD, pMatrix)
929
-	new("AnnotatedDataFrame", data=fD2,
930
-	    varMetadata=data.frame(labelDescription=colnames(fD2),
931
-	    row.names=colnames(fD2)))
932
-}
704
+##lm.parameters <- function(object, cnOptions){
705
+##	fD <- fData(object)
706
+##	batch <- object$batch
707
+##	uplate <- unique(batch)
708
+##	parameterNames <- c(paste("tau2A", uplate, sep="_"),
709
+##			    paste("tau2B", uplate, sep="_"),
710
+##			    paste("sig2A", uplate, sep="_"),
711
+##			    paste("sig2B", uplate, sep="_"),
712
+##			    paste("nuA", uplate, sep="_"),
713
+##			    paste("nuA.se", uplate, sep="_"),			    
714
+##			    paste("nuB", uplate, sep="_"),
715
+##			    paste("nuB.se", uplate, sep="_"),			    			    
716
+##			    paste("phiA", uplate, sep="_"),
717
+##			    paste("phiA.se", uplate, sep="_"),			    
718
+##			    paste("phiB", uplate, sep="_"),
719
+##			    paste("phiB.se", uplate, sep="_"),			    
720
+##			    paste("phiAX", uplate, sep="_"),
721
+##			    paste("phiBX", uplate, sep="_"),			    
722
+##			    paste("corr", uplate, sep="_"),
723
+##			    paste("corrA.BB", uplate, sep="_"),
724
+##			    paste("corrB.AA", uplate, sep="_"))
725
+##	pMatrix <- data.frame(matrix(numeric(0),
726
+##				     nrow(object),
727
+##				     length(parameterNames)),
728
+##				     row.names=featureNames(object))
729
+##	colnames(pMatrix) <- parameterNames
730
+##	fD2 <- cbind(fD, pMatrix)
731
+##	new("AnnotatedDataFrame", data=fD2,
732
+##	    varMetadata=data.frame(labelDescription=colnames(fD2),
733
+##	    row.names=colnames(fD2)))
734
+##}
933 735
 
934 736
 nonpolymorphic <- function(object, cnOptions, tmp.objects){
935 737
 	chromosome <- cnOptions[["chromosome"]]
... ...
@@ -937,6 +739,7 @@ nonpolymorphic <- function(object, cnOptions, tmp.objects){
937 739
 	CHR <- unique(chromosome(object))
938 740
 	verbose <- cnOptions[["verbose"]]
939 741
 	if(CHR != chromosome[1]) verbose <- FALSE
742
+	if(batch != unique(cnOptions[["batch"]])[1]) verbose <- FALSE
940 743
 	goodSnps <- function(object, PHI.THR, tmp.objects, nAA.THR, nBB.THR){
941 744
 		Ns <- tmp.objects[["Ns"]]
942 745
 		##Ns <- get("Ns", envir)
... ...
@@ -1215,12 +1018,12 @@ withinGenotypeMoments <- function(object, cnOptions, tmp.objects){
1215 1018
 	return(tmp.objects)
1216 1019
 }
1217 1020
 
1218
-
1219 1021
 oneBatch <- function(object, cnOptions, tmp.objects){
1220 1022
 	muA <- tmp.objects[["muA"]]
1221 1023
 	muB <- tmp.objects[["muB"]]
1222 1024
 	Ns <- tmp.objects[["Ns"]]
1223 1025
 	CHR <- unique(chromosome(object))
1026
+	PLATE <- unique(object$batch)
1224 1027
 	##---------------------------------------------------------------------------
1225 1028
 	## Impute sufficient statistics for unobserved genotypes (plate-specific)
1226 1029
 	##---------------------------------------------------------------------------
... ...
@@ -1236,7 +1039,8 @@ oneBatch <- function(object, cnOptions, tmp.objects){
1236 1039
 	size <- min(5000, length(index.complete))
1237 1040
 	if(size == 5000) index.complete <- sample(index.complete, 5000)
1238 1041
 	if(length(index.complete) < 200){
1239
-		stop("fewer than 200 snps pass criteria for predicting the sufficient statistics")
1042
+		warning("There are too few samples in plate ", PLATE, " to estimate the copy number for chromosome ", CHR, ".  CA,CB values are NAs")
1043
+		return(tmp.objects)
1240 1044
 	}
1241 1045
 	index <- tmp.objects[["index"]]
1242 1046
 	index[[1]] <- which(Ns[, "AA"] == 0 & (Ns[, "AB"] >= MIN.OBS & Ns[, "BB"] >= MIN.OBS))
... ...
@@ -1254,7 +1058,6 @@ oneBatch <- function(object, cnOptions, tmp.objects){
1254 1058
 		muA[index[[j]], j+2] <- mus[, 1]
1255 1059
 		muB[index[[j]], j+2] <- mus[, 2]
1256 1060
 	}
1257
-
1258 1061
 	if(CHR == 23){
1259 1062
 		nobsA <- Ns[, "A"] > MIN.OBS
1260 1063
 		nobsB <- Ns[, "B"] > MIN.OBS
... ...
@@ -1265,7 +1068,6 @@ oneBatch <- function(object, cnOptions, tmp.objects){
1265 1068
 		if(length(complete[[1]]) < 1 | length(complete[[2]]) < 1) stop("Too few observations to estimate the center for 'A' and 'B'  clusters on chrom X")		
1266 1069
 		size <- min(5000, length(complete[[1]]))
1267 1070
 		if(size == 5000) complete <- lapply(complete, function(x) sample(x, size))
1268
-	
1269 1071
 		index <- list()
1270 1072
 		index[[1]] <- which(Ns[, "A"] == 0)
1271 1073
 		index[[2]] <- which(Ns[, "B"] == 0)
... ...
@@ -1757,112 +1559,26 @@ thresholdModelParams <- function(object, cnOptions){
1757 1559
 	return(object)
1758 1560
 }
1759 1561
 
1760
-computeCopynumber.SnpSuperSet <- function(object, cnOptions){
1761
-##	use.ff <- cnOptions[["use.ff"]]
1762
-##	if(!use.ff){
1763
-##		object <- as(object, "CrlmmSet")
1764
-##	} else	object <- as(object, "CrlmmSetFF")
1765
-	bias.adj <- cnOptions[["bias.adj"]]
1766
-	##must be FALSE to initialize parameters
1767
-	cnOptions[["bias.adj"]] <- FALSE
1768
-	## Add linear model parameters to the CrlmmSet object
1769
-	featureData(object) <- lm.parameters(object, cnOptions)
1770
-	if(!isValidCdfName(annotation(object))) stop(annotation(object), " not supported.")
1771
-	object <- as(object, "CNSet")
1772
-	object <- computeCopynumber.CNSet(object, cnOptions)
1773
-	if(bias.adj==TRUE){## run a second time
1774
-		object <- computeCopynumber.CNSet(object, cnOptions)
1775
-	}
1776
-	return(object)
1777
-}
1562
+##computeCopynumber.SnpSuperSet <- function(object, cnOptions){
1563
+####	use.ff <- cnOptions[["use.ff"]]
1564
+####	if(!use.ff){
1565
+####		object <- as(object, "CrlmmSet")
1566
+####	} else	object <- as(object, "CrlmmSetFF")
1567
+##	bias.adj <- cnOptions[["bias.adj"]]
1568
+##	##must be FALSE to initialize parameters
1569
+##	cnOptions[["bias.adj"]] <- FALSE
1570
+##	## Add linear model parameters to the CrlmmSet object
1571
+##	featureData(object) <- lm.parameters(object, cnOptions)
1572
+##	if(!isValidCdfName(annotation(object))) stop(annotation(object), " not supported.")
1573
+##	object <- as(object, "CNSet")
1574
+##	object <- computeCopynumber.CNSet(object, cnOptions)
1575
+##	if(bias.adj==TRUE){## run a second time
1576
+##		object <- computeCopynumber.CNSet(object, cnOptions)
1577
+##	}
1578
+##	return(object)
1579
+##}
1778 1580
 
1779 1581
 
1780
-computeCopynumber.CNSet <- function(object, cnOptions){
1781
-	CHR <- unique(chromosome(object))
1782
-	batch <- object$batch
1783
-	if(length(batch) != ncol(object)) stop("Batch must be the same length as the number of samples")
1784
-	MIN.SAMPLES <- cnOptions$MIN.SAMPLES
1785
-	verbose <- cnOptions$verbose
1786
-	if(CHR != cnOptions[["chromosome"]][1]) verbose <- FALSE
1787
-	use.poe <- cnOptions[["use.poe"]]
1788
-	for(i in seq(along=unique(batch))){
1789
-		PLATE <- unique(batch)[i]
1790
-		if(sum(batch == PLATE) < MIN.SAMPLES) {
1791
-			message("Skipping plate ", PLATE)
1792
-			next()
1793
-		}		
1794
-		object.batch <- object[, batch==PLATE]
1795
-		tmp.objects <- instantiateObjects(object.batch,
1796
-						  cnOptions)
1797
-		bias.adj <- cnOptions$bias.adj
1798
-		if(bias.adj & ncol(object) <= 15){
1799
-			warning(paste("bias.adj is TRUE, but too few samples to perform this step"))
1800
-			cnOptions$bias.adj <- bias.adj <- FALSE
1801
-		}
1802
-		if(bias.adj){
1803
-			if(verbose & i == 1) message("Dropping samples with low posterior prob. of normal copy number (samples dropped is locus-specific)")
1804
-			if(!use.poe)
1805
-				tmp.objects <- biasAdjNP(object.batch, cnOptions, tmp.objects)
1806
-			tmp.objects <- biasAdj(object.batch, cnOptions, tmp.objects)
1807
-			if(verbose & i == 1) message("Recomputing location and scale parameters")
1808
-		}
1809
-		##update tmp.objects
1810
-		tmp.objects <- withinGenotypeMoments(object.batch,
1811
-						     cnOptions=cnOptions,
1812
-						     tmp.objects=tmp.objects)
1813
-		object.batch <- locationAndScale(object.batch, cnOptions, tmp.objects)
1814
-		tmp.objects <- oneBatch(object.batch,
1815
-					cnOptions=cnOptions,
1816
-					tmp.objects=tmp.objects)
1817
-		##coefs calls nuphiAllele.
1818
-		object.batch <- coefs(object.batch, cnOptions, tmp.objects)
1819
-		##nuA=getParam(object.batch, "nuA", PLATE)
1820
-		THR.NU.PHI <- cnOptions$THR.NU.PHI
1821
-		if(THR.NU.PHI){
1822
-			if(verbose & i == 1) message("Thresholding nu and phi")
1823
-			object.batch <- thresholdModelParams(object.batch, cnOptions)
1824
-		}		
1825
-		if(verbose & i == 1) message("\nAllele specific copy number")	
1826
-		object.batch <- polymorphic(object.batch, cnOptions, tmp.objects)
1827
-		if(any(!isSnp(object))){ ## there are nonpolymorphic probes
1828
-			if(verbose & i == 1) message("\nCopy number for nonpolymorphic probes...")
1829
-			if(!use.poe){
1830
-				object.batch <- nonpolymorphic(object.batch, cnOptions, tmp.objects)
1831
-			} else {
1832
-				object.batch <- nonpolymorphic.poe(object.batch, cnOptions, tmp.objects)
1833
-			}
1834
-		}
1835
-		##---------------------------------------------------------------------------
1836
-		##Note: the replacement method multiples by 100
1837
-		CA(object)[, batch==PLATE] <- CA(object.batch)
1838
-		CB(object)[, batch==PLATE] <- CB(object.batch)
1839
-		##---------------------------------------------------------------------------
1840
-		##update-the plate-specific parameters for copy number
1841
-		object <- pr(object, "nuA", PLATE, getParam(object.batch, "nuA", PLATE))
1842
-		object <- pr(object, "nuA.se", PLATE, getParam(object.batch, "nuA.se", PLATE))
1843
-		object <- pr(object, "nuB", PLATE, getParam(object.batch, "nuB", PLATE))
1844
-		object <- pr(object, "nuB.se", PLATE, getParam(object.batch, "nuB.se", PLATE))
1845
-		object <- pr(object, "phiA", PLATE, getParam(object.batch, "phiA", PLATE))
1846
-		object <- pr(object, "phiA.se", PLATE, getParam(object.batch, "phiA.se", PLATE))
1847
-		object <- pr(object, "phiB", PLATE, getParam(object.batch, "phiB", PLATE))
1848
-		object <- pr(object, "phiB.se", PLATE, getParam(object.batch, "phiB.se", PLATE))
1849
-		object <- pr(object, "tau2A", PLATE, getParam(object.batch, "tau2A", PLATE))
1850
-		object <- pr(object, "tau2B", PLATE, getParam(object.batch, "tau2B", PLATE))				
1851
-		object <- pr(object, "sig2A", PLATE, getParam(object.batch, "sig2A", PLATE))
1852
-		object <- pr(object, "sig2B", PLATE, getParam(object.batch, "sig2B", PLATE))		
1853
-		object <- pr(object, "phiAX", PLATE, as.numeric(getParam(object.batch, "phiAX", PLATE)))
1854
-		object <- pr(object, "phiBX", PLATE, as.numeric(getParam(object.batch, "phiBX", PLATE)))
1855
-		object <- pr(object, "corr", PLATE, getParam(object.batch, "corr", PLATE))
1856
-		object <- pr(object, "corrA.BB", PLATE, getParam(object.batch, "corrA.BB", PLATE))
1857
-		object <- pr(object, "corrB.AA", PLATE, getParam(object.batch, "corrB.AA", PLATE))		
1858
-		rm(object.batch, tmp.objects); gc();
1859
-	}
1860
-	##object <- object[order(chromosome(object), position(object)), ]
1861
-	if(cnOptions[["thresholdCopynumber"]]){
1862
-		object <- thresholdCopynumber(object)
1863
-	}
1864
-	return(object)
1865
-}
1866 1582
 
1867 1583
 ## cite metaArray: Choi/Ghosh
1868 1584
 crlmm_poe <- function(mat, cl, threshold=0.00001, every=100,use.mad=TRUE) {
... ...
@@ -1978,3 +1694,184 @@ crlmm_fit.em <- function(x, cl, threshold=1e-6, use.mad=TRUE){
1978 1694
 	expr <- (z0 - zmu) * sgn.z0
1979 1695
 	return(list(expr=expr, a=a, b=b, sigmasq=sigmasq, mu=mu, Pi=Pi, lik.rec=lik.rec))
1980 1696
 }
1697
+
1698
+
1699
+
1700
+.copyNumber <- function(object){
1701
+	I <- isSnp(object)
1702
+	CA <- CA(object)
1703
+	CB <- CB(object)
1704
+	CN <- CA + CB
1705
+	##For nonpolymorphic probes, CA is the total copy number
1706
+	CN[!I, ] <- CA(object)[!I, ]
1707
+	CN
1708
+}
1709
+
1710
+
1711
+
1712
+##setMethod("ellipse", "CNSet", function(x, copynumber, ...){
1713
+ellipse.CNSet <- function(x, copynumber, batch, ...){
1714
+	if(nrow(x) > 1) stop("only 1 snp at a time")
1715
+	##batch <- unique(x$batch)
1716
+	if(missing(batch)){
1717
+		stop("must specify batch")
1718
+	}
1719
+	if(length(batch) > 1) stop("batch variable not unique")
1720
+	nuA <- getParam(x, "nuA", batch)
1721
+	nuB <- getParam(x, "nuB", batch)
1722
+	phiA <- getParam(x, "phiA", batch)
1723
+	phiB <- getParam(x, "phiB", batch)
1724
+	tau2A <- getParam(x, "tau2A", batch)
1725
+	tau2B <- getParam(x, "tau2B", batch)
1726
+	sig2A <- getParam(x, "sig2A", batch)
1727
+	sig2B <- getParam(x, "sig2B", batch)
1728
+	corrA.BB <- getParam(x, "corrA.BB", batch)
1729
+	corrB.AA <- getParam(x, "corrB.AA", batch)
1730
+	corr <- getParam(x, "corr", batch)
1731
+	for(CN in copynumber){
1732
+		for(CA in 0:CN){
1733
+			CB <- CN-CA
1734
+			A.scale <- sqrt(tau2A*(CA==0) + sig2A*(CA > 0))
1735
+			B.scale <- sqrt(tau2B*(CB==0) + sig2B*(CB > 0))
1736
+			scale <- c(A.scale, B.scale)
1737
+			if(CA == 0 & CB > 0) rho <- corrA.BB
1738
+			if(CA > 0 & CB == 0) rho <- corrB.AA
1739
+			if(CA > 0 & CB > 0) rho <- corr
1740
+			if(CA == 0 & CB == 0) rho <- 0
1741
+			lines(ellipse(x=rho, centre=c(log2(nuA+CA*phiA),
1742
+					     log2(nuB+CB*phiB)),
1743
+				      scale=scale), ...)
1744
+		}
1745
+	}
1746
+}
1747
+
1748
+
1749
+##initializeCNFiles <- function(cnOpts){
1750
+##	load.it <- cnOptions[["load.it"]]
1751
+##	outdir <- cnOptions[["outdir"]]
1752
+##	snprmaFile <- cnOptions[["snprmaFile"]]
1753
+##	protocolFile <- cnOptions[["protocolFile"]]
1754
+##	CAFile <- cnOpts[["CAFile"]]
1755
+##	CBFile <- cnOpts[["CBFile"]]
1756
+##	path <- system.file("extdata", package=paste(cnOptions[["cdfName"]], "Crlmm", sep=""))
1757
+##	load(file.path(path, "snpProbes.rda"))
1758
+##	snpProbes <- get("snpProbes")
1759
+##	load(file.path(path, "cnProbes.rda"))
1760
+##	cnProbes <- get("cnProbes")		
1761
+##	load(snprmaFile)
1762
+##	res <- get("res")
1763
+##	load(protocolFile)
1764
+##	if(isPackageLoaded("ff")){
1765
+##		if(file.exists(CAFile)){
1766
+##			load(CAFile)
1767
+##			if(is.null(dim(CA)) | !all(dim(CA) == dim(A))) {
1768
+##				unlink(CAFile)
1769
+##				CA <- initializeBigMatrix(nr, nc)
1770
+##				save(CA, file=CAFile)
1771
+##				CA[,] <- NA
1772
+##
1773
+##			}
1774
+##		} else {
1775
+##			CA <- initializeBigMatrix(nr, nc)
1776
+##			save(CA, file=CAFile)
1777
+##			CA[,] <- NA
1778
+##		}
1779
+##		if(file.exists(CBFile)){
1780
+##			load(CBFile)
1781
+##			if(is.null(dim(CB)) | !all(dim(CB) == dim(A))) {			     
1782
+##				CB <- initializeBigMatrix(nr, nc)
1783
+##				save(CB, file=CBFile)					
1784
+##				CB[,] <- NA
1785
+##			}
1786
+##		} else{
1787
+##			CB <- initializeBigMatrix(nr, nc)
1788
+##			CB[,] <- NA
1789
+##			save(CB, file=CBFile)
1790
+##		}
1791
+##		open(CA)
1792
+##		open(CB)
1793
+##		colnames(CA) <- colnames(CB) <- sampleNames(protocoldata)
1794
+##	}
1795
+##	close(CA); close(CB)
1796
+##
1797
+##
1798
+##}
1799
+##collect <- function(cnOptions, CHR, PLATE){
1800
+##	snprmaFile <- cnOptions[["snprmaFile"]]
1801
+##	cnFile <- cnOptions[["cnFile"]]
1802
+##	cnrmaFile <- cnOptions[["cnrmaFile"]]
1803
+##	snprmaFile <- cnOptions[["snprmaFile"]]
1804
+##	callsFile <- cnOptions[["callsFile"]]
1805
+##	confsFile <- cnOptions[["confsFile"]]
1806
+##	protocolFile <- cnOptions[["protocolFile"]]
1807
+##	AFile <- cnOptions[["AFile"]]
1808
+##	BFile <- cnOptions[["BFile"]]
1809
+##	CAFile <- cnOptions[["CAFile"]]
1810
+##	CBFile <- cnOptions[["CBFile"]]
1811
+##	if(isPackageLoaded("ff")){
1812
+##		message("collecting ff objects to create an FFSet instance")
1813
+##		load(protocolFile)
1814
+##		load(snprmaFile)
1815
+##		load(file.path(cnOptions[["outdir"]], "cnParams.rda"))
1816
+##		res <- get("res")
1817
+##		sampleStats <- data.frame(SKW=res$SKW,
1818
+##					  SNR=res$SNR,
1819
+##					  ##mixtureParams=res$mixtureParams,
1820
+##					  gender=res$gender,
1821
+##					  batch=cnOptions[["batch"]])##)
1822
+##		pD <- new("AnnotatedDataFrame",
1823
+##			  data=sampleStats,
1824
+##			  varMetadata=data.frame(labelDescription=colnames(sampleStats)))	
1825
+##		load(file.path(cnOptions[["outdir"]], "featureDataFF.rda"))
1826
+##		open(featureDataFF)
1827
+##		load(AFile); open(A)
1828
+##		load(BFile); open(B)
1829
+##		load(CAFile); open(CA)
1830
+##		load(CBFile); open(CB)
1831
+##		load(callsFile); open(calls)
1832
+##		load(confsFile); open(confs)
1833
+##		sampleNames(pD) <- sampleNames(protocoldata)
1834
+##
1835
+##		if(any(colnames(CA) != colnames(A)) | any(colnames(CA) != sampleNames(protocoldata)))
1836
+##			colnames(calls) <- colnames(confs) <- colnames(CA) <- colnames(CB) <- colnames(B) <- colnames(A) <- sampleNames(protocoldata)
1837
+####		if(any(rownames(CA) != fns))
1838
+####			rownames(CA) <- rownames(CB) <- rownames(B) <- rownames(calls) <- rownames(confs) <- rownames(A)
1839
+##		ffSet <- new("FFSet",
1840
+##			     alleleA=A,
1841
+##			     alleleB=B,
1842
+##			     call=calls,
1843
+##			     callProbability=confs,
1844
+##			     CA=CA,
1845
+##			     CB=CB,
1846
+##			     phenoData=pD,
1847
+##			     protocolData=protocoldata,
1848
+##			     ##annotation=cnOptions[["cdfName"]],
1849
+##			     genomeAnnotation=featureDataFF,
1850
+##			     linearModelParam=cnParams)
1851
+##		##featureNames(ffSet) <- rownames(featureDataFF)
1852
+##		##sampleNames(ffSet) <- sampleNames(protocoldata)
1853
+##		return(ffSet)
1854
+##	} else{
1855
+##		if(missing(PLATE)) stop("must specify PLATE")
1856
+##		load(paste(cnOptions[["cnFile"]], "_", PLATE, "_", CHR, ".rda", sep=""))
1857
+##		return(cnSet)
1858
+##	}
1859
+##}
1860
+
1861
+
1862
+lmPlot <- function(object, plate, fill,...){
1863
+	stopifnot(nrow(object) == 1)
1864
+	Ai <- split(A(object)[object$batch==plate], snpCall(object)[object$batch==plate])
1865
+	Bi <- split(B(object)[object$batch==plate], snpCall(object)[object$batch==plate])
1866
+	##xlim <- ylim <- c(9,12)
1867
+	boxplot(Ai, xaxt="n", ylab=expression(I[A]), main=featureNames(object), ...)
1868
+	legend("topleft", bty="n", legend=c("AA", "AB", "BB"), fill=fill)
1869
+	nuA <- getParam(object, "nuA", plate)
1870
+	phiA <- getParam(object, "phiA", plate)
1871
+	segments(3, nuA, 1, nuA+2*phiA, lwd=2, col="blue")
1872
+	boxplot(Bi, xaxt="n", ylab=expression(I[B]), ...)
1873
+	nuB <- getParam(object, "nuB", plate)
1874
+	phiB <- getParam(object, "phiB", plate)	
1875
+	segments(1, nuB,  3.0, nuB+2*phiB, lwd=2, col="blue")
1876
+	axis(1, at=1:3, labels=c("AA", "AB", "BB"))
1877
+}
... ...
@@ -1,125 +1,186 @@
1
-crlmm <- function(filenames, row.names=TRUE, col.names=TRUE,
2
-                  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
3
-                  save.it=FALSE, load.it=FALSE,
4
-		  snprmaFile="snp_rmaResult.rda",
5
-		  callsFile="genotypes.rda",
6
-		  confsFile="confs.rda",
7
-		  AFile="A.rda",
8
-		  BFile="B.rda",
9
-                  mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
10
-                  cdfName, sns, recallMin=10, recallRegMin=1000,
11
-                  returnParams=FALSE, badSNP=0.7,
12
-		  crlmmBatchSize=1000){
13
-	BS <- crlmmBatchSize
14
-	if(load.it & file.exists(snprmaFile) & file.exists(callsFile)) return() ##nothing to do
15
-	if (load.it & !file.exists(snprmaFile)){
16
-		##stop("'intensityFile' is missing, and you chose either load.it or save.it")
17
-		message("'snprmaFile' does not exist and you chose to load.it.  Rerunning snprma...")
18
-		load.it <- FALSE
19
-	} 
20
-	if (missing(sns)) sns <- basename(filenames)
21
-	if (!load.it){
22
-		res <- snprma(filenames,
23
-			      fitMixture=TRUE,
24
-			      mixtureSampleSize=mixtureSampleSize,
25
-			      verbose=verbose,
26
-			      eps=eps,
27
-			      cdfName=cdfName,
28
-			      sns=sns,
29
-			      AFile=AFile,
30
-			      BFile=BFile)
31
-		save(res, file=snprmaFile)##file.path(dirname(snprmaFile), "res.rda"))
32
-	} else {
33
-		message("Loading snprmaFile...")
34
-		load(snprmaFile)
35
-		res <- get("res")
36
-	}
37
-	SKW <- res[["SKW"]]
38
-	SNR <- res[["SNR"]]	
39
-	load(AFile)
40
-	load(BFile)
41
-	if(isPackageLoaded("ff")) {open(A); open(B)}
42
-	if(row.names) row.names <- res$gns
43
-	if(col.names) col.names <- res$sns	
44
-##	}else{
45
-##		if (verbose) message("Loading ", snprmaFile, ".")
46
-##		obj <- load(snprmaFile)
47
-##		if (verbose) message("Done.")
48
-##		if (obj != "res")
49
-##			stop("Object in ", snprmaFile, " seems to be invalid.")
1
+setMethod("crlmm", "AffymetrixAlleleSet", function(object, filenames){
2
+	message("Initializing AlleleSet")
3
+	obj <- construct(object, filenames)
4
+	obj <- snprma(obj, filenames)
5
+	ops <- crlmmOptions(obj)
6
+	if(ops$copynumber & ops$nonpolymorphic)
7
+		obj <- cnrma(obj, filenames)
8
+	message("Initializing CallSet")
9
+	object <- as(obj, "CallSet")
10
+	rm(obj); gc()
11
+	callSet <- crlmm.batch(object)
12
+
13
+})
14
+setMethod("crlmm", "IlluminaAlleleSet", function(object, filenames){
15
+	RG <- construct(object, filenames)
16
+	RG <- readIdatFiles(RG, filenames)
17
+	XY <- RGtoXY(RG)
18
+	rm(RG); gc()
19
+	storageMode(XY) <- "environment"
20
+	stripNorm <- crlmmOptions(XY)$readOpts[["stripNorm"]]
21
+	useTarget <- crlmmOptions(XY)$readOpts[["useTarget"]]
22
+	verbose <- crlmmOptions(XY)$verbose
23
+	if(stripNorm)
24
+		XY = stripNormalize(XY, useTarget=useTarget, verbose=verbose)
25
+	## See the coercion step ... this puts X -> A and Y -> B
26
+	alleleSet <- as(XY, "IlluminaAlleleSet")
27
+	##Do i need to pass zero.  Is the only thing updated the mixtureParams and SNR?
28
+	alleleSet <- preprocessInfinium2(alleleSet, zero=Z(XY))
29
+	rm(XY); gc()
30
+	##Initialize call set
31
+	callSet <- as(alleleSet, "CallSet")
32
+	callSet <- crlmm.batch(callSet)
33
+	return(callSet)
34
+})
35
+
36
+##setMethod("getFeatureData", "SmallDataOptions", function(object){
37
+##        if(object$platform != "Illumina") stop("only for Illumina platforms")
38
+##        message("reading first idat file to extract feature data")
39
+##        fileExt <- object$illuminaOpts[["fileExt"]]
40
+##        filenames <- object$filenames
41
+##        grnfile = paste(filenames[1], fileExt$green, sep=fileExt$sep)
42
+##        if(!file.exists(grnfile)){
43
+##                stop(paste(grnfile, " does not exist. Check fileExt argument"))
44
+##        }
45
+##        G <- readIDAT(grnfile)
46
+##        idsG = rownames(G$Quants)
47
+##        nr <- length(idsG)
48
+##        fD <- new("AnnotatedDataFrame", data=data.frame(row.names=idsG))##, varMetadata=data.frame(labelDescript
49
+##        return(fD)
50
+##})
51
+
52
+##crlmm.IlluminaAlleleSet <- function(object, filenames){
53
+##	message("Initializing CallSet")
54
+##	object <- as(object, "IlluminaCallSet")
55
+##	rm(obj); gc()
56
+##	##Call in batches to reduce ram
57
+##	nc <- ncol(object)
58
+##	object$gender <- crlmmOptions(object)$crlmmOpts[["gender"]]
59
+##	if(is.null(object$gender)) object$gender <- rep(NA, nc)
60
+##	cOps <- crlmmOptions(object)$crlmmOpts
61
+##	BS <- cOps$batchSize
62
+##	gc()
63
+##	if(nc > BS){
64
+##		N <- ceiling(nc/BS)
65
+##		S <- ceiling(nc/N)
66
+##		colindex <- split(1:nc, rep(1:nc, each=S, length.out=nc))
67
+##	} else {
68
+##		colindex <- list(1:nc)
50 69
 ##	}
51
-	##path <- system.file("extdata", package=paste(cdfName, "Crlmm", sep=""))
52
-	##load(file.path(path, "snpProbes.rda"))
53
-	gns <- res[["gns"]]
54
-	sns <- colnames(A)
55
-	nc <- ncol(A)	
56
-	##
57
-	##Ensures that if ff package is loaded, ordinary matrices are still passed to crlmmGT
58
-	if(nc > BS){
59
-		## genotype the samples in batches
70
+##	if(length(colindex) > 1)
71
+##		message("Calling genotypes in batches of size ", length(colindex[[1]]), " to reduce required RAM")
72
+##	row.index <- which(isSnp(object)==1 | is.na(isSnp(object)))
73
+##	if(length(colindex) > 1){
74
+##		for(i in seq(along=colindex)){
75
+##			col.index <- colindex[[i]]
76
+##			tmp <- crlmm.AffymetrixCallSet(object[row.index, col.index])
77
+##			calls(object)[row.index, col.index] <- calls(tmp)
78
+##			confs(object)[row.index, col.index] <- confs(tmp)
79
+##			object$gender[col.index] <- tmp$gender
80
+##			rm(tmp); gc()
81
+##		}
82
+##	} else {
83
+##		col.index <- colindex[[1]]
84
+##		tmp <- crlmm.AffymetrixCallSet(object[row.index, col.index])  ##ensure matrix is passed
85
+##		## enables writing to file
86
+##		calls(object)[row.index, col.index] <- calls(tmp)
87
+##		confs(object)[row.index, col.index] <- confs(tmp)
88
+##		object$gender <- tmp$gender
89
+##	}
90
+##	##Return a CallSet object
91
+##	class(object) <- "CallSet"
92
+##	return(object)	
93
+##}
94
+
60 95
 
61
-		##number crlmm batches
96
+crlmm.batch <- function(object){
97
+	##Call in batches to reduce ram
98
+	nc <- ncol(object)
99
+	object$gender <- crlmmOptions(object)$crlmmOpts[["gender"]]
100
+	cOps <- crlmmOptions(object)$crlmmOpts
101
+	BS <- cOps$batchSize
102
+	gc()
103
+	if(nc > BS){
62 104
 		N <- ceiling(nc/BS)
63
-		##samples per batch
64 105
 		S <- ceiling(nc/N)
65 106
 		colindex <- split(1:nc, rep(1:nc, each=S, length.out=nc))
66 107
 	} else {
67 108
 		colindex <- list(1:nc)
68 109
 	}
69 110
 	if(length(colindex) > 1)
70
-		message("Calling genotypes in batches of size ", length(colindex[[1]]), " to reduce required RAM")	
71
-	if(isPackageLoaded("ff")){
72
-		confs <- initializeBigMatrix(nrow(A), ncol(A))
73
-		calls <- initializeBigMatrix(nrow(A), ncol(A))
74
-	} else{
75
-		confs <- matrix(NA, nrow(A), ncol(A))
76
-		calls <- matrix(NA, nrow(A), ncol(A))
77
-	}	
78
-	sex <- vector("list", length(colindex))
79
-	for(i in seq(along=colindex)){
80
-		aMatrix <- A[1:length(gns), colindex[[i]]]
81
-		bMatrix <- B[1:length(gns), colindex[[i]]]
82
-		snr <- res[["SNR"]][colindex[[i]]]
83
-		mix <- res[["mixtureParameters"]][, colindex[[i]]]
84
-		##	res2 <- crlmmGT(res[["A"]], res[["B"]], res[["SNR"]],
85
-		res2 <- tryCatch(crlmmGT(aMatrix,
86
-					 bMatrix,
87
-					 snr,
88
-					 mix,
89
-					 res[["cdfName"]],
90
-					 gender=gender,
91
-					 row.names=row.names,
92
-					 col.names=col.names[colindex[[i]]],
93
-					 recallMin=recallMin,
94
-					 recallRegMin=1000,
95
-					 SNRMin=SNRMin,
96
-					 returnParams=returnParams,
97
-					 badSNP=badSNP,
98
-					 verbose=verbose), error=function(e) NULL)
99
-		if(is.null(res2)) {
100
-			unlink(callsFile)
101
-			unlink(confsFile)
102
-			stop("error in call to crlmmGT")
111
+		message("Calling genotypes in batches of size ", length(colindex[[1]]), " to reduce required RAM")
112
+	row.index <- which(isSnp(object)==1 | is.na(isSnp(object)))
113
+	if(length(colindex) > 1){
114
+		for(i in seq(along=colindex)){