Browse code

roll back to crlmm version 1.5.24

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45126 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 10/03/2010 01:27:04
Showing36 changed files

... ...
@@ -433,3 +433,9 @@ then readIDAT() should work. Thanks to Pierre Cherel who reported this error.
433 433
 2010-03-08 M. Ritchie committed version 1.5.31
434 434
  * removed a few unnecessary lines of code from crlmm-illumina.R (zero not needed as argument for preprocessInfinium2() and storageMode should not be "lockedEnvironment" in RGtoXY()) 
435 435
  * added "humanomni1quadv1b" to validCdfName() in utils.R
436
+
437
+2010-03-08 R.Scharpf committed version 1.5.32
438
+
439
+**	   This is a roll back to version 1.5.24 
440
+	   
441
+
... ...
@@ -1,39 +1,17 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.5.31
5
-Date: 2010-03-08
6
-Author: Rafael A Irizarry, Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>
7
-Maintainer: Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
4
+Version: 1.5.32
5
+Date: 2010-02-05
6
+Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>
7
+Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
8 8
 Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 5.0, 6.0, and Illumina platforms
9 9
 License: Artistic-2.0
10
-Depends: R (>= 2.11.0),
11
-         methods,
12
-         Biobase (>= 2.7.2),
13
-         oligoClasses (>= 1.9.28)
14
-Imports: affyio (>= 1.15.2),
15
-         preprocessCore,
16
-         utils,
17
-         stats,
18
-         genefilter,
19
-         splines,
20
-         mvtnorm,
21
-         ellipse,
22
-         SNPchip,ff
23
-Suggests: hapmapsnp5,
24
-          hapmapsnp6,
25
-          genomewidesnp5Crlmm (>= 1.0.2),
26
-          genomewidesnp6Crlmm (>= 1.0.2),
27
-          snpMatrix,
28
-          metaArray
29
-Collate: AllClasses.R
30
-	 AllGenerics.R
31
-	 methods-AffymetrixAlleleSet.R
32
-	 methods-IlluminaAlleleSet.R
33
-	 methods-CrlmmContainer.R
10
+Depends: methods, Biobase (>= 2.7.2), R (>= 2.11.0), oligoClasses (>= 1.9.21)
11
+Imports: affyio (>= 1.15.2), preprocessCore, utils, stats, genefilter, splines, mvtnorm, ellipse, SNPchip
12
+Suggests: hapmapsnp5, hapmapsnp6, genomewidesnp5Crlmm (>= 1.0.2),genomewidesnp6Crlmm (>= 1.0.2), snpMatrix
13
+Collate: AllGenerics.R
34 14
 	 methods-CNSet.R
35
-	 methods-AlleleSet.R
36
-	 methods-CallSet.R
37 15
 	 methods-eSet.R
38 16
          methods-SnpSuperSet.R
39 17
          cnrma-functions.R
... ...
@@ -1,4 +1,3 @@
1
-## crlmm NAMESPACE
2 1
 useDynLib("crlmm", .registration=TRUE)
3 2
 
4 3
 ## Biobase
... ...
@@ -8,12 +7,11 @@ importClassesFrom(Biobase, AnnotatedDataFrame, AssayData, eSet, SnpSet,
8 7
 
9 8
 importMethodsFrom(Biobase, annotation, "annotation<-",
10 9
                   annotatedDataFrameFrom, assayData, "assayData<-",
11
-		  snpCallProbability,
12 10
                   combine, dims, experimentData, "experimentData<-",
13 11
                   fData, featureData, "featureData<-", featureNames,
14 12
                   fvarMetadata, fvarLabels, pData, phenoData,
15 13
                   "phenoData<-", protocolData, "protocolData<-",
16
-                  pubMedIds, rowMedians, sampleNames, snpCall, storageMode,
14
+                  pubMedIds, rowMedians, sampleNames, storageMode,
17 15
                   "storageMode<-", updateObject, varLabels)
18 16
 
19 17
 importFrom(Biobase, assayDataElement, assayDataElementNames,
... ...
@@ -21,75 +19,44 @@ importFrom(Biobase, assayDataElement, assayDataElementNames,
21 19
            validMsg)
22 20
 
23 21
 ## oligoClasses
24
-##importClassesFrom(oligoClasses, SnpSuperSet, AlleleSet, CNSet)
22
+importClassesFrom(oligoClasses, SnpSuperSet, AlleleSet)
25 23
 
26
-##S3 method ffdf and class ffdf
27
-importFrom(ff, ffdf, ff, as.ff, as.ffdf)
24
+importMethodsFrom(oligoClasses, allele, calls, "calls<-", confs,
25
+		  "confs<-", cnConfidence, "cnConfidence<-", 
26
+		  isSnp, chromosome, position, CA, "CA<-", CB, "CB<-")
28 27
 
29 28
 
29
+importFrom(oligoClasses, chromosome2integer, celfileDate, list.celfiles)
30 30
 
31
-##importMethodsFrom(oligoClasses, allele, calls, "calls<-", confs,
32
-##		  "confs<-", cnConfidence, "cnConfidence<-", isSnp,
33
-##		  chromosome, position, CA, "CA<-", CB, "CB<-", A, B)
31
+importFrom(oligoClasses, copyNumber)
34 32
 
35
-importFrom(oligoClasses, chromosome2integer, celfileDate, list.celfiles,
36
-           copyNumber)
37
-
38
-## graphics
39 33
 importFrom(graphics, abline, axis, layout, legend, mtext, par, plot,
40 34
            polygon, rect, segments, text, points, boxplot)
41 35
 
42
-## grDevices
43 36
 importFrom(grDevices, grey)
44 37
 
45
-## affyio
46 38
 importFrom(affyio, read.celfile.header, read.celfile)
47 39
 
48
-## preprocessCore
49
-importFrom(preprocessCore, normalize.quantiles.use.target,
50
-           normalize.quantiles)
40
+importFrom(preprocessCore, normalize.quantiles.use.target, normalize.quantiles)
51 41
 
52
-## utils
53
-importFrom(utils, data, packageDescription, setTxtProgressBar,
54
-           txtProgressBar)
42
+importFrom(utils, data, packageDescription, setTxtProgressBar, txtProgressBar)
55 43
 
56
-## stats
57
-importFrom(stats, coef, cov, dnorm, kmeans, lm, mad, median, quantile,
58
-           sd, update)
44
+importFrom(stats, coef, cov, dnorm, kmeans, lm, mad, median, quantile, sd, update)
59 45
 
60
-## genefilter
61 46
 importFrom(genefilter, rowSds)
62 47
 
63
-## mvtnorm
64 48
 importFrom(mvtnorm, dmvnorm)
65 49
 
66
-## ellipse
67 50
 importFrom(ellipse, ellipse)
68 51
 
69
-
70
-##exportClasses(FFSet)
71
-exportMethods(annotatedDataFrameFrom,
72
-	copyNumber, initialize, 
73
-              ##show, "$", "[[", "[", 
74
-	      genomeAnnotation,
75
-	      lM,
76
-	      CA,
77
-	      CB,
78
-	      A,
79
-	      B,
80
-	      snpCall,
81
-	      confs,
82
-	      chromosome,
83
-	      position,
84
-	      isSnp)
85
-export(crlmmOptions, crlmm, crlmmCopynumber, ellipse, readIdatFiles, snprma, getParam, validCdfNames)
86
-
87
-
88
-
89
-#############
90
-## TO REMOVE
91
-#############
92
-
93
-##export everything that does not start with a .
94
-exportPattern("^[^\\.]")
95
-
52
+exportMethods(A, B, copyNumber)
53
+export(cnOptions, crlmm, crlmmIllumina, crlmmCopynumber, ellipse, readIdatFiles, snprma, getParam) 
54
+
55
+##export(##viterbi.CNSet, ##move to VanillaICE
56
+##	combineIntensities,
57
+##	whichPlatform,
58
+##	isValidCdfName,
59
+##	crlmmWrapper,
60
+##        withinGenotypeMoments,  
61
+##	locationAndScale, getParam.SnpSuperSet)
62
+export(thresholdModelParams, computeCopynumber.CNSet, nuphiAllele, coefs, biasAdjNP)
... ...
@@ -1,17 +1,3 @@
1
-        **************************************************
2
-        *                                                *
3
-        *              1.5 SERIES NEWS                   *
4
-        *                                                *
5
-        **************************************************
6
-
7
-USER VISIBLE CHANGES
8
-
9
-NEW FEATURES
10
-
11
-	o Support to large datasets via ff package. The user
12
-	must load the ff package manually to benefit from
13
-	this.
14
-
15 1
         **************************************************
16 2
         *                                                *
17 3
         *              1.3 SERIES NEWS                   *
18 4
deleted file mode 100644
... ...
@@ -1,74 +0,0 @@
1
-setOldClass("ffdf")
2
-setOldClass("ff_matrix")
3
-setClassUnion("matrix_or_ff", c("matrix", "ff_matrix"))
4
-setClassUnion("list_or_ffdf", c("list", "ffdf"))
5
-setClass("CrlmmContainer", contains="eSet",
6
-	 representation(options="list",
7
-			genomeAnnotation="ANY",
8
-			"VIRTUAL"))
9
-
10
-setMethod("show", "CrlmmContainer", function(object){
11
-	callNextMethod(object)
12
-	cat("options: \n")
13
-	print(names(crlmmOptions(object)))
14
-	cat("\n")
15
-	cat("genomeAnnotation:", nrow(genomeAnnotation(object)), " rows, ", ncol(genomeAnnotation(object)), " columns\n")
16
-	print(genomeAnnotation(object)[1:5, ])
17
-	cat("\n")
18
-})
19
-setClass("AlleleSet", contains="CrlmmContainer")
20
-setClass("CallSet", contains="AlleleSet")
21
-setClass("CNSet", contains="CallSet",
22
-	 representation(lM="list_or_ffdf"))
23
-
24
-setClass("IlluminaRGSet", contains="CrlmmContainer")
25
-setClass("IlluminaXYSet", contains="CrlmmContainer")
26
-
27
-setClass("AffymetrixAlleleSet", contains="AlleleSet")  ##AffymetrixAlleleSet
28
-setClass("IlluminaAlleleSet", contains="AlleleSet")
29
-##setClass("AffymetrixBigData", contains="AffymetrixAlleleSet")
30
-##setClass("AffymetrixSmallData", contains="AffymetrixAlleleSet")
31
-##setClass("IlluminaSmallData", contains="IlluminaAlleleSet")
32
-##setClass("IlluminaBigData", contains="IlluminaAlleleSet")
33
-##setMethod("initialize", "AffymetrixBigData", function(.Object, annotation){
34
-##	.Object <- callNextMethod(.Object)
35
-##	if(!missing(annotation)) annotation(.Object) <- annotation
36
-##	.Object
37
-##})
38
-##setClass("AffymetrixCallSet", contains="CallSet")
39
-##setClass("IlluminaCallSet", contains="CallSet")
40
-setMethod("initialize", "AlleleSet", function(.Object, alleleA=new("matrix"), alleleB=new("matrix"), ...){
41
-	.Object <- callNextMethod(.Object, alleleA=alleleA, alleleB=alleleB, ...)
42
-	storageMode(.Object) <- "environment"
43
-	.Object
44
-})
45
-setMethod("initialize", "CallSet", function(.Object, call=new("matrix"), callProbability=new("matrix"), ...){
46
-	.Object <- callNextMethod(.Object, call=call, callProbability=callProbability, ...)
47
-	storageMode(.Object) <- "environment"
48
-	.Object
49
-})
50
-setMethod("initialize", "CNSet", function(.Object, CA=new("matrix"), CB=new("matrix"), lM=new("list"), ...){
51
-	.Object <- callNextMethod(.Object, CA=CA, CB=CB, lM=lM,...)
52
-	storageMode(.Object) <- "environment"
53
-	.Object
54
-})
55
-setValidity("AlleleSet", function(object) {
56
-	assayDataValidMembers(assayData(object), c("alleleA", "alleleB"))
57
-})
58
-setValidity("IlluminaRGSet", function(object) {
59
-	assayDataValidMembers(assayData(object), c("R", "G", "zero"))
60
-})
61
-setValidity("IlluminaXYSet", function(object) {
62
-	assayDataValidMembers(assayData(object), c("X", "Y", "zero"))
63
-})
64
-
65
-setValidity("CallSet", function(object) {
66
-	assayDataValidMembers(assayData(object), c("alleleA", "alleleB", "call", "callProbability"))
67
-})
68
-setValidity("CNSet", function(object) {
69
-	assayDataValidMembers(assayData(object), c("alleleA", "alleleB", "call", "callProbability", "CA", "CB"))
70
-})
71
-	 
72
-
73
-
74
-
... ...
@@ -1,57 +1,14 @@
1
-setGeneric("A<-", function(object, value) standardGeneric("A<-"))
2
-setGeneric("B<-", function(object, value) standardGeneric("B<-"))
1
+setGeneric("A", function(object) standardGeneric("A"))
2
+setGeneric("B", function(object) standardGeneric("B"))
3
+##setGeneric("A<-", function(object, value) standardGeneric("A<-"))
4
+##setGeneric("B<-", function(object, value) standardGeneric("B<-"))
3 5
 
4
-setGeneric("getParam", function(object, name, ...) standardGeneric("getParam"))
6
+setGeneric("getParam", function(object, name, batch) standardGeneric("getParam"))
5 7
 setGeneric("cnIndex", function(object) standardGeneric("cnIndex"))
6 8
 setGeneric("cnNames", function(object) standardGeneric("cnNames"))
7
-setGeneric("confs", function(object) standardGeneric("confs"))
8
-setGeneric("computeCopynumber", function(object) standardGeneric("computeCopynumber"))
9
-setGeneric("crlmm", function(object, ...) standardGeneric("crlmm"))
10
-setGeneric("crlmmOptions", function(object) standardGeneric("crlmmOptions"))
11
-setGeneric("crlmmOptions<-", function(object, value) standardGeneric("crlmmOptions<-"))
12
-setGeneric("construct", function(object, filenames) standardGeneric("construct"))
13
-
14
-setGeneric("getOptions", function(object) standardGeneric("getOptions"))
15
-
16
-##setGeneric("getA", function(object) standardGeneric("getA"))
17
-##setGeneric("getB", function(object) standardGeneric("getB"))
18
-##setGeneric("getG", function(object) standardGeneric("getG"))
19
-##setGeneric("getR", function(object) standardGeneric("getR"))
20
-##setGeneric("getZero", function(object) standardGeneric("getZero"))
21
-##setGeneric("getCalls", function(object) standardGeneric("getCalls"))
22
-##setGeneric("getConfs", function(object) standardGeneric("getConfs"))
23
-##setGeneric("getCA", function(object) standardGeneric("getCA"))
24
-##setGeneric("getCB", function(object) standardGeneric("getCB"))
25
-setGeneric("getPhenoData", function(object) standardGeneric("getPhenoData"))
26
-setGeneric("getFeatureData", function(object) standardGeneric("getFeatureData"))
27
-setGeneric("getProtocolData", function(object, filenames) standardGeneric("getProtocolData"))
28
-setGeneric("getGenomeAnnotation", function(object, ...) standardGeneric("getGenomeAnnotation"))
29
-setGeneric("getLinearModelParam", function(object, ...) standardGeneric("getLinearModelParam"))
30
-
31
-##setGeneric("initializeStorage", function(object) standardGeneric("initializeStorage")) 
32
-setGeneric("prediction", function(x, ...) standardGeneric("prediction")) 
33
-setGeneric("genomeAnnotation", function(object) standardGeneric("genomeAnnotation"))
34
-setGeneric("genomeAnnotation<-", function(object,value) standardGeneric("genomeAnnotation<-"))
35
-setGeneric("lM", function(object) standardGeneric("lM"))
36
-setGeneric("lM<-", function(object, value) standardGeneric("lM<-"))
37
-
38
-##setGeneric("nFeatures", function(object) standardGeneric("nFeatures"))
39
-
9
+setGeneric("computeCopynumber", function(object, cnOptions) standardGeneric("computeCopynumber"))
40 10
 setGeneric("pr", function(object, name, batch, value) standardGeneric("pr"))
41
-setGeneric("rma", function(object) standardGeneric("rma"))
42
-setGeneric("snprma", function(object, ...) standardGeneric("snprma")) 
43 11
 setGeneric("snpIndex", function(object) standardGeneric("snpIndex"))
44 12
 setGeneric("snpNames", function(object) standardGeneric("snpNames"))
45 13
 ##setGeneric("splitByChromosome", function(object, ...) standardGeneric("splitByChromosome"))
46 14
 
47
-setGeneric("R", function(object) standardGeneric("R"))
48
-setGeneric("G", function(object) standardGeneric("G"))
49
-setGeneric("Z", function(object) standardGeneric("Z"))
50
-setGeneric("X", function(object) standardGeneric("X"))
51
-setGeneric("Y", function(object) standardGeneric("Y"))
52
-setMethod("R", "IlluminaRGSet", function(object) assayDataElement(object, "R"))
53
-setMethod("G", "IlluminaRGSet", function(object) assayDataElement(object, "G"))
54
-setMethod("Z", "IlluminaRGSet", function(object) assayDataElement(object, "zero"))
55
-setMethod("X", "IlluminaXYSet", function(object) assayDataElement(object, "X"))
56
-setMethod("Y", "IlluminaXYSet", function(object) assayDataElement(object, "Y"))
57
-setMethod("Z", "IlluminaXYSet", function(object) assayDataElement(object, "zero"))
... ...
@@ -153,483 +153,337 @@ predictGender <- function(res, cdfName="genomewidesnp6", SNRMin=5){
153 153
 	return(gender)
154 154
 }
155 155
 
156
-
157
-##initializeFFObjects <- function(filenames, cnOptions){
158
-##	outdir <- cnOptions[["outdir"]]
159
-##	cdfName <- cnOptions[["cdfName"]]
160
-##	AFile <- cnOptions[["AFile"]]
161
-##	BFile <- cnOptions[["BFile"]]
162
-##	callsFile <- cnOptions[["callsFile"]]
163
-##	confsFile <- cnOptions[["confsFile"]]
164
-##	snprmaFile <- cnOptions[["snprmaFile"]]
165
-##	cnrmaFile <- cnOptions[["cnrmaFile"]]
166
-##	CAFile <- cnOptions[["CAFile"]]
167
-##	CBFile <- cnOptions[["CBFile"]]
168
-##	load.it <- cnOptions[["load.it"]]
169
-##	fileExists <- list(A=file.exists(AFile),
170
-##			   B=file.exists(BFile),
171
-##			   calls=file.exists(callsFile),
172
-##			   confs=file.exists(confsFile),
173
-##			   CA=file.exists(CAFile),
174
-##			   CB=file.exists(CBFile))
175
-##	allExists <- all(unlist(fileExists))
176
-##	##if files already exist, check that the files have the appropriate dimension
177
-##	if(allExists){
178
-##		load(AFile)
179
-##		open(A)
180
-##		sns <- dimnames(A)[[2]]
181
-##		if(!identical(sns, basename(filenames)) | !load.it){
182
-##		## if not of the same dimension, clean up
183
-##			message("Sample names in previously saved objects differ from the filenames. Removing previously saved objects.")
184
-##			delete(A); gc()
185
-##			unlink(AFile)
186
-##			load(BFile); delete(B); unlink(BFile)
187
-##			unlink(snprmaFile)
188
-##			unlink(cnrmaFile)
189
-##			if(file.exists(file.path(outdir, "cnParams.rda"))){
190
-##				load(file.path(outdir, "cnParams.rda"))
191
-##				delete(cnParams); gc()
192
-##				unlink(file.path(outdir, "cnParams.rda"))
193
-##			}
194
-##			load(callsFile); delete(calls); unlink(callsFile)
195
-##			load(confsFile); delete(confs); unlink(confsFile)
196
-##			load(CAFile); delete(CA); unlink(CAFile)
197
-##			load(CBFile); delete(CB); unlink(CBFile)
198
-##			allExists <- FALSE
199
-##		} 
200
-##	}
201
-##	if(!allExists) 	{
202
-##		message("Initializing ff objects for A, B, confs, calls, CA, and CB.")
203
-##		dns <- .dimnames(filenames, cnOptions[["cdfName"]], cnOptions[["verbose"]])
204
-##		fns <- dns[[1]]
205
-##	}
206
-##	if(!file.exists(AFile)) {A <- initializeBigMatrix(dns); save(A, file=AFile); close(A)}
207
-##	if(!file.exists(BFile)) {B <- initializeBigMatrix(dns); save(B, file=BFile); close(B)}
208
-##	if(!file.exists(confsFile)) {confs <- initializeBigMatrix(dns); save(confs, file=confsFile); close(confs)}
209
-##	if(!file.exists(callsFile)) {calls <- initializeBigMatrix(dns); save(calls, file=callsFile); close(calls)}
210
-##	if(!file.exists(CAFile)) {CA <- initializeBigMatrix(dns); save(CA, file=CAFile); close(CA)}
211
-##	if(!file.exists(CBFile)) {CB <- initializeBigMatrix(dns); save(CB, file=CBFile); close(CB)}
212
-##	featureDataFile <- file.path(outdir, "featureDataFF.rda")
213
-##	if(!file.exists(featureDataFile)){
214
-##		path <- system.file("extdata", package=paste(cnOptions[["cdfName"]], "Crlmm", sep=""))
215
-##		load(file.path(path, "snpProbes.rda"))
216
-##		snpProbes <- get("snpProbes")
217
-##		load(file.path(path, "cnProbes.rda"))
218
-##		cnProbes <- get("cnProbes")
219
-##		message("Initializing featureDataFF.")
220
-##		fvarlabels <- c("chromosome", "position",   "isSnp")
221
-##		M <- matrix(NA, length(fns), 3, dimnames=list(fns, fvarlabels))
222
-##		index <- match(rownames(snpProbes), rownames(M)) #only snp probes in M get assigned position
223
-##		M[index, "position"] <- snpProbes[, grep("pos", colnames(snpProbes))]
224
-##		M[index, "chromosome"] <- snpProbes[, grep("chr", colnames(snpProbes))]
225
-##		M[index, "isSnp"] <- 1L		
226
-##		index <- match(rownames(cnProbes), rownames(M)) #only snp probes in M get assigned position
227
-##		M[index, "position"] <- cnProbes[, grep("pos", colnames(cnProbes))]
228
-##		M[index, "chromosome"] <- cnProbes[, grep("chr", colnames(cnProbes))]
229
-##		M[index, "isSnp"] <- 0L
230
-##		featureDataFF <- ff(M, dim=c(nrow(M), ncol(M)),
231
-##				    vmode="integer", finalizer="close",
232
-##				    overwrite=TRUE,
233
-##				    dimnames=list(fns, fvarlabels))
234
-##		save(featureDataFF, file=file.path(outdir, "featureDataFF.rda"))
235
-##		close(featureDataFF)
236
-##		rm(M, cnProbes, snpProbes, featureDataFF); gc()
237
-##	}
238
-##	## parameters file
239
-##	parameterFile <- file.path(outdir, "cnParams.rda")
240
-##	if(!file.exists(parameterFile)) {
241
-##		message("Initializing parameters file")
242
-##		batch <- cnOptions[["batch"]]
243
-##		dns.batch <- list(fns, unique(batch)) 
244
-##		cnParams <- initializeParamObject(dns.batch)
245
-##		save(cnParams, file=file.path(outdir, "cnParams.rda"))
246
-##		close(cnParams)
247
-##	}
248
-##}
249
-
250
-##preprocessAndGenotype <- function(filenames, cnOptions, ...){
251
-##	set.seed(cnOptions[["seed"]])  ##for reproducibility
252
-##	protocolFile <- cnOptions[["protocolFile"]]
253
-##	cdfName <- cnOptions[["cdfName"]]
254
-##	verbose <- cnOptions[["verbose"]]
255
-##	if(file.exists(protocolFile)){
256
-##		## check that file is the same dimension
257
-##		load(protocolFile)
258
-##		if(!identical(sampleNames(protocoldata), basename(filenames)))
259
-##			unlink(protocolFile)
260
-##	}
261
-##	if(!file.exists(protocolFile)){
262
-##		platform <- whichPlatform(paste(cdfName, "Crlmm", sep=""))
263
-##		if(platform=="affymetrix"){
264
-##			if(verbose) message("Creating protocol file with scan dates for the affy arrays")
265
-##			scanDates <- data.frame(ScanDate=sapply(filenames, celfileDate))
266
-##			rownames(scanDates) <- basename(rownames(scanDates))
267
-##			protocoldata <- new("AnnotatedDataFrame",
268
-##					    data=scanDates,
269
-##					    varMetadata=data.frame(labelDescription=colnames(scanDates),
270
-##					    row.names=colnames(scanDates)))
271
-##			save(protocoldata, file=protocolFile)
272
-##		}
273
-##		## protocol file for illumina saved during the readIdatFile step
274
-##	} 
275
-##	if(isPackageLoaded("ff")) initializeFFObjects(filenames, cnOptions)
276
-##	crlmmWrapper(filenames, cnOptions, ...)
277
-##	message("Checking for required files...")
278
-##	message(cnOptions[["AFile"]], ": ", file.exists(cnOptions[["AFile"]]))
279
-##	message(cnOptions[["BFile"]], ": ", file.exists(cnOptions[["BFile"]]))
280
-##	message(cnOptions[["callsFile"]], ": ", file.exists(cnOptions[["callsFile"]]))
281
-##	message(cnOptions[["confsFile"]], ": ", file.exists(cnOptions[["confsFile"]]))
282
-##	message(cnOptions[["snprmaFile"]], ": ", file.exists(cnOptions[["snprmaFile"]]))
283
-##	message(cnOptions[["protocolFile"]], ": ", file.exists(cnOptions[["protocolFile"]]))
284
-##}
156
+combineIntensities <- function(res, NP=NULL, callSet){
157
+	rownames(res$B) <- rownames(res$A) <- res$gns
158
+	colnames(res$B) <- colnames(res$A) <- res$sns
159
+	if(!is.null(NP)){
160
+		blank <- matrix(NA, nrow(NP), ncol(NP))
161
+		dimnames(blank) <- dimnames(NP)
162
+		A <- rbind(res$A, NP)
163
+		B <- rbind(res$B, blank)
164
+	} else {
165
+		A <- res$A
166
+		B <- res$B
167
+	}
168
+	dimnames(B) <- dimnames(A)
169
+	index.snps <- match(res$gns, rownames(A))
170
+	callsConfs <- calls <- matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A))
285 171
 	
286
-##crlmmCopynumber <- function(cnOptions, ...){
287
-##crlmmCopynumber <- function(object){
288
-##	ops <- crlmmOptions(object)
289
-##	verbose <- ops$verbose
290
-##	calls <- snpCall(object)
291
-##	confs <- confs(object)
292
-##	fns <- featureNames(object)
293
-##	SNRmin <- ops$SNRMin
294
-##	batch <- object$batch
295
-##	whichBatch <- ops$cnOpts$whichBatch
296
-##	chromosome <- ops$cnOpts$chromosome
297
-##	MIN.SAMPLES <- ops$cnOpts$MIN.SAMPLES
298
-##	##k <- grep("chr", colnames(snpProbes))
299
-##	for(CHR in chromosome){
300
-##		##annotated snp and cn probes
301
-##		##snps <- rownames(snpProbes)[snpProbes[, k] == CHR]
302
-##		##cns <- rownames(cnProbes)[cnProbes[, k] == CHR]
303
-##		##where are the annotated snps in the fns file
304
-##		##index.snps <- match(snps, fns)
305
-##		##index.cn <- match(cns, fns)
306
-##		##row.index <- c(index.snps, index.cn)
307
-##		cat("Chromosome ", CHR, "\n")
308
-##		for(i in whichBatch){
309
-##			PLATE <- unique(batch)[i]
310
-##			message("Plate: ", PLATE)
311
-##			sample.index <- which(batch==PLATE)
312
-##			if(length(sample.index) < MIN.SAMPLES) {
313
-##				warning("Plate ", PLATE, " has fewer than 10 samples.  Skipping this plate.")
314
-##				next()
315
-##			}
316
-##			##cnOptions[["batch"]] <- cnOptions[["batch"]][snpI[["SNR"]]  >= SNRmin]
317
-####			if(isPackageLoaded("ff")){
318
-####				ca <- as.matrix(CA[row.index, sample.index])
319
-####				cb <- as.matrix(CB[row.index, sample.index])
320
-####			} else{
321
-####				dns <- dimnames(A[row.index, sample.index])
322
-####				cb <- ca <- matrix(NA, nr=length(row.index), nc=length(sample.index), dimnames=dns)
323
-####			}
324
-####			cnSet <- new("CNSet",
325
-####				     alleleA=as.matrix(A[row.index, sample.index]),
326
-####				     alleleB=as.matrix(B[row.index, sample.index]),
327
-####				     call=as.matrix(calls[row.index, sample.index]),
328
-####				     callProbability=as.matrix(confs[row.index, sample.index]),
329
-####				     CA=ca,
330
-####				     CB=cb,
331
-####				     featureData=annotatedDataFrameFrom(as.matrix(A[row.index, sample.index]), byrow=TRUE),
332
-####				     phenoData=pD[sample.index, ],
333
-####				     protocolData=protocoldata[sample.index, ])
334
-##			##Verify this is correct
335
-####			annotation(cnSet) <- cnOptions[["cdfName"]]
336
-####			featureNames(cnSet) <- fns[row.index]
337
-##			##add chromosome, position, isSnp
338
-####			cnSet <- annotate(cnSet)
339
-####			if(any(cnSet$SNR > SNRmin)){
340
-####				if(CHR == chromosome[1]) message(paste("Excluding samples with SNR < ", SNRmin))
341
-####				cnSet <- cnSet[, cnSet$SNR >= SNRmin]
342
-####			}
343
-####			featureData(cnSet) <- lm.parameters(cnSet, cnOptions)
344
-##			if(CHR > 23) next()
345
-##			cnSet <- computeCopynumber(object[chromosome(object) == CHR, sample.index])
346
-####			if(!isPackageLoaded("ff") & i == whichBatch[1]) cnParams <- initializeParamObject(list(featureNames(cnSet), unique(cnOptions[["batch"]])[whichBatch]))
347
-####			if(!isPackageLoaded("ff")) {
348
-####				row.index <- 1:nrow(cnSet)
349
-####			} else {
350
-####				##Warning message:
351
-####				##In d[[1]] * d[[2]] : NAs produced by integer overflow
352
-####				CA[row.index, sample.index] <- cnSet@assayData[["CA"]]
353
-####				CB[row.index, sample.index] <- cnSet@assayData[["CB"]]
354
-####			}
355
-####			cnParams <- updateParams(cnParams, cnSet, row.index, batch=unique(batch)[i])
356
-##			## keep only chromosome, position, and isSnp
357
-####			featureData(cnSet) <- featureData(cnSet)[, 1:3]
358
-####			if(!isPackageLoaded("ff")){
359
-####				save(cnSet, file=paste(cnFile, "_", PLATE, "_", CHR, ".rda", sep=""))
360
-####				save(cnParams, file=paste(outdir, "cnParams_", PLATE, "_", CHR, ".rda", sep=""))
361
-####			}
362
-##		} ## end of batch loop
363
-##	} ## end of chromosome loop
364
-####	if(isPackageLoaded("ff")) {
365
-####		close(cnParams)
366
-####		close(A); close(B)		
367
-####		close(CA); close(CB)
368
-####		save(CA, file=CAFile)
369
-####		save(CB, file=CBFile)
370
-####		close(calls); close(confs)
371
-####		return()
372
-####	}
373
-##	return(cnSet)
374
-##}
375
-
376
-
377
-
172
+	calls[index.snps, ] <- calls(callSet)
173
+	callsConfs[index.snps, ] <- assayData(callSet)[["callProbability"]]
174
+	fd <- data.frame(matrix(NA, nrow(calls), length(fvarLabels(callSet))))
175
+	fd[index.snps, ] <- fData(callSet)
176
+	rownames(fd) <- rownames(A)
177
+	colnames(fd) <- fvarLabels(callSet)
178
+	fD <- new("AnnotatedDataFrame",
179
+		  data=data.frame(fd),
180
+		  varMetadata=data.frame(labelDescription=colnames(fd), row.names=colnames(fd)))
181
+	superSet <- new("CNSet",
182
+			CA=matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A)),
183
+			CB=matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A)),
184
+			alleleA=A,
185
+			alleleB=B,
186
+			call=calls,
187
+			callProbability=callsConfs,
188
+			featureData=fD,
189
+			phenoData=phenoData(callSet),
190
+			experimentData=experimentData(callSet),
191
+			protocolData=protocolData(callSet),
192
+			annotation=annotation(callSet))
193
+	return(superSet)
194
+}
378 195
 
196
+harmonizeDimnamesTo <- function(object1, object2){
197
+	#object2 should be a subset of object 1
198
+	object2 <- object2[featureNames(object2) %in% featureNames(object1), ]
199
+	object1 <- object1[match(featureNames(object2), featureNames(object1)), ]
200
+	object1 <- object1[, match(sampleNames(object2), sampleNames(object1))]
201
+	stopifnot(all.equal(featureNames(object1), featureNames(object2)))
202
+	stopifnot(all.equal(sampleNames(object1), sampleNames(object2)))
203
+	return(object1)
204
+}
379 205
 
206
+crlmmCopynumber <- function(filenames, cnOptions, object, ...){
207
+	if(!missing(object)){
208
+		stopifnot(class(object) == "CNSet")
209
+		createIntermediateObjects <- FALSE
210
+	} else {
211
+		createIntermediateObjects <- TRUE
212
+		crlmmResults <- crlmmWrapper(filenames, cnOptions, ...)
213
+		snprmaResult <- crlmmResults[["snprmaResult"]]
214
+		cnrmaResult <- crlmmResults[["cnrmaResult"]]
215
+		callSet <- crlmmResults[["callSet"]]
216
+		rm(crlmmResults); gc()
217
+		annotation(callSet) <- cnOptions[["cdfName"]]
218
+		stopifnot(identical(featureNames(callSet), snprmaResult[["gns"]]))
219
+		path <- system.file("extdata", package=paste(annotation(callSet), "Crlmm", sep=""))	
220
+		load(file.path(path, "snpProbes.rda"))
221
+		snpProbes <- get("snpProbes")
222
+		load(file.path(path, "cnProbes.rda"))
223
+		cnProbes <- get("cnProbes")	
224
+		k <- grep("chr", colnames(snpProbes))
225
+		if(length(k) < 1) stop("chr or chromosome not in colnames(snpProbes)")
226
+	}
227
+	set.seed(cnOptions[["seed"]])  ##for reproducibility
228
+	chromosome <- cnOptions[["chromosome"]]
229
+	SNRmin <- cnOptions[["SNRmin"]]
230
+	for(CHR in chromosome){
231
+		cat("Chromosome ", CHR, "\n")
232
+		if(createIntermediateObjects){
233
+			snps <- rownames(snpProbes)[snpProbes[, k] == CHR]
234
+			cnps <- rownames(cnProbes)[cnProbes[, k] == CHR]
235
+			index.snps <- match(snps, featureNames(callSet))
236
+			index.nps <- match(cnps, rownames(cnrmaResult[["NP"]]))
237
+			if(!is.null(cnrmaResult)){
238
+				npI <- cnrmaResult$NP[index.nps, ]
239
+			} else npI <- NULL
240
+			snpI <- list(A=snprmaResult$A[index.snps, ],
241
+				     B=snprmaResult$B[index.snps, ],
242
+				     sns=snprmaResult$sns,
243
+				     gns=snprmaResult$gns[index.snps],
244
+				     SNR=snprmaResult$SNR,
245
+				     SKW=snprmaResult$SKW,
246
+				     mixtureParams=snprmaResult$mixtureParams,
247
+				     cdfName=snprmaResult$cdfName)
248
+			cnOptions[["batch"]] <- cnOptions[["batch"]][snpI[["SNR"]]  >= SNRmin]
249
+			cnSet <- combineIntensities(res=snpI,
250
+						    NP=npI,
251
+						    callSet=callSet[index.snps, ])
252
+			if(any(cnSet$SNR > SNRmin)){
253
+				message(paste("Excluding samples with SNR < ", SNRmin))
254
+				cnSet <- cnSet[, cnSet$SNR >= SNRmin]
255
+			}
256
+			rm(snpI, npI, snps, cnps, index.snps, index.nps); gc()
257
+			pData(cnSet)$batch <- cnOptions[["batch"]]
258
+			featureData(cnSet) <- lm.parameters(cnSet, cnOptions)
259
+		} else {
260
+			cnSet <- object
261
+		}
262
+		if(CHR != 24){		
263
+			cnSet <- computeCopynumber(cnSet, cnOptions)
264
+		} else{
265
+			message("Copy number estimates not available for chromosome Y.  Saving only the 'callSet' object for this chromosome")
266
+			alleleSet <- cnSet
267
+			save(alleleSet, file=file.path(cnOptions[["outdir"]], paste("alleleSet_", CHR, ".rda", sep="")))
268
+			rm(cnSet, alleleSet); gc()
269
+			next()
270
+		}
271
+		if(length(chromosome) == 1){
272
+			if(cnOptions[["save.cnset"]]){
273
+				save(cnSet, file=file.path(cnOptions[["outdir"]], paste("cnSet_", CHR, ".rda", sep="")))
274
+			}
275
+		}
276
+		if(length(chromosome) > 1){
277
+			save(cnSet, file=file.path(cnOptions[["outdir"]], paste("cnSet_", CHR, ".rda", sep="")))
278
+			rm(cnSet); gc()
279
+		} else {
280
+			return(cnSet)
281
+		}
282
+	}
283
+	saved.objects <- list.files(cnOptions[["outdir"]], pattern="cnSet", full.names=TRUE)		
284
+	return(saved.objects)
285
+}
380 286
 
381 287
 
382
-##loadIlluminaRG <- function(rgFile, crlmmFile, load.it, save.it,...){
383
-####	if(missing(rgFile)){
384
-####		##stop("must specify 'rgFile'.")
385
-####		rgFile <- file.path(dirname(crlmmFile), "rgFile.rda")
386
-####		message("rgFile not specified.  Using ", rgFile)
387
-####	}
388
-##	if(!load.it){
389
-##		RG <- readIdatFiles(...)
390
-##		if(save.it) save(RG, file=rgFile)
391
-##	}
392
-##	if(load.it & !file.exists(rgFile)){
393
-##		message("load.it is TRUE, but rgFile not present.  Attempting to read the idatFiles.")
394
-##		RG <- readIdatFiles(...)
395
-##		if(save.it) save(RG, file=rgFile)
396
-##	}
397
-##	if(load.it & file.exists(rgFile)){
398
-##		message("Loading RG file")
399
-##		load(rgFile)
400
-##		RG <- get("RG")
401
-##	}
402
-##	return(RG)
403
-##}
404
-##
405
-##loadIlluminaCallSet <- function(crlmmFile, snprmaFile, RG, load.it, save.it, cdfName){
406
-##	if(!file.exists(crlmmFile) | !load.it){		
407
-##		callSet <- crlmmIllumina(RG=RG,
408
-##					 cdfName=cdfName,
409
-##					 sns=sampleNames(RG),
410
-##					 returnParams=TRUE,
411
-##					 save.it=TRUE,
412
-##					 intensityFile=snprmaFile)
413
-##		if(save.it) save(callSet, file=crlmmFile)
414
-##	} else {
415
-##		message("Loading ", crlmmFile, "...")
416
-##		load(crlmmFile)
417
-##		callSet <- get("callSet")
418
-##	}
419
-##	protocolData(callSet) <- protocolData(RG)
420
-##	return(callSet)
421
-##}
422 288
 
289
+crlmmWrapper <- function(filenames, cnOptions, ...){
290
+	cdfName <- cnOptions[["cdfName"]]
291
+	load.it <- cnOptions[["load.it"]]
292
+	save.it <- cnOptions[["save.it"]]
293
+	splitByChr <- cnOptions[["splitByChr"]]
294
+	crlmmFile <- cnOptions[["crlmmFile"]]
295
+	intensityFile=cnOptions[["intensityFile"]]
296
+	rgFile=cnOptions[["rgFile"]]
297
+	##use.ff=cnOptions[["use.ff"]]
298
+	outdir <- cnOptions[["outdir"]]
299
+	if(missing(cdfName)) stop("cdfName is missing -- a valid cdfName is required.  See crlmm:::validCdfNames()")
300
+	platform <- whichPlatform(cdfName)
301
+	if(!(platform %in% c("affymetrix", "illumina"))){
302
+		stop("Only 'affymetrix' and 'illumina' platforms are supported at this time.")
303
+	} else {
304
+		message("Checking whether annotation package for the ", platform, " platform is available")
305
+		if(!isValidCdfName(cdfName)){
306
+			cat("FALSE\n")
307
+			stop(cdfName, " is not a valid entry.  See crlmm:::validCdfNames(platform)")
308
+		} else cat("TRUE\n")
309
+	}
310
+	if(missing(intensityFile)) stop("must specify 'intensityFile'.")
311
+	if(missing(crlmmFile)) stop("must specify 'crlmmFile'.")
312
+	if(platform == "illumina"){
313
+		if(missing(rgFile)){
314
+			##stop("must specify 'rgFile'.")
315
+			rgFile <- file.path(dirname(crlmmFile), "rgFile.rda")
316
+			message("rgFile not specified.  Using ", rgFile)
317
+		}
318
+		if(!load.it){
319
+			RG <- readIdatFiles(...)
320
+			if(save.it) save(RG, file=rgFile)
321
+		}
322
+		if(load.it & !file.exists(rgFile)){
323
+			message("load.it is TRUE, but rgFile not present.  Attempting to read the idatFiles.")
324
+			RG <- readIdatFiles(...)
325
+			if(save.it) save(RG, file=rgFile)
326
+		}
327
+		if(load.it & file.exists(rgFile)){
328
+			message("Loading RG file")
329
+			load(rgFile)
330
+			RG <- get("RG")
331
+		}
332
+	}
333
+	if(!(file.exists(dirname(crlmmFile)))) stop(dirname(crlmmFile), " does not exist.")
334
+	if(!(file.exists(dirname(intensityFile)))) stop(dirname(intensityFile), " does not exist.")
335
+	##---------------------------------------------------------------------------
336
+	## FIX
337
+	outfiles <- file.path(dirname(crlmmFile), paste("crlmmSetList_", 1:24, ".rda", sep=""))
338
+	if(load.it & all(file.exists(outfiles))){
339
+		load(outfiles[1])
340
+		crlmmSetList <- get("crlmmSetList")
341
+		if(!all(sampleNames(crlmmSetList) == basename(filenames))){
342
+			stop("load.it is TRUE, but sampleNames(crlmmSetList != basename(filenames))")
343
+		} else{
344
+			return("load.it is TRUE and 'crlmmSetList_<CHR>.rda' objects found. Nothing to do...")
345
+		}
346
+	}
347
+	if(load.it){
348
+		if(!file.exists(crlmmFile)){
349
+			message("load.it is TRUE, but ", crlmmFile, " does not exist.  Rerunning the genotype calling algorithm") 
350
+			load.it <- FALSE
351
+		}
352
+	}
353
+	if(platform == "affymetrix"){
354
+		if(!file.exists(crlmmFile) | !load.it){
355
+			callSet <- crlmm(filenames=filenames,
356
+					     cdfName=cdfName,
357
+					     save.it=TRUE,
358
+					     load.it=load.it,
359
+					     intensityFile=intensityFile)
360
+			message("Quantile normalizing the copy number probes...")		
361
+			cnrmaResult <- cnrma(filenames=filenames, cdfName=cdfName, outdir=outdir)
362
+			if(save.it){
363
+				message("Saving callSet and cnrmaResult to", crlmmFile)
364
+				save(callSet, cnrmaResult, file=crlmmFile)
365
+			}
366
+		} else {
367
+			message("Loading ", crlmmFile, "...")
368
+			load(intensityFile)				
369
+			load(crlmmFile)
370
+			callSet <- get("callSet")
371
+			cnrmaResult <- get("cnrmaResult")
372
+		}
373
+		scanDates <- data.frame(ScanDate=sapply(filenames, celfileDate))
374
+		protocolData(callSet) <- new("AnnotatedDataFrame",
375
+					     data=scanDates,
376
+					     varMetadata=data.frame(labelDescription=colnames(scanDates),
377
+					     row.names=colnames(scanDates)))
378
+	}
379
+	if(platform == "illumina"){
380
+		if(!file.exists(crlmmFile) | !load.it){		
381
+			callSet <- crlmmIllumina(RG=RG,
382
+						 cdfName=cdfName,
383
+						 sns=sampleNames(RG),
384
+						 returnParams=TRUE,
385
+						 save.it=TRUE,
386
+						 intensityFile=intensityFile)
387
+			if(save.it) save(callSet, file=crlmmFile)
388
+		} else {
389
+			message("Loading ", crlmmFile, "...")
390
+			load(crlmmFile)
391
+			callSet <- get("callSet")
392
+		}
393
+		protocolData(callSet) <- protocolData(RG)
394
+	}
395
+	if(platform=="affymetrix") {
396
+		protocolData(callSet)[["ScanDate"]] <- as.character(celDates(filenames))
397
+		sampleNames(protocolData(callSet)) <- sampleNames(callSet)
398
+	}	
399
+	load(intensityFile)
400
+	snprmaResult <- get("res")
401
+	if(platform=="illumina"){
402
+		if(exists("cnAB")){
403
+			np.A <- as.integer(cnAB$A)
404
+			np.B <- as.integer(cnAB$B)
405
+			np <- ifelse(np.A > np.B, np.A, np.B)
406
+			np <- matrix(np, nrow(cnAB$A), ncol(cnAB$A))
407
+			rownames(np) <- cnAB$gns
408
+			colnames(np) <- cnAB$sns
409
+			cnAB$NP <- np
410
+			##sampleNames(callSet) <- res$sns
411
+			sampleNames(callSet) <- cnAB$sns
412
+			cnrmaResult <- get("cnAB")
413
+		} else cnrmaResult <- NULL
414
+	}
415
+	if(platform=="affymetrix"){
416
+		if(exists("cnrmaResult")){
417
+			cnrmaResult <- get("cnrmaResult")
418
+		} else cnrmaResult <- NULL
419
+	}
420
+	crlmmResults <- list(snprmaResult=snprmaResult,
421
+			     cnrmaResult=cnrmaResult,
422
+			     callSet=callSet)
423 423
 
424
-##loadAffyCallSet <- function(filenames, confsFile, callsFile, snprmaFile, load.it, save.it,  cdfName){
425
-##
426
-####		if(save.it){
427
-####			message("Saving callSet to", callsFile)
428
-####			##save(callSet, cnrmaResult, file=callsFile)
429
-####			save(callSet, file=callsFile)
430
-####		}
431
-####	} else {
432
-####		message("Loading ", callsFile, "...")
433
-####		##load(snprmaFile)				
434
-####		load(callsFile)
435
-####		callSet <- get("callSet")
436
-####		##cnrmaResult <- get("cnrmaResult")
437
-####	}
438
-##
439
-##}
440
-##	if(platform=="affymetrix") {
441
-##		protocolData(callSet)[["ScanDate"]] <- as.character(celDates(filenames))
442
-##		sampleNames(protocolData(callSet)) <- sampleNames(callSet)
443
-##	}
424
+	if(!save.it){
425
+		message("Cleaning up")		
426
+		unlink(intensityFile)
427
+	}
428
+	return(crlmmResults)
429
+}
444 430
 
445
-##loadAffyCnrma <- function(filenames, cnrmaFile, cdfName, outdir, load.it, save.it, use.bigmemory=FALSE){
446
-##	if(!file.exists(cnrmaFile) | !load.it){	
447
-##		message("Quantile normalizing the copy number probes...")
448
-##		cnrmaResult <- cnrma2(filenames=filenames, cdfName=cdfName, outdir=outdir, cnrmaFile=cnrmaFile)
449
-##	} else cnrmaResult <- attach.big.matrix("NP.desc", outdir)
450
-##	return(cnrmaResult)
451
-##}
431
+validCdfNames <- function(){
432
+	c("genomewidesnp6",
433
+	  "genomewidesnp5",
434
+	  "human370v1c",
435
+	  "human370quadv3c",
436
+	  "human550v3b",
437
+	  "human650v3a",
438
+	  "human610quadv1b",
439
+	  "human660quadv1a",
440
+	  "human1mduov3b")
441
+}
452 442
 
453
-##loadIlluminaCnrma <- function(){
454
-##	if(exists("cnAB")){
455
-##		np.A <- as.integer(cnAB$A)
456
-##		np.B <- as.integer(cnAB$B)
457
-##		np <- ifelse(np.A > np.B, np.A, np.B)
458
-##		np <- matrix(np, nrow(cnAB$A), ncol(cnAB$A))
459
-##		rownames(np) <- cnAB$gns
460
-##		colnames(np) <- cnAB$sns
461
-##		cnAB$NP <- np
462
-##		##sampleNames(callSet) <- res$sns
463
-##		sampleNames(callSet) <- cnAB$sns
464
-##		cnrmaResult <- get("cnAB")
465
-##	} else cnrmaResult <- NULL
466
-##	return(cnrmaResult)
467
-##}
468
-##
469
-##crlmmWrapper <- function(filenames, cnOptions, ...){
470
-##	crlmmBatchSize <- cnOptions[["crlmmBatchSize"]]
471
-##	cdfName <- cnOptions[["cdfName"]]
472
-##	load.it <- cnOptions[["load.it"]]
473
-##	save.it <- cnOptions[["save.it"]]
474
-##	callsFile <- cnOptions[["callsFile"]]
475
-##	confsFile <- cnOptions[["confsFile"]]
476
-##	AFile=cnOptions[["AFile"]]
477
-##	BFile=cnOptions[["BFile"]]	
478
-##	snprmaFile=cnOptions[["snprmaFile"]]
479
-##	cnrmaFile=cnOptions[["cnrmaFile"]]
480
-##	rgFile=cnOptions[["rgFile"]]
481
-##	protocolFile <- cnOptions[["protocolFile"]]
482
-##	outdir <- cnOptions[["outdir"]]
483
-##	if(missing(cdfName)) stop("cdfName is missing -- a valid cdfName is required.  See crlmm:::validCdfNames()")
484
-##	platform <- whichPlatform(cdfName)
485
-##	if(!(platform %in% c("affymetrix", "illumina"))){
486
-##		stop("Only 'affymetrix' and 'illumina' platforms are supported at this time.")
487
-##	} else {
488
-##		if(!isValidCdfName(cdfName)){
489
-##			stop(cdfName, " is not a valid entry.  See crlmm:::validCdfNames(platform)")
490
-##		} else  message("Using the annotation package ", cdfName, " for this ", platform, " platform")
491
-##	}
492
-##	if(platform == "illumina") {
493
-##		if(!file.exists(rgFile)){
494
-##			if(load.it) message(rgFile, " does not exist and you chose to load.it.  Re-reading the R and G intensities from the IDAT files")
495
-##			sampleSheet <- cnOptions$sampleSheet
496
-##			ids <- cnOptions$ids
497
-##			arrayInfoColNames <- cnOptions$arrayInfoColNames
498
-##			highDensity <- cnOptions$highDensity
499
-##			##this is either an NChannelSet object, or a list of pointers to the ff objects
500
-##			RG <- readIdatFiles(sampleSheet=sampleSheet,
501
-##					    arrayNames=basename(filenames),
502
-##					    ids=ids,
503
-##					    path=dirname(filenames),
504
-##					    highDensity=highDensity,
505
-##					    fileExt=cnOptions$fileExt[1:2],
506
-##					    sep=cnOptions$fileExt[[3]],
507
-##					    saveDate=FALSE,  ## I do this earlier
508
-##					    verbose=cnOptions[["verbose"]],
509
-##					    protocolFile=protocolFile)
510
-##			if(save.it) save(RG, file=rgFile)
511
-##			##RG <- loadIlluminaRG(rgFile, callsFile, load.it, save.it)
512
-##		} else{
513
-##			if(!isPackageLoaded("ff")) {load(rgFile); RG <- get("RG")}
514
-##		}
515
-##	}
516
-##	if(!(file.exists(dirname(callsFile)))) stop(dirname(callsFile), " does not exist.")
517
-##	if(!(file.exists(dirname(snprmaFile)))) stop(dirname(snprmaFile), " does not exist.")
518
-##	if(platform == "affymetrix"){
519
-##		crlmm(filenames=filenames,
520
-##		      cdfName=cdfName,
521
-##		      save.it=TRUE,
522
-##		      load.it=load.it,
523
-##		      snprmaFile=snprmaFile,
524
-##		      callsFile=callsFile,
525
-##		      confsFile=confsFile,
526
-##		      AFile=AFile,
527
-##		      BFile=BFile,
528
-##		      crlmmBatchSize=crlmmBatchSize,
529
-##		      SNRMin=cnOptions[["SNRMin"]])
530
-##	}
531
-##	gc()
532
-##	if(platform == "illumina") {
533
-##		callSet <- crlmmIllumina(RG=RG,
534
-##					 cdfName=cdfName,
535
-##					 sns=sampleNames(RG),
536
-##					 returnParams=TRUE,
537
-##					 save.it=TRUE,
538
-##					 snprmaFile=snprmaFile,
539
-##					 callsFile=callsFile,
540
-##					 confsFile=confsFile,
541
-##					 AFile=AFile,
542
-##					 BFile=BFile)
543
-##		##callSet <- loadIlluminaCallSet(callsFile, snprmaFile, RG, load.it, save.it, cdfName)
544
-##	}
545
-##	if(platform == "affymetrix"){
546
-##		if(!file.exists(cnrmaFile) | !load.it){	
547
-##			message("Quantile normalizing the copy number probes...")
548
-##			## updates A matrix and saves cnrmaFile
549
-##			cnrma(filenames=filenames, cdfName=cdfName, outdir=outdir, verbose=cnOptions[["verbose"]], cnrmaFile=cnrmaFile, AFile=AFile, snprmaFile=snprmaFile)
550
-##		} 
551
-##	}
552
-####	if(!is.null(cnrmaResult)){
553
-####		for(CHR in chromosome){
554
-####			cat(CHR, " ")
555
-####			cnps <- rownames(cnProbes)[cnProbes[, k] == CHR]
556
-####			index.nps <- match(cnps, rownames(cnrmaResult[["NP"]]))
557
-####			NP <- cnrmaResult$NP[index.nps, ]
558
-####			save(NP, file=file.path(tmpdir, paste("NP_", CHR, ".rda", sep="")))
559
-####			rm(NP); gc()
560
-####		}
561
-####	}
562
-##	if(!save.it){
563
-##		message("Cleaning up")		
564
-##		unlink(snprmaFile); unlink(cnrmaFile)
565
-##	}
566
-##}
443
+isValidCdfName <- function(cdfName){
444
+	chipList <- validCdfNames()
445
+	result <- cdfName %in% chipList	
446
+	if(!(result)){
447
+		warning("cdfName must be one of the following: ",
448
+			chipList)
449
+	}
450
+	return(result)
451
+}
567 452
 
453
+whichPlatform <- function(cdfName){
454
+	index <- grep("genomewidesnp", cdfName)
455
+	if(length(index) > 0){
456
+		platform <- "affymetrix"
457
+	} else{
458
+		index <- grep("human", cdfName)
459
+		platform <- "illumina"
460
+	}
461
+	return(platform)
462
+}
568 463
 
569 464
 
570 465
 # steps: quantile normalize hapmap: create 1m_reference_cn.rda object
571
-##cnrma <- function(filenames, cdfName, sns, seed=1, verbose=FALSE, outdir){
572
-##	if(missing(cdfName)) stop("must specify cdfName")
573
-##	pkgname <- getCrlmmAnnotationName(cdfName)
574
-##	require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available")
575
-##	if (missing(sns)) sns <- basename(filenames)
576
-##        loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
577
-##	fid <- getVarInEnv("npProbesFid")
578
-##	set.seed(seed)
579
-##	idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything
580
-##	SKW <- vector("numeric", length(filenames))
581
-##
582
-##	
583
-##	NP <- matrix(NA, length(fid), length(filenames))
584
-##	verbose <- TRUE
585
-##	if(verbose){
586
-##		message("Processing ", length(filenames), " files.")
587
-##		if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3)
588
-##	}
589
-##	if(cdfName=="genomewidesnp6"){
590
-##		loader("1m_reference_cn.rda", .crlmmPkgEnv, pkgname)
591
-##	}
592
-##	if(cdfName=="genomewidesnp5"){
593
-##		loader("5.0_reference_cn.rda", .crlmmPkgEnv, pkgname)
594
-##	}
595
-##	reference <- getVarInEnv("reference")
596
-##	##if(!is.matrix(reference)) stop("target distribution for quantile normalization not available.")
597
-##	for(i in seq(along=filenames)){
598
-##		y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
599
-##		x <- log2(y[idx2])
600
-##		SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3)
601
-##		rm(x)
602
-##		NP[, i] <- as.integer(normalize.quantiles.use.target(y, target=reference))
603
-##		if (verbose)
604
-##			if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
605
-##			else cat(".")
606
-##	}
607
-##	dimnames(NP) <- list(names(fid), sns)
608
-##	##dimnames(NP) <- list(map[, "man_fsetid"], sns)
609
-##	res3 <- list(NP=NP, SKW=SKW)
610
-##	cat("\n")
611
-##	return(res3)
612
-##}
613
-
614
-cnrma <- function(object, filenames){
615
-	ops <- crlmmOptions(object)
616
-	cdfName <- annotation(object)
617
-	seed <- ops$seed
618
-	verbose <- ops$verbose
619
-	##cnrmaFile <- ops$cnrmaFile
620
-	A <- A(object)
466
+cnrma <- function(filenames, cdfName, sns, seed=1, verbose=FALSE, outdir){
621 467
 	if(missing(cdfName)) stop("must specify cdfName")
622 468
 	pkgname <- getCrlmmAnnotationName(cdfName)
623 469
 	require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available")
624
-	sns <- basename(filenames)
470
+	if (missing(sns)) sns <- basename(filenames)
625 471
         loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
626 472
 	fid <- getVarInEnv("npProbesFid")
627 473
 	set.seed(seed)
628 474
 	idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything
629 475
 	SKW <- vector("numeric", length(filenames))
630
-	index <- match(names(fid), featureNames(object))
631
-	stopifnot(identical(featureNames(object)[index], names(fid)))
632
-	if(length(index) < 1) stop("None of the names for the nonpolymorphic probes in the annotation package match the names stored in the snprmaFile.")
476
+##	if(bigmemory){
477
+##		NP <- filebacked.big.matrix(length(pnsa), length(filenames),
478
+##					    type="integer",
479
+##					    init=as.integer(0),
480
+##					    backingpath=outdir,
481
+##					    backingfile="NP.bin",
482
+##					    descriptorfile="NP.desc")
483
+##	} else{
484
+		NP <- matrix(NA, length(fid), length(filenames))
485
+##	}
486
+	verbose <- TRUE
633 487
 	if(verbose){
634 488
 		message("Processing ", length(filenames), " files.")
635 489
 		if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3)
... ...
@@ -641,21 +495,22 @@ cnrma <- function(object, filenames){
641 495
 		loader("5.0_reference_cn.rda", .crlmmPkgEnv, pkgname)
642 496
 	}
643 497
 	reference <- getVarInEnv("reference")
498
+	##if(!is.matrix(reference)) stop("target distribution for quantile normalization not available.")
644 499
 	for(i in seq(along=filenames)){
645 500
 		y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
646 501
 		x <- log2(y[idx2])
647 502
 		SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3)
648
-		rm(x); gc()
649
-		A[index, i] <- as.integer(normalize.quantiles.use.target(y, target=reference))
503
+		rm(x)
504
+		NP[, i] <- as.integer(normalize.quantiles.use.target(y, target=reference))
650 505
 		if (verbose)
651 506
 			if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
652 507
 			else cat(".")
653
-		rm(y); gc()
654 508
 	}
655
-	cat("\nDone\n")
656
-	pData(object)$SKW_nonpolymorphic <- SKW
657
-	object@assayData[["alleleA"]] <- A
658
-	return(object)
509
+	dimnames(NP) <- list(names(fid), sns)
510
+	##dimnames(NP) <- list(map[, "man_fsetid"], sns)
511
+	res3 <- list(NP=NP, SKW=SKW)
512
+	cat("\n")
513
+	return(res3)
659 514
 }
660 515
 
661 516
 getFlags <- function(object, PHI.THR){
... ...
@@ -673,7 +528,7 @@ getFlags <- function(object, PHI.THR){
673 528
 }
674 529
 
675 530
 
676
-instantiateObjects <- function(object){
531
+instantiateObjects <- function(object, cnOptions){
677 532
 	Ns <- matrix(NA, nrow(object), 5)
678 533
 	colnames(Ns) <- c("A", "B", "AA", "AB", "BB")
679 534
 	vA <- vB <- muB <- muA <- Ns
... ...
@@ -700,46 +555,130 @@ thresholdCopynumber <- function(object){
700 555
 	return(object)
701 556
 }
702 557
 
703
-##linear model parameters
704
-##lm.parameters <- function(object, cnOptions){
705
-##	fD <- fData(object)
706
-##	batch <- object$batch
707
-##	uplate <- unique(batch)
708
-##	parameterNames <- c(paste("tau2A", uplate, sep="_"),
709
-##			    paste("tau2B", uplate, sep="_"),
710
-##			    paste("sig2A", uplate, sep="_"),
711
-##			    paste("sig2B", uplate, sep="_"),
712
-##			    paste("nuA", uplate, sep="_"),
713
-##			    paste("nuA.se", uplate, sep="_"),			    
714
-##			    paste("nuB", uplate, sep="_"),
715
-##			    paste("nuB.se", uplate, sep="_"),			    			    
716
-##			    paste("phiA", uplate, sep="_"),
717
-##			    paste("phiA.se", uplate, sep="_"),			    
718
-##			    paste("phiB", uplate, sep="_"),
719
-##			    paste("phiB.se", uplate, sep="_"),			    
720
-##			    paste("phiAX", uplate, sep="_"),
721
-##			    paste("phiBX", uplate, sep="_"),			    
722
-##			    paste("corr", uplate, sep="_"),
723
-##			    paste("corrA.BB", uplate, sep="_"),
724
-##			    paste("corrB.AA", uplate, sep="_"))
725
-##	pMatrix <- data.frame(matrix(numeric(0),
726
-##				     nrow(object),
727
-##				     length(parameterNames)),
728
-##				     row.names=featureNames(object))
729
-##	colnames(pMatrix) <- parameterNames
730
-##	fD2 <- cbind(fD, pMatrix)
731
-##	new("AnnotatedDataFrame", data=fD2,
732
-##	    varMetadata=data.frame(labelDescription=colnames(fD2),
733
-##	    row.names=colnames(fD2)))
558
+##preprocessOptions <- function(crlmmFile="snpsetObject.rda",
559
+##			      intensityFile="normalizedIntensities.rda",
560
+##			      rgFile="rgFile.rda"){
561
+##
734 562
 ##}
735 563
 
564
+cnOptions <- function(outdir="./",
565
+		      cdfName,
566
+		      crlmmFile,
567
+		      intensityFile,
568
+		      rgFile="rgFile.rda",
569
+		      save.it=TRUE,
570
+		      save.cnset=TRUE,
571
+		      load.it=TRUE,
572
+		      splitByChr=TRUE,
573
+		      MIN.OBS=3,
574
+		      MIN.SAMPLES=10,
575
+		      batch=NULL,
576
+		      DF.PRIOR=50,
577
+		      bias.adj=FALSE,
578
+		      prior.prob=rep(1/4, 4),
579
+		      SNRmin=4,
580
+		      chromosome=1:24,
581
+		      seed=123,
582
+		      verbose=TRUE,
583
+		      GT.CONF.THR=0.99,
584
+		      PHI.THR=2^6,##used in nonpolymorphic fxn for training
585
+		      nHOM.THR=5, ##used in nonpolymorphic fxn for training
586
+		      MIN.NU=2^3,
587
+		      MIN.PHI=2^3,
588
+		      THR.NU.PHI=TRUE,
589
+		      thresholdCopynumber=TRUE,
590
+		      unlink=TRUE,
591
+		      ##hiddenMarkovModel=FALSE,
592
+		      ##circularBinarySegmentation=FALSE,
593
+##		      cbsOpts=NULL,
594
+		      ##hmmOpts=NULL,
595
+		      ...){
596
+	if(missing(cdfName)) stop("must specify cdfName")
597
+	if(!file.exists(outdir)){
598
+		message(outdir, " does not exist.  Trying to create it.")
599
+		dir.create(outdir, recursive=TRUE)
600
+	}
601
+	stopifnot(isValidCdfName(cdfName))
602
+##	if(hiddenMarkovModel){
603
+##		hmmOpts <- hmmOptions(...)
604
+##	}
605
+	if(missing(crlmmFile)){
606
+		crlmmFile <- file.path(outdir, "snpsetObject.rda")
607
+	}
608
+	if(missing(intensityFile)){
609
+		intensityFile <- file.path(outdir, "normalizedIntensities.rda")
610
+	}
611
+	if(is.null(batch))
612
+		stop("must specify batch -- should be the same length as the number of files")
613
+	list(outdir=outdir,
614
+	     cdfName=cdfName,
615
+	     crlmmFile=crlmmFile,
616
+	     intensityFile=intensityFile,
617
+	     rgFile=file.path(outdir, rgFile),
618
+	     save.it=save.it,
619
+	     save.cnset=save.cnset,
620
+	     load.it=load.it,
621
+	     splitByChr=splitByChr,
622
+	     MIN.OBS=MIN.OBS,
623
+	     MIN.SAMPLES=MIN.SAMPLES,
624
+	     batch=batch,
625
+	     DF.PRIOR=DF.PRIOR,
626
+	     GT.CONF.THR=GT.CONF.THR,
627
+	     prior.prob=prior.prob,
628
+	     bias.adj=bias.adj,
629
+	     SNRmin=SNRmin,
630
+	     chromosome=chromosome,
631
+	     seed=seed,
632
+	     verbose=verbose,
633
+	     PHI.THR=PHI.THR,
634
+	     nHOM.THR=nHOM.THR,
635
+	     MIN.NU=MIN.NU,
636
+	     MIN.PHI=MIN.PHI,
637
+	     THR.NU.PHI=THR.NU.PHI,
638
+	     thresholdCopynumber=thresholdCopynumber,
639
+	     unlink=unlink)
640
+##	     hiddenMarkovModel=hiddenMarkovModel,
641
+##	     circularBinarySegmentation=circularBinarySegmentation,
642
+##	     cbsOpts=cbsOpts,
643
+##	     hmmOpts=hmmOpts) ##remove SnpSuperSet object
644
+}
645
+
646
+##linear model parameters
647
+lm.parameters <- function(object, cnOptions){
648
+	fD <- fData(object)
649
+	batch <- object$batch
650
+	uplate <- unique(batch)
651
+	parameterNames <- c(paste("tau2A", uplate, sep="_"),
652
+			    paste("tau2B", uplate, sep="_"),
653
+			    paste("sig2A", uplate, sep="_"),
654
+			    paste("sig2B", uplate, sep="_"),
655
+			    paste("nuA", uplate, sep="_"),
656
+			    paste("nuA.se", uplate, sep="_"),			    
657
+			    paste("nuB", uplate, sep="_"),
658
+			    paste("nuB.se", uplate, sep="_"),			    			    
659
+			    paste("phiA", uplate, sep="_"),
660
+			    paste("phiA.se", uplate, sep="_"),			    
661
+			    paste("phiB", uplate, sep="_"),
662
+			    paste("phiB.se", uplate, sep="_"),			    
663
+			    paste("phiAX", uplate, sep="_"),
664
+			    paste("phiBX", uplate, sep="_"),			    
665
+			    paste("corr", uplate, sep="_"),
666
+			    paste("corrA.BB", uplate, sep="_"),
667
+			    paste("corrB.AA", uplate, sep="_"))
668
+	pMatrix <- data.frame(matrix(numeric(0),
669
+				     nrow(object),
670
+				     length(parameterNames)),
671
+				     row.names=featureNames(object))
672
+	colnames(pMatrix) <- parameterNames
673
+	fD2 <- cbind(fD, pMatrix)
674
+	new("AnnotatedDataFrame", data=fD2,
675
+	    varMetadata=data.frame(labelDescription=colnames(fD2),
676
+	    row.names=colnames(fD2)))
677
+}
678
+
736 679
 nonpolymorphic <- function(object, cnOptions, tmp.objects){
737
-	chromosome <- cnOptions[["chromosome"]]
738 680
 	batch <- unique(object$batch)
739 681
 	CHR <- unique(chromosome(object))
740
-	verbose <- cnOptions[["verbose"]]
741
-	if(CHR != chromosome[1]) verbose <- FALSE
742
-	if(batch != unique(cnOptions[["batch"]])[1]) verbose <- FALSE
743 682
 	goodSnps <- function(object, PHI.THR, tmp.objects, nAA.THR, nBB.THR){
744 683
 		Ns <- tmp.objects[["Ns"]]
745 684
 		##Ns <- get("Ns", envir)
... ...
@@ -865,9 +804,10 @@ nonpolymorphic <- function(object, cnOptions, tmp.objects){
865 804
 		
866 805
 		THR.NU.PHI <- cnOptions$THR.NU.PHI
867 806
 		if(THR.NU.PHI){
807
+			verbose <- cnOptions$verbose
868 808
 			##Assign values to object
869 809
 			object <- pr(object, "nuA", batch, nuA)
870
-			object <- pr(object, "phiA", batch, phiA)
810
+			object <- pr(object, "phiA", batch, phiA)			
871 811
 			if(verbose) message("Thresholding nu and phi")
872 812
 			object <- thresholdModelParams(object, cnOptions)
873 813
 		} else {
... ...
@@ -885,6 +825,7 @@ nonpolymorphic <- function(object, cnOptions, tmp.objects){
885 825
 
886 826
 		THR.NU.PHI <- cnOptions$THR.NU.PHI
887 827
 		if(THR.NU.PHI){
828
+			verbose <- cnOptions$verbose
888 829
 			##Assign values to object
889 830
 			object <- pr(object, "nuA", batch, nuA)
890 831
 			object <- pr(object, "phiA", batch, phiA)			
... ...
@@ -905,27 +846,6 @@ nonpolymorphic <- function(object, cnOptions, tmp.objects){
905 846
 	return(object)
906 847
 }
907 848
 
908
-nonpolymorphic.poe <- function(object, cnOptions, tmp.object){
909
-	require(metaArray)
910
-	nps <- log2(A(object)[!isSnp(object), ])
911
-	nps <- (nps-rowMedians(nps))/rowMAD(nps)
912
-	runAvg <- apply(nps, 2, myfilter, filter=rep(1/10, 10))
913
-	rownames(runAvg) <- featureNames(object)[!isSnp(object)]
914
-	rm.nas <- rowSums(is.na(runAvg)) == 0
915
-	runAvg <- runAvg[rm.nas, ]
916
-	
917
-	poe.scale <- poe.em(runAvg, cl=rep(0, ncol(nps)))$data
918
-	pinegg <- piposg <- poe.scale
919
-	piposg[piposg < 0] <- 0
920
-	pinegg[pinegg > 0] <- 0
921
-	pinegg <- pinegg*-1
922
-	pm.em <- 1*pinegg + 2*(1-pinegg-piposg) + 3*piposg
923
-	rownames(pm.em) <- rownames(runAvg)
924
-	CA(object)[match(rownames(pm.em), featureNames(object)), ] <- pm.em
925
-	##CA(object)[!isSnp(object), ] <- pm.em
926
-	return(object)
927
-}
928
-
929 849
 ##sufficient statistics on the intensity scale
930 850
 withinGenotypeMoments <- function(object, cnOptions, tmp.objects){
931 851
 	normal <- tmp.objects[["normal"]]
... ...
@@ -1018,12 +938,12 @@ withinGenotypeMoments <- function(object, cnOptions, tmp.objects){
1018 938
 	return(tmp.objects)
1019 939
 }
1020 940
 
941
+
1021 942
 oneBatch <- function(object, cnOptions, tmp.objects){
1022 943
 	muA <- tmp.objects[["muA"]]
1023 944
 	muB <- tmp.objects[["muB"]]
1024 945
 	Ns <- tmp.objects[["Ns"]]
1025 946
 	CHR <- unique(chromosome(object))
1026
-	PLATE <- unique(object$batch)
1027 947
 	##---------------------------------------------------------------------------
1028 948
 	## Impute sufficient statistics for unobserved genotypes (plate-specific)
1029 949
 	##---------------------------------------------------------------------------
... ...
@@ -1039,8 +959,7 @@ oneBatch <- function(object, cnOptions, tmp.objects){
1039 959
 	size <- min(5000, length(index.complete))
1040 960
 	if(size == 5000) index.complete <- sample(index.complete, 5000)
1041 961
 	if(length(index.complete) < 200){
1042
-		warning("There are too few samples in plate ", PLATE, " to estimate the copy number for chromosome ", CHR, ".  CA,CB values are NAs")
1043
-		return(tmp.objects)
962
+		stop("fewer than 200 snps pass criteria for predicting the sufficient statistics")
1044 963
 	}
1045 964
 	index <- tmp.objects[["index"]]
1046 965
 	index[[1]] <- which(Ns[, "AA"] == 0 & (Ns[, "AB"] >= MIN.OBS & Ns[, "BB"] >= MIN.OBS))
... ...
@@ -1058,16 +977,15 @@ oneBatch <- function(object, cnOptions, tmp.objects){
1058 977
 		muA[index[[j]], j+2] <- mus[, 1]
1059 978
 		muB[index[[j]], j+2] <- mus[, 2]
1060 979
 	}
980
+	nobsA <- Ns[, "A"] > 10
981
+	nobsB <- Ns[, "B"] > 10
982
+	notMissing <- !(is.na(muA[, "A"]) | is.na(muA[, "B"]) | is.na(muB[, "A"]) | is.na(muB[, "B"]))
983
+	complete <- list()
984
+	complete[[1]] <- which(correct.orderA & correct.orderB & nobsA & notMissing) ##be selective here
985
+	complete[[2]] <- which(correct.orderA & correct.orderB & nobsB & notMissing) ##be selective here	
986
+	size <- min(5000, length(complete[[1]]))
987
+	if(size == 5000) complete <- lapply(complete, function(x) sample(x, size))
1061 988
 	if(CHR == 23){
1062
-		nobsA <- Ns[, "A"] > MIN.OBS
1063
-		nobsB <- Ns[, "B"] > MIN.OBS
1064
-		notMissing <- !(is.na(muA[, "A"]) | is.na(muA[, "B"]) | is.na(muB[, "A"]) | is.na(muB[, "B"]))
1065
-		complete <- list()
1066
-		complete[[1]] <- which(correct.orderA & correct.orderB & nobsA & notMissing) ##be selective here
1067
-		complete[[2]] <- which(correct.orderA & correct.orderB & nobsB & notMissing) ##be selective here
1068
-		if(length(complete[[1]]) < 1 | length(complete[[2]]) < 1) stop("Too few observations to estimate the center for 'A' and 'B'  clusters on chrom X")		
1069
-		size <- min(5000, length(complete[[1]]))
1070
-		if(size == 5000) complete <- lapply(complete, function(x) sample(x, size))
1071 989
 		index <- list()
1072 990
 		index[[1]] <- which(Ns[, "A"] == 0)
1073 991
 		index[[2]] <- which(Ns[, "B"] == 0)
... ...
@@ -1093,6 +1011,7 @@ oneBatch <- function(object, cnOptions, tmp.objects){
1093 1011
 ##	snpflags <- envir[["snpflags"]]
1094 1012
 	snpflags <- index[[1]] | index[[2]] | index[[3]]
1095 1013
 ##	snpflags[, p] <- index[[1]] | index[[2]] | index[[3]]
1014
+
1096 1015
 	##---------------------------------------------------------------------------
1097 1016
 	## Two genotype clusters not observed -- would sequence help? (didn't seem to)
1098 1017
 	## 1. extract index of complete data
... ...
@@ -1399,8 +1318,6 @@ posteriorProbability.snps <- function(object, cnOptions, tmp.objects=list()){
1399 1318
 	return(list(tmp.objects, posteriorProb))
1400 1319
 }
1401 1320
 
1402
-
1403
-
1404 1321
 biasAdj <- function(object, cnOptions, tmp.objects){
1405 1322
 	gender <- object$gender
1406 1323
 	CHR <- unique(chromosome(object))
... ...
@@ -1559,319 +1476,110 @@ thresholdModelParams <- function(object, cnOptions){
1559 1476
 	return(object)
1560 1477
 }
1561 1478
 
1562
-##computeCopynumber.SnpSuperSet <- function(object, cnOptions){
1563
-####	use.ff <- cnOptions[["use.ff"]]
1564
-####	if(!use.ff){
1565
-####		object <- as(object, "CrlmmSet")
1566
-####	} else	object <- as(object, "CrlmmSetFF")
1567
-##	bias.adj <- cnOptions[["bias.adj"]]
1568
-##	##must be FALSE to initialize parameters
1569
-##	cnOptions[["bias.adj"]] <- FALSE
1570
-##	## Add linear model parameters to the CrlmmSet object
1571
-##	featureData(object) <- lm.parameters(object, cnOptions)
1572
-##	if(!isValidCdfName(annotation(object))) stop(annotation(object), " not supported.")
1573
-##	object <- as(object, "CNSet")
1574
-##	object <- computeCopynumber.CNSet(object, cnOptions)
1575
-##	if(bias.adj==TRUE){## run a second time
1576
-##		object <- computeCopynumber.CNSet(object, cnOptions)
1577
-##	}
1578
-##	return(object)
1579
-##}
1580
-
1581
-
1582
-
1583
-## cite metaArray: Choi/Ghosh
1584
-crlmm_poe <- function(mat, cl, threshold=0.00001, every=100,use.mad=TRUE) {
1585
-  mat <- as.matrix(mat)
1586
-  nc <- ncol(mat)
1587
-  nr <- nrow(mat)
1588
-  if(all(is.null(cl))) cl <- rep(0,dim(mat)[2])
1589
-  cat("Number of Samples:", nc, "\n")
1590
-  cat("Number of Genes:", nr, "\n")
1591
-  cat("This model assumes that the samples are centered and scaled.\n")
1592
-  new.mat <- matrix(0,nr,nc)
1593
-  med.expr <- apply(mat,1,median,na.rm=TRUE)
1594
-  new.mat <- sweep(mat,1,med.expr)
1595
-  for(i in 1:nr) {
1596
-    if(sum(is.na(as.numeric(mat[i,]))) > .25 * nc ) stop("More than 25% missing values for gene", i, "\n")
1597
-    zvec <- crlmm_fit.em(as.numeric(mat[i,]), cl, threshold=threshold, use.mad=use.mad) 
1598
-    new.mat[i,] <- zvec$expr
1599
-    if(i%%every==0) cat(i, "genes fitted\n")   
1600
-  }
1601
-  rownames(new.mat) <- rownames(mat)
1602
-  colnames(new.mat) <- colnames(mat)
1603
-  return(list(data=new.mat))
1604
-}
1605
-
1606
-crlmm_fit.em <- function(x, cl, threshold=1e-6, use.mad=TRUE){
1607
-	sup <- sum(cl==0) > 0 && sum(cl==1) > 0
1608
-	x <- as.numeric(x)
1609
-	len <- length(x)
1610
-	z <- rep(0,length(x))
1611
-	log.lik <- 1000
1612
-	lik.rec <- NULL
1613
-	num.iter <- 0
1614
-	a <- min(x,na.rm=TRUE); b <- max(x,na.rm=TRUE)
1615
-	err <- err.old <- 1
1616
-	if(!sup) {
1617
-		z <- find.init(x)
1618
-		Pi <- mean(z)
1619
-		mu <- sum((1-z)*x) / sum(1-z)
1620
-		sigmasq <- sum((1-z)*(x-mu)^2) / sum(1-z)
1621
-		tt <- len
1622
-		while(err > threshold) {
1623
-			num.iter <- num.iter + 1
1624
-			log.lik.old <- log.lik
1625
-			err.old <- err
1626
-			## E Step
1627
-			est.u <- dunif(x,a,b)
1628
-			est.u.p <- Pi * est.u
1629
-			est.n <- dnorm(x,mu,sqrt(sigmasq))
1630
-			est.n.p <- (1-Pi) * est.n
1631
-			z <- est.u.p / (est.n.p + est.u.p)
1632
-
1633
-			if(any(is.na(z))) stop("NA occurred in imputation\n")
1634
-			## M Step
1635
-			mu <- sum((1-z)*x) / sum(1-z)
1636
-			sigmasq <- sum((1-z)*((x-mu)^2)) / sum(1-z)
1637
-			Pi <- sum(z) / len    
1638
-			sgn.z <- ifelse(x < mu, -1, 1)
1639
-
1640
-			## Likelihood
1641
-			est.u <- dunif(x,a,b)
1642
-			est.u.p <- Pi * est.u
1643
-			est.n <- dnorm(x,mu,sqrt(sigmasq))
1644
-			est.n.p <- (1-Pi) * est.n   
1645
-			log.lik <- sum(log(est.u.p + est.n.p))
1646
-			err <- abs(log.lik.old - log.lik)
1647
-			if(num.iter != 1) lik.rec[num.iter-1] <- log.lik
1648
-		}    
1649
-	}  else {
1650
-		tt <- sum(cl==0)
1651
-		z[cl==0] <- runif(tt,0,1)
1652
-		Pi <- mean(z[cl==0])
1653
-		mu <- sum((1-z)*x) / sum(1-z)
1654
-		sigmasq <- sum((1-z)*(x-mu)^2) / sum(1-z)
1655
-
1656
-		while(err > threshold) {
1657
-			num.iter <- num.iter + 1
1658
-			log.lik.old <- log.lik
1659
-			err.old <- err
1660
-
1661
-			## E Step
1662
-			est.u <- dunif(x,a,b)
1663
-			est.u.p <- Pi * est.u
1664
-			est.n <- dnorm(x,mu,sqrt(sigmasq))
1665
-			est.n.p <- (1-Pi) * est.n
1666
-			z <- est.u.p / (est.n.p + est.u.p)
1667
-			z[cl==1] <- 0
1668
-			if(any(is.na(z))) stop("NA occurred in imputation\n")
1669
-			## M Step
1670
-			mu <- sum((1-z)*x) / sum(1-z)
1671
-			sigmasq <- sum((1-z)*((x-mu)^2)) / sum(1-z)
1672
-			Pi <- mean(z[cl==0])    
1673
-			sgn.z <- ifelse(x < mu, -1, 1)
1674
-
1675
-			## Likelihood
1676
-			est.u <- dunif(x,a,b)
1677
-			est.u.p <- Pi * est.u
1678
-			est.n <- dnorm(x,mu,sqrt(sigmasq))
1679
-			est.n.p <- (1-Pi) * est.n   
1680
-			log.lik <- sum(log(est.u.p[cl==0] + est.n.p[cl==0])) + sum(log(est.n[cl==1]))
1681
-			err <- abs(log.lik.old - log.lik)
1682
-			if(num.iter != 1) lik.rec[num.iter-1] <- log.lik
1683
-		}    
1479
+computeCopynumber.SnpSuperSet <- function(object, cnOptions){
1480
+##	use.ff <- cnOptions[["use.ff"]]
1481
+##	if(!use.ff){
1482
+##		object <- as(object, "CrlmmSet")
1483
+##	} else	object <- as(object, "CrlmmSetFF")
1484
+	bias.adj <- cnOptions[["bias.adj"]]
1485
+	##must be FALSE to initialize parameters
1486
+	cnOptions[["bias.adj"]] <- FALSE
1487
+	## Add linear model parameters to the CrlmmSet object
1488
+	featureData(object) <- lm.parameters(object, cnOptions)
1489
+	if(!isValidCdfName(annotation(object))) stop(annotation(object), " not supported.")
1490
+	object <- as(object, "CNSet")
1491
+	object <- computeCopynumber.CNSet(object, cnOptions)
1492
+	if(bias.adj==TRUE){## run a second time
1493
+		object <- computeCopynumber.CNSet(object, cnOptions)
1684 1494
 	}
1685
-	est.u.p <- Pi * dunif(x,a,b) 
1686
-	est.n.p <- (1-Pi) * dnorm(x,mu,sqrt(sigmasq))
1687
-	est.u.mu <- Pi * dunif(mu,a,b)
1688
-	est.n.mu <- (1-Pi) * dnorm(mu,mu,sqrt(sigmasq))
1689
-	z0 <- est.u.p / (est.n.p + est.u.p)
1690
-	zmu <- est.u.mu / (est.u.mu + est.n.mu)
1691
-	sgn.z0 <- ifelse(x < mu, -1, 1) 
1692
-					#loc <- (max(lik.rec) != lik.rec[length(lik.rec)])
1693
-	expr <- rep(0, len)
1694
-	expr <- (z0 - zmu) * sgn.z0
1695
-	return(list(expr=expr, a=a, b=b, sigmasq=sigmasq, mu=mu, Pi=Pi, lik.rec=lik.rec))
1495
+	return(object)
1696 1496
 }
1697 1497
 
1698 1498
 
1699
-
1700
-.copyNumber <- function(object){
1701
-	I <- isSnp(object)
1702
-	CA <- CA(object)
1703
-	CB <- CB(object)
1704
-	CN <- CA + CB
1705
-	##For nonpolymorphic probes, CA is the total copy number
1706
-	CN[!I, ] <- CA(object)[!I, ]
1707
-	CN
1499
+computeCopynumber.CNSet <- function(object, cnOptions){
1500
+	CHR <- unique(chromosome(object))
1501
+	batch <- object$batch
1502
+	if(length(batch) != ncol(object)) stop("Batch must be the same length as the number of samples")
1503
+	MIN.SAMPLES <- cnOptions$MIN.SAMPLES
1504
+	verbose <- cnOptions$verbose
1505
+	for(i in seq(along=unique(batch))){
1506
+		PLATE <- unique(batch)[i]
1507
+		if(sum(batch == PLATE) < MIN.SAMPLES) {
1508
+			message("Skipping plate ", PLATE)
1509
+			next()
1510
+		}		
1511
+		object.batch <- object[, batch==PLATE]
1512
+		tmp.objects <- instantiateObjects(object.batch,
1513
+						  cnOptions)
1514
+		bias.adj <- cnOptions$bias.adj
1515
+		if(bias.adj & ncol(object) <= 15){
1516
+			warning(paste("bias.adj is TRUE, but too few samples to perform this step"))
1517
+			cnOptions$bias.adj <- bias.adj <- FALSE
1518
+		}
1519
+		if(bias.adj){
1520
+			if(verbose) message("Dropping samples with low posterior prob. of normal copy number (samples dropped is locus-specific)")
1521
+			tmp.objects <- biasAdjNP(object.batch, cnOptions, tmp.objects)
1522
+			tmp.objects <- biasAdj(object.batch, cnOptions, tmp.objects)
1523
+			if(verbose) message("Recomputing location and scale parameters")
1524
+		}
1525
+		##update tmp.objects
1526
+		tmp.objects <- withinGenotypeMoments(object.batch,
1527
+						     cnOptions=cnOptions,
1528
+						     tmp.objects=tmp.objects)
1529
+		object.batch <- locationAndScale(object.batch, cnOptions, tmp.objects)
1530
+		tmp.objects <- oneBatch(object.batch,
1531
+					cnOptions=cnOptions,
1532
+					tmp.objects=tmp.objects)
1533
+		##coefs calls nuphiAllele.
1534
+		object.batch <- coefs(object.batch, cnOptions, tmp.objects)
1535
+		##nuA=getParam(object.batch, "nuA", PLATE)
1536
+		THR.NU.PHI <- cnOptions$THR.NU.PHI
1537
+		if(THR.NU.PHI){
1538
+			verbose <- cnOptions$verbose
1539
+			if(verbose) message("Thresholding nu and phi")
1540
+			object.batch <- thresholdModelParams(object.batch, cnOptions)
1541
+		}		
1542
+		if(verbose) message("\nAllele specific copy number")	
1543
+		object.batch <- polymorphic(object.batch, cnOptions, tmp.objects)
1544
+		if(any(!isSnp(object))){ ## there are nonpolymorphic probes
1545
+			if(verbose) message("\nCopy number for nonpolymorphic probes...")	
1546
+			object.batch <- nonpolymorphic(object.batch, cnOptions, tmp.objects)
1547
+		}
1548
+		##---------------------------------------------------------------------------
1549
+		##Note: the replacement method multiples by 100
1550
+		CA(object)[, batch==PLATE] <- CA(object.batch)
1551
+		CB(object)[, batch==PLATE] <- CB(object.batch)
1552
+		##---------------------------------------------------------------------------
1553
+		##update-the plate-specific parameters for copy number
1554
+		object <- pr(object, "nuA", PLATE, getParam(object.batch, "nuA", PLATE))
1555
+		object <- pr(object, "nuA.se", PLATE, getParam(object.batch, "nuA.se", PLATE))
1556
+		object <- pr(object, "nuB", PLATE, getParam(object.batch, "nuB", PLATE))
1557
+		object <- pr(object, "nuB.se", PLATE, getParam(object.batch, "nuB.se", PLATE))
1558
+		object <- pr(object, "phiA", PLATE, getParam(object.batch, "phiA", PLATE))
1559
+		object <- pr(object, "phiA.se", PLATE, getParam(object.batch, "phiA.se", PLATE))
1560
+		object <- pr(object, "phiB", PLATE, getParam(object.batch, "phiB", PLATE))
1561
+		object <- pr(object, "phiB.se", PLATE, getParam(object.batch, "phiB.se", PLATE))
1562
+		object <- pr(object, "tau2A", PLATE, getParam(object.batch, "tau2A", PLATE))
1563
+		object <- pr(object, "tau2B", PLATE, getParam(object.batch, "tau2B", PLATE))				
1564
+		object <- pr(object, "sig2A", PLATE, getParam(object.batch, "sig2A", PLATE))
1565
+		object <- pr(object, "sig2B", PLATE, getParam(object.batch, "sig2B", PLATE))		
1566
+		object <- pr(object, "phiAX", PLATE, as.numeric(getParam(object.batch, "phiAX", PLATE)))
1567
+		object <- pr(object, "phiBX", PLATE, as.numeric(getParam(object.batch, "phiBX", PLATE)))
1568
+		object <- pr(object, "corr", PLATE, getParam(object.batch, "corr", PLATE))
1569
+		object <- pr(object, "corrA.BB", PLATE, getParam(object.batch, "corrA.BB", PLATE))
1570
+		object <- pr(object, "corrB.AA", PLATE, getParam(object.batch, "corrB.AA", PLATE))		
1571
+		rm(object.batch, tmp.objects); gc();
1572
+	}
1573
+	object <- object[order(chromosome(object), position(object)), ]
1574
+	if(cnOptions[["thresholdCopynumber"]]){
1575
+		object <- thresholdCopynumber(object)
1576
+	}
1577
+	return(object)
1708 1578
 }
1709 1579
 
1710 1580
 
1711 1581
 
1712
-##setMethod("ellipse", "CNSet", function(x, copynumber, ...){
1713
-ellipse.CNSet <- function(x, copynumber, batch, ...){
1714
-	if(nrow(x) > 1) stop("only 1 snp at a time")
1715
-	##batch <- unique(x$batch)
1716
-	if(missing(batch)){
1717
-		stop("must specify batch")
1718
-	}
1719
-	if(length(batch) > 1) stop("batch variable not unique")
1720
-	nuA <- getParam(x, "nuA", batch)
1721
-	nuB <- getParam(x, "nuB", batch)
1722
-	phiA <- getParam(x, "phiA", batch)
1723
-	phiB <- getParam(x, "phiB", batch)
1724
-	tau2A <- getParam(x, "tau2A", batch)
1725
-	tau2B <- getParam(x, "tau2B", batch)
1726
-	sig2A <- getParam(x, "sig2A", batch)
1727
-	sig2B <- getParam(x, "sig2B", batch)
1728
-	corrA.BB <- getParam(x, "corrA.BB", batch)
1729
-	corrB.AA <- getParam(x, "corrB.AA", batch)
1730
-	corr <- getParam(x, "corr", batch)
1731
-	for(CN in copynumber){
1732
-		for(CA in 0:CN){
1733
-			CB <- CN-CA
1734
-			A.scale <- sqrt(tau2A*(CA==0) + sig2A*(CA > 0))
1735
-			B.scale <- sqrt(tau2B*(CB==0) + sig2B*(CB > 0))
1736
-			scale <- c(A.scale, B.scale)
1737
-			if(CA == 0 & CB > 0) rho <- corrA.BB
1738
-			if(CA > 0 & CB == 0) rho <- corrB.AA
1739
-			if(CA > 0 & CB > 0) rho <- corr
1740
-			if(CA == 0 & CB == 0) rho <- 0
1741
-			lines(ellipse(x=rho, centre=c(log2(nuA+CA*phiA),
1742
-					     log2(nuB+CB*phiB)),
1743
-				      scale=scale), ...)
1744
-		}
1745
-	}
1746
-}
1747 1582
 
1748 1583
 
1749
-##initializeCNFiles <- function(cnOpts){
1750
-##	load.it <- cnOptions[["load.it"]]
1751
-##	outdir <- cnOptions[["outdir"]]
1752
-##	snprmaFile <- cnOptions[["snprmaFile"]]
1753
-##	protocolFile <- cnOptions[["protocolFile"]]
1754
-##	CAFile <- cnOpts[["CAFile"]]
1755
-##	CBFile <- cnOpts[["CBFile"]]
1756
-##	path <- system.file("extdata", package=paste(cnOptions[["cdfName"]], "Crlmm", sep=""))
1757
-##	load(file.path(path, "snpProbes.rda"))
1758
-##	snpProbes <- get("snpProbes")
1759
-##	load(file.path(path, "cnProbes.rda"))
1760
-##	cnProbes <- get("cnProbes")		
1761
-##	load(snprmaFile)
1762
-##	res <- get("res")
1763
-##	load(protocolFile)
1764
-##	if(isPackageLoaded("ff")){
1765
-##		if(file.exists(CAFile)){
1766
-##			load(CAFile)
1767
-##			if(is.null(dim(CA)) | !all(dim(CA) == dim(A))) {
1768
-##				unlink(CAFile)
1769
-##				CA <- initializeBigMatrix(nr, nc)
1770
-##				save(CA, file=CAFile)
1771
-##				CA[,] <- NA
1772
-##
1773
-##			}
1774
-##		} else {
1775
-##			CA <- initializeBigMatrix(nr, nc)
1776
-##			save(CA, file=CAFile)
1777
-##			CA[,] <- NA
1778
-##		}
1779
-##		if(file.exists(CBFile)){
1780
-##			load(CBFile)
1781
-##			if(is.null(dim(CB)) | !all(dim(CB) == dim(A))) {			     
1782
-##				CB <- initializeBigMatrix(nr, nc)
1783
-##				save(CB, file=CBFile)					
1784
-##				CB[,] <- NA
1785
-##			}
1786
-##		} else{
1787
-##			CB <- initializeBigMatrix(nr, nc)
1788
-##			CB[,] <- NA
1789
-##			save(CB, file=CBFile)
1790
-##		}
1791
-##		open(CA)
1792
-##		open(CB)
1793
-##		colnames(CA) <- colnames(CB) <- sampleNames(protocoldata)
1794
-##	}
1795
-##	close(CA); close(CB)
1796
-##
1797
-##
1798
-##}
1799
-##collect <- function(cnOptions, CHR, PLATE){
1800
-##	snprmaFile <- cnOptions[["snprmaFile"]]
1801
-##	cnFile <- cnOptions[["cnFile"]]
1802
-##	cnrmaFile <- cnOptions[["cnrmaFile"]]
1803
-##	snprmaFile <- cnOptions[["snprmaFile"]]
1804
-##	callsFile <- cnOptions[["callsFile"]]
1805
-##	confsFile <- cnOptions[["confsFile"]]
1806
-##	protocolFile <- cnOptions[["protocolFile"]]
1807
-##	AFile <- cnOptions[["AFile"]]
1808
-##	BFile <- cnOptions[["BFile"]]
1809
-##	CAFile <- cnOptions[["CAFile"]]
1810
-##	CBFile <- cnOptions[["CBFile"]]
1811
-##	if(isPackageLoaded("ff")){
1812
-##		message("collecting ff objects to create an FFSet instance")
1813
-##		load(protocolFile)
1814
-##		load(snprmaFile)
1815
-##		load(file.path(cnOptions[["outdir"]], "cnParams.rda"))
1816
-##		res <- get("res")
1817
-##		sampleStats <- data.frame(SKW=res$SKW,
1818
-##					  SNR=res$SNR,
1819
-##					  ##mixtureParams=res$mixtureParams,
1820
-##					  gender=res$gender,
1821
-##					  batch=cnOptions[["batch"]])##)
1822
-##		pD <- new("AnnotatedDataFrame",
1823
-##			  data=sampleStats,
1824
-##			  varMetadata=data.frame(labelDescription=colnames(sampleStats)))	
1825
-##		load(file.path(cnOptions[["outdir"]], "featureDataFF.rda"))
1826
-##		open(featureDataFF)
1827
-##		load(AFile); open(A)
1828
-##		load(BFile); open(B)
1829
-##		load(CAFile); open(CA)
1830
-##		load(CBFile); open(CB)
1831
-##		load(callsFile); open(calls)
1832
-##		load(confsFile); open(confs)
1833
-##		sampleNames(pD) <- sampleNames(protocoldata)
1834
-##
1835
-##		if(any(colnames(CA) != colnames(A)) | any(colnames(CA) != sampleNames(protocoldata)))
1836
-##			colnames(calls) <- colnames(confs) <- colnames(CA) <- colnames(CB) <- colnames(B) <- colnames(A) <- sampleNames(protocoldata)
1837
-####		if(any(rownames(CA) != fns))
1838
-####			rownames(CA) <- rownames(CB) <- rownames(B) <- rownames(calls) <- rownames(confs) <- rownames(A)
1839
-##		ffSet <- new("FFSet",
1840
-##			     alleleA=A,
1841
-##			     alleleB=B,
1842
-##			     call=calls,
1843
-##			     callProbability=confs,
1844
-##			     CA=CA,
1845
-##			     CB=CB,
1846
-##			     phenoData=pD,
1847
-##			     protocolData=protocoldata,
1848
-##			     ##annotation=cnOptions[["cdfName"]],
1849
-##			     genomeAnnotation=featureDataFF,
1850
-##			     linearModelParam=cnParams)
1851
-##		##featureNames(ffSet) <- rownames(featureDataFF)
1852
-##		##sampleNames(ffSet) <- sampleNames(protocoldata)
1853
-##		return(ffSet)
1854
-##	} else{
1855
-##		if(missing(PLATE)) stop("must specify PLATE")
1856
-##		load(paste(cnOptions[["cnFile"]], "_", PLATE, "_", CHR, ".rda", sep=""))
1857
-##		return(cnSet)
1858
-##	}
1859
-##}
1860 1584
 
1861 1585
 
1862
-lmPlot <- function(object, plate, fill,...){
1863
-	stopifnot(nrow(object) == 1)
1864
-	Ai <- split(A(object)[object$batch==plate], snpCall(object)[object$batch==plate])
1865
-	Bi <- split(B(object)[object$batch==plate], snpCall(object)[object$batch==plate])
1866
-	##xlim <- ylim <- c(9,12)
1867
-	boxplot(Ai, xaxt="n", ylab=expression(I[A]), main=featureNames(object), ...)
1868
-	legend("topleft", bty="n", legend=c("AA", "AB", "BB"), fill=fill)
1869
-	nuA <- getParam(object, "nuA", plate)
1870
-	phiA <- getParam(object, "phiA", plate)
1871
-	segments(3, nuA, 1, nuA+2*phiA, lwd=2, col="blue")
1872
-	boxplot(Bi, xaxt="n", ylab=expression(I[B]), ...)
1873
-	nuB <- getParam(object, "nuB", plate)
1874
-	phiB <- getParam(object, "phiB", plate)	
1875
-	segments(1, nuB,  3.0, nuB+2*phiB, lwd=2, col="blue")
1876
-	axis(1, at=1:3, labels=c("AA", "AB", "BB"))
1877
-}
... ...
@@ -1,186 +1,48 @@
1
-setMethod("crlmm", "AffymetrixAlleleSet", function(object, filenames){
2
-	message("Initializing AlleleSet")
3
-	obj <- construct(object, filenames)
4
-	obj <- snprma(obj, filenames)
5
-	ops <- crlmmOptions(obj)
6
-	if(ops$copynumber & ops$nonpolymorphic)
7
-		obj <- cnrma(obj, filenames)
8
-	message("Initializing CallSet")
9
-	object <- as(obj, "CallSet")
10
-	rm(obj); gc()
11
-	callSet <- crlmm.batch(object)
12
-
13
-})
14
-setMethod("crlmm", "IlluminaAlleleSet", function(object, filenames){
15
-	RG <- construct(object, filenames)
16
-	RG <- readIdatFiles(RG, filenames)
17
-	XY <- RGtoXY(RG)
18
-	rm(RG); gc()
19
-	storageMode(XY) <- "environment"
20
-	stripNorm <- crlmmOptions(XY)$readOpts[["stripNorm"]]
21
-	useTarget <- crlmmOptions(XY)$readOpts[["useTarget"]]
22
-	verbose <- crlmmOptions(XY)$verbose
23
-	if(stripNorm)
24
-		XY = stripNormalize(XY, useTarget=useTarget, verbose=verbose)
25
-	## See the coercion step ... this puts X -> A and Y -> B
26
-	alleleSet <- as(XY, "IlluminaAlleleSet")
27
-	##Do i need to pass zero.  Is the only thing updated the mixtureParams and SNR?
28
-	alleleSet <- preprocessInfinium2(alleleSet, zero=Z(XY))
29
-	rm(XY); gc()
30
-	##Initialize call set
31
-	callSet <- as(alleleSet, "CallSet")
32
-	callSet <- crlmm.batch(callSet)
33
-	return(callSet)
34
-})
35
-
36
-##setMethod("getFeatureData", "SmallDataOptions", function(object){
37
-##        if(object$platform != "Illumina") stop("only for Illumina platforms")
38
-##        message("reading first idat file to extract feature data")
39
-##        fileExt <- object$illuminaOpts[["fileExt"]]
40
-##        filenames <- object$filenames
41
-##        grnfile = paste(filenames[1], fileExt$green, sep=fileExt$sep)
42
-##        if(!file.exists(grnfile)){
43
-##                stop(paste(grnfile, " does not exist. Check fileExt argument"))
44
-##        }
45
-##        G <- readIDAT(grnfile)
46
-##        idsG = rownames(G$Quants)
47
-##        nr <- length(idsG)
48
-##        fD <- new("AnnotatedDataFrame", data=data.frame(row.names=idsG))##, varMetadata=data.frame(labelDescript
49
-##        return(fD)
50
-##})
51
-
52
-##crlmm.IlluminaAlleleSet <- function(object, filenames){
53
-##	message("Initializing CallSet")
54
-##	object <- as(object, "IlluminaCallSet")
55
-##	rm(obj); gc()
56
-##	##Call in batches to reduce ram
57
-##	nc <- ncol(object)
58
-##	object$gender <- crlmmOptions(object)$crlmmOpts[["gender"]]
59
-##	if(is.null(object$gender)) object$gender <- rep(NA, nc)
60
-##	cOps <- crlmmOptions(object)$crlmmOpts
61
-##	BS <- cOps$batchSize
62
-##	gc()
63
-##	if(nc > BS){
64
-##		N <- ceiling(nc/BS)
65
-##		S <- ceiling(nc/N)
66
-##		colindex <- split(1:nc, rep(1:nc, each=S, length.out=nc))
67
-##	} else {
68
-##		colindex <- list(1:nc)
69
-##	}
70
-##	if(length(colindex) > 1)
71
-##		message("Calling genotypes in batches of size ", length(colindex[[1]]), " to reduce required RAM")
72
-##	row.index <- which(isSnp(object)==1 | is.na(isSnp(object)))
73
-##	if(length(colindex) > 1){
74
-##		for(i in seq(along=colindex)){
75
-##			col.index <- colindex[[i]]
76
-##			tmp <- crlmm.AffymetrixCallSet(object[row.index, col.index])