Browse code

updates to namespace, cnrma-functions

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@43541 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 14/12/2009 14:12:43
Showing8 changed files

... ...
@@ -1,14 +1,14 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.5.11
4
+Version: 1.5.12
5 5
 Date: 2009-12-03
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>
7 7
 Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
8 8
 Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 5.0, 6.0, and Illumina platforms
9 9
 License: Artistic-2.0
10
-Depends: methods, Biobase (>= 2.5.5), R (>= 2.10.0), oligoClasses (>= 1.9.14), IRanges
11
-Imports: affyio, preprocessCore, utils, stats, genefilter, splines, mvtnorm, ellipse, methods, SNPchip, oligoClasses, VanillaICE (>= 1.9.1)
10
+Depends: methods, Biobase (>= 2.5.5), R (>= 2.10.0)
11
+Imports: affyio, preprocessCore, utils, stats, genefilter, splines, mvtnorm, oligoClasses (>= 1.9.9), ellipse, methods, SNPchip, oligoClasses, IRanges
12 12
 Suggests: hapmapsnp5, hapmapsnp6, genomewidesnp5Crlmm (>= 1.0.2),genomewidesnp6Crlmm (>= 1.0.2), snpMatrix
13 13
 Collate: AllGenerics.R
14 14
          methods-AlleleSet.R
... ...
@@ -1,6 +1,5 @@
1 1
 useDynLib("crlmm", .registration=TRUE)
2 2
 
3
-
4 3
 ## Biobase
5 4
 importClassesFrom(Biobase, AnnotatedDataFrame, AssayData, eSet,
6 5
 		  SnpSet, NChannelSet, MIAME, Versioned, VersionedBiobase, Versions)
... ...
@@ -13,34 +12,24 @@ importMethodsFrom(Biobase, annotation, "annotation<-",
13 12
                   "phenoData<-", protocolData, "protocolData<-",
14 13
                   pubMedIds, rowMedians, sampleNames, storageMode,
15 14
                   "storageMode<-", updateObject, varLabels)
16
-
17 15
 importFrom(Biobase, assayDataElement, assayDataElementNames,
18 16
            assayDataElementReplace, assayDataNew, classVersion, validMsg)
19 17
 
20 18
 ## oligoClasses
21 19
 importFrom(oligoClasses, position, chromosome)
22 20
 importClassesFrom(oligoClasses, SnpSuperSet, AlleleSet)
23
-
24
-importMethodsFrom(oligoClasses, allele, calls, "calls<-", confs,
25
-                  "confs<-", cnConfidence, "cnConfidence<-", copyNumber)
26
-
27
- 
28
-## IRanges
29
-importClassesFrom(IRanges, RangedData, IRanges)
30
-importFrom(IRanges, IRanges, RleList, RangedData, width)
31
-importMethodsFrom(IRanges, Rle, start, end, runValue)
32
-
33
-##importMethodsFrom(methods, initialize, show)
34
-
35
-##importFrom(methods, "@<-", callNextMethod, new, validObject)
21
+importMethodsFrom(oligoClasses, 
22
+		  allele,
23
+                  calls, "calls<-",  
24
+          	  confs, "confs<-",
25
+	          cnConfidence, "cnConfidence<-", 
26
+		  copyNumber, isSnp, chromosome, position, CA, "CA<-", CB, "CB<-",
27
+		  rangedData, segmentData, "rangedData<-", "segmentData<-")
28
+importFrom(oligoClasses, chromosome2integer, celfileDate)
36 29
 
37 30
 importFrom(graphics, abline, axis, layout, legend, mtext, par, plot,
38 31
            polygon, rect, segments, text, points, boxplot)
39 32
 
40
-importFrom(SNPchip, chromosome2integer)
41
-
42
-importFrom(VanillaICE, viterbi, transitionProbability)
43
-
44 33
 importFrom(stats, update)
45 34
 
46 35
 importFrom(grDevices, grey)
... ...
@@ -59,48 +48,33 @@ importFrom(mvtnorm, dmvnorm)
59 48
 
60 49
 importFrom(ellipse, ellipse)
61 50
 
62
-exportMethods(A, B,
63
-		 CA, "CA<-", CB, "CB<-",
64
-		 isSnp) 
65
-
66
-
67
-exportMethods(chromosome, position)
68
-
69
-export(
70
-       celDates, 
71
-      crlmm, 
51
+export(celDates,  ##move to oligoClasses
52
+       crlmm, 
72 53
        cnOptions,
73 54
        calls,
74 55
        "calls<-",
75
-       confs,
76
-       "confs<-",
77
-       copyNumber, 
78
-       emissionPr,
79
-       "emissionPr<-",
80
-       list.celfiles, 
81
-      snprma)
82
-
83
-exportMethods(computeHmm, 
84
-              rangedData,
85
-             "rangedData<-",
86
-	     segmentData,
87
-	     "segmentData<-")
88
-
89
-export(hmmOptions,
90
-       crlmmCopynumber)
91
-
92
-export(ellipse) ##, ellipse.CopyNumberSet, getParam.SnpSuperSet)
93
-export(viterbi.CNSet, 
94
-		      combineIntensities,
95
-		      whichPlatform,
96
-		      isValidCdfName,
97
-		      splitByChromosome,
56
+     snprma)
57
+
58
+##exportMethods(computeHmm) ##move to VanillaICE
59
+exportMethods(A, B, copyNumber)
60
+export(crlmmCopynumber) 
61
+export(ellipse) 
62
+export(##viterbi.CNSet, ##move to VanillaICE
63
+	combineIntensities,
64
+	whichPlatform,
65
+	isValidCdfName,
98 66
 	crlmmWrapper,
99
-       computeHmm.CNSet, addFeatureAnnotation.SnpSuperSet,
100
-       readIdatFiles,
101
-       withinGenotypeMoments,  trioOptions, hmm.SnpSuperSet, trioOptions, computeBpiEmission.SnpSuperSet,
102
-       isBiparental.matrix, isBiparental.SnpSuperSet, hapmapPedFile, isSnp.AlleleSet,
103
-       findFatherMother)
104
-exportMethods(start, end, width)
67
+##	computeHmm.CNSet, ##move to VanillaICE
68
+        readIdatFiles,
69
+        withinGenotypeMoments,  
70
+##	trioOptions, ##move to VanillaICE
71
+##	hmm.SnpSuperSet, ##move to VanillaICE
72
+##	computeBpiEmission.SnpSuperSet,##move to VanillaICE
73
+##  	isBiparental.matrix, ##move to VanillaICE
74
+##	isBiparental.SnpSuperSet, ##move to VanillaICE
75
+##	hapmapPedFile, ##move to VanillaICE
76
+##       findFatherMother, 
77
+	 locationAndScale, getParam.SnpSuperSet)
78
+
105 79
 
106 80
 
... ...
@@ -1,31 +1,17 @@
1 1
 setGeneric("A", function(object) standardGeneric("A"))
2 2
 setGeneric("B", function(object) standardGeneric("B"))
3 3
 setGeneric("A<-", function(object, value) standardGeneric("A<-"))
4
-setGeneric("addFeatureAnnotation", function(object, ...) standardGeneric("addFeatureAnnotation"))
5 4
 setGeneric("B<-", function(object, value) standardGeneric("B<-"))
5
+setGeneric("addFeatureAnnotation", function(object, ...) standardGeneric("addFeatureAnnotation"))
6 6
 
7
-setGeneric("CA", function(object) standardGeneric("CA"))
8
-setGeneric("CB", function(object) standardGeneric("CB"))
9
-setGeneric("CA<-", function(object, value) standardGeneric("CA<-"))
10
-setGeneric("CB<-", function(object, value) standardGeneric("CB<-"))
11
-setGeneric("emissionPr", function(object) standardGeneric("emissionPr"))
12
-setGeneric("emissionPr<-", function(object, value) standardGeneric("emissionPr<-"))
13 7
 setGeneric("getParam", function(object, name, batch) standardGeneric("getParam"))
14 8
 setGeneric("GT<-", function(object, value) standardGeneric("GT<-"))
15 9
 setGeneric("cnIndex", function(object) standardGeneric("cnIndex"))
16 10
 setGeneric("cnNames", function(object) standardGeneric("cnNames"))
17 11
 setGeneric("computeCopynumber", function(object, cnOptions) standardGeneric("computeCopynumber"))
18
-setGeneric("computeEmission", function(object, hmmOptions) standardGeneric("computeEmission"))
19
-setGeneric("computeHmm", function(object, hmmOptions) standardGeneric("computeHmm"))
20
-
21
-setGeneric("GT", function(object, ...) standardGeneric("GT"))
12
+##setGeneric("GT", function(object, ...) standardGeneric("GT"))
22 13
 setGeneric(".harmonizeDimnames", function(object) standardGeneric(".harmonizeDimnames"))
23
-setGeneric("isSnp", function(object) standardGeneric("isSnp"))
24 14
 setGeneric("pr", function(object, name, batch, value) standardGeneric("pr"))
25
-setGeneric("rangedData", function(object) standardGeneric("rangedData"))
26
-setGeneric("rangedData<-", function(object, value) standardGeneric("rangedData<-"))
27
-setGeneric("segmentData", function(object) standardGeneric("segmentData"))
28
-setGeneric("segmentData<-", function(object, value) standardGeneric("segmentData<-"))
29 15
 setGeneric("snpIndex", function(object) standardGeneric("snpIndex"))
30 16
 setGeneric("snpNames", function(object) standardGeneric("snpNames"))
31
-setGeneric("splitByChromosome", function(object, ...) standardGeneric("splitByChromosome"))
17
+##setGeneric("splitByChromosome", function(object, ...) standardGeneric("splitByChromosome"))
... ...
@@ -127,21 +127,7 @@ nuphiAllele <- function(object, allele, Ystar, W, tmp.objects, cnOptions){
127 127
 	return(object)
128 128
 }
129 129
 
130
-celDates <- function(celfiles){
131
-	if(!all(file.exists(celfiles))) stop("1 or more cel file does not exist")
132
-	celdates <- vector("character", length(celfiles))
133
-	celtimes <- vector("character", length(celfiles))
134
-	for(i in seq(along=celfiles)){
135
-		if(i %% 100 == 0) cat(".")
136
-		tmp <- read.celfile.header(celfiles[i], info="full")$DatHeader
137
-		tmp <- strsplit(tmp, "\ +")
138
-		celdates[i] <- tmp[[1]][6]
139
-		celtimes[i] <- tmp[[1]][7]
140
-	}
141
-	tmp <- paste(celdates, celtimes)
142
-	celdts <- strptime(tmp, "%m/%d/%y %H:%M:%S")
143
-	return(celdts)
144
-}
130
+
145 131
 
146 132
 predictGender <- function(res, cdfName="genomewidesnp6", SNRMin=5){
147 133
 	pkgname <- paste(cdfName, "Crlmm", sep="")
... ...
@@ -168,12 +154,10 @@ predictGender <- function(res, cdfName="genomewidesnp6", SNRMin=5){
168 154
 	return(gender)
169 155
 }
170 156
 
171
-
172
-combineIntensities <- function(res, cnrmaResult, cdfName){
157
+combineIntensities <- function(res, NP=NULL, callSet){
173 158
 	rownames(res$B) <- rownames(res$A) <- res$gns
174 159
 	colnames(res$B) <- colnames(res$A) <- res$sns
175
-	if(!is.null(cnrmaResult)){
176
-		NP <- cnrmaResult$NP
160
+	if(!is.null(NP)){
177 161
 		blank <- matrix(NA, nrow(NP), ncol(NP))
178 162
 		dimnames(blank) <- dimnames(NP)
179 163
 		A <- rbind(res$A, NP)
... ...
@@ -182,41 +166,34 @@ combineIntensities <- function(res, cnrmaResult, cdfName){
182 166
 		A <- res$A
183 167
 		B <- res$B
184 168
 	}
185
-	ABset <- new("AlleleSet",
186
-		     alleleA=A,
187
-		     alleleB=B,
188
-		     annotation=cdfName)
189
-	return(ABset)
169
+	dimnames(B) <- dimnames(A)
170
+	index.snps <- match(res$gns, rownames(A))
171
+	callsConfs <- calls <- matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A))
172
+	
173
+	calls[index.snps, ] <- calls(callSet)
174
+	callsConfs[index.snps, ] <- assayData(callSet)[["callProbability"]]
175
+	fd <- data.frame(matrix(NA, nrow(calls), length(fvarLabels(callSet))))
176
+	fd[index.snps, ] <- fData(callSet)
177
+	rownames(fd) <- rownames(A)
178
+	colnames(fd) <- fvarLabels(callSet)
179
+	fD <- new("AnnotatedDataFrame",
180
+		  data=data.frame(fd),
181
+		  varMetadata=data.frame(labelDescription=colnames(fd), row.names=colnames(fd)))
182
+	superSet <- new("CNSet",
183
+			CA=matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A)),
184
+			CB=matrix(NA, nrow(A), ncol(A), dimnames=dimnames(A)),
185
+			alleleA=A,
186
+			alleleB=B,
187
+			call=calls,
188
+			callProbability=callsConfs,
189
+			featureData=fD,
190
+			phenoData=phenoData(callSet),
191
+			experimentData=experimentData(callSet),
192
+			protocolData=protocolData(callSet),
193
+			annotation=annotation(callSet))
194
+	return(superSet)
190 195
 }
191 196
 
192
-##harmonizeSnpSet <- function(callSet, ABset, cdfName){
193
-##	blank <- matrix(NA, length(cnNames(ABset, cdfName)), ncol(ABset))
194
-##	rownames(blank) <- cnNames(ABset, cdfName)
195
-##	colnames(blank) <- sampleNames(ABset)
196
-##	crlmmCalls <- rbind(calls(callSet), blank)
197
-##	crlmmConf <- rbind(confs(callSet), blank)
198
-##	fD <- as.matrix(fData(callSet))
199
-##	fD2 <- matrix(NA, nrow(blank), ncol(fD))
200
-##	rownames(fD2) <- rownames(blank)
201
-##	fD <- rbind(fD, fD2)
202
-##	aD <- assayDataNew("lockedEnvironment",
203
-##			   calls=crlmmCalls,
204
-##			   callProbability=crlmmConf)
205
-##	##Make callSet the same dimension as ABset
206
-##	fD <- new("AnnotatedDataFrame",
207
-##		  data=data.frame(fD),
208
-##		  varMetadata=fvarMetadata(callSet))
209
-##	callSet <- new("SnpSet",
210
-##			   call=crlmmCalls,
211
-##			   callProbability=crlmmConf,
212
-##			   featureData=fD,
213
-##			   phenoData=phenoData(callSet),
214
-##			   protocolData=protocolData(ABset),
215
-##			   annotation=annotation(ABset))
216
-##	stopifnot(all.equal(dimnames(callSet), dimnames(ABset)))
217
-##	callSet
218
-##}
219
-
220 197
 harmonizeDimnamesTo <- function(object1, object2){
221 198
 	#object2 should be a subset of object 1
222 199
 	object2 <- object2[featureNames(object2) %in% featureNames(object1), ]
... ...
@@ -227,10 +204,79 @@ harmonizeDimnamesTo <- function(object1, object2){
227 204
 	return(object1)
228 205
 }
229 206
 
230
-crlmmCopynumber <- function(filenames, cnOptions, ...){
231
-	crlmmWrapper(filenames, cnOptions, ...)
207
+crlmmCopynumber <- function(filenames, cnOptions, object, ...){
208
+	if(!missing(object)){
209
+		stopifnot(class(object) == "CNSet")
210
+		createIntermediateObjects <- FALSE
211
+	} else {
212
+		createIntermediateObjects <- TRUE
213
+		crlmmResults <- crlmmWrapper(filenames, cnOptions, ...)
214
+		snprmaResult <- crlmmResults[["snprmaResult"]]
215
+		cnrmaResult <- crlmmResults[["cnrmaResult"]]
216
+		callSet <- crlmmResults[["callSet"]]
217
+		rm(crlmmResults); gc()
218
+		annotation(callSet) <- cnOptions[["cdfName"]]
219
+		stopifnot(identical(featureNames(callSet), snprmaResult[["gns"]]))
220
+		path <- system.file("extdata", package=paste(annotation(callSet), "Crlmm", sep=""))	
221
+		load(file.path(path, "snpProbes.rda"))
222
+		snpProbes <- get("snpProbes")
223
+		load(file.path(path, "cnProbes.rda"))
224
+		cnProbes <- get("cnProbes")	
225
+		k <- grep("chr", colnames(snpProbes))
226
+		if(length(k) < 1) stop("chr or chromosome not in colnames(snpProbes)")
227
+	}
228
+	chromosome <- cnOptions[["chromosome"]]
229
+	for(CHR in chromosome){
230
+		cat("Chromosome ", CHR, "\n")
231
+		if(createIntermediateObjects){
232
+			snps <- rownames(snpProbes)[snpProbes[, k] == CHR]
233
+			cnps <- rownames(cnProbes)[cnProbes[, k] == CHR]
234
+			index.snps <- match(snps, featureNames(callSet))
235
+			index.nps <- match(cnps, rownames(cnrmaResult[["NP"]]))
236
+			if(!is.null(cnrmaResult)){
237
+				npI <- cnrmaResult$NP[index.nps, ]
238
+			} else npI <- NULL
239
+			snpI <- list(A=snprmaResult$A[index.snps, ],
240
+				     B=snprmaResult$B[index.snps, ],
241
+				     sns=snprmaResult$sns,
242
+				     gns=snprmaResult$gns[index.snps],
243
+				     SNR=snprmaResult$SNR,
244
+				     SKW=snprmaResult$SKW,
245
+				     mixtureParams=snprmaResult$mixtureParams,
246
+				     cdfName=snprmaResult$cdfName)
247
+			cnSet <- combineIntensities(res=snpI,
248
+						    NP=npI,
249
+						    callSet=callSet[index.snps, ])
250
+			rm(snpI, npI, snps, cnps, index.snps, index.nps); gc()
251
+			pData(cnSet)$batch <- cnOptions[["batch"]]
252
+			featureData(cnSet) <- lm.parameters(cnSet, cnOptions)
253
+		} else {
254
+			cnSet <- object
255
+		}
256
+		if(CHR != 24){		
257
+			cnSet <- computeCopynumber(cnSet, cnOptions)
258
+		} else{
259
+			message("Copy number estimates not available for chromosome Y.  Saving only the 'callSet' object for this chromosome")
260
+			alleleSet <- cnSet
261
+			save(alleleSet, file=file.path(cnOptions[["outdir"]], paste("alleleSet_", CHR, ".rda", sep="")))
262
+			rm(cnSet, alleleSet); gc()
263
+			next()
264
+		}
265
+		if(cnOptions[["save.cnset"]]){
266
+			save(cnSet, file=file.path(cnOptions[["outdir"]], paste("cnSet_", CHR, ".rda", sep="")))
267
+		}
268
+		if(length(chromosome) > 1){
269
+			rm(cnSet); gc()
270
+		} else {
271
+			return(cnSet)
272
+		}
273
+	}
274
+	saved.objects <- list.files(cnOptions[["outdir"]], pattern="cnSet", full.names=TRUE)		
275
+	return(saved.objects)
232 276
 }
233 277
 
278
+
279
+
234 280
 crlmmWrapper <- function(filenames, cnOptions, ...){
235 281
 	cdfName <- cnOptions[["cdfName"]]
236 282
 	load.it <- cnOptions[["load.it"]]
... ...
@@ -338,7 +384,12 @@ crlmmWrapper <- function(filenames, cnOptions, ...){
338 384
 		}
339 385
 		protocolData(callSet) <- protocolData(RG)
340 386
 	}
387
+	if(platform=="affymetrix") {
388
+		protocolData(callSet)[["ScanDate"]] <- as.character(celDates(filenames))
389
+		sampleNames(protocolData(callSet)) <- sampleNames(callSet)
390
+	}	
341 391
 	load(intensityFile)
392
+	snprmaResult <- get("res")
342 393
 	if(platform=="illumina"){
343 394
 		if(exists("cnAB")){
344 395
 			np.A <- as.integer(cnAB$A)
... ...
@@ -358,77 +409,15 @@ crlmmWrapper <- function(filenames, cnOptions, ...){
358 409
 			cnrmaResult <- get("cnrmaResult")
359 410
 		} else cnrmaResult <- NULL
360 411
 	}
361
-	ABset <- combineIntensities(get("res"), cnrmaResult, cdfName)
362
-	if(platform=="affymetrix") {
363
-		protocolData(callSet)[["ScanDate"]] <- as.character(celDates(filenames))
364
-		sampleNames(protocolData(callSet)) <- sampleNames(callSet)
365
-	}
366
-	##callSet <- harmonizeSnpSet(callSet, ABset, cdfName)
367
-	##Make callSet the same dimension as ABset
368
-	if(nrow(callSet) != nrow(ABset)){
369
-		callsConfs <- calls <- matrix(NA, nrow(ABset), ncol(ABset))
370
-		dimnames(callsConfs) <- dimnames(calls) <- list(featureNames(ABset), sampleNames(ABset))
371
-		fd <- data.frame(matrix(NA, nrow(calls), length(fvarLabels(callSet))))
372
-		rownames(fd) <- featureNames(ABset)
373
-		colnames(fd) <- fvarLabels(callSet)
374
-		fD <- new("AnnotatedDataFrame",
375
-			  data=data.frame(fd),
376
-			  varMetadata=data.frame(labelDescription=colnames(fd), row.names=colnames(fd)))
377
-		callSet2 <- new("SnpSet",
378
-				call=calls,
379
-				callProbability=callsConfs,
380
-				featureData=fD,
381
-				phenoData=phenoData(callSet),
382
-				experimentData=experimentData(callSet),
383
-				protocolData=protocolData(callSet),
384
-				annotation=annotation(callSet))
385
-		## match the featureNames of the original callSet (snps only) to the ABset object
386
-		index <- match(featureNames(callSet), featureNames(ABset))
387
-		if(any(is.na(index))) stop("missing values in match")
388
-		## Next, assign the calls to the appropriate subset of the full callSet (includes NPs)
389
-		calls(callSet2)[index, ] <- calls(callSet)
390
-		confs(callSet2)[index, ] <- confs(callSet)
391
-		fData(callSet2)[index, ] <- fData(callSet)
392
-		callSet <- callSet2
393
-		rm(callsConfs, calls, callSet2, fd, fD); gc()
394
-	} else{
395
-		callSet <- callSet[match(featureNames(ABset), featureNames(callSet)), ]
396
-	}
397
-	stopifnot(all.equal(featureNames(callSet), featureNames(ABset)))
398
-	stopifnot(all.equal(sampleNames(callSet), sampleNames(ABset)))
399
-	## create object with all of the assay data elements
400
-	## add an indicator to featureData for whether it is a snp or a np probe
401
-	## add annotation
402
-	## how should we combine the phenoData?
403
-	pd1 <- phenoData(ABset)
404
-	pd2 <- phenoData(callSet)
405
-	pd <- cbind(pData(pd1), pData(pd2))
406
-	pD <- new("AnnotatedDataFrame", data=pd,
407
-		  varMetadata=data.frame(labelDescription=colnames(pd),
408
-		  row.names=colnames(pd)))
409
-	nr <- nrow(ABset); nc <- ncol(ABset)
410
-	callSetPlus <- new("SnpSuperSet",
411
-			   alleleA=A(ABset),
412
-			   alleleB=B(ABset), 
413
-			   call=calls(callSet), 
414
-			   callProbability=assayData(callSet)[["callProbability"]],
415
-			   phenoData=pD,
416
-			   featureData=featureData(callSet),
417
-			   annotation=annotation(ABset),
418
-			   experimentData=experimentData(callSet),
419
-			   protocolData=protocolData(callSet))
420
-	
421
-	if(splitByChr){
422
-		saved.objects <- splitByChromosome(callSetPlus, cnOptions)
423
-		##callSetPlus <- list.files(outdir, pattern="", full.names=TRUE)
424
-		if(!save.it) unlink(intensityFile)
425
-		return(saved.objects)
426
-	} 
412
+	crlmmResults <- list(snprmaResult=snprmaResult,
413
+			     cnrmaResult=cnrmaResult,
414
+			     callSet=callSet)
415
+
427 416
 	if(!save.it){
428
-		message("Cleaning up")
417
+		message("Cleaning up")		
429 418
 		unlink(intensityFile)
430 419
 	}
431
-	return(callSetPlus)
420
+	return(crlmmResults)
432 421
 }
433 422
 
434 423
 validCdfNames <- function(){
... ...
@@ -583,6 +572,7 @@ cnOptions <- function(tmpdir=tempdir(),
583 572
 		      intensityFile="normalizedIntensities.rda",
584 573
 		      rgFile="rgFile.rda",
585 574
 		      save.it=TRUE,
575
+		      save.cnset=TRUE,
586 576
 		      load.it=TRUE,
587 577
 		      splitByChr=TRUE,
588 578
 		      MIN.OBS=3,
... ...
@@ -592,6 +582,7 @@ cnOptions <- function(tmpdir=tempdir(),
592 582
 		      bias.adj=FALSE,
593 583
 		      prior.prob=rep(1/4, 4),
594 584
 		      SNRmin=5,
585
+		      chromosome=1:24,
595 586
 		      seed=123,
596 587
 		      verbose=TRUE,
597 588
 		      GT.CONF.THR=0.99,
... ...
@@ -603,19 +594,20 @@ cnOptions <- function(tmpdir=tempdir(),
603 594
 		      THR.NU.PHI=TRUE,
604 595
 		      thresholdCopynumber=TRUE,
605 596
 		      unlink=TRUE,
606
-		      hiddenMarkovModel=FALSE,
607
-		      circularBinarySegmentation=FALSE,
608
-		      cbsOpts=NULL,
609
-		      hmmOpts=NULL, ...){
597
+		      ##hiddenMarkovModel=FALSE,
598
+		      ##circularBinarySegmentation=FALSE,
599
+##		      cbsOpts=NULL,
600
+		      ##hmmOpts=NULL,
601
+		      ...){
610 602
 	if(missing(cdfName)) stop("must specify cdfName")
611 603
 	if(!file.exists(outdir)){
612 604
 		message(outdir, " does not exist.  Trying to create it.")
613 605
 		dir.create(outdir, recursive=TRUE)
614 606
 	}
615 607
 	stopifnot(isValidCdfName(cdfName))
616
-	if(hiddenMarkovModel){
617
-		hmmOpts <- hmmOptions(...)
618
-	}
608
+##	if(hiddenMarkovModel){
609
+##		hmmOpts <- hmmOptions(...)
610
+##	}
619 611
 	if(is.null(batch))
620 612
 		stop("batch must have the same length as the number of samples")
621 613
 	list(tmpdir=tmpdir,
... ...
@@ -625,6 +617,7 @@ cnOptions <- function(tmpdir=tempdir(),
625 617
 	     intensityFile=file.path(outdir, intensityFile),
626 618
 	     rgFile=file.path(outdir, rgFile),
627 619
 	     save.it=save.it,
620
+	     save.cnset=save.cnset,
628 621
 	     load.it=load.it,
629 622
 	     splitByChr=splitByChr,
630 623
 	     MIN.OBS=MIN.OBS,
... ...
@@ -635,6 +628,7 @@ cnOptions <- function(tmpdir=tempdir(),
635 628
 	     prior.prob=prior.prob,
636 629
 	     bias.adj=bias.adj,
637 630
 	     SNRmin=SNRmin,
631
+	     chromosome=chromosome,
638 632
 	     seed=seed,
639 633
 	     verbose=verbose,
640 634
 	     PHI.THR=PHI.THR,
... ...
@@ -644,11 +638,11 @@ cnOptions <- function(tmpdir=tempdir(),
644 638
 	     MIN.PHI=MIN.PHI,
645 639
 	     THR.NU.PHI=THR.NU.PHI,
646 640
 	     thresholdCopynumber=thresholdCopynumber,
647
-	     unlink=unlink,
648
-	     hiddenMarkovModel=hiddenMarkovModel,
649
-	     circularBinarySegmentation=circularBinarySegmentation,
650
-	     cbsOpts=cbsOpts,
651
-	     hmmOpts=hmmOpts) ##remove SnpSuperSet object
641
+	     unlink=unlink)
642
+##	     hiddenMarkovModel=hiddenMarkovModel,
643
+##	     circularBinarySegmentation=circularBinarySegmentation,
644
+##	     cbsOpts=cbsOpts,
645
+##	     hmmOpts=hmmOpts) ##remove SnpSuperSet object
652 646
 }
653 647
 
654 648
 ##linear model parameters
... ...
@@ -1409,76 +1403,6 @@ getParams <- function(object, batch){
1409 1403
 	return(params)
1410 1404
 }
1411 1405
 
1412
-hmmOptions <- function(copynumberStates=0:4,
1413
-		       EMIT.THR=-10,
1414
-		       scaleSds=TRUE,
1415
-		       verbose=TRUE,
1416
-		       log.initialP,
1417
-		       normalIndex=3,
1418
-		       normal2altered=0.01,
1419
-		       altered2normal=1,
1420
-		       altered2altered=0.001,
1421
-		       TAUP=1e8,
1422
-		       save.it=TRUE,
1423
-		       MIN.MARKERS=5){  ## whether the save the emission probabilities
1424
-	if(missing(log.initialP)) log.initialP <- log(rep(1/length(copynumberStates), length(copynumberStates)))
1425
-	list(copynumberStates=copynumberStates,
1426
-	     EMIT.THR=EMIT.THR,
1427
-	     scaleSds=scaleSds,
1428
-	     verbose=verbose,
1429
-	     log.initialP=log.initialP,
1430
-	     normalIndex=normalIndex,
1431
-	     normal2altered=normal2altered,
1432
-	     altered2normal=altered2normal,
1433
-	     altered2altered=altered2altered,
1434
-	     TAUP=TAUP,
1435
-	     save.it=save.it,
1436
-	     MIN.MARKERS=MIN.MARKERS)
1437
-}
1438
-
1439
-computeHmm.CNSet <- function(object, cnOptions){
1440
-	hmmOptions <- cnOptions[["hmmOpts"]]
1441
-	object <- object[order(chromosome(object), position(object)), ]
1442
-	##emission <- hmmOptions[["emission"]]
1443
-	chrom <- unique(chromosome(object))
1444
-	tPr <- transitionProbability(chromosome=chromosome(object),
1445
-				     position=position(object),
1446
-				     TAUP=hmmOptions[["TAUP"]])
1447
-	emissionPr(object) <- computeEmission(object, hmmOptions)
1448
-	rangedData(object) <- viterbi.CNSet(object,
1449
-					    hmmOptions=hmmOptions,
1450
-					    transitionPr=tPr[, "transitionPr"],
1451
-					    chromosomeArm=tPr[, "arm"])
1452
-	return(object)
1453
-}
1454
-
1455
-viterbi.CNSet <- function(object, hmmOptions, transitionPr, chromosomeArm){
1456
-	state.sequence <- viterbi(emission=emissionPr(object),
1457
-				  tau=transitionPr,
1458
-				  initialStateProbs=hmmOptions[["log.initialP"]],
1459
-				  arm=chromosomeArm,
1460
-				  normalIndex=hmmOptions[["normalIndex"]],
1461
-				  normal2altered=hmmOptions[["normal2altered"]],
1462
-				  altered2normal=hmmOptions[["altered2normal"]],
1463
-				  altered2altered=hmmOptions[["altered2altered"]])
1464
-	state.sequence <- data.frame(state.sequence)
1465
-	rleList <- RleList(state.sequence)
1466
-	rd <- RangedData(rleList)
1467
-##	rdList <- vector("list", length(rle.object))
1468
-##	for(i in seq(along=rdList)){
1469
-##		rdList[[i]] <- RangedData(rle.object[[i]],
1470
-##					  space=paste("chr", transitionPr[, "chromosome"], sep=""),
1471
-##					  state=runValue(rle.object[[i]]),
1472
-##					  sample=sampleNames(object)[i],
1473
-##					  nprobes=runLength(rle.object[[i]]))
1474
-##	}
1475
-##	rangedData <- do.call("c", rdList)
1476
-	return(rd)
1477
-}
1478
-
1479
-
1480
-
1481
-
1482 1406
 
1483 1407
 thresholdModelParams <- function(object, cnOptions){
1484 1408
 	MIN.NU <- cnOptions$MIN.NU
... ...
@@ -1507,143 +1431,6 @@ thresholdModelParams <- function(object, cnOptions){
1507 1431
 }
1508 1432
 
1509 1433
 
1510
-computeEmission.CNSet <- function(object, hmmOptions){
1511
-	EMIT.THR <- hmmOptions[["EMIT.THR"]]
1512
-	cnStates <- hmmOptions[["copynumberStates"]]
1513
-	object <- object[order(chromosome(object), position(object)), ]
1514
-	if(any(diff(position(object)) < 0)) stop("must be ordered by chromosome and physical position")
1515
-	emissionProbs <- array(NA, dim=c(nrow(object), ncol(object), length(hmmOptions[["copynumberStates"]])))
1516
-	dimnames(emissionProbs) <- list(featureNames(object),
1517
-					sampleNames(object),
1518
-					paste("copy.number_", hmmOptions[["copynumberStates"]], sep=""))	
1519
-	batch <- object$batch
1520
-	for(i in seq(along=unique(batch))){
1521
-		emissionProbs[, batch == unique(batch)[i], ] <- getEmission(object[, batch==unique(batch)[i]], hmmOptions)
1522
-	}
1523
-	if(EMIT.THR > -Inf){  ## truncate emission probabilities for outliers
1524
-		emissionProbs[emissionProbs < EMIT.THR] <- EMIT.THR
1525
-	}
1526
-	emissionProbs
1527
-}
1528
-
1529
-getEmission <- function(object, hmmOptions){
1530
-	emissionProbs <- array(NA, dim=c(nrow(object),
1531
-				   ncol(object), length(hmmOptions[["copynumberStates"]])))
1532
-	emit.snps <- getEmission.snps(object[isSnp(object), ], hmmOptions)
1533
-	emit.nps <- getEmission.nps(object[!isSnp(object), ], hmmOptions)
1534
-	emissionProbs[isSnp(object), , ] <- emit.snps
1535
-	emissionProbs[!isSnp(object), , ] <- emit.nps
1536
-	emissionProbs
1537
-}
1538
-
1539
-getEmission.nps <- function(object, hmmOptions){
1540
-	##****************************************************
1541
-	##	                                             *
1542
-	##  Emission probabilities for nonpolymorphic probes *
1543
-	##	                                             *
1544
-	##****************************************************
1545
-	batch <- unique(object$batch)
1546
-	scaleSds <- hmmOptions[["scaleSds"]]
1547
-	cnStates <- hmmOptions[["copynumberStates"]]
1548
-	verbose <- hmmOptions[["verbose"]]
1549
-	if(verbose) message("Computing emission probabilities for nonpolymorphic probes.")
1550
-	if(scaleSds){
1551
-		a <- CA(object)
1552
-		sds.a <- apply(a, 2, sd, na.rm=TRUE)
1553
-		sds.a <- log2(sds.a/median(sds.a))
1554
-		sds.a <- matrix(sds.a, nrow(object), ncol(object), byrow=TRUE)
1555
-	} else sds.a <- matrix(0, nrow(object), ncol(object))	
1556
-	emissionProbs <- array(NA, dim=c(nrow(object),
1557
-				   ncol(object), length(cnStates)))
1558
-	nuA <- getParam(object, "nuA", batch)
1559
-	phiA <- getParam(object, "phiA", batch)
1560
-	sig2A <- getParam(object, "sig2A", batch)
1561
-	##tau2A <- getParam(object, "tau2A", batch)
1562
-	##Assume that on the log-scale, that the background variance is the same...
1563
-	##tau2A <- sig2A	
1564
-	a <- as.numeric(log2(A(object)))
1565
-	for(k in seq(along=cnStates)){
1566
-		CT <- cnStates[k]
1567
-		mus.matrix=matrix(log2(nuA + CT*phiA), nrow(object), ncol(object))
1568
-		mus <- as.numeric(matrix(log2(nuA + CT*phiA), nrow(object), ncol(object)))
1569
-		sds.matrix <- matrix(sqrt(sig2A), nrow(object), ncol(object))
1570
-		sds.matrix <- sds.matrix + sds.a
1571
-		sds <- as.numeric(sds.matrix)
1572
-		suppressWarnings(tmp <- matrix(dnorm(a, mean=mus, sd=sds), nrow(object), ncol(object)))
1573
-		emissionProbs[, , k] <- log(tmp)
1574
-	}
1575
-	emissionProbs
1576
-}
1577
-	
1578
-
1579
-getEmission.snps <- function(object, hmmOptions){
1580
-	batch <- unique(object$batch)
1581
-	if(length(batch) > 1) stop("batch variable not unique")
1582
-	scaleSds <- hmmOptions[["scaleSds"]]
1583
-	cnStates <- hmmOptions[["copynumberStates"]]
1584
-	verbose <- hmmOptions[["verbose"]]
1585
-	if(scaleSds){
1586
-		a <- CA(object)
1587
-		b <- CB(object)
1588
-		sds.a <- apply(a, 2, sd, na.rm=TRUE)
1589
-		sds.b <- apply(b, 2, sd, na.rm=TRUE)	
1590
-		sds.a <- log2(sds.a/median(sds.a))
1591
-		sds.b <- log2(sds.b/median(sds.b))
1592
-		sds.a <- matrix(sds.a, nrow(object), ncol(object), byrow=TRUE)
1593
-		sds.b <- matrix(sds.b, nrow(object), ncol(object), byrow=TRUE)
1594
-	} else sds.a <- sds.b <- matrix(0, nrow(object), ncol(object))
1595
-	emissionProbs <- array(NA, dim=c(nrow(object),
1596
-				   ncol(object), length(cnStates)))
1597
-	corr <- getParam(object, "corr", batch)
1598
-	corrA.BB <- getParam(object, "corrA.BB", batch)
1599
-	corrB.AA <- getParam(object, "corrB.AA", batch)
1600
-	nuA <- getParam(object, "nuA", batch)
1601
-	nuB <- getParam(object, "nuB", batch)
1602
-	phiA <- getParam(object, "phiA", batch)
1603
-	phiB <- getParam(object, "phiB", batch)
1604
-	sig2A <- getParam(object, "sig2A", batch)
1605
-	sig2B <- getParam(object, "sig2B", batch)
1606
-	tau2A <- getParam(object, "tau2A", batch)
1607
-	tau2B <- getParam(object, "tau2B", batch)	
1608
-	a <- as.numeric(log2(A(object)))
1609
-	b <- as.numeric(log2(B(object)))
1610
-	
1611
-	for(k in seq(along=cnStates)){
1612
-		T <- cnStates[k]
1613
-		f.x.y <- matrix(0, sum(nrow(object)), ncol(object))
1614
-		for(copyA in 0:T){
1615
-			copyB <- T-copyA
1616
-			sigmaA <- sqrt(tau2A*(copyA==0) + sig2A*(copyA > 0))
1617
-			sigmaB <- sqrt(tau2B*(copyB==0) + sig2B*(copyB > 0))
1618
-			if(copyA == 0 & copyB > 0) r <- corrA.BB
1619
-			if(copyA > 0 & copyB == 0) r <- corrB.AA
1620
-			if(copyA > 0 & copyB > 0) r <- corr
1621
-			if(copyA == 0 & copyB == 0) r <- 0
1622
-			muA <- log2(nuA+copyA*phiA)
1623
-			muB <- log2(nuB+copyB*phiB)
1624
-
1625
-			sigmaA <- matrix(sigmaA, nrow=length(sigmaA), ncol=ncol(object), byrow=FALSE)
1626
-			sigmaB <- matrix(sigmaB, nrow=length(sigmaB), ncol=ncol(object), byrow=FALSE)
1627
-			## scale the variances by a sample-specific estimate of the variances
1628
-			## var(I_A, ijp) = sigma_A_ip * sigma_A_jp			
1629
-			sigmaA <- sigmaA+sds.a
1630
-			sigmaB <- sigmaB+sds.b
1631
-			meanA <- as.numeric(matrix(muA, nrow(object), ncol(object)))
1632
-			meanB <- as.numeric(matrix(muB, nrow(object), ncol(object)))			
1633
-			rho <- as.numeric(matrix(r, nrow(object), ncol(object)))
1634
-			sd.A <- as.numeric(matrix(sigmaA, nrow(object), ncol(object)))
1635
-			sd.B <- as.numeric(matrix(sigmaB, nrow(object), ncol(object)))
1636
-			Q.x.y <- 1/(1-rho^2)*(((a - meanA)/sd.A)^2 + ((b - meanB)/sd.B)^2 - 2*rho*((a - meanA)*(b - meanB))/(sd.A*sd.B))
1637
-			## For CN states > 1, assume that any of the possible genotypes are equally likely a priori...just take the sum
1638
-			## For instance, for state copy number 2 there are three combinations: AA, AB, BB
1639
-			##   -- two of the three combinations should be near zero.
1640
-			## TODO: copy-neutral LOH would put near-zero mass on both copyA > 0, copyB > 0
1641
-			f.x.y <- f.x.y + matrix(1/(2*pi*sd.A*sd.B*sqrt(1-rho^2))*exp(-0.5*Q.x.y), nrow(object), ncol(object))
1642
-		}
1643
-		emissionProbs[, , k] <- log(f.x.y)
1644
-	}
1645
-	emissionProbs
1646
-}
1647 1434
 
1648 1435
 ##setMethod("update", "character", function(object, ...){
1649 1436
 ##	crlmmFile <- object
... ...
@@ -1670,78 +1457,6 @@ getEmission.snps <- function(object, hmmOptions){
1670 1457
 ##	}
1671 1458
 ##})
1672 1459
 
1673
-addFeatureAnnotation.SnpSuperSet <- function(object, ...){
1674
-	##if(missing(CHR)) stop("Must specificy chromosome")
1675
-	cdfName <- annotation(object)
1676
-	pkgname <- paste(cdfName, "Crlmm", sep="")	
1677
-	path <- system.file("extdata", package=pkgname)
1678
-	loader("cnProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv)
1679
-	cnProbes <- get("cnProbes", envir=.crlmmPkgEnv)
1680
-	loader("snpProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv)
1681
-	snpProbes <- get("snpProbes", envir=.crlmmPkgEnv)	
1682
-
1683
-	##Feature Data
1684
-	isSnp <- rep(as.integer(0), nrow(object))
1685
-	isSnp[snpIndex(object)] <- as.integer(1)
1686
-	names(isSnp) <- featureNames(object)
1687
-
1688
-	if(any(isSnp)){
1689
-		snps <- featureNames(object)[isSnp == 1]
1690
-		position.snp <- snpProbes[match(snps, rownames(snpProbes)), "position"]
1691
-		names(position.snp) <- snps
1692
-
1693
-		J <- grep("chr", colnames(snpProbes))
1694
-		chr.snp <- snpProbes[match(snps, rownames(snpProbes)), J]		
1695
-	} else{
1696
-		chr.snp <- position.snp <- integer()
1697
-	}
1698
-	if(any(!isSnp)){
1699
-		nps <- featureNames(object)[isSnp == 0]
1700
-		position.np <- cnProbes[match(nps, rownames(cnProbes)), "position"]
1701
-		names(position.np) <- nps
1702
-		
1703
-		chr.np <- cnProbes[match(nps, rownames(cnProbes)), J]	
1704
-	} else {
1705
-		chr.np <- position.np <- integer()
1706
-	}
1707
-	position <- c(position.snp, position.np)
1708
-	chrom <- c(chr.snp, chr.np)
1709
-
1710
-	##We may not have annotation for all of the snps
1711
-	if(!all(featureNames(object) %in% names(position))){
1712
-		message("Dropping loci for which physical position  is not available.")
1713
-		object <- object[featureNames(object) %in% names(position), ]
1714
-	}
1715
-	ix <- match(featureNames(object), names(position))
1716
-	position <- position[ix]
1717
-	chrom <- chrom[ix]
1718
-	##require(SNPchip)
1719
-	chrom <- chromosome2integer(chrom)
1720
-
1721
-	stopifnot(identical(names(position), featureNames(object)))
1722
-	if(sum(duplicated(names(position))) > 0){
1723
-		warning("Removing rows with NA identifiers...")
1724
-		##RS: fix this
1725
-		I <- which(!is.na(names(position)))
1726
-	}  else I <- seq(along=names(position))
1727
-	tmp.fd <- data.frame(cbind(chrom[I],
1728
-				   position[I],
1729
-				   isSnp[I]))
1730
-	colnames(tmp.fd) <- c("chromosome", "position", "isSnp")
1731
-	if("chromosome" %in% fvarLabels(object))
1732
-		tmp.fd <- tmp.fd[, -1]
1733
-	if("position" %in% fvarLabels(object))
1734
-		tmp.fd <- tmp.fd[, -grep("position", colnames(tmp.fd)), drop=FALSE]
1735
-	if("isSnp" %in% fvarLabels(object))
1736
-		tmp.fd <- tmp.fd[, -grep("isSnp", colnames(tmp.fd)), drop=FALSE]
1737
-	rownames(tmp.fd) <- featureNames(object)
1738
-	tmp <- new("AnnotatedDataFrame",
1739
-		   data=tmp.fd,
1740
-		   varMetadata=data.frame(labelDescription=colnames(tmp.fd)))
1741
-	fd <- cbind(pData(tmp), fData(object))
1742
-	fD <- new("AnnotatedDataFrame", data=fd, varMetadata=data.frame(labelDescription=colnames(fd), row.names=colnames(fd)))
1743
-	return(fD)
1744
-}
1745 1460
 
1746 1461
 
1747 1462
 computeCopynumber.SnpSuperSet <- function(object, cnOptions){
... ...
@@ -1844,247 +1559,7 @@ computeCopynumber.CNSet <- function(object, cnOptions){
1844 1559
 }
1845 1560
 
1846 1561
 
1847
-isSnp.AlleleSet <- function(object){
1848
-	labels <- fvarLabels(object)
1849
-	if("isSnp" %in% labels){
1850
-		res <- fData(object)[, "isSnp"]
1851
-	} else{
1852
-		res <- as.integer(featureNames(object) %in% snpNames(object))
1853
-	}
1854
-	return(res==1)
1855
-}
1856 1562
 
1857
-isBiparental.SnpSuperSet <- function(object, allowHetParent=TRUE){
1858
-	##if(length(object$familyMember) < 3) stop("object$familyMember not the right length")
1859
-	father <- 1
1860
-	mother <- 2
1861
-	offspring <- 3
1862
-	F <- calls(object[, father])
1863
-	M <- calls(object[, mother])
1864
-	O <- calls(object[, offspring])
1865
-	object <- cbind(F, M, O)
1866
-	colnames(object) <- c("father", "mother", "offspring")
1867
-	biparental <- isBiparental.matrix(object, allowHetParent=allowHetParent)
1868
-	return(biparental)
1869
-}
1870 1563
 
1871
-isBiparental.matrix <- function(object, allowHetParent=TRUE){
1872
-	F <- object[, 1]
1873
-	M <- object[, 2]
1874
-	O <- object[, 3]
1875
-	##M/F AA, F/M BB, O AB 
1876
-	##isHet <- offspringHeterozygous(object)  ##offspring is heterozygous
1877
-	biparental <- rep(NA, nrow(object))
1878
-	biparental[F==1 & M == 3 & O == 2] <- TRUE#Pr(O | biparental)=1-0.001, Pr(O | not biparental) = 0.01
1879
-	biparental[F==3 & M == 1 & O == 2] <- TRUE#Pr(O | biparental)=1-0.001, Pr(O | not biparental) = 0.01
1880
-	##M/F AA, F/M BB, O AA or BB
1881
-	biparental[F==1 & M == 3 & (O == 1 | O == 3)] <- FALSE#Pr(O | biparental)=0.001, Pr(O | not biparental) = 1-0.01
1882
-	biparental[F==3 & M == 1 & (O == 1 | O == 3)] <- FALSE#Pr(O | biparental)=0.001, Pr(O | not biparental) = 1-0.01
1883
-	## M/F AA, F/M BB, O AB
1884
-	if(allowHetParent) biparental[F == 1 & M == 2 & O == 2] <- TRUE#Pr(O | biparental)=1-0.001, Pr(O | not biparental) = 0.01
1885
-	if(allowHetParent) biparental[F == 2 & M == 1 & O == 2] <- TRUE#Pr(O | biparental)=1-0.001, Pr(O | not biparental) = 0.01
1886
-	## F AB, M AA, O BB is not biparental
1887
-	## F AA, M AB, O BB is not biparental
1888
-	biparental[F == 2 & M == 1 & O == 3] <- FALSE#Pr(O | biparental)=1-0.001, Pr(O | not biparental) = 0.01
1889
-	biparental[F == 1 & M == 2 & O == 3] <- FALSE#Pr(O | biparental)=0.001, Pr(O | not biparental) = 1-0.01
1890
-	## M AA, F AB, O AB
1891
-	if(allowHetParent) biparental[F == 2 & M == 3 & O == 2] <- TRUE#Pr(O | biparental)=1-0.001, Pr(O | not biparental) = 0.01
1892
-	if(allowHetParent) biparental[F == 3 & M == 2 & O == 2] <- TRUE#Pr(O | biparental)=1-0.001, Pr(O | not biparental) = 0.01
1893
-	## F=AB, M=BB, O=AA is NOT biparental
1894
-	biparental[F == 2 & M == 3 & O == 1] <- FALSE#Pr(O | biparental)=0.001, Pr(O | not biparental) = 1-0.01
1895
-	biparental[F == 3 & M == 2 & O == 1] <- FALSE#Pr(O | biparental)=0.001, Pr(O | not biparental) = 1-0.01
1896
-	return(biparental)
1897
-}
1898 1564
 
1899
-findFatherMother <- function(offspringId, object){
1900
-	stopifnot(!missing(offspringId)) 
1901
-	family.id <- pData(object)[sampleNames(object) == offspringId, "familyId"]
1902
-	father.id <- pData(object)[sampleNames(object) == offspringId, "fatherId"]
1903
-	mother.id <- pData(object)[sampleNames(object) == offspringId, "motherId"]
1904
-	father.name <- sampleNames(object)[object$familyId == family.id & object$individualId == father.id]
1905
-	mother.name <- sampleNames(object)[object$familyId == family.id & object$individualId == mother.id]
1906
-	if(length(father.name) > 1 | length(mother.name) > 1){
1907
-		stop("More than 1 father and/or more than 1 mother.  Check annotation in phenoData")
1908
-	}
1909
-	if(length(father.name) < 1 ){
1910
-		father.name <- NA
1911
-	}
1912
-	if(length(mother.name) < 1){
1913
-		mother.name <- NA
1914
-	}
1915
-	fmo.trio <- c(father.name, mother.name, offspringId)
1916
-	names(fmo.trio) <- c("father", "mother", "offspring")
1917
-	return(fmo.trio)
1918
-}
1919
-
1920
-hmm.SnpSuperSet <- function(object, hmmOptions){
1921
-	if(ncol(object) < 3){
1922
-		return("No complete trios")
1923
-	}
1924
-	stopifnot(all(c("familyId", "fatherId", "motherId", "individualId") %in% varLabels(object)))
1925
-	require(VanillaICE) || stop("VanillaICE not available")
1926
-	TAUP <- hmmOptions[["TAUP"]]
1927
-	states <- hmmOptions[["states"]]
1928
-	initialP <- hmmOptions[["initialP"]]
1929
-	verbose <- hmmOptions[["verbose"]]
1930
-	normal2altered <- hmmOptions[["normal2altered"]]
1931
-	altered2normal <- hmmOptions[["altered2normal"]]
1932
-	normalIndex <- hmmOptions[["normalIndex"]]
1933
-	offspringId <- sampleNames(object)[object$fatherId != 0 & object$motherId != 0]
1934
-
1935
-	##For each offspring, find the father and mother in the same family
1936
-
1937
-	trios <- as.matrix(t(sapply(offspringId, findFatherMother, object=object)))
1938
-	trios <- trios[rowSums(is.na(trios)) == 0, , drop=FALSE]
1939
-	colnames(trios) <- c("father", "mother", "offspring")
1940
-	
1941
-	rD <- vector("list", nrow(trios))
1942
-	for(i in 1:nrow(trios)){
1943
-		##if(verbose) cat("Family ", unique(familyId)[i], ", ")
1944
-		if(verbose) cat("Offspring ID ", trios[i, "offspring"], "\n")
1945
-		trioSet <- object[, match(trios[i, ], sampleNames(object))]
1946
-		## Remove the noinformative snps here.
1947
-		isBPI <- isBiparental.SnpSuperSet(trioSet)
1948
-		isInformative <- !is.na(isBPI)
1949
-		if(all(!isInformative)){
1950
-			fit[, i] <- 1
1951
-			next()
1952
-		}
1953
-		trioSet <- trioSet[isInformative, ]
1954
-		##index <- match(featureNames(trioSet), rownames(fit))
1955
-		tau <- transitionProbability(chromosome=chromosome(trioSet),
1956
-					     position=position(trioSet),
1957
-					     TAUP=TAUP)
1958
-		isBPI <- isBPI[isInformative]
1959
-		emission <- computeBpiEmission.SnpSuperSet(trioSet, hmmOptions, isBPI=isBPI)
1960
-		.GlobalEnv[["emission"]] <- emission
1961
-		if(is.null(emission)) stop("not a father, mother, offspring trio")
1962
-		log.e <- array(log(emission), dim=c(nrow(trioSet), 1, 2), dimnames=list(featureNames(trioSet), trios[i, "offspring"], hmmOptions[["states"]]))
1963
-		index <- match(featureNames(trioSet), featureNames(object))
1964
-		vitResults <- viterbi(initialStateProbs=log(initialP),
1965
-						 emission=log.e,
1966
-						 tau=tau[, "transitionPr"],
1967
-						 arm=tau[, "arm"],
1968
-						 normalIndex=normalIndex,
1969
-						 verbose=verbose,
1970
-						 normal2altered=normal2altered,
1971
-						 altered2normal=altered2normal,
1972
-						 returnLikelihood=TRUE)
1973
-		##vitSequence is a vector -- one trio at a time
1974
-		vitSequence <- vitResults[["stateSequence"]]
1975
-		if(length(table(tau[, "arm"])) > 1){
1976
-			##insert an extra index to force a break between chromosome arms
1977
-			tmp <- rep(NA, length(vitSequence)+1)
1978
-			end.parm <- end(Rle(tau[, "arm"]))[1]
1979
-			tmp[1:end.parm] <- vitSequence[1:end.parm]
1980
-			tmp[end.parm+1] <- 999
1981
-			tmp[(end.parm+2):length(tmp)] <- vitSequence[((end.parm)+1):length(vitSequence)]
1982
-			vitSequence <- tmp
1983
-		}
1984
-		llr <- vitResults[["logLikelihoodRatio"]][[1]]
1985
-		rl <- Rle(vitSequence)
1986
-		start.index <- start(rl)[runValue(rl) != 999]
1987
-		end.index <- end(rl)[runValue(rl) != 999]
1988
-		##this is tricky since we've added an index to force a segment for each arm.
1989
-		armBreak <- which(vitSequence==999)
1990
-		if(length(armBreak) > 0){
1991
-			start.index[start.index > armBreak] <- start.index[start.index > armBreak] - 1
1992
-			end.index[end.index > armBreak] <- end.index[end.index > armBreak] - 1
1993
-		}
1994
-		start <- position(trioSet)[start.index]
1995
-		end <- position(trioSet)[end.index]
1996
-		##numMarkers <- unlist(numMarkers)
1997
-		numMarkers <- width(rl)[runValue(rl) != 999]
1998
-		states <- hmmOptions[["states"]][vitSequence[start.index]]
1999
-		##states <- (hmmOptions[["states"]])[vitSequence[start.index]]
2000
-		ir <- IRanges(start=start, end=end)
2001
-		##For each segment, calculate number biparental, number not biparental
2002
-		nBpi <- nNotBpi <- rep(NA, length(ir))
2003
-		for(j in 1:length(start)){
2004
-			region <- (start(rl)[j]):(end(rl)[j])
2005
-			region <- (start.index[j]):(end.index[j])
2006
-			nNonInformative <- sum(is.na(isBPI[region]))
2007
-			nInformative <- sum(!is.na(isBPI[region]))
2008
-			nNotBpi[j] <- sum(isBPI[region] == FALSE, na.rm=TRUE)
2009
-			nBpi[j] <- sum(isBPI[region] == TRUE, na.rm=TRUE)
2010
-		}
2011
-		rD[[i]] <- RangedData(ir,
2012
-				      space=rep(paste("chr", unique(chromosome(trioSet)), sep=""), length(ir)),
2013
-				      offspringId=rep(trios[i, "offspring"], length(ir)),
2014
-				      numMarkers=numMarkers,
2015
-				      state=states,
2016
-				      nNotBpi=nNotBpi,
2017
-				      nBpi=nBpi,
2018
-				      LLR=llr)
2019
-	}
2020
-	##to avoid a .Primivite error with do.call(c, rD)
2021
-	tmp <- do.call(c, rD[sapply(rD, nrow) == 1])
2022
-	tmp2 <- do.call(c, rD[sapply(rD, nrow) > 1])
2023
-	rD <- c(tmp, tmp2)
2024
-	return(rD)
2025
-}
2026
-
2027
-trioOptions <- function(states=c("BPI", "notBPI"),
2028
-			initialP=c(0.99, 0.01),
2029
-			TAUP=1e7,
2030
-			prGtError=c(0.001, 0.01),
2031
-			verbose=FALSE,
2032
-			allowHetParent=FALSE,
2033
-			normalIndex=1,
2034
-			normal2altered=1,
2035
-			altered2normal=1,
2036
-			useCrlmmConfidence=FALSE){
2037
-	names(prGtError) <- states
2038
-	names(initialP) <- states
2039
-	list(states=states,
2040
-	     initialP=initialP,
2041
-	     TAUP=TAUP,
2042
-	     prGtError=prGtError,
2043
-	     verbose=verbose,
2044
-	     allowHetParent=allowHetParent,
2045
-	     normalIndex=normalIndex,
2046
-	     normal2altered=normal2altered,
2047
-	     altered2normal=altered2normal,
2048
-	     useCrlmmConfidence=useCrlmmConfidence)
2049
-}
2050
-
2051
-
2052
-
2053
-computeBpiEmission.SnpSuperSet <- function(object, hmmOptions, isBPI){
2054
-	states <- hmmOptions[["states"]]
2055
-	prGtError <- hmmOptions[["prGtError"]]
2056
-	useCrlmmConfidence <- hmmOptions[["useCrlmmConfidence"]]
2057
-	emission <- matrix(NA, nrow(object), ncol=2)
2058
-	colnames(emission) <- states
2059
-	if(useCrlmmConfidence){
2060
-		pCrlmm <- confs(object)  ## crlmm confidence score
2061
-		## take the minimum confidence score in the trio
2062
-		pCrlmm <- apply(pCrlmm, 1, min, na.rm=TRUE)
2063
-		## set emission probability to min(crlmmConfidence, 0.999)
2064
-		I <- as.integer(pCrlmm < (1 - prGtError[["BPI"]]))
2065
-		pCrlmm <- pCrlmm*I + (1 - prGtError[["BPI"]])*(1-I)
2066
-		emission[,  "BPI"] <- pCrlmm
2067
-		##Pr(mendelian inconsistency | BPI) = 0.001 
2068
-		emission[, "BPI"] <- 1 - pCrlmm
2069
-	} else { ##ignore confidence scores
2070
-		##Pr(call is consistent with biparental inheritance | BPI) = 0.999
2071
-		emission[isBPI==TRUE,  "BPI"] <-  1-prGtError["BPI"]
2072
-		##Pr(mendelian inconsistency | BPI) = 0.001 
2073
-		emission[isBPI==FALSE, "BPI"] <- prGtError["BPI"] ##Mendelian inconsistancy
2074
-	}
2075
-	##Pr(call is consistent with biparental inheritance | not BPI) = 0.01 		
2076
-	emission[isBPI==TRUE,  "notBPI"] <- prGtError["notBPI"]   ## biparental inheritance, but true state is not Biparental
2077
-	##Pr(mendelian inconsistency | not BPI) = 0.99 				
2078
-	emission[isBPI==FALSE, "notBPI"] <- 1-prGtError["notBPI"] ## Mendelian inconsistancy
2079
-	return(emission)
2080
-}
2081
-
2082
-## ---------------------------------------------------------------------------
2083
-## not to be exported
2084
-hapmapPedFile <- function(){
2085
-	pedFile <- read.csv("~/projects/Beaty/inst/extdata/HapMap_samples.csv", as.is=TRUE)
2086
-	pedFile <- pedFile[, 1:5]
2087
-	colnames(pedFile) <- c("coriellId", "familyId", "individualId", "fatherId", "motherId")
2088
-	return(pedFile)
2089
-}
2090 1565
 
... ...
@@ -8,8 +8,6 @@ setMethod("B", "AlleleSet", function(object) allele(object, "B"))
8 8
 ##		 function(object, value){
9 9
 ##			 assayDataElementReplace(object, "senseThetaB", value)			
10 10
 ##		 })
11
-setMethod("isSnp", "AlleleSet", function(object) {
12
-	isSnp.AlleleSet(object)
13
-})
11
+
14 12
 
15 13
 
... ...
@@ -1,44 +1,3 @@
1
-setMethod("initialize", "CNSet",
2
-          function(.Object,
3
-		   assayData,
4
-                   phenoData,
5
-		   featureData,
6
-		   annotation,
7
-		   experimentData,
8
-		   protocolData,
9
-                   call=new("matrix"),
10
-		   callProbability=matrix(integer(), nrow=nrow(call), ncol=ncol(call), dimnames=dimnames(call)),
11
-		   alleleA=matrix(integer(), nrow=nrow(call), ncol=ncol(call), dimnames=dimnames(call)),
12
-		   alleleB=matrix(integer(), nrow=nrow(call), ncol=ncol(call), dimnames=dimnames(call)),
13
-		   CA=matrix(integer(), nrow=nrow(call), ncol=ncol(call), dimnames=dimnames(call)),
14
-		   CB=matrix(integer(), nrow=nrow(call), ncol=ncol(call), dimnames=dimnames(call)),
15
-		   segmentData=new("RangedData"),
16
-		   emissionPr=new("array"), ... ){
17
-		  ## callProbability, CA, CB, are stored as integers
18
-		  if(missing(assayData)){
19
-			  assayData <- assayDataNew("lockedEnvironment",
20
-						    call=call,
21
-						    callProbability=callProbability,
22
-						    alleleA=alleleA,
23
-						    alleleB=alleleB,
24
-						    CA=CA,
25
-						    CB=CB)
26
-		  } 
27
-		  assayData(.Object) <- assayData
28
-		  if (missing(phenoData)){
29
-			  phenoData(.Object) <- annotatedDataFrameFrom(calls, byrow=FALSE)
30
-		  } else phenoData(.Object) <- phenoData
31
-		  if (missing(featureData)){
32
-			  featureData(.Object) <- annotatedDataFrameFrom(calls, byrow=TRUE)
33
-		  } else featureData(.Object) <- featureData
34
-		  if(!missing(annotation)) annotation(.Object) <- annotation
35
-		  if(!missing(experimentData)) experimentData(.Object) <- experimentData
36
-		  if(!missing(protocolData)) protocolData(.Object) <- protocolData
37
-		  if(!missing(emissionPr)) .Object@emissionPr <- emissionPr
38
-		  segmentData(.Object) <- segmentData
39
-		  .Object	    
40
-          })
41
-
42 1
 setAs("SnpSuperSet", "CNSet",
43 2
       function(from, to){
44 3
 	      CA <- CB <- matrix(NA, nrow(from), ncol(from))
... ...
@@ -57,14 +16,20 @@ setAs("SnpSuperSet", "CNSet",
57 16
 		  featureData=featureData(from))
58 17
       })
59 18
 
60
-setValidity("CNSet", function(object) {
61
-	assayDataValidMembers(assayData(object), c("CA", "CB", "call", "callProbability", "alleleA", "alleleB"))
62
-})
63
-
64
-
65
-
66 19
 setMethod("computeCopynumber", "CNSet", function(object, cnOptions){
67
-	computeCopynumber.CNSet(object, cnOptions)
20
+	## to do the bias adjustment, initial estimates of the parameters are needed
21
+	##  The initial estimates are gotten by running computeCopynumber with cnOptions[["bias.adj"]]=FALSE
22
+	bias.adj <- cnOptions[["bias.adj"]]
23
+	if(bias.adj & all(is.na(CA(object)))){
24
+		cnOptions[["bias.adj"]] <- FALSE
25
+	}
26
+	object <- computeCopynumber.CNSet(object, cnOptions)				
27
+	if(bias.adj & !cnOptions[["bias.adj"]]){
28
+		## Do a second iteration with bias adjustment
29
+		cnOptions[["bias.adj"]] <- TRUE
30
+		object <- computeCopynumber.CNSet(object, cnOptions)
31
+	}
32
+	object
68 33
 })
69 34
 
70 35
 setMethod("computeCopynumber", "character", function(object, cnOptions){
... ...
@@ -96,33 +61,7 @@ setMethod("computeCopynumber", "character", function(object, cnOptions){
96 61
 
97 62
 
98 63
 
99
-setMethod("computeEmission", "CNSet", function(object, hmmOptions){
100
-	computeEmission.CNSet(object, hmmOptions)
101
-})
102
-
103
-setMethod("computeEmission", "character", function(object, hmmOptions){
104
-	filename <- object
105
-	chrom <- gsub(".rda", "", strsplit(filename, "_")[[1]][[2]])
106
-	if(hmmOptions[["verbose"]])
107
-		message("Compute emission probabilities for chromosome ", chrom)
108
-	if(file.exists(filename)){
109
-		load(filename)
110
-		cnSet <- get("cnSet")
111
-	} else {
112
-		stop("File ", filename, " does not exist.")
113
-	}
114
-	emission <- computeEmission(cnSet, hmmOptions)
115
-	message("Saving ", file.path(dirname(filename), paste("emission_", chrom, ".rda", sep="")))
116
-	if(hmmOptions[["save.it"]]){
117
-		save(emission,
118
-		     file=file.path(dirname(filename), paste("emission_", chrom, ".rda", sep="")))
119
-	}
120
-	return(emission)
121
-})
122 64
 
123
-setMethod("computeHmm", "CNSet", function(object, hmmOptions){
124
-	computeHmm.CNSet(object, hmmOptions)
125
-})
126 65
 
127 66
 ##setMethod("computeHmm", "SnpSuperSet", function(object, hmmOptions){
128 67
 ##	cnSet <- computeCopynumber(object, hmmOptions)
... ...
@@ -160,25 +99,6 @@ setMethod("computeHmm", "CNSet", function(object, hmmOptions){
160 99
 ##	return(fns)	
161 100
 ##})
162 101
 
163
-
164
-setValidity("CNSet", function(object) {
165
-	msg <- validMsg(NULL, assayDataValidMembers(assayData(object), c("CA", "CB")))
166
-	if (is.null(msg)) TRUE else msg
167
-})
168
-##may want to allow thresholding here (... arg)
169
-setMethod("CA", "CNSet", function(object) assayData(object)[["CA"]]/100)
170
-setMethod("CB", "CNSet", function(object) assayData(object)[["CB"]]/100)
171
-setReplaceMethod("CA", signature(object="CNSet", value="matrix"),
172
-		 function(object, value){
173
-			 value <- matrix(as.integer(value*100), nrow(value), ncol(value), dimnames=dimnames(value))
174
-			 assayDataElementReplace(object, "CA", value)		
175
-		 })
176
-
177
-setReplaceMethod("CB", signature(object="CNSet", value="matrix"),
178
-		 function(object, value){
179
-			 value <- matrix(as.integer(value*100), nrow(value), ncol(value), dimnames=dimnames(value))			 
180
-			 assayDataElementReplace(object, "CB", value)
181
-		 })
182 102
 setMethod("copyNumber", "CNSet", function(object){
183 103
 	I <- isSnp(object)
184 104
 	CA <- CA(object)
... ...
@@ -241,43 +161,7 @@ ellipse.CNSet <- function(x, copynumber, batch, ...){
241 161
 	}
242 162
 }
243 163
 
244
-setMethod("rangedData", "CNSet", function(object) segmentData(object))
245
-setReplaceMethod("rangedData", c("CNSet", "RangedData"), function(object, value){
246
-	segmentData(object) <- value
247
-})
248
-
249
-setMethod("segmentData", "CNSet", function(object) object@segmentData)
250
-setReplaceMethod("segmentData", c("CNSet", "RangedData"), function(object, value){
251
-	object@segmentData <- value
252
-	object
253
-})
254
-
255
-setMethod("emissionPr", "CNSet", function(object) object@emissionPr)
256
-setReplaceMethod("emissionPr", c("CNSet", "array"), function(object, value){
257
-	object@emissionPr <- value
258
-	object
259
-})
260
-
261
-setMethod("show", "CNSet", function(object){
262
-	callNextMethod(object)
263
-	cat("emissionPr\n")
264
-	cat("   array:", nrow(object), "features,", ncol(object), "samples,", dim(emissionPr(object))[3], "states\n")
265
-	cat("   hidden states:\n")
266
-	cat("      ", dimnames(emissionPr(object))[[3]], "\n")
267
-	cat("   Missing values:", sum(is.na(emissionPr(object))), "\n")
268
-	if(!all(is.na(emissionPr(object)))){
269
-		cat("   minimum value:", min(emissionPr(object), na.rm=TRUE), "\n")
270
-	} else  cat("   minimum value: NA (all missing)\n")
271
-	cat("rangedData:  ")
272
-	cat("    ", show(rangedData(object)), "\n")
273
-##	cat("   ", nrow(segmentData(object)), "segments\n")
274
-##	cat("    column names:", paste(colnames(segmentData(object)), collapse=", "), "\n")
275
-#	cat("    mean # markers per segment:", mean(segmentData(object)$nprobes))
276
-})
277 164
 
278
-setMethod("start", "CNSet", function(x, ...) start(segmentData(x), ...))
279
-setMethod("end", "CNSet", function(x, ...) end(segmentData(x), ...))
280
-setMethod("width", "CNSet", function(x) width(segmentData(x)))
281 165
 
282 166
 
283 167
 
... ...
@@ -1,48 +1,145 @@
1
-##How to make the initialization platform-specific?
1
+## Method("initialize", "AlleleSet",
2
+##        function(.Object,
3
+##                 assayData = assayDataNew(alleleA=alleleA,
4
+##                                          alleleB=alleleB, ...),
5
+##                 phenoData = annotatedDataFrameFrom(assayData, byrow=FALSE),
6
+##                 featureData = annotatedDataFrameFrom(assayData, byrow=TRUE),
7
+##                 experimentData = new("MIAME"),
8
+##                 annotation = character(),
9
+##                 protocolData = phenoData[,integer(0)],
10
+##                 alleleA = new("matrix"),
11
+##                 alleleB = matrix(numeric(),
12
+## 		                    nrow=nrow(alleleA), ncol=ncol(alleleA),
13
+##                                  dimnames=dimnames(alleleA)),
14
+## 		   chromosome=integer(),
15
+## 		   position=integer(),
16
+## 		   isSnp=integer(),
17
+##                 ...) {
18
+## 		  .Object <- callNextMethod(.Object,
19
+## 					    assayData = assayData,
20
+## 					    phenoData = phenoData,
21
+## 					    featureData = featureData,
22
+## 					    experimentData = experimentData,
23
+## 					    annotation = annotation,
24
+## 					    protocolData = protocolData)
25
+## 		  if(length(annotation) < 1){
26
+## 			  if((length(position) < 1 | length(chromosome) < 1| length(isSnp) < 1)){
27
+## 				  stop("must specify annotation if 'chromosome', 'position', and 'isSnp' are missing")
28
+## 			  } else {
29
+## 				  pData(featureData)$chromosome <- chromosome
30
+## 				  pData(featureData)$position <- position
31
+## 				  pData(featureData)$isSnp <- isSnp
32
+## 			  }
33
+## 		  } else{
34
+## 			  .Object@annotation <- annotation
35
+## 			  if((length(position) < 1 | length(chromosome) < 1| length(isSnp) < 1)){
36
+## 				  if(!isSupportedAnnotation(annotation)){
37
+## 					  stop("The annotation is not supported. Arguments 'chromosome', 'position', and 'isSnp' can be omitted from the initialization only if the annotation is supported (see oligoClasses:::supportedAnnotation()).")
38
+## 				  }
39
+## 			  } else {
40
+## 				  pData(featureData)$chromosome <- chromosome
41
+## 				  pData(featureData)$position <- position
42
+## 				  pData(featureData)$isSnp <- isSnp
43
+## 			  }
44
+## 			  .Object@featureData <- featureData
45
+## 		  }
46
+## 		  ## Do after annotation has been assigned
47
+## 		  if(!(all(c("chromosome", "position", "isSnp") %in% varLabels(featureData))) & isSupportedAnnotation(annotation)){
48
+## 			  ##update the featureData
49
+## 			  .Object@featureData <- addFeatureAnnotation.crlmm(.Object)
50
+## 		  }
51
+## 		  .Object
52
+##        })
53
+## 
54
+## ow to make the initialization platform-specific?
55
+## Method("initialize", "SnpSuperSet",
56
+##        function(.Object,
57
+## 		   call=new("matrix"),		   
58
+##                 callProbability=matrix(NA, nrow(call), ncol(call), dimnames=dimnames(call)),
59
+##                 phenoData = annotatedDataFrameFrom(assayData, byrow=FALSE),		   
60
+## 		   featureData=annotatedDataFrameFrom(assayData, byrow=TRUE),
61
+## 		   experimentData=new("MIAME"),
62
+## 		   annotation=character(),
63
+## 		   protocolData=phenoData[, integer(0)],
64
+## 		   position=integer(),
65
+## 		   chromosome=integer(),
66
+## 		   isSnp=integer(),...){
67
+## 		  .Object <- callNextMethod(.Object,
68
+## 					    call=call,
69
+## 					    callProbability=callProbability,
70
+## 					    phenoData=phenoData,
71
+## 					    featureData=featureData,
72
+## 					    experimentData=experimentData,
73
+## 					    annotation=annotation,
74
+## 					    protocolData=protocolData,
75
+## 					    position=position,
76
+## 					    chromosome=chromosome,
77
+## 					    isSnp=isSnp, ...)
78
+##        })
79
+##
80
+
81
+##setMethod("initialize", "SnpSuperSet",
82
+##          function(.Object,
83
+##		   call=new("matrix"),		   
84
+##                   callProbability=matrix(NA, nrow(call), ncol(call), dimnames=dimnames(call)),
85
+##                   alleleA = new("matrix"),
86
+##                   alleleB = matrix(numeric(),
87
+##		                    nrow=nrow(alleleA), ncol=ncol(alleleA),
88
+##                                    dimnames=dimnames(alleleA)),		   
89
+##                   phenoData = annotatedDataFrameFrom(call, byrow=FALSE),		   
90
+##		   featureData=annotatedDataFrameFrom(call, byrow=TRUE),
91
+##		   experimentData=new("MIAME"),
92
+##		   protocolData=phenoData[, integer(0)],
93
+##		   position=integer(),
94
+##		   chromosome=integer(),
95
+##		   isSnp=integer(),
96
+##		   annotation=character(), ... ){
97
+##		  ##browser()
98
+##		  ##the ... should be additional assayDataElements, if any
99
+##		  .Object <- callNextMethod(.Object,
100
+##					    call=call,
101
+##					    callProbability=callProbability,
102
+##					    alleleA=alleleA,
103
+##					    alleleB=alleleB,
104
+##					    phenoData=phenoData,
105
+##					    featureData=featureData,
106
+##					    experimentData=experimentData,
107
+##					    protocolData=protocolData,
108
+##					    annotation=annotation, ...)
109
+##		  annotation <- .Object@annotation
110
+##		  ##add chromosome, position, isSnp to featureData
111
+##		  if(length(annotation) < 1){
112
+##			  if((length(position) < 1| length(chromosome) < 1 | length(isSnp) < 1)){
113
+##				  stop("must specify annotation if 'chromosome', 'position', and 'isSnp' are missing")
114
+##			  } else {
115
+##				  pData(featureData)$chromosome <- chromosome
116
+##				  pData(featureData)$position <- position
117
+##				  pData(featureData)$isSnp <- isSnp
118
+##			  }
119
+##		  } else{
120
+##			  if((length(position) < 1| length(chromosome) < 1 | length(isSnp) < 1)){
121
+##				  if(!isSupportedAnnotation(annotation)){
122
+##					  stop("The annotation is not supported. Arguments 'chromosome', 'position', and 'isSnp' can be omitted from the initialization only if the annotation is supported (see oligoClasses:::supportedAnnotation()).")
123
+##				  }
124
+##			  } else {
125
+##				  pData(featureData)$chromosome <- chromosome
126
+##				  pData(featureData)$position <- position
127
+##				  pData(featureData)$isSnp <- isSnp
128
+##			  }
129
+##			  .Object@featureData <- featureData
130
+##		  }
131
+##		  ##Do after annotation has been assigned
132
+##		  if(!(all(c("chromosome", "position", "isSnp") %in% varLabels(featureData))) & isSupportedAnnotation(annotation)){
133
+##			  .Object@featureData <- addFeatureAnnotation.crlmm(.Object)
134
+##		  }		  
135
+##		  .Object
136
+##          })
137
+
2 138
 
3
-setMethod("initialize", "SnpSuperSet",
4
-          function(.Object,
5
-		   assayData,
6
-                   call=new("matrix"),
7
-                   callProbability=new("matrix"),
8
-                   alleleA=new("matrix"),
9
-                   alleleB=new("matrix"),
10
-		   featureData,
11
-		   annotation,
12
-		   ...){
13
-		  if(!missing(assayData)){
14
-			  .Object <- callNextMethod(.Object, assayData=assayData,...)
15
-		  } else{
16
-			  ad <- assayDataNew("lockedEnvironment",
17
-					     call=call,
18
-					     callProbability=callProbability,
19
-					     alleleA=alleleA,
20
-					     alleleB=alleleB)
21
-			  .Object <- callNextMethod(.Object,
22
-						    assayData=ad, ...)
23
-		  }		  
24
-		  if(missing(annotation)){
25
-			  stop("must specify annotation")
26
-		  } else{
27
-			  stopifnot(isValidCdfName(annotation))
28
-			  .Object@annotation <- annotation
29
-		  }		  
30
-		  if (missing(featureData)){
31
-			  featureData(.Object) <- annotatedDataFrameFrom(call, byrow=TRUE)
32
-		  } else{
33
-			  featureData(.Object) <- featureData
34
-		  }
35
-		  ## Do after annotation has been assigned
36
-		  if(!(all(c("chromosome", "position", "isSnp")  %in% colnames(.Object@featureData)))){
37
-			  ##update the featureData
38
-			  .Object@featureData <- addFeatureAnnotation.SnpSuperSet(.Object)
39
-		  }
40
-		  .Object
41
-          })
42 139
 
43
-setMethod("addFeatureAnnotation", "SnpSuperSet", function(object, ...){
44
-	addFeatureAnnotation.SnpSuperSet(object, ...)
45
-})
140
+##setMethod("addFeatureAnnotation", "SnpSuperSet", function(object, ...){
141
+##	addFeatureAnnotation.crlmm(object, ...)
142
+##})
46 143
 
47 144
 getParam.SnpSuperSet <- function(object, name, batch){
48 145
 		  label <- paste(name, batch, sep="_")
... ...
@@ -64,49 +161,48 @@ getParam.SnpSuperSet <- function(object, name, batch){
64 161
 
65 162
 
66 163
 
67
-setMethod("splitByChromosome", "SnpSuperSet", function(object, cnOptions){
68
-	tmpdir <- cnOptions[["tmpdir"]]
69
-	outdir <- cnOptions[["outdir"]]	
70
-	save.it <- cnOptions[["save.it"]]
71
-	path <- system.file("extdata", package=paste(annotation(object), "Crlmm", sep=""))	
72
-	load(file.path(path, "snpProbes.rda"))
73
-	snpProbes <- get("snpProbes")
74
-	load(file.path(path, "cnProbes.rda"))
75
-	cnProbes <- get("cnProbes")	
76
-	k <- grep("chr", colnames(snpProbes))
77
-	if(length(k) < 1) stop("chr or chromosome not in colnames(snpProbes)")
78
-	for(CHR in 1:24){
79
-		cat("Chromosome ", CHR, "\n")
80
-		snps <- rownames(snpProbes)[snpProbes[, k] == CHR]
81
-		cnps <- rownames(cnProbes)[cnProbes[, k] == CHR]
82
-		index <- c(match(snps, featureNames(object)),
83
-			   match(cnps, featureNames(object)))
84
-		index <- index[!is.na(index)]
85
-		callSetPlus <- object[index, ]
86
-		if(CHR != 24){
87
-			cnSet <- computeCopynumber(callSetPlus, cnOptions)
88
-			
89
-		} else{
90
-			message("Copy number estimates not available for chromosome Y.  Saving only the 'callSetPlus' object for this chromosome")
91
-			save(callSetPlus, file=file.path(outdir, paste("callSetPlus_", CHR, ".rda", sep="")))
92
-		}
93
-		if(cnOptions[["hiddenMarkovModel"]] & CHR != 24){
94
-			cnSet <- computeHmm(cnSet, cnOptions)
95
-		}
96
-		save(cnSet, file=file.path(outdir, paste("cnSet_", CHR, ".rda", sep="")))
97
-		saved.objects <- list.files(outdir, pattern="cnSet", full.names=TRUE)
98
-##		} else{ ## save crlmmSet to outdir
99
-##			save(cnSet, file=file.path(outdir, paste("cnSet_", CHR, ".rda", sep="")))
100
-##			saved.objects <- list.files(outdir, pattern="cnSet", full.names=TRUE)			
101
-##		}		
102
-	}
103
-	saved.objects
104
-})
164
+##setMethod("splitByChromosome", "SnpSuperSet", function(object, cnOptions){
165
+##	tmpdir <- cnOptions[["tmpdir"]]
166
+##	outdir <- cnOptions[["outdir"]]	
167
+##	save.it <- cnOptions[["save.it"]]
168
+##	path <- system.file("extdata", package=paste(annotation(object), "Crlmm", sep=""))	
169
+##	load(file.path(path, "snpProbes.rda"))
170
+##	snpProbes <- get("snpProbes")
171
+##	load(file.path(path, "cnProbes.rda"))
172
+##	cnProbes <- get("cnProbes")	
173
+##	k <- grep("chr", colnames(snpProbes))
174
+##	if(length(k) < 1) stop("chr or chromosome not in colnames(snpProbes)")
175
+##	for(CHR in 1:24){
176
+##		cat("Chromosome ", CHR, "\n")
177
+##		snps <- rownames(snpProbes)[snpProbes[, k] == CHR]
178
+##		cnps <- rownames(cnProbes)[cnProbes[, k] == CHR]
179
+##		index <- c(match(snps, featureNames(object)),
180
+##			   match(cnps, featureNames(object)))
181
+##		index <- index[!is.na(index)]
182
+##		callSetPlus <- object[index, ]
183
+##		if(CHR != 24){
184
+##			cnSet <- computeCopynumber(callSetPlus, cnOptions)
185
+##			
186
+##		} else{
187
+##			message("Copy number estimates not available for chromosome Y.  Saving only the 'callSetPlus' object for this chromosome")
188
+##			save(callSetPlus, file=file.path(outdir, paste("callSetPlus_", CHR, ".rda", sep="")))
189
+##		}
190
+##		if(cnOptions[["hiddenMarkovModel"]] & CHR != 24){
191
+##			cnSet <- computeHmm(cnSet, cnOptions)
192
+##		}
193
+##		save(cnSet, file=file.path(outdir, paste("cnSet_", CHR, ".rda", sep="")))
194
+##		saved.objects <- list.files(outdir, pattern="cnSet", full.names=TRUE)
195
+####		} else{ ## save crlmmSet to outdir
196
+####			save(cnSet, file=file.path(outdir, paste("cnSet_", CHR, ".rda", sep="")))
197
+####			saved.objects <- list.files(outdir, pattern="cnSet", full.names=TRUE)			
198
+####		}		
199
+##	}
200
+##	saved.objects
201
+##})
105 202
 
106 203
 setMethod("computeCopynumber", "SnpSuperSet",
107 204
 	  function(object, cnOptions){
108 205
 		  computeCopynumber.SnpSuperSet(object, cnOptions)
109 206
 	  })
110 207
 
111
-##gtConfidence <- function(object) 1-exp(-confs(object)/1000)
112 208
 	
... ...
@@ -20,10 +20,7 @@ medianSummaries <- function(mat, grps)
20 20
 intMedianSummaries <- function(mat, grps)
21 21
   as.integer(medianSummaries(mat, grps))
22 22
 
23
-list.celfiles <-   function(...){
24
-  files <- list.files(...)
25
-  return(files[grep("\\.[cC][eE][lL]$", files)])
26
-}
23
+
27 24
 
28 25
 ## .crlmmPkgEnv is an enviroment that will
29 26
 ## store all the variables used by the pkg.
... ...
@@ -142,19 +139,22 @@ loader <- function(theFile, envir, pkgname){
142 139
 	load(theFile, envir=envir)
143 140
 }
144 141
 
145
-celfileDate <- function(filename) {
146
-	h <- affyio::read.celfile.header(filename, info="full")
147
-	date <- grep("/", strsplit(h$DatHeader, " ")[[1]], value=TRUE)
148
-	if(length(date) < 1){
149
-		##try something else
150
-		results <- h$ScanDate
151
-	} else{
152
-		date <- strsplit(date, split="/")[[1]]
153
-		CC <- ifelse(substr(date[3],1,1)=="9", "19", "20")
154
-		results <- as.character(as.Date(paste(paste(CC, date[3], sep=""), date[1],
155
-						      date[2], sep="-")))
142
+celDates <- function(celfiles){
143
+	if(!all(file.exists(celfiles))) stop("1 or more cel file does not exist")
144
+	celdates <- vector("character", length(celfiles))
145
+	celtimes <- vector("character", length(celfiles))
146
+	for(i in seq(along=celfiles)){
147
+		if(i %% 100 == 0) cat(".")
148
+		tmp <- read.celfile.header(celfiles[i], info="full")$DatHeader
149
+		tmp <- strsplit(tmp, "\ +")
150
+		celdates[i] <- tmp[[1]][6]
151
+		celtimes[i] <- tmp[[1]][7]
156 152
 	}
157
-	results
153
+	tmp <- paste(celdates, celtimes)
154
+	celdts <- strptime(tmp, "%m/%d/%y %H:%M:%S")
155
+	return(celdts)
158 156
 }
159 157
 
160 158
 
159
+
160
+