Browse code

fixed bugs in genotype and crlmmIlluminaRS. changed arguments to crlmmCopynumber. removed cnOptions function

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45322 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 18/03/2010 12:19:14
Showing6 changed files

... ...
@@ -467,3 +467,9 @@ then readIDAT() should work. Thanks to Pierre Cherel who reported this error.
467 467
 
468 468
 **   added crlmmIlluminaRS to cnrma-functions.R (testing)
469 469
 
470
+2010-03-18 R.Scharpf committed version 1.5.37
471
+
472
+**   fixed bugs in genotype() and crlmmIlluminaRS.
473
+     (currently saving intermediate steps in crlmmIlluminaRS)
474
+     removed cnOptions function and changed arguments to crlmmCopynumber
475
+
... ...
@@ -1,7 +1,7 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.5.36
4
+Version: 1.5.37
5 5
 Date: 2010-02-05
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>
7 7
 Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
... ...
@@ -1,7 +1,7 @@
1 1
 useDynLib("crlmm", .registration=TRUE)
2 2
 
3 3
 ## this is temporary
4
-exportPattern("^[^\\.]")
4
+## exportPattern("^[^\\.]")
5 5
 
6 6
 ## Biobase
7 7
 importClassesFrom(Biobase, AnnotatedDataFrame, AssayData, eSet, SnpSet,
... ...
@@ -22,7 +22,7 @@ importFrom(Biobase, assayDataElement, assayDataElementNames,
22 22
            validMsg)
23 23
 
24 24
 ## oligoClasses
25
-importClassesFrom(oligoClasses, SnpSuperSet, AlleleSet)
25
+importClassesFrom(oligoClasses, SnpSuperSet, AlleleSet, CNSet)
26 26
 
27 27
 importMethodsFrom(oligoClasses, allele, calls, "calls<-", confs,
28 28
 		  "confs<-", cnConfidence, "cnConfidence<-", 
... ...
@@ -53,5 +53,7 @@ importFrom(mvtnorm, dmvnorm)
53 53
 importFrom(ellipse, ellipse)
54 54
 
55 55
 exportMethods(copyNumber)
56
-export(cnOptions, crlmm, crlmmIllumina, crlmmCopynumber, ellipse, readIdatFiles, snprma, getParam) 
57
-export(genotype)
56
+export(crlmm, crlmmIllumina, crlmmCopynumber, ellipse, readIdatFiles, snprma, getParam) 
57
+export(genotype, crlmmIlluminaRS)
58
+
59
+
... ...
@@ -4,7 +4,7 @@
4 4
 setGeneric("getParam", function(object, name, batch) standardGeneric("getParam"))
5 5
 setGeneric("cnIndex", function(object) standardGeneric("cnIndex"))
6 6
 setGeneric("cnNames", function(object) standardGeneric("cnNames"))
7
-setGeneric("computeCopynumber", function(object, cnOptions) standardGeneric("computeCopynumber"))
7
+setGeneric("computeCopynumber", function(object, ...) standardGeneric("computeCopynumber"))
8 8
 setGeneric("pr", function(object, name, batch, value) standardGeneric("pr"))
9 9
 setGeneric("snpIndex", function(object) standardGeneric("snpIndex"))
10 10
 setGeneric("snpNames", function(object) standardGeneric("snpNames"))
... ...
@@ -86,9 +86,9 @@ setMethod("open", "AlleleSet", function(con, ...){
86 86
 	return()
87 87
 })
88 88
 
89
-setReplaceMethod("calls", "SnpSuperSet", function(object, value) assayDataElementReplace(object, "call", value))
90
-setReplaceMethod("confs", "SnpSuperSet", function(object, value) assayDataElementReplace(object, "callProbability", value))
91
-setMethod("confs", "SnpSuperSet", function(object) assayDataElement(object, "callProbability"))
89
+##setReplaceMethod("calls", "SnpSuperSet", function(object, value) assayDataElementReplace(object, "call", value))
90
+##setReplaceMethod("confs", "SnpSuperSet", function(object, value) assayDataElementReplace(object, "callProbability", value))
91
+##setMethod("confs", "SnpSuperSet", function(object) assayDataElement(object, "callProbability"))
92 92
 genotype <- function(filenames, cdfName, mixtureSampleSize=10^5,
93 93
 		     fitMixture=TRUE,
94 94
 		     eps=0.1, verbose=TRUE,
... ...
@@ -122,9 +122,8 @@ genotype <- function(filenames, cdfName, mixtureSampleSize=10^5,
122 122
 				    verbose=verbose,
123 123
 				    seed=seed,
124 124
 				    cdfName=cdfName,
125
-				    sns=sns)
125
+				    sns=sns[j])
126 126
 		stopifnot(identical(featureNames(callSet)[snp.index], snprmaRes$gns))
127
-
128 127
 		pData(callSet)$SKW[j] <- snprmaRes$SKW
129 128
 		pData(callSet)$SNR[j] <- snprmaRes$SNR
130 129
 		A(callSet)[snp.index, j] <- snprmaRes[["A"]]
... ...
@@ -162,24 +161,58 @@ genotype <- function(filenames, cdfName, mixtureSampleSize=10^5,
162 161
 			       verbose=verbose,
163 162
 			       returnParams=returnParams,
164 163
 			       badSNP=badSNP)
165
-		calls(callSet)[snp.index, j] <- tmp[["calls"]]
166
-		confs(callSet)[snp.index, j] <- tmp[["confs"]]
164
+		snpCall(callSet)[snp.index, j] <- tmp[["calls"]]
165
+		snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]]
167 166
 		callSet$gender[j] <- tmp$gender
168 167
 		rm(tmp); gc()
169 168
 	}	
170 169
 	return(callSet)
171 170
 }
172 171
 
173
-
174
-
175
-
176
-
177 172
 ##---------------------------------------------------------------------------
178 173
 ##---------------------------------------------------------------------------
179 174
 ## For Illumina
180 175
 ##---------------------------------------------------------------------------
181 176
 ##---------------------------------------------------------------------------
182
-constructRG <- function(filenames, cdfName, sns, verbose, fileExt, sep){
177
+getPhenoData <- function(sampleSheet=NULL, arrayNames=NULL,
178
+			 arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A")){
179
+	if(!is.null(arrayNames)) {
180
+		pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
181
+	}
182
+	if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
183
+		if(is.null(arrayNames)){
184
+			##arrayNames=NULL
185
+			if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
186
+				barcode = sampleSheet[,arrayInfoColNames$barcode]
187
+				arrayNames=barcode
188
+			}
189
+			if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) {  
190
+				position = sampleSheet[,arrayInfoColNames$position]
191
+				if(is.null(arrayNames))
192
+					arrayNames=position
193
+				else
194
+					arrayNames = paste(arrayNames, position, sep=sep)
195
+				if(highDensity) {
196
+					hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02")
197
+					for(i in names(hdExt))
198
+						arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames)
199
+				}
200
+			}
201
+		}
202
+		pd = new("AnnotatedDataFrame", data = sampleSheet)
203
+		sampleNames(pd) <- basename(arrayNames)               
204
+	}
205
+	if(is.null(arrayNames)) {
206
+		arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
207
+		if(!is.null(sampleSheet)) {
208
+			sampleSheet=NULL
209
+			cat("Could not find required info in \'sampleSheet\' - ignoring.  Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
210
+		}
211
+		pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
212
+	}
213
+	return(pd)
214
+}
215
+constructRG <- function(filenames, cdfName, sns, verbose, fileExt, sep, sampleSheet, arrayInfoColNames){
183 216
 	if(verbose)	message("reading first idat file to extract feature data")
184 217
 	grnfile <- paste(filenames[1], fileExt$green, sep=sep)
185 218
 	if(!file.exists(grnfile)){
... ...
@@ -197,11 +230,13 @@ constructRG <- function(filenames, cdfName, sns, verbose, fileExt, sep){
197 230
 		  zero=initializeBigMatrix(name="zero", nr=nr, nc=length(filenames)),
198 231
 		  featureData=fD,
199 232
 		  annotation=cdfName)
200
-	pD <- data.frame(matrix(NA, length(sampleNames(RG)), 12), row.names=sampleNames(RG))
201
-	colnames(pD) <- c("Index","HapMap.Name","Name","ID",
202
-			  "Gender", "Plate", "Well", "Group", "Parent1",
203
-			  "Parent2","Replicate","SentrixPosition")
204
-	phenoData(RG) <- new("AnnotatedDataFrame", data=pD)
233
+	phenoData(RG) <- getPhenoData(sampleSheet=sampleSheet, arrayNames=filenames,
234
+				      arrayInfoColNames=arrayInfoColNames)
235
+##	pD <- data.frame(matrix(NA, length(sampleNames(RG)), 12), row.names=sampleNames(RG))
236
+##	colnames(pD) <- c("Index","HapMap.Name","Name","ID",
237
+##			  "Gender", "Plate", "Well", "Group", "Parent1",
238
+##			  "Parent2","Replicate","SentrixPosition")
239
+	##phenoData(RG) <- new("AnnotatedDataFrame", data=pD)
205 240
 	pD <- data.frame(matrix(NA, length(sampleNames(RG)), 1), row.names=sampleNames(RG))
206 241
 	colnames(pD) <- "ScanDate"
207 242
 	protocolData(RG) <- new("AnnotatedDataFrame", data=pD)
... ...
@@ -234,7 +269,9 @@ crlmmIlluminaRS <- function(sampleSheet=NULL,
234 269
 			  sns=sns,
235 270
 			  verbose=verbose,
236 271
 			  fileExt=fileExt,
237
-			  sep=sep)	
272
+			  sep=sep,
273
+			  sampleSheet=sampleSheet,
274
+			  arrayInfoColNames=arrayInfoColNames)
238 275
 	batches <- splitIndicesByLength(seq(along=arrayNames), ocSamples())
239 276
 	for(j in batches){
240 277
 		tmp <- readIdatFiles(sampleSheet=sampleSheet[j, ],
... ...
@@ -246,14 +283,23 @@ crlmmIlluminaRS <- function(sampleSheet=NULL,
246 283
 				     sep=sep,
247 284
 				     fileExt=fileExt,
248 285
 				     saveDate=TRUE)
249
-		assayData(RG)$R[, j] <- assayData(tmp)$R
250
-		assayData(RG)$G[, j] <- assayData(tmp)$G
251
-		assayData(RG)$zero[, j] <- assayData(tmp)$zero
286
+		if(nrow(tmp) != nrow(RG)){
287
+			##RS: I don't understand why the IDATS for the
288
+			##same platform do not have necessarily have
289
+			##the same length
290
+			tmp <- tmp[featureNames(tmp) %in% featureNames(RG), ]
291
+		}
292
+		index <- match(featureNames(tmp), featureNames(RG))
293
+		assayData(RG)$R[index,j] <- assayData(tmp)$R
294
+		assayData(RG)$G[index, j] <- assayData(tmp)$G
295
+		assayData(RG)$zero[index, j] <- assayData(tmp)$zero
252 296
 		pData(RG)[j, ] <- pData(tmp)
253 297
 		annotation(RG) <- annotation(tmp)
254 298
 		pData(protocolData(RG))[j, ] <- pData(protocolData(tmp))
255 299
 		rm(tmp); gc()
256 300
 	}
301
+	message("Saving RG")
302
+	save(RG, file=file.path(ldPath(), "RG.rda"))
257 303
 	k <- 1
258 304
 	for(j in batches){
259 305
 		##MR: Might want to to nonpolymorphic markers in a
... ...
@@ -280,6 +326,8 @@ crlmmIlluminaRS <- function(sampleSheet=NULL,
280 326
 		rm(tmp); gc()
281 327
 	}
282 328
 	annotation(XY) <- cdfName
329
+	message("Saving XY")
330
+	save(XY, file=file.path(ldPath(), "XY.rda"))
283 331
 	mixtureParams <- matrix(NA, 4, length(filenames))
284 332
 	k <- 1
285 333
 	for(j in batches){
... ...
@@ -316,6 +364,8 @@ crlmmIlluminaRS <- function(sampleSheet=NULL,
316 364
 		mixtureParams[, j] <- res$mixtureParams
317 365
 		rm(res); gc()
318 366
 	}
367
+	message("Saving callSe")
368
+	save(callSet, file=file.path(ldPath(), "callSet.rda"))	
319 369
 	for(j in batches){
320 370
 		##MR:  edit snp.index
321 371
 		snp.index <- 1:nrow(callSet)
... ...
@@ -335,9 +385,9 @@ crlmmIlluminaRS <- function(sampleSheet=NULL,
335 385
 			       verbose=verbose,
336 386
 			       returnParams=returnParams,
337 387
 			       badSNP=badSNP)
338
-		calls(callSet)[snp.index, j] <- tmp[["calls"]]
388
+		snpCall(callSet)[snp.index, j] <- tmp[["calls"]]
339 389
 		## many zeros here (?)
340
-		confs(callSet)[snp.index, j] <- tmp[["confs"]]
390
+		snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]]
341 391
 		callSet$gender[j] <- tmp$gender
342 392
 		rm(tmp); gc()
343 393
 	}
... ...
@@ -553,13 +603,30 @@ harmonizeDimnamesTo <- function(object1, object2){
553 603
 
554 604
 
555 605
 
556
-crlmmCopynumber <- function(object, cnOptions, chromosome=1:23, MIN.SAMPLES=10, SNRMin=5, ...){
606
+crlmmCopynumber <- function(object,
607
+			    batch,
608
+			    chromosome=1:23,
609
+			    MIN.SAMPLES=10,
610
+			    SNRMin=5,
611
+			    MIN.OBS=3,
612
+			    DF.PRIOR=50,
613
+			    bias.adj=FALSE,
614
+			    prior.prob=rep(1/4,4),
615
+			    seed=1,
616
+			    verbose=TRUE,
617
+			    GT.CONF.THR=0.99,
618
+			    PHI.THR=2^6,
619
+			    nHOM.THR=5,
620
+			    MIN.NU=2^3,
621
+			    MIN.PHI=2^3,
622
+			    THR.NU.PHI=TRUE,
623
+			    thresholdCopynumber=TRUE){
557 624
 	which.batch <- cnOptions[["whichbatch"]]
558 625
 	cnSet <- new("CNSetLM",
559 626
 		     alleleA=A(object),
560 627
 		     alleleB=B(object),
561
-		     call=calls(object),
562
-		     callProbability=confs(object),
628
+		     call=snpCall(object),
629
+		     callProbability=snpCallProbability(object),
563 630
 		     CA=initializeBigMatrix("CA", nrow(object), ncol(object)),
564 631
 		     CB=initializeBigMatrix("CB", nrow(object), ncol(object)),
565 632
 		     annotation=annotation(object),
... ...
@@ -582,7 +649,20 @@ crlmmCopynumber <- function(object, cnOptions, chromosome=1:23, MIN.SAMPLES=10,
582 649
 			row.index <- which(chromosome(cnSet) == i)
583 650
 			tmp <- cnSet[row.index, j]
584 651
 			featureData(tmp) <- lm.parameters(tmp)
585
-			tmp <- computeCopynumber(tmp, cnOptions)
652
+			tmp <- computeCopynumber(tmp,
653
+						 SNRMin=SNRMin,
654
+						 MIN.OBS=MIN.OBS,
655
+						 DF.PRIOR=DF.PRIOR,
656
+						 bias.adj=bias.adj,
657
+						 prior.prob=prior.prob,
658
+						 seed=seed,
659
+						 verbose=verbose,
660
+						 GT.CONF.THR=GT.CONF.THR,
661
+						 PHI.THR=PHI.THR,
662
+						 nHOM.THR=nHOM.THR,
663
+						 MIN.NU=MIN.NU,
664
+						 MIN.PHI=MIN.PHI,
665
+						 thresholdCopynumber=thresholdCopynumber)
586 666
 			fData(tmp) <- fData(tmp)[, -(1:3)]
587 667
 			CA(cnSet)[row.index, j] <- tmp@assayData[["CA"]]
588 668
 			CB(cnSet)[row.index, j] <- tmp@assayData[["CB"]]
... ...
@@ -1102,14 +1182,14 @@ withinGenotypeMoments <- function(object, cnOptions, tmp.objects){
1102 1182
 	vA <- tmp.objects[["vA"]]
1103 1183
 	vB <- tmp.objects[["vB"]]
1104 1184
 	Ns <- tmp.objects[["Ns"]]
1105
-	G <- calls(object) 
1185
+	G <- snpCallProbability(object) 
1106 1186
 	GT.CONF.THR <- cnOptions$GT.CONF.THR
1107 1187
 	CHR <- unique(chromosome(object))
1108 1188
 
1109 1189
 	A <- A(object)
1110 1190
 	B <- B(object)
1111 1191
 ##	highConf <- (1-exp(-confs(object)/1000)) > GT.CONF.THR
1112
-	xx <- assayData(object)$callProbability
1192
+	xx <- snpCallProbability(object)
1113 1193
 	highConf <- (1-exp(-xx/1000)) > GT.CONF.THR
1114 1194
 	##highConf <- confs(object) > GT.CONF.THR
1115 1195
 	##highConf <- highConf > GT.CONF.THR
... ...
@@ -41,9 +41,37 @@ setReplaceMethod("lM", c("CNSet", "list_or_ffdf"), function(object, value){
41 41
 ##		  featureData=featureData(from))
42 42
 ##      })
43 43
 
44
-setMethod("computeCopynumber", "CNSet", function(object, cnOptions){
44
+setMethod("computeCopynumber", "CNSet",
45
+	  function(object,
46
+		   MIN.OBS,
47
+		   DF.PRIOR,
48
+		   bias.adj,
49
+		   prior.prob,
50
+		   seed,
51
+		   verbose,
52
+		   GT.CONF.THR,
53
+		   PHI.THR,
54
+		   nHOM.THR,
55
+		   MIN.NU,
56
+		   MIN.PHI,
57
+		   thresholdCopynumber){
45 58
 	## to do the bias adjustment, initial estimates of the parameters are needed
46 59
 	##  The initial estimates are gotten by running computeCopynumber with cnOptions[["bias.adj"]]=FALSE
60
+
61
+		  cnOptions <- list(
62
+				    DF.PRIOR=DF.PRIOR,
63
+				    MIN.OBS=MIN.OBS,
64
+				    GT.CONF.THR=GT.CONF.THR,
65
+				    bias.adj=bias.adj,
66
+				    prior.prob=prior.prob,
67
+				    seed=seed,
68
+				    verbose=verbose,
69
+				    PHI.THR=PHI.THR,
70
+				    nHOM.THR=nHOM.THR,
71
+				    MIN.NU=MIN.NU,
72
+				    MIN.PHI=MIN.PHI,
73
+				    THR.NU.PHI=THR.NU.PHI,
74
+				    thresholdCopynumber=thresholdCopynumber)		  
47 75
 	bias.adj <- cnOptions[["bias.adj"]]
48 76
 	if(bias.adj & all(is.na(CA(object)))){
49 77
 		cnOptions[["bias.adj"]] <- FALSE
... ...
@@ -57,32 +85,32 @@ setMethod("computeCopynumber", "CNSet", function(object, cnOptions){
57 85
 	object
58 86
 })
59 87
 
60
-setMethod("computeCopynumber", "character", function(object, cnOptions){
61
-	crlmmFile <- object
62
-	isCNSet <- length(grep("cnSet", crlmmFile[1])) > 0
63
-	for(i in seq(along=crlmmFile)){
64
-		cat("Processing ", crlmmFile[i], "...\n")
65
-		load(crlmmFile[i])
66
-		if(isCNSet){
67
-			object <- get("cnSet")
68
-		} else {
69
-			object <- get("callSetPlus")
70
-		}
71
-		CHR <- unique(chromosome(object))
72
-		##if(length(CHR) > 1) stop("More than one chromosome in the object. This method requires one chromosome at a time.")		
73
-		if(all(CHR==24)){
74
-			message("skipping chromosome 24")
75
-			next()
76
-		}
77
-		cat("----------------------------------------------------------------------------\n")
78
-		cat("-        Estimating copy number for chromosome", CHR, "\n")
79
-		cat("----------------------------------------------------------------------------\n")
80
-		cnSet <- computeCopynumber(object, cnOptions)
81
-		save(cnSet, file=file.path(dirname(crlmmFile), paste("cnSet_", CHR, ".rda", sep="")))
82
-		if(!isCNSet) if(cnOptions[["unlink"]]) unlink(crlmmFile[i])
83
-		rm(object, cnSet); gc();
84
-	}	
85
-})
88
+##setMethod("computeCopynumber", "character", function(object, cnOptions){
89
+##	crlmmFile <- object
90
+##	isCNSet <- length(grep("cnSet", crlmmFile[1])) > 0
91
+##	for(i in seq(along=crlmmFile)){
92
+##		cat("Processing ", crlmmFile[i], "...\n")
93
+##		load(crlmmFile[i])
94
+##		if(isCNSet){
95
+##			object <- get("cnSet")
96
+##		} else {
97
+##			object <- get("callSetPlus")
98
+##		}
99
+##		CHR <- unique(chromosome(object))
100
+##		##if(length(CHR) > 1) stop("More than one chromosome in the object. This method requires one chromosome at a time.")		
101
+##		if(all(CHR==24)){
102
+##			message("skipping chromosome 24")
103
+##			next()
104
+##		}
105
+##		cat("----------------------------------------------------------------------------\n")
106
+##		cat("-        Estimating copy number for chromosome", CHR, "\n")
107
+##		cat("----------------------------------------------------------------------------\n")
108
+##		cnSet <- computeCopynumber(object, cnOptions)
109
+##		save(cnSet, file=file.path(dirname(crlmmFile), paste("cnSet_", CHR, ".rda", sep="")))
110
+##		if(!isCNSet) if(cnOptions[["unlink"]]) unlink(crlmmFile[i])
111
+##		rm(object, cnSet); gc();
112
+##	}	
113
+##})
86 114
 
87 115
 
88 116