Browse code

genotype now calls 'rscrlmmGT2'. rscrlmmGT2 is a copy of crlmmGT2

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@52854 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 16/02/2011 15:59:32
Showing 2 changed files

... ...
@@ -77,12 +77,12 @@ construct <- function(filenames,
77 77
 		     callProbability=initializeBigMatrix(name="callPr", nr,nc),
78 78
 		     annotation=cdfName,
79 79
 		     batch=batch)
80
-	sampleNames(cnSet) <- sns
81
-	if(!missing(filenames)){
82
-		if(missing(sns)) sns <- basename(filenames)
83
-		protocolData <- getProtocolData.Affy(filenames)
84
-	} else{
80
+	if(!missing(sns)){
81
+		sampleNames(cnSet) <- sns
85 82
 		protocolData <- annotatedDataFrameFrom(A(cnSet), byrow=FALSE)
83
+	} else {
84
+		sampleNames(cnSet) <- basename(filenames)
85
+		protocolData <- getProtocolData.Affy(filenames)
86 86
 	}
87 87
 	rownames(pData(protocolData)) <- sns
88 88
 	protocolData(cnSet) <- protocolData
... ...
@@ -175,42 +175,74 @@ genotype <- function(filenames,
175 175
 		 mixtureSampleSize=mixtureSampleSize, pkgname=pkgname,
176 176
 		 neededPkgs=c("crlmm", pkgname))
177 177
 
178
-	gns <- snprmaRes[["gns"]]
179
-	snp.I <- isSnp(callSet)
180
-	is.snp <- which(snp.I)
181
-	snp.index <- match(featureNames(callSet)[is.snp], gns)
182
-	stopifnot(identical(featureNames(callSet)[is.snp], gns[snp.index]))
183
-##	is.snp <- isSnp(callSet)
184
-##	snp.index <- which(is.snp)
185
-	if(is.lds) open(callSet)
186
-	mixtureParams <- matrix(NA, 4, length(filenames))
187
-	##message("Saving snprmaRes file")
188
-	##save(snprmaRes, file=file.path(outdir, "snprmaRes.rda"))
189
-	if(verbose) message("Finished preprocessing.")
190
-	gns <- snprmaRes[["gns"]]
191
-	snp.I <- isSnp(callSet)
192
-	is.snp <- which(snp.I)
193
-	snp.index <- match(featureNames(callSet)[is.snp], gns)
194
-	stopifnot(identical(featureNames(callSet)[is.snp], gns[snp.index]))
195
-	if(is.lds) open(callSet)
196
-	mixtureParams <- matrix(NA, 4, length(filenames))
197
-	if(is.lds){
198
-		open(snprmaRes[["A"]])
199
-		open(snprmaRes[["B"]])
200
-		open(snprmaRes[["SNR"]])
201
-		open(snprmaRes[["mixtureParams"]])
202
-		bb <- getOption("ffbatchbytes")
203
-		ffcolapply(A(callSet)[is.snp, i1:i2] <- snprmaRes[["A"]][snp.index, i1:i2], X=snprmaRes[["A"]],
204
-			   BATCHBYTES=bb)
205
-		ffcolapply(B(callSet)[is.snp, i1:i2] <- snprmaRes[["B"]][snp.index, i1:i2], X=snprmaRes[["B"]],
206
-			   BATCHBYTES=bb)
207
-	} else{
208
-		A(callSet)[is.snp, ] <- snprmaRes[["A"]][snp.index, ]
209
-		B(callSet)[is.snp, ] <- snprmaRes[["B"]][snp.index, ]
178
+	## Now we initialize a CNSet object, cloning A and B
179
+	message("Cloning A and B intensities")
180
+	open(A)
181
+	open(B)
182
+	call <- clone(A)
183
+	callProbability=clone(B)
184
+	close(A)
185
+	close(B)
186
+	cnSet <- new("CNSet",
187
+		     alleleA=A,
188
+		     alleleB=B,
189
+		     call=call,
190
+		     callProbability=callProbability,
191
+		     featureData=featureData,
192
+		     annotation=cdfName,
193
+		     batch=batch)
194
+	if(!missing(sns)){
195
+		sampleNames(cnSet) <- sns
196
+		protocolData <- annotatedDataFrameFrom(A(cnSet), byrow=FALSE)
197
+	} else {
198
+		sampleNames(cnSet) <- basename(filenames)
199
+		protocolData <- getProtocolData.Affy(filenames)
210 200
 	}
211
-	pData(callSet)$SKW <- snprmaRes[["SKW"]]
212
-	pData(callSet)$SNR <- snprmaRes[["SNR"]]
213
-	mixtureParams <- snprmaRes$mixtureParams
201
+	rownames(pData(protocolData)) <- sns
202
+	protocolData(cnSet) <- protocolData
203
+	pd <- data.frame(matrix(NA, nc, 3), row.names=sns)
204
+	colnames(pd)=c("SKW", "SNR", "gender")
205
+	phenoData(cnSet) <- new("AnnotatedDataFrame", data=pd)
206
+	stopifnot(validObject(cnSet))
207
+	snp.I <- isSnp(cnSet)
208
+	##gns <- snprmaRes[["gns"]]
209
+##	snp.I <- isSnp(callSet)
210
+##	is.snp <- which(snp.I)
211
+##	snp.index <- match(featureNames(callSet)[is.snp], gns)
212
+##	stopifnot(identical(featureNames(callSet)[is.snp], gns[snp.index]))
213
+####	is.snp <- isSnp(callSet)
214
+####	snp.index <- which(is.snp)
215
+##	if(is.lds) open(callSet)
216
+##	mixtureParams <- matrix(NA, 4, length(filenames))
217
+##	##message("Saving snprmaRes file")
218
+##	##save(snprmaRes, file=file.path(outdir, "snprmaRes.rda"))
219
+##	if(verbose) message("Finished preprocessing.")
220
+##	gns <- snprmaRes[["gns"]]
221
+##	snp.I <- isSnp(callSet)
222
+##	is.snp <- which(snp.I)
223
+##	snp.index <- match(featureNames(callSet)[is.snp], gns)
224
+##	stopifnot(identical(featureNames(callSet)[is.snp], gns[snp.index]))
225
+##	if(is.lds) open(callSet)
226
+##	mixtureParams <- matrix(NA, 4, length(filenames))
227
+##	if(is.lds){
228
+##		open(snprmaRes[["A"]])
229
+##		open(snprmaRes[["B"]])
230
+##		open(snprmaRes[["SNR"]])
231
+##		open(snprmaRes[["mixtureParams"]])
232
+##		bb <- getOption("ffbatchbytes")
233
+##		ffcolapply(A(callSet)[is.snp, i1:i2] <- snprmaRes[["A"]][snp.index, i1:i2], X=snprmaRes[["A"]],
234
+##			   BATCHBYTES=bb)
235
+##		ffcolapply(B(callSet)[is.snp, i1:i2] <- snprmaRes[["B"]][snp.index, i1:i2], X=snprmaRes[["B"]],
236
+##			   BATCHBYTES=bb)
237
+##	} else{
238
+##		A(callSet)[is.snp, ] <- snprmaRes[["A"]][snp.index, ]
239
+##		B(callSet)[is.snp, ] <- snprmaRes[["B"]][snp.index, ]
240
+##	}
241
+##	pData(callSet)$SKW <- snprmaRes[["SKW"]]
242
+##	pData(callSet)$SNR <- snprmaRes[["SNR"]]
243
+	pData(cnSet)$SKW <- SKW
244
+	pData(cnSet)$SNR <- SNR
245
+##	mixtureParams <- snprmaRes$mixtureParams
214 246
 	np.index <- which(!snp.I)
215 247
 	if(verbose) message("Normalizing nonpolymorphic markers")
216 248
 	FUN <- ifelse(is.lds, "cnrma2", "cnrma")
... ...
@@ -222,38 +254,59 @@ genotype <- function(filenames,
222 254
 	}
223 255
 	## consider passing only A for NPs.
224 256
 	if(verbose) message("Quantile normalizing nonpolymorphic markers")
225
-	AA <- cnrmaFxn(FUN, A=A(callSet),
226
-		       filenames=filenames,
227
-		       row.names=featureNames(callSet)[np.index],
228
-		       cdfName=cdfName,
229
-		       sns=sns,
230
-		       seed=seed,
231
-		       verbose=verbose)
232
-	if(!is.lds) A(callSet) <- AA
233
-	rm(AA)
234
-	FUN <- ifelse(is.lds, "crlmmGT2", "crlmmGT")
235
-	## genotyping
236
-	crlmmGTfxn <- function(FUN,...){
237
-		switch(FUN,
238
-		       crlmmGT2=crlmmGT2(...),
239
-		       crlmmGT=crlmmGT(...))
240
-	}
241
-	if(verbose) message("Running crlmmGT2")
242
-	tmp <- crlmmGTfxn(FUN,
243
-			  A=snprmaRes[["A"]],
244
-			  B=snprmaRes[["B"]],
245
-			  SNR=snprmaRes[["SNR"]],
246
-			  mixtureParams=snprmaRes[["mixtureParams"]],
247
-			  cdfName=cdfName,
248
-			  row.names=NULL,
249
-			  col.names=sampleNames(callSet),
250
-			  SNRMin=SNRMin,
251
-			  recallMin=recallMin,
252
-			  recallRegMin=recallRegMin,
253
-			  gender=gender,
254
-			  verbose=verbose,
255
-			  returnParams=returnParams,
256
-			  badSNP=badSNP)
257
+##	AA <- cnrmaFxn(FUN, A=A(cnSet),
258
+##		       filenames=filenames,
259
+##		       row.names=featureNames(cnSet)[np.index],
260
+##		       cdfName=cdfName,
261
+##		       sns=sns,
262
+##		       seed=seed,
263
+##		       verbose=verbose)
264
+	cnrma2(A=A(cnSet),
265
+	       filenames=filenames,
266
+	       row.names=featureNames(cnSet)[np.index],
267
+	       cdfName=cdfName,
268
+	       sns=sns,
269
+	       seed=seed,
270
+	       verbose=verbose)
271
+	##if(!is.lds) A(callSet) <- AA
272
+	##rm(AA)
273
+##	FUN <- ifelse(is.lds, "crlmmGT2", "crlmmGT")
274
+##	## genotyping
275
+##	crlmmGTfxn <- function(FUN,...){
276
+##		switch(FUN,
277
+##		       crlmmGT2=crlmmGT2(...),
278
+##		       crlmmGT=crlmmGT(...))
279
+##	}
280
+##	if(verbose) message("Running crlmmGT2")
281
+##	tmp <- crlmmGTfxn(FUN,
282
+##			  A=snprmaRes[["A"]],
283
+##			  B=snprmaRes[["B"]],
284
+##			  SNR=snprmaRes[["SNR"]],
285
+##			  mixtureParams=snprmaRes[["mixtureParams"]],
286
+##			  cdfName=cdfName,
287
+##			  row.names=NULL,
288
+##			  col.names=sampleNames(callSet),
289
+##			  SNRMin=SNRMin,
290
+##			  recallMin=recallMin,
291
+##			  recallRegMin=recallRegMin,
292
+##			  gender=gender,
293
+##			  verbose=verbose,
294
+##			  returnParams=returnParams,
295
+##			  badSNP=badSNP)
296
+	rscrlmmGT2(A=calls(cnSet),
297
+		   B=callsProbability(cnSet),
298
+		   SNR=SNR,
299
+		   mixtureParams=mixtureParams,
300
+		   cdfName=cdfName,
301
+		   row.names=NULL,
302
+		   col.names=sampleNames(callSet),
303
+		   SNRMin=SNRMin,
304
+		   recallMin=recallMin,
305
+		   recallRegMin=recallRegMin,
306
+		   gender=gender,
307
+		   verbose=verbose,
308
+		   returnParams=returnParams,
309
+		   badSNP=badSNP)
257 310
 	if(verbose) message("Genotyping finished.  Updating container with genotype calls and confidence scores.")
258 311
 	if(is.lds){
259 312
 		open(tmp[["calls"]])
... ...
@@ -622,7 +622,7 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL,
622 622
 ##          (2 reads / 2 writes)
623 623
 ## 4.  results are directly usable -- no need to do anything
624 624
 ##     4 reads / 2 writes  OR  ( 4 reads/ 4writes if clone requires reading)
625
-mycrlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL,
625
+rscrlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL,
626 626
 		       col.names=NULL, probs=c(1/3, 1/3, 1/3), DF=6,
627 627
 		       SNRMin=5, recallMin=10, recallRegMin=1000,
628 628
 		       gender=NULL, desctrucitve=FALSE, verbose=TRUE,