git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@52854 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -77,12 +77,12 @@ construct <- function(filenames, |
77 | 77 |
callProbability=initializeBigMatrix(name="callPr", nr,nc), |
78 | 78 |
annotation=cdfName, |
79 | 79 |
batch=batch) |
80 |
- sampleNames(cnSet) <- sns |
|
81 |
- if(!missing(filenames)){ |
|
82 |
- if(missing(sns)) sns <- basename(filenames) |
|
83 |
- protocolData <- getProtocolData.Affy(filenames) |
|
84 |
- } else{ |
|
80 |
+ if(!missing(sns)){ |
|
81 |
+ sampleNames(cnSet) <- sns |
|
85 | 82 |
protocolData <- annotatedDataFrameFrom(A(cnSet), byrow=FALSE) |
83 |
+ } else { |
|
84 |
+ sampleNames(cnSet) <- basename(filenames) |
|
85 |
+ protocolData <- getProtocolData.Affy(filenames) |
|
86 | 86 |
} |
87 | 87 |
rownames(pData(protocolData)) <- sns |
88 | 88 |
protocolData(cnSet) <- protocolData |
... | ... |
@@ -175,42 +175,74 @@ genotype <- function(filenames, |
175 | 175 |
mixtureSampleSize=mixtureSampleSize, pkgname=pkgname, |
176 | 176 |
neededPkgs=c("crlmm", pkgname)) |
177 | 177 |
|
178 |
- gns <- snprmaRes[["gns"]] |
|
179 |
- snp.I <- isSnp(callSet) |
|
180 |
- is.snp <- which(snp.I) |
|
181 |
- snp.index <- match(featureNames(callSet)[is.snp], gns) |
|
182 |
- stopifnot(identical(featureNames(callSet)[is.snp], gns[snp.index])) |
|
183 |
-## is.snp <- isSnp(callSet) |
|
184 |
-## snp.index <- which(is.snp) |
|
185 |
- if(is.lds) open(callSet) |
|
186 |
- mixtureParams <- matrix(NA, 4, length(filenames)) |
|
187 |
- ##message("Saving snprmaRes file") |
|
188 |
- ##save(snprmaRes, file=file.path(outdir, "snprmaRes.rda")) |
|
189 |
- if(verbose) message("Finished preprocessing.") |
|
190 |
- gns <- snprmaRes[["gns"]] |
|
191 |
- snp.I <- isSnp(callSet) |
|
192 |
- is.snp <- which(snp.I) |
|
193 |
- snp.index <- match(featureNames(callSet)[is.snp], gns) |
|
194 |
- stopifnot(identical(featureNames(callSet)[is.snp], gns[snp.index])) |
|
195 |
- if(is.lds) open(callSet) |
|
196 |
- mixtureParams <- matrix(NA, 4, length(filenames)) |
|
197 |
- if(is.lds){ |
|
198 |
- open(snprmaRes[["A"]]) |
|
199 |
- open(snprmaRes[["B"]]) |
|
200 |
- open(snprmaRes[["SNR"]]) |
|
201 |
- open(snprmaRes[["mixtureParams"]]) |
|
202 |
- bb <- getOption("ffbatchbytes") |
|
203 |
- ffcolapply(A(callSet)[is.snp, i1:i2] <- snprmaRes[["A"]][snp.index, i1:i2], X=snprmaRes[["A"]], |
|
204 |
- BATCHBYTES=bb) |
|
205 |
- ffcolapply(B(callSet)[is.snp, i1:i2] <- snprmaRes[["B"]][snp.index, i1:i2], X=snprmaRes[["B"]], |
|
206 |
- BATCHBYTES=bb) |
|
207 |
- } else{ |
|
208 |
- A(callSet)[is.snp, ] <- snprmaRes[["A"]][snp.index, ] |
|
209 |
- B(callSet)[is.snp, ] <- snprmaRes[["B"]][snp.index, ] |
|
178 |
+ ## Now we initialize a CNSet object, cloning A and B |
|
179 |
+ message("Cloning A and B intensities") |
|
180 |
+ open(A) |
|
181 |
+ open(B) |
|
182 |
+ call <- clone(A) |
|
183 |
+ callProbability=clone(B) |
|
184 |
+ close(A) |
|
185 |
+ close(B) |
|
186 |
+ cnSet <- new("CNSet", |
|
187 |
+ alleleA=A, |
|
188 |
+ alleleB=B, |
|
189 |
+ call=call, |
|
190 |
+ callProbability=callProbability, |
|
191 |
+ featureData=featureData, |
|
192 |
+ annotation=cdfName, |
|
193 |
+ batch=batch) |
|
194 |
+ if(!missing(sns)){ |
|
195 |
+ sampleNames(cnSet) <- sns |
|
196 |
+ protocolData <- annotatedDataFrameFrom(A(cnSet), byrow=FALSE) |
|
197 |
+ } else { |
|
198 |
+ sampleNames(cnSet) <- basename(filenames) |
|
199 |
+ protocolData <- getProtocolData.Affy(filenames) |
|
210 | 200 |
} |
211 |
- pData(callSet)$SKW <- snprmaRes[["SKW"]] |
|
212 |
- pData(callSet)$SNR <- snprmaRes[["SNR"]] |
|
213 |
- mixtureParams <- snprmaRes$mixtureParams |
|
201 |
+ rownames(pData(protocolData)) <- sns |
|
202 |
+ protocolData(cnSet) <- protocolData |
|
203 |
+ pd <- data.frame(matrix(NA, nc, 3), row.names=sns) |
|
204 |
+ colnames(pd)=c("SKW", "SNR", "gender") |
|
205 |
+ phenoData(cnSet) <- new("AnnotatedDataFrame", data=pd) |
|
206 |
+ stopifnot(validObject(cnSet)) |
|
207 |
+ snp.I <- isSnp(cnSet) |
|
208 |
+ ##gns <- snprmaRes[["gns"]] |
|
209 |
+## snp.I <- isSnp(callSet) |
|
210 |
+## is.snp <- which(snp.I) |
|
211 |
+## snp.index <- match(featureNames(callSet)[is.snp], gns) |
|
212 |
+## stopifnot(identical(featureNames(callSet)[is.snp], gns[snp.index])) |
|
213 |
+#### is.snp <- isSnp(callSet) |
|
214 |
+#### snp.index <- which(is.snp) |
|
215 |
+## if(is.lds) open(callSet) |
|
216 |
+## mixtureParams <- matrix(NA, 4, length(filenames)) |
|
217 |
+## ##message("Saving snprmaRes file") |
|
218 |
+## ##save(snprmaRes, file=file.path(outdir, "snprmaRes.rda")) |
|
219 |
+## if(verbose) message("Finished preprocessing.") |
|
220 |
+## gns <- snprmaRes[["gns"]] |
|
221 |
+## snp.I <- isSnp(callSet) |
|
222 |
+## is.snp <- which(snp.I) |
|
223 |
+## snp.index <- match(featureNames(callSet)[is.snp], gns) |
|
224 |
+## stopifnot(identical(featureNames(callSet)[is.snp], gns[snp.index])) |
|
225 |
+## if(is.lds) open(callSet) |
|
226 |
+## mixtureParams <- matrix(NA, 4, length(filenames)) |
|
227 |
+## if(is.lds){ |
|
228 |
+## open(snprmaRes[["A"]]) |
|
229 |
+## open(snprmaRes[["B"]]) |
|
230 |
+## open(snprmaRes[["SNR"]]) |
|
231 |
+## open(snprmaRes[["mixtureParams"]]) |
|
232 |
+## bb <- getOption("ffbatchbytes") |
|
233 |
+## ffcolapply(A(callSet)[is.snp, i1:i2] <- snprmaRes[["A"]][snp.index, i1:i2], X=snprmaRes[["A"]], |
|
234 |
+## BATCHBYTES=bb) |
|
235 |
+## ffcolapply(B(callSet)[is.snp, i1:i2] <- snprmaRes[["B"]][snp.index, i1:i2], X=snprmaRes[["B"]], |
|
236 |
+## BATCHBYTES=bb) |
|
237 |
+## } else{ |
|
238 |
+## A(callSet)[is.snp, ] <- snprmaRes[["A"]][snp.index, ] |
|
239 |
+## B(callSet)[is.snp, ] <- snprmaRes[["B"]][snp.index, ] |
|
240 |
+## } |
|
241 |
+## pData(callSet)$SKW <- snprmaRes[["SKW"]] |
|
242 |
+## pData(callSet)$SNR <- snprmaRes[["SNR"]] |
|
243 |
+ pData(cnSet)$SKW <- SKW |
|
244 |
+ pData(cnSet)$SNR <- SNR |
|
245 |
+## mixtureParams <- snprmaRes$mixtureParams |
|
214 | 246 |
np.index <- which(!snp.I) |
215 | 247 |
if(verbose) message("Normalizing nonpolymorphic markers") |
216 | 248 |
FUN <- ifelse(is.lds, "cnrma2", "cnrma") |
... | ... |
@@ -222,38 +254,59 @@ genotype <- function(filenames, |
222 | 254 |
} |
223 | 255 |
## consider passing only A for NPs. |
224 | 256 |
if(verbose) message("Quantile normalizing nonpolymorphic markers") |
225 |
- AA <- cnrmaFxn(FUN, A=A(callSet), |
|
226 |
- filenames=filenames, |
|
227 |
- row.names=featureNames(callSet)[np.index], |
|
228 |
- cdfName=cdfName, |
|
229 |
- sns=sns, |
|
230 |
- seed=seed, |
|
231 |
- verbose=verbose) |
|
232 |
- if(!is.lds) A(callSet) <- AA |
|
233 |
- rm(AA) |
|
234 |
- FUN <- ifelse(is.lds, "crlmmGT2", "crlmmGT") |
|
235 |
- ## genotyping |
|
236 |
- crlmmGTfxn <- function(FUN,...){ |
|
237 |
- switch(FUN, |
|
238 |
- crlmmGT2=crlmmGT2(...), |
|
239 |
- crlmmGT=crlmmGT(...)) |
|
240 |
- } |
|
241 |
- if(verbose) message("Running crlmmGT2") |
|
242 |
- tmp <- crlmmGTfxn(FUN, |
|
243 |
- A=snprmaRes[["A"]], |
|
244 |
- B=snprmaRes[["B"]], |
|
245 |
- SNR=snprmaRes[["SNR"]], |
|
246 |
- mixtureParams=snprmaRes[["mixtureParams"]], |
|
247 |
- cdfName=cdfName, |
|
248 |
- row.names=NULL, |
|
249 |
- col.names=sampleNames(callSet), |
|
250 |
- SNRMin=SNRMin, |
|
251 |
- recallMin=recallMin, |
|
252 |
- recallRegMin=recallRegMin, |
|
253 |
- gender=gender, |
|
254 |
- verbose=verbose, |
|
255 |
- returnParams=returnParams, |
|
256 |
- badSNP=badSNP) |
|
257 |
+## AA <- cnrmaFxn(FUN, A=A(cnSet), |
|
258 |
+## filenames=filenames, |
|
259 |
+## row.names=featureNames(cnSet)[np.index], |
|
260 |
+## cdfName=cdfName, |
|
261 |
+## sns=sns, |
|
262 |
+## seed=seed, |
|
263 |
+## verbose=verbose) |
|
264 |
+ cnrma2(A=A(cnSet), |
|
265 |
+ filenames=filenames, |
|
266 |
+ row.names=featureNames(cnSet)[np.index], |
|
267 |
+ cdfName=cdfName, |
|
268 |
+ sns=sns, |
|
269 |
+ seed=seed, |
|
270 |
+ verbose=verbose) |
|
271 |
+ ##if(!is.lds) A(callSet) <- AA |
|
272 |
+ ##rm(AA) |
|
273 |
+## FUN <- ifelse(is.lds, "crlmmGT2", "crlmmGT") |
|
274 |
+## ## genotyping |
|
275 |
+## crlmmGTfxn <- function(FUN,...){ |
|
276 |
+## switch(FUN, |
|
277 |
+## crlmmGT2=crlmmGT2(...), |
|
278 |
+## crlmmGT=crlmmGT(...)) |
|
279 |
+## } |
|
280 |
+## if(verbose) message("Running crlmmGT2") |
|
281 |
+## tmp <- crlmmGTfxn(FUN, |
|
282 |
+## A=snprmaRes[["A"]], |
|
283 |
+## B=snprmaRes[["B"]], |
|
284 |
+## SNR=snprmaRes[["SNR"]], |
|
285 |
+## mixtureParams=snprmaRes[["mixtureParams"]], |
|
286 |
+## cdfName=cdfName, |
|
287 |
+## row.names=NULL, |
|
288 |
+## col.names=sampleNames(callSet), |
|
289 |
+## SNRMin=SNRMin, |
|
290 |
+## recallMin=recallMin, |
|
291 |
+## recallRegMin=recallRegMin, |
|
292 |
+## gender=gender, |
|
293 |
+## verbose=verbose, |
|
294 |
+## returnParams=returnParams, |
|
295 |
+## badSNP=badSNP) |
|
296 |
+ rscrlmmGT2(A=calls(cnSet), |
|
297 |
+ B=callsProbability(cnSet), |
|
298 |
+ SNR=SNR, |
|
299 |
+ mixtureParams=mixtureParams, |
|
300 |
+ cdfName=cdfName, |
|
301 |
+ row.names=NULL, |
|
302 |
+ col.names=sampleNames(callSet), |
|
303 |
+ SNRMin=SNRMin, |
|
304 |
+ recallMin=recallMin, |
|
305 |
+ recallRegMin=recallRegMin, |
|
306 |
+ gender=gender, |
|
307 |
+ verbose=verbose, |
|
308 |
+ returnParams=returnParams, |
|
309 |
+ badSNP=badSNP) |
|
257 | 310 |
if(verbose) message("Genotyping finished. Updating container with genotype calls and confidence scores.") |
258 | 311 |
if(is.lds){ |
259 | 312 |
open(tmp[["calls"]]) |
... | ... |
@@ -622,7 +622,7 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
622 | 622 |
## (2 reads / 2 writes) |
623 | 623 |
## 4. results are directly usable -- no need to do anything |
624 | 624 |
## 4 reads / 2 writes OR ( 4 reads/ 4writes if clone requires reading) |
625 |
-mycrlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
|
625 |
+rscrlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
|
626 | 626 |
col.names=NULL, probs=c(1/3, 1/3, 1/3), DF=6, |
627 | 627 |
SNRMin=5, recallMin=10, recallRegMin=1000, |
628 | 628 |
gender=NULL, desctrucitve=FALSE, verbose=TRUE, |