Overall, the changes should save one read step, reducing I/O for big
datasets.
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@54166 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1160,6 +1160,8 @@ preprocessInf <- function(cnSet, |
1160 | 1160 |
useTarget=useTarget, |
1161 | 1161 |
A=A(cnSet), |
1162 | 1162 |
B=B(cnSet), |
1163 |
+ GT=calls(cnSet), |
|
1164 |
+ GTP=snpCallProbability(cnSet), |
|
1163 | 1165 |
SKW=cnSet$SKW, |
1164 | 1166 |
SNR=cnSet$SNR, |
1165 | 1167 |
mixtureParams=mixtureParams, |
... | ... |
@@ -1178,22 +1180,22 @@ genotypeInf <- function(cnSet, mixtureParams, probs=rep(1/3,3), |
1178 | 1180 |
badSNP=0.7, |
1179 | 1181 |
gender=NULL, |
1180 | 1182 |
DF=6){ |
1181 |
- is.snp = isSnp(cnSet) |
|
1182 |
- snp.index = which(is.snp) |
|
1183 |
- narrays = ncol(cnSet) |
|
1184 |
- open(A(cnSet)) |
|
1185 |
- open(B(cnSet)) |
|
1186 |
- open(snpCall(cnSet)) |
|
1187 |
- open(snpCallProbability(cnSet)) |
|
1188 |
- ## crlmmGT2 overwrites the normalized intensities with calls and confidenceScores |
|
1189 |
- message("Writing to call and callProbability slots") |
|
1190 |
- for(j in 1:ncol(cnSet)){ |
|
1191 |
- snpCall(cnSet)[snp.index, j] <- as.integer(A(cnSet)[snp.index, j]) |
|
1192 |
- snpCallProbability(cnSet)[snp.index, j] <- as.integer(B(cnSet)[snp.index, j]) |
|
1193 |
- } |
|
1194 |
- message("Writing complete. Begin genotyping...") |
|
1195 |
- close(A(cnSet)) |
|
1196 |
- close(B(cnSet)) |
|
1183 |
+## is.snp = isSnp(cnSet) |
|
1184 |
+## snp.index = which(is.snp) |
|
1185 |
+## narrays = ncol(cnSet) |
|
1186 |
+## open(A(cnSet)) |
|
1187 |
+## open(B(cnSet)) |
|
1188 |
+## open(snpCall(cnSet)) |
|
1189 |
+## open(snpCallProbability(cnSet)) |
|
1190 |
+## ## crlmmGT2 overwrites the normalized intensities with calls and confidenceScores |
|
1191 |
+## message("Writing to call and callProbability slots") |
|
1192 |
+## for(j in 1:ncol(cnSet)){ |
|
1193 |
+## snpCall(cnSet)[snp.index, j] <- as.integer(A(cnSet)[snp.index, j]) |
|
1194 |
+## snpCallProbability(cnSet)[snp.index, j] <- as.integer(B(cnSet)[snp.index, j]) |
|
1195 |
+## } |
|
1196 |
+## message("Writing complete. Begin genotyping...") |
|
1197 |
+## close(A(cnSet)) |
|
1198 |
+## close(B(cnSet)) |
|
1197 | 1199 |
tmp <- rscrlmmGT2(A=snpCall(cnSet), |
1198 | 1200 |
B=snpCallProbability(cnSet), |
1199 | 1201 |
SNR=cnSet$SNR, |
... | ... |
@@ -1316,81 +1318,64 @@ processIDAT <- function(stratum, sampleBatches, sampleSheet=NULL, |
1316 | 1318 |
sns, |
1317 | 1319 |
stripNorm=TRUE, |
1318 | 1320 |
useTarget=TRUE, |
1319 |
- A, B, SKW, SNR, mixtureParams, is.snp) { #, outdir=".") { |
|
1321 |
+ A, B, |
|
1322 |
+ GT, |
|
1323 |
+ GTP, |
|
1324 |
+ SKW, SNR, mixtureParams, is.snp) { #, outdir=".") { |
|
1320 | 1325 |
message("Processing sample stratum ", stratum, " of ", length(sampleBatches)) |
1321 | 1326 |
sel <- sampleBatches[[stratum]] |
1322 | 1327 |
if(length(path)>= length(sel)) path = path[sel] |
1323 |
- ##message("RS:... processIDAT: calling readIdatFiles") |
|
1324 | 1328 |
RG = readIdatFiles(sampleSheet=sampleSheet[sel,], arrayNames=arrayNames[sel], |
1325 | 1329 |
ids=ids, path=path, arrayInfoColNames=arrayInfoColNames, |
1326 | 1330 |
highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate, verbose=verbose) |
1327 | 1331 |
XY = RGtoXY(RG, chipType=cdfName) |
1328 |
-# open(RG@assayData$R); open(RG@assayData$G); open(RG@assayData$zero) |
|
1329 |
-# delete(RG@assayData$R); delete(RG@assayData$G); delete(RG@assayData$zero); |
|
1330 | 1332 |
rm(RG) |
1331 | 1333 |
gc() |
1332 | 1334 |
if (missing(sns) || length(sns)!=ncol(XY)) sns = sampleNames(XY) |
1333 |
- |
|
1334 |
- ##message("RS:... processIDAT: calling preprocessInfinium2") |
|
1335 | 1335 |
res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose, |
1336 | 1336 |
seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #, outdir=outdir) |
1337 |
-# save.it=save.it, snpFile=snpFile, cnFile=cnFile) |
|
1338 |
-# open(XY@assayData$X); open(XY@assayData$Y); open(XY@assayData$zero) |
|
1339 |
-# delete(XY@assayData$X); delete(XY@assayData$Y); delete(XY@assayData$zero); |
|
1340 | 1337 |
rm(XY) |
1341 | 1338 |
gc() |
1342 |
- if(verbose) message("Finished preprocessing.") |
|
1339 |
+ if(verbose) message("Finished preprocessing.") |
|
1343 | 1340 |
snp.index = which(is.snp) |
1344 | 1341 |
np.index = which(!is.snp) |
1345 |
- |
|
1346 |
-# open(res[["A"]]) |
|
1347 |
-# open(res[["B"]]) |
|
1348 |
-# open(res[["SKW"]]) |
|
1349 |
-# open(res[["SNR"]]) |
|
1350 |
-# open(res[["mixtureParams"]]) |
|
1351 |
-# remove this line: bb = ocProbesets()*length(sns)*8 |
|
1352 |
-# Add these |
|
1353 |
- ##segfault came after 'Finished preprocessing', but the processIDAT loop was not yet complete |
|
1354 | 1342 |
open(A); open(B); |
1343 |
+ open(GT); open(GTP) |
|
1355 | 1344 |
Amatrix <- res[["A"]] |
1356 | 1345 |
Bmatrix <- res[["B"]] |
1346 |
+ |
|
1347 |
+ ## Amatrix and Bmatrix are ordinary matrices--not ff objects. |
|
1348 |
+ ## By writing columns of a ordinary matrix to GT and GTP, we |
|
1349 |
+ ## save one read step later on. GT and GTP will be updated to |
|
1350 |
+ ## calls and call probabilities by the crlmmGT2 function. The A |
|
1351 |
+ ## and B slots will retain the normalized intensities for the |
|
1352 |
+ ## A and B alleles |
|
1357 | 1353 |
for(j in seq_along(sel)){ |
1358 | 1354 |
jj <- sel[j] |
1359 | 1355 |
A[snp.index, jj] <- Amatrix[, j] |
1356 |
+ GT[snp.index, jj] <- Amatrix[, j] |
|
1360 | 1357 |
B[snp.index, jj] <- Bmatrix[, j] |
1358 |
+ GTP[snp.index, jj] <- Bmatrix[, j] |
|
1361 | 1359 |
} |
1362 |
- ##ffcolapply(A[snp.index,sel][,i1:i2] <- res[["A"]][,i1:i2], X=res[["A"]]) |
|
1363 |
- ##ffcolapply(B[snp.index,sel][,i1:i2] <- res[["B"]][,i1:i2], X=res[["B"]]) |
|
1364 |
-## } |
|
1365 |
-# ffrowapply(A[snp.index,][i1:i2, sel] <- res[["A"]][i1:i2, ], X=res[["A"]], BATCHBYTES=bb) |
|
1366 |
-# ffrowapply(B[snp.index,][i1:i2, sel] <- res[["B"]][i1:i2, ], X=res[["B"]], BATCHBYTES=bb) |
|
1367 |
- if(length(np.index)>0) { |
|
1368 |
-# for (j in 1:length(sel)) { |
|
1369 |
- cnAmatrix <- res[["cnAB"]]$A |
|
1370 |
- cnBmatrix <- res[["cnAB"]]$B |
|
1371 |
- for(j in seq_along(sel)){ |
|
1372 |
- jj <- sel[j] |
|
1373 |
- A[np.index, jj] <- cnAmatrix[, j] |
|
1374 |
- B[np.index, jj] <- cnBmatrix[, j] |
|
1375 |
- } |
|
1376 |
- |
|
1377 |
-## ffcolapply(A[np.index,sel][,i1:i2] <- res[["cnAB"]]$A[,i1:i2], X=res[["cnAB"]]$A) |
|
1378 |
-## ffcolapply(B[np.index,sel][,i1:i2] <- res[["cnAB"]]$B[,i1:i2], X=res[["cnAB"]]$B) |
|
1379 |
-# A[np.index, sel[j]] = res[["cnAB"]]$A[,j] |
|
1380 |
-# B[np.index, sel[j]] = res[["cnAB"]]$B[,j] |
|
1381 |
-# } |
|
1360 |
+ if(length(np.index)>0) { |
|
1361 |
+ cnAmatrix <- res[["cnAB"]]$A |
|
1362 |
+ cnBmatrix <- res[["cnAB"]]$B |
|
1363 |
+ for(j in seq_along(sel)){ |
|
1364 |
+ jj <- sel[j] |
|
1365 |
+ A[np.index, jj] <- cnAmatrix[, j] |
|
1366 |
+ GT[np.index, jj] <- cnAmatrix[, j] |
|
1367 |
+ B[np.index, jj] <- cnBmatrix[, j] |
|
1368 |
+ GTP[np.index, jj] <- cnBmatrix[, j] |
|
1369 |
+ } |
|
1382 | 1370 |
} |
1383 |
-# delete(res[["A"]]); delete(res[["B"]]) |
|
1384 | 1371 |
open(SKW); open(SNR); open(mixtureParams) |
1385 | 1372 |
SKW[sel] = res[["SKW"]][] |
1386 | 1373 |
SNR[sel] = res[["SNR"]][] |
1387 | 1374 |
mixtureParams[, sel] = res[["mixtureParams"]][] |
1388 |
- close(A) |
|
1389 |
- close(B) |
|
1390 |
- close(SNR) |
|
1391 |
- close(SKW) |
|
1375 |
+ close(A); close(B) |
|
1376 |
+ close(GT); close(GTP) |
|
1377 |
+ close(SNR); close(SKW) |
|
1392 | 1378 |
close(mixtureParams) |
1393 |
-# delete(res[["SKW"]]); delete(res[["SNR"]]); delete(res[["mixtureParams"]]) |
|
1394 | 1379 |
rm(res) |
1395 | 1380 |
gc() |
1396 | 1381 |
TRUE |