(trying to figure out segfault)
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@53849 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1110,9 +1110,9 @@ construct.Illumina <- function(sampleSheet=NULL, |
1110 | 1110 |
protocolData(cnSet) = protocolData |
1111 | 1111 |
featureData(cnSet) = featureData |
1112 | 1112 |
featureNames(cnSet) = featureNames(featureData) |
1113 |
- pd = data.frame(matrix(NA, nc, 3), row.names=sampleNames(cnSet)) |
|
1114 |
- colnames(pd)=c("SKW", "SNR", "gender") |
|
1115 |
- phenoData(cnSet) = new("AnnotatedDataFrame", data=pd) |
|
1113 |
+ ##pd = data.frame(matrix(NA, nc, 3), row.names=sampleNames(cnSet)) |
|
1114 |
+ ##colnames(pd)=c("SKW", "SNR", "gender") |
|
1115 |
+ ##phenoData(cnSet) = new("AnnotatedDataFrame", data=pd) |
|
1116 | 1116 |
return(cnSet) |
1117 | 1117 |
} |
1118 | 1118 |
|
... | ... |
@@ -1151,18 +1151,18 @@ genotype.Illumina <- function(sampleSheet=NULL, |
1151 | 1151 |
if(missing(cdfName)) stop("must specify cdfName") |
1152 | 1152 |
if(!isValidCdfName(cdfName)) stop("cdfName not valid. see validCdfNames") |
1153 | 1153 |
pkgname = getCrlmmAnnotationName(cdfName) |
1154 |
-# if(missing(outdir)) |
|
1155 |
-# stop("Must specify a directory to store large data objects") |
|
1156 |
- callSet <- .GlobalEnv[["callSet"]] |
|
1157 |
- if(is.null(callSet)){ |
|
1154 |
+## if(missing(outdir)) |
|
1155 |
+## stop("Must specify a directory to store large data objects") |
|
1156 |
+ ##callSet <- .GlobalEnv[["callSet"]] |
|
1157 |
+ ##if(is.null(callSet)){ |
|
1158 | 1158 |
callSet <- construct.Illumina(sampleSheet=sampleSheet, arrayNames=arrayNames, |
1159 | 1159 |
ids=ids, path=path, arrayInfoColNames=arrayInfoColNames, |
1160 | 1160 |
highDensity=highDensity, sep=sep, fileExt=fileExt, |
1161 | 1161 |
cdfName=cdfName, copynumber=copynumber, verbose=verbose, batch=batch, # fns=fns, |
1162 | 1162 |
saveDate=saveDate) #, outdir=outdir) |
1163 | 1163 |
sampleNames(callSet) <- basename(sampleNames(callSet)) |
1164 |
- save(callSet, file=file.path(ldPath(), "callSet.rda")) |
|
1165 |
- } else message("Using callSet loaded from GlobalEnv") |
|
1164 |
+ ##save(callSet, file=file.path(ldPath(), "callSet.rda")) |
|
1165 |
+ ##} else message("Using callSet loaded from GlobalEnv") |
|
1166 | 1166 |
if(missing(sns)) sns = basename(sampleNames(callSet)) |
1167 | 1167 |
if(is.lds) { |
1168 | 1168 |
open(A(callSet)) |
... | ... |
@@ -1173,10 +1173,11 @@ genotype.Illumina <- function(sampleSheet=NULL, |
1173 | 1173 |
snp.index = which(is.snp) |
1174 | 1174 |
narrays = ncol(callSet) |
1175 | 1175 |
if(is.lds) { |
1176 |
- index <- .GlobalEnv[["index"]] |
|
1177 |
- if(is.null(index)){ |
|
1178 |
- sampleBatches = splitIndicesByLength(seq(along=sampleNames(callSet)), ocSamples()) |
|
1179 |
- } else sampleBatches <- splitIndicesByLength(index, ocSamples()) |
|
1176 |
+## index <- .GlobalEnv[["index"]] |
|
1177 |
+## if(is.null(index)){ |
|
1178 |
+## sampleBatches = splitIndicesByLength(seq(along=sampleNames(callSet)), ocSamples()) |
|
1179 |
+## } else sampleBatches <- splitIndicesByLength(seq(length=ncol(callSet)), ocSamples()) |
|
1180 |
+ sampleBatches <- splitIndicesByLength(seq(length=ncol(callSet)), ocSamples()) |
|
1180 | 1181 |
mixtureParams = initializeBigMatrix("crlmmMixt-", 4, narrays, "double") |
1181 | 1182 |
SNR = initializeBigVector("crlmmSNR-", narrays, "double") |
1182 | 1183 |
SKW = initializeBigVector("crlmmSKW-", narrays, "double") |
... | ... |
@@ -1190,6 +1191,7 @@ genotype.Illumina <- function(sampleSheet=NULL, |
1190 | 1191 |
open(SNR) |
1191 | 1192 |
pData(callSet)$SKW = SKW |
1192 | 1193 |
pData(callSet)$SNR = SNR |
1194 |
+ save(callSet, file=file.path(ldPath(), "callSet.rda")) |
|
1193 | 1195 |
close(SNR) |
1194 | 1196 |
close(SKW) |
1195 | 1197 |
} else { |
... | ... |
@@ -1228,8 +1230,12 @@ genotype.Illumina <- function(sampleSheet=NULL, |
1228 | 1230 |
open(B(callSet)) |
1229 | 1231 |
tmpA = initializeBigMatrix(name="tmpA", length(snp.index), narrays) |
1230 | 1232 |
tmpB = initializeBigMatrix(name="tmpB", length(snp.index), narrays) |
1231 |
- ffcolapply(tmpA[,i1:i2] <- A(callSet)[snp.index,i1:i2], X=A(callSet)) |
|
1232 |
- ffcolapply(tmpB[,i1:i2] <- B(callSet)[snp.index,i1:i2], X=B(callSet)) |
|
1233 |
+ ##ffcolapply(tmpA[,i1:i2] <- A(callSet)[snp.index,i1:i2], X=A(callSet)) |
|
1234 |
+ ##ffcolapply(tmpB[,i1:i2] <- B(callSet)[snp.index,i1:i2], X=B(callSet)) |
|
1235 |
+ for(j in seq(length=ncol(callSet)){ |
|
1236 |
+ tmpA[, j] <- as.integer(A(callSet)[snp.index, j]) |
|
1237 |
+ tmpB[, j] <- as.integer(B(callSet)[snp.index, j]) |
|
1238 |
+ } |
|
1233 | 1239 |
close(A(callSet)) |
1234 | 1240 |
close(B(callSet)) |
1235 | 1241 |
close(tmpA) |
... | ... |
@@ -1255,7 +1261,10 @@ genotype.Illumina <- function(sampleSheet=NULL, |
1255 | 1261 |
## badSNP)) |
1256 | 1262 |
save(tmpA, file=file.path(ldPath(), "tmpA.rda")) |
1257 | 1263 |
save(tmpB, file=file.path(ldPath(), "tmpB.rda")) |
1264 |
+ save(SNR, file=file.path(ldPath(), "SNR.rda")) |
|
1265 |
+ save(SKW, file=file.path(ldPath(), "SKW.rda")) |
|
1258 | 1266 |
save(mixtureParams, file=file.path(ldPath(), "mixtureParams.rda")) |
1267 |
+## return(NULL) |
|
1259 | 1268 |
tmp <- crlmmGTfxn(FUN, |
1260 | 1269 |
A=tmpA, |
1261 | 1270 |
B=tmpB, |
... | ... |
@@ -1282,12 +1291,15 @@ genotype.Illumina <- function(sampleSheet=NULL, |
1282 | 1291 |
open(snpCallProbability(callSet)) |
1283 | 1292 |
ffcolapply(snpCall(callSet)[snp.index,i1:i2] <- tmp[["calls"]][,i1:i2], X=tmp[["calls"]]) #, BATCHBYTES=bb) |
1284 | 1293 |
ffcolapply(snpCallProbability(callSet)[snp.index,i1:i2] <- tmp[["confs"]][,i1:i2], X=tmp[["confs"]]) #, BATCHBYTES=bb) |
1285 |
-# close(tmp[["calls"]]) |
|
1286 |
-# close(tmp[["confs"]]) |
|
1287 |
- open(tmpA); open(tmpB) |
|
1294 |
+ close(tmp[["calls"]]) |
|
1295 |
+ close(tmp[["confs"]]) |
|
1296 |
+ close(tmpA); close(tmpB) |
|
1288 | 1297 |
delete(tmpA); delete(tmpB); |
1289 |
- delete(tmp[["calls"]]); delete(tmp[["confs"]]) |
|
1290 |
- rm(tmpA, tmpB) |
|
1298 |
+ ## This line is not needed as tmp[["calls"]] |
|
1299 |
+ ## is stored in tmpA and tmp[["confs"]] is stored in tmpB |
|
1300 |
+ ## So deleting tmpA and tmpB in the above command removes |
|
1301 |
+ ## the ff objects on disk |
|
1302 |
+ ##delete(tmp[["calls"]]); delete(tmp[["confs"]]) |
|
1291 | 1303 |
} else { |
1292 | 1304 |
calls(callSet)[snp.index, ] = tmp[["calls"]] |
1293 | 1305 |
snpCallProbability(callSet)[snp.index, ] = tmp[["confs"]] |
... | ... |
@@ -1302,25 +1314,25 @@ genotype.Illumina <- function(sampleSheet=NULL, |
1302 | 1314 |
|
1303 | 1315 |
|
1304 | 1316 |
|
1305 |
-processIDAT = function(sel, sampleSheet=NULL, |
|
1317 |
+processIDAT <- function(sel, sampleSheet=NULL, |
|
1306 | 1318 |
arrayNames=NULL, |
1307 |
- ids=NULL, |
|
1308 |
- path=".", |
|
1309 |
- arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"), |
|
1310 |
- highDensity=FALSE, |
|
1311 |
- sep="_", |
|
1312 |
- fileExt=list(green="Grn.idat", red="Red.idat"), |
|
1313 |
- saveDate=FALSE, |
|
1314 |
- verbose=TRUE, |
|
1319 |
+ ids=NULL, |
|
1320 |
+ path=".", |
|
1321 |
+ arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"), |
|
1322 |
+ highDensity=FALSE, |
|
1323 |
+ sep="_", |
|
1324 |
+ fileExt=list(green="Grn.idat", red="Red.idat"), |
|
1325 |
+ saveDate=FALSE, |
|
1326 |
+ verbose=TRUE, |
|
1315 | 1327 |
mixtureSampleSize=10^5, |
1316 |
- fitMixture=TRUE, |
|
1317 |
- eps=0.1, |
|
1318 |
- seed=1, |
|
1319 |
- cdfName, |
|
1320 |
- sns, |
|
1321 |
- stripNorm=TRUE, |
|
1322 |
- useTarget=TRUE, |
|
1323 |
- A, B, SKW, SNR, mixtureParams, is.snp) { #, outdir=".") { |
|
1328 |
+ fitMixture=TRUE, |
|
1329 |
+ eps=0.1, |
|
1330 |
+ seed=1, |
|
1331 |
+ cdfName, |
|
1332 |
+ sns, |
|
1333 |
+ stripNorm=TRUE, |
|
1334 |
+ useTarget=TRUE, |
|
1335 |
+ A, B, SKW, SNR, mixtureParams, is.snp) { #, outdir=".") { |
|
1324 | 1336 |
|
1325 | 1337 |
if(length(path)>= length(sel)) path = path[sel] |
1326 | 1338 |
message("RS:... processIDAT: calling readIdatFiles") |
... | ... |
@@ -1353,24 +1365,41 @@ processIDAT = function(sel, sampleSheet=NULL, |
1353 | 1365 |
# open(res[["mixtureParams"]]) |
1354 | 1366 |
# remove this line: bb = ocProbesets()*length(sns)*8 |
1355 | 1367 |
# Add these |
1368 |
+ ##segfault came after 'Finished preprocessing', but the processIDAT loop was not yet complete |
|
1356 | 1369 |
open(A); open(B); |
1357 |
- ffcolapply(A[snp.index,sel][,i1:i2] <- res[["A"]][,i1:i2], X=res[["A"]]) |
|
1358 |
- ffcolapply(B[snp.index,sel][,i1:i2] <- res[["B"]][,i1:i2], X=res[["B"]]) |
|
1370 |
+ Amatrix <- res[["A"]] |
|
1371 |
+ Bmatrix <- res[["B"]] |
|
1372 |
+ for(j in seq_along(sel)){ |
|
1373 |
+ jj <- sel[j] |
|
1374 |
+ A[snp.index, jj] <- Amatrix[, j] |
|
1375 |
+ B[snp.index, jj] <- Bmatrix[, j] |
|
1376 |
+ } |
|
1377 |
+ ##ffcolapply(A[snp.index,sel][,i1:i2] <- res[["A"]][,i1:i2], X=res[["A"]]) |
|
1378 |
+ ##ffcolapply(B[snp.index,sel][,i1:i2] <- res[["B"]][,i1:i2], X=res[["B"]]) |
|
1379 |
+## } |
|
1359 | 1380 |
# ffrowapply(A[snp.index,][i1:i2, sel] <- res[["A"]][i1:i2, ], X=res[["A"]], BATCHBYTES=bb) |
1360 | 1381 |
# ffrowapply(B[snp.index,][i1:i2, sel] <- res[["B"]][i1:i2, ], X=res[["B"]], BATCHBYTES=bb) |
1361 | 1382 |
if(length(np.index)>0) { |
1362 | 1383 |
# for (j in 1:length(sel)) { |
1363 |
- ffcolapply(A[np.index,sel][,i1:i2] <- res[["cnAB"]]$A[,i1:i2], X=res[["cnAB"]]$A) |
|
1364 |
- ffcolapply(B[np.index,sel][,i1:i2] <- res[["cnAB"]]$B[,i1:i2], X=res[["cnAB"]]$B) |
|
1384 |
+ cnAmatrix <- res[["cnAB"]]$A |
|
1385 |
+ cnBmatrix <- res[["cnAB"]]$B |
|
1386 |
+ for(j in seq_along(sel)){ |
|
1387 |
+ jj <- sel[j] |
|
1388 |
+ A[np.index, jj] <- cnAmatrix[, j] |
|
1389 |
+ B[np.index, jj] <- cnBmatrix[, j] |
|
1390 |
+ } |
|
1391 |
+ |
|
1392 |
+## ffcolapply(A[np.index,sel][,i1:i2] <- res[["cnAB"]]$A[,i1:i2], X=res[["cnAB"]]$A) |
|
1393 |
+## ffcolapply(B[np.index,sel][,i1:i2] <- res[["cnAB"]]$B[,i1:i2], X=res[["cnAB"]]$B) |
|
1365 | 1394 |
# A[np.index, sel[j]] = res[["cnAB"]]$A[,j] |
1366 | 1395 |
# B[np.index, sel[j]] = res[["cnAB"]]$B[,j] |
1367 | 1396 |
# } |
1368 | 1397 |
} |
1369 | 1398 |
# delete(res[["A"]]); delete(res[["B"]]) |
1370 | 1399 |
open(SKW); open(SNR); open(mixtureParams) |
1371 |
- SKW[sel] = res[["SKW"]][] |
|
1372 |
- SNR[sel] = res[["SNR"]][] |
|
1373 |
- mixtureParams[,sel] = res[["mixtureParams"]][] |
|
1400 |
+ SKW[sel] = res[["SKW"]][] |
|
1401 |
+ SNR[sel] = res[["SNR"]][] |
|
1402 |
+ mixtureParams[, sel] = res[["mixtureParams"]][] |
|
1374 | 1403 |
close(A) |
1375 | 1404 |
close(B) |
1376 | 1405 |
close(SNR) |
... | ... |
@@ -1378,6 +1407,6 @@ processIDAT = function(sel, sampleSheet=NULL, |
1378 | 1407 |
close(mixtureParams) |
1379 | 1408 |
# delete(res[["SKW"]]); delete(res[["SNR"]]); delete(res[["mixtureParams"]]) |
1380 | 1409 |
rm(res) |
1381 |
- gc() |
|
1410 |
+ gc() |
|
1382 | 1411 |
TRUE |
1383 | 1412 |
} |