Browse code

took out calls to ffcolapply in genotype.Illumina and processIDAT

(trying to figure out segfault)

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@53849 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 18/03/2011 01:57:11
Showing1 changed files

... ...
@@ -1110,9 +1110,9 @@ construct.Illumina <- function(sampleSheet=NULL,
1110 1110
 	protocolData(cnSet) = protocolData
1111 1111
 	featureData(cnSet) = featureData
1112 1112
 	featureNames(cnSet) = featureNames(featureData)
1113
-	pd = data.frame(matrix(NA, nc, 3), row.names=sampleNames(cnSet))
1114
-	colnames(pd)=c("SKW", "SNR", "gender")
1115
-	phenoData(cnSet) = new("AnnotatedDataFrame", data=pd)
1113
+	##pd = data.frame(matrix(NA, nc, 3), row.names=sampleNames(cnSet))
1114
+	##colnames(pd)=c("SKW", "SNR", "gender")
1115
+	##phenoData(cnSet) = new("AnnotatedDataFrame", data=pd)
1116 1116
 	return(cnSet)
1117 1117
 }
1118 1118
 
... ...
@@ -1151,18 +1151,18 @@ genotype.Illumina <- function(sampleSheet=NULL,
1151 1151
 	if(missing(cdfName)) stop("must specify cdfName")
1152 1152
 	if(!isValidCdfName(cdfName)) stop("cdfName not valid.  see validCdfNames")
1153 1153
         pkgname = getCrlmmAnnotationName(cdfName)
1154
-#        if(missing(outdir))
1155
-#          stop("Must specify a directory to store large data objects")
1156
-	callSet <- .GlobalEnv[["callSet"]]
1157
-	if(is.null(callSet)){
1154
+##        if(missing(outdir))
1155
+##		stop("Must specify a directory to store large data objects")
1156
+	##callSet <- .GlobalEnv[["callSet"]]
1157
+	##if(is.null(callSet)){
1158 1158
 		callSet <- construct.Illumina(sampleSheet=sampleSheet, arrayNames=arrayNames,
1159 1159
 					      ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
1160 1160
 					      highDensity=highDensity, sep=sep, fileExt=fileExt,
1161 1161
 					      cdfName=cdfName, copynumber=copynumber, verbose=verbose, batch=batch, # fns=fns,
1162 1162
 					      saveDate=saveDate) #, outdir=outdir)
1163 1163
 		sampleNames(callSet) <- basename(sampleNames(callSet))
1164
-		save(callSet, file=file.path(ldPath(), "callSet.rda"))
1165
-	} else message("Using callSet loaded from GlobalEnv")
1164
+	##save(callSet, file=file.path(ldPath(), "callSet.rda"))
1165
+	##} else message("Using callSet loaded from GlobalEnv")
1166 1166
 	if(missing(sns)) sns = basename(sampleNames(callSet))
1167 1167
 	if(is.lds) {
1168 1168
 		open(A(callSet))
... ...
@@ -1173,10 +1173,11 @@ genotype.Illumina <- function(sampleSheet=NULL,
1173 1173
 	snp.index = which(is.snp)
1174 1174
 	narrays = ncol(callSet)
1175 1175
 	if(is.lds) {
1176
-		index <- .GlobalEnv[["index"]]
1177
-		if(is.null(index)){
1178
-			sampleBatches = splitIndicesByLength(seq(along=sampleNames(callSet)), ocSamples())
1179
-		} else sampleBatches <- splitIndicesByLength(index, ocSamples())
1176
+##		index <- .GlobalEnv[["index"]]
1177
+##		if(is.null(index)){
1178
+##			sampleBatches = splitIndicesByLength(seq(along=sampleNames(callSet)), ocSamples())
1179
+##		} else sampleBatches <- splitIndicesByLength(seq(length=ncol(callSet)), ocSamples())
1180
+		sampleBatches <- splitIndicesByLength(seq(length=ncol(callSet)), ocSamples())
1180 1181
 		mixtureParams = initializeBigMatrix("crlmmMixt-", 4, narrays, "double")
1181 1182
 		SNR = initializeBigVector("crlmmSNR-", narrays, "double")
1182 1183
 		SKW = initializeBigVector("crlmmSKW-", narrays, "double")
... ...
@@ -1190,6 +1191,7 @@ genotype.Illumina <- function(sampleSheet=NULL,
1190 1191
 		open(SNR)
1191 1192
 		pData(callSet)$SKW = SKW
1192 1193
 		pData(callSet)$SNR = SNR
1194
+		save(callSet, file=file.path(ldPath(), "callSet.rda"))
1193 1195
 		close(SNR)
1194 1196
 		close(SKW)
1195 1197
 	} else {
... ...
@@ -1228,8 +1230,12 @@ genotype.Illumina <- function(sampleSheet=NULL,
1228 1230
 		open(B(callSet))
1229 1231
 		tmpA = initializeBigMatrix(name="tmpA", length(snp.index), narrays)
1230 1232
 		tmpB = initializeBigMatrix(name="tmpB", length(snp.index), narrays)
1231
-		ffcolapply(tmpA[,i1:i2] <- A(callSet)[snp.index,i1:i2], X=A(callSet))
1232
-		ffcolapply(tmpB[,i1:i2] <- B(callSet)[snp.index,i1:i2], X=B(callSet))
1233
+		##ffcolapply(tmpA[,i1:i2] <- A(callSet)[snp.index,i1:i2], X=A(callSet))
1234
+		##ffcolapply(tmpB[,i1:i2] <- B(callSet)[snp.index,i1:i2], X=B(callSet))
1235
+		for(j in seq(length=ncol(callSet)){
1236
+			tmpA[, j] <- as.integer(A(callSet)[snp.index, j])
1237
+			tmpB[, j] <- as.integer(B(callSet)[snp.index, j])
1238
+		}
1233 1239
 		close(A(callSet))
1234 1240
 		close(B(callSet))
1235 1241
 		close(tmpA)
... ...
@@ -1255,7 +1261,10 @@ genotype.Illumina <- function(sampleSheet=NULL,
1255 1261
 ##		    badSNP))
1256 1262
 	save(tmpA, file=file.path(ldPath(), "tmpA.rda"))
1257 1263
 	save(tmpB, file=file.path(ldPath(), "tmpB.rda"))
1264
+	save(SNR, file=file.path(ldPath(), "SNR.rda"))
1265
+	save(SKW, file=file.path(ldPath(), "SKW.rda"))
1258 1266
 	save(mixtureParams, file=file.path(ldPath(), "mixtureParams.rda"))
1267
+##	return(NULL)
1259 1268
 	tmp <- crlmmGTfxn(FUN,
1260 1269
 			  A=tmpA,
1261 1270
 			  B=tmpB,
... ...
@@ -1282,12 +1291,15 @@ genotype.Illumina <- function(sampleSheet=NULL,
1282 1291
 		open(snpCallProbability(callSet))
1283 1292
 		ffcolapply(snpCall(callSet)[snp.index,i1:i2] <- tmp[["calls"]][,i1:i2], X=tmp[["calls"]]) #, BATCHBYTES=bb)
1284 1293
 		ffcolapply(snpCallProbability(callSet)[snp.index,i1:i2] <- tmp[["confs"]][,i1:i2], X=tmp[["confs"]]) #, BATCHBYTES=bb)
1285
-#		close(tmp[["calls"]])
1286
-#		close(tmp[["confs"]])
1287
-		open(tmpA); open(tmpB)
1294
+		close(tmp[["calls"]])
1295
+		close(tmp[["confs"]])
1296
+		close(tmpA); close(tmpB)
1288 1297
 		delete(tmpA); delete(tmpB);
1289
-		delete(tmp[["calls"]]); delete(tmp[["confs"]])
1290
-		rm(tmpA, tmpB)
1298
+		## This line is not needed as tmp[["calls"]]
1299
+		## is stored in tmpA and tmp[["confs"]] is stored in tmpB
1300
+		## So deleting tmpA and tmpB in the above command removes
1301
+		## the ff objects on disk
1302
+		##delete(tmp[["calls"]]); delete(tmp[["confs"]])
1291 1303
 	} else {
1292 1304
 		calls(callSet)[snp.index, ] = tmp[["calls"]]
1293 1305
 		snpCallProbability(callSet)[snp.index, ] = tmp[["confs"]]
... ...
@@ -1302,25 +1314,25 @@ genotype.Illumina <- function(sampleSheet=NULL,
1302 1314
 
1303 1315
 
1304 1316
 
1305
-processIDAT = function(sel, sampleSheet=NULL,
1317
+processIDAT <- function(sel, sampleSheet=NULL,
1306 1318
 			arrayNames=NULL,
1307
-			  ids=NULL,
1308
-			  path=".",
1309
-			  arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
1310
-			  highDensity=FALSE,
1311
-			  sep="_",
1312
-			  fileExt=list(green="Grn.idat", red="Red.idat"),
1313
-			  saveDate=FALSE,
1314
-			  verbose=TRUE,
1319
+			ids=NULL,
1320
+			path=".",
1321
+			arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
1322
+			highDensity=FALSE,
1323
+			sep="_",
1324
+			fileExt=list(green="Grn.idat", red="Red.idat"),
1325
+			saveDate=FALSE,
1326
+			verbose=TRUE,
1315 1327
 			mixtureSampleSize=10^5,
1316
-			  fitMixture=TRUE,
1317
-			  eps=0.1,
1318
-			  seed=1,
1319
-			  cdfName,
1320
-			  sns,
1321
-			  stripNorm=TRUE,
1322
-			  useTarget=TRUE,
1323
-              A, B, SKW, SNR, mixtureParams, is.snp) { #, outdir=".") {
1328
+			fitMixture=TRUE,
1329
+			eps=0.1,
1330
+			seed=1,
1331
+			cdfName,
1332
+			sns,
1333
+			stripNorm=TRUE,
1334
+			useTarget=TRUE,
1335
+			A, B, SKW, SNR, mixtureParams, is.snp) { #, outdir=".") {
1324 1336
 
1325 1337
         if(length(path)>= length(sel)) path = path[sel]
1326 1338
 	message("RS:... processIDAT:  calling readIdatFiles")
... ...
@@ -1353,24 +1365,41 @@ processIDAT = function(sel, sampleSheet=NULL,
1353 1365
 #        open(res[["mixtureParams"]])
1354 1366
 # remove this line: bb = ocProbesets()*length(sns)*8
1355 1367
 # Add these
1368
+	##segfault came after 'Finished preprocessing', but the processIDAT loop was not yet complete
1356 1369
 	open(A); open(B);
1357
-        ffcolapply(A[snp.index,sel][,i1:i2] <- res[["A"]][,i1:i2], X=res[["A"]])
1358
-        ffcolapply(B[snp.index,sel][,i1:i2] <- res[["B"]][,i1:i2], X=res[["B"]])
1370
+	Amatrix <- res[["A"]]
1371
+	Bmatrix <- res[["B"]]
1372
+	for(j in seq_along(sel)){
1373
+		jj <- sel[j]
1374
+		A[snp.index, jj] <- Amatrix[, j]
1375
+		B[snp.index, jj] <- Bmatrix[, j]
1376
+	}
1377
+		##ffcolapply(A[snp.index,sel][,i1:i2] <- res[["A"]][,i1:i2], X=res[["A"]])
1378
+		##ffcolapply(B[snp.index,sel][,i1:i2] <- res[["B"]][,i1:i2], X=res[["B"]])
1379
+##	}
1359 1380
 #   ffrowapply(A[snp.index,][i1:i2, sel] <- res[["A"]][i1:i2, ], X=res[["A"]], BATCHBYTES=bb)
1360 1381
 #	ffrowapply(B[snp.index,][i1:i2, sel] <- res[["B"]][i1:i2, ], X=res[["B"]], BATCHBYTES=bb)
1361 1382
 	    if(length(np.index)>0) {
1362 1383
 #			for (j in 1:length(sel)) {
1363
-			ffcolapply(A[np.index,sel][,i1:i2] <- res[["cnAB"]]$A[,i1:i2], X=res[["cnAB"]]$A)
1364
-			ffcolapply(B[np.index,sel][,i1:i2] <- res[["cnAB"]]$B[,i1:i2], X=res[["cnAB"]]$B)
1384
+		    cnAmatrix <- res[["cnAB"]]$A
1385
+		    cnBmatrix <- res[["cnAB"]]$B
1386
+		    for(j in seq_along(sel)){
1387
+			    jj <- sel[j]
1388
+			    A[np.index, jj] <- cnAmatrix[, j]
1389
+			    B[np.index, jj] <- cnBmatrix[, j]
1390
+		    }
1391
+
1392
+##			ffcolapply(A[np.index,sel][,i1:i2] <- res[["cnAB"]]$A[,i1:i2], X=res[["cnAB"]]$A)
1393
+##			ffcolapply(B[np.index,sel][,i1:i2] <- res[["cnAB"]]$B[,i1:i2], X=res[["cnAB"]]$B)
1365 1394
 #            A[np.index, sel[j]] = res[["cnAB"]]$A[,j]
1366 1395
 #            B[np.index, sel[j]] = res[["cnAB"]]$B[,j]
1367 1396
 #          }
1368 1397
         }
1369 1398
 #        delete(res[["A"]]); delete(res[["B"]])
1370 1399
 	open(SKW); open(SNR); open(mixtureParams)
1371
-	    SKW[sel] = res[["SKW"]][]
1372
-	    SNR[sel] = res[["SNR"]][]
1373
-	    mixtureParams[,sel] = res[["mixtureParams"]][]
1400
+	SKW[sel] = res[["SKW"]][]
1401
+	SNR[sel] = res[["SNR"]][]
1402
+	mixtureParams[, sel] = res[["mixtureParams"]][]
1374 1403
         close(A)
1375 1404
         close(B)
1376 1405
         close(SNR)
... ...
@@ -1378,6 +1407,6 @@ processIDAT = function(sel, sampleSheet=NULL,
1378 1407
         close(mixtureParams)
1379 1408
 #        delete(res[["SKW"]]); delete(res[["SNR"]]); delete(res[["mixtureParams"]])
1380 1409
         rm(res)
1381
-		gc()
1410
+	gc()
1382 1411
         TRUE
1383 1412
       }