Browse code

genotype.Illumina only supports ff. no matrix option

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@53852 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 18/03/2011 01:57:25
Showing1 changed files

... ...
@@ -1128,8 +1128,6 @@ genotype.Illumina <- function(sampleSheet=NULL,
1128 1128
 			      cdfName,
1129 1129
 			      copynumber=TRUE,
1130 1130
 			      batch,
1131
-			      ##                          outdir=".",
1132
-			      ##                          fns,
1133 1131
 			      saveDate=TRUE,
1134 1132
 			      stripNorm=TRUE,
1135 1133
 			      useTarget=TRUE,
... ...
@@ -1148,124 +1146,64 @@ genotype.Illumina <- function(sampleSheet=NULL,
1148 1146
 			      returnParams=TRUE,
1149 1147
 			      badSNP=0.7) {
1150 1148
 	is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
1149
+	stopifnot(is.lds)
1151 1150
 	if(missing(cdfName)) stop("must specify cdfName")
1152 1151
 	if(!isValidCdfName(cdfName)) stop("cdfName not valid.  see validCdfNames")
1153 1152
         pkgname = getCrlmmAnnotationName(cdfName)
1154
-##        if(missing(outdir))
1155
-##		stop("Must specify a directory to store large data objects")
1156
-	##callSet <- .GlobalEnv[["callSet"]]
1157
-	##if(is.null(callSet)){
1158
-		callSet <- construct.Illumina(sampleSheet=sampleSheet, arrayNames=arrayNames,
1159
-					      ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
1160
-					      highDensity=highDensity, sep=sep, fileExt=fileExt,
1161
-					      cdfName=cdfName, copynumber=copynumber, verbose=verbose, batch=batch, # fns=fns,
1162
-					      saveDate=saveDate) #, outdir=outdir)
1163
-		sampleNames(callSet) <- basename(sampleNames(callSet))
1164
-	##save(callSet, file=file.path(ldPath(), "callSet.rda"))
1165
-	##} else message("Using callSet loaded from GlobalEnv")
1153
+	callSet <- construct.Illumina(sampleSheet=sampleSheet, arrayNames=arrayNames,
1154
+				      ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
1155
+				      highDensity=highDensity, sep=sep, fileExt=fileExt,
1156
+				      cdfName=cdfName, copynumber=copynumber, verbose=verbose, batch=batch, # fns=fns,
1157
+				      saveDate=saveDate) #, outdir=outdir)
1158
+	sampleNames(callSet) <- basename(sampleNames(callSet))
1166 1159
 	if(missing(sns)) sns = basename(sampleNames(callSet))
1167
-	if(is.lds) {
1168
-		open(A(callSet))
1169
-		open(B(callSet))
1170
-		## open(callSet)
1171
-	}
1160
+	open(A(callSet))
1161
+	open(B(callSet))
1172 1162
  	is.snp = isSnp(callSet)
1173 1163
 	snp.index = which(is.snp)
1174 1164
 	narrays = ncol(callSet)
1175
-	if(is.lds) {
1176
-##		index <- .GlobalEnv[["index"]]
1177
-##		if(is.null(index)){
1178
-##			sampleBatches = splitIndicesByLength(seq(along=sampleNames(callSet)), ocSamples())
1179
-##		} else sampleBatches <- splitIndicesByLength(seq(length=ncol(callSet)), ocSamples())
1180
-		sampleBatches <- splitIndicesByLength(seq(length=ncol(callSet)), ocSamples())
1181
-		mixtureParams = initializeBigMatrix("crlmmMixt-", 4, narrays, "double")
1182
-		SNR = initializeBigVector("crlmmSNR-", narrays, "double")
1183
-		SKW = initializeBigVector("crlmmSKW-", narrays, "double")
1184
-		ocLapply(stratum=seq_along(sampleBatches), processIDAT, sampleBatches=sampleBatches,
1185
-			 sampleSheet=sampleSheet, arrayNames=arrayNames,
1186
-			 ids=ids, path=path, arrayInfoColNames=arrayInfoColNames, highDensity=highDensity,
1187
-			 sep=sep, fileExt=fileExt, saveDate=saveDate, verbose=verbose, mixtureSampleSize=mixtureSampleSize,
1188
-			 fitMixture=fitMixture, eps=eps, seed=seed, cdfName=cdfName, sns=sns, stripNorm=stripNorm,
1189
-			 useTarget=useTarget, A=A(callSet), B=B(callSet), SKW=SKW, SNR=SNR,
1190
-			 mixtureParams=mixtureParams, is.snp=is.snp, neededPkgs=c("crlmm", pkgname)) # outdir=outdir,
1191
-		open(SKW)
1192
-		open(SNR)
1193
-		pData(callSet)$SKW = SKW
1194
-		pData(callSet)$SNR = SNR
1195
-		save(callSet, file=file.path(ldPath(), "callSet.rda"))
1196
-		close(SNR)
1197
-		close(SKW)
1198
-	} else {
1199
-		mixtureParams = matrix(NA, 4, nrow(callSet))
1200
-		RG <- readIdatFiles(sampleSheet=sampleSheet, arrayNames=arrayNames,
1201
-				    ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
1202
-				    highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate)
1203
-		XY = RGtoXY(RG, chipType=cdfName)
1204
-		rm(RG); gc()
1205
-
1206
-		res <- preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
1207
-					   seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) # , outdir=outdir)
1208
-		rm(XY); gc()
1209
-		if(verbose) message("Finished preprocessing.")
1210
-		np.index = which(!is.snp)
1211
-		A(callSet)[snp.index, ] = res[["A"]]
1212
-		B(callSet)[snp.index, ] = res[["B"]]
1213
-		if(length(np.index)>0) {
1214
-			A(callSet)[np.index, ] = res[["cnAB"]]$A
1215
-			B(callSet)[np.index, ] = res[["cnAB"]]$B
1216
-		}
1217
-		SKW = pData(callSet)$SKW = res[["SKW"]]
1218
-		SNR = pData(callSet)$SNR = res[["SNR"]]
1219
-		mixtureParams = res[["mixtureParams"]]
1220
-		rm(res)
1221
-	}
1165
+	sampleBatches <- splitIndicesByLength(seq(length=ncol(callSet)), ocSamples())
1166
+	mixtureParams = initializeBigMatrix("crlmmMixt-", 4, narrays, "double")
1167
+	SNR = initializeBigVector("crlmmSNR-", narrays, "double")
1168
+	SKW = initializeBigVector("crlmmSKW-", narrays, "double")
1169
+	ocLapply(seq_along(sampleBatches), processIDAT, sampleBatches=sampleBatches,
1170
+		 sampleSheet=sampleSheet, arrayNames=arrayNames,
1171
+		 ids=ids, path=path, arrayInfoColNames=arrayInfoColNames, highDensity=highDensity,
1172
+		 sep=sep, fileExt=fileExt, saveDate=saveDate, verbose=verbose, mixtureSampleSize=mixtureSampleSize,
1173
+		 fitMixture=fitMixture, eps=eps, seed=seed, cdfName=cdfName, sns=sns, stripNorm=stripNorm,
1174
+		 useTarget=useTarget, A=A(callSet), B=B(callSet), SKW=SKW, SNR=SNR,
1175
+		 mixtureParams=mixtureParams, is.snp=is.snp, neededPkgs=c("crlmm", pkgname)) # outdir=outdir,
1176
+	open(SKW)
1177
+	open(SNR)
1178
+	pData(callSet)$SKW = SKW
1179
+	pData(callSet)$SNR = SNR
1180
+	save(callSet, file=file.path(ldPath(), "callSet.rda"))
1181
+	close(SNR)
1182
+	close(SKW)
1222 1183
 	FUN = ifelse(is.lds, "crlmmGT2", "crlmmGT")
1223
-	## genotyping
1224 1184
 	crlmmGTfxn = function(FUN,...){
1225 1185
 		switch(FUN,
1226 1186
 		       crlmmGT2=crlmmGT2(...),
1227 1187
 		       crlmmGT=crlmmGT(...))
1228 1188
 	}
1229
-	if(is.lds) {
1230
-		open(A(callSet))
1231
-		open(B(callSet))
1232
-		tmpA = initializeBigMatrix(name="tmpA", length(snp.index), narrays)
1233
-		tmpB = initializeBigMatrix(name="tmpB", length(snp.index), narrays)
1234
-		##ffcolapply(tmpA[,i1:i2] <- A(callSet)[snp.index,i1:i2], X=A(callSet))
1235
-		##ffcolapply(tmpB[,i1:i2] <- B(callSet)[snp.index,i1:i2], X=B(callSet))
1236
-		for(j in seq(length=ncol(callSet))){
1237
-			tmpA[, j] <- as.integer(A(callSet)[snp.index, j])
1238
-			tmpB[, j] <- as.integer(B(callSet)[snp.index, j])
1239
-		}
1240
-		close(A(callSet))
1241
-		close(B(callSet))
1242
-		close(tmpA)
1243
-		close(tmpB)
1244
-	} else {
1245
-		tmpA = A(callSet)[snp.index,]
1246
-		tmpB = B(callSet)[snp.index,]
1189
+	open(A(callSet))
1190
+	open(B(callSet))
1191
+	tmpA = initializeBigMatrix(name="tmpA", length(snp.index), narrays)
1192
+	tmpB = initializeBigMatrix(name="tmpB", length(snp.index), narrays)
1193
+	for(j in seq(length=ncol(callSet))){
1194
+		tmpA[, j] <- as.integer(A(callSet)[snp.index, j])
1195
+		tmpB[, j] <- as.integer(B(callSet)[snp.index, j])
1247 1196
 	}
1248
-##	return(list(tmpA,
1249
-##		    tmpB,
1250
-##		    SNR,
1251
-##		    mixtureParams,
1252
-##		    cdfName,
1253
-##		    sampleNames(callSet),
1254
-##		    probs,
1255
-##		    DF,
1256
-##		    SNRMin,
1257
-##		    recallMin,
1258
-##		    recallRegMin,
1259
-##		    gender,
1260
-##		    verbose,
1261
-##		    returnParams,
1262
-##		    badSNP))
1197
+	close(A(callSet))
1198
+	close(B(callSet))
1199
+	close(tmpA)
1200
+	close(tmpB)
1263 1201
 	save(tmpA, file=file.path(ldPath(), "tmpA.rda"))
1264 1202
 	save(tmpB, file=file.path(ldPath(), "tmpB.rda"))
1265 1203
 	save(SNR, file=file.path(ldPath(), "SNR.rda"))
1266 1204
 	save(SKW, file=file.path(ldPath(), "SKW.rda"))
1267 1205
 	save(mixtureParams, file=file.path(ldPath(), "mixtureParams.rda"))
1268
-##	return(NULL)
1206
+	message("Begin genotyping")
1269 1207
 	tmp <- crlmmGTfxn(FUN,
1270 1208
 			  A=tmpA,
1271 1209
 			  B=tmpB,
... ...
@@ -1283,29 +1221,16 @@ genotype.Illumina <- function(sampleSheet=NULL,
1283 1221
 			  verbose=verbose,
1284 1222
 			  returnParams=returnParams,
1285 1223
 			  badSNP=badSNP)
1224
+	save(tmp, file=file.path(ldPath(), "tmp.rda"))
1286 1225
 	if(verbose) message("Genotyping finished.  Updating container with genotype calls and confidence scores.")
1287
-	if(is.lds){
1288
-		##       bb = getOption("ffbatchbytes") # ocProbesets()*ncol(callSet)*8
1289
-		open(tmp[["calls"]])
1290
-		open(tmp[["confs"]])
1291
-		open(calls(callSet))
1292
-		open(snpCallProbability(callSet))
1293
-		ffcolapply(snpCall(callSet)[snp.index,i1:i2] <- tmp[["calls"]][,i1:i2], X=tmp[["calls"]]) #, BATCHBYTES=bb)
1294
-		ffcolapply(snpCallProbability(callSet)[snp.index,i1:i2] <- tmp[["confs"]][,i1:i2], X=tmp[["confs"]]) #, BATCHBYTES=bb)
1295
-		close(tmp[["calls"]])
1296
-		close(tmp[["confs"]])
1297
-		close(tmpA); close(tmpB)
1298
-		delete(tmpA); delete(tmpB);
1299
-		## This line is not needed as tmp[["calls"]]
1300
-		## is stored in tmpA and tmp[["confs"]] is stored in tmpB
1301
-		## So deleting tmpA and tmpB in the above command removes
1302
-		## the ff objects on disk
1303
-		##delete(tmp[["calls"]]); delete(tmp[["confs"]])
1304
-	} else {
1305
-		calls(callSet)[snp.index, ] = tmp[["calls"]]
1306
-		snpCallProbability(callSet)[snp.index, ] = tmp[["confs"]]
1307
-		rm(tmpA, tmpB)
1226
+	open(tmpA)
1227
+	open(tmpB)
1228
+	for(j in 1:ncol(callSet)){
1229
+		snpCall(callSet)[snp.index, j] <- as.integer(tmpA[, j])
1230
+		snpCallProbability(callSet)[snp.index, j] <- as.integer(tmpB[, j])
1308 1231
 	}
1232
+	close(tmpA); close(tmpB)
1233
+	delete(tmpA); delete(tmpB);
1309 1234
 	callSet$gender = tmp$gender
1310 1235
 	rm(tmp)
1311 1236
 	close(callSet)