git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@53852 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1128,8 +1128,6 @@ genotype.Illumina <- function(sampleSheet=NULL, |
1128 | 1128 |
cdfName, |
1129 | 1129 |
copynumber=TRUE, |
1130 | 1130 |
batch, |
1131 |
- ## outdir=".", |
|
1132 |
- ## fns, |
|
1133 | 1131 |
saveDate=TRUE, |
1134 | 1132 |
stripNorm=TRUE, |
1135 | 1133 |
useTarget=TRUE, |
... | ... |
@@ -1148,124 +1146,64 @@ genotype.Illumina <- function(sampleSheet=NULL, |
1148 | 1146 |
returnParams=TRUE, |
1149 | 1147 |
badSNP=0.7) { |
1150 | 1148 |
is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE) |
1149 |
+ stopifnot(is.lds) |
|
1151 | 1150 |
if(missing(cdfName)) stop("must specify cdfName") |
1152 | 1151 |
if(!isValidCdfName(cdfName)) stop("cdfName not valid. see validCdfNames") |
1153 | 1152 |
pkgname = getCrlmmAnnotationName(cdfName) |
1154 |
-## if(missing(outdir)) |
|
1155 |
-## stop("Must specify a directory to store large data objects") |
|
1156 |
- ##callSet <- .GlobalEnv[["callSet"]] |
|
1157 |
- ##if(is.null(callSet)){ |
|
1158 |
- callSet <- construct.Illumina(sampleSheet=sampleSheet, arrayNames=arrayNames, |
|
1159 |
- ids=ids, path=path, arrayInfoColNames=arrayInfoColNames, |
|
1160 |
- highDensity=highDensity, sep=sep, fileExt=fileExt, |
|
1161 |
- cdfName=cdfName, copynumber=copynumber, verbose=verbose, batch=batch, # fns=fns, |
|
1162 |
- saveDate=saveDate) #, outdir=outdir) |
|
1163 |
- sampleNames(callSet) <- basename(sampleNames(callSet)) |
|
1164 |
- ##save(callSet, file=file.path(ldPath(), "callSet.rda")) |
|
1165 |
- ##} else message("Using callSet loaded from GlobalEnv") |
|
1153 |
+ callSet <- construct.Illumina(sampleSheet=sampleSheet, arrayNames=arrayNames, |
|
1154 |
+ ids=ids, path=path, arrayInfoColNames=arrayInfoColNames, |
|
1155 |
+ highDensity=highDensity, sep=sep, fileExt=fileExt, |
|
1156 |
+ cdfName=cdfName, copynumber=copynumber, verbose=verbose, batch=batch, # fns=fns, |
|
1157 |
+ saveDate=saveDate) #, outdir=outdir) |
|
1158 |
+ sampleNames(callSet) <- basename(sampleNames(callSet)) |
|
1166 | 1159 |
if(missing(sns)) sns = basename(sampleNames(callSet)) |
1167 |
- if(is.lds) { |
|
1168 |
- open(A(callSet)) |
|
1169 |
- open(B(callSet)) |
|
1170 |
- ## open(callSet) |
|
1171 |
- } |
|
1160 |
+ open(A(callSet)) |
|
1161 |
+ open(B(callSet)) |
|
1172 | 1162 |
is.snp = isSnp(callSet) |
1173 | 1163 |
snp.index = which(is.snp) |
1174 | 1164 |
narrays = ncol(callSet) |
1175 |
- if(is.lds) { |
|
1176 |
-## index <- .GlobalEnv[["index"]] |
|
1177 |
-## if(is.null(index)){ |
|
1178 |
-## sampleBatches = splitIndicesByLength(seq(along=sampleNames(callSet)), ocSamples()) |
|
1179 |
-## } else sampleBatches <- splitIndicesByLength(seq(length=ncol(callSet)), ocSamples()) |
|
1180 |
- sampleBatches <- splitIndicesByLength(seq(length=ncol(callSet)), ocSamples()) |
|
1181 |
- mixtureParams = initializeBigMatrix("crlmmMixt-", 4, narrays, "double") |
|
1182 |
- SNR = initializeBigVector("crlmmSNR-", narrays, "double") |
|
1183 |
- SKW = initializeBigVector("crlmmSKW-", narrays, "double") |
|
1184 |
- ocLapply(stratum=seq_along(sampleBatches), processIDAT, sampleBatches=sampleBatches, |
|
1185 |
- sampleSheet=sampleSheet, arrayNames=arrayNames, |
|
1186 |
- ids=ids, path=path, arrayInfoColNames=arrayInfoColNames, highDensity=highDensity, |
|
1187 |
- sep=sep, fileExt=fileExt, saveDate=saveDate, verbose=verbose, mixtureSampleSize=mixtureSampleSize, |
|
1188 |
- fitMixture=fitMixture, eps=eps, seed=seed, cdfName=cdfName, sns=sns, stripNorm=stripNorm, |
|
1189 |
- useTarget=useTarget, A=A(callSet), B=B(callSet), SKW=SKW, SNR=SNR, |
|
1190 |
- mixtureParams=mixtureParams, is.snp=is.snp, neededPkgs=c("crlmm", pkgname)) # outdir=outdir, |
|
1191 |
- open(SKW) |
|
1192 |
- open(SNR) |
|
1193 |
- pData(callSet)$SKW = SKW |
|
1194 |
- pData(callSet)$SNR = SNR |
|
1195 |
- save(callSet, file=file.path(ldPath(), "callSet.rda")) |
|
1196 |
- close(SNR) |
|
1197 |
- close(SKW) |
|
1198 |
- } else { |
|
1199 |
- mixtureParams = matrix(NA, 4, nrow(callSet)) |
|
1200 |
- RG <- readIdatFiles(sampleSheet=sampleSheet, arrayNames=arrayNames, |
|
1201 |
- ids=ids, path=path, arrayInfoColNames=arrayInfoColNames, |
|
1202 |
- highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate) |
|
1203 |
- XY = RGtoXY(RG, chipType=cdfName) |
|
1204 |
- rm(RG); gc() |
|
1205 |
- |
|
1206 |
- res <- preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose, |
|
1207 |
- seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) # , outdir=outdir) |
|
1208 |
- rm(XY); gc() |
|
1209 |
- if(verbose) message("Finished preprocessing.") |
|
1210 |
- np.index = which(!is.snp) |
|
1211 |
- A(callSet)[snp.index, ] = res[["A"]] |
|
1212 |
- B(callSet)[snp.index, ] = res[["B"]] |
|
1213 |
- if(length(np.index)>0) { |
|
1214 |
- A(callSet)[np.index, ] = res[["cnAB"]]$A |
|
1215 |
- B(callSet)[np.index, ] = res[["cnAB"]]$B |
|
1216 |
- } |
|
1217 |
- SKW = pData(callSet)$SKW = res[["SKW"]] |
|
1218 |
- SNR = pData(callSet)$SNR = res[["SNR"]] |
|
1219 |
- mixtureParams = res[["mixtureParams"]] |
|
1220 |
- rm(res) |
|
1221 |
- } |
|
1165 |
+ sampleBatches <- splitIndicesByLength(seq(length=ncol(callSet)), ocSamples()) |
|
1166 |
+ mixtureParams = initializeBigMatrix("crlmmMixt-", 4, narrays, "double") |
|
1167 |
+ SNR = initializeBigVector("crlmmSNR-", narrays, "double") |
|
1168 |
+ SKW = initializeBigVector("crlmmSKW-", narrays, "double") |
|
1169 |
+ ocLapply(seq_along(sampleBatches), processIDAT, sampleBatches=sampleBatches, |
|
1170 |
+ sampleSheet=sampleSheet, arrayNames=arrayNames, |
|
1171 |
+ ids=ids, path=path, arrayInfoColNames=arrayInfoColNames, highDensity=highDensity, |
|
1172 |
+ sep=sep, fileExt=fileExt, saveDate=saveDate, verbose=verbose, mixtureSampleSize=mixtureSampleSize, |
|
1173 |
+ fitMixture=fitMixture, eps=eps, seed=seed, cdfName=cdfName, sns=sns, stripNorm=stripNorm, |
|
1174 |
+ useTarget=useTarget, A=A(callSet), B=B(callSet), SKW=SKW, SNR=SNR, |
|
1175 |
+ mixtureParams=mixtureParams, is.snp=is.snp, neededPkgs=c("crlmm", pkgname)) # outdir=outdir, |
|
1176 |
+ open(SKW) |
|
1177 |
+ open(SNR) |
|
1178 |
+ pData(callSet)$SKW = SKW |
|
1179 |
+ pData(callSet)$SNR = SNR |
|
1180 |
+ save(callSet, file=file.path(ldPath(), "callSet.rda")) |
|
1181 |
+ close(SNR) |
|
1182 |
+ close(SKW) |
|
1222 | 1183 |
FUN = ifelse(is.lds, "crlmmGT2", "crlmmGT") |
1223 |
- ## genotyping |
|
1224 | 1184 |
crlmmGTfxn = function(FUN,...){ |
1225 | 1185 |
switch(FUN, |
1226 | 1186 |
crlmmGT2=crlmmGT2(...), |
1227 | 1187 |
crlmmGT=crlmmGT(...)) |
1228 | 1188 |
} |
1229 |
- if(is.lds) { |
|
1230 |
- open(A(callSet)) |
|
1231 |
- open(B(callSet)) |
|
1232 |
- tmpA = initializeBigMatrix(name="tmpA", length(snp.index), narrays) |
|
1233 |
- tmpB = initializeBigMatrix(name="tmpB", length(snp.index), narrays) |
|
1234 |
- ##ffcolapply(tmpA[,i1:i2] <- A(callSet)[snp.index,i1:i2], X=A(callSet)) |
|
1235 |
- ##ffcolapply(tmpB[,i1:i2] <- B(callSet)[snp.index,i1:i2], X=B(callSet)) |
|
1236 |
- for(j in seq(length=ncol(callSet))){ |
|
1237 |
- tmpA[, j] <- as.integer(A(callSet)[snp.index, j]) |
|
1238 |
- tmpB[, j] <- as.integer(B(callSet)[snp.index, j]) |
|
1239 |
- } |
|
1240 |
- close(A(callSet)) |
|
1241 |
- close(B(callSet)) |
|
1242 |
- close(tmpA) |
|
1243 |
- close(tmpB) |
|
1244 |
- } else { |
|
1245 |
- tmpA = A(callSet)[snp.index,] |
|
1246 |
- tmpB = B(callSet)[snp.index,] |
|
1189 |
+ open(A(callSet)) |
|
1190 |
+ open(B(callSet)) |
|
1191 |
+ tmpA = initializeBigMatrix(name="tmpA", length(snp.index), narrays) |
|
1192 |
+ tmpB = initializeBigMatrix(name="tmpB", length(snp.index), narrays) |
|
1193 |
+ for(j in seq(length=ncol(callSet))){ |
|
1194 |
+ tmpA[, j] <- as.integer(A(callSet)[snp.index, j]) |
|
1195 |
+ tmpB[, j] <- as.integer(B(callSet)[snp.index, j]) |
|
1247 | 1196 |
} |
1248 |
-## return(list(tmpA, |
|
1249 |
-## tmpB, |
|
1250 |
-## SNR, |
|
1251 |
-## mixtureParams, |
|
1252 |
-## cdfName, |
|
1253 |
-## sampleNames(callSet), |
|
1254 |
-## probs, |
|
1255 |
-## DF, |
|
1256 |
-## SNRMin, |
|
1257 |
-## recallMin, |
|
1258 |
-## recallRegMin, |
|
1259 |
-## gender, |
|
1260 |
-## verbose, |
|
1261 |
-## returnParams, |
|
1262 |
-## badSNP)) |
|
1197 |
+ close(A(callSet)) |
|
1198 |
+ close(B(callSet)) |
|
1199 |
+ close(tmpA) |
|
1200 |
+ close(tmpB) |
|
1263 | 1201 |
save(tmpA, file=file.path(ldPath(), "tmpA.rda")) |
1264 | 1202 |
save(tmpB, file=file.path(ldPath(), "tmpB.rda")) |
1265 | 1203 |
save(SNR, file=file.path(ldPath(), "SNR.rda")) |
1266 | 1204 |
save(SKW, file=file.path(ldPath(), "SKW.rda")) |
1267 | 1205 |
save(mixtureParams, file=file.path(ldPath(), "mixtureParams.rda")) |
1268 |
-## return(NULL) |
|
1206 |
+ message("Begin genotyping") |
|
1269 | 1207 |
tmp <- crlmmGTfxn(FUN, |
1270 | 1208 |
A=tmpA, |
1271 | 1209 |
B=tmpB, |
... | ... |
@@ -1283,29 +1221,16 @@ genotype.Illumina <- function(sampleSheet=NULL, |
1283 | 1221 |
verbose=verbose, |
1284 | 1222 |
returnParams=returnParams, |
1285 | 1223 |
badSNP=badSNP) |
1224 |
+ save(tmp, file=file.path(ldPath(), "tmp.rda")) |
|
1286 | 1225 |
if(verbose) message("Genotyping finished. Updating container with genotype calls and confidence scores.") |
1287 |
- if(is.lds){ |
|
1288 |
- ## bb = getOption("ffbatchbytes") # ocProbesets()*ncol(callSet)*8 |
|
1289 |
- open(tmp[["calls"]]) |
|
1290 |
- open(tmp[["confs"]]) |
|
1291 |
- open(calls(callSet)) |
|
1292 |
- open(snpCallProbability(callSet)) |
|
1293 |
- ffcolapply(snpCall(callSet)[snp.index,i1:i2] <- tmp[["calls"]][,i1:i2], X=tmp[["calls"]]) #, BATCHBYTES=bb) |
|
1294 |
- ffcolapply(snpCallProbability(callSet)[snp.index,i1:i2] <- tmp[["confs"]][,i1:i2], X=tmp[["confs"]]) #, BATCHBYTES=bb) |
|
1295 |
- close(tmp[["calls"]]) |
|
1296 |
- close(tmp[["confs"]]) |
|
1297 |
- close(tmpA); close(tmpB) |
|
1298 |
- delete(tmpA); delete(tmpB); |
|
1299 |
- ## This line is not needed as tmp[["calls"]] |
|
1300 |
- ## is stored in tmpA and tmp[["confs"]] is stored in tmpB |
|
1301 |
- ## So deleting tmpA and tmpB in the above command removes |
|
1302 |
- ## the ff objects on disk |
|
1303 |
- ##delete(tmp[["calls"]]); delete(tmp[["confs"]]) |
|
1304 |
- } else { |
|
1305 |
- calls(callSet)[snp.index, ] = tmp[["calls"]] |
|
1306 |
- snpCallProbability(callSet)[snp.index, ] = tmp[["confs"]] |
|
1307 |
- rm(tmpA, tmpB) |
|
1226 |
+ open(tmpA) |
|
1227 |
+ open(tmpB) |
|
1228 |
+ for(j in 1:ncol(callSet)){ |
|
1229 |
+ snpCall(callSet)[snp.index, j] <- as.integer(tmpA[, j]) |
|
1230 |
+ snpCallProbability(callSet)[snp.index, j] <- as.integer(tmpB[, j]) |
|
1308 | 1231 |
} |
1232 |
+ close(tmpA); close(tmpB) |
|
1233 |
+ delete(tmpA); delete(tmpB); |
|
1309 | 1234 |
callSet$gender = tmp$gender |
1310 | 1235 |
rm(tmp) |
1311 | 1236 |
close(callSet) |