... | ... |
@@ -576,3 +576,11 @@ function (which expects ff objects and supports parallel processing) |
576 | 576 |
2010-10-17 M. Ritchie 1.7.22 |
577 | 577 |
** added "humanomni25quadv1b" to validCdfName() in utils.R and RGtoXY() in crlmm-illumina.R |
578 | 578 |
|
579 |
+2010-11-18 M. Ritchie 1.9.8 |
|
580 |
+** genotype.Illumina() is now exported and has a man page. The argument 'outdir' should |
|
581 |
+be used to specify where you want to store the large data objects. ldPath(outdir) is called |
|
582 |
+inside preprocessInfinium2() |
|
583 |
+** the 'batch' variable is now left empty and must be specified by the user |
|
584 |
+** X, Y and zero are now initialized with zeroes by initializeBigMatrix( ,initdata=0) in RGtoXY(). |
|
585 |
+** open(A(callSet)); open(B(callSet)) replaces open(callSet) in genotype.Illumina() |
|
586 |
+ |
... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
Package: crlmm |
2 | 2 |
Type: Package |
3 | 3 |
Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays. |
4 |
-Version: 1.9.7 |
|
5 |
-Date: 2010-11-14 |
|
4 |
+Version: 1.9.8 |
|
5 |
+Date: 2010-11-18 |
|
6 | 6 |
Author: Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>, Ingo Ruczinski <iruczins@jhsph.edu>, Rafael A Irizarry |
7 | 7 |
Maintainer: Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
8 | 8 |
Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 5.0, 6.0, and Illumina platforms |
... | ... |
@@ -462,18 +462,18 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
462 | 462 |
# brgrg = bids[rrgg] |
463 | 463 |
|
464 | 464 |
XY = new("NChannelSet", |
465 |
- X=initializeBigMatrix(name="X", nr=nsnps, nc=narrays, vmode="integer"), |
|
466 |
- Y=initializeBigMatrix(name="Y", nr=nsnps, nc=narrays, vmode="integer"), |
|
467 |
- zero=initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer"), |
|
465 |
+ X=initializeBigMatrix(name="X", nr=nsnps, nc=narrays, vmode="integer", initdata=0), |
|
466 |
+ Y=initializeBigMatrix(name="Y", nr=nsnps, nc=narrays, vmode="integer", initdata=0), |
|
467 |
+ zero=initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer", initdata=0), |
|
468 | 468 |
annotation=chipType, phenoData=RG@phenoData, |
469 | 469 |
protocolData=RG@protocolData, storage.mode="environment") |
470 | 470 |
featureNames(XY) = ids |
471 | 471 |
sampleNames(XY) = sampleNames(RG) |
472 | 472 |
gc() |
473 | 473 |
# Need to initialize - matrices filled with NAs to begin with |
474 |
- XY@assayData$X[1:nsnps,] = 0 |
|
475 |
- XY@assayData$Y[1:nsnps,] = 0 |
|
476 |
- XY@assayData$zero[1:nsnps,] = 0 |
|
474 |
+# XY@assayData$X[1:nsnps,] = 0 |
|
475 |
+# XY@assayData$Y[1:nsnps,] = 0 |
|
476 |
+# XY@assayData$zero[1:nsnps,] = 0 |
|
477 | 477 |
|
478 | 478 |
is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE) |
479 | 479 |
|
... | ... |
@@ -585,7 +585,8 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5, |
585 | 585 |
cdfName, |
586 | 586 |
sns, |
587 | 587 |
stripNorm=TRUE, |
588 |
- useTarget=TRUE) { |
|
588 |
+ useTarget=TRUE, |
|
589 |
+ outdir=".") { |
|
589 | 590 |
# save.it=FALSE, |
590 | 591 |
# snpFile, |
591 | 592 |
# cnFile) { |
... | ... |
@@ -615,7 +616,8 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5, |
615 | 616 |
narrays = ncol(XY) |
616 | 617 |
|
617 | 618 |
is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE) |
618 |
- |
|
619 |
+ if(is.lds) |
|
620 |
+ ldPath(outdir) |
|
619 | 621 |
# if(save.it & !missing(cnFile)) { |
620 | 622 |
# separate out copy number probes |
621 | 623 |
npIndex = getVarInEnv("npProbesFid") |
... | ... |
@@ -913,6 +915,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
913 | 915 |
saveDate=FALSE, |
914 | 916 |
stripNorm=TRUE, |
915 | 917 |
useTarget=TRUE, |
918 |
+ outdir=".", |
|
916 | 919 |
row.names=TRUE, |
917 | 920 |
col.names=TRUE, |
918 | 921 |
probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
... | ... |
@@ -941,7 +944,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
941 | 944 |
if (missing(sns)) { sns = sampleNames(XY) |
942 | 945 |
} |
943 | 946 |
res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose, |
944 |
- seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #, |
|
947 |
+ seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget, outdir=outdir) #, |
|
945 | 948 |
# save.it=save.it, snpFile=snpFile, cnFile=cnFile) |
946 | 949 |
|
947 | 950 |
if(is.lds) { |
... | ... |
@@ -1042,7 +1045,8 @@ construct.Illumina = function(sampleSheet=NULL, |
1042 | 1045 |
red="Red.idat"), |
1043 | 1046 |
cdfName, |
1044 | 1047 |
copynumber=TRUE, |
1045 |
- verbose=TRUE, batch, fns, saveDate=TRUE){ |
|
1048 |
+ verbose=TRUE, batch, #fns, |
|
1049 |
+ saveDate=TRUE, outdir="."){ |
|
1046 | 1050 |
if(!is.null(arrayNames)) { |
1047 | 1051 |
pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames)) |
1048 | 1052 |
} |
... | ... |
@@ -1083,7 +1087,7 @@ construct.Illumina = function(sampleSheet=NULL, |
1083 | 1087 |
stopifnot(length(batch) == narrays) |
1084 | 1088 |
} |
1085 | 1089 |
if(missing(batch)) { |
1086 |
- batch = as.factor(rep(1, narrays)) |
|
1090 |
+ stop("Must specify 'batch'") # batch = as.factor(rep(1, narrays)) |
|
1087 | 1091 |
} |
1088 | 1092 |
|
1089 | 1093 |
grnfiles = paste(arrayNames, fileExt$green, sep=sep) |
... | ... |
@@ -1105,12 +1109,13 @@ construct.Illumina = function(sampleSheet=NULL, |
1105 | 1109 |
|
1106 | 1110 |
if(verbose) message("Initializing container for genotyping and copy number estimation") |
1107 | 1111 |
featureData = getFeatureData.Affy(cdfName, copynumber=copynumber) |
1108 |
- if(!missing(fns)){ |
|
1109 |
- index = match(fns, featureNames(featureData)) |
|
1110 |
- if(all(is.na(index))) stop("fns not in featureNames") |
|
1111 |
- featureData = featureData[index, ] |
|
1112 |
- } |
|
1112 |
+# if(!missing(fns)){ |
|
1113 |
+# index = match(fns, featureNames(featureData)) |
|
1114 |
+# if(all(is.na(index))) stop("fns not in featureNames") |
|
1115 |
+# featureData = featureData[index, ] |
|
1116 |
+# } |
|
1113 | 1117 |
nr = nrow(featureData); nc = narrays |
1118 |
+ ldPath(outdir) |
|
1114 | 1119 |
cnSet = new("CNSet", |
1115 | 1120 |
alleleA=initializeBigMatrix(name="A", nr, nc), |
1116 | 1121 |
alleleB=initializeBigMatrix(name="B", nr, nc), |
... | ... |
@@ -1151,7 +1156,8 @@ genotype.Illumina = function(sampleSheet=NULL, |
1151 | 1156 |
cdfName, |
1152 | 1157 |
copynumber=TRUE, |
1153 | 1158 |
batch, |
1154 |
- fns, |
|
1159 |
+ outdir=".", |
|
1160 |
+# fns, |
|
1155 | 1161 |
saveDate=TRUE, |
1156 | 1162 |
stripNorm=TRUE, |
1157 | 1163 |
useTarget=TRUE, |
... | ... |
@@ -1173,14 +1179,19 @@ genotype.Illumina = function(sampleSheet=NULL, |
1173 | 1179 |
if(missing(cdfName)) stop("must specify cdfName") |
1174 | 1180 |
if(!isValidCdfName(cdfName)) stop("cdfName not valid. see validCdfNames") |
1175 | 1181 |
pkgname = getCrlmmAnnotationName(cdfName) |
1182 |
+ if(missing(outdir)) |
|
1183 |
+ stop("Must specify a directory to store large data objects") |
|
1176 | 1184 |
callSet = construct.Illumina(sampleSheet=sampleSheet, arrayNames=arrayNames, |
1177 | 1185 |
ids=ids, path=path, arrayInfoColNames=arrayInfoColNames, |
1178 | 1186 |
highDensity=highDensity, sep=sep, fileExt=fileExt, |
1179 |
- cdfName=cdfName, copynumber=copynumber, verbose=verbose, batch=batch, |
|
1180 |
- fns=fns, saveDate=saveDate) |
|
1187 |
+ cdfName=cdfName, copynumber=copynumber, verbose=verbose, batch=batch, # fns=fns, |
|
1188 |
+ saveDate=saveDate, outdir=outdir) |
|
1181 | 1189 |
if(missing(sns)) sns = sampleNames(callSet) |
1182 |
- open(callSet) |
|
1183 |
- is.snp = isSnp(callSet) |
|
1190 |
+ |
|
1191 |
+ open(A(callSet)) |
|
1192 |
+ open(B(callSet)) |
|
1193 |
+ # open(callSet) |
|
1194 |
+ is.snp = isSnp(callSet) |
|
1184 | 1195 |
snp.index = which(is.snp) |
1185 | 1196 |
narrays = ncol(callSet) |
1186 | 1197 |
if(is.lds) { |
... | ... |
@@ -1195,7 +1206,7 @@ genotype.Illumina = function(sampleSheet=NULL, |
1195 | 1206 |
sep=sep, fileExt=fileExt, saveDate=saveDate, verbose=verbose, mixtureSampleSize=mixtureSampleSize, |
1196 | 1207 |
fitMixture=fitMixture, eps=eps, seed=seed, cdfName=cdfName, sns=sns, stripNorm=stripNorm, |
1197 | 1208 |
useTarget=useTarget, A=A(callSet), B=B(callSet), SKW=SKW, SNR=SNR, |
1198 |
- mixtureParams=mixtureParams, is.snp=is.snp, neededPkgs=c("crlmm", pkgname)) |
|
1209 |
+ mixtureParams=mixtureParams, is.snp=is.snp, outdir=outdir, neededPkgs=c("crlmm", pkgname)) |
|
1199 | 1210 |
|
1200 | 1211 |
open(SKW) |
1201 | 1212 |
open(SNR) |
... | ... |
@@ -1214,7 +1225,7 @@ genotype.Illumina = function(sampleSheet=NULL, |
1214 | 1225 |
rm(RG); gc() |
1215 | 1226 |
|
1216 | 1227 |
res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose, |
1217 |
- seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) |
|
1228 |
+ seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget, outdir=outdir) |
|
1218 | 1229 |
rm(XY); gc() |
1219 | 1230 |
if(verbose) message("Finished preprocessing.") |
1220 | 1231 |
np.index = which(!is.snp) |
... | ... |
@@ -1317,7 +1328,7 @@ processIDAT = function(sel, sampleSheet=NULL, |
1317 | 1328 |
sns, |
1318 | 1329 |
stripNorm=TRUE, |
1319 | 1330 |
useTarget=TRUE, |
1320 |
- A, B, SKW, SNR, mixtureParams, is.snp) { |
|
1331 |
+ A, B, SKW, SNR, mixtureParams, is.snp, outdir=".") { |
|
1321 | 1332 |
|
1322 | 1333 |
if(length(path)>= length(sel)) path = path[sel] |
1323 | 1334 |
RG = readIdatFiles(sampleSheet=sampleSheet[sel,], arrayNames=arrayNames[sel], |
... | ... |
@@ -1331,7 +1342,7 @@ processIDAT = function(sel, sampleSheet=NULL, |
1331 | 1342 |
if (missing(sns) || length(sns)!=ncol(XY)) sns = sampleNames(XY) |
1332 | 1343 |
|
1333 | 1344 |
res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose, |
1334 |
- seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) |
|
1345 |
+ seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget, outdir=outdir) |
|
1335 | 1346 |
# save.it=save.it, snpFile=snpFile, cnFile=cnFile) |
1336 | 1347 |
open(XY@assayData$X); open(XY@assayData$Y); open(XY@assayData$zero) |
1337 | 1348 |
delete(XY@assayData$X); delete(XY@assayData$Y); delete(XY@assayData$zero); rm(XY) |
... | ... |
@@ -10,11 +10,11 @@ |
10 | 10 |
crlmmIlluminaV2(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
11 | 11 |
arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"), |
12 | 12 |
highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), |
13 |
- saveDate=FALSE, stripNorm=TRUE, useTarget=TRUE, row.names=TRUE, col.names=TRUE, |
|
14 |
- probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
|
15 |
- seed=1, mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
|
16 |
- cdfName, sns, recallMin=10, recallRegMin=1000, |
|
17 |
- returnParams=FALSE, badSNP=.7) |
|
13 |
+ saveDate=FALSE, stripNorm=TRUE, useTarget=TRUE, outdir=".", |
|
14 |
+ row.names=TRUE, col.names=TRUE, probs=c(1/3, 1/3, 1/3), |
|
15 |
+ DF=6, SNRMin=5, gender=NULL, seed=1, mixtureSampleSize=10^5, |
|
16 |
+ eps=0.1, verbose=TRUE, cdfName, sns, recallMin=10, |
|
17 |
+ recallRegMin=1000, returnParams=FALSE, badSNP=.7) |
|
18 | 18 |
} |
19 | 19 |
|
20 | 20 |
\arguments{ |
... | ... |
@@ -47,6 +47,8 @@ crlmmIlluminaV2(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
47 | 47 |
\item{stripNorm}{'logical'. Should the data be strip-level normalized?} |
48 | 48 |
\item{useTarget}{'logical' (only used when \code{stripNorm=TRUE}). |
49 | 49 |
Should the reference HapMap intensities be used in strip-level normalization?} |
50 |
+ \item{outdir}{character string specifying the location to store large data objects |
|
51 |
+ (used when the \code{ff} package is loaded)} |
|
50 | 52 |
\item{row.names}{'logical'. Use rownames - SNP names?} |
51 | 53 |
\item{col.names}{'logical'. Use colnames - Sample names?} |
52 | 54 |
\item{probs}{'numeric' vector with priors for AA, AB and BB.} |