Browse code

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@50950 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 18/11/2010 05:31:36
Showing 5 changed files

... ...
@@ -576,3 +576,11 @@ function (which expects ff objects and supports parallel processing)
576 576
 2010-10-17 M. Ritchie 1.7.22
577 577
 **  added "humanomni25quadv1b" to validCdfName() in utils.R and RGtoXY() in crlmm-illumina.R
578 578
 
579
+2010-11-18 M. Ritchie 1.9.8
580
+** genotype.Illumina() is now exported and has a man page.  The argument 'outdir' should 
581
+be used to specify where you want to store the large data objects.  ldPath(outdir) is called
582
+inside preprocessInfinium2()
583
+** the 'batch' argument no longer has a default value and must be specified by the user
584
+** X, Y and zero are now initialized with zeroes by initializeBigMatrix(..., initdata=0) in RGtoXY().
585
+** open(A(callSet)); open(B(callSet)) replaces open(callSet) in genotype.Illumina()
586
+
... ...
@@ -1,8 +1,8 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.9.7
5
-Date: 2010-11-14
4
+Version: 1.9.8
5
+Date: 2010-11-18
6 6
 Author: Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>, Ingo Ruczinski <iruczins@jhsph.edu>, Rafael A Irizarry
7 7
 Maintainer: Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
8 8
 Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 5.0, 6.0, and Illumina platforms
... ...
@@ -68,6 +68,7 @@ export(crlmm,
68 68
        snprma2,
69 69
        crlmm2,
70 70
        genotype2, genotypeLD,
71
+       genotype.Illumina, 
71 72
        crlmmCopynumber2, crlmmCopynumberLD, crlmmCopynumber)
72 73
 export(constructIlluminaCNSet)
73 74
 export(totalCopynumber)
... ...
@@ -462,18 +462,18 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
462 462
 #  brgrg = bids[rrgg]
463 463
 
464 464
   XY = new("NChannelSet",
465
-	     X=initializeBigMatrix(name="X", nr=nsnps, nc=narrays, vmode="integer"),
466
-	     Y=initializeBigMatrix(name="Y", nr=nsnps, nc=narrays, vmode="integer"),
467
-	     zero=initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer"),
465
+	     X=initializeBigMatrix(name="X", nr=nsnps, nc=narrays, vmode="integer", initdata=0),
466
+	     Y=initializeBigMatrix(name="Y", nr=nsnps, nc=narrays, vmode="integer", initdata=0),
467
+	     zero=initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer", initdata=0),
468 468
 	     annotation=chipType, phenoData=RG@phenoData,
469 469
 	     protocolData=RG@protocolData, storage.mode="environment")
470 470
   featureNames(XY) = ids
471 471
   sampleNames(XY) = sampleNames(RG)
472 472
   gc()
473 473
   # Need to initialize - matrices filled with NAs to begin with
474
-  XY@assayData$X[1:nsnps,] = 0
475
-  XY@assayData$Y[1:nsnps,] = 0
476
-  XY@assayData$zero[1:nsnps,] = 0
474
+#  XY@assayData$X[1:nsnps,] = 0
475
+#  XY@assayData$Y[1:nsnps,] = 0
476
+#  XY@assayData$zero[1:nsnps,] = 0
477 477
 
478 478
   is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
479 479
 
... ...
@@ -585,7 +585,8 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5,
585 585
 				cdfName,
586 586
 				sns,
587 587
 				stripNorm=TRUE,
588
-				useTarget=TRUE) {
588
+				useTarget=TRUE,
589
+                                outdir=".") {
589 590
 #				save.it=FALSE,
590 591
 #				snpFile,
591 592
 #				cnFile) {
... ...
@@ -615,7 +616,8 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5,
615 616
   narrays = ncol(XY)
616 617
 
617 618
   is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
618
-
619
+  if(is.lds)
620
+    ldPath(outdir)
619 621
 #  if(save.it & !missing(cnFile)) {
620 622
     # separate out copy number probes
621 623
     npIndex = getVarInEnv("npProbesFid")
... ...
@@ -913,6 +915,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
913 915
 			  saveDate=FALSE,
914 916
 			  stripNorm=TRUE,
915 917
 			  useTarget=TRUE,
918
+                          outdir=".",
916 919
 			  row.names=TRUE,
917 920
 			  col.names=TRUE,
918 921
 			  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
... ...
@@ -941,7 +944,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
941 944
     if (missing(sns)) { sns = sampleNames(XY)
942 945
     }
943 946
     res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
944
-                               seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #,
947
+                               seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget, outdir=outdir) #,
945 948
 #                               save.it=save.it, snpFile=snpFile, cnFile=cnFile)
946 949
 
947 950
     if(is.lds) {
... ...
@@ -1042,7 +1045,8 @@ construct.Illumina = function(sampleSheet=NULL,
1042 1045
 			  red="Red.idat"),
1043 1046
 		      	  cdfName,
1044 1047
 		      	  copynumber=TRUE,
1045
-		      	  verbose=TRUE, batch, fns, saveDate=TRUE){
1048
+		      	  verbose=TRUE, batch, #fns,
1049
+                          saveDate=TRUE, outdir="."){
1046 1050
        if(!is.null(arrayNames)) {
1047 1051
                pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
1048 1052
        }
... ...
@@ -1083,7 +1087,7 @@ construct.Illumina = function(sampleSheet=NULL,
1083 1087
 		stopifnot(length(batch) == narrays)
1084 1088
        }
1085 1089
        if(missing(batch)) {
1086
-                batch = as.factor(rep(1, narrays))
1090
+                stop("Must specify 'batch'") # batch = as.factor(rep(1, narrays))
1087 1091
        }
1088 1092
 
1089 1093
        grnfiles = paste(arrayNames, fileExt$green, sep=sep)
... ...
@@ -1105,12 +1109,13 @@ construct.Illumina = function(sampleSheet=NULL,
1105 1109
 
1106 1110
 	if(verbose) message("Initializing container for genotyping and copy number estimation")
1107 1111
 	featureData = getFeatureData.Affy(cdfName, copynumber=copynumber)
1108
-	if(!missing(fns)){
1109
-		index = match(fns, featureNames(featureData))
1110
-		if(all(is.na(index))) stop("fns not in featureNames")
1111
-		featureData = featureData[index, ]
1112
-	}
1112
+#	if(!missing(fns)){
1113
+#		index = match(fns, featureNames(featureData))
1114
+#		if(all(is.na(index))) stop("fns not in featureNames")
1115
+#		featureData = featureData[index, ]
1116
+#	}
1113 1117
 	nr = nrow(featureData); nc = narrays
1118
+        ldPath(outdir)
1114 1119
 	cnSet = new("CNSet",
1115 1120
 		     alleleA=initializeBigMatrix(name="A", nr, nc),
1116 1121
 		     alleleB=initializeBigMatrix(name="B", nr, nc),
... ...
@@ -1151,7 +1156,8 @@ genotype.Illumina = function(sampleSheet=NULL,
1151 1156
 		      	  cdfName,
1152 1157
 		      	  copynumber=TRUE,
1153 1158
                           batch,
1154
-                          fns,
1159
+                          outdir=".",
1160
+#                          fns,
1155 1161
                           saveDate=TRUE,
1156 1162
        			  stripNorm=TRUE,
1157 1163
 			  useTarget=TRUE,
... ...
@@ -1173,14 +1179,19 @@ genotype.Illumina = function(sampleSheet=NULL,
1173 1179
 	if(missing(cdfName)) stop("must specify cdfName")
1174 1180
 	if(!isValidCdfName(cdfName)) stop("cdfName not valid.  see validCdfNames")
1175 1181
         pkgname = getCrlmmAnnotationName(cdfName)
1182
+        if(missing(outdir))
1183
+          stop("Must specify a directory to store large data objects")
1176 1184
 	callSet = construct.Illumina(sampleSheet=sampleSheet, arrayNames=arrayNames,
1177 1185
 			     ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
1178 1186
                              highDensity=highDensity, sep=sep, fileExt=fileExt,
1179
-			     cdfName=cdfName, copynumber=copynumber, verbose=verbose, batch=batch,
1180
-                             fns=fns, saveDate=saveDate)
1187
+			     cdfName=cdfName, copynumber=copynumber, verbose=verbose, batch=batch, # fns=fns, 
1188
+                             saveDate=saveDate, outdir=outdir)
1181 1189
         if(missing(sns)) sns = sampleNames(callSet)
1182
-	open(callSet)
1183
-	is.snp = isSnp(callSet)
1190
+        
1191
+        open(A(callSet))
1192
+        open(B(callSet))
1193
+        # open(callSet)
1194
+ 	is.snp = isSnp(callSet)
1184 1195
 	snp.index = which(is.snp)
1185 1196
         narrays = ncol(callSet)
1186 1197
         if(is.lds) {
... ...
@@ -1195,7 +1206,7 @@ genotype.Illumina = function(sampleSheet=NULL,
1195 1206
                  sep=sep, fileExt=fileExt, saveDate=saveDate, verbose=verbose, mixtureSampleSize=mixtureSampleSize,
1196 1207
                  fitMixture=fitMixture, eps=eps, seed=seed, cdfName=cdfName, sns=sns, stripNorm=stripNorm,
1197 1208
                  useTarget=useTarget, A=A(callSet), B=B(callSet), SKW=SKW, SNR=SNR,
1198
-                 mixtureParams=mixtureParams, is.snp=is.snp, neededPkgs=c("crlmm", pkgname))
1209
+                 mixtureParams=mixtureParams, is.snp=is.snp, outdir=outdir, neededPkgs=c("crlmm", pkgname))
1199 1210
 
1200 1211
           open(SKW)
1201 1212
           open(SNR)
... ...
@@ -1214,7 +1225,7 @@ genotype.Illumina = function(sampleSheet=NULL,
1214 1225
           rm(RG); gc()
1215 1226
 
1216 1227
           res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
1217
-                               seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget)
1228
+                               seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget, outdir=outdir)
1218 1229
           rm(XY); gc()
1219 1230
           if(verbose) message("Finished preprocessing.")
1220 1231
           np.index = which(!is.snp)
... ...
@@ -1317,7 +1328,7 @@ processIDAT =  function(sel, sampleSheet=NULL,
1317 1328
 			  sns,
1318 1329
 			  stripNorm=TRUE,
1319 1330
 			  useTarget=TRUE,
1320
-                          A, B, SKW, SNR, mixtureParams, is.snp) {
1331
+                          A, B, SKW, SNR, mixtureParams, is.snp, outdir=".") {
1321 1332
 
1322 1333
         if(length(path)>= length(sel)) path = path[sel]
1323 1334
         RG = readIdatFiles(sampleSheet=sampleSheet[sel,], arrayNames=arrayNames[sel],
... ...
@@ -1331,7 +1342,7 @@ processIDAT =  function(sel, sampleSheet=NULL,
1331 1342
         if (missing(sns) || length(sns)!=ncol(XY)) sns = sampleNames(XY)
1332 1343
 
1333 1344
         res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
1334
-                               seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget)
1345
+                               seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget, outdir=outdir)
1335 1346
         #                       save.it=save.it, snpFile=snpFile, cnFile=cnFile)
1336 1347
         open(XY@assayData$X); open(XY@assayData$Y); open(XY@assayData$zero)
1337 1348
         delete(XY@assayData$X); delete(XY@assayData$Y); delete(XY@assayData$zero); rm(XY)
... ...
@@ -10,11 +10,11 @@
10 10
 crlmmIlluminaV2(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
11 11
       arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
12 12
       highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"),
13
-      saveDate=FALSE, stripNorm=TRUE, useTarget=TRUE, row.names=TRUE, col.names=TRUE,
14
-      probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
15
-      seed=1, mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
16
-      cdfName, sns, recallMin=10, recallRegMin=1000,
17
-      returnParams=FALSE, badSNP=.7)
13
+      saveDate=FALSE, stripNorm=TRUE, useTarget=TRUE, outdir=".", 
14
+      row.names=TRUE, col.names=TRUE, probs=c(1/3, 1/3, 1/3), 
15
+      DF=6, SNRMin=5, gender=NULL, seed=1, mixtureSampleSize=10^5, 
16
+      eps=0.1, verbose=TRUE, cdfName, sns, recallMin=10, 
17
+      recallRegMin=1000, returnParams=FALSE, badSNP=.7)
18 18
 }
19 19
 
20 20
 \arguments{
... ...
@@ -47,6 +47,8 @@ crlmmIlluminaV2(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
47 47
   \item{stripNorm}{'logical'.  Should the data be strip-level normalized?}
48 48
   \item{useTarget}{'logical' (only used when \code{stripNorm=TRUE}).
49 49
     Should the reference HapMap intensities be used in strip-level normalization?}
50
+  \item{outdir}{character string specifying the location to store large data objects 
51
+    (used when the \code{ff} package is loaded)}
50 52
   \item{row.names}{'logical'. Use rownames - SNP names?}
51 53
   \item{col.names}{'logical'. Use colnames - Sample names?}
52 54
   \item{probs}{'numeric' vector with priors for AA, AB and BB.}