Browse code

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45831 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 09/04/2010 07:02:35
Showing5 changed files

... ...
@@ -501,5 +501,11 @@ then readIDAT() should work. Thanks to Pierre Cherel who reported this error.
501 501
 ** modified the storage of RG, XY and genotype call data to use the ff package and initializeBigMatrix() function from oligoClasses package.  The genotype call data now includes non-polymorphic probes (which have NA calls).  Functions which use ff storage are named readIdatFiles2(), RGtoXY2(), preprocessInfinium2v2(), crlmmIllumina2() and crlmmIlluminaV2().
502 502
 
503 503
 2010-04-08 B Carvalho committed version 1.5.46
504
-
505 504
 ** moved NEWS to inst/
505
+
506
+2010-04-09 M. Ritchie committed version 1.5.47
507
+** fixed bugs in code and removed copy number probes from output 
508
+(now saved separately as before - SnpSuperSet not used)
509
+** crlmmIllumina2() and crlmmIlluminaV2() now use the crlmmGT2() 
510
+function (which expects ff objects and supports parallel processing)
511
+** updated crlmmIllumina.pdf vignette
... ...
@@ -1,8 +1,8 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.5.46
5
-Date: 2010-02-07
4
+Version: 1.5.47
5
+Date: 2010-04-09
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>
7 7
 Maintainer: Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
8 8
 Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 5.0, 6.0, and Illumina platforms
... ...
@@ -663,7 +663,7 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
663 663
 #  argrg = aids[rrgg]
664 664
 #  brgrg = bids[rrgg]
665 665
 
666
-  tmpmat = matrix(0, nsnps, narrays)
666
+  tmpmat = matrix(as.integer(0), nsnps, narrays)
667 667
   rownames(tmpmat) = ids
668 668
   colnames(tmpmat) = sampleNames(RG)
669 669
   XY = new("NChannelSet", X=tmpmat, Y=tmpmat, zero=tmpmat, # Xnb=tmpmat, Ynb=tmpmat, Xse=tmpmat, Yse=tmpmat, zero=tmpmat,
... ...
@@ -778,6 +778,10 @@ RGtoXY2 = function(RG, chipType, verbose=TRUE) {
778 778
 	     protocolData=RG@protocolData, storage.mode="environment")
779 779
   featureNames(XY) = ids # featureNames(RG)
780 780
   gc()
781
+  # Need to initialize - matrices filled with NAs to begin with
782
+  XY@assayData$X[1:nsnps,] = 0
783
+  XY@assayData$Y[1:nsnps,] = 0
784
+  XY@assayData$zero[1:nsnps,] = 0
781 785
   
782 786
   # First sort out Infinium II SNPs, X -> R (allele A)  and Y -> G (allele B) from the same probe
783 787
   XY@assayData$X[!is.na(aord),] = exprs(channel(RG, "R"))[aord[!is.na(aord)],] # mostly red
... ...
@@ -866,8 +870,8 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
866 870
       tmp = normalize.quantiles.use.target(as.matrix(cbind(subX, subY)), targetdist[[s]])
867 871
     else
868 872
       tmp = normalize.quantiles(as.matrix(cbind(subX, subY)))
869
-    XY@assayData$X[sel,] = tmp[,1:(ncol(tmp)/2)]+16
870
-    XY@assayData$Y[sel,] = tmp[,(ncol(tmp)/2+1):ncol(tmp)]+16
873
+    XY@assayData$X[sel,] = matrix(as.integer(tmp[,1:(ncol(tmp)/2)]+16), nrow(tmp), ncol(tmp)/2)
874
+    XY@assayData$Y[sel,] = matrix(as.integer(tmp[,(ncol(tmp)/2+1):ncol(tmp)]+16), nrow(tmp), ncol(tmp)/2)
871 875
 #    Xqws[sel,] = tmp[,1:(ncol(tmp)/2)]
872 876
 #    Yqws[sel,] = tmp[,(ncol(tmp)/2+1):ncol(tmp)]
873 877
     rm(subX, subY, tmp, sel)
... ...
@@ -972,6 +976,7 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
972 976
   ##     (like what happened to GCRMA)
973 977
   set.seed(seed)
974 978
   idx <- sort(sample(autosomeIndex, mixtureSampleSize))
979
+  idx2 <- sample(nprobes, 10^5)
975 980
   
976 981
   ##S will hold (A+B)/2 and M will hold A-B
977 982
   ##NOTE: We actually dont need to save S. Only for pics etc...
... ...
@@ -990,7 +995,7 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
990 995
      if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=narrays, style=3)
991 996
   }
992 997
 
993
-  idx2 <- sample(nprobes, 10^5)
998
+
994 999
   for(i in 1:narrays){
995 1000
     SKW[i] = mean((A[idx2,i]-mean(A[idx2,i]))^3)/(sd(A[idx2,i])^3)
996 1001
     if(fitMixture){
... ...
@@ -1034,10 +1039,10 @@ preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5,
1034 1039
 				cdfName,
1035 1040
 				sns,
1036 1041
 				stripNorm=TRUE,
1037
-				useTarget=TRUE) { #,
1038
-#				save.it=FALSE,
1039
-#				snpFile,
1040
-#				cnFile) {
1042
+				useTarget=TRUE, # ) { #,
1043
+				save.it=FALSE,
1044
+				snpFile,
1045
+				cnFile) {
1041 1046
   if(stripNorm)
1042 1047
     XY = stripNormalize(XY, useTarget=useTarget, verbose=verbose)
1043 1048
 
... ...
@@ -1069,29 +1074,30 @@ preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5,
1069 1074
   #  bIndex <- getVarInEnv("bIndex")
1070 1075
   SMEDIAN <- getVarInEnv("SMEDIAN")
1071 1076
   theKnots <- getVarInEnv("theKnots")
1072
-  gns <- featureNames(XY) # getVarInEnv("gns") # needs to include np probes - gns is only for snps
1073
-  
1074
-  # separate out copy number probes
1075
-  npIndex = getVarInEnv("npProbesFid")
1076
-#  nprobes = length(npIndex)
1077
+#  gns <- featureNames(XY) # getVarInEnv("gns") # needs to include np probes - gns is only for snps
1077 1078
   narrays = ncol(XY)
1078
-#  A <- matrix(as.integer(exprs(channel(XY, "X"))[npIndex,]), nprobes, narrays)
1079
-#  B <- matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays)
1079
+  
1080
+  if(save.it & !missing(cnFile)) {
1081
+    # separate out copy number probes
1082
+    npIndex = getVarInEnv("npProbesFid")
1083
+    nprobes = length(npIndex)
1084
+    A <- matrix(as.integer(exprs(channel(XY, "X"))[npIndex,]), nprobes, narrays)
1085
+    B <- matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays)
1080 1086
 
1081
-  # new lines below - useful to keep track of zeroed out probes
1082
-#  zero <- matrix(as.integer(exprs(channel(XY, "zero"))[npIndex,]), nprobes, narrays) 
1087
+    # new lines below - useful to keep track of zeroed out probes
1088
+    zero <- matrix(as.integer(exprs(channel(XY, "zero"))[npIndex,]), nprobes, narrays) 
1083 1089
 
1084
-#  colnames(A) <- colnames(B) <- colnames(zero) <- sns
1085
-#  rownames(A) <- rownames(B) <- rownames(zero) <- names(npIndex)
1090
+    colnames(A) <- colnames(B) <- colnames(zero) <- sns
1091
+    rownames(A) <- rownames(B) <- rownames(zero) <- names(npIndex)
1086 1092
   
1087
-#  cnAB = list(A=A, B=B, zero=zero, sns=sns, gns=names(npIndex), cdfName=cdfName)
1088
-#  if(save.it & !missing(cnFile)) {
1089
-#    t0 <- proc.time() 
1090
-#    save(cnAB, file=cnFile) 
1091
-#    t0 <- proc.time()-t0
1092
-#    if(verbose) message("Used ", round(t0[3],1), " seconds to save ", cnFile, ".")
1093
-#  }
1094
-#  rm(cnAB, B, zero)
1093
+    cnAB = list(A=A, B=B, zero=zero, sns=sns, gns=names(npIndex), cdfName=cdfName)
1094
+    
1095
+    t0 <- proc.time() 
1096
+    save(cnAB, file=cnFile) 
1097
+    t0 <- proc.time()-t0
1098
+    if(verbose) message("Used ", round(t0[3],1), " seconds to save ", cnFile, ".")
1099
+     rm(cnAB, B, zero)
1100
+  }
1095 1101
   
1096 1102
   # next process snp probes
1097 1103
   snpIndex = getVarInEnv("snpProbesFid")
... ...
@@ -1099,12 +1105,12 @@ preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5,
1099 1105
   
1100 1106
   ##We will read each cel file, summarize, and run EM one by one
1101 1107
   ##We will save parameters of EM to use later
1102
-#  mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, length(narrays), "double")
1103
-#  SNR <- initializeBigVector("crlmmSNR-", narrays, "double")
1104
-#  SKW <- initializeBigVector("crlmmSKW-", narrays, "double") 
1105
-  mixtureParams <- matrix(0, 4, narrays)
1106
-  SNR <- vector("numeric", narrays)
1107
-  SKW <- vector("numeric", narrays)
1108
+  mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, narrays, "double")
1109
+  SNR <- initializeBigVector("crlmmSNR-", narrays, "double")
1110
+  SKW <- initializeBigVector("crlmmSKW-", narrays, "double") 
1111
+#  mixtureParams <- matrix(0, 4, narrays)
1112
+#  SNR <- vector("numeric", narrays)
1113
+#  SKW <- vector("numeric", narrays)
1108 1114
 
1109 1115
   ## This is the sample for the fitting of splines
1110 1116
   ## BC: I like better the idea of the user passing the seed,
... ...
@@ -1112,15 +1118,16 @@ preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5,
1112 1118
   ##     (like what happened to GCRMA)
1113 1119
   set.seed(seed)
1114 1120
   idx <- sort(sample(autosomeIndex, mixtureSampleSize))
1121
+  idx2 <- sample(nprobes, 10^5)
1115 1122
   
1116 1123
   ##S will hold (A+B)/2 and M will hold A-B
1117 1124
   ##NOTE: We actually dont need to save S. Only for pics etc...
1118 1125
   ##f is the correction. we save to avoid recomputing
1119
-  A <- exprs(channel(XY, "X"))[,] # ), nrow(XY), narrays) # [snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsa), length(filenames))
1120
-  B <- exprs(channel(XY, "Y"))[,] # ), nrow(XY), narrays) # [snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsb), length(filenames))
1126
+#  A <- exprs(channel(XY, "X"))[,] # ), nrow(XY), narrays) # [snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsa), length(filenames))
1127
+#  B <- exprs(channel(XY, "Y"))[,] # ), nrow(XY), narrays) # [snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsb), length(filenames))
1121 1128
 
1122 1129
   # new lines below - useful to keep track of zeroed out SNPs
1123
-  zero <- exprs(channel(XY, "zero"))[,] # )) #[snpIndex,]), nprobes, narrays)
1130
+#  zero <- exprs(channel(XY, "zero"))[,] # )) #[snpIndex,]), nprobes, narrays)
1124 1131
 
1125 1132
 #  if(!is.matrix(A)) {
1126 1133
 #     A = A[,]
... ...
@@ -1128,25 +1135,32 @@ preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5,
1128 1135
 #     zero = zero[,]
1129 1136
 #  }
1130 1137
 
1131
-  if(!is.integer(A)) {
1132
-     A = matrix(as.integer(A), nrow(A), ncol(A))
1133
-     B = matrix(as.integer(B), nrow(B), ncol(B))
1134
-  }  
1138
+#  if(!is.integer(A)) {
1139
+#     A = matrix(as.integer(A), nrow(A), ncol(A))
1140
+#     B = matrix(as.integer(B), nrow(B), ncol(B))
1141
+#  }  
1135 1142
     
1136 1143
 #  colnames(A) <- colnames(B) <- colnames(zero) <- sns
1137 1144
 #  rownames(A) <- rownames(B) <- rownames(zero) <- names(snpIndex) # gns # featureNames(XY)
1145
+
1146
+  A <- initializeBigMatrix("crlmmA-", nprobes, narrays, "integer")
1147
+  B <- initializeBigMatrix("crlmmB-", nprobes, narrays, "integer")
1148
+  zero <- initializeBigMatrix("crlmmZero-", nprobes, narrays, "integer")
1138 1149
   
1139 1150
   if(verbose){
1140 1151
      message("Calibrating ", narrays, " arrays.")
1141 1152
      if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=narrays, style=3)
1142 1153
   }
1143 1154
 
1144
-  idx2 <- sample(nprobes, 10^5)
1145 1155
   for(i in 1:narrays){
1146
-    SKW[i] = mean((A[snpIndex,i][idx2]-mean(A[snpIndex,i][idx2]))^3)/(sd(A[snpIndex,i][idx2])^3)
1156
+     A[,i] = exprs(channel(XY, "X"))[snpIndex,i]
1157
+     B[,i] = exprs(channel(XY, "Y"))[snpIndex,i]
1158
+     zero[,i] = exprs(channel(XY, "zero"))[snpIndex,i] 
1159
+#    SKW[i] = mean((A[snpIndex,i][idx2]-mean(A[snpIndex,i][idx2]))^3)/(sd(A[snpIndex,i][idx2])^3)
1160
+     SKW[i] = mean((A[idx2,i]-mean(A[idx2,i]))^3)/(sd(A[idx2,i])^3)
1147 1161
     if(fitMixture){
1148
-      S <- (log2(A[snpIndex,i][idx])+log2(B[snpIndex,i][idx]))/2 - SMEDIAN
1149
-      M <- log2(A[snpIndex,i][idx])-log2(B[snpIndex,i][idx])
1162
+      S <- (log2(A[idx,i])+log2(B[idx,i]))/2 - SMEDIAN
1163
+      M <- log2(A[idx,i])-log2(B[idx,i])
1150 1164
 
1151 1165
       ##we need to test the choice of eps.. it is not the max diff between funcs
1152 1166
       tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
... ...
@@ -1165,14 +1179,22 @@ preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5,
1165 1179
   }
1166 1180
   if (!fitMixture) SNR <- mixtureParams <- NA
1167 1181
   ## gns comes from preprocStuff.rda
1168
-  res = list(A=A, B=B, zero=zero, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName, snpIndex=snpIndex, npIndex=npIndex)
1182
+  res = list(A=A, B=B,
1183
+             zero=zero, sns=sns, gns=names(snpIndex), SNR=SNR, SKW=SKW,
1184
+             mixtureParams=mixtureParams, cdfName=cdfName) # , snpIndex=snpIndex, npIndex=npIndex)
1169 1185
 
1170
-#  if(save.it & !missing(snpFile)) {
1171
-#    t0 <- proc.time() 
1172
-#    save(res, file=snpFile) 
1173
-#    t0 <- proc.time()-t0
1174
-#    if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".")
1175
-#  }
1186
+  if(save.it & !missing(snpFile)) {
1187
+    t0 <- proc.time() 
1188
+    save(res, file=snpFile) 
1189
+    t0 <- proc.time()-t0
1190
+    if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".")
1191
+  }
1192
+  close(A)
1193
+  close(B)
1194
+  close(zero)
1195
+  close(SKW)
1196
+  close(mixtureParams)
1197
+  close(SNR)
1176 1198
   return(res)
1177 1199
 }
1178 1200
 
... ...
@@ -1213,8 +1235,8 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
1213 1235
         if(!any(obj == "res"))
1214 1236
           stop("Object in ", snpFile, " seems to be invalid.")
1215 1237
   }
1216
-  if(row.names) row.names=res$gns else row.names=NULL
1217
-  if(col.names) col.names=res$sns else col.names=NULL
1238
+  if(row.names) row.names=res[["gns"]] else row.names=NULL
1239
+  if(col.names) col.names=res[["sns"]] else col.names=NULL
1218 1240
 
1219 1241
   res2 <- crlmmGT(res[["A"]], res[["B"]], res[["SNR"]],
1220 1242
                   res[["mixtureParams"]], res[["cdfName"]],
... ...
@@ -1235,21 +1257,21 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
1235 1257
 crlmmIllumina2 <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
1236 1258
                   row.names=TRUE, col.names=TRUE,
1237 1259
                   probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
1238
-                  seed=1, # save.it=FALSE, load.it=FALSE, snpFile, cnFile,
1260
+                  seed=1, save.it=FALSE, load.it=FALSE, snpFile, cnFile,
1239 1261
                   mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
1240 1262
                   cdfName, sns, recallMin=10, recallRegMin=1000,
1241 1263
                   returnParams=FALSE, badSNP=.7) {
1242
-#  if (save.it & (missing(snpFile) | missing(cnFile)))
1243
-#    stop("'snpFile' and/or 'cnFile' is missing and you chose to save.it")
1244
-#  if (load.it & missing(snpFile))
1245
-#    stop("'snpFile' is missing and you chose to load.it")
1246
-#  if (!missing(snpFile))
1247
-#    if (load.it & !file.exists(snpFile)){
1248
-#      load.it <- FALSE
1249
-#      message("File ", snpFile, " does not exist.")
1250
-#      stop("Cannot load SNP data.")
1251
-#  }
1252
-#  if (!load.it){
1264
+  if (save.it & (missing(snpFile) | missing(cnFile)))
1265
+    stop("'snpFile' and/or 'cnFile' is missing and you chose to save.it")
1266
+  if (load.it & missing(snpFile))
1267
+    stop("'snpFile' is missing and you chose to load.it")
1268
+  if (!missing(snpFile))
1269
+    if (load.it & !file.exists(snpFile)){
1270
+      load.it <- FALSE
1271
+      message("File ", snpFile, " does not exist.")
1272
+      stop("Cannot load SNP data.")
1273
+  }
1274
+  if (!load.it){
1253 1275
     if(!missing(RG)) {
1254 1276
       if(missing(XY))
1255 1277
         XY = RGtoXY2(RG, chipType=cdfName)
... ...
@@ -1262,48 +1284,51 @@ crlmmIllumina2 <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
1262 1284
                         seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #,
1263 1285
                       #  save.it=save.it, snpFile=snpFile, cnFile=cnFile)
1264 1286
 
1265
-    fD = featureData(XY)
1266
-    phenD = XY@phenoData
1267
-    protD = XY@protocolData
1268
-    rm(XY)
1269
-    gc()
1270
-    if(verbose) message("Initializing container for alleleA, alleleB, call, callProbability")
1271
-    callSet <- new("SnpSuperSet",
1272
-                    alleleA=initializeBigMatrix(name="A", nr=nrow(res[[1]]), nc=length(sns)),
1273
-                    alleleB=initializeBigMatrix(name="B", nr=nrow(res[[1]]), nc=length(sns)),
1274
-                    call=initializeBigMatrix(name="call", nr=nrow(res[[1]]), nc=length(sns)),
1275
-                    callProbability=initializeBigMatrix(name="callPr", nr=nrow(res[[1]]), nc=length(sns)),
1276
-  	            annotation=cdfName, protocolData=protD, phenoData=phenD, featureData=fD) 
1277
-    sampleNames(callSet) <- sns
1278
-    featureNames(callSet) <- res[["gns"]]
1279
-    pData(callSet)$SKW <- rep(NA, length(sns))
1280
-    pData(callSet)$SNR <- rep(NA, length(sns))
1281
-    pData(callSet)$gender <- rep(NA, length(sns))
1287
+#    fD = featureData(XY)
1288
+#    phenD = XY@phenoData
1289
+#    protD = XY@protocolData
1290
+#    rm(XY)
1291
+#    gc()
1292
+#    if(verbose) message("Initializing container for alleleA, alleleB, call, callProbability")
1293
+#    callSet <- new("SnpSuperSet",
1294
+#                    alleleA=initializeBigMatrix(name="A", nr=nrow(res[[1]]), nc=length(sns)),
1295
+#                    alleleB=initializeBigMatrix(name="B", nr=nrow(res[[1]]), nc=length(sns)),
1296
+#                    call=initializeBigMatrix(name="call", nr=nrow(res[[1]]), nc=length(sns)),
1297
+#                    callProbability=initializeBigMatrix(name="callPr", nr=nrow(res[[1]]), nc=length(sns)),
1298
+#  	            annotation=cdfName, protocolData=protD, phenoData=phenD, featureData=fD) 
1299
+#    sampleNames(callSet) <- sns
1300
+#    featureNames(callSet) <- res[["gns"]]
1301
+#    pData(callSet)$SKW <- rep(NA, length(sns))
1302
+#    pData(callSet)$SNR <- rep(NA, length(sns))
1303
+#    pData(callSet)$gender <- rep(NA, length(sns))
1282 1304
 					  
1283
-#  }else{
1284
-#      if(verbose) message("Loading ", snpFile, ".")
1285
-#        obj <- load(snpFile)
1286
-#        if(verbose) message("Done.")
1287
-#        if(!any(obj == "res"))
1288
-#          stop("Object in ", snpFile, " seems to be invalid.")
1289
-#  }
1305
+  }else{
1306
+      if(verbose) message("Loading ", snpFile, ".")
1307
+        obj <- load(snpFile)
1308
+        if(verbose) message("Done.")
1309
+        if(!any(obj == "res"))
1310
+          stop("Object in ", snpFile, " seems to be invalid.")
1311
+  }
1290 1312
 
1291
-    rm(phenD, protD , fD)
1313
+ #   rm(phenD, protD , fD)
1292 1314
 	
1293
-    snp.index <- res$snpIndex #match(res$gns, featureNames(callSet))                
1294
-    suppressWarnings(A(callSet) <- res[["A"]])
1295
-    suppressWarnings(B(callSet) <- res[["B"]])
1296
-    pData(callSet)$SKW <- res$SKW
1297
-    pData(callSet)$SNR <- res$SNR
1298
-    mixtureParams <- res$mixtureParams
1299
-    rm(res); gc()
1300
-    tmp <- crlmmGT(A=as.matrix(A(callSet)[snp.index,]), # j]),
1301
-                  B=as.matrix(B(callSet)[snp.index,]),  # j]),
1302
-                  SNR=callSet$SNR, # [j],
1303
-                  mixtureParams=mixtureParams,
1304
-                  cdfName=annotation(callSet),
1305
-#                  row.names=featureNames(callSet)[snp.index],
1306
-#                  col.names=sampleNames(callSet), #[j],
1315
+#    snp.index <- res$snpIndex #match(res$gns, featureNames(callSet))                
1316
+#    suppressWarnings(A(callSet) <- res[["A"]])
1317
+#    suppressWarnings(B(callSet) <- res[["B"]])
1318
+#    pData(callSet)$SKW <- res$SKW
1319
+#    pData(callSet)$SNR <- res$SNR
1320
+#    mixtureParams <- res$mixtureParams
1321
+#    rm(res); gc()
1322
+  if(row.names) row.names=res$gns else row.names=NULL
1323
+  if(col.names) col.names=res$sns else col.names=NULL
1324
+  
1325
+  res2 <- crlmmGT2(A=res[["A"]], #as.matrix(A(callSet)[snp.index,]), # j]),
1326
+                  B=res[["B"]], # as.matrix(B(callSet)[snp.index,]),  # j]),
1327
+                  SNR=res[["SNR"]], # callSet$SNR, # [j],
1328
+                  mixtureParams=res[["mixtureParams"]], #,
1329
+                  cdfName=res[["cdfName"]], # annotation(callSet),
1330
+                  row.names=row.names, # featureNames(callSet)[snp.index],
1331
+                  col.names=col.names, # sampleNames(callSet), #[j],
1307 1332
                   probs=probs,
1308 1333
                   DF=DF,
1309 1334
                   SNRMin=SNRMin,
... ...
@@ -1313,14 +1338,20 @@ crlmmIllumina2 <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
1313 1338
                   verbose=verbose,
1314 1339
                   returnParams=returnParams,
1315 1340
                   badSNP=badSNP)
1341
+#    rm(res); gc()  
1316 1342
 #    suppressWarnings(snpCall(callSet)[snp.index, j] <- tmp[["calls"]])
1317 1343
 #    suppressWarnings(snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]])
1318 1344
 #    callSet$gender[j] <- tmp$gender
1319
-    suppressWarnings(snpCall(callSet)[snp.index,] <- tmp[["calls"]])
1320
-    suppressWarnings(snpCallProbability(callSet)[snp.index,] <- tmp[["confs"]])
1321
-    callSet$gender <- tmp$gender
1322
-    rm(tmp); gc()
1323
-    return(callSet)
1345
+#    suppressWarnings(snpCall(callSet)[snp.index,] <- tmp[["calls"]])
1346
+#    suppressWarnings(snpCallProbability(callSet)[snp.index,] <- tmp[["confs"]])
1347
+#    callSet$gender <- tmp$gender
1348
+#    rm(tmp); gc()
1349
+#    return(callSet)
1350
+
1351
+  res2[["SNR"]] <- res[["SNR"]]
1352
+  res2[["SKW"]] <- res[["SKW"]]
1353
+  rm(res); gc()
1354
+  return(list2SnpSet(res2, returnParams=returnParams))
1324 1355
 }
1325 1356
 
1326 1357
 
... ...
@@ -1343,7 +1374,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
1343 1374
           	          row.names=TRUE, 
1344 1375
 			  col.names=TRUE,
1345 1376
 			  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
1346
-                          seed=1, # save.ab=FALSE, snpFile, cnFile,
1377
+                          seed=1, save.ab=FALSE, snpFile, cnFile,
1347 1378
                           mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
1348 1379
                           cdfName, sns, recallMin=10, recallRegMin=1000,
1349 1380
                           returnParams=FALSE, badSNP=.7) {
... ...
@@ -1354,8 +1385,8 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
1354 1385
 			  
1355 1386
 #  if (save.rg & missing(rgFile))
1356 1387
 #    stop("'rgFile' is missing, and you chose save.rg")
1357
-#  if (save.ab & (missing(snpFile) | missing(cnFile)))
1358
-#    stop("'snpFile' or 'cnFile' is missing and you chose save.ab")
1388
+  if (save.ab & (missing(snpFile) | missing(cnFile)))
1389
+    stop("'snpFile' or 'cnFile' is missing and you chose save.ab")
1359 1390
 #  batches = NULL
1360 1391
 #  if(!is.null(arrayNames))
1361 1392
 #    batches <- rep(1, length(arrayNames)) # problem here if arrayNames not specified! # splitIndicesByLength(seq(along=arrayNames), ocSamples())
... ...
@@ -1377,27 +1408,25 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
1377 1408
 #	save(RG, file=rgFile)
1378 1409
 
1379 1410
     XY = RGtoXY2(RG, chipType=cdfName)
1380
-    rm(RG)
1381
-    gc()
1411
+    rm(RG); gc()
1382 1412
     if (missing(sns)) { sns = sampleNames(XY) #subsns = sampleNames(XY)
1383
-    } else subsns = sns[j]
1413
+    } # else subsns = sns[j]
1384 1414
     res = preprocessInfinium2v2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
1385
-                               seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) # sns=subsns
1386
-#                          save.it=save.ab, snpFile=snpFile, cnFile=cnFile)
1387
-    fD = featureData(XY)
1388
-    phenD = XY@phenoData
1389
-    protD = XY@protocolData
1390
-    rm(XY)
1391
-    gc()
1415
+                               seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget, #) # sns=subsns
1416
+                               save.it=save.ab, snpFile=snpFile, cnFile=cnFile)
1417
+#    fD = featureData(XY)
1418
+#    phenD = XY@phenoData
1419
+#    protD = XY@protocolData
1420
+    rm(XY); gc()
1392 1421
 #    if(k == 1){
1393
-    if(verbose) message("Initializing container for alleleA, alleleB, call, callProbability")
1394
-    callSet <- new("SnpSuperSet",
1395
-                    alleleA=initializeBigMatrix(name="A", nr=nrow(res[[1]]), nc=length(sns)),
1396
-                    alleleB=initializeBigMatrix(name="B", nr=nrow(res[[1]]), nc=length(sns)),
1397
-                    call=initializeBigMatrix(name="call", nr=nrow(res[[1]]), nc=length(sns)),
1398
-                    callProbability=initializeBigMatrix(name="callPr", nr=nrow(res[[1]]), nc=length(sns)),
1399
- 		    annotation=cdfName, protocolData=protD, phenoData=phenD, featureData=fD)
1400
-    sampleNames(callSet) <- sns
1422
+#    if(verbose) message("Initializing container for alleleA, alleleB, call, callProbability")
1423
+#    callSet <- new("SnpSuperSet",
1424
+#                    alleleA=initializeBigMatrix(name="A", nr=nrow(res[[1]]), nc=length(sns)),
1425
+#                    alleleB=initializeBigMatrix(name="B", nr=nrow(res[[1]]), nc=length(sns)),
1426
+#                    call=initializeBigMatrix(name="call", nr=nrow(res[[1]]), nc=length(sns)),
1427
+#                    callProbability=initializeBigMatrix(name="callPr", nr=nrow(res[[1]]), nc=length(sns)),
1428
+# 		    annotation=cdfName, protocolData=protD, phenoData=phenD, featureData=fD)
1429
+#    sampleNames(callSet) <- sns
1401 1430
 #            phenoData(callSet) <- getPhenoData(sampleSheet=sampleSheet,
1402 1431
 #                                    arrayNames=sns,
1403 1432
 #								    arrayInfoColNames=arrayInfoColNames)
... ...
@@ -1405,17 +1434,17 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
1405 1434
 #            colnames(pD) <- "ScanDate"
1406 1435
 #            protocolData(callSet) <- pData(protD) # new("AnnotatedDataFrame", data=pD)
1407 1436
 #            pData(protocolData(callSet))[j, ] <- pData(protocolData)
1408
-    featureNames(callSet) <- res[["gns"]]
1409
-    pData(callSet)$SKW <- rep(NA, length(sns))
1410
-    pData(callSet)$SNR <- rep(NA, length(sns))
1411
-    pData(callSet)$gender <- rep(NA, length(sns))			
1437
+#    featureNames(callSet) <- res[["gns"]]
1438
+#    pData(callSet)$SKW <- rep(NA, length(sns))
1439
+#    pData(callSet)$SNR <- rep(NA, length(sns))
1440
+#    pData(callSet)$gender <- rep(NA, length(sns))			
1412 1441
 #	}
1413 1442
 #	pData(callSet)[j,] <- phenD
1414 1443
 #	pData(protocolData(callSet))[j,] <- protD
1415 1444
 #	pData(callSet) <- phenD
1416 1445
 #	pData(protocolData(callSet)) <- protD
1417 1446
 
1418
-    rm(phenD, protD, fD)
1447
+#    rm(phenD, protD, fD)
1419 1448
 	
1420 1449
 #    if(k > 1 & nrow(res[[1]]) != nrow(callSet)){
1421 1450
         ##RS: I don't understand why the IDATS for the
... ...
@@ -1424,24 +1453,26 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
1424 1453
 #        res[["B"]] <- res[["B"]][res$gns %in% featureNames(callSet), ]
1425 1454
 #    }
1426 1455
 
1427
-    snp.index <- res$snpIndex #match(res$gns, featureNames(callSet))                
1456
+#    snp.index <- res$snpIndex #match(res$gns, featureNames(callSet))                
1428 1457
 #    suppressWarnings(A(callSet)[, j] <- res[["A"]])
1429 1458
 #    suppressWarnings(B(callSet)[, j] <- res[["B"]])
1430
-    suppressWarnings(A(callSet) <- res[["A"]])
1431
-    suppressWarnings(B(callSet) <- res[["B"]])
1459
+#    suppressWarnings(A(callSet) <- res[["A"]])
1460
+#    suppressWarnings(B(callSet) <- res[["B"]])
1432 1461
 #    pData(callSet)$SKW[j] <- res$SKW
1433 1462
 #    pData(callSet)$SNR[j] <- res$SNR
1434
-    pData(callSet)$SKW <- res$SKW
1435
-    pData(callSet)$SNR <- res$SNR
1436
-    mixtureParams <- res$mixtureParams
1437
-    rm(res); gc()
1438
-    tmp <- crlmmGT(A=as.matrix(A(callSet)[snp.index,]), # j]),
1439
-                  B=as.matrix(B(callSet)[snp.index,]),  # j]),
1440
-                  SNR=callSet$SNR, # [j],
1441
-                  mixtureParams=mixtureParams,
1442
-                  cdfName=annotation(callSet),
1443
-                  row.names=featureNames(callSet)[snp.index],
1444
-                  col.names=sampleNames(callSet), #[j],
1463
+#    pData(callSet)$SKW <- res$SKW
1464
+#    pData(callSet)$SNR <- res$SNR
1465
+#    mixtureParams <- res$mixtureParams
1466
+#    rm(res); gc()
1467
+  if(row.names) row.names=res$gns else row.names=NULL
1468
+  if(col.names) col.names=res$sns else col.names=NULL    
1469
+  res2 <- crlmmGT2(A=res[["A"]], #as.matrix(A(callSet)[snp.index,]), # j]),
1470
+                  B=res[["B"]], # as.matrix(B(callSet)[snp.index,]),  # j]),
1471
+                  SNR=res[["SNR"]], # callSet$SNR, # [j],
1472
+                  mixtureParams=res[["mixtureParams"]], #,
1473
+                  cdfName=res[["cdfName"]], # annotation(callSet),
1474
+                  row.names=row.names, # featureNames(callSet)[snp.index],
1475
+                  col.names=col.names, # sampleNames(callSet), #[j],
1445 1476
                   probs=probs,
1446 1477
                   DF=DF,
1447 1478
                   SNRMin=SNRMin,
... ...
@@ -1451,14 +1482,44 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
1451 1482
                   verbose=verbose,
1452 1483
                   returnParams=returnParams,
1453 1484
                   badSNP=badSNP)
1485
+#    rm(res); gc()  
1486
+#    suppressWarnings(snpCall(callSet)[snp.index, j] <- tmp[["calls"]])
1487
+#    suppressWarnings(snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]])
1488
+#    callSet$gender[j] <- tmp$gender
1489
+#    suppressWarnings(snpCall(callSet)[snp.index,] <- tmp[["calls"]])
1490
+#    suppressWarnings(snpCallProbability(callSet)[snp.index,] <- tmp[["confs"]])
1491
+#    callSet$gender <- tmp$gender
1492
+#    rm(tmp); gc()
1493
+#    return(callSet)
1494
+
1495
+  res2[["SNR"]] <- res[["SNR"]]
1496
+  res2[["SKW"]] <- res[["SKW"]]
1497
+  rm(res); gc()
1498
+  return(list2SnpSet(res2, returnParams=returnParams))
1499
+#    tmp <- crlmmGT(A=as.matrix(A(callSet)[snp.index,]), # j]),
1500
+#                  B=as.matrix(B(callSet)[snp.index,]),  # j]),
1501
+#                  SNR=callSet$SNR, # [j],
1502
+#                  mixtureParams=mixtureParams,
1503
+#                  cdfName=annotation(callSet),
1504
+#                  row.names=featureNames(callSet)[snp.index],
1505
+#                  col.names=sampleNames(callSet), #[j],
1506
+#                  probs=probs,
1507
+#                  DF=DF,
1508
+#                  SNRMin=SNRMin,
1509
+#                  recallMin=recallMin,
1510
+#                  recallRegMin=recallRegMin,
1511
+#                  gender=gender,
1512
+#                  verbose=verbose,
1513
+#                  returnParams=returnParams,
1514
+#                  badSNP=badSNP)
1454 1515
 #    suppressWarnings(snpCall(callSet)[snp.index, j] <- tmp[["calls"]])
1455 1516
 #    suppressWarnings(snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]])
1456 1517
 #    callSet$gender[j] <- tmp$gender
1457
-    suppressWarnings(snpCall(callSet)[snp.index,] <- tmp[["calls"]])
1458
-    suppressWarnings(snpCallProbability(callSet)[snp.index,] <- tmp[["confs"]])
1459
-    callSet$gender <- tmp$gender
1460
-    rm(tmp); gc()
1518
+#    suppressWarnings(snpCall(callSet)[snp.index,] <- tmp[["calls"]])
1519
+#    suppressWarnings(snpCallProbability(callSet)[snp.index,] <- tmp[["confs"]])
1520
+#    callSet$gender <- tmp$gender
1521
+#    rm(tmp); gc()
1461 1522
 #    k <- k+1
1462 1523
 #  }
1463
-    return(callSet)
1524
+#    return(callSet)
1464 1525
 }
1465 1526
Binary files a/inst/doc/crlmmIllumina.pdf and b/inst/doc/crlmmIllumina.pdf differ
... ...
@@ -44,12 +44,15 @@ In this user guide we read in and genotype data from 40 HapMap samples
44 44
 which have been analyzed using Illumina's 370k Duo BeadChips.
45 45
 This data is available in the \Rpackage{hapmap370k} package.  
46 46
 Additional chip-specific model parameters and basic SNP annotation 
47
-information used by CRLMM is stored in the \Rpackage{human370v1c} package.
48
-These can be downloaded from \href{http://rafalab.jhsph.edu/software.html}{http://rafalab.jhsph.edu/software.html}
49
-and must be installed for the following code to work.
47
+information used by CRLMM is stored in the \Rpackage{human370v1cCrlmm} package.
48
+These packages can be installed in the usual way using the \Rfunction{biocLite} function.
50 49
 
51
-\section{Reading in data}
50
+<<<echo=TRUE, results=hide, eval=FALSE>>=
51
+source("http://www.bioconductor.org/biocLite.R")
52
+biocLite(c("hapmap370k", "human370v1cCrlmm"))
53
+@ 
52 54
 
55
+\section{Reading in data} 
53 56
 The function \Rfunction{readIdatFiles} extracts the Red and Green intensities 
54 57
 from the binary {\tt idat} files output by Illumina's scanning device.
55 58
 The file {\tt samples370k.csv} contains information about each sample.
... ...
@@ -58,7 +61,7 @@ The file {\tt samples370k.csv} contains information about each sample.
58 61
 options(width=50)
59 62
 @ 
60 63
 
61
-<<read>>=
64
+<<read, results=hide, eval=TRUE>>=
62 65
 library(Biobase)
63 66
 library(crlmm)
64 67
 library(hapmap370k)
... ...
@@ -112,8 +115,7 @@ mtext("Array", side=1, outer=TRUE)
112 115
 
113 116
 \section{Genotyping}
114 117
 
115
-Next we use the function \Rfunction{crlmmIllumina} which performs preprocessing followed by genotyping
116
-using the CRLMM algorithm.
118
+Next we use the function \Rfunction{crlmmIllumina} which performs preprocessing followed by genotyping using the CRLMM algorithm.
117 119
 
118 120
 <<genotype, results=hide, cache=TRUE>>=
119 121
 crlmmResult = crlmmIllumina(RG=RG, cdfName="human370v1c", sns=pData(RG)$ID, returnParams=TRUE)