Browse code

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@49010 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 25/08/2010 02:32:48
Showing5 changed files

... ...
@@ -541,4 +541,9 @@ function (which expects ff objects and supports parallel processing)
541 541
 ** added omni express 12 as one of the chip options for Illumina.  Also added a check for copy number probes (this chip doesn't have any).
542 542
 
543 543
 2010-07-30 R. Scharpf 1.7.8
544
-** edited construct function for CNSetLM
545 544
\ No newline at end of file
545
+** edited construct function for CNSetLM
546
+
547
+2010-08-25 M. Ritchie 1.7.12
548
+** Renamed functions readIdatFiles2() -> readIdatFiles(), RGtoXY2() -> RGtoXY(), preprocessInfinium2v2() -> preprocessInfinium2(), crlmmIllumina2() -> crlmmIllumina().  These make use of ff objects to store data.
549
+** Exported crlmmIlluminaV2(), which combines reading in of idats and genotyping in one.  Also added a man page for this function
550
+** tidied up crlmm-illumina.R, removing commented out code.
546 551
\ No newline at end of file
... ...
@@ -1,8 +1,8 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.7.11
5
-Date: 2010-07-30
4
+Version: 1.7.12
5
+Date: 2010-08-25
6 6
 Author: Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>, Ingo Ruczinski <iruczins@jhsph.edu>, Rafael A Irizarry
7 7
 Maintainer: Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
8 8
 Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 5.0, 6.0, and Illumina platforms
... ...
@@ -63,11 +63,10 @@ exportMethods(lines)
63 63
 exportMethods(CA, CB)
64 64
 export(crlmm,
65 65
        crlmmIllumina,
66
-       crlmmIllumina2,
66
+       crlmmIlluminaV2,
67 67
        ellipseCenters,
68 68
        genotype,
69 69
        readIdatFiles,
70
-       readIdatFiles2,
71 70
        snprma,
72 71
        snprma2,
73 72
        crlmm2,
... ...
@@ -12,193 +12,12 @@ readIdatFiles <- function(sampleSheet=NULL,
12 12
 			  sep="_",
13 13
 			  fileExt=list(green="Grn.idat", red="Red.idat"),
14 14
 			  saveDate=FALSE) {
15
-#       if(!is.null(arrayNames)) {
16
-#	       arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
17
-#	       if(!is.null(sampleSheet)) {
18
-#		       sampleSheet=NULL
19
-#		       cat("Could not find required info in \'sampleSheet\' - ignoring.  Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
20
-#	       }
21
-#	       pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
22
-#       }
23
-       if(!is.null(arrayNames)) {
24
-               pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
25
-       }
26
-       if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
27
-	       if(is.null(arrayNames)){
28
-		       ##arrayNames=NULL
29
-		       if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
30
-			       barcode = sampleSheet[,arrayInfoColNames$barcode]
31
-			       arrayNames=barcode
32
-		       }
33
-		       if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) {  
34
-			       position = sampleSheet[,arrayInfoColNames$position]
35
-			       if(is.null(arrayNames))
36
-				       arrayNames=position
37
-			       else
38
-				       arrayNames = paste(arrayNames, position, sep=sep)
39
-			       if(highDensity) {
40
-				       hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02")
41
-				       for(i in names(hdExt))
42
-					       arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames)
43
-			       }
44
-		       }
45
-	       }
46
-	       pd = new("AnnotatedDataFrame", data = sampleSheet)
47
-	       sampleNames(pd) <- basename(arrayNames)               
48
-       }
49
-       if(is.null(arrayNames)) {
50
-               arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
51
-               if(!is.null(sampleSheet)) {
52
-                      sampleSheet=NULL
53
-                      cat("Could not find required info in \'sampleSheet\' - ignoring.  Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
54
-               }
55
-               pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
56
-       }
57
-       narrays = length(arrayNames)
58
-       grnfiles = paste(arrayNames, fileExt$green, sep=sep)
59
-       redfiles = paste(arrayNames, fileExt$red, sep=sep)
60
-       if(length(grnfiles)==0 || length(redfiles)==0)
61
-	       stop("Cannot find .idat files")
62
-       if(length(grnfiles)!=length(redfiles))
63
-	       stop("Cannot find matching .idat files")
64
-       if(path[1] != "."){
65
-	       grnidats = file.path(path, grnfiles)
66
-	       redidats = file.path(path, redfiles)
67
-       }  else {
68
-	       message("path arg not set.  Assuming files are in local directory")
69
-	       grnidats <- grnfiles
70
-	       redidats <- redfiles
71
-       }
72
-       if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files")
73
-       if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files")       
74
-##       if(!all(c(redfiles,grnfiles) %in% dir(path=path))){
75
-##	       stop("Missing .idat files: red\n", paste(redfiles[!(redfiles %in% dir(path=path))], sep=" "), "\n green\n",
76
-##		    paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" "))
77
-##       }
78
-       headerInfo = list(nProbes = rep(NA, narrays),
79
-                         Barcode = rep(NA, narrays),
80
-                         ChipType = rep(NA, narrays),
81
-                         Manifest = rep(NA, narrays), # not sure about this one - sometimes blank
82
-                         Position = rep(NA, narrays)) # this may also vary a bit
83
-       dates = list(decode=rep(NA, narrays),
84
-                    scan=rep(NA, narrays))
85
-       ## read in the data
86
-       for(i in seq(along=arrayNames)) {
87
-	       cat("reading", arrayNames[i], "\t")
88
-	       idsG = idsR = G = R = NULL
89
-	       cat(paste(sep, fileExt$green, sep=""), "\t")
90
-	       G = readIDAT(grnidats[i])
91
-	       idsG = rownames(G$Quants)
92
-	       headerInfo$nProbes[i] = G$nSNPsRead
93
-	       headerInfo$Barcode[i] = G$Barcode
94
-	       headerInfo$ChipType[i] = G$ChipType
95
-	       headerInfo$Manifest[i] = G$Unknown$MostlyNull
96
-	       headerInfo$Position[i] = G$Unknowns$MostlyA
97
-               if(headerInfo$nProbes[i]>(headerInfo$nProbes[1]+10000) || headerInfo$nProbes[i]<(headerInfo$nProbes[1]-10000)) {
98
-                       ## headerInfo$ChipType[i]!=headerInfo$ChipType[1] || headerInfo$Manifest[i]!=headerInfo$Manifest[1]) {
99
-		       ## || headerInfo$nProbes[i]!=headerInfo$nProbes[1] ## removed this condition as some arrays used the same manifest
100
-		       ## but differed by a few SNPs for some reason - most of the chip was the same though
101
-		       ##           stop("Chips are not of all of the same type - please check your data")
102
-		       warning("Chips are not of the same type.  Skipping ", basename(grnidats[i]), " and ", basename(redidats[i]))
103
-		       next()
104
-	       }
105
-	       dates$decode[i] = G$RunInfo[1, 1]
106
-	       dates$scan[i] = G$RunInfo[2, 1]
107
-	       if(i==1) {
108
-		       if(is.null(ids) && !is.null(G)){
109
-			       ids = idsG
110
-		       }else stop("Could not find probe IDs")
111
-		       nprobes = length(ids)
112
-		       narrays = length(arrayNames)
113
-		       tmpmat = matrix(NA, nprobes, narrays)
114
-		       rownames(tmpmat) = ids
115
-		       if(!is.null(sampleSheet)){
116
-			       colnames(tmpmat) = sampleSheet$Sample_ID
117
-		       }else  colnames(tmpmat) = arrayNames
118
-		       RG <- new("NChannelSet",
119
-				 R=tmpmat,
120
-				 G=tmpmat,
121
-				 zero=tmpmat,
122
-#				 Rnb=tmpmat,
123
-#				 Gnb=tmpmat,
124
-#				 Rse=tmpmat,
125
-#				 Gse=tmpmat,
126
-				 annotation=headerInfo$Manifest[1],
127
-				 phenoData=pd,
128
-				 storage.mode="environment")
129
-		       rm(tmpmat)
130
-		       gc()
131
-	       }
132
-	       if(length(ids)==length(idsG)) {
133
-		       if(sum(ids==idsG)==nprobes) {
134
-			       RG@assayData$G[,i] = G$Quants[, "Mean"]
135
-				   zeroG = G$Quants[, "NBeads"]==0
136
-#			       RG@assayData$Gnb[,i] = G$Quants[, "NBeads"]
137
-#			       RG@assayData$Gse[,i] = G$Quants[, "SD"]
138
-		       }
139
-	       } else {
140
-		       indG = match(ids, idsG)
141
-		       RG@assayData$G[,i] = G$Quants[indG, "Mean"]
142
-			   zeroG = G$Quants[indG, "NBeads"]==0
143
-#		       RG@assayData$Gnb[,i] = G$Quants[indG, "NBeads"]
144
-#		       RG@assayData$Gse[,i] = G$Quants[indG, "SD"]
145
-	       }
146
-	       rm(G)
147
-	       gc()
148
-         
149
-	       cat(paste(sep, fileExt$red, sep=""), "\n")
150
-	       R = readIDAT(redidats[i])
151
-	       idsR = rownames(R$Quants)
152
-         
153
-	       if(length(ids)==length(idsG)) {   
154
-		       if(sum(ids==idsR)==nprobes) {
155
-			       RG@assayData$R[,i] = R$Quants[ ,"Mean"]
156
-				   zeroR = R$Quants[ ,"NBeads"]==0
157
-#			       RG@assayData$Rnb[,i] = R$Quants[ ,"NBeads"]
158
-#			       RG@assayData$Rse[,i] = R$Quants[ ,"SD"]
159
-		       }
160
-	       } else {
161
-		       indR = match(ids, idsR)
162
-		       RG@assayData$R[,i] = R$Quants[indR, "Mean"]
163
-			   zeroR = R$Quants[indR, "NBeads"]==0
164
-#		       RG@assayData$Rnb[,i] = R$Quants[indR, "NBeads"]
165
-#		       RG@assayData$Rse[,i] = R$Quants[indR, "SD"]
166
-	       }
167
-		   RG@assayData$zero[,i] = zeroG | zeroR
168
-	       rm(R, zeroG, zeroR)
169
-	       gc()
170
-       }
171
-       if(saveDate) {
172
-	       protocolData(RG)[["ScanDate"]] = dates$scan
173
-       }
174
-       storageMode(RG) = "lockedEnvironment"
175
-       RG
176
-}
177
-
178
-
179
-readIdatFiles2 <- function(sampleSheet=NULL,
180
-			  arrayNames=NULL,
181
-			  ids=NULL,
182
-			  path=".",
183
-			  arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
184
-			  highDensity=FALSE,
185
-			  sep="_",
186
-			  fileExt=list(green="Grn.idat", red="Red.idat"),
187
-			  saveDate=FALSE) {
188
-#       if(!is.null(arrayNames)) {
189
-#	       arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
190
-#	       if(!is.null(sampleSheet)) {
191
-#		       sampleSheet=NULL
192
-#		       cat("Could not find required info in \'sampleSheet\' - ignoring.  Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
193
-#	       }
194
-#	       pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
195
-#       }
15
+			  
196 16
        if(!is.null(arrayNames)) {
197 17
                pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
198 18
        }
199 19
        if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
200 20
 	       if(is.null(arrayNames)){
201
-		       ##arrayNames=NULL
202 21
 		       if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
203 22
 			       barcode = sampleSheet[,arrayInfoColNames$barcode]
204 23
 			       arrayNames=barcode
... ...
@@ -245,10 +64,6 @@ readIdatFiles2 <- function(sampleSheet=NULL,
245 64
        }
246 65
        if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files")
247 66
        if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files")       
248
-##       if(!all(c(redfiles,grnfiles) %in% dir(path=path))){
249
-##	       stop("Missing .idat files: red\n", paste(redfiles[!(redfiles %in% dir(path=path))], sep=" "), "\n green\n",
250
-##		    paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" "))
251
-##       }
252 67
        headerInfo = list(nProbes = rep(NA, narrays),
253 68
                          Barcode = rep(NA, narrays),
254 69
                          ChipType = rep(NA, narrays),
... ...
@@ -268,11 +83,7 @@ readIdatFiles2 <- function(sampleSheet=NULL,
268 83
 	       headerInfo$ChipType[i] = G$ChipType
269 84
 	       headerInfo$Manifest[i] = G$Unknown$MostlyNull
270 85
 	       headerInfo$Position[i] = G$Unknowns$MostlyA
271
-               if(headerInfo$nProbes[i]>(headerInfo$nProbes[1]+10000) || headerInfo$nProbes[i]<(headerInfo$nProbes[1]-10000)) {
272
-                       ## headerInfo$ChipType[i]!=headerInfo$ChipType[1] || headerInfo$Manifest[i]!=headerInfo$Manifest[1]) {
273
-		       ## || headerInfo$nProbes[i]!=headerInfo$nProbes[1] ## removed this condition as some arrays used the same manifest
274
-		       ## but differed by a few SNPs for some reason - most of the chip was the same though
275
-		       ##           stop("Chips are not of all of the same type - please check your data")
86
+           if(headerInfo$nProbes[i]>(headerInfo$nProbes[1]+10000) || headerInfo$nProbes[i]<(headerInfo$nProbes[1]-10000)) {
276 87
 		       warning("Chips are not of the same type.  Skipping ", basename(grnidats[i]), " and ", basename(redidats[i]))
277 88
 		       next()
278 89
 	       }
... ...
@@ -284,27 +95,13 @@ readIdatFiles2 <- function(sampleSheet=NULL,
284 95
 		       } else stop("Could not find probe IDs")
285 96
 		       nprobes = length(ids)
286 97
 		       narrays = length(arrayNames)
287
-#		        tmpmat = matrix(NA, nprobes, narrays)
288
-#		        rownames(tmpmat) = ids
289
-#		       fD <- new("AnnotatedDataFrame", data=data.frame(row.names=ids))##, varMetadata=data.frame(labelDescript
290 98
 		       RG <- new("NChannelSet",
291 99
 		                 R=initializeBigMatrix(name="R", nr=nprobes, nc=narrays, vmode="integer"),
292 100
 		                 G=initializeBigMatrix(name="G", nr=nprobes, nc=narrays, vmode="integer"),
293 101
 		                 zero=initializeBigMatrix(name="zero", nr=nprobes, nc=narrays, vmode="integer"),
294
-#		                 featureData=fD,
295
-#		                 annotation=cdfName)
296
-#				 R=tmpmat,
297
-#				 G=tmpmat,
298
-#				 zero=tmpmat,
299
-#				 Rnb=tmpmat,
300
-#				 Gnb=tmpmat,
301
-#				 Rse=tmpmat,
302
-#				 Gse=tmpmat,
303
-				 annotation=headerInfo$Manifest[1],
304
-				 phenoData=pd,
305
-				 storage.mode="environment")
306
-                       featureNames(RG) = ids
307
-#		       rm(tmpmat)
102
+				         annotation=headerInfo$Manifest[1],
103
+				         phenoData=pd, storage.mode="environment")
104
+			   featureNames(RG) = ids
308 105
 		       if(!is.null(sampleSheet) && !is.null(sampleSheet$Sample_ID)){
309 106
 		            sampleNames(RG) = sampleSheet$Sample_ID
310 107
 		       } else  sampleNames(RG) = arrayNames
... ...
@@ -314,15 +111,11 @@ readIdatFiles2 <- function(sampleSheet=NULL,
314 111
 		       if(sum(ids==idsG)==nprobes) {
315 112
 			       RG@assayData$G[,i] = G$Quants[, "Mean"]
316 113
 			       zeroG = G$Quants[, "NBeads"]==0
317
-#			       RG@assayData$Gnb[,i] = G$Quants[, "NBeads"]
318
-#			       RG@assayData$Gse[,i] = G$Quants[, "SD"]
319 114
 		       }
320 115
 	       } else {
321 116
 		       indG = match(ids, idsG)
322 117
 		       RG@assayData$G[,i] = G$Quants[indG, "Mean"]
323 118
 		       zeroG = G$Quants[indG, "NBeads"]==0
324
-#		       RG@assayData$Gnb[,i] = G$Quants[indG, "NBeads"]
325
-#		       RG@assayData$Gse[,i] = G$Quants[indG, "SD"]
326 119
 	       }
327 120
 	       rm(G)
328 121
 	       gc()
... ...
@@ -334,16 +127,12 @@ readIdatFiles2 <- function(sampleSheet=NULL,
334 127
 	       if(length(ids)==length(idsG)) {   
335 128
 		       if(sum(ids==idsR)==nprobes) {
336 129
 			       RG@assayData$R[,i] = R$Quants[ ,"Mean"]
337
-		               zeroR = R$Quants[ ,"NBeads"]==0
338
-#			       RG@assayData$Rnb[,i] = R$Quants[ ,"NBeads"]
339
-#			       RG@assayData$Rse[,i] = R$Quants[ ,"SD"]
130
+		           zeroR = R$Quants[ ,"NBeads"]==0
340 131
 		       }
341 132
 	       } else {
342 133
 		       indR = match(ids, idsR)
343 134
 		       RG@assayData$R[,i] = R$Quants[indR, "Mean"]
344 135
 		       zeroR = R$Quants[indR, "NBeads"]==0
345
-#		       RG@assayData$Rnb[,i] = R$Quants[indR, "NBeads"]
346
-#		       RG@assayData$Rse[,i] = R$Quants[indR, "SD"]
347 136
 	       }
348 137
 	       RG@assayData$zero[,i] = zeroG | zeroR
349 138
 	       rm(R, zeroG, zeroR)
... ...
@@ -615,114 +404,8 @@ readBPM <- function(bpmFile){
615 404
   
616 405
 }
617 406
 
618
-RGtoXY = function(RG, chipType, verbose=TRUE) {
619
-  chipList = c("human1mv1c",             # 1M
620
-               "human370v1c",            # 370CNV
621
-               "human650v3a",            # 650Y
622
-               "human610quadv1b",        # 610 quad
623
-               "human660quadv1a",        # 660 quad
624
-               "human370quadv3c",        # 370CNV quad
625
-               "human550v3b",            # 550K
626
-               "human1mduov3b",          # 1M Duo
627
-               "humanomni1quadv1b",      # Omni1 quad
628
-               "humanomniexpress12v1b") # Omni express 12
629
-  if(missing(chipType)){
630
-	  chipType = match.arg(annotation(RG), chipList)
631
-  } else chipType = match.arg(chipType, chipList)
632
-  
633
-  pkgname <- getCrlmmAnnotationName(chipType)
634
-  if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
635
-     suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
636
-     msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
637
-     message(strwrap(msg))
638
-     stop("Package ", pkgname, " could not be found.")
639
-     rm(suggCall, msg)
640
-  }
641
-  if(verbose) message("Loading chip annotation information.")
642
-    loader("address.rda", .crlmmPkgEnv, pkgname)
643
-#    data(annotation, package=pkgname, envir=.crlmmPkgEnv)
644
-  aids <- getVarInEnv("addressA") # comes from AddressA_ID or Address column in manifest
645
-  bids <- getVarInEnv("addressB") # comes from AddressB_ID or Address2 column in manifest
646
-  ids <- names(aids)
647
-  snpbase <- getVarInEnv("base")
648
-  
649
-  nsnps = length(aids)
650
-  narrays = ncol(RG)
651
-  
652
-#  aidcol = match("AddressA_ID", colnames(annot))
653
-#  if(is.na(aidcol))
654
-#    aidcol = match("Address", colnames(annot))
655
-#  bidcol = match("AddressB_ID", colnames(annot))
656
-#  if(is.na(bidcol)) 
657
-#    bidcol = match("Address2", colnames(annot))
658
-#  aids = annot[, aidcol]
659
-#  bids = annot[, bidcol]
660
-#  snpids = annot[,"Name"]
661
-#  snpbase = annot[,"SNP"] 
662
-  infI = !is.na(bids) & bids!=0  
663
-  aord = match(aids, featureNames(RG)) # NAs are possible here
664
-  bord = match(bids, featureNames(RG)) # and here
665
-#  argrg = aids[rrgg]
666
-#  brgrg = bids[rrgg]
667 407
 
668
-  tmpmat = matrix(as.integer(0), nsnps, narrays)
669
-  rownames(tmpmat) = ids
670
-  colnames(tmpmat) = sampleNames(RG)
671
-  XY = new("NChannelSet", X=tmpmat, Y=tmpmat, zero=tmpmat, # Xnb=tmpmat, Ynb=tmpmat, Xse=tmpmat, Yse=tmpmat, zero=tmpmat,
672
-                 annotation=chipType, phenoData=RG@phenoData, protocolData=RG@protocolData, storage.mode="environment")
673
-  rm(tmpmat)
674
-  gc()
675
-  
676
-  # First sort out Infinium II SNPs, X -> R (allele A)  and Y -> G (allele B) from the same probe
677
-  XY@assayData$X[!is.na(aord),] = exprs(channel(RG, "R"))[aord[!is.na(aord)],] # mostly red
678
-  XY@assayData$Y[!is.na(aord),] = exprs(channel(RG, "G"))[aord[!is.na(aord)],] # mostly green
679
-  XY@assayData$zero[!is.na(aord),] = exprs(channel(RG, "zero"))[aord[!is.na(aord)],] # mostly green
680
-#  XY@assayData$Xnb[!is.na(aord),] = exprs(channel(RG, "Rnb"))[aord[!is.na(aord)],]
681
-#  XY@assayData$Ynb[!is.na(aord),] = exprs(channel(RG, "Gnb"))[aord[!is.na(aord)],]
682
-#  XY@assayData$Xse[!is.na(aord),] = exprs(channel(RG, "Rse"))[aord[!is.na(aord)],]
683
-#  XY@assayData$Yse[!is.na(aord),] = exprs(channel(RG, "Gse"))[aord[!is.na(aord)],]
684
-  gc()
685
-  
686
-  ## Warning - not 100% sure that the code below is correct - could be more complicated than this
687
-  
688
-  # Next Infinium I where X -> R from allele A probe and Y -> R from allele B probe
689
-#  infIRR = infI & (snpbase=="[A/T]" | snpbase=="[T/A]" | snpbase=="[a/t]" | snpbase=="[t/a]")
690
-  
691
-#  X[infIRR,] = exprs(channel(RG, "R"))[aord[infIRR],] # mostly red
692
-#  Y[infIRR,] = exprs(channel(RG, "R"))[bord[infIRR],] # mostly green
693
-#  Xnb[infIRR,] = exprs(channel(RG, "Rnb"))[aord[infIRR],]
694
-#  Ynb[infIRR,] = exprs(channel(RG, "Rnb"))[bord[infIRR],]
695
-#  Xse[infIRR,] = exprs(channel(RG, "Rse"))[aord[infIRR],]
696
-#  Yse[infIRR,] = exprs(channel(RG, "Rse"))[bord[infIRR],]
697
-  
698
-  # Finally Infinium I where X -> G from allele A probe and Y -> G from allele B probe
699
-#  infIGG = infI & (snpbase=="[C/G]" | snpbase=="[G/C]" | snpbase=="[g/c]" | snpbase=="[c/g]")
700
-
701
-#  X[infIGG,] = exprs(channel(RG, "G"))[aord[infIGG],] # mostly red
702
-#  Y[infIGG,] = exprs(channel(RG, "G"))[bord[infIGG],] # mostly green
703
-#  Xnb[infIGG,] = exprs(channel(RG, "Gnb"))[aord[infIGG],]
704
-#  Ynb[infIGG,] = exprs(channel(RG, "Gnb"))[bord[infIGG],]
705
-#  Xse[infIGG,] = exprs(channel(RG, "Gse"))[aord[infIGG],]
706
-#  Yse[infIGG,] = exprs(channel(RG, "Gse"))[bord[infIGG],]
707
-    
708
-  #  For now zero out Infinium I probes
709
-  XY@assayData$X[infI,] = 0
710
-  XY@assayData$Y[infI,] = 0
711
-  XY@assayData$zero[infI,] = 0  
712
-#  XY@assayData$Xnb[infI,] = 0
713
-#  XY@assayData$Ynb[infI,] = 0
714
-#  XY@assayData$Xse[infI,] = 0
715
-#  XY@assayData$Yse[infI,] = 0
716
-
717
-#  XY@assayData$zero[XY@assayData$X==0 | XY@assayData$Y==0] = 1
718
-  gc()
719
-
720
-#  storageMode(XY) = "lockedEnvironment"
721
-  XY
722
-}
723
-
724
-
725
-RGtoXY2 = function(RG, chipType, verbose=TRUE) {
408
+RGtoXY = function(RG, chipType, verbose=TRUE) {
726 409
   chipList = c("human1mv1c",             # 1M
727 410
                "human370v1c",            # 370CNV
728 411
                "human650v3a",            # 650Y
... ...
@@ -747,7 +430,6 @@ RGtoXY2 = function(RG, chipType, verbose=TRUE) {
747 430
   }
748 431
   if(verbose) message("Loading chip annotation information.")
749 432
     loader("address.rda", .crlmmPkgEnv, pkgname)
750
-#    data(annotation, package=pkgname, envir=.crlmmPkgEnv)
751 433
   aids <- getVarInEnv("addressA") # comes from AddressA_ID or Address column in manifest
752 434
   bids <- getVarInEnv("addressB") # comes from AddressB_ID or Address2 column in manifest
753 435
   ids <- names(aids)
... ...
@@ -772,12 +454,11 @@ RGtoXY2 = function(RG, chipType, verbose=TRUE) {
772 454
 #  argrg = aids[rrgg]
773 455
 #  brgrg = bids[rrgg]
774 456
 
775
-#  fD <- new("AnnotatedDataFrame", data=data.frame(row.names=ids))##, varMetadata=data.frame(labelDescript
776 457
   XY <- new("NChannelSet",
777 458
 	     X=initializeBigMatrix(name="X", nr=nsnps, nc=narrays, vmode="integer"),
778 459
 	     Y=initializeBigMatrix(name="Y", nr=nsnps, nc=narrays, vmode="integer"),
779 460
 	     zero=initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer"),
780
-	     annotation=chipType, phenoData=RG@phenoData, # featureData=fD
461
+	     annotation=chipType, phenoData=RG@phenoData, 
781 462
 	     protocolData=RG@protocolData, storage.mode="environment")
782 463
   featureNames(XY) = ids # featureNames(RG)
783 464
   gc()
... ...
@@ -790,10 +471,6 @@ RGtoXY2 = function(RG, chipType, verbose=TRUE) {
790 471
   XY@assayData$X[!is.na(aord),] = exprs(channel(RG, "R"))[aord[!is.na(aord)],] # mostly red
791 472
   XY@assayData$Y[!is.na(aord),] = exprs(channel(RG, "G"))[aord[!is.na(aord)],] # mostly green
792 473
   XY@assayData$zero[!is.na(aord),] = exprs(channel(RG, "zero"))[aord[!is.na(aord)],] # mostly green
793
-#  XY@assayData$Xnb[!is.na(aord),] = exprs(channel(RG, "Rnb"))[aord[!is.na(aord)],]
794
-#  XY@assayData$Ynb[!is.na(aord),] = exprs(channel(RG, "Gnb"))[aord[!is.na(aord)],]
795
-#  XY@assayData$Xse[!is.na(aord),] = exprs(channel(RG, "Rse"))[aord[!is.na(aord)],]
796
-#  XY@assayData$Yse[!is.na(aord),] = exprs(channel(RG, "Gse"))[aord[!is.na(aord)],]
797 474
   gc()
798 475
   
799 476
   ## Warning - not 100% sure that the code below is correct - could be more complicated than this
... ...
@@ -803,31 +480,17 @@ RGtoXY2 = function(RG, chipType, verbose=TRUE) {
803 480
   
804 481
 #  X[infIRR,] = exprs(channel(RG, "R"))[aord[infIRR],] # mostly red
805 482
 #  Y[infIRR,] = exprs(channel(RG, "R"))[bord[infIRR],] # mostly green
806
-#  Xnb[infIRR,] = exprs(channel(RG, "Rnb"))[aord[infIRR],]
807
-#  Ynb[infIRR,] = exprs(channel(RG, "Rnb"))[bord[infIRR],]
808
-#  Xse[infIRR,] = exprs(channel(RG, "Rse"))[aord[infIRR],]
809
-#  Yse[infIRR,] = exprs(channel(RG, "Rse"))[bord[infIRR],]
810 483
   
811 484
   # Finally Infinium I where X -> G from allele A probe and Y -> G from allele B probe
812 485
 #  infIGG = infI & (snpbase=="[C/G]" | snpbase=="[G/C]" | snpbase=="[g/c]" | snpbase=="[c/g]")
813 486
 
814 487
 #  X[infIGG,] = exprs(channel(RG, "G"))[aord[infIGG],] # mostly red
815 488
 #  Y[infIGG,] = exprs(channel(RG, "G"))[bord[infIGG],] # mostly green
816
-#  Xnb[infIGG,] = exprs(channel(RG, "Gnb"))[aord[infIGG],]
817
-#  Ynb[infIGG,] = exprs(channel(RG, "Gnb"))[bord[infIGG],]
818
-#  Xse[infIGG,] = exprs(channel(RG, "Gse"))[aord[infIGG],]
819
-#  Yse[infIGG,] = exprs(channel(RG, "Gse"))[bord[infIGG],]
820 489
     
821 490
   #  For now zero out Infinium I probes
822 491
   XY@assayData$X[infI,] = 0
823 492
   XY@assayData$Y[infI,] = 0
824 493
   XY@assayData$zero[infI,] = 0  
825
-#  XY@assayData$Xnb[infI,] = 0
826
-#  XY@assayData$Ynb[infI,] = 0
827
-#  XY@assayData$Xse[infI,] = 0
828
-#  XY@assayData$Yse[infI,] = 0
829
-
830
-#  XY@assayData$zero[XY@assayData$X==0 | XY@assayData$Y==0] = 1
831 494
   gc()
832 495
 
833 496
 #  storageMode(XY) = "lockedEnvironment"
... ...
@@ -846,17 +509,12 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
846 509
   }
847 510
   if(verbose) message("Loading strip and reference normalization information.")
848 511
   loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
849
-#  data(preprocStuff, package=pkgname, envir=.crlmmPkgEnv)
850 512
 
851 513
   stripnum <- getVarInEnv("stripnum")
852 514
   
853 515
   if(useTarget)
854 516
     targetdist = getVarInEnv("reference")
855 517
   
856
-#  Xqws = Yqws = matrix(0, nrow(XY), ncol(XY))
857
-#  colnames(Xqws) = colnames(Yqws) = sampleNames(XY) #$X
858
-#  rownames(Xqws) = rownames(Yqws) = featureNames(XY)
859
-
860 518
   if(verbose){
861 519
     message("Quantile normalizing ", ncol(XY), " arrays by ", max(stripnum), " strips.")
862 520
     if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=max(stripnum), style=3)
... ...
@@ -883,160 +541,10 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
883 541
   if(verbose)
884 542
     cat("\n")
885 543
   XY
886
-#  XYNorm = new("NChannelSet",
887
-#                X=Xqws+16,
888
-#                Y=Yqws+16,
889
-#                Xnb=exprs(channel(XY, "Xnb")),
890
-#                Ynb=exprs(channel(XY, "Ynb")),
891
-#                Xse=exprs(channel(XY, "Xse")),
892
-#                Yse=exprs(channel(XY, "Yse")),
893
-#                zero=exprs(channel(XY, "zero")),
894
-#                annotation=annotation(XY),
895
-#                phenoData=XY@phenoData)
896 544
 }
897 545
 
898 546
 
899 547
 preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
900
-				fitMixture=TRUE,
901
-				eps=0.1,
902
-				verbose=TRUE,
903
-				seed=1,
904
-				cdfName,
905
-				sns,
906
-				stripNorm=TRUE,
907
-				useTarget=TRUE,
908
-				save.it=FALSE,
909
-				snpFile,
910
-				cnFile) {
911
-  if(stripNorm)
912
-    XY = stripNormalize(XY, useTarget=useTarget, verbose=verbose)
913
-
914
-## MR: the code below is mostly straight from snprma.R
915
-  if (missing(sns)) sns <- sampleNames(XY) #$X
916
-  if(missing(cdfName))
917
-    cdfName <- annotation(XY)
918
-##  stuffDir <- changeToCrlmmAnnotationName(cdfName)
919
-  pkgname <- getCrlmmAnnotationName(cdfName)
920
-  if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
921
-    suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
922
-    msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
923
-    message(strwrap(msg))
924
-    stop("Package ", pkgname, " could not be found.")
925
-    rm(suggCall, msg)
926
-  }
927
-  if(verbose) message("Loading snp annotation and mixture model parameters.")
928
-  loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
929
-  loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
930
-  loader("snpProbesFid.rda", .crlmmPkgEnv, pkgname)
931
-  loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
932
-#  data(genotypeStuff, mixtureStuff, package=pkgname, envir=.crlmmPkgEnv)
933
-  autosomeIndex <- getVarInEnv("autosomeIndex")
934
-  #  pnsa <- getVarInEnv("pnsa")
935
-  #  pnsb <- getVarInEnv("pnsb")
936
-  #  fid <- getVarInEnv("fid")
937
-  #  reference <- getVarInEnv("reference")
938
-  #  aIndex <- getVarInEnv("aIndex")
939
-  #  bIndex <- getVarInEnv("bIndex")
940
-  SMEDIAN <- getVarInEnv("SMEDIAN")
941
-  theKnots <- getVarInEnv("theKnots")
942
-  gns <- getVarInEnv("gns")
943
-  
944
-  # separate out copy number probes
945
-  npIndex = getVarInEnv("npProbesFid")
946
-  nprobes = length(npIndex)
947
-  if(length(nprobes)>0) {
948
-    narrays = ncol(XY)
949
-    A <- matrix(as.integer(exprs(channel(XY, "X"))[npIndex,]), nprobes, narrays)
950
-    B <- matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays)
951
-
952
-    # new lines below - useful to keep track of zeroed out probes
953
-    zero <- matrix(as.integer(exprs(channel(XY, "zero"))[npIndex,]), nprobes, narrays) 
954
-
955
-    colnames(A) <- colnames(B) <- colnames(zero) <- sns
956
-    rownames(A) <- rownames(B) <- rownames(zero) <- names(npIndex)
957
-  
958
-    cnAB = list(A=A, B=B, zero=zero, sns=sns, gns=names(npIndex), cdfName=cdfName)
959
-    if(save.it & !missing(cnFile)) {
960
-      t0 <- proc.time() 
961
-      save(cnAB, file=cnFile) 
962
-      t0 <- proc.time()-t0
963
-      if(verbose) message("Used ", round(t0[3],1), " seconds to save ", cnFile, ".")
964
-    }
965
-    rm(cnAB, B, zero)
966
-  }
967
-
968
-  # next process snp probes
969
-  snpIndex = getVarInEnv("snpProbesFid")
970
-  nprobes <- length(snpIndex)
971
-  
972
-  ##We will read each cel file, summarize, and run EM one by one
973
-  ##We will save parameters of EM to use later
974
-  mixtureParams <- matrix(0, 4, narrays)
975
-  SNR <- vector("numeric", narrays)
976
-  SKW <- vector("numeric", narrays)
977
-
978
-  ## This is the sample for the fitting of splines
979
-  ## BC: I like better the idea of the user passing the seed,
980
-  ##     because this might intefere with other analyses
981
-  ##     (like what happened to GCRMA)
982
-  set.seed(seed)
983
-  idx <- sort(sample(autosomeIndex, mixtureSampleSize))
984
-  idx2 <- sample(nprobes, 10^5)
985
-  
986
-  ##S will hold (A+B)/2 and M will hold A-B
987
-  ##NOTE: We actually dont need to save S. Only for pics etc...
988
-  ##f is the correction. we save to avoid recomputing
989
-  A <- matrix(as.integer(exprs(channel(XY, "X"))[snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsa), length(filenames))
990
-  B <- matrix(as.integer(exprs(channel(XY, "Y"))[snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsb), length(filenames))
991
-
992
-  # new lines below - useful to keep track of zeroed out SNPs
993
-  zero <- matrix(as.integer(exprs(channel(XY, "zero"))[snpIndex,]), nprobes, narrays)
994
-    
995
-  colnames(A) <- colnames(B) <- colnames(zero) <- sns
996
-  rownames(A) <- rownames(B) <- rownames(zero) <- names(snpIndex) # gns # featureNames(XY)
997
-  
998
-  if(verbose){
999
-     message("Calibrating ", narrays, " arrays.")
1000
-     if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=narrays, style=3)
1001
-  }
1002
-
1003
-
1004
-  for(i in 1:narrays){
1005
-    SKW[i] = mean((A[idx2,i]-mean(A[idx2,i]))^3)/(sd(A[idx2,i])^3)
1006
-    if(fitMixture){
1007
-      S <- (log2(A[idx, i])+log2(B[idx, i]))/2 - SMEDIAN
1008
-      M <- log2(A[idx, i])-log2(B[idx, i])
1009
-
1010
-      ##we need to test the choice of eps.. it is not the max diff between funcs
1011
-      tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
1012
-
1013
-      mixtureParams[, i] <- tmp[["coef"]]
1014
-      SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
1015
-    }
1016
-    if(verbose) {
1017
-       if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
1018
-       else cat(".")
1019
-    }
1020
-  }
1021
-  if (verbose) {
1022
-    if (getRversion() > '2.7.0') close(pb)
1023
-    else cat("\n")
1024
-  }
1025
-  if (!fitMixture) SNR <- mixtureParams <- NA
1026
-  ## gns comes from preprocStuff.rda
1027
-  res = list(A=A, B=B, zero=zero, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)
1028
-
1029
-  if(save.it & !missing(snpFile)) {
1030
-    t0 <- proc.time() 
1031
-    save(res, file=snpFile) 
1032
-    t0 <- proc.time()-t0
1033
-    if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".")
1034
-  }
1035
-  return(res)
1036
-}
1037
-
1038
-
1039
-preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5,
1040 548
 				fitMixture=TRUE,
1041 549
 				eps=0.1,
1042 550
 				verbose=TRUE,
... ...
@@ -1055,7 +563,6 @@ preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5,
1055 563
   if (missing(sns)) sns <- sampleNames(XY) #$X
1056 564
   if(missing(cdfName))
1057 565
     cdfName <- annotation(XY)
1058
-##  stuffDir <- changeToCrlmmAnnotationName(cdfName)
1059 566
   pkgname <- getCrlmmAnnotationName(cdfName)
1060 567
   if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
1061 568
     suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
... ...
@@ -1069,17 +576,9 @@ preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5,
1069 576
   loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
1070 577
   loader("snpProbesFid.rda", .crlmmPkgEnv, pkgname)
1071 578
   loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
1072
-#  data(genotypeStuff, mixtureStuff, package=pkgname, envir=.crlmmPkgEnv)
1073 579
   autosomeIndex <- getVarInEnv("autosomeIndex")
1074
-  #  pnsa <- getVarInEnv("pnsa")
1075
-  #  pnsb <- getVarInEnv("pnsb")
1076
-  #  fid <- getVarInEnv("fid")
1077
-  #  reference <- getVarInEnv("reference")
1078
-  #  aIndex <- getVarInEnv("aIndex")
1079
-  #  bIndex <- getVarInEnv("bIndex")
1080 580
   SMEDIAN <- getVarInEnv("SMEDIAN")
1081 581
   theKnots <- getVarInEnv("theKnots")
1082
-#  gns <- featureNames(XY) # getVarInEnv("gns") # needs to include np probes - gns is only for snps
1083 582
   narrays = ncol(XY)
1084 583
   
1085 584
   if(save.it & !missing(cnFile)) {
... ...
@@ -1115,9 +614,6 @@ preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5,
1115 614
   mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, narrays, "double")
1116 615
   SNR <- initializeBigVector("crlmmSNR-", narrays, "double")
1117 616
   SKW <- initializeBigVector("crlmmSKW-", narrays, "double") 
1118
-#  mixtureParams <- matrix(0, 4, narrays)
1119
-#  SNR <- vector("numeric", narrays)
1120
-#  SKW <- vector("numeric", narrays)
1121 617
 
1122 618
   ## This is the sample for the fitting of splines
1123 619
   ## BC: I like better the idea of the user passing the seed,
... ...
@@ -1130,25 +626,6 @@ preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5,
1130 626
   ##S will hold (A+B)/2 and M will hold A-B
1131 627
   ##NOTE: We actually dont need to save S. Only for pics etc...
1132 628
   ##f is the correction. we save to avoid recomputing
1133
-#  A <- exprs(channel(XY, "X"))[,] # ), nrow(XY), narrays) # [snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsa), length(filenames))
1134
-#  B <- exprs(channel(XY, "Y"))[,] # ), nrow(XY), narrays) # [snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsb), length(filenames))
1135
-
1136
-  # new lines below - useful to keep track of zeroed out SNPs
1137
-#  zero <- exprs(channel(XY, "zero"))[,] # )) #[snpIndex,]), nprobes, narrays)
1138
-
1139
-#  if(!is.matrix(A)) {
1140
-#     A = A[,]
1141
-#     B = B[,]
1142
-#     zero = zero[,]
1143
-#  }
1144
-
1145
-#  if(!is.integer(A)) {
1146
-#     A = matrix(as.integer(A), nrow(A), ncol(A))
1147
-#     B = matrix(as.integer(B), nrow(B), ncol(B))
1148
-#  }  
1149
-    
1150
-#  colnames(A) <- colnames(B) <- colnames(zero) <- sns
1151
-#  rownames(A) <- rownames(B) <- rownames(zero) <- names(snpIndex) # gns # featureNames(XY)
1152 629
 
1153 630
   A <- initializeBigMatrix("crlmmA-", nprobes, narrays, "integer")
1154 631
   B <- initializeBigMatrix("crlmmB-", nprobes, narrays, "integer")
... ...
@@ -1188,7 +665,7 @@ preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5,
1188 665
   ## gns comes from preprocStuff.rda
1189 666
   res = list(A=A, B=B,
1190 667
              zero=zero, sns=sns, gns=names(snpIndex), SNR=SNR, SKW=SKW,
1191
-             mixtureParams=mixtureParams, cdfName=cdfName) # , snpIndex=snpIndex, npIndex=npIndex)
668
+             mixtureParams=mixtureParams, cdfName=cdfName)
1192 669
 
1193 670
   if(save.it & !missing(snpFile)) {
1194 671
     t0 <- proc.time() 
... ...
@@ -1235,61 +712,6 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
1235 712
     res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
1236 713
                         seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget,
1237 714
                         save.it=save.it, snpFile=snpFile, cnFile=cnFile)
1238
-  }else{
1239
-      if(verbose) message("Loading ", snpFile, ".")
1240
-        obj <- load(snpFile)
1241
-        if(verbose) message("Done.")
1242
-        if(!any(obj == "res"))
1243
-          stop("Object in ", snpFile, " seems to be invalid.")
1244
-  }
1245
-  if(row.names) row.names=res[["gns"]] else row.names=NULL
1246
-  if(col.names) col.names=res[["sns"]] else col.names=NULL
1247
-
1248
-  res2 <- crlmmGT(res[["A"]], res[["B"]], res[["SNR"]],
1249
-                  res[["mixtureParams"]], res[["cdfName"]],
1250
-                  gender=gender, row.names=row.names,
1251
-                  col.names=col.names, recallMin=recallMin,
1252
-                  recallRegMin=1000, SNRMin=SNRMin,
1253
-                  returnParams=returnParams, badSNP=badSNP,
1254
-                  verbose=verbose)
1255
-  
1256
-  res2[["SNR"]] <- res[["SNR"]]
1257
-  res2[["SKW"]] <- res[["SKW"]]
1258
-  rm(res)
1259
-  gc()
1260
-  return(list2SnpSet(res2, returnParams=returnParams)) # return(res2)
1261
-}
1262
-
1263
-
1264
-crlmmIllumina2 <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
1265
-                  row.names=TRUE, col.names=TRUE,
1266
-                  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
1267
-                  seed=1, save.it=FALSE, load.it=FALSE, snpFile, cnFile,
1268
-                  mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
1269
-                  cdfName, sns, recallMin=10, recallRegMin=1000,
1270
-                  returnParams=FALSE, badSNP=.7) {
1271
-  if (save.it & (missing(snpFile) | missing(cnFile)))
1272
-    stop("'snpFile' and/or 'cnFile' is missing and you chose to save.it")
1273
-  if (load.it & missing(snpFile))
1274
-    stop("'snpFile' is missing and you chose to load.it")
1275
-  if (!missing(snpFile))
1276
-    if (load.it & !file.exists(snpFile)){
1277
-      load.it <- FALSE
1278
-      message("File ", snpFile, " does not exist.")
1279
-      stop("Cannot load SNP data.")
1280
-  }
1281
-  if (!load.it){
1282
-    if(!missing(RG)) {
1283
-      if(missing(XY))
1284
-        XY = RGtoXY2(RG, chipType=cdfName)
1285
-      else
1286
-        stop("Both RG and XY specified - please use one or the other")
1287
-    }
1288
-    if (missing(sns)) sns <- sampleNames(XY) #$X
1289
-    
1290
-    res = preprocessInfinium2v2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
1291
-                        seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #,
1292
-                      #  save.it=save.it, snpFile=snpFile, cnFile=cnFile)
1293 715
 
1294 716
 #    fD = featureData(XY)
1295 717
 #    phenD = XY@phenoData
... ...
@@ -1378,10 +800,10 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
1378 800
 #                          rgFile,
1379 801
 			  stripNorm=TRUE,
1380 802
 			  useTarget=TRUE,
1381
-          	          row.names=TRUE, 
803
+			  row.names=TRUE, 
1382 804
 			  col.names=TRUE,
1383 805
 			  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
1384
-                          seed=1, save.ab=FALSE, snpFile, cnFile,
806
+                          seed=1, save.it=FALSE, snpFile, cnFile,
1385 807
                           mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
1386 808
                           cdfName, sns, recallMin=10, recallRegMin=1000,
1387 809
                           returnParams=FALSE, badSNP=.7) {
... ...
@@ -1392,8 +814,8 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
1392 814
 			  
1393 815
 #  if (save.rg & missing(rgFile))
1394 816
 #    stop("'rgFile' is missing, and you chose save.rg")
1395
-  if (save.ab & (missing(snpFile) | missing(cnFile)))
1396
-    stop("'snpFile' or 'cnFile' is missing and you chose save.ab")
817
+  if (save.it & (missing(snpFile) | missing(cnFile)))
818
+    stop("'snpFile' or 'cnFile' is missing and you chose save.it")
1397 819
 #  batches = NULL
1398 820
 #  if(!is.null(arrayNames))
1399 821
 #    batches <- rep(1, length(arrayNames)) # problem here if arrayNames not specified! # splitIndicesByLength(seq(along=arrayNames), ocSamples())
... ...
@@ -1407,20 +829,20 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
1407 829
 #     RG = readIdatFiles(sampleSheet=sampleSheet[j,], arrayNames=arrayNames[j],
1408 830
 #                       ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
1409 831
 #                       highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate)
1410
-     RG = readIdatFiles2(sampleSheet=sampleSheet, arrayNames=arrayNames,
832
+     RG = readIdatFiles(sampleSheet=sampleSheet, arrayNames=arrayNames,
1411 833
                        ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
1412 834
                        highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate)
1413 835
 
1414 836
 #  if(save.rg)
1415 837
 #	save(RG, file=rgFile)
1416 838
 
1417
-    XY = RGtoXY2(RG, chipType=cdfName)
839
+    XY = RGtoXY(RG, chipType=cdfName)
1418 840
     rm(RG); gc()
1419 841
     if (missing(sns)) { sns = sampleNames(XY) #subsns = sampleNames(XY)
1420 842
     } # else subsns = sns[j]
1421
-    res = preprocessInfinium2v2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
843
+    res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
1422 844
                                seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget, #) # sns=subsns
1423
-                               save.it=save.ab, snpFile=snpFile, cnFile=cnFile)
845
+                               save.it=save.it, snpFile=snpFile, cnFile=cnFile)
1424 846
 #    fD = featureData(XY)
1425 847
 #    phenD = XY@phenoData
1426 848
 #    protD = XY@protocolData
... ...
@@ -1,6 +1,5 @@
1 1
 \name{crlmmIllumina}
2 2
 \alias{crlmmIllumina}
3
-\alias{crlmmIllumina2}
4 3
 \title{Genotype Illumina Infinium II BeadChip data with CRLMM}
5 4
 \description{
6 5
   Implementation of the CRLMM algorithm for
... ...
@@ -15,14 +14,6 @@ crlmmIllumina(RG, XY, stripNorm=TRUE,
15 14
       snpFile, cnFile, mixtureSampleSize=10^5,
16 15
       eps=0.1, verbose=TRUE, cdfName, sns, recallMin=10,
17 16
       recallRegMin=1000, returnParams=FALSE, badSNP=0.7)
18
-
19
-crlmmIllumina2(RG, XY, stripNorm=TRUE, 
20
-      useTarget=TRUE, row.names=TRUE, col.names=TRUE,
21
-      probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5,
22
-      gender=NULL, seed=1, save.it=FALSE, load.it=FALSE,
23
-      snpFile, cnFile, mixtureSampleSize=10^5,
24
-      eps=0.1, verbose=TRUE, cdfName, sns, recallMin=10,
25
-      recallRegMin=1000, returnParams=FALSE, badSNP=0.7)
26 17
 }
27 18
 
28 19
 \arguments{
... ...
@@ -54,7 +45,7 @@ crlmmIllumina2(RG, XY, stripNorm=TRUE,
54 45
   \item{cdfName}{'character' defining the chip annotation (manifest) to use
55 46
     ('human370v1c', human550v3b', 'human650v3a', 'human1mv1c',
56 47
     'human370quadv3c', 'human610quadv1b', 'human660quadv1a',
57
-    'human1mduov3b', 'humanomni1quadv1b')}
48
+    'human1mduov3b', 'humanomni1quadv1b', 'humanomniexpress12v1b')}
58 49
   \item{sns}{'character' vector with sample names to be used.}
59 50
   \item{recallMin}{'integer'. Minimum number of samples for recalibration.}
60 51
   \item{recallRegMin}{'integer'. Minimum number of SNP's for regression.}