Browse code

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@38494 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 06/04/2009 05:19:27
Showing 6 changed files

... ...
@@ -42,7 +42,7 @@ BioC data packages
42 42
 
43 43
 2009-03-25 Rob Scharpf - committed version 1.0.60
44 44
 * simplified some of the preliminary steps for the computeCopynumber function
45
-** crlmm output subset within the body of the function
45
+* crlmm output subset within the body of the function
46 46
 * modified vignette -- store results in an oligoSnpSet object (for now)
47 47
 * added function to extract the date from the celfile headers (celDates)
48 48
 
... ...
@@ -78,7 +78,6 @@ BioC data packages
78 78
 * added functions to read in idat files, pre-process and genotype 
79 79
 data from Illumina SNP arrays (functions named readIdatFiles() 
80 80
 and crlmmIllumina() in crlmm-illumina.R)
81
-
82 81
 * added man pages for these functions
83 82
 
84 83
 2009-04-01 Matt Ritchie - committed version 1.0.67
... ...
@@ -91,8 +90,18 @@ and crlmmIllumina() in crlmm-illumina.R)
91 90
 
92 91
 2009-04-03 B Carvalho - committed version 1.0.70
93 92
 
94
-** Updated TODO and DESCRIPTION
93
+* Updated TODO and DESCRIPTION
95 94
 
96 95
 2009-04-04 R.Scharpf - committed version 1.0.71
97 96
 
98 97
 *  bug in oneBatch function for chromosome X.  added additional checks for missing values
98
+
99
+2009-04-06 Matt Ritchie - committed version 1.0.72
100
+
101
+* new argument in readIdatFiles() 'saveDate', which saves the date and time each array
102
+is decoded and scanned
103
+* results from crlmmIllumina() now saved as 'SnpSet' just like crlmm()
104
+* reorganised Illumina preprocessing functions slightly to
105
+  - allow copy number intensities to be saved along with snp intensities
106
+  - make use of new indexing objects in the chip specific crlmm data packages
107
+  - fix a few bugs
... ...
@@ -1,7 +1,7 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for SNP 5.0 and 6.0 arrays.
4
-Version: 1.0.71
4
+Version: 1.0.72
5 5
 Date: 2008-12-30
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
7 7
 Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
... ...
@@ -5,7 +5,7 @@
5 5
 
6 6
 readIdatFiles = function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
7 7
                                 arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
8
-                                highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat")) {
8
+                                highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), saveDate=FALSE) {
9 9
        if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
10 10
          arrayNames=NULL
11 11
          if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
... ...
@@ -33,6 +33,7 @@ readIdatFiles = function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
33 33
            cat("Could not find required info in \'sampleSheet\' - ignoring.  Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
34 34
          }
35 35
        }
36
+       narrays = length(arrayNames)
36 37
        grnfiles = paste(arrayNames, fileExt$green, sep=sep)
37 38
        redfiles = paste(arrayNames, fileExt$red, sep=sep)
38 39
        if(length(grnfiles)==0 || length(redfiles)==0)
... ...
@@ -45,11 +46,17 @@ readIdatFiles = function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
45 46
        grnidats = file.path(path, grnfiles)
46 47
        redidats = file.path(path, redfiles)
47 48
        
48
-       headerInfo = list(nProbes = rep(NA, length(arrayNames)),
49
-                         Barcode = rep(NA, length(arrayNames)),
50
-                         ChipType = rep(NA, length(arrayNames)),
51
-                         Manifest = rep(NA, length(arrayNames)), # not sure about this one - sometimes blank
52
-                         Position = rep(NA, length(arrayNames))) # this may also vary a bit
49
+       headerInfo = list(nProbes = rep(NA, narrays),
50
+                         Barcode = rep(NA, narrays),
51
+                         ChipType = rep(NA, narrays),
52
+                         Manifest = rep(NA, narrays), # not sure about this one - sometimes blank
53
+                         Position = rep(NA, narrays)) # this may also vary a bit
54
+
55
+       dates = list(decode=rep(NA, narrays),
56
+                    scan=rep(NA, narrays),
57
+                    register=rep(NA, narrays),
58
+                    extract=rep(NA, narrays))
59
+
53 60
        # read in the data
54 61
        for(i in seq(along=arrayNames)) {
55 62
          cat("reading", arrayNames[i], "\t")
... ...
@@ -80,6 +87,12 @@ readIdatFiles = function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
80 87
                   # || headerInfo$nProbes[i]!=headerInfo$nProbes[1] ## removed this condition as some arrays used the same manifest
81 88
                   # but differed by a few SNPs for some reason - most of the chip was the same though
82 89
            stop("Chips are not of all of the same type - please check your data")
90
+
91
+         dates$decode[i] = G$RunInfo[1, 1]
92
+         dates$scan[i] = G$RunInfo[2, 1]
93
+         dates$register[i] = G$RunInfo[3, 1]
94
+         dates$extract[i] = G$RunInfo[4, 1]
95
+
83 96
          if(i==1) {
84 97
            if(is.null(ids) && !is.null(G))
85 98
              ids = idsG
... ...
@@ -122,14 +135,22 @@ readIdatFiles = function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
122 135
            Rstderr[,i] = R$Quants[indR, "SD"]
123 136
          }
124 137
        }
125
-       if(is.null(sampleSheet))
126
-         pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
127
-       else
128
-         pd = new("AnnotatedDataFrame", data = sampleSheet) 
138
+       if(is.null(sampleSheet)) {
139
+         if(saveDate)
140
+           pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames, DecodeDate=dates$decode, ScanDate=dates$scan))
141
+         else
142
+           pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
143
+       }
144
+       else {
145
+         if(saveDate)
146
+           pd = new("AnnotatedDataFrame", data = cbind(sampleSheet, DecodeDate=dates$decode, ScanDate=dates$scan))
147
+         else
148
+           pd = new("AnnotatedDataFrame", data = sampleSheet)
149
+       }
129 150
        RG = new("NChannelSet",
130 151
                 R=Rintens, G=Gintens, Rnb=Rnobeads, Gnb=Gnobeads,
131 152
                 Rse=Rstderr, Gse=Gstderr, annotation=headerInfo$Manifest[1],
132
-                phenoData = pd)
153
+                phenoData=pd)
133 154
 }
134 155
 
135 156
 
... ...
@@ -390,7 +411,7 @@ readBPM <- function(bpmFile){
390 411
 }
391 412
 
392 413
 
393
-RGtoXY = function(RG, chipType, remove=TRUE, verbose=TRUE) {
414
+RGtoXY = function(RG, chipType, verbose=TRUE) {
394 415
   chipList = c("human1mv1c",             # 1M
395 416
                "human370v1c",            # 370CNV
396 417
                "human650v3a",            # 650Y
... ...
@@ -413,32 +434,36 @@ RGtoXY = function(RG, chipType, remove=TRUE, verbose=TRUE) {
413 434
      rm(suggCall, msg)
414 435
   }
415 436
   if(verbose) message("Loading chip annotation information.")
416
-    loader("annotation.rda", .crlmmPkgEnv, pkgname)
437
+    loader("address.rda", .crlmmPkgEnv, pkgname)
417 438
 #    data(annotation, package=pkgname, envir=.crlmmPkgEnv)
418
-  annot <- getVarInEnv("annot")
439
+  aids <- getVarInEnv("addressA") # comes from AddressA_ID or Address column in manifest
440
+  bids <- getVarInEnv("addressB") # comes from AddressB_ID or Address2 column in manifest
441
+  ids <- names(aids)
442
+  snpbase <- getVarInEnv("base")
419 443
   
420
-  if(remove)
421
-    annot = annot[annot[,"ToCall"]==1,]
422
-  nsnps = nrow(annot)
444
+  nsnps = length(aids)
423 445
   narrays = ncol(RG)
424
-  aidcol = match("AddressA_ID", colnames(annot))
425
-  if(is.na(aidcol))
426
-    aidcol = match("Address", colnames(annot))
427
-  bidcol = match("AddressB_ID", colnames(annot))
428
-  if(is.na(bidcol)) 
429
-    bidcol = match("Address2", colnames(annot))
430
-  aids = annot[, aidcol]
431
-  bids = annot[, bidcol]
432
-  snpids = annot[,"Name"]
433
-  snpbase = annot[,"SNP"] 
434
-  rrgg = !is.na(bids) & bids!=0  
446
+  
447
+#  aidcol = match("AddressA_ID", colnames(annot))
448
+#  if(is.na(aidcol))
449
+#    aidcol = match("Address", colnames(annot))
450
+#  bidcol = match("AddressB_ID", colnames(annot))
451
+#  if(is.na(bidcol)) 
452
+#    bidcol = match("Address2", colnames(annot))
453
+#  aids = annot[, aidcol]
454
+#  bids = annot[, bidcol]
455
+#  snpids = annot[,"Name"]
456
+#  snpbase = annot[,"SNP"] 
457
+  infI = !is.na(bids) & bids!=0  
435 458
   aord = match(aids, featureNames(RG)) # NAs are possible here
436
-  bord = match(bids[!rrgg], featureNames(RG)) # and here
459
+  bord = match(bids, featureNames(RG)) # and here
437 460
 #  argrg = aids[rrgg]
438 461
 #  brgrg = bids[rrgg]
439 462
   X = Y = Xnb = Ynb = Xse = Yse = zero = matrix(0, nsnps, narrays)
440
-  rownames(X) = rownames(Y) = rownames(Xnb) = rownames(Ynb) = rownames(Xse) = rownames(Yse) = snpids
463
+  rownames(X) = rownames(Y) = rownames(Xnb) = rownames(Ynb) = rownames(Xse) = rownames(Yse) = ids
441 464
   colnames(X) = colnames(Y) = colnames(Xnb) = colnames(Ynb) = colnames(Xse) = colnames(Yse) = sampleNames(RG)$G
465
+
466
+  # First sort out Infinium II SNPs, X -> R (allele A)  and Y -> G (allele B) from the same probe
442 467
   X[!is.na(aord),] = exprs(channel(RG, "R"))[aord[!is.na(aord)],] # mostly red
443 468
   Y[!is.na(aord),] = exprs(channel(RG, "G"))[aord[!is.na(aord)],] # mostly green
444 469
   Xnb[!is.na(aord),] = exprs(channel(RG, "Rnb"))[aord[!is.na(aord)],]
... ...
@@ -446,23 +471,38 @@ RGtoXY = function(RG, chipType, remove=TRUE, verbose=TRUE) {
446 471
   Xse[!is.na(aord),] = exprs(channel(RG, "Rse"))[aord[!is.na(aord)],]
447 472
   Yse[!is.na(aord),] = exprs(channel(RG, "Gse"))[aord[!is.na(aord)],]
448 473
   
449
-  # Important addition needed to get the correct intensities for the transitional Infinium I SNPs
450
-  # these SNPs are intergorated by 2 separate bead types (the second bead type is given in the 'AddressB_ID'
451
-  # (or 'Address2') column, and will have bid != 0 | !is.na(bid)
452
-  # for these it could be that the X signal is the R/G from the A bead and the Y signal the R/G from the B snp
453
-  # at present these are all zeroed out, so will presumably be called as hets
454
-  # this is not a problem for the training data, since we get the X and Y intensities directly from BeadStudio output
474
+  ## Warning - not 100% sure that the code below is correct - could be more complicated than this
475
+  
476
+  # Next Infinium I where X -> R from allele A probe and Y -> R from allele B probe
477
+#  infIRR = infI & (snpbase=="[A/T]" | snpbase=="[T/A]" | snpbase=="[a/t]" | snpbase=="[t/a]")
478
+  
479
+#  X[infIRR,] = exprs(channel(RG, "R"))[aord[infIRR],] # mostly red
480
+#  Y[infIRR,] = exprs(channel(RG, "R"))[bord[infIRR],] # mostly green
481
+#  Xnb[infIRR,] = exprs(channel(RG, "Rnb"))[aord[infIRR],]
482
+#  Ynb[infIRR,] = exprs(channel(RG, "Rnb"))[bord[infIRR],]
483
+#  Xse[infIRR,] = exprs(channel(RG, "Rse"))[aord[infIRR],]
484
+#  Yse[infIRR,] = exprs(channel(RG, "Rse"))[bord[infIRR],]
455 485
   
456
-  X[!is.na(bord),] = 0
457
-  Y[!is.na(bord),] = 0
458
-  Xnb[!is.na(bord),] = 0
459
-  Ynb[!is.na(bord),] = 0
460
-  Xse[!is.na(bord),] = 0
461
-  Yse[!is.na(bord),] = 0
486
+  # Finally Infinium I where X -> G from allele A probe and Y -> G from allele B probe
487
+#  infIGG = infI & (snpbase=="[C/G]" | snpbase=="[G/C]" | snpbase=="[g/c]" | snpbase=="[c/g]")
488
+
489
+#  X[infIGG,] = exprs(channel(RG, "G"))[aord[infIGG],] # mostly red
490
+#  Y[infIGG,] = exprs(channel(RG, "G"))[bord[infIGG],] # mostly green
491
+#  Xnb[infIGG,] = exprs(channel(RG, "Gnb"))[aord[infIGG],]
492
+#  Ynb[infIGG,] = exprs(channel(RG, "Gnb"))[bord[infIGG],]
493
+#  Xse[infIGG,] = exprs(channel(RG, "Gse"))[aord[infIGG],]
494
+#  Yse[infIGG,] = exprs(channel(RG, "Gse"))[bord[infIGG],]
495
+    
496
+  #  For now zero out Infinium I probes
497
+  X[infI,] = 0
498
+  Y[infI,] = 0
499
+  Xnb[infI,] = 0
500
+  Ynb[infI,] = 0
501
+  Xse[infI,] = 0
502
+  Yse[infI,] = 0
462 503
 
463 504
   zero[X==0 | Y==0] = 1
464 505
   
465
-  rm(annot)
466 506
   gc()
467 507
   
468 508
   XY = new("NChannelSet",
... ...
@@ -533,12 +573,12 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
533 573
 
534 574
 
535 575
 preprocessInfinium2 = function(XY, mixtureSampleSize=10^5, fitMixture=TRUE, eps=0.1, verbose=TRUE, seed=1,
536
-                               cdfName, sns, stripNorm=TRUE, useTarget=TRUE) {
576
+                               cdfName, sns, stripNorm=TRUE, useTarget=TRUE, save.it=FALSE, intensityFile) {
537 577
   if(stripNorm)
538 578
     XY = stripNormalize(XY, useTarget=useTarget, verbose=verbose)
539
-  
579
+
540 580
 ## MR: the code below is mostly straight from snprma.R
541
-  if (missing(sns)) sns <- sampleNames(XY)
581
+  if (missing(sns)) sns <- sampleNames(XY)$X
542 582
   if(missing(cdfName))
543 583
     cdfName <- annotation(XY)
544 584
 ##  stuffDir <- changeToCrlmmAnnotationName(cdfName)
... ...
@@ -552,7 +592,9 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5, fitMixture=TRUE, eps=
552 592
   }
553 593
   if(verbose) message("Loading snp annotation and mixture model parameters.")
554 594
   loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
555
-  loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)  
595
+  loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
596
+  loader("snpProbesFid.rda", .crlmmPkgEnv, pkgname)
597
+  loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
556 598
 #  data(genotypeStuff, mixtureStuff, package=pkgname, envir=.crlmmPkgEnv)
557 599
   autosomeIndex <- getVarInEnv("autosomeIndex")
558 600
   #  pnsa <- getVarInEnv("pnsa")
... ...
@@ -564,9 +606,18 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5, fitMixture=TRUE, eps=
564 606
   SMEDIAN <- getVarInEnv("SMEDIAN")
565 607
   theKnots <- getVarInEnv("theKnots")
566 608
   gns <- getVarInEnv("gns")
567
-
568
-  nprobes <- nrow(XY)
569
-  narrays <- ncol(XY)
609
+  
610
+  # separate out copy number probes
611
+  npIndex = getVarInEnv("npProbesFid")
612
+  nprobes = length(npIndex)
613
+  narrays = ncol(XY)
614
+  A <- matrix(as.integer(exprs(channel(XY, "X"))[npIndex,]), nprobes, narrays)
615
+  B <- matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays)
616
+  cnAB = list(A=A, B=B, sns=sns, gns=names(npIndex), cdfName=cdfName)
617
+  
618
+  # next process snp probes
619
+  snpIndex = getVarInEnv("snpProbesFid")
620
+  nprobes <- length(snpIndex)
570 621
   
571 622
   ##We will read each cel file, summarize, and run EM one by one
572 623
   ##We will save parameters of EM to use later
... ...
@@ -585,14 +636,14 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5, fitMixture=TRUE, eps=
585 636
   ##S will hold (A+B)/2 and M will hold A-B
586 637
   ##NOTE: We actually dont need to save S. Only for pics etc...
587 638
   ##f is the correction. we save to avoid recomputing
588
-  A <- matrix(as.integer(exprs(channel(XY, "X"))), nprobes, narrays) # matrix(as.integer(0), length(pnsa), length(filenames))
589
-  B <- matrix(as.integer(exprs(channel(XY, "Y"))), nprobes, narrays) # matrix(as.integer(0), length(pnsb), length(filenames))
639
+  A <- matrix(as.integer(exprs(channel(XY, "X"))[snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsa), length(filenames))
640
+  B <- matrix(as.integer(exprs(channel(XY, "Y"))[snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsb), length(filenames))
590 641
 
591 642
   # new lines below - useful to keep track of zeroed out SNPs
592
-  zero <- matrix(as.integer(exprs(channel(XY, "zero"))), nprobes, narrays)
643
+  zero <- matrix(as.integer(exprs(channel(XY, "zero"))[snpIndex,]), nprobes, narrays)
593 644
     
594
-  colnames(A) <- colnames(B) <- colnames(zero) <- sampleNames(XY)$X
595
-  rownames(A) <- rownames(B) <- rownames(zero) <- featureNames(XY)
645
+  colnames(A) <- colnames(B) <- colnames(zero) <- sns
646
+  rownames(A) <- rownames(B) <- rownames(zero) <- names(snpIndex) # gns # featureNames(XY)
596 647
   
597 648
   if(verbose){
598 649
      message("Calibrating ", narrays, " arrays.")
... ...
@@ -613,8 +664,8 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5, fitMixture=TRUE, eps=
613 664
       SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
614 665
     }
615 666
     if(verbose) {
616
-      if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
617
-      else cat(".")
667
+       if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
668
+       else cat(".")
618 669
     }
619 670
   }
620 671
   if (verbose) {
... ...
@@ -623,11 +674,20 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5, fitMixture=TRUE, eps=
623 674
   }
624 675
   if (!fitMixture) SNR <- mixtureParams <- NA
625 676
   ## gns comes from preprocStuff.rda
626
-  list(A=A, B=B, zero=zero, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)
677
+  res = list(A=A, B=B, zero=zero, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)
678
+
679
+  if(save.it & !missing(intensityFile)) {
680
+    t0 <- proc.time() 
681
+    save(cnAB, res, file=intensityFile)
682
+    t0 <- proc.time()-t0
683
+    if(verbose) message("Used ", round(t0[3],1), " seconds to save ", intensityFile, ".")
684
+  }
685
+  return(res)
627 686
 }
628 687
 
629 688
 
630
-## MR: Could add arguments to allow this function to read in idat data as well, although this add a further 7 arguments, which might over-complicate things
689
+## MR: Could add arguments to allow this function to read in idat data as well,
690
+## although this would add a further 7 arguments, which might over-complicate things
631 691
 crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
632 692
                   row.names=TRUE, col.names=TRUE,
633 693
                   probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
... ...
@@ -635,15 +695,9 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
635 695
                   mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
636 696
                   cdfName, sns, recallMin=10, recallRegMin=1000,
637 697
                   returnParams=FALSE, badSNP=.7) {
638
-  if(!missing(RG)) {
639
-    if(missing(XY))
640
-      XY = RGtoXY(RG, chipType=cdfName)
641
-    else
642
-      stop("Both RG and XY specified - please use one or the other")
643
-  }
698
+
644 699
   if ((load.it | save.it) & missing(intensityFile))
645 700
     stop("'intensityFile' is missing, and you chose either load.it or save.it")
646
-  if (missing(sns)) sns <- sampleNames(XY) #basename(filenames)
647 701
   if (!missing(intensityFile))
648 702
     if (load.it & !file.exists(intensityFile)){
649 703
       load.it <- FALSE
... ...
@@ -651,23 +705,23 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
651 705
       message("Not loading it, but running SNPRMA from scratch.")
652 706
   }
653 707
   if (!load.it){
654
-#    res <- snprma(filenames, fitMixture=TRUE,
655
-#                    mixtureSampleSize=mixtureSampleSize, verbose=verbose,
656
-#                    eps=eps, cdfName=cdfName, sns=sns)
657
-    res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
658
-                        seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget)
659
-    if(save.it){
660
-      t0 <- proc.time()
661
-      save(res, file=intensityFile)
662
-      t0 <- proc.time()-t0
663
-      if(verbose) message("Used ", t0[3], " seconds to save ", intensityFile, ".")
708
+    if(!missing(RG)) {
709
+      if(missing(XY))
710
+        XY = RGtoXY(RG, chipType=cdfName)
711
+      else
712
+        stop("Both RG and XY specified - please use one or the other")
664 713
     }
714
+    if (missing(sns)) sns <- sampleNames(XY)$X
715
+    
716
+    res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
717
+                        seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget,
718
+                        save.it=save.it, intensityFile=intensityFile)
665 719
   }else{
666 720
       if(verbose) message("Loading ", intensityFile, ".")
667 721
         obj <- load(intensityFile)
668 722
         if(verbose) message("Done.")
669
-        if(obj != "res")
670
-         stop("Object in ", intensityFile, " seems to be invalid.")
723
+        if(!any(obj == "res"))
724
+          stop("Object in ", intensityFile, " seems to be invalid.")
671 725
   }
672 726
   if(row.names) row.names=res$gns else row.names=NULL
673 727
   if(col.names) col.names=res$sns else col.names=NULL
... ...
@@ -682,5 +736,5 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
682 736
   
683 737
   res2[["SNR"]] <- res[["SNR"]]
684 738
   res2[["SKW"]] <- res[["SKW"]]
685
-  return(res2) # return(list2SnpSet(res2, returnParams=returnParams))
739
+  return(list2SnpSet(res2, returnParams=returnParams)) # return(res2)
686 740
 }
... ...
@@ -96,10 +96,10 @@ list2SnpSet <- function(x, returnParams=FALSE){
96 96
                           "SNP Quality Score",
97 97
                           "Center AA", "Center AB", "Center BB",
98 98
                           "Scale AA", "Scale AB", "Scale BB",
99
-                          "N AA", "N AB", "N BB",
100
-                          "Shift in parameters AA",
101
-                          "Shift in parameters AB",
102
-                          "Shift in parameters BB"),
99
+                          "N AA", "N AB", "N BB"),
100
+#                          "Shift in parameters AA",
101
+#                          "Shift in parameters AB",
102
+#                          "Shift in parameters BB"),
103 103
                         row.names=c(
104 104
                           "SNPQC",
105 105
                           "cAA", "cAB", "cBB",
... ...
@@ -50,16 +50,22 @@ crlmmIllumina(RG, XY, stripNorm=TRUE, useTarget=TRUE,
50 50
   \item{badSNP}{'numeric'. Threshold to flag as bad SNP (affects batchQC)}
51 51
 }
52 52
 \value{
53
+  A \code{SnpSet} object which contains
53 54
   \item{calls}{Genotype calls (1 - AA, 2 - AB, 3 - BB)}
54
-  \item{confs}{Confidence scores 'round(-1000*log2(1-p))'}
55
+  \item{callProbability}{confidence scores 'round(-1000*log2(1-p))'}
56
+  in the \code{assayData} slot and
55 57
   \item{SNPQC}{SNP Quality Scores}
56
-  \item{batchQC}{Batch Quality Score}
57
-  \item{params}{Recalibrated parameters}
58
+  \item{batchQC}{Batch Quality Scores}
59
+  along with center and scale parameters when \code{returnParams=TRUE}
60
+  in the \code{featureData} slot.
58 61
 }
59 62
 
60 63
 \details{
61 64
   Note: The user should specify either the \code{RG} or \code{XY}
62
-  intensities, not both.
65
+  intensities, not both.  Alternatively if \code{crlmmIllumina} has been
66
+  run already with \code{save.it=TRUE}, the preprocessed data can be
67
+  loaded from file by specifying \code{load.it=TRUE} and
68
+  \code{intensityFile} (\code{RG} or \code{XY} are not needed in this case).
63 69
 }
64 70
 
65 71
 \references{
... ...
@@ -12,7 +12,8 @@ readIdatFiles(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
12 12
               arrayInfoColNames=list(barcode="SentrixBarcode_A",
13 13
                                      position="SentrixPosition_A"),
14 14
               highDensity=FALSE, sep="_",
15
-              fileExt=list(green="Grn.idat", red="Red.idat"))
15
+              fileExt=list(green="Grn.idat", red="Red.idat"),
16
+              saveDate=FALSE)
16 17
 }
17 18
 
18 19
 \arguments{
... ...
@@ -40,6 +41,8 @@ readIdatFiles(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
40 41
     names.}
41 42
   \item{fileExt}{list containing elements 'Green' and 'Red' which
42 43
     specify the .idat file extension for the Cy3 and Cy5 channels.}
44
+  \item{saveDate}{logical.  Should the dates from each .idat be saved
45
+    with sample information?}
43 46
 }
44 47
 
45 48
 \details{