Browse code

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@49660 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 28/09/2010 01:44:41
Showing 5 changed files

... ...
@@ -549,6 +549,10 @@ function (which expects ff objects and supports parallel processing)
549 549
 ** Added close() statements to readIdatFiles(), RGtoXY() and stripNormalize().  Added open() statement to stripNormalize(). Moved close() statement in preprocessInfinium2()
550 550
 ** tidied up crlmm-illumina.R, removing commented out code.
551 551
 
552
-2010-09-16 M. Ritchie 1.7.13
552
+2010-09-16 M. Ritchie 1.7.14
553 553
 ** added immuno chip 12 as one of the chip options for Illumina.
554 554
 
555
+2010-09-28 M. Ritchie 1.7.15
556
+** added functions get.ProtocolData.Illumina(), construct.Illumina() and genotype.Illumina() to maintiain consistency with Rob's Affy functions
557
+** removed 'save.it', 'load.it', 'snpFile', 'cnFile' arguments from crlmmIllumina(), crlmmIlluminaV2 and preprocessInfinium2().  Updated man pages to reflect these changes.
558
+** added a switch so that both regular matrix/ff storage can be handled in each function, depending upon whether the ff package has been loaded.
... ...
@@ -1,8 +1,8 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.7.14
5
-Date: 2010-09-16
4
+Version: 1.7.15
5
+Date: 2010-09-27
6 6
 Author: Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>, Ingo Ruczinski <iruczins@jhsph.edu>, Rafael A Irizarry
7 7
 Maintainer: Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
8 8
 Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 5.0, 6.0, and Illumina platforms
... ...
@@ -141,11 +141,12 @@ readIdatFiles <- function(sampleSheet=NULL,
141 141
 	       protocolData(RG)[["ScanDate"]] = dates$scan
142 142
        }
143 143
        storageMode(RG) = "lockedEnvironment"
144
-#       if(class(RG@assayData$R)[1]=="ff_matrix") {
144
+       is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)       
145
+       if(is.lds) {
145 146
          close(RG@assayData$R)
146 147
          close(RG@assayData$G)
147 148
          close(RG@assayData$zero)
148
-#       }
149
+       }
149 150
        RG
150 151
 }
151 152
 
... ...
@@ -466,6 +467,7 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
466 467
 	     annotation=chipType, phenoData=RG@phenoData,
467 468
 	     protocolData=RG@protocolData, storage.mode="environment")
468 469
   featureNames(XY) = ids # featureNames(RG)
470
+  sampleNames(XY) = sampleNames(RG)
469 471
   gc()
470 472
   # Need to initialize - matrices filled with NAs to begin with
471 473
   XY@assayData$X[1:nsnps,] = 0
... ...
@@ -499,11 +501,12 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
499 501
   XY@assayData$zero[infI,] = 0
500 502
   gc()
501 503
 
502
-#  if(class(XY@assayData$X)[1]=="ff_matrix") {
504
+  is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
505
+  if(is.lds) {
503 506
     close(XY@assayData$X)
504 507
     close(XY@assayData$Y)
505 508
     close(XY@assayData$zero)
506
-#  }
509
+  }
507 510
 
508 511
 #  storageMode(XY) = "lockedEnvironment"
509 512
   XY
... ...
@@ -531,10 +534,12 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
531 534
     message("Quantile normalizing ", ncol(XY), " arrays by ", max(stripnum), " strips.")
532 535
     if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=max(stripnum), style=3)
533 536
   }
534
-#  if(class(XY@assayData$X)[1]=="ff_matrix") {
537
+  is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
538
+  
539
+  if(is.lds) {
535 540
     open(XY@assayData$X)
536 541
     open(XY@assayData$Y)
537
-#  }
542
+  }
538 543
   for(s in 1:max(stripnum)) {
539 544
     if(verbose) {
540 545
       if (getRversion() > '2.7.0') setTxtProgressBar(pb, s)
... ...
@@ -554,10 +559,10 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
554 559
     rm(subX, subY, tmp, sel)
555 560
     gc()
556 561
   }
557
-#  if(class(XY@assayData$X)[1]=="ff_matrix") {
562
+  if(is.lds) {
558 563
     close(XY@assayData$X)
559 564
     close(XY@assayData$Y)
560
-#  }
565
+  }
561 566
 
562 567
   if(verbose)
563 568
     cat("\n")
... ...
@@ -573,10 +578,10 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
573 578
 				cdfName,
574 579
 				sns,
575 580
 				stripNorm=TRUE,
576
-				useTarget=TRUE, # ) { #,
577
-				save.it=FALSE,
578
-				snpFile,
579
-				cnFile) {
581
+				useTarget=TRUE) { 
582
+#				save.it=FALSE,
583
+#				snpFile,
584
+#				cnFile) {
580 585
   if(stripNorm)
581 586
     XY = stripNormalize(XY, useTarget=useTarget, verbose=verbose)
582 587
 
... ...
@@ -602,15 +607,18 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
602 607
   theKnots <- getVarInEnv("theKnots")
603 608
   narrays = ncol(XY)
604 609
 
605
-  if(save.it & !missing(cnFile)) {
610
+  is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)    
611
+  
612
+#  if(save.it & !missing(cnFile)) {
606 613
     # separate out copy number probes
607 614
     npIndex = getVarInEnv("npProbesFid")
608 615
     nprobes = length(npIndex)
609 616
     if(length(nprobes)>0) {
610
-      open(XY@assayData$X)
611
-      open(XY@assayData$Y)
612
-      open(XY@assayData$zero)
613
-
617
+      if(is.lds) {    
618
+        open(XY@assayData$X)
619
+        open(XY@assayData$Y)
620
+        open(XY@assayData$zero)
621
+      }
614 622
       A <- matrix(as.integer(exprs(channel(XY, "X"))[npIndex,]), nprobes, narrays)
615 623
       B <- matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays)
616 624
 
... ...
@@ -622,13 +630,18 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
622 630
 
623 631
       cnAB = list(A=A, B=B, zero=zero, sns=sns, gns=names(npIndex), cdfName=cdfName)
624 632
 
625
-      t0 <- proc.time()
626
-      save(cnAB, file=cnFile)
627
-      t0 <- proc.time()-t0
628
-      if(verbose) message("Used ", round(t0[3],1), " seconds to save ", cnFile, ".")
629
-       rm(cnAB, B, zero)
633
+#      t0 <- proc.time()
634
+#      save(cnAB, file=cnFile)
635
+#      t0 <- proc.time()-t0
636
+#      if(verbose) message("Used ", round(t0[3],1), " seconds to save ", cnFile, ".")
637
+       rm(A, B, zero)
638
+      if(is.lds) {    
639
+        close(XY@assayData$X)
640
+        close(XY@assayData$Y)
641
+        close(XY@assayData$zero)
642
+      }
630 643
     }
631
-  }
644
+#  }
632 645
 
633 646
   # next process snp probes
634 647
   snpIndex = getVarInEnv("snpProbesFid")
... ...
@@ -661,14 +674,16 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
661 674
      if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=narrays, style=3)
662 675
   }
663 676
 
664
-  open(XY@assayData$X)
665
-  open(XY@assayData$Y)
666
-  open(XY@assayData$zero)
667
-
677
+  if(is.lds) {
678
+    open(XY@assayData$X)
679
+    open(XY@assayData$Y)
680
+    open(XY@assayData$zero)
681
+  }
682
+  
668 683
   for(i in 1:narrays){
669
-     A[,i] = exprs(channel(XY, "X"))[snpIndex,i]
670
-     B[,i] = exprs(channel(XY, "Y"))[snpIndex,i]
671
-     zero[,i] = exprs(channel(XY, "zero"))[snpIndex,i]
684
+     A[,i] = as.integer(exprs(channel(XY, "X"))[snpIndex,i])
685
+     B[,i] = as.integer(exprs(channel(XY, "Y"))[snpIndex,i])
686
+     zero[,i] = as.integer(exprs(channel(XY, "zero"))[snpIndex,i])
672 687
 #    SKW[i] = mean((A[snpIndex,i][idx2]-mean(A[snpIndex,i][idx2]))^3)/(sd(A[snpIndex,i][idx2])^3)
673 688
      SKW[i] = mean((A[idx2,i]-mean(A[idx2,i]))^3)/(sd(A[idx2,i])^3)
674 689
     if(fitMixture){
... ...
@@ -693,43 +708,43 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
693 708
   if (!fitMixture) SNR <- mixtureParams <- NA
694 709
   ## gns comes from preprocStuff.rda
695 710
 
696
-  if(save.it & !missing(snpFile)) {
697
-    t0 <- proc.time()
698
-    save(res, file=snpFile)
699
-    t0 <- proc.time()-t0
700
-    if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".")
701
-  }
702
-#  if(class(A)[1]=="ff_matrix") {
711
+#  if(save.it & !missing(snpFile)) {
712
+#    t0 <- proc.time()
713
+#    save(res, file=snpFile)
714
+#    t0 <- proc.time()-t0
715
+#    if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".")
716
+#  }
717
+  if(is.lds) {
703 718
     close(A)
704 719
     close(B)
705 720
     close(zero)
706 721
     close(SKW)
707 722
     close(mixtureParams)
708 723
     close(SNR)
709
-#  }
724
+  }
710 725
 
711 726
   res = list(A=A, B=B,
712 727
              zero=zero, sns=sns, gns=names(snpIndex), SNR=SNR, SKW=SKW,
713
-             mixtureParams=mixtureParams, cdfName=cdfName)
714
-
715
-  open(res[["A"]])
716
-  open(res[["B"]])
717
-  open(res[["zero"]])
718
-  open(res[["SNR"]])
719
-  open(res[["mixtureParams"]])
720
-
721
-  if(save.it & !missing(snpFile)) {
722
-    t0 <- proc.time()
723
-    save(res, file=snpFile)
724
-    t0 <- proc.time()-t0
725
-    if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".")
726
-  }
728
+             mixtureParams=mixtureParams, cdfName=cdfName, cnAB=cnAB)
729
+  
730
+#  open(res[["A"]])
731
+#  open(res[["B"]])
732
+#  open(res[["zero"]])
733
+#  open(res[["SNR"]])
734
+#  open(res[["mixtureParams"]])
735
+
736
+#  if(save.it & !missing(snpFile)) {
737
+#    t0 <- proc.time()
738
+#    save(res, file=snpFile)
739
+#    t0 <- proc.time()-t0
740
+#    if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".")
741
+#  }
727 742
 
728
-  close(res[["A"]])
729
-  close(res[["B"]])
730
-  close(res[["zero"]])
731
-  close(res[["SNR"]])
732
-  close(res[["mixtureParams"]])
743
+#  close(res[["A"]])
744
+#  close(res[["B"]])
745
+#  close(res[["zero"]])
746
+#  close(res[["SNR"]])
747
+#  close(res[["mixtureParams"]])
733 748
 
734 749
   return(res)
735 750
 }
... ...
@@ -738,32 +753,43 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
738 753
 crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
739 754
                   row.names=TRUE, col.names=TRUE,
740 755
                   probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
741
-                  seed=1, save.it=FALSE, load.it=FALSE, snpFile, cnFile,
756
+                  seed=1, # save.it=FALSE, load.it=FALSE, snpFile, cnFile,
742 757
                   mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
743 758
                   cdfName, sns, recallMin=10, recallRegMin=1000,
744 759
                   returnParams=FALSE, badSNP=.7) {
745
-  if (save.it & (missing(snpFile) | missing(cnFile)))
746
-    stop("'snpFile' and/or 'cnFile' is missing and you chose to save.it")
747
-  if (load.it & missing(snpFile))
748
-    stop("'snpFile' is missing and you chose to load.it")
749
-  if (!missing(snpFile))
750
-    if (load.it & !file.exists(snpFile)){
751
-      load.it <- FALSE
752
-      message("File ", snpFile, " does not exist.")
753
-      stop("Cannot load SNP data.")
754
-  }
755
-  if (!load.it){
760
+#  if (save.it & (missing(snpFile) | missing(cnFile)))
761
+#    stop("'snpFile' and/or 'cnFile' is missing and you chose to save.it")
762
+#  if (load.it & missing(snpFile))
763
+#    stop("'snpFile' is missing and you chose to load.it")
764
+#  if (!missing(snpFile))
765
+#    if (load.it & !file.exists(snpFile)){
766
+#      load.it <- FALSE
767
+#      message("File ", snpFile, " does not exist.")
768
+#      stop("Cannot load SNP data.")
769
+#  }
770
+#  if (!load.it){
756 771
     if(!missing(RG)) {
757 772
       if(missing(XY))
758 773
         XY = RGtoXY(RG, chipType=cdfName)
759 774
       else
760 775
         stop("Both RG and XY specified - please use one or the other")
761 776
     }
762
-    if (missing(sns)) sns <- sampleNames(XY) #$X
763
-
777
+    if (missing(sns)) {
778
+      sns = sampleNames(XY) #$X
779
+    }
764 780
     res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
765
-                        seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget,
766
-                        save.it=save.it, snpFile=snpFile, cnFile=cnFile)
781
+                        seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #,
782
+#                        save.it=save.it, snpFile=snpFile, cnFile=cnFile)
783
+
784
+    is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
785
+
786
+    if(is.lds) {
787
+      open(res[["A"]])
788
+      open(res[["B"]])
789
+      open(res[["SNR"]])
790
+      open(res[["mixtureParams"]])
791
+    }
792
+    
767 793
 #    fD = featureData(XY)
768 794
 #    phenD = XY@phenoData
769 795
 #    protD = XY@protocolData
... ...
@@ -782,13 +808,13 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
782 808
 #    pData(callSet)$SNR <- rep(NA, length(sns))
783 809
 #    pData(callSet)$gender <- rep(NA, length(sns))
784 810
 
785
-  }else{
786
-      if(verbose) message("Loading ", snpFile, ".")
787
-        obj <- load(snpFile)
788
-        if(verbose) message("Done.")
789
-        if(!any(obj == "res"))
790
-          stop("Object in ", snpFile, " seems to be invalid.")
791
-  }
811
+#  }else{
812
+#      if(verbose) message("Loading ", snpFile, ".")
813
+#        obj <- load(snpFile)
814
+#        if(verbose) message("Done.")
815
+#        if(!any(obj == "res"))
816
+#          stop("Object in ", snpFile, " seems to be invalid.")
817
+#  }
792 818
 
793 819
  #   rm(phenD, protD , fD)
794 820
 
... ...
@@ -802,22 +828,47 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
802 828
   if(row.names) row.names=res$gns else row.names=NULL
803 829
   if(col.names) col.names=res$sns else col.names=NULL
804 830
 
805
-  res2 <- crlmmGT2(A=res[["A"]], #as.matrix(A(callSet)[snp.index,]), # j]),
806
-                  B=res[["B"]], # as.matrix(B(callSet)[snp.index,]),  # j]),
807
-                  SNR=res[["SNR"]], # callSet$SNR, # [j],
808
-                  mixtureParams=res[["mixtureParams"]], #,
809
-                  cdfName=res[["cdfName"]], # annotation(callSet),
810
-                  row.names=row.names, # featureNames(callSet)[snp.index],
811
-                  col.names=col.names, # sampleNames(callSet), #[j],
812
-                  probs=probs,
813
-                  DF=DF,
814
-                  SNRMin=SNRMin,
815
-                  recallMin=recallMin,
816
-                  recallRegMin=recallRegMin,
817
-                  gender=gender,
818
-                  verbose=verbose,
819
-                  returnParams=returnParams,
820
-                  badSNP=badSNP)
831
+  FUN <- ifelse(is.lds, "crlmmGT2", "crlmmGT")
832
+  ## genotyping
833
+  crlmmGTfxn <- function(FUN,...){
834
+		switch(FUN,
835
+		       crlmmGT2=crlmmGT2(...),
836
+		       crlmmGT=crlmmGT(...))
837
+              }
838
+  res2 <- crlmmGTfxn(FUN,
839
+                     A=res[["A"]],
840
+                     B=res[["B"]],
841
+                     SNR=res[["SNR"]],
842
+                     mixtureParams=res[["mixtureParams"]],
843
+                     cdfName=cdfName,
844
+                     row.names=row.names,
845
+                     col.names=col.names,
846
+                     probs=probs,
847
+                     DF=DF,
848
+                     SNRMin=SNRMin,
849
+                     recallMin=recallMin,
850
+                     recallRegMin=recallRegMin,
851
+                     gender=gender,
852
+                     verbose=verbose,
853
+                     returnParams=returnParams,
854
+                     badSNP=badSNP)
855
+    
856
+#  res2 <- crlmmGT2(A=res[["A"]], #as.matrix(A(callSet)[snp.index,]), # j]),
857
+#                  B=res[["B"]], # as.matrix(B(callSet)[snp.index,]),  # j]),
858
+#                  SNR=res[["SNR"]], # callSet$SNR, # [j],
859
+#                  mixtureParams=res[["mixtureParams"]], #,
860
+#                  cdfName=res[["cdfName"]], # annotation(callSet),
861
+#                  row.names=row.names, # featureNames(callSet)[snp.index],
862
+#                  col.names=col.names, # sampleNames(callSet), #[j],
863
+#                  probs=probs,
864
+#                  DF=DF,
865
+#                  SNRMin=SNRMin,
866
+#                  recallMin=recallMin,
867
+#                  recallRegMin=recallRegMin,
868
+#                  gender=gender,
869
+#                  verbose=verbose,
870
+#                  returnParams=returnParams,
871
+#                  badSNP=badSNP)
821 872
 #    rm(res); gc()
822 873
 #    suppressWarnings(snpCall(callSet)[snp.index, j] <- tmp[["calls"]])
823 874
 #    suppressWarnings(snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]])
... ...
@@ -847,14 +898,12 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
847 898
 			  sep="_",
848 899
 			  fileExt=list(green="Grn.idat", red="Red.idat"),
849 900
 			  saveDate=FALSE,
850
-#                          save.rg=FALSE,
851
-#                          rgFile,
852 901
 			  stripNorm=TRUE,
853 902
 			  useTarget=TRUE,
854 903
 			  row.names=TRUE,
855 904
 			  col.names=TRUE,
856 905
 			  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
857
-                          seed=1, save.it=FALSE, snpFile, cnFile,
906
+                          seed=1,  # save.it=FALSE, snpFile, cnFile,
858 907
                           mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
859 908
                           cdfName, sns, recallMin=10, recallRegMin=1000,
860 909
                           returnParams=FALSE, badSNP=.7) {
... ...
@@ -865,8 +914,8 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
865 914
 
866 915
 #  if (save.rg & missing(rgFile))
867 916
 #    stop("'rgFile' is missing, and you chose save.rg")
868
-  if (save.it & (missing(snpFile) | missing(cnFile)))
869
-    stop("'snpFile' or 'cnFile' is missing and you chose save.it")
917
+#  if (save.it & (missing(snpFile) | missing(cnFile)))
918
+#    stop("'snpFile' or 'cnFile' is missing and you chose save.it")
870 919
 #  batches = NULL
871 920
 #  if(!is.null(arrayNames))
872 921
 #    batches <- rep(1, length(arrayNames)) # problem here if arrayNames not specified! # splitIndicesByLength(seq(along=arrayNames), ocSamples())
... ...
@@ -892,13 +941,18 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
892 941
     if (missing(sns)) { sns = sampleNames(XY) #subsns = sampleNames(XY)
893 942
     } # else subsns = sns[j]
894 943
     res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
895
-                               seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget,
896
-                               save.it=save.it, snpFile=snpFile, cnFile=cnFile)
897
-    open(res[["A"]])
898
-    open(res[["B"]])
899
-    open(res[["SNR"]])
900
-    open(res[["mixtureParams"]])
944
+                               seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #,
945
+#                               save.it=save.it, snpFile=snpFile, cnFile=cnFile)
946
+
947
+    is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
901 948
 
949
+    if(is.lds) {
950
+      open(res[["A"]])
951
+      open(res[["B"]])
952
+      open(res[["SNR"]])
953
+      open(res[["mixtureParams"]])
954
+    }
955
+  
902 956
 #    fD = featureData(XY)
903 957
 #    phenD = XY@phenoData
904 958
 #    protD = XY@protocolData
... ...
@@ -951,22 +1005,48 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
951 1005
 #    rm(res); gc()
952 1006
   if(row.names) row.names=res$gns else row.names=NULL
953 1007
   if(col.names) col.names=res$sns else col.names=NULL
954
-  res2 <- crlmmGT2(A=res[["A"]], #as.matrix(A(callSet)[snp.index,]), # j]),
955
-                  B=res[["B"]], # as.matrix(B(callSet)[snp.index,]),  # j]),
956
-                  SNR=res[["SNR"]], # callSet$SNR, # [j],
957
-                  mixtureParams=res[["mixtureParams"]], #,
958
-                  cdfName=res[["cdfName"]], # annotation(callSet),
959
-                  row.names=row.names, # featureNames(callSet)[snp.index],
960
-                  col.names=col.names, # sampleNames(callSet), #[j],
961
-                  probs=probs,
962
-                  DF=DF,
963
-                  SNRMin=SNRMin,
964
-                  recallMin=recallMin,
965
-                  recallRegMin=recallRegMin,
966
-                  gender=gender,
967
-                  verbose=verbose,
968
-                  returnParams=returnParams,
969
-                  badSNP=badSNP)
1008
+  
1009
+  FUN <- ifelse(is.lds, "crlmmGT2", "crlmmGT")
1010
+  ## genotyping
1011
+  crlmmGTfxn <- function(FUN,...){
1012
+		switch(FUN,
1013
+		       crlmmGT2=crlmmGT2(...),
1014
+		       crlmmGT=crlmmGT(...))
1015
+              }
1016
+  res2 <- crlmmGTfxn(FUN,
1017
+                     A=res[["A"]],
1018
+                     B=res[["B"]],
1019
+                     SNR=res[["SNR"]],
1020
+                     mixtureParams=res[["mixtureParams"]],
1021
+                     cdfName=cdfName,
1022
+                     row.names=row.names,
1023
+                     col.names=col.names,
1024
+                     probs=probs,
1025
+                     DF=DF,
1026
+                     SNRMin=SNRMin,
1027
+                     recallMin=recallMin,
1028
+                     recallRegMin=recallRegMin,
1029
+                     gender=gender,
1030
+                     verbose=verbose,
1031
+                     returnParams=returnParams,
1032
+                     badSNP=badSNP)
1033
+  
1034
+#  res2 <- crlmmGT2(A=res[["A"]], #as.matrix(A(callSet)[snp.index,]), # j]),
1035
+#                  B=res[["B"]], # as.matrix(B(callSet)[snp.index,]),  # j]),
1036
+#                  SNR=res[["SNR"]], # callSet$SNR, # [j],
1037
+#                  mixtureParams=res[["mixtureParams"]], #,
1038
+#                  cdfName=res[["cdfName"]], # annotation(callSet),
1039
+#                  row.names=row.names, # featureNames(callSet)[snp.index],
1040
+#                  col.names=col.names, # sampleNames(callSet), #[j],
1041
+#                  probs=probs,
1042
+#                  DF=DF,
1043
+#                  SNRMin=SNRMin,
1044
+#                  recallMin=recallMin,
1045
+#                  recallRegMin=recallRegMin,
1046
+#                  gender=gender,
1047
+#                  verbose=verbose,
1048
+#                  returnParams=returnParams,
1049
+#                  badSNP=badSNP)
970 1050
 #    rm(res); gc()
971 1051
 #    suppressWarnings(snpCall(callSet)[snp.index, j] <- tmp[["calls"]])
972 1052
 #    suppressWarnings(snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]])
... ...
@@ -1008,3 +1088,283 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
1008 1088
 #  }
1009 1089
 #    return(callSet)
1010 1090
 }
1091
+
1092
+# Add function analogous to Rob's Affy functions to set up container
1093
+getProtocolData.Illumina = function(filenames, sep="_", fileExt="Grn.idat") {
1094
+       narrays = length(filenames)
1095
+
1096
+       headerInfo = list(nProbes = rep(NA, narrays),
1097
+                         Barcode = rep(NA, narrays),
1098
+                         ChipType = rep(NA, narrays),
1099
+                         Manifest = rep(NA, narrays), # not sure about this one - sometimes blank
1100
+                         Position = rep(NA, narrays)) # this may also vary a bit
1101
+
1102
+       scanDates = data.frame(ScanDate=rep(NA, narrays), DecodeDate=rep(NA, narrays))
1103
+       rownames(scanDates) = gsub(paste(sep, fileExt, sep=""), "", filenames)
1104
+       ## read in the data
1105
+       for(i in seq_along(filenames)) {
1106
+	       cat("reading", filenames[i], "\n")
1107
+	       idsG = G = NULL
1108
+	       G = readIDAT(filenames[i])
1109
+	       idsG = rownames(G$Quants)
1110
+	       headerInfo$nProbes[i] = G$nSNPsRead
1111
+	       headerInfo$Barcode[i] = G$Barcode
1112
+	       headerInfo$ChipType[i] = G$ChipType
1113
+	       headerInfo$Manifest[i] = G$Unknown$MostlyNull
1114
+	       headerInfo$Position[i] = G$Unknowns$MostlyA
1115
+               if(headerInfo$nProbes[i]>(headerInfo$nProbes[1]+10000) || headerInfo$nProbes[i]<(headerInfo$nProbes[1]-10000)) {
1116
+		       warning("Chips are not of the same type.  Skipping ", basename(filenames[i]))
1117
+		       next()
1118
+	       }
1119
+	       scanDates$ScanDate[i] = G$RunInfo[1, 1]
1120
+	       scanDates$DecodeDate[i] = G$RunInfo[2, 1]
1121
+	       rm(G)
1122
+	       gc()
1123
+       }
1124
+       protocoldata = new("AnnotatedDataFrame",
1125
+			    data=scanDates,
1126
+			    varMetadata=data.frame(labelDescription=colnames(scanDates),
1127
+			                           row.names=colnames(scanDates)))
1128
+	return(protocoldata)
1129
+}
1130
+
1131
+
1132
+construct.Illumina <- function(sampleSheet=NULL,
1133
+			  arrayNames=NULL,
1134
+			  ids=NULL,
1135
+			  path=".",
1136
+			  arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
1137
+			  highDensity=FALSE,
1138
+			  sep="_",
1139
+			  fileExt=list(green="Grn.idat",
1140
+			  red="Red.idat"),
1141
+		      	  cdfName,
1142
+		      	  copynumber=TRUE,
1143
+		      	  verbose=TRUE, batch, fns, saveDate=TRUE){
1144
+       if(!is.null(arrayNames)) {
1145
+               pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
1146
+       }
1147
+       if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
1148
+	       if(is.null(arrayNames)) {
1149
+		       if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
1150
+			       barcode = sampleSheet[,arrayInfoColNames$barcode]
1151
+			       arrayNames=barcode
1152
+		       }
1153
+		       if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) {
1154
+			       position = sampleSheet[,arrayInfoColNames$position]
1155
+			       if(is.null(arrayNames))
1156
+				       arrayNames=position
1157
+			       else
1158
+				       arrayNames = paste(arrayNames, position, sep=sep)
1159
+			       if(highDensity) {
1160
+				       hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02")
1161
+				       for(i in names(hdExt))
1162
+					       arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames)
1163
+			       }
1164
+		       }
1165
+	       }
1166
+	       pd = new("AnnotatedDataFrame", data = sampleSheet)
1167
+	       sampleNames(pd) <- basename(arrayNames)
1168
+       }
1169
+       if(is.null(arrayNames)) {
1170
+               arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
1171
+               if(!is.null(sampleSheet)) {
1172
+                      sampleSheet=NULL
1173
+                      cat("Could not find required info in \'sampleSheet\' - ignoring.  Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
1174
+               }
1175
+               pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
1176
+       }
1177
+
1178
+       narrays = length(arrayNames)
1179
+          
1180
+       if(!missing(batch)) {
1181
+		stopifnot(length(batch) == narrays)
1182
+       }
1183
+       if(missing(batch)) {
1184
+                batch = as.factor(rep(1, narrays))
1185
+       }
1186
+
1187
+       grnfiles = paste(arrayNames, fileExt$green, sep=sep)
1188
+       redfiles = paste(arrayNames, fileExt$red, sep=sep)
1189
+       if(length(grnfiles)==0 || length(redfiles)==0)
1190
+	       stop("Cannot find .idat files")
1191
+       if(length(grnfiles)!=length(redfiles))
1192
+	       stop("Cannot find matching .idat files")
1193
+       if(path[1] != "." & path[1] != ""){
1194
+	       grnidats = file.path(path, grnfiles)
1195
+	       redidats = file.path(path, redfiles)
1196
+       }  else {
1197
+	       message("path arg not set.  Assuming files are in local directory, or that complete path is provided")
1198
+	       grnidats = grnfiles
1199
+	       redidats = redfiles
1200
+       }
1201
+       if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files")
1202
+       if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files")
1203
+
1204
+	if(verbose) message("Initializing container for copy number estimation")
1205
+	featureData <- getFeatureData.Affy(cdfName, copynumber=copynumber)
1206
+	if(!missing(fns)){
1207
+		index <- match(fns, featureNames(featureData))
1208
+		if(all(is.na(index))) stop("fns not in featureNames")
1209
+		featureData <- featureData[index, ]
1210
+	}
1211
+	nr <- nrow(featureData); nc <- narrays
1212
+	cnSet <- new("CNSet",
1213
+		     alleleA=initializeBigMatrix(name="A", nr, nc),
1214
+		     alleleB=initializeBigMatrix(name="B", nr, nc),
1215
+		     call=initializeBigMatrix(name="call", nr, nc),
1216
+		     callProbability=initializeBigMatrix(name="callPr", nr,nc),
1217
+		     annotation=cdfName,
1218
+		     batch=batch)
1219
+        if (!is.null(sampleSheet) && !is.null(sampleSheet$Sample_ID)) {
1220
+	   	sampleNames(cnSet) = sampleSheet$Sample_ID
1221
+        }
1222
+        else sampleNames(cnSet) = arrayNames
1223
+
1224
+	if(saveDate){
1225
+		protocolData <- getProtocolData.Illumina(grnidats, sep=sep, fileExt=fileExt$green)
1226
+	} else{
1227
+		protocolData <- annotatedDataFrameFrom(A(cnSet), byrow=FALSE)
1228
+	}
1229
+	rownames(pData(protocolData)) <- sampleNames(cnSet) # sns
1230
+	protocolData(cnSet) <- protocolData
1231
+	featureData(cnSet) <- featureData
1232
+	featureNames(cnSet) <- featureNames(featureData)
1233
+	pd <- data.frame(matrix(NA, nc, 3), row.names=sampleNames(cnSet)) #  sns)
1234
+	colnames(pd)=c("SKW", "SNR", "gender")
1235
+	phenoData(cnSet) <- new("AnnotatedDataFrame", data=pd)
1236
+	return(cnSet)
1237
+}
1238
+
1239
+genotype.Illumina <- function(sampleSheet=NULL,
1240
+			  arrayNames=NULL,
1241
+			  ids=NULL,
1242
+			  path=".",
1243
+			  arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
1244
+			  highDensity=FALSE,
1245
+			  sep="_",
1246
+			  fileExt=list(green="Grn.idat",
1247
+			  red="Red.idat"),
1248
+		      	  cdfName,
1249
+		      	  copynumber=TRUE,
1250
+                          batch,
1251
+                          fns,
1252
+                          saveDate=TRUE,
1253
+       			  stripNorm=TRUE,
1254
+			  useTarget=TRUE,
1255
+		          mixtureSampleSize=10^5,
1256
+		          eps=0.1,
1257
+		          verbose=TRUE,
1258
+		          seed=1,
1259
+		          sns,
1260
+		          probs=rep(1/3, 3),
1261
+		          DF=6,
1262
+		          SNRMin=5,
1263
+		          recallMin=10,
1264
+		          recallRegMin=1000,
1265
+		          gender=NULL,
1266
+		          returnParams=TRUE,
1267
+		          badSNP=0.7) {
1268
+	is.lds <- ifelse(isPackageLoaded("ff"), TRUE, FALSE)
1269
+	if(missing(cdfName)) stop("must specify cdfName")
1270
+	if(!isValidCdfName(cdfName)) stop("cdfName not valid.  see validCdfNames")
1271
+	callSet <- construct.Illumina(sampleSheet=sampleSheet, arrayNames=arrayNames,
1272
+			     ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
1273
+                             highDensity=highDensity, sep=sep, fileExt=fileExt, 
1274
+			     cdfName=cdfName, copynumber=copynumber, verbose=verbose, batch=batch,
1275
+                             fns=fns, saveDate=saveDate)
1276
+	open(callSet)
1277
+	mixtureParams <- matrix(NA, 4, nrow(callSet))
1278
+	is.snp <- isSnp(callSet)
1279
+	snp.index <- which(is.snp)
1280
+
1281
+        RG = readIdatFiles(sampleSheet=sampleSheet, arrayNames=arrayNames,
1282
+                       ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
1283
+                       highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate)
1284
+
1285
+        XY = RGtoXY(RG, chipType=cdfName)
1286
+        rm(RG); gc()
1287
+        if (missing(sns)) { sns = sampleNames(XY) #subsns = sampleNames(XY)
1288
+        } # else subsns = sns[j]
1289
+        
1290
+        res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
1291
+                               seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget)
1292
+        #                       save.it=save.it, snpFile=snpFile, cnFile=cnFile)
1293
+        rm(XY); gc()
1294
+	if(verbose) message("Finished preprocessing.")
1295
+	if(is.lds){
1296
+                open(res[["A"]])
1297
+                open(res[["B"]])
1298
+                open(res[["SNR"]])
1299
+                open(res[["mixtureParams"]])
1300
+		bb = ocProbesets()*ncol(A)*8
1301
+		ffrowapply(A(callSet)[i1:i2, ] <- res[["A"]][i1:i2, ], X=res[["A"]], BATCHBYTES=bb)
1302
+		ffrowapply(B(callSet)[i1:i2, ] <- res[["B"]][i1:i2, ], X=res[["B"]], BATCHBYTES=bb)
1303
+	} else{
1304
+		A(callSet)[snp.index, ] <- res[["A"]]
1305
+		B(callSet)[snp.index, ] <- res[["B"]]
1306
+	}
1307
+	pData(callSet)$SKW <- res[["SKW"]]
1308
+	pData(callSet)$SNR <- res[["SNR"]]
1309
+	mixtureParams <- res[["mixtureParams"]]
1310
+#	np.index <- which(!is.snp)
1311
+#	if(verbose) message("Normalizing nonpolymorphic markers")
1312
+#	FUN <- ifelse(is.lds, "cnrma2", "cnrma")
1313
+	## main purpose is to update 'alleleA'
1314
+#	cnrmaFxn <- function(FUN,...){
1315
+#		switch(FUN,
1316
+#		       cnrma=cnrma(...),
1317
+#		       cnrma2=cnrma2(...))
1318
+#	}
1319
+	## consider passing only A for NPs.
1320
+#	AA <- cnrmaFxn(FUN, A=A(callSet),
1321
+#		       filenames=filenames,
1322
+#		       row.names=featureNames(callSet)[np.index],
1323
+#		       cdfName=cdfName,
1324
+#		       sns=sns,
1325
+#		       seed=seed,
1326
+#		       verbose=verbose)
1327
+#	if(!is.lds) A(callSet) <- AA
1328
+#	rm(AA)
1329
+	FUN <- ifelse(is.lds, "crlmmGT2", "crlmmGT")
1330
+	## genotyping
1331
+	crlmmGTfxn <- function(FUN,...){
1332
+		switch(FUN,
1333
+		       crlmmGT2=crlmmGT2(...),
1334
+		       crlmmGT=crlmmGT(...))
1335
+	}
1336
+	tmp <- crlmmGTfxn(FUN,
1337
+			  A=res[["A"]],
1338
+			  B=res[["B"]],
1339
+			  SNR=res[["SNR"]],
1340
+			  mixtureParams=res[["mixtureParams"]],
1341
+			  cdfName=cdfName,
1342
+			  row.names=NULL,
1343
+			  col.names=sampleNames(callSet),
1344
+			  probs=probs,
1345
+			  DF=DF,
1346
+			  SNRMin=SNRMin,
1347
+			  recallMin=recallMin,
1348
+			  recallRegMin=recallRegMin,
1349
+			  gender=gender,
1350
+			  verbose=verbose,
1351
+			  returnParams=returnParams,
1352
+			  badSNP=badSNP)
1353
+        rm(res); gc()
1354
+	if(verbose) message("Genotyping finished.  Updating container with genotype calls and confidence scores.")
1355
+	if(is.lds){
1356
+		open(tmp[["calls"]])
1357
+		open(tmp[["confs"]])
1358
+		ffrowapply(snpCall(callSet)[i1:i2, ] <- tmp[["calls"]][i1:i2, ], X=tmp[["calls"]], BATCHBYTES=bb)
1359
+		ffrowapply(snpCallProbability(callSet)[i1:i2, ] <- tmp[["confs"]][i1:i2, ], X=tmp[["confs"]], BATCHBYTES=bb)
1360
+		close(tmp[["calls"]])
1361
+		close(tmp[["confs"]])
1362
+	} else {
1363
+		calls(callSet)[snp.index, ] <- tmp[["calls"]]
1364
+		snpCallProbability(callSet)[snp.index, ] <- tmp[["confs"]]
1365
+	}
1366
+	callSet$gender <- tmp$gender
1367
+	close(callSet)
1368
+	return(callSet)
1369
+}
1370
+
... ...
@@ -10,8 +10,7 @@
10 10
 crlmmIllumina(RG, XY, stripNorm=TRUE, 
11 11
       useTarget=TRUE, row.names=TRUE, col.names=TRUE,
12 12
       probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5,
13
-      gender=NULL, seed=1, save.it=FALSE, load.it=FALSE,
14
-      snpFile, cnFile, mixtureSampleSize=10^5,
13
+      gender=NULL, seed=1, mixtureSampleSize=10^5,
15 14
       eps=0.1, verbose=TRUE, cdfName, sns, recallMin=10,
16 15
       recallRegMin=1000, returnParams=FALSE, badSNP=0.7)
17 16
 }
... ...
@@ -32,12 +31,6 @@ crlmmIllumina(RG, XY, stripNorm=TRUE,
32 31
     defining sex. (1 - male; 2 - female)}
33 32
   \item{seed}{'integer' scalar for random number generator (used to
34 33
     sample \code{mixtureSampleSize} SNPs for mixture model.}
35
-  \item{save.it}{'logical'. Save preprocessed SNP and copy number data?}
36
-  \item{load.it}{'logical'. Load preprocessed SNP data to speed up analysis?}
37
-  \item{snpFile}{'character' with filename of preprocessed SNP data to
38
-    be saved/loaded.}
39
-  \item{cnFile}{'character' with filename of preprocessed copy number 
40
-    data to be saved.}
41 34
   \item{mixtureSampleSize}{'integer'. The number of SNP's to be used
42 35
     when fitting the mixture model.}
43 36
   \item{eps}{Minimum change for mixture model.}
... ...
@@ -66,11 +59,7 @@ crlmmIllumina(RG, XY, stripNorm=TRUE,
66 59
 \details{
67 60
 
68 61
   Note: The user should specify either the \code{RG} or \code{XY}
69
-  intensities, not both.  Alternatively if \code{crlmmIllumina} has been
70
-  run already with \code{save.it=TRUE}, the preprocessed data can be
71
-  loaded from file by specifying \code{load.it=TRUE} and
72
-  \code{intensityFile} (\code{RG} or \code{XY} are not needed in this case).
73
-
62
+  intensities, not both.
74 63
 }
75 64
 
76 65
 \references{
... ...
@@ -12,8 +12,7 @@ crlmmIlluminaV2(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
12 12
       highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"),
13 13
       saveDate=FALSE, stripNorm=TRUE, useTarget=TRUE, row.names=TRUE, col.names=TRUE,
14 14
       probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
15
-      seed=1, save.ab=FALSE, snpFile, cnFile,
16
-      mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
15
+      seed=1, mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
17 16
       cdfName, sns, recallMin=10, recallRegMin=1000,
18 17
       returnParams=FALSE, badSNP=.7)
19 18
 }
... ...
@@ -58,11 +57,6 @@ crlmmIlluminaV2(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
58 57
     defining sex. (1 - male; 2 - female)}
59 58
   \item{seed}{'integer' scalar for random number generator (used to
60 59
     sample \code{mixtureSampleSize} SNPs for mixture model.}
61
-  \item{save.it}{'logical'. Save preprocessed SNP and copy number data?}
62
-  \item{snpFile}{'character' with filename of preprocessed SNP data to
63
-    be saved/loaded.}
64
-  \item{cnFile}{'character' with filename of preprocessed copy number 
65
-    data to be saved.}
66 60
   \item{mixtureSampleSize}{'integer'. The number of SNP's to be used
67 61
     when fitting the mixture model.}
68 62
   \item{eps}{Minimum change for mixture model.}