Browse code

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@50021 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 08/10/2010 04:45:17
Showing5 changed files

... ...
@@ -565,3 +565,7 @@ function (which expects ff objects and supports parallel processing)
565 565
 ** new internal function processIDAT which uses ocLapply() to parallelize pre-processing of Illumina data
566 566
 ** changes to genotype.Illumina()
567 567
 ** updated vignettes - crlmmIllumina.Rnw and crlmmIllumina.pdf
568
+
569
+2010-10-07 M. Ritchie 1.7.20
570
+** updated vignettes - crlmmIllumina.Rnw and crlmmIllumina.pdf
571
+** tidied code and added snp.index to ffrowapply() in genotype.Illumina() when calls and confidence values are being updated in callSet.
... ...
@@ -1,8 +1,8 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.7.19
5
-Date: 2010-10-06
4
+Version: 1.7.20
5
+Date: 2010-10-07
6 6
 Author: Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>, Ingo Ruczinski <iruczins@jhsph.edu>, Rafael A Irizarry
7 7
 Maintainer: Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
8 8
 Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 5.0, 6.0, and Illumina platforms
... ...
@@ -3,7 +3,7 @@
3 3
 # or to use the optional 'Path' column in sampleSheet
4 4
 # - there is a lot of header information that is currently discarded - could try and store this somewhere in the resulting NChannelSet
5 5
 
6
-readIdatFiles <- function(sampleSheet=NULL,
6
+readIdatFiles = function(sampleSheet=NULL,
7 7
 			  arrayNames=NULL,
8 8
 			  ids=NULL,
9 9
 			  path=".",
... ...
@@ -35,7 +35,7 @@ readIdatFiles <- function(sampleSheet=NULL,
35 35
 		       }
36 36
 	       }
37 37
 	       pd = new("AnnotatedDataFrame", data = sampleSheet)
38
-	       sampleNames(pd) <- basename(arrayNames)
38
+	       sampleNames(pd) = basename(arrayNames)
39 39
        }
40 40
        if(is.null(arrayNames)) {
41 41
                arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
... ...
@@ -58,8 +58,8 @@ readIdatFiles <- function(sampleSheet=NULL,
58 58
 	       redidats = file.path(path, redfiles)
59 59
        }  else {
60 60
 	       message("path arg not set.  Assuming files are in local directory, or that complete path is provided")
61
-	       grnidats <- grnfiles
62
-	       redidats <- redfiles
61
+	       grnidats = grnfiles
62
+	       redidats = redfiles
63 63
        }
64 64
        if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files")
65 65
        if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files")
... ...
@@ -94,7 +94,7 @@ readIdatFiles <- function(sampleSheet=NULL,
94 94
 		       } else stop("Could not find probe IDs")
95 95
 		       nprobes = length(ids)
96 96
 		       narrays = length(arrayNames)
97
-		       RG <- new("NChannelSet",
97
+		       RG = new("NChannelSet",
98 98
 		                 R=initializeBigMatrix(name="R", nr=nprobes, nc=narrays, vmode="integer"),
99 99
 		                 G=initializeBigMatrix(name="G", nr=nprobes, nc=narrays, vmode="integer"),
100 100
 		                 zero=initializeBigMatrix(name="zero", nr=nprobes, nc=narrays, vmode="integer"),
... ...
@@ -426,20 +426,20 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
426 426
 	  chipType = match.arg(annotation(RG), chipList)
427 427
   } else chipType = match.arg(chipType, chipList)
428 428
 
429
-  pkgname <- getCrlmmAnnotationName(chipType)
429
+  pkgname = getCrlmmAnnotationName(chipType)
430 430
   if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
431
-     suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
432
-     msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
431
+     suggCall = paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
432
+     msg = paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
433 433
      message(strwrap(msg))
434 434
      stop("Package ", pkgname, " could not be found.")
435 435
      rm(suggCall, msg)
436 436
   }
437 437
   if(verbose) message("Loading chip annotation information.")
438 438
     loader("address.rda", .crlmmPkgEnv, pkgname)
439
-  aids <- getVarInEnv("addressA") # comes from AddressA_ID or Address column in manifest
440
-  bids <- getVarInEnv("addressB") # comes from AddressB_ID or Address2 column in manifest
441
-  ids <- names(aids)
442
-  snpbase <- getVarInEnv("base")
439
+  aids = getVarInEnv("addressA") # comes from AddressA_ID or Address column in manifest
440
+  bids = getVarInEnv("addressB") # comes from AddressB_ID or Address2 column in manifest
441
+  ids = names(aids)
442
+  snpbase = getVarInEnv("base")
443 443
 
444 444
   nsnps = length(aids)
445 445
   narrays = ncol(RG)
... ...
@@ -460,13 +460,13 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
460 460
 #  argrg = aids[rrgg]
461 461
 #  brgrg = bids[rrgg]
462 462
 
463
-  XY <- new("NChannelSet",
463
+  XY = new("NChannelSet",
464 464
 	     X=initializeBigMatrix(name="X", nr=nsnps, nc=narrays, vmode="integer"),
465 465
 	     Y=initializeBigMatrix(name="Y", nr=nsnps, nc=narrays, vmode="integer"),
466 466
 	     zero=initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer"),
467 467
 	     annotation=chipType, phenoData=RG@phenoData,
468 468
 	     protocolData=RG@protocolData, storage.mode="environment")
469
-  featureNames(XY) = ids # featureNames(RG)
469
+  featureNames(XY) = ids 
470 470
   sampleNames(XY) = sampleNames(RG)
471 471
   gc()
472 472
   # Need to initialize - matrices filled with NAs to begin with
... ...
@@ -517,17 +517,15 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
517 517
     close(XY@assayData$Y)
518 518
     close(XY@assayData$zero)
519 519
   }
520
-
521
-#  storageMode(XY) = "lockedEnvironment"
522 520
   XY
523 521
 }
524 522
 
525 523
 
526 524
 stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
527
-  pkgname <- getCrlmmAnnotationName(annotation(XY))
525
+  pkgname = getCrlmmAnnotationName(annotation(XY))
528 526
   if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
529
-    suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
530
-    msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
527
+    suggCall = paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
528
+    msg = paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
531 529
     message(strwrap(msg))
532 530
     stop("Package ", pkgname, " could not be found.")
533 531
     rm(suggCall, msg)
... ...
@@ -535,14 +533,14 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
535 533
   if(verbose) message("Loading strip and reference normalization information.")
536 534
   loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
537 535
 
538
-  stripnum <- getVarInEnv("stripnum")
536
+  stripnum = getVarInEnv("stripnum")
539 537
 
540 538
   if(useTarget)
541 539
     targetdist = getVarInEnv("reference")
542 540
 
543 541
   if(verbose){
544 542
     message("Quantile normalizing ", ncol(XY), " arrays by ", max(stripnum), " strips.")
545
-    if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=max(stripnum), style=3)
543
+    if (getRversion() > '2.7.0') pb = txtProgressBar(min=0, max=max(stripnum), style=3)
546 544
   }
547 545
   is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
548 546
   
... ...
@@ -564,8 +562,6 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
564 562
       tmp = normalize.quantiles(as.matrix(cbind(subX, subY)))
565 563
     XY@assayData$X[sel,] = matrix(as.integer(tmp[,1:(ncol(tmp)/2)]+16), nrow(tmp), ncol(tmp)/2)
566 564
     XY@assayData$Y[sel,] = matrix(as.integer(tmp[,(ncol(tmp)/2+1):ncol(tmp)]+16), nrow(tmp), ncol(tmp)/2)
567
-#    Xqws[sel,] = tmp[,1:(ncol(tmp)/2)]
568
-#    Yqws[sel,] = tmp[,(ncol(tmp)/2+1):ncol(tmp)]
569 565
     rm(subX, subY, tmp, sel)
570 566
     gc()
571 567
   }
... ...
@@ -580,7 +576,7 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
580 576
 }
581 577
 
582 578
 
583
-preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
579
+preprocessInfinium2 = function(XY, mixtureSampleSize=10^5,
584 580
 				fitMixture=TRUE,
585 581
 				eps=0.1,
586 582
 				verbose=TRUE,
... ...
@@ -596,13 +592,13 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
596 592
     XY = stripNormalize(XY, useTarget=useTarget, verbose=verbose)
597 593
 
598 594
 ## MR: the code below is mostly straight from snprma.R
599
-  if (missing(sns)) sns <- sampleNames(XY) #$X
595
+  if (missing(sns)) sns = sampleNames(XY) #$X
600 596
   if(missing(cdfName))
601
-    cdfName <- annotation(XY)
602
-  pkgname <- getCrlmmAnnotationName(cdfName)
597
+    cdfName = annotation(XY)
598
+  pkgname = getCrlmmAnnotationName(cdfName)
603 599
   if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
604
-    suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
605
-    msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
600
+    suggCall = paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
601
+    msg = paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
606 602
     message(strwrap(msg))
607 603
     stop("Package ", pkgname, " could not be found.")
608 604
     rm(suggCall, msg)
... ...
@@ -612,9 +608,9 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
612 608
   loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
613 609
   loader("snpProbesFid.rda", .crlmmPkgEnv, pkgname)
614 610
   loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
615
-  autosomeIndex <- getVarInEnv("autosomeIndex")
616
-  SMEDIAN <- getVarInEnv("SMEDIAN")
617
-  theKnots <- getVarInEnv("theKnots")
611
+  autosomeIndex = getVarInEnv("autosomeIndex")
612
+  SMEDIAN = getVarInEnv("SMEDIAN")
613
+  theKnots = getVarInEnv("theKnots")
618 614
   narrays = ncol(XY)
619 615
 
620 616
   is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)    
... ...
@@ -629,14 +625,14 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
629 625
         open(XY@assayData$Y)
630 626
         open(XY@assayData$zero)
631 627
       }
632
-      A <- matrix(as.integer(exprs(channel(XY, "X"))[npIndex,]), nprobes, narrays)
633
-      B <- matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays)
628
+      A = matrix(as.integer(exprs(channel(XY, "X"))[npIndex,]), nprobes, narrays)
629
+      B = matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays)
634 630
 
635 631
       # new lines below - useful to keep track of zeroed out probes
636
-      zero <- matrix(as.integer(exprs(channel(XY, "zero"))[npIndex,]), nprobes, narrays)
632
+      zero = matrix(as.integer(exprs(channel(XY, "zero"))[npIndex,]), nprobes, narrays)
637 633
 
638
-      colnames(A) <- colnames(B) <- colnames(zero) <- sns
639
-      rownames(A) <- rownames(B) <- rownames(zero) <- names(npIndex)
634
+      colnames(A) = colnames(B) = colnames(zero) = sns
635
+      rownames(A) = rownames(B) = rownames(zero) = names(npIndex)
640 636
 
641 637
       cnAB = list(A=A, B=B, zero=zero, sns=sns, gns=names(npIndex), cdfName=cdfName)
642 638
 
... ...
@@ -655,33 +651,33 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
655 651
 
656 652
   # next process snp probes
657 653
   snpIndex = getVarInEnv("snpProbesFid")
658
-  nprobes <- length(snpIndex)
654
+  nprobes = length(snpIndex)
659 655
 
660 656
   ##We will read each cel file, summarize, and run EM one by one
661 657
   ##We will save parameters of EM to use later
662
-  mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, narrays, "double")
663
-  SNR <- initializeBigVector("crlmmSNR-", narrays, "double")
664
-  SKW <- initializeBigVector("crlmmSKW-", narrays, "double")
658
+  mixtureParams = initializeBigMatrix("crlmmMixt-", 4, narrays, "double")
659
+  SNR = initializeBigVector("crlmmSNR-", narrays, "double")
660
+  SKW = initializeBigVector("crlmmSKW-", narrays, "double")
665 661
 
666 662
   ## This is the sample for the fitting of splines
667 663
   ## BC: I like better the idea of the user passing the seed,
668 664
   ##     because this might intefere with other analyses
669 665
   ##     (like what happened to GCRMA)
670 666
   set.seed(seed)
671
-  idx <- sort(sample(autosomeIndex, mixtureSampleSize))
672
-  idx2 <- sample(nprobes, 10^5)
667
+  idx = sort(sample(autosomeIndex, mixtureSampleSize))
668
+  idx2 = sample(nprobes, 10^5)
673 669
 
674 670
   ##S will hold (A+B)/2 and M will hold A-B
675 671
   ##NOTE: We actually dont need to save S. Only for pics etc...
676 672
   ##f is the correction. we save to avoid recomputing
677 673
 
678
-  A <- initializeBigMatrix("crlmmA-", nprobes, narrays, "integer")
679
-  B <- initializeBigMatrix("crlmmB-", nprobes, narrays, "integer")
680
-  zero <- initializeBigMatrix("crlmmZero-", nprobes, narrays, "integer")
674
+  A = initializeBigMatrix("crlmmA-", nprobes, narrays, "integer")
675
+  B = initializeBigMatrix("crlmmB-", nprobes, narrays, "integer")
676
+  zero = initializeBigMatrix("crlmmZero-", nprobes, narrays, "integer")
681 677
 
682 678
   if(verbose){
683 679
      message("Calibrating ", narrays, " arrays.")
684
-     if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=narrays, style=3)
680
+     if (getRversion() > '2.7.0') pb = txtProgressBar(min=0, max=narrays, style=3)
685 681
   }
686 682
 
687 683
   if(is.lds) {
... ...
@@ -694,28 +690,27 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
694 690
      A[,i] = as.integer(exprs(channel(XY, "X"))[snpIndex,i])
695 691
      B[,i] = as.integer(exprs(channel(XY, "Y"))[snpIndex,i])
696 692
      zero[,i] = as.integer(exprs(channel(XY, "zero"))[snpIndex,i])
697
-#    SKW[i] = mean((A[snpIndex,i][idx2]-mean(A[snpIndex,i][idx2]))^3)/(sd(A[snpIndex,i][idx2])^3)
698 693
      SKW[i] = mean((A[idx2,i]-mean(A[idx2,i]))^3)/(sd(A[idx2,i])^3)
699
-    if(fitMixture){
700
-      S <- (log2(A[idx,i])+log2(B[idx,i]))/2 - SMEDIAN
701
-      M <- log2(A[idx,i])-log2(B[idx,i])
694
+     if(fitMixture){
695
+        S = (log2(A[idx,i])+log2(B[idx,i]))/2 - SMEDIAN
696
+        M = log2(A[idx,i])-log2(B[idx,i])
702 697
 
703
-      ##we need to test the choice of eps.. it is not the max diff between funcs
704
-      tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
698
+        ##we need to test the choice of eps.. it is not the max diff between funcs
699
+        tmp = fitAffySnpMixture56(S, M, theKnots, eps=eps)
705 700
 
706
-      mixtureParams[, i] <- tmp[["coef"]]
707
-      SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
708
-    }
709
-    if(verbose) {
710
-       if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
711
-       else cat(".")
712
-    }
701
+        mixtureParams[, i] = tmp[["coef"]]
702
+        SNR[i] = tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
703
+      }
704
+      if(verbose) {
705
+        if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
706
+        else cat(".")
707
+      }
713 708
   }
714 709
   if (verbose) {
715 710
     if (getRversion() > '2.7.0') close(pb)
716 711
     else cat("\n")
717 712
   }
718
-  if (!fitMixture) SNR <- mixtureParams <- NA
713
+  if (!fitMixture) SNR = mixtureParams = NA
719 714
   ## gns comes from preprocStuff.rda
720 715
 
721 716
 #  if(save.it & !missing(snpFile)) {
... ...
@@ -763,7 +758,7 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
763 758
 }
764 759
 
765 760
 
766
-crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
761
+crlmmIllumina = function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
767 762
                   row.names=TRUE, col.names=TRUE,
768 763
                   probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
769 764
                   seed=1, # save.it=FALSE, load.it=FALSE, snpFile, cnFile,
... ...
@@ -787,9 +782,7 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
787 782
       else
788 783
         stop("Both RG and XY specified - please use one or the other")
789 784
     }
790
-    if (missing(sns)) {
791
-      sns = sampleNames(XY) #$X
792
-    }
785
+    if (missing(sns)) sns = sampleNames(XY)
793 786
     res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
794 787
                         seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #,
795 788
 #                        save.it=save.it, snpFile=snpFile, cnFile=cnFile)
... ...
@@ -841,14 +834,14 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
841 834
   if(row.names) row.names=res$gns else row.names=NULL
842 835
   if(col.names) col.names=res$sns else col.names=NULL
843 836
 
844
-  FUN <- ifelse(is.lds, "crlmmGT2", "crlmmGT")
837
+  FUN = ifelse(is.lds, "crlmmGT2", "crlmmGT")
845 838
   ## genotyping
846
-  crlmmGTfxn <- function(FUN,...){
839
+  crlmmGTfxn = function(FUN,...){
847 840
 		switch(FUN,
848 841
 		       crlmmGT2=crlmmGT2(...),
849 842
 		       crlmmGT=crlmmGT(...))
850 843
               }
851
-  res2 <- crlmmGTfxn(FUN,
844
+  res2 = crlmmGTfxn(FUN,
852 845
                      A=res[["A"]],
853 846
                      B=res[["B"]],
854 847
                      SNR=res[["SNR"]],
... ...
@@ -892,8 +885,8 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
892 885
 #    rm(tmp); gc()
893 886
 #    return(callSet)
894 887
 
895
-  res2[["SNR"]] <- res[["SNR"]]
896
-  res2[["SKW"]] <- res[["SKW"]]
888
+  res2[["SNR"]] = res[["SNR"]]
889
+  res2[["SKW"]] = res[["SKW"]]
897 890
 #  if(is.lds) {
898 891
 #    delete(res[["A"]])
899 892
 #    delete(res[["B"]])
... ...
@@ -927,36 +920,15 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
927 920
                           cdfName, sns, recallMin=10, recallRegMin=1000,
928 921
                           returnParams=FALSE, badSNP=.7) {
929 922
 
930
-  if(missing(cdfName)) stop("must specify cdfName")
931
-  if(!isValidCdfName(cdfName)) stop("cdfName not valid.  see validCdfNames")
923
+    if(missing(cdfName)) stop("must specify cdfName")
924
+    if(!isValidCdfName(cdfName)) stop("cdfName not valid.  see validCdfNames")
932 925
   
933
-  is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
934
-
935
-#  if(missing(sns)) sns <- basename(arrayNames)
926
+    is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
936 927
 
937
-#  if (save.rg & missing(rgFile))
938
-#    stop("'rgFile' is missing, and you chose save.rg")
939
-#  if (save.it & (missing(snpFile) | missing(cnFile)))
940
-#    stop("'snpFile' or 'cnFile' is missing and you chose save.it")
941
-#  batches = NULL
942
-#  if(!is.null(arrayNames))
943
-#    batches <- rep(1, length(arrayNames)) # problem here if arrayNames not specified! # splitIndicesByLength(seq(along=arrayNames), ocSamples())
944
-#  if(!is.null(sampleSheet))
945
-#    batches <- rep(1, nrow(sampleSheet))
946
-#  if(is.null(batches))
947
-#    batches=1
948
-#  k <- 1
949
-#  for(j in batches){
950
-#     if(verbose) message("Batch ", k, " of ", length(batches))
951
-#     RG = readIdatFiles(sampleSheet=sampleSheet[j,], arrayNames=arrayNames[j],
952
-#                       ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
953
-#                       highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate)
954
-     RG = readIdatFiles(sampleSheet=sampleSheet, arrayNames=arrayNames,
928
+    RG = readIdatFiles(sampleSheet=sampleSheet, arrayNames=arrayNames,
955 929
                        ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
956 930
                        highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate)
957 931
 
958
-#  if(save.rg)
959
-#	save(RG, file=rgFile)
960 932
 
961 933
     XY = RGtoXY(RG, chipType=cdfName)
962 934
     if(is.lds) {
... ...
@@ -965,85 +937,29 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
965 937
     }
966 938
     rm(RG); gc()
967 939
   
968
-    if (missing(sns)) { sns = sampleNames(XY) #subsns = sampleNames(XY)
969
-    } # else subsns = sns[j]
940
+    if (missing(sns)) { sns = sampleNames(XY)
941
+    } 
970 942
     res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
971 943
                                seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #,
972 944
 #                               save.it=save.it, snpFile=snpFile, cnFile=cnFile)
973 945
 
974
-#    if(is.lds) {
975
-#      open(res[["A"]])
976
-#      open(res[["B"]])
977
-#      open(res[["SNR"]])
978
-#      open(res[["mixtureParams"]])
979
-#    }
980
-  
981
-#    fD = featureData(XY)
982
-#    phenD = XY@phenoData
983
-#    protD = XY@protocolData
984
-
985 946
     if(is.lds) {
986 947
       open(XY@assayData$X); open(XY@assayData$Y); open(XY@assayData$zero)
987 948
       delete(XY@assayData$X); delete(XY@assayData$Y); delete(XY@assayData$zero)
988 949
     }
989 950
     rm(XY); gc()
990
-#    if(k == 1){
991
-#    if(verbose) message("Initializing container for alleleA, alleleB, call, callProbability")
992
-#    callSet <- new("SnpSuperSet",
993
-#                    alleleA=initializeBigMatrix(name="A", nr=nrow(res[[1]]), nc=length(sns)),
994
-#                    alleleB=initializeBigMatrix(name="B", nr=nrow(res[[1]]), nc=length(sns)),
995
-#                    call=initializeBigMatrix(name="call", nr=nrow(res[[1]]), nc=length(sns)),
996
-#                    callProbability=initializeBigMatrix(name="callPr", nr=nrow(res[[1]]), nc=length(sns)),
997
-# 		    annotation=cdfName, protocolData=protD, phenoData=phenD, featureData=fD)
998
-#    sampleNames(callSet) <- sns
999
-#            phenoData(callSet) <- getPhenoData(sampleSheet=sampleSheet,
1000
-#                                    arrayNames=sns,
1001
-#								    arrayInfoColNames=arrayInfoColNames)
1002
-#            pD <- data.frame(matrix(NA, length(sns), 1), row.names=sns)
1003
-#            colnames(pD) <- "ScanDate"
1004
-#            protocolData(callSet) <- pData(protD) # new("AnnotatedDataFrame", data=pD)
1005
-#            pData(protocolData(callSet))[j, ] <- pData(protocolData)
1006
-#    featureNames(callSet) <- res[["gns"]]
1007
-#    pData(callSet)$SKW <- rep(NA, length(sns))
1008
-#    pData(callSet)$SNR <- rep(NA, length(sns))
1009
-#    pData(callSet)$gender <- rep(NA, length(sns))
1010
-#	}
1011
-#	pData(callSet)[j,] <- phenD
1012
-#	pData(protocolData(callSet))[j,] <- protD
1013
-#	pData(callSet) <- phenD
1014
-#	pData(protocolData(callSet)) <- protD
1015 951
 
1016
-#    rm(phenD, protD, fD)
1017
-
1018
-#    if(k > 1 & nrow(res[[1]]) != nrow(callSet)){
1019
-        ##RS: I don't understand why the IDATS for the
1020
-        ##same platform potentially have different lengths
1021
-#        res[["A"]] <- res[["A"]][res$gns %in% featureNames(callSet), ]
1022
-#        res[["B"]] <- res[["B"]][res$gns %in% featureNames(callSet), ]
1023
-#    }
1024
-
1025
-#    snp.index <- res$snpIndex #match(res$gns, featureNames(callSet))
1026
-#    suppressWarnings(A(callSet)[, j] <- res[["A"]])
1027
-#    suppressWarnings(B(callSet)[, j] <- res[["B"]])
1028
-#    suppressWarnings(A(callSet) <- res[["A"]])
1029
-#    suppressWarnings(B(callSet) <- res[["B"]])
1030
-#    pData(callSet)$SKW[j] <- res$SKW
1031
-#    pData(callSet)$SNR[j] <- res$SNR
1032
-#    pData(callSet)$SKW <- res$SKW
1033
-#    pData(callSet)$SNR <- res$SNR
1034
-#    mixtureParams <- res$mixtureParams
1035
-#    rm(res); gc()
1036
-  if(row.names) row.names=res$gns else row.names=NULL
1037
-  if(col.names) col.names=res$sns else col.names=NULL
952
+    if(row.names) row.names=res$gns else row.names=NULL
953
+    if(col.names) col.names=res$sns else col.names=NULL
1038 954
   
1039
-  FUN <- ifelse(is.lds, "crlmmGT2", "crlmmGT")
1040
-  ## genotyping
1041
-  crlmmGTfxn <- function(FUN,...){
955
+    FUN = ifelse(is.lds, "crlmmGT2", "crlmmGT")
956
+    ## genotyping
957
+    crlmmGTfxn = function(FUN,...){
1042 958
 		switch(FUN,
1043 959
 		       crlmmGT2=crlmmGT2(...),
1044 960
 		       crlmmGT=crlmmGT(...))
1045 961
               }
1046
-  res2 <- crlmmGTfxn(FUN,
962
+    res2 = crlmmGTfxn(FUN,
1047 963
                      A=res[["A"]],
1048 964
                      B=res[["B"]],
1049 965
                      SNR=res[["SNR"]],
... ...
@@ -1061,80 +977,28 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
1061 977
                      returnParams=returnParams,
1062 978
                      badSNP=badSNP)
1063 979
   
1064
-#  res2 <- crlmmGT2(A=res[["A"]], #as.matrix(A(callSet)[snp.index,]), # j]),
1065
-#                  B=res[["B"]], # as.matrix(B(callSet)[snp.index,]),  # j]),
1066
-#                  SNR=res[["SNR"]], # callSet$SNR, # [j],
1067
-#                  mixtureParams=res[["mixtureParams"]], #,
1068
-#                  cdfName=res[["cdfName"]], # annotation(callSet),
1069
-#                  row.names=row.names, # featureNames(callSet)[snp.index],
1070
-#                  col.names=col.names, # sampleNames(callSet), #[j],
1071
-#                  probs=probs,
1072
-#                  DF=DF,
1073
-#                  SNRMin=SNRMin,
1074
-#                  recallMin=recallMin,
1075
-#                  recallRegMin=recallRegMin,
1076
-#                  gender=gender,
1077
-#                  verbose=verbose,
1078
-#                  returnParams=returnParams,
1079
-#                  badSNP=badSNP)
1080
-#    rm(res); gc()
1081
-#    suppressWarnings(snpCall(callSet)[snp.index, j] <- tmp[["calls"]])
1082
-#    suppressWarnings(snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]])
1083
-#    callSet$gender[j] <- tmp$gender
1084
-#    suppressWarnings(snpCall(callSet)[snp.index,] <- tmp[["calls"]])
1085
-#    suppressWarnings(snpCallProbability(callSet)[snp.index,] <- tmp[["confs"]])
1086
-#    callSet$gender <- tmp$gender
1087
-#    rm(tmp); gc()
1088
-#    return(callSet)
1089
-  if(is.lds) {
1090
-     open(res[["SNR"]]); open(res[["SKW"]])
1091
-
1092
-  }
1093
-  res2[["SNR"]] <- res[["SNR"]]
1094
-  res2[["SKW"]] <- res[["SKW"]]
1095
-#  if(is.lds) {
1096
-#    delete(res[["A"]]); delete(res[["B"]])
1097
-#    delete(res[["SKW"]]); delete(res[["SNR"]]); delete(res[["mixtureParams"]])
1098
-#  }
1099
-  rm(res); gc()
1100
-  return(list2SnpSet(res2, returnParams=returnParams))
1101
-#    tmp <- crlmmGT(A=as.matrix(A(callSet)[snp.index,]), # j]),
1102
-#                  B=as.matrix(B(callSet)[snp.index,]),  # j]),
1103
-#                  SNR=callSet$SNR, # [j],
1104
-#                  mixtureParams=mixtureParams,
1105
-#                  cdfName=annotation(callSet),
1106
-#                  row.names=featureNames(callSet)[snp.index],
1107
-#                  col.names=sampleNames(callSet), #[j],
1108
-#                  probs=probs,
1109
-#                  DF=DF,
1110
-#                  SNRMin=SNRMin,
1111
-#                  recallMin=recallMin,
1112
-#                  recallRegMin=recallRegMin,
1113
-#                  gender=gender,
1114
-#                  verbose=verbose,
1115
-#                  returnParams=returnParams,
1116
-#                  badSNP=badSNP)
1117
-#    suppressWarnings(snpCall(callSet)[snp.index, j] <- tmp[["calls"]])
1118
-#    suppressWarnings(snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]])
1119
-#    callSet$gender[j] <- tmp$gender
1120
-#    suppressWarnings(snpCall(callSet)[snp.index,] <- tmp[["calls"]])
1121
-#    suppressWarnings(snpCallProbability(callSet)[snp.index,] <- tmp[["confs"]])
1122
-#    callSet$gender <- tmp$gender
1123
-#    rm(tmp); gc()
1124
-#    k <- k+1
1125
-#  }
1126
-#    return(callSet)
980
+    if(is.lds) {
981
+      open(res[["SNR"]]); open(res[["SKW"]])
982
+    }
983
+    res2[["SNR"]] = res[["SNR"]]
984
+    res2[["SKW"]] = res[["SKW"]]
985
+ #  if(is.lds) {
986
+ #    delete(res[["A"]]); delete(res[["B"]])
987
+ #    delete(res[["SKW"]]); delete(res[["SNR"]]); delete(res[["mixtureParams"]])
988
+ #  }
989
+    rm(res); gc()
990
+    return(list2SnpSet(res2, returnParams=returnParams))
1127 991
 }
1128 992
 
1129
-# Add function analogous to Rob's Affy functions to set up container
993
+# Functions analogous to Rob's Affy functions to set up container
1130 994
 getProtocolData.Illumina = function(filenames, sep="_", fileExt="Grn.idat") {
1131 995
        narrays = length(filenames)
1132 996
 
1133 997
        headerInfo = list(nProbes = rep(NA, narrays),
1134 998
                          Barcode = rep(NA, narrays),
1135 999
                          ChipType = rep(NA, narrays),
1136
-                         Manifest = rep(NA, narrays), # not sure about this one - sometimes blank
1137
-                         Position = rep(NA, narrays)) # this may also vary a bit
1000
+                         Manifest = rep(NA, narrays),
1001
+                         Position = rep(NA, narrays))
1138 1002
 
1139 1003
        scanDates = data.frame(ScanDate=rep(NA, narrays), DecodeDate=rep(NA, narrays))
1140 1004
        rownames(scanDates) = gsub(paste(sep, fileExt, sep=""), "", filenames)
... ...
@@ -1166,7 +1030,7 @@ getProtocolData.Illumina = function(filenames, sep="_", fileExt="Grn.idat") {
1166 1030
 }
1167 1031
 
1168 1032
 
1169
-construct.Illumina <- function(sampleSheet=NULL,
1033
+construct.Illumina = function(sampleSheet=NULL,
1170 1034
 			  arrayNames=NULL,
1171 1035
 			  ids=NULL,
1172 1036
 			  path=".",
... ...
@@ -1201,7 +1065,7 @@ construct.Illumina <- function(sampleSheet=NULL,
1201 1065
 		       }
1202 1066
 	       }
1203 1067
 	       pd = new("AnnotatedDataFrame", data = sampleSheet)
1204
-	       sampleNames(pd) <- basename(arrayNames)
1068
+	       sampleNames(pd) = basename(arrayNames)
1205 1069
        }
1206 1070
        if(is.null(arrayNames)) {
1207 1071
                arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
... ...
@@ -1239,14 +1103,14 @@ construct.Illumina <- function(sampleSheet=NULL,
1239 1103
        if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files")
1240 1104
 
1241 1105
 	if(verbose) message("Initializing container for genotyping and copy number estimation")
1242
-	featureData <- getFeatureData.Affy(cdfName, copynumber=copynumber)
1106
+	featureData = getFeatureData.Affy(cdfName, copynumber=copynumber)
1243 1107
 	if(!missing(fns)){
1244
-		index <- match(fns, featureNames(featureData))
1108
+		index = match(fns, featureNames(featureData))
1245 1109
 		if(all(is.na(index))) stop("fns not in featureNames")
1246
-		featureData <- featureData[index, ]
1110
+		featureData = featureData[index, ]
1247 1111
 	}
1248
-	nr <- nrow(featureData); nc <- narrays
1249
-	cnSet <- new("CNSet",
1112
+	nr = nrow(featureData); nc = narrays
1113
+	cnSet = new("CNSet",
1250 1114
 		     alleleA=initializeBigMatrix(name="A", nr, nc),
1251 1115
 		     alleleB=initializeBigMatrix(name="B", nr, nc),
1252 1116
 		     call=initializeBigMatrix(name="call", nr, nc),
... ...
@@ -1259,22 +1123,22 @@ construct.Illumina <- function(sampleSheet=NULL,
1259 1123
         else sampleNames(cnSet) = arrayNames
1260 1124
 
1261 1125
 	if(saveDate){
1262
-		protocolData <- getProtocolData.Illumina(grnidats, sep=sep, fileExt=fileExt$green)
1126
+		protocolData = getProtocolData.Illumina(grnidats, sep=sep, fileExt=fileExt$green)
1263 1127
 	} else{
1264
-		protocolData <- annotatedDataFrameFrom(A(cnSet), byrow=FALSE)
1128
+		protocolData = annotatedDataFrameFrom(A(cnSet), byrow=FALSE)
1265 1129
 	}
1266
-	rownames(pData(protocolData)) <- sampleNames(cnSet) # sns
1267
-	protocolData(cnSet) <- protocolData
1268
-	featureData(cnSet) <- featureData
1269
-	featureNames(cnSet) <- featureNames(featureData)
1270
-	pd <- data.frame(matrix(NA, nc, 3), row.names=sampleNames(cnSet)) #  sns)
1130
+	rownames(pData(protocolData)) = sampleNames(cnSet)
1131
+	protocolData(cnSet) = protocolData
1132
+	featureData(cnSet) = featureData
1133
+	featureNames(cnSet) = featureNames(featureData)
1134
+	pd = data.frame(matrix(NA, nc, 3), row.names=sampleNames(cnSet)) 
1271 1135
 	colnames(pd)=c("SKW", "SNR", "gender")
1272
-	phenoData(cnSet) <- new("AnnotatedDataFrame", data=pd)
1136
+	phenoData(cnSet) = new("AnnotatedDataFrame", data=pd)
1273 1137
 	return(cnSet)
1274 1138
 }
1275 1139
 
1276 1140
 
1277
-genotype.Illumina <- function(sampleSheet=NULL,
1141
+genotype.Illumina = function(sampleSheet=NULL,
1278 1142
 			  arrayNames=NULL,
1279 1143
 			  ids=NULL,
1280 1144
 			  path=".",
... ...
@@ -1304,26 +1168,26 @@ genotype.Illumina <- function(sampleSheet=NULL,
1304 1168
 		          gender=NULL,
1305 1169
 		          returnParams=TRUE,
1306 1170
 		          badSNP=0.7) {
1307
-	is.lds <- ifelse(isPackageLoaded("ff"), TRUE, FALSE)
1171
+	is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
1308 1172
 	if(missing(cdfName)) stop("must specify cdfName")
1309 1173
 	if(!isValidCdfName(cdfName)) stop("cdfName not valid.  see validCdfNames")
1310
-	callSet <- construct.Illumina(sampleSheet=sampleSheet, arrayNames=arrayNames,
1174
+	callSet = construct.Illumina(sampleSheet=sampleSheet, arrayNames=arrayNames,
1311 1175
 			     ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
1312 1176
                              highDensity=highDensity, sep=sep, fileExt=fileExt, 
1313 1177
 			     cdfName=cdfName, copynumber=copynumber, verbose=verbose, batch=batch,
1314 1178
                              fns=fns, saveDate=saveDate)
1315 1179
         if(missing(sns)) sns = sampleNames(callSet)
1316 1180
 	open(callSet)
1317
-	is.snp <- isSnp(callSet)
1318
-	snp.index <- which(is.snp)
1181
+	is.snp = isSnp(callSet)
1182
+	snp.index = which(is.snp)
1319 1183
         narrays = ncol(callSet)
1320 1184
 
1321 1185
         if(is.lds) {
1322
-          sampleBatches <- splitIndicesByNode(seq(along=sampleNames(callSet)))
1186
+          sampleBatches = splitIndicesByNode(seq(along=sampleNames(callSet)))
1323 1187
 
1324
-          mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, narrays, "double")
1325
-          SNR <- initializeBigVector("crlmmSNR-", narrays, "double")
1326
-          SKW <- initializeBigVector("crlmmSKW-", narrays, "double")
1188
+          mixtureParams = initializeBigMatrix("crlmmMixt-", 4, narrays, "double")
1189
+          SNR = initializeBigVector("crlmmSNR-", narrays, "double")
1190
+          SKW = initializeBigVector("crlmmSKW-", narrays, "double")
1327 1191
           
1328 1192
           ocLapply(sampleBatches, processIDAT, sampleSheet=sampleSheet, arrayNames=arrayNames,
1329 1193
                  ids=ids, path=path, arrayInfoColNames=arrayInfoColNames, highDensity=highDensity,
... ...
@@ -1334,12 +1198,12 @@ genotype.Illumina <- function(sampleSheet=NULL,
1334 1198
 
1335 1199
           open(SKW)
1336 1200
           open(SNR)
1337
-          pData(callSet)$SKW <- SKW
1338
-          pData(callSet)$SNR <- SNR
1201
+          pData(callSet)$SKW = SKW
1202
+          pData(callSet)$SNR = SNR
1339 1203
           close(SNR)
1340 1204
           close(SKW)
1341 1205
         } else {
1342
-          mixtureParams <- matrix(NA, 4, nrow(callSet))
1206
+          mixtureParams = matrix(NA, 4, nrow(callSet))
1343 1207
 
1344 1208
           RG = readIdatFiles(sampleSheet=sampleSheet, arrayNames=arrayNames,
1345 1209
                        ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
... ...
@@ -1352,24 +1216,24 @@ genotype.Illumina <- function(sampleSheet=NULL,
1352 1216
                                seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget)
1353 1217
           rm(XY); gc()
1354 1218
           if(verbose) message("Finished preprocessing.")
1355
-          np.index <- which(!is.snp)        
1356
-          A(callSet)[snp.index, ] <- res[["A"]]
1357
-          B(callSet)[snp.index, ] <- res[["B"]]
1219
+          np.index = which(!is.snp)        
1220
+          A(callSet)[snp.index, ] = res[["A"]]
1221
+          B(callSet)[snp.index, ] = res[["B"]]
1358 1222
           if(length(np.index)>0) {
1359 1223
             for (j in 1:ncol(callSet)) {
1360
-        	A(callSet)[np.index, ] <- res[["cnAB"]]$A
1361
-        	B(callSet)[np.index, ] <- res[["cnAB"]]$B
1224
+        	A(callSet)[np.index, ] = res[["cnAB"]]$A
1225
+        	B(callSet)[np.index, ] = res[["cnAB"]]$B
1362 1226
         	}
1363 1227
           }
1364
-	  SKW = pData(callSet)$SKW <- res[["SKW"]]
1365
-	  SNR = pData(callSet)$SNR <- res[["SNR"]]
1366
-	  mixtureParams <- res[["mixtureParams"]]
1228
+	  SKW = pData(callSet)$SKW = res[["SKW"]]
1229
+	  SNR = pData(callSet)$SNR = res[["SNR"]]
1230
+	  mixtureParams = res[["mixtureParams"]]
1367 1231
           rm(res)
1368 1232
         }
1369 1233
 
1370
-	FUN <- ifelse(is.lds, "crlmmGT2", "crlmmGT")
1234
+	FUN = ifelse(is.lds, "crlmmGT2", "crlmmGT")
1371 1235
 	## genotyping
1372
-	crlmmGTfxn <- function(FUN,...){
1236
+	crlmmGTfxn = function(FUN,...){
1373 1237
 		switch(FUN,
1374 1238
 		       crlmmGT2=crlmmGT2(...),
1375 1239
 		       crlmmGT=crlmmGT(...))
... ...
@@ -1392,7 +1256,7 @@ genotype.Illumina <- function(sampleSheet=NULL,
1392 1256
           tmpB = B(callSet)[snp.index,]
1393 1257
         }
1394 1258
         
1395
-	tmp <- crlmmGTfxn(FUN,
1259
+	tmp = crlmmGTfxn(FUN,
1396 1260
 			  A=tmpA,
1397 1261
 			  B=tmpB,
1398 1262
 			  SNR=SNR,
... ...
@@ -1409,14 +1273,13 @@ genotype.Illumina <- function(sampleSheet=NULL,
1409 1273
 			  verbose=verbose,
1410 1274
 			  returnParams=returnParams,
1411 1275
 			  badSNP=badSNP)
1412
-#        rm(res); gc()
1413 1276
 	if(verbose) message("Genotyping finished.  Updating container with genotype calls and confidence scores.")
1414 1277
 	if(is.lds){
1415 1278
 		bb = ocProbesets()*ncol(callSet)*8
1416 1279
 		open(tmp[["calls"]])
1417 1280
 		open(tmp[["confs"]])
1418
-		ffrowapply(snpCall(callSet)[i1:i2, ] <- tmp[["calls"]][i1:i2, ], X=tmp[["calls"]], BATCHBYTES=bb)
1419
-		ffrowapply(snpCallProbability(callSet)[i1:i2, ] <- tmp[["confs"]][i1:i2, ], X=tmp[["confs"]], BATCHBYTES=bb)
1281
+		ffrowapply(snpCall(callSet)[snp.index,][i1:i2, ] <- tmp[["calls"]][i1:i2, ], X=tmp[["calls"]], BATCHBYTES=bb)
1282
+		ffrowapply(snpCallProbability(callSet)[snp.index,][i1:i2, ] <- tmp[["confs"]][i1:i2, ], X=tmp[["confs"]], BATCHBYTES=bb)
1420 1283
 #		close(tmp[["calls"]])
1421 1284
 #		close(tmp[["confs"]])
1422 1285
 #                open(tmpA); open(tmpB)
... ...
@@ -1424,11 +1287,11 @@ genotype.Illumina <- function(sampleSheet=NULL,
1424 1287
                 delete(tmp[["calls"]]); delete(tmp[["confs"]])
1425 1288
                 rm(tmpA, tmpB)
1426 1289
 	} else {
1427
-		calls(callSet)[snp.index, ] <- tmp[["calls"]]
1428
-		snpCallProbability(callSet)[snp.index, ] <- tmp[["confs"]]
1290
+		calls(callSet)[snp.index, ] = tmp[["calls"]]
1291
+		snpCallProbability(callSet)[snp.index, ] = tmp[["confs"]]
1429 1292
                 rm(tmpA, tmpB)
1430 1293
 	}
1431
-	callSet$gender <- tmp$gender
1294
+	callSet$gender = tmp$gender
1432 1295
         rm(tmp)
1433 1296
 	close(callSet)
1434 1297
 	return(callSet)
... ...
@@ -1454,75 +1317,52 @@ processIDAT =  function(sel, sampleSheet=NULL,
1454 1317
 			  stripNorm=TRUE,
1455 1318
 			  useTarget=TRUE,
1456 1319
                           A, B, SKW, SNR, mixtureParams, is.snp) {
1457
-# 	is.lds <- ifelse(isPackageLoaded("ff"), TRUE, FALSE)
1458
-
1320
+  
1459 1321
         if(length(path)>= length(sel)) path = path[sel]
1460 1322
         RG = readIdatFiles(sampleSheet=sampleSheet[sel,], arrayNames=arrayNames[sel],
1461 1323
                        ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
1462 1324
                        highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate)
1463 1325
 
1464 1326
         XY = RGtoXY(RG, chipType=cdfName)
1465
- #       if(is.lds) {
1466
-          open(RG@assayData$R); open(RG@assayData$G); open(RG@assayData$zero)
1467
-          delete(RG@assayData$R); delete(RG@assayData$G); delete(RG@assayData$zero); rm(RG)
1468
-#        } else {
1469
-#          rm(RG)
1470
-#        }
1327
+        open(RG@assayData$R); open(RG@assayData$G); open(RG@assayData$zero)
1328
+        delete(RG@assayData$R); delete(RG@assayData$G); delete(RG@assayData$zero); rm(RG)
1471 1329
         gc()
1472
-        if (missing(sns)) { sns = sampleNames(XY) #subsns = sampleNames(XY)
1473
-        } # else subsns = sns[j]
1330
+        if (missing(sns)) sns = sampleNames(XY)
1474 1331
         
1475 1332
         res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
1476 1333
                                seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget)
1477 1334
         #                       save.it=save.it, snpFile=snpFile, cnFile=cnFile)
1478
-#        if(is.lds) {
1479
-          open(XY@assayData$X); open(XY@assayData$Y); open(XY@assayData$zero)
1480
-          delete(XY@assayData$X); delete(XY@assayData$Y); delete(XY@assayData$zero); rm(XY)
1481
-#        } else {
1482
-#          rm(XY)
1483
-#        }
1335
+        open(XY@assayData$X); open(XY@assayData$Y); open(XY@assayData$zero)
1336
+        delete(XY@assayData$X); delete(XY@assayData$Y); delete(XY@assayData$zero); rm(XY)
1484 1337
         gc()
1485 1338
 	if(verbose) message("Finished preprocessing.")
1486 1339
         snp.index = which(is.snp)
1487
-	np.index <- which(!is.snp)      
1488
-#	if(is.lds){
1489
-                open(res[["A"]])
1490
-                open(res[["B"]])
1491
-                open(res[["SKW"]])
1492
-                open(res[["SNR"]])
1493
-                open(res[["mixtureParams"]])
1494
-		bb = ocProbesets()*length(sns)*8
1495
-		ffrowapply(A[snp.index,][i1:i2, sel] <- res[["A"]][i1:i2, sel], X=res[["A"]], BATCHBYTES=bb)
1496
-		ffrowapply(B[snp.index,][i1:i2, sel] <- res[["B"]][i1:i2, sel], X=res[["B"]], BATCHBYTES=bb)
1497
-        	if(length(np.index)>0) {
1498
-        		for (j in sel) {
1499
-        			A[np.index, j] <- res[["cnAB"]]$A[,j]
1500
-        			B[np.index, j] <- res[["cnAB"]]$B[,j]
1501
-        		}
1502
-                }
1503
-                delete(res[["A"]]); delete(res[["B"]])
1504
-#	} else {
1505
-#		A[snp.index, ] <- res[["A"]]
1506
-#		B[snp.index, ] <- res[["B"]]
1507
-#        	if(length(np.index)>0) {
1508
-#        		A[np.index, ] <- res[["cnAB"]]$A
1509
-#        		B[np.index, ] <- res[["cnAB"]]$B
1510
-#                }
1511
-#	}
1512
-#        open(res[["SKW"]])
1513
-#        open(res[["SNR"]])
1514
-#        open(res[["mixtureParams"]])
1515
-	SKW[sel] <- res[["SKW"]][]
1516
-	SNR[sel] <- res[["SNR"]][]
1517
-	mixtureParams[,sel] <- res[["mixtureParams"]][]
1340
+	np.index = which(!is.snp)      
1341
+
1342
+        open(res[["A"]])
1343
+        open(res[["B"]])
1344
+        open(res[["SKW"]])
1345
+        open(res[["SNR"]])
1346
+        open(res[["mixtureParams"]])
1347
+	bb = ocProbesets()*length(sns)*8
1348
+        ffrowapply(A[snp.index,][i1:i2, sel] <- res[["A"]][i1:i2, sel], X=res[["A"]], BATCHBYTES=bb)
1349
+	ffrowapply(B[snp.index,][i1:i2, sel] <- res[["B"]][i1:i2, sel], X=res[["B"]], BATCHBYTES=bb)
1350
+	if(length(np.index)>0) {
1351
+          for (j in sel) {
1352
+            A[np.index, j] = res[["cnAB"]]$A[,j]
1353
+            B[np.index, j] = res[["cnAB"]]$B[,j]
1354
+          }
1355
+        }
1356
+        delete(res[["A"]]); delete(res[["B"]])
1357
+	SKW[sel] = res[["SKW"]][]
1358
+	SNR[sel] = res[["SNR"]][]
1359
+	mixtureParams[,sel] = res[["mixtureParams"]][]
1518 1360
         close(A)
1519 1361
         close(B)
1520 1362
         close(SNR)
1521 1363
         close(SKW)
1522 1364
         close(mixtureParams)
1523
-#       	if(is.lds){
1524
-          delete(res[["SKW"]]); delete(res[["SNR"]]); delete(res[["mixtureParams"]])
1525
-#        }
1365
+        delete(res[["SKW"]]); delete(res[["SNR"]]); delete(res[["mixtureParams"]])
1526 1366
         rm(res)
1527 1367
         TRUE
1528 1368
       }
1529 1369
Binary files a/inst/doc/crlmmIllumina.pdf and b/inst/doc/crlmmIllumina.pdf differ
... ...
@@ -78,8 +78,8 @@ samples[1:5,]
78 78
 RG = readIdatFiles(samples, path=data.dir, arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), saveDate=TRUE)
79 79
 @
80 80
 
81
-Reading in this data takes approximately 90 seconds and peak memory usage 
82
-was 1.2 GB of RAM on our linux system.
81
+Reading in this data takes approximately 100 seconds and peak memory usage 
82
+was 0.8 GB of RAM on our linux system.
83 83
 If memory is limiting, load the \Rpackage{ff} package and run the same command.
84 84
 When this package is available, the objects are stored using disk rather then RAM.
85 85
 The \Robject{RG} object is an \Rclass{NChannelSet} which stores the 
... ...
@@ -123,7 +123,7 @@ Next we use the function \Rfunction{crlmmIllumina} which performs preprocessing
123 123
 crlmmResult = crlmmIllumina(RG=RG, cdfName="human370v1c", sns=pData(RG)$ID, returnParams=TRUE)
124 124
 @ 
125 125
 
126
-This analysis took 470 seconds to complete and peak memory usage was 3.3 GB on our system.
126
+This analysis took 18 minutes to complete and peak memory usage was 2.5 GB on our system.
127 127
 The output stored in \Robject{crlmmResult} is a \Rclass{SnpSet} object.                                                                                                                                                         
128 128
 <<explore2>>=
129 129
   class(crlmmResult)