# function below works OK provided all .idat files are in the current working directory
# - could add an option to allow files in Illumina directory structure to be handled
# or to use the optional 'Path' column in sampleSheet
# - there is a lot of header information that is currently discarded - could try and store this somewhere in the resulting NChannelSet

readIdatFiles = function(sampleSheet=NULL,
			  arrayNames=NULL,
			  ids=NULL,
			  path=".",
			  arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
			  highDensity=FALSE,
			  sep="_",
			  fileExt=list(green="Grn.idat", red="Red.idat"),
			  saveDate=FALSE, verbose=FALSE) {
	verbose <- FALSE
       if(!is.null(arrayNames)) {
               pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
       }
       if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
	       if(is.null(arrayNames)){
		       if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
			       barcode = sampleSheet[,arrayInfoColNames$barcode]
			       arrayNames=barcode
		       }
		       if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) {
			       position = sampleSheet[,arrayInfoColNames$position]
			       if(is.null(arrayNames))
				       arrayNames=position
			       else
				       arrayNames = paste(arrayNames, position, sep=sep)
			       if(highDensity) {
				       hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02")
				       for(i in names(hdExt))
					       arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames)
			       }
		       }
	       }
	       pd = new("AnnotatedDataFrame", data = sampleSheet)
	       sampleNames(pd) = basename(arrayNames)
       }
       if(is.null(arrayNames)) {
               arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
               if(!is.null(sampleSheet)) {
                      sampleSheet=NULL
                      cat("Could not find required info in \'sampleSheet\' - ignoring.  Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
               }
               pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
       }
       narrays = length(arrayNames)
       grnfiles = paste(arrayNames, fileExt$green, sep=sep)
       redfiles = paste(arrayNames, fileExt$red, sep=sep)
       if(length(grnfiles)==0 || length(redfiles)==0)
	       stop("Cannot find .idat files")
       if(length(grnfiles)!=length(redfiles))
	       stop("Cannot find matching .idat files")
       if(path[1] != "." & path[1] != ""){
	       grnidats = file.path(path, grnfiles)
	       redidats = file.path(path, redfiles)
       }  else {
	       message("path arg not set.  Assuming files are in local directory, or that complete path is provided")
	       grnidats = grnfiles
	       redidats = redfiles
       }
       if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files")
       if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files")
       headerInfo = list(nProbes = rep(NA, narrays),
                         Barcode = rep(NA, narrays),
                         ChipType = rep(NA, narrays),
                         Manifest = rep(NA, narrays), # not sure about this one - sometimes blank
                         Position = rep(NA, narrays)) # this may also vary a bit
       dates = list(decode=rep(NA, narrays),
                    scan=rep(NA, narrays))
       ## read in the data
       for(i in seq_along(arrayNames)) {
	       if(verbose) {
	       ## RS
		       ##cat("reading", arrayNames[i], "\t")
		       cat("reading", basename(arrayNames[i]), "\t")
		       cat(paste(sep, fileExt$green, sep=""), "\t")
	       }
	       idsG = idsR = G = R = NULL
	       G = readIDAT(grnidats[i])
	       idsG = rownames(G$Quants)
	       headerInfo$nProbes[i] = G$nSNPsRead
	       headerInfo$Barcode[i] = G$Barcode
	       headerInfo$ChipType[i] = G$ChipType
	       headerInfo$Manifest[i] = G$Unknown$MostlyNull
	       headerInfo$Position[i] = G$Unknowns$MostlyA
               if(headerInfo$nProbes[i]>(headerInfo$nProbes[1]+10000) || headerInfo$nProbes[i]<(headerInfo$nProbes[1]-10000)) {
		       warning("Chips are not of the same type.  Skipping ", basename(grnidats[i]), " and ", basename(redidats[i]))
		       next()
	       }
	       dates$decode[i] = G$RunInfo[1, 1]
	       dates$scan[i] = G$RunInfo[2, 1]
	       if(i==1) {
		       if(is.null(ids) && !is.null(G)){
			       ids = idsG
		       } else stop("Could not find probe IDs")
		       nprobes = length(ids)
		       narrays = length(arrayNames)
		       RG = new("NChannelSet",
		                 R=matrix(NA, nprobes, narrays),
		                 G=matrix(NA, nprobes, narrays),
		                 zero=matrix(NA, nprobes, narrays),
				 annotation=headerInfo$Manifest[1],
				 phenoData=pd, storage.mode="environment")
		       featureNames(RG) = ids
		       if(!is.null(sampleSheet) && !is.null(sampleSheet$Sample_ID)){
		            sampleNames(RG) = sampleSheet$Sample_ID
		       } else  sampleNames(RG) = arrayNames
		       gc()
	       }
	       if(length(ids)==length(idsG)) {
		       if(sum(ids==idsG)==nprobes) {
			       RG@assayData$G[,i] = G$Quants[, "Mean"]
			       zeroG = G$Quants[, "NBeads"]==0
		       }
	       } else {
		       indG = match(ids, idsG)
		       RG@assayData$G[,i] = G$Quants[indG, "Mean"]
		       zeroG = G$Quants[indG, "NBeads"]==0
	       }
	       rm(G)
	       gc()
	       if(verbose) {
                      cat(paste(sep, fileExt$red, sep=""), "\n")
	       }
	       R = readIDAT(redidats[i])
	       idsR = rownames(R$Quants)

	       if(length(ids)==length(idsG)) {
		       if(sum(ids==idsR)==nprobes) {
			       RG@assayData$R[,i] = R$Quants[ ,"Mean"]
		               zeroR = R$Quants[ ,"NBeads"]==0
		       }
	       } else {
		       indR = match(ids, idsR)
		       RG@assayData$R[,i] = R$Quants[indR, "Mean"]
		       zeroR = R$Quants[indR, "NBeads"]==0
	       }
	       RG@assayData$zero[,i] = zeroG | zeroR
	       rm(R, zeroG, zeroR)
	       gc()
       }
       if(saveDate) {
	       protocolData(RG)[["ScanDate"]] = dates$scan
       }
       storageMode(RG) = "lockedEnvironment"
       RG
}


## the readIDAT() and readBPM() functions below were provided by Keith Baggerly, 27/8/2008
readIDAT <- function(idatFile){
  fileSize <- file.info(idatFile)$size

  tempCon <- file(idatFile,"rb")
  prefixCheck <- readChar(tempCon,4)
  if(prefixCheck != "IDAT"){

  }

  versionNumber <- readBin(tempCon, "integer", n=1, size=8,
                           endian="little")

  if(versionNumber<3)
	  stop("Older style IDAT files not supported:  consider updating your scanner settings")

  nFields <- readBin(tempCon, "integer", n=1, size=4,
                     endian="little")

  fields <- matrix(0,nFields,3);
  colnames(fields) <- c("Field Code", "Byte Offset", "Bytes")
  for(i1 in 1:nFields){
    fields[i1,"Field Code"] <-
      readBin(tempCon, "integer", n=1, size=2, endian="little", signed=FALSE)
    fields[i1,"Byte Offset"] <-
      readBin(tempCon, "integer", n=1, size=8, endian="little")
  }

  knownCodes <- c(1000, 102, 103, 104, 107, 200, 300, 400,
                  401, 402, 403, 404, 405, 406, 407, 408, 409)
  names(knownCodes) <-
    c("nSNPsRead",  # 1000
      "IlluminaID", #  102
      "SD",         #  103
      "Mean",       #  104
      "NBeads",     #  107
      "MidBlock",   #  200
      "RunInfo",    #  300
      "RedGreen",   #  400
      "MostlyNull", #  401
      "Barcode",    #  402
      "ChipType",   #  403
      "MostlyA",    #  404
      "Unknown.1",  #  405
      "Unknown.2",  #  406
      "Unknown.3",  #  407
      "Unknown.4",  #  408
      "Unknown.5"   #  409
      )

  nNewFields <- 1
  rownames(fields) <- paste("Null", 1:nFields)
  for(i1 in 1:nFields){
    temp <- match(fields[i1,"Field Code"], knownCodes)
    if(!is.na(temp)){
      rownames(fields)[i1] <- names(knownCodes)[temp]
    }else{
      rownames(fields)[i1] <- paste("newField", nNewFields, sep=".")
      nNewFields <- nNewFields + 1
    }
  }

  seek(tempCon, fields["nSNPsRead", "Byte Offset"])
  nSNPsRead <- readBin(tempCon, "integer", n=1, size=4,
                       endian="little")

  seek(tempCon, fields["IlluminaID", "Byte Offset"])
  IlluminaID <- readBin(tempCon, "integer", n=nSNPsRead, size=4,
                       endian="little")

  seek(tempCon, fields["SD", "Byte Offset"])
  SD <- readBin(tempCon, "integer", n=nSNPsRead, size=2,
                endian="little", signed=FALSE)

  seek(tempCon, fields["Mean", "Byte Offset"])
  Mean <- readBin(tempCon, "integer", n=nSNPsRead, size=2,
                  endian="little", signed=FALSE)

  seek(tempCon, fields["NBeads", "Byte Offset"])
  NBeads <- readBin(tempCon, "integer", n=nSNPsRead, size=1, signed=FALSE)

  seek(tempCon, fields["MidBlock", "Byte Offset"])
  nMidBlockEntries <- readBin(tempCon, "integer", n=1, size=4,
                              endian="little")
  MidBlock <- readBin(tempCon, "integer", n=nMidBlockEntries, size=4,
                      endian="little")

  seek(tempCon, fields["RunInfo", "Byte Offset"])
  nRunInfoBlocks <- readBin(tempCon, "integer", n=1, size=4,
                            endian="little")
  RunInfo <- matrix(NA, nRunInfoBlocks, 5)
  colnames(RunInfo) <- c("RunTime", "BlockType", "BlockPars",
                         "BlockCode", "CodeVersion")
  for(i1 in 1:2) { #nRunInfoBlocks){  ## MR edit
    for(i2 in 1:5){
      nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
      RunInfo[i1,i2] <- readChar(tempCon, nChars)
    }
  }

  seek(tempCon, fields["RedGreen", "Byte Offset"])
  RedGreen <- readBin(tempCon, "numeric", n=1, size=4,
                      endian="little")
  #RedGreen <- readBin(tempCon, "integer", n=4, size=1,
  #                    endian="little", signed=FALSE)

  seek(tempCon, fields["MostlyNull", "Byte Offset"])
  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
  MostlyNull <- readChar(tempCon, nChars)

  seek(tempCon, fields["Barcode", "Byte Offset"])
  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
  Barcode <- readChar(tempCon, nChars)

  seek(tempCon, fields["ChipType", "Byte Offset"])
  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
  ChipType <- readChar(tempCon, nChars)

  seek(tempCon, fields["MostlyA", "Byte Offset"])
  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
  MostlyA <- readChar(tempCon, nChars)

  seek(tempCon, fields["Unknown.1", "Byte Offset"])
  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
  Unknown.1 <- readChar(tempCon, nChars)

  seek(tempCon, fields["Unknown.2", "Byte Offset"])
  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
  Unknown.2 <- readChar(tempCon, nChars)

  seek(tempCon, fields["Unknown.3", "Byte Offset"])
  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
  Unknown.3 <- readChar(tempCon, nChars)

  seek(tempCon, fields["Unknown.4", "Byte Offset"])
  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
  Unknown.4 <- readChar(tempCon, nChars)

  seek(tempCon, fields["Unknown.5", "Byte Offset"])
  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
  Unknown.5 <- readChar(tempCon, nChars)

  close(tempCon)

  Unknowns <-
    list(MostlyNull=MostlyNull,
         MostlyA=MostlyA,
         Unknown.1=Unknown.1,
         Unknown.2=Unknown.2,
         Unknown.3=Unknown.3,
         Unknown.4=Unknown.4,
         Unknown.5=Unknown.5)

  Quants <- cbind(Mean, SD, NBeads)
  colnames(Quants) <- c("Mean", "SD", "NBeads")
  rownames(Quants) <- as.character(IlluminaID)

  idatValues <-
    list(fileSize=fileSize,
         versionNumber=versionNumber,
         nFields=nFields,
         fields=fields,
         nSNPsRead=nSNPsRead,
         #IlluminaID=IlluminaID,
         #SD=SD,
         #Mean=Mean,
         #NBeads=NBeads,
         Quants=Quants,
         MidBlock=MidBlock,
         RunInfo=RunInfo,
         RedGreen=RedGreen,
         Barcode=Barcode,
         ChipType=ChipType,
         Unknowns=Unknowns)

  idatValues

}

readBPM <- function(bpmFile){

  ## Reads and parses Illumina BPM files

  fileSize <- file.info(bpmFile)$size

  tempCon <- file(bpmFile,"rb")

  # The first few bytes of the egtFile are some type of
  # header, but there's no related byte offset information.

  prefixCheck <- readChar(tempCon,3) ## should be "BPM"

  null.1 <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
  ## should be 1

  versionNumber <-
    readBin(tempCon, "integer", n=1, size=4, endian="little")
  ## should be 4

  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
  chipType <- readChar(tempCon, nChars)

  null.2 <- readBin(tempCon, "integer", n=2, size=1, signed=FALSE)

  csvLines <- readLines(tempCon, 22)

  entriesByteOffset <- seek(tempCon);
  nEntries <- readBin(tempCon, "integer", n=1, size=4,
                      endian="little")

  if(FALSE){

    snpIndexByteOffset <- seek(tempCon)
    snpIndex <- readBin(tempCon, "integer", n=nEntries, size=4,
                        endian="little")
    ## for the 1M array, these are simply in order from 1 to 1072820.

    snpNamesByteOffset <- seek(tempCon)
    snpNames <- rep("A", nEntries)
    for(i1 in 1:nEntries){
      nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
      snpNames[i1] <- readChar(tempCon, nChars)
    }

  }

  seek(tempCon, 15278138)

  normIDByteOffset <- seek(tempCon)
  normID <- readBin(tempCon, "integer", n=nEntries, size=1, signed=FALSE) + 1

  newBlockByteOffset <- seek(tempCon)
  newBlock <- readBin(tempCon, "integer", n=10000, size=1, signed=FALSE)

  close(tempCon)

  byteOffsets <- list(entriesByteOffset=entriesByteOffset,
                      #snpIndexByteOffset=snpIndexByteOffset,
                      #snpNamesByteOffset=snpNamesByteOffset,
                      normIDByteOffset=normIDByteOffset,
                      newBlockByteOffset=newBlockByteOffset)

  allStuff <- list(prefixCheck=prefixCheck,
                   null.1=null.1,
                   versionNumber=versionNumber,
                   chipType=chipType,
                   null.2=null.2,
                   csvLines=csvLines,
                   nEntries=nEntries,
                   #snpIndex=snpIndex,
                   #snpNames=snpNames,
                   normID=normID,
                   newBlock=newBlock,
                   byteOffsets=byteOffsets)
  allStuff

}


RGtoXY = function(RG, chipType, verbose=TRUE) {

  needToLoad <- !all(sapply(c('addressA', 'addressB', 'base'), isLoaded))
  if(needToLoad){
	  chipList = c("human1mv1c",# 1M
	  "human370v1c",            # 370CNV
	  "human650v3a",            # 650Y
	  "human610quadv1b",        # 610 quad
	  "human660quadv1a",        # 660 quad
	  "human370quadv3c",        # 370CNV quad
	  "human550v3b",            # 550K
	  "human1mduov3b",          # 1M Duo
	  "humanomni1quadv1b",      # Omni1 quad
	  "humanomni25quadv1b",     # Omni2.5 quad
	  "humanomniexpress12v1b",  # Omni express 12
	  "humanimmuno12v1b",       # Immuno chip 12
          "humancytosnp12v2p1h")    # CytoSNP 12
	  if(missing(chipType)){
		  chipType = match.arg(annotation(RG), chipList)
	  } else chipType = match.arg(chipType, chipList)
	  pkgname = getCrlmmAnnotationName(chipType)
	  if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
		  suggCall = paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
		  msg = paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
		  message(strwrap(msg))
		  stop("Package ", pkgname, " could not be found.")
		  rm(suggCall, msg)
	  }
	  if(verbose) message("Loading chip annotation information.")
	  loader("address.rda", .crlmmPkgEnv, pkgname)
  }
  aids = getVarInEnv("addressA") # comes from AddressA_ID or Address column in manifest
  bids = getVarInEnv("addressB") # comes from AddressB_ID or Address2 column in manifest
  snpbase = getVarInEnv("base")
##  loader(‘file.rda’)
##  x = getVarInEnv(‘x’)
##  y = getVarInEnv(‘y’)
##
##  I’d consider using something like:
##
##	  needToLoad = !all(sapply(c(‘x’, ‘y’), isLoaded))
##  if (needToLoad){
##	  loader(‘file.rda’)
##	  x = getVarInEnv(‘x’)
##	  y = getVarInEnv(‘y’)
##  }
  ids = names(aids)
  nsnps = length(aids)
  narrays = ncol(RG)
#  aidcol = match("AddressA_ID", colnames(annot))
#  if(is.na(aidcol))
#    aidcol = match("Address", colnames(annot))
#  bidcol = match("AddressB_ID", colnames(annot))
#  if(is.na(bidcol))
#    bidcol = match("Address2", colnames(annot))
#  aids = annot[, aidcol]
#  bids = annot[, bidcol]
#  snpids = annot[,"Name"]
#  snpbase = annot[,"SNP"]
  infI = !is.na(bids) & bids!=0
  aord = match(aids, featureNames(RG)) # NAs are possible here
  bord = match(bids, featureNames(RG)) # and here
#  argrg = aids[rrgg]
#  brgrg = bids[rrgg]
  XY <- new("NChannelSet",
	    X=matrix(0, nsnps, narrays),
	    Y=matrix(0, nsnps, narrays),
	    zero=matrix(0, nsnps, narrays),
	    annotation=chipType, phenoData=RG@phenoData,
	    protocolData=RG@protocolData, storage.mode="environment")
  featureNames(XY) = ids
  sampleNames(XY) = sampleNames(RG)
  ## RS
  rm(list=c("bord", "infI", "aids", "bids", "ids", "snpbase"))
  print(gc())
  # Need to initialize - matrices filled with NAs to begin with
#  XY@assayData$X[1:nsnps,] = 0
#  XY@assayData$Y[1:nsnps,] = 0
#  XY@assayData$zero[1:nsnps,] = 0

  # First sort out Infinium II SNPs, X -> R (allele A)  and Y -> G (allele B) from the same probe
  ## RS added
  not.na <- !is.na(aord)
  not.na.aord <- aord[not.na]
  ## RS substitution  !is.na(aord) -> not.na
  ##r <- as.matrix(exprs(channel(RG, "R"))[not.na.aord,]) # mostly red
  r <- as.matrix(assayData(RG)[["R"]][not.na.aord, ])
  XY@assayData$X[not.na,] <- r
  rm(r);gc()
  g <- as.matrix(assayData(RG)[["G"]][not.na.aord,]) # mostly green
  XY@assayData$Y[not.na,] <- g
  rm(g); gc()
  ##z <- as.matrix(exprs(channel(RG, "zero"))[not.na.aord,]) # mostly green
  z <- as.matrix(assayData(RG)[["zero"]][not.na.aord, ])
  XY@assayData$zero[not.na,] <- z
  rm(z); gc()
  ##RS added
  rm(RG)
  print(gc())
  ## Warning - not 100% sure that the code below is correct - could be more complicated than this
#  infIRR = infI & (snpbase=="[A/T]" | snpbase=="[T/A]" | snpbase=="[a/t]" | snpbase=="[t/a]")

#  X[infIRR,] = exprs(channel(RG, "R"))[aord[infIRR],] # mostly red
#  Y[infIRR,] = exprs(channel(RG, "R"))[bord[infIRR],] # mostly green

  # Finally Infinium I where X -> G from allele A probe and Y -> G from allele B probe
#  infIGG = infI & (snpbase=="[C/G]" | snpbase=="[G/C]" | snpbase=="[g/c]" | snpbase=="[c/g]")

#  X[infIGG,] = exprs(channel(RG, "G"))[aord[infIGG],] # mostly red
#  Y[infIGG,] = exprs(channel(RG, "G"))[bord[infIGG],] # mostly green

  #  For now zero out Infinium I probes
#  XY@assayData$X[infI,] = 0
#  XY@assayData$Y[infI,] = 0
#  XY@assayData$zero[infI,] = 0
#  gc()
  XY
}


stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
	if(useTarget){
		objectsNeeded <- c("stripnum", "reference")
	} else objectsNeeded <- "stripnum"
	needToLoad <- !all(sapply(objectsNeeded, isLoaded))
	if(needToLoad){
		pkgname = getCrlmmAnnotationName(annotation(XY))
		if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
			suggCall = paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
			msg = paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
			message(strwrap(msg))
			stop("Package ", pkgname, " could not be found.")
			rm(suggCall, msg)
		}
		if(verbose) message("Loading strip and reference normalization information.")
		loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
	}
	stripnum = getVarInEnv("stripnum")
	if(useTarget)
		targetdist = getVarInEnv("reference")
	if(verbose){
		message("Quantile normalizing ", ncol(XY), " arrays by ", max(stripnum), " strips.")
		if (getRversion() > '2.7.0') pb = txtProgressBar(min=0, max=max(stripnum), style=3)
	}
  for(s in 1:max(stripnum)) {
    if(verbose) {
      if (getRversion() > '2.7.0') setTxtProgressBar(pb, s)
      else cat(".")
    }
    sel = stripnum==s
    ##RS: faster to access data directly
    ##subX = as.matrix(exprs(channel(XY, "X"))[sel,])
    ##subY = as.matrix(exprs(channel(XY, "Y"))[sel,])
    subX <- as.matrix(assayData(XY)[["X"]][sel, ])
    subY <- as.matrix(assayData(XY)[["Y"]][sel, ])
    if(useTarget)
      tmp = normalize.quantiles.use.target(cbind(subX, subY), targetdist[[s]])
    else
      tmp = normalize.quantiles(cbind(subX, subY))
    XY@assayData$X[sel,] = matrix(as.integer(tmp[,1:(ncol(tmp)/2)]+16), nrow(tmp), ncol(tmp)/2)
    XY@assayData$Y[sel,] = matrix(as.integer(tmp[,(ncol(tmp)/2+1):ncol(tmp)]+16), nrow(tmp), ncol(tmp)/2)
    rm(subX, subY, tmp, sel)
    gc()
  }
  if(verbose)
    cat("\n")
  XY
}


preprocessInfinium2 = function(XY, mixtureSampleSize=10^5,
				fitMixture=TRUE,
				eps=0.1,
				verbose=TRUE,
				seed=1,
				cdfName,
				sns,
				stripNorm=TRUE,
				useTarget=TRUE) { #,
#               outdir=".") {
#				save.it=FALSE,
#				snpFile,
#				cnFile) {
	if(stripNorm)
		XY = stripNormalize(XY, useTarget=useTarget, verbose=verbose)
	## MR: the code below is mostly straight from snprma.R
	if (missing(sns)) sns = sampleNames(XY) #$X
	if(missing(cdfName))
		cdfName = annotation(XY)
	needToLoad <- !all(sapply(c("autosomeIndex",
				    "SMEDIAN",
				    "theKnots",
				    "npProbesFid"), isLoaded))
	if(needToLoad){
		pkgname = getCrlmmAnnotationName(cdfName)
		if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
			suggCall = paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
			msg = paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
			message(strwrap(msg))
			stop("Package ", pkgname, " could not be found.")
			rm(suggCall, msg)
		}
		if(verbose) message("Loading snp annotation and mixture model parameters.")
		loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
		loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
		loader("snpProbesFid.rda", .crlmmPkgEnv, pkgname)
		loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
	}
	autosomeIndex = getVarInEnv("autosomeIndex")
	SMEDIAN = getVarInEnv("SMEDIAN")
	theKnots = getVarInEnv("theKnots")
	npIndex = getVarInEnv("npProbesFid")
	snpIndex = getVarInEnv("snpProbesFid")
	narrays = ncol(XY)
	nprobes = length(npIndex)
	if(length(nprobes)>0) {
		## RS: channel creates an expression set.  This is much slower than directly accessing the data
		A <- matrix(as.integer(assayData(XY)[["X"]][npIndex, ]), nprobes, narrays)
		##system.time(A <- matrix(as.integer(exprs(channel(XY, "X"))[npIndex,]), nprobes, narrays))
		B <- matrix(as.integer(assayData(XY)[["Y"]][npIndex, ]), nprobes, narrays)
		##B = matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays)
		## new lines below - useful to keep track of zeroed out probes
		zero <- matrix(as.integer(assayData(XY)[["zero"]][npIndex, ]), nprobes, narrays)
		##zero = matrix(as.integer(exprs(channel(XY, "zero"))[npIndex,]), nprobes, narrays)
		colnames(A) = colnames(B) = colnames(zero) = sns
		rownames(A) = rownames(B) = rownames(zero) = names(npIndex)
		cnAB = list(A=A, B=B, zero=zero, sns=sns, gns=names(npIndex), cdfName=cdfName)
		##      t0 <- proc.time()
		##      save(cnAB, file=cnFile)
		##      t0 <- proc.time()-t0
		##      if(verbose) message("Used ", round(t0[3],1), " seconds to save ", cnFile, ".")
		rm(A, B, zero)
		print(gc())
	}
  # next process snp probes
  nprobes = length(snpIndex)
  ##We will read each cel file, summarize, and run EM one by one
  ##We will save parameters of EM to use later
  mixtureParams = matrix(NA, 4, narrays)
  SNR = rep(NA, narrays)
  SKW = rep(NA, narrays)
  ## This is the sample for the fitting of splines
  ## BC: I like better the idea of the user passing the seed,
  ##     because this might intefere with other analyses
  ##     (like what happened to GCRMA)
  set.seed(seed)
  idx = sort(sample(autosomeIndex, mixtureSampleSize))
  idx2 = sample(nprobes, 10^5)
  ##S will hold (A+B)/2 and M will hold A-B
  ##NOTE: We actually dont need to save S. Only for pics etc...
  ##f is the correction. we save to avoid recomputing

  A = matrix(NA, nprobes, narrays)
  B = matrix(NA, nprobes, narrays)
  zero = matrix(NA, nprobes, narrays)

  if(verbose){
     message("Calibrating ", narrays, " arrays.")
     if (getRversion() > '2.7.0') pb = txtProgressBar(min=0, max=narrays, style=3)
  }
  for(i in 1:narrays){
	  ## RS: faster to access data directly without using channel method
	  ##A[,i] = as.integer(exprs(channel(XY, "X"))[snpIndex,i])
	  A[, i] <- as.integer(assayData(XY)[["X"]][snpIndex, i])
	  B[, i] <- as.integer(assayData(XY)[["Y"]][snpIndex, i])
	  zero[, i] <- as.integer(assayData(XY)[["zero"]][snpIndex, i])
	  ##B[,i] = as.integer(exprs(channel(XY, "Y"))[snpIndex,i])
	  ##zero[,i] = as.integer(exprs(channel(XY, "zero"))[snpIndex,i])
	  SKW[i] = mean((A[idx2,i]-mean(A[idx2,i]))^3)/(sd(A[idx2,i])^3)
	  if(fitMixture){
		  S = (log2(A[idx,i])+log2(B[idx,i]))/2 - SMEDIAN
		  M = log2(A[idx,i])-log2(B[idx,i])
		  ##we need to test the choice of eps.. it is not the max diff between funcs
		  tmp = fitAffySnpMixture56(S, M, theKnots, eps=eps)
		  mixtureParams[, i] = tmp[["coef"]]
		  SNR[i] = tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
	  }
	  if(verbose) {
		  if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
		  else cat(".")
	  }
	  ## run garbage collection every now and then
	  if(i %% 100 == 0) gc();
  }
  if (verbose) {
	  if (getRversion() > '2.7.0') close(pb)
	  else cat("\n")
  }
  if (!fitMixture) SNR = mixtureParams = NA
  ## gns comes from preprocStuff.rda

#  if(save.it & !missing(snpFile)) {
#    t0 <- proc.time()
#    save(res, file=snpFile)
#    t0 <- proc.time()-t0
#    if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".")
#  }

  res = list(A=A, B=B,
             zero=zero, sns=sns, gns=names(snpIndex), SNR=SNR, SKW=SKW,
             mixtureParams=mixtureParams, cdfName=cdfName, cnAB=cnAB)

#  open(res[["A"]])
#  open(res[["B"]])
#  open(res[["zero"]])
#  open(res[["SNR"]])
#  open(res[["mixtureParams"]])

#  if(save.it & !missing(snpFile)) {
#    t0 <- proc.time()
#    save(res, file=snpFile)
#    t0 <- proc.time()-t0
#    if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".")
#  }

#  close(res[["A"]])
#  close(res[["B"]])
#  close(res[["zero"]])
#  close(res[["SNR"]])
#  close(res[["mixtureParams"]])

  return(res)
}


crlmmIllumina = function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
                  row.names=TRUE, col.names=TRUE,
                  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
                  seed=1, # save.it=FALSE, load.it=FALSE, snpFile, cnFile,
                  mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
                  cdfName, sns, recallMin=10, recallRegMin=1000,
                  returnParams=FALSE, badSNP=.7) {
#  if (save.it & (missing(snpFile) | missing(cnFile)))
#    stop("'snpFile' and/or 'cnFile' is missing and you chose to save.it")
#  if (load.it & missing(snpFile))
#    stop("'snpFile' is missing and you chose to load.it")
#  if (!missing(snpFile))
#    if (load.it & !file.exists(snpFile)){
#      load.it <- FALSE
#      message("File ", snpFile, " does not exist.")
#      stop("Cannot load SNP data.")
#  }
#  if (!load.it){
    if(!missing(RG)) {
      if(missing(XY))
        XY = RGtoXY(RG, chipType=cdfName)
      else
        stop("Both RG and XY specified - please use one or the other")
    }
    if (missing(sns)) sns = sampleNames(XY)
    res <- preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize,
			       fitMixture=TRUE, verbose=verbose,
			       seed=seed, eps=eps, cdfName=cdfName, sns=sns,
			       stripNorm=stripNorm, useTarget=useTarget) #,
#                        save.it=save.it, snpFile=snpFile, cnFile=cnFile)
    is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)


#    if(is.lds) {
#      open(res[["A"]])
#      open(res[["B"]])
#      open(res[["SNR"]])
#      open(res[["mixtureParams"]])
#    }

#    fD = featureData(XY)
#    phenD = XY@phenoData
#    protD = XY@protocolData
#    rm(XY)
#    gc()
#    if(verbose) message("Initializing container for alleleA, alleleB, call, callProbability")
#    callSet <- new("SnpSuperSet",
#                    alleleA=initializeBigMatrix(name="A", nr=nrow(res[[1]]), nc=length(sns)),
#                    alleleB=initializeBigMatrix(name="B", nr=nrow(res[[1]]), nc=length(sns)),
#                    call=initializeBigMatrix(name="call", nr=nrow(res[[1]]), nc=length(sns)),
#                    callProbability=initializeBigMatrix(name="callPr", nr=nrow(res[[1]]), nc=length(sns)),
#  	            annotation=cdfName, protocolData=protD, phenoData=phenD, featureData=fD)
#    sampleNames(callSet) <- sns
#    featureNames(callSet) <- res[["gns"]]
#    pData(callSet)$SKW <- rep(NA, length(sns))
#    pData(callSet)$SNR <- rep(NA, length(sns))
#    pData(callSet)$gender <- rep(NA, length(sns))

#  }else{
#      if(verbose) message("Loading ", snpFile, ".")
#        obj <- load(snpFile)
#        if(verbose) message("Done.")
#        if(!any(obj == "res"))
#          stop("Object in ", snpFile, " seems to be invalid.")
#  }

 #   rm(phenD, protD , fD)

#    snp.index <- res$snpIndex #match(res$gns, featureNames(callSet))
#    suppressWarnings(A(callSet) <- res[["A"]])
#    suppressWarnings(B(callSet) <- res[["B"]])
#    pData(callSet)$SKW <- res$SKW
#    pData(callSet)$SNR <- res$SNR
#    mixtureParams <- res$mixtureParams
#    rm(res); gc()
  if(row.names) row.names=res$gns else row.names=NULL
  if(col.names) col.names=res$sns else col.names=NULL
  FUN = ifelse(is.lds, "crlmmGT2", "crlmmGT")
  ## genotyping
  crlmmGTfxn = function(FUN,...){
		switch(FUN,
		       crlmmGT2=crlmmGT2(...),
		       crlmmGT=crlmmGT(...))
              }
  res2 = crlmmGTfxn(FUN,
                     A=res[["A"]],
                     B=res[["B"]],
                     SNR=res[["SNR"]],
                     mixtureParams=res[["mixtureParams"]],
                     cdfName=cdfName,
                     row.names=row.names,
                     col.names=col.names,
                     probs=probs,
                     DF=DF,
                     SNRMin=SNRMin,
                     recallMin=recallMin,
                     recallRegMin=recallRegMin,
                     gender=gender,
                     verbose=verbose,
                     returnParams=returnParams,
                     badSNP=badSNP)

#  res2 <- crlmmGT2(A=res[["A"]], #as.matrix(A(callSet)[snp.index,]), # j]),
#                  B=res[["B"]], # as.matrix(B(callSet)[snp.index,]),  # j]),
#                  SNR=res[["SNR"]], # callSet$SNR, # [j],
#                  mixtureParams=res[["mixtureParams"]], #,
#                  cdfName=res[["cdfName"]], # annotation(callSet),
#                  row.names=row.names, # featureNames(callSet)[snp.index],
#                  col.names=col.names, # sampleNames(callSet), #[j],
#                  probs=probs,
#                  DF=DF,
#                  SNRMin=SNRMin,
#                  recallMin=recallMin,
#                  recallRegMin=recallRegMin,
#                  gender=gender,
#                  verbose=verbose,
#                  returnParams=returnParams,
#                  badSNP=badSNP)
#    rm(res); gc()
#    suppressWarnings(snpCall(callSet)[snp.index, j] <- tmp[["calls"]])
#    suppressWarnings(snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]])
#    callSet$gender[j] <- tmp$gender
#    suppressWarnings(snpCall(callSet)[snp.index,] <- tmp[["calls"]])
#    suppressWarnings(snpCallProbability(callSet)[snp.index,] <- tmp[["confs"]])
#    callSet$gender <- tmp$gender
#    rm(tmp); gc()
#    return(callSet)

  res2[["SNR"]] = res[["SNR"]]
  res2[["SKW"]] = res[["SKW"]]
#  if(is.lds) {
#    delete(res[["A"]])
#    delete(res[["B"]])
#    delete(res[["SNR"]])
#    delete(res[["mixtureParams"]])
#  }
  rm(res); gc()
  return(list2SnpSet(res2, returnParams=returnParams))
}


## MR: Below is a more memory efficient version of crlmmIllumina() which
## reads in the .idats and genotypes in the one function and removes objects
## after they have been used
crlmmIlluminaV2 = function(sampleSheet=NULL,
			  arrayNames=NULL,
			  ids=NULL,
			  path=".",
			  arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
			  highDensity=FALSE,
			  sep="_",
			  fileExt=list(green="Grn.idat", red="Red.idat"),
			  saveDate=FALSE,
			  stripNorm=TRUE,
			  useTarget=TRUE,
 #                         outdir=".",
			  row.names=TRUE,
			  col.names=TRUE,
			  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
                          seed=1,  # save.it=FALSE, snpFile, cnFile,
                          mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
                          cdfName, sns, recallMin=10, recallRegMin=1000,
                          returnParams=FALSE, badSNP=.7) {

    if(missing(cdfName)) stop("must specify cdfName")
    if(!isValidCdfName(cdfName)) stop("cdfName not valid.  see validCdfNames")

#    is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
    is.lds=FALSE
    RG = readIdatFiles(sampleSheet=sampleSheet, arrayNames=arrayNames,
                       ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
                       highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate)


    XY = RGtoXY(RG, chipType=cdfName)
#    if(is.lds) {
#      open(RG@assayData$R); open(RG@assayData$G); open(RG@assayData$zero)
#      delete(RG@assayData$R); delete(RG@assayData$G); delete(RG@assayData$zero)
#    }
    rm(RG); gc()

    if (missing(sns)) { sns = sampleNames(XY)
    }
    res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
                               seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #, outdir=outdir) #,
#                               save.it=save.it, snpFile=snpFile, cnFile=cnFile)

#    if(is.lds) {
#      open(XY@assayData$X); open(XY@assayData$Y); open(XY@assayData$zero)
#      delete(XY@assayData$X); delete(XY@assayData$Y); delete(XY@assayData$zero)
#    }
    rm(XY); gc()

    if(row.names) row.names=res$gns else row.names=NULL
    if(col.names) col.names=res$sns else col.names=NULL

    FUN = ifelse(is.lds, "crlmmGT2", "crlmmGT")
    ## genotyping
    crlmmGTfxn = function(FUN,...){
		switch(FUN,
		       crlmmGT2=crlmmGT2(...),
		       crlmmGT=crlmmGT(...))
              }
    res2 = crlmmGTfxn(FUN,
                     A=res[["A"]],
                     B=res[["B"]],
                     SNR=res[["SNR"]],
                     mixtureParams=res[["mixtureParams"]],
                     cdfName=cdfName,
                     row.names=row.names,
                     col.names=col.names,
                     probs=probs,
                     DF=DF,
                     SNRMin=SNRMin,
                     recallMin=recallMin,
                     recallRegMin=recallRegMin,
                     gender=gender,
                     verbose=verbose,
                     returnParams=returnParams,
                     badSNP=badSNP)

#    if(is.lds) {
#      open(res[["SNR"]]); open(res[["SKW"]])
#    }
    res2[["SNR"]] = res[["SNR"]]
    res2[["SKW"]] = res[["SKW"]]
 #  if(is.lds) {
 #    delete(res[["A"]]); delete(res[["B"]])
 #    delete(res[["SKW"]]); delete(res[["SNR"]]); delete(res[["mixtureParams"]])
 #  }
    rm(res); gc()
    return(list2SnpSet(res2, returnParams=returnParams))
}

# Functions analogous to Rob's Affy functions to set up container
getProtocolData.Illumina = function(filenames, sep="_", fileExt="Grn.idat", verbose=FALSE) {
       narrays = length(filenames)

       headerInfo = list(nProbes = rep(NA, narrays),
                         Barcode = rep(NA, narrays),
                         ChipType = rep(NA, narrays),
                         Manifest = rep(NA, narrays),
                         Position = rep(NA, narrays))

       scanDates = data.frame(ScanDate=rep(NA, narrays), DecodeDate=rep(NA, narrays))
       rownames(scanDates) = gsub(paste(sep, fileExt, sep=""), "", filenames)
       ## read in the data
       for(i in seq_along(filenames)) {
               if(verbose)
	               cat("reading", filenames[i], "\n")
	       idsG = G = NULL
	       G = readIDAT(filenames[i])
	       idsG = rownames(G$Quants)
	       headerInfo$nProbes[i] = G$nSNPsRead
	       headerInfo$Barcode[i] = G$Barcode
	       headerInfo$ChipType[i] = G$ChipType
	       headerInfo$Manifest[i] = G$Unknown$MostlyNull
	       headerInfo$Position[i] = G$Unknowns$MostlyA
               if(headerInfo$nProbes[i]>(headerInfo$nProbes[1]+10000) || headerInfo$nProbes[i]<(headerInfo$nProbes[1]-10000)) {
		       warning("Chips are not of the same type.  Skipping ", basename(filenames[i]))
		       next()
	       }
	       scanDates$ScanDate[i] = G$RunInfo[1, 1]
	       scanDates$DecodeDate[i] = G$RunInfo[2, 1]
	       rm(G)
	       gc()
       }
       protocoldata = new("AnnotatedDataFrame",
			    data=scanDates,
			    varMetadata=data.frame(labelDescription=colnames(scanDates),
			                           row.names=colnames(scanDates)))
	return(protocoldata)
}


constructInf <- function(sampleSheet=NULL,
			 arrayNames=NULL,
			 path=".",
			 arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
			 highDensity=FALSE,
			 sep="_",
			 fileExt=list(green="Grn.idat", red="Red.idat"),
			 cdfName,
			 verbose=FALSE,
			 batch, #fns,
			 saveDate=TRUE) { #, outdir="."){
	verbose <- FALSE
	if(!is.null(arrayNames)) {
		pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
	}
	if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
		if(is.null(arrayNames)) {
			if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
				barcode = sampleSheet[,arrayInfoColNames$barcode]
				arrayNames=barcode
			}
			if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) {
				position = sampleSheet[,arrayInfoColNames$position]
				if(is.null(arrayNames))
					arrayNames=position
				else
					arrayNames = paste(arrayNames, position, sep=sep)
				if(highDensity) {
					hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02")
					for(i in names(hdExt))
						arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames)
				}
			}
		}
		pd = new("AnnotatedDataFrame", data = sampleSheet)
		sampleNames(pd) = basename(arrayNames)
	}
	if(is.null(arrayNames)) {
		arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
		if(!is.null(sampleSheet)) {
			sampleSheet=NULL
			cat("Could not find required info in \'sampleSheet\' - ignoring.  Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
		}
		pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
	}
	narrays = length(arrayNames)

	if(!missing(batch)) {
		stopifnot(length(batch) == narrays)
	}
	if(missing(batch)) {
                stop("Must specify 'batch'") # batch = as.factor(rep(1, narrays))
	}

	grnfiles = paste(arrayNames, fileExt$green, sep=sep)
	redfiles = paste(arrayNames, fileExt$red, sep=sep)
	if(length(grnfiles)==0 || length(redfiles)==0)
		stop("Cannot find .idat files")
	if(length(grnfiles)!=length(redfiles))
		stop("Cannot find matching .idat files")
	if(path[1] != "." & path[1] != ""){
		grnidats = file.path(path, grnfiles)
		redidats = file.path(path, redfiles)
	}  else {
		message("path arg not set.  Assuming files are in local directory, or that complete path is provided")
		grnidats = grnfiles
		redidats = redfiles
	}
	if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files")
	if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files")

	if(verbose) message("Initializing container for genotyping and copy number estimation")
	featureData = getFeatureData(cdfName, copynumber=TRUE)
	nr = nrow(featureData); nc = narrays
	cnSet <- new("CNSet",
		     alleleA=initializeBigMatrix(name="A", nr, nc),
		     alleleB=initializeBigMatrix(name="B", nr, nc),
		     call=initializeBigMatrix(name="call", nr, nc),
		     callProbability=initializeBigMatrix(name="callPr", nr,nc),
		     annotation=cdfName,
		     batch=batch)
        if (!is.null(sampleSheet) && !is.null(sampleSheet$Sample_ID)) {
	   	sampleNames(cnSet) = sampleSheet$Sample_ID
        }
        else sampleNames(cnSet) = arrayNames
	if(saveDate){
		protocolData = getProtocolData.Illumina(grnidats, sep=sep, fileExt=fileExt$green, verbose=verbose)
	} else{
		protocolData = annotatedDataFrameFrom(A(cnSet), byrow=FALSE)
	}
	rownames(pData(protocolData)) = sampleNames(cnSet)
	protocolData(cnSet) = protocolData
	featureData(cnSet) = featureData
	featureNames(cnSet) = featureNames(featureData)
	cnSet$gender <- initializeBigVector("gender", ncol(cnSet), vmode="integer")
	cnSet$SNR = initializeBigVector("crlmmSNR-", ncol(cnSet), "double")
	cnSet$SKW = initializeBigVector("crlmmSKW-", ncol(cnSet), "double")
	sampleNames(cnSet) <- basename(sampleNames(cnSet))
	return(cnSet)
}
construct.Illumina <- constructInf

preprocessInf <- function(cnSet,
			  sampleSheet=NULL,
		       arrayNames=NULL,
		       ids=NULL,
		       path=".",
		       arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
		       highDensity=TRUE,
		       sep="_",
		       fileExt=list(green="Grn.idat", red="Red.idat"),
		       saveDate=TRUE,
		       stripNorm=TRUE,
		       useTarget=TRUE,
		       mixtureSampleSize=10^5,
		       fitMixture=TRUE,
		       eps=0.1,
		       verbose=TRUE,
		       seed=1){
	stopifnot(all(c("gender", "SNR", "SKW") %in% varLabels(cnSet)))
	open(A(cnSet))
	open(B(cnSet))
	sns <- sampleNames(cnSet)
	pkgname = getCrlmmAnnotationName(annotation(cnSet))
	is.snp = isSnp(cnSet)
	snp.index = which(is.snp)
	narrays = ncol(cnSet)
	sampleBatches <- splitIndicesByLength(seq(length=ncol(cnSet)), ocSamples())
	mixtureParams = initializeBigMatrix("crlmmMixt-", 4, narrays, "double")
	ocLapply(seq_along(sampleBatches),
		 processIDAT,
		 sampleBatches=sampleBatches,
		 sampleSheet=sampleSheet,
		 arrayNames=arrayNames,
		 ids=ids,
		 path=path,
		 arrayInfoColNames=arrayInfoColNames,
		 highDensity=highDensity,
		 sep=sep,
		 fileExt=fileExt,
		 saveDate=saveDate,
		 verbose=verbose,
		 mixtureSampleSize=mixtureSampleSize,
		 fitMixture=fitMixture,
		 eps=eps,
		 seed=seed,
		 cdfName=cdfName,
		 sns=sns,
		 stripNorm=stripNorm,
		 useTarget=useTarget,
		 A=A(cnSet),
		 B=B(cnSet),
		 GT=calls(cnSet),
		 GTP=snpCallProbability(cnSet),
		 SKW=cnSet$SKW,
		 SNR=cnSet$SNR,
		 mixtureParams=mixtureParams,
		 is.snp=is.snp,
		 neededPkgs=c("crlmm", pkgname)) # outdir=outdir,
	return(mixtureParams)
}
preprocess <- preprocessInf

genotypeInf <- function(cnSet, mixtureParams, probs=rep(1/3,3),
			SNRMin=5,
			recallMin=10,
			recallRegMin=1000,
			verbose=TRUE,
			returnParams=TRUE,
			badSNP=0.7,
			gender=NULL,
			DF=6){
	is.snp = isSnp(cnSet)
	snp.index = which(is.snp)
##	narrays = ncol(cnSet)
##	open(A(cnSet))
##	open(B(cnSet))
##	open(snpCall(cnSet))
##	open(snpCallProbability(cnSet))
##	## crlmmGT2 overwrites the normalized intensities with calls and confidenceScores
##	message("Writing to call and callProbability slots")
##	for(j in 1:ncol(cnSet)){
##		snpCall(cnSet)[snp.index, j] <- as.integer(A(cnSet)[snp.index, j])
##		snpCallProbability(cnSet)[snp.index, j] <- as.integer(B(cnSet)[snp.index, j])
##	}
##	message("Writing complete.  Begin genotyping...")
##	close(A(cnSet))
##	close(B(cnSet))
	tmp <- crlmmGT2(A=snpCall(cnSet),
			  B=snpCallProbability(cnSet),
			  SNR=cnSet$SNR,
			  mixtureParams=mixtureParams,
			  cdfName=cdfName,
			  col.names=sampleNames(cnSet),
			  probs=probs,
			  DF=DF,
			  SNRMin=SNRMin,
			  recallMin=recallMin,
			  recallRegMin=recallRegMin,
			  gender=gender,
			  verbose=verbose,
			  returnParams=returnParams,
			  badSNP=badSNP,
			  snp.names=featureNames(cnSet)[snp.index])
	if(verbose) message("Genotyping finished.  Updating container with genotype calls and confidence scores.")
	open(cnSet$gender)
	cnSet$gender[,] = tmp$gender
	close(cnSet$gender)
	TRUE
}


genotype.Illumina <- function(sampleSheet=NULL,
			      arrayNames=NULL,
			      ids=NULL,
			      path=".",
			      arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
			      highDensity=FALSE,
			      sep="_",
			      fileExt=list(green="Grn.idat", red="Red.idat"),
			      cdfName,
			      copynumber=TRUE,
			      batch,
			      saveDate=TRUE,
			      stripNorm=TRUE,
			      useTarget=TRUE,
			      mixtureSampleSize=10^5,
			      fitMixture=TRUE,
			      eps=0.1,
			      verbose=TRUE,
			      seed=1,
			      sns,
			      probs=rep(1/3, 3),
			      DF=6,
			      SNRMin=5,
			      recallMin=10,
			      recallRegMin=1000,
			      gender=NULL,
			      returnParams=TRUE,
			      badSNP=0.7) {
	is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
	stopifnot(is.lds)
	if(missing(cdfName)) stop("must specify cdfName")
	if(!isValidCdfName(cdfName)) stop("cdfName not valid.  see validCdfNames")
	stopifnot(!missing(batch))
        pkgname <- getCrlmmAnnotationName(cdfName)
	message("Instantiate CNSet container.")
	cnSet <- constructInf(sampleSheet=sampleSheet,
				    arrayNames=arrayNames,
				    path=path,
				    arrayInfoColNames=arrayInfoColNames,
				    highDensity=highDensity,
				    sep=sep,
				    fileExt=fileExt,
				    cdfName=cdfName,
				    verbose=verbose,
				    batch=batch,
				    saveDate=saveDate)
	mixtureParams <- preprocessInf(cnSet=cnSet,
				    sampleSheet=sampleSheet,
				    arrayNames=arrayNames,
				    ids=ids,
				    path=path,
				    arrayInfoColNames=arrayInfoColNames,
				    highDensity=highDensity,
				    sep=sep,
				    fileExt=fileExt,
				    saveDate=saveDate,
				    stripNorm=stripNorm,
				    useTarget=useTarget,
				    mixtureSampleSize=mixtureSampleSize,
				    fitMixture=fitMixture,
				    eps=eps,
				    verbose=verbose,
				    seed=seed)
	message("Preprocessing complete.  Begin genotyping...")
	genotypeInf(cnSet=cnSet, mixtureParams=mixtureParams,
		   probs=probs,
		   SNRMin=SNRMin,
		   recallMin=recallMin,
		   recallRegMin=recallRegMin,
		   verbose=verbose,
		   returnParams=returnParams,
		   badSNP=badSNP,
		   gender=gender,
		   DF=DF)
	return(cnSet)
}




processIDAT <- function(stratum, sampleBatches, sampleSheet=NULL,
			arrayNames=NULL,
			ids=NULL,
			path=".",
			arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
			highDensity=FALSE,
			sep="_",
			fileExt=list(green="Grn.idat", red="Red.idat"),
			saveDate=FALSE,
			verbose=TRUE,
			mixtureSampleSize=10^5,
			fitMixture=TRUE,
			eps=0.1,
			seed=1,
			cdfName,
			sns,
			stripNorm=TRUE,
			useTarget=TRUE,
			A, B,
			GT,
			GTP,
			SKW, SNR, mixtureParams, is.snp) { #, outdir=".") {
	message("Processing sample stratum ", stratum, " of ", length(sampleBatches))
	sel <- sampleBatches[[stratum]]
        if(length(path)>= length(sel)) path = path[sel]
        RG = readIdatFiles(sampleSheet=sampleSheet[sel,], arrayNames=arrayNames[sel],
                       ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
                       highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate, verbose=verbose)
        XY = RGtoXY(RG, chipType=cdfName)
        rm(RG)
        gc()
        if (missing(sns) || length(sns)!=ncol(XY)) sns = sampleNames(XY)
        res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
                               seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #, outdir=outdir)
        rm(XY)
        gc()
	if(verbose) message("Finished preprocessing.")
        snp.index = which(is.snp)
	np.index = which(!is.snp)
	open(A); open(B);
	open(GT); open(GTP)
	Amatrix <- res[["A"]]
	Bmatrix <- res[["B"]]

	## Amatrix and Bmatrix are ordinary matrices--not ff objects.
	## By writing columns of a ordinary matrix to GT and GTP, we
	## save one read step later on.  GT and GTP will be updated to
	## calls and call probabilities by the crlmmGT2 function. The A
	## and B slots will retain the normalized intensities for the
	## A and B alleles
	for(j in seq_along(sel)){
		jj <- sel[j]
		A[snp.index, jj] <- Amatrix[, j]
		GT[snp.index, jj] <- Amatrix[, j]
		B[snp.index, jj] <- Bmatrix[, j]
		GTP[snp.index, jj] <- Bmatrix[, j]
	}
	if(length(np.index)>0) {
		cnAmatrix <- res[["cnAB"]]$A
		cnBmatrix <- res[["cnAB"]]$B
		for(j in seq_along(sel)){
			jj <- sel[j]
			A[np.index, jj] <- cnAmatrix[, j]
			GT[np.index, jj] <- cnAmatrix[, j]
			B[np.index, jj] <- cnBmatrix[, j]
			GTP[np.index, jj] <- cnBmatrix[, j]
		}
        }
	open(SKW); open(SNR); open(mixtureParams)
	SKW[sel] = res[["SKW"]][]
	SNR[sel] = res[["SNR"]][]
	mixtureParams[, sel] = res[["mixtureParams"]][]
        close(A); close(B)
	close(GT); close(GTP)
        close(SNR); close(SKW)
        close(mixtureParams)
        rm(res)
	gc()
        TRUE
      }