# function below works OK provided all .idat files are in the current working directory
# - could add an option to allow files in Illumina directory structure to be handled
# or to use the optional 'Path' column in sampleSheet
# - there is a lot of header information that is currently discarded - could try and store this somewhere in the resulting NChannelSet

readIdatFiles = function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
                                arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
                                highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), saveDate=FALSE) {
       if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
         arrayNames=NULL
         if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
           barcode = sampleSheet[,arrayInfoColNames$barcode]
           arrayNames=barcode
         }
         if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) {  
           position = sampleSheet[,arrayInfoColNames$position]
           if(is.null(arrayNames))
             arrayNames=position
           else
             arrayNames = paste(arrayNames, position, sep=sep)
           if(highDensity) {
             hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02")
             for(i in names(hdExt))
               arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames)
            }
         }
         pd = new("AnnotatedDataFrame", data = sampleSheet)
       }
       if(is.null(arrayNames)) {
         arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
         if(!is.null(sampleSheet)) {
           sampleSheet=NULL
           cat("Could not find required info in \'sampleSheet\' - ignoring.  Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
         }
         pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
       }
       narrays = length(arrayNames)
       grnfiles = paste(arrayNames, fileExt$green, sep=sep)
       redfiles = paste(arrayNames, fileExt$red, sep=sep)
       if(length(grnfiles)==0 || length(redfiles)==0)
         stop("Cannot find .idat files")
       if(length(grnfiles)!=length(redfiles))
         stop("Cannot find matching .idat files")
       if(!all(c(redfiles,grnfiles) %in% dir(path=path)))
         stop("Missing .idat files: red\n", paste(redfiles[!(redfiles %in% dir(path=path))], sep=" "), "\n green\n",
                 paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" "))
       grnidats = file.path(path, grnfiles)
       redidats = file.path(path, redfiles)
       
       headerInfo = list(nProbes = rep(NA, narrays),
                         Barcode = rep(NA, narrays),
                         ChipType = rep(NA, narrays),
                         Manifest = rep(NA, narrays), # not sure about this one - sometimes blank
                         Position = rep(NA, narrays)) # this may also vary a bit

       dates = list(decode=rep(NA, narrays),
                    scan=rep(NA, narrays))

       # read in the data
       for(i in seq(along=arrayNames)) {
         cat("reading", arrayNames[i], "\t")
         idsG = idsR = G = R = NULL

         cat(paste(sep, fileExt$green, sep=""), "\t")
         G = readIDAT(grnidats[i])
         idsG = rownames(G$Quants)
         
         headerInfo$nProbes[i] = G$nSNPsRead
         headerInfo$Barcode[i] = G$Barcode
         headerInfo$ChipType[i] = G$ChipType
         headerInfo$Manifest[i] = G$Unknown$MostlyNull
         headerInfo$Position[i] = G$Unknowns$MostlyA
         
         if(headerInfo$ChipType[i]!=headerInfo$ChipType[1] || headerInfo$Manifest[i]!=headerInfo$Manifest[1]) {
                  # || headerInfo$nProbes[i]!=headerInfo$nProbes[1] ## removed this condition as some arrays used the same manifest
                  # but differed by a few SNPs for some reason - most of the chip was the same though
#           stop("Chips are not of all of the same type - please check your data")
           warning("Chips are not of the same type.  Skipping ", basename(grnidats[i]), " and ", basename(redidats[i]))
           next()
         }

         dates$decode[i] = G$RunInfo[1, 1]
         dates$scan[i] = G$RunInfo[2, 1]

         if(i==1) {
           if(is.null(ids) && !is.null(G))
             ids = idsG
           else
             stop("Could not find probe IDs")
           nprobes = length(ids)
           narrays = length(arrayNames)
           
           tmpmat = matrix(NA, nprobes, narrays)
           rownames(tmpmat) = ids
           if(!is.null(sampleSheet))
             colnames(tmpmat) = sampleSheet$Sample_ID
           else
             colnames(tmpmat) = arrayNames

           RG = new("NChannelSet",
                     R=tmpmat, G=tmpmat, Rnb=tmpmat, Gnb=tmpmat,
                     Rse=tmpmat, Gse=tmpmat, annotation=headerInfo$Manifest[1],
                     phenoData=pd, storage.mode="environment")
           rm(tmpmat)
           gc()
         }
         
         if(length(ids)==length(idsG)) {
           if(sum(ids==idsG)==nprobes) {
             RG@assayData$G[,i] = G$Quants[, "Mean"]
             RG@assayData$Gnb[,i] = G$Quants[, "NBeads"]
             RG@assayData$Gse[,i] = G$Quants[, "SD"]
           }
         }
         else {
           indG = match(ids, idsG)
           RG@assayData$G[,i] = G$Quants[indG, "Mean"]
           RG@assayData$Gnb[,i] = G$Quants[indG, "NBeads"]
           RG@assayData$Gse[,i] = G$Quants[indG, "SD"]
         }
         rm(G)
         gc()
         
         cat(paste(sep, fileExt$red, sep=""), "\n")
         R = readIDAT(redidats[i])
         idsR = rownames(R$Quants)
         
         if(length(ids)==length(idsG)) {   
           if(sum(ids==idsR)==nprobes) {
             RG@assayData$R[,i] = R$Quants[ ,"Mean"]
             RG@assayData$Rnb[,i] = R$Quants[ ,"NBeads"]
             RG@assayData$Rse[,i] = R$Quants[ ,"SD"]
           }
         }
         else {
           indR = match(ids, idsR)
           RG@assayData$R[,i] = R$Quants[indR, "Mean"]
           RG@assayData$Rnb[,i] = R$Quants[indR, "NBeads"]
           RG@assayData$Rse[,i] = R$Quants[indR, "SD"]
         }
         rm(R)
         gc()
       }
       if(saveDate)
         RG@scanDates = dates$scan
       storageMode(RG) = "lockedEnvironment"
       RG
}


# the readIDAT() and readBPM() functions below were provided by Keith Baggerly, 27/8/2008
readIDAT <- function(idatFile){

  fileSize <- file.info(idatFile)$size

  tempCon <- file(idatFile,"rb")
  prefixCheck <- readChar(tempCon,4)
  if(prefixCheck != "IDAT"){

  }

  versionNumber <- readBin(tempCon, "integer", n=1, size=8, 
                           endian="little", signed=FALSE)
  
  nFields <- readBin(tempCon, "integer", n=1, size=4, 
                     endian="little", signed=FALSE)

  fields <- matrix(0,nFields,3);
  colnames(fields) <- c("Field Code", "Byte Offset", "Bytes")
  for(i1 in 1:nFields){
    fields[i1,"Field Code"] <- 
      readBin(tempCon, "integer", n=1, size=2, endian="little", signed=FALSE)
    fields[i1,"Byte Offset"] <- 
      readBin(tempCon, "integer", n=1, size=8, endian="little", signed=FALSE)
  }

  knownCodes <- c(1000, 102, 103, 104, 107, 200, 300, 400,
                  401, 402, 403, 404, 405, 406, 407, 408, 409)
  names(knownCodes) <- 
    c("nSNPsRead",  # 1000
      "IlluminaID", #  102
      "SD",         #  103
      "Mean",       #  104
      "NBeads",     #  107
      "MidBlock",   #  200
      "RunInfo",    #  300
      "RedGreen",   #  400
      "MostlyNull", #  401
      "Barcode",    #  402
      "ChipType",   #  403
      "MostlyA",    #  404
      "Unknown.1",  #  405
      "Unknown.2",  #  406
      "Unknown.3",  #  407
      "Unknown.4",  #  408
      "Unknown.5"   #  409
      )

  nNewFields <- 1
  rownames(fields) <- paste("Null", 1:nFields)
  for(i1 in 1:nFields){
    temp <- match(fields[i1,"Field Code"], knownCodes)
    if(!is.na(temp)){
      rownames(fields)[i1] <- names(knownCodes)[temp]
    }else{
      rownames(fields)[i1] <- paste("newField", nNewFields, sep=".")
      nNewFields <- nNewFields + 1
    }
  }

  seek(tempCon, fields["nSNPsRead", "Byte Offset"])
  nSNPsRead <- readBin(tempCon, "integer", n=1, size=4, 
                       endian="little", signed=FALSE)

  seek(tempCon, fields["IlluminaID", "Byte Offset"])
  IlluminaID <- readBin(tempCon, "integer", n=nSNPsRead, size=4, 
                       endian="little", signed=FALSE)

  seek(tempCon, fields["SD", "Byte Offset"])
  SD <- readBin(tempCon, "integer", n=nSNPsRead, size=2, 
                endian="little", signed=FALSE)

  seek(tempCon, fields["Mean", "Byte Offset"])
  Mean <- readBin(tempCon, "integer", n=nSNPsRead, size=2, 
                  endian="little", signed=FALSE)

  seek(tempCon, fields["NBeads", "Byte Offset"])
  NBeads <- readBin(tempCon, "integer", n=nSNPsRead, size=1, signed=FALSE)

  seek(tempCon, fields["MidBlock", "Byte Offset"])
  nMidBlockEntries <- readBin(tempCon, "integer", n=1, size=4, 
                              endian="little", signed=FALSE)
  MidBlock <- readBin(tempCon, "integer", n=nMidBlockEntries, size=4, 
                      endian="little", signed=FALSE)

  seek(tempCon, fields["RunInfo", "Byte Offset"])
  nRunInfoBlocks <- readBin(tempCon, "integer", n=1, size=4, 
                            endian="little", signed=FALSE)
  RunInfo <- matrix(NA, nRunInfoBlocks, 5)
  colnames(RunInfo) <- c("RunTime", "BlockType", "BlockPars", 
                         "BlockCode", "CodeVersion")
  for(i1 in 1:2) { #nRunInfoBlocks){  ## MR edit
    for(i2 in 1:5){
      nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
      RunInfo[i1,i2] <- readChar(tempCon, nChars)
    }
  }

  seek(tempCon, fields["RedGreen", "Byte Offset"])
  RedGreen <- readBin(tempCon, "numeric", n=1, size=4, 
                      endian="little", signed=FALSE)
  #RedGreen <- readBin(tempCon, "integer", n=4, size=1, 
  #                    endian="little", signed=FALSE)

  seek(tempCon, fields["MostlyNull", "Byte Offset"])
  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
  MostlyNull <- readChar(tempCon, nChars) 

  seek(tempCon, fields["Barcode", "Byte Offset"])
  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
  Barcode <- readChar(tempCon, nChars) 

  seek(tempCon, fields["ChipType", "Byte Offset"])
  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
  ChipType <- readChar(tempCon, nChars) 

  seek(tempCon, fields["MostlyA", "Byte Offset"])
  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
  MostlyA <- readChar(tempCon, nChars) 

  seek(tempCon, fields["Unknown.1", "Byte Offset"])
  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
  Unknown.1 <- readChar(tempCon, nChars) 

  seek(tempCon, fields["Unknown.2", "Byte Offset"])
  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
  Unknown.2 <- readChar(tempCon, nChars) 

  seek(tempCon, fields["Unknown.3", "Byte Offset"])
  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
  Unknown.3 <- readChar(tempCon, nChars) 

  seek(tempCon, fields["Unknown.4", "Byte Offset"])
  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
  Unknown.4 <- readChar(tempCon, nChars) 

  seek(tempCon, fields["Unknown.5", "Byte Offset"])
  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
  Unknown.5 <- readChar(tempCon, nChars) 

  close(tempCon)

  Unknowns <- 
    list(MostlyNull=MostlyNull,
         MostlyA=MostlyA,
         Unknown.1=Unknown.1,
         Unknown.2=Unknown.2,
         Unknown.3=Unknown.3,
         Unknown.4=Unknown.4,
         Unknown.5=Unknown.5)

  Quants <- cbind(Mean, SD, NBeads)
  colnames(Quants) <- c("Mean", "SD", "NBeads")
  rownames(Quants) <- as.character(IlluminaID)

  idatValues <- 
    list(fileSize=fileSize, 
         versionNumber=versionNumber, 
         nFields=nFields, 
         fields=fields,
         nSNPsRead=nSNPsRead, 
         #IlluminaID=IlluminaID, 
         #SD=SD, 
         #Mean=Mean, 
         #NBeads=NBeads,
         Quants=Quants,
         MidBlock=MidBlock, 
         RunInfo=RunInfo, 
         RedGreen=RedGreen, 
         Barcode=Barcode, 
         ChipType=ChipType,
         Unknowns=Unknowns)

  idatValues

}

readBPM <- function(bpmFile){

  ## Reads and parses Illumina BPM files
  
  fileSize <- file.info(bpmFile)$size

  tempCon <- file(bpmFile,"rb")

  # The first few bytes of the egtFile are some type of
  # header, but there's no related byte offset information. 

  prefixCheck <- readChar(tempCon,3) ## should be "BPM"

  null.1 <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
  ## should be 1  
  
  versionNumber <- 
    readBin(tempCon, "integer", n=1, size=4, endian="little", signed=FALSE)
  ## should be 4

  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
  chipType <- readChar(tempCon, nChars)

  null.2 <- readBin(tempCon, "integer", n=2, size=1, signed=FALSE)

  csvLines <- readLines(tempCon, 22)

  entriesByteOffset <- seek(tempCon);
  nEntries <- readBin(tempCon, "integer", n=1, size=4,
                      endian="little", signed=FALSE)
    
  if(FALSE){
    
    snpIndexByteOffset <- seek(tempCon)
    snpIndex <- readBin(tempCon, "integer", n=nEntries, size=4,
                        endian="little", signed=FALSE)
    ## for the 1M array, these are simply in order from 1 to 1072820.
    
    snpNamesByteOffset <- seek(tempCon)
    snpNames <- rep("A", nEntries)
    for(i1 in 1:nEntries){
      nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
      snpNames[i1] <- readChar(tempCon, nChars)
    }

  }

  seek(tempCon, 15278138)
    
  normIDByteOffset <- seek(tempCon)
  normID <- readBin(tempCon, "integer", n=nEntries, size=1, signed=FALSE) + 1
  
  newBlockByteOffset <- seek(tempCon)
  newBlock <- readBin(tempCon, "integer", n=10000, size=1, signed=FALSE)
  
  close(tempCon)

  byteOffsets <- list(entriesByteOffset=entriesByteOffset,
                      #snpIndexByteOffset=snpIndexByteOffset, 
                      #snpNamesByteOffset=snpNamesByteOffset,
                      normIDByteOffset=normIDByteOffset,
                      newBlockByteOffset=newBlockByteOffset)
  
  allStuff <- list(prefixCheck=prefixCheck,
                   null.1=null.1,
                   versionNumber=versionNumber,
                   chipType=chipType,
                   null.2=null.2,
                   csvLines=csvLines,
                   nEntries=nEntries,
                   #snpIndex=snpIndex,
                   #snpNames=snpNames,
                   normID=normID,
                   newBlock=newBlock,
                   byteOffsets=byteOffsets)
  allStuff
  
}


RGtoXY = function(RG, chipType, verbose=TRUE) {
  chipList = c("human1mv1c",             # 1M
               "human370v1c",            # 370CNV
               "human650v3a",            # 650Y
               "human610quadv1b",        # 610 quad
               "human660quadv1a",        # 660 quad
               "human370quadv3c",        # 370CNV quad
               "human550v3b",            # 550K
               "human1mduov3b")          # 1M Duo   
  if(missing(chipType))
     chipType = match.arg(annotation(RG), chipList)
  else
     chipType = match.arg(chipType, chipList)
  
  pkgname <- getCrlmmAnnotationName(chipType)
  if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
     suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
     msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
     message(strwrap(msg))
     stop("Package ", pkgname, " could not be found.")
     rm(suggCall, msg)
  }
  if(verbose) message("Loading chip annotation information.")
    loader("address.rda", .crlmmPkgEnv, pkgname)
#    data(annotation, package=pkgname, envir=.crlmmPkgEnv)
  aids <- getVarInEnv("addressA") # comes from AddressA_ID or Address column in manifest
  bids <- getVarInEnv("addressB") # comes from AddressB_ID or Address2 column in manifest
  ids <- names(aids)
  snpbase <- getVarInEnv("base")
  
  nsnps = length(aids)
  narrays = ncol(RG)
  
#  aidcol = match("AddressA_ID", colnames(annot))
#  if(is.na(aidcol))
#    aidcol = match("Address", colnames(annot))
#  bidcol = match("AddressB_ID", colnames(annot))
#  if(is.na(bidcol)) 
#    bidcol = match("Address2", colnames(annot))
#  aids = annot[, aidcol]
#  bids = annot[, bidcol]
#  snpids = annot[,"Name"]
#  snpbase = annot[,"SNP"] 
  infI = !is.na(bids) & bids!=0  
  aord = match(aids, featureNames(RG)) # NAs are possible here
  bord = match(bids, featureNames(RG)) # and here
#  argrg = aids[rrgg]
#  brgrg = bids[rrgg]

  tmpmat = matrix(0, nsnps, narrays)
  rownames(tmpmat) = ids
  colnames(tmpmat) = sampleNames(RG)
  XY = new("NChannelSet", X=tmpmat, Y=tmpmat, Xnb=tmpmat, Ynb=tmpmat, Xse=tmpmat, Yse=tmpmat, zero=tmpmat,
                 annotation=chipType, phenoData=RG@phenoData, scanDates=RG@scanDates, storage.mode="environment")
  rm(tmpmat)
  gc()
  
  # First sort out Infinium II SNPs, X -> R (allele A)  and Y -> G (allele B) from the same probe
  XY@assayData$X[!is.na(aord),] = exprs(channel(RG, "R"))[aord[!is.na(aord)],] # mostly red
  XY@assayData$Y[!is.na(aord),] = exprs(channel(RG, "G"))[aord[!is.na(aord)],] # mostly green
  XY@assayData$Xnb[!is.na(aord),] = exprs(channel(RG, "Rnb"))[aord[!is.na(aord)],]
  XY@assayData$Ynb[!is.na(aord),] = exprs(channel(RG, "Gnb"))[aord[!is.na(aord)],]
  XY@assayData$Xse[!is.na(aord),] = exprs(channel(RG, "Rse"))[aord[!is.na(aord)],]
  XY@assayData$Yse[!is.na(aord),] = exprs(channel(RG, "Gse"))[aord[!is.na(aord)],]
  gc()
  
  ## Warning - not 100% sure that the code below is correct - could be more complicated than this
  
  # Next Infinium I where X -> R from allele A probe and Y -> R from allele B probe
#  infIRR = infI & (snpbase=="[A/T]" | snpbase=="[T/A]" | snpbase=="[a/t]" | snpbase=="[t/a]")
  
#  X[infIRR,] = exprs(channel(RG, "R"))[aord[infIRR],] # mostly red
#  Y[infIRR,] = exprs(channel(RG, "R"))[bord[infIRR],] # mostly green
#  Xnb[infIRR,] = exprs(channel(RG, "Rnb"))[aord[infIRR],]
#  Ynb[infIRR,] = exprs(channel(RG, "Rnb"))[bord[infIRR],]
#  Xse[infIRR,] = exprs(channel(RG, "Rse"))[aord[infIRR],]
#  Yse[infIRR,] = exprs(channel(RG, "Rse"))[bord[infIRR],]
  
  # Finally Infinium I where X -> G from allele A probe and Y -> G from allele B probe
#  infIGG = infI & (snpbase=="[C/G]" | snpbase=="[G/C]" | snpbase=="[g/c]" | snpbase=="[c/g]")

#  X[infIGG,] = exprs(channel(RG, "G"))[aord[infIGG],] # mostly red
#  Y[infIGG,] = exprs(channel(RG, "G"))[bord[infIGG],] # mostly green
#  Xnb[infIGG,] = exprs(channel(RG, "Gnb"))[aord[infIGG],]
#  Ynb[infIGG,] = exprs(channel(RG, "Gnb"))[bord[infIGG],]
#  Xse[infIGG,] = exprs(channel(RG, "Gse"))[aord[infIGG],]
#  Yse[infIGG,] = exprs(channel(RG, "Gse"))[bord[infIGG],]
    
  #  For now zero out Infinium I probes
  XY@assayData$X[infI,] = 0
  XY@assayData$Y[infI,] = 0
  XY@assayData$Xnb[infI,] = 0
  XY@assayData$Ynb[infI,] = 0
  XY@assayData$Xse[infI,] = 0
  XY@assayData$Yse[infI,] = 0

  XY@assayData$zero[XY@assayData$X==0 | XY@assayData$Y==0] = 1
  gc()

#  storageMode(XY) = "lockedEnvironment"
  XY
}

stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
  pkgname <- getCrlmmAnnotationName(annotation(XY))
  if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
    suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
    msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
    message(strwrap(msg))
    stop("Package ", pkgname, " could not be found.")
    rm(suggCall, msg)
  }
  if(verbose) message("Loading strip and reference normalization information.")
  loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
#  data(preprocStuff, package=pkgname, envir=.crlmmPkgEnv)

  stripnum <- getVarInEnv("stripnum")
  
  if(useTarget)
    targetdist = getVarInEnv("reference")
  
#  Xqws = Yqws = matrix(0, nrow(XY), ncol(XY))
#  colnames(Xqws) = colnames(Yqws) = sampleNames(XY) #$X
#  rownames(Xqws) = rownames(Yqws) = featureNames(XY)

  if(verbose){
    message("Quantile normalizing ", ncol(XY), " arrays by ", max(stripnum), " strips.")
    if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=max(stripnum), style=3)
  } 
  for(s in 1:max(stripnum)) {
    if(verbose) {
      if (getRversion() > '2.7.0') setTxtProgressBar(pb, s)
      else cat(".")
    }
    sel = stripnum==s
    subX = exprs(channel(XY, "X"))[sel,]
    subY = exprs(channel(XY, "Y"))[sel,]
    if(useTarget)
      tmp = normalize.quantiles.use.target(as.matrix(cbind(subX, subY)), targetdist[[s]])
    else
      tmp = normalize.quantiles(as.matrix(cbind(subX, subY)))
    XY@assayData$X[sel,] = tmp[,1:(ncol(tmp)/2)]
    XY@assayData$Y[sel,] = tmp[,(ncol(tmp)/2+1):ncol(tmp)]
#    Xqws[sel,] = tmp[,1:(ncol(tmp)/2)]
#    Yqws[sel,] = tmp[,(ncol(tmp)/2+1):ncol(tmp)]
    rm(subX, subY, tmp, sel)
    gc()
  }
  if(verbose)
    cat("\n")
  XY
#  XYNorm = new("NChannelSet",
#                X=Xqws+16,
#                Y=Yqws+16,
#                Xnb=exprs(channel(XY, "Xnb")),
#                Ynb=exprs(channel(XY, "Ynb")),
#                Xse=exprs(channel(XY, "Xse")),
#                Yse=exprs(channel(XY, "Yse")),
#                zero=exprs(channel(XY, "zero")),
#                annotation=annotation(XY),
#                phenoData=XY@phenoData)
}


preprocessInfinium2 = function(XY, mixtureSampleSize=10^5, fitMixture=TRUE, eps=0.1, verbose=TRUE, seed=1,
                               cdfName, sns, stripNorm=TRUE, useTarget=TRUE, save.it=FALSE, intensityFile) {
  if(stripNorm)
    XY = stripNormalize(XY, useTarget=useTarget, verbose=verbose)

## MR: the code below is mostly straight from snprma.R
  if (missing(sns)) sns <- sampleNames(XY) #$X
  if(missing(cdfName))
    cdfName <- annotation(XY)
##  stuffDir <- changeToCrlmmAnnotationName(cdfName)
  pkgname <- getCrlmmAnnotationName(cdfName)
  if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
    suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
    msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
    message(strwrap(msg))
    stop("Package ", pkgname, " could not be found.")
    rm(suggCall, msg)
  }
  if(verbose) message("Loading snp annotation and mixture model parameters.")
  loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
  loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
  loader("snpProbesFid.rda", .crlmmPkgEnv, pkgname)
  loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
#  data(genotypeStuff, mixtureStuff, package=pkgname, envir=.crlmmPkgEnv)
  autosomeIndex <- getVarInEnv("autosomeIndex")
  #  pnsa <- getVarInEnv("pnsa")
  #  pnsb <- getVarInEnv("pnsb")
  #  fid <- getVarInEnv("fid")
  #  reference <- getVarInEnv("reference")
  #  aIndex <- getVarInEnv("aIndex")
  #  bIndex <- getVarInEnv("bIndex")
  SMEDIAN <- getVarInEnv("SMEDIAN")
  theKnots <- getVarInEnv("theKnots")
  gns <- getVarInEnv("gns")
  
  # separate out copy number probes
  npIndex = getVarInEnv("npProbesFid")
  nprobes = length(npIndex)
  narrays = ncol(XY)
  A <- matrix(as.integer(exprs(channel(XY, "X"))[npIndex,]), nprobes, narrays)
  B <- matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays)
  cnAB = list(A=A, B=B, sns=sns, gns=names(npIndex), cdfName=cdfName)
  
  # next process snp probes
  snpIndex = getVarInEnv("snpProbesFid")
  nprobes <- length(snpIndex)
  
  ##We will read each cel file, summarize, and run EM one by one
  ##We will save parameters of EM to use later
  mixtureParams <- matrix(0, 4, narrays)
  SNR <- vector("numeric", narrays)
  SKW <- vector("numeric", narrays)

  ## This is the sample for the fitting of splines
  ## BC: I like better the idea of the user passing the seed,
  ##     because this might intefere with other analyses
  ##     (like what happened to GCRMA)
  set.seed(seed)

  idx <- sort(sample(autosomeIndex, mixtureSampleSize))
  
  ##S will hold (A+B)/2 and M will hold A-B
  ##NOTE: We actually dont need to save S. Only for pics etc...
  ##f is the correction. we save to avoid recomputing
  A <- matrix(as.integer(exprs(channel(XY, "X"))[snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsa), length(filenames))
  B <- matrix(as.integer(exprs(channel(XY, "Y"))[snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsb), length(filenames))

  # new lines below - useful to keep track of zeroed out SNPs
  zero <- matrix(as.integer(exprs(channel(XY, "zero"))[snpIndex,]), nprobes, narrays)
    
  colnames(A) <- colnames(B) <- colnames(zero) <- sns
  rownames(A) <- rownames(B) <- rownames(zero) <- names(snpIndex) # gns # featureNames(XY)
  
  if(verbose){
     message("Calibrating ", narrays, " arrays.")
     if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=narrays, style=3)
  }

  idx2 <- sample(nprobes, 10^5)
  for(i in 1:narrays){
    SKW[i] = mean((A[idx2,i]-mean(A[idx2,i]))^3)/(sd(A[idx2,i])^3)
    if(fitMixture){
      S <- (log2(A[idx, i])+log2(B[idx, i]))/2 - SMEDIAN
      M <- log2(A[idx, i])-log2(B[idx, i])

      ##we need to test the choice of eps.. it is not the max diff between funcs
      tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)

      mixtureParams[, i] <- tmp[["coef"]]
      SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
    }
    if(verbose) {
       if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
       else cat(".")
    }
  }
  if (verbose) {
    if (getRversion() > '2.7.0') close(pb)
    else cat("\n")
  }
  if (!fitMixture) SNR <- mixtureParams <- NA
  ## gns comes from preprocStuff.rda
  res = list(A=A, B=B, zero=zero, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)

  if(save.it & !missing(intensityFile)) {
    t0 <- proc.time() 
    save(cnAB, res, file=intensityFile)
    t0 <- proc.time()-t0
    if(verbose) message("Used ", round(t0[3],1), " seconds to save ", intensityFile, ".")
  }
  return(res)
}


## MR: Could add arguments to allow this function to read in idat data as well,
## although this would add a further 7 arguments, which might over-complicate things
crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
                  row.names=TRUE, col.names=TRUE,
                  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
                  seed=1, save.it=FALSE, load.it=FALSE, intensityFile,
                  mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
                  cdfName, sns, recallMin=10, recallRegMin=1000,
                  returnParams=FALSE, badSNP=.7) {

  if ((load.it | save.it) & missing(intensityFile))
    stop("'intensityFile' is missing, and you chose either load.it or save.it")
  if (!missing(intensityFile))
    if (load.it & !file.exists(intensityFile)){
      load.it <- FALSE
      message("File ", intensityFile, " does not exist.")
      message("Not loading it, but running SNPRMA from scratch.")
  }
  if (!load.it){
    if(!missing(RG)) {
      if(missing(XY))
        XY = RGtoXY(RG, chipType=cdfName)
      else
        stop("Both RG and XY specified - please use one or the other")
    }
    if (missing(sns)) sns <- sampleNames(XY) #$X
    
    res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
                        seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget,
                        save.it=save.it, intensityFile=intensityFile)
  }else{
      if(verbose) message("Loading ", intensityFile, ".")
        obj <- load(intensityFile)
        if(verbose) message("Done.")
        if(!any(obj == "res"))
          stop("Object in ", intensityFile, " seems to be invalid.")
  }
  if(row.names) row.names=res$gns else row.names=NULL
  if(col.names) col.names=res$sns else col.names=NULL

  res2 <- crlmmGT(res[["A"]], res[["B"]], res[["SNR"]],
                  res[["mixtureParams"]], res[["cdfName"]],
                  gender=gender, row.names=row.names,
                  col.names=col.names, recallMin=recallMin,
                  recallRegMin=1000, SNRMin=SNRMin,
                  returnParams=returnParams, badSNP=badSNP,
                  verbose=verbose)
  
  res2[["SNR"]] <- res[["SNR"]]
  res2[["SKW"]] <- res[["SKW"]]
  return(list2SnpSet(res2, returnParams=returnParams)) # return(res2)
}