R/crlmm-illumina.R
808cfecc
 # function below works OK provided all .idat files are in the current working directory
cd8b85b6
 # - could add an option to allow files in Illumina directory structure to be handled
 # or to use the optional 'Path' column in sampleSheet
 # - there is a lot of header information that is currently discarded - could try and store this somewhere in the resulting NChannelSet
 
450a6d8a
 readIdatFiles = function(sampleSheet=NULL,
5fcfed7d
 			  arrayNames=NULL,
 			  ids=NULL,
 			  path=".",
 			  arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
 			  highDensity=FALSE,
 			  sep="_",
 			  fileExt=list(green="Grn.idat", red="Red.idat"),
99202387
 			  saveDate=FALSE, verbose=FALSE) {
cbfc52e5
        verbose <- FALSE
e609f78b
        if(!is.null(arrayNames)) {
                pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
        }
        if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
 	       if(is.null(arrayNames)){
 		       if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
 			       barcode = sampleSheet[,arrayInfoColNames$barcode]
 			       arrayNames=barcode
 		       }
f1e14c0e
 		       if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) {
e609f78b
 			       position = sampleSheet[,arrayInfoColNames$position]
 			       if(is.null(arrayNames))
 				       arrayNames=position
 			       else
 				       arrayNames = paste(arrayNames, position, sep=sep)
 			       if(highDensity) {
 				       hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02")
 				       for(i in names(hdExt))
 					       arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames)
 			       }
 		       }
 	       }
 	       pd = new("AnnotatedDataFrame", data = sampleSheet)
063b3d14
 	       sampleNames(pd) = make.unique(basename(arrayNames))
e609f78b
        }
        if(is.null(arrayNames)) {
                arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
                if(!is.null(sampleSheet)) {
                       sampleSheet=NULL
                       cat("Could not find required info in \'sampleSheet\' - ignoring.  Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
                }
                pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
        }
        narrays = length(arrayNames)
        grnfiles = paste(arrayNames, fileExt$green, sep=sep)
        redfiles = paste(arrayNames, fileExt$red, sep=sep)
        if(length(grnfiles)==0 || length(redfiles)==0)
 	       stop("Cannot find .idat files")
        if(length(grnfiles)!=length(redfiles))
 	       stop("Cannot find matching .idat files")
d98bfc16
        if(path[1] != "." & path[1] != ""){
e609f78b
 	       grnidats = file.path(path, grnfiles)
 	       redidats = file.path(path, redfiles)
        }  else {
cbfc52e5
 	       message("'path' arg not set.  Assuming files are in local directory, or that complete path is provided in 'arrayNames'")
450a6d8a
 	       grnidats = grnfiles
 	       redidats = redfiles
e609f78b
        }
        if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files")
f1e14c0e
        if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files")
e609f78b
        headerInfo = list(nProbes = rep(NA, narrays),
                          Barcode = rep(NA, narrays),
                          ChipType = rep(NA, narrays),
                          Manifest = rep(NA, narrays), # not sure about this one - sometimes blank
                          Position = rep(NA, narrays)) # this may also vary a bit
        dates = list(decode=rep(NA, narrays),
                     scan=rep(NA, narrays))
        ## read in the data
d98bfc16
        for(i in seq_along(arrayNames)) {
99202387
 	       if(verbose) {
453e688a
 	       ## RS
 		       ##cat("reading", arrayNames[i], "\t")
 		       cat("reading", basename(arrayNames[i]), "\t")
 		       cat(paste(sep, fileExt$green, sep=""), "\t")
99202387
 	       }
e609f78b
 	       idsG = idsR = G = R = NULL
adde216c
 	       G = readIDAT(grnidats[i])
e609f78b
 	       idsG = rownames(G$Quants)
 	       headerInfo$nProbes[i] = G$nSNPsRead
 	       headerInfo$Barcode[i] = G$Barcode
 	       headerInfo$ChipType[i] = G$ChipType
 	       headerInfo$Manifest[i] = G$Unknown$MostlyNull
 	       headerInfo$Position[i] = G$Unknowns$MostlyA
987f58fd
                if(headerInfo$nProbes[i]>(headerInfo$nProbes[1]+headerInfo$nProbes[1]*0.04) || headerInfo$nProbes[i]<(headerInfo$nProbes[1]-headerInfo$nProbes[1]*0.04)) {
e609f78b
 		       warning("Chips are not of the same type.  Skipping ", basename(grnidats[i]), " and ", basename(redidats[i]))
 		       next()
 	       }
c27ba9c5
                if(saveDate) {
                       if(nrow(G$RunInfo)>=2) {
 	              dates$decode[i] = G$RunInfo[1, 1]
 	              dates$scan[i] = G$RunInfo[2, 1]
                       }
                }
e609f78b
 	       if(i==1) {
 		       if(is.null(ids) && !is.null(G)){
 			       ids = idsG
063b3d14
 		       } # else stop("Could not find probe IDs")
e609f78b
 		       nprobes = length(ids)
 		       narrays = length(arrayNames)
cbfc52e5
 #                       if(cdfName=="nopackage") {
     	               RG = new("NChannelSet",
                                  R = initializeBigMatrix(name = "R", nr = nprobes, nc = narrays, vmode = "integer"),
                                  G = initializeBigMatrix(name = "G", nr = nprobes, nc = narrays, vmode = "integer"),
 		                 zero = initializeBigMatrix(name = "zero", nr = nprobes, nc = narrays, vmode = "integer"),
99202387
 				 annotation=headerInfo$Manifest[1],
 				 phenoData=pd, storage.mode="environment")
cbfc52e5
 #                       } else {
 #    	               RG = new("NChannelSet",
 #                                 R = matrix(0, nprobes, narrays),
 #                                 G = matrix(0, nprobes, narrays),
 #		                 zero = matrix(1, nprobes, narrays),
 #				 annotation=headerInfo$Manifest[1],
 #				 phenoData=pd, storage.mode="environment")                           
 #                       }
99202387
 		       featureNames(RG) = ids
cbfc52e5
                        
e609f78b
 		       if(!is.null(sampleSheet) && !is.null(sampleSheet$Sample_ID)){
063b3d14
 		            sampleNames(RG) = make.unique(sampleSheet$Sample_ID)
 		       } else  sampleNames(RG) = make.unique(basename(arrayNames))
a1394042
 		       gc(verbose=FALSE)
e609f78b
 	       }
 	       if(length(ids)==length(idsG)) {
 		       if(sum(ids==idsG)==nprobes) {
 			       RG@assayData$G[,i] = G$Quants[, "Mean"]
 			       zeroG = G$Quants[, "NBeads"]==0
 		       }
 	       } else {
 		       indG = match(ids, idsG)
c27ba9c5
                        nasG = is.na(indG)
 		       RG@assayData$G[!nasG,i] = G$Quants[indG[!nasG], "Mean"]
 		       zeroG = G$Quants[indG[!nasG], "NBeads"]==0
e609f78b
 	       }
 	       rm(G)
a1394042
 	       gc(verbose=FALSE)
e6e6981f
 	       if(verbose) {
99202387
                       cat(paste(sep, fileExt$red, sep=""), "\n")
 	       }
e609f78b
 	       R = readIDAT(redidats[i])
 	       idsR = rownames(R$Quants)
f1e14c0e
 
 	       if(length(ids)==length(idsG)) {
e609f78b
 		       if(sum(ids==idsR)==nprobes) {
 			       RG@assayData$R[,i] = R$Quants[ ,"Mean"]
99202387
 		               zeroR = R$Quants[ ,"NBeads"]==0
c27ba9c5
                                RG@assayData$zero[,i] = zeroG | zeroR
e609f78b
 		       }
 	       } else {
 		       indR = match(ids, idsR)
c27ba9c5
 	               nasR = is.na(indR)
 		       RG@assayData$R[!nasR,i] = R$Quants[indR[!nasR], "Mean"]
 		       zeroR = R$Quants[indR[!nasR], "NBeads"]==0
                        RG@assayData$zero[!nasR,i] = zeroG | zeroR
e609f78b
 	       }
c27ba9c5
 #	       RG@assayData$zero[,i] = zeroG | zeroR
e609f78b
 	       rm(R, zeroG, zeroR)
a1394042
 	       gc(verbose=FALSE)
e609f78b
        }
        if(saveDate) {
 	       protocolData(RG)[["ScanDate"]] = dates$scan
        }
        storageMode(RG) = "lockedEnvironment"
        RG
 }
 
 
ba1a7d73
 getNumberOfSNPs = function(afile, path){
     fullfilename = file.path(path, afile)
     headerSection = readLines(fullfilename, n=15)
 
     headerLine = headerSection[10][1]
     delimiterList = c(",", "\t")
 
     headers = unlist(strsplit(headerLine, delimiterList[1]))
     if (length(headers)!=1) {
         delimiterIndex = 1
     }
     if (length(headers) == 1) {
         headers = unlist(strsplit(headerLine, delimiterList[2]))
         if (length(headers) != 1) {
             delimiterIndex = 2
         }
         if (length(headers) == 1) {
             stop("Input file ", fullfilename, " is not delimited by either comm(,) or tab(\\t)")
         }
5cdc0c76
     }
ba1a7d73
 
     SNPLine = headerSection[5][1]
     elements = unlist(strsplit(SNPLine, delimiterList[delimiterIndex]))
     numSNP = as.integer(elements[2])
     return(numSNP)
 }
 
 checkNumberOfSNPs = function(filenames, path){
     numSNP = getNumberOfSNPs(filenames[1], path)
     if (length(filenames) > 1) {
 		for (i in 2:length(filenames)){
 			if (getNumberOfSNPs(filenames[i], path) != numSNP){
 				return(FALSE)
 			}
 		}
5cdc0c76
     }
ba1a7d73
     return(TRUE)
 }
7c0c9ac5
 
d3668036
 
ba1a7d73
 getNumberOfSamples = function(filenames, path, numSNP){
     sampleCount = rep(0, length(filenames))
     for (i in 1:length(filenames)){
     	# number of sample in input file line 7 is not reliable, calculate from number of lines and number of SNPs
     	fullfilename = file.path(path, filenames[i])
 	LineCount = .Call("countFileLines", fullfilename)
   	if (((LineCount - 10) %% numSNP) != 0){
            stop("Please check input file: ", fullfilename, " Line count is not a multiple of number of SNPs")
     	}
 	sampleCount[i] = LineCount %/% numSNP
     }	
     return(sampleCount)
 }
d3668036
 
ba1a7d73
 processOneGenCallFile = function(afile, numSNP, numSample,
     colnames=list("SampleID"="Sample ID", "SNPID"="SNP Name", "XRaw"="X Raw", "YRaw"="Y Raw"),
     verbose=FALSE) {
   
     headerSection = readLines(afile, n=15)
7c0c9ac5
 
ba1a7d73
     headerLine = headerSection[10][1]
     delimiterList = c(",", "\t")
7c0c9ac5
 
ba1a7d73
     headers = unlist(strsplit(headerLine, delimiterList[1]))
     if (length(headers)!=1) {
         delimiterIndex = 1
     }
     if (length(headers) == 1) {
         headers = unlist(strsplit(headerLine, delimiterList[2]))
         if (length(headers) != 1) {
             delimiterIndex = 2
d3668036
         }
ba1a7d73
         if (length(headers) == 1) {
             stop("Input file is not delimited by either comm(,) or tab(\\t)")
         }
     }
      
     if(sum(is.na(match(colnames, headers))) != 0)
 	stop("Cannot find required columns: ", colnames[is.na(match(colnames, headers))], " in ", file, "\nPlease check whether this data was exported.")
     
     SNPIDPos = which(headers == colnames$SNPID)
     sampleIDPos = which(headers == colnames$SampleID)
     XValuePos = which(headers == colnames$XRaw)
     YValuePos = which(headers == colnames$YRaw)   
         
     if(verbose) {
         message("Number of SNPs in file: ", afile, " is ", numSNP, " and number of samples is ", numSample)
         if (delimiterIndex == 1) message("File is comma-seperated. ")
         if (delimiterIndex == 2) message("File is tab-seperated. ")              
d3668036
     }
7c0c9ac5
 
ba1a7d73
     values = .Call("readGenCallOutputCFunc", afile, numSNP, numSample, SNPIDPos, sampleIDPos, XValuePos, YValuePos, delimiterIndex)
              
     return(values)
 }
7c0c9ac5
 
ba1a7d73
 readGenCallOutput = function(filenames, path=".", cdfName,
     colnames=list("SampleID"="Sample ID", "SNPID"="SNP Name", "XRaw"="X Raw", "YRaw"="Y Raw"),
     type=list("SampleID"="character", "SNPID"="character", "XRaw"="integer", "YRaw"="integer"), verbose=FALSE) {
7c0c9ac5
 
ba1a7d73
     if(!identical(names(type), names(colnames)))
        stop("The arguments 'colnames' and 'type' must have consistent names")
     if(missing(cdfName)) stop("must specify cdfName")
7c0c9ac5
 
ba1a7d73
     if (!checkNumberOfSNPs(filenames, path)){
        stop("Number of SNPs in each file must be identical to form one output NChannelSet object.")
     }
 
     if (verbose) message("Checking number of samples and features in each file. ")
     numSNP = getNumberOfSNPs(filenames[1], path)
     sampleCounts = getNumberOfSamples(filenames, path, numSNP)
     numSample = sum(sampleCounts)
 
     X = initializeBigMatrix(name = "X", nr = numSNP, nc = numSample, vmode = "integer")
     Y = initializeBigMatrix(name = "Y", nr = numSNP, nc = numSample, vmode = "integer")
     zero = initializeBigMatrix(name = "zero", nr = numSNP, nc = numSample, vmode = "integer")
     
     totSampleNames = rep(NA, numSample)
    
     baseIndex = 1
     if (verbose) message("Start processing ", length(filenames), " input file(s)")
     for (i in 1:length(filenames)){
     	fullfilename = file.path(path, filenames[i])
     	valuesThisFile = processOneGenCallFile(fullfilename, numSNP, sampleCounts[i], colnames, verbose)
 	
 	if (i == 1){
 	    totSNPNames = rownames(valuesThisFile$Xvalues)
 	} else {
 	    # matching on SNP names? now assume they come in order
 	}
 	maxIndex = baseIndex + sampleCounts[i] - 1
cbfc52e5
         m=match(totSNPNames, rownames(valuesThisFile$Xvalues))
         valuesThisFile$Xvalues=valuesThisFile$Xvalues[m,]
         valuesThisFile$Yvalues=valuesThisFile$Yvalues[m,]
ba1a7d73
 	X[, baseIndex:maxIndex] = valuesThisFile$Xvalues
 	Y[, baseIndex:maxIndex] = valuesThisFile$Yvalues
         zero[, baseIndex:maxIndex] = (X[, baseIndex:maxIndex] == 0) || (Y[, baseIndex:maxIndex] == 0)
 	totSampleNames[baseIndex:(baseIndex + sampleCounts[i] - 1)] = colnames(valuesThisFile$Xvalues)
 	rm(valuesThisFile)
 	baseIndex = baseIndex + sampleCounts[i]
     }
 
     if(verbose) message("Creating NChannelSet object\n")
 
     XY = new("NChannelSet", X=X, Y=Y, zero=zero, annotation=cdfName, storage.mode = "environment")
     sampleNames(XY) = totSampleNames
     featureNames(XY) = totSNPNames
     
5cdc0c76
     if(verbose)
ba1a7d73
     cat("Done\n")
7c0c9ac5
 
5cdc0c76
     XY
 }
 
 
ba1a7d73
 
d49c9607
 RGtoXY = function (RG, chipType, anno, verbose = TRUE) 
 {
     if (chipType != "nopackage") {
         needToLoad <- !all(sapply(c("addressA", "addressB", "base"), 
             isLoaded))
         if (needToLoad) {
             chipList = c("human1mv1c", "human370v1c", "human650v3a", 
                 "human610quadv1b", "human660quadv1a", "human370quadv3c", 
                 "human550v3b", "human1mduov3b", "humanomni1quadv1b", 
                 "humanomni25quadv1b", "humanomni258v1a", "humanomni258v1p1b", 
                 "humanomni5quadv1b", "humanomniexpress12v1b", 
                 "humanimmuno12v1b", "humancytosnp12v2p1h", "humanexome12v1p2a", 
                 "humanomniexpexome8v1p1b")
             if (missing(chipType)) {
                 chipType = match.arg(cleancdfname(annotation(RG)), 
                   chipList)
             }
             else chipType = match.arg(cleancdfname(chipType), 
                 chipList)
             pkgname = getCrlmmAnnotationName(chipType)
             if (!require(pkgname, character.only = TRUE, quietly = !verbose)) {
                 suggCall = paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", 
                   sep = "")
                 msg = paste("If", pkgname, "is installed on an alternative location, please load it manually by using", 
                   suggCall)
                 message(strwrap(msg))
                 stop("Package ", pkgname, " could not be found.")
                 rm(suggCall, msg)
             }
             if (verbose) 
                 message("Loading chip annotation information.")
             loader("address.rda", .crlmmPkgEnv, pkgname)
         }
         aids = getVarInEnv("addressA")
         bids = getVarInEnv("addressB")
         snpbase = getVarInEnv("base")
         ids = names(aids)
         nsnps = length(aids)
         narrays = ncol(RG)
         infI = !is.na(bids) & bids != 0
         aord = match(aids, featureNames(RG))
         bord = match(bids, featureNames(RG))
         xyPhenoData = AnnotatedDataFrame(data = RG@phenoData@data, 
             varMetadata = RG@phenoData@varMetadata)
         levels(xyPhenoData@varMetadata$channel) = c("X", "Y", 
             "zero", "_ALL_")
         XY <- new("NChannelSet", assayDataNew(X = initializeBigMatrix(name = "X", 
             nr = nsnps, nc = narrays, vmode = "integer"), Y = initializeBigMatrix(name = "Y", 
             nr = nsnps, nc = narrays, vmode = "integer"), zero = initializeBigMatrix(name = "zero", 
             nr = nsnps, nc = narrays, vmode = "integer")), phenoData = xyPhenoData, 
             protocolData = RG@protocolData, annotation = chipType)
         storageMode(XY) = "environment"
         featureNames(XY) = ids
         sampleNames(XY) = sampleNames(RG)
         rm(list = c("bord", "infI", "aids", "bids", "ids", "snpbase"))
         gc(verbose = FALSE)
         not.na <- !is.na(aord)
         not.na.aord <- aord[not.na]
         r <- as.matrix(assayData(RG)[["R"]][not.na.aord, ])
         XY@assayData$X[not.na, ] <- r
         rm(r)
         gc(verbose = FALSE)
         g <- as.matrix(assayData(RG)[["G"]][not.na.aord, ])
         XY@assayData$Y[not.na, ] <- g
         rm(g)
         gc(verbose = FALSE)
         z <- as.matrix(assayData(RG)[["zero"]][not.na.aord, ])
         XY@assayData$zero[not.na, ] <- z
         rm(z)
         gc(verbose = FALSE)
         rm(RG)
         gc(verbose = FALSE)
     }
     if (chipType == "nopackage" && !is.null(anno)) {
         pkgname = NULL
         if(sum(colnames(anno@data)=="AddressA_ID")!=1){
            
            message("annotation format is wrong, could not find a column with name: AddressA_ID")
          }else{
          anno2=anno@data
            # chr = as.character(anno$Chr)
   #chr[chr=="X"] = 23
   #  chr[chr=="Y"] = 24
   #  chr[chr=="XY"] = 25
 #chr[chr=="MT"] = 26
 #anno[,"chromosome"]=as.integer(chr)
f1e14c0e
 
d49c9607
        aids = anno2[, "AddressA_ID"]
         bids = anno2[, "AddressB_ID"]
         snpbase = anno2[, "SNP"]
         names(aids) = ids = anno2[, "featureNames"]
         nsnps = length(aids)
         narrays = ncol(RG)
         infI = !is.na(bids) & bids != 0
         aord = match(aids, featureNames(RG))
         bord = match(bids,featureNames(RG))
         xyPhenoData = AnnotatedDataFrame(data = RG@phenoData@data, 
             varMetadata = RG@phenoData@varMetadata)
         levels(xyPhenoData@varMetadata$channel) = c("X", "Y", 
             "zero", "_ALL_")
         XY <- new("NChannelSet", assayDataNew(X = initializeBigMatrix(name = "X", 
             nr = nsnps, nc = narrays, vmode = "integer"), Y = initializeBigMatrix(name = "Y", 
             nr = nsnps, nc = narrays, vmode = "integer"), zero = initializeBigMatrix(name = "zero", 
             nr = nsnps, nc = narrays, vmode = "integer")), phenoData = xyPhenoData, 
             protocolData = RG@protocolData)
         storageMode(XY) = "environment"
        # featureNames(XY) = names(aids)
         sampleNames(XY) = sampleNames(RG)
         rm(list = c("bord", "infI", "aids", "bids", "snpbase"))
         gc(verbose = FALSE)
         not.na <- !is.na(aord)
         not.na.aord <- aord[not.na]
         r <- as.matrix(assayData(RG)[["R"]][not.na.aord, ])
         XY@assayData$X[not.na, ] <- r
         rm(r)
         gc(verbose = FALSE)
         g <- as.matrix(assayData(RG)[["G"]][not.na.aord, ])
         XY@assayData$Y[not.na, ] <- g
         rm(g)
         gc(verbose = FALSE)
         z <- as.matrix(assayData(RG)[["zero"]][not.na.aord, ])
         XY@assayData$zero[not.na, ] <- z
         rm(z)
         gc(verbose = FALSE)
         rm(RG)
         gc(verbose = FALSE)
    
     
     
     
          }
       }
            XY 
e609f78b
 }
 
 
808cfecc
 stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE, quantile.method="between") {
         if(quantile.method=="between") {
 	  if(useTarget){
453e688a
 		objectsNeeded <- c("stripnum", "reference")
808cfecc
 	  } else objectsNeeded <- "stripnum"
 	  needToLoad <- !all(sapply(objectsNeeded, isLoaded))
 	  if(needToLoad){
453e688a
 		pkgname = getCrlmmAnnotationName(annotation(XY))
 		if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
 			suggCall = paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
 			msg = paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
 			message(strwrap(msg))
 			stop("Package ", pkgname, " could not be found.")
 			rm(suggCall, msg)
 		}
 		if(verbose) message("Loading strip and reference normalization information.")
 		loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
808cfecc
 	  }
 	  stripnum = getVarInEnv("stripnum")
 	  if(useTarget)
453e688a
 		targetdist = getVarInEnv("reference")
808cfecc
 	  if(verbose){
453e688a
 		message("Quantile normalizing ", ncol(XY), " arrays by ", max(stripnum), " strips.")
 		if (getRversion() > '2.7.0') pb = txtProgressBar(min=0, max=max(stripnum), style=3)
808cfecc
 	  }
           for(s in 1:max(stripnum)) {
             if(verbose) {
               if (getRversion() > '2.7.0') setTxtProgressBar(pb, s)
               else cat(".")
             }
             sel = stripnum==s
             ##RS: faster to access data directly
             ##subX = as.matrix(exprs(channel(XY, "X"))[sel,])
             ##subY = as.matrix(exprs(channel(XY, "Y"))[sel,])
             subX <- as.matrix(assayData(XY)[["X"]][sel, ])
             subY <- as.matrix(assayData(XY)[["Y"]][sel, ])
             if(useTarget)
               tmp = normalize.quantiles.use.target(cbind(subX, subY), targetdist[[s]])
             else
               tmp = normalize.quantiles(cbind(subX, subY))
             XY@assayData$X[sel,] = matrix(as.integer(tmp[,1:(ncol(tmp)/2)]+16), nrow(tmp), ncol(tmp)/2)
             XY@assayData$Y[sel,] = matrix(as.integer(tmp[,(ncol(tmp)/2+1):ncol(tmp)]+16), nrow(tmp), ncol(tmp)/2)
             rm(subX, subY, tmp, sel)
             gc(verbose=FALSE)
           }
           if(verbose)
             cat("\n")
         }
         if(quantile.method=="within") {  # ignore strip information
 	  if(useTarget){
 		objectsNeeded <- c("Xref", "Yref")
 	  } else objectsNeeded <- ""
 	  needToLoad <- !all(sapply(objectsNeeded, isLoaded))
 	  if(needToLoad){
 		pkgname = getCrlmmAnnotationName(annotation(XY))
 		if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
 			suggCall = paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
 			msg = paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
 			message(strwrap(msg))
 			stop("Package ", pkgname, " could not be found.")
 			rm(suggCall, msg)
 		}
 		if(verbose) message("Loading reference normalization information.")
 		loader("targetXY.rda", .crlmmPkgEnv, pkgname)
 	  }
 	  if(useTarget) {
                 Xref = getVarInEnv("Xref")
 	        Yref = getVarInEnv("Yref")
           } else{
                 Xref = normalize.quantiles.determine.target(as.matrix(assayData(XY)[["X"]]))
                 gc(verbose=FALSE)
                 Yref = normalize.quantiles.determine.target(as.matrix(assayData(XY)[["Y"]]))
                 gc(verbose=FALSE)
           }
 	  if(verbose){
 		message("Quantile normalizing ", ncol(XY), " arrays, one at a time.")
 		if (getRversion() > '2.7.0') pb = txtProgressBar(min=0, max=ncol(XY), style=3)
 	  }
           for(s in 1:ncol(XY)) {
             if(verbose) {
               if (getRversion() > '2.7.0') setTxtProgressBar(pb, s)
               else cat(".")
             }
             ##RS: faster to access data directly
             ##subX = as.matrix(exprs(channel(XY, "X"))[sel,])
             ##subY = as.matrix(exprs(channel(XY, "Y"))[sel,])
             subX <- as.matrix(assayData(XY)[["X"]][,s])
             subY <- as.matrix(assayData(XY)[["Y"]][,s])
             tmpX = normalize.quantiles.use.target(subX, as.integer(Xref))
             tmpY = normalize.quantiles.use.target(subY, as.integer(Yref))              
             XY@assayData$X[,s] = as.integer(tmpX+16)
             XY@assayData$Y[,s] = as.integer(tmpY+16)
             rm(subX, subY, tmpX, tmpY)
           }
           if(verbose)
             cat("\n")
         }
5fcfed7d
   XY
cd8b85b6
 }
 
 
450a6d8a
 preprocessInfinium2 = function(XY, mixtureSampleSize=10^5,
e609f78b
 				fitMixture=TRUE,
 				eps=0.1,
 				verbose=TRUE,
 				seed=1,
 				cdfName,
 				sns,
 				stripNorm=TRUE,
808cfecc
 				useTarget=TRUE,
                                 quantile.method="between") { #,
99202387
 #               outdir=".") {
e972ec4a
 #				save.it=FALSE,
 #				snpFile,
 #				cnFile) {
453e688a
 	if(stripNorm)
808cfecc
 		XY = stripNormalize(XY, useTarget=useTarget, verbose=verbose, quantile.method=quantile.method)
453e688a
 	## MR: the code below is mostly straight from snprma.R
 	if (missing(sns)) sns = sampleNames(XY) #$X
 	if(missing(cdfName))
 		cdfName = annotation(XY)
 	needToLoad <- !all(sapply(c("autosomeIndex",
 				    "SMEDIAN",
 				    "theKnots",
 				    "npProbesFid"), isLoaded))
 	if(needToLoad){
 		pkgname = getCrlmmAnnotationName(cdfName)
 		if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
 			suggCall = paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
 			msg = paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
 			message(strwrap(msg))
 			stop("Package ", pkgname, " could not be found.")
 			rm(suggCall, msg)
 		}
 		if(verbose) message("Loading snp annotation and mixture model parameters.")
 		loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
808cfecc
                 if(fitMixture)
  		  loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
453e688a
 		loader("snpProbesFid.rda", .crlmmPkgEnv, pkgname)
 		loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
 	}
 	autosomeIndex = getVarInEnv("autosomeIndex")
808cfecc
         if(fitMixture) {
           SMEDIAN = getVarInEnv("SMEDIAN")
           theKnots = getVarInEnv("theKnots")
         }
         npIndex = getVarInEnv("npProbesFid")
453e688a
 	snpIndex = getVarInEnv("snpProbesFid")
 	narrays = ncol(XY)
 	nprobes = length(npIndex)
 	if(length(nprobes)>0) {
 		## RS: channel creates an expression set.  This is much slower than directly accessing the data
 		A <- matrix(as.integer(assayData(XY)[["X"]][npIndex, ]), nprobes, narrays)
 		##system.time(A <- matrix(as.integer(exprs(channel(XY, "X"))[npIndex,]), nprobes, narrays))
 		B <- matrix(as.integer(assayData(XY)[["Y"]][npIndex, ]), nprobes, narrays)
 		##B = matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays)
 		## new lines below - useful to keep track of zeroed out probes
 		zero <- matrix(as.integer(assayData(XY)[["zero"]][npIndex, ]), nprobes, narrays)
 		##zero = matrix(as.integer(exprs(channel(XY, "zero"))[npIndex,]), nprobes, narrays)
ba1a7d73
 		
453e688a
 		colnames(A) = colnames(B) = colnames(zero) = sns
 		rownames(A) = rownames(B) = rownames(zero) = names(npIndex)
 		cnAB = list(A=A, B=B, zero=zero, sns=sns, gns=names(npIndex), cdfName=cdfName)
 		##      t0 <- proc.time()
 		##      save(cnAB, file=cnFile)
 		##      t0 <- proc.time()-t0
 		##      if(verbose) message("Used ", round(t0[3],1), " seconds to save ", cnFile, ".")
 		rm(A, B, zero)
a1394042
 #		print(gc())
 		gc(verbose=FALSE)
453e688a
 	}
e609f78b
   # next process snp probes
450a6d8a
   nprobes = length(snpIndex)
e609f78b
   ##We will read each cel file, summarize, and run EM one by one
   ##We will save parameters of EM to use later
99202387
   mixtureParams = matrix(NA, 4, narrays)
   SNR = rep(NA, narrays)
   SKW = rep(NA, narrays)
e609f78b
   ## This is the sample for the fitting of splines
   ## BC: I like better the idea of the user passing the seed,
   ##     because this might intefere with other analyses
   ##     (like what happened to GCRMA)
   set.seed(seed)
450a6d8a
   idx = sort(sample(autosomeIndex, mixtureSampleSize))
   idx2 = sample(nprobes, 10^5)
e609f78b
   ##S will hold (A+B)/2 and M will hold A-B
   ##NOTE: We actually dont need to save S. Only for pics etc...
   ##f is the correction. we save to avoid recomputing
a11efc9c
 
c27ba9c5
   A = matrix(0, nprobes, narrays)
   B = matrix(0, nprobes, narrays)
99202387
   zero = matrix(NA, nprobes, narrays)
808cfecc
   if(verbose && fitMixture){
e609f78b
      message("Calibrating ", narrays, " arrays.")
450a6d8a
      if (getRversion() > '2.7.0') pb = txtProgressBar(min=0, max=narrays, style=3)
e609f78b
   }
   for(i in 1:narrays){
453e688a
 	  ## RS: faster to access data directly without using channel method
 	  ##A[,i] = as.integer(exprs(channel(XY, "X"))[snpIndex,i])
 	  A[, i] <- as.integer(assayData(XY)[["X"]][snpIndex, i])
 	  B[, i] <- as.integer(assayData(XY)[["Y"]][snpIndex, i])
 	  zero[, i] <- as.integer(assayData(XY)[["zero"]][snpIndex, i])
 	  ##B[,i] = as.integer(exprs(channel(XY, "Y"))[snpIndex,i])
 	  ##zero[,i] = as.integer(exprs(channel(XY, "zero"))[snpIndex,i])
 	  SKW[i] = mean((A[idx2,i]-mean(A[idx2,i]))^3)/(sd(A[idx2,i])^3)
 	  if(fitMixture){
 		  S = (log2(A[idx,i])+log2(B[idx,i]))/2 - SMEDIAN
 		  M = log2(A[idx,i])-log2(B[idx,i])
 		  ##we need to test the choice of eps.. it is not the max diff between funcs
 		  tmp = fitAffySnpMixture56(S, M, theKnots, eps=eps)
 		  mixtureParams[, i] = tmp[["coef"]]
 		  SNR[i] = tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
808cfecc
 	          if(verbose) {
 		    if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
 		    else cat(".")
                   }
453e688a
 	  }
 	  ## run garbage collection every now and then
a1394042
 	  if(i %% 100 == 0) gc(verbose=FALSE);
e609f78b
   }
808cfecc
   if (verbose && fitMixture) {
453e688a
 	  if (getRversion() > '2.7.0') close(pb)
 	  else cat("\n")
e609f78b
   }
450a6d8a
   if (!fitMixture) SNR = mixtureParams = NA
e609f78b
   ## gns comes from preprocStuff.rda
8d6ca965
 
e972ec4a
 #  if(save.it & !missing(snpFile)) {
 #    t0 <- proc.time()
 #    save(res, file=snpFile)
 #    t0 <- proc.time()-t0
 #    if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".")
 #  }
f1e14c0e
 
33753706
   res = list(A=A, B=B,
              zero=zero, sns=sns, gns=names(snpIndex), SNR=SNR, SKW=SKW,
e972ec4a
              mixtureParams=mixtureParams, cdfName=cdfName, cnAB=cnAB)
3f404581
 
e972ec4a
 #  open(res[["A"]])
 #  open(res[["B"]])
 #  open(res[["zero"]])
 #  open(res[["SNR"]])
 #  open(res[["mixtureParams"]])
 
 #  if(save.it & !missing(snpFile)) {
 #    t0 <- proc.time()
 #    save(res, file=snpFile)
 #    t0 <- proc.time()-t0
 #    if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".")
 #  }
8d6ca965
 
e972ec4a
 #  close(res[["A"]])
 #  close(res[["B"]])
 #  close(res[["zero"]])
 #  close(res[["SNR"]])
 #  close(res[["mixtureParams"]])
a1a4312d
 
e609f78b
   return(res)
 }
 
cbfc52e5
 ## crlmmIllumina and genotype.Illumina are now the same function
 ## Use genotype.Illumina instead
 #crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
 #			  row.names=TRUE, col.names=TRUE,
 #			  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
 #			  seed=1,
 #			  mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
 #			  cdfName, sns, recallMin=10, recallRegMin=1000,
 #			  returnParams=FALSE, badSNP=.7) {
 #	if(missing(cdfName)) {
 #		if(!missing(RG))
 #			cdfName = annotation(RG)
 #		if(!missing(XY))
 #			cdfName = annotation(XY)
 #	}
 #	if(!isValidCdfName(cdfName))
 #		stop("cdfName not valid.  see validCdfNames")
 #	if(!missing(RG)) {
 #		if(missing(XY))
 #			XY = RGtoXY(RG, chipType=cdfName)
 #		else
 #			stop("Both RG and XY specified - please use one or the other")
 #	}
 #	if (missing(sns)) sns = sampleNames(XY)
 #	res <- preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize,
 #				   fitMixture=TRUE, verbose=verbose,
 #				   seed=seed, eps=eps, cdfName=cdfName, sns=sns,
 #				   stripNorm=stripNorm, useTarget=useTarget) #,
 #	if(row.names) row.names=res$gns else row.names=NULL
 #	if(col.names) col.names=res$sns else col.names=NULL
 #	res2 <- crlmmGT(A=res[["A"]],
 #			B=res[["B"]],
 #			SNR=res[["SNR"]],
 #			mixtureParams=res[["mixtureParams"]],
 #			cdfName=cdfName,
 #			row.names=row.names,
 #			col.names=col.names,
 #			probs=probs,
 #			DF=DF,
 #			SNRMin=SNRMin,
 #			recallMin=recallMin,
 #			recallRegMin=recallRegMin,
 #			gender=gender,
 #			verbose=verbose,
 #			returnParams=returnParams,
 #			badSNP=badSNP)
 #	res2[["SNR"]] = res[["SNR"]]
 #	res2[["SKW"]] = res[["SKW"]]
 #	rm(res); gc(verbose=FALSE)
 #	return(list2SnpSet(res2, returnParams=returnParams))
 #}
 
 
 ## Deprecate crlmmIlluminaV2 function
f1e14c0e
 ## MR: Below is a more memory efficient version of crlmmIllumina() which
 ## reads in the .idats and genotypes in the one function and removes objects
4e4601af
 ## after they have been used
cbfc52e5
 #crlmmIlluminaV2 = function(sampleSheet=NULL,
 #			  arrayNames=NULL,
 #			  ids=NULL,
 #			  path=".",
 #			  arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
 #			  highDensity=FALSE,
 #			  sep="_",
 #			  fileExt=list(green="Grn.idat", red="Red.idat"),
 #			  saveDate=FALSE,
 #			  stripNorm=TRUE,
 #			  useTarget=TRUE,
 # #                         outdir=".",
 #			  row.names=TRUE,
 #			  col.names=TRUE,
 #			  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
 #                          seed=1,  # save.it=FALSE, snpFile, cnFile,
 #                          mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
 #                          cdfName, sns, recallMin=10, recallRegMin=1000,
 #                          returnParams=FALSE, badSNP=.7) {
 #
 #    if(missing(cdfName)) stop("must specify cdfName")
 #    if(!isValidCdfName(cdfName)) stop("cdfName not valid.  see validCdfNames")
 #
 ##    is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
 #    is.lds=FALSE
 #    RG = readIdatFiles(sampleSheet=sampleSheet, arrayNames=arrayNames,
 #                       ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
 #                       highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate)
 #
 #
 #    XY = RGtoXY(RG, chipType=cdfName)
 ##    if(is.lds) {
 ##      open(RG@assayData$R); open(RG@assayData$G); open(RG@assayData$zero)
 ##      delete(RG@assayData$R); delete(RG@assayData$G); delete(RG@assayData$zero)
 ##    }
 #    rm(RG); gc(verbose=FALSE)
 #
 #    if (missing(sns)) { sns = sampleNames(XY)
99202387
 #    }
cbfc52e5
 #    res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
 #                               seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #, outdir=outdir) #,
 ##                               save.it=save.it, snpFile=snpFile, cnFile=cnFile)
 #
 ##    if(is.lds) {
 ##      open(XY@assayData$X); open(XY@assayData$Y); open(XY@assayData$zero)
 ##      delete(XY@assayData$X); delete(XY@assayData$Y); delete(XY@assayData$zero)
 ##    }
 #    rm(XY); gc(verbose=FALSE)
 #
 #    if(row.names) row.names=res$gns else row.names=NULL
 #    if(col.names) col.names=res$sns else col.names=NULL
 ###
 ###  if(is.lds){
 ###	  res2 <- crlmmGT2(A=res[["A"]],
 ###			   B=res[["B"]],
 ###			   SNR=res[["SNR"]],
 ###			   mixtureParams=res[["mixtureParams"]],
 ###			   cdfName=cdfName,
 ###			   row.names=row.names,
 ###			   col.names=col.names,
 ###			   probs=probs,
 ###			   DF=DF,
 ###			   SNRMin=SNRMin,
 ###			   recallMin=recallMin,
 ###			   recallRegMin=recallRegMin,
 ###			   gender=gender,
 ###			   verbose=verbose,
 ###			   returnParams=returnParams,
 ###			   badSNP=badSNP)
 ###  } else {
 #	  res2 <- crlmmGT(A=res[["A"]],
 #			   B=res[["B"]],
 #			   SNR=res[["SNR"]],
 #			   mixtureParams=res[["mixtureParams"]],
 #			   cdfName=cdfName,
 #			   row.names=row.names,
 #			   col.names=col.names,
 #			   probs=probs,
 #			   DF=DF,
 #			   SNRMin=SNRMin,
 #			   recallMin=recallMin,
 #			   recallRegMin=recallRegMin,
 #			   gender=gender,
 #			   verbose=verbose,
 #			   returnParams=returnParams,
 #			   badSNP=badSNP)
 ###  }
 #
 ###    FUN = ifelse(is.lds, "crlmmGT2", "crlmmGT")
 ###    ## genotyping
 ###    crlmmGTfxn = function(FUN,...){
 ###		switch(FUN,
 ###		       crlmmGT2=crlmmGT2(...),
 ###		       crlmmGT=crlmmGT(...))
 ###              }
 ###    res2 = crlmmGTfxn(FUN,
 ###                     A=res[["A"]],
 ###                     B=res[["B"]],
 ###                     SNR=res[["SNR"]],
 ###                     mixtureParams=res[["mixtureParams"]],
 ###                     cdfName=cdfName,
 ###                     row.names=row.names,
 ###                     col.names=col.names,
 ###                     probs=probs,
 ###                     DF=DF,
 ###                     SNRMin=SNRMin,
 ###                     recallMin=recallMin,
 ###                     recallRegMin=recallRegMin,
 ###                     gender=gender,
 ###                     verbose=verbose,
 ###                     returnParams=returnParams,
 ###                     badSNP=badSNP)
 #
 ##    if(is.lds) {
 ##      open(res[["SNR"]]); open(res[["SKW"]])
 ##    }
 #    res2[["SNR"]] = res[["SNR"]]
 #    res2[["SKW"]] = res[["SKW"]]
 # #  if(is.lds) {
 # #    delete(res[["A"]]); delete(res[["B"]])
 # #    delete(res[["SKW"]]); delete(res[["SNR"]]); delete(res[["mixtureParams"]])
 # #  }
 #    rm(res); gc(verbose=FALSE)
 #    return(list2SnpSet(res2, returnParams=returnParams))
 #}
e972ec4a
 
450a6d8a
 # Functions analogous to Rob's Affy functions to set up container
99202387
 getProtocolData.Illumina = function(filenames, sep="_", fileExt="Grn.idat", verbose=FALSE) {
e972ec4a
        narrays = length(filenames)
 
        headerInfo = list(nProbes = rep(NA, narrays),
                          Barcode = rep(NA, narrays),
                          ChipType = rep(NA, narrays),
450a6d8a
                          Manifest = rep(NA, narrays),
                          Position = rep(NA, narrays))
e972ec4a
 
        scanDates = data.frame(ScanDate=rep(NA, narrays), DecodeDate=rep(NA, narrays))
063b3d14
        rownames(scanDates) <- make.unique(gsub(paste(sep, fileExt, sep=""), "", filenames))
e972ec4a
        ## read in the data
        for(i in seq_along(filenames)) {
99202387
                if(verbose)
 	               cat("reading", filenames[i], "\n")
e972ec4a
 	       idsG = G = NULL
 	       G = readIDAT(filenames[i])
 	       idsG = rownames(G$Quants)
 	       headerInfo$nProbes[i] = G$nSNPsRead
 	       headerInfo$Barcode[i] = G$Barcode
 	       headerInfo$ChipType[i] = G$ChipType
 	       headerInfo$Manifest[i] = G$Unknown$MostlyNull
 	       headerInfo$Position[i] = G$Unknowns$MostlyA
                if(headerInfo$nProbes[i]>(headerInfo$nProbes[1]+10000) || headerInfo$nProbes[i]<(headerInfo$nProbes[1]-10000)) {
 		       warning("Chips are not of the same type.  Skipping ", basename(filenames[i]))
 		       next()
 	       }
c27ba9c5
                if(nrow(G$RunInfo)>=2) {
    	           scanDates$ScanDate[i] = G$RunInfo[1, 1]
 	           scanDates$DecodeDate[i] = G$RunInfo[2, 1]
                }
e972ec4a
 	       rm(G)
a1394042
 	       gc(verbose=FALSE)
e972ec4a
        }
        protocoldata = new("AnnotatedDataFrame",
 			    data=scanDates,
 			    varMetadata=data.frame(labelDescription=colnames(scanDates),
 			                           row.names=colnames(scanDates)))
063b3d14
        return(protocoldata)
e972ec4a
 }
 
3be95c2b
 getAvailableIlluminaGenomeBuild <- function(path){
 	snp.file <- list.files(path, pattern="snpProbes_hg")
 	if(length(snp.file) > 1){
 		## use hg19
 		message("genome build not specified. Using build hg19 for annotation.")
 		snp.file <- snp.file[1]
 	}
a3b625d4
 	if(length(snp.file) == 1)
 		genome <- gsub(".rda", "", strsplit(snp.file, "snpProbes_")[[1]][[2]])
 	if(length(snp.file)==0) genome <- ""
 	genome
3be95c2b
 }
 
e972ec4a
 
f0f8ba16
 constructInf <- function(sampleSheet=NULL,
415a3b06
 			 arrayNames=NULL,
 			 path=".",
 			 arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
 			 highDensity=FALSE,
 			 sep="_",
 			 fileExt=list(green="Grn.idat", red="Red.idat"),
808cfecc
                          XY,
415a3b06
 			 cdfName,
cbfc52e5
                          anno,
                          genome,
415a3b06
 			 verbose=FALSE,
808cfecc
 			 batch=NULL, #fns,
415a3b06
 			 saveDate=TRUE) { #, outdir="."){
808cfecc
         if(is.null(XY)) {
   	  if(!is.null(arrayNames)) {
453e688a
 		pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
808cfecc
 	  }
 	  if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
453e688a
 		if(is.null(arrayNames)) {
 			if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
 				barcode = sampleSheet[,arrayInfoColNames$barcode]
 				arrayNames=barcode
 			}
 			if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) {
 				position = sampleSheet[,arrayInfoColNames$position]
 				if(is.null(arrayNames))
 					arrayNames=position
 				else
 					arrayNames = paste(arrayNames, position, sep=sep)
 				if(highDensity) {
 					hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02")
 					for(i in names(hdExt))
 						arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames)
 				}
 			}
808cfecc
 		  }
 		  pd = new("AnnotatedDataFrame", data = sampleSheet)
 		  sampleNames(pd) = make.unique(basename(arrayNames))
 	  }
 	  if(is.null(arrayNames)) {
453e688a
 		arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
 		if(!is.null(sampleSheet)) {
 			sampleSheet=NULL
 			cat("Could not find required info in \'sampleSheet\' - ignoring.  Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
 		}
 		pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
808cfecc
   	  }
 	  narrays = length(arrayNames)
3f404581
 
808cfecc
 	  if(!is.null(batch)) {
e972ec4a
 		stopifnot(length(batch) == narrays)
808cfecc
 	  }
  	  if(is.null(batch)) {
                 batch = rep("batch1", narrays) # assume only one batch stop("Must specify 'batch'")
 	  }
           if(is(batch, "factor")) batch = as.character(batch)
e972ec4a
 
808cfecc
 	  grnfiles = paste(arrayNames, fileExt$green, sep=sep)
 	  redfiles = paste(arrayNames, fileExt$red, sep=sep)
 	  if(length(grnfiles)==0 || length(redfiles)==0)
453e688a
 		stop("Cannot find .idat files")
808cfecc
 	  if(length(grnfiles)!=length(redfiles))
453e688a
 		stop("Cannot find matching .idat files")
808cfecc
 	  if(path[1] != "." & path[1] != ""){
453e688a
 		grnidats = file.path(path, grnfiles)
 		redidats = file.path(path, redfiles)
808cfecc
 	  }  else {
453e688a
 		message("path arg not set.  Assuming files are in local directory, or that complete path is provided")
 		grnidats = grnfiles
 		redidats = redfiles
808cfecc
     	  }
 	  if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files")
 	  if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files")
 
 	  if(verbose) message("Initializing container for genotyping and copy number estimation")
cbfc52e5
           if(cdfName!="nopackage") {
 	    pkgname <- getCrlmmAnnotationName(cdfName)
 	    path <- system.file("extdata", package=pkgname)
 	    genome <- getAvailableIlluminaGenomeBuild(path)
 	    featureData = getFeatureData(cdfName, copynumber=TRUE, genome=genome)
           } else {
             if(!is.integer(anno$chr)) {
                   chr = as.character(anno$chromosome)
                   chr[chr=="X"] = 23
                   chr[chr=="Y"] = 24
                   chr[chr=="XY"] = 25
             }
             featureData = new("GenomeAnnotatedDataFrame",
                           isSnp=as.logical(anno$isSnp),
                           position=as.integer(anno$position),
                           chromosome=as.integer(chr),
                           row.names=anno$featureNames)
           }
             nr <- nrow(featureData)
             nc <- narrays
 	    sns <-  if (!is.null(sampleSheet) && !is.null(sampleSheet$Sample_ID)) {
063b3d14
 		make.unique(sampleSheet$Sample_ID)
cbfc52e5
             } else{
063b3d14
 		make.unique(basename(arrayNames))
cbfc52e5
             } 
808cfecc
 	  biga <- initializeBigMatrix(name="A", nr, nc)
 	  bigb <- initializeBigMatrix(name="B", nr, nc)
 	  bigc <- initializeBigMatrix(name="call", nr, nc)
 	  bigd <- initializeBigMatrix(name="callPr", nr,nc)
 	  colnames(biga) <- colnames(bigb) <- colnames(bigc) <- colnames(bigd) <- sns
 	  cnSet <- new("CNSet",
063b3d14
 		     alleleA=biga,
 		     alleleB=bigb,
 		     call=bigc,
 		     callProbability=bigd,
e972ec4a
 		     annotation=cdfName,
7c0c9ac5
 		     featureData=featureData,
3be95c2b
 		     batch=batch,
 		     genome=genome)
063b3d14
 ##        if (!is.null(sampleSheet) && !is.null(sampleSheet$Sample_ID)) {
 ##		sampleNames(cnSet) = make.unique(sampleSheet$Sample_ID)
 ##        } else{
 ##		sampleNames(cnSet) <- make.unique(basename(arrayNames))
 ##	}
808cfecc
   	  if(saveDate){
99202387
 		protocolData = getProtocolData.Illumina(grnidats, sep=sep, fileExt=fileExt$green, verbose=verbose)
808cfecc
 	  } else{
450a6d8a
 		protocolData = annotatedDataFrameFrom(A(cnSet), byrow=FALSE)
808cfecc
 	  }
 	  rownames(pData(protocolData)) = sampleNames(cnSet)
 	  protocolData(cnSet) = protocolData
7c0c9ac5
 	##featureData(cnSet) = featureData
808cfecc
 	  featureNames(cnSet) = featureNames(featureData)
 	  cnSet$gender <- initializeBigVector("gender", ncol(cnSet), vmode="integer")
 	  cnSet$SNR = initializeBigVector("crlmmSNR-", ncol(cnSet), "double")
 	  cnSet$SKW = initializeBigVector("crlmmSKW-", ncol(cnSet), "double")
063b3d14
 	##sampleNames(cnSet) <- basename(sampleNames(cnSet))
ba1a7d73
         } else { # if XY specified, easier set-up of cnSet
808cfecc
           narrays = ncol(XY)
           if(verbose) message("Initializing container for genotyping and copy number estimation")           
           if(!is.null(batch)) {
               stopifnot(length(batch) == narrays)
           }
           if(is.null(batch)) {
               batch = rep("batch1", narrays) # assume only one batch stop("Must specify 'batch'")
           }
           if(is(batch, "factor")) batch = as.character(batch)
cbfc52e5
           if(cdfName!="nopackage") {
             pkgname <- getCrlmmAnnotationName(cdfName)
             path <- system.file("extdata", package=pkgname)
             genome <- getAvailableIlluminaGenomeBuild(path)
             featureData = getFeatureData(cdfName, copynumber=TRUE, genome=genome)
           } else {
d49c9607
                       anno = as(anno, "AnnotatedDataFrame")
  anno = as(anno, "GenomeAnnotatedDataFrame")
 featureData=anno
            
cbfc52e5
           }
           nr <- nrow(featureData)
           nc <- narrays
808cfecc
           sns <-  sampleNames(XY)
           biga <- initializeBigMatrix(name="A", nr, nc)
           bigb <- initializeBigMatrix(name="B", nr, nc)
           bigc <- initializeBigMatrix(name="call", nr, nc)
           bigd <- initializeBigMatrix(name="callPr", nr,nc)
           colnames(biga) <- colnames(bigb) <- colnames(bigc) <- colnames(bigd) <- sns
           cnSet <- new("CNSet",
                        alleleA=biga,
                        alleleB=bigb,
                        call=bigc,
                        callProbability=bigd,
                        annotation=cdfName,
                        featureData=featureData,
                        batch=batch,
                        genome=genome)
           protocolData = annotatedDataFrameFrom(A(cnSet), byrow=FALSE)
           rownames(pData(protocolData)) = sampleNames(cnSet)
           protocolData(cnSet) = protocolData
           featureNames(cnSet) = featureNames(featureData)
           cnSet$gender = initializeBigVector("gender", ncol(cnSet), vmode="integer")
           cnSet$SNR = initializeBigVector("crlmmSNR-", ncol(cnSet), "double")
           cnSet$SKW = initializeBigVector("crlmmSKW-", ncol(cnSet), "double")
         }
e972ec4a
 	return(cnSet)
 }
f0f8ba16
 construct.Illumina <- constructInf
e972ec4a
 
f0f8ba16
 preprocessInf <- function(cnSet,
808cfecc
 		       sampleSheet=NULL,
cfa9fbbc
 		       arrayNames=NULL,
 		       ids=NULL,
 		       path=".",
 		       arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
 		       highDensity=TRUE,
 		       sep="_",
 		       fileExt=list(green="Grn.idat", red="Red.idat"),
808cfecc
                        XY,
cbfc52e5
                        anno,
cfa9fbbc
 		       saveDate=TRUE,
 		       stripNorm=TRUE,
 		       useTarget=TRUE,
 		       mixtureSampleSize=10^5,
808cfecc
 		       fitMixture=TRUE,
                        quantile.method="between",
cfa9fbbc
 		       eps=0.1,
 		       verbose=TRUE,
7c0c9ac5
 		       seed=1, cdfName){
cfa9fbbc
 	stopifnot(all(c("gender", "SNR", "SKW") %in% varLabels(cnSet)))
 	open(A(cnSet))
 	open(B(cnSet))
 	sns <- sampleNames(cnSet)
 	pkgname = getCrlmmAnnotationName(annotation(cnSet))
cbfc52e5
         is.snp = isSnp(cnSet)
cfa9fbbc
 	snp.index = which(is.snp)
 	narrays = ncol(cnSet)
063b3d14
 	sampleBatches <- splitIndicesByLength(seq_len(ncol(cnSet)), ocSamples()/getDoParWorkers())
cfa9fbbc
 	mixtureParams = initializeBigMatrix("crlmmMixt-", 4, narrays, "double")
 	ocLapply(seq_along(sampleBatches),
1e416092
 		 processIDAT, # crlmm:::
cfa9fbbc
 		 sampleBatches=sampleBatches,
 		 sampleSheet=sampleSheet,
 		 arrayNames=arrayNames,
 		 ids=ids,
 		 path=path,
 		 arrayInfoColNames=arrayInfoColNames,
 		 highDensity=highDensity,
 		 sep=sep,
 		 fileExt=fileExt,
808cfecc
                  XY=XY,
cbfc52e5
                  anno=anno,
cfa9fbbc
 		 saveDate=saveDate,
 		 verbose=verbose,
 		 mixtureSampleSize=mixtureSampleSize,
 		 fitMixture=fitMixture,
 		 eps=eps,
 		 seed=seed,
 		 cdfName=cdfName,
 		 sns=sns,
 		 stripNorm=stripNorm,
 		 useTarget=useTarget,
808cfecc
                  quantile.method=quantile.method,                 
cfa9fbbc
 		 A=A(cnSet),
 		 B=B(cnSet),
5c9f60e9
 		 GT=calls(cnSet),
 		 GTP=snpCallProbability(cnSet),
cfa9fbbc
 		 SKW=cnSet$SKW,
 		 SNR=cnSet$SNR,
 		 mixtureParams=mixtureParams,
 		 is.snp=is.snp,
 		 neededPkgs=c("crlmm", pkgname)) # outdir=outdir,
 	return(mixtureParams)
 }
f0f8ba16
 preprocess <- preprocessInf
cfa9fbbc
 
f0f8ba16
 genotypeInf <- function(cnSet, mixtureParams, probs=rep(1/3,3),
 			SNRMin=5,
 			recallMin=10,
 			recallRegMin=1000,
 			verbose=TRUE,
 			returnParams=TRUE,
 			badSNP=0.7,
 			gender=NULL,
7c0c9ac5
 			DF=6,
808cfecc
 			cdfName,
cbfc52e5
                         nopackage.norm="quantile",
ba1a7d73
                         call.method="crlmm",
                         trueCalls=NULL){
284565dd
 	is.snp = isSnp(cnSet)
 	snp.index = which(is.snp)
5c9f60e9
 ##	narrays = ncol(cnSet)
 ##	open(A(cnSet))
 ##	open(B(cnSet))
 ##	open(snpCall(cnSet))
 ##	open(snpCallProbability(cnSet))
 ##	## crlmmGT2 overwrites the normalized intensities with calls and confidenceScores
 ##	message("Writing to call and callProbability slots")
 ##	for(j in 1:ncol(cnSet)){
 ##		snpCall(cnSet)[snp.index, j] <- as.integer(A(cnSet)[snp.index, j])
 ##		snpCallProbability(cnSet)[snp.index, j] <- as.integer(B(cnSet)[snp.index, j])
 ##	}
 ##	message("Writing complete.  Begin genotyping...")
 ##	close(A(cnSet))
 ##	close(B(cnSet))
808cfecc
         if(call.method=="crlmm") {
   	  tmp <- crlmmGT2(A=A(cnSet),
3be95c2b
 			B=B(cnSet),
 			SNR=cnSet$SNR,
 			mixtureParams=mixtureParams,
 			cdfName=cdfName,
 			col.names=sampleNames(cnSet),
 			probs=probs,
 			DF=DF,
 			SNRMin=SNRMin,
 			recallMin=recallMin,
 			recallRegMin=recallRegMin,
 			gender=gender,
 			verbose=verbose,
 			returnParams=returnParams,
 			badSNP=badSNP,
 			callsGt=calls(cnSet),
 			callsPr=snpCallProbability(cnSet))#,
1e0ed16d
 	##RS:  I changed the API for crlmmGT2 to be consistent with crlmmGT
808cfecc
            open(cnSet$gender)
            cnSet$gender[,] = tmp$gender
            close(cnSet$gender)
         }
cbfc52e5
         if(call.method=="krlmm") {
             if(cdfName=="nopackage") {
                 tmp <- krlmmnopkg(cnSet, offset=16, gender=gender, normalize.method=nopackage.norm, annotation=anno, verbose=verbose)
             } else {
               tmp <- krlmm(cnSet, cdfName, gender=gender, trueCalls=trueCalls, verbose=verbose)
1e0ed16d
 	## snp.names=featureNames(cnSet)[snp.index])
cbfc52e5
             }
         }
808cfecc
 	if(verbose) message("Genotyping finished.") # Updating container with genotype calls and confidence scores.")
cfa9fbbc
 	TRUE
 }
 
e972ec4a
 
453e688a
 genotype.Illumina <- function(sampleSheet=NULL,
 			      arrayNames=NULL,
 			      ids=NULL,
 			      path=".",
 			      arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
 			      highDensity=FALSE,
 			      sep="_",
 			      fileExt=list(green="Grn.idat", red="Red.idat"),
808cfecc
                               XY=NULL,
cbfc52e5
                               anno,
                               genome,
ba1a7d73
                               call.method="crlmm",
                               trueCalls=NULL,
453e688a
 			      cdfName,
 			      copynumber=TRUE,
808cfecc
 			      batch=NULL,
c27ba9c5
 			      saveDate=FALSE,
453e688a
 			      stripNorm=TRUE,
 			      useTarget=TRUE,
cbfc52e5
                               quantile.method="between",
                               nopackage.norm="quantile",
453e688a
 			      mixtureSampleSize=10^5,
 			      fitMixture=TRUE,
 			      eps=0.1,
 			      verbose=TRUE,
 			      seed=1,
 			      sns,
 			      probs=rep(1/3, 3),
 			      DF=6,
 			      SNRMin=5,
 			      recallMin=10,
 			      recallRegMin=1000,
 			      gender=NULL,
 			      returnParams=TRUE,
ac7e0c8f
 			      badSNP=0.7) {
808cfecc
         krlmm.supported = c("humanomni1quadv1b",      # Omni1 quad
 	                    "humanomni25quadv1b",     # Omni2.5 quad
3d8f4872
 	                    "humanomni258v1a",        # Omni2.5 8 v1 A
                             "humanomni258v1p1b",      # Omni2.5 8 v1.1 B
1613e94f
                             "humanomni5quadv1b",      # Omni5 quad
cc879294
 			    "humanexome12v1p2a",      # Exome 12 v1.2 A
cbfc52e5
                             "humanomniexpexome8v1p1b", # Omni Express Exome 8 v1.1b
                             "nopackage")
b2dc56da
         crlmm.supported = c("human1mv1c",             # 1M
                             "human370v1c",            # 370CNV
808cfecc
 	                    "human650v3a",            # 650Y
 	                    "human610quadv1b",        # 610 quad
 	                    "human660quadv1a",        # 660 quad
 	                    "human370quadv3c",        # 370CNV quad
 	                    "human550v3b",            # 550K
 	                    "human1mduov3b",          # 1M Duo
                             "humanomni1quadv1b",      # Omni1 quad
b7e51e97
 #			    "humanomni258v1a",        # Omni2.5 8 v1 A
 #                           "humanomni258v1p1b",      # Omni2.5 8 v1.1 B
808cfecc
 	                    "humanomniexpress12v1b",  # Omni express 12
 	                    "humanimmuno12v1b",       # Immuno chip 12
                             "humancytosnp12v2p1h",    # CytoSNP 12
                             "humanomniexpexome8v1p1b") # Omni Express Exome 8 v1.1b
         if(call.method=="krlmm") {
           if(!any(cdfName==krlmm.supported))
              stop(cdfName, " platform not supported by krlmm.  Consider setting call.method=\'crlmm\'")           
           fitMixture=FALSE
           quantile.method="within"
         }
         if(call.method=="crlmm") {
           if(!any(cdfName==crlmm.supported))
              stop(cdfName, " platform not supported by crlmm.  Consider setting call.method=\'krlmm\'")
           fitMixture=TRUE
           # quantile.method="between"
         }
450a6d8a
 	is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
ba1a7d73
         if (!(is.lds))
             stop("Package ff not loaded")
808cfecc
         if(!is.null(XY) && missing(cdfName))
           cdfName = getCrlmmAnnotationName(annotation(cnSet))
d3320c21
 	if(missing(cdfName)) stop("must specify cdfName")
 	if(!isValidCdfName(cdfName)) stop("cdfName not valid.  see validCdfNames")
808cfecc
 #	stopifnot(!missing(batch))
 #	if(is(batch, "factor")) batch <- as.character(batch)
f0f8ba16
         pkgname <- getCrlmmAnnotationName(cdfName)
808cfecc
 #        if(is.null(cnSet)) {
   	  message("Instantiate CNSet container.")
 	  cnSet <- constructInf(sampleSheet=sampleSheet,
f0f8ba16
 				    arrayNames=arrayNames,
 				    path=path,
 				    arrayInfoColNames=arrayInfoColNames,
 				    highDensity=highDensity,
 				    sep=sep,
 				    fileExt=fileExt,
808cfecc
                                     XY=XY,
cbfc52e5
                                     anno=anno,
                                     genome=genome,
f0f8ba16
 				    cdfName=cdfName,
 				    verbose=verbose,
 				    batch=batch,
 				    saveDate=saveDate)
808cfecc
 #        }
3eec840d
         if(call.method=="krlmm" && ncol(cnSet)<8)
            stop(paste("To run krlmm you need at least 8 samples (in general, the more the better).  You currently have", ncol(cnSet)))
f0f8ba16
 	mixtureParams <- preprocessInf(cnSet=cnSet,
 				    sampleSheet=sampleSheet,
 				    arrayNames=arrayNames,
 				    ids=ids,
 				    path=path,
 				    arrayInfoColNames=arrayInfoColNames,
 				    highDensity=highDensity,
 				    sep=sep,
 				    fileExt=fileExt,
 				    saveDate=saveDate,
808cfecc
                                     XY=XY,
cbfc52e5
                                     anno=anno, 
f0f8ba16
 				    stripNorm=stripNorm,
 				    useTarget=useTarget,
 				    mixtureSampleSize=mixtureSampleSize,
 				    fitMixture=fitMixture,
808cfecc
                                     quantile.method=quantile.method,
f0f8ba16
 				    eps=eps,
 				    verbose=verbose,
7c0c9ac5
 				    seed=seed,
808cfecc
 				    cdfName=cdfName)
cbfc52e5
 	message("Begin genotyping...")
f0f8ba16
 	genotypeInf(cnSet=cnSet, mixtureParams=mixtureParams,
3be95c2b
 		    probs=probs,
808cfecc
 		    SNRMin=SNRMin,
 		    recallMin=recallMin,
 		    recallRegMin=recallRegMin,
 		    verbose=verbose,
 		    returnParams=returnParams,
 		    badSNP=badSNP,
 		    gender=gender,
 		    DF=DF,
 		    cdfName=cdfName,
cbfc52e5
                     nopackage.norm=nopackage.norm,
ba1a7d73
                     call.method=call.method,
                     trueCalls=trueCalls)
f0f8ba16
 	return(cnSet)
d3320c21
 }
 
cbfc52e5
 crlmmIllumina <- genotype.Illumina
 
 
d3320c21
 
edc69ca1
 processIDAT <- function(stratum, sampleBatches, sampleSheet=NULL,
453e688a
 			arrayNames=NULL,
802ca707
 			ids=NULL,
 			path=".",
 			arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
 			highDensity=FALSE,
 			sep="_",
 			fileExt=list(green="Grn.idat", red="Red.idat"),
808cfecc
                         XY,
cbfc52e5
                         anno,
802ca707
 			saveDate=FALSE,
 			verbose=TRUE,
453e688a
 			mixtureSampleSize=10^5,
802ca707
 			fitMixture=TRUE,
 			eps=0.1,
 			seed=1,
 			cdfName,
 			sns,
 			stripNorm=TRUE,
 			useTarget=TRUE,
808cfecc
                         quantile.method=quantile.method,                        
5c9f60e9
 			A, B,
 			GT,
 			GTP,
4d398a2f
 			SKW, SNR, mixtureParams, is.snp) { #, outdir=".") {
edc69ca1
 	message("Processing sample stratum ", stratum, " of ", length(sampleBatches))
 	sel <- sampleBatches[[stratum]]
d3320c21
         if(length(path)>= length(sel)) path = path[sel]
808cfecc
         if(is.null(XY)) {
           RG = readIdatFiles(sampleSheet=sampleSheet[sel,], arrayNames=arrayNames[sel],
d3320c21
                        ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
99202387
                        highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate, verbose=verbose)
cbfc52e5
           XY = RGtoXY(RG, chipType=cdfName, anno=anno)
808cfecc
           rm(RG)
           gc(verbose=FALSE)
           if (missing(sns) || length(sns)!=ncol(XY)) sns = sampleNames(XY)
cbfc52e5
           if(cdfName!="nopackage") {
              res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
808cfecc
                                seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget,
cbfc52e5
                                quantile.method=quantile.method) #, outdir=outdir
              rm(XY)
           } else {
               res = list(A=XY@assayData$X, B=XY@assayData$Y, zero=XY@assayData$zero, sns=sns, gns=names(XY), cdfName=cdfName,
                   SNR=NA, SKW=NA, mixtureParams=NA)
               rm(XY)
           }
           } else{ #XY already available
           if(missing(sns) || length(sns)!=ncol(XY)) sns = sampleNames(XY)
           if(cdfName!="nopackage") {
             res = preprocessInfinium2(XY[,sel], mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
ba1a7d73
                                            seed=seed, eps=eps, cdfName=cdfName, sns=sns[sel], stripNorm=stripNorm, useTarget=useTarget,
cbfc52e5
                                            quantile.method=quantile.method)
           } else {
               res = list(A=XY@assayData$X, B=XY@assayData$Y, zero=XY@assayData$zero, sns=sns, gns=names(XY), cdfName=cdfName,
                   SNR=NA, SKW=NA, mixtureParams=NA)
           }
808cfecc
         }
a1394042
         gc(verbose=FALSE)
5c9f60e9
 	if(verbose) message("Finished preprocessing.")
d3320c21
         snp.index = which(is.snp)
e6e6981f
 	np.index = which(!is.snp)
453e688a
 	open(A); open(B);
5c9f60e9
 	open(GT); open(GTP)
802ca707
 	Amatrix <- res[["A"]]
 	Bmatrix <- res[["B"]]
5c9f60e9
 
 	## Amatrix and Bmatrix are ordinary matrices--not ff objects.
 	## By writing columns of a ordinary matrix to GT and GTP, we
 	## save one read step later on.  GT and GTP will be updated to
 	## calls and call probabilities by the crlmmGT2 function. The A
 	## and B slots will retain the normalized intensities for the
 	## A and B alleles
cbfc52e5
         ## The loop below gives rise to warnings: In `[<-.ff_array`(`*tmp*`, snp.index, jj, value = structure(c(6648L,  ... :
         ## number of elements to replace is not multiple of values for replacement
802ca707
 	for(j in seq_along(sel)){
 		jj <- sel[j]
 		A[snp.index, jj] <- Amatrix[, j]
5c9f60e9
 		GT[snp.index, jj] <- Amatrix[, j]
802ca707
 		B[snp.index, jj] <- Bmatrix[, j]
5c9f60e9
 		GTP[snp.index, jj] <- Bmatrix[, j]
802ca707
 	}
5c9f60e9
 	if(length(np.index)>0) {
 		cnAmatrix <- res[["cnAB"]]$A
 		cnBmatrix <- res[["cnAB"]]$B
 		for(j in seq_along(sel)){
 			jj <- sel[j]
 			A[np.index, jj] <- cnAmatrix[, j]
 			GT[np.index, jj] <- cnAmatrix[, j]
 			B[np.index, jj] <- cnBmatrix[, j]
 			GTP[np.index, jj] <- cnBmatrix[, j]
 		}
450a6d8a
         }
453e688a
 	open(SKW); open(SNR); open(mixtureParams)
802ca707
 	SKW[sel] = res[["SKW"]][]
 	SNR[sel] = res[["SNR"]][]
 	mixtureParams[, sel] = res[["mixtureParams"]][]
5c9f60e9
         close(A); close(B)
 	close(GT); close(GTP)
         close(SNR); close(SKW)
d3320c21
         close(mixtureParams)
         rm(res)
a1394042
 	gc(verbose=FALSE)
d3320c21
         TRUE
2564dd8b
       }
cbfc52e5