R/cnrma-functions.R
a3335242
 ##---------------------------------------------------------------------------
 ##---------------------------------------------------------------------------
 getProtocolData.Affy <- function(filenames){
 	scanDates <- data.frame(ScanDate=sapply(filenames, celfileDate))
 	rownames(scanDates) <- basename(rownames(scanDates))
 	protocoldata <- new("AnnotatedDataFrame",
 			    data=scanDates,
 			    varMetadata=data.frame(labelDescription=colnames(scanDates),
 			                           row.names=colnames(scanDates)))
 	return(protocoldata)
 }
d0d83390
 
7818b532
 ##setMethod("snpNames", signature(object="character"),
 ##	  function(object){
 ##		  nm <- grep("Crlmm", object)
 ##		  if(length(nm)==0){
 ##			  pkgname <- paste(object, "Crlmm", sep="")
 ##		  } else pkgname <- object
 ##		  loader("preprocStuff.rda", .crlmmPkgEnv, object)
 ##		  gns <- getVarInEnv("gns")
 ##		  return(gns)
 ##	  })
d0d83390
 
3be95c2b
 
 getFeatureData <- function(cdfName, copynumber=FALSE, genome){
a3335242
 	pkgname <- getCrlmmAnnotationName(cdfName)
 	if(!require(pkgname, character.only=TRUE)){
 		suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
 		msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
 		message(strwrap(msg))
 		stop("Package ", pkgname, " could not be found.")
 		rm(suggCall, msg)
 	}
 	loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
 	loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
 	loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
 	gns <- getVarInEnv("gns")
e95fbc14
 	nm <- grep("Crlmm", pkgname)
 	if(length(nm)==0){
 		pkgname <- paste(pkgname, "Crlmm", sep="")
 	}
 	path <- system.file("extdata", package=pkgname)
3be95c2b
 	##multiple.builds <- length(grep("hg19", list.files(path)) > 0)
a3b625d4
 	snp.file <- list.files(path, pattern="snpProbes_hg")
 	if(length(snp.file)==0){
 		no.build <- TRUE
 		snp.file <- list.files(path, pattern="snpProbes.rda")
 	} else {
 		no.build <- FALSE
 		if(missing(genome)){
 			snp.file <- list.files(path, pattern="snpProbes_hg")
 			if(length(snp.file) > 1){
 				## use hg19
 				message("genome build not specified. Using build hg19 for annotation.")
 				snp.file <- snp.file[1]
 			}
 			genome <- gsub(".rda", "", strsplit(snp.file, "snpProbes_")[[1]][[2]])
 		} else snp.file <- paste("snpProbes_", genome, ".rda", sep="")
 	}
3be95c2b
 ##	if(!multiple.builds){
 ##		load(file.path(path, "snpProbes.rda"))
 ##	} else load(file.path(path, paste("snpProbes_", genome, ".rda", sep="")))
 	load(file.path(path, snp.file))
b5b74b9f
 	snpProbes <- get("snpProbes")
e31c7794
 	## if we use a different build we may throw out a number of snps...
 	snpProbes <- snpProbes[rownames(snpProbes) %in% gns, ]
a3335242
 	if(copynumber){
a3b625d4
 		if(!no.build){
 			cn.file <- paste("cnProbes_", genome, ".rda", sep="")
 		} else cn.file <- "cnProbes.rda"
3be95c2b
 		load(file.path(path, cn.file))
 		##		if(!multiple.builds){
 		##			load(file.path(path, "cnProbes.rda"))
 		##		} else load(file.path(path, paste("cnProbes_", genome, ".rda", sep="")))
a3335242
 		cnProbes <- get("cnProbes")
 		snpIndex <- seq(along=gns)
81601623
 		npIndex <- seq(along=rownames(cnProbes)) + max(snpIndex)
a3335242
 		featurenames <- c(gns, rownames(cnProbes))
 	} else featurenames <- gns
 	fvarlabels=c("chromosome", "position", "isSnp")
 	M <- matrix(NA, length(featurenames), 3, dimnames=list(featurenames, fvarlabels))
 	index <- match(rownames(snpProbes), rownames(M)) #only snp probes in M get assigned position
 	M[index, "position"] <- snpProbes[, grep("pos", colnames(snpProbes))]
6a6b67c5
 	M[index, "chromosome"] <- chromosome2integer(snpProbes[, grep("chr", colnames(snpProbes))])
7a41ca3b
 	##M[index, "isSnp"] <- 1L
7c0c9ac5
 	index <- which(is.na(M[, "isSnp"]))
 	M[index, "isSnp"] <- 1L
a3335242
 	if(copynumber){
 		index <- match(rownames(cnProbes), rownames(M)) #only snp probes in M get assigned position
 		M[index, "position"] <- cnProbes[, grep("pos", colnames(cnProbes))]
6a6b67c5
 		M[index, "chromosome"] <- chromosome2integer(cnProbes[, grep("chr", colnames(cnProbes))])
a3335242
 		M[index, "isSnp"] <- 0L
 	}
cb875a5a
 	##A few of the snpProbes do not match -- I think it is chromosome Y.
8ed2dc29
 	if(any(is.na(M[, "chromosome"])))
 		M[is.na(M[, "chromosome"]), "isSnp"] <- 1L
7c0c9ac5
 	##return(new("AnnotatedDataFrame", data=M))
 	new("GenomeAnnotatedDataFrame",
 	    position=as.integer(M[, "position"]),
 	    chromosome=as.integer(M[, "chromosome"]),
 	    isSnp=as.logical(M[, "isSnp"]),
 	    row.names=featurenames)
a3335242
 }
7c0c9ac5
 
6a6b67c5
 getFeatureData.Affy <- getFeatureData
a097b918
 
ada0da1e
 construct <- function(filenames,
 		      cdfName,
 		      copynumber=TRUE,
518a2908
 		      sns, verbose=TRUE, batch, fns){
dded663e
 	if(!missing(batch)){
 		stopifnot(length(batch) == length(sns))
 	}
453e688a
 	if(any(is.na(batch))){
 		stop("NA's in batch variable")
 	}
dded663e
 	if(missing(sns) & missing(filenames)) stop("one of filenames or samplenames (sns) must be provided")
ada0da1e
 	if(verbose) message("Initializing container for copy number estimation")
6a6b67c5
 	featureData <- getFeatureData(cdfName, copynumber=copynumber)
518a2908
 	if(!missing(fns)){
7a41ca3b
 		warning("subsetting the featureData can cause the snprma object and CNSet object to be misaligned. This problem is fixed in devel as we match the names prior to assigning values from snprma")
518a2908
 		index <- match(fns, featureNames(featureData))
 		if(all(is.na(index))) stop("fns not in featureNames")
 		featureData <- featureData[index, ]
 	}
dded663e
 	nr <- nrow(featureData); nc <- length(sns)
69ec8644
 	cnSet <- new("CNSet",
 		     alleleA=initializeBigMatrix(name="A", nr, nc),
 		     alleleB=initializeBigMatrix(name="B", nr, nc),
08d4b4d4
 		     call=initializeBigMatrix(name="call", nr, nc),
 		     callProbability=initializeBigMatrix(name="callPr", nr,nc),
69ec8644
 		     annotation=cdfName,
 		     batch=batch)
caaed0d6
 	if(!missing(sns)){
 		sampleNames(cnSet) <- sns
69ec8644
 		protocolData <- annotatedDataFrameFrom(A(cnSet), byrow=FALSE)
caaed0d6
 	} else {
 		sampleNames(cnSet) <- basename(filenames)
 		protocolData <- getProtocolData.Affy(filenames)
69ec8644
 	}
 	rownames(pData(protocolData)) <- sns
d08acba1
 	protocolData(cnSet) <- protocolData
69ec8644
 	featureData(cnSet) <- featureData
d08acba1
 	featureNames(cnSet) <- featureNames(featureData)
da553af0
 	pd <- data.frame(matrix(NA, nc, 3), row.names=sns)
 	colnames(pd)=c("SKW", "SNR", "gender")
69ec8644
 	phenoData(cnSet) <- new("AnnotatedDataFrame", data=pd)
7a41ca3b
 	gns <- getVarInEnv("gns")
 	stopifnot(identical(gns, featureNames(cnSet)[1:length(gns)]))
69ec8644
 	return(cnSet)
e272e5d6
 }
ea66649b
 
e31c7794
 constructAffyCNSet <- function(filenames, sns, cdfName, batch, verbose=TRUE, genome){
063b3d14
 	##stopifnot(!missing(filenames))
729e92c0
 	if(missing(sns)) sns <- basename(filenames)
cfa9fbbc
 	stopifnot(!missing(batch))
08d4b4d4
 	##---------------------------------------------------------------------------
 	##
cfa9fbbc
 	## from snprma2.  Goal is to initialize A and B with
 	## appropriate dimension for snps+nps
08d4b4d4
 	##
 	##---------------------------------------------------------------------------
 	if (missing(cdfName))
 		cdfName <- read.celfile.header(filenames[1])[["cdfName"]]
 	pkgname <- getCrlmmAnnotationName(cdfName)
 	stopifnot(require(pkgname, character.only=TRUE, quietly=!verbose))
 	if(verbose) message("Loading annotations and mixture model parameters.")
 	obj <- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
 	pnsa <- getVarInEnv("pnsa")
 	pnsb <- getVarInEnv("pnsb")
 	gns <- getVarInEnv("gns")
 	rm(list=obj, envir=.crlmmPkgEnv)
 	rm(obj)
b59e8327
 	if(verbose) message("Initializing ff objects.")
 	##mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, length(filenames), "double")
08d4b4d4
 	SNR <- initializeBigVector("crlmmSNR-", length(filenames), "double")
 	SKW <- initializeBigVector("crlmmSKW-", length(filenames), "double")
284565dd
 	genderff <- initializeBigVector("gender", length(filenames), "integer")
063b3d14
 	featureData <- getFeatureData(cdfName, copynumber=TRUE, genome=genome)
08d4b4d4
 	nr <- nrow(featureData); nc <- length(sns)
 	A <- initializeBigMatrix("crlmmA-", nr, length(filenames), "integer")
 	B <- initializeBigMatrix("crlmmB-", nr, length(filenames), "integer")
b59e8327
 	call <- initializeBigMatrix("call-", nr, length(filenames), "integer")
 	callPr <- initializeBigMatrix("callPr-", nr, length(filenames), "integer")
e31c7794
 	mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, length(filenames), "double")
08d4b4d4
 	rownames(A) <- rownames(B) <- featureNames(featureData)
b59e8327
 	rownames(call) <- rownames(callPr) <- featureNames(featureData)
e31c7794
 	dirNames <- dirname(filenames)
 	unames <- unique(dirNames)
 	dirNames <- factor(dirNames, levels=unames)
 	dd <- split(seq_len(length(filenames)), dirNames)
 	datadir <- list(dirname=names(dd), n=as.integer(sapply(dd, length)))
b59e8327
 	if(verbose) message("Instantiating CNSet container")
caaed0d6
 	cnSet <- new("CNSet",
 		     alleleA=A,
 		     alleleB=B,
 		     call=call,
b59e8327
 		     callProbability=callPr,
caaed0d6
 		     featureData=featureData,
 		     annotation=cdfName,
063b3d14
 		     batch=as.character(batch),
e31c7794
 		     genome=genome,
 		     datadir=datadir)
 	cnSet@mixtureParams <- mixtureParams
063b3d14
 	sampleNames(cnSet) <- sns
453e688a
 	protocolData <- getProtocolData.Affy(filenames)
e31c7794
 	protocolData$filename <- basename(filenames)
caaed0d6
 	rownames(pData(protocolData)) <- sns
 	protocolData(cnSet) <- protocolData
284565dd
 	cnSet$SKW <- SKW
 	cnSet$SNR <- SNR
 	cnSet$gender <- genderff
caaed0d6
 	stopifnot(validObject(cnSet))
b59e8327
 	return(cnSet)
 }
 
e31c7794
 snprmaAffy <- function(cnSet,
b59e8327
 		       mixtureSampleSize=10^5,
 		       eps=0.1,
 		       seed=1,
 		       verbose=TRUE){
e31c7794
 	filenames <- celfileName(cnSet)
b59e8327
 	if(verbose) message("Preprocessing ", length(filenames), " files.")
 	pkgname <- getCrlmmAnnotationName(annotation(cnSet))
 	stopifnot(require(pkgname, character.only=TRUE, quietly=!verbose))
e31c7794
 	sampleBatches <- splitIndicesByNode(seq_along(filenames))
 	mixtureParams <- cnSet@mixtureParams
 	obj1 <- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
 	obj2 <- loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
 	obj3 <- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
 	autosomeIndex <- getVarInEnv("autosomeIndex")
 	pnsa <- getVarInEnv("pnsa")
 	pnsb <- getVarInEnv("pnsb")
 	fid <- getVarInEnv("fid")
 	reference <- getVarInEnv("reference")
 	aIndex <- getVarInEnv("aIndex")
 	bIndex <- getVarInEnv("bIndex")
 	SMEDIAN <- getVarInEnv("SMEDIAN")
 	theKnots <- getVarInEnv("theKnots")
 	gns <- getVarInEnv("gns")
 	rm(list=c(obj1, obj2, obj3), envir=.crlmmPkgEnv)
 	rm(obj1, obj2, obj3)
 	## for mixture
 	set.seed(seed)
 	idx <- sort(sample(autosomeIndex, mixtureSampleSize))
 	##for skewness. no need to do everything
 	idx2 <- sample(length(fid), 10^5)
 	A <- A(cnSet)
 	B <- B(cnSet)
 	SKW <- cnSet$SKW; SNR <- cnSet$SNR
 	open(A)
 	open(B)
 	open(SKW)
 	open(mixtureParams)
 	open(SNR)
 	## RS ADDED
 	index <- match(gns, rownames(A))
 	rsprocessCEL <- function(i){
 		for (k in i){
 			y <- as.matrix(read.celfile(filenames[k], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
 			x <- log2(y[idx2])
 			SKW[k] <- mean((x-mean(x))^3)/(sd(x)^3)
 			rm(x)
 			y <- normalize.quantiles.use.target(y, target=reference)
 			## RS: add index for row assignment
2bc29627
 			ya <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa)
 			yb <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb)
 			A[index, k] <- ya
 			B[index, k] <- yb
e31c7794
 			rm(y)
 			S <- (log2(A[idx,k])+log2(B[idx, k]))/2 - SMEDIAN
 			M <- log2(A[idx, k])-log2(B[idx, k])
 			tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
 			rm(S, M)
 			mixtureParams[, k] <- tmp[["coef"]]
 			SNR[k] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
 			rm(tmp)
 		}
 		TRUE
 	}
 	ocLapply(sampleBatches, rsprocessCEL, neededPkgs="crlmm")
 	close(A)
 	close(B)
 	close(SKW)
 	close(mixtureParams)
 	close(SNR)
 	return()
b59e8327
 }
e95fbc14
 
b59e8327
 genotype <- function(filenames,
 		     cdfName,
 		     batch,
 		     mixtureSampleSize=10^5,
 		     eps=0.1,
 		     verbose=TRUE,
 		     seed=1,
 		     sns,
 		     probs=rep(1/3, 3),
 		     DF=6,
 		     SNRMin=5,
 		     recallMin=10,
 		     recallRegMin=1000,
 		     gender=NULL,
 		     returnParams=TRUE,
063b3d14
 		     badSNP=0.7,
 		     genome=c("hg19", "hg18")){
 	if(!isPackageLoaded("ff")) stop("ff package must be loaded")
 	if (missing(sns)) sns <- basename(filenames)
 	if (any(duplicated(sns))) stop("sample identifiers must be unique")
 	genome <- match.arg(genome)
e31c7794
 	cnSet <- constructAffyCNSet(filenames=filenames,
 				    sns=sns,
 				    cdfName=cdfName,
 				    batch=batch, verbose=verbose,
 				    genome=genome)
7818b532
 	if(!(is(A(cnSet), "ff") || is(A(cnSet), "ffdf"))){
 		stop("The ff package is required for this function.")
 	}
a3b625d4
 	ok <- snprmaAffy(cnSet,
 			 mixtureSampleSize=mixtureSampleSize,
 			 eps=eps,
 			 seed=seed,
 			 verbose=verbose)
e31c7794
 	ok <- cnrmaAffy(cnSet=cnSet,
3be95c2b
 			seed=seed,
b59e8327
 			verbose=verbose)
 	stopifnot(ok)
e95fbc14
 	ok <- genotypeAffy(cnSet=cnSet,
 			   SNRMin=SNRMin,
 			   recallMin=recallMin,
b59e8327
 			   recallRegMin=recallRegMin,
 			   gender=gender,
 			   badSNP=badSNP,
 			   returnParams=returnParams,
 			   verbose=verbose)
 	return(cnSet)
 }
 
e31c7794
 genotypeAffy <- function(cnSet, SNRMin=5, recallMin=10,
b59e8327
 			 recallRegMin=1000,
 			 gender=NULL, badSNP=0.7, returnParams=TRUE,
 			 verbose=TRUE){
2bc29627
 	## The passed arguments A and B currently contain the intensities
 	## (not calls and call probabilities)
 	tmp <- crlmmGT2(A=A(cnSet),
 			B=B(cnSet),
b59e8327
 			SNR=cnSet$SNR,
e31c7794
 			mixtureParams=cnSet@mixtureParams,
b59e8327
 			cdfName=annotation(cnSet),
284565dd
 			row.names=NULL,
 			col.names=sampleNames(cnSet),
 			SNRMin=SNRMin,
 			recallMin=recallMin,
 			recallRegMin=recallRegMin,
 			gender=gender,
 			verbose=verbose,
 			returnParams=returnParams,
2bc29627
 			badSNP=badSNP,
 			callsGt=calls(cnSet),
 			callsPr=snpCallProbability(cnSet))
3be95c2b
 	cnSet$gender[] <- tmp$gender
2bc29627
 	if(verbose) message("Genotyping finished.")
b59e8327
 	return(TRUE)
 }
 
e31c7794
 cnrmaAffy <- function(cnSet, seed=1, verbose=TRUE){
 	filenames <- celfileName(cnSet)
 	np.index <- which(!isSnp(cnSet))
 	A <- A(cnSet)
 	cdfName <- annotation(cnSet)
b59e8327
 	if(verbose) message("Quantile normalizing nonpolymorphic markers")
e31c7794
 	##if(missing(cdfName)) stop("must specify cdfName")
 	pkgname <- getCrlmmAnnotationName(cdfName)
 	require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available")
 	sampleBatches <- splitIndicesByNode(seq_along(filenames))
 	if(verbose) message("Processing nonpolymorphic probes for ", length(filenames), " files.")
 	if(pkgname=="genomewidesnp6Crlmm"){
 		loader("1m_reference_cn.rda", .crlmmPkgEnv, pkgname)
 	}
 	if(pkgname=="genomewidesnp5Crlmm"){
 		loader("5.0_reference_cn.rda", .crlmmPkgEnv, pkgname)
 	}
08d4b4d4
 	reference <- getVarInEnv("reference")
e31c7794
         loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
 	fid <- getVarInEnv("npProbesFid")
 	row.names <- featureNames(cnSet)[np.index]
 	row.names <- row.names[row.names %in% names(fid)] ##removes chromosome Y
 	fid <- fid[match(row.names, names(fid))]
 	np.index <- match(row.names, rownames(A))
 	gns <- names(fid)
08d4b4d4
 	set.seed(seed)
e31c7794
 	## updates A
08d4b4d4
 	open(A)
e31c7794
 	processCEL2 <- function(i){
 		for (k in i){
 			y <- as.matrix(read.celfile(filenames[k], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
 			A[np.index, k] <- as.integer(normalize.quantiles.use.target(y, target=reference))
08d4b4d4
 		}
e31c7794
 		return(TRUE)
08d4b4d4
 	}
e31c7794
 	ocLapply(sampleBatches, processCEL2, neededPkgs="crlmm")
08d4b4d4
 	close(A)
 	TRUE
 }
 
e31c7794
 
 
17158566
 genotype2 <- function(){
 	.Defunct(msg="The genotype2 function has been deprecated. The function genotype should be used instead.  genotype will support large data using ff provided that the ff package is loaded.")
 }
 genotypeLD <- genotype2
a097b918
 
50b0d125
 rowCovs <- function(x, y, ...){
 	notna <- !is.na(x)
 	N <- rowSums(notna)
 	x <- suppressWarnings(log2(x))
 	if(!missing(y)) y <- suppressWarnings(log2(y))
 	return(rowSums((x - rowMeans(x, ...)) * (y - rowMeans(y, ...)), ...)/(N-1))
 }
 
 rowMAD <- function(x, y, ...){
18053f2d
 	##notna <- !is.na(x)
50b0d125
 	mad <- 1.4826*rowMedians(abs(x-rowMedians(x, ...)), ...)
 	return(mad)
 }
 
d5bc779e
 shrink <- function(x, Ns, DF.PRIOR){
 	DF <- Ns-1
 	DF[DF < 1] <- 1
 	x.0 <- apply(x, 2, median, na.rm=TRUE)
 	x <- (x*DF + x.0*DF.PRIOR)/(DF.PRIOR + DF)
 	for(j in 1:ncol(x)) x[is.na(x[, j]), j] <- x.0[j]
 	return(x)
 }
 
50b0d125
 rowCors <- function(x, y, ...){
 	N <- rowSums(!is.na(x))
 	x <- suppressWarnings(log2(x))
 	y <- suppressWarnings(log2(y))
 	sd.x <- rowSds(x, ...)
 	sd.y <- rowSds(y, ...)
 	covar <- rowSums((x - rowMeans(x, ...)) * (y - rowMeans(y, ...)), ...)/(N-1)
 	return(covar/(sd.x*sd.y))
 }
 
a3b625d4
 ## dqrlsWrapper <- function(x, y, wts, tol=1e-7){
 ## 	n <- NROW(y)
 ## 	p <- ncol(x)
 ## 	ny <- NCOL(y)
 ## 	.Fortran("dqrls", qr=x*wts, n=n, p=p, y=y * wts, ny=ny,
 ## 		 tol=as.double(tol), coefficients=mat.or.vec(p, ny),
 ## 		 residuals=y, effects=mat.or.vec(n, ny),
 ## 		 rank=integer(1L), pivot=1L:p, qraux=double(p),
 ## 		 work=double(2 * p), PACKAGE="base")[["coefficients"]]
 ## }
 
50b0d125
 
a3b625d4
 dqrlsWrapper <- function(x, y, wts, ...)
     fastLmPure(X=x*wts, y=y*wts, method=3)[['coefficients']]
6846d583
 
 fit.wls <- function(NN, sigma, allele, Y, autosome, X){
 	Np <- NN
 	Np[Np < 1] <- 1
 	W <- (sigma/sqrt(Np))^-1
 	Ystar <- Y*W
3c6c270e
 	complete <- which(rowSums(is.na(W)) == 0 & rowSums(is.na(Ystar)) == 0)
d5bc779e
 	if(missing(allele)) stop("must specify allele")
6846d583
 	if(autosome & missing(X)){
5a23c457
 		if(allele == "A") X <- cbind(1, 2:0)
 		if(allele == "B") X <- cbind(1, 0:2)
6846d583
 	}
 	if(!autosome & missing(X)){
5a23c457
 		if(allele == "A") X <- cbind(1, c(1, 0, 2, 1, 0), c(0, 1, 0, 1, 2))
 		if(allele == "B") X <- cbind(1, c(0, 1, 0, 1, 2), c(1, 0, 2, 1, 0))
 	}
6846d583
 	betahat <- matrix(NA, ncol(X), nrow(Ystar))
5a23c457
 	ww <- rep(1, ncol(Ystar))
3c6c270e
 	for(i in complete){
5a23c457
 		betahat[, i] <- dqrlsWrapper(W[i, ] * X, Ystar[i, ], ww)
dded663e
 		##ssr <- sum((Ystar[i, ] - matrix(Xstar[, i], nrow(X), ncol(X)) %*% matrix(betahat[, i], ncol(X), 1))^2)
d5bc779e
 	}
dded663e
 	return(betahat)
d5bc779e
 }
 
6846d583
 shrinkGenotypeSummaries <- function(strata, index.list, object, MIN.OBS, MIN.SAMPLES, DF.PRIOR,
453e688a
 				    verbose, is.lds=TRUE){
 	##if(is.lds) {physical <- get("physical"); open(object)}
 	open(object)
61dc01ae
 	if(verbose) message("      Probe stratum ", strata, " of ", length(index.list))
6846d583
 	marker.index <- index.list[[strata]]
 	batches <- split(seq_along(batch(object)), as.character(batch(object)))
 	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
aa4a7e0b
 	batch.names <- names(batches)
453e688a
 	batch.index <- which(batchNames(object) %in% batch.names)
 	N.AA <- as.matrix(N.AA(object)[marker.index, batch.index])
 	N.AB <- as.matrix(N.AB(object)[marker.index, batch.index])
 	N.BB <- as.matrix(N.BB(object)[marker.index, batch.index])
 	medianA.AA <- as.matrix(medianA.AA(object)[marker.index, batch.index])
 	medianA.AB <- as.matrix(medianA.AB(object)[marker.index, batch.index])
 	medianA.BB <- as.matrix(medianA.BB(object)[marker.index, batch.index])
 	medianB.AA <- as.matrix(medianB.AA(object)[marker.index, batch.index])
 	medianB.AB <- as.matrix(medianB.AB(object)[marker.index, batch.index])
 	medianB.BB <- as.matrix(medianB.BB(object)[marker.index, batch.index])
 	madA.AA <- as.matrix(madA.AA(object)[marker.index, batch.index])
 	madA.AB <- as.matrix(madA.AB(object)[marker.index, batch.index])
 	madA.BB <- as.matrix(madA.BB(object)[marker.index, batch.index])
 	madB.AA <- as.matrix(madB.AA(object)[marker.index, batch.index])
 	madB.AB <- as.matrix(madB.AB(object)[marker.index, batch.index])
 	madB.BB <- as.matrix(madB.BB(object)[marker.index, batch.index])
 	medianA <- medianB <- shrink.madB <- shrink.madA <- vector("list", length(batch.names))
 	shrink.tau2A.AA <- tau2A.AA <- as.matrix(tau2A.AA(object)[marker.index, batch.index])
 	shrink.tau2B.BB <- tau2B.BB <- as.matrix(tau2B.BB(object)[marker.index, batch.index])
 	shrink.tau2A.BB <- tau2A.BB <- as.matrix(tau2A.BB(object)[marker.index, batch.index])
 	shrink.tau2B.AA <- tau2B.AA <- as.matrix(tau2B.AA(object)[marker.index, batch.index])
 	shrink.corrAA <- corrAA <- as.matrix(corrAA(object)[marker.index, batch.index])
 	shrink.corrAB <- corrAB <- as.matrix(corrAB(object)[marker.index, batch.index])
 	shrink.corrBB <- corrBB <- as.matrix(corrBB(object)[marker.index, batch.index])
 	flags <- as.matrix(flags(object)[marker.index, batch.index])
6846d583
 	for(k in seq(along=batches)){
cbbfdda9
 		sample.index <- batches[[k]]
 		this.batch <- unique(as.character(batch(object)[sample.index]))
6846d583
 		medianA[[k]] <- cbind(medianA.AA[, k], medianA.AB[, k], medianA.BB[, k])
 		medianB[[k]] <- cbind(medianB.AA[, k], medianB.AB[, k], medianB.BB[, k])
 		madA <- cbind(madA.AA[, k], madA.AB[, k], madA.BB[, k])
 		madB <- cbind(madB.AA[, k], madB.AB[, k], madB.BB[, k])
 		NN <- cbind(N.AA[, k], N.AB[, k], N.BB[, k])
 		##RS: estimate DF.PRIOR
 		shrink.madA[[k]] <- shrink(madA, NN, DF.PRIOR)
 		shrink.madB[[k]] <- shrink(madB, NN, DF.PRIOR)
 		## an estimate of the background variance is the MAD
 		## of the log2(allele A) intensities among subjects with
 		## genotypes BB
 		shrink.tau2A.BB[, k] <- shrink(tau2A.BB[, k, drop=FALSE], NN[, 3], DF.PRIOR)[, drop=FALSE]
 		shrink.tau2B.AA[, k] <- shrink(tau2B.AA[, k, drop=FALSE], NN[, 1], DF.PRIOR)[, drop=FALSE]
 		## an estimate of the signal variance is the MAD
 		## of the log2(allele A) intensities among subjects with
 		## genotypes AA
 		shrink.tau2A.AA[, k] <- shrink(tau2A.AA[, k, drop=FALSE], NN[, 1], DF.PRIOR)[, drop=FALSE]
 		shrink.tau2B.BB[, k] <- shrink(tau2B.BB[, k, drop=FALSE], NN[, 3], DF.PRIOR)[, drop=FALSE]
 		cor.AA <- corrAA[, k, drop=FALSE]
 		cor.AB <- corrAB[, k, drop=FALSE]
 		cor.BB <- corrBB[, k, drop=FALSE]
 		shrink.corrAA[, k] <- shrink(cor.AA, NN[, 1], DF.PRIOR)
 		shrink.corrAB[, k] <- shrink(cor.AB, NN[, 2], DF.PRIOR)
 		shrink.corrBB[, k] <- shrink(cor.BB, NN[, 3], DF.PRIOR)
66900fea
 		##
6846d583
 		##---------------------------------------------------------------------------
 		## SNPs that we'll use for imputing location/scale of unobserved genotypes
 		##---------------------------------------------------------------------------
 		index.complete <- indexComplete(NN, medianA[[k]], medianB[[k]], MIN.OBS)
66900fea
 		##
6846d583
 		##---------------------------------------------------------------------------
 		## Impute sufficient statistics for unobserved genotypes (plate-specific)
 		##---------------------------------------------------------------------------
 		unobservedAA <- NN[, 1] < MIN.OBS
 		unobservedAB <- NN[, 2] < MIN.OBS
 		unobservedBB <- NN[, 3] < MIN.OBS
 		unobserved.index <- vector("list", 3)
1813938e
 		## AB, BB observed
6846d583
 		unobserved.index[[1]] <- which(unobservedAA & (NN[, 2] >= MIN.OBS & NN[, 3] >= MIN.OBS))
1813938e
 		## AA and BB observed (strange)
6846d583
 		unobserved.index[[2]] <- which(unobservedAB & (NN[, 1] >= MIN.OBS & NN[, 3] >= MIN.OBS))
1813938e
 		## AB and AA observed
6846d583
 		unobserved.index[[3]] <- which(unobservedBB & (NN[, 2] >= MIN.OBS & NN[, 1] >= MIN.OBS))
 		res <- imputeCenter(medianA[[k]], medianB[[k]], index.complete, unobserved.index)
 		medianA[[k]] <- res[[1]]
 		medianB[[k]] <- res[[2]]
 		rm(res)
 		##the NA's in 'medianA' and 'medianB' are monomorphic if MIN.OBS = 1
66900fea
 		##
6846d583
 		## RS: For Monomorphic SNPs a mixture model may be better
 		## RS: Further, we can improve estimation by borrowing strength across batch
 		unobserved.index[[1]] <- which(unobservedAA & unobservedAB)
 		unobserved.index[[2]] <- which(unobservedBB & unobservedAB)
 		unobserved.index[[3]] <- which(unobservedAA & unobservedBB) ## strange
 		res <- imputeCentersForMonomorphicSnps(medianA[[k]], medianB[[k]],
 						       index.complete,
 						       unobserved.index)
 		medianA[[k]] <- res[[1]]; medianB[[k]] <- res[[2]]
 		rm(res)
 		negA <- rowSums(medianA[[k]] < 0) > 0
 		negB <- rowSums(medianB[[k]] < 0) > 0
 		flags[, k] <- as.integer(rowSums(NN == 0) > 0 | negA | negB)
 	}
453e688a
 	flags(object)[marker.index, batch.index] <- flags
 	medianA.AA(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianA, function(x) x[, 1]))
 	medianA.AB(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianA, function(x) x[, 2]))
 	medianA.BB(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianA, function(x) x[, 3]))
 	medianB.AA(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianB, function(x) x[, 1]))
 	medianB.AB(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianB, function(x) x[, 2]))
 	medianB.BB(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianB, function(x) x[, 3]))
66900fea
 	##
453e688a
 	madA.AA(object)[marker.index, batch.index] <- do.call("cbind", lapply(shrink.madA, function(x) x[, 1]))
 	madA.AB(object)[marker.index, batch.index] <- do.call("cbind", lapply(shrink.madA, function(x) x[, 2]))
 	madA.BB(object)[marker.index, batch.index] <- do.call("cbind", lapply(shrink.madA, function(x) x[, 3]))
 	madB.AA(object)[marker.index, batch.index] <- do.call("cbind", lapply(shrink.madB, function(x) x[, 1]))
 	madB.AB(object)[marker.index, batch.index] <- do.call("cbind", lapply(shrink.madB, function(x) x[, 2]))
 	madB.BB(object)[marker.index, batch.index] <- do.call("cbind", lapply(shrink.madB, function(x) x[, 3]))
66900fea
 	##
453e688a
 	corrAA(object)[marker.index, batch.index] <- shrink.corrAA
 	corrAB(object)[marker.index, batch.index] <- shrink.corrAB
 	corrBB(object)[marker.index, batch.index] <- shrink.corrBB
 	tau2A.AA(object)[marker.index, batch.index] <- shrink.tau2A.AA
 	tau2A.BB(object)[marker.index, batch.index] <- shrink.tau2A.BB
 	tau2B.AA(object)[marker.index, batch.index] <- shrink.tau2B.AA
 	tau2B.BB(object)[marker.index, batch.index] <- shrink.tau2B.BB
66900fea
 	if(is.lds) return(TRUE) else return(object)
6846d583
 }
 
 
 fit.lm1 <- function(strata,
d08acba1
 		    index.list,
d5bc779e
 		    object,
 		    MIN.SAMPLES,
 		    THR.NU.PHI,
 		    MIN.NU,
 		    MIN.PHI,
6846d583
 		    verbose, is.lds,
 		    CHR.X, ...){
f0f8ba16
 	open(object$gender)
6846d583
 	if(is.lds) {physical <- get("physical"); open(object)}
61dc01ae
 	if(verbose) message("      Probe stratum ", strata, " of ", length(index.list))
6846d583
 	snps <- index.list[[strata]]
 	batches <- split(seq_along(batch(object)), as.character(batch(object)))
ea66649b
 	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
453e688a
 	##batchnames <- batchNames(object)
 	batch.names <- names(batches)
 	batch.index <- which(batchNames(object) %in% batch.names)
 	if(length(batches) > 1 && "grandMean" %in% batchNames(object))
 		batch.index <- c(batch.index, match("grandMean", batchNames(object)))
 	N.AA <- as.matrix(N.AA(object)[snps, batch.index])
 	N.AB <- as.matrix(N.AB(object)[snps, batch.index])
 	N.BB <- as.matrix(N.BB(object)[snps, batch.index])
 	medianA.AA <- as.matrix(medianA.AA(object)[snps, batch.index])
 	medianA.AB <- as.matrix(medianA.AB(object)[snps, batch.index])
 	medianA.BB <- as.matrix(medianA.BB(object)[snps, batch.index])
 	medianB.AA <- as.matrix(medianB.AA(object)[snps, batch.index])
 	medianB.AB <- as.matrix(medianB.AB(object)[snps, batch.index])
 	medianB.BB <- as.matrix(medianB.BB(object)[snps, batch.index])
 	madA.AA <- as.matrix(madA.AA(object)[snps, batch.index])
 	madA.AB <- as.matrix(madA.AB(object)[snps, batch.index])
 	madA.BB <- as.matrix(madA.BB(object)[snps, batch.index])
 	madB.AA <- as.matrix(madB.AA(object)[snps, batch.index])
 	madB.AB <- as.matrix(madB.AB(object)[snps, batch.index])
 	madB.BB <- as.matrix(madB.BB(object)[snps, batch.index])
 	tau2A.AA <- as.matrix(tau2A.AA(object)[snps, batch.index])
 	tau2B.BB <- as.matrix(tau2B.BB(object)[snps, batch.index])
 	tau2A.BB <- as.matrix(tau2A.BB(object)[snps, batch.index])
 	tau2B.AA <- as.matrix(tau2B.AA(object)[snps, batch.index])
 	corrAA <- as.matrix(corrAA(object)[snps, batch.index])
 	corrAB <- as.matrix(corrAB(object)[snps, batch.index])
 	corrBB <- as.matrix(corrBB(object)[snps, batch.index])
 	nuA <- as.matrix(nuA(object)[snps,  batch.index])
 	phiA <- as.matrix(phiA(object)[snps, batch.index])
 	nuB <- as.matrix(nuB(object)[snps, batch.index])
 	phiB <- as.matrix(phiB(object)[snps, batch.index])
 	flags <- as.matrix(flags(object)[snps, batch.index])
6846d583
 	for(k in seq(along=batches)){
 		B <- batches[[k]]
66900fea
 		if(length(B) < MIN.SAMPLES) next()
6846d583
 		this.batch <- unique(as.character(batch(object)[B]))
 		medianA <- cbind(medianA.AA[, k], medianA.AB[, k], medianA.BB[, k])
 		medianB <- cbind(medianB.AA[, k], medianB.AB[, k], medianB.BB[, k])
 		madA <- cbind(madA.AA[, k], madA.AB[, k], madA.BB[, k])
 		madB <- cbind(madB.AA[, k], madB.AB[, k], madB.BB[, k])
 		NN <- cbind(N.AA[, k], N.AB[, k], N.BB[, k])
453e688a
 		## regress on the medians using the standard errors (hence the division by N) as weights
 		res <- fit.wls(NN=NN, sigma=madA, allele="A", Y=medianA, autosome=!CHR.X)
 		nuA[, k] <- res[1, ]
 		phiA[, k] <- res[2, ]
 		rm(res)
 		res <- fit.wls(NN=NN, sigma=madB, allele="B", Y=medianB, autosome=!CHR.X)##allele="B", Ystar=YB, W=wB, Ns=Ns)
 		nuB[, k] <- res[1, ]
 		phiB[, k] <- res[2, ]
 	}
 	##---------------------------------------------------------------------------
 	##
 	## Grand mean
 	##
 	##---------------------------------------------------------------------------
5239f884
 ##	if(length(batches) > 1 && "grandMean" %in% batchNames(object)){
 	##  TODO:  There are NA's in the medianA.AA for the grandMean and 0's in the madA
 	##         - both need to be handled prior to estimating a grand intercept and slope
 	if(FALSE){
453e688a
 		## then the last column is for the grandMean
 		k <- ncol(medianA.AA)
 		medianA <- cbind(medianA.AA[, k], medianA.AB[, k], medianA.BB[, k])
 		medianB <- cbind(medianB.AA[, k], medianB.AB[, k], medianB.BB[, k])
 		madA <- cbind(madA.AA[, k], madA.AB[, k], madA.BB[, k])
 		madB <- cbind(madB.AA[, k], madB.AB[, k], madB.BB[, k])
 		NN <- cbind(N.AA[, k], N.AB[, k], N.BB[, k])
5239f884
 		index <- which(rowSums(is.na(medianA)) == 0)
 		res <- fit.wls(NN=NN[index, ], sigma=madA[index, ], allele="A", Y=medianA[index, ], autosome=!CHR.X)
6846d583
 		nuA[, k] <- res[1, ]
 		phiA[, k] <- res[2, ]
 		rm(res)
 		res <- fit.wls(NN=NN, sigma=madB, allele="B", Y=medianB, autosome=!CHR.X)##allele="B", Ystar=YB, W=wB, Ns=Ns)
 		nuB[, k] <- res[1, ]
 		phiB[, k] <- res[2, ]
d5bc779e
 	}
d08acba1
 	if(THR.NU.PHI){
 		nuA[nuA < MIN.NU] <- MIN.NU
 		nuB[nuB < MIN.NU] <- MIN.NU
 		phiA[phiA < MIN.PHI] <- MIN.PHI
 		phiB[phiB < MIN.PHI] <- MIN.PHI
 	}
453e688a
 	nuA(object)[snps, batch.index] <- nuA
 	nuB(object)[snps, batch.index] <- nuB
 	phiA(object)[snps, batch.index] <- phiA
 	phiB(object)[snps, batch.index] <- phiB
6846d583
 	if(is.lds){
 		close(object)
d08acba1
 		return(TRUE)
 	} else{
 		return(object)
 	}
d5bc779e
 }
 
73fd7374
 ## nonpolymorphic markers
6846d583
 fit.lm2 <- function(strata,
d08acba1
 		    index.list,
d5bc779e
 		    object,
 		    MIN.SAMPLES,
 		    THR.NU.PHI,
 		    MIN.NU,
 		    MIN.PHI,
6846d583
 		    verbose, is.lds, CHR.X, ...){
 	if(is.lds) {physical <- get("physical"); open(object)}
f0f8ba16
 	open(object$gender)
61dc01ae
 	if(verbose) message("      Probe stratum ", strata, " of ", length(index.list))
6846d583
 	marker.index <- index.list[[strata]]
 	batches <- split(seq_along(batch(object)), as.character(batch(object)))
d5bc779e
 	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
453e688a
 	batch.names <- names(batches)
 	batch.index <- which(batchNames(object) %in% batch.names)
 	##
d5bc779e
 	ii <- isSnp(object) & chromosome(object) < 23 & !is.na(chromosome(object))
453e688a
 	flags <- as.matrix(flags(object)[ii, batch.index])
6846d583
 	fns <- featureNames(object)[ii]
 	fns.noflags <- fns[rowSums(flags, na.rm=T) == 0]
063b3d14
 	if(length(fns.noflags) == 0) {
 		warning("All features in one of the probeset batches were flagged. The data is processed in probeset batches to reduce RAM, but the error suggests that increasing the size of the batches may help.  For example, ocProbesets(4*ocProbesets()) would increase the current size of the probeset batches by 4.")
 		fns.noflags <- featureNames(object)[marker.index] ## allow processing to continue
 	}
a3b625d4
 	index.flag <- match(fns.noflags, featureNames(object))
 	snp.index <- sample(index.flag, min(c(length(index.flag), 5000)))
453e688a
 	##
 	nuA.np <- as.matrix(nuA(object)[marker.index, batch.index])
 	phiA.np <- as.matrix(phiA(object)[marker.index, batch.index])
 	tau2A.AA <- as.matrix(tau2A.AA(object)[marker.index, batch.index])
 	##
 	nuA.snp <- as.matrix(nuA(object)[snp.index, batch.index])
 	nuB.snp <- as.matrix(nuB(object)[snp.index, batch.index])
 	phiA.snp <- as.matrix(phiA(object)[snp.index, batch.index])
 	phiB.snp <- as.matrix(phiB(object)[snp.index, batch.index])
 	medianA.AA <- as.matrix(medianA.AA(object)[snp.index, batch.index])
 	medianB.BB <- as.matrix(medianB.BB(object)[snp.index, batch.index])
 	medianA.AA.np <- as.matrix(medianA.AA(object)[marker.index, batch.index])
6846d583
 	for(k in seq_along(batches)){
 		B <- batches[[k]]
 		this.batch <- unique(as.character(batch(object)[B]))
 		X <- cbind(1, log2(c(medianA.AA[, k], medianB.BB[, k])))
3e4db4b1
 		Y <- log2(c(phiA.snp[, k], phiB.snp[, k]))
453e688a
 		not.missing <- rowSums(is.na(X)) == 0 & !is.na(Y)
 		X <- X[not.missing, ]
 		Y <- Y[not.missing]
d5bc779e
 		betahat <- solve(crossprod(X), crossprod(X, Y))
73fd7374
 		##crosshyb <- max(median(medianA.AA[, k]) - median(medianA.AA.np[, k]), 0)
 		crosshyb <- 0
6846d583
 		X <- cbind(1, log2(medianA.AA.np[, k] + crosshyb))
d5bc779e
 		logPhiT <- X %*% betahat
6846d583
 		phiA.np[, k] <- 2^(logPhiT)
 		nuA.np[, k] <- medianA.AA.np[,k]-2*phiA.np[, k]
29f437a0
 ##		cA[, k] <- 1/phiA.np[, J] * (A.np - nuA.np[, J])
6846d583
 	}
 	if(THR.NU.PHI){
 		nuA.np[nuA.np < MIN.NU] <- MIN.NU
 		phiA.np[phiA.np < MIN.PHI] <- MIN.PHI
d5bc779e
 	}
453e688a
 	nuA(object)[marker.index, batch.index] <- nuA.np
 	phiA(object)[marker.index, batch.index] <- phiA.np
6846d583
 	if(is.lds) { close(object); return(TRUE)}
 	return(object)
d5bc779e
 }
 
6846d583
 summarizeMaleXNps <- function(marker.index,
 			      batches,
 			      object, MIN.SAMPLES){
 	nr <- length(marker.index)
 	nc <- length(batchNames(object))
 	NN.Mlist <- imputed.medianA <- imputed.medianB <- shrink.madA <- shrink.madB <- vector("list", nc)
f0f8ba16
 	open(object$gender)
 	gender <- object$gender[]
 	open(A(object))
6846d583
 	AA <- as.matrix(A(object)[marker.index, gender==1])
 	madA.AA <- medianA.AA <- matrix(NA, nr, nc)
453e688a
 	colnames(madA.AA) <- colnames(medianA.AA) <- batchNames(object)
6846d583
 	numberMenPerBatch <- rep(NA, nc)
 	for(k in seq_along(batches)){
 		B <- batches[[k]]
 		this.batch <- unique(as.character(batch(object)[B]))
 		gender <- object$gender[B]
 		if(sum(gender==1) < MIN.SAMPLES) next()
 		sns.batch <- sampleNames(object)[B]
 		##subset GG apppriately
 		sns <- colnames(AA)
 		J <- sns%in%sns.batch
 		numberMenPerBatch[k] <- length(J)
453e688a
 		medianA.AA[, k] <- rowMedians(AA[, J, drop=FALSE], na.rm=TRUE)
 		madA.AA[, k] <- rowMAD(AA[, J, drop=FALSE], na.rm=TRUE)
6846d583
 	}
 	return(list(medianA.AA=medianA.AA,
 		    madA.AA=madA.AA))
 }
 
 
f7d82191
 summarizeXGenotypes <- function(marker.index,
453e688a
 				batches,
 				object,
 				GT.CONF.THR,
 				MIN.OBS,
 				MIN.SAMPLES,
 				verbose,
 				is.lds,
 				DF.PRIOR,
 				gender="male", ...){
 	I <- unlist(batches)
f0f8ba16
 	open(object$gender)
f7d82191
 	if(gender == "male"){
f0f8ba16
 		gender.index <- which(object$gender[] == 1)
453e688a
 	} else {
f0f8ba16
 		gender.index <- which(object$gender[] == 2)
453e688a
 	}
 	batches <- lapply(batches, function(x, gender.index) intersect(x, gender.index), gender.index)
 	batch.names <- names(batches)
 	batch.index <- which(batchNames(object) %in% batch.names)
6846d583
 	nr <- length(marker.index)
453e688a
 	nc <- length(batch.index)
1813938e
 	NN.Mlist <- medianA <- medianB <- shrink.madA <- shrink.madB <- vector("list", nc)
453e688a
 	names(NN.Mlist) <- names(medianA) <- names(medianB) <- names(shrink.madA) <- names(shrink.madB) <- batch.names
f0f8ba16
 	open(calls(object))
 	open(snpCallProbability(object))
 	open(A(object))
 	open(B(object))
453e688a
 	GG <- as.matrix(calls(object)[marker.index, ])
 	CP <- as.matrix(snpCallProbability(object)[marker.index, ])
 	AA <- as.matrix(A(object)[marker.index, ])
 	BB <- as.matrix(B(object)[marker.index, ])
6846d583
 	for(k in seq_along(batches)){
f7d82191
 		sample.index <- batches[[k]]
453e688a
 		if(length(sample.index) == 0) next()
f7d82191
 		this.batch <- unique(as.character(batch(object)[sample.index]))
 		##gender <- object$gender[sample.index]
 		if(length(sample.index) < MIN.SAMPLES) next()
 		sns.batch <- sampleNames(object)[sample.index]
6846d583
 		##subset GG apppriately
 		sns <- colnames(GG)
 		J <- sns%in%sns.batch
453e688a
 		G <- GG[, J, drop=FALSE]
 		xx <- CP[, J, drop=FALSE]
 		p <- i2p(xx)
6846d583
 		highConf <- (1-exp(-xx/1000)) > GT.CONF.THR
 		G <- G*highConf
453e688a
 		A <- AA[, J, drop=FALSE]
 		B <- BB[, J, drop=FALSE]
6846d583
 		G.AA <- G==1
 		G.AA[G.AA==FALSE] <- NA
 		G.AB <- G==2
 		G.AB[G.AB==FALSE] <- NA
 		G.BB <- G==3
 		G.BB[G.BB==FALSE] <- NA
f7d82191
 		N.AA <- rowSums(G.AA, na.rm=TRUE)
 		if(gender == "female")
 			N.AB <- rowSums(G.AB, na.rm=TRUE)
 		N.BB <- rowSums(G.BB, na.rm=TRUE)
6846d583
 		summaryStats <- function(X, INT, FUNS){
 			tmp <- matrix(NA, nrow(X), length(FUNS))
 			for(j in seq_along(FUNS)){
 				FUN <- match.fun(FUNS[j])
 				tmp[, j] <- FUN(X*INT, na.rm=TRUE)
 			}
 			tmp
 		}
 		statsA.AA <- summaryStats(G.AA, A, FUNS=c("rowMedians", "rowMAD"))
f7d82191
 		if(gender == "female")
 			statsA.AB <- summaryStats(G.AB, A, FUNS=c("rowMedians", "rowMAD"))
6846d583
 		statsA.BB <- summaryStats(G.BB, A, FUNS=c("rowMedians", "rowMAD"))
 		statsB.AA <- summaryStats(G.AA, B, FUNS=c("rowMedians", "rowMAD"))
f7d82191
 		if(gender == "female")
 			statsB.AB <- summaryStats(G.AB, B, FUNS=c("rowMedians", "rowMAD"))
6846d583
 		statsB.BB <- summaryStats(G.BB, B, FUNS=c("rowMedians", "rowMAD"))
f7d82191
 		if(gender=="male"){
 			medianA[[k]] <- cbind(statsA.AA[, 1], ##statsA.AB[, 1],
 					      statsA.BB[, 1])
 			medianB[[k]] <- cbind(statsB.AA[, 1], ##statsB.AB[, 1],
 					      statsB.BB[, 1])
 			madA <- cbind(statsA.AA[, 2],  ##statsA.AB[, 2],
 				      statsA.BB[, 2])
 			madB <- cbind(statsB.AA[, 2], ##statsB.AB[, 2],
 				      statsB.BB[, 2])
 			NN <- cbind(N.AA, N.BB)
7b3d98f7
 			rm(statsA.AA, statsA.BB, statsB.AA, statsB.BB)
f7d82191
 		} else {
 			medianA[[k]] <- cbind(statsA.AA[, 1], statsA.AB[, 1],
 					      statsA.BB[, 1])
 			medianB[[k]] <- cbind(statsB.AA[, 1], statsB.AB[, 1],
 					      statsB.BB[, 1])
 			madA <- cbind(statsA.AA[, 2],  statsA.AB[, 2],
 				      statsA.BB[, 2])
 			madB <- cbind(statsB.AA[, 2], statsB.AB[, 2],
 				      statsB.BB[, 2])
 			NN <- cbind(N.AA, N.AB, N.BB)
 			rm(statsA.AA, statsA.AB, statsA.BB, statsB.AA, statsB.AB, statsB.BB)
 		}
 		NN.Mlist[[k]] <- NN
 		shrink.madA[[k]] <- shrink(madA, NN, DF.PRIOR)
 		shrink.madB[[k]] <- shrink(madB, NN, DF.PRIOR)
6846d583
 		##---------------------------------------------------------------------------
 		## SNPs that we'll use for imputing location/scale of unobserved genotypes
 		##---------------------------------------------------------------------------
f7d82191
 		index.complete <- indexComplete(NN, medianA[[k]], medianB[[k]], MIN.OBS)
6846d583
 		##---------------------------------------------------------------------------
 		## Impute sufficient statistics for unobserved genotypes (plate-specific)
 		##---------------------------------------------------------------------------
f7d82191
 		if(gender=="male"){
7b3d98f7
 			res <- imputeCenterX(medianA[[k]], medianB[[k]], NN, index.complete, MIN.OBS)
f7d82191
 		} else {
 			unobservedAA <- NN[, 1] < MIN.OBS
 			unobservedAB <- NN[, 2] < MIN.OBS
 			unobservedBB <- NN[, 3] < MIN.OBS
 			unobserved.index <- vector("list", 3)
 			## AB, BB observed
 			unobserved.index[[1]] <- which(unobservedAA & (NN[, 2] >= MIN.OBS & NN[, 3] >= MIN.OBS))
 			## AA and BB observed (strange)
 			unobserved.index[[2]] <- which(unobservedAB & (NN[, 1] >= MIN.OBS & NN[, 3] >= MIN.OBS))
 			## AB and AA observed
 			unobserved.index[[3]] <- which(unobservedBB & (NN[, 2] >= MIN.OBS & NN[, 1] >= MIN.OBS))
 			res <- imputeCenter(medianA[[k]], medianB[[k]], index.complete, unobserved.index)
453e688a
 			medianA[[k]] <- res[[1]]
 			medianB[[k]] <- res[[2]]
 
 			## RS: For Monomorphic SNPs a mixture model may be better
 			## RS: Further, we can improve estimation by borrowing strength across batch
 			unobserved.index[[1]] <- which(unobservedAA & unobservedAB)
 			unobserved.index[[2]] <- which(unobservedBB & unobservedAB)
 			unobserved.index[[3]] <- which(unobservedAA & unobservedBB) ## strange
 			res <- imputeCentersForMonomorphicSnps(medianA[[k]], medianB[[k]],
 							       index.complete,
 							       unobserved.index)
 			medianA[[k]] <- res[[1]]; medianB[[k]] <- res[[2]]
f7d82191
 		}
1813938e
 		medianA[[k]] <- res[[1]]
 		medianB[[k]] <- res[[2]]
6846d583
 	}
f0f8ba16
 	close(calls(object))
 	close(snpCallProbability(object))
 	close(A(object))
 	close(B(object))
6846d583
 	return(list(madA=shrink.madA,
 		    madB=shrink.madB,
f7d82191
 		    NN=NN.Mlist,
1813938e
 		    medianA=medianA,
 		    medianB=medianB))
6846d583
 }
 
 ## X chromosome, SNPs
 fit.lm3 <- function(strata,
d08acba1
 		    index.list,
d5bc779e
 		    object,
 		    SNRMin,
 		    MIN.SAMPLES,
 		    MIN.OBS,
 		    DF.PRIOR,
 		    GT.CONF.THR,
 		    THR.NU.PHI,
 		    MIN.NU,
 		    MIN.PHI,
6846d583
 		    verbose, is.lds, CHR.X, ...){
453e688a
 	##if(is.lds) {physical <- get("physical"); open(object)}
61dc01ae
 	if(verbose) message("      Probe stratum ", strata, " of ", length(index.list))
f0f8ba16
 	open(object$gender)
 	gender <- object$gender[]
07d00a55
 	enough.males <- sum(gender==1) >= MIN.SAMPLES
 	enough.females <- sum(gender==2) >= MIN.SAMPLES
6846d583
 	if(!enough.males & !enough.females){
 		message(paste("fewer than", MIN.SAMPLES, "men and women.  Copy number not estimated for CHR X"))
 		return(object)
 	}
 	marker.index <- index.list[[strata]]
 	batches <- split(seq_along(batch(object)), as.character(batch(object)))
 	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
453e688a
 	batch.names <- names(batches)
 	batch.index <- which(batchNames(object) %in% batch.names)
 
 	nuA <- as.matrix(nuA(object)[marker.index, batch.index])
 	nuB <- as.matrix(nuB(object)[marker.index, batch.index])
 	phiA <- as.matrix(phiA(object)[marker.index, batch.index])
 	phiB <- as.matrix(phiB(object)[marker.index, batch.index])
 	phiA2 <- as.matrix(phiPrimeA(object)[marker.index, batch.index])
 	phiB2 <- as.matrix(phiPrimeB(object)[marker.index, batch.index])
6846d583
 	if(enough.males){
f7d82191
 		res <- summarizeXGenotypes(marker.index=marker.index,
453e688a
 					   batches=batches,
 					   object=object,
 					   GT.CONF.THR=GT.CONF.THR,
 					   MIN.SAMPLES=MIN.SAMPLES,
 					   MIN.OBS=MIN.OBS,
 					   verbose=verbose,
 					   is.lds=is.lds,
 					   DF.PRIOR=DF.PRIOR/2,
f7d82191
 					   gender="male")
6846d583
 		madA.Mlist <- res[["madA"]]
 		madB.Mlist <- res[["madB"]]
 		medianA.Mlist <- res[["medianA"]]
 		medianB.Mlist <- res[["medianB"]]
f2e18f28
 		NN.Mlist <- res[["NN"]]
6846d583
 		rm(res)
 		## Need N, median, mad
 	}
 	if(enough.females){
f7d82191
 		res <- summarizeXGenotypes(marker.index=marker.index,
 						 batches=batches,
 						 object=object,
 						 GT.CONF.THR=GT.CONF.THR,
 						 MIN.SAMPLES=MIN.SAMPLES,
 						 MIN.OBS=MIN.OBS,
 						 verbose=verbose,
 						 is.lds=is.lds,
 						 DF.PRIOR=DF.PRIOR/2,
 					   gender="female")
f2e18f28
 		madA.Flist <- res[["madA"]]
 		madB.Flist <- res[["madB"]]
 		medianA.Flist <- res[["medianA"]]
 		medianB.Flist <- res[["medianB"]]
 		NN.Flist <- res[["NN"]]
 		rm(res)
6846d583
 	}
 	for(k in seq_along(batches)){
f2e18f28
 		sample.index <- batches[[k]]
453e688a
 		if(is.null(sample.index)) next()
f2e18f28
 		this.batch <- unique(as.character(batch(object)[sample.index]))
 		gender <- object$gender[sample.index]
6846d583
 		enough.men <- sum(gender==1) >= MIN.SAMPLES
 		enough.women <- sum(gender==2) >= MIN.SAMPLES
 		if(!enough.men & !enough.women) {
 			if(verbose) message(paste("fewer than", MIN.SAMPLES, "men and women in batch", this.batch, ". CHR X copy number not available. "))
 			next()
d5bc779e
 		}
6846d583
 		if(enough.women){
f2e18f28
 			madA.F <- madA.Flist[[k]]
 			madB.F <- madB.Flist[[k]]
 			medianA.F <- medianA.Flist[[k]]
 			medianB.F <- medianB.Flist[[k]]
 			NN.F <- NN.Flist[[k]]
6846d583
 		}
 		if(enough.men){
 			madA.M <- madA.Mlist[[k]]
 			madB.M <- madB.Mlist[[k]]
 			medianA.M <- medianA.Mlist[[k]]
 			medianB.M <- medianB.Mlist[[k]]
 			NN.M <- NN.Mlist[[k]]
 		}
 		if(enough.men & enough.women){
453e688a
 			if(any(madA.F == 0)){
 				message("Zeros in median absolute deviation.  Not estimating CN for chrX for batch ", names(batches)[k])
 				next()
 			}
f2e18f28
 			betas <- fit.wls(cbind(NN.M, NN.F),
 					 sigma=cbind(madA.M, madA.F),
6846d583
 					 allele="A",
f2e18f28
 					 Y=cbind(medianA.M, medianA.F),
6846d583
 					 autosome=FALSE)
 			nuA[, k] <- betas[1, ]
 			phiA[, k] <- betas[2, ]
 			phiA2[, k] <- betas[3, ]
f2e18f28
 			betas <- fit.wls(cbind(NN.M, NN.F),
 					 sigma=cbind(madB.M, madB.F),
6846d583
 					 allele="B",
f2e18f28
 					 Y=cbind(medianB.M, medianB.F),
6846d583
 					 autosome=FALSE)
 			nuB[, k] <- betas[1, ]
 			phiB[, k] <- betas[2, ]
 			phiB2[, k] <- betas[3, ]
d5bc779e
 		}
6846d583
 		if(enough.men & !enough.women){
1813938e
 			betas <- fit.wls(NN.M,##[, c(1,3)],
 					 sigma=madA.M,##[, c(1,3)],
6846d583
 					 allele="A",
1813938e
 					 Y=medianA.M,##[, c(1,3)],
6846d583
 					 autosome=FALSE,
1813938e
 					 X=cbind(1, c(1, 0)))
6846d583
 			nuA[, k] <- betas[1, ]
 			phiA[, k] <- betas[2, ]
1813938e
 			betas <- fit.wls(NN.M,##[, c(1,3)],
 					 sigma=madB.M,#[, c(1,3)],
6846d583
 					 allele="B",
1813938e
 					 Y=medianB.M,#[, c(1,3)],
6846d583
 					 autosome=FALSE,
 					 X=cbind(1, c(0, 1)))
 			nuB[, k] <- betas[1, ]
 			phiB[, k] <- betas[2, ]
 		}
 		if(!enough.men & enough.women){
453e688a
 			if(any(madA.F == 0)){
 				message("Zeros in median absolute deviation.  Not estimating CN for chrX for batch ", names(batches)[k])
 				next()
 			}
6846d583
 			betas <- fit.wls(NN.F,
 					 sigma=madA.F,
 					 allele="A",
 					 Y=medianA.F,
 					 autosome=TRUE) ## can just use the usual design matrix for the women-only analysis
 			nuA[, k] <- betas[1, ]
 			phiA[, k] <- betas[2, ]
 			betas <- fit.wls(NN.F,
 					 sigma=madB.F,
 					 allele="B",
 					 Y=medianB.F,
 					 autosome=TRUE) ## can just use the usual design matrix for the women-only analysis
 			nuB[, k] <- betas[1, ]
 			phiB[, k] <- betas[2, ]
d5bc779e
 		}
 	}
6846d583
 	if(THR.NU.PHI){
 		nuA[nuA < MIN.NU] <- MIN.NU
 		nuB[nuB < MIN.NU] <- MIN.NU
 		phiA[phiA < MIN.PHI] <- MIN.PHI
f3d40144
 		phiA2[phiA2 < 1] <- 1
6846d583
 		phiB[phiB < MIN.PHI] <- MIN.PHI
f3d40144
 		phiB2[phiB2 < 1] <- 1
6846d583
 	}
453e688a
 	nuA(object)[marker.index, batch.index] <- nuA
 	nuB(object)[marker.index, batch.index] <- nuB
 	phiA(object)[marker.index, batch.index] <- phiA
 	phiB(object)[marker.index, batch.index] <- phiB
 	phiPrimeA(object)[marker.index, batch.index] <- phiA2
 	phiPrimeB(object)[marker.index, batch.index] <- phiB2
063b3d14
 	##
 	if(enough.women){
 		medianA.AA(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianA.Flist, function(x) x[, 1]))
 		medianA.AB(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianA.Flist, function(x) x[, 2]))
 		medianA.BB(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianA.Flist, function(x) x[, 3]))
 		medianB.AA(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianB.Flist, function(x) x[, 1]))
 		medianB.AB(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianB.Flist, function(x) x[, 2]))
 		medianB.BB(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianB.Flist, function(x) x[, 3]))
 	}
66900fea
 	if(is.lds) {close(object); return(TRUE)} else return(object)
d5bc779e
 }
 
453e688a
 ##nonpolymorphic markers on X
6846d583
 fit.lm4 <- function(strata,
d08acba1
 		    index.list,
d5bc779e
 		    object,
 		    MIN.SAMPLES,
 		    THR.NU.PHI,
 		    MIN.NU,
 		    MIN.PHI,
453e688a
 		    verbose, is.lds=TRUE, ...){
 	##if(is.lds) {physical <- get("physical"); open(object)}
 	## exclude batches that have fewer than MIN.SAMPLES
f0f8ba16
 	open(object$gender)
 	gender <- object$gender[]
07d00a55
 	enough.males <- sum(gender==1) >= MIN.SAMPLES
 	enough.females <- sum(gender==2) >= MIN.SAMPLES
6846d583
 	if(!enough.males & !enough.females){
 		message(paste("fewer than", MIN.SAMPLES, "men and women.  Copy number not estimated for CHR X"))
 		return(object)
 	}
453e688a
 	excludeBatch <- names(table(batch(object)))[table(batch(object)) < MIN.SAMPLES]
 	if(length(excludeBatch) > 0){
 		bns <- unique(batch(object))
 		batch.index <- which(!bns %in% excludeBatch)
 		sample.index <- which(!batch(object)%in% excludeBatch)
 	} else {
 		sample.index <- 1:ncol(object)
 		batch.index <- as.integer(as.factor(unique(batch(object))))
 	}
61dc01ae
 	if(verbose) message("      Probe stratum ", strata, " of ", length(index.list))
6846d583
 	marker.index <- index.list[[strata]]
 	batches <- split(seq_along(batch(object)), as.character(batch(object)))
453e688a
 	batches <- batches[batch.index]
 	nc <- length(batch.index)
6846d583
 	if(enough.males){
 		res <- summarizeMaleXNps(marker.index=marker.index,
 					 batches=batches,
 					 object=object, MIN.SAMPLES=MIN.SAMPLES)
453e688a
 		medianA.AA.M <- res[["medianA.AA"]][, batch.index, drop=FALSE]
 		madA.AA.M <- res[["madA.AA"]][, batch.index, drop=FALSE]
6846d583
 	}
453e688a
 	medianA.AA.F <- as.matrix(medianA.AA(object)[marker.index, batch.index, drop=FALSE]) ## median for women
 	madA.AA.F <- as.matrix(madA.AA(object)[marker.index, batch.index, drop=FALSE]) ## median for women
 	split.gender <- split(gender[sample.index], as.character(batch(object)[sample.index]))
6846d583
 	N.M <- sapply(split.gender, function(x) sum(x==1))
 	N.F <- sapply(split.gender, function(x) sum(x==2))
453e688a
 	nuA <- as.matrix(nuA(object)[marker.index, batch.index, drop=FALSE])
 	nuB <- as.matrix(nuB(object)[marker.index, batch.index, drop=FALSE])
 	phiA <- as.matrix(phiA(object)[marker.index, batch.index, drop=FALSE])
 	phiB <- as.matrix(phiB(object)[marker.index, batch.index, drop=FALSE])
6846d583
 	ii <- isSnp(object) & chromosome(object) < 23 & !is.na(chromosome(object))
 	fns <- featureNames(object)[ii]
453e688a
 	flags <- as.matrix(flags(object)[ii, batch.index])
6846d583
 	fns.noflags <- fns[rowSums(flags, na.rm=T) == 0]
063b3d14
 	if(length(fns.noflags) == 0){
 		warning("All features in one of the probeset batches were flagged. The data is processed in probeset batches to reduce RAM, but the error suggests that increasing the size of the batches may help.  For example, ocProbesets(4*ocProbesets()) would increase the current size of the probeset batches by 4.")
 		fns.noflags <- featureNames(object)[marker.index] ## allow processing to continue
 	}
a3b625d4
 	index.flag <- match(fns.noflags, featureNames(object))
 	snp.index <- sample(index.flag, min(c(length(index.flag), 10000)))
453e688a
 	##
07d00a55
 	N.AA <- as.matrix(N.AA(object)[snp.index, batch.index, drop=FALSE])
453e688a
 	N.AB <- as.matrix(N.AB(object)[snp.index, batch.index, drop=FALSE])
 	N.BB <- as.matrix(N.BB(object)[snp.index, batch.index, drop=FALSE])
6846d583
 	enoughAA <- rowSums(N.AA < 5) == 0
 	enoughAB <- rowSums(N.AB < 5) == 0
 	enoughBB <- rowSums(N.BB < 5) == 0
 	snp.index <- snp.index[enoughAA & enoughAB & enoughBB]
453e688a
 	if(length(snp.index) < 100){
 		message("too few snps pass criteria for estimating parameters for NP markers on chr X")
 		return(object)
07d00a55
 	}
453e688a
 	nuA.snp <- as.matrix(nuA(object)[snp.index, batch.index, drop=FALSE])
 	nuA.snp.notmissing <- rowSums(is.na(nuA.snp)) == 0
 	nuA.snp.notnegative <- rowSums(as.matrix(nuA(object)[snp.index, batch.index, drop=FALSE]) < 20) == 0
6846d583
 	snp.index <- snp.index[nuA.snp.notmissing & nuA.snp.notnegative]
453e688a
 	medianA.AA.snp <- as.matrix(medianA.AA(object)[snp.index, batch.index])
 	medianB.BB.snp <- as.matrix(medianB.BB(object)[snp.index, batch.index])
 	nuA.snp <- as.matrix(nuA(object)[snp.index, batch.index])
 	nuB.snp <- as.matrix(nuB(object)[snp.index, batch.index])
 	phiA.snp <- as.matrix(phiA(object)[snp.index, batch.index])
 	phiB.snp <- as.matrix(phiB(object)[snp.index, batch.index])
6846d583
 ##	pseudoAR <- position(object)[snp.index] < 2709520 | (position(object)[snp.index] > 154584237 & position(object)[snp.index] < 154913754)
 ##	pseudoAR[is.na(pseudoAR)] <- FALSE
 	for(k in seq_along(batches)){
 		B <- batches[[k]]
 		this.batch <- unique(as.character(batch(object)[B]))
 		gender <- object$gender[B]
453e688a
 		enough.men <- N.M[[this.batch]] >= MIN.SAMPLES
 		enough.women <- N.F[[this.batch]] >= MIN.SAMPLES
6846d583
 		if(!enough.men & !enough.women) {
 			if(verbose) message(paste("fewer than", MIN.SAMPLES, "men and women in batch", this.batch, ". CHR X copy number not available. "))
 			next()
 		}
 		tmp <- cbind(medianA.AA.snp[, k], medianB.BB.snp[,k], phiA.snp[, k], phiB.snp[, k])
 		tmp <- tmp[rowSums(is.na(tmp) == 0) & rowSums(tmp < 20) == 0, ]
4191b5f9
 		if(nrow(tmp) < 100){
 			stop("too few markers for estimating nonpolymorphic CN on chromosome X")
 		}
6846d583
 		X <- cbind(1, log2(c(tmp[, 1], tmp[, 2])))
 		Y <- log2(c(tmp[, 3], tmp[, 4]))
453e688a
 		notmissing.index <- which(rowSums(is.na(X)) == 0 & !is.na(Y))
 		X <- X[notmissing.index, ]
 		Y <- Y[notmissing.index]
d5bc779e
 		betahat <- solve(crossprod(X), crossprod(X, Y))
910549db
 		if(enough.men){
4191b5f9
 			X.men <- cbind(1, log2(medianA.AA.M[, k]))
910549db
 			Yhat1 <- as.numeric(X.men %*% betahat)
 			## put intercept and slope for men in nuB and phiB
 			phiB[, k] <- 2^(Yhat1)
cb875a5a
 			nuB[, k] <- medianA.AA.M[, k] - 1*phiB[, k]
910549db
 		}
4191b5f9
 		X.fem <- cbind(1, log2(medianA.AA.F[, k]))
6846d583
 		Yhat2 <- as.numeric(X.fem %*% betahat)
 		phiA[, k] <- 2^(Yhat2)
4191b5f9
 		nuA[, k] <- medianA.AA.F[, k] - 2*phiA[, k]
6846d583
 	}
 	if(THR.NU.PHI){
 		nuA[nuA < MIN.NU] <- MIN.NU
 		phiA[phiA < MIN.PHI] <- MIN.PHI
 		nuB[nuB < MIN.NU] <- MIN.NU
 		phiB[phiB < MIN.PHI] <- MIN.PHI
 	}
453e688a
 	nuA(object)[marker.index, batch.index] <- nuA
 	phiA(object)[marker.index, batch.index] <- phiA
 	nuB(object)[marker.index, batch.index] <- nuB
 	phiB(object)[marker.index, batch.index] <- phiB
 	##if(is.lds) {close(object); return(TRUE)} else return(object)
415a3b06
 	if(is.lds) {close(object); return(TRUE)} else return(object)
d5bc779e
 }
82be31d4
 
5fcfed7d
 whichPlatform <- function(cdfName){
 	index <- grep("genomewidesnp", cdfName)
 	if(length(index) > 0){
 		platform <- "affymetrix"
 	} else{
 		index <- grep("human", cdfName)
 		platform <- "illumina"
 	}
 	return(platform)
 }
50b0d125
 
d08acba1
 cnrma2 <- function(A, filenames, row.names, verbose=TRUE, seed=1, cdfName, sns){
d5bc779e
 
 }
 
3c6c270e
 imputeCenter <- function(muA, muB, index.complete, index.missing){
 	index <- index.missing
 	mnA <- muA
 	mnB <- muB
1813938e
 	if(length(index.complete) >= 100){
 		for(j in 1:3){
 			if(length(index[[j]]) == 0) next()
 			X <- cbind(1, mnA[index.complete,  -j, drop=FALSE], mnB[index.complete,  -j, drop=FALSE])
 			Y <- cbind(mnA[index.complete, j], mnB[index.complete,  j])
 			betahat <- solve(crossprod(X), crossprod(X,Y))
 			X <- cbind(1, mnA[index[[j]],  -j, drop=FALSE],  mnB[index[[j]],  -j, drop=FALSE])
 			mus <- X %*% betahat
4c4e9303
 			## threshold imputed intensities that are very small
 			mus[mus < 64] <- 64
1813938e
 			muA[index[[j]], j] <- mus[, 1]
 			muB[index[[j]], j] <- mus[, 2]
 		}
3c6c270e
 	}
1813938e
 	results <- list(muA, muB)
 	return(results)
3c6c270e
 }
 
6846d583
 indexComplete <- function(NN, medianA, medianB, MIN.OBS){
 	Nindex <- which(rowSums(NN > MIN.OBS) == ncol(NN))
 	correct.order <- which(medianA[, 1] > medianA[, ncol(medianA)] & medianB[, ncol(medianB)] > medianB[, 1])
 	index.complete <- intersect(Nindex, correct.order)
 	size <- min(5000, length(index.complete))
 	if(size == 5000) index.complete <- sample(index.complete, 5000, replace=TRUE)
1813938e
 ##	if(length(index.complete) < 100){
 ##		stop("fewer than 100 snps pass criteria for imputing unobserved genotype location/scale")
 ##	}
6846d583
 	return(index.complete)
 }
 
 imputeCentersForMonomorphicSnps <- function(medianA, medianB, index.complete, unobserved.index){
 	cols <- c(3, 1, 2)
1813938e
 	if(length(index.complete) < 100){
 		##message("index.complete less than 100.  No imputation")
 		results <- list(medianA=medianA, medianB=medianB)
 		return(results)
 	}
6846d583
 	for(j in 1:3){
66900fea
 		if(length(unobserved.index[[j]]) == 0) next()
6846d583
 		kk <- cols[j]
 		X <- cbind(1, medianA[index.complete, kk], medianB[index.complete, kk])
 		Y <- cbind(medianA[index.complete,  -kk],
 			   medianB[index.complete,  -kk])
 		betahat <- solve(crossprod(X), crossprod(X,Y))
 		X <- cbind(1, medianA[unobserved.index[[j]],  kk], medianB[unobserved.index[[j]],  kk])
 		mus <- X %*% betahat
 		medianA[unobserved.index[[j]], -kk] <- mus[, 1:2]
 		medianB[unobserved.index[[j]], -kk] <- mus[, 3:4]
 	}
1813938e
 	results <- list(medianA=medianA, medianB=medianB)
 	return(results)
6846d583
 }
 
 
3c6c270e
 imputeCenterX <- function(muA, muB, Ns, index.complete, MIN.OBS){
1813938e
 	##index1 <- which(Ns[, 1] == 0 & Ns[, 3] > MIN.OBS)
 	index1 <- which(Ns[, 1] == 0 & Ns[, 2] > MIN.OBS)
3c6c270e
 	if(length(index1) > 0){
1813938e
 		##X <- cbind(1, muA[index.complete, 3], muB[index.complete, 3])
 		X <- cbind(1, muA[index.complete, 2], muB[index.complete, 2])
6846d583
 		Y <- cbind(1, muA[index.complete, 1], muB[index.complete, 1])
 ##		X <- cbind(1, muA[index.complete[[1]], 3], muB[index.complete[[1]], 3])
 ##		Y <- cbind(1, muA[index.complete[[1]], 1], muB[index.complete[[1]], 1])
3c6c270e
 		betahat <- solve(crossprod(X), crossprod(X,Y))
 		##now with the incomplete SNPs
1813938e
 		##X <- cbind(1, muA[index1, 3], muB[index1, 3])
 		X <- cbind(1, muA[index1, 2], muB[index1, 2])
3c6c270e
 		mus <- X %*% betahat
 		muA[index1, 1] <- mus[, 2]
 		muB[index1, 1] <- mus[, 3]
 	}
1813938e
 	index1 <- which(Ns[, 2] == 0)
3c6c270e
 	if(length(index1) > 0){
6846d583
 		X <- cbind(1, muA[index.complete, 1], muB[index.complete, 1])
1813938e
 		Y <- cbind(1, muA[index.complete, 2], muB[index.complete, 2])
6846d583
 ##		X <- cbind(1, muA[index.complete[[2]], 1], muB[index.complete[[2]], 1])
 ##		Y <- cbind(1, muA[index.complete[[2]], 3], muB[index.complete[[2]], 3])
3c6c270e
 		betahat <- solve(crossprod(X), crossprod(X,Y))
 		##now with the incomplete SNPs
 		X <- cbind(1, muA[index1, 1], muB[index1, 1])
 		mus <- X %*% betahat
1813938e
 		muA[index1, 2] <- mus[, 2]
 		muB[index1, 2] <- mus[, 3]
3c6c270e
 	}
 	list(muA, muB)
 }
50b0d125
 
 
eb0b8dd0
 ## constructors for Illumina platform
 constructIlluminaFeatureData <- function(gns, cdfName){
69ec8644
 	pkgname <- paste(cdfName, "Crlmm", sep="")
eb0b8dd0
 	path <- system.file("extdata", package=pkgname)
 	load(file.path(path, "cnProbes.rda"))
 	load(file.path(path, "snpProbes.rda"))
 	cnProbes$chr <- chromosome2integer(cnProbes$chr)
 	cnProbes <- as.matrix(cnProbes)
 	snpProbes$chr <- chromosome2integer(snpProbes$chr)
 	snpProbes <- as.matrix(snpProbes)
 	mapping <- rbind(snpProbes, cnProbes, deparse.level=0)
 	mapping <- mapping[match(gns, rownames(mapping)), ]
 	isSnp <- 1L-as.integer(gns %in% rownames(cnProbes))
 	mapping <- cbind(mapping, isSnp, deparse.level=0)
 	stopifnot(identical(rownames(mapping), gns))
 	colnames(mapping) <- c("chromosome", "position", "isSnp")
 	new("AnnotatedDataFrame",
 	    data=data.frame(mapping),
69ec8644
 	    varMetadata=data.frame(labelDescription=colnames(mapping)))
eb0b8dd0
 }
8ed2dc29
 
 
0198a9ad
 constructIlluminaAssayData <- function(np, snp, object, storage.mode="environment", nr){
eb0b8dd0
 	stopifnot(identical(snp$gns, featureNames(object)))
 	gns <- c(featureNames(object), np$gns)
 	sns <- np$sns
 	np <- np[1:2]
 	snp <- snp[1:2]
 	stripnames <- function(x) {
 		dimnames(x) <- NULL
 		x
 	}
 	np <- lapply(np, stripnames)
 	snp <- lapply(snp, stripnames)
fd0501ad
 	is.ff <- is(snp[[1]], "ff") | is(snp[[1]], "ffdf")
 	if(is.ff){
3e4db4b1
 		lapply(snp, open)
 		open(calls(object))
 		open(snpCallProbability(object))
 ##		lapply(np, open)
 	}
 	##tmp <- rbind(as.matrix(snp[[1]]), as.matrix(np[[1]]), deparse.level=0)
 	A.snp <- snp[[1]]
 	B.snp <- snp[[2]]
 	##Why is np not a ff object?
 	A.np <- np[[1]]
 	B.np <- np[[2]]
 	nc <- ncol(object)
fd0501ad
 	if(is.ff){
3e4db4b1
 		NA.vec <- rep(NA, nrow(A.np))
 		AA <- initializeBigMatrix("A", nr, nc, vmode="integer")
 		BB <- initializeBigMatrix("B", nr, nc, vmode="integer")
 		GG <- initializeBigMatrix("calls", nr, nc, vmode="integer")
 		PP <- initializeBigMatrix("confs", nr, nc, vmode="integer")
 		for(j in 1:ncol(object)){
0198a9ad
 			AA[, j] <- c(snp[[1]][, j], np[[1]][, j])
 			BB[, j] <- c(snp[[2]][, j], np[[2]][, j])
 			GG[, j] <- c(calls(object)[, j], NA.vec)
 			PP[, j] <- c(snpCallProbability(object)[, j], NA.vec)
3e4db4b1
 
 		}
 	} else {
0198a9ad
 		AA <- rbind(snp[[1]], np[[1]], deparse.level=0)
 		BB <- rbind(snp[[2]], np[[2]], deparse.level=0)
3e4db4b1
 		gt <- stripnames(calls(object))
 		emptyMatrix <- matrix(integer(), nrow(np[[1]]), ncol(A))
0198a9ad
 		GG <- rbind(gt, emptyMatrix, deparse.level=0)
3e4db4b1
 		pr <- stripnames(snpCallProbability(object))
0198a9ad
 		PP <- rbind(pr, emptyMatrix, deparse.level=0)
3e4db4b1
 	}
 	assayDataNew(storage.mode,
 		     alleleA=AA,
 		     alleleB=BB,
 		     call=GG,
 		     callProbability=PP)
eb0b8dd0
 }
57284b5e
 constructIlluminaCNSet <- function(crlmmResult,
76f72a5a
 				   path,
57284b5e
 				   snpFile,
 				   cnFile){
76f72a5a
 	load(file.path(path, "snpFile.rda"))
57284b5e
 	res <- get("res")
76f72a5a
 	load(file.path(path, "cnFile.rda"))
69ec8644
 	cnAB <- get("cnAB")
4923ac09
 	fD <- constructIlluminaFeatureData(c(res$gns, cnAB$gns), cdfName="human370v1c")
0198a9ad
 	##new.order <- order(fD$chromosome, fD$position)
 	##fD <- fD[new.order, ]
 	aD <- constructIlluminaAssayData(cnAB, res, crlmmResult, nr=nrow(fD))
81601623
 	##protocolData(crlmmResult)$batch <- vector("integer", ncol(crlmmResult))
3e4db4b1
 	new("CNSet",
 	    call=aD[["call"]],
 	    callProbability=aD[["callProbability"]],
 	    alleleA=aD[["alleleA"]],
 	    alleleB=aD[["alleleB"]],
 	    phenoData=phenoData(crlmmResult),
 	    protocolData=protocolData(crlmmResult),
 	    featureData=fD,
 	    batch=batch,
 	    annotation="human370v1c")
0198a9ad
 
4923ac09
 }
69ec8644
 
 
4923ac09
 
76f72a5a
 ellipseCenters <- function(object, index, allele, batch, log.it=TRUE){
b5b74b9f
 	ubatch <- unique(batch(object))[batch]
76f72a5a
 	Nu <- nu(object, allele)[index, batch]
 	Phi <- phi(object, allele)[index, batch]
 	centers <- list(Nu, Nu+Phi, Nu+2*Phi)
 	if(log.it)
 		centers <- lapply(centers, log2)
 	myLabels <- function(allele){
 		switch(allele,
 		       A=c("BB", "AB", "AA"),
 		       B=c("AA", "AB", "BB"),
 		       stop("allele must be 'A' or 'B'")
 		       )
 	}
 	nms <- myLabels(allele)
 	names(centers) <- nms
 	fns <- featureNames(object)[index]
 	centers$fns <- fns
69ec8644
 	return(centers)
76f72a5a
 }
b5b74b9f
 
8800496d
 
6846d583
 shrinkSummary <- function(object,
8decfc7e
 			  type="SNP",
6846d583
 			  MIN.OBS=1,
 			  MIN.SAMPLES=10,
 			  DF.PRIOR=50,
66900fea
 			  verbose=TRUE,
16fadf14
 			  marker.index){
8decfc7e
 	stopifnot(type[[1]] == "SNP")
b0270962
 	CHR.X <- FALSE ## this is no longer needed
16fadf14
 	is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix")
 	if(is.lds) stopifnot(isPackageLoaded("ff"))
66900fea
 	if(missing(marker.index)){
 		batch <- batch(object)
 		is.snp <- isSnp(object)
 		is.autosome <- chromosome(object) < 23
 		is.annotated <- !is.na(chromosome(object))
 		is.X <- chromosome(object) == 23
 		marker.index <- whichMarkers(type[[1]], is.snp, is.autosome, is.annotated, is.X)
6846d583
 	}
 	if(is.lds){
063b3d14
 		index.list <- splitIndicesByLength(marker.index, ocProbesets())
 ##		if(parStatus()){
 ##			index.list <- splitIndicesByNode(marker.index)
 ##		} else index.list <- splitIndicesByLength(marker.index, ocProbesets())
6846d583
 		ocLapply(seq(along=index.list),
3e4db4b1
 			 shrinkGenotypeSummaries,
6846d583
 			 index.list=index.list,
 			 object=object,
 			 verbose=verbose,
 			 MIN.OBS=MIN.OBS,
 			 MIN.SAMPLES=MIN.SAMPLES,
 			 DF.PRIOR=DF.PRIOR,
 			 is.lds=is.lds,
 			 neededPkgs="crlmm")
 	} else {
3e4db4b1
 		object <- shrinkGenotypeSummaries(strata=1,
6846d583
 			      index.list=list(marker.index),
 			      object=object,
66900fea
 			      verbose=verbose,
6846d583
 			      MIN.OBS=MIN.OBS,
 			      MIN.SAMPLES=MIN.SAMPLES,
 			      DF.PRIOR=DF.PRIOR,
 			      is.lds=is.lds)
 	}
16fadf14
 	if(is.lds) return(TRUE) else return(object)
6846d583
 }
 
 genotypeSummary <- function(object,
453e688a
 			    GT.CONF.THR=0.80,
6846d583
 			    type=c("SNP", "NP", "X.SNP", "X.NP"), ##"X.snps", "X.nps"),
66900fea
 			    verbose=TRUE,
16fadf14
 			    marker.index){
6846d583
 	if(type == "X.SNP" | type=="X.NP"){
 		CHR.X <- TRUE
 	} else CHR.X <- FALSE
16fadf14
 	is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix")
 	if(is.lds) stopifnot(isPackageLoaded("ff"))
66900fea
 	if(missing(marker.index)){
 		batch <- batch(object)
 		is.snp <- isSnp(object)
 		is.autosome <- chromosome(object) < 23
 		is.annotated <- !is.na(chromosome(object))
 		is.X <- chromosome(object) == 23
 		marker.index <- whichMarkers(type[[1]], is.snp, is.autosome, is.annotated, is.X)
6846d583
 	}
 	summaryFxn <- function(type){
 		switch(type,
 		       SNP="summarizeSnps",
 		       NP="summarizeNps",
 		       X.NP="summarizeNps")
 	}
a070e5bd
 	myf <- summaryFxn(type[[1]])
 	FUN <- get(myf)
6846d583
 	if(is.lds){
063b3d14
 		index.list <- splitIndicesByLength(marker.index, ocProbesets())
6846d583
 		ocLapply(seq(along=index.list),
 			 FUN,
 			 index.list=index.list,
 			 object=object,
 			 batchSize=ocProbesets(),
 			 GT.CONF.THR=GT.CONF.THR,
 			 verbose=verbose,
 			 CHR.X=CHR.X,
 			 neededPkgs="crlmm")
a070e5bd
 	} else{
66900fea
 		object <- FUN(strata=1,
6846d583
 			      index.list=list(marker.index),
 			      object=object,
 			      batchSize=ocProbesets(),
 			      GT.CONF.THR=GT.CONF.THR,
 			      verbose=verbose,
66900fea
 			      CHR.X=CHR.X)
6846d583
 	}
16fadf14
 	if(is.lds) return(TRUE) else return(object)
6846d583
 }
 
66900fea
 whichMarkers <- function(type, is.snp, is.autosome, is.annotated, is.X){
 	switch(type,
 	       SNP=which(is.snp & is.autosome & is.annotated),
 	       NP=which(!is.snp & is.autosome),
 	       X.SNP=which(is.snp & is.X),
 	       X.NP=which(!is.snp & is.X),
 	       stop("'type' must be one of 'SNP', 'NP', 'X.SNP', or 'X.NP'")
 	       )
 }
 
6846d583
 summarizeNps <- function(strata, index.list, object, batchSize,
e31c7794
 			 GT.CONF.THR, verbose=TRUE, CHR.X=FALSE, ...){
16fadf14
 	is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix")
6846d583
 	if(is.lds) {physical <- get("physical"); open(object)}
61dc01ae
 	if(verbose) message("      Probe stratum ", strata, " of ", length(index.list))
6846d583
 	index <- index.list[[strata]]
e026208f
 ##	if(CHR.X) {
 ##		sample.index <- which(object$gender==2)
 ##		batches <- split(sample.index, as.character(batch(object))[sample.index])
 ##	} else {
 ##		batches <- split(seq_along(batch(object)), as.character(batch(object)))
 ##	}
 	batches <- split(seq_along(batch(object)), as.character(batch(object)))
6846d583
 	batchnames <- batchNames(object)
 	nr <- length(index)
 	nc <- length(batchnames)
 	N.AA <- medianA.AA <- madA.AA <- tau2A.AA <- matrix(NA, nr, nc)
73fd7374
 	is.illumina <- whichPlatform(annotation(object)) == "illumina"
f0f8ba16
 	open(A(object))
 	open(object$gender)
6846d583
 	AA <- as.matrix(A(object)[index, ])
73fd7374
 	if(is.illumina){
 		BB <- as.matrix(B(object)[index, ])
 		AVG <- (AA+BB)/2
 		A(object)[index, ] <- AVG
 		AA <- AVG
 		rm(AVG, BB)
 	}
6846d583
 	for(k in seq_along(batches)){
e026208f
 		sample.index <- batches[[k]]
 		N.AA[, k] <- length(sample.index)
 		if(CHR.X){
 			gender <- object$gender[sample.index]
 			sample.index <- sample.index[gender == 2]
 			if(length(sample.index) == 0) next()
cbbfdda9
 		}
e026208f
 		this.batch <- unique(as.character(batch(object)[sample.index]))
6846d583
 		j <- match(this.batch, batchnames)
453e688a
 		I.A <- AA[, sample.index, drop=FALSE]
73fd7374
 		medianA.AA[, k] <- rowMedians(I.A, na.rm=TRUE)
 		madA.AA[, k] <- rowMAD(I.A, na.rm=TRUE)
6846d583
 		## log2 Transform Intensities
73fd7374
 		I.A <- log2(I.A)
 		tau2A.AA[, k] <- rowMAD(I.A, na.rm=TRUE)^2
6846d583
 	}
 	N.AA(object)[index,] <- N.AA
 	medianA.AA(object)[index,] <- medianA.AA
 	madA.AA(object)[index, ] <- madA.AA
 	tau2A.AA(object)[index, ] <- tau2A.AA
 	if(is.lds) return(TRUE) else return(object)
 }
 
 summarizeSnps <- function(strata,
 			  index.list,
 			  object,
e31c7794
 			  GT.CONF.THR=0.80,
 			  verbose=TRUE,
 			  CHR.X=FALSE, ...){
453e688a
 ##	if(is.lds) {
 ##		physical <- get("physical")
 ##		open(object)
 ##	}
16fadf14
 	is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix")
 	if(is.lds) stopifnot(isPackageLoaded("ff"))
61dc01ae
 	if(verbose) message("      Probe stratum ", strata, " of ", length(index.list))
6846d583
 	index <- index.list[[strata]]
4b208ce1
 	batches <- split(seq_along(batch(object)), as.character(batch(object)))
6846d583
 	batchnames <- batchNames(object)
 	nr <- length(index)
 	nc <- length(batchnames)
 	statsA.AA <- statsA.AB <- statsA.BB <- statsB.AA <- statsB.AB <- statsB.BB <- vector("list", nc)
 	corrAA <- corrAB <- corrBB <- tau2A.AA <- tau2A.BB <- tau2B.AA <- tau2B.BB <- matrix(NA, nr, nc)
 	Ns.AA <- Ns.AB <- Ns.BB <- matrix(NA, nr, nc)
0198a9ad
 	if(verbose) message("        Begin reading...")
6846d583
 	GG <- as.matrix(calls(object)[index, ])
 	CP <- as.matrix(snpCallProbability(object)[index, ])
 	AA <- as.matrix(A(object)[index, ])
 	BB <- as.matrix(B(object)[index, ])
4191b5f9
 	FL <- as.matrix(flags(object)[index, ])
453e688a
 	summaryStats <- function(X, INT, FUNS){
 		tmp <- matrix(NA, nrow(X), length(FUNS))
 		for(j in seq_along(FUNS)){
 			FUN <- match.fun(FUNS[j])
 			tmp[, j] <- FUN(X*INT, na.rm=TRUE)
 		}
 		tmp
 	}
0198a9ad
 	if(verbose) message("        Computing summaries...")
6846d583
 	for(k in seq_along(batches)){
1813938e
 		##note that the genotype frequency for AA would include 'A' on chromosome X for men
 		sample.index <- batches[[k]]
 		this.batch <- unique(as.character(batch(object)[sample.index]))
6846d583
 		j <- match(this.batch, batchnames)
a966e049
 		G <- GG[, sample.index, drop=FALSE]
 		xx <- CP[, sample.index, drop=FALSE]
6846d583
 		highConf <- (1-exp(-xx/1000)) > GT.CONF.THR
453e688a
 		## Some markers may have genotype confidences scores that are ALL below the threshold
 		## For these markers, provide statistical summaries based on all the samples and flag
 		## Provide summaries for these markers and flag to indicate the problem
 		## RS: check whether we need the next to lines and, if so, effect downstream
 		ii <- which(rowSums(highConf) == 0)
 		if(length(ii) > 0) highConf[ii, ] <- TRUE
6846d583
 		G <- G*highConf
453e688a
 		## table(rowSums(G==0))
 		##G <- G*highConf*NORM
a966e049
 		A <- AA[, sample.index, drop=FALSE]
 		B <- BB[, sample.index, drop=FALSE]
6846d583
 		## this can be time consuming...do only once
 		G.AA <- G==1
 		G.AA[G.AA==FALSE] <- NA
 		G.AB <- G==2
 		G.AB[G.AB==FALSE] <- NA
 		G.BB <- G==3
 		G.BB[G.BB==FALSE] <- NA
 		Ns.AA[, k] <- rowSums(G.AA, na.rm=TRUE)
 		Ns.AB[, k] <- rowSums(G.AB, na.rm=TRUE)
 		Ns.BB[, k] <- rowSums(G.BB, na.rm=TRUE)
1813938e
 		if(CHR.X){
 			gender <- object$gender[sample.index]
 			sample.index <- sample.index[gender == 2]
 			if(length(sample.index) == 0) next()
 		}
 		stats <- summaryStats(G.AA, A, FUNS=c("rowMedians", "rowMAD"))
 		medianA.AA(object)[index, k] <- stats[, 1]
453e688a
 		##missing.index <- which(rowSums(is.na(medianA.AA(object)[index, ,drop=FALSE])) > 0)
1813938e
 		madA.AA(object)[index, k] <- stats[, 2]
 		stats <- summaryStats(G.AB, A, FUNS=c("rowMedians", "rowMAD"))
 		medianA.AB(object)[index, k] <- stats[, 1]
 		madA.AB(object)[index, k] <- stats[, 2]
 		stats <- summaryStats(G.BB, A, FUNS=c("rowMedians", "rowMAD"))
 		medianA.BB(object)[index, k] <- stats[, 1]
 		madA.BB(object)[index, k] <- stats[, 2]
 		stats <- summaryStats(G.AA, B, FUNS=c("rowMedians", "rowMAD"))
 		medianB.AA(object)[index, k] <- stats[, 1]
 		madB.AA(object)[index, k] <- stats[, 2]
 		stats <- summaryStats(G.AB, B, FUNS=c("rowMedians", "rowMAD"))
 		medianB.AB(object)[index, k] <- stats[, 1]
 		madB.AB(object)[index, k] <- stats[, 2]
cbbfdda9
 		stats <- summaryStats(G.BB, B, FUNS=c("rowMedians", "rowMAD"))
1813938e
 		medianB.BB(object)[index, k] <- stats[, 1]
 		madB.BB(object)[index, k] <- stats[, 2]
6846d583
 		## log2 Transform Intensities
 		A <- log2(A); B <- log2(B)
 		tau2A.AA[, k] <- summaryStats(G.AA, A, FUNS="rowMAD")^2
 		tau2A.BB[, k] <- summaryStats(G.BB, A, FUNS="rowMAD")^2
 		tau2B.AA[, k] <- summaryStats(G.AA, B, FUNS="rowMAD")^2
 		tau2B.BB[, k] <- summaryStats(G.BB, B, FUNS="rowMAD")^2
 		corrAA[, k] <- rowCors(A*G.AA, B*G.AA, na.rm=TRUE)
 		corrAB[, k] <- rowCors(A*G.AB, B*G.AB, na.rm=TRUE)
 		corrBB[, k] <- rowCors(A*G.BB, B*G.BB, na.rm=TRUE)
453e688a
 		rm(A, B, G.AA, G.AB, G.BB, xx, highConf, G)
 		gc()
 	}
 	##---------------------------------------------------------------------------
 	## grand mean
 	##---------------------------------------------------------------------------
7978fa6d
 	if(FALSE){ ## no implemented
 		if(length(batches) > 1 && "grandMean" %in% batchNames(object)){
 			k <- match("grandMean", batchNames(object))
 			if(verbose) message("        computing grand means...")
 			highConf <- (1-exp(-CP/1000)) > GT.CONF.THR
 			rm(CP); gc()
 			## Some markers may have genotype confidences scores that are ALL below the threshold
 			## For these markers, provide statistical summaries based on all the samples and flag
 			## Provide summaries for these markers and flag to indicate the problem
 			ii <- which(rowSums(highConf) == 0)
 			if(length(ii) > 0) highConf[ii, ] <- TRUE
 			GG <- GG*highConf
 			rm(highConf); gc()
 			Ns <- list(Ns.AA, Ns.AB, Ns.BB)
 			rm(Ns.AA, Ns.AB, Ns.BB) ; gc()
 			I <- list(AA, BB)
 			lI <- lapply(I, log2)
 			cors <- list(corrAA, corrAB, corrBB)
 			rm(corrAA, corrAB, corrBB); gc()
 			rm(AA,BB); gc()
 			tau2 <- list(AA=list(tau2A.AA,
 				     tau2B.AA),
 				     AB=list(NULL, NULL),
 				     BB=list(tau2A.BB,
 				     tau2B.BB))
 			rm(tau2A.AA, tau2B.AA, tau2A.BB, tau2B.BB); gc()
 			for(i in 1:3){
 				G.call <- isCall(GG, call=i)
 				for(j in 1:2){
 					computeSummary(object,
 						       G.call,
 						       call=i,
 						       I=I[[j]],
 						       allele=c("A", "B")[j],
 						       Ns=Ns[[i]],
 						       call=i,
 						       tau2=tau2[[i]][[j]],
 						       index=index)
 				}
 				updateCors(cors[[i]], G.call, lI)
 			}
 			##I <- lapply(I, log2)
 			##AA <- log2(AA)
 			##BB <- log2(BB)
 			##		tau2A.AA[, k] <- summaryStats(G.AA, AA, FUNS="rowMAD")^2
 			##		tau2A.BB[, k] <- summaryStats(G.BB, AA, FUNS="rowMAD")^2
 			##tau2B.AA[, k] <- summaryStats(G.AA, BB, FUNS="rowMAD")^2
 			##tau2B.BB[, k] <- summaryStats(G.BB, BB, FUNS="rowMAD")^2
 			##		corrAA[, k] <- rowCors(AA*G.AA, BB*G.AA, na.rm=TRUE)
 			##		corrAB[, k] <- rowCors(AA*G.AB, BB*G.AB, na.rm=TRUE)
 			##		corrBB[, k] <- rowCors(AA*G.BB, BB*G.BB, na.rm=TRUE)
 			##		rm(AA, BB); gc()
 			##
 			## TODO:   fill in NAs -- use code from shrinkGenotypeSummaries
 			##
 			##		rm(GG, CP, AA, BB, FL, stats)
 			##		gc()
 			##		G.AA <- GG==1
 			##		G.AA[G.AA==FALSE] <- NA
 			##		G.AB <- GG==2
 			##		G.AB[G.AB==FALSE] <- NA
 			##		G.BB <- GG==3
 			##		G.BB[G.BB==FALSE] <- NA
 			##		Ns.AA[, k] <- rowSums(G.AA, na.rm=TRUE)
 			##		Ns.AB[, k] <- rowSums(G.AB, na.rm=TRUE)
 			##		Ns.BB[, k] <- rowSums(G.BB, na.rm=TRUE)
 			##		N.AA(object)[index,] <- Ns.AA
 			##		N.AB(object)[index,] <- Ns.AB
 			##		N.BB(object)[index,] <- Ns.BB
 
 			## Calculate row medians and MADs
 			##		medianA.AA(object)[index, k] <- stats[, 1]
 			##		madA.AA(object)[index, k] <- stats[, 2]
 			##		stats <- summaryStats(G.AB, AA, FUNS=c("rowMedians", "rowMAD"))
 			##		medianA.AB(object)[index, k] <- stats[, 1]
 			##		madA.AB(object)[index, k] <- stats[, 2]
 			##		stats <- summaryStats(G.BB, AA, FUNS=c("rowMedians", "rowMAD"))
 			##		medianA.BB(object)[index, k] <- stats[, 1]
 			##		madA.BB(object)[index, k] <- stats[, 2]
 			##		stats <- summaryStats(G.AA, BB, FUNS=c("rowMedians", "rowMAD"))
 			##		medianB.AA(object)[index, k] <- stats[, 1]
 			##		madB.AA(object)[index, k] <- stats[, 2]
 			##		stats <- summaryStats(G.AB, BB, FUNS=c("rowMedians", "rowMAD"))
 			##		medianB.AB(object)[index, k] <- stats[, 1]
 			##		madB.AB(object)[index, k] <- stats[, 2]
 			##		stats <- summaryStats(G.BB, BB, FUNS=c("rowMedians", "rowMAD"))
 			##		medianB.BB(object)[index, k] <- stats[, 1]
 			##		madB.BB(object)[index, k] <- stats[, 2]
 			## log2 Transform Intensities
 		}
6846d583
 	}
7978fa6d
 	##	if(verbose) message("        Begin writing...")
6846d583
 	N.AA(object)[index,] <- Ns.AA
 	N.AB(object)[index,] <- Ns.AB
 	N.BB(object)[index,] <- Ns.BB
 	corrAA(object)[index,] <- corrAA
 	corrAB(object)[index,] <- corrAB
 	corrBB(object)[index,] <- corrBB
 	tau2A.AA(object)[index, ] <- tau2A.AA
 	tau2A.BB(object)[index, ] <- tau2A.BB
 	tau2B.AA(object)[index, ] <- tau2B.AA
 	tau2B.BB(object)[index, ] <- tau2B.BB
 	if(is.lds) return(TRUE) else return(object)
 }
 
7978fa6d
 isCall <- function(G, call){
 	G.call <- G==call
 	G.call[G.call==FALSE] <- NA
 	G.call
 }
 
ad3cc706
 ##computeSummary <- function(object, G.call, call, I, allele, Ns, index){
 ##	k <- match("grandMean", batchNames(object))
 ##	stats <- summaryStats(G.call, I, FUNS=c("rowMedians", "rowMAD"))
 ##	Ns[, k] <- rowSums(G.call, na.rm=TRUE)
 ##	updateStats(stats, Ns, object, call, allele, index)
 ##	gc()
 ##	return()
 ##}
 
 ##updateTau <- function(object, tau2, G.call, call, I, allele, index){
 ##	k <- match("grandMean", batchNames(object))
 ##	logI <- log2(I)
 ##	rm(I); gc()
 ##	tau2[, k] <- summaryStats(G.call, logI, FUNS="rowMAD")^2
 ##	if(call==1 & allele=="A"){
 ##		tau2A.AA(object)[index, ] <- tau2
 ##	}
 ##	if(call==1 & allele=="B"){
 ##		tau2B.AA(object)[index, ] <- tau2
 ##	}
 ##	if(call==3 & allele=="A"){
 ##		tau2A.BB(object)[index, ] <- tau2
 ##	}
 ##	if(call==3 & allele=="B"){
 ##		tau2B.BB(object)[index, ] <- tau2
 ##	}
 ##	NULL
 ##}
 
 ##updateCors <- function(cors, G.call, I){
 ##	k <- match("grandMean", batchNames(object))
 ##	cors[, k] <- rowCors(I[[1]]*G.call, I[[2]]*G.call, na.rm=TRUE)
 ##	if(call==1){
 ##		corrAA(object)[index, ] <- cors
 ##	}
 ##	if(call==2){
 ##		corrAB(object)[index, ] <- cors
 ##	}
 ##	if(call==3){
 ##		corrBB(object)[index, ] <- cors
 ##	}
 ##}
 
 ##updateStats <- function(stats, Ns, object, call, allele, tau2, index){
 ##	if(call==1){
 ##		Ns.AA(object)[index, ] <- Ns
 ##		if(allele=="A"){
 ##			medianA.AA(object)[index, k] <- stats[, 1]
 ##			madA.AA(object)[index, k] <- stats[, 2]
 ##			updateTau(object, tau2, G.call, call, I, allele, index)
 ##		} else {
 ##			medianB.AA(object)[index, k] <- stats[, 1]
 ##			madB.AA(object)[index, k] <- stats[, 2]
 ##			updateTau(object, tau2, G.call, call, I, allele, index)
 ##		}
 ##	}
 ##	if(call==2){
 ##		Ns.AB(object)[index, ] <- Ns
 ##		if(allele=="A"){
 ##			medianA.AB(object)[index, k] <- stats[, 1]
 ##			madA.AB(object)[index, k] <- stats[, 2]
 ##		} else {
 ##			medianB.AB(object)[index, k] <- stats[, 1]
 ##			madB.AB(object)[index, k] <- stats[, 2]
 ##		}
 ##	}
 ##	if(call==3){
 ##		Ns.BB(object)[index, ] <- Ns
 ##		if(allele=="A"){
 ##			medianA.BB(object)[index, k] <- stats[, 1]
 ##			madA.BB(object)[index, k] <- stats[, 2]
 ##			updateTau(object, tau2, G.call, call, I, allele, index)
 ##		} else {
 ##			medianB.BB(object)[index, k] <- stats[, 1]
 ##			madB.BB(object)[index, k] <- stats[, 2]
 ##			updateTau(object, tau2, G.call, call, I, allele, index)
 ##		}
 ##	}
 ##}
7978fa6d
 
c45787d3
 crlmmCopynumber <- function(object,
d08acba1
 			    MIN.SAMPLES=10,
 			    SNRMin=5,
 			    MIN.OBS=1,
 			    DF.PRIOR=50,
 			    bias.adj=FALSE,
 			    prior.prob=rep(1/4,4),
 			    seed=1,
 			    verbose=TRUE,
4191b5f9
 			    GT.CONF.THR=0.80,
d08acba1
 			    MIN.NU=2^3,
 			    MIN.PHI=2^3,
 			    THR.NU.PHI=TRUE,
e31c7794
 			    type=c("SNP", "NP", "X.SNP", "X.NP"),
 			    fit.linearModel=TRUE){
415a3b06
 	typeof <- paste(type, collapse=",")
 	stopifnot(typeof %in% c("SNP", "NP", "SNP,NP", "SNP,X.SNP", "SNP,X.NP", "SNP,NP,X.SNP", "SNP,NP,X.SNP,X.NP"))
453e688a
 	if(GT.CONF.THR >= 1 | GT.CONF.THR < 0) stop("GT.CONF.THR must be in [0,1)")
b5b74b9f
 	batch <- batch(object)
8800496d
 	is.snp <- isSnp(object)
 	is.autosome <- chromosome(object) < 23
 	is.annotated <- !is.na(chromosome(object))
 	is.X <- chromosome(object) == 23
6846d583
 	is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix")
66900fea
 	if(is.lds) stopifnot(isPackageLoaded("ff"))
6846d583
 	samplesPerBatch <- table(as.character(batch(object)))
f0f8ba16
 	open(object)
d08acba1
 	if(any(samplesPerBatch < MIN.SAMPLES)){
453e688a
 		tmp <- paste(names(samplesPerBatch)[samplesPerBatch < MIN.SAMPLES], collapse=", ")
 		message("The following batches have fewer than ", MIN.SAMPLES, " samples: ",  tmp)
 		message("Not estimating copy number parameters for batch ", tmp)
b5b74b9f
 	}
66900fea
 	mylabel <- function(marker.type){
 		switch(marker.type,
 		       SNP="autosomal SNPs",
 		       NP="autosomal nonpolymorphic markers",
 		       X.SNP="chromosome X SNPs",
 		       X.NP="chromosome X nonpolymorphic markers")
 	}
783dc471
 	if(verbose) message("Computing summary statistics of the genotype clusters for each batch")
453e688a
 	I <- which(type %in% c("SNP", "NP", "X.NP"))
 	for(j in seq_along(I)){ ## do not do X.SNP.  Do this during fit.lm3
 		i <- I[j]
66900fea
 		marker.type <- type[i]
 		if(verbose) message(paste("...", mylabel(marker.type)))
 		##if(verbose) message(paste("Computing summary statistics for ", mylabel(marker.type), " genotype clusters for each batch")
453e688a
 		marker.index <- whichMarkers(marker.type, is.snp,
 					     is.autosome, is.annotated, is.X)
16fadf14
 		if(length(marker.index) == 0) next()
 		res <- genotypeSummary(object=object,
 				       GT.CONF.THR=GT.CONF.THR,
 				       type=marker.type,
 				       verbose=verbose,
 				       marker.index=marker.index)
 		if(!is.lds) {object <- res; rm(res); gc()}
66900fea
 	}
3e4db4b1
 	if(verbose) message("Imputing unobserved genotype medians and shrinking the variances (within-batch, across loci) ")##SNPs only
453e688a
 	if("SNP" %in% type){
 		marker.index <- whichMarkers("SNP", is.snp,
 					     is.autosome, is.annotated, is.X)
16fadf14
 		if(length(marker.index) > 0){
 			res <- shrinkSummary(object=object,
 					     MIN.OBS=MIN.OBS,
 					     MIN.SAMPLES=MIN.SAMPLES,
 					     DF.PRIOR=DF.PRIOR,
 					     type="SNP",
 					     verbose=verbose,
 					     marker.index=marker.index)
 			if(!is.lds) {object <- res; rm(res); gc()}
 		}
453e688a
 	}
66900fea
 	if(verbose) message("Estimating parameters for copy number")##SNPs only
e31c7794
 	if(fit.linearModel){
 		for(i in seq_along(type)){
 			marker.type <- type[i]
 			message(paste("...", mylabel(marker.type)))
 			CHR.X <- ifelse(marker.type %in% c("X.SNP", "X.NP"), TRUE, FALSE)
 			marker.index <- whichMarkers(marker.type, is.snp,
 						     is.autosome, is.annotated, is.X)
 			if(length(marker.index) == 0) next()
 			res <- estimateCnParameters(object=object,
 						    type=marker.type,
 						    SNRMin=SNRMin,
 						    DF.PRIOR=DF.PRIOR,
 						    GT.CONF.THR=GT.CONF.THR,
 						    MIN.SAMPLES=MIN.SAMPLES,
 						    MIN.OBS=MIN.OBS,
 						    MIN.NU=MIN.NU,
 						    MIN.PHI=MIN.PHI,
 						    THR.NU.PHI=THR.NU.PHI,
 						    verbose=verbose,
 						    marker.index=marker.index,
 						    is.lds=is.lds,
 						    CHR.X=CHR.X)
 		}
 		close(object)
 	}
16fadf14
 	if(is.lds) return(TRUE) else return(object)
66900fea
 }
e31c7794
 
6b75ec77
 crlmmCopynumber2 <- function(){
 	.Defunct(msg="The crlmmCopynumber2 function has been deprecated. The function crlmmCopynumber should be used instead.  crlmmCopynumber will support large data using ff provided that the ff package is loaded.")
 }
 crlmmCopynumberLD <- crlmmCopynumber2
 
66900fea
 
453e688a
 
66900fea
 estimateCnParameters <- function(object,
 				 type=c("SNP", "NP", "X.SNP", "X.NP"),
 				 verbose=TRUE,
 				 SNRMin=5,
 				 DF.PRIOR=50,
 				 GT.CONF.THR=0.95,
 				 MIN.SAMPLES=10,
 				 MIN.OBS=1,
 				 MIN.NU=8,
 				 MIN.PHI=8,
 				 THR.NU.PHI=TRUE,
 				 marker.index,
 				 CHR.X,
 				 is.lds){
 	if(missing(marker.index)){
 		batch <- batch(object)
 		is.snp <- isSnp(object)
 		is.autosome <- chromosome(object) < 23
 		is.annotated <- !is.na(chromosome(object))
 		is.X <- chromosome(object) == 23
 		is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix")
df2226f0
 		if(is.lds) stopifnot(isPackageLoaded("ff"))
66900fea
 		CHR.X <- ifelse(type[[1]] %in% c("X.SNP", "X.NP"), TRUE, FALSE)
 		marker.index <- whichMarkers(type[[1]], is.snp, is.autosome, is.annotated, is.X)
b5b74b9f
 	}
d08acba1
 	lmFxn <- function(type){
 		switch(type,
6846d583
 		       SNP="fit.lm1",
 		       NP="fit.lm2",
 		       X.SNP="fit.lm3",
 		       X.NP="fit.lm4")
b5b74b9f
 	}
a070e5bd
 	myfun <- lmFxn(type[[1]])
 	FUN <- get(myfun)
d08acba1
 	if(is.lds){
063b3d14
 		index.list <- splitIndicesByLength(marker.index, ocProbesets())
 ##		if(parStatus()){
 ##			index.list <- splitIndicesByNode(marker.index)
 ##		} else index.list <- splitIndicesByLength(marker.index, ocProbesets())
d08acba1
 		ocLapply(seq(along=index.list),
 			 FUN,
 			 index.list=index.list,
 			 marker.index=marker.index,
 			 object=object,
 			 batchSize=ocProbesets(),
 			 SNRMin=SNRMin,
 			 MIN.SAMPLES=MIN.SAMPLES,
 			 MIN.OBS=MIN.OBS,
 			 DF=DF.PRIOR,
 			 GT.CONF.THR=GT.CONF.THR,
 			 THR.NU.PHI=THR.NU.PHI,
 			 MIN.NU=MIN.NU,
 			 MIN.PHI=MIN.PHI,
 			 verbose=verbose,
6846d583
 			 is.lds=is.lds,
 			 CHR.X=CHR.X,
d08acba1
 			 neededPkgs="crlmm")
 	} else {
a070e5bd
 		##FUN <- match.fun(FUN)
6846d583
 		object <- FUN(strata=1,
 			      index.list=list(marker.index),
 			      marker.index=marker.index,
 			      object=object,
56b783ec
 			      batchSize=ocProbesets(),
6846d583
 			      SNRMin=SNRMin,
 			      MIN.SAMPLES=MIN.SAMPLES,
 			      MIN.OBS=MIN.OBS,
 			      DF.PRIOR=DF.PRIOR,
 			      GT.CONF.THR=GT.CONF.THR,
 			      THR.NU.PHI=THR.NU.PHI,
 			      MIN.NU=MIN.NU,
 			      MIN.PHI=MIN.PHI,
 			      is.lds=is.lds,
 			      CHR.X=CHR.X,
 			      verbose=verbose)
d08acba1
 	}
b5b74b9f
 	message("finished")
d08acba1
 	return(object)
b5b74b9f
 }
2fd64c70
 
 
 imputeAA.AB <- function(index, N.AA, N.AB, N.BB,
 			medianA.AA, medianA.AB, medianA.BB,
 			medianB.AA, medianB.AB, medianB.BB){
 	gt.to.impute <- 1:2
 	imputed <- rep(FALSE, length(index))
 	for(i in index){
 		Ns <- cbind(N.AA[i, ], N.AB[i, ], N.BB[i, ])
 		medianA <- cbind(medianA.AA[i, ], medianA.AB[i, ], medianA.BB[i, ])
 		medianB <- cbind(medianB.AA[i, ], medianB.AB[i, ], medianB.BB[i, ])
 		colnames(medianA) <- colnames(medianB) <- c("AA", "AB", "BB")
 
 		##Find batches with sufficient data
 		K <- which(rowSums(Ns < 3) == 0)
 		if(length(K) < 1) next() ## nothing we can do.  use information from other snps
 		X <- cbind(1, medianA[K, -gt.to.impute, drop=FALSE], medianB[K, -gt.to.impute, drop=FALSE])
 		Y <- cbind(medianA[K, gt.to.impute, drop=FALSE], medianB[K, gt.to.impute, drop=FALSE])
 		tmp <- tryCatch(betahat <- solve(crossprod(X), crossprod(X,Y)), error=function(e) NULL)
453e688a
 		if(is.null(tmp)) {
 			##cat(".");
 			next()
 		}
2fd64c70
 		## Get data from observed genotypes in batch with insufficient data for genotypes 'gt.to.impute'
 		L <- which(rowSums(Ns < 3) == 2)
453e688a
 		if(length(L) == 0) next()
2fd64c70
 		Z <- cbind(1, medianA[L, -gt.to.impute, drop=FALSE], medianB[L, -gt.to.impute, drop=FALSE])
 		imputedVals <- Z %*% betahat
 		medianA.AA[i, L] <- imputedVals[, 1]
 		medianA.AB[i, L] <- imputedVals[, 2]
 		medianB.AA[i, L] <- imputedVals[, 3]
 		medianB.AB[i, L] <- imputedVals[, 4]
 		imputed[i] <- TRUE
 	}
 	return(list(medianA.AA=medianA.AA, medianA.AB=medianA.AB, medianA.BB=medianA.BB,
 		    medianB.AA=medianB.AA, medianB.AB=medianB.AB, medianB.BB=medianB.BB,
 		    imputed=imputed))
 }
 
 imputeAB.BB <- function(index, N.AA, N.AB, N.BB,
 			medianA.AA, medianA.AB, medianA.BB,
 			medianB.AA, medianB.AB, medianB.BB){
 	gt.to.impute <- 2:3
 	imputed <- rep(FALSE, length(index))
 	for(j in seq_along(index)){
 		i <- index[j]
 		Ns <- cbind(N.AA[i, ], N.AB[i, ], N.BB[i, ])
 		medianA <- cbind(medianA.AA[i, ], medianA.AB[i, ], medianA.BB[i, ])
 		medianB <- cbind(medianB.AA[i, ], medianB.AB[i, ], medianB.BB[i, ])
 		colnames(medianA) <- colnames(medianB) <- c("AA", "AB", "BB")
 
 		##Find batches with sufficient data
 		K <- which(rowSums(Ns < 3) == 0)
 		if(length(K) < 1) next() ## nothing we can do.  use information from other snps
 		X <- cbind(1, medianA[K, -gt.to.impute, drop=FALSE], medianB[K, -gt.to.impute, drop=FALSE])
 		Y <- cbind(medianA[K, gt.to.impute, drop=FALSE], medianB[K, gt.to.impute, drop=FALSE])
 		tmp <- tryCatch(betahat <- solve(crossprod(X), crossprod(X,Y)), error=function(e) NULL)
453e688a
 		if(is.null(tmp)) {
 ##			cat(".");
 			next()
 		}
2fd64c70
 
 		## Get data from observed genotypes in batch with insufficient data for genotypes 'gt.to.impute'
 		L <- which(rowSums(Ns < 3) == 2)
453e688a
 		if(length(L) == 0) next()
2fd64c70
 		Z <- cbind(1, medianA[L, -gt.to.impute, drop=FALSE], medianB[L, -gt.to.impute, drop=FALSE])
 		imputedVals <- Z %*% betahat
 		medianA.AB[i, L] <- imputedVals[, 1]
 		medianA.BB[i, L] <- imputedVals[, 2]
 		medianB.AB[i, L] <- imputedVals[, 3]
 		medianB.BB[i, L] <- imputedVals[, 4]
 		imputed[j] <- TRUE
 	}
 	return(list(medianA.AA=medianA.AA, medianA.AB=medianA.AB, medianA.BB=medianA.BB,
 		    medianB.AA=medianB.AA, medianB.AB=medianB.AB, medianB.BB=medianB.BB,
 		    imputed=imputed))
 }
 
 imputeAA <- function(index, N.AA, N.AB, N.BB,
 		     medianA.AA, medianA.AB, medianA.BB,
 		     medianB.AA, medianB.AB, medianB.BB){
 	gt.to.impute <- 1
 	imputed <- rep(FALSE, length(index))
 	for(j in seq_along(index)){
 		i <- index[j]
 		Ns <- cbind(N.AA[i, ], N.AB[i, ], N.BB[i, ])
 		medianA <- cbind(medianA.AA[i, ], medianA.AB[i, ], medianA.BB[i, ])
 		medianB <- cbind(medianB.AA[i, ], medianB.AB[i, ], medianB.BB[i, ])
 		colnames(medianA) <- colnames(medianB) <- c("AA", "AB", "BB")
 
 		##Find batches with sufficient data
 		K <- which(rowSums(Ns < 3) == 0)
 		if(length(K) < 1) next() ## nothing we can do.  use information from other snps
 		X <- cbind(1, medianA[K, -gt.to.impute, drop=FALSE], medianB[K, -gt.to.impute, drop=FALSE])
 		Y <- cbind(medianA[K, gt.to.impute, drop=FALSE], medianB[K, gt.to.impute, drop=FALSE])
 		tmp <- tryCatch(betahat <- solve(crossprod(X), crossprod(X,Y)), error=function(e) NULL)
453e688a
 		if(is.null(tmp)) {
 			##cat(".");
 			next()
 		}
2fd64c70
 		## Get data from observed genotypes in batch with insufficient data for genotypes 'gt.to.impute'
 		L <- which(rowSums(Ns < 3) == 1)
453e688a
 		if(length(L) == 0) next()
2fd64c70
 		Z <- cbind(1, medianA[L, -gt.to.impute, drop=FALSE], medianB[L, -gt.to.impute, drop=FALSE])
 		imputedVals <- Z %*% betahat
 		medianA.AA[i, L] <- imputedVals[, 1]
 		medianB.AA[i, L] <- imputedVals[, 2]
 		imputed[j] <- TRUE
 	}
 	return(list(medianA.AA=medianA.AA, medianA.AB=medianA.AB, medianA.BB=medianA.BB,
 		    medianB.AA=medianB.AA, medianB.AB=medianB.AB, medianB.BB=medianB.BB,
 		    imputed=imputed))
 }
 
 imputeBB <- function(index, N.AA, N.AB, N.BB,
 		     medianA.AA, medianA.AB, medianA.BB,
 		     medianB.AA, medianB.AB, medianB.BB){
 	gt.to.impute <- 3
 	imputed <- rep(FALSE, length(index))
 	for(j in seq_along(index)){
 		i <- index[j]
 		Ns <- cbind(N.AA[i, ], N.AB[i, ], N.BB[i, ])
 		medianA <- cbind(medianA.AA[i, ], medianA.AB[i, ], medianA.BB[i, ])
 		medianB <- cbind(medianB.AA[i, ], medianB.AB[i, ], medianB.BB[i, ])
 		colnames(medianA) <- colnames(medianB) <- c("AA", "AB", "BB")
 
 		##Find batches with sufficient data
 		K <- which(rowSums(Ns < 3) == 0)
 		if(length(K) < 1) next() ## nothing we can do.  use information from other snps
 		X <- cbind(1, medianA[K, -gt.to.impute, drop=FALSE], medianB[K, -gt.to.impute, drop=FALSE])
 		Y <- cbind(medianA[K, gt.to.impute, drop=FALSE], medianB[K, gt.to.impute, drop=FALSE])
453e688a
 		colnames(Y) <- c("A.BB", "B.BB")
2fd64c70
 		tmp <- tryCatch(betahat <- solve(crossprod(X), crossprod(X,Y)), error=function(e) NULL)
453e688a
 		if(is.null(tmp)) {
 			##cat(".");
 			next()
 		}
 ##		else{
 ##			R <- Y-crossprod(X, betahat)
 ##			RSS <- t(R)%*%R
 ##		}
2fd64c70
 		## Get data from observed genotypes in batch with insufficient data for genotypes 'gt.to.impute'
 		L <- which(rowSums(Ns < 3) == 1)
453e688a
 		if(length(L) == 0) next()
2fd64c70
 		Z <- cbind(1, medianA[L, -gt.to.impute, drop=FALSE], medianB[L, -gt.to.impute, drop=FALSE])
 		imputedVals <- Z %*% betahat
 		medianA.BB[i, L] <- imputedVals[, 1]
 		medianB.BB[i, L] <- imputedVals[, 2]
 		imputed[j] <- TRUE
 	}
 	return(list(medianA.AA=medianA.AA, medianA.AB=medianA.AB, medianA.BB=medianA.BB,
 		    medianB.AA=medianB.AA, medianB.AB=medianB.AB, medianB.BB=medianB.BB,
 		    imputed=imputed))
 }
 
 imputeAcrossBatch <- function(N.AA, N.AB, N.BB,
 			      medianA.AA, medianA.AB, medianA.BB,
 			      medianB.AA, medianB.AB, medianB.BB){
 	N.missing <- (N.AA < 3) + (N.AB < 3) + (N.BB < 3)
 	## find all indices in which one or more batches need to have 2 genotypes imputed
 	missingAA.AB <- (N.AA < 3) & (N.AB < 3)
 	missingAB.BB <- (N.AB < 3) & (N.BB < 3)
 	missingAA <- (N.AA < 3) & (N.AB >= 3)
 	missingBB <- (N.BB < 3) & (N.AB >= 3)
 	index <- list(AA.AB=which(rowSums(missingAA.AB) > 0),
 		      AB.BB=which(rowSums(missingAB.BB) > 0),
 		      AA=which(rowSums(missingAA) > 0),
 		      BB=which(rowSums(missingBB) > 0))
 	imputeNone <- which(rowSums(N.missing == 0) > 0)
 	## only works if there are batches with complete data
 	index <- lapply(index, intersect, y=imputeNone)
 	##indices.to.update <- rep(1:4, each=sapply(index, length))
 	updated <- vector("list", 4)
 	names(updated) <- c("AA.AB", "AB.BB", "AA", "BB")
 	if(length(index[["AA.AB"]] > 0)){
 		res <- imputeAA.AB(index[["AA.AB"]],
453e688a
 				   N.AA,
 				   N.AB,
 				   N.BB,
 				   medianA.AA,
 				   medianA.AB,
 				   medianA.BB,
 				   medianB.AA, medianB.AB, medianB.BB)
2fd64c70
 		updated$AA.AB <- res$imputed
 	}
 	if(length(index[["AB.BB"]] > 0)){
 		res <- imputeAB.BB(index[["AB.BB"]],
 				   N.AA,
 				   N.AB,
 				   N.BB,
 				   res[["medianA.AA"]],
 				   res[["medianA.AB"]],
 				   res[["medianA.BB"]],
453e688a
 				   res[["medianB.AA"]],
 				   res[["medianB.AB"]],
 				   res[["medianB.BB"]])
2fd64c70
 		updated$AB.BB <- res$imputed
 	}
 	if(length(index[["AA"]] > 0)){
 		res <- imputeAA(index[["AA"]],
 				N.AA,
 				N.AB,
 				N.BB,
 				res[["medianA.AA"]],
 				res[["medianA.AB"]],
 				res[["medianA.BB"]],
 				res[["medianB.AA"]], res[["medianB.AB"]], res[["medianB.BB"]])
 		updated$AA <- res$imputed
 	}
 	if(length(index[["BB"]] > 0)){
 		res <- imputeBB(index[["BB"]],
 				N.AA,
 				N.AB,
 				N.BB,
 				res[["medianA.AA"]],
 				res[["medianA.AB"]],
 				res[["medianA.BB"]],
453e688a
 				res[["medianB.AA"]],
 				res[["medianB.AB"]],
 				res[["medianB.BB"]])
2fd64c70
 		updated$BB <- res$imputed
 	}
 	updated.indices <- unlist(updated)
453e688a
 	return(list(res, updated))
 }
 
86e551f4
 
e516fadb
 ##calculatePosteriorMean <- function(object, type=c("SNP", "NP", "X.SNP", "X.NP"), verbose=TRUE,
 ##				   prior.prob=c(1/7, 1/7, 3/7, 1/7, 1/7),
 ##				   CN=0:4, scale.sd=1){
 ##	stopifnot(type %in% c("SNP", "NP", "X.SNP", "X.NP"))
 ##	stopifnot(sum(prior.prob)==1)
 ##	stopifnot(length(CN) == length(prior.prob))
 ##	batch <- batch(object)
 ##	is.snp <- isSnp(object)
 ##	is.autosome <- chromosome(object) < 23
 ##	is.annotated <- !is.na(chromosome(object))
 ##	is.X <- chromosome(object) == 23
 ##	is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix")
 ##	if(is.lds) stopifnot(isPackageLoaded("ff"))
 ##	samplesPerBatch <- table(as.character(batch(object)))
 ##	if(!"posteriorMean" %in% assayDataElementNames(object)){
 ##		message("adding <posteriorMean> slot to assayData")
 ##		pM <- matrix(NA, nrow(object), ncol(object), dimnames=list(featureNames(object), sampleNames(object)))
 ##		tmp <- assayDataNew(alleleA=A(object),
 ##				    alleleB=B(object),
 ##				    call=calls(object),
 ##				    callProbability=snpCallProbability(object),
 ##				    posteriorMean=pM)
 ##		assayData(object) <- tmp
 ##	}
 ##	## add new assay data element for posterior probabilities
 ##	mylabel <- function(marker.type){
 ##		switch(marker.type,
 ##		       SNP="autosomal SNPs",
 ##		       NP="autosomal nonpolymorphic markers",
 ##		       X.SNP="chromosome X SNPs",
 ##		       X.NP="chromosome X nonpolymorphic markers")
 ##	}
 ##	if(type=="SNP"){
 ##		if(verbose) message(paste("...", mylabel(type)))
 ##		marker.index <- whichMarkers("SNP",
 ##					     is.snp,
 ##					     is.autosome,
 ##					     is.annotated,
 ##					     is.X)
 ##		marker.list <- splitIndicesByLength(marker.index, ocProbesets())
 ##		emit <- ocLapply(seq_along(marker.list),
 ##				 posteriorMean.snp,
 ##				 object=object,
 ##				 index.list=marker.list,
 ##				 verbose=verbose,
 ##				 prior.prob=prior.prob,
 ##				 CN=CN,
 ##				 scale.sd=scale.sd)
 ##		#for(i in seq_along(marker.list)){
 ##		#index <- marker.list[[i]]
 ##
 ##	#}
 ##	} else stop("type not available")
 ##	if(length(emit)==1) emit <- emit[[1]] else {
 ##		state.names <- dimnames(emit[[1]])[[3]]
 ##		tmp <- array(NA, dim=c(length(unlist(marker.list)), ncol(object), length(prior.prob)))
 ##		for(i in seq_along(marker.list)){
 ##			marker.index <- marker.list[[i]]
 ##			tmp[marker.index, , ] <- emit[[i]]
 ##		}
 ##		emit <- tmp
 ##		dimnames(emit) <- list(featureNames(object)[unlist(marker.list)],
 ##				       sampleNames(object),
 ##				       state.names)
 ##	}#stop("need to rbind elements of emit list?")
 ##	##tmp <- do.call("rbind", emit)
 ##	match.index <- match(rownames(emit), featureNames(object))
 ##	S <- length(prior.prob)
 ##	pM <- matrix(0, length(match.index), ncol(object), dimnames=list(featureNames(object)[match.index], sampleNames(object)))
 ##	for(i in 1:S) pM <- pM + CN[i]*emit[, , i]
 ##	posteriorMean(object)[match.index, ] <- pM
 ##	return(object)
 ##}
453e688a
 
b03dd092
 .open <- function(object){
453e688a
 		open(B(object))
 		open(tau2A.AA(object))
 		open(tau2B.BB(object))
 		open(tau2A.BB(object))
 		open(tau2B.AA(object))
 		open(corrAA(object))
 		open(corrAB(object))
 		open(corrBB(object))
 		open(nuA(object))
 		open(nuB(object))
 		open(phiA(object))
 		open(phiB(object))
b03dd092
 }
 .close <- function(object){
 	close(A(object))
 	close(B(object))
 	close(tau2A.AA(object))
 	close(tau2B.BB(object))
 	close(tau2A.BB(object))
 	close(tau2B.AA(object))
 	close(corrAA(object))
 	close(corrAB(object))
 	close(corrBB(object))
 	close(nuA(object))
 	close(nuB(object))
 	close(phiA(object))
 	close(phiB(object))
 }
 
 sum.mymatrix <- function(...){
 	x <- list(...)
 	return(x[[1]] + do.call(sum, x[-1]))
 }
 numberGenotypes <- function(CT){
 	stopifnot(length(CT)==1)
 	copynumber <- paste("cn", CT, sep="")
 	switch(copynumber,
 		cn0=1,
 		cn1=2,
 		cn2=3,
 		cn3=4,
 		cn4=4,
 		cn5=6, NULL)
 }
 
64c14dfc
 .posterior <- function(CT,
 		       tau2A,
 		       tau2B,
 		       sig2A,
 		       sig2B,
39257387
 		       nuA,
 		       nuB,
 		       phiA,
 		       phiB,
 		       corrAA,
 		       corrAB,
 		       corrBB,
64c14dfc
 		       a,
39257387
 		       b,
 		       scale.sd){
64c14dfc
 	## 5: AAAAA, AAAAB, AAABB, AABBB, ABBBB, BBBBB
 	##CN=4
 	## AAAA, AAAB, AABB, ABBB, BBBB:  L = 4
 	##CN=3
 	##  AAA, AAB, ABB, BBB ; L = 4
 	## CN=2
 	##  AA, AB, BB; L=3
 	## CN = 1: A, B; L=2
 	## CN = 0: null; L=1
 	L <- numberGenotypes(CT)
 	stopifnot(!is.null(L))
 	f.x.y <- vector("list", L)
 	for(i in seq_along(f.x.y)){
 		CA <- (0:CT)[i]
 		CB <- CT-CA
 		A.scale <- sqrt(tau2A*(CA==0) + sig2A*(CA > 0))
 		B.scale <- sqrt(tau2B*(CB==0) + sig2B*(CB > 0))
 		if(CA == 0 | CB == 0){## possibly homozygous BB and AA, respectively
 			A.scale <- A.scale*scale.sd[1]
 			B.scale <- B.scale*scale.sd[1]
 		} else { ## one or both greater than zero
 			if(length(scale.sd) == 1) scale.sd <- rep(scale.sd, 2)
 			A.scale <- A.scale*scale.sd[2]
 			B.scale <- B.scale*scale.sd[2]
 		}
 		A.scale <- as.numeric(A.scale)
 		B.scale <- as.numeric(B.scale)
 		A.scale2 <- A.scale^2
 		B.scale2 <- B.scale^2
 		if(CA == 0 & CB == 0) rho <- 0
 		if(CA == 0 & CB > 0) rho <- corrBB
 		if(CA > 0 & CB == 0) rho <- corrAA
 		if(CA > 0 & CB > 0) rho <- corrAB
 		rho <- as.numeric(rho)
 		meanA <- as.numeric(log2(nuA+CA*phiA))
 		meanB <- as.numeric(log2(nuB+CB*phiB))
 		covs <- rho*A.scale*B.scale
 		##x <- a[, J]
 		x <- a
 		y <- b
 		Q.x.y <- 1/(1-rho^2)*((x - meanA)^2/A.scale2 + (y - meanB)^2/B.scale2 - (2*rho*((x - meanA)*(y - meanB)))/(A.scale*B.scale))
 		f.x.y[[i]] <- 1/(2*pi*A.scale*B.scale*sqrt(1-rho^2))*exp(-0.5*Q.x.y)
 		class(f.x.y[[i]]) <- "mymatrix"
 	}
 	res <- do.call("sum", f.x.y)
 	return(res)
 }
 
e516fadb
 
 
 
 
 genotypeCorrelation <- function(genotype){
 	switch(genotype,
 	       NULL="NULL",
 	       A="AA",
 	       B="BB",
 	       AA="AA",
 	       AB="AB",
 	       BB="BB",
 	       AAA="AA",
 	       AAB="AB",
 	       ABB="AB",
 	       BBB="BB",
 	       AAAA="AA",
 	       AAAB="AB",
 	       AABB="AB",
 	       ABBB="AB",
 	       BBBB="BB")
 }
 
b03dd092
 posteriorMean.snp <- function(stratum, object, index.list, CN,
 			      prior.prob=c(1/7, 1/7, 3/7, 1/7, 1/7), is.lds=TRUE, verbose=TRUE,
 			      scale.sd=1){
 	if(length(scale.sd) == 1) rep(scale.sd,2)
 	if(verbose) message("Probe stratum ", stratum, " of ", length(index.list))
 	index <- index.list[[stratum]]
 	test <- tryCatch(open(A(object)), error=function(e) NULL)
39257387
 	if(!is.null(test)) invisible(.open(object))
453e688a
 	a <- log2(as.matrix(A(object)[index, ]))
 	b <- log2(as.matrix(B(object)[index, ]))
c75c54c0
 	NN <- Ns(object, i=index)[, , 1]
453e688a
 	sig2A <- as.matrix(tau2A.AA(object)[index,])
 	sig2B <- as.matrix(tau2B.BB(object)[index,])
 	tau2A <- as.matrix(tau2A.BB(object)[index,])
 	tau2B <- as.matrix(tau2B.AA(object)[index,])
 	corrAA <- as.matrix(corrAA(object)[index, ])
 	corrAB <- as.matrix(corrAB(object)[index, ])
 	corrBB <- as.matrix(corrBB(object)[index, ])
 	nuA <- as.matrix(nuA(object)[index, ])
 	phiA <- as.matrix(phiA(object)[index, ])
 	nuB <- as.matrix(nuB(object)[index, ])
 	phiB <- as.matrix(phiB(object)[index, ])
39257387
 	if(!is.null(test)) invisible(.close(object))
c75c54c0
 	S <- length(prior.prob)
 	emit <- array(NA, dim=c(nrow(a), ncol(a), S))##SNPs x sample x 'truth'
453e688a
 	sample.index <- split(1:ncol(object), batch(object))
dd44a16e
 	##emit <- vector("list", length(sample.index))
 	for(j in seq_along(sample.index)){
 		cat("batch ", j, "\n")
 		J <- sample.index[[j]]
 		probs <- array(NA, dim=c(nrow(a), length(J), S))
 		for(k in seq_along(CN)){
64c14dfc
 			##CT <- CN[k]
39257387
 			probs[, , k] <- .posterior(CT=CN[k],
 						   tau2A=tau2A,
 						   tau2B=tau2B,
 						   sig2A=sig2A,
 						   sig2B=sig2B,
 						   nuA=nuA,
 						   nuB=nuB,
 						   phiA=phiA,
 						   phiB=phiB,
 						   corrAA=corrAA,
 						   corrAB=corrAB,
 						   corrBB=corrBB,
 						   a=a[, J],
 						   b=b[, J],
 						   scale.sd=scale.sd)
64c14dfc
 			##probs[, , k] <- do.call("sum", f.x.y)
453e688a
 			##if none of the states are likely (outlier), assign NA
 			##		emit[, , counter] <- f.x.y
 			##		counter <- counter+1
 		}
 		emit[, J, ] <- probs
 	}
c75c54c0
 	for(i in 1:S) emit[, , i] <- emit[, , i]*prior.prob[i]
453e688a
 	total <- matrix(0, nrow(emit), ncol(emit))
c75c54c0
 	for(i in 1:S) total <- total+emit[, , i]
453e688a
 	## how to handle outliers...
 	##  - use priors (posterior mean will likely be near 2).  smoothing needs to take into account the uncertainty
 	##  - need uncertainty estimates for posterior means
c75c54c0
 	for(i in 1:S) emit[, , i] <- emit[, , i]/total
aa4a7e0b
 	dimnames(emit) <- list(featureNames(object)[index], sampleNames(object), paste("states", 1:S, sep="_"))
c75c54c0
 	## for one marker/one sample, the emission probs must sum to 1
453e688a
 	return(emit)
 }
 
 
 
 
 
5b19b0f1
 ## used for testing
 ##crlmm2.2 <- function(filenames, row.names=TRUE, col.names=TRUE,
 ##                   probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
 ##                   save.it=FALSE, load.it=FALSE, intensityFile,
 ##                   mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
 ##                   cdfName, sns, recallMin=10, recallRegMin=1000,
 ##                   returnParams=FALSE, badSNP=.7){
 ##  if ((load.it || save.it) && missing(intensityFile))
 ##    stop("'intensityFile' is missing, and you chose either load.it or save.it")
 ##  if (missing(sns)) sns <- basename(filenames)
 ##  if (!missing(intensityFile))
 ##    if (load.it & !file.exists(intensityFile)){
 ##      load.it <- FALSE
 ##      message("File ", intensityFile, " does not exist.")
 ##      message("Not loading it, but running SNPRMA from scratch.")
 ##    }
 ##  if (!load.it){
 ##    res <- snprma2(filenames, fitMixture=TRUE,
 ##                   mixtureSampleSize=mixtureSampleSize, verbose=verbose,
 ##                   eps=eps, cdfName=cdfName, sns=sns)
 ##    open(res[["A"]])
 ##    open(res[["B"]])
 ##    open(res[["SNR"]])
 ##    open(res[["mixtureParams"]])
 ##    if(save.it){
 ##      t0 <- proc.time()
 ##      save(res, file=intensityFile)
 ##      t0 <- proc.time()-t0
 ##      if (verbose) message("Used ", t0[3], " seconds to save ", intensityFile, ".")
 ##    }
 ##  }else{
 ##    if (verbose) message("Loading ", intensityFile, ".")
 ##    obj <- load(intensityFile)
 ##    if (verbose) message("Done.")
 ##    if (obj != "res")
 ##      stop("Object in ", intensityFile, " seems to be invalid.")
 ##  }
 ##  if(row.names) row.names=res$gns else row.names=NULL
 ##  if(col.names) col.names=res$sns else col.names=NULL
 ##  res2 <- crlmmGT2(res[["A"]], res[["B"]], res[["SNR"]],
 ##                   res[["mixtureParams"]], res[["cdfName"]],
 ##                   gender=gender, row.names=row.names,
 ##                   col.names=col.names, recallMin=recallMin,
 ##                   recallRegMin=1000, SNRMin=SNRMin,
 ##                   returnParams=returnParams, badSNP=badSNP,
 ##                   verbose=verbose)
 ##
 ##  res2[["SNR"]] <- res[["SNR"]]
 ##  res2[["SKW"]] <- res[["SKW"]]
 ##  return(list2SnpSet(res2, returnParams=returnParams))
 ##}
e516fadb
 
8be0a109
 genotypes <- function(copyNumber, is.snp=TRUE){
e516fadb
 	stopifnot(copyNumber %in% 0:4)
 	cn <- paste("x", copyNumber, sep="")
8be0a109
 	if(is.snp){
 		res <- switch(cn,
 			      x0="NULL",
 			      x1=LETTERS[1:2],
 			      x2=c("AA", "AB", "BB"),
 			      x3=c("AAA", "AAB", "ABB", "BBB"),
 			      x4=c("AAAA", "AAAB", "AABB", "ABBB", "BBBB"))
 	} else {
 		res <- switch(cn,
 			      x0="NULL",
 			      x1="A",
 			      x2="AA",
 			      x3="AAA",
 			      x4="AAAA")
 	}
 	return(res)
e516fadb
 }
 
 dbvn <- function(x, mu, Sigma){
 	##stopifnot(ncol(x)==2)
96fb65cc
 	logA <- x[, , 1]
 	logB <- x[, , 2]
e516fadb
 	sigma2A <- Sigma[,1]
 	sigma2B <- Sigma[,3]
 	sigmaAB <- sqrt(sigma2A*sigma2B)
 	rho <- Sigma[,2]
96fb65cc
 	muA <- mu[, 1]
 	muB <- mu[, 2]
e516fadb
 	Q.x.y <- 1/(1-rho^2)*((logA - muA)^2/sigma2A + (logB - muB)^2/sigma2B - (2*rho*((logA - muA)*(logB - muB)))/sigmaAB)
 	f.x.y <- 1/(2*pi*sigmaAB*sqrt(1-rho^2))*exp(-0.5*Q.x.y)
 }
96fb65cc
 
 ABpanel <- function(x, y, predictRegion,
 		    copyNumber=0:4,
004788fa
 		    fill,
96fb65cc
 		    ...,
 		    subscripts){
 	panel.grid(h=5, v=5)
004788fa
 	if(!missing(fill)){
 		panel.xyplot(x, y, fill=fill[subscripts], ...)
 	} else {
 		panel.xyplot(x, y, ...)
 	}
 	pn <- panel.number()
32b8047e
 	if(!is.null(copyNumber)){
 		for(CN in copyNumber){
 			gts <- genotypes(CN)
 			index <- match(gts, names(predictRegion))
 			pr <- predictRegion[index]
 			for(i in seq_along(pr)){ ## for each genotype
 				## scale?
 				pr2 <- pr[[i]]
 				mu <- pr2$mu[pn, , , drop=FALSE] ## pn=panel number
 				Sigma <- pr2$cov[pn, , ,drop=FALSE]
 				for(j in seq_len(dim(mu)[3])){  ## for each batch
 					dat.ellipse <- ellipse(x=Sigma[, 2, j],
 							       centre=mu[ , , j],
 							       scale=c(sqrt(Sigma[ , 1, j]),
 							       sqrt(Sigma[, 3, j])))
 					lpolygon(dat.ellipse[,1], dat.ellipse[,2], ...)
 				}
96fb65cc
 			}
 		}
 	}
 }
dd7ad7b9
 
063b3d14
 calculateRTheta <- function(object, ##genotype=c("AA", "AB", "BB"),
 			    batch.name,
 			    feature.index){
 	index.np <- which(!(isSnp(object)[feature.index]))
 	j <- match(batch.name, batchNames(object))
 	if(missing(j)) stop("batch.name did not match in batchNames(object)")
 	CHR <- unique(chromosome(object)[feature.index])
 	isX <- CHR == "X"
 	centers <- getMedians(batchStatistics(object))
 	lapply(centers, open)
 	centersMatrix <- lapply(centers, function(x, i, j) x[i, j], j=j, i=feature.index)
 	lapply(centers, close)
 	centersMatrix <- do.call(cbind, centersMatrix)
808cfecc
 	centersA <- centersMatrix[, 1:3, drop=FALSE]
 	centersB <- centersMatrix[, 4:6, drop=FALSE]
063b3d14
 	rm(centers, centersMatrix); gc()
 	centersA[centersA < 64] <- 64
 	centersB[centersB < 64] <- 64
808cfecc
 	theta <- as.matrix(atan2(centersB, centersA) * 2/pi)
063b3d14
 	centersA[is.na(centersA)] <- 0
 	centersB[is.na(centersB)] <- 0
 	if(length(index.np) > 0) theta[index.np, ] <- 1
 	##
 	r <- centersA+centersB
 	J <- which(batch(object)==batch.name)
 	open(A(object))
 	open(B(object))
 	a <- as.matrix(A(object)[feature.index, J, drop=FALSE])
 	b <- as.matrix(B(object)[feature.index, J, drop=FALSE])
 	close(A(object))
 	close(B(object))
 	if(length(index.np) > 0) b[index.np, ] <- 0L
 	obs.theta <- atan2(b, a)*2/pi
 	calculateBandR <- function(o, r, theta, not.na, M){
 		lessAA <- o < theta[, 1]
 		lessAB <- o < theta[, 2]
 		lessBB <- o < theta[, 3]
 		ii <- which(lessAA & not.na)
 		jj <- which(!lessBB & not.na)
 		i <- which(!lessAA & lessAB & not.na)
 		j <- which(!lessAB & lessBB & not.na)
 		M[i, 1] <- 0.5*(o[i]-theta[i,1])/(theta[i,2]-theta[i,1])
 		M[j, 1] <- 0.5*(o[j]-theta[j,2])/(theta[j,3]-theta[j,2]) + 0.5
 		M[ii, 1] <- 0
 		M[jj, 1] <- 1
 		M[i, 2] <- (o[i]-theta[i,1])*(r[i,2]-r[i,1])/(theta[i,2]-theta[i,1]) + r[i, 1]
 		M[j, 2] <- (o[j]-theta[j,2])*(r[j,3]-r[j,2])/(theta[j,3]-theta[j,2]) + r[j, 2]
 		M[ii, 2] <- r[ii, 1]
 		M[jj, 2] <- r[jj, 3]
 		return(M)
 	}
 	not.na <- !is.na(theta[,1])
 	r.expected <- bf <- matrix(NA, length(feature.index), ncol(a))
 	M <- matrix(NA, length(feature.index), 2)
 	for(j in seq_len(ncol(a))){
 		tmp <- calculateBandR(obs.theta[, j], r=r, theta=theta, not.na=not.na, M=M)
 		bf[, j] <- tmp[,1]
 		r.expected[,j] <- tmp[,2]
 	}
 	bf[bf < 0] <- 0
 	bf[bf > 1] <- 1
 	if(length(index.np) > 0) r.expected[index.np, ] <- centersA[index.np, 1]
 	obs.r <- a+b
 	lrr <- log2(obs.r/r.expected)
 	dimnames(bf) <- dimnames(lrr) <- list(featureNames(object)[feature.index],
 					      sampleNames(object)[J])
 	bf <- integerMatrix(bf, 1000)
 	lrr <- integerMatrix(lrr, 100)
 	return(list(baf=bf, lrr=lrr))
dd7ad7b9
 }
808cfecc