16fadf14 |
NC <- ncol(A)
##
if(verbose) message("Loading annotations.")
obj1 <- loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
obj2 <- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
## this is toget rid of the 'no visible binding' notes
## variable definitions
XIndex <- getVarInEnv("XIndex")
autosomeIndex <- getVarInEnv("autosomeIndex")
YIndex <- getVarInEnv("YIndex")
SMEDIAN <- getVarInEnv("SMEDIAN")
theKnots <- getVarInEnv("theKnots")
regionInfo <- getVarInEnv("regionInfo")
params <- getVarInEnv("params")
rm(list=c(obj1, obj2), envir=.crlmmPkgEnv)
rm(obj1, obj2)
##
## IF gender not provide, we predict
## FIXME: XIndex may be greater than ocProbesets()
if(is.null(gender)){
if(verbose) message("Determining gender.")
## XMedian <- apply(log2(A[XIndex,, drop=FALSE])+log2(B[XIndex,, drop=FALSE]), 2, median)/2
XMedian <- ocLapply(splitIndicesByNode(1:NC), predictGender, theA=A, theB=B, XIndex=XIndex, neededPkgs="crlmm")
XMedian <- unlist(XMedian)
if(sum(SNR[] > SNRMin)==1){
gender <- which.min(c(abs(XMedian-8.9), abs(XMedian-9.5)))
}else{
gender <- kmeans(XMedian, c(min(XMedian[SNR[]>SNRMin]), max(XMedian[SNR[]>SNRMin])))[["cluster"]]
}
}
##
Indexes <- list(autosomeIndex, XIndex, YIndex)
cIndexes <- list(keepIndex,
keepIndex[which(gender[keepIndex]==2)],
keepIndex[which(gender[keepIndex]==1)])
## call C
fIndex <- which(gender==2)
mIndex <- which(gender==1)
## different here
## use gtypeCallerR in batches
##snpBatches <- splitIndicesByLength(1:nrow(A), ocProbesets())
newparamsBatch <- vector("list", length(snpBatches))
process1 <- function(idxBatch, snpBatches, autosomeIndex, XIndex,
YIndex, A, B, mixtureParams, fIndex, mIndex,
params, cIndexes, SMEDIAN, theKnots, DF, probs, batchSize){
open(A)
open(B)
open(mixtureParams)
snps <- snpBatches[[idxBatch]]
rSnps <- range(snps)
last <- (idxBatch-1)*batchSize
IndexesBatch <- list(autosomeIndex[autosomeIndex %in% snps]-last,
XIndex[XIndex %in% snps]-last,
YIndex[YIndex %in% snps]-last)
IndexesBatch <- lapply(IndexesBatch, as.integer)
tmpA <- as.matrix(A[snps,])
tmpB <- as.matrix(B[snps,])
## newparamsBatch[[idxBatch]]
tmp <- gtypeCallerR(tmpA, tmpB, fIndex, mIndex,
params[["centers"]][snps,],
params[["scales"]][snps,],
params[["N"]][snps,],
IndexesBatch, cIndexes,
sapply(IndexesBatch, length),
sapply(cIndexes, length), SMEDIAN,
theKnots, mixtureParams[], DF, probs, 0.025)
rm(snps, rSnps, IndexesBatch, tmpA, tmpB, last)
gc(verbose=FALSE)
close(A)
close(B)
close(mixtureParams)
tmp
}
##
|
16fadf14 |
newparamsBatch <- ocLapply(seq(along=snpBatches), process1,
snpBatches=snpBatches,
autosomeIndex=autosomeIndex, XIndex=XIndex,
YIndex=YIndex, A=A, B=B,
mixtureParams=mixtureParams, fIndex=fIndex,
mIndex=mIndex, params=params,
cIndexes=cIndexes, SMEDIAN=SMEDIAN,
theKnots=theKnots, DF=DF, probs=probs,
batchSize=ocProbesets())
newparams <- vector("list", 3)
names(newparams) <- c("centers", "scales", "N")
newparams[["centers"]] <- do.call("rbind", lapply(newparamsBatch, "[[", 1))
newparams[["scales"]] <- do.call("rbind", lapply(newparamsBatch, "[[", 2))
newparams[["N"]] <- do.call("rbind", lapply(newparamsBatch, "[[", 3))
rm(newparamsBatch); gc(verbose=FALSE)
if(verbose) message("Done.")
if(verbose) message("Estimating recalibration parameters.")
d <- newparams[["centers"]] - params$centers
##
##regression
Index <- intersect(which(pmin(newparams[["N"]][, 1],
newparams[["N"]][, 2],
newparams[["N"]][, 3]) > recallMin &
!apply(regionInfo, 1, any)),
autosomeIndex)
if(length(Index) < recallRegMin){
warning("Recalibration not possible. Possible cause: small sample size.")
newparams <- params
dev <- vector("numeric", nrow(newparams[["centers"]]))
SS <- matrix(Inf, 3, 3)
DD <- 0
}else{
data4reg <- as.data.frame(newparams[["centers"]][Index,])
names(data4reg) <- c("AA", "AB", "BB")
regParams <- cbind( coef(lm(AA~AB*BB, data=data4reg)),
c(coef(lm(AB~AA+BB, data=data4reg)), 0),
coef(lm(BB~AA*AB, data=data4reg)))
rownames(regParams) <- c("intercept", "X", "Y", "XY")
rm(data4reg)
##
minN <- 3
newparams[["centers"]][newparams[["N"]] < minN] <- NA
Index <- setdiff(which(rowSums(is.na(newparams[["centers"]]))==1), YIndex)
if(verbose) message("Filling out empty centers", appendLF=FALSE)
for(i in Index){
if(verbose) if(i%%10000==0) message(".", appendLF=FALSE)
mu <- newparams[["centers"]][i, ]
j <- which(is.na(mu))
newparams[["centers"]][i, j] <- c(1, mu[-j], prod(mu[-j]))%*%regParams[, j]
rm(mu, j)
}
##
##remaing NAs are made like originals
if(length(YIndex)>0){
noMoveIndex <- union(setdiff(which(rowSums(is.na(newparams[["centers"]]))>0), YIndex),
YIndex[rowSums(is.na(newparams[["centers"]][YIndex, ])>1)])
}
snps2ignore <- which(rowSums(is.na(newparams[["centers"]])) > 0)
snps2keep <- setdiff(autosomeIndex, snps2ignore)
rm(snps2ignore)
newparams[["centers"]][is.na(newparams[["centers"]])] <- params[["centers"]][is.na(newparams[["centers"]])]
if(verbose) cat("\n")
##
if(verbose) message("Calculating and standardizing size of shift... ", appendLF=FALSE)
GG <- DD <- newparams[["centers"]] - params[["centers"]]
DD <- sweep(DD, 2, colMeans(DD[autosomeIndex, ]))
SS <- cov(DD[autosomeIndex, ])
SSI <- solve(SS)
dev <- vector("numeric", nrow(DD))
if(length(YIndex)){
dev[-YIndex] <- apply(DD[-YIndex, ], 1, function(x) x%*%SSI%*%x)
dev[-YIndex] <- 1/sqrt( (2*pi)^3*det(SS))*exp(-0.5*dev[-YIndex])
##Now Y (only two params)
SSY <- SS[c(1, 3), c(1, 3)]
SSI <- solve(SSY)
dev[YIndex] <- apply(DD[YIndex, c(1, 3)], 1, function(x) x%*%SSI%*%x)
dev[YIndex] <- 1/sqrt( (2*pi)^2*det(SSY))*exp(-0.5*dev[YIndex])
} else {
dev=apply(DD,1,function(x) x%*%SSI%*%x)
dev=1/sqrt( (2*pi)^3*det(SS))*exp(-0.5*dev)
}
}
if (verbose) message("OK")
##
## BC: must keep SD
params[-2] <- newparams[-2]
rm(newparams)
gc(verbose=FALSE)
##
if(verbose) message("Calling ", NR, " SNPs... ", appendLF=FALSE)
##
## ###################
## ## MOVE TO C#######
##
## running in batches
process2 <- function(idxBatch, snpBatches, autosomeIndex, XIndex,
YIndex, A, B, mixtureParams, fIndex, mIndex,
params, cIndexes, SMEDIAN, theKnots, DF, probs,
regionInfo, batchSize){
open(A)
open(B)
open(mixtureParams)
snps <- snpBatches[[idxBatch]]
tmpA <- as.matrix(A[snps,])
tmpB <- as.matrix(B[snps,])
rSnps <- range(snps)
last <- (idxBatch-1)*batchSize
IndexesBatch <- list(autosomeIndex[autosomeIndex %in% snps]-last,
XIndex[XIndex %in% snps]-last,
YIndex[YIndex %in% snps]-last)
IndexesBatch <- lapply(IndexesBatch, as.integer)
ImNull <- gtypeCallerR2(tmpA, tmpB, fIndex, mIndex,
params[["centers"]][snps,],
params[["scales"]][snps,],
params[["N"]][snps,],
IndexesBatch, cIndexes,
sapply(IndexesBatch, length),
sapply(cIndexes, length),
SMEDIAN, theKnots, mixtureParams[],
DF, probs, 0.025,
which(regionInfo[snps, 2]),
which(regionInfo[snps, 1]))
A[snps,] <- tmpA
B[snps,] <- tmpB
rm(tmpA, tmpB, snps, rSnps, IndexesBatch, ImNull, last)
gc(verbose=FALSE)
close(A)
close(B)
close(mixtureParams)
}
##
ocLapply(seq(along=snpBatches), process2, snpBatches=snpBatches,
autosomeIndex=autosomeIndex, XIndex=XIndex, YIndex=YIndex,
A=A, B=B, mixtureParams=mixtureParams, fIndex=fIndex,
mIndex=mIndex, params=params, cIndexes=cIndexes,
SMEDIAN=SMEDIAN, theKnots=theKnots, DF=DF, probs=probs,
regionInfo=regionInfo, batchSize=ocProbesets())
## END MOVE TO C#######
## ##################
##
dev <- dev/(dev+1/383)
if(!is.null(row.names)){ rownames(A) <- rownames(B) <- names(dev) <- row.names}
if(!is.null(col.names)){ colnames(A) <- colnames(B) <- col.names}
##
if(length(Index) >= recallRegMin){
tmp4batchQC <- DD[autosomeIndex,]/(params[["N"]][autosomeIndex,]+1)
tmpSnpQc <- dev[autosomeIndex]
SS <- cov(tmp4batchQC[tmpSnpQc < badSNP,])
batchQC <- mean(diag(SS))
}else{
SS <- matrix(0, 3, 3)
batchQC <- Inf
}
##
if(verbose) message("Done.")
if (returnParams){
return(list(calls=A, confs=B, SNPQC=dev, batchQC=batchQC, params=params, DD=DD, covDD=SS, gender=gender, pkgname=pkgname))
}else{
return(list(calls=A, confs=B, SNPQC=dev, batchQC=batchQC, DD=DD, covDD=SS, gender=gender, pkgname=pkgname))
}
}
|