2ae7850e |
if (missing(sns)) sns <- basename(filenames)
##ADD CHECK TO SEE IF LOADED
if (missing(cdfName))
cdfName <- read.celfile.header(filenames[1])$cdfName
## stuffDir <- changeToCrlmmAnnotationName(cdfName)
pkgname <- getCrlmmAnnotationName(cdfName)
if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
message(strwrap(msg))
stop("Package ", pkgname, " could not be found.")
rm(suggCall, msg)
}
if(verbose) message("Loading annotations and mixture model parameters.")
loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
autosomeIndex <- getVarInEnv("autosomeIndex")
pnsa <- getVarInEnv("pnsa")
pnsb <- getVarInEnv("pnsb")
fid <- getVarInEnv("fid")
reference <- getVarInEnv("reference")
aIndex <- getVarInEnv("aIndex")
bIndex <- getVarInEnv("bIndex")
SMEDIAN <- getVarInEnv("SMEDIAN")
theKnots <- getVarInEnv("theKnots")
gns <- getVarInEnv("gns")
|
2ae7850e |
if(verbose){
message("Processing ", length(filenames), " files.")
if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3)
}
##We start looping throug cel files
idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything
for(i in seq(along=filenames)){
y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
x <- log2(y[idx2])
SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3)
rm(x)
y <- normalize.quantiles.use.target(y, target=reference)
A[, i] <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa)
B[, i] <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb)
##Now to fit the EM
if(fitMixture){
S <- (log2(A[idx, i])+log2(B[idx, i]))/2 - SMEDIAN
M <- log2(A[idx, i])-log2(B[idx, i])
|
2ae7850e |
##we need to test the choice of eps.. it is not the max diff between funcs
tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
mixtureParams[, i] <- tmp[["coef"]]
SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
}
if (verbose)
if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
else cat(".")
}
if (verbose)
if (getRversion() > '2.7.0') close(pb)
else cat("\n")
if (!fitMixture) SNR <- mixtureParams <- NA
## gns comes from preprocStuff.rda
list(A=A, B=B, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)
|
3620e6b1 |
fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=10, verbose=FALSE){
##56 stands for 5 and 6 arrays but will also work for Illumina
##Note the unfortunate choice of numbering:
##1 is BB, 2 AB, and 3 AA. Opposite to everything else!
##this is legacy code I decided not to change.
## this why at the end we report -coefs: F1 is the negative f
mus <- append(quantile(M, c(1, 5)/6, names=FALSE), 0, 1)
sigmas <- rep(mad(c(M[M<mus[1]]-mus[1], M[M>mus[3]]-mus[3])), 3)
sigmas[2] <- sigmas[2]/2
weights <- apply(cbind(mus, sigmas), 1, function(p) dnorm(M, p[1], p[2]))
previousF1 <- -Inf
change <- eps+1
it <- 0
if(verbose) message("Max change must be under ", eps, ".")
matS <- stupidSplineBasis(S, knots)
while (change > eps & it < maxit){
it <- it+1
## E
z <- sweep(weights, 2, probs, "*")
LogLik <- rowSums(z)
z <- sweep(z, 1, LogLik, "/")
probs <- colMeans(z)
## M
fit1 <- crossprod(chol2inv(chol(crossprod(sweep(matS, 1, z[, 1], FUN="*"), matS))), crossprod(matS, z[, 1]*M))
fit2 <- sum(z[, 2]*M)/sum(z[, 2])
F1 <- matS%*%fit1
sigmas[c(1, 3)] <- sqrt(sum(z[, 1]*(M-F1)^2)/sum(z[, 1]))
sigmas[2] <- sqrt(sum(z[, 2]*(M-fit2)^2)/sum(z[, 2]))
weights[, 1] <- dnorm(M, F1, sigmas[1])
weights[, 2] <- dnorm(M, fit2, sigmas[2])
weights[, 3] <- dnorm(M, -F1, sigmas[3])
change <- max(abs(F1-previousF1))
previousF1 <- F1
if(verbose) message("Iter ", it, ": ", change, ".")
}
medF1 <- median(-F1)
return(list(coef= -fit1, medF1=medF1, sigma1=sigmas[1], sigma2=sigmas[2]))
}
|