a3335242 |
##---------------------------------------------------------------------------
##---------------------------------------------------------------------------
getProtocolData.Affy <- function(filenames){
scanDates <- data.frame(ScanDate=sapply(filenames, celfileDate))
rownames(scanDates) <- basename(rownames(scanDates))
protocoldata <- new("AnnotatedDataFrame",
data=scanDates,
varMetadata=data.frame(labelDescription=colnames(scanDates),
row.names=colnames(scanDates)))
return(protocoldata)
}
|
6a6b67c5 |
getFeatureData <- function(cdfName, copynumber=FALSE){
|
a3335242 |
pkgname <- getCrlmmAnnotationName(cdfName)
if(!require(pkgname, character.only=TRUE)){
suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
message(strwrap(msg))
stop("Package ", pkgname, " could not be found.")
rm(suggCall, msg)
}
loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
gns <- getVarInEnv("gns")
path <- system.file("extdata", package=paste(cdfName, "Crlmm", sep=""))
load(file.path(path, "snpProbes.rda"))
|
b5b74b9f |
snpProbes <- get("snpProbes")
|
a3335242 |
if(copynumber){
load(file.path(path, "cnProbes.rda"))
cnProbes <- get("cnProbes")
snpIndex <- seq(along=gns)
|
81601623 |
npIndex <- seq(along=rownames(cnProbes)) + max(snpIndex)
|
a3335242 |
featurenames <- c(gns, rownames(cnProbes))
} else featurenames <- gns
fvarlabels=c("chromosome", "position", "isSnp")
M <- matrix(NA, length(featurenames), 3, dimnames=list(featurenames, fvarlabels))
index <- match(rownames(snpProbes), rownames(M)) #only snp probes in M get assigned position
M[index, "position"] <- snpProbes[, grep("pos", colnames(snpProbes))]
|
6a6b67c5 |
M[index, "chromosome"] <- chromosome2integer(snpProbes[, grep("chr", colnames(snpProbes))])
|
a3335242 |
M[index, "isSnp"] <- 1L
|
7a41ca3b |
##index <- which(is.na(M[, "isSnp"]))
##M[index, "isSnp"] <- 1L
|
a3335242 |
if(copynumber){
index <- match(rownames(cnProbes), rownames(M)) #only snp probes in M get assigned position
M[index, "position"] <- cnProbes[, grep("pos", colnames(cnProbes))]
|
6a6b67c5 |
M[index, "chromosome"] <- chromosome2integer(cnProbes[, grep("chr", colnames(cnProbes))])
|
a3335242 |
M[index, "isSnp"] <- 0L
}
|
cb875a5a |
##A few of the snpProbes do not match -- I think it is chromosome Y.
|
8247639c |
M[is.na(M[, "chromosome"]), "isSnp"] <- 1L
|
6a6b67c5 |
M <- data.frame(M)
return(new("AnnotatedDataFrame", data=M))
|
a3335242 |
}
|
6a6b67c5 |
getFeatureData.Affy <- getFeatureData
|
a097b918 |
|
ada0da1e |
construct <- function(filenames,
cdfName,
copynumber=TRUE,
|
518a2908 |
sns, verbose=TRUE, batch, fns){
|
dded663e |
if(!missing(batch)){
stopifnot(length(batch) == length(sns))
}
if(missing(sns) & missing(filenames)) stop("one of filenames or samplenames (sns) must be provided")
|
ada0da1e |
if(verbose) message("Initializing container for copy number estimation")
|
6a6b67c5 |
featureData <- getFeatureData(cdfName, copynumber=copynumber)
|
518a2908 |
if(!missing(fns)){
|
7a41ca3b |
warning("subsetting the featureData can cause the snprma object and CNSet object to be misaligned. This problem is fixed in devel as we match the names prior to assigning values from snprma")
|
518a2908 |
index <- match(fns, featureNames(featureData))
if(all(is.na(index))) stop("fns not in featureNames")
featureData <- featureData[index, ]
}
|
dded663e |
nr <- nrow(featureData); nc <- length(sns)
|
69ec8644 |
cnSet <- new("CNSet",
alleleA=initializeBigMatrix(name="A", nr, nc),
alleleB=initializeBigMatrix(name="B", nr, nc),
call=initializeBigMatrix(name="call", nr, nc),
callProbability=initializeBigMatrix(name="callPr", nr,nc),
annotation=cdfName,
batch=batch)
sampleNames(cnSet) <- sns
if(!missing(filenames)){
if(missing(sns)) sns <- basename(filenames)
protocolData <- getProtocolData.Affy(filenames)
} else{
protocolData <- annotatedDataFrameFrom(A(cnSet), byrow=FALSE)
}
rownames(pData(protocolData)) <- sns
|
d08acba1 |
protocolData(cnSet) <- protocolData
|
69ec8644 |
featureData(cnSet) <- featureData
|
d08acba1 |
featureNames(cnSet) <- featureNames(featureData)
|
da553af0 |
pd <- data.frame(matrix(NA, nc, 3), row.names=sns)
colnames(pd)=c("SKW", "SNR", "gender")
|
69ec8644 |
phenoData(cnSet) <- new("AnnotatedDataFrame", data=pd)
|
7a41ca3b |
gns <- getVarInEnv("gns")
stopifnot(identical(gns, featureNames(cnSet)[1:length(gns)]))
|
69ec8644 |
return(cnSet)
|
e272e5d6 |
}
|
ea66649b |
|
d08acba1 |
genotype <- function(filenames,
|
705ef5e0 |
cdfName,
batch,
mixtureSampleSize=10^5,
eps=0.1,
verbose=TRUE,
seed=1,
sns,
probs=rep(1/3, 3),
DF=6,
SNRMin=5,
recallMin=10,
recallRegMin=1000,
gender=NULL,
returnParams=TRUE,
badSNP=0.7){
|
d08acba1 |
is.lds <- ifelse(isPackageLoaded("ff"), TRUE, FALSE)
|
d5bc779e |
if(missing(cdfName)) stop("must specify cdfName")
if(!isValidCdfName(cdfName)) stop("cdfName not valid. see validCdfNames")
if(missing(sns)) sns <- basename(filenames)
callSet <- construct(filenames=filenames,
cdfName=cdfName,
copynumber=TRUE,
sns=sns,
|
a097b918 |
verbose=verbose,
batch=batch)
|
d08acba1 |
FUN <- ifelse(is.lds, "snprma2", "snprma")
snprmaFxn <- function(FUN,...){
switch(FUN,
snprma=snprma(...),
snprma2=snprma2(...))
}
|
73fd7374 |
snprmaRes <- snprmaFxn(FUN,
filenames=filenames,
|
d08acba1 |
mixtureSampleSize=mixtureSampleSize,
fitMixture=TRUE,
eps=eps,
verbose=verbose,
seed=seed,
cdfName=cdfName,
sns=sns)
|
f3d40144 |
gns <- snprmaRes[["gns"]]
snp.I <- isSnp(callSet)
is.snp <- which(snp.I)
snp.index <- match(featureNames(callSet)[is.snp], gns)
stopifnot(identical(featureNames(callSet)[is.snp], gns[snp.index]))
## is.snp <- isSnp(callSet)
## snp.index <- which(is.snp)
if(is.lds) open(callSet)
mixtureParams <- matrix(NA, 4, length(filenames))
|
73fd7374 |
##message("Saving snprmaRes file")
##save(snprmaRes, file=file.path(outdir, "snprmaRes.rda"))
|
d5bc779e |
if(verbose) message("Finished preprocessing.")
|
cb875a5a |
gns <- snprmaRes[["gns"]]
snp.I <- isSnp(callSet)
is.snp <- which(snp.I)
snp.index <- match(featureNames(callSet)[is.snp], gns)
stopifnot(identical(featureNames(callSet)[is.snp], gns[snp.index]))
if(is.lds) open(callSet)
mixtureParams <- matrix(NA, 4, length(filenames))
|
d08acba1 |
if(is.lds){
open(snprmaRes[["A"]])
open(snprmaRes[["B"]])
|
73fd7374 |
open(snprmaRes[["SNR"]])
open(snprmaRes[["mixtureParams"]])
|
89c5fe95 |
bb <- getOption("ffbatchbytes")
ffcolapply(A(callSet)[is.snp, i1:i2] <- snprmaRes[["A"]][snp.index, i1:i2], X=snprmaRes[["A"]],
BATCHBYTES=bb)
ffcolapply(B(callSet)[is.snp, i1:i2] <- snprmaRes[["B"]][snp.index, i1:i2], X=snprmaRes[["B"]],
BATCHBYTES=bb)
|
d08acba1 |
} else{
|
f3d40144 |
A(callSet)[is.snp, ] <- snprmaRes[["A"]][snp.index, ]
B(callSet)[is.snp, ] <- snprmaRes[["B"]][snp.index, ]
|
a097b918 |
}
|
cb875a5a |
pData(callSet)$SKW <- snprmaRes[["SKW"]]
pData(callSet)$SNR <- snprmaRes[["SNR"]]
mixtureParams <- snprmaRes$mixtureParams
|
7a41ca3b |
np.index <- which(!snp.I)
|
cb875a5a |
if(verbose) message("Normalizing nonpolymorphic markers")
|
d08acba1 |
FUN <- ifelse(is.lds, "cnrma2", "cnrma")
## main purpose is to update 'alleleA'
cnrmaFxn <- function(FUN,...){
switch(FUN,
cnrma=cnrma(...),
cnrma2=cnrma2(...))
}
## consider passing only A for NPs.
|
6a6b67c5 |
if(verbose) message("Quantile normalizing nonpolymorphic markers")
|
d08acba1 |
AA <- cnrmaFxn(FUN, A=A(callSet),
filenames=filenames,
row.names=featureNames(callSet)[np.index],
cdfName=cdfName,
sns=sns,
seed=seed,
verbose=verbose)
if(!is.lds) A(callSet) <- AA
rm(AA)
FUN <- ifelse(is.lds, "crlmmGT2", "crlmmGT")
## genotyping
crlmmGTfxn <- function(FUN,...){
switch(FUN,
crlmmGT2=crlmmGT2(...),
crlmmGT=crlmmGT(...))
}
|
73fd7374 |
if(verbose) message("Running crlmmGT2")
|
d08acba1 |
tmp <- crlmmGTfxn(FUN,
A=snprmaRes[["A"]],
B=snprmaRes[["B"]],
SNR=snprmaRes[["SNR"]],
mixtureParams=snprmaRes[["mixtureParams"]],
cdfName=cdfName,
row.names=NULL,
col.names=sampleNames(callSet),
SNRMin=SNRMin,
recallMin=recallMin,
recallRegMin=recallRegMin,
gender=gender,
verbose=verbose,
returnParams=returnParams,
badSNP=badSNP)
if(verbose) message("Genotyping finished. Updating container with genotype calls and confidence scores.")
if(is.lds){
open(tmp[["calls"]])
open(tmp[["confs"]])
|
e4db8f9d |
ffcolapply(snpCall(callSet)[is.snp, i1:i2] <- tmp[["calls"]][snp.index, i1:i2], X=tmp[["calls"]],
|
89c5fe95 |
BATCHBYTES=bb)
|
e4db8f9d |
ffcolapply(snpCallProbability(callSet)[is.snp, i1:i2] <- tmp[["confs"]][snp.index, i1:i2], X=tmp[["confs"]],
|
89c5fe95 |
BATCHBYTES=bb)
|
cb875a5a |
close(tmp[["calls"]])
close(tmp[["confs"]])
|
d08acba1 |
} else {
|
89c5fe95 |
calls(callSet)[is.snp, ] <- tmp[["calls"]][snp.index, ]
snpCallProbability(callSet)[is.snp, ] <- tmp[["confs"]][snp.index, ]
|
a097b918 |
}
|
73fd7374 |
message("Finished updating. Cleaning up.")
|
d5bc779e |
callSet$gender <- tmp$gender
|
cb875a5a |
if(is.lds){
delete(snprmaRes[["A"]])
delete(snprmaRes[["B"]])
delete(snprmaRes[["mixtureParams"]])
rm(snprmaRes)
}
|
d08acba1 |
close(callSet)
|
a097b918 |
return(callSet)
}
|
73fd7374 |
|
17158566 |
genotype2 <- function(){
.Defunct(msg="The genotype2 function has been deprecated. The function genotype should be used instead. genotype will support large data using ff provided that the ff package is loaded.")
}
genotypeLD <- genotype2
|
a097b918 |
|
50b0d125 |
rowCovs <- function(x, y, ...){
notna <- !is.na(x)
N <- rowSums(notna)
x <- suppressWarnings(log2(x))
if(!missing(y)) y <- suppressWarnings(log2(y))
return(rowSums((x - rowMeans(x, ...)) * (y - rowMeans(y, ...)), ...)/(N-1))
}
rowMAD <- function(x, y, ...){
notna <- !is.na(x)
mad <- 1.4826*rowMedians(abs(x-rowMedians(x, ...)), ...)
return(mad)
}
|
d5bc779e |
shrink <- function(x, Ns, DF.PRIOR){
DF <- Ns-1
DF[DF < 1] <- 1
x.0 <- apply(x, 2, median, na.rm=TRUE)
x <- (x*DF + x.0*DF.PRIOR)/(DF.PRIOR + DF)
for(j in 1:ncol(x)) x[is.na(x[, j]), j] <- x.0[j]
return(x)
}
|
50b0d125 |
rowCors <- function(x, y, ...){
N <- rowSums(!is.na(x))
x <- suppressWarnings(log2(x))
y <- suppressWarnings(log2(y))
sd.x <- rowSds(x, ...)
sd.y <- rowSds(y, ...)
covar <- rowSums((x - rowMeans(x, ...)) * (y - rowMeans(y, ...)), ...)/(N-1)
return(covar/(sd.x*sd.y))
}
|
5a23c457 |
dqrlsWrapper <- function(x, y, wts, tol=1e-7){
n <- NROW(y)
p <- ncol(x)
ny <- NCOL(y)
.Fortran("dqrls", qr=x*wts, n=n, p=p, y=y * wts, ny=ny,
tol=as.double(tol), coefficients=mat.or.vec(p, ny),
residuals=y, effects=mat.or.vec(n, ny),
rank=integer(1L), pivot=1L:p, qraux=double(p),
work=double(2 * p), PACKAGE="base")[["coefficients"]]
|
50b0d125 |
}
|
6846d583 |
fit.wls <- function(NN, sigma, allele, Y, autosome, X){
Np <- NN
Np[Np < 1] <- 1
W <- (sigma/sqrt(Np))^-1
Ystar <- Y*W
|
3c6c270e |
complete <- which(rowSums(is.na(W)) == 0 & rowSums(is.na(Ystar)) == 0)
|
d5bc779e |
if(missing(allele)) stop("must specify allele")
|
6846d583 |
if(autosome & missing(X)){
|
5a23c457 |
if(allele == "A") X <- cbind(1, 2:0)
if(allele == "B") X <- cbind(1, 0:2)
|
6846d583 |
}
if(!autosome & missing(X)){
|
5a23c457 |
if(allele == "A") X <- cbind(1, c(1, 0, 2, 1, 0), c(0, 1, 0, 1, 2))
if(allele == "B") X <- cbind(1, c(0, 1, 0, 1, 2), c(1, 0, 2, 1, 0))
}
|
6846d583 |
betahat <- matrix(NA, ncol(X), nrow(Ystar))
|
5a23c457 |
ww <- rep(1, ncol(Ystar))
|
3c6c270e |
for(i in complete){
|
5a23c457 |
betahat[, i] <- dqrlsWrapper(W[i, ] * X, Ystar[i, ], ww)
|
dded663e |
##ssr <- sum((Ystar[i, ] - matrix(Xstar[, i], nrow(X), ncol(X)) %*% matrix(betahat[, i], ncol(X), 1))^2)
|
d5bc779e |
}
|
dded663e |
return(betahat)
|
d5bc779e |
}
|
6846d583 |
shrinkGenotypeSummaries <- function(strata, index.list, object, MIN.OBS, MIN.SAMPLES, DF.PRIOR,
verbose, is.lds){
if(is.lds) {physical <- get("physical"); open(object)}
if(verbose) message("Probe stratum ", strata, " of ", length(index.list))
marker.index <- index.list[[strata]]
batches <- split(seq_along(batch(object)), as.character(batch(object)))
batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
batchnames <- batchNames(object)
N.AA <- as.matrix(N.AA(object)[marker.index, ])
N.AB <- as.matrix(N.AB(object)[marker.index, ])
N.BB <- as.matrix(N.BB(object)[marker.index, ])
medianA.AA <- as.matrix(medianA.AA(object)[marker.index,])
medianA.AB <- as.matrix(medianA.AB(object)[marker.index,])
medianA.BB <- as.matrix(medianA.BB(object)[marker.index,])
medianB.AA <- as.matrix(medianB.AA(object)[marker.index,])
medianB.AB <- as.matrix(medianB.AB(object)[marker.index,])
medianB.BB <- as.matrix(medianB.BB(object)[marker.index,])
madA.AA <- as.matrix(madA.AA(object)[marker.index,])
madA.AB <- as.matrix(madA.AB(object)[marker.index,])
madA.BB <- as.matrix(madA.BB(object)[marker.index,])
madB.AA <- as.matrix(madB.AA(object)[marker.index,])
madB.AB <- as.matrix(madB.AB(object)[marker.index,])
madB.BB <- as.matrix(madB.BB(object)[marker.index,])
medianA <- medianB <- shrink.madB <- shrink.madA <- vector("list", length(batchnames))
shrink.tau2A.AA <- tau2A.AA <- as.matrix(tau2A.AA(object)[marker.index,])
shrink.tau2B.BB <- tau2B.BB <- as.matrix(tau2B.BB(object)[marker.index,])
shrink.tau2A.BB <- tau2A.BB <- as.matrix(tau2A.BB(object)[marker.index,])
shrink.tau2B.AA <- tau2B.AA <- as.matrix(tau2B.AA(object)[marker.index,])
shrink.corrAA <- corrAA <- as.matrix(corrAA(object)[marker.index, ])
shrink.corrAB <- corrAB <- as.matrix(corrAB(object)[marker.index, ])
shrink.corrBB <- corrBB <- as.matrix(corrBB(object)[marker.index, ])
flags <- as.matrix(flags(object)[marker.index, ])
for(k in seq(along=batches)){
B <- batches[[k]]
this.batch <- unique(as.character(batch(object)[B]))
medianA[[k]] <- cbind(medianA.AA[, k], medianA.AB[, k], medianA.BB[, k])
medianB[[k]] <- cbind(medianB.AA[, k], medianB.AB[, k], medianB.BB[, k])
madA <- cbind(madA.AA[, k], madA.AB[, k], madA.BB[, k])
madB <- cbind(madB.AA[, k], madB.AB[, k], madB.BB[, k])
NN <- cbind(N.AA[, k], N.AB[, k], N.BB[, k])
##RS: estimate DF.PRIOR
shrink.madA[[k]] <- shrink(madA, NN, DF.PRIOR)
shrink.madB[[k]] <- shrink(madB, NN, DF.PRIOR)
## an estimate of the background variance is the MAD
## of the log2(allele A) intensities among subjects with
## genotypes BB
shrink.tau2A.BB[, k] <- shrink(tau2A.BB[, k, drop=FALSE], NN[, 3], DF.PRIOR)[, drop=FALSE]
shrink.tau2B.AA[, k] <- shrink(tau2B.AA[, k, drop=FALSE], NN[, 1], DF.PRIOR)[, drop=FALSE]
## an estimate of the signal variance is the MAD
## of the log2(allele A) intensities among subjects with
## genotypes AA
shrink.tau2A.AA[, k] <- shrink(tau2A.AA[, k, drop=FALSE], NN[, 1], DF.PRIOR)[, drop=FALSE]
shrink.tau2B.BB[, k] <- shrink(tau2B.BB[, k, drop=FALSE], NN[, 3], DF.PRIOR)[, drop=FALSE]
cor.AA <- corrAA[, k, drop=FALSE]
cor.AB <- corrAB[, k, drop=FALSE]
cor.BB <- corrBB[, k, drop=FALSE]
shrink.corrAA[, k] <- shrink(cor.AA, NN[, 1], DF.PRIOR)
shrink.corrAB[, k] <- shrink(cor.AB, NN[, 2], DF.PRIOR)
shrink.corrBB[, k] <- shrink(cor.BB, NN[, 3], DF.PRIOR)
|
66900fea |
##
|
6846d583 |
##---------------------------------------------------------------------------
## SNPs that we'll use for imputing location/scale of unobserved genotypes
##---------------------------------------------------------------------------
index.complete <- indexComplete(NN, medianA[[k]], medianB[[k]], MIN.OBS)
|
95a5f141 |
if(length(index.complete) == 1){
if(index.complete == FALSE) return()
}
|
66900fea |
##
|
6846d583 |
##---------------------------------------------------------------------------
## Impute sufficient statistics for unobserved genotypes (plate-specific)
##---------------------------------------------------------------------------
unobservedAA <- NN[, 1] < MIN.OBS
unobservedAB <- NN[, 2] < MIN.OBS
unobservedBB <- NN[, 3] < MIN.OBS
unobserved.index <- vector("list", 3)
unobserved.index[[1]] <- which(unobservedAA & (NN[, 2] >= MIN.OBS & NN[, 3] >= MIN.OBS))
unobserved.index[[2]] <- which(unobservedAB & (NN[, 1] >= MIN.OBS & NN[, 3] >= MIN.OBS))
unobserved.index[[3]] <- which(unobservedBB & (NN[, 2] >= MIN.OBS & NN[, 1] >= MIN.OBS))
res <- imputeCenter(medianA[[k]], medianB[[k]], index.complete, unobserved.index)
medianA[[k]] <- res[[1]]
medianB[[k]] <- res[[2]]
rm(res)
##the NA's in 'medianA' and 'medianB' are monomorphic if MIN.OBS = 1
|
66900fea |
##
|
6846d583 |
## RS: For Monomorphic SNPs a mixture model may be better
## RS: Further, we can improve estimation by borrowing strength across batch
unobserved.index[[1]] <- which(unobservedAA & unobservedAB)
unobserved.index[[2]] <- which(unobservedBB & unobservedAB)
unobserved.index[[3]] <- which(unobservedAA & unobservedBB) ## strange
res <- imputeCentersForMonomorphicSnps(medianA[[k]], medianB[[k]],
index.complete,
unobserved.index)
medianA[[k]] <- res[[1]]; medianB[[k]] <- res[[2]]
rm(res)
negA <- rowSums(medianA[[k]] < 0) > 0
negB <- rowSums(medianB[[k]] < 0) > 0
flags[, k] <- as.integer(rowSums(NN == 0) > 0 | negA | negB)
}
flags(object)[marker.index, ] <- flags
medianA.AA(object)[marker.index, ] <- do.call("cbind", lapply(medianA, function(x) x[, 1]))
medianA.AB(object)[marker.index, ] <- do.call("cbind", lapply(medianA, function(x) x[, 2]))
medianA.BB(object)[marker.index, ] <- do.call("cbind", lapply(medianA, function(x) x[, 3]))
medianB.AA(object)[marker.index, ] <- do.call("cbind", lapply(medianB, function(x) x[, 1]))
medianB.AB(object)[marker.index, ] <- do.call("cbind", lapply(medianB, function(x) x[, 2]))
medianB.BB(object)[marker.index, ] <- do.call("cbind", lapply(medianB, function(x) x[, 3]))
|
66900fea |
##
|
6846d583 |
madA.AA(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madA, function(x) x[, 1]))
madA.AB(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madA, function(x) x[, 2]))
madA.BB(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madA, function(x) x[, 3]))
madB.AA(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madB, function(x) x[, 1]))
madB.AB(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madB, function(x) x[, 2]))
madB.BB(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madB, function(x) x[, 3]))
|
66900fea |
##
|
6846d583 |
corrAA(object)[marker.index, ] <- shrink.corrAA
corrAB(object)[marker.index, ] <- shrink.corrAB
corrBB(object)[marker.index, ] <- shrink.corrBB
tau2A.AA(object)[marker.index,] <- shrink.tau2A.AA
tau2A.BB(object)[marker.index,] <- shrink.tau2A.BB
tau2B.AA(object)[marker.index,] <- shrink.tau2B.AA
tau2B.BB(object)[marker.index,] <- shrink.tau2B.BB
|
66900fea |
if(is.lds) return(TRUE) else return(object)
|
6846d583 |
}
fit.lm1 <- function(strata,
|
d08acba1 |
index.list,
|
d5bc779e |
object,
MIN.SAMPLES,
THR.NU.PHI,
MIN.NU,
MIN.PHI,
|
6846d583 |
verbose, is.lds,
CHR.X, ...){
if(is.lds) {physical <- get("physical"); open(object)}
if(verbose) message("Probe stratum ", strata, " of ", length(index.list))
snps <- index.list[[strata]]
batches <- split(seq_along(batch(object)), as.character(batch(object)))
|
ea66649b |
batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
|
d08acba1 |
batchnames <- batchNames(object)
|
6846d583 |
N.AA <- as.matrix(N.AA(object)[snps, ])
N.AB <- as.matrix(N.AB(object)[snps, ])
N.BB <- as.matrix(N.BB(object)[snps, ])
medianA.AA <- as.matrix(medianA.AA(object)[snps,])
medianA.AB <- as.matrix(medianA.AB(object)[snps,])
medianA.BB <- as.matrix(medianA.BB(object)[snps,])
medianB.AA <- as.matrix(medianB.AA(object)[snps,])
medianB.AB <- as.matrix(medianB.AB(object)[snps,])
medianB.BB <- as.matrix(medianB.BB(object)[snps,])
madA.AA <- as.matrix(madA.AA(object)[snps,])
madA.AB <- as.matrix(madA.AB(object)[snps,])
madA.BB <- as.matrix(madA.BB(object)[snps,])
madB.AA <- as.matrix(madB.AA(object)[snps,])
madB.AB <- as.matrix(madB.AB(object)[snps,])
madB.BB <- as.matrix(madB.BB(object)[snps,])
tau2A.AA <- as.matrix(tau2A.AA(object)[snps,])
tau2B.BB <- as.matrix(tau2B.BB(object)[snps,])
tau2A.BB <- as.matrix(tau2A.BB(object)[snps,])
tau2B.AA <- as.matrix(tau2B.AA(object)[snps,])
corrAA <- as.matrix(corrAA(object)[snps, ])
corrAB <- as.matrix(corrAB(object)[snps, ])
corrBB <- as.matrix(corrBB(object)[snps, ])
nuA <- as.matrix(nuA(object)[snps, ])
phiA <- as.matrix(phiA(object)[snps, ])
nuB <- as.matrix(nuB(object)[snps, ])
phiB <- as.matrix(phiB(object)[snps, ])
flags <- as.matrix(flags(object)[snps, ])
for(k in seq(along=batches)){
B <- batches[[k]]
|
66900fea |
if(length(B) < MIN.SAMPLES) next()
|
6846d583 |
this.batch <- unique(as.character(batch(object)[B]))
medianA <- cbind(medianA.AA[, k], medianA.AB[, k], medianA.BB[, k])
medianB <- cbind(medianB.AA[, k], medianB.AB[, k], medianB.BB[, k])
madA <- cbind(madA.AA[, k], madA.AB[, k], madA.BB[, k])
madB <- cbind(madB.AA[, k], madB.AB[, k], madB.BB[, k])
NN <- cbind(N.AA[, k], N.AB[, k], N.BB[, k])
|
a097b918 |
## we're regressing on the medians using the standard errors (hence the division by N) as weights
|
6846d583 |
res <- fit.wls(NN=NN, sigma=madA, allele="A", Y=medianA, autosome=!CHR.X)
nuA[, k] <- res[1, ]
phiA[, k] <- res[2, ]
rm(res)
res <- fit.wls(NN=NN, sigma=madB, allele="B", Y=medianB, autosome=!CHR.X)##allele="B", Ystar=YB, W=wB, Ns=Ns)
nuB[, k] <- res[1, ]
phiB[, k] <- res[2, ]
|
9dfdcb66 |
## cA[, k] <- matrix((1/phiA[, J]*(A-nuA[, J])), nrow(A), ncol(A))
## cB[, k] <- matrix((1/phiB[, J]*(B-nuB[, J])), nrow(B), ncol(B))
|
d5bc779e |
}
|
d08acba1 |
if(THR.NU.PHI){
nuA[nuA < MIN.NU] <- MIN.NU
nuB[nuB < MIN.NU] <- MIN.NU
phiA[phiA < MIN.PHI] <- MIN.PHI
phiB[phiB < MIN.PHI] <- MIN.PHI
}
|
6846d583 |
nuA(object)[snps, ] <- nuA
nuB(object)[snps, ] <- nuB
phiA(object)[snps, ] <- phiA
phiB(object)[snps, ] <- phiB
if(is.lds){
close(object)
|
d08acba1 |
return(TRUE)
} else{
return(object)
}
|
d5bc779e |
}
|
73fd7374 |
## nonpolymorphic markers
|
6846d583 |
fit.lm2 <- function(strata,
|
d08acba1 |
index.list,
|
d5bc779e |
object,
MIN.SAMPLES,
THR.NU.PHI,
MIN.NU,
MIN.PHI,
|
6846d583 |
verbose, is.lds, CHR.X, ...){
if(is.lds) {physical <- get("physical"); open(object)}
if(verbose) message("Probe stratum ", strata, " of ", length(index.list))
marker.index <- index.list[[strata]]
batches <- split(seq_along(batch(object)), as.character(batch(object)))
|
d5bc779e |
batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
|
69ec8644 |
|
d5bc779e |
ii <- isSnp(object) & chromosome(object) < 23 & !is.na(chromosome(object))
|
6846d583 |
flags <- as.matrix(flags(object)[ii, ])
fns <- featureNames(object)[ii]
fns.noflags <- fns[rowSums(flags, na.rm=T) == 0]
snp.index <- sample(match(fns.noflags, featureNames(object)), 5000)
nuA.np <- as.matrix(nuA(object)[marker.index, ])
phiA.np <- as.matrix(phiA(object)[marker.index, ])
tau2A.AA <- as.matrix(tau2A.AA(object)[marker.index, ])
nuA.snp <- as.matrix(nuA(object)[snp.index, ])
nuB.snp <- as.matrix(nuB(object)[snp.index, ])
phiA.snp <- as.matrix(phiA(object)[snp.index, ])
phiB.snp <- as.matrix(phiB(object)[snp.index, ])
medianA.AA <- as.matrix(medianA.AA(object)[snp.index,])
medianB.BB <- as.matrix(medianB.BB(object)[snp.index,])
medianA.AA.np <- as.matrix(medianA.AA(object)[marker.index,])
for(k in seq_along(batches)){
B <- batches[[k]]
this.batch <- unique(as.character(batch(object)[B]))
X <- cbind(1, log2(c(medianA.AA[, k], medianB.BB[, k])))
|
3e4db4b1 |
Y <- log2(c(phiA.snp[, k], phiB.snp[, k]))
|
d5bc779e |
betahat <- solve(crossprod(X), crossprod(X, Y))
|
73fd7374 |
##crosshyb <- max(median(medianA.AA[, k]) - median(medianA.AA.np[, k]), 0)
crosshyb <- 0
|
6846d583 |
X <- cbind(1, log2(medianA.AA.np[, k] + crosshyb))
|
d5bc779e |
logPhiT <- X %*% betahat
|
6846d583 |
phiA.np[, k] <- 2^(logPhiT)
nuA.np[, k] <- medianA.AA.np[,k]-2*phiA.np[, k]
|
29f437a0 |
## cA[, k] <- 1/phiA.np[, J] * (A.np - nuA.np[, J])
|
6846d583 |
}
if(THR.NU.PHI){
nuA.np[nuA.np < MIN.NU] <- MIN.NU
phiA.np[phiA.np < MIN.PHI] <- MIN.PHI
|
d5bc779e |
}
|
6846d583 |
nuA(object)[marker.index, ] <- nuA.np
phiA(object)[marker.index, ] <- phiA.np
if(is.lds) { close(object); return(TRUE)}
return(object)
|
d5bc779e |
}
|
6846d583 |
summarizeMaleXNps <- function(marker.index,
batches,
object, MIN.SAMPLES){
nr <- length(marker.index)
nc <- length(batchNames(object))
NN.Mlist <- imputed.medianA <- imputed.medianB <- shrink.madA <- shrink.madB <- vector("list", nc)
gender <- object$gender
AA <- as.matrix(A(object)[marker.index, gender==1])
madA.AA <- medianA.AA <- matrix(NA, nr, nc)
numberMenPerBatch <- rep(NA, nc)
for(k in seq_along(batches)){
B <- batches[[k]]
this.batch <- unique(as.character(batch(object)[B]))
gender <- object$gender[B]
if(sum(gender==1) < MIN.SAMPLES) next()
sns.batch <- sampleNames(object)[B]
##subset GG apppriately
sns <- colnames(AA)
J <- sns%in%sns.batch
numberMenPerBatch[k] <- length(J)
medianA.AA[, k] <- rowMedians(AA[, J], na.rm=TRUE)
madA.AA[, k] <- rowMAD(AA[, J], na.rm=TRUE)
}
return(list(medianA.AA=medianA.AA,
madA.AA=madA.AA))
}
summarizeMaleXGenotypes <- function(marker.index,
batches,
object,
GT.CONF.THR,
MIN.OBS,
MIN.SAMPLES,
verbose,
is.lds,
DF.PRIOR,...){
nr <- length(marker.index)
nc <- length(batchNames(object))
NN.Mlist <- imputed.medianA <- imputed.medianB <- shrink.madA <- shrink.madB <- vector("list", nc)
gender <- object$gender
GG <- as.matrix(calls(object)[marker.index, gender==1])
CP <- as.matrix(snpCallProbability(object)[marker.index, gender==1])
AA <- as.matrix(A(object)[marker.index, gender==1])
BB <- as.matrix(B(object)[marker.index, gender==1])
for(k in seq_along(batches)){
B <- batches[[k]]
this.batch <- unique(as.character(batch(object)[B]))
gender <- object$gender[B]
if(sum(gender==1) < MIN.SAMPLES) next()
sns.batch <- sampleNames(object)[B]
##subset GG apppriately
sns <- colnames(GG)
J <- sns%in%sns.batch
G <- GG[, J]
xx <- CP[, J]
highConf <- (1-exp(-xx/1000)) > GT.CONF.THR
G <- G*highConf
A <- AA[, J]
B <- BB[, J]
G.AA <- G==1
G.AA[G.AA==FALSE] <- NA
G.AB <- G==2
G.AB[G.AB==FALSE] <- NA
G.BB <- G==3
G.BB[G.BB==FALSE] <- NA
N.AA.M <- rowSums(G.AA, na.rm=TRUE)
N.AB.M <- rowSums(G.AB, na.rm=TRUE)
N.BB.M <- rowSums(G.BB, na.rm=TRUE)
summaryStats <- function(X, INT, FUNS){
tmp <- matrix(NA, nrow(X), length(FUNS))
for(j in seq_along(FUNS)){
FUN <- match.fun(FUNS[j])
tmp[, j] <- FUN(X*INT, na.rm=TRUE)
}
tmp
}
statsA.AA <- summaryStats(G.AA, A, FUNS=c("rowMedians", "rowMAD"))
statsA.AB <- summaryStats(G.AB, A, FUNS=c("rowMedians", "rowMAD"))
statsA.BB <- summaryStats(G.BB, A, FUNS=c("rowMedians", "rowMAD"))
statsB.AA <- summaryStats(G.AA, B, FUNS=c("rowMedians", "rowMAD"))
statsB.AB <- summaryStats(G.AB, B, FUNS=c("rowMedians", "rowMAD"))
statsB.BB <- summaryStats(G.BB, B, FUNS=c("rowMedians", "rowMAD"))
medianA <- cbind(statsA.AA[, 1], statsA.AB[, 1], statsA.BB[, 1])
medianB <- cbind(statsB.AA[, 1], statsB.AB[, 1], statsB.BB[, 1])
madA <- cbind(statsA.AA[, 1], statsA.AB[, 1], statsA.BB[, 1])
madB <- cbind(statsB.AA[, 1], statsB.AB[, 1], statsB.BB[, 1])
rm(statsA.AA, statsA.AB, statsA.BB, statsB.AA, statsB.AB, statsB.BB)
NN.M <- cbind(N.AA.M, N.AB.M, N.BB.M)
NN.Mlist[[k]] <- NN.M
shrink.madA[[k]] <- shrink(madA, NN.M, DF.PRIOR)
shrink.madB[[k]] <- shrink(madB, NN.M, DF.PRIOR)
##---------------------------------------------------------------------------
## SNPs that we'll use for imputing location/scale of unobserved genotypes
##---------------------------------------------------------------------------
index.complete <- indexComplete(NN.M[, -2], medianA, medianB, MIN.OBS)
|
95a5f141 |
if(length(index.complete) == 1){
if(index.complete == FALSE) return()
}
|
6846d583 |
##---------------------------------------------------------------------------
## Impute sufficient statistics for unobserved genotypes (plate-specific)
##---------------------------------------------------------------------------
res <- imputeCenterX(medianA, medianB, NN.M, index.complete, MIN.OBS)
imputed.medianA[[k]] <- res[[1]]
imputed.medianB[[k]] <- res[[2]]
}
return(list(madA=shrink.madA,
madB=shrink.madB,
NN.M=NN.Mlist,
medianA=imputed.medianA,
medianB=imputed.medianB))
}
## X chromosome, SNPs
fit.lm3 <- function(strata,
|
d08acba1 |
index.list,
|
d5bc779e |
object,
SNRMin,
MIN.SAMPLES,
MIN.OBS,
DF.PRIOR,
GT.CONF.THR,
THR.NU.PHI,
MIN.NU,
MIN.PHI,
|
6846d583 |
verbose, is.lds, CHR.X, ...){
if(is.lds) {physical <- get("physical"); open(object)}
if(verbose) message("Probe stratum ", strata, " of ", length(index.list))
|
d5bc779e |
gender <- object$gender
|
6846d583 |
enough.males <- sum(gender==1) > MIN.SAMPLES
enough.females <- sum(gender==2) > MIN.SAMPLES
if(!enough.males & !enough.females){
message(paste("fewer than", MIN.SAMPLES, "men and women. Copy number not estimated for CHR X"))
return(object)
}
marker.index <- index.list[[strata]]
batches <- split(seq_along(batch(object)), as.character(batch(object)))
batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
nuA <- as.matrix(nuA(object)[marker.index, ])
nuB <- as.matrix(nuB(object)[marker.index, ])
phiA <- as.matrix(phiA(object)[marker.index, ])
phiB <- as.matrix(phiB(object)[marker.index, ])
phiA2 <- as.matrix(phiPrimeA(object)[marker.index, ])
phiB2 <- as.matrix(phiPrimeB(object)[marker.index, ])
if(enough.males){
res <- summarizeMaleXGenotypes(marker.index=marker.index, batches=batches,
object=object, GT.CONF.THR=GT.CONF.THR,
MIN.SAMPLES=MIN.SAMPLES,
MIN.OBS=MIN.OBS,
verbose=verbose, is.lds=is.lds,
DF.PRIOR=DF.PRIOR/2)
madA.Mlist <- res[["madA"]]
madB.Mlist <- res[["madB"]]
medianA.Mlist <- res[["medianA"]]
medianB.Mlist <- res[["medianB"]]
NN.Mlist <- res[["NN.M"]]
rm(res)
## Need N, median, mad
}
if(enough.females){
N.AA.F <- as.matrix(N.AA(object)[marker.index, ])
N.AB.F <- as.matrix(N.AB(object)[marker.index, ])
N.BB.F <- as.matrix(N.BB(object)[marker.index, ])
medianA.AA <- as.matrix(medianA.AA(object)[marker.index,])
medianA.AB <- as.matrix(medianA.AB(object)[marker.index,])
medianA.BB <- as.matrix(medianA.BB(object)[marker.index,])
medianB.AA <- as.matrix(medianB.AA(object)[marker.index,])
medianB.AB <- as.matrix(medianB.AB(object)[marker.index,])
medianB.BB <- as.matrix(medianB.BB(object)[marker.index,])
madA.AA <- as.matrix(madA.AA(object)[marker.index,])
madA.AB <- as.matrix(madA.AB(object)[marker.index,])
madA.BB <- as.matrix(madA.BB(object)[marker.index,])
madB.AA <- as.matrix(madB.AA(object)[marker.index,])
madB.AB <- as.matrix(madB.AB(object)[marker.index,])
madB.BB <- as.matrix(madB.BB(object)[marker.index,])
}
for(k in seq_along(batches)){
B <- batches[[k]]
this.batch <- unique(as.character(batch(object)[B]))
gender <- object$gender[B]
enough.men <- sum(gender==1) >= MIN.SAMPLES
enough.women <- sum(gender==2) >= MIN.SAMPLES
if(!enough.men & !enough.women) {
if(verbose) message(paste("fewer than", MIN.SAMPLES, "men and women in batch", this.batch, ". CHR X copy number not available. "))
next()
|
d5bc779e |
}
|
6846d583 |
if(enough.women){
medianA.F <- cbind(medianA.AA[, k], medianA.AB[, k], medianA.BB[, k])
medianB.F <- cbind(medianB.AA[, k], medianB.AB[, k], medianB.BB[, k])
madA.F <- cbind(madA.AA[, k], madA.AB[, k], madA.BB[, k])
madB.F <- cbind(madB.AA[, k], madB.AB[, k], madB.BB[, k])
NN.F <- cbind(N.AA.F[, k], N.AB.F[, k], N.BB.F[, k])
}
if(enough.men){
madA.M <- madA.Mlist[[k]]
madB.M <- madB.Mlist[[k]]
medianA.M <- medianA.Mlist[[k]]
medianB.M <- medianB.Mlist[[k]]
NN.M <- NN.Mlist[[k]]
}
if(enough.men & enough.women){
betas <- fit.wls(cbind(NN.M[, c(1,3)], NN.F),
sigma=cbind(madA.M[, c(1,3)], madA.F),
allele="A",
Y=cbind(medianA.M[, c(1,3)], medianA.F),
autosome=FALSE)
nuA[, k] <- betas[1, ]
phiA[, k] <- betas[2, ]
phiA2[, k] <- betas[3, ]
betas <- fit.wls(cbind(NN.M[, c(1,3)], NN.F),
sigma=cbind(madB.M[, c(1,3)], madB.F),
allele="B",
Y=cbind(medianB.M[, c(1,3)], medianB.F),
autosome=FALSE)
nuB[, k] <- betas[1, ]
phiB[, k] <- betas[2, ]
phiB2[, k] <- betas[3, ]
|
d5bc779e |
}
|
6846d583 |
if(enough.men & !enough.women){
betas <- fit.wls(NN.M[, c(1,3)],
sigma=madA.M[, c(1,3)],
allele="A",
Y=medianA.M[, c(1,3)],
autosome=FALSE,
X=cbind(1, c(0, 1)))
nuA[, k] <- betas[1, ]
phiA[, k] <- betas[2, ]
betas <- fit.wls(NN.M[, c(1,3)],
sigma=madB.M[, c(1,3)],
allele="B",
Y=medianB.M[, c(1,3)],
autosome=FALSE,
X=cbind(1, c(0, 1)))
nuB[, k] <- betas[1, ]
phiB[, k] <- betas[2, ]
}
if(!enough.men & enough.women){
betas <- fit.wls(NN.F,
sigma=madA.F,
allele="A",
Y=medianA.F,
autosome=TRUE) ## can just use the usual design matrix for the women-only analysis
nuA[, k] <- betas[1, ]
phiA[, k] <- betas[2, ]
betas <- fit.wls(NN.F,
sigma=madB.F,
allele="B",
Y=medianB.F,
autosome=TRUE) ## can just use the usual design matrix for the women-only analysis
nuB[, k] <- betas[1, ]
phiB[, k] <- betas[2, ]
|
d5bc779e |
}
}
|
6846d583 |
if(THR.NU.PHI){
nuA[nuA < MIN.NU] <- MIN.NU
nuB[nuB < MIN.NU] <- MIN.NU
phiA[phiA < MIN.PHI] <- MIN.PHI
|
f3d40144 |
phiA2[phiA2 < 1] <- 1
|
6846d583 |
phiB[phiB < MIN.PHI] <- MIN.PHI
|
f3d40144 |
phiB2[phiB2 < 1] <- 1
|
6846d583 |
}
nuA(object)[marker.index, ] <- nuA
nuB(object)[marker.index, ] <- nuB
phiA(object)[marker.index, ] <- phiA
phiB(object)[marker.index, ] <- phiB
phiPrimeA(object)[marker.index, ] <- phiA2
phiPrimeB(object)[marker.index, ] <- phiB2
|
66900fea |
if(is.lds) {close(object); return(TRUE)} else return(object)
|
d5bc779e |
}
|
6846d583 |
fit.lm4 <- function(strata,
|
d08acba1 |
index.list,
|
d5bc779e |
object,
MIN.SAMPLES,
THR.NU.PHI,
MIN.NU,
MIN.PHI,
|
6846d583 |
verbose, is.lds, ...){
if(is.lds) {physical <- get("physical"); open(object)}
|
d5bc779e |
gender <- object$gender
|
6846d583 |
enough.males <- sum(gender==1) > MIN.SAMPLES
enough.females <- sum(gender==2) > MIN.SAMPLES
if(!enough.males & !enough.females){
message(paste("fewer than", MIN.SAMPLES, "men and women. Copy number not estimated for CHR X"))
return(object)
}
if(verbose) message("Probe stratum ", strata, " of ", length(index.list))
marker.index <- index.list[[strata]]
batches <- split(seq_along(batch(object)), as.character(batch(object)))
batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
nc <- length(batchNames(object))
if(enough.males){
res <- summarizeMaleXNps(marker.index=marker.index,
batches=batches,
object=object, MIN.SAMPLES=MIN.SAMPLES)
medianA.AA.M <- res[["medianA.AA"]]
madA.AA.M <- res[["madA.AA"]]
|
d5bc779e |
|
6846d583 |
}
medianA.AA.F <- as.matrix(medianA.AA(object)[marker.index, ]) ## median for women
madA.AA.F <- as.matrix(madA.AA(object)[marker.index, ]) ## median for women
split.gender <- split(gender, as.character(batch(object)))
N.M <- sapply(split.gender, function(x) sum(x==1))
N.F <- sapply(split.gender, function(x) sum(x==2))
nuA <- as.matrix(nuA(object)[marker.index, ])
nuB <- as.matrix(nuB(object)[marker.index, ])
phiA <- as.matrix(phiA(object)[marker.index, ])
phiB <- as.matrix(phiB(object)[marker.index, ])
ii <- isSnp(object) & chromosome(object) < 23 & !is.na(chromosome(object))
fns <- featureNames(object)[ii]
flags <- as.matrix(flags(object)[ii, ])
fns.noflags <- fns[rowSums(flags, na.rm=T) == 0]
snp.index <- sample(match(fns.noflags, featureNames(object)), 10000)
N.AA <- as.matrix(N.AA(object)[snp.index, ])
N.AB <- as.matrix(N.AA(object)[snp.index, ])
N.BB <- as.matrix(N.AA(object)[snp.index, ])
enoughAA <- rowSums(N.AA < 5) == 0
enoughAB <- rowSums(N.AB < 5) == 0
enoughBB <- rowSums(N.BB < 5) == 0
snp.index <- snp.index[enoughAA & enoughAB & enoughBB]
stopifnot(length(snp.index) > 100)
nuA.snp.notmissing <- rowSums(is.na(as.matrix(nuA(object)[snp.index, ]))) == 0
nuA.snp.notnegative <- rowSums(as.matrix(nuA(object)[snp.index, ]) < 20) == 0
snp.index <- snp.index[nuA.snp.notmissing & nuA.snp.notnegative]
stopifnot(length(snp.index) > 100)
medianA.AA.snp <- as.matrix(medianA.AA(object)[snp.index,])
medianB.BB.snp <- as.matrix(medianB.BB(object)[snp.index,])
nuA.snp <- as.matrix(nuA(object)[snp.index, ])
nuB.snp <- as.matrix(nuB(object)[snp.index, ])
phiA.snp <- as.matrix(phiA(object)[snp.index, ])
phiB.snp <- as.matrix(phiB(object)[snp.index, ])
## pseudoAR <- position(object)[snp.index] < 2709520 | (position(object)[snp.index] > 154584237 & position(object)[snp.index] < 154913754)
## pseudoAR[is.na(pseudoAR)] <- FALSE
for(k in seq_along(batches)){
B <- batches[[k]]
this.batch <- unique(as.character(batch(object)[B]))
gender <- object$gender[B]
enough.men <- N.M[k] >= MIN.SAMPLES
enough.women <- N.F[k] >= MIN.SAMPLES
if(!enough.men & !enough.women) {
if(verbose) message(paste("fewer than", MIN.SAMPLES, "men and women in batch", this.batch, ". CHR X copy number not available. "))
next()
}
tmp <- cbind(medianA.AA.snp[, k], medianB.BB.snp[,k], phiA.snp[, k], phiB.snp[, k])
tmp <- tmp[rowSums(is.na(tmp) == 0) & rowSums(tmp < 20) == 0, ]
|
4191b5f9 |
if(nrow(tmp) < 100){
stop("too few markers for estimating nonpolymorphic CN on chromosome X")
}
|
6846d583 |
X <- cbind(1, log2(c(tmp[, 1], tmp[, 2])))
Y <- log2(c(tmp[, 3], tmp[, 4]))
|
d5bc779e |
betahat <- solve(crossprod(X), crossprod(X, Y))
|
910549db |
if(enough.men){
|
4191b5f9 |
X.men <- cbind(1, log2(medianA.AA.M[, k]))
|
910549db |
Yhat1 <- as.numeric(X.men %*% betahat)
## put intercept and slope for men in nuB and phiB
phiB[, k] <- 2^(Yhat1)
|
cb875a5a |
nuB[, k] <- medianA.AA.M[, k] - 1*phiB[, k]
|
910549db |
}
|
4191b5f9 |
X.fem <- cbind(1, log2(medianA.AA.F[, k]))
|
6846d583 |
Yhat2 <- as.numeric(X.fem %*% betahat)
phiA[, k] <- 2^(Yhat2)
|
4191b5f9 |
nuA[, k] <- medianA.AA.F[, k] - 2*phiA[, k]
|
6846d583 |
}
if(THR.NU.PHI){
nuA[nuA < MIN.NU] <- MIN.NU
phiA[phiA < MIN.PHI] <- MIN.PHI
nuB[nuB < MIN.NU] <- MIN.NU
phiB[phiB < MIN.PHI] <- MIN.PHI
}
nuA(object)[marker.index, ] <- nuA
phiA(object)[marker.index, ] <- phiA
nuB(object)[marker.index, ] <- nuB
phiB(object)[marker.index, ] <- phiB
if(is.lds) {close(object); return(TRUE)} else return(object)
|
d5bc779e |
}
|
82be31d4 |
|
5fcfed7d |
whichPlatform <- function(cdfName){
index <- grep("genomewidesnp", cdfName)
if(length(index) > 0){
platform <- "affymetrix"
} else{
index <- grep("human", cdfName)
platform <- "illumina"
}
return(platform)
}
|
50b0d125 |
|
d08acba1 |
cnrma <- function(A, filenames, row.names, verbose=TRUE, seed=1, cdfName, sns){
|
d5bc779e |
if(missing(cdfName)) stop("must specify cdfName")
pkgname <- getCrlmmAnnotationName(cdfName)
require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available")
if (missing(sns)) sns <- basename(filenames)
if(verbose) message("Loading annotations for nonpolymorphic probes")
loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
fid <- getVarInEnv("npProbesFid")
|
d08acba1 |
if(cdfName=="genomewidesnp6"){
loader("1m_reference_cn.rda", .crlmmPkgEnv, pkgname)
}
if(cdfName=="genomewidesnp5"){
loader("5.0_reference_cn.rda", .crlmmPkgEnv, pkgname)
}
reference <- getVarInEnv("reference")
fid <- fid[match(row.names, names(fid))]
np.index <- match(row.names, rownames(A))
gns <- names(fid)
set.seed(seed)
|
02a10636 |
##idx2 <- sample(length(fid), 10^5)
|
d08acba1 |
for (k in seq_along(filenames)){
y <- as.matrix(read.celfile(filenames[k], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
A[np.index, k] <- as.integer(normalize.quantiles.use.target(y, target=reference))
}
return(A)
}
cnrma2 <- function(A, filenames, row.names, verbose=TRUE, seed=1, cdfName, sns){
if(missing(cdfName)) stop("must specify cdfName")
pkgname <- getCrlmmAnnotationName(cdfName)
require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available")
if (missing(sns)) sns <- basename(filenames)
|
d5bc779e |
sampleBatches <- splitIndicesByNode(seq(along=filenames))
if(verbose) message("Processing nonpolymorphic probes for ", length(filenames), " files.")
|
d08acba1 |
## updates A
|
d5bc779e |
ocLapply(sampleBatches,
processCEL2,
row.names=row.names,
filenames=filenames,
A=A,
seed=seed,
pkgname=pkgname,
cdfName=cdfName,
neededPkgs=c("crlmm", pkgname))
|
d08acba1 |
##list(sns=sns, gns=row.names, SKW=SKW, cdfName=cdfName)
return(A)
|
d5bc779e |
}
|
d08acba1 |
processCEL2 <- function(i, filenames, row.names, A, seed, cdfName, pkgname){
|
d5bc779e |
if(cdfName=="genomewidesnp6"){
loader("1m_reference_cn.rda", .crlmmPkgEnv, pkgname)
}
if(cdfName=="genomewidesnp5"){
loader("5.0_reference_cn.rda", .crlmmPkgEnv, pkgname)
}
reference <- getVarInEnv("reference")
loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
fid <- getVarInEnv("npProbesFid")
|
b0d97274 |
row.names <- row.names[row.names %in% names(fid)] ##removes chromosome Y
|
d5bc779e |
fid <- fid[match(row.names, names(fid))]
|
b0d97274 |
##stopifnot(all.equal(names(fid), row.names))
|
d5bc779e |
np.index <- match(row.names, rownames(A))
gns <- names(fid)
set.seed(seed)
|
02a10636 |
##idx2 <- sample(length(fid), 10^5)
|
d5bc779e |
for (k in i){
y <- as.matrix(read.celfile(filenames[k], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
A[np.index, k] <- as.integer(normalize.quantiles.use.target(y, target=reference))
}
|
d08acba1 |
return(TRUE)
|
d5bc779e |
}
|
50b0d125 |
|
3c6c270e |
imputeCenter <- function(muA, muB, index.complete, index.missing){
index <- index.missing
mnA <- muA
mnB <- muB
for(j in 1:3){
if(length(index[[j]]) == 0) next()
X <- cbind(1, mnA[index.complete, -j, drop=FALSE], mnB[index.complete, -j, drop=FALSE])
Y <- cbind(mnA[index.complete, j], mnB[index.complete, j])
betahat <- solve(crossprod(X), crossprod(X,Y))
X <- cbind(1, mnA[index[[j]], -j, drop=FALSE], mnB[index[[j]], -j, drop=FALSE])
mus <- X %*% betahat
muA[index[[j]], j] <- mus[, 1]
muB[index[[j]], j] <- mus[, 2]
}
list(muA, muB)
}
|
6846d583 |
indexComplete <- function(NN, medianA, medianB, MIN.OBS){
Nindex <- which(rowSums(NN > MIN.OBS) == ncol(NN))
correct.order <- which(medianA[, 1] > medianA[, ncol(medianA)] & medianB[, ncol(medianB)] > medianB[, 1])
index.complete <- intersect(Nindex, correct.order)
size <- min(5000, length(index.complete))
if(size == 5000) index.complete <- sample(index.complete, 5000, replace=TRUE)
if(length(index.complete) < 100){
|
8247639c |
warning("fewer than 100 snps pass criteria for imputing unobserve dgenotype location/scale")
return(FALSE)
|
6846d583 |
}
return(index.complete)
}
imputeCentersForMonomorphicSnps <- function(medianA, medianB, index.complete, unobserved.index){
cols <- c(3, 1, 2)
for(j in 1:3){
|
66900fea |
if(length(unobserved.index[[j]]) == 0) next()
|
6846d583 |
kk <- cols[j]
X <- cbind(1, medianA[index.complete, kk], medianB[index.complete, kk])
Y <- cbind(medianA[index.complete, -kk],
medianB[index.complete, -kk])
betahat <- solve(crossprod(X), crossprod(X,Y))
X <- cbind(1, medianA[unobserved.index[[j]], kk], medianB[unobserved.index[[j]], kk])
mus <- X %*% betahat
medianA[unobserved.index[[j]], -kk] <- mus[, 1:2]
medianB[unobserved.index[[j]], -kk] <- mus[, 3:4]
}
list(medianA=medianA, medianB=medianB)
}
|
3c6c270e |
imputeCenterX <- function(muA, muB, Ns, index.complete, MIN.OBS){
index1 <- which(Ns[, 1] == 0 & Ns[, 3] > MIN.OBS)
if(length(index1) > 0){
|
6846d583 |
X <- cbind(1, muA[index.complete, 3], muB[index.complete, 3])
Y <- cbind(1, muA[index.complete, 1], muB[index.complete, 1])
## X <- cbind(1, muA[index.complete[[1]], 3], muB[index.complete[[1]], 3])
## Y <- cbind(1, muA[index.complete[[1]], 1], muB[index.complete[[1]], 1])
|
3c6c270e |
betahat <- solve(crossprod(X), crossprod(X,Y))
##now with the incomplete SNPs
X <- cbind(1, muA[index1, 3], muB[index1, 3])
mus <- X %*% betahat
muA[index1, 1] <- mus[, 2]
muB[index1, 1] <- mus[, 3]
}
index1 <- which(Ns[, 3] == 0)
if(length(index1) > 0){
|
6846d583 |
X <- cbind(1, muA[index.complete, 1], muB[index.complete, 1])
Y <- cbind(1, muA[index.complete, 3], muB[index.complete, 3])
## X <- cbind(1, muA[index.complete[[2]], 1], muB[index.complete[[2]], 1])
## Y <- cbind(1, muA[index.complete[[2]], 3], muB[index.complete[[2]], 3])
|
3c6c270e |
betahat <- solve(crossprod(X), crossprod(X,Y))
##now with the incomplete SNPs
X <- cbind(1, muA[index1, 1], muB[index1, 1])
mus <- X %*% betahat
muA[index1, 3] <- mus[, 2]
muB[index1, 3] <- mus[, 3]
}
list(muA, muB)
}
|
50b0d125 |
|
a13f66d1 |
|
eb0b8dd0 |
## constructors for Illumina platform
constructIlluminaFeatureData <- function(gns, cdfName){
|
69ec8644 |
pkgname <- paste(cdfName, "Crlmm", sep="")
|
eb0b8dd0 |
path <- system.file("extdata", package=pkgname)
load(file.path(path, "cnProbes.rda"))
load(file.path(path, "snpProbes.rda"))
cnProbes$chr <- chromosome2integer(cnProbes$chr)
cnProbes <- as.matrix(cnProbes)
snpProbes$chr <- chromosome2integer(snpProbes$chr)
snpProbes <- as.matrix(snpProbes)
mapping <- rbind(snpProbes, cnProbes, deparse.level=0)
mapping <- mapping[match(gns, rownames(mapping)), ]
isSnp <- 1L-as.integer(gns %in% rownames(cnProbes))
mapping <- cbind(mapping, isSnp, deparse.level=0)
stopifnot(identical(rownames(mapping), gns))
colnames(mapping) <- c("chromosome", "position", "isSnp")
new("AnnotatedDataFrame",
data=data.frame(mapping),
|
69ec8644 |
varMetadata=data.frame(labelDescription=colnames(mapping)))
|
eb0b8dd0 |
}
|
0198a9ad |
constructIlluminaAssayData <- function(np, snp, object, storage.mode="environment", nr){
|
eb0b8dd0 |
stopifnot(identical(snp$gns, featureNames(object)))
gns <- c(featureNames(object), np$gns)
sns <- np$sns
np <- np[1:2]
snp <- snp[1:2]
stripnames <- function(x) {
dimnames(x) <- NULL
x
}
np <- lapply(np, stripnames)
snp <- lapply(snp, stripnames)
|
fd0501ad |
is.ff <- is(snp[[1]], "ff") | is(snp[[1]], "ffdf")
if(is.ff){
|
3e4db4b1 |
lapply(snp, open)
open(calls(object))
open(snpCallProbability(object))
## lapply(np, open)
}
##tmp <- rbind(as.matrix(snp[[1]]), as.matrix(np[[1]]), deparse.level=0)
A.snp <- snp[[1]]
B.snp <- snp[[2]]
##Why is np not a ff object?
A.np <- np[[1]]
B.np <- np[[2]]
nc <- ncol(object)
|
fd0501ad |
if(is.ff){
|
3e4db4b1 |
NA.vec <- rep(NA, nrow(A.np))
AA <- initializeBigMatrix("A", nr, nc, vmode="integer")
BB <- initializeBigMatrix("B", nr, nc, vmode="integer")
GG <- initializeBigMatrix("calls", nr, nc, vmode="integer")
PP <- initializeBigMatrix("confs", nr, nc, vmode="integer")
for(j in 1:ncol(object)){
|
0198a9ad |
AA[, j] <- c(snp[[1]][, j], np[[1]][, j])
BB[, j] <- c(snp[[2]][, j], np[[2]][, j])
GG[, j] <- c(calls(object)[, j], NA.vec)
PP[, j] <- c(snpCallProbability(object)[, j], NA.vec)
|
3e4db4b1 |
}
} else {
|
0198a9ad |
AA <- rbind(snp[[1]], np[[1]], deparse.level=0)
BB <- rbind(snp[[2]], np[[2]], deparse.level=0)
|
3e4db4b1 |
gt <- stripnames(calls(object))
emptyMatrix <- matrix(integer(), nrow(np[[1]]), ncol(A))
|
0198a9ad |
GG <- rbind(gt, emptyMatrix, deparse.level=0)
|
3e4db4b1 |
pr <- stripnames(snpCallProbability(object))
|
0198a9ad |
PP <- rbind(pr, emptyMatrix, deparse.level=0)
|
3e4db4b1 |
}
assayDataNew(storage.mode,
alleleA=AA,
alleleB=BB,
call=GG,
callProbability=PP)
|
eb0b8dd0 |
}
|
57284b5e |
constructIlluminaCNSet <- function(crlmmResult,
|
76f72a5a |
path,
|
57284b5e |
snpFile,
cnFile){
|
76f72a5a |
load(file.path(path, "snpFile.rda"))
|
57284b5e |
res <- get("res")
|
76f72a5a |
load(file.path(path, "cnFile.rda"))
|
69ec8644 |
cnAB <- get("cnAB")
|
4923ac09 |
fD <- constructIlluminaFeatureData(c(res$gns, cnAB$gns), cdfName="human370v1c")
|
0198a9ad |
##new.order <- order(fD$chromosome, fD$position)
##fD <- fD[new.order, ]
aD <- constructIlluminaAssayData(cnAB, res, crlmmResult, nr=nrow(fD))
|
81601623 |
##protocolData(crlmmResult)$batch <- vector("integer", ncol(crlmmResult))
|
3e4db4b1 |
new("CNSet",
call=aD[["call"]],
callProbability=aD[["callProbability"]],
alleleA=aD[["alleleA"]],
alleleB=aD[["alleleB"]],
phenoData=phenoData(crlmmResult),
protocolData=protocolData(crlmmResult),
featureData=fD,
batch=batch,
annotation="human370v1c")
|
0198a9ad |
|
4923ac09 |
}
|
69ec8644 |
|
4923ac09 |
|
76f72a5a |
ellipseCenters <- function(object, index, allele, batch, log.it=TRUE){
|
b5b74b9f |
ubatch <- unique(batch(object))[batch]
|
76f72a5a |
Nu <- nu(object, allele)[index, batch]
Phi <- phi(object, allele)[index, batch]
centers <- list(Nu, Nu+Phi, Nu+2*Phi)
if(log.it)
centers <- lapply(centers, log2)
myLabels <- function(allele){
switch(allele,
A=c("BB", "AB", "AA"),
B=c("AA", "AB", "BB"),
stop("allele must be 'A' or 'B'")
)
}
nms <- myLabels(allele)
names(centers) <- nms
fns <- featureNames(object)[index]
centers$fns <- fns
|
69ec8644 |
return(centers)
|
76f72a5a |
}
|
b5b74b9f |
|
8800496d |
|
6846d583 |
shrinkSummary <- function(object,
|
66900fea |
type=c("SNP", "X.SNP"), ##"X.snps", "X.nps"),
|
6846d583 |
MIN.OBS=1,
MIN.SAMPLES=10,
DF.PRIOR=50,
|
66900fea |
verbose=TRUE,
marker.index,
is.lds){
|
3e4db4b1 |
stopifnot(type[[1]] %in% c("SNP", "X.SNP"))
if(type[[1]] == "X.SNP"){
|
6846d583 |
gender <- object$gender
if(sum(gender == 2) < 3) {
|
910549db |
message("too few females to estimate within genotype summary statistics on CHR X")
return(object)
|
6846d583 |
}
CHR.X <- TRUE
} else CHR.X <- FALSE
|
66900fea |
if(missing(marker.index)){
batch <- batch(object)
is.snp <- isSnp(object)
is.autosome <- chromosome(object) < 23
is.annotated <- !is.na(chromosome(object))
is.X <- chromosome(object) == 23
is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix")
|
df2226f0 |
if(is.lds) stopifnot(isPackageLoaded("ff"))
|
66900fea |
marker.index <- whichMarkers(type[[1]], is.snp, is.autosome, is.annotated, is.X)
|
6846d583 |
}
if(is.lds){
index.list <- splitIndicesByLength(marker.index, ocProbesets())
ocLapply(seq(along=index.list),
|
3e4db4b1 |
shrinkGenotypeSummaries,
|
6846d583 |
index.list=index.list,
object=object,
verbose=verbose,
MIN.OBS=MIN.OBS,
MIN.SAMPLES=MIN.SAMPLES,
DF.PRIOR=DF.PRIOR,
is.lds=is.lds,
neededPkgs="crlmm")
} else {
|
3e4db4b1 |
object <- shrinkGenotypeSummaries(strata=1,
|
6846d583 |
index.list=list(marker.index),
object=object,
|
66900fea |
verbose=verbose,
|
6846d583 |
MIN.OBS=MIN.OBS,
MIN.SAMPLES=MIN.SAMPLES,
DF.PRIOR=DF.PRIOR,
is.lds=is.lds)
}
return(object)
}
genotypeSummary <- function(object,
GT.CONF.THR=0.95,
type=c("SNP", "NP", "X.SNP", "X.NP"), ##"X.snps", "X.nps"),
|
66900fea |
verbose=TRUE,
marker.index,
is.lds){
|
6846d583 |
if(type == "X.SNP" | type=="X.NP"){
gender <- object$gender
if(sum(gender == 2) < 3) {
|
910549db |
message("too few females to estimate within genotype summary statistics on CHR X")
return(object)
|
6846d583 |
}
CHR.X <- TRUE
} else CHR.X <- FALSE
|
66900fea |
if(missing(marker.index)){
batch <- batch(object)
is.snp <- isSnp(object)
is.autosome <- chromosome(object) < 23
is.annotated <- !is.na(chromosome(object))
is.X <- chromosome(object) == 23
is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix")
|
df2226f0 |
if(is.lds) stopifnot(isPackageLoaded("ff"))
|
66900fea |
marker.index <- whichMarkers(type[[1]], is.snp, is.autosome, is.annotated, is.X)
|
6846d583 |
}
summaryFxn <- function(type){
switch(type,
SNP="summarizeSnps",
NP="summarizeNps",
X.SNP="summarizeSnps",
X.NP="summarizeNps")
}
|
a070e5bd |
myf <- summaryFxn(type[[1]])
FUN <- get(myf)
|
6846d583 |
if(is.lds){
index.list <- splitIndicesByLength(marker.index, ocProbesets())
ocLapply(seq(along=index.list),
FUN,
index.list=index.list,
object=object,
batchSize=ocProbesets(),
GT.CONF.THR=GT.CONF.THR,
verbose=verbose,
is.lds=is.lds,
CHR.X=CHR.X,
neededPkgs="crlmm")
|
a070e5bd |
} else{
|
66900fea |
object <- FUN(strata=1,
|
6846d583 |
index.list=list(marker.index),
object=object,
batchSize=ocProbesets(),
GT.CONF.THR=GT.CONF.THR,
verbose=verbose,
|
66900fea |
is.lds=is.lds,
CHR.X=CHR.X)
|
6846d583 |
}
return(object)
}
|
66900fea |
whichMarkers <- function(type, is.snp, is.autosome, is.annotated, is.X){
switch(type,
SNP=which(is.snp & is.autosome & is.annotated),
NP=which(!is.snp & is.autosome),
X.SNP=which(is.snp & is.X),
X.NP=which(!is.snp & is.X),
stop("'type' must be one of 'SNP', 'NP', 'X.SNP', or 'X.NP'")
)
}
|
6846d583 |
summarizeNps <- function(strata, index.list, object, batchSize,
GT.CONF.THR, verbose, is.lds, CHR.X, ...){
if(is.lds) {physical <- get("physical"); open(object)}
if(verbose) message("Probe stratum ", strata, " of ", length(index.list))
index <- index.list[[strata]]
if(CHR.X) {
sample.index <- which(object$gender==2)
batches <- split(sample.index, as.character(batch(object))[sample.index])
} else {
batches <- split(seq_along(batch(object)), as.character(batch(object)))
}
batchnames <- batchNames(object)
nr <- length(index)
nc <- length(batchnames)
N.AA <- medianA.AA <- madA.AA <- tau2A.AA <- matrix(NA, nr, nc)
|
73fd7374 |
is.illumina <- whichPlatform(annotation(object)) == "illumina"
|
6846d583 |
AA <- as.matrix(A(object)[index, ])
|
73fd7374 |
if(is.illumina){
BB <- as.matrix(B(object)[index, ])
AVG <- (AA+BB)/2
A(object)[index, ] <- AVG
AA <- AVG
rm(AVG, BB)
}
|
6846d583 |
for(k in seq_along(batches)){
B <- batches[[k]]
N.AA[, k] <- length(B)
this.batch <- unique(as.character(batch(object)[B]))
j <- match(this.batch, batchnames)
|
73fd7374 |
I.A <- AA[, B]
## if(is.illumina){
## I.B <- BB[, B]
## I.A <- I.A + I.B
## }
medianA.AA[, k] <- rowMedians(I.A, na.rm=TRUE)
madA.AA[, k] <- rowMAD(I.A, na.rm=TRUE)
|
6846d583 |
## log2 Transform Intensities
|
73fd7374 |
I.A <- log2(I.A)
tau2A.AA[, k] <- rowMAD(I.A, na.rm=TRUE)^2
|
6846d583 |
}
N.AA(object)[index,] <- N.AA
medianA.AA(object)[index,] <- medianA.AA
madA.AA(object)[index, ] <- madA.AA
tau2A.AA(object)[index, ] <- tau2A.AA
if(is.lds) return(TRUE) else return(object)
}
summarizeSnps <- function(strata,
index.list,
object,
GT.CONF.THR,
verbose, is.lds, CHR.X, ...){
if(is.lds) {
physical <- get("physical")
open(object)
}
if(verbose) message("Probe stratum ", strata, " of ", length(index.list))
index <- index.list[[strata]]
if(CHR.X) {
sample.index <- which(object$gender==2)
batches <- split(sample.index, as.character(batch(object))[sample.index])
} else {
batches <- split(seq_along(batch(object)), as.character(batch(object)))
}
batchnames <- batchNames(object)
nr <- length(index)
nc <- length(batchnames)
statsA.AA <- statsA.AB <- statsA.BB <- statsB.AA <- statsB.AB <- statsB.BB <- vector("list", nc)
corrAA <- corrAB <- corrBB <- tau2A.AA <- tau2A.BB <- tau2B.AA <- tau2B.BB <- matrix(NA, nr, nc)
Ns.AA <- Ns.AB <- Ns.BB <- matrix(NA, nr, nc)
|
0198a9ad |
if(verbose) message(" Begin reading...")
|
6846d583 |
GG <- as.matrix(calls(object)[index, ])
CP <- as.matrix(snpCallProbability(object)[index, ])
AA <- as.matrix(A(object)[index, ])
BB <- as.matrix(B(object)[index, ])
|
4191b5f9 |
FL <- as.matrix(flags(object)[index, ])
|
0198a9ad |
if(verbose) message(" Computing summaries...")
|
6846d583 |
for(k in seq_along(batches)){
B <- batches[[k]]
this.batch <- unique(as.character(batch(object)[B]))
j <- match(this.batch, batchnames)
G <- GG[, B]
##NORM <- normal.index[, k]
xx <- CP[, B]
highConf <- (1-exp(-xx/1000)) > GT.CONF.THR
G <- G*highConf
|
4191b5f9 |
## Some markers may have genotype confidences scores that are ALL below the threshold
## For these markers, provide statistical summaries based on all the samples and flag
## Provide summaries for these markers and flag to indicate the problem
ii <- which(rowSums(G) == 0)
G[ii, ] <- GG[ii, B]
## table(rowSums(G==0))
|
73fd7374 |
##G <- G*highConf*NORM
|
6846d583 |
A <- AA[, B]
B <- BB[, B]
## this can be time consuming...do only once
G.AA <- G==1
G.AA[G.AA==FALSE] <- NA
G.AB <- G==2
G.AB[G.AB==FALSE] <- NA
G.BB <- G==3
G.BB[G.BB==FALSE] <- NA
Ns.AA[, k] <- rowSums(G.AA, na.rm=TRUE)
Ns.AB[, k] <- rowSums(G.AB, na.rm=TRUE)
Ns.BB[, k] <- rowSums(G.BB, na.rm=TRUE)
summaryStats <- function(X, INT, FUNS){
tmp <- matrix(NA, nrow(X), length(FUNS))
for(j in seq_along(FUNS)){
FUN <- match.fun(FUNS[j])
tmp[, j] <- FUN(X*INT, na.rm=TRUE)
}
tmp
}
statsA.AA[[k]] <- summaryStats(G.AA, A, FUNS=c("rowMedians", "rowMAD"))
statsA.AB[[k]] <- summaryStats(G.AB, A, FUNS=c("rowMedians", "rowMAD"))
statsA.BB[[k]] <- summaryStats(G.BB, A, FUNS=c("rowMedians", "rowMAD"))
statsB.AA[[k]] <- summaryStats(G.AA, B, FUNS=c("rowMedians", "rowMAD"))
statsB.AB[[k]] <- summaryStats(G.AB, B, FUNS=c("rowMedians", "rowMAD"))
statsB.BB[[k]] <- summaryStats(G.BB, B, FUNS=c("rowMedians", "rowMAD"))
## log2 Transform Intensities
A <- log2(A); B <- log2(B)
tau2A.AA[, k] <- summaryStats(G.AA, A, FUNS="rowMAD")^2
tau2A.BB[, k] <- summaryStats(G.BB, A, FUNS="rowMAD")^2
tau2B.AA[, k] <- summaryStats(G.AA, B, FUNS="rowMAD")^2
tau2B.BB[, k] <- summaryStats(G.BB, B, FUNS="rowMAD")^2
corrAA[, k] <- rowCors(A*G.AA, B*G.AA, na.rm=TRUE)
corrAB[, k] <- rowCors(A*G.AB, B*G.AB, na.rm=TRUE)
corrBB[, k] <- rowCors(A*G.BB, B*G.BB, na.rm=TRUE)
}
|
0198a9ad |
if(verbose) message(" Begin writing...")
|
6846d583 |
N.AA(object)[index,] <- Ns.AA
N.AB(object)[index,] <- Ns.AB
N.BB(object)[index,] <- Ns.BB
corrAA(object)[index,] <- corrAA
corrAB(object)[index,] <- corrAB
corrBB(object)[index,] <- corrBB
medianA.AA(object)[index,] <- do.call(cbind, lapply(statsA.AA, function(x) x[, 1]))
medianA.AB(object)[index,] <- do.call(cbind, lapply(statsA.AB, function(x) x[, 1]))
medianA.BB(object)[index,] <- do.call(cbind, lapply(statsA.BB, function(x) x[, 1]))
medianB.AA(object)[index,] <- do.call(cbind, lapply(statsB.AA, function(x) x[, 1]))
medianB.AB(object)[index,] <- do.call(cbind, lapply(statsB.AB, function(x) x[, 1]))
medianB.BB(object)[index,] <- do.call(cbind, lapply(statsB.BB, function(x) x[, 1]))
madA.AA(object)[index,] <- do.call(cbind, lapply(statsA.AA, function(x) x[, 2]))
madA.AB(object)[index,] <- do.call(cbind, lapply(statsA.AB, function(x) x[, 2]))
madA.BB(object)[index,] <- do.call(cbind, lapply(statsA.BB, function(x) x[, 2]))
madB.AA(object)[index,] <- do.call(cbind, lapply(statsB.AA, function(x) x[, 2]))
madB.AB(object)[index,] <- do.call(cbind, lapply(statsB.AB, function(x) x[, 2]))
madB.BB(object)[index,] <- do.call(cbind, lapply(statsB.BB, function(x) x[, 2]))
tau2A.AA(object)[index, ] <- tau2A.AA
## tau2A.AB(object)[index, ] <- tau2A.AB
tau2A.BB(object)[index, ] <- tau2A.BB
tau2B.AA(object)[index, ] <- tau2B.AA
## tau2B.AB(object)[index, ] <- tau2B.AB
tau2B.BB(object)[index, ] <- tau2B.BB
if(is.lds) return(TRUE) else return(object)
}
|
c45787d3 |
crlmmCopynumber <- function(object,
|
d08acba1 |
MIN.SAMPLES=10,
SNRMin=5,
MIN.OBS=1,
DF.PRIOR=50,
bias.adj=FALSE,
prior.prob=rep(1/4,4),
seed=1,
verbose=TRUE,
|
4191b5f9 |
GT.CONF.THR=0.80,
|
d08acba1 |
MIN.NU=2^3,
MIN.PHI=2^3,
THR.NU.PHI=TRUE,
|
6846d583 |
type=c("SNP", "NP", "X.SNP", "X.NP")){
|
66900fea |
stopifnot(type %in% c("SNP", "NP", "X.SNP", "X.NP"))
|
b5b74b9f |
batch <- batch(object)
|
8800496d |
is.snp <- isSnp(object)
is.autosome <- chromosome(object) < 23
is.annotated <- !is.na(chromosome(object))
is.X <- chromosome(object) == 23
|
6846d583 |
is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix")
|
66900fea |
if(is.lds) stopifnot(isPackageLoaded("ff"))
|
6846d583 |
samplesPerBatch <- table(as.character(batch(object)))
|
d08acba1 |
if(any(samplesPerBatch < MIN.SAMPLES)){
warning("The following batches have fewer than ", MIN.SAMPLES, ":")
|
f3d40144 |
message(paste(names(samplesPerBatch)[samplesPerBatch < MIN.SAMPLES], collapse=", "))
|
d08acba1 |
message("Not estimating copy number for the above batches")
|
b5b74b9f |
}
|
66900fea |
mylabel <- function(marker.type){
switch(marker.type,
SNP="autosomal SNPs",
NP="autosomal nonpolymorphic markers",
X.SNP="chromosome X SNPs",
X.NP="chromosome X nonpolymorphic markers")
}
|
4191b5f9 |
if(verbose & is.lds) {
message("Large data support with ff package is enabled.")
message("To reduce RAM, the markers are divided into several strata (see ?ocProbesets for details) and summary statistics are computed on each stratum")
}
|
66900fea |
for(i in seq_along(type)){
## do all types
marker.type <- type[i]
if(verbose) message(paste("...", mylabel(marker.type)))
##if(verbose) message(paste("Computing summary statistics for ", mylabel(marker.type), " genotype clusters for each batch")
|
4191b5f9 |
marker.index <- whichMarkers(marker.type,
is.snp,
is.autosome,
is.annotated,
is.X)
|
66900fea |
object <- genotypeSummary(object=object,
GT.CONF.THR=GT.CONF.THR,
type=marker.type,
verbose=verbose,
marker.index=marker.index,
is.lds=is.lds)
}
|
3e4db4b1 |
if(verbose) message("Imputing unobserved genotype medians and shrinking the variances (within-batch, across loci) ")##SNPs only
|
66900fea |
for(i in seq_along(type)){
marker.type <- type[i]
if(!marker.type %in% c("SNP", "X.SNP")) next()
message(paste("...", mylabel(marker.type)))
marker.index <- whichMarkers(marker.type, is.snp,
is.autosome, is.annotated, is.X)
object <- shrinkSummary(object=object,
MIN.OBS=MIN.OBS,
MIN.SAMPLES=MIN.SAMPLES,
DF.PRIOR=DF.PRIOR,
type=marker.type,
verbose=verbose,
marker.index=marker.index,
is.lds=is.lds)
}
if(verbose) message("Estimating parameters for copy number")##SNPs only
for(i in seq_along(type)){
marker.type <- type[i]
message(paste("...", mylabel(marker.type)))
CHR.X <- ifelse(marker.type %in% c("X.SNP", "X.NP"), TRUE, FALSE)
marker.index <- whichMarkers(marker.type, is.snp,
is.autosome, is.annotated, is.X)
object <- estimateCnParameters(object=object,
type=marker.type,
SNRMin=SNRMin,
DF.PRIOR=DF.PRIOR,
GT.CONF.THR=GT.CONF.THR,
MIN.SAMPLES=MIN.SAMPLES,
MIN.OBS=MIN.OBS,
MIN.NU=MIN.NU,
MIN.PHI=MIN.PHI,
THR.NU.PHI=THR.NU.PHI,
verbose=verbose,
marker.index=marker.index,
is.lds=is.lds,
CHR.X=CHR.X)
}
return(object)
}
|
6b75ec77 |
crlmmCopynumber2 <- function(){
.Defunct(msg="The crlmmCopynumber2 function has been deprecated. The function crlmmCopynumber should be used instead. crlmmCopynumber will support large data using ff provided that the ff package is loaded.")
}
crlmmCopynumberLD <- crlmmCopynumber2
|
66900fea |
estimateCnParameters <- function(object,
type=c("SNP", "NP", "X.SNP", "X.NP"),
verbose=TRUE,
SNRMin=5,
DF.PRIOR=50,
GT.CONF.THR=0.95,
MIN.SAMPLES=10,
MIN.OBS=1,
MIN.NU=8,
MIN.PHI=8,
THR.NU.PHI=TRUE,
marker.index,
CHR.X,
is.lds){
if(missing(marker.index)){
batch <- batch(object)
is.snp <- isSnp(object)
is.autosome <- chromosome(object) < 23
is.annotated <- !is.na(chromosome(object))
is.X <- chromosome(object) == 23
is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix")
|
df2226f0 |
if(is.lds) stopifnot(isPackageLoaded("ff"))
|
66900fea |
CHR.X <- ifelse(type[[1]] %in% c("X.SNP", "X.NP"), TRUE, FALSE)
marker.index <- whichMarkers(type[[1]], is.snp, is.autosome, is.annotated, is.X)
|
b5b74b9f |
}
|
d08acba1 |
lmFxn <- function(type){
switch(type,
|
6846d583 |
SNP="fit.lm1",
NP="fit.lm2",
X.SNP="fit.lm3",
X.NP="fit.lm4")
|
b5b74b9f |
}
|
a070e5bd |
myfun <- lmFxn(type[[1]])
FUN <- get(myfun)
|
d08acba1 |
if(is.lds){
|
6846d583 |
index.list <- splitIndicesByLength(marker.index, ocProbesets())
|
d08acba1 |
ocLapply(seq(along=index.list),
FUN,
index.list=index.list,
marker.index=marker.index,
object=object,
batchSize=ocProbesets(),
SNRMin=SNRMin,
MIN.SAMPLES=MIN.SAMPLES,
MIN.OBS=MIN.OBS,
DF=DF.PRIOR,
GT.CONF.THR=GT.CONF.THR,
THR.NU.PHI=THR.NU.PHI,
MIN.NU=MIN.NU,
MIN.PHI=MIN.PHI,
verbose=verbose,
|
6846d583 |
is.lds=is.lds,
CHR.X=CHR.X,
|
d08acba1 |
neededPkgs="crlmm")
} else {
|
a070e5bd |
##FUN <- match.fun(FUN)
|
6846d583 |
object <- FUN(strata=1,
index.list=list(marker.index),
marker.index=marker.index,
object=object,
|
56b783ec |
batchSize=ocProbesets(),
|
6846d583 |
SNRMin=SNRMin,
MIN.SAMPLES=MIN.SAMPLES,
MIN.OBS=MIN.OBS,
DF.PRIOR=DF.PRIOR,
GT.CONF.THR=GT.CONF.THR,
THR.NU.PHI=THR.NU.PHI,
MIN.NU=MIN.NU,
MIN.PHI=MIN.PHI,
is.lds=is.lds,
CHR.X=CHR.X,
verbose=verbose)
|
d08acba1 |
}
|
b5b74b9f |
message("finished")
|
d08acba1 |
return(object)
|
b5b74b9f |
}
|