git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@38183 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -28,3 +28,27 @@ BioC data packages |
28 | 28 |
|
29 | 29 |
* Loaded data in cnrma-functions.R to an environment and extracted from there |
30 | 30 |
so we can get rid of the NOTES complaining about 'no visible bindings'. |
31 |
+ |
|
32 |
+2009-03-18 Rob Scharpf <rscharpf@jhsph.edu> - committed version 1.0.57 |
|
33 |
+* Added steps object to instantiateObjects to speed up debugging |
|
34 |
+* Reformulated the regression for chromosome X |
|
35 |
+ - estimate cross-hybridization using chromosome X |
|
36 |
+* Take into account pseudo-autosomal regions on chr X for copy number estimation |
|
37 |
+* Replaced get() and assign() with '<-' operations to improve readability |
|
38 |
+* Function to compute posterior means of copy number estimates |
|
39 |
+ |
|
40 |
+2009-03-19 Rob Scharpf - committed version 1.0.59 |
|
41 |
+* Requires genomewidesnp6Crlmm version 1.0.1 or greater |
|
42 |
+ |
|
43 |
+2009-03-25 Rob Scharpf - committed version 1.0.60 |
|
44 |
+* simplified some of the preliminary steps for the computeCopynumber function |
|
45 |
+** crlmm output subset within the body of the function |
|
46 |
+* modified vignette -- store results in an oligoSnpSet object (for now) |
|
47 |
+* added function to extract the date from the celfile headers (celDates) |
|
48 |
+ |
|
49 |
+ |
|
50 |
+ |
|
51 |
+ |
|
52 |
+ |
|
53 |
+ |
|
54 |
+ |
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
Package: crlmm |
2 | 2 |
Type: Package |
3 | 3 |
Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for SNP 5.0 and 6.0 arrays. |
4 |
-Version: 1.0.59 |
|
4 |
+Version: 1.0.60 |
|
5 | 5 |
Date: 2008-12-28 |
6 | 6 |
Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu> |
7 | 7 |
Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu> |
... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
useDynLib("crlmm", .registration=TRUE) |
2 |
-export("crlmm", "list.celfiles", "computeCopynumber", "cnrma") |
|
2 |
+export("crlmm", "list.celfiles", "computeCopynumber", "cnrma", "celDates") |
|
3 | 3 |
exportMethods("list2crlmmSet", "calls", "confs") |
4 | 4 |
exportClasses("crlmmSet") |
5 | 5 |
import(Biobase) |
... | ... |
@@ -22,21 +22,34 @@ rowCors <- function(x, y, ...){ |
22 | 22 |
return(covar/(sd.x*sd.y)) |
23 | 23 |
} |
24 | 24 |
|
25 |
-nuphiAllele <- function(allele, Ystar, W, envir, p){ |
|
26 |
- Ns <- get("Ns", envir) |
|
27 |
- NOHET <- mean(Ns[, p, 2], na.rm=TRUE) < 0.05 |
|
28 |
- chrom <- get("chrom", envir) |
|
25 |
+generateX <- function(w, X) as.numeric(diag(w) %*% X) |
|
26 |
+generateIXTX <- function(x, nrow=3) { |
|
27 |
+ X <- matrix(x, nrow=nrow) |
|
28 |
+ XTX <- crossprod(X) |
|
29 |
+ solve(XTX) |
|
30 |
+} |
|
31 |
+nuphiAllele <- function(p, allele, Ystar, W, envir){ |
|
32 |
+ Ns <- envir[["Ns"]] |
|
33 |
+ CHR <- envir[["chrom"]] |
|
34 |
+ nuA <- envir[["nuA"]] |
|
35 |
+ nuB <- envir[["nuB"]] |
|
36 |
+ nuA.se <- envir[["nuA.se"]] |
|
37 |
+ nuB.se <- envir[["nuB.se"]] |
|
38 |
+ phiA <- envir[["phiA"]] |
|
39 |
+ phiB <- envir[["phiB"]] |
|
40 |
+ phiA.se <- envir[["phiA.se"]] |
|
41 |
+ phiB.se <- envir[["phiB.se"]] |
|
42 |
+ if(CHR==23){ |
|
43 |
+ phiAx <- envir[["phiAx"]] |
|
44 |
+ phiBx <- envir[["phiBx"]] |
|
45 |
+ } |
|
46 |
+ complete <- rowSums(is.na(W)) == 0 |
|
47 |
+ NOHET <- mean(Ns[, p, "AB"], na.rm=TRUE) < 0.05 |
|
29 | 48 |
if(missing(allele)) stop("must specify allele") |
30 |
- if(chrom == 23){ |
|
31 |
- gender <- get("gender", envir) |
|
32 |
- if(allele == "A" & gender == "female") |
|
33 |
- X <- cbind(1, 2:0) |
|
34 |
- if(allele == "B" & gender == "female") |
|
35 |
- X <- cbind(1, 0:2) |
|
36 |
- if(allele == "A" & gender == "male") |
|
37 |
- X <- cbind(1, 1:0) |
|
38 |
- if(allele == "B" & gender == "male") |
|
39 |
- X <- cbind(1, 0:1) |
|
49 |
+ if(CHR == 23){ |
|
50 |
+ gender <- envir[["gender"]] |
|
51 |
+ if(allele == "A") X <- cbind(1, c(1, 0, 2, 1, 0), c(0, 1, 0, 1, 2)) |
|
52 |
+ if(allele == "B") X <- cbind(1, c(0, 1, 0, 1, 2), c(1, 0, 2, 1, 0)) |
|
40 | 53 |
} else {##autosome |
41 | 54 |
if(allele == "A") X <- cbind(1, 2:0) else X <- cbind(1, 0:2) |
42 | 55 |
if(NOHET) X <- X[-2, ] ##more than 1 X chromosome, but all homozygous |
... | ... |
@@ -47,40 +60,57 @@ nuphiAllele <- function(allele, Ystar, W, envir, p){ |
47 | 60 |
stop("Inf values in W or V") |
48 | 61 |
} |
49 | 62 |
##How to quickly generate Xstar, Xstar = diag(W) %*% X |
50 |
- generateX <- function(w, X) as.numeric(diag(w) %*% X) |
|
51 |
- ##Not instant |
|
52 | 63 |
Xstar <- apply(W, 1, generateX, X) |
53 |
- generateIXTX <- function(x, nrow=3) { |
|
54 |
- X <- matrix(x, nrow=nrow) |
|
55 |
- XTX <- crossprod(X) |
|
56 |
- solve(XTX) |
|
57 |
- } |
|
58 |
- ##a little slow |
|
59 | 64 |
IXTX <- apply(Xstar, 2, generateIXTX, nrow=nrow(X)) |
60 |
- betahat <- matrix(NA, 2, nrow(Ystar)) |
|
61 |
- ses <- matrix(NA, 2, nrow(Ystar)) |
|
65 |
+ if(CHR == 23){ |
|
66 |
+ betahat <- matrix(NA, 3, nrow(Ystar)) |
|
67 |
+ ses <- matrix(NA, 3, nrow(Ystar)) |
|
68 |
+ } else{ |
|
69 |
+ betahat <- matrix(NA, 2, nrow(Ystar)) |
|
70 |
+ ses <- matrix(NA, 2, nrow(Ystar)) |
|
71 |
+ } |
|
62 | 72 |
for(i in 1:nrow(Ystar)){ |
63 |
- betahat[, i] <- crossprod(matrix(IXTX[, i], 2, 2), crossprod(matrix(Xstar[, i], nrow=nrow(X)), Ystar[i, ])) |
|
64 |
- ssr <- sum((Ystar[i, ] - matrix(Xstar[, i], nrow(X), 2) %*% matrix(betahat[, i], 2, 1))^2) |
|
65 |
- ses[, i] <- sqrt(diag(matrix(IXTX[, i], 2, 2) * ssr)) |
|
73 |
+ betahat[, i] <- crossprod(matrix(IXTX[, i], ncol(X), ncol(X)), crossprod(matrix(Xstar[, i], nrow=nrow(X)), Ystar[i, ])) |
|
74 |
+ ssr <- sum((Ystar[i, ] - matrix(Xstar[, i], nrow(X), ncol(X)) %*% matrix(betahat[, i], ncol(X), 1))^2) |
|
75 |
+ ses[, i] <- sqrt(diag(matrix(IXTX[, i], ncol(X), ncol(X)) * ssr)) |
|
76 |
+ } |
|
77 |
+ if(allele == "A"){ |
|
78 |
+ nuA[complete, p] <- betahat[1, ] |
|
79 |
+ phiA[complete, p] <- betahat[2, ] |
|
80 |
+ nuA.se[complete, p] <- ses[1, ] |
|
81 |
+ phiA.se[complete, p] <- ses[2, ] |
|
82 |
+ envir[["nuA"]] <- nuA |
|
83 |
+ envir[["phiA"]] <- phiA |
|
84 |
+ envir[["nuA.se"]] <- nuA.se |
|
85 |
+ envir[["phiA.se"]] <- phiA.se |
|
86 |
+ if(CHR == 23){ |
|
87 |
+ phiAx[complete, p] <- betahat[3, ] |
|
88 |
+ envir[["phiAx"]] <- phiAx |
|
89 |
+ } |
|
90 |
+ } |
|
91 |
+ if(allele=="B"){ |
|
92 |
+ nuB[complete, p] <- betahat[1, ] |
|
93 |
+ phiB[complete, p] <- betahat[2, ] |
|
94 |
+ nuB.se[complete, p] <- ses[1, ] |
|
95 |
+ phiB.se[complete, p] <- ses[2, ] |
|
96 |
+ envir[["nuB"]] <- nuB |
|
97 |
+ envir[["phiB"]] <- phiB |
|
98 |
+ envir[["nuB.se"]] <- nuB.se |
|
99 |
+ envir[["phiB.se"]] <- phiB.se |
|
100 |
+ if(CHR == 23){ |
|
101 |
+ phiBx[complete, p] <- betahat[3, ] |
|
102 |
+ envir[["phiBx"]] <- phiBx |
|
103 |
+ } |
|
66 | 104 |
} |
67 |
- nu <- betahat[1, ] |
|
68 |
- phi <- betahat[2, ] |
|
69 |
- nu.se <- ses[1,] |
|
70 |
- phi.se <- ses[2,] |
|
71 |
- ##dimnames(nu) <- dimnames(phi) <- dimnames(nu.ses) <- dimnames(phi.ses) |
|
72 |
- assign("nu", nu, envir=envir) |
|
73 |
- assign("phi", phi, envir=envir) |
|
74 |
- assign("nu.se", nu.se, envir=envir) |
|
75 |
- assign("phi.se", phi.se, envir=envir) |
|
76 | 105 |
} |
77 | 106 |
|
78 |
-celDatesFrom <- function(celfiles, path=""){ |
|
107 |
+celDates <- function(celfiles){ |
|
108 |
+ if(!all(file.exists(celfiles))) stop("1 or more cel file does not exist") |
|
79 | 109 |
celdates <- vector("character", length(celfiles)) |
80 | 110 |
celtimes <- vector("character", length(celfiles)) |
81 | 111 |
for(i in seq(along=celfiles)){ |
82 | 112 |
if(i %% 100 == 0) cat(".") |
83 |
- tmp <- read.celfile.header(file.path(path, celfiles[i]), info="full")$DatHeader |
|
113 |
+ tmp <- read.celfile.header(celfiles[i], info="full")$DatHeader |
|
84 | 114 |
tmp <- strsplit(tmp, "\ +") |
85 | 115 |
celdates[i] <- tmp[[1]][6] |
86 | 116 |
celtimes[i] <- tmp[[1]][7] |
... | ... |
@@ -91,14 +121,13 @@ celDatesFrom <- function(celfiles, path=""){ |
91 | 121 |
} |
92 | 122 |
|
93 | 123 |
cnrma <- function(filenames, sns, cdfName, seed=1, verbose=FALSE){ |
94 |
- ## BC: 03/14/09 |
|
95 |
- ## getting pkgname from cdfName, in the future this might be useful |
|
96 |
- ## as the method might be implemented for other platforms |
|
97 |
- pkgname <- getCrlmmAnnotationName(cdfName) |
|
98 |
- |
|
124 |
+ ## BC: 03/14/09 |
|
125 |
+ ## getting pkgname from cdfName, in the future this might be useful |
|
126 |
+ ## as the method might be implemented for other platforms |
|
127 |
+ pkgname <- getCrlmmAnnotationName(cdfName) |
|
99 | 128 |
require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available") |
100 | 129 |
if (missing(sns)) sns <- basename(filenames) |
101 |
- ## Loading data in .crlmmPkgEnv and extracting from there |
|
130 |
+ ## Loading data in .crlmmPkgEnv and extracting from there |
|
102 | 131 |
data("npProbesFid", package=pkgname, envir=.crlmmPkgEnv) |
103 | 132 |
fid <- getVarInEnv("npProbesFid") |
104 | 133 |
gc() |
... | ... |
@@ -113,7 +142,7 @@ cnrma <- function(filenames, sns, cdfName, seed=1, verbose=FALSE){ |
113 | 142 |
} |
114 | 143 |
##load reference distribution obtained from hapmap |
115 | 144 |
data(list="1m_reference_cn", package="genomewidesnp6Crlmm", envir=.crlmmPkgEnv) |
116 |
- reference <- getVarInEnv("reference") |
|
145 |
+ reference <- getVarInEnv("reference") |
|
117 | 146 |
for(i in seq(along=filenames)){ |
118 | 147 |
y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid]) |
119 | 148 |
x <- log2(y[idx2]) |
... | ... |
@@ -137,412 +166,471 @@ getFlags <- function(phi.thr, envir){ |
137 | 166 |
phiB <- get("phiB", envir) |
138 | 167 |
|
139 | 168 |
negativeNus <- nuA < 1 | nuB < 1 |
140 |
- negativeNus <- negativeNus > 0 |
|
141 |
- |
|
142 | 169 |
negativePhis <- phiA < phi.thr | phiB < phi.thr |
143 |
- negativePhis <- negativePhis > 0 |
|
144 | 170 |
negativeCoef <- negativeNus | negativePhis |
145 | 171 |
|
146 | 172 |
notfinitePhi <- !is.finite(phiA) | !is.finite(phiB) |
147 |
- notfinitePhi <- notfinitePhi > 0 |
|
148 |
- flags <- negativeCoef | notfinitePhi |
|
173 |
+ flags <- negativeCoef | notfinitePhi |
|
174 |
+ return(flags) |
|
149 | 175 |
} |
150 | 176 |
|
151 | 177 |
goodSnps <- function(phi.thr, envir, fewAA=20, fewBB=20){ |
152 | 178 |
Ns <- get("Ns", envir) |
153 | 179 |
flags <- getFlags(phi.thr=phi.thr, envir) |
154 |
- fewAA <- Ns[, , 1] < fewAA |
|
155 |
- fewBB <- Ns[, , 3] < fewBB |
|
180 |
+ fewAA <- Ns[, , "AA"] < fewAA |
|
181 |
+ fewBB <- Ns[, , "BB"] < fewBB |
|
156 | 182 |
flagsA <- flags | fewAA |
157 | 183 |
flagsB <- flags | fewBB |
158 | 184 |
flags <- list(A=flagsA, B=flagsB) |
159 | 185 |
return(flags) |
160 | 186 |
} |
161 | 187 |
|
162 |
-instantiateObjects <- function(calls, NP, plate, envir, chrom){ |
|
163 |
- if(missing(chrom)) stop("must specify chrom") |
|
164 |
- assign("chrom", chrom, envir) |
|
188 |
+instantiateObjects <- function(calls, NP, plate, envir, chrom, A, B, |
|
189 |
+ gender, SNRmin=5, SNR){ |
|
190 |
+ envir[["chrom"]] <- chrom |
|
191 |
+ CHR_INDEX <- paste(chrom, "index", sep="") |
|
192 |
+ data(list=CHR_INDEX, package="genomewidesnp6Crlmm") |
|
193 |
+ A <- A[index[[1]], SNR > SNRmin] |
|
194 |
+ B <- B[index[[1]], SNR > SNRmin] |
|
195 |
+ calls <- calls[index[[1]], SNR > SNRmin] |
|
196 |
+ conf <- conf[index[[1]], SNR > SNRmin] |
|
197 |
+ NP <- NP[index[[2]], SNR > SNRmin] |
|
198 |
+ plate <- plate[SNR > SNRmin] |
|
199 |
+ uplate <- unique(plate) |
|
200 |
+ SNR <- SNR[SNR > SNRmin] |
|
201 |
+ |
|
202 |
+ envir[["uplate"]] <- uplate |
|
203 |
+ envir[["plate"]] <- plate |
|
204 |
+ envir[["NP"]] <- NP |
|
205 |
+ envir[["A"]] <- A |
|
206 |
+ envir[["B"]] <- B |
|
207 |
+ envir[["calls"]] <- calls |
|
208 |
+ envir[["conf"]] <- conf |
|
165 | 209 |
snps <- rownames(calls) |
166 | 210 |
cnvs <- rownames(NP) |
167 | 211 |
sns <- basename(colnames(calls)) |
168 | 212 |
stopifnot(identical(colnames(calls), colnames(NP))) |
169 |
- assign("sns", sns, envir) |
|
170 |
- assign("snps", snps, envir) |
|
171 |
- assign("cnvs", cnvs, envir) |
|
213 |
+ envir[["sns"]] <- sns |
|
214 |
+ envir[["snps"]] <- snps |
|
215 |
+ envir[["cnvs"]] <- cnvs |
|
172 | 216 |
|
217 |
+ if(chrom == 23){ |
|
218 |
+ if(is.null(gender)){ |
|
219 |
+ message("Estimating gender") |
|
220 |
+ XMedian <- apply(log2(A[, , drop=FALSE]) + log2(B[, , drop=FALSE]), 2, median)/2 |
|
221 |
+ gender <- kmeans(XMedian, c(min(XMedian[SNR>SNRmin]), max(XMedian[SNR>SNRmin])))[["cluster"]] |
|
222 |
+ ##gender <- getGender(res) |
|
223 |
+ gender[gender==2] <- "female" |
|
224 |
+ gender[gender=="1"] <- "male" |
|
225 |
+ envir[["gender"]] <- gender |
|
226 |
+ } else envir[["gender"]] <- gender |
|
227 |
+ phiAx <- matrix(NA, nrow(calls), length(uplate)) |
|
228 |
+ envir[["phiAx"]] <- phiAx |
|
229 |
+ envir[["phiBx"]] <- phiAx |
|
230 |
+ } |
|
173 | 231 |
CA <- CB <- matrix(NA, nrow(calls), ncol(calls)) |
174 |
- assign("plate", plate, envir) |
|
175 |
- uplate <- unique(plate) |
|
176 |
- Ns <- array(NA, dim=c(nrow(calls), length(uplate), 3)) |
|
177 |
- dimnames(Ns)[[3]] <- c("AA", "AB", "BB") |
|
178 |
- muA.AA <- matrix(NA, nrow=nrow(calls), length(uplate)) |
|
179 |
- ##Sigma.AA <- array(NA, dim=c(nrow(calls), 3, length(uplate))) |
|
180 |
- ##dimnames(Sigma.AA)[2:3] <- list(c("varA", "varB", "cov"), uplate) |
|
232 |
+ envir[["CA"]] <- CA |
|
233 |
+ envir[["CB"]] <- CB |
|
234 |
+ |
|
235 |
+ Ns <- array(NA, dim=c(nrow(calls), length(uplate), 5)) |
|
236 |
+ dimnames(Ns)[[3]] <- c("A", "B", "AA", "AB", "BB") |
|
237 |
+ |
|
238 |
+ envir[["Ns"]] <- envir[["muB"]] <- envir[["muA"]] <- Ns |
|
239 |
+ envir[["vB"]] <- envir[["vA"]] <- Ns |
|
181 | 240 |
|
182 |
- NP.CT <- matrix(NA, nrow(NP), ncol(NP)) |
|
183 |
- NP.sds <- matrix(NA, nrow(NP), ncol(NP)) |
|
184 |
- assign("NP.CT", NP.CT, envir) |
|
185 |
- assign("NP.sds", NP.sds, envir) |
|
186 |
- nus <- matrix(NA, nrow(NP), length(uplate)) |
|
187 |
- assign("nus", nus, envir=envir) |
|
188 |
- assign("phis", nus, envir=envir) |
|
241 |
+ CT.sds <- CT <- matrix(NA, nrow(NP), length(sns)) |
|
242 |
+ ##NP.CT <- matrix(NA, nrow(NP), ncol(NP)) |
|
243 |
+ ##NP.sds <- matrix(NA, nrow(NP), ncol(NP)) |
|
244 |
+ envir[["CT"]] <- CT |
|
245 |
+ envir[["CT.sds"]] <- CT.sds |
|
246 |
+ |
|
247 |
+ ##assign("NP.CT", NP.CT, envir) |
|
248 |
+ ##assign("NP.sds", NP.sds, envir) |
|
249 |
+ nuT <- matrix(NA, nrow(NP), length(uplate)) |
|
250 |
+ phiT <- nuT |
|
251 |
+ envir[["nuT"]] <- nuT |
|
252 |
+ envir[["phiT"]] <- phiT |
|
253 |
+ ##assign("nus", nus, envir=envir) |
|
254 |
+ ##assign("phis", nus, envir=envir) |
|
189 | 255 |
|
190 | 256 |
plates.completed <- rep(FALSE, length(uplate)) |
191 |
- assign("plates.completed", plates.completed, envir) |
|
192 |
- |
|
193 |
- fit.variance <- NULL |
|
194 |
- assign("fit.variance", fit.variance, envir=envir) |
|
195 |
- npflags <- snpflags <- vector("list", length(uplate)) |
|
196 |
- assign("snpflags", snpflags, envir=envir) |
|
197 |
- assign("npflags", npflags, envir=envir) |
|
198 |
- |
|
199 |
- assign("Ns", Ns, envir=envir) |
|
200 |
- assign("uplate", uplate, envir=envir) |
|
201 |
- assign("muA.AA", muA.AA, envir=envir) |
|
202 |
- assign("muA.AB", muA.AA, envir=envir) |
|
203 |
- assign("muA.BB", muA.AA, envir=envir) |
|
204 |
- assign("muB.AA", muA.AA, envir=envir) |
|
205 |
- assign("muB.AB", muA.AA, envir=envir) |
|
206 |
- assign("muB.BB", muA.AA, envir=envir) |
|
207 |
- |
|
208 |
- assign("tau2A", muA.AA, envir=envir) |
|
209 |
- assign("tau2B", muA.AA, envir=envir) |
|
210 |
- assign("sig2A", muA.AA, envir=envir) |
|
211 |
- assign("sig2B", muA.AA, envir=envir) |
|
212 |
- assign("corr", muA.AA, envir=envir) |
|
213 |
- assign("corrA.BB", muA.AA, envir=envir) |
|
214 |
- assign("corrB.AA", muA.AA, envir=envir) |
|
215 |
- |
|
216 |
- assign("nuA", muA.AA, envir=envir) |
|
217 |
- assign("nuB", muA.AA, envir=envir) |
|
218 |
- assign("phiA", muA.AA, envir=envir) |
|
219 |
- assign("phiB", muA.AA, envir=envir) |
|
220 |
- assign("nuA.se", muA.AA, envir=envir) |
|
221 |
- assign("nuB.se", muA.AA, envir=envir) |
|
222 |
- assign("phiA.se", muA.AA, envir=envir) |
|
223 |
- assign("phiB.se", muA.AA, envir=envir) |
|
224 |
- assign("sd.CT", CA, envir=envir) |
|
225 |
- assign("CA", CA, envir=envir) |
|
226 |
- assign("CB", CB, envir=envir) |
|
257 |
+ envir[["plates.completed"]] <- plates.completed |
|
258 |
+ |
|
259 |
+ steps <- rep(FALSE, 4) |
|
260 |
+ names(steps) <- c("suffStats", "coef", "snp-cn", "np-cn") |
|
261 |
+ envir[["steps"]] <- steps |
|
262 |
+ |
|
263 |
+ snpflags <- matrix(FALSE, length(snps), length(uplate)) |
|
264 |
+ npflags <- matrix(FALSE, length(cnvs), length(uplate)) |
|
265 |
+ ##assign("snpflags", snpflags, envir=envir) |
|
266 |
+ ##assign("npflags", npflags, envir=envir) |
|
267 |
+ envir[["snpflags"]] <- snpflags |
|
268 |
+ envir[["npflags"]] <- npflags |
|
269 |
+ |
|
270 |
+ tau2A <- matrix(NA, nrow(calls), length(uplate)) |
|
271 |
+ envir[["tau2A"]] <- tau2A |
|
272 |
+ envir[["tau2B"]] <- tau2A |
|
273 |
+ envir[["sig2A"]] <- tau2A |
|
274 |
+ envir[["sig2B"]] <- tau2A |
|
275 |
+ sig2T <- matrix(NA, nrow(NP), ncol(NP)) |
|
276 |
+ envir[["sig2T"]] <- sig2T |
|
277 |
+ envir[["corr"]] <- tau2A |
|
278 |
+ envir[["corrA.BB"]] <- tau2A |
|
279 |
+ envir[["corrB.AA"]] <- tau2A |
|
280 |
+ envir[["nuB"]] <- envir[["nuA"]] <- tau2A |
|
281 |
+ envir[["phiB"]] <- envir[["phiA"]] <- tau2A |
|
282 |
+ envir[["nuB.se"]] <- envir[["nuA.se"]] <- tau2A |
|
283 |
+ envir[["phiB.se"]] <- envir[["phiA.se"]] <- tau2A |
|
284 |
+ normal <- matrix(TRUE, nrow(A), ncol(A)) |
|
285 |
+ normalNP <- matrix(TRUE, nrow(NP), ncol(NP)) |
|
286 |
+ envir[["normal"]] <- normal |
|
287 |
+ envir[["normalNP"]] <- normalNP |
|
227 | 288 |
} |
228 | 289 |
|
229 | 290 |
|
230 | 291 |
|
231 |
-computeCopynumber <- function(A, |
|
292 |
+computeCopynumber <- function(chrom, |
|
293 |
+ A, |
|
232 | 294 |
B, |
233 | 295 |
calls, |
234 | 296 |
conf, |
235 | 297 |
NP, |
236 | 298 |
plate, |
237 |
- fit.variance=NULL, |
|
238 | 299 |
MIN.OBS=1, |
239 | 300 |
envir, |
240 |
- chrom, P, DF.PRIOR=50, CONF.THR=0.99, |
|
241 |
- trim=0, upperTail=TRUE, |
|
301 |
+ P, |
|
302 |
+ DF.PRIOR=50, |
|
303 |
+ CONF.THR=0.99, |
|
242 | 304 |
bias.adj=FALSE, |
243 |
- priorProb, ...){ |
|
244 |
- if(length(ls(envir)) == 0) instantiateObjects(calls, NP, plate, envir, chrom) |
|
305 |
+ priorProb, |
|
306 |
+ gender=NULL, |
|
307 |
+ SNR, |
|
308 |
+ SNRmin=5, seed=123, verbose=TRUE, ...){ |
|
309 |
+ set.seed(seed) |
|
310 |
+ if(missing(chrom)) stop("must specify chromosome") |
|
311 |
+ if(length(ls(envir)) == 0) { |
|
312 |
+ instantiateObjects(calls=calls, NP=NP, plate=plate, |
|
313 |
+ envir=envir, chrom=chrom, A=A, B=B, |
|
314 |
+ gender=gender, SNR=SNR, SNRmin=SNRmin) |
|
315 |
+ } |
|
316 |
+ plate <- envir[["plate"]] |
|
317 |
+ uplate <- envir[["uplate"]] |
|
318 |
+ calls <- envir[["calls"]] |
|
319 |
+ A <- envir[["A"]] |
|
320 |
+ B <- envir[["B"]] |
|
321 |
+ conf <- envir[["conf"]] |
|
322 |
+ NP <- envir[["NP"]] |
|
245 | 323 |
if(bias.adj){ |
246 | 324 |
##assign uniform priors for total copy number states |
247 | 325 |
if(missing(priorProb)) priorProb <- rep(1/4, 4) |
326 |
+ envir[["steps"]] <- rep(FALSE, 4) |
|
248 | 327 |
} |
249 | 328 |
##will be updating these objects |
250 |
- uplate <- get("uplate", envir) |
|
251 | 329 |
message("Sufficient statistics") |
252 | 330 |
if(missing(P)) P <- seq(along=uplate) |
253 |
- for(p in P){ |
|
254 |
- cat(".") |
|
255 |
- if(sum(plate == uplate[p]) < 10) next() |
|
256 |
- oneBatch(plateIndex=p, |
|
257 |
- G=calls[, plate==uplate[p]], |
|
258 |
- A=A[, plate==uplate[p]], |
|
259 |
- B=B[, plate==uplate[p]], |
|
260 |
- conf=conf[, plate==uplate[p]], |
|
261 |
- CONF.THR=CONF.THR, |
|
262 |
- envir=envir, |
|
263 |
- MIN.OBS=MIN.OBS, |
|
264 |
- DF.PRIOR=DF.PRIOR, |
|
265 |
- trim=trim, upperTail=upperTail, |
|
266 |
- bias.adj=bias.adj) |
|
331 |
+ steps <- envir[["steps"]] |
|
332 |
+ if(!steps[1]){ |
|
333 |
+ for(p in P){ |
|
334 |
+ cat(".") |
|
335 |
+ if(sum(plate == uplate[p]) < 10) next() |
|
336 |
+ J <- plate==uplate[p] |
|
337 |
+ oneBatch(plateIndex=p, |
|
338 |
+ A=A[, J], |
|
339 |
+ B=B[, J], |
|
340 |
+ calls=calls[, J], |
|
341 |
+ conf=conf[, J], |
|
342 |
+ gender=NULL, |
|
343 |
+ NP[, J], |
|
344 |
+ plate[J], |
|
345 |
+ MIN.OBS=1, |
|
346 |
+ envir=envir, |
|
347 |
+ DF.PRIOR=DF.PRIOR, |
|
348 |
+ CONF.THR=CONF.THR, |
|
349 |
+ bias.adj=bias.adj, |
|
350 |
+ priorProb=priorProb,...) |
|
351 |
+ } |
|
352 |
+ steps[1] <- TRUE |
|
353 |
+ envir[["steps"]] <- steps |
|
267 | 354 |
} |
268 |
- message("\nEstimating coefficients") |
|
269 |
- for(p in P){ |
|
270 |
- cat(".") |
|
271 |
- coefs(plateIndex=p, conf=conf[, plate==uplate[p]], |
|
272 |
- envir=envir, CONF.THR=CONF.THR, MIN.OBS=MIN.OBS) |
|
355 |
+ if(!steps[2]){ |
|
356 |
+ message("\nEstimating coefficients") |
|
357 |
+ for(p in P){ |
|
358 |
+ cat(".") |
|
359 |
+ coefs(plateIndex=p, conf=conf[, plate==uplate[p]], |
|
360 |
+ envir=envir, CONF.THR=CONF.THR, MIN.OBS=MIN.OBS) |
|
361 |
+ } |
|
362 |
+ steps[2] <- TRUE |
|
363 |
+ envir[["steps"]] <- steps |
|
273 | 364 |
} |
274 |
- message("\nAllele specific copy number") |
|
275 |
- for(p in P){ |
|
276 |
- cat(".") |
|
277 |
- polymorphic(plateIndex=p, |
|
278 |
- A=A[, plate==uplate[p]], |
|
279 |
- B=B[, plate==uplate[p]], |
|
280 |
- envir=envir) |
|
365 |
+ if(!steps[3]){ |
|
366 |
+ message("\nAllele specific copy number") |
|
367 |
+ for(p in P){ |
|
368 |
+ cat(".") |
|
369 |
+ polymorphic(plateIndex=p, |
|
370 |
+ A=A[, plate==uplate[p]], |
|
371 |
+ B=B[, plate==uplate[p]], |
|
372 |
+ envir=envir) |
|
373 |
+ } |
|
374 |
+ steps[3] <- TRUE |
|
375 |
+ envir[["snp-cn"]] <- steps |
|
281 | 376 |
} |
282 |
- message("\nCopy number for nonpolymorphic probes...") |
|
283 |
- for(p in P){ |
|
284 |
- cat(".") |
|
285 |
- nonpolymorphic(plateIndex=p, |
|
286 |
- NP=NP[, plate==uplate[p]], |
|
287 |
- envir=envir) |
|
377 |
+ if(!steps[4]){ |
|
378 |
+ message("\nCopy number for nonpolymorphic probes...") |
|
379 |
+ for(p in P){ |
|
380 |
+ cat(".") |
|
381 |
+ nonpolymorphic(plateIndex=p, |
|
382 |
+ NP=NP[, plate==uplate[p]], |
|
383 |
+ envir=envir) |
|
384 |
+ } |
|
385 |
+ steps[4] <- TRUE |
|
386 |
+ envir[["np-cn"]] <- steps |
|
288 | 387 |
} |
289 |
-## snpflags <- get("snpflags", envir) |
|
290 |
-## npflags <- get("npflags", envir) |
|
291 |
-## flags <- sapply(snpflags, length) |
|
292 |
-## flags.np <- sapply(npflags, length) |
|
293 |
-## if(any(flags > 0) | any(flags.np > 0)) |
|
294 |
-## warning("some SNPs were flagged -- possible NAs. Check the indices in snpflags and npflags") |
|
295 | 388 |
} |
296 | 389 |
|
297 |
-nonpolymorphic <- function(plateIndex, NP, envir){ |
|
390 |
+nonpolymorphic <- function(plateIndex, NP, envir, CONF.THR=0.99, DF.PRIOR=50){ |
|
298 | 391 |
p <- plateIndex |
299 |
- plate <- get("plate", envir) |
|
300 |
- uplate <- get("uplate", envir) |
|
301 |
- plates.completed <- get("plates.completed", envir) |
|
392 |
+ CHR <- envir[["chrom"]] |
|
393 |
+ plate <- envir[["plate"]] |
|
394 |
+ uplate <- envir[["uplate"]] |
|
395 |
+ plates.completed <- envir[["plates.completed"]] |
|
302 | 396 |
if(!plates.completed[p]) return() |
303 |
- ##snpflags <- get("snpflags", envir) |
|
304 |
- ##if(is.null(snpflags)){ |
|
305 |
- snpflags <- goodSnps(phi.thr=10, envir=envir, fewAA=10, fewBB=10) |
|
306 |
- ##assign("snpflags", snpflags, envir=envir) |
|
397 |
+ snpflags <- goodSnps(phi.thr=2^6, envir=envir, fewAA=10, fewBB=10) |
|
307 | 398 |
flagsA <- snpflags$A[, p] |
308 | 399 |
flagsB <- snpflags$B[, p] |
309 | 400 |
if(all(flagsA) | all(flagsB)) stop("all snps are flagged") |
310 |
- nuA <- get("nuA", envir) |
|
311 |
- nuB <- get("nuB", envir) |
|
312 |
- phiA <- get("phiA", envir) |
|
313 |
- phiB <- get("phiB", envir) |
|
314 |
- uplate <- get("uplate", envir) |
|
315 |
- sns <- get("sns", envir) |
|
316 |
- muA.AA <- get("muA.AA", envir) |
|
317 |
- muA.AB <- get("muA.AB", envir) |
|
318 |
- muA.BB <- get("muA.BB", envir) |
|
319 |
- muB.AA <- get("muB.AA", envir) |
|
320 |
- muB.AB <- get("muB.AB", envir) |
|
321 |
- muB.BB <- get("muB.BB", envir) |
|
322 |
- NP.CT <- get("NP.CT", envir) |
|
323 |
- nus <- get("nus", envir) |
|
324 |
- phis <- get("phis", envir) |
|
325 |
- NP.sds <- get("NP.sds", envir) |
|
326 |
- ##fit.variance <- get("fit.variance", envir) |
|
327 |
- NP.CT <- get("NP.CT", envir) |
|
401 |
+ nuA <- envir[["nuA"]][, p] |
|
402 |
+ nuB <- envir[["nuB"]][, p] |
|
403 |
+ phiA <- envir[["phiA"]][, p] |
|
404 |
+ phiB <- envir[["phiB"]][, p] |
|
405 |
+ uplate <- envir[["uplate"]] |
|
406 |
+ sns <- envir[["sns"]] |
|
407 |
+ muA <- envir[["muA"]][, p, ] |
|
408 |
+ muB <- envir[["muB"]][, p, ] |
|
409 |
+ nuT <- envir[["nuT"]] |
|
410 |
+ phiT <- envir[["phiT"]] |
|
411 |
+ sig2T <- envir[["sig2T"]] |
|
412 |
+ A <- envir[["A"]][, plate==uplate[p]] |
|
413 |
+ B <- envir[["B"]][, plate==uplate[p]] |
|
414 |
+ CA <- envir[["A"]][, plate==uplate[p]] |
|
415 |
+ CB <- envir[["B"]][, plate==uplate[p]] |
|
416 |
+ if(CHR == 23){ |
|
417 |
+ phiAx <- envir[["phiAx"]][, p] |
|
418 |
+ phiBx <- envir[["phiBx"]][, p] |
|
419 |
+ } |
|
328 | 420 |
##--------------------------------------------------------------------------- |
329 | 421 |
## Train on unflagged SNPs |
330 |
- ##--------------------------------------------------------------------------- |
|
422 |
+ ##--------------------------------------------------------------------------- |
|
423 |
+ ##Might be best to train using the X chromosome, since for the |
|
424 |
+ ##X phi and nu have been adjusted for cross-hybridization |
|
331 | 425 |
plateInd <- plate == uplate[p] |
332 |
- muA.AAp <- muA.AA[!flagsA, p] |
|
333 |
- muB.BBp <- muB.BB[!flagsB, p] |
|
334 |
- |
|
335 |
- ##From SNP data |
|
336 |
- X <- cbind(1, log(c(muA.AAp, muB.BBp))) |
|
337 |
- Y <- log(c(phiA[!flagsA, p], phiB[!flagsB, p])) |
|
338 |
- ##Y.nu <- log(c(nuA[!snpflags$A, p], nuB[!snpflags$B, p])) |
|
339 |
- ##Y <- cbind(Y.nu, Y.phi) |
|
340 |
- betahat <- solve(crossprod(X), crossprod(X, Y)) |
|
341 |
- ##Prediction |
|
342 |
- mus <- rowMedians(NP, na.rm=TRUE) |
|
343 |
- X <- cbind(1, log(mus)) |
|
344 |
- Yhat <- as.numeric(X %*% betahat) |
|
345 |
- ##NP.nus[, p] <- exp(Yhat[, 1]) |
|
346 |
- ##NP.phis[, p] <- exp(Yhat[, 2]) |
|
347 |
- phi <- exp(Yhat) |
|
348 |
- nu <- mus - 2*phi |
|
349 |
- |
|
350 |
- phis[, p] <- as.integer(phi) |
|
351 |
- nus[, p] <- as.integer(nu) |
|
352 |
- ##plot(NP.phis[, p], NP.nus[, p], pch=".") |
|
353 |
- CT <- 1/phi*(NP-nu) |
|
354 |
- tmp <- matrix(as.integer(CT*100), nrow(CT), ncol(CT)) |
|
355 |
- NP.CT[, plateInd] <- tmp |
|
356 |
- if(FALSE){ |
|
357 |
- ##should be centered at 2 |
|
358 |
- tmp <- rowMeans(NP.CT[, plateInd]) |
|
359 |
- hist(tmp/100, breaks=200) |
|
360 |
- } |
|
426 |
+ ##muA <- muA[!flagsA, p, c("A", "AA")] |
|
427 |
+ ##muB <- muB[!flagsB, p, c("B", "BB")] |
|
428 |
+ muA <- muA[!flagsA, "AA"] |
|
429 |
+ muB <- muB[!flagsB, "BB"] |
|
430 |
+ X <- cbind(1, log2(c(muA, muB))) |
|
431 |
+ Y <- log2(c(phiA[!flagsA], phiB[!flagsB])) |
|
432 |
+ if(nrow(X) > 5000) ix <- sample(1:nrow(X), 5000) |
|
433 |
+ ##X <- X[ix, ] |
|
434 |
+ ##Y <- Y[ix] |
|
435 |
+ betahat <- solve(crossprod(X[ix, ]), crossprod(X[ix, ], Y[ix])) |
|
436 |
+## Yhat <- X%*%betahat |
|
437 |
+## phihat <- 2^Yhat |
|
438 |
+## nuhat <- 2^X[, 2] - 2*phihat |
|
439 |
+## nuAB <- c(nuA[!flagsA], nuB[!flagsB]) |
|
440 |
+## plot(log2(nuhat), log2(nuAB), pch=".") |
|
441 |
+## plot(log2(nuhat)-log2(nuAB), pch=".") |
|
442 |
+## hist(log2(nuAB)) |
|
443 |
+ ##plot(Y-Yhat, pch=".") |
|
444 |
+ ##plot(Y, Yhat, pch=".") |
|
445 |
+ ##calculate R2 |
|
446 |
+ if(CHR == 23){ |
|
447 |
+ cnvs <- envir[["cnvs"]] |
|
448 |
+ data(cnProbes, package="genomewidesnp6Crlmm") |
|
449 |
+ cnProbes <- cnProbes[match(cnvs, rownames(cnProbes)), ] |
|
450 |
+ par <- cnProbes[, "position"] < 2709520 | (cnProbes[, "position"] > 154584237 & cnProbes[, "position"] < 154913754) |
|
451 |
+ gender <- envir[["gender"]] |
|
452 |
+ mu1 <- rowMedians(NP[, gender=="male"], na.rm=TRUE) |
|
453 |
+ mu2 <- rowMedians(NP[, gender=="female"], na.rm=TRUE) |
|
454 |
+ mus <- log(cbind(mu1, mu2)) |
|
455 |
+ X <- cbind(1, mus[, 1]) |
|
456 |
+ ##For build Hg18 |
|
457 |
+ ##http://genome.ucsc.edu/cgi-bin/hgGateway |
|
458 |
+ ##pseudo-autosomal regions on X |
|
459 |
+ ##chrX:1-2,709,520 and chrX:154584237-154913754, respectively |
|
460 |
+ Yhat1 <- as.numeric(X %*% betahat) |
|
461 |
+ X <- cbind(1, mus[, 2]) |
|
462 |
+ Yhat2 <- as.numeric(X %*% betahat) |
|
463 |
+ phi1 <- exp(Yhat1) |
|
464 |
+ phi2 <- exp(Yhat2) |
|
465 |
+ nu1 <- exp(mus[, 1]) - phi1 |
|
466 |
+ nu1[par] <- exp(mus[par, 1]) - 2*phi1[par] |
|
467 |
+ nu2 <- exp(mus[, 2]) - 2*phi2 |
|
468 |
+ CT1 <- 1/phi1*(NP[, gender=="male"]-nu1) |
|
469 |
+ CT2 <- 1/phi2*(NP[, gender=="female"]-nu2) |
|
470 |
+ CT1 <- matrix(as.integer(100*CT1), nrow(CT1), ncol(CT1)) |
|
471 |
+ CT2 <- matrix(as.integer(100*CT2), nrow(CT2), ncol(CT2)) |
|
472 |
+ CT <- envir[["CT"]] |
|
473 |
+ CT[, plate==uplate[p] & gender=="male"] <- CT1 |
|
474 |
+ CT[, plate==uplate[p] & gender=="female"] <- CT2 |
|
475 |
+ envir[["CT"]] <- CT |
|
476 |
+ } else { |
|
477 |
+ normalNP <- envir[["normalNP"]] |
|
478 |
+ normalNP <- normalNP[, plate==uplate[p]] |
|
479 |
+ mus <- rowMedians(NP * normalNP, na.rm=TRUE) |
|
480 |
+ crosshyb <- median(muA) - median(mus) |
|
481 |
+ X <- cbind(1, log2(mus+crosshyb)) |
|
482 |
+ ##X <- cbind(1, log2(mus)) |
|
483 |
+ logPhiT <- X %*% betahat |
|
484 |
+ phiT[, p] <- 2^(logPhiT) |
|
485 |
+ nuT[, p] <- mus - 2*phiT[, p] |
|
486 |
+ T <- 1/phiT[, p]*(NP - nuT[, p]) |
|
487 |
+ CT <- envir[["CT"]] |
|
488 |
+ CT[, plate==uplate[p]] <- matrix(as.integer(100*T), nrow(T), ncol(T)) |
|
361 | 489 |
|
362 |
- ##--------------------------------------------------------------------------- |
|
363 |
- ## For NA SNPs, treat as nonpolymorphic |
|
364 |
- ##--------------------------------------------------------------------------- |
|
365 |
- CA <- get("CA", envir) |
|
366 |
- CB <- get("CB", envir) |
|
367 |
- tmpA <- CA[, plate==uplate[p]] |
|
368 |
- tmpB <- CB[, plate==uplate[p]] |
|
369 |
- indexA <- which(rowSums(is.na(tmpA)) > 0) |
|
370 |
- indexB <- which(rowSums(is.na(tmpB)) > 0) |
|
371 |
- index <- union(indexA, indexB) |
|
372 |
- if(length(index) > 0){ |
|
373 |
- npflags <- get("npflags", envir) |
|
374 |
- ##warning(paste(length(index), "indices have NAs for the copy number estimates")) |
|
375 |
- npflags[[p]] <- unique(c(npflags[[p]], index)) |
|
376 |
- assign("npflags", npflags, envir) |
|
490 |
+ ##Variance for prediction region |
|
491 |
+ sig2T[, plate==uplate[[p]]] <- rowMAD(log2(NP*normalNP), na.rm=TRUE)^2 |
|
492 |
+ envir[["sig2T"]] <- sig2T |
|
493 |
+ envir[["CT"]] <- CT |
|
494 |
+ envir[["phiT"]] <- phiT |
|
495 |
+ envir[["nuT"]] <- nuT |
|
377 | 496 |
} |
378 |
- ##if(length(index) > 0) browser() |
|
379 |
- |
|
380 |
- ##--------------------------------------------------------------------------- |
|
381 |
- ## this part could stand improvement |
|
382 |
- ## - estimate the uncertainty |
|
383 | 497 |
##--------------------------------------------------------------------------- |
384 |
- |
|
498 |
+ ## For NA SNPs, treat as nonpolymorphic |
|
385 | 499 |
##--------------------------------------------------------------------------- |
386 |
- ## Estimate variance for copy number probes |
|
387 |
- ## VAR(CT) = var(1/phi * (NP - nu)) |
|
388 |
- ## = 1/phi^2 * VAR(NP) |
|
389 |
- ## = 1/phi^2 * f(mu) * sigma |
|
390 |
- ## ** Phi is predicted from a regression using the row medians as predictors |
|
391 |
- ## -- this may underestimate the uncertainty |
|
392 |
- ##--------------------------------------------------------------------------- |
|
393 |
-## log.mus <- log2(mus) |
|
394 |
-## log.var <- log2(rowVars(NP)) |
|
395 |
-## f <- predict(fit.variance, newdata=data.frame(log.mus=log.mus)) |
|
396 |
-## resid <- log.var - f |
|
397 |
-## sds <- 1/phi*sqrt(2^(predict(fit.variance, newdata=data.frame(log.mus=log.mus+resid)))) |
|
398 |
-## |
|
399 |
-## robustSD <- function(X) diff(quantile(X, probs=c(0.16, (1-0.16)), na.rm=TRUE))/2 |
|
400 |
-## sample.sds <- apply(CT, 2, robustSD) |
|
401 |
-## sample.sds <- sample.sds/median(sample.sds, na.rm=TRUE) |
|
402 |
-## NP.sds[, plate==uplate[p]] <- sds %*% t(sample.sds) |
|
403 |
- assign("phis", phis, envir) |
|
404 |
- assign("nus", nus, envir) |
|
405 |
- assign("NP.CT", NP.CT, envir) |
|
406 |
-## assign("NP.sds", NP.sds, envir) |
|
407 |
-## firstPass.NP <- list(phis=phis, nus=nus, CT=NP.CT, sds=sds, |
|
408 |
-## gns=gns, sns=sns, bns=bns) |
|
409 |
-## firstPass.NP |
|
410 | 500 |
} |
411 | 501 |
|
412 |
-oneBatch <- function(plateIndex, G, A, B, conf, CONF.THR=0.99, MIN.OBS=3, DF.PRIOR, envir, trim, upperTail, bias.adj=FALSE, priorProb, ...){ |
|
413 |
- p <- plateIndex |
|
414 |
- plate <- get("plate", envir) |
|
415 |
- Ns <- get("Ns", envir) |
|
502 |
+withinGenotypeMoments <- function(p, A, B, calls, conf, CONF.THR, DF.PRIOR, envir){ |
|
503 |
+ CHR <- envir[["chrom"]] |
|
504 |
+ Ns <- envir[["Ns"]] |
|
505 |
+ muA <- envir[["muA"]] |
|
506 |
+ muB <- envir[["muB"]] |
|
507 |
+ vA <- envir[["vA"]] |
|
508 |
+ vB <- envir[["vB"]] |
|
509 |
+ plate <- envir[["plate"]] |
|
510 |
+ uplate <- envir[["uplate"]] |
|
511 |
+ normal <- envir[["normal"]][, plate==uplate[p]] |
|
512 |
+ G <- calls; rm(calls); gc() |
|
513 |
+ if(is.null(normal)) normal <- matrix(TRUE, nrow(G), ncol(G)) |
|
514 |
+ |
|
416 | 515 |
highConf <- 1-exp(-conf/1000) |
417 | 516 |
highConf <- highConf > CONF.THR |
418 |
- AA <- G == 1 & highConf |
|
419 |
- AB <- G == 2 & highConf |
|
420 |
- BB <- G == 3 & highConf |
|
421 |
- Ns[, p, "AA"] <- rowSums(AA) |
|
422 |
- Ns[, p, "AB"] <- rowSums(AB) |
|
423 |
- Ns[, p, "BB"] <- rowSums(BB) |
|
424 |
- assign("Ns", Ns, envir) |
|
425 |
- AA[AA == FALSE] <- NA |
|
426 |
- AB[AB == FALSE] <- NA |
|
427 |
- BB[BB == FALSE] <- NA |
|
428 |
- ##--------------------------------------------------------------------------- |
|
429 |
- ## Sufficient statistics (plate-specific) |
|
430 |
- ##--------------------------------------------------------------------------- |
|
431 |
- AA.A <- AA*A |
|
432 |
- AB.A <- AB*A |
|
433 |
- BB.A <- BB*A |
|
434 |
- AA.B <- AA*B |
|
435 |
- AB.B <- AB*B |
|
436 |
- BB.B <- BB*B |
|
437 |
- locationAndScale(p=p, AA.A=AA.A, AB.A=AB.A, BB.A=BB.A, |
|
438 |
- AA.B=AA.B, AB.B=AB.B, BB.B=BB.B, |
|
439 |
- envir=envir, DF.PRIOR=DF.PRIOR) |
|
440 |
- muA.AA <- get("muA.AA", envir) |
|
441 |
- muA.AB <- get("muA.AB", envir) |
|
442 |
- muA.BB <- get("muA.BB", envir) |
|
443 |
- muB.AA <- get("muB.AA", envir) |
|
444 |
- muB.AB <- get("muB.AB", envir) |
|
445 |
- muB.BB <- get("muB.BB", envir) |
|
446 |
- sigmaA <- get("sigmaA", envir) |
|
447 |
- sigmaB <- get("sigmaB", envir) |
|
448 |
- tau2A <- get("tau2A", envir) |
|
449 |
- tau2B <- get("tau2B", envir) |
|
450 |
- sig2A <- get("sig2A", envir) |
|
451 |
- sig2B <- get("sig2B", envir) |
|
452 |
- corr <- get("corr", envir) |
|
453 |
- corrA.BB <- get("corrA.BB", envir) ## B allele |
|
454 |
- corrB.AA <- get("corrB.AA", envir) |
|
517 |
+ if(CHR == 23){ |
|
518 |
+ gender <- envir[["gender"]] |
|
519 |
+ IX <- matrix(gender, nrow(G), ncol(G), byrow=TRUE) |
|
520 |
+ IX <- IX == "female" |
|
521 |
+ } else IX <- matrix(TRUE, nrow(G), ncol(G)) |
|
522 |
+ |
|
523 |
+ index <- GT.B <- GT.A <- vector("list", 3) |
|
524 |
+ names(index) <- names(GT.B) <- names(GT.A) <- c("AA", "AB", "BB") |
|
525 |
+ ##-------------------------------------------------- |
|
526 |
+ ##within-genotype sufficient statistics |
|
527 |
+ ##-------------------------------------------------- |
|
528 |
+ GT.B <- GT.A <- list() |
|
529 |
+ for(j in 1:3){ |
|
530 |
+ GT <- G==j & highConf & IX & normal |
|
531 |
+ Ns[, p, j+2] <- rowSums(GT, na.rm=TRUE) |
|
532 |
+ GT[GT == FALSE] <- NA |
|
533 |
+ GT.A[[j]] <- GT*A |
|
534 |
+ GT.B[[j]] <- GT*B |
|
535 |
+ index[[j]] <- which(Ns[, p, j+2] > 0) |
|
536 |
+ |
|
537 |
+ muA[, p, j+2] <- rowMedians(GT.A[[j]], na.rm=TRUE) |
|
538 |
+ muB[, p, j+2] <- rowMedians(GT.B[[j]], na.rm=TRUE) |
|
539 |
+ vA[, p, j+2] <- rowMAD(GT.A[[j]], na.rm=TRUE) |
|
540 |
+ vB[, p, j+2] <- rowMAD(GT.B[[j]], na.rm=TRUE) |
|
541 |
+ |
|
542 |
+ ##Shrink towards the typical variance |
|
543 |
+ DF <- Ns[, p, j+2]-1 |
|
544 |
+ DF[DF < 1] <- 1 |
|
545 |
+ v0A <- median(vA[, p, j+2], na.rm=TRUE) |
|
546 |
+ v0B <- median(vB[, p, j+2], na.rm=TRUE) |
|
547 |
+ vA[, p, j+2] <- (vA[, p, j+2]*DF + v0A*DF.PRIOR)/(DF.PRIOR+DF) |
|
548 |
+ vA[is.na(vA[, p, j+2]), p, j+2] <- v0A |
|
549 |
+ vB[, p, j+2] <- (vB[, p, j+2]*DF + v0B*DF.PRIOR)/(DF.PRIOR+DF) |
|
550 |
+ vB[is.na(vB[, p, j+2]), p, j+2] <- v0B |
|
551 |
+ } |
|
552 |
+ if(CHR == 23){ |
|
553 |
+ k <- 1 |
|
554 |
+ for(j in c(1,3)){ |
|
555 |
+ GT <- G==j & highConf & !IX |
|
556 |
+ Ns[, p, k] <- rowSums(GT) |
|
557 |
+ GT[GT == FALSE] <- NA |
|
558 |
+ muA[, p, k] <- rowMedians(GT*A, na.rm=TRUE) |
|
559 |
+ muB[, p, k] <- rowMedians(GT*B, na.rm=TRUE) |
|
560 |
+ vA[, p, k] <- rowMAD(GT*A, na.rm=TRUE) |
|
561 |
+ vB[, p, k] <- rowMAD(GT*B, na.rm=TRUE) |
|
562 |
+ |
|
563 |
+ DF <- Ns[, p, k]-1 |
|
564 |
+ DF[DF < 1] <- 1 |
|
565 |
+ v0A <- median(vA[, p, k], na.rm=TRUE) |
|
566 |
+ v0B <- median(vB[, p, k], na.rm=TRUE) |
|
567 |
+ vA[, p, k] <- (vA[, p, k]*DF + v0A*DF.PRIOR)/(DF.PRIOR+DF) |
|
568 |
+ vA[is.na(vA[, p, k]), p, k] <- v0A |
|
569 |
+ vB[, p, k] <- (vB[, p, k]*DF + v0B*DF.PRIOR)/(DF.PRIOR+DF) |
|
570 |
+ vB[is.na(vB[, p, k]), p, k] <- v0B |
|
571 |
+ k <- k+1 |
|
572 |
+ } |
|
573 |
+ } |
|
574 |
+ envir[["GT.A"]] <- GT.A |
|
575 |
+ envir[["GT.B"]] <- GT.B |
|
576 |
+ envir[["Ns"]] <- Ns |
|
577 |
+ envir[["index"]] <- index |
|
578 |
+ envir[["muA"]] <- muA |
|
579 |
+ envir[["muB"]] <- muB |
|
580 |
+ envir[["vA"]] <- vA |
|
581 |
+ envir[["vB"]] <- vB |
|
582 |
+} |
|
583 |
+ |
|
584 |
+ |
|
585 |
+oneBatch <- function(plateIndex, |
|
586 |
+ A, |
|
587 |
+ B, |
|
588 |
+ calls, |
|
589 |
+ conf, |
|
590 |
+ gender, |
|
591 |
+ NP, |
|
592 |
+ plate, |
|
593 |
+ MIN.OBS=1, |
|
594 |
+ envir, |
|
595 |
+ DF.PRIOR=50, |
|
596 |
+ CONF.THR=0.99, |
|
597 |
+ trim=0, |
|
598 |
+ bias.adj=FALSE, priorProb, ...){ |
|
599 |
+ p <- plateIndex |
|
600 |
+ CHR <- envir[["chrom"]] |
|
455 | 601 |
if(bias.adj){ |
456 |
- ##First check that nu and phi are available |
|
457 |
- nuA <- get("nuA", envir) |
|
602 |
+ nuA <- envir[["nuA"]] |
|
458 | 603 |
if(all(is.na(nuA))) { |
459 | 604 |
message("Background and signal coefficients have not yet been estimated -- can not do bias correction yet") |
460 |
- message("Must run computeCopynumber a second time with bias.adj=TRUE to do the adjustment") |
|
461 |
- } else { |
|
462 |
- message("running bias adjustment") |
|
463 |
- normal <- biasAdj(A=A, B=B, plateIndex=p, envir=envir, priorProb=priorProb) |
|
464 |
- normal[normal == FALSE] <- NA |
|
465 |
- AA.A <- AA.A*normal |
|
466 |
- AB.A <- AB.A*normal |
|
467 |
- BB.A <- BB.A*normal |
|
468 |
- AA.B <- AA.B*normal |
|
469 |
- AB.B <- AB.B*normal |
|
470 |
- BB.B <- BB.B*normal |
|
471 |
- message("Recomputing location and scale parameters") |
|
472 |
- locationAndScale(p=p, AA.A=AA.A, AB.A=AB.A, BB.A=BB.A, |
|
473 |
- AA.B=AA.B, AB.B=AB.B, BB.B=BB.B, |
|
474 |
- envir=envir, DF.PRIOR=DF.PRIOR) |
|
475 |
- muA.AA <- get("muA.AA", envir) |
|
476 |
- muA.AB <- get("muA.AB", envir) |
|
477 |
- muA.BB <- get("muA.BB", envir) |
|
478 |
- muB.AA <- get("muB.AA", envir) |
|
479 |
- muB.AB <- get("muB.AB", envir) |
|
480 |
- muB.BB <- get("muB.BB", envir) |
|
481 |
- tau2A <- get("tau2A", envir) |
|
482 |
- tau2B <- get("tau2B", envir) |
|
483 |
- sig2A <- get("sig2A", envir) |
|
484 |
- sig2B <- get("sig2B", envir) |
|
485 |
- sigmaA <- get("sigmaA", envir) |
|
486 |
- sigmaB <- get("sigmaB", envir) |
|
487 |
- corr <- get("corr", envir) |
|
488 |
- corrA.BB <- get("corrA.BB", envir) ## B allele |
|
489 |
- corrB.AA <- get("corrB.AA", envir) |
|
490 |
- Ns.unadj <- Ns |
|
491 |
- assign("Ns.unadj", Ns.unadj, envir) |
|
492 |
- Ns[, p, "AA"] <- rowSums(!is.na(AA.A)) |
|
493 |
- Ns[, p, "AB"] <- rowSums(!is.na(AB.A)) |
|
494 |
- Ns[, p, "BB"] <- rowSums(!is.na(BB.A)) |
|
605 |
+ stop("Must run computeCopynumber a second time with bias.adj=TRUE to do the adjustment") |
|
495 | 606 |
} |
607 |
+ message("running bias adjustment") |
|
608 |
+ ##adjustment for nonpolymorphic probes |
|
609 |
+ biasAdjNP(plateIndex=p, envir=envir, priorProb=priorProb) |
|
610 |
+ ##adjustment for SNPs |
|
611 |
+ biasAdj(plateIndex=p, envir=envir, priorProb=priorProb) |
|
612 |
+ message("Recomputing location and scale parameters") |
|
496 | 613 |
} |
497 |
-## if(trim > 0){ |
|
498 |
-## ##rowMedians is not robust enough when a variant is common |
|
499 |
-## ##Try a trimmed rowMedian, trimming only one tail (must specify which) |
|
500 |
-## ## - for genotypes with 3 or more observations, exclude the upper X% |
|
501 |
-## ## - one way to do this is to replace these observations with NAs |
|
502 |
-## ## e.g, exclude round(Ns * X%, 0) observations |
|
503 |
-## ## - for genotypes with fewer than 10 observations, |
|
504 |
-## ## - recalculate rowMedians |
|
505 |
-## replaceWithNAs <- function(x, trim, upperTail){ |
|
506 |
-## ##put NA's last if trimming the upperTail |
|
507 |
-## if(upperTail) decreasing <- TRUE else decreasing <- FALSE |
|
508 |
-## NN <- round(sum(!is.na(x)) * trim, 0) |
|
509 |
-## ix <- order(x, decreasing=decreasing, na.last=TRUE)[1:NN] |
|
510 |
-## x[ix] <- NA |
|
511 |
-## return(x) |
|
512 |
-## } |
|
513 |
-## ##which rows should be trimmed |
|
514 |
-## rowsToTrim <- which(round(Ns[, p, "AA"] * trim, 0) > 0) |
|
515 |
-## ##replace values in the tail of A with NAs |
|
516 |
-## AA.A[rowsToTrim, ] <- t(apply(AA.A[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail)) |
|
517 |
-## AA.B[rowsToTrim, ] <- t(apply(AA.B[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail)) |
|
518 |
-## rowsToTrim <- which(round(Ns[, p, "AB"] * trim, 0) > 0) |
|
519 |
-## AB.A[rowsToTrim, ] <- t(apply(AB.A[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail)) |
|
520 |
-## AB.B[rowsToTrim, ] <- t(apply(AB.B[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail)) |
|
521 |
-## rowsToTrim <- which(round(Ns[, p, "BB"] * trim, 0) > 0) |
|
522 |
-## BB.A[rowsToTrim, ] <- t(apply(BB.A[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail)) |
|
523 |
-## BB.B[rowsToTrim, ] <- t(apply(BB.B[rowsToTrim, ], 1, replaceWithNAs, trim=trim, upperTail=upperTail)) |
|
524 |
-## |
|
525 |
-## ##Should probably recompute the Ns -- change the |
|
526 |
-## ##degrees of freedom |
|
527 |
-## Ns[, p, "AA"] <- rowSums(!is.na(AA.A)) |
|
528 |
-## Ns[, p, "AB"] <- rowSums(!is.na(AB.A)) |
|
529 |
-## Ns[, p, "BB"] <- rowSums(!is.na(BB.A)) |
|
530 |
-## } |
|
614 |
+ withinGenotypeMoments(p=p, A=A, B=B, calls=calls, conf=conf, CONF.THR=CONF.THR, |
|
615 |
+ DF.PRIOR=DF.PRIOR, envir=envir) |
|
616 |
+ GT.A <- envir[["GT.A"]] |
|
617 |
+ GT.B <- envir[["GT.B"]] |
|
618 |
+ index <- envir[["index"]] |
|
619 |
+ locationAndScale(p=p, GT.A=GT.A, GT.B=GT.B, index=index, envir=envir, DF.PRIOR=DF.PRIOR) |
|
620 |
+ muA <- envir[["muA"]] |
|
621 |
+ muB <- envir[["muB"]] |
|
622 |
+ Ns <- envir[["Ns"]] |
|
623 |
+ |
|
531 | 624 |
##--------------------------------------------------------------------------- |
532 | 625 |
## Predict sufficient statistics for unobserved genotypes (plate-specific) |
533 | 626 |
##--------------------------------------------------------------------------- |
534 |
-## NN <- Ns |
|
535 |
-## NN[, p, "AA"] <- rowSums(AA & highConf, na.rm=TRUE) ##how many AA were called with high confidence |
|
536 |
-## NN[, p, "AB"] <- rowSums(AB & highConf, na.rm=TRUE) |
|
537 |
-## NN[, p, "BB"] <- rowSums(BB & highConf, na.rm=TRUE) |
|
538 | 627 |
index.AA <- which(Ns[, p, "AA"] >= 3) |
539 | 628 |
index.AB <- which(Ns[, p, "AB"] >= 3) |
540 | 629 |
index.BB <- which(Ns[, p, "BB"] >= 3) |
541 |
- correct.orderA <- muA.AA[, p] > muA.BB[, p] |
|
542 |
- correct.orderB <- muB.BB[, p] > muB.AA[, p] |
|
543 |
- if(length(index.AB) > 0){ |
|
544 |
- nobs <- rowSums(Ns[, p, ] >= MIN.OBS) == 3 |
|
545 |
- } else nobs <- rowSums(Ns[, p, c(1,3)] >= MIN.OBS) == 2 |
|
630 |
+ correct.orderA <- muA[, p, "AA"] > muA[, p, "BB"] |
|
631 |
+ correct.orderB <- muB[, p, "BB"] > muB[, p, "AA"] |
|
632 |
+ ##For chr X, this will ignore the males |
|
633 |
+ nobs <- rowSums(Ns[, p, 3:5] >= MIN.OBS, na.rm=TRUE) == 3 |
|
546 | 634 |
index.complete <- which(correct.orderA & correct.orderB & nobs) ##be selective here |
547 | 635 |
size <- min(5000, length(index.complete)) |
548 | 636 |
if(size == 5000) index.complete <- sample(index.complete, 5000) |
... | ... |
@@ -550,192 +638,112 @@ oneBatch <- function(plateIndex, G, A, B, conf, CONF.THR=0.99, MIN.OBS=3, DF.PRI |
550 | 638 |
warning("fewer than 200 snps pass criteria for predicting the sufficient statistics") |
551 | 639 |
stop() |
552 | 640 |
} |
553 |
- index.AA <- which(Ns[, p, "AA"] == 0 & (Ns[, p, "AB"] >= MIN.OBS & Ns[, p, "BB"] >= MIN.OBS)) |
|
554 |
- index.AB <- which(Ns[, p, "AB"] == 0 & (Ns[, p, "AA"] >= MIN.OBS & Ns[, p, "BB"] >= MIN.OBS)) |
|
555 |
- index.BB <- which(Ns[, p, "BB"] == 0 & (Ns[, p, "AB"] >= MIN.OBS & Ns[, p, "AA"] >= MIN.OBS)) |
|
556 |
- if(length(index.AA) > 0){ |
|
557 |
- ##Predict mean for AA genotypes |
|
558 |
- X.AA <- cbind(1, muA.AB[index.complete, p], muA.BB[index.complete, p], |
|
559 |
- muB.AB[index.complete, p], muB.BB[index.complete, p]) |
|
560 |
- Y <- cbind(muA.AA[index.complete, p], muB.AA[index.complete, p]) |
|
561 |
- XtY <- crossprod(X.AA, Y) |
|
562 |
- betahat <- solve(crossprod(X.AA), XtY) |
|
563 |
- X.AA <- cbind(1, muA.AB[index.AA, p], muA.BB[index.AA, p], |
|
564 |
- muB.AB[index.AA, p], muB.BB[index.AA, p]) |
|
565 |
- musAA <- X.AA %*% betahat |
|
566 |
- musAA[musAA <= 100] <- 100 |
|
567 |
- muA.AA[index.AA, p] <- musAA[, 1] |
|
568 |
- muB.AA[index.AA, p] <- musAA[, 2] |
|
569 |
- } |
|
570 |
- if(length(index.AB) > 0){ |
|
571 |
- ##Predict mean for AB genotypes |
|
572 |
- X.AB <- cbind(1, muA.AA[index.complete, p], muA.BB[index.complete, p], |
|
573 |
- muB.AA[index.complete, p], muB.BB[index.complete, p]) |
|
574 |
- Y <- cbind(muA.AB[index.complete, p], muB.AB[index.complete, p]) |
|
575 |
- XtY <- crossprod(X.AB, Y) |
|
576 |
- betahat <- solve(crossprod(X.AB), XtY) |
|
577 |
- X.AB <- cbind(1, muA.AA[index.AB, p], muA.BB[index.AB, p], |
|
578 |
- muB.AA[index.AB, p], muB.BB[index.AB, p]) |
|
579 |
- musAB <- X.AB %*% betahat |
|
580 |
- musAB[musAB <= 100] <- 100 |
|
581 |
- muA.AB[index.AB, p] <- musAB[, 1] |
|
582 |
- muB.AB[index.AB, p] <- musAB[, 2] |
|
641 |
+ index[[1]] <- which(Ns[, p, "AA"] == 0 & (Ns[, p, "AB"] >= MIN.OBS & Ns[, p, "BB"] >= MIN.OBS)) |
|
642 |
+ index[[2]] <- which(Ns[, p, "AB"] == 0 & (Ns[, p, "AA"] >= MIN.OBS & Ns[, p, "BB"] >= MIN.OBS)) |
|
643 |
+ index[[3]] <- which(Ns[, p, "BB"] == 0 & (Ns[, p, "AB"] >= MIN.OBS & Ns[, p, "AA"] >= MIN.OBS)) |
|
644 |
+ mnA <- muA[, p, 3:5] |
|
645 |
+ mnB <- muB[, p, 3:5] |
|
646 |
+ for(j in 1:3){ |
|
647 |
+ if(length(index[[j]]) == 0) next() |
|
648 |
+ X <- cbind(1, mnA[index.complete, -j], mnB[index.complete, -j]) |
|
649 |
+ Y <- cbind(mnA[index.complete, j], mnB[index.complete, j]) |
|
650 |
+ betahat <- solve(crossprod(X), crossprod(X,Y)) |
|
651 |
+ X <- cbind(1, mnA[index[[j]], -j], mnB[index[[j]], -j]) |
|
652 |
+ mus <- X %*% betahat |
|
653 |
+ muA[index[[j]], p, j+2] <- mus[, 1] |
|
654 |
+ muB[index[[j]], p, j+2] <- mus[, 2] |
|
583 | 655 |
} |
584 |
- if(length(index.BB) > 0){ |
|
585 |
- ##Predict mean for BB genotypes |
|
586 |
- X.BB <- cbind(1, muA.AA[index.complete, p], muA.AB[index.complete, p], |
|
587 |
- muB.AA[index.complete, p], muB.AB[index.complete, p]) |
|
588 |
- Y <- cbind(muA.BB[index.complete, p], muB.BB[index.complete, p]) |
|
589 |
- XtY <- crossprod(X.BB, Y) |
|
590 |
- betahat <- solve(crossprod(X.BB), XtY) |
|
591 |
- X.BB <- cbind(1, muA.AA[index.BB, p], muA.AB[index.BB, p], |
|
592 |
- muB.AA[index.BB, p], muB.AB[index.BB, p]) |
|
593 |
- musBB <- X.BB %*% betahat |
|
594 |
- musBB[musBB <= 100] <- 100 |
|
595 |
- muA.BB[index.BB, p] <- musBB[, 1] |
|
596 |
- muB.BB[index.BB, p] <- musBB[, 2] |
|
656 |
+ nobsA <- Ns[, p, "A"] > 10 |
|
657 |
+ nobsB <- Ns[, p, "B"] > 10 |
|
658 |
+ complete <- list() |
|
659 |
+ complete[[1]] <- which(correct.orderA & correct.orderB & nobsA) ##be selective here |
|
660 |
+ complete[[2]] <- which(correct.orderA & correct.orderB & nobsB) ##be selective here |
|
661 |
+ size <- min(5000, length(complete[[1]])) |
|
662 |
+ if(size == 5000) complete <- lapply(complete, function(x) sample(x, size)) |
|
663 |
+ if(CHR == 23){ |
|
664 |
+ index[[1]] <- which(Ns[, p, "A"] == 0) |
|
665 |
+ index[[2]] <- which(Ns[, p, "B"] == 0) |
|
666 |
+ cols <- 2:1 |
|
667 |
+ for(j in 1:2){ |
|
668 |
+ if(length(index[[j]]) == 0) next() |
|
669 |
+ X <- cbind(1, muA[complete[[j]], p, cols[j]], muB[complete[[j]], p, cols[j]]) |
|
670 |
+ Y <- cbind(muA[complete[[j]], p, j], muB[complete[[j]], p, j]) |
|
671 |
+ betahat <- solve(crossprod(X), crossprod(X,Y)) |
|
672 |
+ X <- cbind(1, muA[index[[j]], p, cols[j]], muB[index[[j]], p, cols[j]]) |
|
673 |
+ mus <- X %*% betahat |
|
674 |
+ muA[index[[j]], p, j] <- mus[, 1] |
|
675 |
+ muB[index[[j]], p, j] <- mus[, 2] |
|
676 |
+ } |
|
597 | 677 |
} |
598 | 678 |
##missing two genotypes |
599 |
-# index.AA <- which(NN[, p, "AA"] == 0 & (NN[, p, "AB"] < MIN.OBS | NN[, p, "BB"] < MIN.OBS)) |
|
600 |
-# index.AB <- which(NN[, p, "AB"] == 0 & (NN[, p, "AA"] < MIN.OBS | NN[, p, "BB"] < MIN.OBS)) |
|
601 |
-# index.BB <- which(NN[, p, "BB"] == 0 & (NN[, p, "AB"] >= MIN.OBS | NN[, p, "AA"] >= MIN.OBS)) |
|
602 | 679 |
noAA <- Ns[, p, "AA"] < MIN.OBS |
603 | 680 |
noAB <- Ns[, p, "AB"] < MIN.OBS |
604 | 681 |
noBB <- Ns[, p, "BB"] < MIN.OBS |
682 |
+ index[[1]] <- noAA & noAB |
|
683 |
+ index[[2]] <- noBB & noAB |
|
684 |
+ index[[3]] <- noAA & noBB |
|
685 |
+ snpflags <- envir[["snpflags"]] |
|
686 |
+ snpflags[, p] <- index[[1]] | index[[2]] | index[[3]] |
|
687 |
+ |
|
605 | 688 |
##--------------------------------------------------------------------------- |
606 | 689 |
## Two genotype clusters not observed -- would sequence help? (didn't seem that helpful) |
607 | 690 |
## 1 extract index of complete data |
608 | 691 |
## 2 Regress mu1,mu3 ~ sequence + mu2 |
609 | 692 |
## 3 Predict mu1*, mu3* for missing genotypes |
610 |
- ##--------------------------------------------------------------------------- |
|
611 |
- if(sum(noAA & noAB) > 0){ |
|
612 |
- ##predict AA and AB centers |
|
613 |
- X <- cbind(1, muA.BB[index.complete, p], muB.BB[index.complete, p]) |
|
614 |
- Y <- cbind(muA.AA[index.complete, p], |
|
615 |
- muB.AA[index.complete, p], |
|
616 |
- muA.AB[index.complete, p], |
|
617 |
- muB.AB[index.complete, p]) |
|
618 |
- XtY <- crossprod(X, Y) |
|
619 |
- betahat <- solve(crossprod(X), XtY) |
|
620 |
- X <- cbind(1, muA.BB[noAA & noAB, p], muB.BB[noAA & noAB, p]) |
|
621 |
- mus <- X %*% betahat |
|
622 |
- mus[mus <= 100] <- 100 |
|
623 |
- muA.AA[noAA & noAB, p] <- mus[, 1] |
|
624 |
- muB.AA[noAA & noAB, p] <- mus[, 2] |
|
625 |
- muA.AB[noAA & noAB, p] <- mus[, 3] |
|
626 |
- muB.AB[noAA & noAB, p] <- mus[, 4] |
|
627 |
- } |
|
628 |
- if(sum(noBB & noAB) > 0){ |
|
629 |
- ##predict AB and BB centers |
|
630 |
- X <- cbind(1, muA.AA[index.complete, p], muB.AA[index.complete, p]) |
|
631 |
- Y <- cbind(muA.AB[index.complete, p], |
|
632 |
- muB.AB[index.complete, p], |
|
633 |
- muA.BB[index.complete, p], #muA.AB[index.complete, p], |
|
634 |
- muB.BB[index.complete, p])#, muB.AB[index.complete, p]) |
|
635 |
- XtY <- crossprod(X, Y) |
|
636 |
- betahat <- solve(crossprod(X), XtY) |
|
637 |
- X <- cbind(1, muA.AA[noBB & noAB, p], muB.AA[noBB & noAB, p]) |
|
638 |
- mus <- X %*% betahat |
|
639 |
- mus[mus <= 100] <- 100 |
|
640 |
- muA.AB[noBB & noAB, p] <- mus[, 1] |
|
641 |
- muB.AB[noBB & noAB, p] <- mus[, 2] |
|
642 |
- muA.BB[noBB & noAB, p] <- mus[, 3] |
|
643 |
- muB.BB[noBB & noAB, p] <- mus[, 4] |
|
644 |
- } |
|
645 |
- if(sum(noAA & noBB) > 0){ |
|
646 |
- ##predict AA and BB centers |
|
647 |
- X <- cbind(1, muA.AB[index.complete, p], muB.AB[index.complete, p]) |
|
648 |
- Y <- cbind(muA.AA[index.complete, p], |
|
649 |
- muB.AA[index.complete, p], |
|
650 |
- muA.BB[index.complete, p], #muA.AB[index.complete, p], |
|
651 |
- muB.BB[index.complete, p])#, muB.AB[index.complete, p]) |
|
652 |
- XtY <- crossprod(X, Y) |
|
653 |
- betahat <- solve(crossprod(X), XtY) |
|
654 |
- X <- cbind(1, muA.AB[noAA & noBB, p], muB.AB[noAA & noBB, p]) |
|
693 |
+ ##--------------------------------------------------------------------------- |
|
694 |
+ cols <- c(3, 1, 2) |
|
695 |
+ for(j in 1:3){ |
|
696 |
+ if(sum(index[[1]]) == 0) next() |
|
697 |
+ k <- cols[j] |
|
698 |
+ X <- cbind(1, mnA[index.complete, k], mnB[index.complete, k]) |
|
699 |
+ Y <- cbind(mnA[index.complete, -k], |
|
700 |
+ mnB[index.complete, -k]) |
|
701 |
+ betahat <- solve(crossprod(X), crossprod(X,Y)) |
|
702 |
+ X <- cbind(1, mnA[index[[j]], k], mnB[index[[j]], k]) |
|
655 | 703 |
mus <- X %*% betahat |
656 |
- mus[mus <= 100] <- 100 |
|
657 |
- muA.AA[noAA & noBB, p] <- mus[, 1] |
|
658 |
- muB.AA[noAA & noBB, p] <- mus[, 2] |
|
659 |
- muA.BB[noAA & noBB, p] <- mus[, 3] |
|
660 |
- muB.BB[noAA & noBB, p] <- mus[, 4] |
|
704 |
+ muA[index[[j]], p, -c(1, 2, k+2)] <- mus[, 1:2] |
|
705 |
+ muB[index[[j]], p, -c(1, 2, k+2)] <- mus[, 3:4] |
|
661 | 706 |
} |
707 |
+ |
|
708 |
+ negA <- rowSums(muA[, p, ] < 0) > 0 |
|
709 |
+ negB <- rowSums(muB[, p, ] < 0) > 0 |
|
710 |
+ snpflags[, p] <- snpflags[, p] | negA | negB | rowSums(is.na(muA[, p, 3:5]), na.rm=TRUE) > 0 |
|
711 |
+ envir[["snpflags"]] <- snpflags |
|
662 | 712 |
dn.Ns <- dimnames(Ns) |
663 | 713 |
Ns <- array(as.integer(Ns), dim=dim(Ns)) |
664 | 714 |
dimnames(Ns)[[3]] <- dn.Ns[[3]] |
665 |
- if(any(is.na(muA.AA) | any(is.na(muA.AB)) | any(is.na(muA.BB)))) |
|
666 |
- warning("Some SNPs do not have any genotype calls above the confidence threshold. Check the indices in snpflags and npflags.") |
|
667 |
- assign("muA.AA", muA.AA, envir) |
|
668 |
- assign("muA.AB", muA.AB, envir) |
|
669 |
- assign("muA.BB", muA.BB, envir) |
|
670 |
- assign("muB.AA", muB.AA, envir) |
|
671 |
- assign("muB.AB", muB.AB, envir) |
|
672 |
- assign("muB.BB", muB.BB, envir) |
|
673 |
- assign("sigmaA", sigmaA, envir) |
|
674 |
- assign("sigmaB", sigmaB, envir) |
|
675 |
- assign("tau2A", tau2A, envir) |
|
676 |
- assign("tau2B", tau2B, envir) |
|
677 |
- assign("sig2A", sig2A, envir) |
|
678 |
- assign("sig2B", sig2B, envir) |
|
679 |
- assign("corr", corr, envir) |
|
680 |
- assign("corrB.AA", corrB.AA, envir) |
|
681 |
- assign("corrA.BB", corrA.BB, envir) |
|
682 |
- assign("Ns", Ns, envir) |
|
683 |
- plates.completed <- get("plates.completed", envir) |
|
715 |
+ envir[["Ns"]] <- Ns |
|
716 |
+ envir[["muA"]] <- muA |
|
717 |
+ envir[["muB"]] <- muB |
|
718 |
+ plates.completed <- envir[["plates.completed"]] |
|
684 | 719 |
plates.completed[p] <- TRUE |
685 |
- assign("plates.completed", plates.completed, envir) |
|
720 |
+ envir[["plates.completed"]] <- plates.completed |
|
686 | 721 |
} |
687 | 722 |
|
688 |
-locationAndScale <- function(p, AA.A, AB.A, BB.A, |
|
689 |
- AA.B, AB.B, BB.B, envir, |
|
690 |
- DF.PRIOR){ |
|
691 |
- muA.AA <- get("muA.AA", envir) |
|
692 |
- muA.AB <- get("muA.AB", envir) |
|
693 |
- muA.BB <- get("muA.BB", envir) |
|
694 |
- muB.AA <- get("muB.AA", envir) |
|
695 |
- muB.AB <- get("muB.AB", envir) |
|
696 |
- muB.BB <- get("muB.BB", envir) |
|
697 |
- tau2A <- get("tau2A", envir) |
|
698 |
- tau2B <- get("tau2B", envir) |
|
699 |
- sig2A <- get("sig2A", envir) |
|
700 |
- sig2B <- get("sig2B", envir) |
|
701 |
- corr <- get("corr", envir) |
|
702 |
- corrA.BB <- get("corrA.BB", envir) ## B allele |
|
703 |
- corrB.AA <- get("corrB.AA", envir) |
|
704 |
- Ns <- get("Ns", envir) |
|
705 |
- muA.AA[, p] <- rowMedians(AA.A, na.rm=TRUE) |
|
706 |
- muA.AB[, p] <- rowMedians(AB.A, na.rm=TRUE) |
|
707 |
- muA.BB[, p] <- rowMedians(BB.A, na.rm=TRUE) |
|
708 |
- muB.AA[, p] <- rowMedians(AA.B, na.rm=TRUE) |
|
709 |
- muB.AB[, p] <- rowMedians(AB.B, na.rm=TRUE) |
|
710 |
- muB.BB[, p] <- rowMedians(BB.B, na.rm=TRUE) |
|
711 |
- sigmaA <- matrix(NA, nrow(AA.A), 3) |
|
712 |
- sigmaB <- matrix(NA, nrow(AA.A), 3) |
|
713 |
- colnames(sigmaB) <- colnames(sigmaA) <- c("AA", "AB", "BB") |
|
714 |
- sigmaA[, "AA"] <- rowMAD(AA.A, na.rm=TRUE) |
|
715 |
- sigmaA[, "AB"] <- rowMAD(AB.A, na.rm=TRUE) |
|
716 |
- sigmaA[, "BB"] <- rowMAD(BB.A, na.rm=TRUE) |
|
717 |
- sigmaB[, "AA"] <- rowMAD(AA.B, na.rm=TRUE) |
|
718 |
- sigmaB[, "AB"] <- rowMAD(AB.B, na.rm=TRUE) |
|
719 |
- sigmaB[, "BB"] <- rowMAD(BB.B, na.rm=TRUE) |
|
723 |
+locationAndScale <- function(p, GT.A, GT.B, index, envir, DF.PRIOR){ |
|
724 |
+ tau2A <- envir[["tau2A"]] |
|
725 |
+ tau2B <- envir[["tau2B"]] |
|
726 |
+ sig2A <- envir[["sig2A"]] |
|
727 |
+ sig2B <- envir[["sig2B"]] |
|
720 | 728 |
|
721 |
- ##shrink |
|
722 |
- DF <- Ns[, p, ]-1 |
|
723 |
- DF[DF < 1] <- 1 |
|
724 |
- medsA <- apply(sigmaA, 2, "median", na.rm=TRUE) |
|
725 |
- medsB <- apply(sigmaB, 2, "median", na.rm=TRUE) |
|
726 |
- for(m in 1:3){ |
|
727 |
- sigmaA[, m] <- (sigmaA[, m]*DF[, m] + medsA[m]*DF.PRIOR)/(DF.PRIOR+DF[, m]) |
|
728 |
- sigmaA[is.na(sigmaA[, m]), m] <- medsA[m] |
|
729 |
- sigmaB[, m] <- (sigmaB[, m]*DF[, m] + medsB[m]*DF.PRIOR)/(DF.PRIOR+DF[, m]) |
|
730 |
- sigmaB[is.na(sigmaB[, m]), m] <- medsB[m] |
|
731 |
- } |
|
732 |
- index.AA <- which(Ns[, p, "AA"] >= 2) |
|
733 |
- index.AB <- which(Ns[, p, "AB"] >= 2) |
|
734 |
- index.BB <- which(Ns[, p, "BB"] >= 2) |
|
729 |
+ corr <- envir[["corr"]] |
|
730 |
+ corrA.BB <- envir[["corrA.BB"]] |
|
731 |
+ corrB.AA <- envir[["corrB.AA"]] |
|
732 |
+ Ns <- get("Ns", envir) |
|
733 |
+ |
|
734 |
+ index.AA <- index[[1]] |
|
735 |
+ index.AB <- index[[2]] |
|
736 |
+ index.BB <- index[[3]] |
|
737 |
+ rm(index); gc() |
|
735 | 738 |
|
739 |
+ AA.A <- GT.A[[1]] |
|
740 |
+ AB.A <- GT.A[[2]] |
|
741 |
+ BB.A <- GT.A[[3]] |
|
742 |
+ |
|
743 |
+ AA.B <- GT.B[[1]] |
|
744 |
+ AB.B <- GT.B[[2]] |
|
745 |
+ BB.B <- GT.B[[3]] |
|
736 | 746 |
x <- BB.A[index.BB, ] |
737 |
- ##x <- Ip[index.BB, , "BB", "A"] |
|
738 |
- ##tau2A[index.BB, p] <- rowVars(x, na.rm=TRUE)##var(log(IA)| BB) |
|
739 | 747 |
tau2A[index.BB, p] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2 |
740 | 748 |
DF <- Ns[, p, "BB"]-1 |
741 | 749 |
DF[DF < 1] <- 1 |
... | ... |
@@ -743,14 +751,12 @@ locationAndScale <- function(p, AA.A, AB.A, BB.A, |
743 | 751 |
tau2A[, p] <- (tau2A[, p] * DF + med * DF.PRIOR)/(DF.PRIOR + DF) |
744 | 752 |
tau2A[is.na(tau2A[, p]), p] <- med |
745 | 753 |
|
746 |
- ##x <- Ip[index.BB, , "BB", "B"] |
|
747 | 754 |
x <- BB.B[index.BB, ] |
748 | 755 |
sig2B[index.BB, p] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2 |
749 | 756 |
med <- median(sig2B[, p], na.rm=TRUE) |
750 | 757 |
sig2B[, p] <- (sig2B[, p] * DF + med * DF.PRIOR)/(DF.PRIOR + DF) |
751 | 758 |
sig2B[is.na(sig2B[, p]), p] <- med |
752 | 759 |
|
753 |
- ##x <- Ip[index.AA, , "AA", "B"] |
|
754 | 760 |
x <- AA.B[index.AA, ] |
755 | 761 |
tau2B[index.AA, p] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2 |
756 | 762 |
DF <- Ns[, p, "AA"]-1 |
... | ... |
@@ -759,19 +765,15 @@ locationAndScale <- function(p, AA.A, AB.A, BB.A, |
759 | 765 |
tau2B[, p] <- (tau2B[, p] * DF + med * DF.PRIOR)/(DF.PRIOR + DF) |
760 | 766 |
tau2B[is.na(tau2B[, p]), p] <- med |
761 | 767 |
|
762 |
- ##x <- Ip[index.AA, , "AA", "A"] |
|
763 | 768 |
x <- AA.A[index.AA, ] |
764 | 769 |
sig2A[index.AA, p] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2##var(log(IA)|AA) |
765 | 770 |
med <- median(sig2A[, p], na.rm=TRUE) |
766 | 771 |
sig2A[, p] <- (sig2A[, p]*DF + med * DF.PRIOR)/(DF.PRIOR + DF) |
767 | 772 |
sig2A[is.na(sig2A[, p]), p] <- med |
768 | 773 |
|
769 |
- ##estimate the correlation where there is at least 1 or more copies (AB genotypes) |
|
770 | 774 |
if(length(index.AB) > 0){ ##all homozygous is possible |
771 | 775 |
x <- AB.A[index.AB, ] |
772 | 776 |
y <- AB.B[index.AB, ] |
773 |
- ##x <- Ip[index.AB, , "AB", "A"] |
|
774 |
- ##y <- Ip[index.AB, , "AB", "B"] |
|
775 | 777 |
corr[index.AB, p] <- rowCors(x, y, na.rm=TRUE) |
776 | 778 |
corr[corr < 0] <- 0 |
777 | 779 |
DF <- Ns[, p, "AB"]-1 |
... | ... |
@@ -780,11 +782,8 @@ locationAndScale <- function(p, AA.A, AB.A, BB.A, |
780 | 782 |
corr[, p] <- (corr[, p]*DF + med * DF.PRIOR)/(DF.PRIOR + DF) |
781 | 783 |
corr[is.na(corr[, p]), p] <- med |
782 | 784 |
} |
783 |
- ##estimate the correlation of the background errors and AA, BB genotypes |
|
784 |
- ##backgroundB <- Ip[index.AA, , "AA", "B"] |
|
785 | 785 |
backgroundB <- AA.B[index.AA, ] |
786 | 786 |
signalA <- AA.A[index.AA, ] |
787 |
- ##signalA <- Ip[index.AA, , "AA", "A"] |
|
788 | 787 |
corrB.AA[index.AA, p] <- rowCors(backgroundB, signalA, na.rm=TRUE) |
789 | 788 |
DF <- Ns[, p, "AA"]-1 |
790 | 789 |
DF[DF < 1] <- 1 |
... | ... |
@@ -792,10 +791,8 @@ locationAndScale <- function(p, AA.A, AB.A, BB.A, |
792 | 791 |
corrB.AA[, p] <- (corrB.AA[, p]*DF + med*DF.PRIOR)/(DF.PRIOR + DF) |
793 | 792 |
corrB.AA[is.na(corrB.AA[, p]), p] <- med |
794 | 793 |
|
795 |
- ##backgroundA <- Ip[index.BB, , "BB", "A"] |
|
796 | 794 |
backgroundA <- BB.A[index.BB, ] |
797 | 795 |
signalB <- BB.B[index.BB, ] |
798 |
- ##signalB <- Ip[index.BB, , "BB", "B"] |
|
799 | 796 |
corrA.BB[index.BB, p] <- rowCors(backgroundA, signalB, na.rm=TRUE) |
800 | 797 |
DF <- Ns[, p, "BB"]-1 |
801 | 798 |
DF[DF < 1] <- 1 |
... | ... |
@@ -803,310 +800,183 @@ locationAndScale <- function(p, AA.A, AB.A, BB.A, |
803 | 800 |
corrA.BB[, p] <- (corrA.BB[, p]*DF + med*DF.PRIOR)/(DF.PRIOR + DF) |
804 | 801 |
corrA.BB[is.na(corrA.BB[, p]), p] <- med |
805 | 802 |
|
806 |
- assign("muA.AA", muA.AA, envir) |
|
807 |
- assign("muA.AB", muA.AB, envir) |
|
808 |
- assign("muA.BB", muA.BB, envir) |
|
809 |
- assign("muB.AA", muB.AA, envir) |
|
810 |
- assign("muB.AB", muB.AB, envir) |
|
811 |
- assign("muB.BB", muB.BB, envir) |
|
812 |
- assign("sigmaA", sigmaA, envir) |
|
813 |
- assign("sigmaB", sigmaB, envir) |
|
814 |
- assign("tau2A", tau2A, envir) |
|
815 |
- assign("tau2B", tau2B, envir) |
|
816 |
- assign("sig2A", sig2A, envir) |
|
817 |
- assign("sig2B", sig2B, envir) |
|
818 |
- assign("corr", corr, envir) |
|
819 |
- assign("corrB.AA", corrB.AA, envir) |
|
820 |
- assign("corrA.BB", corrA.BB, envir) |
|
803 |
+ envir[["tau2A"]] <- tau2A |
|
804 |
+ envir[["tau2B"]] <- tau2B |
|
805 |
+ envir[["sig2A"]] <- sig2A |
|
806 |
+ envir[["sig2B"]] <- sig2B |
|
807 |
+ envir[["corr"]] <- corr |
|
808 |
+ envir[["corrB.AA"]] <- corrB.AA |
|
809 |
+ envir[["corrA.BB"]] <- corrA.BB |
|
821 | 810 |
} |
822 | 811 |
|
823 | 812 |
coefs <- function(plateIndex, conf, MIN.OBS=3, envir, CONF.THR=0.99){ |
824 | 813 |
p <- plateIndex |
825 |
- plates.completed <- get("plates.completed", envir) |
|
814 |
+ plates.completed <- envir[["plates.completed"]] |
|
826 | 815 |
if(!plates.completed[p]) return() |
827 |
- plate <- get("plate", envir) |
|
828 |
- nuA <- get("nuA", envir) |
|
829 |
- nuB <- get("nuB", envir) |
|
830 |
- nuA.se <- get("nuA.se", envir) |
|
831 |
- nuB.se <- get("nuB.se", envir) |
|
832 |
- phiA <- get("phiA", envir) |
|
833 |
- phiB <- get("phiB", envir) |
|
834 |
- phiA.se <- get("phiA.se", envir) |
|
835 |
- phiB.se <- get("phiB.se", envir) |
|
836 |
- muA.AA <- get("muA.AA", envir) |
|
837 |
- muA.AB <- get("muA.AB", envir) |
|
838 |
- muA.BB <- get("muA.BB", envir) |
|
839 |
- muB.AA <- get("muB.AA", envir) |
|
840 |
- muB.AB <- get("muB.AB", envir) |
|
841 |