Added the following functions:
o summarizeMaleXGenotypes
- impute genotype 'A' and genotype 'B' location when unobserved
- shrink within genotype variances
o shrinkGenotypeSummaries
- impute unobserved genotype 'AA', 'AB' and 'BB' genotypes
- shrink within genotype variances
o summarizeSnps
- within genotype location and scale. Genotype frequencies
o summarizeNps
- location and scale at nonpolymorphic loci
o genotypeSummary
- wrapper for summarizeSnps and summarizeNps
Extensively edited the following functions:
o fit.lm1 - fit.lm4
(these functions only fit the linear model to the within/genotype location scale)
Requires oligoClasses 1.11.7
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@48959 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -10,7 +10,7 @@ License: Artistic-2.0 |
10 | 10 |
Depends: R (>= 2.11.0), |
11 | 11 |
methods, |
12 | 12 |
Biobase (>= 2.9.0), |
13 |
- oligoClasses (>= 1.11.6) |
|
13 |
+ oligoClasses (>= 1.11.7) |
|
14 | 14 |
Imports: affyio (>= 1.15.2), |
15 | 15 |
ellipse, |
16 | 16 |
ff, |
... | ... |
@@ -27,6 +27,7 @@ Suggests: hapmapsnp6, |
27 | 27 |
ellipse |
28 | 28 |
Collate: AllGenerics.R |
29 | 29 |
AllClasses.R |
30 |
+ methods-AssayData.R |
|
30 | 31 |
methods-CNSet.R |
31 | 32 |
methods-eSet.R |
32 | 33 |
methods-SnpSuperSet.R |
... | ... |
@@ -29,7 +29,9 @@ importClassesFrom(oligoClasses, SnpSuperSet, AlleleSet, CNSet) |
29 | 29 |
importMethodsFrom(oligoClasses, allele, calls, "calls<-", confs, |
30 | 30 |
"confs<-", cnConfidence, "cnConfidence<-", isSnp, |
31 | 31 |
chromosome, position, A, B, |
32 |
- "A<-", "B<-", open, close, lM, "lM<-", flags) |
|
32 |
+ "A<-", "B<-", open, close, lM, flags, |
|
33 |
+ batchStatistics, "batchStatistics<-", updateObject) |
|
34 |
+ |
|
33 | 35 |
|
34 | 36 |
importFrom(oligoClasses, chromosome2integer, celfileDate, list.celfiles, |
35 | 37 |
copyNumber, initializeBigMatrix, initializeBigVector) |
... | ... |
@@ -55,7 +57,7 @@ importFrom(mvtnorm, dmvnorm) |
55 | 57 |
importFrom(ellipse, ellipse) |
56 | 58 |
|
57 | 59 |
importFrom(ff, ffdf, physical.ff, physical.ffdf) |
58 |
-importClassesFrom(oligoClasses, ffdf) |
|
60 |
+importClassesFrom(oligoClasses, ffdf, ff_matrix) |
|
59 | 61 |
|
60 | 62 |
exportMethods(lines) |
61 | 63 |
exportMethods(CA, CB) |
... | ... |
@@ -79,8 +81,12 @@ export(computeCopynumber, ACN) |
79 | 81 |
export(totalCopyNumber) |
80 | 82 |
export(cnrma, cnrma2) |
81 | 83 |
|
82 |
-exportMethods("A<-", "B<-", "nuA", "nuB", "phiA", "phiB", "nuA<-", "nuB<-", "phiA<-", "phiB<-", "numberGenotype<-") |
|
83 |
- |
|
84 |
+exportMethods("A<-", "B<-", "nuA", "nuB", "phiA", "phiB", "nuA<-", "nuB<-", "phiA<-", "phiB<-") |
|
85 |
+exportMethods(Ns, medians, mads, tau2, corr) |
|
86 |
+exportClasses(ff_or_matrix) |
|
87 |
+export(genotypeSummary, summarizeSnps, summarizeNps, shrinkSummary, shrinkGenotypeSummaries, summarizeMaleXGenotypes, |
|
88 |
+ indexComplete) |
|
89 |
+export(Ns) |
|
84 | 90 |
## For debugging |
85 | 91 |
## exportPattern("^[^\\.]") |
86 | 92 |
|
... | ... |
@@ -12,19 +12,84 @@ setGeneric("CB", function(object, ...) standardGeneric("CB")) |
12 | 12 |
##setGeneric("totalCopyNumber", function(object, ...) standardGeneric("totalCopyNumber")) |
13 | 13 |
|
14 | 14 |
|
15 |
+setGeneric("Ns", function(object, ...) standardGeneric("Ns")) |
|
16 |
+setGeneric("corr", function(object, ...) standardGeneric("corr")) |
|
17 |
+setGeneric("mads", function(object, ...) standardGeneric("mads")) |
|
18 |
+setGeneric("medians", function(object, ...) standardGeneric("medians")) |
|
19 |
+setGeneric("tau2", function(object, ...) standardGeneric("tau2")) |
|
20 |
+ |
|
15 | 21 |
## The generics below are for internal use with copy number methods |
16 | 22 |
## If we keep them in oligoClasses, we need to export and document |
23 |
+setGeneric("N.AA", function(object) standardGeneric("N.AA")) |
|
24 |
+setGeneric("N.AB", function(object) standardGeneric("N.AB")) |
|
25 |
+setGeneric("N.BB", function(object) standardGeneric("N.BB")) |
|
26 |
+setGeneric("N.AA<-", function(object, value) standardGeneric("N.AA<-")) |
|
27 |
+setGeneric("N.AB<-", function(object, value) standardGeneric("N.AB<-")) |
|
28 |
+setGeneric("N.BB<-", function(object, value) standardGeneric("N.BB<-")) |
|
29 |
+ |
|
30 |
+setGeneric("medians", function(object, ...) standardGeneric("medians")) |
|
31 |
+setGeneric("medianA.AA", function(object) standardGeneric("medianA.AA")) |
|
32 |
+setGeneric("medianA.AB", function(object) standardGeneric("medianA.AB")) |
|
33 |
+setGeneric("medianA.BB", function(object) standardGeneric("medianA.BB")) |
|
34 |
+setGeneric("medianB.AA", function(object) standardGeneric("medianB.AA")) |
|
35 |
+setGeneric("medianB.AB", function(object) standardGeneric("medianB.AB")) |
|
36 |
+setGeneric("medianB.BB", function(object) standardGeneric("medianB.BB")) |
|
37 |
+setGeneric("medianA.AA<-", function(object, value) standardGeneric("medianA.AA<-")) |
|
38 |
+setGeneric("medianA.AB<-", function(object, value) standardGeneric("medianA.AB<-")) |
|
39 |
+setGeneric("medianA.BB<-", function(object, value) standardGeneric("medianA.BB<-")) |
|
40 |
+setGeneric("medianB.AA<-", function(object, value) standardGeneric("medianB.AA<-")) |
|
41 |
+setGeneric("medianB.AB<-", function(object, value) standardGeneric("medianB.AB<-")) |
|
42 |
+setGeneric("medianB.BB<-", function(object, value) standardGeneric("medianB.BB<-")) |
|
43 |
+ |
|
44 |
+ |
|
45 |
+ |
|
46 |
+setGeneric("madA.AA", function(object) standardGeneric("madA.AA")) |
|
47 |
+setGeneric("madA.AB", function(object) standardGeneric("madA.AB")) |
|
48 |
+setGeneric("madA.BB", function(object) standardGeneric("madA.BB")) |
|
49 |
+setGeneric("madB.AA", function(object) standardGeneric("madB.AA")) |
|
50 |
+setGeneric("madB.AB", function(object) standardGeneric("madB.AB")) |
|
51 |
+setGeneric("madB.BB", function(object) standardGeneric("madB.BB")) |
|
52 |
+setGeneric("madA.AA<-", function(object, value) standardGeneric("madA.AA<-")) |
|
53 |
+setGeneric("madA.AB<-", function(object, value) standardGeneric("madA.AB<-")) |
|
54 |
+setGeneric("madA.BB<-", function(object, value) standardGeneric("madA.BB<-")) |
|
55 |
+setGeneric("madB.AA<-", function(object, value) standardGeneric("madB.AA<-")) |
|
56 |
+setGeneric("madB.AB<-", function(object, value) standardGeneric("madB.AB<-")) |
|
57 |
+setGeneric("madB.BB<-", function(object, value) standardGeneric("madB.BB<-")) |
|
58 |
+ |
|
59 |
+setGeneric("tau2A.AA", function(object) standardGeneric("tau2A.AA")) |
|
60 |
+##setGeneric("tau2A.AB", function(object) standardGeneric("tau2A.AB")) |
|
61 |
+setGeneric("tau2A.BB", function(object) standardGeneric("tau2A.BB")) |
|
62 |
+setGeneric("tau2B.AA", function(object) standardGeneric("tau2B.AA")) |
|
63 |
+##setGeneric("tau2B.AB", function(object) standardGeneric("tau2B.AB")) |
|
64 |
+setGeneric("tau2B.BB", function(object) standardGeneric("tau2B.BB")) |
|
65 |
+setGeneric("tau2A.AA<-", function(object, value) standardGeneric("tau2A.AA<-")) |
|
66 |
+##setGeneric("tau2A.AB<-", function(object, value) standardGeneric("tau2A.AB<-")) |
|
67 |
+setGeneric("tau2A.BB<-", function(object, value) standardGeneric("tau2A.BB<-")) |
|
68 |
+setGeneric("tau2B.AA<-", function(object, value) standardGeneric("tau2B.AA<-")) |
|
69 |
+##setGeneric("tau2B.AB<-", function(object, value) standardGeneric("tau2B.AB<-")) |
|
70 |
+setGeneric("tau2B.BB<-", function(object, value) standardGeneric("tau2B.BB<-")) |
|
71 |
+ |
|
72 |
+setGeneric("corrAA", function(object) standardGeneric("corrAA")) |
|
73 |
+setGeneric("corrAB", function(object) standardGeneric("corrAB")) |
|
74 |
+setGeneric("corrBB", function(object) standardGeneric("corrBB")) |
|
75 |
+setGeneric("corrAA<-", function(object, value) standardGeneric("corrAA<-")) |
|
76 |
+setGeneric("corrAB<-", function(object, value) standardGeneric("corrAB<-")) |
|
77 |
+setGeneric("corrBB<-", function(object, value) standardGeneric("corrBB<-")) |
|
78 |
+ |
|
79 |
+ |
|
80 |
+ |
|
17 | 81 |
setGeneric("nuA", function(object) standardGeneric("nuA")) |
18 | 82 |
setGeneric("nuB", function(object) standardGeneric("nuB")) |
19 | 83 |
setGeneric("phiA", function(object) standardGeneric("phiA")) |
20 | 84 |
setGeneric("phiB", function(object) standardGeneric("phiB")) |
85 |
+setGeneric("phiPrimeA", function(object) standardGeneric("phiPrimeA")) |
|
86 |
+setGeneric("phiPrimeB", function(object) standardGeneric("phiPrimeB")) |
|
87 |
+setGeneric("phiPrimeA<-", function(object, value) standardGeneric("phiPrimeA<-")) |
|
88 |
+setGeneric("phiPrimeB<-", function(object, value) standardGeneric("phiPrimeB<-")) |
|
21 | 89 |
setGeneric("sigma2A", function(object) standardGeneric("sigma2A")) |
22 | 90 |
setGeneric("sigma2B", function(object) standardGeneric("sigma2B")) |
23 | 91 |
setGeneric("tau2A", function(object) standardGeneric("tau2A")) |
24 | 92 |
setGeneric("tau2B", function(object) standardGeneric("tau2B")) |
25 |
-setGeneric("corrAA", function(object) standardGeneric("corrAA")) |
|
26 |
-setGeneric("corrBB", function(object) standardGeneric("corrBB")) |
|
27 |
-setGeneric("corrAB", function(object) standardGeneric("corrAB")) |
|
28 | 93 |
setGeneric("nuA<-", function(object, value) standardGeneric("nuA<-")) |
29 | 94 |
setGeneric("nuB<-", function(object, value) standardGeneric("nuB<-")) |
30 | 95 |
setGeneric("phiA<-", function(object, value) standardGeneric("phiA<-")) |
... | ... |
@@ -33,11 +98,6 @@ setGeneric("sigma2A<-", function(object, value) standardGeneric("sigma2A<-")) |
33 | 98 |
setGeneric("sigma2B<-", function(object, value) standardGeneric("sigma2B<-")) |
34 | 99 |
setGeneric("tau2A<-", function(object, value) standardGeneric("tau2A<-")) |
35 | 100 |
setGeneric("tau2B<-", function(object, value) standardGeneric("tau2B<-")) |
36 |
-setGeneric("corrAA<-", function(object, value) standardGeneric("corrAA<-")) |
|
37 |
-setGeneric("corrAB<-", function(object, value) standardGeneric("corrAB<-")) |
|
38 |
-setGeneric("corrBB<-", function(object, value) standardGeneric("corrBB<-")) |
|
39 | 101 |
setGeneric("flags<-", function(object, value) standardGeneric("flags<-")) |
40 | 102 |
|
41 |
-setGeneric("numberGenotype<-", function(object, value, ...) standardGeneric("numberGenotype<-")) |
|
42 |
- |
|
43 | 103 |
|
... | ... |
@@ -362,13 +362,13 @@ rowCors <- function(x, y, ...){ |
362 | 362 |
return(covar/(sd.x*sd.y)) |
363 | 363 |
} |
364 | 364 |
|
365 |
-corByGenotype <- function(A, B, G, Ns, which.cluster=c(1,2,3)[1], DF.PRIOR){ |
|
365 |
+corByGenotype <- function(A, B, G, Ns, which.cluster=c(1,2,3)[1]){##, DF.PRIOR){ |
|
366 | 366 |
x <- A * (G == which.cluster) |
367 | 367 |
x[x==0] <- NA |
368 | 368 |
y <- B * (G == which.cluster) |
369 | 369 |
res <- as.matrix(rowCors(x, y, na.rm=TRUE)) |
370 |
- cors <- shrink(res, Ns[, which.cluster], DF.PRIOR) |
|
371 |
- cors |
|
370 |
+## cors <- shrink(res, Ns[, which.cluster], DF.PRIOR) |
|
371 |
+ res |
|
372 | 372 |
} |
373 | 373 |
|
374 | 374 |
dqrlsWrapper <- function(x, y, wts, tol=1e-7){ |
... | ... |
@@ -382,24 +382,38 @@ dqrlsWrapper <- function(x, y, wts, tol=1e-7){ |
382 | 382 |
work=double(2 * p), PACKAGE="base")[["coefficients"]] |
383 | 383 |
} |
384 | 384 |
|
385 |
-fit.wls <- function(allele, Ystar, W, Ns, autosome=TRUE){ |
|
385 |
+ |
|
386 |
+##fit.wls <- function(allele, Ystar, W, Ns, autosome=TRUE){ |
|
387 |
+fit.wls <- function(NN, sigma, allele, Y, autosome, X){ |
|
388 |
+ ## Np <- NN |
|
389 |
+## Np[Np < 1] <- 1 |
|
390 |
+## vA2 <- vA^2/Np |
|
391 |
+## vB2 <- vB^2/Np |
|
392 |
+## wA <- sqrt(1/vA2) |
|
393 |
+## wB <- sqrt(1/vB2) |
|
394 |
+## YA <- muA*wA |
|
395 |
+## YB <- muB*wB |
|
396 |
+ Np <- NN |
|
397 |
+ Np[Np < 1] <- 1 |
|
398 |
+ W <- (sigma/sqrt(Np))^-1 |
|
399 |
+ Ystar <- Y*W |
|
386 | 400 |
complete <- which(rowSums(is.na(W)) == 0 & rowSums(is.na(Ystar)) == 0) |
387 | 401 |
## if(any(!is.finite(W))){## | any(!is.finite(V))){ |
388 | 402 |
## i <- which(rowSums(!is.finite(W)) > 0) |
389 | 403 |
## stop("Possible zeros in the within-genotype estimates of the spread (vA, vB). ") |
390 | 404 |
## } |
391 |
- NOHET <- mean(Ns[, 2], na.rm=TRUE) < 0.05 |
|
405 |
+## NOHET <- mean(NN[, 2], na.rm=TRUE) < 0.05 |
|
392 | 406 |
if(missing(allele)) stop("must specify allele") |
393 |
- if(autosome){ |
|
407 |
+ if(autosome & missing(X)){ |
|
394 | 408 |
if(allele == "A") X <- cbind(1, 2:0) |
395 | 409 |
if(allele == "B") X <- cbind(1, 0:2) |
396 |
- betahat <- matrix(NA, 2, nrow(Ystar)) |
|
397 |
- } else { |
|
410 |
+ } |
|
411 |
+ if(!autosome & missing(X)){ |
|
398 | 412 |
if(allele == "A") X <- cbind(1, c(1, 0, 2, 1, 0), c(0, 1, 0, 1, 2)) |
399 | 413 |
if(allele == "B") X <- cbind(1, c(0, 1, 0, 1, 2), c(1, 0, 2, 1, 0)) |
400 |
- betahat <- matrix(NA, 3, nrow(Ystar)) |
|
401 | 414 |
} |
402 |
- if(NOHET) X <- X[-2, ] ##more than 1 X chromosome, but all homozygous |
|
415 |
+ betahat <- matrix(NA, ncol(X), nrow(Ystar)) |
|
416 |
+## if(NOHET) X <- X[-2, ] ##more than 1 X chromosome, but all homozygous |
|
403 | 417 |
##How to quickly generate Xstar, Xstar = diag(W) %*% X |
404 | 418 |
##Xstar <- apply(W, 1, generateX, X) |
405 | 419 |
ww <- rep(1, ncol(Ystar)) |
... | ... |
@@ -780,11 +794,136 @@ crlmmCopynumberLD <- function(object, |
780 | 794 |
} |
781 | 795 |
crlmmCopynumber2 <- crlmmCopynumberLD |
782 | 796 |
|
783 |
-fit.lm1 <- function(strata.index, |
|
797 |
+ |
|
798 |
+ |
|
799 |
+ |
|
800 |
+shrinkGenotypeSummaries <- function(strata, index.list, object, MIN.OBS, MIN.SAMPLES, DF.PRIOR, |
|
801 |
+ verbose, is.lds){ |
|
802 |
+ if(is.lds) {physical <- get("physical"); open(object)} |
|
803 |
+ if(verbose) message("Probe stratum ", strata, " of ", length(index.list)) |
|
804 |
+ marker.index <- index.list[[strata]] |
|
805 |
+ batches <- split(seq_along(batch(object)), as.character(batch(object))) |
|
806 |
+ batches <- batches[sapply(batches, length) >= MIN.SAMPLES] |
|
807 |
+ batchnames <- batchNames(object) |
|
808 |
+ N.AA <- as.matrix(N.AA(object)[marker.index, ]) |
|
809 |
+ N.AB <- as.matrix(N.AB(object)[marker.index, ]) |
|
810 |
+ N.BB <- as.matrix(N.BB(object)[marker.index, ]) |
|
811 |
+ medianA.AA <- as.matrix(medianA.AA(object)[marker.index,]) |
|
812 |
+ medianA.AB <- as.matrix(medianA.AB(object)[marker.index,]) |
|
813 |
+ medianA.BB <- as.matrix(medianA.BB(object)[marker.index,]) |
|
814 |
+ medianB.AA <- as.matrix(medianB.AA(object)[marker.index,]) |
|
815 |
+ medianB.AB <- as.matrix(medianB.AB(object)[marker.index,]) |
|
816 |
+ medianB.BB <- as.matrix(medianB.BB(object)[marker.index,]) |
|
817 |
+ madA.AA <- as.matrix(madA.AA(object)[marker.index,]) |
|
818 |
+ madA.AB <- as.matrix(madA.AB(object)[marker.index,]) |
|
819 |
+ madA.BB <- as.matrix(madA.BB(object)[marker.index,]) |
|
820 |
+ madB.AA <- as.matrix(madB.AA(object)[marker.index,]) |
|
821 |
+ madB.AB <- as.matrix(madB.AB(object)[marker.index,]) |
|
822 |
+ madB.BB <- as.matrix(madB.BB(object)[marker.index,]) |
|
823 |
+ medianA <- medianB <- shrink.madB <- shrink.madA <- vector("list", length(batchnames)) |
|
824 |
+ shrink.tau2A.AA <- tau2A.AA <- as.matrix(tau2A.AA(object)[marker.index,]) |
|
825 |
+ shrink.tau2B.BB <- tau2B.BB <- as.matrix(tau2B.BB(object)[marker.index,]) |
|
826 |
+ shrink.tau2A.BB <- tau2A.BB <- as.matrix(tau2A.BB(object)[marker.index,]) |
|
827 |
+ shrink.tau2B.AA <- tau2B.AA <- as.matrix(tau2B.AA(object)[marker.index,]) |
|
828 |
+ shrink.corrAA <- corrAA <- as.matrix(corrAA(object)[marker.index, ]) |
|
829 |
+ shrink.corrAB <- corrAB <- as.matrix(corrAB(object)[marker.index, ]) |
|
830 |
+ shrink.corrBB <- corrBB <- as.matrix(corrBB(object)[marker.index, ]) |
|
831 |
+ flags <- as.matrix(flags(object)[marker.index, ]) |
|
832 |
+ for(k in seq(along=batches)){ |
|
833 |
+ B <- batches[[k]] |
|
834 |
+ this.batch <- unique(as.character(batch(object)[B])) |
|
835 |
+ |
|
836 |
+ medianA[[k]] <- cbind(medianA.AA[, k], medianA.AB[, k], medianA.BB[, k]) |
|
837 |
+ medianB[[k]] <- cbind(medianB.AA[, k], medianB.AB[, k], medianB.BB[, k]) |
|
838 |
+ madA <- cbind(madA.AA[, k], madA.AB[, k], madA.BB[, k]) |
|
839 |
+ madB <- cbind(madB.AA[, k], madB.AB[, k], madB.BB[, k]) |
|
840 |
+ NN <- cbind(N.AA[, k], N.AB[, k], N.BB[, k]) |
|
841 |
+ ##RS: estimate DF.PRIOR |
|
842 |
+ shrink.madA[[k]] <- shrink(madA, NN, DF.PRIOR) |
|
843 |
+ shrink.madB[[k]] <- shrink(madB, NN, DF.PRIOR) |
|
844 |
+ |
|
845 |
+ ## an estimate of the background variance is the MAD |
|
846 |
+ ## of the log2(allele A) intensities among subjects with |
|
847 |
+ ## genotypes BB |
|
848 |
+ shrink.tau2A.BB[, k] <- shrink(tau2A.BB[, k, drop=FALSE], NN[, 3], DF.PRIOR)[, drop=FALSE] |
|
849 |
+ shrink.tau2B.AA[, k] <- shrink(tau2B.AA[, k, drop=FALSE], NN[, 1], DF.PRIOR)[, drop=FALSE] |
|
850 |
+ ## an estimate of the signal variance is the MAD |
|
851 |
+ ## of the log2(allele A) intensities among subjects with |
|
852 |
+ ## genotypes AA |
|
853 |
+ shrink.tau2A.AA[, k] <- shrink(tau2A.AA[, k, drop=FALSE], NN[, 1], DF.PRIOR)[, drop=FALSE] |
|
854 |
+ shrink.tau2B.BB[, k] <- shrink(tau2B.BB[, k, drop=FALSE], NN[, 3], DF.PRIOR)[, drop=FALSE] |
|
855 |
+ cor.AA <- corrAA[, k, drop=FALSE] |
|
856 |
+ cor.AB <- corrAB[, k, drop=FALSE] |
|
857 |
+ cor.BB <- corrBB[, k, drop=FALSE] |
|
858 |
+ shrink.corrAA[, k] <- shrink(cor.AA, NN[, 1], DF.PRIOR) |
|
859 |
+ shrink.corrAB[, k] <- shrink(cor.AB, NN[, 2], DF.PRIOR) |
|
860 |
+ shrink.corrBB[, k] <- shrink(cor.BB, NN[, 3], DF.PRIOR) |
|
861 |
+ |
|
862 |
+ ##--------------------------------------------------------------------------- |
|
863 |
+ ## SNPs that we'll use for imputing location/scale of unobserved genotypes |
|
864 |
+ ##--------------------------------------------------------------------------- |
|
865 |
+ index.complete <- indexComplete(NN, medianA[[k]], medianB[[k]], MIN.OBS) |
|
866 |
+ |
|
867 |
+ ##--------------------------------------------------------------------------- |
|
868 |
+ ## Impute sufficient statistics for unobserved genotypes (plate-specific) |
|
869 |
+ ##--------------------------------------------------------------------------- |
|
870 |
+ unobservedAA <- NN[, 1] < MIN.OBS |
|
871 |
+ unobservedAB <- NN[, 2] < MIN.OBS |
|
872 |
+ unobservedBB <- NN[, 3] < MIN.OBS |
|
873 |
+ unobserved.index <- vector("list", 3) |
|
874 |
+ unobserved.index[[1]] <- which(unobservedAA & (NN[, 2] >= MIN.OBS & NN[, 3] >= MIN.OBS)) |
|
875 |
+ unobserved.index[[2]] <- which(unobservedAB & (NN[, 1] >= MIN.OBS & NN[, 3] >= MIN.OBS)) |
|
876 |
+ unobserved.index[[3]] <- which(unobservedBB & (NN[, 2] >= MIN.OBS & NN[, 1] >= MIN.OBS)) |
|
877 |
+ res <- imputeCenter(medianA[[k]], medianB[[k]], index.complete, unobserved.index) |
|
878 |
+ medianA[[k]] <- res[[1]] |
|
879 |
+ medianB[[k]] <- res[[2]] |
|
880 |
+ rm(res) |
|
881 |
+ ##the NA's in 'medianA' and 'medianB' are monomorphic if MIN.OBS = 1 |
|
882 |
+ |
|
883 |
+ ## RS: For Monomorphic SNPs a mixture model may be better |
|
884 |
+ ## RS: Further, we can improve estimation by borrowing strength across batch |
|
885 |
+ unobserved.index[[1]] <- which(unobservedAA & unobservedAB) |
|
886 |
+ unobserved.index[[2]] <- which(unobservedBB & unobservedAB) |
|
887 |
+ unobserved.index[[3]] <- which(unobservedAA & unobservedBB) ## strange |
|
888 |
+ res <- imputeCentersForMonomorphicSnps(medianA[[k]], medianB[[k]], |
|
889 |
+ index.complete, |
|
890 |
+ unobserved.index) |
|
891 |
+ medianA[[k]] <- res[[1]]; medianB[[k]] <- res[[2]] |
|
892 |
+ rm(res) |
|
893 |
+ negA <- rowSums(medianA[[k]] < 0) > 0 |
|
894 |
+ negB <- rowSums(medianB[[k]] < 0) > 0 |
|
895 |
+ flags[, k] <- as.integer(rowSums(NN == 0) > 0 | negA | negB) |
|
896 |
+ } |
|
897 |
+ flags(object)[marker.index, ] <- flags |
|
898 |
+ medianA.AA(object)[marker.index, ] <- do.call("cbind", lapply(medianA, function(x) x[, 1])) |
|
899 |
+ medianA.AB(object)[marker.index, ] <- do.call("cbind", lapply(medianA, function(x) x[, 2])) |
|
900 |
+ medianA.BB(object)[marker.index, ] <- do.call("cbind", lapply(medianA, function(x) x[, 3])) |
|
901 |
+ medianB.AA(object)[marker.index, ] <- do.call("cbind", lapply(medianB, function(x) x[, 1])) |
|
902 |
+ medianB.AB(object)[marker.index, ] <- do.call("cbind", lapply(medianB, function(x) x[, 2])) |
|
903 |
+ medianB.BB(object)[marker.index, ] <- do.call("cbind", lapply(medianB, function(x) x[, 3])) |
|
904 |
+ |
|
905 |
+ madA.AA(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madA, function(x) x[, 1])) |
|
906 |
+ madA.AB(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madA, function(x) x[, 2])) |
|
907 |
+ madA.BB(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madA, function(x) x[, 3])) |
|
908 |
+ madB.AA(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madB, function(x) x[, 1])) |
|
909 |
+ madB.AB(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madB, function(x) x[, 2])) |
|
910 |
+ madB.BB(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madB, function(x) x[, 3])) |
|
911 |
+ |
|
912 |
+ corrAA(object)[marker.index, ] <- shrink.corrAA |
|
913 |
+ corrAB(object)[marker.index, ] <- shrink.corrAB |
|
914 |
+ corrBB(object)[marker.index, ] <- shrink.corrBB |
|
915 |
+ tau2A.AA(object)[marker.index,] <- shrink.tau2A.AA |
|
916 |
+ tau2A.BB(object)[marker.index,] <- shrink.tau2A.BB |
|
917 |
+ tau2B.AA(object)[marker.index,] <- shrink.tau2B.AA |
|
918 |
+ tau2B.BB(object)[marker.index,] <- shrink.tau2B.BB |
|
919 |
+ if(is.lds) return(TRUE) else retrun(object) |
|
920 |
+} |
|
921 |
+ |
|
922 |
+ |
|
923 |
+ |
|
924 |
+fit.lm1 <- function(strata, |
|
784 | 925 |
index.list, |
785 |
- marker.index, |
|
786 | 926 |
object, |
787 |
- batchSize, |
|
788 | 927 |
SNRMin, |
789 | 928 |
MIN.SAMPLES, |
790 | 929 |
MIN.OBS, |
... | ... |
@@ -793,142 +932,63 @@ fit.lm1 <- function(strata.index, |
793 | 932 |
THR.NU.PHI, |
794 | 933 |
MIN.NU, |
795 | 934 |
MIN.PHI, |
796 |
- verbose,...){ |
|
797 |
- if(isPackageLoaded("ff")) physical <- get("physical") |
|
798 |
- is.lds <- ifelse(is(calls(object), "ffdf") | is(calls(object), "ff_matrix"), TRUE, FALSE) |
|
799 |
- if(verbose) message("Probe stratum ", strata.index, " of ", length(index.list)) |
|
800 |
- snps <- index.list[[strata.index]] |
|
801 |
- batches <- split(seq_along(batch(object)), batch(object)) |
|
935 |
+ verbose, is.lds, |
|
936 |
+ CHR.X, ...){ |
|
937 |
+ if(is.lds) {physical <- get("physical"); open(object)} |
|
938 |
+ if(verbose) message("Probe stratum ", strata, " of ", length(index.list)) |
|
939 |
+ snps <- index.list[[strata]] |
|
940 |
+ batches <- split(seq_along(batch(object)), as.character(batch(object))) |
|
802 | 941 |
batches <- batches[sapply(batches, length) >= MIN.SAMPLES] |
803 |
- open(object) |
|
804 |
- corrAB <- corrBB <- corrAA <- sig2B <- sig2A <- tau2B <- tau2A <- matrix(NA, length(snps), length(unique(batch(object)))) |
|
805 |
- flags <- nuA <- nuB <- phiA <- phiB <- corrAB |
|
806 |
- ## should the 'normal' indicator |
|
807 |
- NORM <- 1 |
|
808 |
- |
|
809 |
- Ns.names <- ls(numberGenotype(object)) |
|
810 | 942 |
batchnames <- batchNames(object) |
811 |
- GG <- as.matrix(calls(object)[snps, ]) |
|
812 |
- CP <- as.matrix(snpCallProbability(object)[snps, ]) |
|
813 |
- AA <- as.matrix(A(object)[snps, ]) |
|
814 |
- BB <- as.matrix(B(object)[snps, ]) |
|
815 |
- zzzz <- 1 |
|
816 |
- for(k in batches){ |
|
817 |
- this.batch <- unique(as.character(batch(object)[k])) |
|
818 |
- zzzz <- zzzz+1 |
|
819 |
- G <- GG[, k] |
|
820 |
- ##NORM <- normal.snps[, k] |
|
821 |
- xx <- CP[, k] |
|
822 |
- highConf <- (1-exp(-xx/1000)) > GT.CONF.THR |
|
823 |
- G <- G*highConf*NORM |
|
824 |
- A <- AA[, k] |
|
825 |
- B <- BB[, k] |
|
826 |
- ##index <- GT.B <- GT.A <- vector("list", 3) |
|
827 |
- ##names(index) <- names(GT.B) <- names(GT.A) <- c("AA", "AB", "BB") |
|
828 |
- Ns <- applyByGenotype(matrix(1, nrow(G), ncol(G)), rowSums, G) |
|
829 |
- muA <- applyByGenotype(A, rowMedians, G) |
|
830 |
- muB <- applyByGenotype(B, rowMedians, G) |
|
831 |
- vA <- applyByGenotype(A, rowMAD, G) |
|
832 |
- vB <- applyByGenotype(B, rowMAD, G) |
|
833 |
- vA <- shrink(vA, Ns, DF.PRIOR) |
|
834 |
- vB <- shrink(vB, Ns, DF.PRIOR) |
|
835 |
- ##location and scale |
|
836 |
- J <- match(unique(batch(object)[k]), batchnames)##unique(batch(object))) |
|
837 |
- ##background variance for alleleA |
|
838 |
- taus <- applyByGenotype(log2(A), rowMAD, G)^2 |
|
839 |
- tau2A[, J] <- shrink(taus[, 3, drop=FALSE], Ns[, 3], DF.PRIOR) |
|
840 |
- sig2A[, J] <- shrink(taus[, 1, drop=FALSE], Ns[, 1], DF.PRIOR) |
|
841 |
- taus <- applyByGenotype(log2(B), rowMAD, G)^2 |
|
842 |
- tau2B[, J] <- shrink(taus[, 3, drop=FALSE], Ns[, 1], DF.PRIOR) |
|
843 |
- sig2B[, J] <- shrink(taus[, 1, drop=FALSE], Ns[, 3], DF.PRIOR) |
|
844 |
- |
|
845 |
- corrAB[, J] <- corByGenotype(A=A, B=B, G=G, Ns=Ns, which.cluster=2, DF.PRIOR) |
|
846 |
- corrAA[, J] <- corByGenotype(A=A, B=B, G=G, Ns=Ns, which.cluster=1, DF.PRIOR) |
|
847 |
- corrBB[, J] <- corByGenotype(A=A, B=B, G=G, Ns=Ns, which.cluster=3, DF.PRIOR) |
|
848 |
- |
|
849 |
- ##--------------------------------------------------------------------------- |
|
850 |
- ## Impute sufficient statistics for unobserved genotypes (plate-specific) |
|
851 |
- ##--------------------------------------------------------------------------- |
|
852 |
- index <- apply(Ns, 2, function(x, MIN.OBS) which(x > MIN.OBS), MIN.OBS) |
|
853 |
- correct.orderA <- muA[, 1] > muA[, 3] |
|
854 |
- correct.orderB <- muB[, 3] > muB[, 1] |
|
855 |
- index.complete <- intersect(which(correct.orderA & correct.orderB), intersect(index[[1]], intersect(index[[2]], index[[3]]))) |
|
856 |
- size <- min(5000, length(index.complete)) |
|
857 |
- if(size == 5000) index.complete <- sample(index.complete, 5000, replace=TRUE) |
|
858 |
- if(length(index.complete) < 200){ |
|
859 |
- warning("fewer than 200 snps pass criteria for predicting the sufficient statistics") |
|
860 |
- return() |
|
861 |
- } |
|
862 |
- index <- vector("list", 3) |
|
863 |
- index[[1]] <- which(Ns[, 1] == 0 & (Ns[, 2] >= MIN.OBS & Ns[, 3] >= MIN.OBS)) |
|
864 |
- index[[2]] <- which(Ns[, 2] == 0 & (Ns[, 1] >= MIN.OBS & Ns[, 3] >= MIN.OBS)) |
|
865 |
- index[[3]] <- which(Ns[, 3] == 0 & (Ns[, 2] >= MIN.OBS & Ns[, 1] >= MIN.OBS)) |
|
866 |
- res <- imputeCenter(muA, muB, index.complete, index) |
|
867 |
- muA <- res[[1]] |
|
868 |
- muB <- res[[2]] |
|
869 |
- |
|
870 |
- ## Monomorphic SNPs. Mixture model may be better |
|
871 |
- ## Improve estimation by borrowing strength across batch |
|
872 |
- noAA <- Ns[, 1] < MIN.OBS |
|
873 |
- noAB <- Ns[, 2] < MIN.OBS |
|
874 |
- noBB <- Ns[, 3] < MIN.OBS |
|
875 |
- index[[1]] <- noAA & noAB |
|
876 |
- index[[2]] <- noBB & noAB |
|
877 |
- index[[3]] <- noAA & noBB |
|
878 |
- cols <- c(3, 1, 2) |
|
879 |
- for(j in 1:3){ |
|
880 |
- if(sum(index[[j]]) == 0) next() |
|
881 |
- kk <- cols[j] |
|
882 |
- X <- cbind(1, muA[index.complete, kk], muB[index.complete, kk]) |
|
883 |
- Y <- cbind(muA[index.complete, -kk], |
|
884 |
- muB[index.complete, -kk]) |
|
885 |
- betahat <- solve(crossprod(X), crossprod(X,Y)) |
|
886 |
- X <- cbind(1, muA[index[[j]], kk], muB[index[[j]], kk]) |
|
887 |
- mus <- X %*% betahat |
|
888 |
- muA[index[[j]], -kk] <- mus[, 1:2] |
|
889 |
- muB[index[[j]], -kk] <- mus[, 3:4] |
|
890 |
- } |
|
891 |
- rm(betahat, X, Y, mus, index, noAA, noAB, noBB, res) |
|
892 |
- ##gc() |
|
893 |
- negA <- rowSums(muA < 0) > 0 |
|
894 |
- negB <- rowSums(muB < 0) > 0 |
|
895 |
- flags[, J] <- rowSums(Ns == 0) > 0 |
|
896 |
- ##flags[, J] <- index[[1]] | index[[2]] | index[[3]] | rowSums( |
|
943 |
+ N.AA <- as.matrix(N.AA(object)[snps, ]) |
|
944 |
+ N.AB <- as.matrix(N.AB(object)[snps, ]) |
|
945 |
+ N.BB <- as.matrix(N.BB(object)[snps, ]) |
|
946 |
+ medianA.AA <- as.matrix(medianA.AA(object)[snps,]) |
|
947 |
+ medianA.AB <- as.matrix(medianA.AB(object)[snps,]) |
|
948 |
+ medianA.BB <- as.matrix(medianA.BB(object)[snps,]) |
|
949 |
+ medianB.AA <- as.matrix(medianB.AA(object)[snps,]) |
|
950 |
+ medianB.AB <- as.matrix(medianB.AB(object)[snps,]) |
|
951 |
+ medianB.BB <- as.matrix(medianB.BB(object)[snps,]) |
|
952 |
+ madA.AA <- as.matrix(madA.AA(object)[snps,]) |
|
953 |
+ madA.AB <- as.matrix(madA.AB(object)[snps,]) |
|
954 |
+ madA.BB <- as.matrix(madA.BB(object)[snps,]) |
|
955 |
+ madB.AA <- as.matrix(madB.AA(object)[snps,]) |
|
956 |
+ madB.AB <- as.matrix(madB.AB(object)[snps,]) |
|
957 |
+ madB.BB <- as.matrix(madB.BB(object)[snps,]) |
|
958 |
+ tau2A.AA <- as.matrix(tau2A.AA(object)[snps,]) |
|
959 |
+ tau2B.BB <- as.matrix(tau2B.BB(object)[snps,]) |
|
960 |
+ tau2A.BB <- as.matrix(tau2A.BB(object)[snps,]) |
|
961 |
+ tau2B.AA <- as.matrix(tau2B.AA(object)[snps,]) |
|
962 |
+ corrAA <- as.matrix(corrAA(object)[snps, ]) |
|
963 |
+ corrAB <- as.matrix(corrAB(object)[snps, ]) |
|
964 |
+ corrBB <- as.matrix(corrBB(object)[snps, ]) |
|
965 |
+ nuA <- as.matrix(nuA(object)[snps, ]) |
|
966 |
+ phiA <- as.matrix(phiA(object)[snps, ]) |
|
967 |
+ nuB <- as.matrix(nuB(object)[snps, ]) |
|
968 |
+ phiB <- as.matrix(phiB(object)[snps, ]) |
|
969 |
+ flags <- as.matrix(flags(object)[snps, ]) |
|
970 |
+ for(k in seq(along=batches)){ |
|
971 |
+ B <- batches[[k]] |
|
972 |
+ this.batch <- unique(as.character(batch(object)[B])) |
|
973 |
+ medianA <- cbind(medianA.AA[, k], medianA.AB[, k], medianA.BB[, k]) |
|
974 |
+ medianB <- cbind(medianB.AA[, k], medianB.AB[, k], medianB.BB[, k]) |
|
975 |
+ madA <- cbind(madA.AA[, k], madA.AB[, k], madA.BB[, k]) |
|
976 |
+ madB <- cbind(madB.AA[, k], madB.AB[, k], madB.BB[, k]) |
|
977 |
+ NN <- cbind(N.AA[, k], N.AB[, k], N.BB[, k]) |
|
897 | 978 |
## we're regressing on the medians using the standard errors (hence the division by N) as weights |
898 |
- ##formerly coefs() |
|
899 |
- Np <- Ns |
|
900 |
- Np[Np < 1] <- 1 |
|
901 |
- vA2 <- vA^2/Np |
|
902 |
- vB2 <- vB^2/Np |
|
903 |
- wA <- sqrt(1/vA2) |
|
904 |
- wB <- sqrt(1/vB2) |
|
905 |
- YA <- muA*wA |
|
906 |
- YB <- muB*wB |
|
907 |
- res <- fit.wls(allele="A", Ystar=YA, W=wA, Ns=Ns) |
|
908 |
-## } else{ |
|
909 |
-## if(zzzz==1) message("currently, only weighted least squares (wls) is available... fitting wls") |
|
910 |
-## res <- fit.wls(allele="A", Ystar=YA, W=wA, Ns=Ns) |
|
911 |
-## } |
|
912 |
- nuA[, J] <- res[[1]] |
|
913 |
- phiA[, J] <- res[[2]] |
|
914 |
- res <- fit.wls(allele="B", Ystar=YB, W=wB, Ns=Ns) |
|
915 |
-## } else { |
|
916 |
-## if(zzzz==1) message("currently, only weighted least squares (wls) is available... fitting wls") |
|
917 |
-## res <- fit.wls(allele="B", Ystar=YB, W=wB, Ns=Ns) |
|
918 |
-## } |
|
919 |
- ##nuB[, J] <- res[[1]] |
|
920 |
- nuB[, J] <- res[1, ] |
|
921 |
- ##phiB[, J] <- res[[2]] |
|
922 |
- phiB[, J] <- res[2, ] |
|
979 |
+ res <- fit.wls(NN=NN, sigma=madA, allele="A", Y=medianA, autosome=!CHR.X) |
|
980 |
+ nuA[, k] <- res[1, ] |
|
981 |
+ phiA[, k] <- res[2, ] |
|
982 |
+ rm(res) |
|
983 |
+ ##res <- fit.wls(allele="A", Ystar=YA, W=wA, Ns=Ns) |
|
984 |
+ ##nuA[, J] <- res[[1]] |
|
985 |
+ ##phiA[, J] <- res[[2]] |
|
986 |
+ res <- fit.wls(NN=NN, sigma=madB, allele="B", Y=medianB, autosome=!CHR.X)##allele="B", Ystar=YB, W=wB, Ns=Ns) |
|
987 |
+ nuB[, k] <- res[1, ] |
|
988 |
+ phiB[, k] <- res[2, ] |
|
923 | 989 |
## cA[, k] <- matrix((1/phiA[, J]*(A-nuA[, J])), nrow(A), ncol(A)) |
924 | 990 |
## cB[, k] <- matrix((1/phiB[, J]*(B-nuB[, J])), nrow(B), ncol(B)) |
925 |
- jj <- match(this.batch, Ns.names) |
|
926 |
- numberGenotype(object, this.batch)[snps, ] <- Ns |
|
927 |
- rm(G, A, B, wA, wB, YA,YB, res, negA, negB, Np, Ns) |
|
928 |
- ##gc() |
|
929 | 991 |
} |
930 |
- nGt <- lapply(nGt, function(x){ colnames(x) <- c("AA", "AB", "BB"); return(x)}) |
|
931 |
- |
|
932 | 992 |
if(THR.NU.PHI){ |
933 | 993 |
nuA[nuA < MIN.NU] <- MIN.NU |
934 | 994 |
nuB[nuB < MIN.NU] <- MIN.NU |
... | ... |
@@ -943,34 +1003,21 @@ fit.lm1 <- function(strata.index, |
943 | 1003 |
## cB <- matrix(as.integer(cB*100), nrow(cB), ncol(cB)) |
944 | 1004 |
## CA(object)[snps, ] <- cA |
945 | 1005 |
## CB(object)[snps, ] <- cB |
946 |
- if(is.lds) lapply(lM(object), open) |
|
947 |
- flags(object)[snps, ] <- flags |
|
948 |
- tau2A(object) <- tau2A |
|
949 |
- tau2B(object) <- tau2B |
|
950 |
- sigma2A(object) <- sig2A |
|
951 |
- sigma2B(object) <- sig2B |
|
952 |
- nuA(object) <- nuA |
|
953 |
- nuB(object) <- nuB |
|
954 |
- phiA(object) <- phiA |
|
955 |
- phiB(object) <- phiB |
|
956 |
- corrAA(object) <- corrAA |
|
957 |
- corrBB(object) <- corrBB |
|
958 |
- corrAB(object) <- corrAB |
|
959 |
- if(is.lds) { |
|
960 |
- lapply(assayData(object), close) |
|
961 |
- lapply(lM(object), close) |
|
1006 |
+ nuA(object)[snps, ] <- nuA |
|
1007 |
+ nuB(object)[snps, ] <- nuB |
|
1008 |
+ phiA(object)[snps, ] <- phiA |
|
1009 |
+ phiB(object)[snps, ] <- phiB |
|
1010 |
+ if(is.lds){ |
|
1011 |
+ close(object) |
|
962 | 1012 |
return(TRUE) |
963 | 1013 |
} else{ |
964 | 1014 |
return(object) |
965 | 1015 |
} |
966 | 1016 |
} |
967 | 1017 |
|
968 |
-fit.lm2 <- function(strata.index, |
|
1018 |
+fit.lm2 <- function(strata, |
|
969 | 1019 |
index.list, |
970 |
- marker.index, |
|
971 | 1020 |
object, |
972 |
- Ns, |
|
973 |
- batchSize, |
|
974 | 1021 |
SNRMin, |
975 | 1022 |
MIN.SAMPLES, |
976 | 1023 |
MIN.OBS, |
... | ... |
@@ -979,107 +1026,238 @@ fit.lm2 <- function(strata.index, |
979 | 1026 |
THR.NU.PHI, |
980 | 1027 |
MIN.NU, |
981 | 1028 |
MIN.PHI, |
982 |
- verbose,...){ |
|
983 |
- physical <- get("physical") |
|
984 |
- if(verbose) message("Probe stratum ", strata.index, " of ", length(index.list)) |
|
985 |
- snps <- index.list[[strata.index]] |
|
986 |
- batches <- split(seq(along=batch(object)), batch(object)) |
|
1029 |
+ verbose, is.lds, CHR.X, ...){ |
|
1030 |
+ if(is.lds) {physical <- get("physical"); open(object)} |
|
1031 |
+ if(verbose) message("Probe stratum ", strata, " of ", length(index.list)) |
|
1032 |
+ marker.index <- index.list[[strata]] |
|
1033 |
+ batches <- split(seq_along(batch(object)), as.character(batch(object))) |
|
987 | 1034 |
batches <- batches[sapply(batches, length) >= MIN.SAMPLES] |
988 | 1035 |
|
989 |
- open(object) |
|
990 |
- open(snpflags) |
|
991 |
-## open(normal) |
|
992 | 1036 |
|
993 |
-## cA <- matrix(NA, length(snps), ncol(object)) |
|
994 | 1037 |
ii <- isSnp(object) & chromosome(object) < 23 & !is.na(chromosome(object)) |
995 |
- flags <- as.matrix(snpflags[,]) |
|
996 |
- noflags <- rowSums(flags, na.rm=TRUE) == 0 ##NA's for unevaluated batches |
|
997 |
- ## We do not want to write to discuss for each batch. More efficient to |
|
998 |
- ## write to disk after estimating these parameters for all batches. |
|
999 |
- nuA.np <- phiA.np <- sig2A.np <- matrix(NA, length(snps), length(unique(batch(object)))) |
|
1038 |
+ flags <- as.matrix(flags(object)[ii, ]) |
|
1039 |
+ fns <- featureNames(object)[ii] |
|
1040 |
+ fns.noflags <- fns[rowSums(flags, na.rm=T) == 0] |
|
1041 |
+ snp.index <- sample(match(fns.noflags, featureNames(object)), 5000) |
|
1042 |
+ |
|
1043 |
+ ##flags <- as.matrix(snpflags[,]) |
|
1044 |
+ ##noflags <- rowSums(flags, na.rm=TRUE) == 0 ##NA's for unevaluated batches |
|
1045 |
+ |
|
1046 |
+ nuA.np <- as.matrix(nuA(object)[marker.index, ]) |
|
1047 |
+ phiA.np <- as.matrix(phiA(object)[marker.index, ]) |
|
1048 |
+ tau2A.AA <- as.matrix(tau2A.AA(object)[marker.index, ]) |
|
1049 |
+ |
|
1050 |
+ ##nuA.np <- phiA.np <- sig2A.np <- matrix(NA, length(marker.index), length(unique(batch(object)))) |
|
1000 | 1051 |
## for imputation, we need the corresponding parameters of the snps |
1001 |
- NN <- min(10e3, length(which(ii & noflags))) |
|
1002 |
- snp.ind <- sample(which(ii & noflags), NN) |
|
1003 |
- nnuA.snp <- as.matrix(physical(lM(object))$nuA[snp.ind,]) |
|
1004 |
- pphiA.snp <- as.matrix(physical(lM(object))$phiA[snp.ind,]) |
|
1005 |
- nnuB.snp <- as.matrix(physical(lM(object))$nuB[snp.ind,]) |
|
1006 |
- pphiB.snp <- as.matrix(physical(lM(object))$phiB[snp.ind,]) |
|
1007 |
- |
|
1008 |
- AA.snp <- as.matrix(A(object)[snp.ind, ]) |
|
1009 |
- BB.snp <- as.matrix(B(object)[snp.ind, ]) |
|
1052 |
+ ##NN <- min(10e3, length(which(ii & noflags))) |
|
1053 |
+ ##snp.ind <- sample(which(ii & noflags), NN) |
|
1054 |
+ nuA.snp <- as.matrix(nuA(object)[snp.index, ]) |
|
1055 |
+ nuB.snp <- as.matrix(nuB(object)[snp.index, ]) |
|
1056 |
+ phiA.snp <- as.matrix(phiA(object)[snp.index, ]) |
|
1057 |
+ phiB.snp <- as.matrix(phiB(object)[snp.index, ]) |
|
1058 |
+ medianA.AA <- as.matrix(medianA.AA(object)[snp.index,]) |
|
1059 |
+ medianB.BB <- as.matrix(medianB.BB(object)[snp.index,]) |
|
1060 |
+ |
|
1061 |
+ |
|
1062 |
+ |
|
1063 |
+ medianA.AA.np <- as.matrix(medianA.AA(object)[marker.index,]) |
|
1064 |
+ |
|
1065 |
+## nnuA.snp <- as.matrix(physical(lM(object))$nuA[snp.ind,]) |
|
1066 |
+## pphiA.snp <- as.matrix(physical(lM(object))$phiA[snp.ind,]) |
|
1067 |
+## nnuB.snp <- as.matrix(physical(lM(object))$nuB[snp.ind,]) |
|
1068 |
+## pphiB.snp <- as.matrix(physical(lM(object))$phiB[snp.ind,]) |
|
1069 |
+ |
|
1070 |
+## AA.snp <- as.matrix(A(object)[snp.ind, ]) |
|
1071 |
+## BB.snp <- as.matrix(B(object)[snp.ind, ]) |
|
1010 | 1072 |
## NNORM.snp <- as.matrix(normal[snp.ind, ]) |
1011 | 1073 |
## NORM.np <- as.matrix(normal[snps, ]) |
1012 |
- AA.np <- as.matrix(A(object)[snps, ]) |
|
1013 |
- GG <- as.matrix(calls(object)[snp.ind, ]) |
|
1014 |
- CP <- as.matrix(snpCallProbability(object)[snp.ind, ]) |
|
1015 |
- for(k in batches){ |
|
1016 |
- ##if(verbose) message("SNP batch ", ii, " of ", length(batches)) |
|
1017 |
- J <- match(unique(batch(object)[k]), unique(batch(object))) |
|
1018 |
-## snp.index <- snp.ind & nuA[, J] > 20 & nuB[, J] > 20 & phiA[, J] > 20 & phiB[, J] > 20 |
|
1019 |
-## if(sum(snp.index) >= 5000){ |
|
1020 |
-## snp.index <- sample(which(snp.index), 5000) |
|
1021 |
-## } else snp.index <- which(snp.index) |
|
1022 |
- phiA.snp <- pphiA.snp[, J] |
|
1023 |
- phiB.snp <- pphiB.snp[, J] |
|
1024 |
- A.snp <- AA.snp[, k] |
|
1025 |
- B.snp <- BB.snp[, k] |
|
1026 |
- NORM.snp <- NNORM.snp[, k] |
|
1027 |
- G <- GG[, k] |
|
1028 |
- xx <- CP[, k] |
|
1029 |
- highConf <- (1-exp(-xx/1000)) > GT.CONF.THR |
|
1030 |
- G <- G*highConf*NORM.snp |
|
1031 |
- G[G==0] <- NA |
|
1074 |
+## AA.np <- as.matrix(A(object)[marker.index, ]) |
|
1075 |
+## GG <- as.matrix(calls(object)[snp.ind, ]) |
|
1076 |
+## CP <- as.matrix(snpCallProbability(object)[snp.ind, ]) |
|
1077 |
+ for(k in seq_along(batches)){ |
|
1078 |
+ B <- batches[[k]] |
|
1079 |
+ this.batch <- unique(as.character(batch(object)[B])) |
|
1080 |
+## phiA.snp <- pphiA.snp[, J] |
|
1081 |
+## phiB.snp <- pphiB.snp[, J] |
|
1082 |
+## A.snp <- AA.snp[, k] |
|
1083 |
+## B.snp <- BB.snp[, k] |
|
1084 |
+## NORM.snp <- NNORM.snp[, k] |
|
1085 |
+## G <- GG[, k] |
|
1086 |
+## xx <- CP[, k] |
|
1087 |
+## highConf <- (1-exp(-xx/1000)) > GT.CONF.THR |
|
1088 |
+## G <- G*highConf*NORM.snp |
|
1089 |
+## G[G==0] <- NA |
|
1032 | 1090 |
##nonpolymorphic |
1033 |
- A.np <- AA.np[, k] |
|
1034 |
- Ns <- applyByGenotype(matrix(1, nrow(G), ncol(G)), rowSums, G) |
|
1035 |
- muA <- applyByGenotype(A.snp, rowMedians, G) |
|
1036 |
- muB <- applyByGenotype(B.snp, rowMedians, G) |
|
1037 |
- muA <- muA[, 1] |
|
1038 |
- muB <- muB[, 3] |
|
1039 |
- X <- cbind(1, log2(c(muA, muB))) |
|
1091 |
+## A.np <- AA.np[, k] |
|
1092 |
+## Ns <- applyByGenotype(matrix(1, nrow(G), ncol(G)), rowSums, G) |
|
1093 |
+## muA <- applyByGenotype(A.snp, rowMedians, G) |
|
1094 |
+## muB <- applyByGenotype(B.snp, rowMedians, G) |
|
1095 |
+## muA <- muA[, 1] |
|
1096 |
+## muB <- muB[, 3] |
|
1097 |
+## X <- cbind(1, log2(c(muA, muB))) |
|
1098 |
+ X <- cbind(1, log2(c(medianA.AA[, k], medianB.BB[, k]))) |
|
1040 | 1099 |
Y <- log2(c(phiA.snp, phiB.snp)) |
1041 | 1100 |
betahat <- solve(crossprod(X), crossprod(X, Y)) |
1042 | 1101 |
## |
1043 |
- mus <- rowMedians(A.np * NORM.np[, k], na.rm=TRUE) |
|
1044 |
- crosshyb <- max(median(muA) - median(mus), 0) |
|
1045 |
- X <- cbind(1, log2(mus+crosshyb)) |
|
1102 |
+## mus <- rowMedians(A.np * NORM.np[, k], na.rm=TRUE) |
|
1103 |
+## averaging across markers, is there a difference in the |
|
1104 |
+## typical AA intensity for SNPs and the AA intensity for |
|
1105 |
+## nonpolymorphic loci |
|
1106 |
+## crosshyb <- max(median(muA) - median(mus), 0) |
|
1107 |
+ crosshyb <- max(median(medianA.AA[, k]) - median(medianA.AA.np[, k]), 0) |
|
1108 |
+## X <- cbind(1, log2(mus+crosshyb)) |
|
1109 |
+ X <- cbind(1, log2(medianA.AA.np[, k] + crosshyb)) |
|
1046 | 1110 |
logPhiT <- X %*% betahat |
1047 |
- phiA.np[, J] <- 2^(logPhiT) |
|
1048 |
- nuA.np[, J] <- mus-2*phiA.np[, J] |
|
1049 |
- if(THR.NU.PHI){ |
|
1050 |
- nuA.np[nuA.np[, J] < MIN.NU, J] <- MIN.NU |
|
1051 |
- phiA.np[phiA.np[, J] < MIN.PHI, J] <- MIN.PHI |
|
1052 |
- } |
|
1111 |
+ phiA.np[, k] <- 2^(logPhiT) |
|
1112 |
+ nuA.np[, k] <- medianA.AA.np[,k]-2*phiA.np[, k] |
|
1053 | 1113 |
## cA[, k] <- 1/phiA.np[, J] * (A.np - nuA.np[, J]) |
1054 |
- sig2A.np[, J] <- rowMAD(log2(A.np*NORM.np[, k]), na.rm=TRUE) |
|
1055 |
- rm(NORM.snp, highConf, xx, G, Ns, A.np, X, Y, betahat, mus, logPhiT) |
|
1056 |
- gc() |
|
1114 |
+## sig2A.np[, J] <- rowMAD(log2(A.np*NORM.np[, k]), na.rm=TRUE) |
|
1115 |
+## rm(NORM.snp, highConf, xx, G, Ns, A.np, X, Y, betahat, mus, logPhiT) |
|
1116 |
+## gc() |
|
1117 |
+ } |
|
1118 |
+ if(THR.NU.PHI){ |
|
1119 |
+ nuA.np[nuA.np < MIN.NU] <- MIN.NU |
|
1120 |
+ phiA.np[phiA.np < MIN.PHI] <- MIN.PHI |
|
1057 | 1121 |
} |
1058 | 1122 |
## cA[cA < 0.05] <- 0.05 |
1059 | 1123 |
## cA[cA > 5] <- 5 |
1060 | 1124 |
## cA <- matrix(as.integer(cA*100), nrow(cA), ncol(cA)) |
1061 |
-## CA(object)[snps, ] <- cA |
|
1062 |
- tmp <- physical(lM(object))$nuA |
|
1063 |
- tmp[snps, ] <- nuA.np |
|
1064 |
- lM(object)$nuA <- tmp |
|
1065 |
- tmp <- physical(lM(object))$sig2A |
|
1066 |
- tmp[snps, ] <- sig2A.np |
|
1067 |
- lM(object)$sig2A <- tmp |
|
1068 |
- tmp <- physical(lM(object))$phiA |
|
1069 |
- tmp[snps, ] <- phiA.np |
|
1070 |
- lM(object)$sig2A <- tmp |
|
1071 |
- lapply(assayData(object), close) |
|
1072 |
- lapply(lM(object), close) |
|
1073 |
- TRUE |
|
1125 |
+ nuA(object)[marker.index, ] <- nuA.np |
|
1126 |
+ phiA(object)[marker.index, ] <- phiA.np |
|
1127 |
+ if(is.lds) { close(object); return(TRUE)} |
|
1128 |
+ return(object) |
|
1074 | 1129 |
} |
1075 | 1130 |
|
1131 |
+summarizeMaleXNps <- function(marker.index, |
|
1132 |
+ batches, |
|
1133 |
+ object, MIN.SAMPLES){ |
|
1134 |
+ nr <- length(marker.index) |
|
1135 |
+ nc <- length(batchNames(object)) |
|
1136 |
+ NN.Mlist <- imputed.medianA <- imputed.medianB <- shrink.madA <- shrink.madB <- vector("list", nc) |
|
1137 |
+ gender <- object$gender |
|
1138 |
+ AA <- as.matrix(A(object)[marker.index, gender==1]) |
|
1139 |
+ madA.AA <- medianA.AA <- matrix(NA, nr, nc) |
|
1140 |
+ numberMenPerBatch <- rep(NA, nc) |
|
1141 |
+ for(k in seq_along(batches)){ |
|
1142 |
+ B <- batches[[k]] |
|
1143 |
+ this.batch <- unique(as.character(batch(object)[B])) |
|
1144 |
+ gender <- object$gender[B] |
|
1145 |
+ if(sum(gender==1) < MIN.SAMPLES) next() |
|
1146 |
+ sns.batch <- sampleNames(object)[B] |
|
1147 |
+ ##subset GG apppriately |
|
1148 |
+ sns <- colnames(AA) |
|
1149 |
+ J <- sns%in%sns.batch |
|
1150 |
+ numberMenPerBatch[k] <- length(J) |
|
1151 |
+ medianA.AA[, k] <- rowMedians(AA[, J], na.rm=TRUE) |
|
1152 |
+ madA.AA[, k] <- rowMAD(AA[, J], na.rm=TRUE) |
|
1153 |
+ } |
|
1154 |
+ return(list(medianA.AA=medianA.AA, |
|
1155 |
+ madA.AA=madA.AA)) |
|
1156 |
+} |
|
1157 |
+ |
|
1158 |
+ |
|
1159 |
+summarizeMaleXGenotypes <- function(marker.index, |
|
1160 |
+ batches, |
|
1161 |
+ object, |
|
1162 |
+ GT.CONF.THR, |
|
1163 |
+ MIN.OBS, |
|
1164 |
+ MIN.SAMPLES, |
|
1165 |
+ verbose, |
|
1166 |
+ is.lds, |
|
1167 |
+ DF.PRIOR,...){ |
|
1168 |
+ nr <- length(marker.index) |
|
1169 |
+ nc <- length(batchNames(object)) |
|
1170 |
+ NN.Mlist <- imputed.medianA <- imputed.medianB <- shrink.madA <- shrink.madB <- vector("list", nc) |
|
1171 |
+ gender <- object$gender |
|
1172 |
+ GG <- as.matrix(calls(object)[marker.index, gender==1]) |
|
1173 |
+ CP <- as.matrix(snpCallProbability(object)[marker.index, gender==1]) |
|
1174 |
+ AA <- as.matrix(A(object)[marker.index, gender==1]) |
|
1175 |
+ BB <- as.matrix(B(object)[marker.index, gender==1]) |
|
1176 |
+ for(k in seq_along(batches)){ |
|
1177 |
+ B <- batches[[k]] |
|
1178 |
+ this.batch <- unique(as.character(batch(object)[B])) |
|
1179 |
+ gender <- object$gender[B] |
|
1180 |
+ if(sum(gender==1) < MIN.SAMPLES) next() |
|
1181 |
+ sns.batch <- sampleNames(object)[B] |
|
1182 |
+ ##subset GG apppriately |
|
1183 |
+ sns <- colnames(GG) |
|
1184 |
+ J <- sns%in%sns.batch |
|
1185 |
+ G <- GG[, J] |
|
1186 |
+ xx <- CP[, J] |
|
1187 |
+ highConf <- (1-exp(-xx/1000)) > GT.CONF.THR |
|
1188 |
+ G <- G*highConf |
|
1189 |
+ A <- AA[, J] |
|
1190 |
+ B <- BB[, J] |
|
1191 |
+ G.AA <- G==1 |
|
1192 |
+ G.AA[G.AA==FALSE] <- NA |
|
1193 |
+ G.AB <- G==2 |
|
1194 |
+ G.AB[G.AB==FALSE] <- NA |
|
1195 |
+ G.BB <- G==3 |
|
1196 |
+ G.BB[G.BB==FALSE] <- NA |
|
1197 |
+ N.AA.M <- rowSums(G.AA, na.rm=TRUE) |
|
1198 |
+ N.AB.M <- rowSums(G.AB, na.rm=TRUE) |
|
1199 |
+ N.BB.M <- rowSums(G.BB, na.rm=TRUE) |
|
1200 |
+ summaryStats <- function(X, INT, FUNS){ |
|
1201 |
+ tmp <- matrix(NA, nrow(X), length(FUNS)) |
|
1202 |
+ for(j in seq_along(FUNS)){ |
|
1203 |
+ FUN <- match.fun(FUNS[j]) |
|
1204 |
+ tmp[, j] <- FUN(X*INT, na.rm=TRUE) |
|
1205 |
+ } |
|
1206 |
+ tmp |
|
1207 |
+ } |
|
1208 |
+ statsA.AA <- summaryStats(G.AA, A, FUNS=c("rowMedians", "rowMAD")) |
|
1209 |
+ statsA.AB <- summaryStats(G.AB, A, FUNS=c("rowMedians", "rowMAD")) |
|
1210 |
+ statsA.BB <- summaryStats(G.BB, A, FUNS=c("rowMedians", "rowMAD")) |
|
1211 |
+ statsB.AA <- summaryStats(G.AA, B, FUNS=c("rowMedians", "rowMAD")) |
|
1212 |
+ statsB.AB <- summaryStats(G.AB, B, FUNS=c("rowMedians", "rowMAD")) |
|
1213 |
+ statsB.BB <- summaryStats(G.BB, B, FUNS=c("rowMedians", "rowMAD")) |
|
1214 |
+ medianA <- cbind(statsA.AA[, 1], statsA.AB[, 1], statsA.BB[, 1]) |
|
1215 |
+ medianB <- cbind(statsB.AA[, 1], statsB.AB[, 1], statsB.BB[, 1]) |
|
1216 |
+ madA <- cbind(statsA.AA[, 1], statsA.AB[, 1], statsA.BB[, 1]) |
|
1217 |
+ madB <- cbind(statsB.AA[, 1], statsB.AB[, 1], statsB.BB[, 1]) |
|
1218 |
+ rm(statsA.AA, statsA.AB, statsA.BB, statsB.AA, statsB.AB, statsB.BB) |
|
1219 |
+ |
|
1220 |
+## A <- log2(A); B <- log2(B) |
|
1221 |
+## tau2A.AA <- summaryStats(G.AA, A, FUNS="rowMAD")^2 |
|
1222 |
+## tau2A.BB <- summaryStats(G.BB, A, FUNS="rowMAD")^2 |
|
1223 |
+## tau2B.AA <- summaryStats(G.AA, B, FUNS="rowMAD")^2 |
|
1224 |
+## tau2B.BB <- summaryStats(G.BB, B, FUNS="rowMAD")^2 |
|
1225 |
+ ##tau2A <- cbind(tau2A.AA, tau2A.BB) |
|
1226 |
+ ##tau2B <- cbind(tau2B.AA, tau2B.BB) |
|
1227 |
+ NN.M <- cbind(N.AA.M, N.AB.M, N.BB.M) |
|
1228 |
+ NN.Mlist[[k]] <- NN.M |
|
1229 |
+ |
|
1230 |
+ shrink.madA[[k]] <- shrink(madA, NN.M, DF.PRIOR) |
|
1231 |
+ shrink.madB[[k]] <- shrink(madB, NN.M, DF.PRIOR) |
|
1232 |
+ |
|
1233 |
+## shrink.tau2A.BB[, k] <- shrink(tau2A.BB[, k, drop=FALSE], NN.M[, 3], DF.PRIOR)[, drop=FALSE] |
|
1234 |
+## shrink.tau2B.AA[, k] <- shrink(tau2B.AA[, k, drop=FALSE], NN.M[, 1], DF.PRIOR)[, drop=FALSE] |
|
1235 |
+## shrink.tau2A.AA[, k] <- shrink(tau2A.AA[, k, drop=FALSE], NN.M[, 1], DF.PRIOR)[, drop=FALSE] |
|
1236 |
+## shrink.tau2B.BB[, k] <- shrink(tau2B.BB[, k, drop=FALSE], NN.M[, 3], DF.PRIOR)[, drop=FALSE] |
|
1076 | 1237 |
|
1077 |
-fit.lm3 <- function(strata.index, |
|
1238 |
+ ##--------------------------------------------------------------------------- |
|
1239 |
+ ## SNPs that we'll use for imputing location/scale of unobserved genotypes |
|
1240 |
+ ##--------------------------------------------------------------------------- |
|
1241 |
+ index.complete <- indexComplete(NN.M[, -2], medianA, medianB, MIN.OBS) |
|
1242 |
+ |
|
1243 |
+ ##--------------------------------------------------------------------------- |
|
1244 |
+ ## Impute sufficient statistics for unobserved genotypes (plate-specific) |
|
1245 |
+ ##--------------------------------------------------------------------------- |
|
1246 |
+ res <- imputeCenterX(medianA, medianB, NN.M, index.complete, MIN.OBS) |
|
1247 |
+ imputed.medianA[[k]] <- res[[1]] |
|
1248 |
+ imputed.medianB[[k]] <- res[[2]] |
|
1249 |
+ } |
|
1250 |
+ return(list(madA=shrink.madA, |
|
1251 |
+ madB=shrink.madB, |
|
1252 |
+ NN.M=NN.Mlist, |
|
1253 |
+ medianA=imputed.medianA, |
|
1254 |
+ medianB=imputed.medianB)) |
|
1255 |
+} |
|
1256 |
+ |
|
1257 |
+## X chromosome, SNPs |
|
1258 |
+fit.lm3 <- function(strata, |
|
1078 | 1259 |
index.list, |
1079 |
- marker.index, |
|
1080 | 1260 |
object, |
1081 |
- Ns, |
|
1082 |
- batchSize, |
|
1083 | 1261 |
SNRMin, |
1084 | 1262 |
MIN.SAMPLES, |
1085 | 1263 |
MIN.OBS, |
... | ... |
@@ -1088,235 +1266,154 @@ fit.lm3 <- function(strata.index, |
1088 | 1266 |
THR.NU.PHI, |
1089 | 1267 |
MIN.NU, |
1090 | 1268 |
MIN.PHI, |
1091 |
- verbose, ...){ |
|
1092 |
- physical <- get("physical") |
|
1093 |
- if(verbose) message("Probe stratum ", strata.index, " of ", length(index.list)) |
|
1094 |
- snps <- index.list[[strata.index]] |
|
1095 |
- batches <- split(seq(along=batch(object)), batch(object)) |
|
1096 |
- batches <- batches[sapply(batches, length) >= MIN.SAMPLES] |
|
1097 |
- |
|
1098 |
- open(snpflags) |
|
1099 |
- open(normal) |
|
1100 |
- open(object) |
|
1101 |
- corrAB <- corrBB <- corrAA <- sig2B <- sig2A <- tau2B <- tau2A <- matrix(NA, length(snps), length(unique(batch(object)))) |
|
1102 |
- phiA2 <- phiB2 <- tau2A |
|
1103 |
- flags <- nuA <- nuB <- phiA <- phiB <- corrAB |
|
1104 |
-## cB <- cA <- matrix(NA, length(snps), ncol(object)) |
|
1269 |
+ verbose, is.lds, CHR.X, ...){ |
|
1270 |
+ if(is.lds) {physical <- get("physical"); open(object)} |
|
1271 |
+ if(verbose) message("Probe stratum ", strata, " of ", length(index.list)) |
|
1105 | 1272 |
gender <- object$gender |
1106 |
- IX <- matrix(gender, length(snps), ncol(object)) |
|
1107 |
- NORM <- normal[snps,] |
|
1108 |
- IX <- IX==2 |
|
1109 |
- |
|
1110 |
- GG <- as.matrix(calls(object)[snps, ]) |
|
1111 |
- CP <- as.matrix(snpCallProbability(object)[snps,]) |
|
1112 |
- AA <- as.matrix(A(object)[snps, ]) |
|
1113 |
- BB <- as.matrix(B(object)[snps, ]) |
|
1114 |
- for(k in batches){ |
|
1115 |
- ##if(verbose) message("SNP batch ", ii, " of ", length(batches)) |
|
1116 |
- ## within-genotype moments |
|
1117 |
- gender <- object$gender[k] |
|
1118 |
- G <- GG[, k] |
|
1119 |
- xx <- CP[, k] |
|
1120 |
- highConf <- (1-exp(-xx/1000)) > GT.CONF.THR |
|
1121 |
- G <- G*highConf*NORM[, k] |
|
1122 |
- A <- AA[, k] |
|
1123 |
- B <- BB[, k] |
|
1124 |
- ##index <- GT.B <- GT.A <- vector("list", 3) |
|
1125 |
- ##names(index) <- names(GT.B) <- names(GT.A) <- c("AA", "AB", "BB") |
|
1126 |
- Ns.F <- applyByGenotype(matrix(1, nrow(G), sum(gender==2)), rowSums, G[, gender==2]) |
|
1127 |
- Ns.M <- applyByGenotype(matrix(1, nrow(G), sum(gender==1)), rowSums, G[, gender==1]) |
|
1128 |
- Ns <- cbind(Ns.M[, 1], Ns.M[, 3], Ns.F) |
|
1129 |
- muA.F <- applyByGenotype(A[, gender==2], rowMedians, G[, gender==2]) |
|
1130 |
- muA.M <- applyByGenotype(A[, gender==1], rowMedians, G[, gender==1]) |
|
1131 |
- muB.F <- applyByGenotype(B[, gender==2], rowMedians, G[, gender==2]) |
|
1132 |
- muB.M <- applyByGenotype(B[, gender==1], rowMedians, G[, gender==1]) |
|
1133 |
- vA.F <- applyByGenotype(A[, gender==2], rowMAD, G[, gender==2]) |
|
1134 |
- vB.F <- applyByGenotype(B[, gender==2], rowMAD, G[, gender==2]) |
|
1135 |
- vA.M <- applyByGenotype(A[, gender==1], rowMAD, G[, gender==1]) |
|
1136 |
- vB.M <- applyByGenotype(B[, gender==1], rowMAD, G[, gender==1]) |
|
1137 |
- vA.F <- shrink(vA.F, Ns.F, DF.PRIOR) |
|
1138 |
- vA.M <- shrink(vA.M, Ns.M, DF.PRIOR) |
|
1139 |
- vB.F <- shrink(vB.F, Ns.F, DF.PRIOR) |
|
1140 |
- vB.M <- shrink(vB.M, Ns.M, DF.PRIOR) |
|
1141 |
- ##location and scale |
|
1142 |
- J <- match(unique(batch(object)[k]), unique(batch(object))) |
|
1143 |
- ##background variance for alleleA |
|
1144 |
- taus <- applyByGenotype(log2(A[, gender==2]), rowMAD, G[, gender==2])^2 |
|
1145 |
- tau2A[, J] <- shrink(taus[, 3, drop=FALSE], Ns.F[, 3], DF.PRIOR) |
|
1146 |
- sig2A[, J] <- shrink(taus[, 1, drop=FALSE], Ns.F[, 1], DF.PRIOR) |
|
1147 |
- taus <- applyByGenotype(log2(B[, gender==2]), rowMAD, G[, gender==2])^2 |
|
1148 |
- tau2B[, J] <- shrink(taus[, 3, drop=FALSE], Ns.F[, 1], DF.PRIOR) |
|
1149 |
- sig2B[, J] <- shrink(taus[, 1, drop=FALSE], Ns.F[, 3], DF.PRIOR) |
|
1150 |
- corrAB[, J] <- corByGenotype(A=A[, gender==2], B=B[, gender==2], G=G[, gender==2], Ns=Ns.F, which.cluster=2, DF.PRIOR) |
|
1151 |
- corrAA[, J] <- corByGenotype(A=A[, gender==2], B=B[, gender==2], G=G[, gender==2], Ns=Ns.F, which.cluster=1, DF.PRIOR) |
|
1152 |
- corrBB[, J] <- corByGenotype(A=A[, gender==2], B=B[, gender==2], G=G[, gender==2], Ns=Ns.F, which.cluster=3, DF.PRIOR) |
|
1153 |
- ##formerly oneBatch()... |
|
1154 |
- ##--------------------------------------------------------------------------- |
|
1155 |
- ## Impute sufficient statistics for unobserved genotypes (plate-specific) |
|
1156 |
- ##--------------------------------------------------------------------------- |
|
1157 |
- index <- apply(Ns.F, 2, function(x, MIN.OBS) which(x >= MIN.OBS), MIN.OBS) |
|
1158 |
- correct.orderA <- muA.F[, 1] > muA.F[, 3] |
|
1159 |
- correct.orderB <- muB.F[, 3] > muB.F[, 1] |
|
1160 |
- index.complete <- intersect(which(correct.orderA & correct.orderB), intersect(index[[1]], intersect(index[[2]], index[[3]]))) |
|
1161 |
- size <- min(5000, length(index.complete)) |
|
1162 |
- if(length(index.complete) < 200){ |
|
1163 |
- warning("fewer than 200 snps pass criteria for predicting the sufficient statistics") |
|
1164 |
- return() |
|
1273 |
+ enough.males <- sum(gender==1) > MIN.SAMPLES |
|
1274 |
+ enough.females <- sum(gender==2) > MIN.SAMPLES |
|
1275 |
+ if(!enough.males & !enough.females){ |
|
1276 |
+ message(paste("fewer than", MIN.SAMPLES, "men and women. Copy number not estimated for CHR X")) |
|
1277 |
+ return(object) |
|
1278 |
+ } |
|
1279 |
+ marker.index <- index.list[[strata]] |
|
1280 |
+ batches <- split(seq_along(batch(object)), as.character(batch(object))) |
|
1281 |
+ batches <- batches[sapply(batches, length) >= MIN.SAMPLES] |
|
1282 |
+ nuA <- as.matrix(nuA(object)[marker.index, ]) |
|
1283 |
+ nuB <- as.matrix(nuB(object)[marker.index, ]) |
|
1284 |
+ phiA <- as.matrix(phiA(object)[marker.index, ]) |
|
1285 |
+ phiB <- as.matrix(phiB(object)[marker.index, ]) |
|
1286 |
+ phiA2 <- as.matrix(phiPrimeA(object)[marker.index, ]) |
|
1287 |
+ phiB2 <- as.matrix(phiPrimeB(object)[marker.index, ]) |
|
1288 |
+ if(enough.males){ |
|
1289 |
+ res <- summarizeMaleXGenotypes(marker.index=marker.index, batches=batches, |
|
1290 |
+ object=object, GT.CONF.THR=GT.CONF.THR, |
|
1291 |
+ MIN.SAMPLES=MIN.SAMPLES, |
|
1292 |
+ MIN.OBS=MIN.OBS, |
|
1293 |
+ verbose=verbose, is.lds=is.lds, |
|
1294 |
+ DF.PRIOR=DF.PRIOR/2) |
|
1295 |
+ madA.Mlist <- res[["madA"]] |
|
1296 |
+ madB.Mlist <- res[["madB"]] |
|
1297 |
+ medianA.Mlist <- res[["medianA"]] |
|
1298 |
+ medianB.Mlist <- res[["medianB"]] |
|
1299 |
+ NN.Mlist <- res[["NN.M"]] |
|
1300 |
+ rm(res) |
|
1301 |
+ ## Need N, median, mad |
|
1302 |
+ } |
|
1303 |
+ if(enough.females){ |
|
1304 |
+ N.AA.F <- as.matrix(N.AA(object)[marker.index, ]) |
|
1305 |
+ N.AB.F <- as.matrix(N.AB(object)[marker.index, ]) |
|
1306 |
+ N.BB.F <- as.matrix(N.BB(object)[marker.index, ]) |
|
1307 |
+ medianA.AA <- as.matrix(medianA.AA(object)[marker.index,]) |
|
1308 |
+ medianA.AB <- as.matrix(medianA.AB(object)[marker.index,]) |
|
1309 |
+ medianA.BB <- as.matrix(medianA.BB(object)[marker.index,]) |
|
1310 |
+ medianB.AA <- as.matrix(medianB.AA(object)[marker.index,]) |
|
1311 |
+ medianB.AB <- as.matrix(medianB.AB(object)[marker.index,]) |
|
1312 |
+ medianB.BB <- as.matrix(medianB.BB(object)[marker.index,]) |
|
1313 |
+ madA.AA <- as.matrix(madA.AA(object)[marker.index,]) |
|
1314 |
+ madA.AB <- as.matrix(madA.AB(object)[marker.index,]) |
|
1315 |
+ madA.BB <- as.matrix(madA.BB(object)[marker.index,]) |
|
1316 |
+ madB.AA <- as.matrix(madB.AA(object)[marker.index,]) |
|
1317 |
+ madB.AB <- as.matrix(madB.AB(object)[marker.index,]) |
|
1318 |
+ madB.BB <- as.matrix(madB.BB(object)[marker.index,]) |
|
1319 |
+ } |
|
1320 |
+ for(k in seq_along(batches)){ |
|
1321 |
+ B <- batches[[k]] |
|
1322 |
+ this.batch <- unique(as.character(batch(object)[B])) |
|
1323 |
+ gender <- object$gender[B] |
|
1324 |
+ enough.men <- sum(gender==1) >= MIN.SAMPLES |
|
1325 |
+ enough.women <- sum(gender==2) >= MIN.SAMPLES |
|
1326 |
+ if(!enough.men & !enough.women) { |
|
1327 |
+ if(verbose) message(paste("fewer than", MIN.SAMPLES, "men and women in batch", this.batch, ". CHR X copy number not available. ")) |
|
1328 |
+ next() |
|
1165 | 1329 |
} |
1166 |
- if(size==5000) index.complete <- sample(index.complete, size) |
|
1167 |
- index <- vector("list", 3) |
|
1168 |
- index[[1]] <- which(Ns.F[, 1] == 0 & (Ns.F[, 2] >= MIN.OBS & Ns.F[, 3] >= MIN.OBS)) |
|
1169 |
- index[[2]] <- which(Ns.F[, 2] == 0 & (Ns.F[, 1] >= MIN.OBS & Ns.F[, 3] >= MIN.OBS)) |
|
1170 |
- index[[3]] <- which(Ns.F[, 3] == 0 & (Ns.F[, 2] >= MIN.OBS & Ns.F[, 1] >= MIN.OBS)) |
|
1171 |
- res <- imputeCenter(muA.F, muB.F, index.complete, index) |
|
1172 |
- muA.F <- res[[1]] |
|
1173 |
- muB.F <- res[[2]] |
|
1174 |
- nobsA <- Ns.M[, 1] > MIN.OBS |
|
1175 |
- nobsB <- Ns.M[, 3] > MIN.OBS |
|
1176 |
- notMissing <- !(is.na(muA.M[, 1]) | is.na(muA.M[, 3]) | is.na(muB.M[, 1]) | is.na(muB.M[, 3])) |
|
1177 |
- complete <- list() |
|
1178 |
- complete[[1]] <- which(correct.orderA & correct.orderB & nobsA & notMissing) ##be selective here |
|
1179 |
- complete[[2]] <- which(correct.orderA & correct.orderB & nobsB & notMissing) ##be selective here |
|
1180 |
- size <- min(5000, length(complete[[1]])) |
|
1181 |
- if(size > 5000) complete <- lapply(complete, function(x) sample(x, size)) |
|
1182 |
- ## |
|
1183 |
- res <- imputeCenterX(muA.M, muB.M, Ns.M, complete, MIN.OBS) |
|
1184 |
- muA.M <- res[[1]] |
|
1185 |
- muB.M <- res[[2]] |
|
1186 |
- ## |
|
1187 |
- ## Monomorphic SNPs. Mixture model may be better |
|
1188 |
- ## Improve estimation by borrowing strength across batch |
|
1189 |
- noAA <- Ns.F[, 1] < MIN.OBS |
|
1190 |
- noAB <- Ns.F[, 2] < MIN.OBS |
|
1191 |
- noBB <- Ns.F[, 3] < MIN.OBS |
|
1192 |
- index[[1]] <- noAA & noAB |
|
1193 |
- index[[2]] <- noBB & noAB |
|
1194 |
- index[[3]] <- noAA & noBB |
|
1195 |
- cols <- c(3, 1, 2) |
|
1196 |
- for(j in 1:3){ |
|
1197 |
- if(sum(index[[j]]) == 0) next() |
|
1198 |
- kk <- cols[j] |
|
1199 |
- X <- cbind(1, muA.F[index.complete, kk], muB.F[index.complete, kk]) |
|
1200 |
- Y <- cbind(muA.F[index.complete, -kk], |
|
1201 |
- muB.F[index.complete, -kk]) |
|
1202 |
- betahat <- solve(crossprod(X), crossprod(X,Y)) |
|
1203 |
- X <- cbind(1, muA.F[index[[j]], kk], muB.F[index[[j]], kk]) |
|
1204 |
- mus <- X %*% betahat |
|
1205 |
- muA.F[index[[j]], -kk] <- mus[, 1:2] |
|
1206 |
- muB.F[index[[j]], -kk] <- mus[, 3:4] |
|
1330 |
+ if(enough.women){ |
|
1331 |
+ medianA.F <- cbind(medianA.AA[, k], medianA.AB[, k], medianA.BB[, k]) |
|
1332 |
+ medianB.F <- cbind(medianB.AA[, k], medianB.AB[, k], medianB.BB[, k]) |
|
1333 |
+ madA.F <- cbind(madA.AA[, k], madA.AB[, k], madA.BB[, k]) |
|
1334 |
+ madB.F <- cbind(madB.AA[, k], madB.AB[, k], madB.BB[, k]) |
|
1335 |
+ NN.F <- cbind(N.AA.F[, k], N.AB.F[, k], N.BB.F[, k]) |
|
1336 |
+ } |
|
1337 |
+ if(enough.men){ |
|
1338 |
+ madA.M <- madA.Mlist[[k]] |
|
1339 |
+ madB.M <- madB.Mlist[[k]] |
|
1340 |
+ medianA.M <- medianA.Mlist[[k]] |
|
1341 |
+ medianB.M <- medianB.Mlist[[k]] |
|
1342 |
+ NN.M <- NN.Mlist[[k]] |
|
1343 |
+ } |
|
1344 |
+ if(enough.men & enough.women){ |
|
1345 |
+ betas <- fit.wls(cbind(NN.M[, c(1,3)], NN.F), |
|
1346 |
+ sigma=cbind(madA.M[, c(1,3)], madA.F), |
|
1347 |
+ allele="A", |
|
1348 |
+ Y=cbind(medianA.M[, c(1,3)], medianA.F), |
|
1349 |
+ autosome=FALSE) |
|
1350 |
+ nuA[, k] <- betas[1, ] |
|
1351 |
+ phiA[, k] <- betas[2, ] |
|
1352 |
+ phiA2[, k] <- betas[3, ] |
|
1353 |
+ betas <- fit.wls(cbind(NN.M[, c(1,3)], NN.F), |
|
1354 |
+ sigma=cbind(madB.M[, c(1,3)], madB.F), |
|
1355 |
+ allele="B", |
|
1356 |
+ Y=cbind(medianB.M[, c(1,3)], medianB.F), |
|
1357 |
+ autosome=FALSE) |
|
1358 |
+ nuB[, k] <- betas[1, ] |
|
1359 |
+ phiB[, k] <- betas[2, ] |
|
1360 |
+ phiB2[, k] <- betas[3, ] |
|
1207 | 1361 |
} |
1208 |
- negA <- rowSums(muA.F < 0) > 0 |
|
1209 |
- negB <- rowSums(muB.F < 0) > 0 |
|
1210 |
- flags[, J] <- rowSums(Ns.F == 0) > 0 | negA | negB |
|
1211 |
- ##flags[, J] <- index[[1]] | index[[2]] | index[[3]] | rowSums( |
|
1212 |
- ##formerly coefs() |
|
1213 |
- Np <- cbind(Ns.M[, c(1,3)], Ns.F) |
|
1214 |
- Np[Np < 1] <- 1 |
|
1215 |
- vA <- cbind(vA.M[, c(1, 3)], vA.F) |
|
1216 |
- vB <- cbind(vB.M[, c(1, 3)], vB.F) |
|
1217 |
- muA <- cbind(muA.M[, c(1,3)], muA.F) |
|
1218 |
- muB <- cbind(muB.M[, c(1,3)], muB.F) |
|
1219 |
- vA2 <- vA^2/Np |
|
1220 |
- vB2 <- vB^2/Np |
|
1221 |
- wA <- sqrt(1/vA2) |
|
1222 |
- wB <- sqrt(1/vB2) |
|
1223 |
- YA <- muA*wA |
|
1224 |
- YB <- muB*wB |
|
1225 |
- ##res <- nuphiAlleleX(allele="A", Ystar=YA, W=wA) |
|
1226 |
- betas <- fit.wls(allele="A", Ystar=YA, W=wA, Ns=Ns, autosome=FALSE) |
|
1227 |
- nuA[, J] <- betas[1, ] |
|
1228 |
- phiA[, J] <- betas[2, ] |
|
1229 |
- phiA2[, J] <- betas[3, ] |
|
1230 |
- rm(betas) |
|
1231 |
- betas <- fit.wls(allele="B", Ystar=YB, W=wB, Ns=Ns, autosome=FALSE) |
|
1232 |
- nuB[, J] <- betas[1, ] |
|
1233 |
- phiB[, J] <- betas[2, ] |
|
1234 |
- phiB2[, J] <- betas[3, ] |
|
1235 |
- if(THR.NU.PHI){ |
|
1236 |
- nuA[nuA[, J] < MIN.NU, J] <- MIN.NU |
|
1237 |
- nuB[nuB[, J] < MIN.NU, J] <- MIN.NU |
|
1238 |
- phiA[phiA[, J] < MIN.PHI, J] <- MIN.PHI |
|
1239 |
- phiA2[phiA2[, J] < MIN.PHI, J] <- MIN.PHI |
|
1240 |
- phiB[phiB[, J] < MIN.PHI, J] <- MIN.PHI |
|
1241 |
- phiB2[phiB2[, J] < MIN.PHI, J] <- MIN.PHI |
|
1362 |
+ if(enough.men & !enough.women){ |
|
1363 |
+ betas <- fit.wls(NN.M[, c(1,3)], |
|
1364 |
+ sigma=madA.M[, c(1,3)], |
|
1365 |
+ allele="A", |
|
1366 |
+ Y=medianA.M[, c(1,3)], |
|
1367 |
+ autosome=FALSE, |
|
1368 |
+ X=cbind(1, c(0, 1))) |
|
1369 |
+ nuA[, k] <- betas[1, ] |
|
1370 |
+ phiA[, k] <- betas[2, ] |
|
1371 |
+ betas <- fit.wls(NN.M[, c(1,3)], |
|
1372 |
+ sigma=madB.M[, c(1,3)], |
|
1373 |
+ allele="B", |
|
1374 |
+ Y=medianB.M[, c(1,3)], |
|
1375 |
+ autosome=FALSE, |
|
1376 |
+ X=cbind(1, c(0, 1))) |
|
1377 |
+ nuB[, k] <- betas[1, ] |
|
1378 |
+ phiB[, k] <- betas[2, ] |
|
1379 |
+ } |
|
1380 |
+ if(!enough.men & enough.women){ |
|
1381 |
+ betas <- fit.wls(NN.F, |
|
1382 |
+ sigma=madA.F, |
|
1383 |
+ allele="A", |
|
1384 |
+ Y=medianA.F, |
|
1385 |
+ autosome=TRUE) ## can just use the usual design matrix for the women-only analysis |
|
1386 |
+ nuA[, k] <- betas[1, ] |
|
1387 |
+ phiA[, k] <- betas[2, ] |
|
1388 |
+ betas <- fit.wls(NN.F, |
|
1389 |
+ sigma=madB.F, |
|
1390 |
+ allele="B", |
|
1391 |
+ Y=medianB.F, |
|
1392 |
+ autosome=TRUE) ## can just use the usual design matrix for the women-only analysis |
|
1393 |
+ nuB[, k] <- betas[1, ] |
|
1394 |
+ phiB[, k] <- betas[2, ] |
|
1242 | 1395 |
} |
1243 |
- phistar <- phiB2[, J]/phiA[, J] |
|
1244 |
-## tmp <- (B-nuB[, J] - phistar*A + phistar*nuA[, J])/phiB[, J] |
|
1245 |
-## cB[, k] <- tmp/(1-phistar*phiA2[, J]/phiB[, J]) |
|
1246 |
-## cA[, k] <- (A-nuA[, J]-phiA2[, J]*cB[, k])/phiA[, J] |
|
1247 |
- ##some of the snps are called for the men, but not the women |
|
1248 |
- rm(YA, YB, wA, wB, res, phistar, A, B, G, index) |
|
1249 |
- ##gc() |
|
1250 | 1396 |
} |
1251 |
-## cA[cA < 0.05] <- 0.05 |
|
1252 |
-## cB[cB < 0.05] <- 0.05 |
|
1253 |
-## cA[cA > 5] <- 5 |
|
1254 |
-## cB[cB > 5] <- 5 |
|
1255 |
- |
|
1256 |
- ##-------------------------------------------------- |
|
1257 |
- ##RS: need to fix. why are there NA's by coercion |
|
1258 |
-## cA <- matrix(as.integer(cA*100), nrow(cA), ncol(cA)) |
|
1259 |
- ##-------------------------------------------------- |
|
1260 |
- ##ii <- rowSums(is.na(cA)) > 0 |
|
1261 |
- ##these often arise at SNPs with low confidence scores |
|
1262 |
-## cB <- matrix(as.integer(cB*100), nrow(cB), ncol(cB)) |
|
1263 |
-## CA(object)[snps, ] <- cA |
|
1264 |
-## CB(object)[snps, ] <- cB |
|
1265 |
- snpflags[snps, ] <- flags |
|
1266 |
- tmp <- physical(lM(object))$tau2A |
|
1267 |
- tmp[snps, ] <- tau2A |
|
1268 |
- lM(object)$tau2A <- tmp |
|
1269 |
- tmp <- physical(lM(object))$tau2B |
|
1270 |
- tmp[snps, ] <- tau2B |
|
1271 |
- lM(object)$tau2B <- tmp |
|
1272 |
- tmp <- physical(lM(object))$tau2B |
|
1273 |
- tmp[snps, ] <- tau2B |
|
1274 |
- lM(object)$tau2B <- tmp |
|
1275 |
- tmp <- physical(lM(object))$sig2A |
|
1276 |
- tmp[snps, ] <- sig2A |
|
1277 |
- lM(object)$sig2A <- tmp |
|
1278 |
- tmp <- physical(lM(object))$sig2B |
|
1279 |
- tmp[snps, ] <- sig2B |
|
1280 |
- lM(object)$sig2B <- tmp |
|
1281 |
- tmp <- physical(lM(object))$nuA |
|
1282 |
- tmp[snps, ] <- nuA |
|
1283 |
- lM(object)$nuA <- tmp |
|
1284 |
- tmp <- physical(lM(object))$nuB |
|
1285 |
- tmp[snps, ] <- nuB |
|
1286 |
- lM(object)$nuB <- tmp |
|
1287 |
- tmp <- physical(lM(object))$phiA |
|
1288 |
- tmp[snps, ] <- phiA |
|
1289 |
- lM(object)$phiA <- tmp |
|
1290 |
- tmp <- physical(lM(object))$phiB |
|
1291 |
- tmp[snps, ] <- phiB |
|
1292 |
- lM(object)$phiB <- tmp |
|
1293 |
- tmp <- physical(lM(object))$phiPrimeA |
|
1294 |
- tmp[snps, ] <- phiA2 |
|
1295 |
- lM(object)$phiPrimeA <- tmp |
|
1296 |
- tmp <- physical(lM(object))$phiPrimeB |
|
1297 |
- tmp[snps, ] <- phiB2 |
|
1298 |
- lM(object)$phiPrimeB <- tmp |
|
1299 |
- |
|
1300 |
- tmp <- physical(lM(object))$corrAB |
|
1301 |
- tmp[snps, ] <- corrAB |
|
1302 |
- lM(object)$corrAB <- tmp |
|
1303 |
- tmp <- physical(lM(object))$corrAA |
|
1304 |
- tmp[snps, ] <- corrAA |
|
1305 |
- lM(object)$corrAA <- tmp |
|
1306 |
- tmp <- physical(lM(object))$corrBB |
|
1307 |
- tmp[snps, ] <- corrBB |
|
1308 |
- lM(object)$corrBB <- tmp |
|
1309 |
- lapply(assayData(object), close) |
|
1310 |
- lapply(lM(object), close) |
|
1397 |
+ if(THR.NU.PHI){ |
|
1398 |
+ nuA[nuA < MIN.NU] <- MIN.NU |
|
1399 |
+ nuB[nuB < MIN.NU] <- MIN.NU |
|
1400 |
+ phiA[phiA < MIN.PHI] <- MIN.PHI |
|
1401 |
+ phiA2[phiA2 < MIN.PHI] <- MIN.PHI |
|
1402 |
+ phiB[phiB < MIN.PHI] <- MIN.PHI |
|
1403 |
+ phiB2[phiB2 < MIN.PHI] <- MIN.PHI |
|
1404 |
+ } |
|
1405 |
+ nuA(object)[marker.index, ] <- nuA |
|
1406 |
+ nuB(object)[marker.index, ] <- nuB |
|
1407 |
+ phiA(object)[marker.index, ] <- phiA |
|
1408 |
+ phiB(object)[marker.index, ] <- phiB |
|
1409 |
+ phiPrimeA(object)[marker.index, ] <- phiA2 |
|
1410 |
+ phiPrimeB(object)[marker.index, ] <- phiB2 |
|
1311 | 1411 |
TRUE |
1312 | 1412 |
} |
1313 | 1413 |
|
1314 |
-fit.lm4 <- function(strata.index, |
|
1414 |
+fit.lm4 <- function(strata, |
|
1315 | 1415 |
index.list, |
1316 |
- marker.index, |
|
1317 | 1416 |
object, |
1318 |
- Ns, |
|
1319 |
- batchSize, |
|
1320 | 1417 |
SNRMin, |
1321 | 1418 |
MIN.SAMPLES, |
1322 | 1419 |
MIN.OBS, |
... | ... |
@@ -1325,142 +1422,104 @@ fit.lm4 <- function(strata.index, |
1325 | 1422 |
THR.NU.PHI, |
1326 | 1423 |
MIN.NU, |
1327 | 1424 |
MIN.PHI, |
1328 |
- verbose, ...){ |
|
1329 |
- physical <- get("physical") |
|
1330 |
- if(verbose) message("Probe stratum ", strata.index, " of ", length(index.list)) |
|
1331 |
- open(object) |
|
1332 |
- open(normal) |
|
1333 |
- open(snpflags) |
|
1334 |
- snps <- index.list[[strata.index]] |
|
1335 |
- batches <- split(seq(along=batch(object)), batch(object)) |
|
1336 |
- batches <- batches[sapply(batches, length) >= MIN.SAMPLES] |
|
1337 |
- nuA <- phiA <- sig2A <- tau2A <- matrix(NA, length(snps), length(unique(batch(object)))) |
|
1338 |
-## cA <- matrix(NA, length(snps), ncol(object)) |
|
1339 |
- ii <- isSnp(object) & chromosome(object) < 23 & !is.na(chromosome(object)) |
|
1340 |
- flags <- snpflags[ii, , drop=FALSE] |
|
1341 |
- noflags <- rowSums(flags, na.rm=TRUE) == 0 |
|
1342 |
- lapply(lM(object), open) |
|
1343 |
- nuIA <- physical(lM(object))$nuA[ii, ] |
|
1344 |
- nuIB <- physical(lM(object))$nuB[ii, ] |
|
1345 |
- phiIA <- physical(lM(object))$phiA[ii,] |
|
1346 |
- phiIB <- physical(lM(object))$phiB[ii,] |
|
1347 |
- |
|
1348 |
- i1 <- rowSums(nuIA < 20, na.rm=TRUE) == 0 |
|
1349 |
- i2 <- rowSums(nuIB < 20, na.rm=TRUE) == 0 |
|
1350 |
- i3 <- rowSums(phiIA < 20, na.rm=TRUE) == 0 |
|
1351 |
- i4 <- rowSums(phiIB < 20, na.rm=TRUE) == 0 |
|
1352 |
- |
|
1353 |
- snp.index <- which(i1 & i2 & i3 & i4 & noflags) |
|
1354 |
- if(length(snp.index) == 0){ |
|
1355 |
- warning("No snps meet the following criteria: (1) nu and phi > 20 and (2) at least MIN.OBS in each genotype cluster. CN not estimated for nonpolymorphic loci on X") |
|
1356 |
- return(TRUE) |
|
1357 |
- } |
|
1358 |
- if(length(snp.index) >= 5000){ |
|
1359 |
- snp.index <- sample(snp.index, 5000) |
|
1360 |
- } |
|
1361 |
- phiA.snp <- physical(lM(object))$phiA[snp.index, , drop=FALSE] |
|
1362 |
- phiB.snp <- physical(lM(object))$phiB[snp.index, , drop=FALSE] |
|
1363 |
- A.snp <- as.matrix(A(object)[snp.index, ]) |
|
1364 |
- B.snp <- as.matrix(B(object)[snp.index, ]) |
|
1365 |
- NORM.snp <- as.matrix(normal[snp.index, ]) |
|
1366 |
- NORM.np <- as.matrix(normal[snps, ]) |
|
1425 |
+ verbose, is.lds, ...){ |
|
1426 |
+ if(is.lds) {physical <- get("physical"); open(object)} |
|
1367 | 1427 |
gender <- object$gender |
1428 |
+ enough.males <- sum(gender==1) > MIN.SAMPLES |
|
1429 |
+ enough.females <- sum(gender==2) > MIN.SAMPLES |
|
1430 |
+ if(!enough.males & !enough.females){ |
|
1431 |
+ message(paste("fewer than", MIN.SAMPLES, "men and women. Copy number not estimated for CHR X")) |
|
1432 |
+ return(object) |
|
1433 |
+ } |
|
1434 |
+ if(verbose) message("Probe stratum ", strata, " of ", length(index.list)) |
|
1435 |
+ marker.index <- index.list[[strata]] |
|
1436 |
+ batches <- split(seq_along(batch(object)), as.character(batch(object))) |
|
1437 |
+ batches <- batches[sapply(batches, length) >= MIN.SAMPLES] |
|
1438 |
+ nc <- length(batchNames(object)) |
|
1368 | 1439 |
|
1440 |
+ if(enough.males){ |
|
1441 |
+ res <- summarizeMaleXNps(marker.index=marker.index, |
|
1442 |
+ batches=batches, |
|
1443 |
+ object=object, MIN.SAMPLES=MIN.SAMPLES) |
|
1444 |
+ medianA.AA.M <- res[["medianA.AA"]] |
|
1445 |
+ madA.AA.M <- res[["madA.AA"]] |
|
1369 | 1446 |
|
1370 |
- pseudoAR <- position(object)[snps] < 2709520 | (position(object)[snps] > 154584237 & position(object)[snps] < 154913754) |
|
1371 |
- pseudoAR[is.na(pseudoAR)] <- FALSE |
|
1447 |
+ } |
|
1448 |
+ medianA.AA.F <- as.matrix(medianA.AA(object)[marker.index, ]) ## median for women |
|
1449 |
+ madA.AA.F <- as.matrix(madA.AA(object)[marker.index, ]) ## median for women |
|
1450 |
+ split.gender <- split(gender, as.character(batch(object))) |
|
1451 |
+ N.M <- sapply(split.gender, function(x) sum(x==1)) |
|
1452 |
+ N.F <- sapply(split.gender, function(x) sum(x==2)) |
|
1453 |
+ nuA <- as.matrix(nuA(object)[marker.index, ]) |
|
1454 |
+ nuB <- as.matrix(nuB(object)[marker.index, ]) |
|
1455 |
+ phiA <- as.matrix(phiA(object)[marker.index, ]) |
|
1456 |
+ phiB <- as.matrix(phiB(object)[marker.index, ]) |
|
1372 | 1457 |
|
1373 |
- GG <- as.matrix(calls(object)[snp.index, ]) |
|
1374 |
- CP <- as.matrix(snpCallProbability(object)[snp.index, ]) |
|
1375 |
- AA.np <- as.matrix(A(object)[snps, ]) |
|
1376 |
- ##if(missing(which.batches)) which.batches <- seq(along=batches) |
|
1377 |
- ##batches <- batches[which.batches] |
|
1378 |
- for(k in batches){ |
|
1379 |
- ##if(verbose) message("SNP batch ", ii, " of ", length(batches)) |
|
1380 |
- G <- GG[, k] |
|
1381 |
- xx <- CP[, k] |
|
1382 |
- highConf <- (1-exp(-xx/1000)) > GT.CONF.THR |
|
1383 |
- G <- G*highConf*NORM.snp[, k] |
|
1384 |
- ##snps |
|
1385 |
- AA <- A.snp[, k] |
|
1386 |
- BB <- B.snp[, k] |
|
1387 |
- |
|
1388 |
- |
|
1389 |
- ##index <- GT.B <- GT.A <- vector("list", 3) |
|
1390 |