sufficient statistics for X chromosome no longer written to file.
I do not have an efficient way to do this that is easy to implement.
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@52819 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -609,7 +609,7 @@ summarizeMaleXNps <- function(marker.index, |
609 | 609 |
} |
610 | 610 |
|
611 | 611 |
|
612 |
-summarizeMaleXGenotypes <- function(marker.index, |
|
612 |
+summarizeXGenotypes <- function(marker.index, |
|
613 | 613 |
batches, |
614 | 614 |
object, |
615 | 615 |
GT.CONF.THR, |
... | ... |
@@ -617,22 +617,26 @@ summarizeMaleXGenotypes <- function(marker.index, |
617 | 617 |
MIN.SAMPLES, |
618 | 618 |
verbose, |
619 | 619 |
is.lds, |
620 |
- DF.PRIOR,...){ |
|
620 |
+ DF.PRIOR, |
|
621 |
+ gender="male", ...){ |
|
622 |
+ if(gender == "male"){ |
|
623 |
+ sample.index <- which(gender==1) |
|
624 |
+ } else sample.index <- which(gender==2) |
|
621 | 625 |
nr <- length(marker.index) |
622 | 626 |
nc <- length(batchNames(object)) |
623 | 627 |
## NN.Mlist <- imputed.medianA <- imputed.medianB <- shrink.madA <- shrink.madB <- vector("list", nc) |
624 | 628 |
NN.Mlist <- medianA <- medianB <- shrink.madA <- shrink.madB <- vector("list", nc) |
625 | 629 |
gender <- object$gender |
626 |
- GG <- as.matrix(calls(object)[marker.index, gender==1]) |
|
627 |
- CP <- as.matrix(snpCallProbability(object)[marker.index, gender==1]) |
|
628 |
- AA <- as.matrix(A(object)[marker.index, gender==1]) |
|
629 |
- BB <- as.matrix(B(object)[marker.index, gender==1]) |
|
630 |
+ GG <- as.matrix(calls(object)[marker.index, sample.index]) |
|
631 |
+ CP <- as.matrix(snpCallProbability(object)[marker.index, sample.index]) |
|
632 |
+ AA <- as.matrix(A(object)[marker.index, sample.index]) |
|
633 |
+ BB <- as.matrix(B(object)[marker.index, sample.index]) |
|
630 | 634 |
for(k in seq_along(batches)){ |
631 |
- B <- batches[[k]] |
|
632 |
- this.batch <- unique(as.character(batch(object)[B])) |
|
633 |
- gender <- object$gender[B] |
|
634 |
- if(sum(gender==1) < MIN.SAMPLES) next() |
|
635 |
- sns.batch <- sampleNames(object)[B] |
|
635 |
+ sample.index <- batches[[k]] |
|
636 |
+ this.batch <- unique(as.character(batch(object)[sample.index])) |
|
637 |
+ ##gender <- object$gender[sample.index] |
|
638 |
+ if(length(sample.index) < MIN.SAMPLES) next() |
|
639 |
+ sns.batch <- sampleNames(object)[sample.index] |
|
636 | 640 |
##subset GG apppriately |
637 | 641 |
sns <- colnames(GG) |
638 | 642 |
J <- sns%in%sns.batch |
... | ... |
@@ -648,9 +652,10 @@ summarizeMaleXGenotypes <- function(marker.index, |
648 | 652 |
G.AB[G.AB==FALSE] <- NA |
649 | 653 |
G.BB <- G==3 |
650 | 654 |
G.BB[G.BB==FALSE] <- NA |
651 |
- N.AA.M <- rowSums(G.AA, na.rm=TRUE) |
|
652 |
- N.AB.M <- rowSums(G.AB, na.rm=TRUE) |
|
653 |
- N.BB.M <- rowSums(G.BB, na.rm=TRUE) |
|
655 |
+ N.AA <- rowSums(G.AA, na.rm=TRUE) |
|
656 |
+ if(gender == "female") |
|
657 |
+ N.AB <- rowSums(G.AB, na.rm=TRUE) |
|
658 |
+ N.BB <- rowSums(G.BB, na.rm=TRUE) |
|
654 | 659 |
summaryStats <- function(X, INT, FUNS){ |
655 | 660 |
tmp <- matrix(NA, nrow(X), length(FUNS)) |
656 | 661 |
for(j in seq_along(FUNS)){ |
... | ... |
@@ -660,44 +665,68 @@ summarizeMaleXGenotypes <- function(marker.index, |
660 | 665 |
tmp |
661 | 666 |
} |
662 | 667 |
statsA.AA <- summaryStats(G.AA, A, FUNS=c("rowMedians", "rowMAD")) |
663 |
- ##statsA.AB <- summaryStats(G.AB, A, FUNS=c("rowMedians", "rowMAD")) |
|
668 |
+ if(gender == "female") |
|
669 |
+ statsA.AB <- summaryStats(G.AB, A, FUNS=c("rowMedians", "rowMAD")) |
|
664 | 670 |
statsA.BB <- summaryStats(G.BB, A, FUNS=c("rowMedians", "rowMAD")) |
665 | 671 |
statsB.AA <- summaryStats(G.AA, B, FUNS=c("rowMedians", "rowMAD")) |
666 |
- ##statsB.AB <- summaryStats(G.AB, B, FUNS=c("rowMedians", "rowMAD")) |
|
672 |
+ if(gender == "female") |
|
673 |
+ statsB.AB <- summaryStats(G.AB, B, FUNS=c("rowMedians", "rowMAD")) |
|
667 | 674 |
statsB.BB <- summaryStats(G.BB, B, FUNS=c("rowMedians", "rowMAD")) |
668 |
- medianA[[k]] <- cbind(statsA.AA[, 1], ##statsA.AB[, 1], |
|
669 |
- statsA.BB[, 1]) |
|
670 |
- medianB[[k]] <- cbind(statsB.AA[, 1], ##statsB.AB[, 1], |
|
671 |
- statsB.BB[, 1]) |
|
672 |
- madA <- cbind(statsA.AA[, 2], ##statsA.AB[, 2], |
|
673 |
- statsA.BB[, 2]) |
|
674 |
- madB <- cbind(statsB.AA[, 2], ##statsB.AB[, 2], |
|
675 |
- statsB.BB[, 2]) |
|
676 |
- rm(statsA.AA, ##statsA.AB, |
|
677 |
- statsA.BB, statsB.AA, |
|
678 |
- ##statsB.AB, |
|
679 |
- statsB.BB) |
|
680 |
- NN.M <- cbind(N.AA.M, ##N.AB.M, |
|
681 |
- N.BB.M) |
|
682 |
- NN.Mlist[[k]] <- NN.M |
|
683 |
- shrink.madA[[k]] <- shrink(madA, NN.M, DF.PRIOR) |
|
684 |
- shrink.madB[[k]] <- shrink(madB, NN.M, DF.PRIOR) |
|
675 |
+ if(gender=="male"){ |
|
676 |
+ medianA[[k]] <- cbind(statsA.AA[, 1], ##statsA.AB[, 1], |
|
677 |
+ statsA.BB[, 1]) |
|
678 |
+ medianB[[k]] <- cbind(statsB.AA[, 1], ##statsB.AB[, 1], |
|
679 |
+ statsB.BB[, 1]) |
|
680 |
+ madA <- cbind(statsA.AA[, 2], ##statsA.AB[, 2], |
|
681 |
+ statsA.BB[, 2]) |
|
682 |
+ madB <- cbind(statsB.AA[, 2], ##statsB.AB[, 2], |
|
683 |
+ statsB.BB[, 2]) |
|
684 |
+ NN <- cbind(N.AA, N.BB) |
|
685 |
+ rm(statsA.AA, statsA.BB, statsB.AA, statsB.AB, statsB.BB) |
|
686 |
+ } else { |
|
687 |
+ medianA[[k]] <- cbind(statsA.AA[, 1], statsA.AB[, 1], |
|
688 |
+ statsA.BB[, 1]) |
|
689 |
+ medianB[[k]] <- cbind(statsB.AA[, 1], statsB.AB[, 1], |
|
690 |
+ statsB.BB[, 1]) |
|
691 |
+ madA <- cbind(statsA.AA[, 2], statsA.AB[, 2], |
|
692 |
+ statsA.BB[, 2]) |
|
693 |
+ madB <- cbind(statsB.AA[, 2], statsB.AB[, 2], |
|
694 |
+ statsB.BB[, 2]) |
|
695 |
+ NN <- cbind(N.AA, N.AB, N.BB) |
|
696 |
+ rm(statsA.AA, statsA.AB, statsA.BB, statsB.AA, statsB.AB, statsB.BB) |
|
697 |
+ } |
|
698 |
+ NN.Mlist[[k]] <- NN |
|
699 |
+ shrink.madA[[k]] <- shrink(madA, NN, DF.PRIOR) |
|
700 |
+ shrink.madB[[k]] <- shrink(madB, NN, DF.PRIOR) |
|
685 | 701 |
##--------------------------------------------------------------------------- |
686 | 702 |
## SNPs that we'll use for imputing location/scale of unobserved genotypes |
687 | 703 |
##--------------------------------------------------------------------------- |
688 |
- ##index.complete <- indexComplete(NN.M[, -2], medianA[[k]], medianB[[k]], MIN.OBS) |
|
689 |
- index.complete <- indexComplete(NN.M, medianA[[k]], medianB[[k]], MIN.OBS) |
|
704 |
+ index.complete <- indexComplete(NN, medianA[[k]], medianB[[k]], MIN.OBS) |
|
690 | 705 |
|
691 | 706 |
##--------------------------------------------------------------------------- |
692 | 707 |
## Impute sufficient statistics for unobserved genotypes (plate-specific) |
693 | 708 |
##--------------------------------------------------------------------------- |
694 |
- res <- imputeCenterX(medianA[[k]], medianB[[k]], NN.M, index.complete, MIN.OBS) |
|
709 |
+ if(gender=="male"){ |
|
710 |
+ res <- imputeCenterX(medianA[[k]], medianB[[k]], NN.M, index.complete, MIN.OBS) |
|
711 |
+ } else { |
|
712 |
+ unobservedAA <- NN[, 1] < MIN.OBS |
|
713 |
+ unobservedAB <- NN[, 2] < MIN.OBS |
|
714 |
+ unobservedBB <- NN[, 3] < MIN.OBS |
|
715 |
+ unobserved.index <- vector("list", 3) |
|
716 |
+ ## AB, BB observed |
|
717 |
+ unobserved.index[[1]] <- which(unobservedAA & (NN[, 2] >= MIN.OBS & NN[, 3] >= MIN.OBS)) |
|
718 |
+ ## AA and BB observed (strange) |
|
719 |
+ unobserved.index[[2]] <- which(unobservedAB & (NN[, 1] >= MIN.OBS & NN[, 3] >= MIN.OBS)) |
|
720 |
+ ## AB and AA observed |
|
721 |
+ unobserved.index[[3]] <- which(unobservedBB & (NN[, 2] >= MIN.OBS & NN[, 1] >= MIN.OBS)) |
|
722 |
+ res <- imputeCenter(medianA[[k]], medianB[[k]], index.complete, unobserved.index) |
|
723 |
+ } |
|
695 | 724 |
medianA[[k]] <- res[[1]] |
696 | 725 |
medianB[[k]] <- res[[2]] |
697 | 726 |
} |
698 | 727 |
return(list(madA=shrink.madA, |
699 | 728 |
madB=shrink.madB, |
700 |
- NN.M=NN.Mlist, |
|
729 |
+ NN=NN.Mlist, |
|
701 | 730 |
medianA=medianA, |
702 | 731 |
medianB=medianB)) |
703 | 732 |
} |
... | ... |
@@ -734,7 +763,7 @@ fit.lm3 <- function(strata, |
734 | 763 |
phiA2 <- as.matrix(phiPrimeA(object)[marker.index, ]) |
735 | 764 |
phiB2 <- as.matrix(phiPrimeB(object)[marker.index, ]) |
736 | 765 |
if(enough.males){ |
737 |
- res <- summarizeMaleXGenotypes(marker.index=marker.index, |
|
766 |
+ res <- summarizeXGenotypes(marker.index=marker.index, |
|
738 | 767 |
batches=batches, |
739 | 768 |
object=object, |
740 | 769 |
GT.CONF.THR=GT.CONF.THR, |
... | ... |
@@ -742,7 +771,8 @@ fit.lm3 <- function(strata, |
742 | 771 |
MIN.OBS=MIN.OBS, |
743 | 772 |
verbose=verbose, |
744 | 773 |
is.lds=is.lds, |
745 |
- DF.PRIOR=DF.PRIOR/2) |
|
774 |
+ DF.PRIOR=DF.PRIOR/2, |
|
775 |
+ gender="male") |
|
746 | 776 |
madA.Mlist <- res[["madA"]] |
747 | 777 |
madB.Mlist <- res[["madB"]] |
748 | 778 |
medianA.Mlist <- res[["medianA"]] |
... | ... |
@@ -752,6 +782,16 @@ fit.lm3 <- function(strata, |
752 | 782 |
## Need N, median, mad |
753 | 783 |
} |
754 | 784 |
if(enough.females){ |
785 |
+ res <- summarizeXGenotypes(marker.index=marker.index, |
|
786 |
+ batches=batches, |
|
787 |
+ object=object, |
|
788 |
+ GT.CONF.THR=GT.CONF.THR, |
|
789 |
+ MIN.SAMPLES=MIN.SAMPLES, |
|
790 |
+ MIN.OBS=MIN.OBS, |
|
791 |
+ verbose=verbose, |
|
792 |
+ is.lds=is.lds, |
|
793 |
+ DF.PRIOR=DF.PRIOR/2, |
|
794 |
+ gender="female") |
|
755 | 795 |
N.AA.F <- as.matrix(N.AA(object)[marker.index, ]) |
756 | 796 |
N.AB.F <- as.matrix(N.AB(object)[marker.index, ]) |
757 | 797 |
N.BB.F <- as.matrix(N.BB(object)[marker.index, ]) |