Browse code

Changed name of function summarizeMaleXGenotypes to summarizeXgenotypes

sufficient statistics for X chromosome no longer written to file.
I do not have an efficient way to do this that is easy to implement.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@52819 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 16/02/2011 15:57:20
Showing1 changed files

... ...
@@ -609,7 +609,7 @@ summarizeMaleXNps <- function(marker.index,
609 609
 }
610 610
 
611 611
 
612
-summarizeMaleXGenotypes <- function(marker.index,
612
+summarizeXGenotypes <- function(marker.index,
613 613
 				    batches,
614 614
 				    object,
615 615
 				    GT.CONF.THR,
... ...
@@ -617,22 +617,26 @@ summarizeMaleXGenotypes <- function(marker.index,
617 617
 				    MIN.SAMPLES,
618 618
 				    verbose,
619 619
 				    is.lds,
620
-				    DF.PRIOR,...){
620
+				    DF.PRIOR,
621
+				    gender="male", ...){
622
+	if(gender == "male"){
623
+		sample.index <- which(gender==1)
624
+	} else sample.index <- which(gender==2)
621 625
 	nr <- length(marker.index)
622 626
 	nc <- length(batchNames(object))
623 627
 ##	NN.Mlist <- imputed.medianA <- imputed.medianB <- shrink.madA <- shrink.madB <- vector("list", nc)
624 628
 	NN.Mlist <- medianA <- medianB <- shrink.madA <- shrink.madB <- vector("list", nc)
625 629
 	gender <- object$gender
626
-	GG <- as.matrix(calls(object)[marker.index, gender==1])
627
-	CP <- as.matrix(snpCallProbability(object)[marker.index, gender==1])
628
-	AA <- as.matrix(A(object)[marker.index, gender==1])
629
-	BB <- as.matrix(B(object)[marker.index, gender==1])
630
+	GG <- as.matrix(calls(object)[marker.index, sample.index])
631
+	CP <- as.matrix(snpCallProbability(object)[marker.index, sample.index])
632
+	AA <- as.matrix(A(object)[marker.index, sample.index])
633
+	BB <- as.matrix(B(object)[marker.index, sample.index])
630 634
 	for(k in seq_along(batches)){
631
-		B <- batches[[k]]
632
-		this.batch <- unique(as.character(batch(object)[B]))
633
-		gender <- object$gender[B]
634
-		if(sum(gender==1) < MIN.SAMPLES) next()
635
-		sns.batch <- sampleNames(object)[B]
635
+		sample.index <- batches[[k]]
636
+		this.batch <- unique(as.character(batch(object)[sample.index]))
637
+		##gender <- object$gender[sample.index]
638
+		if(length(sample.index) < MIN.SAMPLES) next()
639
+		sns.batch <- sampleNames(object)[sample.index]
636 640
 		##subset GG apppriately
637 641
 		sns <- colnames(GG)
638 642
 		J <- sns%in%sns.batch
... ...
@@ -648,9 +652,10 @@ summarizeMaleXGenotypes <- function(marker.index,
648 652
 		G.AB[G.AB==FALSE] <- NA
649 653
 		G.BB <- G==3
650 654
 		G.BB[G.BB==FALSE] <- NA
651
-		N.AA.M <- rowSums(G.AA, na.rm=TRUE)
652
-		N.AB.M <- rowSums(G.AB, na.rm=TRUE)
653
-		N.BB.M <- rowSums(G.BB, na.rm=TRUE)
655
+		N.AA <- rowSums(G.AA, na.rm=TRUE)
656
+		if(gender == "female")
657
+			N.AB <- rowSums(G.AB, na.rm=TRUE)
658
+		N.BB <- rowSums(G.BB, na.rm=TRUE)
654 659
 		summaryStats <- function(X, INT, FUNS){
655 660
 			tmp <- matrix(NA, nrow(X), length(FUNS))
656 661
 			for(j in seq_along(FUNS)){
... ...
@@ -660,44 +665,68 @@ summarizeMaleXGenotypes <- function(marker.index,
660 665
 			tmp
661 666
 		}
662 667
 		statsA.AA <- summaryStats(G.AA, A, FUNS=c("rowMedians", "rowMAD"))
663
-		##statsA.AB <- summaryStats(G.AB, A, FUNS=c("rowMedians", "rowMAD"))
668
+		if(gender == "female")
669
+			statsA.AB <- summaryStats(G.AB, A, FUNS=c("rowMedians", "rowMAD"))
664 670
 		statsA.BB <- summaryStats(G.BB, A, FUNS=c("rowMedians", "rowMAD"))
665 671
 		statsB.AA <- summaryStats(G.AA, B, FUNS=c("rowMedians", "rowMAD"))
666
-		##statsB.AB <- summaryStats(G.AB, B, FUNS=c("rowMedians", "rowMAD"))
672
+		if(gender == "female")
673
+			statsB.AB <- summaryStats(G.AB, B, FUNS=c("rowMedians", "rowMAD"))
667 674
 		statsB.BB <- summaryStats(G.BB, B, FUNS=c("rowMedians", "rowMAD"))
668
-		medianA[[k]] <- cbind(statsA.AA[, 1], ##statsA.AB[, 1],
669
-				      statsA.BB[, 1])
670
-		medianB[[k]] <- cbind(statsB.AA[, 1], ##statsB.AB[, 1],
671
-				      statsB.BB[, 1])
672
-		madA <- cbind(statsA.AA[, 2],  ##statsA.AB[, 2],
673
-			      statsA.BB[, 2])
674
-		madB <- cbind(statsB.AA[, 2], ##statsB.AB[, 2],
675
-			      statsB.BB[, 2])
676
-		rm(statsA.AA, ##statsA.AB,
677
-		   statsA.BB, statsB.AA,
678
-		   ##statsB.AB,
679
-		   statsB.BB)
680
-		NN.M <- cbind(N.AA.M, ##N.AB.M,
681
-			      N.BB.M)
682
-		NN.Mlist[[k]] <- NN.M
683
-		shrink.madA[[k]] <- shrink(madA, NN.M, DF.PRIOR)
684
-		shrink.madB[[k]] <- shrink(madB, NN.M, DF.PRIOR)
675
+		if(gender=="male"){
676
+			medianA[[k]] <- cbind(statsA.AA[, 1], ##statsA.AB[, 1],
677
+					      statsA.BB[, 1])
678
+			medianB[[k]] <- cbind(statsB.AA[, 1], ##statsB.AB[, 1],
679
+					      statsB.BB[, 1])
680
+			madA <- cbind(statsA.AA[, 2],  ##statsA.AB[, 2],
681
+				      statsA.BB[, 2])
682
+			madB <- cbind(statsB.AA[, 2], ##statsB.AB[, 2],
683
+				      statsB.BB[, 2])
684
+			NN <- cbind(N.AA, N.BB)
685
+			rm(statsA.AA, statsA.BB, statsB.AA, statsB.AB, statsB.BB)
686
+		} else {
687
+			medianA[[k]] <- cbind(statsA.AA[, 1], statsA.AB[, 1],
688
+					      statsA.BB[, 1])
689
+			medianB[[k]] <- cbind(statsB.AA[, 1], statsB.AB[, 1],
690
+					      statsB.BB[, 1])
691
+			madA <- cbind(statsA.AA[, 2],  statsA.AB[, 2],
692
+				      statsA.BB[, 2])
693
+			madB <- cbind(statsB.AA[, 2], statsB.AB[, 2],
694
+				      statsB.BB[, 2])
695
+			NN <- cbind(N.AA, N.AB, N.BB)
696
+			rm(statsA.AA, statsA.AB, statsA.BB, statsB.AA, statsB.AB, statsB.BB)
697
+		}
698
+		NN.Mlist[[k]] <- NN
699
+		shrink.madA[[k]] <- shrink(madA, NN, DF.PRIOR)
700
+		shrink.madB[[k]] <- shrink(madB, NN, DF.PRIOR)
685 701
 		##---------------------------------------------------------------------------
686 702
 		## SNPs that we'll use for imputing location/scale of unobserved genotypes
687 703
 		##---------------------------------------------------------------------------
688
-		##index.complete <- indexComplete(NN.M[, -2], medianA[[k]], medianB[[k]], MIN.OBS)
689
-		index.complete <- indexComplete(NN.M, medianA[[k]], medianB[[k]], MIN.OBS)
704
+		index.complete <- indexComplete(NN, medianA[[k]], medianB[[k]], MIN.OBS)
690 705
 
691 706
 		##---------------------------------------------------------------------------
692 707
 		## Impute sufficient statistics for unobserved genotypes (plate-specific)
693 708
 		##---------------------------------------------------------------------------
694
-		res <- imputeCenterX(medianA[[k]], medianB[[k]], NN.M, index.complete, MIN.OBS)
709
+		if(gender=="male"){
710
+			res <- imputeCenterX(medianA[[k]], medianB[[k]], NN.M, index.complete, MIN.OBS)
711
+		} else {
712
+			unobservedAA <- NN[, 1] < MIN.OBS
713
+			unobservedAB <- NN[, 2] < MIN.OBS
714
+			unobservedBB <- NN[, 3] < MIN.OBS
715
+			unobserved.index <- vector("list", 3)
716
+			## AB, BB observed
717
+			unobserved.index[[1]] <- which(unobservedAA & (NN[, 2] >= MIN.OBS & NN[, 3] >= MIN.OBS))
718
+			## AA and BB observed (strange)
719
+			unobserved.index[[2]] <- which(unobservedAB & (NN[, 1] >= MIN.OBS & NN[, 3] >= MIN.OBS))
720
+			## AB and AA observed
721
+			unobserved.index[[3]] <- which(unobservedBB & (NN[, 2] >= MIN.OBS & NN[, 1] >= MIN.OBS))
722
+			res <- imputeCenter(medianA[[k]], medianB[[k]], index.complete, unobserved.index)
723
+		}
695 724
 		medianA[[k]] <- res[[1]]
696 725
 		medianB[[k]] <- res[[2]]
697 726
 	}
698 727
 	return(list(madA=shrink.madA,
699 728
 		    madB=shrink.madB,
700
-		    NN.M=NN.Mlist,
729
+		    NN=NN.Mlist,
701 730
 		    medianA=medianA,
702 731
 		    medianB=medianB))
703 732
 }
... ...
@@ -734,7 +763,7 @@ fit.lm3 <- function(strata,
734 763
 	phiA2 <- as.matrix(phiPrimeA(object)[marker.index, ])
735 764
 	phiB2 <- as.matrix(phiPrimeB(object)[marker.index, ])
736 765
 	if(enough.males){
737
-		res <- summarizeMaleXGenotypes(marker.index=marker.index,
766
+		res <- summarizeXGenotypes(marker.index=marker.index,
738 767
 					       batches=batches,
739 768
 					       object=object,
740 769
 					       GT.CONF.THR=GT.CONF.THR,
... ...
@@ -742,7 +771,8 @@ fit.lm3 <- function(strata,
742 771
 					       MIN.OBS=MIN.OBS,
743 772
 					       verbose=verbose,
744 773
 					       is.lds=is.lds,
745
-					       DF.PRIOR=DF.PRIOR/2)
774
+					       DF.PRIOR=DF.PRIOR/2,
775
+					   gender="male")
746 776
 		madA.Mlist <- res[["madA"]]
747 777
 		madB.Mlist <- res[["madB"]]
748 778
 		medianA.Mlist <- res[["medianA"]]
... ...
@@ -752,6 +782,16 @@ fit.lm3 <- function(strata,
752 782
 		## Need N, median, mad
753 783
 	}
754 784
 	if(enough.females){
785
+		res <- summarizeXGenotypes(marker.index=marker.index,
786
+						 batches=batches,
787
+						 object=object,
788
+						 GT.CONF.THR=GT.CONF.THR,
789
+						 MIN.SAMPLES=MIN.SAMPLES,
790
+						 MIN.OBS=MIN.OBS,
791
+						 verbose=verbose,
792
+						 is.lds=is.lds,
793
+						 DF.PRIOR=DF.PRIOR/2,
794
+					   gender="female")
755 795
 		N.AA.F <- as.matrix(N.AA(object)[marker.index, ])
756 796
 		N.AB.F <- as.matrix(N.AB(object)[marker.index, ])
757 797
 		N.BB.F <- as.matrix(N.BB(object)[marker.index, ])