Browse code

comment grand mean summaries in summarizeSnps

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@54390 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 03/04/2011 02:49:55
Showing2 changed files

... ...
@@ -1,7 +1,7 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.9.27
4
+Version: 1.9.28
5 5
 Date: 2010-12-10
6 6
 Author: Benilton S Carvalho <Benilton.Carvalho@cancer.org.uk>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>, Ingo Ruczinski <iruczins@jhsph.edu>, Rafael A Irizarry
7 7
 Maintainer: Benilton S Carvalho <Benilton.Carvalho@cancer.org.uk>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
... ...
@@ -1821,72 +1821,98 @@ summarizeSnps <- function(strata,
1821 1821
 	##---------------------------------------------------------------------------
1822 1822
 	## grand mean
1823 1823
 	##---------------------------------------------------------------------------
1824
-	if(length(batches) > 1 && "grandMean" %in% batchNames(object)){
1825
-		##k <- k+1
1826
-		k <- match("grandMean", batchNames(object))
1827
-		if(verbose) message("        computing grand means...")
1828
-		##G <- GG[, B]
1829
-		##NORM <- normal.index[, k]
1830
-		##xx <- CP[, B]
1831
-		##highConf <- (1-exp(-xx/1000)) > GT.CONF.THR
1832
-		highConf <- (1-exp(-CP/1000)) > GT.CONF.THR
1833
-		##G <- G*highConf
1834
-		## Some markers may have genotype confidences scores that are ALL below the threshold
1835
-		## For these markers, provide statistical summaries based on all the samples and flag
1836
-		## Provide summaries for these markers and flag to indicate the problem
1837
-		ii <- which(rowSums(highConf) == 0)
1838
-		if(length(ii) > 0) highConf[ii, ] <- TRUE
1839
-		GG <- GG*highConf
1840
-		## table(rowSums(G==0))
1841
-		##G <- G*highConf*NORM
1842
-##		A <- AA[, B]
1843
-##		B <- BB[, B]
1844
-		## this can be time consuming...do only once
1845
-##		G.AA <- G==1
1846
-		G.AA <- GG==1
1847
-		G.AA[G.AA==FALSE] <- NA
1848
-		G.AB <- GG==2
1849
-		G.AB[G.AB==FALSE] <- NA
1850
-		G.BB <- GG==3
1851
-		G.BB[G.BB==FALSE] <- NA
1852
-		Ns.AA[, k] <- rowSums(G.AA, na.rm=TRUE)
1853
-		Ns.AB[, k] <- rowSums(G.AB, na.rm=TRUE)
1854
-		Ns.BB[, k] <- rowSums(G.BB, na.rm=TRUE)
1855
-		## Calculate row medians and MADs
1856
-		stats <- summaryStats(G.AA, AA, FUNS=c("rowMedians", "rowMAD"))
1857
-		medianA.AA(object)[index, k] <- stats[, 1]
1858
-		madA.AA(object)[index, k] <- stats[, 2]
1859
-		stats <- summaryStats(G.AB, AA, FUNS=c("rowMedians", "rowMAD"))
1860
-		medianA.AB(object)[index, k] <- stats[, 1]
1861
-		madA.AB(object)[index, k] <- stats[, 2]
1862
-		stats <- summaryStats(G.BB, AA, FUNS=c("rowMedians", "rowMAD"))
1863
-		medianA.BB(object)[index, k] <- stats[, 1]
1864
-		madA.BB(object)[index, k] <- stats[, 2]
1865
-		stats <- summaryStats(G.AA, BB, FUNS=c("rowMedians", "rowMAD"))
1866
-		medianB.AA(object)[index, k] <- stats[, 1]
1867
-		madB.AA(object)[index, k] <- stats[, 2]
1868
-		stats <- summaryStats(G.AB, BB, FUNS=c("rowMedians", "rowMAD"))
1869
-		medianB.AB(object)[index, k] <- stats[, 1]
1870
-		madB.AB(object)[index, k] <- stats[, 2]
1871
-		stats <- summaryStats(G.BB, BB, FUNS=c("rowMedians", "rowMAD"))
1872
-		medianB.BB(object)[index, k] <- stats[, 1]
1873
-		madB.BB(object)[index, k] <- stats[, 2]
1874
-		## log2 Transform Intensities
1875
-		AA <- log2(AA); BB <- log2(BB)
1876
-		tau2A.AA[, k] <- summaryStats(G.AA, AA, FUNS="rowMAD")^2
1877
-		tau2A.BB[, k] <- summaryStats(G.BB, AA, FUNS="rowMAD")^2
1878
-		tau2B.AA[, k] <- summaryStats(G.AA, BB, FUNS="rowMAD")^2
1879
-		tau2B.BB[, k] <- summaryStats(G.BB, BB, FUNS="rowMAD")^2
1880
-		corrAA[, k] <- rowCors(AA*G.AA, BB*G.AA, na.rm=TRUE)
1881
-		corrAB[, k] <- rowCors(AA*G.AB, BB*G.AB, na.rm=TRUE)
1882
-		corrBB[, k] <- rowCors(AA*G.BB, BB*G.BB, na.rm=TRUE)
1883
-		##
1884
-		## TODO:   fill in NAs -- use code from shrinkGenotypeSummaries
1885
-		##
1886
-		rm(GG, CP, AA, BB, FL, stats)
1887
-		gc()
1824
+	if(FALSE){ ## no implemented
1825
+		if(length(batches) > 1 && "grandMean" %in% batchNames(object)){
1826
+			k <- match("grandMean", batchNames(object))
1827
+			if(verbose) message("        computing grand means...")
1828
+			highConf <- (1-exp(-CP/1000)) > GT.CONF.THR
1829
+			rm(CP); gc()
1830
+			## Some markers may have genotype confidences scores that are ALL below the threshold
1831
+			## For these markers, provide statistical summaries based on all the samples and flag
1832
+			## Provide summaries for these markers and flag to indicate the problem
1833
+			ii <- which(rowSums(highConf) == 0)
1834
+			if(length(ii) > 0) highConf[ii, ] <- TRUE
1835
+			GG <- GG*highConf
1836
+			rm(highConf); gc()
1837
+			Ns <- list(Ns.AA, Ns.AB, Ns.BB)
1838
+			rm(Ns.AA, Ns.AB, Ns.BB) ; gc()
1839
+			I <- list(AA, BB)
1840
+			lI <- lapply(I, log2)
1841
+			cors <- list(corrAA, corrAB, corrBB)
1842
+			rm(corrAA, corrAB, corrBB); gc()
1843
+			rm(AA,BB); gc()
1844
+			tau2 <- list(AA=list(tau2A.AA,
1845
+				     tau2B.AA),
1846
+				     AB=list(NULL, NULL),
1847
+				     BB=list(tau2A.BB,
1848
+				     tau2B.BB))
1849
+			rm(tau2A.AA, tau2B.AA, tau2A.BB, tau2B.BB); gc()
1850
+			for(i in 1:3){
1851
+				G.call <- isCall(GG, call=i)
1852
+				for(j in 1:2){
1853
+					computeSummary(object,
1854
+						       G.call,
1855
+						       call=i,
1856
+						       I=I[[j]],
1857
+						       allele=c("A", "B")[j],
1858
+						       Ns=Ns[[i]],
1859
+						       call=i,
1860
+						       tau2=tau2[[i]][[j]],
1861
+						       index=index)
1862
+				}
1863
+				updateCors(cors[[i]], G.call, lI)
1864
+			}
1865
+			##I <- lapply(I, log2)
1866
+			##AA <- log2(AA)
1867
+			##BB <- log2(BB)
1868
+			##		tau2A.AA[, k] <- summaryStats(G.AA, AA, FUNS="rowMAD")^2
1869
+			##		tau2A.BB[, k] <- summaryStats(G.BB, AA, FUNS="rowMAD")^2
1870
+			##tau2B.AA[, k] <- summaryStats(G.AA, BB, FUNS="rowMAD")^2
1871
+			##tau2B.BB[, k] <- summaryStats(G.BB, BB, FUNS="rowMAD")^2
1872
+			##		corrAA[, k] <- rowCors(AA*G.AA, BB*G.AA, na.rm=TRUE)
1873
+			##		corrAB[, k] <- rowCors(AA*G.AB, BB*G.AB, na.rm=TRUE)
1874
+			##		corrBB[, k] <- rowCors(AA*G.BB, BB*G.BB, na.rm=TRUE)
1875
+			##		rm(AA, BB); gc()
1876
+			##
1877
+			## TODO:   fill in NAs -- use code from shrinkGenotypeSummaries
1878
+			##
1879
+			##		rm(GG, CP, AA, BB, FL, stats)
1880
+			##		gc()
1881
+			##		G.AA <- GG==1
1882
+			##		G.AA[G.AA==FALSE] <- NA
1883
+			##		G.AB <- GG==2
1884
+			##		G.AB[G.AB==FALSE] <- NA
1885
+			##		G.BB <- GG==3
1886
+			##		G.BB[G.BB==FALSE] <- NA
1887
+			##		Ns.AA[, k] <- rowSums(G.AA, na.rm=TRUE)
1888
+			##		Ns.AB[, k] <- rowSums(G.AB, na.rm=TRUE)
1889
+			##		Ns.BB[, k] <- rowSums(G.BB, na.rm=TRUE)
1890
+			##		N.AA(object)[index,] <- Ns.AA
1891
+			##		N.AB(object)[index,] <- Ns.AB
1892
+			##		N.BB(object)[index,] <- Ns.BB
1893
+
1894
+			## Calculate row medians and MADs
1895
+			##		medianA.AA(object)[index, k] <- stats[, 1]
1896
+			##		madA.AA(object)[index, k] <- stats[, 2]
1897
+			##		stats <- summaryStats(G.AB, AA, FUNS=c("rowMedians", "rowMAD"))
1898
+			##		medianA.AB(object)[index, k] <- stats[, 1]
1899
+			##		madA.AB(object)[index, k] <- stats[, 2]
1900
+			##		stats <- summaryStats(G.BB, AA, FUNS=c("rowMedians", "rowMAD"))
1901
+			##		medianA.BB(object)[index, k] <- stats[, 1]
1902
+			##		madA.BB(object)[index, k] <- stats[, 2]
1903
+			##		stats <- summaryStats(G.AA, BB, FUNS=c("rowMedians", "rowMAD"))
1904
+			##		medianB.AA(object)[index, k] <- stats[, 1]
1905
+			##		madB.AA(object)[index, k] <- stats[, 2]
1906
+			##		stats <- summaryStats(G.AB, BB, FUNS=c("rowMedians", "rowMAD"))
1907
+			##		medianB.AB(object)[index, k] <- stats[, 1]
1908
+			##		madB.AB(object)[index, k] <- stats[, 2]
1909
+			##		stats <- summaryStats(G.BB, BB, FUNS=c("rowMedians", "rowMAD"))
1910
+			##		medianB.BB(object)[index, k] <- stats[, 1]
1911
+			##		madB.BB(object)[index, k] <- stats[, 2]
1912
+			## log2 Transform Intensities
1913
+		}
1888 1914
 	}
1889
-	if(verbose) message("        Begin writing...")
1915
+	##	if(verbose) message("        Begin writing...")
1890 1916
 	N.AA(object)[index,] <- Ns.AA
1891 1917
 	N.AB(object)[index,] <- Ns.AB
1892 1918
 	N.BB(object)[index,] <- Ns.BB
... ...
@@ -1900,6 +1926,92 @@ summarizeSnps <- function(strata,
1900 1926
 	if(is.lds) return(TRUE) else return(object)
1901 1927
 }
1902 1928
 
1929
+isCall <- function(G, call){
1930
+	G.call <- G==call
1931
+	G.call[G.call==FALSE] <- NA
1932
+	G.call
1933
+}
1934
+
1935
+computeSummary(object, G.call, call, I, allele, Ns, call=1, index){
1936
+	k <- match("grandMean", batchNames(object))
1937
+	stats <- summaryStats(G.call, I, FUNS=c("rowMedians", "rowMAD"))
1938
+	Ns[, k] <- rowSums(G.call, na.rm=TRUE)
1939
+	updateStats(stats, Ns, object, call, allele, index)
1940
+	gc()
1941
+	return()
1942
+}
1943
+
1944
+updateTau <- function(object, tau2, G.call, call, I, allele, index){
1945
+	k <- match("grandMean", batchNames(object))
1946
+	logI <- log2(I)
1947
+	rm(I); gc()
1948
+	tau2[, k] <- summaryStats(G.call, logI, FUNS="rowMAD")^2
1949
+	if(call==1 & allele=="A"){
1950
+		tau2A.AA(object)[index, ] <- tau2
1951
+	}
1952
+	if(call==1 & allele=="B"){
1953
+		tau2B.AA(object)[index, ] <- tau2
1954
+	}
1955
+	if(call==3 & allele=="A"){
1956
+		tau2A.BB(object)[index, ] <- tau2
1957
+	}
1958
+	if(call==3 & allele=="B"){
1959
+		tau2B.BB(object)[index, ] <- tau2
1960
+	}
1961
+	NULL
1962
+}
1963
+
1964
+updateCors <- function(cors, G.call, I){
1965
+	k <- match("grandMean", batchNames(object))
1966
+	cors[, k] <- rowCors(I[[1]]*G.call, I[[2]]*G.call, na.rm=TRUE)
1967
+	if(call==1){
1968
+		corrAA(object)[index, ] <- cors
1969
+	}
1970
+	if(call==2){
1971
+		corrAB(object)[index, ] <- cors
1972
+	}
1973
+	if(call==3){
1974
+		corrBB(object)[index, ] <- cors
1975
+	}
1976
+}
1977
+
1978
+updateStats <- function(stats, Ns, object, call, allele, tau2, index){
1979
+	if(call==1){
1980
+		Ns.AA(object)[index, ] <- Ns
1981
+		if(allele=="A"){
1982
+			medianA.AA(object)[index, k] <- stats[, 1]
1983
+			madA.AA(object)[index, k] <- stats[, 2]
1984
+			updateTau(object, tau2, G.call, call, I, allele, index)
1985
+		} else {
1986
+			medianB.AA(object)[index, k] <- stats[, 1]
1987
+			madB.AA(object)[index, k] <- stats[, 2]
1988
+			updateTau(object, tau2, G.call, call, I, allele, index)
1989
+		}
1990
+	}
1991
+	if(call==2){
1992
+		Ns.AB(object)[index, ] <- Ns
1993
+		if(allele=="A"){
1994
+			medianA.AB(object)[index, k] <- stats[, 1]
1995
+			madA.AB(object)[index, k] <- stats[, 2]
1996
+		} else {
1997
+			medianB.AB(object)[index, k] <- stats[, 1]
1998
+			madB.AB(object)[index, k] <- stats[, 2]
1999
+		}
2000
+	}
2001
+	if(call==3){
2002
+		Ns.BB(object)[index, ] <- Ns
2003
+		if(allele=="A"){
2004
+			medianA.BB(object)[index, k] <- stats[, 1]
2005
+			madA.BB(object)[index, k] <- stats[, 2]
2006
+			updateTau(object, tau2, G.call, call, I, allele, index)
2007
+		} else {
2008
+			medianB.BB(object)[index, k] <- stats[, 1]
2009
+			madB.BB(object)[index, k] <- stats[, 2]
2010
+			updateTau(object, tau2, G.call, call, I, allele, index)
2011
+		}
2012
+	}
2013
+}
2014
+
1903 2015
 crlmmCopynumber <- function(object,
1904 2016
 			    MIN.SAMPLES=10,
1905 2017
 			    SNRMin=5,