git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@54390 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
Package: crlmm |
2 | 2 |
Type: Package |
3 | 3 |
Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays. |
4 |
-Version: 1.9.27 |
|
4 |
+Version: 1.9.28 |
|
5 | 5 |
Date: 2010-12-10 |
6 | 6 |
Author: Benilton S Carvalho <Benilton.Carvalho@cancer.org.uk>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>, Ingo Ruczinski <iruczins@jhsph.edu>, Rafael A Irizarry |
7 | 7 |
Maintainer: Benilton S Carvalho <Benilton.Carvalho@cancer.org.uk>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
... | ... |
@@ -1821,72 +1821,98 @@ summarizeSnps <- function(strata, |
1821 | 1821 |
##--------------------------------------------------------------------------- |
1822 | 1822 |
## grand mean |
1823 | 1823 |
##--------------------------------------------------------------------------- |
1824 |
- if(length(batches) > 1 && "grandMean" %in% batchNames(object)){ |
|
1825 |
- ##k <- k+1 |
|
1826 |
- k <- match("grandMean", batchNames(object)) |
|
1827 |
- if(verbose) message(" computing grand means...") |
|
1828 |
- ##G <- GG[, B] |
|
1829 |
- ##NORM <- normal.index[, k] |
|
1830 |
- ##xx <- CP[, B] |
|
1831 |
- ##highConf <- (1-exp(-xx/1000)) > GT.CONF.THR |
|
1832 |
- highConf <- (1-exp(-CP/1000)) > GT.CONF.THR |
|
1833 |
- ##G <- G*highConf |
|
1834 |
- ## Some markers may have genotype confidences scores that are ALL below the threshold |
|
1835 |
- ## For these markers, provide statistical summaries based on all the samples and flag |
|
1836 |
- ## Provide summaries for these markers and flag to indicate the problem |
|
1837 |
- ii <- which(rowSums(highConf) == 0) |
|
1838 |
- if(length(ii) > 0) highConf[ii, ] <- TRUE |
|
1839 |
- GG <- GG*highConf |
|
1840 |
- ## table(rowSums(G==0)) |
|
1841 |
- ##G <- G*highConf*NORM |
|
1842 |
-## A <- AA[, B] |
|
1843 |
-## B <- BB[, B] |
|
1844 |
- ## this can be time consuming...do only once |
|
1845 |
-## G.AA <- G==1 |
|
1846 |
- G.AA <- GG==1 |
|
1847 |
- G.AA[G.AA==FALSE] <- NA |
|
1848 |
- G.AB <- GG==2 |
|
1849 |
- G.AB[G.AB==FALSE] <- NA |
|
1850 |
- G.BB <- GG==3 |
|
1851 |
- G.BB[G.BB==FALSE] <- NA |
|
1852 |
- Ns.AA[, k] <- rowSums(G.AA, na.rm=TRUE) |
|
1853 |
- Ns.AB[, k] <- rowSums(G.AB, na.rm=TRUE) |
|
1854 |
- Ns.BB[, k] <- rowSums(G.BB, na.rm=TRUE) |
|
1855 |
- ## Calculate row medians and MADs |
|
1856 |
- stats <- summaryStats(G.AA, AA, FUNS=c("rowMedians", "rowMAD")) |
|
1857 |
- medianA.AA(object)[index, k] <- stats[, 1] |
|
1858 |
- madA.AA(object)[index, k] <- stats[, 2] |
|
1859 |
- stats <- summaryStats(G.AB, AA, FUNS=c("rowMedians", "rowMAD")) |
|
1860 |
- medianA.AB(object)[index, k] <- stats[, 1] |
|
1861 |
- madA.AB(object)[index, k] <- stats[, 2] |
|
1862 |
- stats <- summaryStats(G.BB, AA, FUNS=c("rowMedians", "rowMAD")) |
|
1863 |
- medianA.BB(object)[index, k] <- stats[, 1] |
|
1864 |
- madA.BB(object)[index, k] <- stats[, 2] |
|
1865 |
- stats <- summaryStats(G.AA, BB, FUNS=c("rowMedians", "rowMAD")) |
|
1866 |
- medianB.AA(object)[index, k] <- stats[, 1] |
|
1867 |
- madB.AA(object)[index, k] <- stats[, 2] |
|
1868 |
- stats <- summaryStats(G.AB, BB, FUNS=c("rowMedians", "rowMAD")) |
|
1869 |
- medianB.AB(object)[index, k] <- stats[, 1] |
|
1870 |
- madB.AB(object)[index, k] <- stats[, 2] |
|
1871 |
- stats <- summaryStats(G.BB, BB, FUNS=c("rowMedians", "rowMAD")) |
|
1872 |
- medianB.BB(object)[index, k] <- stats[, 1] |
|
1873 |
- madB.BB(object)[index, k] <- stats[, 2] |
|
1874 |
- ## log2 Transform Intensities |
|
1875 |
- AA <- log2(AA); BB <- log2(BB) |
|
1876 |
- tau2A.AA[, k] <- summaryStats(G.AA, AA, FUNS="rowMAD")^2 |
|
1877 |
- tau2A.BB[, k] <- summaryStats(G.BB, AA, FUNS="rowMAD")^2 |
|
1878 |
- tau2B.AA[, k] <- summaryStats(G.AA, BB, FUNS="rowMAD")^2 |
|
1879 |
- tau2B.BB[, k] <- summaryStats(G.BB, BB, FUNS="rowMAD")^2 |
|
1880 |
- corrAA[, k] <- rowCors(AA*G.AA, BB*G.AA, na.rm=TRUE) |
|
1881 |
- corrAB[, k] <- rowCors(AA*G.AB, BB*G.AB, na.rm=TRUE) |
|
1882 |
- corrBB[, k] <- rowCors(AA*G.BB, BB*G.BB, na.rm=TRUE) |
|
1883 |
- ## |
|
1884 |
- ## TODO: fill in NAs -- use code from shrinkGenotypeSummaries |
|
1885 |
- ## |
|
1886 |
- rm(GG, CP, AA, BB, FL, stats) |
|
1887 |
- gc() |
|
1824 |
+ if(FALSE){ ## no implemented |
|
1825 |
+ if(length(batches) > 1 && "grandMean" %in% batchNames(object)){ |
|
1826 |
+ k <- match("grandMean", batchNames(object)) |
|
1827 |
+ if(verbose) message(" computing grand means...") |
|
1828 |
+ highConf <- (1-exp(-CP/1000)) > GT.CONF.THR |
|
1829 |
+ rm(CP); gc() |
|
1830 |
+ ## Some markers may have genotype confidences scores that are ALL below the threshold |
|
1831 |
+ ## For these markers, provide statistical summaries based on all the samples and flag |
|
1832 |
+ ## Provide summaries for these markers and flag to indicate the problem |
|
1833 |
+ ii <- which(rowSums(highConf) == 0) |
|
1834 |
+ if(length(ii) > 0) highConf[ii, ] <- TRUE |
|
1835 |
+ GG <- GG*highConf |
|
1836 |
+ rm(highConf); gc() |
|
1837 |
+ Ns <- list(Ns.AA, Ns.AB, Ns.BB) |
|
1838 |
+ rm(Ns.AA, Ns.AB, Ns.BB) ; gc() |
|
1839 |
+ I <- list(AA, BB) |
|
1840 |
+ lI <- lapply(I, log2) |
|
1841 |
+ cors <- list(corrAA, corrAB, corrBB) |
|
1842 |
+ rm(corrAA, corrAB, corrBB); gc() |
|
1843 |
+ rm(AA,BB); gc() |
|
1844 |
+ tau2 <- list(AA=list(tau2A.AA, |
|
1845 |
+ tau2B.AA), |
|
1846 |
+ AB=list(NULL, NULL), |
|
1847 |
+ BB=list(tau2A.BB, |
|
1848 |
+ tau2B.BB)) |
|
1849 |
+ rm(tau2A.AA, tau2B.AA, tau2A.BB, tau2B.BB); gc() |
|
1850 |
+ for(i in 1:3){ |
|
1851 |
+ G.call <- isCall(GG, call=i) |
|
1852 |
+ for(j in 1:2){ |
|
1853 |
+ computeSummary(object, |
|
1854 |
+ G.call, |
|
1855 |
+ call=i, |
|
1856 |
+ I=I[[j]], |
|
1857 |
+ allele=c("A", "B")[j], |
|
1858 |
+ Ns=Ns[[i]], |
|
1859 |
+ call=i, |
|
1860 |
+ tau2=tau2[[i]][[j]], |
|
1861 |
+ index=index) |
|
1862 |
+ } |
|
1863 |
+ updateCors(cors[[i]], G.call, lI) |
|
1864 |
+ } |
|
1865 |
+ ##I <- lapply(I, log2) |
|
1866 |
+ ##AA <- log2(AA) |
|
1867 |
+ ##BB <- log2(BB) |
|
1868 |
+ ## tau2A.AA[, k] <- summaryStats(G.AA, AA, FUNS="rowMAD")^2 |
|
1869 |
+ ## tau2A.BB[, k] <- summaryStats(G.BB, AA, FUNS="rowMAD")^2 |
|
1870 |
+ ##tau2B.AA[, k] <- summaryStats(G.AA, BB, FUNS="rowMAD")^2 |
|
1871 |
+ ##tau2B.BB[, k] <- summaryStats(G.BB, BB, FUNS="rowMAD")^2 |
|
1872 |
+ ## corrAA[, k] <- rowCors(AA*G.AA, BB*G.AA, na.rm=TRUE) |
|
1873 |
+ ## corrAB[, k] <- rowCors(AA*G.AB, BB*G.AB, na.rm=TRUE) |
|
1874 |
+ ## corrBB[, k] <- rowCors(AA*G.BB, BB*G.BB, na.rm=TRUE) |
|
1875 |
+ ## rm(AA, BB); gc() |
|
1876 |
+ ## |
|
1877 |
+ ## TODO: fill in NAs -- use code from shrinkGenotypeSummaries |
|
1878 |
+ ## |
|
1879 |
+ ## rm(GG, CP, AA, BB, FL, stats) |
|
1880 |
+ ## gc() |
|
1881 |
+ ## G.AA <- GG==1 |
|
1882 |
+ ## G.AA[G.AA==FALSE] <- NA |
|
1883 |
+ ## G.AB <- GG==2 |
|
1884 |
+ ## G.AB[G.AB==FALSE] <- NA |
|
1885 |
+ ## G.BB <- GG==3 |
|
1886 |
+ ## G.BB[G.BB==FALSE] <- NA |
|
1887 |
+ ## Ns.AA[, k] <- rowSums(G.AA, na.rm=TRUE) |
|
1888 |
+ ## Ns.AB[, k] <- rowSums(G.AB, na.rm=TRUE) |
|
1889 |
+ ## Ns.BB[, k] <- rowSums(G.BB, na.rm=TRUE) |
|
1890 |
+ ## N.AA(object)[index,] <- Ns.AA |
|
1891 |
+ ## N.AB(object)[index,] <- Ns.AB |
|
1892 |
+ ## N.BB(object)[index,] <- Ns.BB |
|
1893 |
+ |
|
1894 |
+ ## Calculate row medians and MADs |
|
1895 |
+ ## medianA.AA(object)[index, k] <- stats[, 1] |
|
1896 |
+ ## madA.AA(object)[index, k] <- stats[, 2] |
|
1897 |
+ ## stats <- summaryStats(G.AB, AA, FUNS=c("rowMedians", "rowMAD")) |
|
1898 |
+ ## medianA.AB(object)[index, k] <- stats[, 1] |
|
1899 |
+ ## madA.AB(object)[index, k] <- stats[, 2] |
|
1900 |
+ ## stats <- summaryStats(G.BB, AA, FUNS=c("rowMedians", "rowMAD")) |
|
1901 |
+ ## medianA.BB(object)[index, k] <- stats[, 1] |
|
1902 |
+ ## madA.BB(object)[index, k] <- stats[, 2] |
|
1903 |
+ ## stats <- summaryStats(G.AA, BB, FUNS=c("rowMedians", "rowMAD")) |
|
1904 |
+ ## medianB.AA(object)[index, k] <- stats[, 1] |
|
1905 |
+ ## madB.AA(object)[index, k] <- stats[, 2] |
|
1906 |
+ ## stats <- summaryStats(G.AB, BB, FUNS=c("rowMedians", "rowMAD")) |
|
1907 |
+ ## medianB.AB(object)[index, k] <- stats[, 1] |
|
1908 |
+ ## madB.AB(object)[index, k] <- stats[, 2] |
|
1909 |
+ ## stats <- summaryStats(G.BB, BB, FUNS=c("rowMedians", "rowMAD")) |
|
1910 |
+ ## medianB.BB(object)[index, k] <- stats[, 1] |
|
1911 |
+ ## madB.BB(object)[index, k] <- stats[, 2] |
|
1912 |
+ ## log2 Transform Intensities |
|
1913 |
+ } |
|
1888 | 1914 |
} |
1889 |
- if(verbose) message(" Begin writing...") |
|
1915 |
+ ## if(verbose) message(" Begin writing...") |
|
1890 | 1916 |
N.AA(object)[index,] <- Ns.AA |
1891 | 1917 |
N.AB(object)[index,] <- Ns.AB |
1892 | 1918 |
N.BB(object)[index,] <- Ns.BB |
... | ... |
@@ -1900,6 +1926,92 @@ summarizeSnps <- function(strata, |
1900 | 1926 |
if(is.lds) return(TRUE) else return(object) |
1901 | 1927 |
} |
1902 | 1928 |
|
1929 |
+isCall <- function(G, call){ |
|
1930 |
+ G.call <- G==call |
|
1931 |
+ G.call[G.call==FALSE] <- NA |
|
1932 |
+ G.call |
|
1933 |
+} |
|
1934 |
+ |
|
1935 |
+computeSummary(object, G.call, call, I, allele, Ns, call=1, index){ |
|
1936 |
+ k <- match("grandMean", batchNames(object)) |
|
1937 |
+ stats <- summaryStats(G.call, I, FUNS=c("rowMedians", "rowMAD")) |
|
1938 |
+ Ns[, k] <- rowSums(G.call, na.rm=TRUE) |
|
1939 |
+ updateStats(stats, Ns, object, call, allele, index) |
|
1940 |
+ gc() |
|
1941 |
+ return() |
|
1942 |
+} |
|
1943 |
+ |
|
1944 |
+updateTau <- function(object, tau2, G.call, call, I, allele, index){ |
|
1945 |
+ k <- match("grandMean", batchNames(object)) |
|
1946 |
+ logI <- log2(I) |
|
1947 |
+ rm(I); gc() |
|
1948 |
+ tau2[, k] <- summaryStats(G.call, logI, FUNS="rowMAD")^2 |
|
1949 |
+ if(call==1 & allele=="A"){ |
|
1950 |
+ tau2A.AA(object)[index, ] <- tau2 |
|
1951 |
+ } |
|
1952 |
+ if(call==1 & allele=="B"){ |
|
1953 |
+ tau2B.AA(object)[index, ] <- tau2 |
|
1954 |
+ } |
|
1955 |
+ if(call==3 & allele=="A"){ |
|
1956 |
+ tau2A.BB(object)[index, ] <- tau2 |
|
1957 |
+ } |
|
1958 |
+ if(call==3 & allele=="B"){ |
|
1959 |
+ tau2B.BB(object)[index, ] <- tau2 |
|
1960 |
+ } |
|
1961 |
+ NULL |
|
1962 |
+} |
|
1963 |
+ |
|
1964 |
+updateCors <- function(cors, G.call, I){ |
|
1965 |
+ k <- match("grandMean", batchNames(object)) |
|
1966 |
+ cors[, k] <- rowCors(I[[1]]*G.call, I[[2]]*G.call, na.rm=TRUE) |
|
1967 |
+ if(call==1){ |
|
1968 |
+ corrAA(object)[index, ] <- cors |
|
1969 |
+ } |
|
1970 |
+ if(call==2){ |
|
1971 |
+ corrAB(object)[index, ] <- cors |
|
1972 |
+ } |
|
1973 |
+ if(call==3){ |
|
1974 |
+ corrBB(object)[index, ] <- cors |
|
1975 |
+ } |
|
1976 |
+} |
|
1977 |
+ |
|
1978 |
+updateStats <- function(stats, Ns, object, call, allele, tau2, index){ |
|
1979 |
+ if(call==1){ |
|
1980 |
+ Ns.AA(object)[index, ] <- Ns |
|
1981 |
+ if(allele=="A"){ |
|
1982 |
+ medianA.AA(object)[index, k] <- stats[, 1] |
|
1983 |
+ madA.AA(object)[index, k] <- stats[, 2] |
|
1984 |
+ updateTau(object, tau2, G.call, call, I, allele, index) |
|
1985 |
+ } else { |
|
1986 |
+ medianB.AA(object)[index, k] <- stats[, 1] |
|
1987 |
+ madB.AA(object)[index, k] <- stats[, 2] |
|
1988 |
+ updateTau(object, tau2, G.call, call, I, allele, index) |
|
1989 |
+ } |
|
1990 |
+ } |
|
1991 |
+ if(call==2){ |
|
1992 |
+ Ns.AB(object)[index, ] <- Ns |
|
1993 |
+ if(allele=="A"){ |
|
1994 |
+ medianA.AB(object)[index, k] <- stats[, 1] |
|
1995 |
+ madA.AB(object)[index, k] <- stats[, 2] |
|
1996 |
+ } else { |
|
1997 |
+ medianB.AB(object)[index, k] <- stats[, 1] |
|
1998 |
+ madB.AB(object)[index, k] <- stats[, 2] |
|
1999 |
+ } |
|
2000 |
+ } |
|
2001 |
+ if(call==3){ |
|
2002 |
+ Ns.BB(object)[index, ] <- Ns |
|
2003 |
+ if(allele=="A"){ |
|
2004 |
+ medianA.BB(object)[index, k] <- stats[, 1] |
|
2005 |
+ madA.BB(object)[index, k] <- stats[, 2] |
|
2006 |
+ updateTau(object, tau2, G.call, call, I, allele, index) |
|
2007 |
+ } else { |
|
2008 |
+ medianB.BB(object)[index, k] <- stats[, 1] |
|
2009 |
+ madB.BB(object)[index, k] <- stats[, 2] |
|
2010 |
+ updateTau(object, tau2, G.call, call, I, allele, index) |
|
2011 |
+ } |
|
2012 |
+ } |
|
2013 |
+} |
|
2014 |
+ |
|
1903 | 2015 |
crlmmCopynumber <- function(object, |
1904 | 2016 |
MIN.SAMPLES=10, |
1905 | 2017 |
SNRMin=5, |