Browse code

Merge branch 'collab'

* collab:
fixing vignettes dir
fixing vignettes dir
removing old vignettes
removing old vignettes
Adding GGdata to suggests
Fixed vignette for genotyping + association; Moved oligoClasses back to Depends b/c final users do need access to calls()/confs() when using crlmm()

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@64350 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 23/03/2012 17:35:35
Showing 19 changed files

... ...
@@ -1,15 +1,14 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.13.14
4
+Version: 1.13.19
5 5
 Author: Benilton S Carvalho, Robert Scharpf, Matt Ritchie, Ingo Ruczinski, Rafael A Irizarry
6 6
 Maintainer: Benilton S Carvalho <Benilton.Carvalho@cancer.org.uk>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
7 7
 Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 5.0, 6.0, and Illumina platforms
8 8
 License: Artistic-2.0
9
-Depends: R (>= 2.14.0)
9
+Depends: R (>= 2.14.0), oligoClasses (>= 1.17.36)
10 10
 Imports: methods,
11 11
          Biobase (>= 2.15.4),
12
-         oligoClasses (>= 1.17.36),
13 12
          BiocGenerics,
14 13
          affyio (>= 1.23.2),
15 14
          ellipse,
... ...
@@ -24,6 +23,7 @@ Imports: methods,
24 23
 	 ff
25 24
 Suggests: hapmapsnp6,
26 25
           genomewidesnp6Crlmm (>= 1.0.4),
26
+          GGdata,
27 27
           snpStats,
28 28
 	  ellipse,
29 29
 	  RUnit
30 30
deleted file mode 100644
31 31
Binary files a/inst/doc/IlluminaPreprocessCN.pdf and /dev/null differ
32 32
deleted file mode 100644
33 33
Binary files a/inst/doc/crlmmDownstream.pdf and /dev/null differ
34 34
deleted file mode 100644
... ...
@@ -1,128 +0,0 @@
1
-%\VignetteIndexEntry{crlmm Vignette - Downstream Analysis}
2
-%\VignetteKeywords{genotype, crlmm, SNP 5, SNP 6}
3
-%\VignettePackage{crlmm}
4
-
5
-\documentclass[12pt]{article}
6
-
7
-\usepackage{amsmath,pstricks}
8
-\usepackage[authoryear,round]{natbib}
9
-\usepackage{hyperref}
10
-
11
-
12
-\textwidth=6.2in
13
-\textheight=8.5in
14
-%\parskip=.3cm
15
-\oddsidemargin=.1in
16
-\evensidemargin=.1in
17
-\headheight=-.3in
18
-
19
-\newcommand{\scscst}{\scriptscriptstyle}
20
-\newcommand{\scst}{\scriptstyle}
21
-
22
-
23
-\newcommand{\Rfunction}[1]{{\texttt{#1}}}
24
-\newcommand{\Robject}[1]{{\texttt{#1}}}
25
-\newcommand{\Rpackage}[1]{{\textit{#1}}}
26
-\newcommand{\Rmethod}[1]{{\texttt{#1}}}
27
-\newcommand{\Rfunarg}[1]{{\texttt{#1}}}
28
-\newcommand{\Rclass}[1]{{\textit{#1}}}
29
-
30
-\textwidth=6.2in
31
-
32
-\bibliographystyle{plainnat} 
33
- 
34
-\begin{document}
35
-%\setkeys{Gin}{width=0.55\textwidth}
36
-
37
-\title{crlmm to downstream data analysis}
38
-\author{VJ Carey}
39
-\maketitle
40
-
41
-\section{Running CRLMM on a nontrivial set of CEL files}
42
-
43
-We work with the 90 CEU samples hybridized to Affy 6.0 chips, which
44
-are assumed to be in the current directory. First, we identify the
45
-files and run \Rmethod{crlmm}. The results will be saved to the
46
-variable \Robject{crlmmResult}.
47
-<<lkd>>=
48
-library(oligoClasses}
49
-library(crlmm)
50
-celFiles <- list.celfiles()
51
-celFiles[1:4]
52
-if (!exists("crlmmResult")) {
53
- if (file.exists("crlmmResult.rda")) load("crlmmResult.rda")
54
- else {
55
-    crlmmResult <- crlmm(celFiles)
56
-    save(crlmmResult, file="crlmmResult.rda")
57
-    }
58
-}
59
-@
60
-
61
-This is currently a \Rclass{SnpSet} object.
62
-<<lkj21>>=
63
-  class(crlmmResult)
64
-@
65
-
66
-\section{Adding information to a \Rclass{SnpSet}}
67
-
68
-We will use the \Rpackage{GGdata} package to obtain extra information
69
-on the samples. This will be later used when building an \Rclass{eSet}
70
-extension to store the genotyping results.
71
-<<getpd>>=
72
-  library(GGdata)
73
-  if (!exists("hmceuB36")) data(hmceuB36)
74
-  pd <- phenoData(hmceuB36)
75
-  ggn <- sampleNames(pd)
76
-  preSN <- sampleNames(crlmmResult)
77
-  simpSN <- gsub("_.*", "", preSN)
78
-  if (!all.equal(simpSN, ggn)) stop("align GGdata phenoData with crlmmResult read")
79
-@ 
80
-
81
-The additional information obtained from \Rpackage{GGdata} can be
82
-easily combined to what is already available on \Robject{crlmmResult}.
83
-<<docl>>=
84
-  sampleNames(crlmmResult) <- simpSN
85
-  phenoData(crlmmResult) <- combine(pd, phenoData(crlmmResult))
86
-  dim(calls(crlmmResult))
87
-  dim(confs(crlmmResult))
88
-  calls(crlmmResult)[1:10, 1:2]
89
-  confs(crlmmResult)[1:10, 1:2]
90
-@
91
-
92
-\section{Coercing to snp.matrix as a prelude to a GWAS}
93
-
94
-<<lksnm>>=
95
-library(snpMatrix)
96
-crlmmSM <- as(t(calls(crlmmResult))-1, "snp.matrix")
97
-crlmmSM
98
-@
99
-
100
-\section{Conducting a GWAS}
101
-
102
-We want to find SNP for which rare allele count is predictive of expression of CPNE1.
103
-We will use expression data available from GGdata.  This is a very naive analysis.
104
-<<doa>>=
105
-library(illuminaHumanv1.db)
106
-rmm <- revmap(illuminaHumanv1SYMBOL)
107
-mypr <- get("CPNE1", rmm)
108
-ex <- as.numeric(exprs(hmceuB36)[mypr[1],])
109
-subjdata <- pData(hmceuB36)
110
-subjdata[["ex"]] <- ex
111
-gwas <- snp.rhs.tests(ex~male, data=subjdata, snp.data=crlmmSM, family="gaussian")
112
-ok <- which(p.value(gwas) < 1e-10)
113
-gwas[ok,]
114
-<<dopl,fig=TRUE>>=
115
-plot(ex~calls(crlmmResult)["SNP_A-4208858",],
116
-     xlab="Genotype Call for SNP_A-4208858",
117
-     ylab="Expression", xaxt="n")
118
-axis(1, at=1:3, labels=c("AA", "AB", "BB"))
119
-@
120
-
121
-\section{Session Info}
122
-
123
-This vignette was created using the following packages:
124
-<<lksess>>=
125
-sessionInfo()
126
-@
127
-
128
-\end{document}
129 0
deleted file mode 100644
130 1
Binary files a/inst/scripts/crlmmDownstream.pdf and /dev/null differ
131 2
new file mode 100644
... ...
@@ -0,0 +1,183 @@
1
+%\VignetteIndexEntry{From Genotypes to Association}
2
+%\VignetteKeywords{genotype, crlmm, SNP 5, SNP 6}
3
+%\VignettePackage{crlmm}
4
+
5
+\documentclass[12pt]{article}
6
+
7
+\usepackage{amsmath}
8
+\usepackage[authoryear,round]{natbib}
9
+\usepackage{hyperref}
10
+
11
+
12
+\textwidth=6.2in
13
+\textheight=8.5in
14
+%\parskip=.3cm
15
+\oddsidemargin=.1in
16
+\evensidemargin=.1in
17
+\headheight=-.3in
18
+
19
+\newcommand{\scscst}{\scriptscriptstyle}
20
+\newcommand{\scst}{\scriptstyle}
21
+
22
+
23
+\newcommand{\Rfunction}[1]{{\texttt{#1}}}
24
+\newcommand{\Robject}[1]{{\texttt{#1}}}
25
+\newcommand{\Rpackage}[1]{{\textit{#1}}}
26
+\newcommand{\Rmethod}[1]{{\texttt{#1}}}
27
+\newcommand{\Rfunarg}[1]{{\texttt{#1}}}
28
+\newcommand{\Rclass}[1]{{\textit{#1}}}
29
+
30
+\textwidth=6.2in
31
+
32
+\bibliographystyle{plainnat}
33
+
34
+\begin{document}
35
+%\setkeys{Gin}{width=0.55\textwidth}
36
+
37
+\title{crlmm to downstream data analysis}
38
+\author{VJ Carey, B Carvalho}
39
+\date{March, 2012}
40
+\maketitle
41
+
42
+\section{Running CRLMM on a nontrivial set of CEL files}
43
+To use the \Rmethod{crlmm} algorithm, the user must load the
44
+\Rpackage{crlmm} package, as described below:
45
+<<loadPkg>>=
46
+library(crlmm)
47
+@
48
+
49
+We work with the 90 CEU samples hybridized to Affy 6.0 chips. When CEL
50
+files are available, they must be identified and passed to
51
+\Rmethod{crlmm}, as shown below. In this example, we assume that the
52
+results are stored in a variable called \Robject{crlmmResult}.
53
+<<lkd, eval=FALSE>>=
54
+celFiles <- list.celfiles()
55
+crlmmResult <- crlmm(celFiles)
56
+@
57
+
58
+Alternatively, the aforementioned data are available through the
59
+\Rpackage{hapmapsnp6} package (required minimum version $1.3.6$) and can
60
+be loaded by using:
61
+
62
+<<loadFromPkg>>=
63
+suppressPackageStartupMessages(library(hapmapsnp6))
64
+data(crlmmResult)
65
+@
66
+
67
+This is currently a \Rclass{SnpSet} object.
68
+<<lkj21>>=
69
+  class(crlmmResult)
70
+@
71
+
72
+%% In order to reduce the memory requirements for this task, we will use
73
+%% only results for chromosome 20.
74
+%%
75
+%% <<getSubset>>=
76
+%% @
77
+
78
+\section{Adding information to a \Rclass{SnpSet}}
79
+
80
+We will use the \Rpackage{GGdata} package to obtain extra information
81
+on the samples. This will be later used when building an \Rclass{eSet}
82
+extension to store the genotyping results.
83
+<<getpd>>=
84
+  suppressPackageStartupMessages(library(GGdata))
85
+  hmceuB36 <- getSS('GGdata', as.character(1:22))
86
+  pd <- phenoData(hmceuB36)
87
+  ggn <- sampleNames(pd)
88
+  preSN <- sampleNames(crlmmResult)
89
+  simpSN <- gsub("_.*", "", preSN)
90
+  if (!isTRUE(all.equal(simpSN, ggn))) stop("align GGdata phenoData with crlmmResult read")  # all.equal() yields text, not FALSE, on mismatch
91
+@
92
+
93
+The additional information obtained from \Rpackage{GGdata} can be
94
+easily combined with what is already available in \Robject{crlmmResult}.
95
+<<docl>>=
96
+  sampleNames(crlmmResult) <- simpSN
97
+  phenoData(crlmmResult) <- combine(pd, phenoData(crlmmResult))
98
+  dim(calls(crlmmResult))
99
+  dim(confs(crlmmResult, FALSE))
100
+  calls(crlmmResult)[1:10, 1:2]
101
+  confs(crlmmResult, FALSE)[1:10, 1:2]
102
+@
103
+
104
+
105
+\section{Coercing to SnpMatrix as a prelude to a GWAS}
106
+
107
+From this point on, we will use only the genotype calls. Therefore, to
108
+reduce memory requirements, we will recode the \Rpackage{crlmm} genotype
109
+calls, so the \Rpackage{snpStats} package can be used, and delete the
110
+remaining \Rmethod{crlmm} results.
111
+<<clean>>=
112
+theCalls <- t(calls(crlmmResult))-1L
113
+rm(crlmmResult)
114
+@
115
+
116
+<<morecleaning, echo=FALSE>>=
117
+gc()
118
+@
119
+
120
+SNPs for which all the samples have the same genotype are not
121
+informative for association studies. Therefore, we remove such SNPs
122
+prior to fitting the models.
123
+
124
+<<rmNonInformative>>=
125
+gtypeCounts <- rbind(AA=colSums(theCalls == 0L),
126
+                     AB=colSums(theCalls == 1L),
127
+                     BB=colSums(theCalls == 2L))
128
+gtypeCounts[, 1:5]
129
+toRemove <- which(colSums(gtypeCounts == 0) == 2L)  # SNPs where two of the three genotype counts are zero
130
+gtypeCounts[, head(toRemove, 4L)]
131
+theCalls <- theCalls[, setdiff(seq_len(ncol(theCalls)), toRemove), drop=FALSE]  # -toRemove would drop every column if toRemove were empty
132
+@
133
+
134
+The \Rpackage{snpStats} package provides tools to simplify the analysis of
135
+GWAS. The snippet below shows how to load the package and convert the
136
+genotype calls to a format that \Rpackage{snpStats} is able to handle.
137
+<<lksnm>>=
138
+suppressPackageStartupMessages(library(snpStats))
139
+crlmmSM <- new("SnpMatrix", theCalls)
140
+crlmmSM
141
+@
142
+
143
+\section{Conducting a GWAS}
144
+
145
+We want to find SNPs for which genotype is predictive of expression of CPNE1.
146
+We will use expression data available from GGdata, using a naive analysis.
147
+<<doa>>=
148
+suppressPackageStartupMessages(library(illuminaHumanv1.db))
149
+rmm <- revmap(illuminaHumanv1SYMBOL)
150
+mypr <- get("CPNE1", rmm)
151
+ex <- as.numeric(exprs(hmceuB36)[mypr[1],])
152
+subjdata <- pData(hmceuB36)
153
+subjdata[["ex"]] <- ex
154
+head(subjdata)
155
+@
156
+
157
+With the expression data now available in \Robject{subjdata}, we can use
158
+the tools from \Rpackage{snpStats} to fit models that will be used to
159
+evaluate the association between the genotypes of each available SNP and
160
+the expression levels of CPNE1.
161
+<<model>>=
162
+gwas <- snp.rhs.tests(ex~male, data=subjdata, snp.data=crlmmSM, family="gaussian")
163
+ok <- which(p.value(gwas) < 1e-10)
164
+gwas[ok,]
165
+@
166
+
167
+<<dopl,fig=TRUE>>=
168
+snp <- names(gwas[ok,])[1]
169
+gtypes <- theCalls[,snp]+1L
170
+boxplot(ex~gtypes, xlab=paste("Genotype Call for", snp),
171
+        ylab="CPNE1 Expression", xaxt="n", range=0)
172
+points(ex~jitter(gtypes), col=gtypes, pch=19)
173
+axis(1, at=1:3, labels=c("AA", "AB", "BB"))
174
+@
175
+
176
+\section{Session Info}
177
+
178
+This vignette was created using the following packages:
179
+<<lksess>>=
180
+sessionInfo()
181
+@
182
+
183
+\end{document}
0 184
new file mode 100644
1 185
Binary files /dev/null and b/inst/scripts/gtypeDownstream.pdf differ
2 186
similarity index 100%
3 187
rename from inst/doc/AffyGW.Rnw
4 188
rename to vignettes/AffyGW.Rnw
5 189
new file mode 100644
6 190
Binary files /dev/null and b/vignettes/AffyGW.pdf differ
7 191
similarity index 100%
8 192
rename from inst/doc/CopyNumberOverview.Rnw
9 193
rename to vignettes/CopyNumberOverview.Rnw
10 194
similarity index 100%
11 195
rename from inst/doc/IlluminaPreprocessCN.Rnw
12 196
rename to vignettes/IlluminaPreprocessCN.Rnw
13 197
new file mode 100644
14 198
Binary files /dev/null and b/vignettes/IlluminaPreprocessCN.pdf differ
15 199
similarity index 100%
16 200
rename from inst/doc/Infrastructure.Rnw
17 201
rename to vignettes/Infrastructure.Rnw
18 202
similarity index 100%
19 203
rename from inst/doc/Infrastructure.pdf
20 204
rename to vignettes/Infrastructure.pdf
21 205
similarity index 60%
22 206
rename from inst/doc/Makefile
23 207
rename to vignettes/Makefile
... ...
@@ -2,10 +2,10 @@ All: vignettes clean
2 2
 
3 3
 ## all pdfs must be generated from the first target of the Makefile
4 4
 vignettes: AffyGW.tex
5
-	cp ../scripts/AffyGW.pdf .
6
-	cp ../scripts/IlluminaPreprocessCN.pdf .
7
-	cp ../scripts/Infrastructure.pdf .
8
-	cp ../scripts/crlmmDownstream.pdf .
5
+	cp ../inst/scripts/AffyGW.pdf .
6
+	cp ../inst/scripts/IlluminaPreprocessCN.pdf .
7
+	cp ../inst/scripts/Infrastructure.pdf .
8
+	cp ../inst/scripts/gtypeDownstream.pdf .
9 9
 	texi2dvi --pdf genotyping.tex
10 10
 	texi2dvi --pdf CopyNumberOverview.tex
11 11
 
12 12
similarity index 100%
13 13
rename from inst/doc/crlmmIllumina.pdf
14 14
rename to vignettes/crlmmIllumina.pdf
15 15
similarity index 100%
16 16
rename from inst/doc/genotyping.Rnw
17 17
rename to vignettes/genotyping.Rnw
18 18
similarity index 100%
19 19
rename from inst/doc/crlmmDownstream.Rnw
20 20
rename to vignettes/gtypeDownstream.Rnw
21 21
new file mode 100644
22 22
Binary files /dev/null and b/vignettes/gtypeDownstream.pdf differ