Browse code

Merge branch 'collab'

* collab:
fixing vignettes dir
fixing vignettes dir
removing old vignettes
removing old vignettes
Adding GGdata to suggests
Fixed vignette for genotyping + association; Moved oligoClasses back to Depends b/c final users do need access to calls()/confs() when using crlmm()

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@64350 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 23/03/2012 17:35:35
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,183 @@
1
+%\VignetteIndexEntry{From Genotypes to Association}
2
+%\VignetteKeywords{genotype, crlmm, SNP 5, SNP 6}
3
+%\VignettePackage{crlmm}
4
+
5
+\documentclass[12pt]{article}
6
+
7
+\usepackage{amsmath}
8
+\usepackage[authoryear,round]{natbib}
9
+\usepackage{hyperref}
10
+
11
+
12
+\textwidth=6.2in
13
+\textheight=8.5in
14
+%\parskip=.3cm
15
+\oddsidemargin=.1in
16
+\evensidemargin=.1in
17
+\headheight=-.3in
18
+
19
+\newcommand{\scscst}{\scriptscriptstyle}
20
+\newcommand{\scst}{\scriptstyle}
21
+
22
+
23
+\newcommand{\Rfunction}[1]{{\texttt{#1}}}
24
+\newcommand{\Robject}[1]{{\texttt{#1}}}
25
+\newcommand{\Rpackage}[1]{{\textit{#1}}}
26
+\newcommand{\Rmethod}[1]{{\texttt{#1}}}
27
+\newcommand{\Rfunarg}[1]{{\texttt{#1}}}
28
+\newcommand{\Rclass}[1]{{\textit{#1}}}
29
+
30
+\textwidth=6.2in
31
+
32
+\bibliographystyle{plainnat}
33
+
34
+\begin{document}
35
+%\setkeys{Gin}{width=0.55\textwidth}
36
+
37
+\title{crlmm to downstream data analysis}
38
+\author{VJ Carey, B Carvalho}
39
+\date{March, 2012}
40
+\maketitle
41
+
42
+\section{Running CRLMM on a nontrivial set of CEL files}
43
+To use the \Rmethod{crlmm} algorithm, the user must load the
44
+\Rpackage{crlmm} package, as described below:
45
+<<loadPkg>>=
46
+library(crlmm)
47
+@
48
+
49
+We work with the 90 CEU samples hybridized to Affy 6.0 chips. When CEL
50
+files are available, they must be identified and passed to
51
+\Rmethod{crlmm}, as shown below. In this example, we assume that the
52
+results are stored in a variable called \Robject{crlmmResult}.
53
+<<lkd, eval=FALSE>>=
54
+celFiles <- list.celfiles()
55
+crlmmResult <- crlmm(celFiles)
56
+@
57
+
58
+Alternatively, the data aforementioned are available through the
59
+\Rpackage{hapmapsnp6} package (required minimum version $1.3.6$) and can
60
+be loaded by using:
61
+
62
+<<loadFromPkg>>=
63
+suppressPackageStartupMessages(library(hapmapsnp6))
64
+data(crlmmResult)
65
+@
66
+
67
+This is currently a \Rclass{SnpSet} object.
68
+<<lkj21>>=
69
+  class(crlmmResult)
70
+@
71
+
72
+%% In order to reduce the memory requirements for this task, we will use
73
+%% only results for chromosome 20.
74
+%%
75
+%% <<getSubset>>=
76
+%% @
77
+
78
+\section{Adding information to a \Rclass{SnpSet}}
79
+
80
+We will use the \Rpackage{GGdata} package to obtain extra information
81
+on the samples. This will be later used when building an \Rclass{eSet}
82
+extension to store the genotyping results.
83
+<<getpd>>=
84
+  suppressPackageStartupMessages(library(GGdata))
85
+  hmceuB36 <- getSS('GGdata', as.character(1:22))
86
+  pd <- phenoData(hmceuB36)
87
+  ggn <- sampleNames(pd)
88
+  preSN <- sampleNames(crlmmResult)
89
+  simpSN <- gsub("_.*", "", preSN)
90
+  if (!all.equal(simpSN, ggn)) stop("align GGdata phenoData with crlmmResult read")
91
+@
92
+
93
+The additional information obtained from \Rpackage{GGdata} can be
94
+easily combined to what is already available on \Robject{crlmmResult}.
95
+<<docl>>=
96
+  sampleNames(crlmmResult) <- simpSN
97
+  phenoData(crlmmResult) <- combine(pd, phenoData(crlmmResult))
98
+  dim(calls(crlmmResult))
99
+  dim(confs(crlmmResult, FALSE))
100
+  calls(crlmmResult)[1:10, 1:2]
101
+  confs(crlmmResult, FALSE)[1:10, 1:2]
102
+@
103
+
104
+
105
+\section{Coercing to SnpMatrix as a prelude to a GWAS}
106
+
107
+From this point on, we will use only the genotype calls. Therefore, to
108
+reduce memory requirements, we will recode the \Rpackage{crlmm} genotype
109
+calls, so the \Rpackage{snpStats} package can be used, and delete the
110
+remaining \Rmethod{crlmm} results.
111
+<<clean>>=
112
+theCalls <- t(calls(crlmmResult))-1L
113
+rm(crlmmResult)
114
+@
115
+
116
+<<morecleaning, echo=FALSE>>=
117
+gc()
118
+@
119
+
120
+SNP's for which all the samples have the same genotype are not
121
+informative for association studies. Therefore, we remove such SNP's
122
+prior to fitting the models.
123
+
124
+<<rmNonInformative>>=
125
+gtypeCounts <- rbind(AA=colSums(theCalls == 0L),
126
+                     AB=colSums(theCalls == 1L),
127
+                     BB=colSums(theCalls == 2L))
128
+gtypeCounts[, 1:5]
129
+toRemove <- which(colSums(gtypeCounts == 0) == 2L)
130
+gtypeCounts[, toRemove[1:4]]
131
+theCalls <- theCalls[, -toRemove]
132
+@
133
+
134
+The \Rpackage{snpStats} provides tools to simplify the analysis of
135
+GWAS. The snippet below shows how to load the package and convert the
136
+genotype calls to a format that \Rpackage{snpStats} is able to handle.
137
+<<lksnm>>=
138
+suppressPackageStartupMessages(library(snpStats))
139
+crlmmSM <- new("SnpMatrix", theCalls)
140
+crlmmSM
141
+@
142
+
143
+\section{Conducting a GWAS}
144
+
145
+We want to find SNP for which genotype is predictive of expression of CPNE1.
146
+We will use expression data available from GGdata, using a naive analysis.
147
+<<doa>>=
148
+suppressPackageStartupMessages(library(illuminaHumanv1.db))
149
+rmm <- revmap(illuminaHumanv1SYMBOL)
150
+mypr <- get("CPNE1", rmm)
151
+ex <- as.numeric(exprs(hmceuB36)[mypr[1],])
152
+subjdata <- pData(hmceuB36)
153
+subjdata[["ex"]] <- ex
154
+head(subjdata)
155
+@
156
+
157
+With the expression data now available in \Robject{subjdata}, we can use
158
+the tools from \Rpackage{SnpMatrix} to fit models that will be used to
159
+evaluate the association between the genotypes of each available SNP and
160
+the expression levels of CPNE1.
161
+<<model>>=
162
+gwas <- snp.rhs.tests(ex~male, data=subjdata, snp.data=crlmmSM, family="gaussian")
163
+ok <- which(p.value(gwas) < 1e-10)
164
+gwas[ok,]
165
+@
166
+
167
+<<dopl,fig=TRUE>>=
168
+snp <- names(gwas[ok,])[1]
169
+gtypes <- theCalls[,snp]+1L
170
+boxplot(ex~gtypes, xlab=paste("Genotype Call for", snp),
171
+        ylab="CPNE1 Expression", xaxt="n", range=0)
172
+points(ex~jitter(gtypes), col=gtypes, pch=19)
173
+axis(1, at=1:3, labels=c("AA", "AB", "BB"))
174
+@
175
+
176
+\section{Session Info}
177
+
178
+This vignette was created using the following packages:
179
+<<lksess>>=
180
+sessionInfo()
181
+@
182
+
183
+\end{document}