* collab:
fixing vignettes dir
fixing vignettes dir
removing old vignettes
removing old vignettes
Adding GGdata to suggests
Fixed vignette for genotyping + association; Moved oligoClasses back to Depends b/c final users do need access to calls()/confs() when using crlmm()
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@64350 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,183 @@ |
1 |
+%\VignetteIndexEntry{From Genotypes to Association} |
|
2 |
+%\VignetteKeywords{genotype, crlmm, SNP 5, SNP 6} |
|
3 |
+%\VignettePackage{crlmm} |
|
4 |
+ |
|
5 |
+\documentclass[12pt]{article} |
|
6 |
+ |
|
7 |
+\usepackage{amsmath} |
|
8 |
+\usepackage[authoryear,round]{natbib} |
|
9 |
+\usepackage{hyperref} |
|
10 |
+ |
|
11 |
+ |
|
12 |
+\textwidth=6.2in |
|
13 |
+\textheight=8.5in |
|
14 |
+%\parskip=.3cm |
|
15 |
+\oddsidemargin=.1in |
|
16 |
+\evensidemargin=.1in |
|
17 |
+\headheight=-.3in |
|
18 |
+ |
|
19 |
+\newcommand{\scscst}{\scriptscriptstyle} |
|
20 |
+\newcommand{\scst}{\scriptstyle} |
|
21 |
+ |
|
22 |
+ |
|
23 |
+\newcommand{\Rfunction}[1]{{\texttt{#1}}} |
|
24 |
+\newcommand{\Robject}[1]{{\texttt{#1}}} |
|
25 |
+\newcommand{\Rpackage}[1]{{\textit{#1}}} |
|
26 |
+\newcommand{\Rmethod}[1]{{\texttt{#1}}} |
|
27 |
+\newcommand{\Rfunarg}[1]{{\texttt{#1}}} |
|
28 |
+\newcommand{\Rclass}[1]{{\textit{#1}}} |
|
29 |
+ |
|
30 |
+\textwidth=6.2in |
|
31 |
+ |
|
32 |
+\bibliographystyle{plainnat} |
|
33 |
+ |
|
34 |
+\begin{document} |
|
35 |
+%\setkeys{Gin}{width=0.55\textwidth} |
|
36 |
+ |
|
37 |
+\title{crlmm to downstream data analysis} |
|
38 |
+\author{VJ Carey, B Carvalho} |
|
39 |
+\date{March, 2012} |
|
40 |
+\maketitle |
|
41 |
+ |
|
42 |
+\section{Running CRLMM on a nontrivial set of CEL files} |
|
43 |
+To use the \Rmethod{crlmm} algorithm, the user must load the |
|
44 |
+\Rpackage{crlmm} package, as described below: |
|
45 |
+<<loadPkg>>= |
|
46 |
+library(crlmm) |
|
47 |
+@ |
|
48 |
+ |
|
49 |
+We work with the 90 CEU samples hybridized to Affy 6.0 chips. When CEL |
|
50 |
+files are available, they must be identified and passed to |
|
51 |
+\Rmethod{crlmm}, as shown below. In this example, we assume that the |
|
52 |
+results are stored in a variable called \Robject{crlmmResult}. |
|
53 |
+<<lkd, eval=FALSE>>= |
|
54 |
+celFiles <- list.celfiles() |
|
55 |
+crlmmResult <- crlmm(celFiles) |
|
56 |
+@ |
|
57 |
+ |
|
58 |
+Alternatively, the data aforementioned are available through the |
|
59 |
+\Rpackage{hapmapsnp6} package (required minimum version $1.3.6$) and can |
|
60 |
+be loaded by using: |
|
61 |
+ |
|
62 |
+<<loadFromPkg>>= |
|
63 |
+suppressPackageStartupMessages(library(hapmapsnp6)) |
|
64 |
+data(crlmmResult) |
|
65 |
+@ |
|
66 |
+ |
|
67 |
+This is currently a \Rclass{SnpSet} object. |
|
68 |
+<<lkj21>>= |
|
69 |
+ class(crlmmResult) |
|
70 |
+@ |
|
71 |
+ |
|
72 |
+%% In order to reduce the memory requirements for this task, we will use |
|
73 |
+%% only results for chromosome 20. |
|
74 |
+%% |
|
75 |
+%% <<getSubset>>= |
|
76 |
+%% @ |
|
77 |
+ |
|
78 |
+\section{Adding information to a \Rclass{SnpSet}} |
|
79 |
+ |
|
80 |
+We will use the \Rpackage{GGdata} package to obtain extra information |
|
81 |
+on the samples. This will be later used when building an \Rclass{eSet} |
|
82 |
+extension to store the genotyping results. |
|
83 |
+<<getpd>>= |
|
84 |
+ suppressPackageStartupMessages(library(GGdata)) |
|
85 |
+ hmceuB36 <- getSS('GGdata', as.character(1:22)) |
|
86 |
+ pd <- phenoData(hmceuB36) |
|
87 |
+ ggn <- sampleNames(pd) |
|
88 |
+ preSN <- sampleNames(crlmmResult) |
|
89 |
+ simpSN <- gsub("_.*", "", preSN) |
|
90 |
+ if (!all.equal(simpSN, ggn)) stop("align GGdata phenoData with crlmmResult read") |
|
91 |
+@ |
|
92 |
+ |
|
93 |
+The additional information obtained from \Rpackage{GGdata} can be |
|
94 |
+easily combined to what is already available on \Robject{crlmmResult}. |
|
95 |
+<<docl>>= |
|
96 |
+ sampleNames(crlmmResult) <- simpSN |
|
97 |
+ phenoData(crlmmResult) <- combine(pd, phenoData(crlmmResult)) |
|
98 |
+ dim(calls(crlmmResult)) |
|
99 |
+ dim(confs(crlmmResult, FALSE)) |
|
100 |
+ calls(crlmmResult)[1:10, 1:2] |
|
101 |
+ confs(crlmmResult, FALSE)[1:10, 1:2] |
|
102 |
+@ |
|
103 |
+ |
|
104 |
+ |
|
105 |
+\section{Coercing to SnpMatrix as a prelude to a GWAS} |
|
106 |
+ |
|
107 |
+From this point on, we will use only the genotype calls. Therefore, to |
|
108 |
+reduce memory requirements, we will recode the \Rpackage{crlmm} genotype |
|
109 |
+calls, so the \Rpackage{snpStats} package can be used, and delete the |
|
110 |
+remaining \Rmethod{crlmm} results. |
|
111 |
+<<clean>>= |
|
112 |
+theCalls <- t(calls(crlmmResult))-1L |
|
113 |
+rm(crlmmResult) |
|
114 |
+@ |
|
115 |
+ |
|
116 |
+<<morecleaning, echo=FALSE>>= |
|
117 |
+gc() |
|
118 |
+@ |
|
119 |
+ |
|
120 |
+SNP's for which all the samples have the same genotype are not |
|
121 |
+informative for association studies. Therefore, we remove such SNP's |
|
122 |
+prior to fitting the models. |
|
123 |
+ |
|
124 |
+<<rmNonInformative>>= |
|
125 |
+gtypeCounts <- rbind(AA=colSums(theCalls == 0L), |
|
126 |
+ AB=colSums(theCalls == 1L), |
|
127 |
+ BB=colSums(theCalls == 2L)) |
|
128 |
+gtypeCounts[, 1:5] |
|
129 |
+toRemove <- which(colSums(gtypeCounts == 0) == 2L) |
|
130 |
+gtypeCounts[, toRemove[1:4]] |
|
131 |
+theCalls <- theCalls[, -toRemove] |
|
132 |
+@ |
|
133 |
+ |
|
134 |
+The \Rpackage{snpStats} provides tools to simplify the analysis of |
|
135 |
+GWAS. The snippet below shows how to load the package and convert the |
|
136 |
+genotype calls to a format that \Rpackage{snpStats} is able to handle. |
|
137 |
+<<lksnm>>= |
|
138 |
+suppressPackageStartupMessages(library(snpStats)) |
|
139 |
+crlmmSM <- new("SnpMatrix", theCalls) |
|
140 |
+crlmmSM |
|
141 |
+@ |
|
142 |
+ |
|
143 |
+\section{Conducting a GWAS} |
|
144 |
+ |
|
145 |
+We want to find SNP for which genotype is predictive of expression of CPNE1. |
|
146 |
+We will use expression data available from GGdata, using a naive analysis. |
|
147 |
+<<doa>>= |
|
148 |
+suppressPackageStartupMessages(library(illuminaHumanv1.db)) |
|
149 |
+rmm <- revmap(illuminaHumanv1SYMBOL) |
|
150 |
+mypr <- get("CPNE1", rmm) |
|
151 |
+ex <- as.numeric(exprs(hmceuB36)[mypr[1],]) |
|
152 |
+subjdata <- pData(hmceuB36) |
|
153 |
+subjdata[["ex"]] <- ex |
|
154 |
+head(subjdata) |
|
155 |
+@ |
|
156 |
+ |
|
157 |
+With the expression data now available in \Robject{subjdata}, we can use |
|
158 |
+the tools from \Rpackage{SnpMatrix} to fit models that will be used to |
|
159 |
+evaluate the association between the genotypes of each available SNP and |
|
160 |
+the expression levels of CPNE1. |
|
161 |
+<<model>>= |
|
162 |
+gwas <- snp.rhs.tests(ex~male, data=subjdata, snp.data=crlmmSM, family="gaussian") |
|
163 |
+ok <- which(p.value(gwas) < 1e-10) |
|
164 |
+gwas[ok,] |
|
165 |
+@ |
|
166 |
+ |
|
167 |
+<<dopl,fig=TRUE>>= |
|
168 |
+snp <- names(gwas[ok,])[1] |
|
169 |
+gtypes <- theCalls[,snp]+1L |
|
170 |
+boxplot(ex~gtypes, xlab=paste("Genotype Call for", snp), |
|
171 |
+ ylab="CPNE1 Expression", xaxt="n", range=0) |
|
172 |
+points(ex~jitter(gtypes), col=gtypes, pch=19) |
|
173 |
+axis(1, at=1:3, labels=c("AA", "AB", "BB")) |
|
174 |
+@ |
|
175 |
+ |
|
176 |
+\section{Session Info} |
|
177 |
+ |
|
178 |
+This vignette was created using the following packages: |
|
179 |
+<<lksess>>= |
|
180 |
+sessionInfo() |
|
181 |
+@ |
|
182 |
+ |
|
183 |
+\end{document} |