Browse code

Updated vignette.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/edge@102595 bc3139a8-67e5-0310-9ffc-ced21a209358

John D Storey authored on 16/04/2015 20:13:59
Showing 2 changed files

... ...
@@ -118,35 +118,59 @@ experiments, see section~\ref{sec:citepackage}.
118 118
 
119 119
 \section{Citing this package}
120 120
 \label{sec:citepackage}
121
-\textbf{\bibentry{storey:2007}} \\
122
-Theory paper that introduces the optimal discovery procedure and shows that it
123
-maximizes the expected true positive results for each number of fixed false
124
-positive results. The optimality is closely related to the false discovery rate.
125 121
 
126
-\textbf{\bibentry{storey:etal:2007}} \\
127
-Discusses various ways of estimating the ODP statistic with applications to
128
-microarray experiments.
122
+{\bf When reporting results involving the estimation of false discovery rate or q-value quantities, please cite:}
129 123
 
130
-\textbf{\bibentry{woo:leek:storey:2011}} \\
131
-Previous implementations of the ODP are computationally infeasible for a large
132
-number of hypothesis tests. This paper introduces a computationally efficient
133
-implementation of ODP that this package is based on.
124
+\bibentry{Storey:2002fc}
125
+
126
+\bibentry{Storey:2003il}
127
+
128
+{\bf When reporting results involving the analysis of time course studies, please cite:}
129
+
130
+\bibentry{storey:2005}
131
+% A methodology for analyzing time course microarray data is introduced and
132
+% applied to two time course studies on humans.
133
+
134
+{\bf When reporting results involving use of the optimal discovery procedure ({\tt odp}), please cite:}
135
+
136
+\bibentry{storey:2007}
137
+% Theory paper that introduces the optimal discovery procedure and shows that it
138
+% maximizes the expected true positive results for each number of fixed false
139
+% positive results. The optimality is closely related to the false discovery rate.
140
+
141
+\bibentry{storey:etal:2007}
142
+% Discusses various ways of estimating the ODP statistic with applications to
143
+% microarray experiments.
144
+
145
+\bibentry{woo:leek:storey:2011}
146
+% Previous implementations of the ODP are computationally infeasible for a large
147
+% number of hypothesis tests. This paper introduces a computationally efficient
148
+% implementation of ODP that this package is based on.
149
+
150
+{\bf When reporting results involving surrogate variable analysis ({\tt apply\_sva}), please cite:}
151
+
152
+\bibentry{leek:2007}
153
+
154
+\bibentry{Leek:2008qf}
155
+
156
+{\bf When reporting results involving supervised normalization of microarrays ({\tt apply\_snm}), please cite:}
157
+
158
+\bibentry{mecham:2010}
159
+
160
+{\bf To cite the {\tt edge} R package itself, please type the following to retrieve the citation:}
161
+
162
+<<citepackage,cache=FALSE>>=
163
+citation("edge")
164
+@
134 165
 
135
-\textbf{\bibentry{storey:2005}} \\
136
-A methodology for analyzing time course microarray data is introduced and
137
-applied to two time course studies on humans.
138 166
 
139 167
 \section{Getting help}
140
-Hopefully, most questions relating to the package will be answered in this
141
-vignette but to get a more detailed account of how to use the functions simply
142
-type within R:
143
-<<help_qvalue>>=
168
+Many questions about {\tt edge} will hopefully be answered by this documentation and references therein.  As with any R package, detailed information on functions, their arguments and values, can be obtained in the help files. To view the
169
+help for {\tt edge} within R, type
170
+<<help_edge>>=
144 171
 help(package="edge")
145 172
 @
146
-\noindent If you identify bugs related to basic usage please contact the
147
-authors directly.  Otherwise, any questions or problems regarding \edge will
148
-most efficiently be addressed on the Bioconductor support site,
149
-\url{https://support.bioconductor.org/}.
173
+\noindent If you identify bugs related to basic usage please contact the authors directly, preferably via GitHub at \url{https://github.com/jdstorey/edge/issues}.  Otherwise, any questions or problems regarding {\tt edge} will most efficiently be addressed on the Bioconductor support site, \url{https://support.bioconductor.org/}.
150 174
 
151 175
 \section{Quick start guide}
152 176
 To get started, first load the {\tt kidney} dataset included in the package:
... ...
@@ -1,3 +1,45 @@
1
+
2
+@article{Storey:2002fc,
3
+  Abstract = {Multiple-hypothesis testing involves guarding against much more complicated errors than single-hypothesis testing. Whereas we typically control the type I error rate for a single-hypothesis test, a compound error rate is controlled for multiple-hypothesis tests. For example, controlling the false discovery rate FDR traditionally involves intricate sequential p-value rejection methods based on the observed data. Whereas a sequential p-value method fixes the error rate and estimates its corresponding rejection region, we propose the opposite approach-we fix the rejection region and then estimate its corresponding error rate. This new approach offers increased applicability, accuracy and power. We apply the methodology to both the positive false discovery rate pFDR and FDR, and provide evidence for its benefits. It is shown that pFDR is probably the quantity of interest over FDR. Also discussed is the calculation of the q-value, the pFDR analogue of the p-value, which eliminates the need to set the error rate beforehand as is traditionally done. Some simple numerical examples are presented that show that this new approach can yield an increase of over eight times in power compared with the Benjamini-Hochberg FDR method.},
4
+	Address = {108 COWLEY RD, OXFORD OX4 1JF, OXON, ENGLAND},
5
+	Author = {Storey, JD},
6
+	Date-Added = {2011-10-30 22:26:25 -0400},
7
+	Date-Modified = {2011-10-30 22:26:25 -0400},
8
+	Isi = {000177425500009},
9
+	Isi-Recid = {126051257},
10
+	Isi-Ref-Recids = {112504863 90155838 115373815 122784094 87253760 119531800 126051258 126051259 119668320 112504865},
11
+	Journal = {Journal of the Royal Statistical Society Series B-Statistical Methodology},
12
+	Keywords = {false discovery rate; multiple comparisons; positive false discovery rate; p-values; q-values; sequential p-value methods; simultaneous inference},
13
+	Pages = {479--498},
14
+	Publisher = {BLACKWELL PUBL LTD},
15
+	Times-Cited = {1132},
16
+	Title = {A direct approach to false discovery rates},
17
+	Volume = {64},
18
+	Year = {2002},
19
+}
20
+
21
+
22
+@article{Storey:2003il,
23
+  Abstract = {With the increase in genomewide experiments and the sequencing of multiple genomes, the analysis of large data sets has become commonplace in biology. It is often the case that thousands of features in a genomewide data set are tested against some null hypothesis, where a number of features are expected to be significant. Here we propose an approach to measuring statistical significance in these genomewide studies based on the concept of the false discovery rate. This approach offers a sensible balance between the number of true and false positives that is automatically calibrated and easily interpreted. In doing so, a measure of statistical significance called the q value is associated with each tested feature. The q value is similar to the well known p value, except it is a measure of significance in terms of the false discovery rate rather than the false positive rate. Our approach avoids a flood of false positive results, while offering a more liberal criterion than what has been used in genome scans for linkage.},
24
+	Author = {Storey, John D and Tibshirani, Robert},
25
+	Date-Added = {2011-10-30 22:16:49 -0400},
26
+	Date-Modified = {2011-10-30 22:16:49 -0400},
27
+	Doi = {10.1073/pnas.1530509100},
28
+	Journal = {Proc Natl Acad Sci U S A},
29
+	Journal-Full = {Proceedings of the National Academy of Sciences of the United States of America},
30
+	Mesh = {Algorithms; Alternative Splicing; Animals; Binding Sites; Exons; Gene Expression Regulation; Genetic Linkage; Genetic Techniques; Genome; Humans; Oligonucleotide Array Sequence Analysis; Statistics as Topic; Transcription, Genetic},
31
+	Month = {Aug},
32
+	Number = {16},
33
+	Pages = {9440--9445},
34
+	Pmc = {PMC170937},
35
+	Pmid = {12883005},
36
+	Pst = {ppublish},
37
+	Title = {Statistical significance for genomewide studies},
38
+	Volume = {100},
39
+	Year = {2003},
40
+}
41
+
42
+
1 43
 @article{woo:leek:storey:2011,
2 44
 author = {Woo, Sangsoon and Leek, Jeffrey T. and Storey, John D.},
3 45
 title = {A computationally efficient modular optimal discovery procedure},
... ...
@@ -141,6 +183,28 @@ eprint = {
141 183
 }
142 184
 
143 185
 
186
+@article{Leek:2008qf,
187
+  Abstract = {We develop a general framework for performing large-scale significance testing in the presence of arbitrarily strong dependence. We derive a low-dimensional set of random vectors, called a dependence kernel, that fully captures the dependence structure in an observed high-dimensional dataset. This result shows a surprising reversal of the "curse of dimensionality" in the high-dimensional hypothesis testing setting. We show theoretically that conditioning on a dependence kernel is sufficient to render statistical tests independent regardless of the level of dependence in the observed data. This framework for multiple testing dependence has implications in a variety of common multiple testing problems, such as in gene expression studies, brain imaging, and spatial epidemiology.},
188
+	Author = {Leek, Jeffrey T and Storey, John D},
189
+	Date-Added = {2011-10-30 22:16:12 -0400},
190
+	Date-Modified = {2011-10-30 22:16:12 -0400},
191
+	Doi = {10.1073/pnas.0808709105},
192
+	Journal = {Proc Natl Acad Sci U S A},
193
+	Journal-Full = {Proceedings of the National Academy of Sciences of the United States of America},
194
+	Mesh = {Algorithms; Computer Simulation; Models, Statistical; Software; Statistics as Topic},
195
+	Month = {Dec},
196
+	Number = {48},
197
+	Pages = {18718--18723},
198
+	Pmc = {PMC2586646},
199
+	Pmid = {19033188},
200
+	Pst = {ppublish},
201
+	Title = {A general framework for multiple testing dependence},
202
+	Volume = {105},
203
+	Year = {2008},
204
+	Bdsk-Url-1 = {http://dx.doi.org/10.1073/pnas.0808709105}
205
+}
206
+
207
+
144 208
 @article{mecham:2010,
145 209
 author = {Mecham, Brigham H. and Nelson, Peter S. and Storey, John D.},
146 210
 title = {Supervised normalization of microarrays},