git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/edge@102595 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -118,35 +118,59 @@ experiments, see section~\ref{sec:citepackage}. |
118 | 118 |
|
119 | 119 |
\section{Citing this package} |
120 | 120 |
\label{sec:citepackage} |
121 |
-\textbf{\bibentry{storey:2007}} \\ |
|
122 |
-Theory paper that introduces the optimal discovery procedure and shows that it |
|
123 |
-maximizes the expected true positive results for each number of fixed false |
|
124 |
-positive results. The optimality is closely related to the false discovery rate. |
|
125 | 121 |
|
126 |
-\textbf{\bibentry{storey:etal:2007}} \\ |
|
127 |
-Discusses various ways of estimating the ODP statistic with applications to |
|
128 |
-microarray experiments. |
|
122 |
+{\bf When reporting results involving the estimation of false discovery rate or q-value quantities, please cite:} |
|
129 | 123 |
|
130 |
-\textbf{\bibentry{woo:leek:storey:2011}} \\ |
|
131 |
-Previous implementations of the ODP are computationally infeasible for a large |
|
132 |
-number of hypothesis tests. This paper introduces a computationally efficient |
|
133 |
-implementation of ODP that this package is based on. |
|
124 |
+\bibentry{Storey:2002fc} |
|
125 |
+ |
|
126 |
+\bibentry{Storey:2003il} |
|
127 |
+ |
|
128 |
+{\bf When reporting results involving the analysis of time course studies, please cite:} |
|
129 |
+ |
|
130 |
+\bibentry{storey:2005} |
|
131 |
+% A methodology for analyzing time course microarray data is introduced and |
|
132 |
+% applied to two time course studies on humans. |
|
133 |
+ |
|
134 |
+{\bf When reporting results involving use of the optimal discovery procedure ({\tt odp}), please cite:} |
|
135 |
+ |
|
136 |
+\bibentry{storey:2007} |
|
137 |
+% Theory paper that introduces the optimal discovery procedure and shows that it |
|
138 |
+% maximizes the expected true positive results for each number of fixed false |
|
139 |
+% positive results. The optimality is closely related to the false discovery rate. |
|
140 |
+ |
|
141 |
+\bibentry{storey:etal:2007} |
|
142 |
+% Discusses various ways of estimating the ODP statistic with applications to |
|
143 |
+% microarray experiments. |
|
144 |
+ |
|
145 |
+\bibentry{woo:leek:storey:2011} |
|
146 |
+% Previous implementations of the ODP are computationally infeasible for a large |
|
147 |
+% number of hypothesis tests. This paper introduces a computationally efficient |
|
148 |
+% implementation of ODP that this package is based on. |
|
149 |
+ |
|
150 |
+{\bf When reporting results involving surrogate variable analysis ({\tt apply\_sva}), please cite:} |
|
151 |
+ |
|
152 |
+\bibentry{leek:2007} |
|
153 |
+ |
|
154 |
+\bibentry{Leek:2008qf} |
|
155 |
+ |
|
156 |
+{\bf When reporting results involving supervised normalization of microarrays ({\tt apply\_snm}), please cite:} |
|
157 |
+ |
|
158 |
+\bibentry{mecham:2010} |
|
159 |
+ |
|
160 |
+{\bf To cite the {\tt edge} R package itself, please type the following to retrieve the citation:} |
|
161 |
+ |
|
162 |
+<<citepackage,cache=FALSE>>= |
|
163 |
+citation("edge") |
|
164 |
+@ |
|
134 | 165 |
|
135 |
-\textbf{\bibentry{storey:2005}} \\ |
|
136 |
-A methodology for analyzing time course microarray data is introduced and |
|
137 |
-applied to two time course studies on humans. |
|
138 | 166 |
|
139 | 167 |
\section{Getting help} |
140 |
-Hopefully, most questions relating to the package will be answered in this |
|
141 |
-vignette but to get a more detailed account of how to use the functions simply |
|
142 |
-type within R: |
|
143 |
-<<help_qvalue>>= |
|
168 |
+Many questions about {\tt edge} will hopefully be answered by this documentation and references therein. As with any R package, detailed information on functions, their arguments and values, can be obtained in the help files. To view the
|
169 |
+help for {\tt edge} within R, type
|
170 |
+<<help_edge>>= |
|
144 | 171 |
help(package="edge") |
145 | 172 |
@ |
146 |
-\noindent If you identify bugs related to basic usage please contact the |
|
147 |
-authors directly. Otherwise, any questions or problems regarding \edge will |
|
148 |
-most efficiently be addressed on the Bioconductor support site, |
|
149 |
-\url{https://support.bioconductor.org/}. |
|
173 |
+\noindent If you identify bugs related to basic usage please contact the authors directly, preferably via GitHub at \url{https://github.com/jdstorey/edge/issues}. Otherwise, any questions or problems regarding {\tt edge} will most efficiently be addressed on the Bioconductor support site, \url{https://support.bioconductor.org/}. |
|
150 | 174 |
|
151 | 175 |
\section{Quick start guide} |
152 | 176 |
To get started, first load the {\tt kidney} dataset included in the package: |
... | ... |
@@ -1,3 +1,45 @@ |
1 |
+ |
|
2 |
+@article{Storey:2002fc, |
|
3 |
+ Abstract = {Multiple-hypothesis testing involves guarding against much more complicated errors than single-hypothesis testing. Whereas we typically control the type I error rate for a single-hypothesis test, a compound error rate is controlled for multiple-hypothesis tests. For example, controlling the false discovery rate FDR traditionally involves intricate sequential p-value rejection methods based on the observed data. Whereas a sequential p-value method fixes the error rate and estimates its corresponding rejection region, we propose the opposite approach-we fix the rejection region and then estimate its corresponding error rate. This new approach offers increased applicability, accuracy and power. We apply the methodology to both the positive false discovery rate pFDR and FDR, and provide evidence for its benefits. It is shown that pFDR is probably the quantity of interest over FDR. Also discussed is the calculation of the q-value, the pFDR analogue of the p-value, which eliminates the need to set the error rate beforehand as is traditionally done. Some simple numerical examples are presented that show that this new approach can yield an increase of over eight times in power compared with the Benjamini-Hochberg FDR method.}, |
|
4 |
+ Address = {108 COWLEY RD, OXFORD OX4 1JF, OXON, ENGLAND}, |
|
5 |
+ Author = {Storey, JD}, |
|
6 |
+ Date-Added = {2011-10-30 22:26:25 -0400}, |
|
7 |
+ Date-Modified = {2011-10-30 22:26:25 -0400}, |
|
8 |
+ Isi = {000177425500009}, |
|
9 |
+ Isi-Recid = {126051257}, |
|
10 |
+ Isi-Ref-Recids = {112504863 90155838 115373815 122784094 87253760 119531800 126051258 126051259 119668320 112504865}, |
|
11 |
+ Journal = {Journal of the Royal Statistical Society Series B-Statistical Methodology}, |
|
12 |
+ Keywords = {false discovery rate; multiple comparisons; positive false discovery rate; p-values; q-values; sequential p-value methods; simultaneous inference}, |
|
13 |
+ Pages = {479--498}, |
|
14 |
+ Publisher = {BLACKWELL PUBL LTD}, |
|
15 |
+ Times-Cited = {1132}, |
|
16 |
+ Title = {A direct approach to false discovery rates}, |
|
17 |
+ Volume = {64}, |
|
18 |
+ Year = {2002}, |
|
19 |
+} |
|
20 |
+ |
|
21 |
+ |
|
22 |
+@article{Storey:2003il, |
|
23 |
+ Abstract = {With the increase in genomewide experiments and the sequencing of multiple genomes, the analysis of large data sets has become commonplace in biology. It is often the case that thousands of features in a genomewide data set are tested against some null hypothesis, where a number of features are expected to be significant. Here we propose an approach to measuring statistical significance in these genomewide studies based on the concept of the false discovery rate. This approach offers a sensible balance between the number of true and false positives that is automatically calibrated and easily interpreted. In doing so, a measure of statistical significance called the q value is associated with each tested feature. The q value is similar to the well known p value, except it is a measure of significance in terms of the false discovery rate rather than the false positive rate. Our approach avoids a flood of false positive results, while offering a more liberal criterion than what has been used in genome scans for linkage.}, |
|
24 |
+ Author = {Storey, John D and Tibshirani, Robert}, |
|
25 |
+ Date-Added = {2011-10-30 22:16:49 -0400}, |
|
26 |
+ Date-Modified = {2011-10-30 22:16:49 -0400}, |
|
27 |
+ Doi = {10.1073/pnas.1530509100}, |
|
28 |
+ Journal = {Proc Natl Acad Sci U S A}, |
|
29 |
+ Journal-Full = {Proceedings of the National Academy of Sciences of the United States of America}, |
|
30 |
+ Mesh = {Algorithms; Alternative Splicing; Animals; Binding Sites; Exons; Gene Expression Regulation; Genetic Linkage; Genetic Techniques; Genome; Humans; Oligonucleotide Array Sequence Analysis; Statistics as Topic; Transcription, Genetic}, |
|
31 |
+ Month = {Aug}, |
|
32 |
+ Number = {16}, |
|
33 |
+ Pages = {9440-5}, |
|
34 |
+ Pmc = {PMC170937}, |
|
35 |
+ Pmid = {12883005}, |
|
36 |
+ Pst = {ppublish}, |
|
37 |
+ Title = {Statistical significance for genomewide studies}, |
|
38 |
+ Volume = {100}, |
|
39 |
+ Year = {2003}, |
|
40 |
+} |
|
41 |
+ |
|
42 |
+ |
|
1 | 43 |
@article{woo:leek:storey:2011, |
2 | 44 |
author = {Woo, Sangsoon and Leek, Jeffrey T. and Storey, John D.}, |
3 | 45 |
title = {A computationally efficient modular optimal discovery procedure}, |
... | ... |
@@ -141,6 +183,28 @@ eprint = { |
141 | 183 |
} |
142 | 184 |
|
143 | 185 |
|
186 |
+@article{Leek:2008qf, |
|
187 |
+ Abstract = {We develop a general framework for performing large-scale significance testing in the presence of arbitrarily strong dependence. We derive a low-dimensional set of random vectors, called a dependence kernel, that fully captures the dependence structure in an observed high-dimensional dataset. This result shows a surprising reversal of the "curse of dimensionality" in the high-dimensional hypothesis testing setting. We show theoretically that conditioning on a dependence kernel is sufficient to render statistical tests independent regardless of the level of dependence in the observed data. This framework for multiple testing dependence has implications in a variety of common multiple testing problems, such as in gene expression studies, brain imaging, and spatial epidemiology.}, |
|
188 |
+ Author = {Leek, Jeffrey T and Storey, John D}, |
|
189 |
+ Date-Added = {2011-10-30 22:16:12 -0400}, |
|
190 |
+ Date-Modified = {2011-10-30 22:16:12 -0400}, |
|
191 |
+ Doi = {10.1073/pnas.0808709105}, |
|
192 |
+ Journal = {Proc Natl Acad Sci U S A}, |
|
193 |
+ Journal-Full = {Proceedings of the National Academy of Sciences of the United States of America}, |
|
194 |
+ Mesh = {Algorithms; Computer Simulation; Models, Statistical; Software; Statistics as Topic}, |
|
195 |
+ Month = {Dec}, |
|
196 |
+ Number = {48}, |
|
197 |
+ Pages = {18718-23}, |
|
198 |
+ Pmc = {PMC2586646}, |
|
199 |
+ Pmid = {19033188}, |
|
200 |
+ Pst = {ppublish}, |
|
201 |
+ Title = {A general framework for multiple testing dependence}, |
|
202 |
+ Volume = {105}, |
|
203 |
+ Year = {2008}, |
|
204 |
+ Bdsk-Url-1 = {http://dx.doi.org/10.1073/pnas.0808709105} |
|
205 |
+} |
|
206 |
+ |
|
207 |
+ |
|
144 | 208 |
@article{mecham:2010, |
145 | 209 |
author = {Mecham, Brigham H. and Nelson, Peter S. and Storey, John D.}, |
146 | 210 |
title = {Supervised normalization of microarrays}, |