git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/edge@102555 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,31 +1,42 @@ |
1 | 1 |
Package: edge |
2 | 2 |
Type: Package |
3 | 3 |
Title: Extraction of Differential Gene Expression |
4 |
-Date: 2015-03-26 |
|
5 |
-Version: 1.99.0 |
|
6 |
-Authors@R: c( person("John D.", "Storey", email = |
|
7 |
- "jstorey@princeton.edu", role = c("aut", "cre", "cph")), |
|
8 |
- person("Jeffrey T.", "Leek", email = "jleek@jhsph.edu ", role = |
|
9 |
- c("aut")), person("Andrew J.", "Bass", email = |
|
10 |
- "ajbass@princeton.edu", role = c("aut")) ) |
|
4 |
+Date: 2015-04-15 |
|
5 |
+Version: 1.99.1 |
|
6 |
+Authors@R: c( |
|
7 |
+ person("John D.", "Storey", email = "jstorey@princeton.edu", role = c("aut", "cre", "cph")), |
|
8 |
+ person("Jeffrey T.", "Leek", email = "jleek@jhsph.edu ", role = c("aut")), |
|
9 |
+ person("Andrew J.", "Bass", email = "ajbass@princeton.edu", role = c("aut")) |
|
10 |
+ ) |
|
11 |
+Maintainer: John D. Storey <jstorey@princeton.edu> |
|
11 | 12 |
biocViews: MultipleComparison, DifferentialExpression, TimeCourse, |
12 |
- Regression, GeneExpression, DataImport |
|
13 |
-Description: The edge package implements methods for carrying out |
|
14 |
- differential expression analyses of genome-wide gene expression |
|
15 |
- studies. Significance testing using the optimal discovery |
|
16 |
- procedure and generalized likelihood ratio tests (equivalent to |
|
17 |
- F-tests and t-tests) are implemented for general study designs. |
|
18 |
- Special functions are available to facilitate the analysis of |
|
19 |
- common study designs, including time course experiments. Other |
|
20 |
- packages such as snm, sva, and qvalue are integrated in edge to |
|
21 |
- provide a wide range of tools for gene expression analysis. |
|
13 |
+ Regression, GeneExpression, DataImport |
|
14 |
+Description: The edge package implements methods for carrying out differential |
|
15 |
+ expression analyses of genome-wide gene expression studies. Significance |
|
16 |
+ testing using the optimal discovery procedure and generalized likelihood |
|
17 |
+ ratio tests (equivalent to F-tests and t-tests) are implemented for general study |
|
18 |
+ designs. Special functions are available to facilitate the analysis of |
|
19 |
+ common study designs, including time course experiments. Other packages |
|
20 |
+ such as snm, sva, and qvalue are integrated in edge to provide a wide range |
|
21 |
+ of tools for gene expression analysis. |
|
22 | 22 |
VignetteBuilder: knitr |
23 |
-Imports: methods, splines, sva, snm, qvalue(>= 1.99.0), MASS |
|
24 |
-Suggests: testthat, knitr, ggplot2, reshape2 |
|
25 |
-Depends: R(>= 3.2.0), Biobase |
|
23 |
+Imports: |
|
24 |
+ methods, |
|
25 |
+ splines, |
|
26 |
+ sva, |
|
27 |
+ snm, |
|
28 |
+ qvalue(>= 1.99.0), |
|
29 |
+ MASS |
|
30 |
+Suggests: |
|
31 |
+ testthat, |
|
32 |
+ knitr, |
|
33 |
+ ggplot2, |
|
34 |
+ reshape2 |
|
35 |
+Depends: |
|
36 |
+ R(>= 3.2.0), |
|
37 |
+ Biobase |
|
26 | 38 |
URL: https://github.com/jdstorey/edge |
27 | 39 |
BugReports: https://github.com/jdstorey/edge/issues |
28 | 40 |
LazyData: true |
29 |
-License: GPL |
|
30 |
-Copyright: 2005-2015 John D. Storey |
|
41 |
+License: MIT + file LICENSE |
|
31 | 42 |
NeedsCompilation: yes |
... | ... |
@@ -91,11 +91,11 @@ deFitCheck <- function(object) { |
91 | 91 |
|
92 | 92 |
#' The differential expression class (deSet) |
93 | 93 |
#' |
94 |
-#' The \code{deSet} class is designed in order to complement the |
|
95 |
-#' \code{\link{ExpressionSet}} class. While the \code{ExpressionSet} class |
|
96 |
-#' contains information about the experiment, the \code{deSet} class |
|
97 |
-#' contains both experimental information and additional information relevant |
|
98 |
-#' for differential expression analysis. |
|
94 |
+#' The \code{deSet} class extends the \code{\link{ExpressionSet}} class. |
|
95 |
+#' While the \code{ExpressionSet} class contains information about the |
|
96 |
+#' experiment, the \code{deSet} class contains both experimental information and |
|
97 |
+#' additional information relevant for differential expression analysis, |
|
98 |
+#' explained below in Slots. |
|
99 | 99 |
#' |
100 | 100 |
#' @slot null.model \code{formula}: contains the adjustment variables in the |
101 | 101 |
#' experiment. The null model is used for comparison when fitting the |
... | ... |
@@ -19,13 +19,12 @@ |
19 | 19 |
#' @param ... Additional arguments for \code{\link{apply_qvalue}} and |
20 | 20 |
#' \code{\link{empPvals}} function. |
21 | 21 |
#' |
22 |
-#' @details |
|
23 |
-#' \code{lrt} fits the full and null models to each gene using the function |
|
24 |
-#' \code{\link{fit_models}} and then performs a likelihood ratio test. The |
|
25 |
-#' user has the option to calculate p-values from either the F distribution or |
|
26 |
-#' through a bootstrap algorithm. If \code{nullDistn} is "bootstrap" |
|
27 |
-#' then empirical p-values will be determined from the \code{\link{qvalue}} |
|
28 |
-#' package (see \code{\link{empPvals}}). |
|
22 |
+#' @details \code{lrt} fits the full and null models to each gene using the |
|
23 |
+#' function \code{\link{fit_models}} and then performs a likelihood ratio test. |
|
24 |
+#' The user has the option to calculate p-values a Normal distribution |
|
25 |
+#' assumption or through a bootstrap algorithm. If \code{nullDistn} is |
|
26 |
+#' "bootstrap" then empirical p-values will be determined from the |
|
27 |
+#' \code{\link{qvalue}} package (see \code{\link{empPvals}}). |
|
29 | 28 |
#' |
30 | 29 |
#' @author John Storey, Andrew Bass |
31 | 30 |
#' |
... | ... |
@@ -65,6 +64,8 @@ |
65 | 64 |
#' Storey JD, Xiao W, Leek JT, Tompkins RG, and Davis RW. (2005) Significance |
66 | 65 |
#' analysis of time course microarray experiments. Proceedings of the National |
67 | 66 |
#' Academy of Sciences, 102: 12837-12842. |
67 |
+#' |
|
68 |
+#' \url{http://en.wikipedia.org/wiki/Likelihood-ratio_test} |
|
68 | 69 |
#' |
69 | 70 |
#' @seealso \code{\linkS4class{deSet}}, \code{\link{build_models}}, |
70 | 71 |
#' \code{\link{odp}} |
... | ... |
@@ -78,11 +79,10 @@ setGeneric("lrt", function(object, de.fit, |
78 | 79 |
|
79 | 80 |
#' The optimal discovery procedure |
80 | 81 |
#' |
81 |
-#' \code{odp} performs the optimal discovery procedure, which is a new |
|
82 |
-#' approach for optimally performing many hypothesis tests in a |
|
83 |
-#' high-dimensional study. When testing whether a feature is significant, the |
|
84 |
-#' optimal discovery procedure uses information across all features when |
|
85 |
-#' testing for significance. |
|
82 |
+#' \code{odp} performs the optimal discovery procedure, which is a framework for |
|
83 |
+#' optimally performing many hypothesis tests in a high-dimensional study. When |
|
84 |
+#' testing whether a feature is significant, the optimal discovery procedure |
|
85 |
+#' uses information across all features when testing for significance. |
|
86 | 86 |
#' |
87 | 87 |
#' @param object \code{S4 object}: \code{\linkS4class{deSet}} |
88 | 88 |
#' @param de.fit \code{S4 object}: \code{\linkS4class{deFit}}. Optional. |
... | ... |
@@ -100,16 +100,16 @@ setGeneric("lrt", function(object, de.fit, |
100 | 100 |
#' |
101 | 101 |
#' |
102 | 102 |
#' @details |
103 |
-#' The full ODP estimator computationally grows quadratically with |
|
104 |
-#' respect to the number of genes. This becomes computationally infeasible at |
|
105 |
-#' a certain point. Therefore, an alternative method called mODP is used which |
|
106 |
-#' has been shown to provide results that are very similar. mODP utilizes a |
|
107 |
-#' k-means clustering algorithm where genes are assigned to a cluster based on |
|
108 |
-#' the Kullback-Leiber distance. Each gene is assigned an module-average |
|
109 |
-#' parameter to calculate the ODP score and it reduces the computations time |
|
110 |
-#' to linear (See Woo, Leek and Storey 2010). If the number of clusters is equal |
|
111 |
-#' to the number of genes then the original ODP is implemented. Depending on |
|
112 |
-#' the number of hypothesis tests, this can take some time. |
|
103 |
+#' The full ODP estimator computationally grows quadratically with respect to |
|
104 |
+#' the number of genes. This becomes computationally taxing at a certain point. |
|
105 |
+#' Therefore, an alternative method called mODP is used which has been shown to |
|
106 |
+#' provide results that are very similar. mODP utilizes a clustering algorithm |
|
107 |
+#' where genes are assigned to a cluster based on the Kullback-Leiber distance. |
|
108 |
+#' Each gene is assigned an module-average parameter to calculate the ODP score |
|
109 |
+#' and it reduces the computations time to approximately linear (see Woo, Leek |
|
110 |
+#' and Storey 2010). If the number of clusters is equal to the number of genes |
|
111 |
+#' then the original ODP is implemented. Depending on the number of hypothesis |
|
112 |
+#' tests, this can take some time. |
|
113 | 113 |
#' |
114 | 114 |
#' @return \code{\linkS4class{deSet}} object |
115 | 115 |
#' |
... | ... |
@@ -168,30 +168,29 @@ setGeneric("odp", function(object, de.fit, odp.parms = NULL, bs.its = 100, |
168 | 168 |
#' Modular optimal discovery procedure (mODP) |
169 | 169 |
#' |
170 | 170 |
#' \code{kl_clust} is an implementation of mODP that assigns genes to modules |
171 |
-#' based off of the Kullback-Leibler distance. |
|
171 |
+#' based on of the Kullback-Leibler distance. |
|
172 | 172 |
#' |
173 | 173 |
#' @param object \code{S4 object}: \code{\linkS4class{deSet}}. |
174 | 174 |
#' @param de.fit \code{S4 object}: \code{\linkS4class{deFit}}. |
175 |
-#' @param n.mods \code{integer}: number of clusters. |
|
175 |
+#' @param n.mods \code{integer}: number of modules (i.e., clusters). |
|
176 | 176 |
#' |
177 | 177 |
#' @details mODP utilizes a k-means clustering algorithm where genes are |
178 | 178 |
#' assigned to a cluster based on the Kullback-Leiber distance. Each gene is |
179 | 179 |
#' assigned an module-average parameter to calculate the ODP score (See Woo, |
180 |
-#' Leek and Storey 2010 for more details). The mODP and full ODP produce near |
|
180 |
+#' Leek and Storey 2010 for more details). The mODP and full ODP produce nearly |
|
181 | 181 |
#' exact results but mODP has the advantage of being computationally |
182 |
-#' feasible. |
|
182 |
+#' faster. |
|
183 | 183 |
#' |
184 |
-#' @note The results are generally insensitive to the number of modules after a |
|
185 |
-#' certain threshold of about n.mods>=50. It is recommended that users |
|
186 |
-#' experiment with the number of clusters. If the number of clusters is equal |
|
187 |
-#' to the number of genes then the original ODP is implemented. Depending on |
|
188 |
-#' the number of hypothesis tests, this can take some time. |
|
184 |
+#' @note The results are generally insensitive to the number of modules after a |
|
185 |
+#' certain threshold of about n.mods>=50 in our experience. It is recommended |
|
186 |
+#' that users experiment with the number of modules. If the number of modules |
|
187 |
+#' is equal to the number of genes then the original ODP is implemented. |
|
189 | 188 |
#' |
190 | 189 |
#' @return |
191 | 190 |
#' A list with the following slots: |
192 | 191 |
#' \itemize{ |
193 |
-#' \item {mu.full: cluster means from full model.} |
|
194 |
-#' \item {mu.null: cluster means from null model.} |
|
192 |
+#' \item {mu.full: cluster averaged fitted values from full model.} |
|
193 |
+#' \item {mu.null: cluster averaged fitted values from null model.} |
|
195 | 194 |
#' \item {sig.full: cluster standard deviations from full model.} |
196 | 195 |
#' \item {sig.null: cluster standard deviations from null model.} |
197 | 196 |
#' \item {n.per.mod: total members in each cluster.} |
... | ... |
@@ -239,8 +238,7 @@ setGeneric("odp", function(object, de.fit, odp.parms = NULL, bs.its = 100, |
239 | 238 |
#' |
240 | 239 |
#' @author John Storey, Jeffrey Leek |
241 | 240 |
#' |
242 |
-#' @seealso \code{\link{odp}}, \code{\link{lrt}} and |
|
243 |
-#' \code{\link{fit_models}} |
|
241 |
+#' @seealso \code{\link{odp}}, \code{\link{fit_models}} |
|
244 | 242 |
#' |
245 | 243 |
#' @exportMethod kl_clust |
246 | 244 |
setGeneric("kl_clust", function(object, de.fit = NULL, n.mods = 50) |
... | ... |
@@ -248,7 +246,7 @@ setGeneric("kl_clust", function(object, de.fit = NULL, n.mods = 50) |
248 | 246 |
|
249 | 247 |
#' Linear regression of the null and full models |
250 | 248 |
#' |
251 |
-#' \code{fit_models} fits a linear model to each gene by using the least |
|
249 |
+#' \code{fit_models} fits a model matrix to each gene by using the least |
|
252 | 250 |
#' squares method. Model fits can be either statistic type "odp" (optimal |
253 | 251 |
#' discovery procedure) or "lrt" (likelihood ratio test). |
254 | 252 |
#' |
... | ... |
@@ -257,8 +255,9 @@ setGeneric("kl_clust", function(object, de.fit = NULL, n.mods = 50) |
257 | 255 |
#' "lrt" or "odp". Default is "lrt". |
258 | 256 |
#' |
259 | 257 |
#' @details |
260 |
-#' If "odp" method is implemented then the null model is removed from the full |
|
261 |
-#' model (see Storey 2007). |
|
258 |
+#' If "odp" method is implemented then the null model is removed from the full |
|
259 |
+#' model (see Storey 2007). Otherwise, the statistic type has no affect on the |
|
260 |
+#' model fit. |
|
262 | 261 |
#' |
263 | 262 |
#' @note \code{fit_models} does not have to be called by the user to use |
264 | 263 |
#' \code{\link{odp}}, \code{\link{lrt}} or \code{\link{kl_clust}} as it is an |
... | ... |
@@ -374,7 +373,8 @@ setGeneric("deSet", function(object, full.model, null.model, |
374 | 373 |
#' @param object \code{S4 object}: \code{\linkS4class{deSet}} |
375 | 374 |
#' @param ... Additional arguments for \code{\link{qvalue}} |
376 | 375 |
#' |
377 |
-#' @return \code{\linkS4class{deSet}} object |
|
376 |
+#' @return \code{\linkS4class{deSet}} object with slots updated by \code{\link{qvalue}} |
|
377 |
+#' calculations. |
|
378 | 378 |
#' |
379 | 379 |
#' @examples |
380 | 380 |
#' # import data |
... | ... |
@@ -421,7 +421,9 @@ setGeneric("apply_qvalue", function(object, ...) |
421 | 421 |
#' @param object \code{S4 object}: \code{\linkS4class{deSet}} |
422 | 422 |
#' @param ... Additional arguments for \code{\link{sva}} |
423 | 423 |
#' |
424 |
-#' @return \code{\linkS4class{deSet}} object |
|
424 |
+#' @return \code{\linkS4class{deSet}} object where the surrogate variables |
|
425 |
+#' estimated by \code{\link{sva}} are added to the full model and null model |
|
426 |
+#' matrices. |
|
425 | 427 |
#' |
426 | 428 |
#' @examples |
427 | 429 |
#' # import data |
... | ... |
@@ -453,6 +455,10 @@ setGeneric("apply_qvalue", function(object, ...) |
453 | 455 |
#' Leek JT, Storey JD (2007) Capturing Heterogeneity in Gene Expression |
454 | 456 |
#' Studies by Surrogate Variable Analysis. PLoS Genet 3(9): e161. |
455 | 457 |
#' doi:10.1371/journal.pgen.0030161 |
458 |
+#' |
|
459 |
+#' Leek JT and Storey JD. (2008) A general framework for multiple testing |
|
460 |
+#' dependence. Proceedings of the National Academy of Sciences, 105: 18718- |
|
461 |
+#' 18723. |
|
456 | 462 |
#' |
457 | 463 |
#' @author John Storey, Jeffrey Leek, Andrew Bass |
458 | 464 |
#' @export |
... | ... |
@@ -466,14 +472,19 @@ setGeneric("apply_sva", function(object, ...) |
466 | 472 |
#' on the algorithm. |
467 | 473 |
#' |
468 | 474 |
#' @param object \code{S4 object}: \code{\linkS4class{deSet}} |
469 |
-#' @param int.var \code{data frame}: intensity-dependent effects. |
|
475 |
+#' @param int.var \code{data frame}: intensity-dependent effects (see |
|
476 |
+#' \code{\link{snm}} for details) |
|
470 | 477 |
#' @param ... Additional arguments for \code{\link{snm}} |
471 | 478 |
#' |
472 |
-#' @return \code{apply_snm} returns an \code{\linkS4class{deSet}} object. |
|
479 |
+#' @return \code{apply_snm} returns a \code{\linkS4class{deSet}} object where |
|
480 |
+#' assayData (the expression data) that has been passed to apply_snm is replaced |
|
481 |
+#' with the normalized data that \code{\link{snm}} returns. Specifically, |
|
482 |
+#' \code{exprs(object)} is replaced by \code{$norm.dat} from \code{\link{snm}}, |
|
483 |
+#' where \code{object} is the \code{\link{deSet}} object. |
|
473 | 484 |
#' |
474 | 485 |
#' @references |
475 | 486 |
#' Mechan BH, Nelson PS, Storey JD. Supervised normalization of microarrays. |
476 |
-#' Bioinformatics 2010;26:1308-15 |
|
487 |
+#' Bioinformatics 2010;26:1308-1315. |
|
477 | 488 |
#' |
478 | 489 |
#' @examples |
479 | 490 |
#' # simulate data |
... | ... |
@@ -499,7 +510,7 @@ setGeneric("apply_sva", function(object, ...) |
499 | 510 |
#' |
500 | 511 |
#' @author John Storey, Andrew Bass |
501 | 512 |
#' @export |
502 |
-setGeneric("apply_snm", function(object, int.var, ...) |
|
513 |
+setGeneric("apply_snm", function(object, int.var=NULL, ...) |
|
503 | 514 |
standardGeneric("apply_snm")) |
504 | 515 |
|
505 | 516 |
#' Full model equation |
... | ... |
@@ -745,18 +756,19 @@ setGeneric("qvalueObj<-", function(object, value) { |
745 | 756 |
#' \code{\linkS4class{deSet}}. |
746 | 757 |
#' |
747 | 758 |
#' @param object \code{\linkS4class{deSet}} |
748 |
-#' @param value \code{factor}: identifier for each observation. Important |
|
749 |
-#' if the same individuals are sampled multiple times. |
|
759 |
+#' @param value \code{factor}: Identifies which samples correspond to which |
|
760 |
+#' individuals. Important if the same individuals are sampled multiple times |
|
761 |
+#' in a longitudinal fashion. |
|
750 | 762 |
#' |
751 |
-#' @return \code{individual} returns information regarding individuals |
|
752 |
-#' in the experiment. |
|
763 |
+#' @return \code{individual} returns information regarding dinstinct individuals |
|
764 |
+#' sampled in the experiment. |
|
753 | 765 |
#' |
754 | 766 |
#' @examples |
755 | 767 |
#' library(splines) |
756 | 768 |
#' # import data |
757 | 769 |
#' data(endotoxin) |
758 | 770 |
#' ind <- endotoxin$ind |
759 |
-#' time <- endotoxin$t |
|
771 |
+#' time <- endotoxin$time |
|
760 | 772 |
#' class <- endotoxin$class |
761 | 773 |
#' endoexpr <- endotoxin$endoexpr |
762 | 774 |
#' cov <- data.frame(individual = ind, time = time, class = class) |
... | ... |
@@ -798,7 +810,8 @@ setGeneric("individual<-", function(object, value) { |
798 | 810 |
#' |
799 | 811 |
#' @param object \code{S4 object}: \code{\linkS4class{deFit}} |
800 | 812 |
#' |
801 |
-#' @return \code{betaCoef} returns the regression coefficients. |
|
813 |
+#' @return \code{betaCoef} returns the regression coefficients for the full |
|
814 |
+#' model fit. |
|
802 | 815 |
#' |
803 | 816 |
#' @author John Storey, Andrew Bass |
804 | 817 |
#' |
... | ... |
@@ -830,7 +843,7 @@ setGeneric("individual<-", function(object, value) { |
830 | 843 |
#' @export |
831 | 844 |
setGeneric("betaCoef", function(object) standardGeneric("betaCoef")) |
832 | 845 |
|
833 |
-#' Statistical method used in analysis |
|
846 |
+#' Statistic type used in analysis |
|
834 | 847 |
#' |
835 | 848 |
#' Access the statistic type in a \code{\linkS4class{deFit}} object. Can |
836 | 849 |
#' either be the optimal discovery procedure (odp) or the likelihood ratio |
... | ... |
@@ -299,11 +299,15 @@ setMethod("apply_sva", |
299 | 299 |
#' @rdname apply_snm |
300 | 300 |
setMethod("apply_snm", |
301 | 301 |
signature = signature(object="deSet"), |
302 |
- function(object, int.var, ...) { |
|
302 |
+ function(object, int.var=NULL, ...) { |
|
303 | 303 |
full.matrix <- object@full.matrix |
304 | 304 |
null.matrix <- object@null.matrix |
305 | 305 |
full.matrix <- full.matrix - projMatrix(null.matrix) %*% full.matrix |
306 | 306 |
full.matrix <- as.matrix(rm.zero.cols(full.matrix)) |
307 |
+ if(is.null(int.var)) { |
|
308 |
+ int.var <- 1:ncol(exprs(object)) |
|
309 |
+ warning("Setting int.var=1:n where n is number of samples.") |
|
310 |
+ } |
|
307 | 311 |
exprs(object) <- snm(exprs(object), |
308 | 312 |
bio.var = full.matrix, |
309 | 313 |
adj.var = null.matrix, |
... | ... |
@@ -36,7 +36,7 @@ NULL |
36 | 36 |
#' \item endoexpr: A 500 rows by 46 columns data frame containing expression |
37 | 37 |
#' values. |
38 | 38 |
#' \item class: A vector of length 46 containing information about which |
39 |
-#' individuals were given endotoxin |
|
39 |
+#' individuals were given endotoxin. |
|
40 | 40 |
#' \item ind: A vector of length 46 providing indexing measurements for each |
41 | 41 |
#' individual in the experiment. |
42 | 42 |
#' \item time: A vector of length 46 indicating time measurements. |
... | ... |
@@ -47,11 +47,6 @@ NULL |
47 | 47 |
#' download the full data set, go to \url{http://genomine.org/edge/}. |
48 | 48 |
#' |
49 | 49 |
#' @references |
50 |
-#' Calvano, S. E., Xiao, W., Richards, D. R., Felciano, R. M., Baker, H. |
|
51 |
-#' V., Cho, R.J., Chen, R. O., Brownstein, B. H., Cobb, J. P., Tschoeke, |
|
52 |
-#' S. K., et al. "A network-based analysis of systemic inflammation in humans." |
|
53 |
-#' Nature. 2005 Oct 13; 437(7061):1032-7. Epub 2005 Aug 31. |
|
54 |
-#' |
|
55 | 50 |
#' Storey JD, Xiao W, Leek JT, Tompkins RG, and Davis RW. (2005) Significance |
56 | 51 |
#' analysis of time course microarray experiments. PNAS, 102: 12837-12842. \cr |
57 | 52 |
#' \url{http://www.pnas.org/content/100/16/9440.full} |
... | ... |
@@ -68,13 +63,13 @@ NULL |
68 | 63 |
#' |
69 | 64 |
#' # formulate null and full models in experiement |
70 | 65 |
#' # note: interaction term is a way of taking into account group effects |
71 |
-#' mNull <- ~ns(time, df=4, intercept = FALSE) |
|
66 |
+#' mNull <- ~ns(time, df=4, intercept = FALSE) + class |
|
72 | 67 |
#' mFull <- ~ns(time, df=4, intercept = FALSE) + |
73 |
-#' ns(time, df=4, intercept = FALSE):class + class |
|
68 |
+#' ns(time, df=4, intercept = FALSE):class + class |
|
74 | 69 |
#' |
75 | 70 |
#' # create deSet object |
76 | 71 |
#' de_obj <- build_models(endoexpr, cov = cov, full.model = mFull, |
77 |
-#' null.model = mNull, ind = ind) |
|
72 |
+#' null.model = mNull, ind = ind) |
|
78 | 73 |
#' |
79 | 74 |
#' # Perform ODP/lrt statistic to determine significant genes in study |
80 | 75 |
#' de_odp <- odp(de_obj, bs.its = 10) |
... | ... |
@@ -96,7 +91,7 @@ NULL |
96 | 91 |
#' @description |
97 | 92 |
#' Gene expression measurements from kidney samples were obtained from 72 |
98 | 93 |
#' human subjects ranging in age from 27 to 92 years. Only one array was |
99 |
-#' obtained per sample, and the age and tissue type of each subject was |
|
94 |
+#' obtained per individual, and the age and sex of each individual were |
|
100 | 95 |
#' recorded. |
101 | 96 |
#' |
102 | 97 |
#' @format |
... | ... |
@@ -108,14 +103,12 @@ NULL |
108 | 103 |
#' different tissue sample. |
109 | 104 |
#' \item age: A vector of length 133 giving the age of each sample. |
110 | 105 |
#' \item sex: A vector of length 133 giving the sex of each sample. |
111 |
-#' \item tissue: A vector of length 133 giving the tissue type of each |
|
112 |
-#' sample. |
|
113 | 106 |
#' } |
114 | 107 |
#' @note |
115 |
-#' These data are a random subset of 500 probe-sets from the total number of |
|
116 |
-#' probe-sets in the original data set. To download the full data set, go to |
|
117 |
-#' \url{http://genomine.org/edge/}. The \code{age}, \code{sex}, and |
|
118 |
-#' \code{tissue} data are contained in \code{kidcov} data frame. |
|
108 |
+#' These data are a random subset of 500 probe-sets from the total number of |
|
109 |
+#' probe-sets in the original data set. To download the full data set, go to |
|
110 |
+#' \url{http://genomine.org/edge/}. The \code{age} and \code{sex} are contained |
|
111 |
+#' in \code{kidcov} data frame. |
|
119 | 112 |
#' |
120 | 113 |
#' @references |
121 | 114 |
#' Storey JD, Xiao W, Leek JT, Tompkins RG, and Davis RW. (2005) Significance |
... | ... |
@@ -152,15 +145,15 @@ NULL |
152 | 145 |
#' |
153 | 146 |
#' @description |
154 | 147 |
#' The data provide gene expression measurements in peripheral blood leukocyte |
155 |
-#' samples from three Moroccan Amazigh groups leading distinct ways of life: |
|
148 |
+#' samples from three Moroccan groups leading distinct ways of life: |
|
156 | 149 |
#' desert nomadic (DESERT), mountain agrarian (VILLAGE), and coastal urban |
157 | 150 |
#' (AGADIR). |
158 | 151 |
#' |
159 | 152 |
#' @format |
160 | 153 |
#' \itemize{ |
161 | 154 |
#' \item batch: Batches in experiment. |
162 |
-#' \item location: Location of Moroccan Amazigh groups. |
|
163 |
-#' \item gender: Gender of individuals. |
|
155 |
+#' \item location: Environment/lifestyle of Moroccan Amazigh groups. |
|
156 |
+#' \item gender: Sex of individuals. |
|
164 | 157 |
#' \item gibexpr: A 500 rows by 46 columns matrix of gene expression values. |
165 | 158 |
#' } |
166 | 159 |
#' |
... | ... |
@@ -1,21 +1,23 @@ |
1 | 1 |
#' Formulates the experimental models |
2 | 2 |
#' |
3 | 3 |
#' \code{build_study} generates the full and null models for users unfamiliar |
4 |
-#' with their experimental design. There are two types of experimental designs: |
|
5 |
-#' static and time-course. For more details, refer to the user manual. |
|
4 |
+#' with building models in R. There are two types of experimental designs: |
|
5 |
+#' static and time-course. For more details, refer to the vignette. |
|
6 | 6 |
#' |
7 |
-#' @param data \code{matrix}: gene expression data. |
|
8 |
-#' @param sampling \code{string}: type of experiment. Either "static" or |
|
9 |
-#' "timecourse". Default is "static". |
|
10 |
-#' @param grp \code{vector}: groups or biological variable in experiment. Optional. |
|
11 |
-#' @param tme \code{vector}: covariate of interest in time course study. Optional. |
|
12 |
-#' @param ind \code{factor}: individual factor for repeated observations of |
|
13 |
-#' the same individuals. Optional. |
|
7 |
+#' @param data \code{matrix}: gene expression data (rows are genes, columns are |
|
8 |
+#' samples). |
|
9 |
+#' @param sampling \code{string}: type of study. Either "static" or |
|
10 |
+#' "timecourse". Default is "static". |
|
11 |
+#' @param grp \code{vector}: group assignement in the study (for K-class |
|
12 |
+#' studies). Optional. |
|
13 |
+#' @param tme \code{vector}: time variable in a time course study. Optional. |
|
14 |
+#' @param ind \code{factor}: individual factor for repeated observations of the |
|
15 |
+#' same individuals. Optional. |
|
14 | 16 |
#' @param bio.var \code{matrix}: biological variables. Optional. |
15 |
-#' @param basis.df \code{numeric}: degree of freedom of the spline fit for time |
|
16 |
-#' course study. Default is 2. |
|
17 |
-#' @param basis.type \code{string}: either "ncs" or "ps" basis for time course |
|
18 |
-#' study. Default is "ncs". |
|
17 |
+#' @param basis.df \code{numeric}: degrees of freedom of the basis for time |
|
18 |
+#' course study. Default is 2. |
|
19 |
+#' @param basis.type \code{string}: either "ncs" (natural cubic spline) or "ps" |
|
20 |
+#' (polynomial spline) basis for time course study. Default is "ncs". |
|
19 | 21 |
#' @param adj.var \code{matrix}: adjustment variables. Optional. |
20 | 22 |
#' |
21 | 23 |
#' @return \code{\linkS4class{deSet}} object |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-edge: Extraction of Differential Expression Analysis |
|
1 |
+edge: Extraction of Differential Gene Expression |
|
2 | 2 |
==== |
3 | 3 |
|
4 | 4 |
Introduction |
... | ... |
@@ -15,12 +15,17 @@ of tools for gene expression analysis. |
15 | 15 |
|
16 | 16 |
### Installation and Documentation |
17 | 17 |
|
18 |
-To install, open R and type: |
|
18 |
+To install the Bioconductor release version, open R and type: |
|
19 |
+```R |
|
20 |
+source("http://bioconductor.org/biocLite.R") |
|
21 |
+biocLite("edge") |
|
22 |
+``` |
|
23 |
+ |
|
24 |
+To install the development version, open R and type: |
|
19 | 25 |
```R |
20 | 26 |
install.packages("devtools") |
21 | 27 |
library("devtools") |
22 |
-install_github("jdstorey/qvalue", build_vignettes = TRUE) |
|
23 |
-install_github("jdstorey/edge", build_vignettes = TRUE) |
|
28 |
+install_github(c("jdstorey/qvalue","jdstorey/edge"), build_vignettes = TRUE) |
|
24 | 29 |
``` |
25 | 30 |
|
26 | 31 |
Instructions on using edge can be viewed by typing: |
... | ... |
@@ -75,7 +80,7 @@ edge_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, ful |
75 | 80 |
|
76 | 81 |
The `cov` is a data frame of covariates, the `null.model` is the null model and the `full.model` is the alternative model. The input `cov` is a data frame with the column names the same as the variables in the alternative and null models. Once the models have been generated, it is often useful to normalize the gene expression matrix using `apply_snm` and/or adjust for unmodelled variables using `apply_sva`. |
77 | 82 |
```R |
78 |
-edge_norm <- apply_snm(edge_obj) |
|
83 |
+edge_norm <- apply_snm(edge_obj, int.var=1:ncol(exprs(edge_obj)), diagnose=FALSE) |
|
79 | 84 |
edge_sva <- apply_sva(edge_norm) |
80 | 85 |
|
81 | 86 |
``` |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-edge 0.99.z: |
|
1 |
+edge 2.0.0: |
|
2 | 2 |
|
3 | 3 |
The edge package was first released in 2005 and described in the publication: |
4 | 4 |
|
... | ... |
@@ -9,6 +9,5 @@ http://bioinformatics.oxfordjournals.org/content/22/4/507.abstract |
9 | 9 |
|
10 | 10 |
It was an independently released R package by the John Storey Lab, which |
11 | 11 |
included multi-threading and a graphical user interface. However, edge has been |
12 |
-updated and will now be made available through Bioconductor; edge 0.99.z is the |
|
13 |
-development version of this Bioconductor package. |
|
14 |
- |
|
12 |
+updated and will now be made available through Bioconductor; edge >=2.0.0 is the |
|
13 |
+new version released through Bioconductor. |
|
15 | 14 |
\ No newline at end of file |
... | ... |
@@ -16,7 +16,8 @@ apply_qvalue(object, ...) |
16 | 16 |
\item{...}{Additional arguments for \code{\link{qvalue}}} |
17 | 17 |
} |
18 | 18 |
\value{ |
19 |
-\code{\linkS4class{deSet}} object |
|
19 |
+\code{\linkS4class{deSet}} object with slots updated by \code{\link{qvalue}} |
|
20 |
+ calculations. |
|
20 | 21 |
} |
21 | 22 |
\description{ |
22 | 23 |
Runs \code{\link{qvalue}} on a \code{\linkS4class{deSet}} object. |
... | ... |
@@ -6,19 +6,24 @@ |
6 | 6 |
\alias{apply_snm,deSet-method} |
7 | 7 |
\title{Supervised normalization of data in edge} |
8 | 8 |
\usage{ |
9 |
-apply_snm(object, int.var, ...) |
|
9 |
+apply_snm(object, int.var = NULL, ...) |
|
10 | 10 |
|
11 |
-\S4method{apply_snm}{deSet}(object, int.var, ...) |
|
11 |
+\S4method{apply_snm}{deSet}(object, int.var = NULL, ...) |
|
12 | 12 |
} |
13 | 13 |
\arguments{ |
14 | 14 |
\item{object}{\code{S4 object}: \code{\linkS4class{deSet}}} |
15 | 15 |
|
16 |
-\item{int.var}{\code{data frame}: intensity-dependent effects.} |
|
16 |
+\item{int.var}{\code{data frame}: intensity-dependent effects (see |
|
17 |
+\code{\link{snm}} for details)} |
|
17 | 18 |
|
18 | 19 |
\item{...}{Additional arguments for \code{\link{snm}}} |
19 | 20 |
} |
20 | 21 |
\value{ |
21 |
-\code{apply_snm} returns an \code{\linkS4class{deSet}} object. |
|
22 |
+\code{apply_snm} returns a \code{\linkS4class{deSet}} object where |
|
23 |
+assayData (the expression data) that has been passed to apply_snm is replaced |
|
24 |
+with the normalized data that \code{\link{snm}} returns. Specifically, |
|
25 |
+\code{exprs(object)} is replaced by \code{$norm.dat} from \code{\link{snm}}, |
|
26 |
+where \code{object} is the \code{\link{deSet}} object. |
|
22 | 27 |
} |
23 | 28 |
\description{ |
24 | 29 |
Runs \code{snm} on a deSet object based on the null and full models in |
... | ... |
@@ -49,7 +54,7 @@ John Storey, Andrew Bass |
49 | 54 |
} |
50 | 55 |
\references{ |
51 | 56 |
Mechan BH, Nelson PS, Storey JD. Supervised normalization of microarrays. |
52 |
-Bioinformatics 2010;26:1308-15 |
|
57 |
+Bioinformatics 2010;26:1308-1315. |
|
53 | 58 |
} |
54 | 59 |
\seealso{ |
55 | 60 |
\code{\linkS4class{deSet}}, \code{\link{odp}} and |
... | ... |
@@ -16,7 +16,9 @@ apply_sva(object, ...) |
16 | 16 |
\item{...}{Additional arguments for \code{\link{sva}}} |
17 | 17 |
} |
18 | 18 |
\value{ |
19 |
-\code{\linkS4class{deSet}} object |
|
19 |
+\code{\linkS4class{deSet}} object where the surrogate variables |
|
20 |
+estimated by \code{\link{sva}} are added to the full model and null model |
|
21 |
+matrices. |
|
20 | 22 |
} |
21 | 23 |
\description{ |
22 | 24 |
Runs \code{\link{sva}} on the null and full models in |
... | ... |
@@ -53,6 +55,10 @@ John Storey, Jeffrey Leek, Andrew Bass |
53 | 55 |
Leek JT, Storey JD (2007) Capturing Heterogeneity in Gene Expression |
54 | 56 |
Studies by Surrogate Variable Analysis. PLoS Genet 3(9): e161. |
55 | 57 |
doi:10.1371/journal.pgen.0030161 |
58 |
+ |
|
59 |
+Leek JT and Storey JD. (2008) A general framework for multiple testing |
|
60 |
+dependence. Proceedings of the National Academy of Sciences, 105: 18718- |
|
61 |
+18723. |
|
56 | 62 |
} |
57 | 63 |
\seealso{ |
58 | 64 |
\code{\linkS4class{deSet}}, \code{\link{odp}} and |
... | ... |
@@ -14,7 +14,8 @@ betaCoef(object) |
14 | 14 |
\item{object}{\code{S4 object}: \code{\linkS4class{deFit}}} |
15 | 15 |
} |
16 | 16 |
\value{ |
17 |
-\code{betaCoef} returns the regression coefficients. |
|
17 |
+\code{betaCoef} returns the regression coefficients for the full |
|
18 |
+ model fit. |
|
18 | 19 |
} |
19 | 20 |
\description{ |
20 | 21 |
Access the full model fitted coefficients of a |
... | ... |
@@ -9,35 +9,37 @@ build_study(data, grp = NULL, adj.var = NULL, bio.var = NULL, |
9 | 9 |
basis.df = 2, basis.type = c("ncs", "poly")) |
10 | 10 |
} |
11 | 11 |
\arguments{ |
12 |
-\item{data}{\code{matrix}: gene expression data.} |
|
12 |
+\item{data}{\code{matrix}: gene expression data (rows are genes, columns are |
|
13 |
+samples).} |
|
13 | 14 |
|
14 |
-\item{grp}{\code{vector}: groups or biological variable in experiment. Optional.} |
|
15 |
+\item{grp}{\code{vector}: group assignement in the study (for K-class |
|
16 |
+studies). Optional.} |
|
15 | 17 |
|
16 | 18 |
\item{adj.var}{\code{matrix}: adjustment variables. Optional.} |
17 | 19 |
|
18 | 20 |
\item{bio.var}{\code{matrix}: biological variables. Optional.} |
19 | 21 |
|
20 |
-\item{tme}{\code{vector}: covariate of interest in time course study. Optional.} |
|
22 |
+\item{tme}{\code{vector}: time variable in a time course study. Optional.} |
|
21 | 23 |
|
22 |
-\item{ind}{\code{factor}: individual factor for repeated observations of |
|
23 |
-the same individuals. Optional.} |
|
24 |
+\item{ind}{\code{factor}: individual factor for repeated observations of the |
|
25 |
+same individuals. Optional.} |
|
24 | 26 |
|
25 |
-\item{sampling}{\code{string}: type of experiment. Either "static" or |
|
27 |
+\item{sampling}{\code{string}: type of study. Either "static" or |
|
26 | 28 |
"timecourse". Default is "static".} |
27 | 29 |
|
28 |
-\item{basis.df}{\code{numeric}: degree of freedom of the spline fit for time |
|
30 |
+\item{basis.df}{\code{numeric}: degrees of freedom of the basis for time |
|
29 | 31 |
course study. Default is 2.} |
30 | 32 |
|
31 |
-\item{basis.type}{\code{string}: either "ncs" or "ps" basis for time course |
|
32 |
-study. Default is "ncs".} |
|
33 |
+\item{basis.type}{\code{string}: either "ncs" (natural cubic spline) or "ps" |
|
34 |
+(polynomial spline) basis for time course study. Default is "ncs".} |
|
33 | 35 |
} |
34 | 36 |
\value{ |
35 | 37 |
\code{\linkS4class{deSet}} object |
36 | 38 |
} |
37 | 39 |
\description{ |
38 | 40 |
\code{build_study} generates the full and null models for users unfamiliar |
39 |
-with their experimental design. There are two types of experimental designs: |
|
40 |
-static and time-course. For more details, refer to the user manual. |
|
41 |
+with building models in R. There are two types of experimental designs: |
|
42 |
+static and time-course. For more details, refer to the vignette. |
|
41 | 43 |
} |
42 | 44 |
\examples{ |
43 | 45 |
# create ExpressionSet object from kidney dataset |
... | ... |
@@ -5,11 +5,11 @@ |
5 | 5 |
\alias{deSet-class} |
6 | 6 |
\title{The differential expression class (deSet)} |
7 | 7 |
\description{ |
8 |
-The \code{deSet} class is designed in order to complement the |
|
9 |
-\code{\link{ExpressionSet}} class. While the \code{ExpressionSet} class |
|
10 |
-contains information about the experiment, the \code{deSet} class |
|
11 |
-contains both experimental information and additional information relevant |
|
12 |
-for differential expression analysis. |
|
8 |
+The \code{deSet} class extends the \code{\link{ExpressionSet}} class. |
|
9 |
+While the \code{ExpressionSet} class contains information about the |
|
10 |
+experiment, the \code{deSet} class contains both experimental information and |
|
11 |
+additional information relevant for differential expression analysis, |
|
12 |
+explained below in Slots. |
|
13 | 13 |
} |
14 | 14 |
\section{Slots}{ |
15 | 15 |
|
... | ... |
@@ -8,7 +8,7 @@ |
8 | 8 |
\item endoexpr: A 500 rows by 46 columns data frame containing expression |
9 | 9 |
values. |
10 | 10 |
\item class: A vector of length 46 containing information about which |
11 |
- individuals were given endotoxin |
|
11 |
+ individuals were given endotoxin. |
|
12 | 12 |
\item ind: A vector of length 46 providing indexing measurements for each |
13 | 13 |
individual in the experiment. |
14 | 14 |
\item time: A vector of length 46 indicating time measurements. |
... | ... |
@@ -41,13 +41,13 @@ cov <- data.frame(individual = ind, time = time, class = class) |
41 | 41 |
|
42 | 42 |
# formulate null and full models in experiement |
43 | 43 |
# note: interaction term is a way of taking into account group effects |
44 |
-mNull <- ~ns(time, df=4, intercept = FALSE) |
|
44 |
+mNull <- ~ns(time, df=4, intercept = FALSE) + class |
|
45 | 45 |
mFull <- ~ns(time, df=4, intercept = FALSE) + |
46 |
-ns(time, df=4, intercept = FALSE):class + class |
|
46 |
+ ns(time, df=4, intercept = FALSE):class + class |
|
47 | 47 |
|
48 | 48 |
# create deSet object |
49 | 49 |
de_obj <- build_models(endoexpr, cov = cov, full.model = mFull, |
50 |
-null.model = mNull, ind = ind) |
|
50 |
+ null.model = mNull, ind = ind) |
|
51 | 51 |
|
52 | 52 |
# Perform ODP/lrt statistic to determine significant genes in study |
53 | 53 |
de_odp <- odp(de_obj, bs.its = 10) |
... | ... |
@@ -57,11 +57,6 @@ de_lrt <- lrt(de_obj, nullDistn = "bootstrap", bs.its = 10) |
57 | 57 |
summary(de_odp) |
58 | 58 |
} |
59 | 59 |
\references{ |
60 |
-Calvano, S. E., Xiao, W., Richards, D. R., Felciano, R. M., Baker, H. |
|
61 |
-V., Cho, R.J., Chen, R. O., Brownstein, B. H., Cobb, J. P., Tschoeke, |
|
62 |
-S. K., et al. "A network-based analysis of systemic inflammation in humans." |
|
63 |
- Nature. 2005 Oct 13; 437(7061):1032-7. Epub 2005 Aug 31. |
|
64 |
- |
|
65 | 60 |
Storey JD, Xiao W, Leek JT, Tompkins RG, and Davis RW. (2005) Significance |
66 | 61 |
analysis of time course microarray experiments. PNAS, 102: 12837-12842. \cr |
67 | 62 |
\url{http://www.pnas.org/content/100/16/9440.full} |
... | ... |
@@ -20,13 +20,14 @@ fit_models(object, stat.type = c("lrt", "odp")) |
20 | 20 |
\code{\linkS4class{deFit}} object |
21 | 21 |
} |
22 | 22 |
\description{ |
23 |
-\code{fit_models} fits a linear model to each gene by using the least |
|
23 |
+\code{fit_models} fits a model matrix to each gene by using the least |
|
24 | 24 |
squares method. Model fits can be either statistic type "odp" (optimal |
25 | 25 |
discovery procedure) or "lrt" (likelihood ratio test). |
26 | 26 |
} |
27 | 27 |
\details{ |
28 | 28 |
If "odp" method is implemented then the null model is removed from the full |
29 |
-model (see Storey 2007). |
|
29 |
+model (see Storey 2007). Otherwise, the statistic type has no affect on the |
|
30 |
+model fit. |
|
30 | 31 |
} |
31 | 32 |
\note{ |
32 | 33 |
\code{fit_models} does not have to be called by the user to use |
... | ... |
@@ -6,8 +6,8 @@ |
6 | 6 |
\title{Gene expression dataset from Idaghdour et al. (2008)} |
7 | 7 |
\format{\itemize{ |
8 | 8 |
\item batch: Batches in experiment. |
9 |
- \item location: Location of Moroccan Amazigh groups. |
|
10 |
- \item gender: Gender of individuals. |
|
9 |
+ \item location: Environment/lifestyle of Moroccan Amazigh groups. |
|
10 |
+ \item gender: Sex of individuals. |
|
11 | 11 |
\item gibexpr: A 500 rows by 46 columns matrix of gene expression values. |
12 | 12 |
}} |
13 | 13 |
\usage{ |
... | ... |
@@ -18,7 +18,7 @@ gibson dataset |
18 | 18 |
} |
19 | 19 |
\description{ |
20 | 20 |
The data provide gene expression measurements in peripheral blood leukocyte |
21 |
-samples from three Moroccan Amazigh groups leading distinct ways of life: |
|
21 |
+samples from three Moroccan groups leading distinct ways of life: |
|
22 | 22 |
desert nomadic (DESERT), mountain agrarian (VILLAGE), and coastal urban |
23 | 23 |
(AGADIR). |
24 | 24 |
} |
... | ... |
@@ -19,12 +19,13 @@ individual(object) <- value |
19 | 19 |
\arguments{ |
20 | 20 |
\item{object}{\code{\linkS4class{deSet}}} |
21 | 21 |
|
22 |
-\item{value}{\code{factor}: identifier for each observation. Important |
|
23 |
-if the same individuals are sampled multiple times.} |
|
22 |
+\item{value}{\code{factor}: Identifies which samples correspond to which |
|
23 |
+ individuals. Important if the same individuals are sampled multiple times |
|
24 |
+ in a longitudinal fashion.} |
|
24 | 25 |
} |
25 | 26 |
\value{ |
26 |
-\code{individual} returns information regarding individuals |
|
27 |
-in the experiment. |
|
27 |
+\code{individual} returns information regarding dinstinct individuals |
|
28 |
+ sampled in the experiment. |
|
28 | 29 |
} |
29 | 30 |
\description{ |
30 | 31 |
These generic functions access and set the individual slot in |
... | ... |
@@ -35,7 +36,7 @@ library(splines) |
35 | 36 |
# import data |
36 | 37 |
data(endotoxin) |
37 | 38 |
ind <- endotoxin$ind |
38 |
-time <- endotoxin$t |
|
39 |
+time <- endotoxin$time |
|
39 | 40 |
class <- endotoxin$class |
40 | 41 |
endoexpr <- endotoxin$endoexpr |
41 | 42 |
cov <- data.frame(individual = ind, time = time, class = class) |
... | ... |
@@ -12,8 +12,6 @@ |
12 | 12 |
different tissue sample. |
13 | 13 |
\item age: A vector of length 133 giving the age of each sample. |
14 | 14 |
\item sex: A vector of length 133 giving the sex of each sample. |
15 |
- \item tissue: A vector of length 133 giving the tissue type of each |
|
16 |
- sample. |
|
17 | 15 |
}} |
18 | 16 |
\usage{ |
19 | 17 |
data(kidney) |
... | ... |
@@ -24,14 +22,14 @@ kidney dataset |
24 | 22 |
\description{ |
25 | 23 |
Gene expression measurements from kidney samples were obtained from 72 |
26 | 24 |
human subjects ranging in age from 27 to 92 years. Only one array was |
27 |
-obtained per sample, and the age and tissue type of each subject was |
|
25 |
+obtained per individual, and the age and sex of each individual were |
|
28 | 26 |
recorded. |
29 | 27 |
} |
30 | 28 |
\note{ |
31 | 29 |
These data are a random subset of 500 probe-sets from the total number of |
32 | 30 |
probe-sets in the original data set. To download the full data set, go to |
33 |
-\url{http://genomine.org/edge/}. The \code{age}, \code{sex}, and |
|
34 |
-\code{tissue} data are contained in \code{kidcov} data frame. |
|
31 |
+\url{http://genomine.org/edge/}. The \code{age} and \code{sex} are contained |
|
32 |
+in \code{kidcov} data frame. |
|
35 | 33 |
} |
36 | 34 |
\examples{ |
37 | 35 |
# import data |
... | ... |
@@ -18,13 +18,13 @@ kl_clust(object, de.fit = NULL, n.mods = 50) |
18 | 18 |
|
19 | 19 |
\item{de.fit}{\code{S4 object}: \code{\linkS4class{deFit}}.} |
20 | 20 |
|
21 |
-\item{n.mods}{\code{integer}: number of clusters.} |
|
21 |
+\item{n.mods}{\code{integer}: number of modules (i.e., clusters).} |
|
22 | 22 |
} |
23 | 23 |
\value{ |
24 | 24 |
A list with the following slots: |
25 | 25 |
\itemize{ |
26 |
- \item {mu.full: cluster means from full model.} |
|
27 |
- \item {mu.null: cluster means from null model.} |
|
26 |
+ \item {mu.full: cluster averaged fitted values from full model.} |
|
27 |
+ \item {mu.null: cluster averaged fitted values from null model.} |
|
28 | 28 |
\item {sig.full: cluster standard deviations from full model.} |
29 | 29 |
\item {sig.null: cluster standard deviations from null model.} |
30 | 30 |
\item {n.per.mod: total members in each cluster.} |
... | ... |
@@ -33,22 +33,21 @@ A list with the following slots: |
33 | 33 |
} |
34 | 34 |
\description{ |
35 | 35 |
\code{kl_clust} is an implementation of mODP that assigns genes to modules |
36 |
-based off of the Kullback-Leibler distance. |
|
36 |
+based on of the Kullback-Leibler distance. |
|
37 | 37 |
} |
38 | 38 |
\details{ |
39 | 39 |
mODP utilizes a k-means clustering algorithm where genes are |
40 | 40 |
assigned to a cluster based on the Kullback-Leiber distance. Each gene is |
41 | 41 |
assigned an module-average parameter to calculate the ODP score (See Woo, |
42 |
-Leek and Storey 2010 for more details). The mODP and full ODP produce near |
|
42 |
+Leek and Storey 2010 for more details). The mODP and full ODP produce nearly |
|
43 | 43 |
exact results but mODP has the advantage of being computationally |
44 |
-feasible. |
|
44 |
+faster. |
|
45 | 45 |
} |
46 | 46 |
\note{ |
47 | 47 |
The results are generally insensitive to the number of modules after a |
48 |
-certain threshold of about n.mods>=50. It is recommended that users |
|
49 |
-experiment with the number of clusters. If the number of clusters is equal |
|
50 |
-to the number of genes then the original ODP is implemented. Depending on |
|
51 |
-the number of hypothesis tests, this can take some time. |
|
48 |
+ certain threshold of about n.mods>=50 in our experience. It is recommended |
|
49 |
+ that users experiment with the number of modules. If the number of modules |
|
50 |
+ is equal to the number of genes then the original ODP is implemented. |
|
52 | 51 |
} |
53 | 52 |
\examples{ |
54 | 53 |
# import data |
... | ... |
@@ -93,7 +92,6 @@ Woo S, Leek JT, Storey JD (2010) A computationally efficient modular optimal |
93 | 92 |
discovery procedure. Bioinformatics, 27(4): 509-515. |
94 | 93 |
} |
95 | 94 |
\seealso{ |
96 |
-\code{\link{odp}}, \code{\link{lrt}} and |
|
97 |
-\code{\link{fit_models}} |
|
95 |
+\code{\link{odp}}, \code{\link{fit_models}} |
|
98 | 96 |
} |
99 | 97 |
|
... | ... |
@@ -47,12 +47,12 @@ Default is TRUE.} |
47 | 47 |
null models. |
48 | 48 |
} |
49 | 49 |
\details{ |
50 |
-\code{lrt} fits the full and null models to each gene using the function |
|
51 |
-\code{\link{fit_models}} and then performs a likelihood ratio test. The |
|
52 |
-user has the option to calculate p-values from either the F distribution or |
|
53 |
-through a bootstrap algorithm. If \code{nullDistn} is "bootstrap" |
|
54 |
-then empirical p-values will be determined from the \code{\link{qvalue}} |
|
55 |
-package (see \code{\link{empPvals}}). |
|
50 |
+\code{lrt} fits the full and null models to each gene using the |
|
51 |
+function \code{\link{fit_models}} and then performs a likelihood ratio test. |
|
52 |
+The user has the option to calculate p-values a Normal distribution |
|
53 |
+assumption or through a bootstrap algorithm. If \code{nullDistn} is |
|
54 |
+"bootstrap" then empirical p-values will be determined from the |
|
55 |
+\code{\link{qvalue}} package (see \code{\link{empPvals}}). |
|
56 | 56 |
} |
57 | 57 |
\examples{ |
58 | 58 |
# import data |
... | ... |
@@ -91,6 +91,8 @@ John Storey, Andrew Bass |
91 | 91 |
Storey JD, Xiao W, Leek JT, Tompkins RG, and Davis RW. (2005) Significance |
92 | 92 |
analysis of time course microarray experiments. Proceedings of the National |
93 | 93 |
Academy of Sciences, 102: 12837-12842. |
94 |
+ |
|
95 |
+\url{http://en.wikipedia.org/wiki/Likelihood-ratio_test} |
|
94 | 96 |
} |
95 | 97 |
\seealso{ |
96 | 98 |
\code{\linkS4class{deSet}}, \code{\link{build_models}}, |
... | ... |
@@ -42,23 +42,22 @@ Default is TRUE.} |
42 | 42 |
\code{\linkS4class{deSet}} object |
43 | 43 |
} |
44 | 44 |
\description{ |
45 |
-\code{odp} performs the optimal discovery procedure, which is a new |
|
46 |
-approach for optimally performing many hypothesis tests in a |
|
47 |
-high-dimensional study. When testing whether a feature is significant, the |
|
48 |
-optimal discovery procedure uses information across all features when |
|
49 |
-testing for significance. |
|
45 |
+\code{odp} performs the optimal discovery procedure, which is a framework for |
|
46 |
+optimally performing many hypothesis tests in a high-dimensional study. When |
|
47 |
+testing whether a feature is significant, the optimal discovery procedure |
|
48 |
+uses information across all features when testing for significance. |
|
50 | 49 |
} |
51 | 50 |
\details{ |
52 |
-The full ODP estimator computationally grows quadratically with |
|
53 |
-respect to the number of genes. This becomes computationally infeasible at |
|
54 |
-a certain point. Therefore, an alternative method called mODP is used which |
|
55 |
-has been shown to provide results that are very similar. mODP utilizes a |
|
56 |
-k-means clustering algorithm where genes are assigned to a cluster based on |
|
57 |
-the Kullback-Leiber distance. Each gene is assigned an module-average |
|
58 |
-parameter to calculate the ODP score and it reduces the computations time |
|
59 |
-to linear (See Woo, Leek and Storey 2010). If the number of clusters is equal |
|
60 |
-to the number of genes then the original ODP is implemented. Depending on |
|
61 |
-the number of hypothesis tests, this can take some time. |
|
51 |
+The full ODP estimator computationally grows quadratically with respect to |
|
52 |
+the number of genes. This becomes computationally taxing at a certain point. |
|
53 |
+Therefore, an alternative method called mODP is used which has been shown to |
|
54 |
+provide results that are very similar. mODP utilizes a clustering algorithm |
|
55 |
+where genes are assigned to a cluster based on the Kullback-Leiber distance. |
|
56 |
+Each gene is assigned an module-average parameter to calculate the ODP score |
|
57 |
+and it reduces the computations time to approximately linear (see Woo, Leek |
|
58 |
+and Storey 2010). If the number of clusters is equal to the number of genes |
|
59 |
+then the original ODP is implemented. Depending on the number of hypothesis |
|
60 |
+tests, this can take some time. |
|
62 | 61 |
} |
63 | 62 |
\examples{ |
64 | 63 |
# import data |