git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/edge@109556 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,47 @@ |
1 |
+fit_wmodels <- function(object, w = NULL, stat.type = c("lrt", "odp")) { |
|
2 |
+ exprsData <- exprs(object) |
|
3 |
+ n <- ncol(exprsData) |
|
4 |
+ nr <- nrow(exprsData) |
|
5 |
+ stat.var <- match.arg(stat.type, c("lrt", "odp")) |
|
6 |
+ null.matrix <- object@null.matrix |
|
7 |
+ full.matrix <- object@full.matrix |
|
8 |
+ if (length(object@individual) != 0) { |
|
9 |
+ ind.matrix <- model.matrix(~-1 + as.factor(object@individual)) |
|
10 |
+ Hi <- projMatrix(ind.matrix) |
|
11 |
+ fitInd <- t(Hi %*% t(exprsData)) |
|
12 |
+ exprsData <- exprsData - fitInd |
|
13 |
+ full.matrix <- full.matrix - Hi %*% full.matrix |
|
14 |
+ null.matrix <- null.matrix - Hi %*% null.matrix |
|
15 |
+ full.matrix <- rm.zero.cols(full.matrix) |
|
16 |
+ null.matrix <- rm.zero.cols(null.matrix) |
|
17 |
+ } |
|
18 |
+ fitFull <- fitNull <- resNull <- resFull <- matrix(nrow=nr, ncol=n) |
|
19 |
+ for (i in 1:nr) { |
|
20 |
+ wlm_full <- lm.wfit(x = full.matrix, y = exprsData[i,], w = w[i,]) |
|
21 |
+ wlm_null <- lm.wfit(x = null.matrix, y = exprsData[i,], w = w[i,]) |
|
22 |
+ |
|
23 |
+ fitFull[i,] <- wlm_full$fitted.values |
|
24 |
+ fitNull[i,] <- wlm_null$fitted.values |
|
25 |
+ |
|
26 |
+ resFull[i,] <- wlm_full$residuals * sqrt(wlm_full$weights) |
|
27 |
+ resNull[i,] <- wlm_null$residuals * sqrt(wlm_full$weights) |
|
28 |
+ } |
|
29 |
+ dHFull <- diag(projMatrix(null.matrix)) |
|
30 |
+ B.coef <- exprsData %*% full.matrix %*% ginv(t(full.matrix) %*% full.matrix) |
|
31 |
+ if (stat.var == "odp") { |
|
32 |
+ H.null <- projMatrix(null.matrix) |
|
33 |
+ full.matrix <- full.matrix - H.null %*% full.matrix |
|
34 |
+ full.matrix <- rm.zero.cols(full.matrix) |
|
35 |
+ H.full <- projMatrix(full.matrix) |
|
36 |
+ B.coef <- resNull %*% full.matrix %*% ginv(t(full.matrix) %*% full.matrix) |
|
37 |
+ dHFull <- diag(H.full) |
|
38 |
+ fitFull <- t(H.full %*% t(resNull)) |
|
39 |
+ resFull <- resNull - fitFull |
|
40 |
+ } |
|
41 |
+ |
|
42 |
+ efObj <- new("deFit", fit.full = fitFull, fit.null = fitNull, |
|
43 |
+ dH.full = dHFull, res.full = resFull, |
|
44 |
+ res.null = resNull, beta.coef = B.coef, |
|
45 |
+ stat.type = stat.var) |
|
46 |
+ return(efObj) |
|
47 |
+} |
0 | 48 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,106 @@ |
1 |
+% Generated by roxygen2 (4.1.1): do not edit by hand |
|
2 |
+% Please edit documentation in R/AllGenerics.R, R/deSet-methods.R |
|
3 |
+\docType{methods} |
|
4 |
+\name{apply_jackstraw} |
|
5 |
+\alias{apply_jackstraw} |
|
6 |
+\alias{apply_jackstraw,deSet-method} |
|
7 |
+\title{Non-Parametric Jackstraw for Principal Component Analysis (PCA)} |
|
8 |
+\usage{ |
|
9 |
+apply_jackstraw(object, PC = NULL, r = NULL, s = NULL, B = NULL, |
|
10 |
+ covariate = NULL, verbose = TRUE, seed = NULL) |
|
11 |
+ |
|
12 |
+\S4method{apply_jackstraw}{deSet}(object, PC = NULL, r = NULL, s = NULL, |
|
13 |
+ B = NULL, covariate = NULL, verbose = TRUE, seed = NULL) |
|
14 |
+} |
|
15 |
+\arguments{ |
|
16 |
+\item{object}{\code{S4 object}: \code{\linkS4class{deSet}}} |
|
17 |
+ |
|
18 |
+\item{PC}{a numeric vector of principal components of interest. Choose a subset of r significant PCs to be used.} |
|
19 |
+ |
|
20 |
+\item{r}{a number (a positive integer) of significant principal components.} |
|
21 |
+ |
|
22 |
+\item{s}{a number (a positive integer) of synthetic null variables. Out of m variables, s variables are independently permuted.} |
|
23 |
+ |
|
24 |
+\item{B}{a number (a positive integer) of resampling iterations. There will be a total of s*B null statistics.} |
|
25 |
+ |
|
26 |
+\item{covariate}{a data matrix of covariates with corresponding n observations.} |
|
27 |
+ |
|
28 |
+\item{verbose}{a logical indicator as to whether to print the progress.} |
|
29 |
+ |
|
30 |
+\item{seed}{a seed for the random number generator.} |
|
31 |
+} |
|
32 |
+\value{ |
|
33 |
+\code{apply_jackstraw} returns a \code{list} containing the following |
|
34 |
+slots: |
|
35 |
+\itemize{ |
|
36 |
+\item{\code{p.value} the m p-values of association tests between variables |
|
37 |
+and their principal components} |
|
38 |
+\item{\code{obs.stat} the observed F-test statistics} |
|
39 |
+\item{\code{null.stat} the s*B null F-test statistics} |
|
40 |
+} |
|
41 |
+} |
|
42 |
+\description{ |
|
43 |
+Estimates statistical significance of association between variables and |
|
44 |
+their principal components (PCs). |
|
45 |
+} |
|
46 |
+\details{ |
|
47 |
+This function computes m p-values of linear association between m variables |
|
48 |
+and their PCs. Its resampling strategy accounts for the over-fitting |
|
49 |
+characteristics due to direct computation of PCs from the observed data |
|
50 |
+and protects against an anti-conservative bias. |
|
51 |
+ |
|
52 |
+Provide the \code{\linkS4class{deSet}}, |
|
53 |
+with m variables as rows and n observations as columns. Given that there are |
|
54 |
+r significant PCs, this function tests for linear association between m |
|
55 |
+varibles and their r PCs. |
|
56 |
+ |
|
57 |
+You could specify a subset of significant PCs |
|
58 |
+that you are interested in (PC). If PC is given, then this function computes |
|
59 |
+statistical significance of association between m variables and PC, while |
|
60 |
+adjusting for other PCs (i.e., significant PCs that are not your interest). |
|
61 |
+For example, if you want to identify variables associated with 1st and 2nd |
|
62 |
+PCs, when your data contains three significant PCs, set r=3 and PC=c(1,2). |
|
63 |
+ |
|
64 |
+Please take a careful look at your data and use appropriate graphical and |
|
65 |
+statistical criteria to determine a number of significant PCs, r. The number |
|
66 |
+of significant PCs depends on the data structure and the context. In a case |
|
67 |
+when you fail to specify r, it will be estimated from a permutation test |
|
68 |
+(Buja and Eyuboglu, 1992) using a function \code{\link{permutationPA}}. |
|
69 |
+ |
|
70 |
+If s is not supplied, s is set to about 10% of m variables. If B is not |
|
71 |
+supplied, B is set to m*10/s. |
|
72 |
+} |
|
73 |
+\examples{ |
|
74 |
+library(splines) |
|
75 |
+data(kidney) |
|
76 |
+age <- kidney$age |
|
77 |
+sex <- kidney$sex |
|
78 |
+kidexpr <- kidney$kidexpr |
|
79 |
+cov <- data.frame(sex = sex, age = age) |
|
80 |
+# create models |
|
81 |
+null_model <- ~sex |
|
82 |
+full_model <- ~sex + ns(age, df = 4) |
|
83 |
+# create deSet object from data |
|
84 |
+de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, |
|
85 |
+ full.model = full_model) |
|
86 |
+## apply the jackstraw |
|
87 |
+out = apply_jackstraw(de_obj, PC=1, r=1) |
|
88 |
+## Use optional arguments |
|
89 |
+## For example, set s and B for a balance between speed of the algorithm and accuracy of p-values |
|
90 |
+## out = apply_jackstraw(dat, PC=1, r=1, s=10, B=1000, seed=5678) |
|
91 |
+} |
|
92 |
+\author{ |
|
93 |
+Neo Christopher Chung \email{nc@princeton.edu} |
|
94 |
+} |
|
95 |
+\references{ |
|
96 |
+Chung and Storey (2013) Statistical Significance of |
|
97 |
+Variables Driving Systematic Variation in |
|
98 |
+High-Dimensional Data. arXiv:1308.6013 [stat.ME] |
|
99 |
+\url{http://arxiv.org/abs/1308.6013} |
|
100 |
+ |
|
101 |
+More information available at \url{http://ncc.name/} |
|
102 |
+} |
|
103 |
+\seealso{ |
|
104 |
+\code{\link{permutationPA}} |
|
105 |
+} |
|
106 |
+ |