Browse code

Update to edge 2.1.1 (added two new files).

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/edge@109556 bc3139a8-67e5-0310-9ffc-ced21a209358

John D Storey authored on 13/10/2015 01:22:03
Showing 2 changed files

1 1
new file mode 100644
... ...
@@ -0,0 +1,47 @@
1
+fit_wmodels <- function(object, w = NULL,  stat.type = c("lrt", "odp")) {
2
+  exprsData <- exprs(object)
3
+  n <- ncol(exprsData)
4
+  nr <- nrow(exprsData)
5
+  stat.var <- match.arg(stat.type, c("lrt", "odp"))
6
+  null.matrix <- object@null.matrix
7
+  full.matrix <- object@full.matrix
8
+  if (length(object@individual) != 0) {
9
+    ind.matrix <- model.matrix(~-1 + as.factor(object@individual))
10
+    Hi <- projMatrix(ind.matrix)
11
+    fitInd <- t(Hi %*% t(exprsData))
12
+    exprsData <- exprsData - fitInd
13
+    full.matrix <- full.matrix - Hi %*% full.matrix
14
+    null.matrix <- null.matrix - Hi %*% null.matrix
15
+    full.matrix <- rm.zero.cols(full.matrix)
16
+    null.matrix <- rm.zero.cols(null.matrix)
17
+  }
18
+  fitFull <- fitNull <- resNull <- resFull <- matrix(nrow=nr, ncol=n)
19
+  for (i in 1:nr) {
20
+    wlm_full <- lm.wfit(x = full.matrix, y = exprsData[i,], w = w[i,])
21
+    wlm_null <- lm.wfit(x = null.matrix, y = exprsData[i,], w = w[i,])
22
+    
23
+    fitFull[i,] <- wlm_full$fitted.values
24
+    fitNull[i,] <- wlm_null$fitted.values
25
+    
26
+    resFull[i,] <- wlm_full$residuals * sqrt(wlm_full$weights)
27
+    resNull[i,] <- wlm_null$residuals * sqrt(wlm_full$weights)
28
+  }
29
+  dHFull <- diag(projMatrix(null.matrix))
30
+  B.coef <- exprsData %*% full.matrix %*% ginv(t(full.matrix) %*% full.matrix)
31
+  if (stat.var == "odp") {
32
+    H.null <- projMatrix(null.matrix)
33
+    full.matrix <- full.matrix - H.null %*% full.matrix
34
+    full.matrix <- rm.zero.cols(full.matrix)
35
+    H.full <- projMatrix(full.matrix)
36
+    B.coef <- resNull %*% full.matrix %*% ginv(t(full.matrix) %*% full.matrix)
37
+    dHFull <- diag(H.full)
38
+    fitFull <- t(H.full %*% t(resNull))
39
+    resFull <- resNull - fitFull
40
+  }
41
+  
42
+  efObj <- new("deFit", fit.full = fitFull, fit.null = fitNull,
43
+               dH.full = dHFull, res.full = resFull,
44
+               res.null = resNull, beta.coef = B.coef,
45
+               stat.type = stat.var)
46
+  return(efObj)
47
+}
0 48
new file mode 100644
... ...
@@ -0,0 +1,106 @@
1
+% Generated by roxygen2 (4.1.1): do not edit by hand
2
+% Please edit documentation in R/AllGenerics.R, R/deSet-methods.R
3
+\docType{methods}
4
+\name{apply_jackstraw}
5
+\alias{apply_jackstraw}
6
+\alias{apply_jackstraw,deSet-method}
7
+\title{Non-Parametric Jackstraw for Principal Component Analysis (PCA)}
8
+\usage{
9
+apply_jackstraw(object, PC = NULL, r = NULL, s = NULL, B = NULL,
10
+  covariate = NULL, verbose = TRUE, seed = NULL)
11
+
12
+\S4method{apply_jackstraw}{deSet}(object, PC = NULL, r = NULL, s = NULL,
13
+  B = NULL, covariate = NULL, verbose = TRUE, seed = NULL)
14
+}
15
+\arguments{
16
+\item{object}{\code{S4 object}: \code{\linkS4class{deSet}}}
17
+
18
+\item{PC}{a numeric vector of principal components of interest. Choose a subset of r significant PCs to be used.}
19
+
20
+\item{r}{a number (a positive integer) of significant principal components.}
21
+
22
+\item{s}{a number (a positive integer) of synthetic null variables. Out of m variables, s variables are independently permuted.}
23
+
24
+\item{B}{a number (a positive integer) of resampling iterations. There will be a total of s*B null statistics.}
25
+
26
+\item{covariate}{a data matrix of covariates with corresponding n observations.}
27
+
28
+\item{verbose}{a logical indicator as to whether to print the progress.}
29
+
30
+\item{seed}{a seed for the random number generator.}
31
+}
32
+\value{
33
+\code{apply_jackstraw} returns a \code{list} containing the following
34
+slots:
35
+\itemize{
36
+\item{\code{p.value} the m p-values of association tests between variables
37
+and their principal components}
38
+\item{\code{obs.stat} the observed F-test statistics}
39
+\item{\code{null.stat} the s*B null F-test statistics}
40
+}
41
+}
42
+\description{
43
+Estimates statistical significance of association between variables and
44
+their principal components (PCs).
45
+}
46
+\details{
47
+This function computes m p-values of linear association between m variables
48
+and their PCs. Its resampling strategy accounts for the over-fitting
49
+characteristics due to direct computation of PCs from the observed data
50
+and protects against an anti-conservative bias.
51
+
52
+Provide the \code{\linkS4class{deSet}},
53
+with m variables as rows and n observations as columns. Given that there are
54
+r significant PCs, this function tests for linear association between m
55
+varibles and their r PCs.
56
+
57
+You could specify a subset of significant PCs
58
+that you are interested in (PC). If PC is given, then this function computes
59
+statistical significance of association between m variables and PC, while
60
+adjusting for other PCs (i.e., significant PCs that are not your interest).
61
+For example, if you want to identify variables associated with 1st and 2nd
62
+PCs, when your data contains three significant PCs, set r=3 and PC=c(1,2).
63
+
64
+Please take a careful look at your data and use appropriate graphical and
65
+statistical criteria to determine a number of significant PCs, r. The number
66
+of significant PCs depends on the data structure and the context. In a case
67
+when you fail to specify r, it will be estimated from a permutation test
68
+(Buja and Eyuboglu, 1992) using a function \code{\link{permutationPA}}.
69
+
70
+If s is not supplied, s is set to about 10% of m variables. If B is not
71
+supplied, B is set to m*10/s.
72
+}
73
+\examples{
74
+library(splines)
75
+data(kidney)
76
+age <- kidney$age
77
+sex <- kidney$sex
78
+kidexpr <- kidney$kidexpr
79
+cov <- data.frame(sex = sex, age = age)
80
+# create models
81
+null_model <- ~sex
82
+full_model <- ~sex + ns(age, df = 4)
83
+# create deSet object from data
84
+de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
85
+                      full.model = full_model)
86
+## apply the jackstraw
87
+out = apply_jackstraw(de_obj, PC=1, r=1)
88
+## Use optional arguments
89
+## For example, set s and B for a balance between speed of the algorithm and accuracy of p-values
90
+## out = apply_jackstraw(dat, PC=1, r=1, s=10, B=1000, seed=5678)
91
+}
92
+\author{
93
+Neo Christopher Chung \email{nc@princeton.edu}
94
+}
95
+\references{
96
+Chung and Storey (2013) Statistical Significance of
97
+Variables Driving Systematic Variation in
98
+High-Dimensional Data. arXiv:1308.6013 [stat.ME]
99
+\url{http://arxiv.org/abs/1308.6013}
100
+
101
+More information available at \url{http://ncc.name/}
102
+}
103
+\seealso{
104
+\code{\link{permutationPA}}
105
+}
106
+