git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/GSVA@130527 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
Package: GSVA |
2 |
-Version: 1.25.1 |
|
2 |
+Version: 1.25.2 |
|
3 | 3 |
Title: Gene Set Variation Analysis for microarray and RNA-seq data |
4 | 4 |
Authors@R: c(person("Justin", "Guinney", role=c("aut", "cre"), email="justin.guinney@sagebase.org"), |
5 | 5 |
person("Robert", "Castelo", role="aut", email="robert.castelo@upf.edu")) |
... | ... |
@@ -463,11 +463,15 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, rnaseq=FALSE, |
463 | 463 |
} |
464 | 464 |
|
465 | 465 |
rank.scores <- rep(0, num_genes) |
466 |
- if(abs.ranking){ |
|
467 |
- sort.sgn.idxs <- apply(abs(gene.density), 2, order, decreasing=TRUE) # n.genes * n.samples |
|
468 |
- }else{ |
|
469 |
- sort.sgn.idxs <- apply(gene.density, 2, order, decreasing=TRUE) # n.genes * n.samples |
|
470 |
- } |
|
466 |
+ ## 19.06.17 disable current functioning of abs.ranking flag |
|
467 |
+ ## to switch to a different approach to implement it during |
|
468 |
+ ## the KS random walk based on the Kuiper statistic |
|
469 |
+ ## if(abs.ranking){ |
|
470 |
+ ## sort.sgn.idxs <- apply(abs(gene.density), 2, order, decreasing=TRUE) # n.genes * n.samples |
|
471 |
+ ## }else{ |
|
472 |
+ ## sort.sgn.idxs <- apply(gene.density, 2, order, decreasing=TRUE) # n.genes * n.samples |
|
473 |
+ ## } |
|
474 |
+ sort.sgn.idxs <- apply(gene.density, 2, order, decreasing=TRUE) # n.genes * n.samples |
|
471 | 475 |
|
472 | 476 |
rank.scores <- apply(sort.sgn.idxs, 2, compute_rank_score) |
473 | 477 |
|
... | ... |
@@ -503,7 +507,8 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, rnaseq=FALSE, |
503 | 507 |
m <- t(parSapp(cl, gset.idx.list, ks_test_m, |
504 | 508 |
gene.density=rank.scores, |
505 | 509 |
sort.idxs=sort.sgn.idxs, |
506 |
- mx.diff=mx.diff, tau=tau, verbose=FALSE)) |
|
510 |
+ mx.diff=mx.diff, abs.ranking=abs.ranking, |
|
511 |
+ tau=tau, verbose=FALSE)) |
|
507 | 512 |
if(verbose) |
508 | 513 |
cat("Cleaning up\n") |
509 | 514 |
stopCl(cl) |
... | ... |
@@ -528,7 +533,8 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, rnaseq=FALSE, |
528 | 533 |
m <- mclapp(gset.idx.list, ks_test_m, |
529 | 534 |
gene.density=rank.scores, |
530 | 535 |
sort.idxs=sort.sgn.idxs, |
531 |
- mx.diff=mx.diff, tau=tau, verbose=verbose) |
|
536 |
+ mx.diff=mx.diff, abs.ranking=abs.ranking, |
|
537 |
+ tau=tau, verbose=verbose) |
|
532 | 538 |
m <- do.call("rbind", m) |
533 | 539 |
colnames(m) <- colnames(expr) |
534 | 540 |
|
... | ... |
@@ -555,7 +561,8 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, rnaseq=FALSE, |
555 | 561 |
} |
556 | 562 |
|
557 | 563 |
m <- t(sapply(gset.idx.list, ks_test_m, rank.scores, sort.sgn.idxs, |
558 |
- mx.diff=mx.diff, tau=tau, verbose=verbose)) |
|
564 |
+ mx.diff=mx.diff, abs.ranking=abs.ranking, |
|
565 |
+ tau=tau, verbose=verbose)) |
|
559 | 566 |
|
560 | 567 |
if (verbose) { |
561 | 568 |
setTxtProgressBar(get("progressBar", envir=globalenv()), 1) |
... | ... |
@@ -566,7 +573,8 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, rnaseq=FALSE, |
566 | 573 |
} |
567 | 574 |
|
568 | 575 |
|
569 |
-ks_test_m <- function(gset_idxs, gene.density, sort.idxs, mx.diff=TRUE, tau=1, verbose=TRUE){ |
|
576 |
+ks_test_m <- function(gset_idxs, gene.density, sort.idxs, mx.diff=TRUE, |
|
577 |
+ abs.ranking=FALSE, tau=1, verbose=TRUE){ |
|
570 | 578 |
|
571 | 579 |
n.genes <- nrow(gene.density) |
572 | 580 |
n.samples <- ncol(gene.density) |
... | ... |
@@ -581,7 +589,8 @@ ks_test_m <- function(gset_idxs, gene.density, sort.idxs, mx.diff=TRUE, tau=1, v |
581 | 589 |
n.geneset, |
582 | 590 |
as.double(tau), |
583 | 591 |
n.samples, |
584 |
- as.integer(mx.diff))$R |
|
592 |
+ as.integer(mx.diff), |
|
593 |
+ as.integer(abs.ranking))$R |
|
585 | 594 |
|
586 | 595 |
if (verbose) { |
587 | 596 |
assign("iGeneSet", get("iGeneSet", envir=globalenv()) + 1, envir=globalenv()) |
... | ... |
@@ -103,10 +103,12 @@ Estimates GSVA enrichment scores. |
103 | 103 |
RNA-seq counts such as log-CPMs, log-RPKMs or log-TPMs. This flag should be set to |
104 | 104 |
\code{rnaseq=TRUE} only when the values of the input gene expression data are integer |
105 | 105 |
counts.} |
106 |
- \item{abs.ranking}{Flag to determine whether genes should be ranked according to |
|
107 |
- their sign (\code{abs.ranking=FALSE}) or by absolute value (\code{abs.ranking=TRUE}). |
|
108 |
- In the latter, pathways with genes enriched on either extreme |
|
109 |
- (high or low) will be regarded as 'highly' activated. } |
|
106 |
+ \item{abs.ranking}{Flag used only when \code{mx.diff=TRUE}. When \code{abs.ranking=FALSE} (default) |
|
107 |
+ a modified Kuiper statistic is used to calculate enrichment scores, taking the magnitude |
|
108 |
+ difference between the largest positive and negative random walk deviations. When |
|
109 |
+ \code{abs.ranking=TRUE} the original Kuiper statistic that sums the largest positive and |
|
110 |
+ negative random walk deviations, is used. In this latter case, gene sets with genes |
|
111 |
+ enriched on either extreme (high or low) will be regarded as 'highly' activated.} |
|
110 | 112 |
\item{min.sz}{Minimum size of the resulting gene sets.} |
111 | 113 |
\item{max.sz}{Maximum size of the resulting gene sets.} |
112 | 114 |
\item{no.bootstraps}{Number of bootstrap iterations to perform.} |
... | ... |
@@ -203,7 +205,7 @@ geneSets <- list(set1=paste("g", 1:3, sep=""), |
203 | 205 |
y <- matrix(rnorm(n*p), nrow=p, ncol=n, |
204 | 206 |
dimnames=list(paste("g", 1:p, sep="") , paste("s", 1:n, sep=""))) |
205 | 207 |
|
206 |
-## genes in set1 are expressed at higher levels in the last 10 samples |
|
208 |
+## genes in set1 are expressed at higher levels in the last 'nGrp1+1' to 'n' samples |
|
207 | 209 |
y[geneSets$set1, (nGrp1+1):n] <- y[geneSets$set1, (nGrp1+1):n] + 2 |
208 | 210 |
|
209 | 211 |
## build design matrix |
... | ... |
@@ -6,10 +6,12 @@ |
6 | 6 |
|
7 | 7 |
|
8 | 8 |
void |
9 |
-ks_matrix_R(double* X, double* R, int* sidxs, int* n_genes, int* geneset_idxs, int* n_geneset, double* tau, int* n_samples, int* mx_diff); |
|
9 |
+ks_matrix_R(double* X, double* R, int* sidxs, int* n_genes, int* geneset_idxs, |
|
10 |
+ int* n_geneset, double* tau, int* n_samples, int* mx_diff, int* abs_rnk); |
|
10 | 11 |
|
11 | 12 |
double |
12 |
-ks_sample(double* x, int* x_sort_indxs, int n_genes, int* geneset_mask, int* geneset_idxs, int n_geneset, double tau, int mx_diff){ |
|
13 |
+ks_sample(double* x, int* x_sort_indxs, int n_genes, int* geneset_mask, |
|
14 |
+ int* geneset_idxs, int n_geneset, double tau, int mx_diff, int abs_rnk){ |
|
13 | 15 |
|
14 | 16 |
|
15 | 17 |
double dec = 1.0 / (n_genes - n_geneset); |
... | ... |
@@ -40,9 +42,11 @@ ks_sample(double* x, int* x_sort_indxs, int n_genes, int* geneset_mask, int* gen |
40 | 42 |
if(cum_sum < mx_neg){ mx_neg = cum_sum; } |
41 | 43 |
} |
42 | 44 |
|
43 |
- if(mx_diff!=0){ |
|
45 |
+ if (mx_diff != 0) { |
|
44 | 46 |
mx_value_sign = mx_pos + mx_neg; |
45 |
- }else{ |
|
47 |
+ if (abs_rnk != 0) |
|
48 |
+ mx_value_sign = mx_pos - mx_neg; |
|
49 |
+ } else { |
|
46 | 50 |
mx_value_sign = (mx_pos > fabs(mx_neg)) ? mx_pos : mx_neg; |
47 | 51 |
} |
48 | 52 |
return mx_value_sign; |
... | ... |
@@ -54,7 +58,8 @@ ks_sample(double* x, int* x_sort_indxs, int n_genes, int* geneset_mask, int* gen |
54 | 58 |
* R <- result |
55 | 59 |
* sidxs <- sorted gene densities idxs |
56 | 60 |
*/ |
57 |
-void ks_matrix(double* X, double* R, int* sidxs, int n_genes, int* geneset_idxs, int n_geneset, double tau, int n_samples, int mx_diff){ |
|
61 |
+void ks_matrix(double* X, double* R, int* sidxs, int n_genes, int* geneset_idxs, |
|
62 |
+ int n_geneset, double tau, int n_samples, int mx_diff, int abs_rnk){ |
|
58 | 63 |
int geneset_mask[n_genes]; |
59 | 64 |
for(int i = 0; i < n_genes; ++i){ |
60 | 65 |
geneset_mask[i] = 0; |
... | ... |
@@ -66,11 +71,13 @@ void ks_matrix(double* X, double* R, int* sidxs, int n_genes, int* geneset_idxs, |
66 | 71 |
|
67 | 72 |
for(int j = 0; j < n_samples; ++j){ |
68 | 73 |
int offset = j * n_genes; |
69 |
- R[j] = ks_sample(&X[offset], &sidxs[offset], n_genes, &geneset_mask[0], geneset_idxs, n_geneset, tau, mx_diff); |
|
74 |
+ R[j] = ks_sample(&X[offset], &sidxs[offset], n_genes, &geneset_mask[0], |
|
75 |
+ geneset_idxs, n_geneset, tau, mx_diff, abs_rnk); |
|
70 | 76 |
} |
71 | 77 |
} |
72 | 78 |
|
73 |
-void ks_matrix_R(double* X, double* R, int* sidxs, int* n_genes, int* geneset_idxs, int* n_geneset, double* tau, int* n_samples, int* mx_diff){ |
|
74 |
- ks_matrix(X,R, sidxs, *n_genes, geneset_idxs, *n_geneset, *tau, *n_samples, *mx_diff); |
|
79 |
+void ks_matrix_R(double* X, double* R, int* sidxs, int* n_genes, |
|
80 |
+ int* geneset_idxs, int* n_geneset, double* tau, |
|
81 |
+ int* n_samples, int* mx_diff, int* abs_rnk) { |
|
82 |
+ ks_matrix(X,R, sidxs, *n_genes, geneset_idxs, *n_geneset, *tau, *n_samples, *mx_diff, *abs_rnk); |
|
75 | 83 |
} |
76 |
- |
... | ... |
@@ -9,7 +9,7 @@ void |
9 | 9 |
matrix_density_R(double* X, double* Y, double* R, int* n_density_samples,int* n_test_samples, int* n_genes, int* rnaseq); |
10 | 10 |
|
11 | 11 |
void |
12 |
-ks_matrix_R(double* X, double* R, int* sidxs, int* n_genes, int* geneset_idxs, int* n_geneset, double* tau, int* n_samples, int* mx_diff); |
|
12 |
+ks_matrix_R(double* X, double* R, int* sidxs, int* n_genes, int* geneset_idxs, int* n_geneset, double* tau, int* n_samples, int* mx_diff, int* abs_rnk); |
|
13 | 13 |
|
14 | 14 |
/* registration of C-entry points */ |
15 | 15 |
|
... | ... |
@@ -17,12 +17,12 @@ static R_NativePrimitiveArgType |
17 | 17 |
matrix_density_R_t[7] = {REALSXP, REALSXP, REALSXP, INTSXP, INTSXP, INTSXP, INTSXP}; |
18 | 18 |
|
19 | 19 |
static R_NativePrimitiveArgType |
20 |
-ks_matrix_R_t[9] = {REALSXP, REALSXP, INTSXP, INTSXP, INTSXP, INTSXP, REALSXP, INTSXP, INTSXP}; |
|
20 |
+ks_matrix_R_t[10] = {REALSXP, REALSXP, INTSXP, INTSXP, INTSXP, INTSXP, REALSXP, INTSXP, INTSXP, INTSXP}; |
|
21 | 21 |
|
22 | 22 |
static const R_CMethodDef |
23 | 23 |
cMethods[] = { |
24 | 24 |
{"matrix_density_R", (DL_FUNC) &matrix_density_R, 7, matrix_density_R_t}, |
25 |
- {"ks_matrix_R", (DL_FUNC) &ks_matrix_R, 9, ks_matrix_R_t}, |
|
25 |
+ {"ks_matrix_R", (DL_FUNC) &ks_matrix_R, 10, ks_matrix_R_t}, |
|
26 | 26 |
{NULL, NULL, 0} |
27 | 27 |
}; |
28 | 28 |
|