Browse code

Updated implementation of the option abs.ranking=TRUE to use the original Kuiper statistic.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/GSVA@130527 bc3139a8-67e5-0310-9ffc-ced21a209358

Robert Castelo authored on 19/06/2017 14:35:29
Showing 5 changed files

... ...
@@ -1,5 +1,5 @@
1 1
 Package: GSVA
2
-Version: 1.25.1
2
+Version: 1.25.2
3 3
 Title: Gene Set Variation Analysis for microarray and RNA-seq data
4 4
 Authors@R: c(person("Justin", "Guinney", role=c("aut", "cre"), email="justin.guinney@sagebase.org"),
5 5
              person("Robert", "Castelo", role="aut", email="robert.castelo@upf.edu"))
... ...
@@ -463,11 +463,15 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, rnaseq=FALSE,
463 463
 	}
464 464
 	
465 465
 	rank.scores <- rep(0, num_genes)
466
-	if(abs.ranking){
467
-		sort.sgn.idxs <- apply(abs(gene.density), 2, order, decreasing=TRUE) # n.genes * n.samples	
468
-	}else{
469
-		sort.sgn.idxs <- apply(gene.density, 2, order, decreasing=TRUE) # n.genes * n.samples
470
-	}
466
+  ## 19.06.17 disable current functioning of abs.ranking flag
467
+  ## to switch to a different approach to implement it during
468
+  ## the KS random walk based on the Kuiper statistic
469
+	## if(abs.ranking){
470
+  ##   sort.sgn.idxs <- apply(abs(gene.density), 2, order, decreasing=TRUE) # n.genes * n.samples	
471
+  ## }else{
472
+  ##   sort.sgn.idxs <- apply(gene.density, 2, order, decreasing=TRUE) # n.genes * n.samples
473
+  ## }
474
+  sort.sgn.idxs <- apply(gene.density, 2, order, decreasing=TRUE) # n.genes * n.samples
471 475
 	
472 476
 	rank.scores <- apply(sort.sgn.idxs, 2, compute_rank_score)
473 477
 	
... ...
@@ -503,7 +507,8 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, rnaseq=FALSE,
503 507
 		  m <- t(parSapp(cl, gset.idx.list, ks_test_m,
504 508
 						  gene.density=rank.scores, 
505 509
 						  sort.idxs=sort.sgn.idxs,
506
-						  mx.diff=mx.diff, tau=tau, verbose=FALSE))
510
+						  mx.diff=mx.diff, abs.ranking=abs.ranking,
511
+              tau=tau, verbose=FALSE))
507 512
 		  if(verbose)
508 513
         cat("Cleaning up\n")
509 514
 		  stopCl(cl)
... ...
@@ -528,7 +533,8 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, rnaseq=FALSE,
528 533
       m <- mclapp(gset.idx.list, ks_test_m,
529 534
                   gene.density=rank.scores,
530 535
                   sort.idxs=sort.sgn.idxs,
531
-                  mx.diff=mx.diff, tau=tau, verbose=verbose)
536
+                  mx.diff=mx.diff, abs.ranking=abs.ranking,
537
+                  tau=tau, verbose=verbose)
532 538
       m <- do.call("rbind", m)
533 539
       colnames(m) <- colnames(expr)
534 540
 
... ...
@@ -555,7 +561,8 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, rnaseq=FALSE,
555 561
     }
556 562
 
557 563
 		m <- t(sapply(gset.idx.list, ks_test_m, rank.scores, sort.sgn.idxs,
558
-                  mx.diff=mx.diff, tau=tau, verbose=verbose))
564
+                  mx.diff=mx.diff, abs.ranking=abs.ranking,
565
+                  tau=tau, verbose=verbose))
559 566
 
560 567
     if (verbose) {
561 568
       setTxtProgressBar(get("progressBar", envir=globalenv()), 1)
... ...
@@ -566,7 +573,8 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, rnaseq=FALSE,
566 573
 }
567 574
 
568 575
 
569
-ks_test_m <- function(gset_idxs, gene.density, sort.idxs, mx.diff=TRUE, tau=1, verbose=TRUE){
576
+ks_test_m <- function(gset_idxs, gene.density, sort.idxs, mx.diff=TRUE,
577
+                      abs.ranking=FALSE, tau=1, verbose=TRUE){
570 578
 	
571 579
 	n.genes <- nrow(gene.density)
572 580
 	n.samples <- ncol(gene.density)
... ...
@@ -581,7 +589,8 @@ ks_test_m <- function(gset_idxs, gene.density, sort.idxs, mx.diff=TRUE, tau=1, v
581 589
 			n.geneset,
582 590
 			as.double(tau),
583 591
 			n.samples,
584
-			as.integer(mx.diff))$R
592
+			as.integer(mx.diff),
593
+      as.integer(abs.ranking))$R
585 594
 
586 595
   if (verbose) {
587 596
     assign("iGeneSet", get("iGeneSet", envir=globalenv()) + 1, envir=globalenv())
... ...
@@ -103,10 +103,12 @@ Estimates GSVA enrichment scores.
103 103
                 RNA-seq counts such as log-CPMs, log-RPKMs or log-TPMs. This flag should be set to
104 104
                 \code{rnaseq=TRUE} only when the values of the input gene expression data are integer
105 105
                 counts.}
106
-  \item{abs.ranking}{Flag to determine whether genes should be ranked according to 
107
-  					their sign (\code{abs.ranking=FALSE}) or by absolute value (\code{abs.ranking=TRUE}). 
108
-  					In the latter, pathways with genes enriched on either extreme
109
-  					(high or low) will be regarded as 'highly' activated. }
106
+  \item{abs.ranking}{Flag used only when \code{mx.diff=TRUE}. When \code{abs.ranking=FALSE} (default)
107
+            a modified Kuiper statistic is used to calculate enrichment scores, taking the magnitude
108
+            difference between the largest positive and negative random walk deviations. When
109
+            \code{abs.ranking=TRUE} the original Kuiper statistic that sums the largest positive and
110
+            negative random walk deviations, is used. In this latter case, gene sets with genes
111
+            enriched on either extreme (high or low) will be regarded as 'highly' activated.}
110 112
   \item{min.sz}{Minimum size of the resulting gene sets.}
111 113
   \item{max.sz}{Maximum size of the resulting gene sets.}
112 114
   \item{no.bootstraps}{Number of bootstrap iterations to perform.}
... ...
@@ -203,7 +205,7 @@ geneSets <- list(set1=paste("g", 1:3, sep=""),
203 205
 y <- matrix(rnorm(n*p), nrow=p, ncol=n,
204 206
             dimnames=list(paste("g", 1:p, sep="") , paste("s", 1:n, sep="")))
205 207
 
206
-## genes in set1 are expressed at higher levels in the last 10 samples
208
+## genes in set1 are expressed at higher levels in the last 'nGrp1+1' to 'n' samples
207 209
 y[geneSets$set1, (nGrp1+1):n] <- y[geneSets$set1, (nGrp1+1):n] + 2
208 210
 
209 211
 ## build design matrix
... ...
@@ -6,10 +6,12 @@
6 6
 
7 7
 
8 8
 void
9
-ks_matrix_R(double* X, double* R, int* sidxs, int* n_genes, int* geneset_idxs, int* n_geneset, double* tau,  int* n_samples, int* mx_diff);
9
+ks_matrix_R(double* X, double* R, int* sidxs, int* n_genes, int* geneset_idxs,
10
+            int* n_geneset, double* tau,  int* n_samples, int* mx_diff, int* abs_rnk);
10 11
 
11 12
 double
12
-ks_sample(double* x, int* x_sort_indxs, int n_genes, int* geneset_mask, int* geneset_idxs, int n_geneset, double tau, int mx_diff){
13
+ks_sample(double* x, int* x_sort_indxs, int n_genes, int* geneset_mask,
14
+          int* geneset_idxs, int n_geneset, double tau, int mx_diff, int abs_rnk){
13 15
 
14 16
 
15 17
 	double dec = 1.0 / (n_genes - n_geneset);
... ...
@@ -40,9 +42,11 @@ ks_sample(double* x, int* x_sort_indxs, int n_genes, int* geneset_mask, int* gen
40 42
 		if(cum_sum < mx_neg){ mx_neg = cum_sum; }
41 43
 	}
42 44
 
43
-	if(mx_diff!=0){
45
+	if (mx_diff != 0) {
44 46
 		mx_value_sign = mx_pos + mx_neg;
45
-	}else{
47
+    if (abs_rnk != 0)
48
+      mx_value_sign = mx_pos - mx_neg;
49
+	} else {
46 50
 		mx_value_sign = (mx_pos > fabs(mx_neg)) ? mx_pos : mx_neg;
47 51
 	}
48 52
 	return mx_value_sign;
... ...
@@ -54,7 +58,8 @@ ks_sample(double* x, int* x_sort_indxs, int n_genes, int* geneset_mask, int* gen
54 58
  * R <- result
55 59
  * sidxs <- sorted gene densities idxs
56 60
  */
57
-void ks_matrix(double* X, double* R, int* sidxs, int n_genes, int* geneset_idxs, int n_geneset, double tau, int n_samples, int mx_diff){
61
+void ks_matrix(double* X, double* R, int* sidxs, int n_genes, int* geneset_idxs,
62
+               int n_geneset, double tau, int n_samples, int mx_diff, int abs_rnk){
58 63
 	int geneset_mask[n_genes];
59 64
 	for(int i = 0; i < n_genes; ++i){
60 65
 		geneset_mask[i] = 0;
... ...
@@ -66,11 +71,13 @@ void ks_matrix(double* X, double* R, int* sidxs, int n_genes, int* geneset_idxs,
66 71
 
67 72
 	for(int j = 0; j < n_samples; ++j){
68 73
 		int offset = j * n_genes;
69
-		R[j] = ks_sample(&X[offset], &sidxs[offset], n_genes, &geneset_mask[0], geneset_idxs, n_geneset, tau, mx_diff);
74
+    R[j] = ks_sample(&X[offset], &sidxs[offset], n_genes, &geneset_mask[0],
75
+                     geneset_idxs, n_geneset, tau, mx_diff, abs_rnk);
70 76
 	}
71 77
 }
72 78
 
73
-void ks_matrix_R(double* X, double* R, int* sidxs, int* n_genes, int* geneset_idxs, int* n_geneset, double* tau,  int* n_samples, int* mx_diff){
74
-	ks_matrix(X,R, sidxs, *n_genes, geneset_idxs, *n_geneset, *tau, *n_samples, *mx_diff);
79
+void ks_matrix_R(double* X, double* R, int* sidxs, int* n_genes,
80
+                 int* geneset_idxs, int* n_geneset, double* tau,
81
+                 int* n_samples, int* mx_diff, int* abs_rnk) {
82
+	ks_matrix(X,R, sidxs, *n_genes, geneset_idxs, *n_geneset, *tau, *n_samples, *mx_diff, *abs_rnk);
75 83
 }
76
-
... ...
@@ -9,7 +9,7 @@ void
9 9
 matrix_density_R(double* X, double* Y, double* R, int* n_density_samples,int* n_test_samples, int* n_genes, int* rnaseq);
10 10
 
11 11
 void
12
-ks_matrix_R(double* X, double* R, int* sidxs, int* n_genes, int* geneset_idxs, int* n_geneset, double* tau,  int* n_samples, int* mx_diff);
12
+ks_matrix_R(double* X, double* R, int* sidxs, int* n_genes, int* geneset_idxs, int* n_geneset, double* tau,  int* n_samples, int* mx_diff, int* abs_rnk);
13 13
 
14 14
 /* registration of C-entry points */
15 15
 
... ...
@@ -17,12 +17,12 @@ static R_NativePrimitiveArgType
17 17
 matrix_density_R_t[7] = {REALSXP, REALSXP, REALSXP, INTSXP, INTSXP, INTSXP, INTSXP};
18 18
 
19 19
 static R_NativePrimitiveArgType
20
-ks_matrix_R_t[9] = {REALSXP, REALSXP, INTSXP, INTSXP, INTSXP, INTSXP, REALSXP, INTSXP, INTSXP};
20
+ks_matrix_R_t[10] = {REALSXP, REALSXP, INTSXP, INTSXP, INTSXP, INTSXP, REALSXP, INTSXP, INTSXP, INTSXP};
21 21
 
22 22
 static const R_CMethodDef
23 23
 cMethods[] = {
24 24
   {"matrix_density_R", (DL_FUNC) &matrix_density_R, 7, matrix_density_R_t},
25
-  {"ks_matrix_R", (DL_FUNC) &ks_matrix_R, 9, ks_matrix_R_t},
25
+  {"ks_matrix_R", (DL_FUNC) &ks_matrix_R, 10, ks_matrix_R_t},
26 26
   {NULL, NULL, 0}
27 27
 };
28 28