Browse code

Bugfix to enforce DelayedMatrixStats::rowSds(), with matrixStats::rowSds() behind the scenes, giving an exact zero where base sd() would also give an exact zero. See https://github.com/rcastelo/GSVA/issues/54 and https://github.com/HenrikBengtsson/matrixStats/issues/204

[rcastelo] authored on 03/06/2021 15:42:11
Showing 1 changed files
... ...
@@ -4,6 +4,10 @@
4 4
   ## DelayedMatrixStats::rowSds() works for both base and 
5 5
   ## DelayedArray matrices
6 6
   sdGenes <- DelayedMatrixStats::rowSds(expr)
7
+  ## the following fixes this bug, see issues
8
+  ## https://github.com/rcastelo/GSVA/issues/54
9
+  ## https://github.com/HenrikBengtsson/matrixStats/issues/204
10
+  sdGenes[sdGenes < 1e-10] <- 0
7 11
   if (any(sdGenes == 0) || any(is.na(sdGenes))) {
8 12
     warning(sum(sdGenes == 0 | is.na(sdGenes)),
9 13
             " genes with constant expression values throuhgout the samples.")
Browse code

removed filterFeatureSparse()

pablo-rodr-bio2 authored on 03/05/2021 12:24:35
Showing 1 changed files
... ...
@@ -66,27 +66,3 @@
66 66
   m@x <- unlist(x, use.names=FALSE)
67 67
   m
68 68
 }
69
-
70
-## filter out genes which non-zero values have
71
-## constant expression values
72
-.filterFeaturesSparse <- function(expr, method) {
73
-  
74
-  sdGenes <- sapply(.sparseToList(expr, 1), sd)
75
-  
76
-  if (any(sdGenes == 0) || any(is.na(sdGenes))) {
77
-    warning(sum(sdGenes == 0 | is.na(sdGenes)),
78
-            " genes with constant expression values throuhgout the samples.")
79
-    if (method != "ssgsea") {
80
-      warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.")
81
-      expr <- expr[sdGenes > 0 & !is.na(sdGenes), ]
82
-    }
83
-  }
84
-  
85
-  if (nrow(expr) < 2)
86
-    stop("Less than two genes in the input assay object\n")
87
-  
88
-  if(is.null(rownames(expr)))
89
-    stop("The input assay object doesn't have rownames\n")
90
-  
91
-  expr
92
-}
Browse code

Replaced fastmatch::fmatch() by IRanges::match,CharacterList-method after disscussion at https://github.com/rcastelo/GSVA/issues/39

Robert Castelo authored on 03/02/2021 16:09:12
Showing 1 changed files
... ...
@@ -28,20 +28,12 @@
28 28
 ## features in both input objects follow the same nomenclature,
29 29
 .mapGeneSetsToFeatures <- function(gsets, features) {
30 30
 
31
-  ## fastmatch::fmatch() modifies the 'table' argument (i.e., the
32
-  ## second argument) in place by adding the attribute '.match.hash'
33
-  ## https://github.com/rcastelo/GSVA/issues/39
34
-  ## to avoid that undesired feature we duplicate 'features'
35
-  ## by adding an "impossible value" at the end and let
36
-  ## fastmatch::match() work with the duplicated object
37
-  features2 <- c(features, "&!%impossiblevalue%!&")
31
+  ## Aaron Lun's suggestion at
32
+  ## https://github.com/rcastelo/GSVA/issues/39#issuecomment-765549620
33
+  gsets2 <- CharacterList(gsets)
34
+  mt <- match(gsets2, features)
35
+  mapdgenesets <- as.list(mt[!is.na(mt)])
38 36
 
39
-  ## map to the actual features for which expression data is available
40
-  mapdgenesets <- lapply(gsets,
41
-                         function(x, y)
42
-                           as.vector(na.omit(fastmatch::fmatch(x, y))),
43
-                         features2)
44
-  
45 37
   if (length(unlist(mapdgenesets, use.names=FALSE)) == 0)
46 38
     stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.")
47 39
 
Browse code

Fixed call to fastmatch::fmatch() to avoid that this call modifies rownames of input expression matrices. Use this opportunity to factor out relevant code.

Robert Castelo authored on 22/01/2021 11:27:10
Showing 1 changed files
... ...
@@ -13,9 +13,41 @@
13 13
     }
14 14
   } 
15 15
 
16
+  if (nrow(expr) < 2)
17
+    stop("Less than two genes in the input assay object\n")
18
+  
19
+  if(is.null(rownames(expr)))
20
+    stop("The input assay object doesn't have rownames\n")
21
+  
16 22
   expr
17 23
 }
18 24
 
25
+## maps gene sets content in 'gsets' to 'features', where 'gsets'
26
+## is a 'list' object with character string vectors as elements,
27
+## and 'features' is a character string vector object. it assumes
28
+## features in both input objects follow the same nomenclature,
29
+.mapGeneSetsToFeatures <- function(gsets, features) {
30
+
31
+  ## fastmatch::fmatch() modifies the 'table' argument (i.e., the
32
+  ## second argument) in place by adding the attribute '.match.hash'
33
+  ## https://github.com/rcastelo/GSVA/issues/39
34
+  ## to avoid that undesired feature we duplicate 'features'
35
+  ## by adding an "impossible value" at the end and let
36
+  ## fastmatch::match() work with the duplicated object
37
+  features2 <- c(features, "&!%impossiblevalue%!&")
38
+
39
+  ## map to the actual features for which expression data is available
40
+  mapdgenesets <- lapply(gsets,
41
+                         function(x, y)
42
+                           as.vector(na.omit(fastmatch::fmatch(x, y))),
43
+                         features2)
44
+  
45
+  if (length(unlist(mapdgenesets, use.names=FALSE)) == 0)
46
+    stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.")
47
+
48
+  mapdgenesets
49
+}
50
+
19 51
 ## transforms a dgCMatrix into a list of its
20 52
 ## non-zero values by MARGIN (1 for row, 2 for column)
21 53
 .sparseToList <-function(dgCMat, MARGIN){
... ...
@@ -58,5 +90,11 @@
58 90
     }
59 91
   }
60 92
   
93
+  if (nrow(expr) < 2)
94
+    stop("Less than two genes in the input assay object\n")
95
+  
96
+  if(is.null(rownames(expr)))
97
+    stop("The input assay object doesn't have rownames\n")
98
+  
61 99
   expr
62
-}
63 100
\ No newline at end of file
101
+}
Browse code

added hdf5 support for plage method

pablo-rodr-bio2 authored on 20/11/2020 18:38:41
Showing 1 changed files
... ...
@@ -1,6 +1,8 @@
1 1
 .filterFeatures <- function(expr, method) {
2 2
 
3 3
   ## filter out genes with constant expression values
4
+  ## DelayedMatrixStats::rowSds() works for both base and 
5
+  ## DelayedArray matrices
4 6
   sdGenes <- DelayedMatrixStats::rowSds(expr)
5 7
   if (any(sdGenes == 0) || any(is.na(sdGenes))) {
6 8
     warning(sum(sdGenes == 0 | is.na(sdGenes)),
Browse code

adding support to DelayedArray backend

pablo-rodr-bio2 authored on 19/11/2020 10:20:24
Showing 1 changed files
... ...
@@ -1,7 +1,7 @@
1 1
 .filterFeatures <- function(expr, method) {
2 2
 
3 3
   ## filter out genes with constant expression values
4
-  sdGenes <- apply(expr, 1, sd)
4
+  sdGenes <- DelayedMatrixStats::rowSds(expr)
5 5
   if (any(sdGenes == 0) || any(is.na(sdGenes))) {
6 6
     warning(sum(sdGenes == 0 | is.na(sdGenes)),
7 7
             " genes with constant expression values throuhgout the samples.")
Browse code

adding dgCMatrix support to method plage

pablo-rodr-bio2 authored on 09/11/2020 11:11:55
Showing 1 changed files
... ...
@@ -35,6 +35,12 @@
35 35
   result
36 36
 }
37 37
 
38
+.dgCapply<-function(m,f, MARGIN){
39
+  x <- lapply(.sparseToList(m, MARGIN), f)
40
+  m@x <- unlist(x, use.names=FALSE)
41
+  m
42
+}
43
+
38 44
 ## filter out genes which non-zero values have
39 45
 ## constant expression values
40 46
 .filterFeaturesSparse <- function(expr, method) {
Browse code

adding dgCMatrix/list support

pablo-rodr-bio2 authored on 02/11/2020 18:20:08
Showing 1 changed files
... ...
@@ -13,3 +13,42 @@
13 13
 
14 14
   expr
15 15
 }
16
+
17
+## transforms a dgCMatrix into a list of its
18
+## non-zero values by MARGIN (1 for row, 2 for column)
19
+.sparseToList <-function(dgCMat, MARGIN){
20
+  MARGIN <- as.integer(MARGIN)
21
+  J <- rep(1:ncol(dgCMat), diff(dgCMat@p))
22
+  I <- dgCMat@i + 1
23
+  x <- dgCMat@x
24
+  if (MARGIN == 1L) {
25
+    result <- split(x, I)
26
+    names(result) <- rownames(dgCMat)[as.numeric(names(result))]
27
+  } else if (MARGIN == 2L) {
28
+    result <- split(x, J)
29
+    names(result) <- colnames(dgCMat)[as.numeric(names(result))]
30
+  }
31
+  else {
32
+    warning("invalid MARGIN; return NULL")
33
+    result <- NULL
34
+  }
35
+  result
36
+}
37
+
38
+## filter out genes which non-zero values have
39
+## constant expression values
40
+.filterFeaturesSparse <- function(expr, method) {
41
+  
42
+  sdGenes <- sapply(.sparseToList(expr, 1), sd)
43
+  
44
+  if (any(sdGenes == 0) || any(is.na(sdGenes))) {
45
+    warning(sum(sdGenes == 0 | is.na(sdGenes)),
46
+            " genes with constant expression values throuhgout the samples.")
47
+    if (method != "ssgsea") {
48
+      warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.")
49
+      expr <- expr[sdGenes > 0 & !is.na(sdGenes), ]
50
+    }
51
+  }
52
+  
53
+  expr
54
+}
16 55
\ No newline at end of file
Browse code

Added methods to take expression data in a SummarizedExperiment object.

Robert Castelo authored on 16/12/2019 11:46:25
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,15 @@
1
+.filterFeatures <- function(expr, method) {
2
+
3
+  ## filter out genes with constant expression values
4
+  sdGenes <- apply(expr, 1, sd)
5
+  if (any(sdGenes == 0) || any(is.na(sdGenes))) {
6
+    warning(sum(sdGenes == 0 | is.na(sdGenes)),
7
+            " genes with constant expression values throuhgout the samples.")
8
+    if (method != "ssgsea") {
9
+      warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.")
10
+      expr <- expr[sdGenes > 0 & !is.na(sdGenes), ]
11
+    }
12
+  } 
13
+
14
+  expr
15
+}