Browse code

Replaced fastmatch::fmatch() by IRanges::match,CharacterList-method after disscussion at https://github.com/rcastelo/GSVA/issues/39

Robert Castelo authored on 03/02/2021 16:09:12
Showing 4 changed files

... ...
@@ -1,14 +1,14 @@
1 1
 Package: GSVA
2
-Version: 1.39.14
2
+Version: 1.39.15
3 3
 Title: Gene Set Variation Analysis for microarray and RNA-seq data
4 4
 Authors@R: c(person("Justin", "Guinney", role=c("aut", "cre"), email="justin.guinney@sagebase.org"),
5 5
              person("Robert", "Castelo", role="aut", email="robert.castelo@upf.edu"),
6 6
              person("Alexey", "Sergushichev", role="ctb", email="alsergbox@gmail.com"),
7 7
              person("Pablo Sebastian", "Rodriguez", role="ctb", email="pablosebastian.rodriguez@upf.edu"))
8 8
 Depends: R (>= 3.5.0)
9
-Imports: methods, stats, utils, graphics, S4Vectors,
9
+Imports: methods, stats, utils, graphics, S4Vectors, IRanges,
10 10
          Biobase, SummarizedExperiment, GSEABase, Matrix,
11
-         parallel, BiocParallel, fastmatch, SingleCellExperiment, 
11
+         parallel, BiocParallel, SingleCellExperiment, 
12 12
          sparseMatrixStats, DelayedArray, DelayedMatrixStats,
13 13
          HDF5Array, BiocSingular
14 14
 Suggests: 
... ...
@@ -18,6 +18,7 @@ importMethodsFrom(Biobase, featureNames,
18 18
 
19 19
 importMethodsFrom(S4Vectors, metadata,
20 20
                              "metadata<-")
21
+importMethodsFrom(IRanges, match)
21 22
 
22 23
 importMethodsFrom(SummarizedExperiment, assays,
23 24
                                         colData)
... ...
@@ -40,6 +41,7 @@ importFrom(utils, installed.packages,
40 41
                   read.csv,
41 42
                   write.csv)
42 43
 importFrom(S4Vectors, SimpleList)
44
+importFrom(IRanges, CharacterList)
43 45
 importFrom(GSEABase, AnnoOrEntrezIdentifier,
44 46
                      mapIdentifiers,
45 47
                      getGmt)
... ...
@@ -825,7 +825,7 @@ ks_test_Rcode <- function(gene.density, gset_idxs, tau=1, make.plot=FALSE){
825 825
 .fastRndWalk <- function(gSetIdx, geneRanking, j, Ra) {
826 826
     n <- length(geneRanking)
827 827
     k <- length(gSetIdx)
828
-    idxs <- sort.int(fastmatch::fmatch(gSetIdx, geneRanking))
828
+    idxs <- sort.int(match(gSetIdx, geneRanking))
829 829
     
830 830
     stepCDFinGeneSet2 <- 
831 831
         sum(Ra[geneRanking[idxs], j] * (n - idxs + 1)) /
... ...
@@ -28,20 +28,12 @@
28 28
 ## features in both input objects follow the same nomenclature,
29 29
 .mapGeneSetsToFeatures <- function(gsets, features) {
30 30
 
31
-  ## fastmatch::fmatch() modifies the 'table' argument (i.e., the
32
-  ## second argument) in place by adding the attribute '.match.hash'
33
-  ## https://github.com/rcastelo/GSVA/issues/39
34
-  ## to avoid that undesired feature we duplicate 'features'
35
-  ## by adding an "impossible value" at the end and let
36
-  ## fastmatch::match() work with the duplicated object
37
-  features2 <- c(features, "&!%impossiblevalue%!&")
31
+  ## Aaron Lun's suggestion at
32
+  ## https://github.com/rcastelo/GSVA/issues/39#issuecomment-765549620
33
+  gsets2 <- CharacterList(gsets)
34
+  mt <- match(gsets2, features)
35
+  mapdgenesets <- as.list(mt[!is.na(mt)])
38 36
 
39
-  ## map to the actual features for which expression data is available
40
-  mapdgenesets <- lapply(gsets,
41
-                         function(x, y)
42
-                           as.vector(na.omit(fastmatch::fmatch(x, y))),
43
-                         features2)
44
-  
45 37
   if (length(unlist(mapdgenesets, use.names=FALSE)) == 0)
46 38
     stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.")
47 39