... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
Package: GSVA |
2 |
-Version: 1.39.11 |
|
2 |
+Version: 1.39.12 |
|
3 | 3 |
Title: Gene Set Variation Analysis for microarray and RNA-seq data |
4 | 4 |
Authors@R: c(person("Justin", "Guinney", role=c("aut", "cre"), email="justin.guinney@sagebase.org"), |
5 | 5 |
person("Robert", "Castelo", role="aut", email="robert.castelo@upf.edu"), |
... | ... |
@@ -26,19 +26,8 @@ setMethod("gsva", signature(expr="HDF5Array", gset.idx.list="list"), |
26 | 26 |
## e.g., constant expression |
27 | 27 |
expr <- .filterFeatures(expr, method) |
28 | 28 |
|
29 |
- if (nrow(expr) < 2) |
|
30 |
- stop("Less than two genes in the input assay object\n") |
|
31 |
- |
|
32 |
- if(is.null(rownames(expr))) |
|
33 |
- stop("The input assay object doesn't have rownames\n") |
|
34 |
- |
|
35 | 29 |
## map to the actual features for which expression data is available |
36 |
- mapped.gset.idx.list <- lapply(gset.idx.list, |
|
37 |
- function(x, y) na.omit(fastmatch::fmatch(x, y)), |
|
38 |
- rownames(expr)) |
|
39 |
- |
|
40 |
- if (length(unlist(mapped.gset.idx.list, use.names=FALSE)) == 0) |
|
41 |
- stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.") |
|
30 |
+ mapped.gset.idx.list <- .mapGeneSetsToFeatures(gset.idx.list, rownames(expr)) |
|
42 | 31 |
|
43 | 32 |
## remove gene sets from the analysis for which no features are available |
44 | 33 |
## and meet the minimum and maximum gene-set size specified by the user |
... | ... |
@@ -105,20 +94,8 @@ setMethod("gsva", signature(expr="SingleCellExperiment", gset.idx.list="list"), |
105 | 94 |
expr <- .filterFeatures(expr, method) |
106 | 95 |
} |
107 | 96 |
|
108 |
- |
|
109 |
- if (nrow(expr) < 2) |
|
110 |
- stop("Less than two genes in the input ExpressionSet object\n") |
|
111 |
- |
|
112 |
- if(is.null(rownames(expr))) |
|
113 |
- stop("The input assay object doesn't have rownames\n") |
|
114 |
- |
|
115 | 97 |
## map to the actual features for which expression data is available |
116 |
- mapped.gset.idx.list <- lapply(gset.idx.list, |
|
117 |
- function(x, y) na.omit(fastmatch::fmatch(x, y)), |
|
118 |
- rownames(expr)) |
|
119 |
- |
|
120 |
- if (length(unlist(mapped.gset.idx.list, use.names=FALSE)) == 0) |
|
121 |
- stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.") |
|
98 |
+ mapped.gset.idx.list <- .mapGeneSetsToFeatures(gset.idx.list, rownames(expr)) |
|
122 | 99 |
|
123 | 100 |
## remove gene sets from the analysis for which no features are available |
124 | 101 |
## and meet the minimum and maximum gene-set size specified by the user |
... | ... |
@@ -169,19 +146,8 @@ setMethod("gsva", signature(expr="dgCMatrix", gset.idx.list="list"), |
169 | 146 |
## e.g., constant expression |
170 | 147 |
expr <- .filterFeaturesSparse(expr, method) |
171 | 148 |
|
172 |
- if (nrow(expr) < 2) |
|
173 |
- stop("Less than two genes in the input assay object\n") |
|
174 |
- |
|
175 |
- if(is.null(rownames(expr))) |
|
176 |
- stop("The input assay object doesn't have rownames\n") |
|
177 |
- |
|
178 | 149 |
## map to the actual features for which expression data is available |
179 |
- mapped.gset.idx.list <- lapply(gset.idx.list, |
|
180 |
- function(x, y) na.omit(fastmatch::fmatch(x, y)), |
|
181 |
- rownames(expr)) |
|
182 |
- |
|
183 |
- if (length(unlist(mapped.gset.idx.list, use.names=FALSE)) == 0) |
|
184 |
- stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.") |
|
150 |
+ mapped.gset.idx.list <- .mapGeneSetsToFeatures(gset.idx.list, rownames(expr)) |
|
185 | 151 |
|
186 | 152 |
## remove gene sets from the analysis for which no features are available |
187 | 153 |
## and meet the minimum and maximum gene-set size specified by the user |
... | ... |
@@ -243,12 +209,6 @@ setMethod("gsva", signature(expr="SummarizedExperiment", gset.idx.list="GeneSetC |
243 | 209 |
## e.g., constant expression |
244 | 210 |
expr <- .filterFeatures(expr, method) |
245 | 211 |
|
246 |
- if (nrow(expr) < 2) |
|
247 |
- stop("Less than two genes in the input ExpressionSet object\n") |
|
248 |
- |
|
249 |
- if(is.null(rownames(expr))) |
|
250 |
- stop("The input assay object doesn't have rownames\n") |
|
251 |
- |
|
252 | 212 |
annotpkg <- metadata(se)$annotation |
253 | 213 |
if (!is.null(annotpkg) && length(annotpkg) > 0 && is.character(annotpkg) && annotpkg != "") { |
254 | 214 |
if (!annotpkg %in% installed.packages()) |
... | ... |
@@ -272,12 +232,7 @@ setMethod("gsva", signature(expr="SummarizedExperiment", gset.idx.list="GeneSetC |
272 | 232 |
} |
273 | 233 |
|
274 | 234 |
## map to the actual features for which expression data is available |
275 |
- mapped.gset.idx.list <- lapply(mapped.gset.idx.list, |
|
276 |
- function(x, y) na.omit(fastmatch::fmatch(x, y)), |
|
277 |
- rownames(expr)) |
|
278 |
- |
|
279 |
- if (length(unlist(mapped.gset.idx.list, use.names=FALSE)) == 0) |
|
280 |
- stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.") |
|
235 |
+ mapped.gset.idx.list <- .mapGeneSetsToFeatures(gset.idx.list, rownames(expr)) |
|
281 | 236 |
|
282 | 237 |
## remove gene sets from the analysis for which no features are available |
283 | 238 |
## and meet the minimum and maximum gene-set size specified by the user |
... | ... |
@@ -344,20 +299,9 @@ setMethod("gsva", signature(expr="SummarizedExperiment", gset.idx.list="list"), |
344 | 299 |
## e.g., constant expression |
345 | 300 |
expr <- .filterFeatures(expr, method) |
346 | 301 |
|
347 |
- if (nrow(expr) < 2) |
|
348 |
- stop("Less than two genes in the input ExpressionSet object\n") |
|
349 |
- |
|
350 |
- if(is.null(rownames(expr))) |
|
351 |
- stop("The input assay object doesn't have rownames\n") |
|
352 |
- |
|
353 | 302 |
## map to the actual features for which expression data is available |
354 |
- mapped.gset.idx.list <- lapply(gset.idx.list, |
|
355 |
- function(x, y) na.omit(fastmatch::fmatch(x, y)), |
|
356 |
- rownames(expr)) |
|
357 |
- |
|
358 |
- if (length(unlist(mapped.gset.idx.list, use.names=FALSE)) == 0) |
|
359 |
- stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.") |
|
360 |
- |
|
303 |
+ mapped.gset.idx.list <- .mapGeneSetsToFeatures(gset.idx.list, rownames(expr)) |
|
304 |
+ |
|
361 | 305 |
## remove gene sets from the analysis for which no features are available |
362 | 306 |
## and meet the minimum and maximum gene-set size specified by the user |
363 | 307 |
mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list, |
... | ... |
@@ -409,20 +353,9 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list"), |
409 | 353 |
## e.g., constant expression |
410 | 354 |
expr <- .filterFeatures(expr, method) |
411 | 355 |
|
412 |
- if (nrow(expr) < 2) |
|
413 |
- stop("Less than two genes in the input ExpressionSet object\n") |
|
414 |
- |
|
415 |
- if(is.null(rownames(expr))) |
|
416 |
- stop("The input assay object doesn't have rownames\n") |
|
417 |
- |
|
418 | 356 |
## map to the actual features for which expression data is available |
419 |
- mapped.gset.idx.list <- lapply(gset.idx.list, |
|
420 |
- function(x, y) na.omit(fastmatch::fmatch(x, y)), |
|
421 |
- rownames(expr)) |
|
422 |
- |
|
423 |
- if (length(unlist(mapped.gset.idx.list, use.names=FALSE)) == 0) |
|
424 |
- stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.") |
|
425 |
- |
|
357 |
+ mapped.gset.idx.list <- .mapGeneSetsToFeatures(gset.idx.list, rownames(expr)) |
|
358 |
+ |
|
426 | 359 |
## remove gene sets from the analysis for which no features are available |
427 | 360 |
## and meet the minimum and maximum gene-set size specified by the user |
428 | 361 |
mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list, |
... | ... |
@@ -472,12 +405,6 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollecti |
472 | 405 |
## e.g., constant expression |
473 | 406 |
expr <- .filterFeatures(expr, method) |
474 | 407 |
|
475 |
- if (nrow(expr) < 2) |
|
476 |
- stop("Less than two genes in the input ExpressionSet object\n") |
|
477 |
- |
|
478 |
- if(is.null(rownames(expr))) |
|
479 |
- stop("The input assay object doesn't have rownames\n") |
|
480 |
- |
|
481 | 408 |
annotpkg <- Biobase::annotation(eset) |
482 | 409 |
if (length(annotpkg) > 0 && annotpkg != "") { |
483 | 410 |
if (!annotpkg %in% installed.packages()) |
... | ... |
@@ -501,14 +428,11 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollecti |
501 | 428 |
} |
502 | 429 |
|
503 | 430 |
## map to the actual features for which expression data is available |
504 |
- tmp <- lapply(mapped.gset.idx.list, |
|
505 |
- function(x, y) na.omit(fastmatch::fmatch(x, y)), |
|
506 |
- rownames(expr)) |
|
507 |
- names(tmp) <- names(mapped.gset.idx.list) |
|
508 |
- |
|
431 |
+ mapped.gset.idx.list <- .mapGeneSetsToFeatures(gset.idx.list, rownames(expr)) |
|
432 |
+ |
|
509 | 433 |
## remove gene sets from the analysis for which no features are available |
510 | 434 |
## and meet the minimum and maximum gene-set size specified by the user |
511 |
- mapped.gset.idx.list <- filterGeneSets(tmp, |
|
435 |
+ mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list, |
|
512 | 436 |
min.sz=max(1, min.sz), |
513 | 437 |
max.sz=max.sz) |
514 | 438 |
|
... | ... |
@@ -553,12 +477,6 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="GeneSetCollection"), |
553 | 477 |
## e.g., constant expression |
554 | 478 |
expr <- .filterFeatures(expr, method) |
555 | 479 |
|
556 |
- if (nrow(expr) < 2) |
|
557 |
- stop("Less than two genes in the input expression data matrix\n") |
|
558 |
- |
|
559 |
- if(is.null(rownames(expr))) |
|
560 |
- stop("The input assay object doesn't have rownames\n") |
|
561 |
- |
|
562 | 480 |
## map gene identifiers of the gene sets to the features in the matrix |
563 | 481 |
mapped.gset.idx.list <- gset.idx.list |
564 | 482 |
if (!missing(annotation)) { |
... | ... |
@@ -570,17 +488,11 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="GeneSetCollection"), |
570 | 488 |
} |
571 | 489 |
|
572 | 490 |
## map to the actual features for which expression data is available |
573 |
- tmp <- lapply(geneIds(mapped.gset.idx.list), |
|
574 |
- function(x, y) na.omit(fastmatch::fmatch(x, y)), |
|
575 |
- rownames(expr)) |
|
576 |
- names(tmp) <- names(mapped.gset.idx.list) |
|
577 |
- |
|
578 |
- if (length(unlist(tmp, use.names=FALSE)) == 0) |
|
579 |
- stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.") |
|
580 |
- |
|
491 |
+ mapped.gset.idx.list <- .mapGeneSetsToFeatures(gset.idx.list, rownames(expr)) |
|
492 |
+ |
|
581 | 493 |
## remove gene sets from the analysis for which no features are available |
582 | 494 |
## and meet the minimum and maximum gene-set size specified by the user |
583 |
- mapped.gset.idx.list <- filterGeneSets(tmp, |
|
495 |
+ mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list, |
|
584 | 496 |
min.sz=max(1, min.sz), |
585 | 497 |
max.sz=max.sz) |
586 | 498 |
|
... | ... |
@@ -623,16 +535,9 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list"), |
623 | 535 |
## e.g., constant expression |
624 | 536 |
expr <- .filterFeatures(expr, method) |
625 | 537 |
|
626 |
- if (nrow(expr) < 2) |
|
627 |
- stop("Less than two genes in the input expression data matrix\n") |
|
628 |
- |
|
629 |
- mapped.gset.idx.list <- lapply(gset.idx.list, |
|
630 |
- function(x ,y) na.omit(fastmatch::fmatch(x, y)), |
|
631 |
- rownames(expr)) |
|
632 |
- |
|
633 |
- if (length(unlist(mapped.gset.idx.list, use.names=FALSE)) == 0) |
|
634 |
- stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.") |
|
635 |
- |
|
538 |
+ ## map to the actual features for which expression data is available |
|
539 |
+ mapped.gset.idx.list <- .mapGeneSetsToFeatures(gset.idx.list, rownames(expr)) |
|
540 |
+ |
|
636 | 541 |
## remove gene sets from the analysis for which no features are available |
637 | 542 |
## and meet the minimum and maximum gene-set size specified by the user |
638 | 543 |
mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list, |
... | ... |
@@ -1091,10 +996,10 @@ setMethod("computeGeneSetsOverlap", signature(gSets="list", uniqGenes="character |
1091 | 996 |
function(gSets, uniqGenes, min.sz=1, max.sz=Inf) { |
1092 | 997 |
totalGenes <- length(uniqGenes) |
1093 | 998 |
|
1094 |
- ## map to the features requested |
|
1095 |
- gSets <- lapply(gSets, function(x, y) as.vector(na.omit(fastmatch::fmatch(x, y))), uniqGenes) |
|
999 |
+ ## map to the actual features for which expression data is available |
|
1000 |
+ gSets <- .mapGeneSetsToFeatures(gSets, uniqGenes) |
|
1096 | 1001 |
|
1097 |
- lenGsets <- sapply(gSets, length) |
|
1002 |
+ lenGsets <- lengths(gSets) |
|
1098 | 1003 |
totalGsets <- length(gSets) |
1099 | 1004 |
|
1100 | 1005 |
gSetsMembershipMatrix <- matrix(0, nrow=totalGenes, ncol=totalGsets, |
... | ... |
@@ -1111,9 +1016,9 @@ setMethod("computeGeneSetsOverlap", signature(gSets="list", uniqGenes="Expressio |
1111 | 1016 |
totalGenes <- length(uniqGenes) |
1112 | 1017 |
|
1113 | 1018 |
## map to the actual features for which expression data is available |
1114 |
- gSets <- lapply(gSets, function(x, y) as.vector(na.omit(fastmatch::fmatch(x, y))), uniqGenes) |
|
1019 |
+ gSets <- .mapGeneSetsToFeatures(gSets, uniqGenes) |
|
1115 | 1020 |
|
1116 |
- lenGsets <- sapply(gSets, length) |
|
1021 |
+ lenGsets <- lengths(gSets) |
|
1117 | 1022 |
totalGsets <- length(gSets) |
1118 | 1023 |
|
1119 | 1024 |
gSetsMembershipMatrix <- matrix(0, nrow=totalGenes, ncol=totalGsets, |
... | ... |
@@ -13,9 +13,41 @@ |
13 | 13 |
} |
14 | 14 |
} |
15 | 15 |
|
16 |
+ if (nrow(expr) < 2) |
|
17 |
+ stop("Less than two genes in the input assay object\n") |
|
18 |
+ |
|
19 |
+ if(is.null(rownames(expr))) |
|
20 |
+ stop("The input assay object doesn't have rownames\n") |
|
21 |
+ |
|
16 | 22 |
expr |
17 | 23 |
} |
18 | 24 |
|
25 |
+## maps gene sets content in 'gsets' to 'features', where 'gsets' |
|
26 |
+## is a 'list' object with character string vectors as elements, |
|
27 |
+## and 'features' is a character string vector object. it assumes |
|
28 |
+## features in both input objects follow the same nomenclature, |
|
29 |
+.mapGeneSetsToFeatures <- function(gsets, features) { |
|
30 |
+ |
|
31 |
+ ## fastmatch::fmatch() modifies the 'table' argument (i.e., the |
|
32 |
+ ## second argument) in place by adding the attribute '.match.hash' |
|
33 |
+ ## https://github.com/rcastelo/GSVA/issues/39 |
|
34 |
+ ## to avoid that undesired feature we duplicate 'features' |
|
35 |
+ ## by adding an "impossible value" at the end and let |
|
36 |
+ ## fastmatch::match() work with the duplicated object |
|
37 |
+ features2 <- c(features, "&!%impossiblevalue%!&") |
|
38 |
+ |
|
39 |
+ ## map to the actual features for which expression data is available |
|
40 |
+ mapdgenesets <- lapply(gsets, |
|
41 |
+ function(x, y) |
|
42 |
+ as.vector(na.omit(fastmatch::fmatch(x, y))), |
|
43 |
+ features2) |
|
44 |
+ |
|
45 |
+ if (length(unlist(mapdgenesets, use.names=FALSE)) == 0) |
|
46 |
+ stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.") |
|
47 |
+ |
|
48 |
+ mapdgenesets |
|
49 |
+} |
|
50 |
+ |
|
19 | 51 |
## transforms a dgCMatrix into a list of its |
20 | 52 |
## non-zero values by MARGIN (1 for row, 2 for column) |
21 | 53 |
.sparseToList <-function(dgCMat, MARGIN){ |
... | ... |
@@ -58,5 +90,11 @@ |
58 | 90 |
} |
59 | 91 |
} |
60 | 92 |
|
93 |
+ if (nrow(expr) < 2) |
|
94 |
+ stop("Less than two genes in the input assay object\n") |
|
95 |
+ |
|
96 |
+ if(is.null(rownames(expr))) |
|
97 |
+ stop("The input assay object doesn't have rownames\n") |
|
98 |
+ |
|
61 | 99 |
expr |
62 |
-} |
|
63 | 100 |
\ No newline at end of file |
101 |
+} |