... | ... |
@@ -4,10 +4,10 @@ |
4 | 4 |
#' @param pathwayFile (char) path to file with pathway annotations. |
5 | 5 |
#' @param header (logical) whether \code{pathwayFile} has a header |
6 | 6 |
#' (default FALSE). |
7 |
-#' @param pathwayCol (char) column name with pathway identifiers. |
|
8 |
-#' For use with non-GMT input files (eg "Pathway.ID"; default NULL). |
|
9 |
-#' @param geneCol (char) column name with gene identifiers. |
|
10 |
-#' For use with non-GMT input files (eg "Gene.ID"; default NULL). |
|
7 |
+#' @param pathCol (char or int) column name or number with pathway identifiers. |
|
8 |
+#' For use with non-GMT input files (eg "Pathway.ID" or 2; default NULL). |
|
9 |
+#' @param geneCol (char or int) column name or number with gene identifiers. |
|
10 |
+#' For use with non-GMT input files (eg "Gene.ID" or 5; default NULL). |
|
11 | 11 |
#' @param minGene (integer) minimum number of genes to be considered |
12 | 12 |
#' in a pathway (default 1). |
13 | 13 |
#' @param maxGene (integer) maximum number of genes to be considered |
... | ... |
@@ -22,7 +22,7 @@ |
22 | 22 |
#' ) |
23 | 23 |
#' pathways <- readPathways( |
24 | 24 |
#' system.file("extdata", "SAFE_terms.xlsx", package = "FEDUP"), |
25 |
-#' header = TRUE, pathwayCol = "Enriched.GO.names", geneCol = "Gene.ID" |
|
25 |
+#' header = TRUE, pathCol = "Enriched.GO.names", geneCol = "Gene.ID" |
|
26 | 26 |
#' ) |
27 | 27 |
#' @importFrom openxlsx read.xlsx |
28 | 28 |
#' @importFrom tibble deframe |
... | ... |
@@ -30,8 +30,7 @@ |
30 | 30 |
#' @importFrom utils head read.delim tail |
31 | 31 |
#' @export |
32 | 32 |
readPathways <- function(pathwayFile, header = FALSE, |
33 |
- pathwayCol = NULL, geneCol = NULL, |
|
34 |
- minGene = 1L, maxGene = Inf) { |
|
33 |
+ pathCol = NULL, geneCol = NULL, minGene = 1L, maxGene = Inf) { |
|
35 | 34 |
s <- c("gmt", "txt", "xlsx") |
36 | 35 |
f <- sub(".*\\.", "", pathwayFile) |
37 | 36 |
if (!f %in% s) { |
... | ... |
@@ -41,31 +40,32 @@ readPathways <- function(pathwayFile, header = FALSE, |
41 | 40 |
)) |
42 | 41 |
} |
43 | 42 |
if (f == "gmt") { |
44 |
- pathway_in <- strsplit(readLines(pathwayFile), "\t") |
|
43 |
+ path_in <- strsplit(readLines(pathwayFile), "\t") |
|
45 | 44 |
if (header) { |
46 |
- pathway_in <- pathway_in[-1] |
|
45 |
+ path_in <- path_in[-1] |
|
47 | 46 |
} |
48 |
- pathways <- lapply(pathway_in, tail, -2) |
|
49 |
- names(pathways) <- vapply(pathway_in, head, n = 1, character(1)) |
|
47 |
+ pathways <- lapply(path_in, tail, -2) |
|
48 |
+ names(pathways) <- vapply(path_in, head, n = 1, character(1)) |
|
50 | 49 |
} else { |
51 | 50 |
if (f == "xlsx") { |
52 |
- pathway_in <- read.xlsx(pathwayFile) |
|
53 |
- } else if (f == "txt") { |
|
54 |
- pathway_in <- read.delim(pathwayFile, header = header) |
|
51 |
+ path_in <- read.xlsx(pathwayFile) |
|
55 | 52 |
} |
56 |
- if (missing(pathwayCol) || !pathwayCol %in% colnames(pathway_in)) { |
|
57 |
- stop("Pathway ID column (", pathwayCol, ") not in file") |
|
58 |
- } else if (missing(geneCol) || !geneCol %in% colnames(pathway_in)) { |
|
53 |
+ if (f == "txt") { |
|
54 |
+ path_in <- read.delim(pathwayFile, header = header) |
|
55 |
+ } |
|
56 |
+ if (!pathCol %in% names(path_in) && !pathCol %in% seq_along(path_in)) { |
|
57 |
+ stop("Pathway ID column (", pathCol, ") not in file") |
|
58 |
+ } |
|
59 |
+ if (!geneCol %in% names(path_in) && !geneCol %in% seq_along(path_in)) { |
|
59 | 60 |
stop("Gene ID column (", geneCol, ") not in file") |
60 |
- } else { |
|
61 |
- pathway_df <- data.frame( |
|
62 |
- pathway = pathway_in[, pathwayCol], gene = pathway_in[, geneCol] |
|
63 |
- ) |
|
64 |
- pathway_df[which(pathway_df$gene == ""), "gene"] <- NA |
|
65 |
- pathway_df <- na.omit(pathway_df) |
|
66 |
- pathway_df <- aggregate(gene ~ pathway, pathway_df, paste) |
|
67 |
- pathways <- deframe(pathway_df) |
|
68 | 61 |
} |
62 |
+ pathway_df <- data.frame( |
|
63 |
+ pathway = path_in[, pathCol], gene = path_in[, geneCol] |
|
64 |
+ ) |
|
65 |
+ pathway_df[which(pathway_df$gene == ""), "gene"] <- NA |
|
66 |
+ pathway_df <- na.omit(pathway_df) |
|
67 |
+ pathway_df <- aggregate(gene ~ pathway, pathway_df, paste) |
|
68 |
+ pathways <- deframe(pathway_df) |
|
69 | 69 |
} |
70 | 70 |
size <- lapply(pathways, length) |
71 | 71 |
pathways_s <- pathways[which(size >= minGene & size <= maxGene)] |
... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
## code to prepare `testGene` and `backgroundGene` datasets go here |
2 |
-pathway_file <- system.file("extdata", "Human_Reactome_November_17_2020_symbol.gmt", package = "FEDUP") |
|
3 |
-pathwaysGMT <- readPathways(pathway_file, MIN_GENE = 10, MAX_GENE = 500) |
|
2 |
+pathwayFile <- system.file("extdata", "Human_Reactome_November_17_2020_symbol.gmt", package = "FEDUP") |
|
3 |
+pathwaysGMT <- readPathways(pathwayFile, minGene = 10, maxGene = 500) |
|
4 | 4 |
|
5 | 5 |
testGene <- pathwaysGMT[[grep("397014", names(pathwaysGMT))]] # Reactome muscle contraction pathway |
6 | 6 |
backgroundGene <- unique(unlist(pathwaysGMT)) |
... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
## code to prepare `pathwaysGMT` dataset goes here |
2 |
-pathway_file <- system.file("extdata", "Human_Reactome_November_17_2020_symbol.gmt", package = "FEDUP") |
|
3 |
-pathwaysGMT <- readPathways(pathway_file, MIN_GENE = 10, MAX_GENE = 500) |
|
2 |
+pathwayFile <- system.file("extdata", "Human_Reactome_November_17_2020_symbol.gmt", package = "FEDUP") |
|
3 |
+pathwaysGMT <- readPathways(pathwayFile, minGene = 10, maxGene = 500) |
|
4 | 4 |
names(pathwaysGMT) <- stringi::stri_trans_general(names(pathwaysGMT), "latin-ascii") |
5 | 5 |
usethis::use_data(pathwaysGMT, compress = "xz", version = 2, overwrite = TRUE) |
... | ... |
@@ -1,12 +1,12 @@ |
1 | 1 |
## code to prepare `pathwaysTXT` dataset goes here |
2 | 2 |
library(tibble) |
3 | 3 |
|
4 |
-pathway_file <- system.file("extdata", "SAFE_terms.txt", package = "FEDUP") |
|
4 |
+pathwayFile <- system.file("extdata", "SAFE_terms.txt", package = "FEDUP") |
|
5 | 5 |
pathwaysTXT <- readPathways( |
6 |
- pathway_file, |
|
6 |
+ pathwayFile, |
|
7 | 7 |
header = TRUE, |
8 |
- pathway_col = "Enriched.GO.names", |
|
9 |
- gene_col = "Gene.ID" |
|
8 |
+ pathCol = "Enriched.GO.names", |
|
9 |
+ geneCol = "Gene.ID" |
|
10 | 10 |
) |
11 | 11 |
|
12 | 12 |
names(pathwaysTXT) <- stringi::stri_trans_general(names(pathwaysTXT), "latin-ascii") |
... | ... |
@@ -2,12 +2,12 @@ |
2 | 2 |
library(openxlsx) |
3 | 3 |
library(tibble) |
4 | 4 |
|
5 |
-pathway_file <- system.file("extdata", "SAFE_terms.xlsx", package = "FEDUP") |
|
5 |
+pathwayFile <- system.file("extdata", "SAFE_terms.xlsx", package = "FEDUP") |
|
6 | 6 |
pathwaysXLSX <- readPathways( |
7 |
- pathway_file, |
|
7 |
+ pathwayFile, |
|
8 | 8 |
header = TRUE, |
9 |
- pathway_col = "Enriched.GO.names", |
|
10 |
- gene_col = "Gene.ID" |
|
9 |
+ pathCol = "Enriched.GO.names", |
|
10 |
+ geneCol = "Gene.ID" |
|
11 | 11 |
) |
12 | 12 |
|
13 | 13 |
names(pathwaysXLSX) <- stringi::stri_trans_general(names(pathwaysXLSX), "latin-ascii") |
... | ... |
@@ -8,7 +8,7 @@ Currently supports the following file format: gmt, txt, xlsx.} |
8 | 8 |
readPathways( |
9 | 9 |
pathwayFile, |
10 | 10 |
header = FALSE, |
11 |
- pathwayCol = NULL, |
|
11 |
+ pathCol = NULL, |
|
12 | 12 |
geneCol = NULL, |
13 | 13 |
minGene = 1L, |
14 | 14 |
maxGene = Inf |
... | ... |
@@ -20,11 +20,11 @@ readPathways( |
20 | 20 |
\item{header}{(logical) whether \code{pathwayFile} has a header |
21 | 21 |
(default FALSE).} |
22 | 22 |
|
23 |
-\item{pathwayCol}{(char) column name with pathway identifiers. |
|
24 |
-For use with non-GMT input files (eg "Pathway.ID"; default NULL).} |
|
23 |
+\item{pathCol}{(char or int) column name or number with pathway identifiers. |
|
24 |
+For use with non-GMT input files (eg "Pathway.ID" or 2; default NULL).} |
|
25 | 25 |
|
26 |
-\item{geneCol}{(char) column name with gene identifiers. |
|
27 |
-For use with non-GMT input files (eg "Gene.ID"; default NULL).} |
|
26 |
+\item{geneCol}{(char or int) column name or number with gene identifiers. |
|
27 |
+For use with non-GMT input files (eg "Gene.ID" or 5; default NULL).} |
|
28 | 28 |
|
29 | 29 |
\item{minGene}{(integer) minimum number of genes to be considered |
30 | 30 |
in a pathway (default 1).} |
... | ... |
@@ -48,6 +48,6 @@ pathways <- readPathways( |
48 | 48 |
) |
49 | 49 |
pathways <- readPathways( |
50 | 50 |
system.file("extdata", "SAFE_terms.xlsx", package = "FEDUP"), |
51 |
- header = TRUE, pathwayCol = "Enriched.GO.names", geneCol = "Gene.ID" |
|
51 |
+ header = TRUE, pathCol = "Enriched.GO.names", geneCol = "Gene.ID" |
|
52 | 52 |
) |
53 | 53 |
} |
... | ... |
@@ -8,17 +8,17 @@ test_that("Test that readPathways stops without proper inputs", { |
8 | 8 |
expect_error(readPathways( |
9 | 9 |
pathwayFile, |
10 | 10 |
header = TRUE, |
11 |
- pathwayCol = "Enriched.GO.names", geneCol = "oops" |
|
11 |
+ pathCol = "Enriched.GO.names", geneCol = "oops" |
|
12 | 12 |
)) |
13 | 13 |
expect_error(readPathways( |
14 | 14 |
pathwayFile, |
15 | 15 |
header = TRUE, |
16 |
- pathwayCol = "oops", geneCol = "Gene.ID" |
|
16 |
+ pathCol = "oops", geneCol = "Gene.ID" |
|
17 | 17 |
)) |
18 | 18 |
expect_error(readPathways( |
19 | 19 |
pathwayFile, |
20 | 20 |
header = TRUE, minGene = 500, |
21 |
- pathwayCol = "Enriched.GO.names", geneCol = "Gene.ID" |
|
21 |
+ pathCol = "Enriched.GO.names", geneCol = "Gene.ID" |
|
22 | 22 |
)) |
23 | 23 |
}) |
24 | 24 |
|
... | ... |
@@ -43,7 +43,7 @@ test_that("Test that readPathways works with XLSX input", { |
43 | 43 |
pathways <- readPathways( |
44 | 44 |
pathwayFile, |
45 | 45 |
header = TRUE, |
46 |
- pathwayCol = "Enriched.GO.names", geneCol = "Gene.ID" |
|
46 |
+ pathCol = "Enriched.GO.names", geneCol = "Gene.ID" |
|
47 | 47 |
) |
48 | 48 |
expect_true(is.list(pathways)) |
49 | 49 |
expect_equal(length(pathways), 30) |
... | ... |
@@ -58,7 +58,7 @@ test_that("Test that readPathways works with TXT input", { |
58 | 58 |
pathways <- readPathways( |
59 | 59 |
pathwayFile, |
60 | 60 |
header = TRUE, |
61 |
- pathwayCol = "Enriched.GO.names", geneCol = "Gene.ID" |
|
61 |
+ pathCol = "Enriched.GO.names", geneCol = "Gene.ID" |
|
62 | 62 |
) |
63 | 63 |
expect_true(is.list(pathways)) |
64 | 64 |
expect_equal(length(pathways), 30) |