... | ... |
@@ -6,9 +6,9 @@ |
6 | 6 |
#' |
7 | 7 |
#' Raw data location |
8 | 8 |
#' system.file("extdata", "Human_Reactome_November_17_2020_symbol.gmt", |
9 |
-#' package = "FEDUP") |
|
9 |
+#' package="FEDUP") |
|
10 | 10 |
#' Script to prepare data |
11 |
-#' system.file("data-raw", "pathwaysGMT.R", package = "FEDUP") |
|
11 |
+#' system.file("data-raw", "pathwaysGMT.R", package="FEDUP") |
|
12 | 12 |
#' |
13 | 13 |
#' @format a named list of 1437 vectors |
14 | 14 |
"pathwaysGMT" |
... | ... |
@@ -16,10 +16,10 @@ |
16 | 16 |
#' Example list of yeast SAFE terms obtained from a XLSX file. |
17 | 17 |
#' |
18 | 18 |
#' Raw data location |
19 |
-#' system.file("extdata", "SAFE_terms.xlsx", package = "FEDUP") |
|
19 |
+#' system.file("extdata", "SAFE_terms.xlsx", package="FEDUP") |
|
20 | 20 |
#' |
21 | 21 |
#' Script to prepare data |
22 |
-#' system.file("data-raw", "pathwaysXLSX.R", package = "FEDUP") |
|
22 |
+#' system.file("data-raw", "pathwaysXLSX.R", package="FEDUP") |
|
23 | 23 |
#' |
24 | 24 |
#' @format a named list of 30 vectors |
25 | 25 |
"pathwaysXLSX" |
... | ... |
@@ -27,10 +27,10 @@ |
27 | 27 |
#' Example list of yeast SAFE terms obtained from a TXT file. |
28 | 28 |
#' |
29 | 29 |
#' Raw data location |
30 |
-#' system.file("extdata", "SAFE_terms.txt", package = "FEDUP") |
|
30 |
+#' system.file("extdata", "SAFE_terms.txt", package="FEDUP") |
|
31 | 31 |
#' |
32 | 32 |
#' Script to prepare data |
33 |
-#' system.file("data-raw", "pathwaysTXT.R", package = "FEDUP") |
|
33 |
+#' system.file("data-raw", "pathwaysTXT.R", package="FEDUP") |
|
34 | 34 |
#' |
35 | 35 |
#' @format a named list of 30 vectors |
36 | 36 |
"pathwaysTXT" |
... | ... |
@@ -38,7 +38,7 @@ |
38 | 38 |
#' Example vector of human genes to use as test set for enrichment. |
39 | 39 |
#' |
40 | 40 |
#' Script to prepare data |
41 |
-#' system.file("data-raw", "genes.R", package = "FEDUP") |
|
41 |
+#' system.file("data-raw", "genes.R", package="FEDUP") |
|
42 | 42 |
#' |
43 | 43 |
#' @format a character vector with 190 elements (gene IDs) |
44 | 44 |
"testGene" |
... | ... |
@@ -46,7 +46,7 @@ |
46 | 46 |
#' Example vector of human genes to use as background set for enrichment. |
47 | 47 |
#' |
48 | 48 |
#' Script to generate data |
49 |
-#' system.file("data-raw", "genes.R", package = "FEDUP") |
|
49 |
+#' system.file("data-raw", "genes.R", package="FEDUP") |
|
50 | 50 |
#' |
51 | 51 |
#' @format a character vector with 10208 elements (gene IDs) |
52 | 52 |
"backgroundGene" |
... | ... |
@@ -1,37 +1,42 @@ |
1 |
-inputObject <- function(test_gene, background_gene, pathways) { |
|
1 |
+inputObject <- function(testGene, backgroundGene, pathways) { |
|
2 | 2 |
|
3 | 3 |
pathway_genes <- unique(as.character(unlist(pathways))) |
4 |
- test_gene_in_pathways <- which(test_gene %in% pathway_genes) |
|
5 |
- back_gene_in_pathways <- which(background_gene %in% pathway_genes) |
|
4 |
+ testGene_in_pathways <- which(testGene %in% pathway_genes) |
|
5 |
+ back_gene_in_pathways <- which(backgroundGene %in% pathway_genes) |
|
6 | 6 |
|
7 |
- if (is.null(test_gene)) { |
|
8 |
- stop("Oops, argument 'test_gene' is empty. Supply a vector of |
|
9 |
- genes ... I promise this will work.") |
|
10 |
- } else if (is.null(background_gene)) { |
|
11 |
- stop("Oops, argument 'background_gene' is empty. Supply a vector of |
|
12 |
- genes ... I promise this will work") |
|
13 |
- } else if (!is.list(pathways)) { |
|
7 |
+ if (is.null(testGene)) { |
|
8 |
+ stop("Oops, argument 'testGene' is empty. Supply a vector of |
|
9 |
+ genes... I promise this will work.") |
|
10 |
+ } |
|
11 |
+ if (is.null(backgroundGene)) { |
|
12 |
+ stop("Oops, argument 'backgroundGene' is empty. Supply a vector of |
|
13 |
+ genes... I promise this will work.") |
|
14 |
+ } |
|
15 |
+ if (!is.list(pathways)) { |
|
14 | 16 |
stop("Oops, argument 'pathways' is not in a list format... |
15 | 17 |
have you tried using readPathways() on your input pathway file?") |
16 |
- } else if (!length(test_gene_in_pathways)) { |
|
17 |
- stop("Oops, none of the genes in 'test_gene' was found in 'pathways'. |
|
18 |
+ } |
|
19 |
+ if (!length(testGene_in_pathways)) { |
|
20 |
+ stop("Oops, none of the genes in 'testGene' was found in 'pathways'. |
|
18 | 21 |
Make sure that you have some gene IDs in both inputs, otherwise how do |
19 | 22 |
you expect this works?") |
20 |
- } else if (!length(back_gene_in_pathways)) { |
|
21 |
- stop("Oops, none of the genes in 'background_genes' was found in |
|
23 |
+ } |
|
24 |
+ if (!length(back_gene_in_pathways)) { |
|
25 |
+ stop("Oops, none of the genes in 'backgroundGenes' was found in |
|
22 | 26 |
'pathways'. Make sure that you have some gene IDs in both inputs, |
23 | 27 |
otherwise how do you expect this works?") |
24 |
- } else if (length(test_gene) >= length(background_gene)) { |
|
28 |
+ } |
|
29 |
+ if (length(testGene) >= length(backgroundGene)) { |
|
25 | 30 |
stop("Oops, your test set can't have more genes than your background |
26 |
- set. Have you mixed up the 'test_gene' and 'background_gene' arguments? |
|
31 |
+ set. Have you mixed up the 'testGene' and 'backgroundGene' arguments? |
|
27 | 32 |
You're so close... I can feel it.") |
28 | 33 |
} |
29 | 34 |
|
30 |
- test_gene <- unique(as.character(test_gene)) |
|
31 |
- background_gene <- unique(as.character(background_gene)) |
|
35 |
+ testGene <- unique(as.character(testGene)) |
|
36 |
+ backgroundGene <- unique(as.character(backgroundGene)) |
|
32 | 37 |
|
33 |
- list(test_gene = test_gene, |
|
34 |
- background_gene = background_gene, |
|
38 |
+ list(testGene = testGene, |
|
39 |
+ backgroundGene = backgroundGene, |
|
35 | 40 |
pathways = pathways, |
36 | 41 |
pathways_name = names(pathways), |
37 | 42 |
pathways_size = unlist(lapply(pathways, length)) |
... | ... |
@@ -40,8 +45,8 @@ inputObject <- function(test_gene, background_gene, pathways) { |
40 | 45 |
|
41 | 46 |
#' Runs gene enrichment and depletion analysis for a list of pathways. |
42 | 47 |
#' |
43 |
-#' @param test_gene (char) vector of genes to use as test set. |
|
44 |
-#' @param background_gene (char) vector of genes to use as background set. |
|
48 |
+#' @param testGene (char) vector of genes to use as test set. |
|
49 |
+#' @param backgroundGene (char) vector of genes to use as background set. |
|
45 | 50 |
#' @param pathways (list) list of vectors with pathway annotations. |
46 | 51 |
#' @return table of pathway enrichment and depletion results. Rows represent |
47 | 52 |
#' tested pathways. Columns represent: |
... | ... |
@@ -49,14 +54,14 @@ inputObject <- function(test_gene, background_gene, pathways) { |
49 | 54 |
#' \item pathway -- name of the pathway, corresponds to |
50 | 55 |
#' names(\code{pathways}); |
51 | 56 |
#' \item size -- size of the pathway; |
52 |
-#' \item real_frac -- fraction of \code{test_gene} members in pathway; |
|
53 |
-#' \item expected_frac -- fraction of \code{background_gene} members in |
|
57 |
+#' \item real_frac -- fraction of \code{testGene} members in pathway; |
|
58 |
+#' \item expected_frac -- fraction of \code{backgroundGene} members in |
|
54 | 59 |
#' pathway; |
55 | 60 |
#' \item fold_enrichment -- fold enrichment measure, |
56 | 61 |
#' evaluates as \code{real_frac} / \code{expected_frac}; |
57 | 62 |
#' \item status -- indicator that pathway is enriched or depleted for |
58 |
-#' \code{test_gene} members; |
|
59 |
-#' \item real_gene -- vector of \code{test_gene} gene members annotated |
|
63 |
+#' \code{testGene} members; |
|
64 |
+#' \item real_gene -- vector of \code{testGene} gene members annotated |
|
60 | 65 |
#' to \code{pathways}; |
61 | 66 |
#' \item pvalue -- enrichment p-value calculated via Fisher's exact test; |
62 | 67 |
#' \item qvalue -- BH-adjusted p-value |
... | ... |
@@ -70,10 +75,10 @@ inputObject <- function(test_gene, background_gene, pathways) { |
70 | 75 |
#' @importFrom utils head read.delim tail |
71 | 76 |
#' @importFrom stats fisher.test p.adjust |
72 | 77 |
#' @export |
73 |
-runFedup <- function(test_gene, background_gene, pathways) { |
|
74 |
- inputs <- inputObject(test_gene, background_gene, pathways) |
|
75 |
- test <- inputs$test_gene |
|
76 |
- background <- inputs$background_gene |
|
78 |
+runFedup <- function(testGene, backgroundGene, pathways) { |
|
79 |
+ inputs <- inputObject(testGene, backgroundGene, pathways) |
|
80 |
+ test <- inputs$testGene |
|
81 |
+ background <- inputs$backgroundGene |
|
77 | 82 |
pathways <- inputs$pathways |
78 | 83 |
pathways_name <- inputs$pathways_name |
79 | 84 |
pathways_size <- inputs$pathways_size |
... | ... |
@@ -84,17 +89,17 @@ runFedup <- function(test_gene, background_gene, pathways) { |
84 | 89 |
|
85 | 90 |
res <- data.table(pathway = pathways_name, size = pathways_size) |
86 | 91 |
res_stats <- vapply(pathways, function(x) { |
87 |
- a_n <- length(test) # n test genes |
|
88 |
- b_n <- length(background) # n background genes |
|
89 |
- a <- intersect(test, x) # test genes in pathway |
|
90 |
- b <- intersect(background, x) # background genes in pathway |
|
91 |
- a_len <- length(a) # n test genes in pathway |
|
92 |
- b_len <- length(b) # n background genes in pathway |
|
93 |
- a_x <- (a_len / a_n) * 100 # fraction of test genes in pathway |
|
94 |
- b_x <- (b_len / b_n) * 100 # fraction of background genes in pathway |
|
95 |
- f <- a_x / b_x # fold enrichment measure |
|
92 |
+ a_n <- length(test) |
|
93 |
+ b_n <- length(background) |
|
94 |
+ a <- intersect(test, x) |
|
95 |
+ b <- intersect(background, x) |
|
96 |
+ a_len <- length(a) |
|
97 |
+ b_len <- length(b) |
|
98 |
+ a_x <- (a_len / a_n) * 100 |
|
99 |
+ b_x <- (b_len / b_n) * 100 |
|
100 |
+ f <- a_x / b_x |
|
96 | 101 |
e <- ifelse(f > 1, "Enriched", "Depleted") |
97 |
- m <- rbind(c(a_len, b_len), c(a_n, b_n)) # pval contingency table |
|
102 |
+ m <- rbind(c(a_len, b_len), c(a_n, b_n)) |
|
98 | 103 |
p <- fisher.test(m, alternative = "two.sided")$p.value |
99 | 104 |
return(c( |
100 | 105 |
real_frac = a_x, expected_frac = b_x, fold_enrich = f, |
... | ... |
@@ -107,8 +112,9 @@ runFedup <- function(test_gene, background_gene, pathways) { |
107 | 112 |
res[, "status" := unlist(res_stats["status",])] |
108 | 113 |
res[, "real_gene" := mapply("[", strsplit(res_stats["real_gene",], "\\|"))] |
109 | 114 |
res[, "pvalue" := as.numeric(unlist(res_stats["pvalue",]))] |
110 |
- res <- res[order(res$pvalue),] # BH-correct pvalues |
|
115 |
+ res <- res[order(res$pvalue),] |
|
111 | 116 |
res$qvalue <- p.adjust(res$pvalue, method = "BH") |
117 |
+ |
|
112 | 118 |
message("You did it! FEDUP ran successfully, feeling pretty good huh?") |
113 | 119 |
return(res) |
114 | 120 |
} |
... | ... |
@@ -2,7 +2,7 @@ |
2 | 2 |
#' |
3 | 3 |
#' @param df (data.frame) table with FEDUP enrichment results. |
4 | 4 |
#' (see runFedup() for column descriptions) |
5 |
-#' @param results_file (char) name of output results file. |
|
5 |
+#' @param resultsFile (char) name of output results file. |
|
6 | 6 |
#' @return table of gene enrichment and depletion results formatted as a |
7 | 7 |
#' 'Generic results file'. Rows represent tested pathways. Columns represent: |
8 | 8 |
#' \itemize{ |
... | ... |
@@ -11,43 +11,42 @@ |
11 | 11 |
#' \item description -- pathway name or description; |
12 | 12 |
#' \item pvalue -- enrichment pvalue; |
13 | 13 |
#' \item qvalue -- BH-corrected pvalue; |
14 |
-#' \item status -- +1 or -1, to identify enrichment in either of the two |
|
15 |
-#' phenotypes being compared in the two-class analysis |
|
14 |
+#' \item status -- +1 or -1, to identify enriched or depleted pathways |
|
16 | 15 |
#' (+1 maps to red, -1 maps to blue) |
17 | 16 |
#' } |
18 | 17 |
#' @examples |
19 | 18 |
#' data(testGene) |
20 | 19 |
#' data(backgroundGene) |
21 | 20 |
#' data(pathwaysGMT) |
22 |
-#' fedup_res <- runFedup(testGene, backgroundGene, pathwaysGMT) |
|
23 |
-#' results_file <- tempfile("fedup_res", fileext = ".txt") |
|
24 |
-#' writeFemap(fedup_res, results_file) |
|
21 |
+#' fedupRes <- runFedup(testGene, backgroundGene, pathwaysGMT) |
|
22 |
+#' resultsFile <- tempfile("fedupRes", fileext=".txt") |
|
23 |
+#' writeFemap(fedupRes, resultsFile) |
|
25 | 24 |
#' @importFrom data.table fwrite |
26 | 25 |
#' @importFrom dplyr select mutate %>% |
27 | 26 |
#' @export |
28 |
-writeFemap <- function(df, results_file) { |
|
27 |
+writeFemap <- function(df, resultsFile) { |
|
29 | 28 |
df_em <- df %>% |
30 | 29 |
select("pathway", "pvalue", "qvalue", "status") %>% |
31 | 30 |
mutate("description" = gsub("\\%.*", "", df$pathway)) %>% |
32 | 31 |
mutate("status" = ifelse(df$status == "Enriched", "1", "-1")) %>% |
33 | 32 |
select("pathway", "description", "pvalue", "qvalue", "status") |
34 | 33 |
|
35 |
- fwrite(df_em, results_file, sep = "\t", col.names = TRUE, quote = FALSE) |
|
36 |
- message("Wrote Cytoscape-formatted FEDUP results file to ", results_file) |
|
34 |
+ fwrite(df_em, resultsFile, sep = "\t", col.names = TRUE, quote = FALSE) |
|
35 |
+ message("Wrote out Cytoscape-formatted FEDUP results file to ", resultsFile) |
|
37 | 36 |
} |
38 | 37 |
|
39 | 38 |
#' Draws a network representation of overlaps among enriched and depleted |
40 | 39 |
#' pathways using EnrichmentMap (EM) in Cytoscape. |
41 | 40 |
#' |
42 |
-#' @param gmt_file (char) path to GMT file (must be an absolute path). |
|
43 |
-#' @param results_file (char) path to file with FEDUP results |
|
41 |
+#' @param gmtFile (char) path to GMT file (must be an absolute path). |
|
42 |
+#' @param resultsFile (char) path to file with FEDUP results |
|
44 | 43 |
#' (must be an absolute path). |
45 | 44 |
#' @param pvalue (numeric) pvalue cutoff. Pathways with a higher pvalue |
46 | 45 |
#' will not be included in the EM (value between 0 and 1; default 1). |
47 | 46 |
#' @param qvalue (numeric) qvalue cutoff. Pathways with a higher qvalue |
48 | 47 |
#' will not be included in the EM (value between 0 and 1; default 1). |
49 |
-#' @param net_name (char) name for EM in Cytoscape (default generic). |
|
50 |
-#' @param net_file (char) name of output image. Supports png, pdf, svg, |
|
48 |
+#' @param netName (char) name for EM in Cytoscape (default generic). |
|
49 |
+#' @param netFile (char) name of output image. Supports png, pdf, svg, |
|
51 | 50 |
#' jpeg image formats. |
52 | 51 |
#' @return file name of image to which the network is exported. Also side |
53 | 52 |
#' effect of plotting the EM in an open session of Cytoscape. |
... | ... |
@@ -57,41 +56,39 @@ writeFemap <- function(df, results_file) { |
57 | 56 |
#' data(testGene) |
58 | 57 |
#' data(backgroundGene) |
59 | 58 |
#' data(pathwaysGMT) |
60 |
-#' gmt_file <- tempfile("pathwaysGMT", fileext = ".gmt") |
|
61 |
-#' fedup_res <- runFedup(testGene, backgroundGene, pathwaysGMT) |
|
62 |
-#' results_file <- tempfile("fedup_res", fileext = ".txt") |
|
63 |
-#' net_file <- tempfile("FEDUP_EM", fileext = ".png") |
|
64 |
-#' writePathways(pathwaysGMT, gmt_file) |
|
65 |
-#' writeFemap(fedup_res, results_file) |
|
59 |
+#' gmtFile <- tempfile("pathwaysGMT", fileext=".gmt") |
|
60 |
+#' fedupRes <- runFedup(testGene, backgroundGene, pathwaysGMT) |
|
61 |
+#' resultsFile <- tempfile("fedupRes", fileext=".txt") |
|
62 |
+#' netFile <- tempfile("FEDUP_EM", fileext=".png") |
|
63 |
+#' writePathways(pathwaysGMT, gmtFile) |
|
64 |
+#' writeFemap(fedupRes, resultsFile) |
|
66 | 65 |
#' plotFemap( |
67 |
-#' gmt_file = gmt_file, |
|
68 |
-#' results_file = results_file, |
|
69 |
-#' qvalue = 0.05, |
|
70 |
-#' net_name = "FEDUP_EM", |
|
71 |
-#' net_file = net_file)} |
|
66 |
+#' gmtFile=gmtFile, |
|
67 |
+#' resultsFile=resultsFile, |
|
68 |
+#' qvalue=0.05, |
|
69 |
+#' netName="FEDUP_EM", |
|
70 |
+#' netFile=netFile)} |
|
72 | 71 |
#' @import RCy3 |
73 | 72 |
#' @export |
74 |
-plotFemap <- function(gmt_file, results_file, |
|
75 |
- pvalue = 1, qvalue = 1, |
|
76 |
- net_name = "generic", net_file = "png") { |
|
73 |
+plotFemap <- function(gmtFile, resultsFile, pvalue=1, qvalue=1, |
|
74 |
+ netName="generic", netFile="png") { |
|
75 |
+ |
|
77 | 76 |
# Confirm that Cytoscape is installed and opened |
78 | 77 |
cytoscapePing() |
79 |
- if (net_name %in% getNetworkList()) { |
|
80 |
- deleteNetwork(net_name) |
|
81 |
- } |
|
78 |
+ if (netName %in% getNetworkList()) { deleteNetwork(netName) } |
|
82 | 79 |
|
83 | 80 |
message("Building the network") |
84 | 81 |
em_command <- paste( |
85 | 82 |
'enrichmentmap build analysisType="generic"', |
86 |
- "gmtFile=", gmt_file, |
|
87 |
- "enrichmentsDataset1=", results_file, |
|
83 |
+ "gmtFile=", gmtFile, |
|
84 |
+ "enrichmentsDataset1=", resultsFile, |
|
88 | 85 |
"pvalue=", pvalue, |
89 | 86 |
"qvalue=", qvalue, |
90 | 87 |
"similaritycutoff=", 0.375, |
91 | 88 |
"coefficients=", "COMBINED", |
92 | 89 |
"combinedConstant=", 0.5) |
93 | 90 |
response <- commandsGET(em_command) |
94 |
- renameNetwork(net_name, getNetworkSuid()) |
|
91 |
+ renameNetwork(netName, getNetworkSuid()) |
|
95 | 92 |
|
96 | 93 |
# Node visualization (enriched = red nodes, depleted = blue nodes) |
97 | 94 |
message("Setting network chart data") |
... | ... |
@@ -106,18 +103,18 @@ plotFemap <- function(gmt_file, results_file, |
106 | 103 |
"autoannotate annotate-clusterBoosted", |
107 | 104 |
"clusterAlgorithm=MCL", |
108 | 105 |
"maxWords=3", |
109 |
- "network=", net_name) |
|
106 |
+ "network=", netName) |
|
110 | 107 |
response <- commandsGET(aa_command) |
111 | 108 |
|
112 | 109 |
# Network layout |
113 | 110 |
message("Applying a force-directed network layout") |
114 | 111 |
ln_command <- paste( |
115 | 112 |
"layout force-directed", |
116 |
- "network=", net_name) |
|
113 |
+ "network=", netName) |
|
117 | 114 |
response <- commandsGET(ln_command) |
118 | 115 |
fitContent() |
119 | 116 |
|
120 | 117 |
# Draw out network to file |
121 |
- message("Drawing out network to ", net_file) |
|
122 |
- exportImage(net_file) |
|
118 |
+ message("Drawing out network to ", netFile) |
|
119 |
+ exportImage(netFile) |
|
123 | 120 |
} |
... | ... |
@@ -1,83 +1,83 @@ |
1 | 1 |
#' Returns a list of pathways from various file formats. |
2 |
-#' Currently supports the following file format: GMT, TXT, XLSX. |
|
2 |
+#' Currently supports the following file formats: gmt, txt, xlsx. |
|
3 | 3 |
#' |
4 |
-#' @param pathway_file (char) path to file with pathway annotations. |
|
5 |
-#' @param header (logical) whether \code{pathway_file} has a header |
|
4 |
+#' @param pathwayFile (char) path to file with pathway annotations. |
|
5 |
+#' @param header (logical) whether \code{pathwayFile} has a header |
|
6 | 6 |
#' (default FALSE). |
7 |
-#' @param pathway_col (char) column name with pathway identifiers. |
|
7 |
+#' @param pathwayCol (char) column name with pathway identifiers. |
|
8 | 8 |
#' For use with non-GMT input files (e.g. "Pathway.ID"; default NULL). |
9 |
-#' @param gene_col (char) column name with gene identifiers. |
|
9 |
+#' @param geneCol (char) column name with gene identifiers. |
|
10 | 10 |
#' For use with non-GMT input files (e.g. "Gene.ID"; default NULL). |
11 |
-#' @param MIN_GENE (integer) minimum number of genes to be considered |
|
12 |
-#' in a pathway (default = 1). |
|
13 |
-#' @param MAX_GENE (integer) maximum number of genes to be considered |
|
14 |
-#' in a pathway (default = Inf). |
|
11 |
+#' @param minGene (integer) minimum number of genes to be considered |
|
12 |
+#' in a pathway (default 1). |
|
13 |
+#' @param maxGene (integer) maximum number of genes to be considered |
|
14 |
+#' in a pathway (default Inf). |
|
15 | 15 |
#' @return a list of vectors with pathway annotations. |
16 | 16 |
#' @examples |
17 | 17 |
#' pathways <- readPathways( |
18 | 18 |
#' system.file("extdata", "Human_Reactome_November_17_2020_symbol.gmt", |
19 |
-#' package = "FEDUP"), MIN_GENE = 10, MAX_GENE = 500) |
|
19 |
+#' package="FEDUP"), minGene=10, maxGene=500) |
|
20 | 20 |
#' pathways <- readPathways( |
21 |
-#' system.file("extdata", "SAFE_terms.xlsx", package = "FEDUP"), |
|
22 |
-#' header = TRUE, pathway_col = "Enriched.GO.names", gene_col = "Gene.ID") |
|
21 |
+#' system.file("extdata", "SAFE_terms.xlsx", package="FEDUP"), |
|
22 |
+#' header=TRUE, pathwayCol="Enriched.GO.names", geneCol="Gene.ID") |
|
23 | 23 |
#' @importFrom openxlsx read.xlsx |
24 | 24 |
#' @importFrom tibble deframe |
25 | 25 |
#' @importFrom stats aggregate na.omit |
26 | 26 |
#' @importFrom utils head read.delim tail |
27 | 27 |
#' @export |
28 |
-readPathways <- function(pathway_file, header = FALSE, |
|
29 |
- pathway_col = NULL, gene_col = NULL, |
|
30 |
- MIN_GENE = 1L, MAX_GENE = Inf) { |
|
28 |
+readPathways <- function(pathwayFile, header=FALSE, |
|
29 |
+ pathwayCol=NULL, geneCol=NULL, |
|
30 |
+ minGene=1L, maxGene=Inf) { |
|
31 | 31 |
|
32 |
- message("Pathway file: ", basename(pathway_file)) |
|
33 |
- s <- c("gmt", "txt", "xlsx") # supported file extensions |
|
34 |
- f <- sub(".*\\.", "", pathway_file) # pathway_file extension |
|
32 |
+ s <- c("gmt", "txt", "xlsx") |
|
33 |
+ f <- sub(".*\\.", "", pathwayFile) |
|
35 | 34 |
if (!f %in% s) { |
36 | 35 |
stop(paste0("Sorry, pathway file type (", f, ") is not supported. ", |
37 | 36 |
"Supported extensions: ", paste(s, collapse = ", "), ".")) |
38 | 37 |
} |
39 | 38 |
if (f == "gmt") { |
40 |
- pathway_in <- strsplit(readLines(pathway_file), "\t") |
|
39 |
+ pathway_in <- strsplit(readLines(pathwayFile), "\t") |
|
41 | 40 |
if (header) { pathway_in <- pathway_in[-1] } |
42 | 41 |
pathways <- lapply(pathway_in, tail, -2) |
43 | 42 |
names(pathways) <- vapply(pathway_in, head, n = 1, character(1)) |
44 | 43 |
} else { |
45 | 44 |
if (f == "xlsx") { |
46 |
- pathway_in <- read.xlsx(pathway_file) |
|
45 |
+ pathway_in <- read.xlsx(pathwayFile) |
|
47 | 46 |
} else if (f == "txt") { |
48 |
- pathway_in <- read.delim(pathway_file, header = header) |
|
47 |
+ pathway_in <- read.delim(pathwayFile, header = header) |
|
49 | 48 |
} |
50 |
- if (missing(pathway_col)||!pathway_col %in% colnames(pathway_in)) { |
|
51 |
- stop("Pathway ID column (", pathway_col, ") not in file") |
|
52 |
- } else if (missing(gene_col)||!gene_col %in% colnames(pathway_in)) { |
|
53 |
- stop("Gene ID column (", gene_col, ") not in file") |
|
49 |
+ if (missing(pathwayCol)||!pathwayCol %in% colnames(pathway_in)) { |
|
50 |
+ stop("Pathway ID column (", pathwayCol, ") not in file") |
|
51 |
+ } else if (missing(geneCol)||!geneCol %in% colnames(pathway_in)) { |
|
52 |
+ stop("Gene ID column (", geneCol, ") not in file") |
|
54 | 53 |
} else { |
55 | 54 |
pathway_df <- data.frame( |
56 |
- pathway = pathway_in[,pathway_col], |
|
57 |
- gene = pathway_in[,gene_col]) |
|
55 |
+ pathway = pathway_in[,pathwayCol], |
|
56 |
+ gene = pathway_in[,geneCol]) |
|
58 | 57 |
pathway_df[which(pathway_df$gene == ""), "gene"] <- NA |
59 |
- pathway_df <- na.omit(pathway_df) # ensure no NaNs |
|
58 |
+ pathway_df <- na.omit(pathway_df) |
|
60 | 59 |
pathway_df <- aggregate(gene ~ pathway, pathway_df, paste) |
61 |
- pathways <- deframe(pathway_df) # transform df to list |
|
60 |
+ pathways <- deframe(pathway_df) |
|
62 | 61 |
} |
63 | 62 |
} |
64 | 63 |
|
65 |
- size <- lapply(pathways, length) # subset for pathways in [MIN:MAX] range |
|
66 |
- pathways_s <- pathways[which(size >= MIN_GENE & size <= MAX_GENE)] |
|
64 |
+ size <- lapply(pathways, length) |
|
65 |
+ pathways_s <- pathways[which(size >= minGene & size <= maxGene)] |
|
67 | 66 |
pathways_s <- pathways_s[!duplicated(names(pathways_s))] |
68 |
- message(" => n total pathways: ", length(pathways)) |
|
69 |
- message(" => n pathways (",MIN_GENE,"-",MAX_GENE, "): ", length(pathways_s)) |
|
70 |
- |
|
71 | 67 |
if (!length(pathways_s)) { |
72 | 68 |
stop("Oops, no pathways left... try different filtering options.") |
73 | 69 |
} |
70 |
+ message("Pathway file: ", basename(pathwayFile), |
|
71 |
+ "\n => n total pathways: ", length(pathways), |
|
72 |
+ "\n => n pathways (",minGene,"-",maxGene, "): ", length(pathways_s)) |
|
73 |
+ |
|
74 | 74 |
return(pathways_s) |
75 | 75 |
} |
76 | 76 |
|
77 | 77 |
#' Writes a set of pathways (list of vectors) to a GMT file. |
78 | 78 |
#' |
79 | 79 |
#' @param pathways (list) named list of vectors. |
80 |
-#' @param gmt_file (char) name of output GMT file. |
|
80 |
+#' @param gmtFile (char) name of output GMT file. |
|
81 | 81 |
#' @return GMT-formatted file. Rows represent pathways. Columns represent: |
82 | 82 |
#' \itemize{ |
83 | 83 |
#' \item pathway ID; |
... | ... |
@@ -86,15 +86,15 @@ readPathways <- function(pathway_file, header = FALSE, |
86 | 86 |
#' } |
87 | 87 |
#' @examples |
88 | 88 |
#' data(pathwaysXLSX) |
89 |
-#' writePathways(pathwaysXLSX, tempfile("pathwaysXLSX", fileext = ".gmt")) |
|
89 |
+#' writePathways(pathwaysXLSX, tempfile("pathwaysXLSX", fileext=".gmt")) |
|
90 | 90 |
#' @importFrom data.table fwrite |
91 | 91 |
#' @export |
92 |
-writePathways <- function(pathways, gmt_file) { |
|
92 |
+writePathways <- function(pathways, gmtFile) { |
|
93 | 93 |
tab <- data.table( |
94 | 94 |
pathway = names(pathways), |
95 | 95 |
description = gsub("\\%.*", "", names(pathways)), |
96 | 96 |
genes = unlist(lapply(pathways, paste, collapse = "\t")) |
97 | 97 |
) |
98 |
- fwrite(tab, file = gmt_file, sep = "\t", col.names = FALSE, quote = FALSE) |
|
99 |
- message("Wrote out GMT file with to ", gmt_file) |
|
98 |
+ fwrite(tab, file = gmtFile, sep = "\t", col.names = FALSE, quote = FALSE) |
|
99 |
+ message("Wrote out pathway gmt file to ", gmtFile) |
|
100 | 100 |
} |
... | ... |
@@ -1,16 +1,16 @@ |
1 | 1 |
#' Visualizes pathway enrichment and depletion using ggplot. |
2 | 2 |
#' |
3 | 3 |
#' @param df (data.frame) table with FEDUP enrichment results to plot. |
4 |
-#' @param x_var (char) x-axis variable (must be a column value in \code{df}). |
|
5 |
-#' @param y_var (char) y-axis variable (must be a column value in \code{df}). |
|
6 |
-#' @param x_lab (char) x-axis label (default \code{x_var} value). |
|
7 |
-#' @param y_lab (char) y-axis label (default NULL). |
|
8 |
-#' @param p_title (char) plot title (default NULL). |
|
9 |
-#' @param fill_var (char) point fill variable (default NULL). |
|
10 |
-#' @param fill_col (char) point fill colours (default NULL). |
|
11 |
-#' @param fill_lab (char) point fill label (default \code{fill_var} value). |
|
12 |
-#' @param size_var (char) point size variable (default NULL). |
|
13 |
-#' @param size_lab (char) point size label (default \code{size_var} value). |
|
4 |
+#' @param xVar (char) x-axis variable (must be a column name in \code{df}). |
|
5 |
+#' @param yVar (char) y-axis variable (must be a column name in \code{df}). |
|
6 |
+#' @param xLab (char) x-axis label (default \code{xVar} value). |
|
7 |
+#' @param yLab (char) y-axis label (default NULL). |
|
8 |
+#' @param pTitle (char) plot title (default NULL). |
|
9 |
+#' @param fillVar (char) point fill variable (default NULL). |
|
10 |
+#' @param fillCol (char) point fill colours (default NULL). |
|
11 |
+#' @param fillLab (char) point fill label (default \code{fillVar} value). |
|
12 |
+#' @param sizeVar (char) point size variable (default NULL). |
|
13 |
+#' @param sizeLab (char) point size label (default \code{sizeVar} value). |
|
14 | 14 |
#' @return object returned from ggplot with the enrichment dot plot. |
15 | 15 |
#' @examples |
16 | 16 |
#' data(testGene) |
... | ... |
@@ -21,55 +21,56 @@ |
21 | 21 |
#' fedup_plot$log10qvalue <- -log10(fedup_plot$qvalue + 1e-10) |
22 | 22 |
#' fedup_plot$pathway <- gsub("\\%.*", "", fedup_plot$pathway) |
23 | 23 |
#' plotDotPlot( |
24 |
-#' df = fedup_plot, |
|
25 |
-#' x_var = "log10qvalue", |
|
26 |
-#' y_var = "pathway", |
|
27 |
-#' x_lab = "-log10(Qvalue)", |
|
28 |
-#' fill_var = "status", |
|
29 |
-#' fill_lab = "Enrichment\nstatus", |
|
30 |
-#' size_var = "fold_enrichment", |
|
31 |
-#' size_lab = "Fold enrichment") |
|
24 |
+#' df=fedup_plot, |
|
25 |
+#' xVar="log10qvalue", |
|
26 |
+#' yVar="pathway", |
|
27 |
+#' xLab="-log10(Qvalue)", |
|
28 |
+#' fillVar="status", |
|
29 |
+#' fillLab="Enrichment\nstatus", |
|
30 |
+#' sizeVar="fold_enrichment", |
|
31 |
+#' sizeLab="Fold enrichment") |
|
32 | 32 |
#' @import ggplot2 |
33 | 33 |
#' @importFrom ggthemes theme_clean |
34 | 34 |
#' @importFrom forcats fct_reorder |
35 | 35 |
#' @importFrom RColorBrewer brewer.pal |
36 | 36 |
#' @export |
37 |
-plotDotPlot <- function(df, x_var, y_var, |
|
38 |
- x_lab = x_var, y_lab = NULL, p_title = NULL, |
|
39 |
- fill_var = NULL, fill_col = NULL, fill_lab = fill_var, |
|
40 |
- size_var = NULL, size_lab = size_var) { |
|
37 |
+plotDotPlot <- function(df, xVar, yVar, |
|
38 |
+ xLab=xVar, yLab=NULL, pTitle=NULL, |
|
39 |
+ fillVar=NULL, fillCol=NULL, fillLab=fillVar, |
|
40 |
+ sizeVar=NULL, sizeLab=sizeVar) { |
|
41 | 41 |
|
42 |
- if (!is.null(fill_var) && is.null(fill_col)) { |
|
43 |
- fill_n <- length(unique(df[[fill_var]])) |
|
42 |
+ if (!is.null(fillVar) && is.null(fillCol)) { |
|
43 |
+ fill_n <- length(unique(df[[fillVar]])) |
|
44 | 44 |
pal_n <- ifelse(fill_n >= 3, fill_n, 3) |
45 |
- fill_col <- brewer.pal(pal_n, "Set1") |
|
45 |
+ fillCol <- brewer.pal(pal_n, "Set1") |
|
46 | 46 |
} |
47 | 47 |
|
48 |
- if (fill_var == "status") { |
|
49 |
- df[[fill_var]] <- factor( |
|
50 |
- df[[fill_var]], |
|
51 |
- levels = c("Enriched", "Depleted")) |
|
48 |
+ if (fillVar == "status") { |
|
49 |
+ df[[fillVar]] <- factor( |
|
50 |
+ df[[fillVar]], |
|
51 |
+ levels=c("Enriched", "Depleted")) |
|
52 | 52 |
} |
53 | 53 |
|
54 | 54 |
p <- ggplot(df, aes_string( |
55 |
- x = x_var, |
|
56 |
- y = fct_reorder(df[[y_var]], df[[x_var]]), |
|
57 |
- fill = fill_var, |
|
58 |
- size = size_var)) + |
|
59 |
- geom_point(shape = 21, colour = "black") + |
|
60 |
- labs(x = x_lab, y = y_lab, title = p_title, |
|
61 |
- fill = fill_lab, size = size_lab) + |
|
62 |
- scale_fill_manual(values = fill_col) + |
|
63 |
- theme_clean(base_size = 10) + |
|
64 |
- theme(plot.title = element_text(hjust = 0.5), |
|
65 |
- legend.title = element_text(size = 10), |
|
66 |
- legend.text = element_text(size = 10), |
|
67 |
- legend.key.size = unit(0.1, "line"), |
|
68 |
- plot.background = element_blank()) |
|
55 |
+ x=xVar, |
|
56 |
+ y=fct_reorder(df[[yVar]], df[[xVar]]), |
|
57 |
+ fill=fillVar, |
|
58 |
+ size=sizeVar)) + |
|
59 |
+ geom_point(shape=21, colour="black") + |
|
60 |
+ labs(x=xLab, y=yLab, title=pTitle, |
|
61 |
+ fill=fillLab, size=sizeLab) + |
|
62 |
+ scale_fill_manual(values=fillCol) + |
|
63 |
+ theme_clean(base_size=10) + |
|
64 |
+ theme(plot.title=element_text(hjust=0.5), |
|
65 |
+ legend.title=element_text(size=10), |
|
66 |
+ legend.text=element_text(size=10), |
|
67 |
+ legend.key.size=unit(0.1, "line"), |
|
68 |
+ plot.background=element_blank()) |
|
69 | 69 |
|
70 |
- if (is.numeric(df[[x_var]])) { |
|
71 |
- xmin <- floor(min(df[[x_var]])) # set x-axis limits to avoid points |
|
72 |
- xmax <- ceiling(max(df[[x_var]])) # being cut off from plot window |
|
70 |
+ # Increase x-axis limits to keep points in plot window |
|
71 |
+ if (is.numeric(df[[xVar]])) { |
|
72 |
+ xmin <- floor(min(df[[xVar]])) |
|
73 |
+ xmax <- ceiling(max(df[[xVar]])) |
|
73 | 74 |
p <- p + xlim(xmin, xmax) |
74 | 75 |
} |
75 | 76 |
return(p) |
... | ... |
@@ -18,8 +18,8 @@ Raw GMT file is available from |
18 | 18 |
\details{ |
19 | 19 |
Raw data location |
20 | 20 |
system.file("extdata", "Human_Reactome_November_17_2020_symbol.gmt", |
21 |
- package = "FEDUP") |
|
21 |
+ package="FEDUP") |
|
22 | 22 |
Script to prepare data |
23 |
-system.file("data-raw", "pathwaysGMT.R", package = "FEDUP") |
|
23 |
+system.file("data-raw", "pathwaysGMT.R", package="FEDUP") |
|
24 | 24 |
} |
25 | 25 |
\keyword{datasets} |
... | ... |
@@ -12,10 +12,10 @@ pathwaysTXT |
12 | 12 |
} |
13 | 13 |
\description{ |
14 | 14 |
Raw data location |
15 |
-system.file("extdata", "SAFE_terms.txt", package = "FEDUP") |
|
15 |
+system.file("extdata", "SAFE_terms.txt", package="FEDUP") |
|
16 | 16 |
} |
17 | 17 |
\details{ |
18 | 18 |
Script to prepare data |
19 |
-system.file("data-raw", "pathwaysTXT.R", package = "FEDUP") |
|
19 |
+system.file("data-raw", "pathwaysTXT.R", package="FEDUP") |
|
20 | 20 |
} |
21 | 21 |
\keyword{datasets} |
... | ... |
@@ -12,10 +12,10 @@ pathwaysXLSX |
12 | 12 |
} |
13 | 13 |
\description{ |
14 | 14 |
Raw data location |
15 |
-system.file("extdata", "SAFE_terms.xlsx", package = "FEDUP") |
|
15 |
+system.file("extdata", "SAFE_terms.xlsx", package="FEDUP") |
|
16 | 16 |
} |
17 | 17 |
\details{ |
18 | 18 |
Script to prepare data |
19 |
-system.file("data-raw", "pathwaysXLSX.R", package = "FEDUP") |
|
19 |
+system.file("data-raw", "pathwaysXLSX.R", package="FEDUP") |
|
20 | 20 |
} |
21 | 21 |
\keyword{datasets} |
... | ... |
@@ -6,40 +6,40 @@ |
6 | 6 |
\usage{ |
7 | 7 |
plotDotPlot( |
8 | 8 |
df, |
9 |
- x_var, |
|
10 |
- y_var, |
|
11 |
- x_lab = x_var, |
|
12 |
- y_lab = NULL, |
|
13 |
- p_title = NULL, |
|
14 |
- fill_var = NULL, |
|
15 |
- fill_col = NULL, |
|
16 |
- fill_lab = fill_var, |
|
17 |
- size_var = NULL, |
|
18 |
- size_lab = size_var |
|
9 |
+ xVar, |
|
10 |
+ yVar, |
|
11 |
+ xLab = xVar, |
|
12 |
+ yLab = NULL, |
|
13 |
+ pTitle = NULL, |
|
14 |
+ fillVar = NULL, |
|
15 |
+ fillCol = NULL, |
|
16 |
+ fillLab = fillVar, |
|
17 |
+ sizeVar = NULL, |
|
18 |
+ sizeLab = sizeVar |
|
19 | 19 |
) |
20 | 20 |
} |
21 | 21 |
\arguments{ |
22 | 22 |
\item{df}{(data.frame) table with FEDUP enrichment results to plot.} |
23 | 23 |
|
24 |
-\item{x_var}{(char) x-axis variable (must be a column value in \code{df}).} |
|
24 |
+\item{xVar}{(char) x-axis variable (must be a column name in \code{df}).} |
|
25 | 25 |
|
26 |
-\item{y_var}{(char) y-axis variable (must be a column value in \code{df}).} |
|
26 |
+\item{yVar}{(char) y-axis variable (must be a column name in \code{df}).} |
|
27 | 27 |
|
28 |
-\item{x_lab}{(char) x-axis label (default \code{x_var} value).} |
|
28 |
+\item{xLab}{(char) x-axis label (default \code{xVar} value).} |
|
29 | 29 |
|
30 |
-\item{y_lab}{(char) y-axis label (default NULL).} |
|
30 |
+\item{yLab}{(char) y-axis label (default NULL).} |
|
31 | 31 |
|
32 |
-\item{p_title}{(char) plot title (default NULL).} |
|
32 |
+\item{pTitle}{(char) plot title (default NULL).} |
|
33 | 33 |
|
34 |
-\item{fill_var}{(char) point fill variable (default NULL).} |
|
34 |
+\item{fillVar}{(char) point fill variable (default NULL).} |
|
35 | 35 |
|
36 |
-\item{fill_col}{(char) point fill colours (default NULL).} |
|
36 |
+\item{fillCol}{(char) point fill colours (default NULL).} |
|
37 | 37 |
|
38 |
-\item{fill_lab}{(char) point fill label (default \code{fill_var} value).} |
|
38 |
+\item{fillLab}{(char) point fill label (default \code{fillVar} value).} |
|
39 | 39 |
|
40 |
-\item{size_var}{(char) point size variable (default NULL).} |
|
40 |
+\item{sizeVar}{(char) point size variable (default NULL).} |
|
41 | 41 |
|
42 |
-\item{size_lab}{(char) point size label (default \code{size_var} value).} |
|
42 |
+\item{sizeLab}{(char) point size label (default \code{sizeVar} value).} |
|
43 | 43 |
} |
44 | 44 |
\value{ |
45 | 45 |
object returned from ggplot with the enrichment dot plot. |
... | ... |
@@ -56,12 +56,12 @@ fedup_plot <- fedup_res[which(fedup_res$qvalue < 0.05),] |
56 | 56 |
fedup_plot$log10qvalue <- -log10(fedup_plot$qvalue + 1e-10) |
57 | 57 |
fedup_plot$pathway <- gsub("\\\\\%.*", "", fedup_plot$pathway) |
58 | 58 |
plotDotPlot( |
59 |
- df = fedup_plot, |
|
60 |
- x_var = "log10qvalue", |
|
61 |
- y_var = "pathway", |
|
62 |
- x_lab = "-log10(Qvalue)", |
|
63 |
- fill_var = "status", |
|
64 |
- fill_lab = "Enrichment\nstatus", |
|
65 |
- size_var = "fold_enrichment", |
|
66 |
- size_lab = "Fold enrichment") |
|
59 |
+ df=fedup_plot, |
|
60 |
+ xVar="log10qvalue", |
|
61 |
+ yVar="pathway", |
|
62 |
+ xLab="-log10(Qvalue)", |
|
63 |
+ fillVar="status", |
|
64 |
+ fillLab="Enrichment\nstatus", |
|
65 |
+ sizeVar="fold_enrichment", |
|
66 |
+ sizeLab="Fold enrichment") |
|
67 | 67 |
} |
... | ... |
@@ -6,18 +6,18 @@ |
6 | 6 |
pathways using EnrichmentMap (EM) in Cytoscape.} |
7 | 7 |
\usage{ |
8 | 8 |
plotFemap( |
9 |
- gmt_file, |
|
10 |
- results_file, |
|
9 |
+ gmtFile, |
|
10 |
+ resultsFile, |
|
11 | 11 |
pvalue = 1, |
12 | 12 |
qvalue = 1, |
13 |
- net_name = "generic", |
|
14 |
- net_file = "png" |
|
13 |
+ netName = "generic", |
|
14 |
+ netFile = "png" |
|
15 | 15 |
) |
16 | 16 |
} |
17 | 17 |
\arguments{ |
18 |
-\item{gmt_file}{(char) path to GMT file (must be an absolute path).} |
|
18 |
+\item{gmtFile}{(char) path to GMT file (must be an absolute path).} |
|
19 | 19 |
|
20 |
-\item{results_file}{(char) path to file with FEDUP results |
|
20 |
+\item{resultsFile}{(char) path to file with FEDUP results |
|
21 | 21 |
(must be an absolute path).} |
22 | 22 |
|
23 | 23 |
\item{pvalue}{(numeric) pvalue cutoff. Pathways with a higher pvalue |
... | ... |
@@ -26,9 +26,9 @@ will not be included in the EM (value between 0 and 1; default 1).} |
26 | 26 |
\item{qvalue}{(numeric) qvalue cutoff. Pathways with a higher qvalue |
27 | 27 |
will not be included in the EM (value between 0 and 1; default 1).} |
28 | 28 |
|
29 |
-\item{net_name}{(char) name for EM in Cytoscape (default generic).} |
|
29 |
+\item{netName}{(char) name for EM in Cytoscape (default generic).} |
|
30 | 30 |
|
31 |
-\item{net_file}{(char) name of output image. Supports png, pdf, svg, |
|
31 |
+\item{netFile}{(char) name of output image. Supports png, pdf, svg, |
|
32 | 32 |
jpeg image formats.} |
33 | 33 |
} |
34 | 34 |
\value{ |
... | ... |
@@ -45,16 +45,16 @@ pathways using EnrichmentMap (EM) in Cytoscape. |
45 | 45 |
data(testGene) |
46 | 46 |
data(backgroundGene) |
47 | 47 |
data(pathwaysGMT) |
48 |
- gmt_file <- tempfile("pathwaysGMT", fileext = ".gmt") |
|
49 |
- fedup_res <- runFedup(testGene, backgroundGene, pathwaysGMT) |
|
50 |
- results_file <- tempfile("fedup_res", fileext = ".txt") |
|
51 |
- net_file <- tempfile("FEDUP_EM", fileext = ".png") |
|
52 |
- writePathways(pathwaysGMT, gmt_file) |
|
53 |
- writeFemap(fedup_res, results_file) |
|
48 |
+ gmtFile <- tempfile("pathwaysGMT", fileext=".gmt") |
|
49 |
+ fedupRes <- runFedup(testGene, backgroundGene, pathwaysGMT) |
|
50 |
+ resultsFile <- tempfile("fedupRes", fileext=".txt") |
|
51 |
+ netFile <- tempfile("FEDUP_EM", fileext=".png") |
|
52 |
+ writePathways(pathwaysGMT, gmtFile) |
|
53 |
+ writeFemap(fedupRes, resultsFile) |
|
54 | 54 |
plotFemap( |
55 |
- gmt_file = gmt_file, |
|
56 |
- results_file = results_file, |
|
57 |
- qvalue = 0.05, |
|
58 |
- net_name = "FEDUP_EM", |
|
59 |
- net_file = net_file)} |
|
55 |
+ gmtFile=gmtFile, |
|
56 |
+ resultsFile=resultsFile, |
|
57 |
+ qvalue=0.05, |
|
58 |
+ netName="FEDUP_EM", |
|
59 |
+ netFile=netFile)} |
|
60 | 60 |
} |
... | ... |
@@ -3,47 +3,47 @@ |
3 | 3 |
\name{readPathways} |
4 | 4 |
\alias{readPathways} |
5 | 5 |
\title{Returns a list of pathways from various file formats. |
6 |
-Currently supports the following file format: GMT, TXT, XLSX.} |
|
6 |
+Currently supports the following file formats: gmt, txt, xlsx.} |
|
7 | 7 |
\usage{ |
8 | 8 |
readPathways( |
9 |
- pathway_file, |
|
9 |
+ pathwayFile, |
|
10 | 10 |
header = FALSE, |
11 |
- pathway_col = NULL, |
|
12 |
- gene_col = NULL, |
|
13 |
- MIN_GENE = 1L, |
|
14 |
- MAX_GENE = Inf |
|
11 |
+ pathwayCol = NULL, |
|
12 |
+ geneCol = NULL, |
|
13 |
+ minGene = 1L, |
|
14 |
+ maxGene = Inf |
|
15 | 15 |
) |
16 | 16 |
} |
17 | 17 |
\arguments{ |
18 |
-\item{pathway_file}{(char) path to file with pathway annotations.} |
|
18 |
+\item{pathwayFile}{(char) path to file with pathway annotations.} |
|
19 | 19 |
|
20 |
-\item{header}{(logical) whether \code{pathway_file} has a header |
|
20 |
+\item{header}{(logical) whether \code{pathwayFile} has a header |
|
21 | 21 |
(default FALSE).} |
22 | 22 |
|
23 |
-\item{pathway_col}{(char) column name with pathway identifiers. |
|
23 |
+\item{pathwayCol}{(char) column name with pathway identifiers. |
|
24 | 24 |
For use with non-GMT input files (eg "Pathway.ID"; default NULL).} |
25 | 25 |
|
26 |
-\item{gene_col}{(char) column name with gene identifiers. |
|
26 |
+\item{geneCol}{(char) column name with gene identifiers. |
|
27 | 27 |
For use with non-GMT input files (eg "Gene.ID"; default NULL).} |
28 | 28 |
|
29 |
-\item{MIN_GENE}{(integer) minimum number of genes to be considered |
|
30 |
-in a pathway (default = 1).} |
|
29 |
+\item{minGene}{(integer) minimum number of genes to be considered |
|
30 |
+in a pathway (default 1).} |
|
31 | 31 |
|
32 |
-\item{MAX_GENE}{(integer) maximum number of genes to be considered |
|
33 |
-in a pathway (default = Inf).} |
|
32 |
+\item{maxGene}{(integer) maximum number of genes to be considered |
|
33 |
+in a pathway (default Inf).} |
|
34 | 34 |
} |
35 | 35 |
\value{ |
36 | 36 |
a list of vectors with pathway annotations. |
37 | 37 |
} |
38 | 38 |
\description{ |
39 | 39 |
Returns a list of pathways from various file formats. |
40 |
-Currently supports the following file format: GMT, TXT, XLSX. |
|
40 |
+Currently supports the following file formats: gmt, txt, xlsx. |
|
41 | 41 |
} |
42 | 42 |
\examples{ |
43 | 43 |
pathways <- readPathways( |
44 | 44 |
system.file("extdata", "Human_Reactome_November_17_2020_symbol.gmt", |
45 |
- package = "FEDUP"), MIN_GENE = 10, MAX_GENE = 500) |
|
45 |
+ package="FEDUP"), minGene=10, maxGene=500) |
|
46 | 46 |
pathways <- readPathways( |
47 |
- system.file("extdata", "SAFE_terms.xlsx", package = "FEDUP"), |
|
48 |
- header = TRUE, pathway_col = "Enriched.GO.names", gene_col = "Gene.ID") |
|
47 |
+ system.file("extdata", "SAFE_terms.xlsx", package="FEDUP"), |
|
48 |
+ header=TRUE, pathwayCol="Enriched.GO.names", geneCol="Gene.ID") |
|
49 | 49 |
} |
... | ... |
@@ -4,12 +4,12 @@ |
4 | 4 |
\alias{runFedup} |
5 | 5 |
\title{Runs gene enrichment and depletion analysis for a list of pathways.} |
6 | 6 |
\usage{ |
7 |
-runFedup(test_gene, background_gene, pathways) |
|
7 |
+runFedup(testGene, backgroundGene, pathways) |
|
8 | 8 |
} |
9 | 9 |
\arguments{ |
10 |
-\item{test_gene}{(char) vector of genes to use as test set.} |
|
10 |
+\item{testGene}{(char) vector of genes to use as test set.} |
|
11 | 11 |
|
12 |
-\item{background_gene}{(char) vector of genes to use as background set.} |
|
12 |
+\item{backgroundGene}{(char) vector of genes to use as background set.} |
|
13 | 13 |
|
14 | 14 |
\item{pathways}{(list) list of vectors with pathway annotations.} |
15 | 15 |
} |
... | ... |
@@ -20,14 +20,14 @@ tested pathways. Columns represent: |
20 | 20 |
\item pathway -- name of the pathway, corresponds to |
21 | 21 |
names(\code{pathways}); |
22 | 22 |
\item size -- size of the pathway; |
23 |
- \item real_frac -- fraction of \code{test_gene} members in pathway; |
|
24 |
- \item expected_frac -- fraction of \code{background_gene} members in |
|
23 |
+ \item real_frac -- fraction of \code{testGene} members in pathway; |
|
24 |
+ \item expected_frac -- fraction of \code{backgroundGene} members in |
|
25 | 25 |
pathway; |
26 | 26 |
\item fold_enrichment -- fold enrichment measure, |
27 | 27 |
evaluates as \code{real_frac} / \code{expected_frac}; |
28 | 28 |
\item status -- indicator that pathway is enriched or depleted for |
29 |
- \code{test_gene} members; |
|
30 |
- \item real_gene -- vector of \code{test_gene} gene members annotated |
|
29 |
+ \code{testGene} members; |
|
30 |
+ \item real_gene -- vector of \code{testGene} gene members annotated |
|
31 | 31 |
to \code{pathways}; |
32 | 32 |
\item pvalue -- enrichment p-value calculated via Fisher's exact test; |
33 | 33 |
\item qvalue -- BH-adjusted p-value |
... | ... |
@@ -4,13 +4,13 @@ |
4 | 4 |
\alias{writeFemap} |
5 | 5 |
\title{Writes an enrichment dataset file for use in Cytoscape EnrichmentMap.} |
6 | 6 |
\usage{ |
7 |
-writeFemap(df, results_file) |
|
7 |
+writeFemap(df, resultsFile) |
|
8 | 8 |
} |
9 | 9 |
\arguments{ |
10 | 10 |
\item{df}{(data.frame) table with FEDUP enrichment results. |
11 | 11 |
(see runFedup() for column descriptions)} |
12 | 12 |
|
13 |
-\item{results_file}{(char) name of output results file.} |
|
13 |
+\item{resultsFile}{(char) name of output results file.} |
|
14 | 14 |
} |
15 | 15 |
\value{ |
16 | 16 |
table of gene enrichment and depletion results formatted as a |
... | ... |
@@ -21,8 +21,7 @@ table of gene enrichment and depletion results formatted as a |
21 | 21 |
\item description -- pathway name or description; |
22 | 22 |
\item pvalue -- enrichment pvalue; |
23 | 23 |
\item qvalue -- BH-corrected pvalue; |
24 |
- \item status -- +1 or -1, to identify enrichment in either of the two |
|
25 |
- phenotypes being compared in the two-class analysis |
|
24 |
+ \item status -- +1 or -1, to identify enriched or depleted pathways |
|
26 | 25 |
(+1 maps to red, -1 maps to blue) |
27 | 26 |
} |
28 | 27 |
} |
... | ... |
@@ -33,7 +32,7 @@ Writes an enrichment dataset file for use in Cytoscape EnrichmentMap. |
33 | 32 |
data(testGene) |
34 | 33 |
data(backgroundGene) |
35 | 34 |
data(pathwaysGMT) |
36 |
-fedup_res <- runFedup(testGene, backgroundGene, pathwaysGMT) |
|
37 |
-results_file <- tempfile("fedup_res", fileext = ".txt") |
|
38 |
-writeFemap(fedup_res, results_file) |
|
35 |
+fedupRes <- runFedup(testGene, backgroundGene, pathwaysGMT) |
|
36 |
+resultsFile <- tempfile("fedupRes", fileext=".txt") |
|
37 |
+writeFemap(fedupRes, resultsFile) |
|
39 | 38 |
} |
... | ... |
@@ -4,12 +4,12 @@ |
4 | 4 |
\alias{writePathways} |
5 | 5 |
\title{Writes a set of pathways (list of vectors) to a GMT file.} |
6 | 6 |
\usage{ |
7 |
-writePathways(pathways, gmt_file) |
|
7 |
+writePathways(pathways, gmtFile) |
|
8 | 8 |
} |
9 | 9 |
\arguments{ |
10 | 10 |
\item{pathways}{(list) named list of vectors.} |
11 | 11 |
|
12 |
-\item{gmt_file}{(char) name of output GMT file.} |
|
12 |
+\item{gmtFile}{(char) name of output GMT file.} |
|
13 | 13 |
} |
14 | 14 |
\value{ |
15 | 15 |
GMT-formatted file. Rows represent pathways. Columns represent: |
... | ... |
@@ -24,5 +24,5 @@ Writes a set of pathways (list of vectors) to a GMT file. |
24 | 24 |
} |
25 | 25 |
\examples{ |
26 | 26 |
data(pathwaysXLSX) |
27 |
-writePathways(pathwaysXLSX, tempfile("pathwaysXLSX", fileext = ".gmt")) |
|
27 |
+writePathways(pathwaysXLSX, tempfile("pathwaysXLSX", fileext=".gmt")) |
|
28 | 28 |
} |
... | ... |
@@ -4,13 +4,13 @@ test_that("Test that writeFemap works", { |
4 | 4 |
data(testGene) |
5 | 5 |
data(backgroundGene) |
6 | 6 |
data(pathwaysGMT) |
7 |
- fedup_res <- runFedup(testGene, backgroundGene, pathwaysGMT) |
|
8 |
- results_file <- tempfile("fedup_res", fileext = ".txt") |
|
9 |
- writeFemap(fedup_res, results_file) |
|
10 |
- femap_res <- read.delim(results_file) |
|
7 |
+ fedupRes <- runFedup(testGene, backgroundGene, pathwaysGMT) |
|
8 |
+ resultsFile <- tempfile("fedupRes", fileext=".txt") |
|
9 |
+ writeFemap(fedupRes, resultsFile) |
|
10 |
+ femapRes <- read.delim(resultsFile) |
|
11 | 11 |
|
12 |
- expect_equal(nrow(fedup_res), nrow(femap_res)) |
|
13 |
- expect_true("status" %in% colnames(femap_res)) |
|
14 |
- expect_true(fedup_res[1,"status"] == "Enriched" && femap_res[1,"status"] == 1) |
|
15 |
- expect_true(fedup_res[1436,"status"] == "Depleted" && femap_res[1436,"status"] == -1) |
|
12 |
+ expect_equal(nrow(fedupRes), nrow(femapRes)) |
|
13 |
+ expect_true("status" %in% colnames(femapRes)) |
|
14 |
+ expect_true(fedupRes[1,"status"] == "Enriched" && femapRes[1,"status"] == 1) |
|
15 |
+ expect_true(fedupRes[1436,"status"] == "Depleted" && femapRes[1436,"status"] == -1) |
|
16 | 16 |
}) |
... | ... |
@@ -21,13 +21,13 @@ test_that("Test that FEDUP analysis works", { |
21 | 21 |
expect_false(length(testGene) > length(backgroundGene)) |
22 | 22 |
expect_true(is.list(pathwaysGMT)) |
23 | 23 |
|
24 |
- fedup_res <- runFedup(testGene, backgroundGene, pathwaysGMT) |
|
25 |
- expect_equal(fedup_res[1, real_frac], 100.00000) |
|
26 |
- expect_equal(fedup_res[1, qvalue], 1.567426e-186) |
|
27 |
- expect_true("NKX2-5" %in% fedup_res[,real_gene][[1]]) |
|
28 |
- expect_true(!"OR11A1" %in% fedup_res[,real_gene][[1]]) |
|
29 |
- expect_equal(fedup_res[1437, real_frac], 0.0000000) |
|
30 |
- expect_equal(fedup_res[1437, qvalue], 1.000000e+00) |
|
31 |
- expect_false("NKX2-5" %in% fedup_res[,real_gene][[1437]]) |
|
32 |
- expect_true(!"OR11A1" %in% fedup_res[,real_gene][[1437]]) |
|
24 |
+ fedupRes <- runFedup(testGene, backgroundGene, pathwaysGMT) |
|
25 |
+ expect_equal(fedupRes[1, real_frac], 100.00000) |
|
26 |
+ expect_equal(fedupRes[1, qvalue], 1.567426e-186) |
|
27 |
+ expect_true("NKX2-5" %in% fedupRes[,real_gene][[1]]) |
|
28 |
+ expect_true(!"OR11A1" %in% fedupRes[,real_gene][[1]]) |
|
29 |
+ expect_equal(fedupRes[1437, real_frac], 0.0000000) |
|
30 |
+ expect_equal(fedupRes[1437, qvalue], 1.000000e+00) |
|
31 |
+ expect_false("NKX2-5" %in% fedupRes[,real_gene][[1437]]) |
|
32 |
+ expect_true(!"OR11A1" %in% fedupRes[,real_gene][[1437]]) |
|
33 | 33 |
}) |
... | ... |
@@ -4,54 +4,54 @@ test_that("Test that readPathways stops without proper inputs", { |
4 | 4 |
expect_error(readPathways("test.123.xls")) |
5 | 5 |
expect_error(readPathways("test.gmt.123")) |
6 | 6 |
|
7 |
- pathway_file <- system.file("extdata", "SAFE_terms.xlsx", package = "FEDUP") |
|
7 |
+ pathwayFile <- system.file("extdata", "SAFE_terms.xlsx", package="FEDUP") |
|
8 | 8 |
expect_error(readPathways( |
9 |
- pathway_file, header = TRUE, |
|
10 |
- pathway_col = "Enriched.GO.names", gene_col = "oops")) |
|
9 |
+ pathwayFile, header=TRUE, |
|
10 |
+ pathwayCol="Enriched.GO.names", geneCol="oops")) |
|
11 | 11 |
expect_error(readPathways( |
12 |
- pathway_file, header = TRUE, |
|
13 |
- pathway_col = "oops", gene_col = "Gene.ID")) |
|
12 |
+ pathwayFile, header=TRUE, |
|
13 |
+ pathwayCol="oops", geneCol="Gene.ID")) |
|
14 | 14 |
expect_error(readPathways( |
15 |
- pathway_file, header = TRUE, MIN_GENE = 500, |
|
16 |
- pathway_col = "Enriched.GO.names", gene_col = "Gene.ID")) |
|
15 |
+ pathwayFile, header=TRUE, minGene=500, |
|
16 |
+ pathwayCol="Enriched.GO.names", geneCol="Gene.ID")) |
|
17 | 17 |
}) |
18 | 18 |
|
19 | 19 |
test_that("Test that readPathways works with GMT input", { |
20 |
- pathway_file <- system.file("extdata", "Human_Reactome_November_17_2020_symbol.gmt", package = "FEDUP") |
|
21 |
- supported_types <- c("gmt", "txt", "xlsx") |
|
22 |
- file_type <- sub(".*\\.", "", pathway_file) |
|
23 |
- expect_true(file_type %in% supported_types) |
|
20 |
+ pathwayFile <- system.file("extdata", "Human_Reactome_November_17_2020_symbol.gmt", package="FEDUP") |
|
21 |
+ s <- c("gmt", "txt", "xlsx") |
|
22 |
+ f <- sub(".*\\.", "", pathwayFile) |
|
23 |
+ expect_true(f %in% s) |
|
24 | 24 |
|
25 |
- pathways <- readPathways(pathway_file, MIN_GENE = 10, MAX_GENE = 500) |
|
25 |
+ pathways <- readPathways(pathwayFile, minGene=10, maxGene=500) |
|
26 | 26 |
expect_true(is.list(pathways)) |
27 | 27 |
expect_equal(length(pathways), 1437) |
28 | 28 |
expect_false(any(duplicated(names(pathways)))) |
29 |
- expect_equal(length(readPathways(pathway_file, MIN_GENE = 10, MAX_GENE = 500, header = TRUE)), 1436) |
|
29 |
+ expect_equal(length(readPathways(pathwayFile, minGene=10, maxGene=500, header=TRUE)), 1436) |
|
30 | 30 |
}) |
31 | 31 |
|
32 | 32 |
test_that("Test that readPathways works with XLSX input", { |
33 |
- pathway_file <- system.file("extdata", "SAFE_terms.xlsx", package = "FEDUP") |
|
34 |
- supported_types <- c("gmt", "txt", "xlsx") |
|
35 |
- file_type <- sub(".*\\.", "", pathway_file) |
|
36 |
- expect_true(file_type %in% supported_types) |
|
33 |
+ pathwayFile <- system.file("extdata", "SAFE_terms.xlsx", package="FEDUP") |
|
34 |
+ s <- c("gmt", "txt", "xlsx") |
|
35 |
+ f <- sub(".*\\.", "", pathwayFile) |
|
36 |
+ expect_true(f %in% s) |
|
37 | 37 |
|
38 | 38 |
pathways <- readPathways( |
39 |
- pathway_file, header = TRUE, |
|
40 |
- pathway_col = "Enriched.GO.names", gene_col = "Gene.ID") |
|
39 |
+ pathwayFile, header=TRUE, |
|
40 |
+ pathwayCol="Enriched.GO.names", geneCol="Gene.ID") |
|
41 | 41 |
expect_true(is.list(pathways)) |
42 | 42 |
expect_equal(length(pathways), 30) |
43 | 43 |
expect_false(any(duplicated(names(pathways)))) |
44 | 44 |
}) |
45 | 45 |
|
46 | 46 |
test_that("Test that readPathways works with TXT input", { |
47 |
- pathway_file <- system.file("extdata", "SAFE_terms.txt", package = "FEDUP") |
|
48 |
- supported_types <- c("gmt", "txt", "xlsx") |
|
49 |
- file_type <- sub(".*\\.", "", pathway_file) |
|
50 |
- expect_true(file_type %in% supported_types) |
|
47 |
+ pathwayFile <- system.file("extdata", "SAFE_terms.txt", package="FEDUP") |
|
48 |
+ s <- c("gmt", "txt", "xlsx") |
|
49 |
+ f <- sub(".*\\.", "", pathwayFile) |
|
50 |
+ expect_true(f %in% s) |
|
51 | 51 |
|
52 | 52 |
pathways <- readPathways( |
53 |
- pathway_file, header = TRUE, |
|
54 |
- pathway_col = "Enriched.GO.names", gene_col = "Gene.ID") |
|
53 |
+ pathwayFile, header=TRUE, |
|
54 |
+ pathwayCol="Enriched.GO.names", geneCol="Gene.ID") |
|
55 | 55 |
expect_true(is.list(pathways)) |
56 | 56 |
expect_equal(length(pathways), 30) |
57 | 57 |
expect_false(any(duplicated(names(pathways)))) |
... | ... |
@@ -59,10 +59,10 @@ test_that("Test that readPathways works with TXT input", { |
59 | 59 |
|
60 | 60 |
test_that("Test that writePathways works", { |
61 | 61 |
data(pathwaysXLSX) |
62 |
- gmt_file <- tempfile("pathwaysXLSX", fileext = ".gmt") |
|
62 |
+ gmtFile <- tempfile("pathwaysXLSX", fileext=".gmt") |
|
63 | 63 |
|
64 |
- writePathways(pathwaysXLSX, gmt_file) |
|
65 |
- pathways <- readPathways(gmt_file, header = FALSE) |
|
64 |
+ writePathways(pathwaysXLSX, gmtFile) |
|
65 |
+ pathways <- readPathways(gmtFile, header=FALSE) |
|
66 | 66 |
|
67 | 67 |
expect_equal(length(pathwaysXLSX), length(pathways)) |
68 | 68 |
expect_true(is.list(pathways)) |
... | ... |
@@ -4,22 +4,22 @@ test_that("Test that plotDotPlot works", { |
4 | 4 |
data(testGene) |
5 | 5 |
data(backgroundGene) |
6 | 6 |
data(pathwaysGMT) |
7 |
- fedup_res <- runFedup(testGene, backgroundGene, pathwaysGMT) |
|
8 |
- fedup_enr <- head(fedup_res[with(fedup_res, which(status == "Enriched")),], 10) |
|
9 |
- fedup_dep <- head(fedup_res[with(fedup_res, which(status == "Depleted")),], 10) |
|
10 |
- fedup_plot <- rbind(fedup_enr, fedup_dep) |
|
11 |
- fedup_plot$log10fdr <- -log10(fedup_plot$fdr + 1e-10) # log10-transform FDR for plotting |
|
12 |
- fedup_plot$pathway <- gsub("\\%.*", "", fedup_plot$pathway) # clean pathway names |
|
13 |
- temp <- tempfile("plot", fileext = ".png") |
|
14 |
- png(filename = temp, width = 2750, height = 1600, res = 300) |
|
15 |
- plotDotPlot(df = fedup_plot, |
|
16 |
- x_var = "log10fdr", |
|
17 |
- y_var = "pathway", |
|
18 |
- x_lab = "-log10(FDR)", |
|
19 |
- fill_var = "status", |
|
20 |
- fill_lab = "Enrichment status", |
|
21 |
- size_var = "fold_enrichment", |
|
22 |
- size_lab = "Fold enrichment") |
|
7 |
+ fedupRes <- runFedup(testGene, backgroundGene, pathwaysGMT) |
|
8 |
+ fedupEnr <- head(fedupRes[with(fedupRes, which(status == "Enriched")),], 10) |
|
9 |
+ fedupDep <- head(fedupRes[with(fedupRes, which(status == "Depleted")),], 10) |
|
10 |
+ fedupPlot <- rbind(fedupEnr, fedupDep) |
|
11 |
+ fedupPlot$log10fdr <- -log10(fedupPlot$fdr + 1e-10) # log10-transform FDR for plotting |
|
12 |
+ fedupPlot$pathway <- gsub("\\%.*", "", fedupPlot$pathway) # clean pathway names |
|
13 |
+ temp <- tempfile("plot", fileext=".png") |
|
14 |
+ png(filename=temp, width=2750, height=1600, res=300) |
|
15 |
+ plotDotPlot(df=fedupPlot, |
|
16 |
+ xVar="log10fdr", |
|
17 |
+ yVar="pathway", |
|
18 |
+ xLab="-log10(FDR)", |
|
19 |
+ fillVar="status", |
|
20 |
+ fillLab="Enrichment status", |
|
21 |
+ sizeVar="fold_enrichment", |
|
22 |
+ sizeLab="Fold enrichment") |
|
23 | 23 |
dev.off() |
24 | 24 |
expect_true(TRUE) |
25 | 25 |
}) |