Browse code

[FIX] Fixed minor issues in CIS_grubbs, CIS_volcano and added general function to identify file extension

GiuliaPais authored on 03/03/2021 15:08:21
Showing6 changed files

... ...
@@ -771,15 +771,35 @@ CIS_grubbs <- function(x,
771 771
     stopifnot(is.logical(add_standard_padjust) &
772 772
         length(add_standard_padjust) == 1)
773 773
     stopifnot(file.exists(genomic_annotation_file))
774
+    # Determine file extension
775
+    ext <- .check_file_extension(genomic_annotation_file)
776
+
774 777
     # Try to import annotation file
775
-    refgenes <- read.csv(
776
-        file = genomic_annotation_file,
777
-        header = TRUE, fill = TRUE, sep = "\t",
778
-        check.names = FALSE,
779
-        na.strings = c("NONE", "NA", "NULL", "NaN", "")
780
-    )
781
-    refgenes <- tibble::as_tibble(refgenes) %>%
782
-        dplyr::mutate(chrom = stringr::str_replace_all(.data$chrom, "chr", ""))
778
+    if (ext == "tsv") {
779
+        refgenes <- read.csv(
780
+            file = genomic_annotation_file,
781
+            header = TRUE, fill = TRUE, sep = "\t",
782
+            check.names = FALSE,
783
+            na.strings = c("NONE", "NA", "NULL", "NaN", "")
784
+        )
785
+        refgenes <- tibble::as_tibble(refgenes) %>%
786
+            dplyr::mutate(chrom = stringr::str_replace_all(.data$chrom,
787
+                                                           "chr", ""))
788
+    } else if (ext == "csv") {
789
+        refgenes <- read.csv(
790
+            file = genomic_annotation_file,
791
+            header = TRUE, fill = TRUE,
792
+            check.names = FALSE,
793
+            na.strings = c("NONE", "NA", "NULL", "NaN", "")
794
+        )
795
+        refgenes <- tibble::as_tibble(refgenes) %>%
796
+            dplyr::mutate(chrom = stringr::str_replace_all(.data$chrom,
797
+                                                           "chr", ""))
798
+    } else {
799
+        stop(paste("The genomic annotation file must be either in",
800
+                   ".tsv or .csv format (compressed or not)"))
801
+    }
802
+
783 803
     # Check annotation file format
784 804
     if (!all(c(
785 805
         "name2", "chrom", "strand", "min_txStart", "max_txEnd",
... ...
@@ -811,9 +831,11 @@ CIS_grubbs <- function(x,
811 831
             median_bp_integration_locus =
812 832
                 stats::median(.data$integration_locus),
813 833
             distinct_orientations = dplyr::n_distinct(.data$strand),
814
-            describe = psych::describe(.data$integration_locus),
834
+            describe = list(tibble::as_tibble(
835
+                psych::describe(.data$integration_locus))),
815 836
             .groups = "drop"
816
-        )
837
+        ) %>%
838
+        tidyr::unnest(.data$describe, keep_empty = TRUE, names_sep = "_")
817 839
 
818 840
     df_bygene_withannotation <- df_by_gene %>%
819 841
         dplyr::inner_join(refgenes, by = c(
... ...
@@ -3,6 +3,25 @@
3 3
 #------------------------------------------------------------------------------#
4 4
 ## All functions in this file are NOT exported, to be used internally only.
5 5
 
6
+#### ---- Internals for multiple functions/general purpose ----####
7
+# Returns the file format for each of the file paths passed as a parameter.
8
+#' @importFrom fs path_ext
9
+#' @importFrom tools file_path_sans_ext
10
+#' @importFrom purrr is_empty
11
+.check_file_extension <- function(file_path) {
12
+    compressed_formats <- c("gz", "bz2", "xz", "zip")
13
+    ## Get last portion of file name
14
+    last <- fs::path_ext(file_path)
15
+    compressed <- which(last %in% compressed_formats)
16
+    if (!purrr::is_empty(compressed)) {
17
+        ## for compressed files try extracting the true extension
18
+        file_path[compressed] <- tools::file_path_sans_ext(
19
+            file_path[compressed])
20
+        last <- fs::path_ext(file_path)
21
+    }
22
+    return(last)
23
+}
24
+
6 25
 #### ---- Internals for checks on integration matrices----####
7 26
 
8 27
 # Internal helper function for checking mandatory vars presence in x.
... ...
@@ -2945,14 +2964,14 @@
2945 2964
     species) {
2946 2965
     if (!file.exists(onco_db_file)) {
2947 2966
         stop(paste(
2948
-            "`onco_db_file` was not found, check you provided the",
2967
+            "`onco_db_file` was not found, check if you provided the",
2949 2968
             "correct path for the file"
2950 2969
         ))
2951 2970
     }
2952 2971
     if (!file.exists(tumor_suppressors_db_file)) {
2953 2972
         stop(paste(
2954 2973
             "`tumor_suppressors_db_file` was not found,",
2955
-            "check you provided the",
2974
+            "check if you provided the",
2956 2975
             "correct path for the file"
2957 2976
         ))
2958 2977
     }
... ...
@@ -1005,13 +1005,18 @@
1005 1005
     } else {
1006 1006
         htmltools::p()
1007 1007
     }
1008
-    additional_info <- if (!is.null(add_info)) {
1009
-        .add_info_widget(add_info)
1010
-    } else {
1008
+    additional_info <- if (is.null(add_info) || nrow(add_info) == 0) {
1011 1009
         htmltools::p()
1010
+    } else {
1011
+        .add_info_widget(add_info)
1012 1012
     }
1013 1013
     post_join_info <- .sc_stats_input_joined(input_joined_df, quant_cols)
1014
-    pre_process_matrix <- .generate_react_table(input_df[-missing, ])
1014
+    ppm <- if (length(missing) > 0) {
1015
+        input_df[-missing, ]
1016
+    } else {
1017
+        input_df
1018
+    }
1019
+    pre_process_matrix <- .generate_react_table(ppm)
1015 1020
     pre_sharing <- .sharing_widget(input_joined_df, "PRE-PROCESSING")
1016 1021
     summary_w <- .generate_react_table(summary)
1017 1022
 
... ...
@@ -17,7 +17,7 @@
17 17
 #' ## Oncogene and tumor suppressor genes files
18 18
 #' These files are included in the package for user convenience and are
19 19
 #' simply UniProt files with gene annotations for human and mouse.
20
-#' For more details on how this files were generated use the help `?filname`
20
+#' For more details on how this files were generated use the help `?filename`
21 21
 #' function.
22 22
 #'
23 23
 #' ## Known oncogenes
... ...
@@ -98,7 +98,8 @@ CIS_volcano_plot <- function(x,
98 98
     significance_threshold = 0.05,
99 99
     annotation_threshold_ontots = 0.1,
100 100
     facet_rows = NULL,
101
-    facet_cols = NULL) {
101
+    facet_cols = NULL,
102
+    ProjectID = NULL) {
102 103
     stopifnot(is.data.frame(x))
103 104
     stopifnot(is.character(onco_db_file) & length(onco_db_file) == 1)
104 105
     stopifnot(is.character(tumor_suppressors_db_file) &
... ...
@@ -112,6 +113,11 @@ CIS_volcano_plot <- function(x,
112 113
     stopifnot(is.numeric(annotation_threshold_ontots) |
113 114
         is.integer(annotation_threshold_ontots) &
114 115
             length(annotation_threshold_ontots) == 1)
116
+    stopifnot(is.null(ProjectID) || (is.character(ProjectID) &
117
+                                         length(ProjectID == 1)))
118
+    if (is.null(ProjectID)) {
119
+        ProjectID <- ""
120
+    }
115 121
     ## Load onco and ts
116 122
     oncots_to_use <- .load_onco_ts_genes(
117 123
         onco_db_file,
... ...
@@ -176,10 +182,10 @@ CIS_volcano_plot <- function(x,
176 182
     plot_cis_fdr_slice <- ggplot2::ggplot(
177 183
         data = cis_grubbs_df,
178 184
         ggplot2::aes(
179
-            y = .data$minus_log_p_fdr,
180
-            x = .data$neg_zscore_minus_log2_int_freq_tolerance,
181
-            color = .data$KnownGeneClass,
182
-            fill = .data$KnownGeneClass
185
+            y = minus_log_p_fdr,
186
+            x = neg_zscore_minus_log2_int_freq_tolerance,
187
+            color = KnownGeneClass,
188
+            fill = KnownGeneClass
183 189
         ),
184 190
         na.rm = TRUE, se = TRUE
185 191
     ) +
... ...
@@ -190,16 +196,15 @@ CIS_volcano_plot <- function(x,
190 196
         ) +
191 197
         ggplot2::scale_y_continuous(limits = c(0, max(c(
192 198
             (significance_threshold_minus_log_p + 0.5),
193
-            max(.data$minus_log_p_fdr, na.rm = TRUE)
194
-        )))) +
199
+            max(cis_grubbs_df$minus_log_p_fdr, na.rm = TRUE)
200
+        ), na.rm = TRUE))) +
195 201
         ggplot2::scale_x_continuous(breaks = seq(-4, 4, 2)) +
196
-        ggplot2::facet_grid(rows = {{ facet_rows }}, cols = {{ facet_cols }}) +
197 202
         ggrepel::geom_label_repel(
198 203
             data = dplyr::filter(
199 204
                 cis_grubbs_df,
200
-                .data$tdist_fdr < significance_threshold
205
+                tdist_fdr < significance_threshold
201 206
             ),
202
-            ggplot2::aes(label = .data$GeneName),
207
+            ggplot2::aes(label = GeneName),
203 208
             box.padding = ggplot2::unit(0.35, "lines"),
204 209
             point.padding = ggplot2::unit(0.3, "lines"),
205 210
             color = "white",
... ...
@@ -226,10 +231,10 @@ CIS_volcano_plot <- function(x,
226 231
             axis.text.y = ggplot2::element_text(size = 16),
227 232
             axis.title = ggplot2::element_text(size = 16),
228 233
             plot.title = ggplot2::element_text(size = 20)
229
-        ) +
230
-        ggplot2::labs(list(
234
+         ) +
235
+        ggplot2::labs(
231 236
             title = paste(
232
-                .data$ProjectID,
237
+                ProjectID,
233 238
                 "- Volcano plot of IS gene frequency and",
234 239
                 "CIS results"
235 240
             ),
... ...
@@ -246,7 +251,20 @@ CIS_volcano_plot <- function(x,
246 251
                 "labeled as 'Other'). Annotated if P-value > ",
247 252
                 round(annotation_threshold_ontots_log, 3)
248 253
             )
249
-        ))
254
+        )
255
+    if (!is.null(facet_rows) & !is.null(facet_cols)) {
256
+        plot_cis_fdr_slice <- plot_cis_fdr_slice +
257
+            ggplot2::facet_grid(rows = {{ facet_rows }},
258
+                                cols = {{ facet_cols }})
259
+    } else if (!is.null(facet_rows)) {
260
+        plot_cis_fdr_slice <- plot_cis_fdr_slice +
261
+            ggplot2::facet_wrap({{ facet_rows }}, nrow = length(facet_rows),
262
+                                ncol = 1)
263
+    } else if (!is.null(facet_cols)) {
264
+        plot_cis_fdr_slice <- plot_cis_fdr_slice +
265
+            ggplot2::facet_wrap({{ facet_cols }}, ncol = length(facet_cols),
266
+                                nrow = 1)
267
+    }
250 268
     return(plot_cis_fdr_slice)
251 269
 }
252 270
 
... ...
@@ -72,7 +72,7 @@
72 72
 #' )
73 73
 compute_near_integrations <- function(x,
74 74
     threshold = 4,
75
-    keep_criteria = "keep_first",
75
+    keep_criteria = "max_value",
76 76
     strand_specific = TRUE,
77 77
     max_value_column = "seqCount",
78 78
     map_as_widget = TRUE,
79 79
new file mode 100644
... ...
@@ -0,0 +1,42 @@
1
+context("Internal general purpose")
2
+
3
+library(ISAnalytics)
4
+
5
+#------------------------------------------------------------------------------#
6
+# Global vars
7
+#------------------------------------------------------------------------------#
8
+sample_xz_file <- system.file("extdata", "ex_annotated_ISMatrix.tsv.xz",
9
+    package = "ISAnalytics"
10
+)
11
+sample_zip_file <- system.file("extdata", "fs.zip",
12
+    package = "ISAnalytics"
13
+)
14
+sample_tsv_file <- system.file("extdata", "ex_association_file.tsv",
15
+    package = "ISAnalytics"
16
+)
17
+
18
+#------------------------------------------------------------------------------#
19
+# Test .check_file_extension
20
+#------------------------------------------------------------------------------#
21
+test_that(".check_file_extension works with compressed file", {
22
+    expect_equal(.check_file_extension(sample_xz_file), "tsv")
23
+})
24
+
25
+test_that(".check_file_extension works with compressed folder", {
26
+    expect_equal(.check_file_extension(sample_zip_file), "")
27
+})
28
+
29
+test_that(".check_file_extension works with non comp file", {
30
+    expect_equal(.check_file_extension(sample_tsv_file), "tsv")
31
+})
32
+
33
+test_that(".check_file_extension works with multiple input", {
34
+  checks <- .check_file_extension(
35
+    c(
36
+      sample_xz_file,
37
+      sample_zip_file,
38
+      sample_tsv_file
39
+    ))
40
+  expected <- c("tsv", "", "tsv")
41
+  expect_equal(checks, expected)
42
+})