Browse code

[U] Update to 1.7.3

Giulia Pais authored on 17/06/2022 14:47:19
Showing33 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: ISAnalytics
2 2
 Title: Analyze gene therapy vector insertion sites data identified from genomics next generation sequencing reads for clonal tracking studies
3
-Version: 1.7.2
3
+Version: 1.7.3
4 4
 Date: 2020-07-03
5 5
 Authors@R: c(
6 6
   person(given = "Andrea",
... ...
@@ -78,7 +78,8 @@ Suggests:
78 78
     gtools,
79 79
     eulerr,
80 80
     openxlsx,
81
-    jsonlite
81
+    jsonlite,
82
+    pheatmap
82 83
 VignetteBuilder: knitr
83 84
 RdMacros: 
84 85
     lifecycle
... ...
@@ -1,6 +1,7 @@
1 1
 # Generated by roxygen2: do not edit by hand
2 2
 
3 3
 export(CIS_grubbs)
4
+export(CIS_grubbs_overtime)
4 5
 export(CIS_volcano_plot)
5 6
 export(HSC_population_plot)
6 7
 export(HSC_population_size_estimate)
... ...
@@ -76,6 +77,7 @@ export(sharing_heatmap)
76 77
 export(sharing_venn)
77 78
 export(threshold_filter)
78 79
 export(top_abund_tableGrob)
80
+export(top_cis_overtime_heatmap)
79 81
 export(top_integrations)
80 82
 export(top_targeted_genes)
81 83
 export(transform_columns)
... ...
@@ -2,6 +2,20 @@
2 2
 title: "NEWS"
3 3
 output: github_document
4 4
 ---
5
+# ISAnalytics 1.7.3 (2022-06-17)
6
+
7
+## BUG FIXES AND MINOR CHANGES
8
+
9
+* All functions that check for options now have a default value if option is
10
+not set
11
+* `CIS_grubbs` function is now faster (removed dependency from 
12
+`psych::describe`)
13
+
14
+## NEW
15
+
16
+* New functions `CIS_grubbs_overtime()` and associated plotting function
17
+`top_cis_overtime_heatmap()` to compute CIS_grubbs test over time
18
+
5 19
 # ISAnalytics 1.7.2 (2022-05-23)
6 20
 
7 21
 ## BUG FIXES AND MINOR CHANGES
... ...
@@ -1,6 +1,21 @@
1 1
 NEWS
2 2
 ================
3 3
 
4
+# ISAnalytics 1.7.3 (2022-06-17)
5
+
6
+## BUG FIXES AND MINOR CHANGES
7
+
8
+-   All functions that check for options now have a default value if
9
+    option is not set
10
+-   `CIS_grubbs` function is now faster (removed dependency from
11
+    `psych::describe`)
12
+
13
+## NEW
14
+
15
+-   New functions `CIS_grubbs_overtime()` and associated plotting
16
+    function `top_cis_overtime_heatmap()` to compute CIS_grubbs test
17
+    over time
18
+
4 19
 # ISAnalytics 1.7.2 (2022-05-23)
5 20
 
6 21
 ## BUG FIXES AND MINOR CHANGES
... ...
@@ -700,7 +700,7 @@ gene_frequency_fisher <- function(cis_x,
700 700
         purrr::reduce(cis_mod, ~ dplyr::inner_join(.x, .y, by = cols_for_join))
701 701
     }
702 702
     if (nrow(merged) == 0) {
703
-        if (getOption("ISAnalytics.verbose") == TRUE) {
703
+        if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
704 704
             msg <- c("Data frame empty after filtering",
705 705
                 i = paste(
706 706
                     "Data frame is empty after applying filter on IS,",
... ...
@@ -778,7 +778,7 @@ gene_frequency_fisher <- function(cis_x,
778 778
             dplyr::filter(.data$to_exclude_from_test == FALSE) %>%
779 779
             dplyr::select(-.data$to_exclude_from_test)
780 780
         if (nrow(merged) == 0) {
781
-            if (getOption("ISAnalytics.verbose") == TRUE) {
781
+            if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
782 782
                 msg <- c("Data frame empty after filtering",
783 783
                     i = paste(
784 784
                         "Data frame is after removing unbalanced IS,",
... ...
@@ -951,7 +951,7 @@ sample_statistics <- function(x,
951 951
             result <- result %>%
952 952
                 dplyr::left_join(nIS, by = sample_key)
953 953
         } else {
954
-            if (getOption("ISAnalytics.verbose")) {
954
+            if (getOption("ISAnalytics.verbose", TRUE)) {
955 955
                 rlang::inform(.inform_skip_count_is())
956 956
             }
957 957
         }
... ...
@@ -1015,7 +1015,7 @@ sample_statistics <- function(x,
1015 1015
 #' ("SubjectID" column must be included in the input data frame).
1016 1016
 #' @param return_missing_as_df Returns those genes present in the input df
1017 1017
 #' but not in the refgenes as a data frame?
1018
-#' @param results_as_list Relevant only if `by` is not `NULL` - if `TRUE`
1018
+#' @param results_as_list If `TRUE`
1019 1019
 #' return the group computations as a named list, otherwise return a single
1020 1020
 #' df with an additional column containing the group id
1021 1021
 #'
... ...
@@ -1037,172 +1037,229 @@ CIS_grubbs <- function(x,
1037 1037
     by = NULL,
1038 1038
     return_missing_as_df = TRUE,
1039 1039
     results_as_list = TRUE) {
1040
-    ## Check x has the correct structure
1041
-    stopifnot(is.data.frame(x))
1042
-    ## Check dyn vars for required tags
1043
-    req_mand_vars <- .check_required_cols(c("chromosome", "locus"),
1044
-        vars_df = mandatory_IS_vars(TRUE),
1045
-        duplicate_politic = "error"
1046
-    )
1047
-    chrom_col <- req_mand_vars %>%
1048
-        dplyr::filter(.data$tag == "chromosome") %>%
1049
-        dplyr::pull(.data$names)
1050
-    locus_col <- req_mand_vars %>%
1051
-        dplyr::filter(.data$tag == "locus") %>%
1052
-        dplyr::pull(.data$names)
1053
-    strand_col <- if ("is_strand" %in% mandatory_IS_vars(TRUE)$tag) {
1054
-        col <- mandatory_IS_vars(TRUE) %>%
1055
-            dplyr::filter(.data$tag == "is_strand") %>%
1056
-            dplyr::pull(.data$names)
1057
-        if (col %in% colnames(x)) {
1058
-            col
1059
-        } else {
1060
-            NULL
1061
-        }
1062
-    } else {
1063
-        NULL
1064
-    }
1065
-    req_annot_col <- .check_required_cols(
1066
-        list(gene_symbol = "char", gene_strand = "char"),
1067
-        vars_df = annotation_IS_vars(TRUE),
1068
-        "error"
1040
+    res_checks <- .cis_param_check(
1041
+        x, genomic_annotation_file,
1042
+        grubbs_flanking_gene_bp,
1043
+        threshold_alpha,
1044
+        return_missing_as_df
1069 1045
     )
1070
-    gene_symbol_col <- req_annot_col %>%
1071
-        dplyr::filter(.data$tag == "gene_symbol") %>%
1072
-        dplyr::pull(.data$names)
1073
-    gene_strand_col <- req_annot_col %>%
1074
-        dplyr::filter(.data$tag == "gene_strand") %>%
1075
-        dplyr::pull(.data$names)
1076
-    cols_required <- c(chrom_col, locus_col, gene_symbol_col, gene_strand_col)
1077
-    if (!all(cols_required %in% colnames(x))) {
1078
-        rlang::abort(.missing_user_cols_error(
1079
-            cols_required[!cols_required %in% colnames(x)]
1080
-        ))
1081
-    }
1082
-    # Check other parameters
1083
-    stopifnot(is.data.frame(genomic_annotation_file) ||
1084
-        is.character(genomic_annotation_file))
1085
-    genomic_annotation_file <- genomic_annotation_file[1]
1086
-    if (is.character(genomic_annotation_file) &&
1087
-        !genomic_annotation_file %in% c("hg19", "mm9")) {
1088
-        err_msg <- c("Genomic annotation file unknown",
1089
-            x = paste(
1090
-                "Since ISAnalytics 1.5.4, if provided as",
1091
-                "character vector, `genomic_annotation_file`",
1092
-                "parameter must be one of 'hg19' or 'mm9'"
1093
-            ),
1094
-            i = paste(
1095
-                "For using other genome reference files",
1096
-                "import them in the R environment and pass",
1097
-                "them to the function"
1098
-            )
1099
-        )
1100
-        rlang::abort(err_msg, class = "genomic_file_char")
1101
-    }
1102
-    if (is.character(genomic_annotation_file)) {
1103
-        gen_file <- paste0("refGenes_", genomic_annotation_file)
1104
-        utils::data(list = gen_file, envir = rlang::current_env())
1105
-        refgenes <- rlang::eval_tidy(rlang::sym(gen_file))
1106
-    } else {
1107
-        # Check annotation file format
1108
-        refgenes <- genomic_annotation_file
1109
-        if (!all(refGene_table_cols() %in% colnames(refgenes))) {
1110
-            rlang::abort(.non_standard_annotation_structure())
1111
-        }
1112
-        refgenes <- tibble::as_tibble(refgenes) %>%
1113
-            dplyr::mutate(chrom = stringr::str_replace_all(
1114
-                .data$chrom,
1115
-                "chr", ""
1116
-            ))
1117
-    }
1118
-    stopifnot(is.numeric(grubbs_flanking_gene_bp) ||
1119
-        is.integer(grubbs_flanking_gene_bp))
1120
-    grubbs_flanking_gene_bp <- grubbs_flanking_gene_bp[1]
1121
-    stopifnot(is.numeric(threshold_alpha))
1122
-    threshold_alpha <- threshold_alpha[1]
1123 1046
     stopifnot(is.null(by) || is.character(by))
1124 1047
     if (!all(by %in% colnames(x))) {
1125 1048
         rlang::abort(.missing_user_cols_error(by[!by %in% colnames(x)]))
1126 1049
     }
1127
-    stopifnot(is.logical(return_missing_as_df))
1128
-    return_missing_as_df <- return_missing_as_df[1]
1050
+    result <- list()
1051
+    join_ref_res <- .cis_join_ref(x, res_checks)
1052
+    result <- append(result, join_ref_res[
1053
+        names(join_ref_res) %in% c("missing_genes", "missing_is")
1054
+    ])
1129 1055
     if (is.null(by)) {
1130
-        result <- .cis_grubb_calc(
1131
-            x = x,
1132
-            refgenes = refgenes,
1133
-            grubbs_flanking_gene_bp = grubbs_flanking_gene_bp,
1134
-            threshold_alpha = threshold_alpha,
1135
-            gene_symbol_col = gene_symbol_col,
1136
-            gene_strand_col = gene_strand_col,
1137
-            chr_col = chrom_col, locus_col = locus_col,
1138
-            strand_col = strand_col
1056
+        cis <- .cis_grubb_calc(
1057
+            x = join_ref_res$joint_ref,
1058
+            grubbs_flanking_gene_bp = res_checks$grubbs_flanking_gene_bp,
1059
+            threshold_alpha = res_checks$threshold_alpha,
1060
+            gene_symbol_col = res_checks$gene_symbol_col,
1061
+            gene_strand_col = res_checks$gene_strand_col,
1062
+            chr_col = res_checks$chrom_col, locus_col = res_checks$locus_col,
1063
+            strand_col = res_checks$strand_col
1139 1064
         )
1140
-        missing_df <- result$missing
1141 1065
     } else {
1142
-        grouped <- x %>%
1066
+        grouped <- join_ref_res$joint_ref %>%
1143 1067
             dplyr::group_by(dplyr::across(dplyr::all_of(by)))
1144
-        group_keys <- grouped %>%
1068
+        group_ks <- grouped %>%
1145 1069
             dplyr::group_keys() %>%
1146 1070
             tidyr::unite(col = "id", dplyr::everything()) %>%
1147 1071
             dplyr::pull(.data$id)
1148
-        split <- x %>%
1149
-            dplyr::group_by(dplyr::across(dplyr::all_of(by))) %>%
1072
+        split <- grouped %>%
1150 1073
             dplyr::group_split() %>%
1151
-            purrr::set_names(group_keys)
1152
-        result <- purrr::map(split, ~ .cis_grubb_calc(
1074
+            purrr::set_names(group_ks)
1075
+        cis <- purrr::map(split, ~ .cis_grubb_calc(
1153 1076
             x = .x,
1154
-            refgenes = refgenes,
1155
-            grubbs_flanking_gene_bp = grubbs_flanking_gene_bp,
1156
-            threshold_alpha = threshold_alpha,
1157
-            gene_symbol_col = gene_symbol_col,
1158
-            gene_strand_col = gene_strand_col,
1159
-            chr_col = chrom_col, locus_col = locus_col,
1160
-            strand_col = strand_col
1077
+            grubbs_flanking_gene_bp = res_checks$grubbs_flanking_gene_bp,
1078
+            threshold_alpha = res_checks$threshold_alpha,
1079
+            gene_symbol_col = res_checks$gene_symbol_col,
1080
+            gene_strand_col = res_checks$gene_strand_col,
1081
+            chr_col = res_checks$chrom_col, locus_col = res_checks$locus_col,
1082
+            strand_col = res_checks$strand_col
1161 1083
         ))
1162
-        missing_df <- purrr::map(result, ~ .x$missing) %>%
1163
-            purrr::reduce(dplyr::bind_rows) %>%
1164
-            dplyr::distinct()
1084
+        if (!results_as_list) {
1085
+            cis <- purrr::map2(cis, names(cis), ~ {
1086
+                .x %>%
1087
+                    dplyr::mutate(group = .y)
1088
+            }) %>% purrr::reduce(dplyr::bind_rows)
1089
+        }
1165 1090
     }
1166
-    if (nrow(missing_df) > 0 & getOption("ISAnalytics.verbose") == TRUE) {
1167
-        warn_miss <- c("Warning: missing genes in refgenes table",
1168
-            i = paste(paste(
1169
-                "A total of", nrow(missing_df),
1170
-                "genes",
1171
-                "were found in the input data but not",
1172
-                "in the refgene table. This may be caused by",
1173
-                "a mismatch in the annotation phase of",
1174
-                "the matrix. Here is a summary: "
1175
-            ),
1176
-            paste0(utils::capture.output({
1177
-                print(missing_df, n = Inf)
1178
-            }), collapse = "\n"),
1179
-            sep = "\n"
1091
+    result$cis <- cis
1092
+    return(result)
1093
+}
1094
+
1095
+
1096
+#' Compute CIS and Grubbs test over different time points and groups.
1097
+#'
1098
+#' @description
1099
+#' `r lifecycle::badge("experimental")`
1100
+#' Computes common insertion sites and Grubbs test for each separate group
1101
+#' and separating different time points among the same group. The logic
1102
+#' applied is the same as the function `CIS_grubbs()`.
1103
+#'
1104
+#' @inherit CIS_grubbs details
1105
+#'
1106
+#' @inheritParams CIS_grubbs
1107
+#' @param group A character vector of column names that identifies a group.
1108
+#' Each group must contain one or more time points.
1109
+#' @param timepoint_col What is the name of the column containing time points?
1110
+#' @param as_df Choose the result format: if `TRUE` the results are returned
1111
+#' as a single data frame containing a column for the group id and a column
1112
+#' for the time point, if `FALSE` results are returned in the form of nested
1113
+#' lists (one table for each time point and for each group), if `"group"`
1114
+#' results are returned as a list separated for each group but containing a
1115
+#' single table with all time points.
1116
+#' @param max_workers Maximum number of parallel workers. If `NULL` the
1117
+#' maximum number of workers is calculated automatically.
1118
+#'
1119
+#' @return A list with results and optionally missing genes info
1120
+#' @export
1121
+#'
1122
+#' @examples
1123
+#' data("integration_matrices", package = "ISAnalytics")
1124
+#' data("association_file", package = "ISAnalytics")
1125
+#' aggreg <- aggregate_values_by_key(
1126
+#'     x = integration_matrices,
1127
+#'     association_file = association_file,
1128
+#'     value_cols = c("seqCount", "fragmentEstimate")
1129
+#' )
1130
+#' cis_overtime <- CIS_grubbs_overtime(aggreg)
1131
+#' cis_overtime
1132
+CIS_grubbs_overtime <- function(x,
1133
+    genomic_annotation_file = "hg19",
1134
+    grubbs_flanking_gene_bp = 100000,
1135
+    threshold_alpha = 0.05,
1136
+    group = "SubjectID",
1137
+    timepoint_col = "TimePoint",
1138
+    as_df = TRUE,
1139
+    return_missing_as_df = TRUE,
1140
+    max_workers = NULL) {
1141
+    result <- list()
1142
+    res_checks <- .cis_param_check(
1143
+        x, genomic_annotation_file,
1144
+        grubbs_flanking_gene_bp, threshold_alpha,
1145
+        return_missing_as_df
1146
+    )
1147
+    stopifnot(is.character(group))
1148
+    stopifnot(is.character(timepoint_col))
1149
+    stopifnot(is.null(max_workers) || is.numeric(max_workers))
1150
+    timepoint_col <- timepoint_col[1]
1151
+    if (!all(c(group, timepoint_col) %in% colnames(x))) {
1152
+        rlang::abort(.missing_user_cols_error(
1153
+            c(group, timepoint_col)[!c(group, timepoint_col) %in% colnames(x)]
1154
+        ))
1155
+    }
1156
+    # Manage workers
1157
+    if (.Platform$OS.type == "windows" & is.null(max_workers)) {
1158
+        max_workers <- BiocParallel::snowWorkers()
1159
+    } else if (.Platform$OS.type != "windows" & is.null(max_workers)) {
1160
+        max_workers <- BiocParallel::multicoreWorkers()
1161
+    }
1162
+    stopifnot(is.logical(as_df) || is.character(as_df))
1163
+    result_as_df <- if (is.logical(as_df)) {
1164
+        if (as_df[1] == TRUE) {
1165
+            1
1166
+        } else {
1167
+            0
1168
+        }
1169
+    } else if (is.character(as_df) & as_df[1] == "group") {
1170
+        2
1171
+    } else {
1172
+        err_as_df <- c("The argument `as_df` must be one of the allowed values",
1173
+            x = paste(
1174
+                "Arg can be either logical (T/F) or",
1175
+                "equal to 'group'"
1180 1176
             ),
1181
-            i = paste(
1182
-                "NOTE: missing genes will be removed from",
1183
-                "the final output! Review results carefully"
1184
-            )
1177
+            i = paste("See `?CIS_grubbs_overtime`")
1185 1178
         )
1186
-        rlang::inform(warn_miss, class = "warn_miss_genes")
1179
+        rlang::abort(err_as_df)
1187 1180
     }
1188
-    if (!is.null(by)) {
1189
-        result <- if (results_as_list) {
1190
-            purrr::map(result, ~ .x$df)
1181
+    join_ref_res <- .cis_join_ref(x, res_checks)
1182
+    result <- append(result, join_ref_res[
1183
+        names(join_ref_res) %in% c("missing_genes", "missing_is")
1184
+    ])
1185
+    # --- Split according to group
1186
+    split <- join_ref_res$joint_ref %>%
1187
+        dplyr::group_by(dplyr::across(dplyr::all_of(group)))
1188
+    keys_g <- split %>%
1189
+        dplyr::group_keys() %>%
1190
+        tidyr::unite(col = "id") %>%
1191
+        dplyr::pull(.data$id)
1192
+    split <- split %>%
1193
+        dplyr::group_split() %>%
1194
+        purrr::set_names(keys_g)
1195
+
1196
+    # --- Calculate for each group and each tp
1197
+    tp_slice_cis <- function(df, timepoint_col, res_checks, result_as_df) {
1198
+        tmp <- df %>%
1199
+            dplyr::group_by(dplyr::across(dplyr::all_of(timepoint_col)))
1200
+        g_keys <- tmp %>%
1201
+            dplyr::group_keys() %>%
1202
+            dplyr::pull(!!timepoint_col)
1203
+        tmp <- tmp %>%
1204
+            dplyr::group_split() %>%
1205
+            purrr::set_names(g_keys)
1206
+        res <- if (result_as_df == 0) {
1207
+            purrr::map(tmp, ~ .cis_grubb_calc(
1208
+                x = .x,
1209
+                grubbs_flanking_gene_bp = res_checks$grubbs_flanking_gene_bp,
1210
+                threshold_alpha = res_checks$threshold_alpha,
1211
+                gene_symbol_col = res_checks$gene_symbol_col,
1212
+                gene_strand_col = res_checks$gene_strand_col,
1213
+                chr_col = res_checks$chrom_col, locus_col = res_checks$locus_col,
1214
+                strand_col = res_checks$strand_col
1215
+            ))
1191 1216
         } else {
1192
-            purrr::map2(result, names(result), ~ {
1193
-                .x$df %>%
1194
-                    dplyr::mutate(group = .y)
1195
-            }) %>% purrr::reduce(dplyr::bind_rows)
1196
-        }
1197
-        if (return_missing_as_df) {
1198
-            return(list(cis = result, missing_genes = missing_df))
1217
+            purrr::map2_df(tmp, names(tmp), ~ .cis_grubb_calc(
1218
+                x = .x,
1219
+                grubbs_flanking_gene_bp = res_checks$grubbs_flanking_gene_bp,
1220
+                threshold_alpha = res_checks$threshold_alpha,
1221
+                gene_symbol_col = res_checks$gene_symbol_col,
1222
+                gene_strand_col = res_checks$gene_strand_col,
1223
+                chr_col = res_checks$chrom_col, locus_col = res_checks$locus_col,
1224
+                strand_col = res_checks$strand_col
1225
+            ) %>% dplyr::mutate(!!timepoint_col := .y))
1199 1226
         }
1200
-        return(result)
1227
+        return(res)
1201 1228
     }
1202
-    if (return_missing_as_df) {
1203
-        return(list(cis = result$df, missing_genes = missing_df))
1229
+
1230
+    if (.Platform$OS.type == "windows") {
1231
+        p <- BiocParallel::SnowParam(
1232
+            stop.on.error = TRUE,
1233
+            progressbar = getOption("ISAnalytics.verbose", TRUE),
1234
+            tasks = length(split),
1235
+            workers = max_workers
1236
+        )
1237
+    } else {
1238
+        p <- BiocParallel::MulticoreParam(
1239
+            stop.on.error = TRUE,
1240
+            progressbar = getOption("ISAnalytics.verbose", TRUE),
1241
+            tasks = length(split),
1242
+            workers = max_workers
1243
+        )
1204 1244
     }
1205
-    return(result$df)
1245
+    cis_overtime <- BiocParallel::bplapply(split,
1246
+        FUN = tp_slice_cis,
1247
+        timepoint_col = timepoint_col,
1248
+        res_checks = res_checks,
1249
+        result_as_df = result_as_df,
1250
+        BPPARAM = p
1251
+    )
1252
+    BiocParallel::bpstop(p)
1253
+
1254
+    if (result_as_df == 1) {
1255
+        cis_overtime <- purrr::map2_df(
1256
+            cis_overtime, names(cis_overtime),
1257
+            ~ .x %>%
1258
+                dplyr::mutate(group = .y)
1259
+        )
1260
+    }
1261
+    result$cis <- cis_overtime
1262
+    return(result)
1206 1263
 }
1207 1264
 
1208 1265
 #' Integrations cumulative count in time by sample
... ...
@@ -1500,7 +1557,7 @@ is_sharing <- function(...,
1500 1557
             rlang::abort(.keys_not_char_err())
1501 1558
         }
1502 1559
         if (length(unique(group_keys)) == 1) {
1503
-            if (getOption("ISAnalytics.verbose") == TRUE) {
1560
+            if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
1504 1561
                 one_key_list <- c("Single key in list",
1505 1562
                     i = paste(
1506 1563
                         "Provided a single key in list,",
... ...
@@ -1670,7 +1727,7 @@ is_sharing <- function(...,
1670 1727
             venn = table_for_venn
1671 1728
         )
1672 1729
     }
1673
-    if (getOption("ISAnalytics.verbose") == TRUE) {
1730
+    if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
1674 1731
         rlang::inform("Done!")
1675 1732
     }
1676 1733
     return(sharing)
... ...
@@ -95,7 +95,7 @@ remove_collisions <- function(x,
95 95
     seq_count_col <- quant_cols["seqCount"]
96 96
     ## Check if association file contains all info relative to content the of
97 97
     ## the matrix
98
-    verbose <- getOption("ISAnalytics.verbose")
98
+    verbose <- getOption("ISAnalytics.verbose", TRUE)
99 99
     pcr_col <- pcr_id_column()
100 100
     ### - Are all sample in the matrix present in the AF?
101 101
     missing_ind <- if (!all(x[[pcr_col]] %in%
... ...
@@ -147,13 +147,13 @@ remove_collisions <- function(x,
147 147
         "pcr_replicate"][["names"]]
148 148
     pool_col <- req_tag_cols[eval(sym("tag")) == "pool_id"][["names"]]
149 149
     joined <- pre_process %>%
150
-      dplyr::left_join(association_file, by = pcr_col) %>%
151
-      dplyr::select(dplyr::all_of(
152
-        c(
153
-          colnames(pre_process), date_col,
154
-          replicate_n_col, independent_sample_id, pool_col
155
-        )
156
-      ))
150
+        dplyr::left_join(association_file, by = pcr_col) %>%
151
+        dplyr::select(dplyr::all_of(
152
+            c(
153
+                colnames(pre_process), date_col,
154
+                replicate_n_col, independent_sample_id, pool_col
155
+            )
156
+        ))
157 157
     if (verbose) {
158 158
         rlang::inform("Identifying collisions...")
159 159
     }
... ...
@@ -196,7 +196,8 @@ remove_collisions <- function(x,
196 196
     final_matr <- final_matr[, mget(colnames(pre_process))]
197 197
 
198 198
     ## ---- REPORT PRODUCTION
199
-    if (getOption("ISAnalytics.reports") == TRUE & !is.null(report_path)) {
199
+    if (getOption("ISAnalytics.reports", TRUE) == TRUE &
200
+        !is.null(report_path)) {
200 201
         post_joined <- association_file[final_matr, on = pcr_col]
201 202
         post_joined <- post_joined[, mget(c(
202 203
             colnames(final_matr), date_col,
... ...
@@ -355,11 +355,11 @@ import_association_file <- function(path,
355 355
             dplyr::filter(.data$tag == "tp_days") %>%
356 356
             dplyr::pull(.data$names)
357 357
         if (!purrr::is_empty(tp_col) && tp_col %in% colnames(as_file)) {
358
-          as_file <- as_file %>%
359
-            dplyr::mutate(
360
-              TimepointMonths = .timepoint_to_months(.data[[tp_col]]),
361
-              TimepointYears = .timepoint_to_years(.data[[tp_col]])
362
-            )
358
+            as_file <- as_file %>%
359
+                dplyr::mutate(
360
+                    TimepointMonths = .timepoint_to_months(.data[[tp_col]]),
361
+                    TimepointYears = .timepoint_to_years(.data[[tp_col]])
362
+                )
363 363
         }
364 364
     }
365 365
     import_stats_rep <- NULL
... ...
@@ -452,7 +452,8 @@ import_association_file <- function(path,
452 452
             invokeRestart(rest, cnd)
453 453
         }
454 454
     )
455
-    if (!getOption("ISAnalytics.reports") & getOption("ISAnalytics.verbose")) {
455
+    if (!getOption("ISAnalytics.reports", TRUE) &
456
+        getOption("ISAnalytics.verbose", TRUE)) {
456 457
         summary_report <- .summary_af_import_msg(
457 458
             pars_prob = parsing_problems, dates_prob = date_problems,
458 459
             cols_prob = col_probs[[!is.null(col_probs)]],
... ...
@@ -576,7 +577,7 @@ import_Vispa2_stats <- function(association_file,
576 577
     stats <- stats$stats
577 578
     ## - IF NO STATS IMPORTED (STATS ARE NULL)
578 579
     if (is.null(stats)) {
579
-        if (getOption("ISAnalytics.verbose") == TRUE) {
580
+        if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
580 581
             rlang::inform(.no_stat_files_imported())
581 582
         }
582 583
         if (!is.null(report_path) && report_path == "INTERNAL") {
... ...
@@ -920,7 +921,7 @@ import_parallel_Vispa2Matrices <- function(association_file,
920 921
             !!!mult_args
921 922
         ))
922 923
     }
923
-    annotation_problems <- if (getOption("ISAnalytics.reports") == TRUE &
924
+    annotation_problems <- if (getOption("ISAnalytics.reports", TRUE) == TRUE &
924 925
         !is.null(report_path)) {
925 926
         tmp <- if (!multi_quant_matrix) {
926 927
             comparison_matrix(matrices)
... ...
@@ -1098,13 +1099,13 @@ annotation_issues <- function(matrix) {
1098 1099
     }
1099 1100
     if (is.data.frame(matrix)) {
1100 1101
         probs <- find_probs(matrix)
1101
-        if (is.null(probs) & getOption("ISAnalytics.verbose") == TRUE) {
1102
+        if (is.null(probs) & getOption("ISAnalytics.verbose", TRUE) == TRUE) {
1102 1103
             rlang::inform("No annotation issues found")
1103 1104
         }
1104 1105
         return(probs)
1105 1106
     }
1106 1107
     probs <- purrr::map(matrix, find_probs)
1107
-    if (all(is.null(probs)) & getOption("ISAnalytics.verbose") == TRUE) {
1108
+    if (all(is.null(probs)) & getOption("ISAnalytics.verbose", TRUE) == TRUE) {
1108 1109
         rlang::inform("No annotation issues found")
1109 1110
     }
1110 1111
     return(probs)
... ...
@@ -95,7 +95,7 @@
95 95
                     inspect_cmd, "`"
96 96
                 )
97 97
             )
98
-            rlang::inform(warn, class = "missing_crit_tags")
98
+            rlang::warn(warn, class = "missing_crit_tags")
99 99
         }
100 100
     }
101 101
     new_vars
... ...
@@ -412,7 +412,7 @@
412 412
                         na = ""
413 413
                     )
414 414
                 } else {
415
-                    if (getOption("ISAnalytics.verbose") == TRUE) {
415
+                    if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
416 416
                         warn_empty_iss <- c(paste0(
417 417
                             "Warning: empty stats for project '",
418 418
                             proj, "', pool '", .y, "'"
... ...
@@ -519,7 +519,7 @@
519 519
                             na = ""
520 520
                         )
521 521
                     } else {
522
-                        if (getOption("ISAnalytics.verbose") == TRUE) {
522
+                        if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
523 523
                             warn_empty_iss <- c(paste0(
524 524
                                 "Warning: empty stats for project '",
525 525
                                 proj, "', pool '", .y, "'"
... ...
@@ -890,7 +890,7 @@
890 890
         verbose = FALSE,
891 891
         drop = to_drop,
892 892
         colClasses = col_types,
893
-        showProgress = getOption("ISAnalytics.verbose"),
893
+        showProgress = getOption("ISAnalytics.verbose", TRUE),
894 894
         data.table = TRUE
895 895
     )
896 896
 
... ...
@@ -951,7 +951,7 @@
951 951
         col_types = do.call(readr::cols, col_types),
952 952
         na = c("NONE", "NA", "NULL", "NaN", ""),
953 953
         trim_ws = TRUE,
954
-        progress = getOption("ISAnalytics.verbose")
954
+        progress = getOption("ISAnalytics.verbose", TRUE)
955 955
     )
956 956
     vars <- mandatory_IS_vars(TRUE)
957 957
     if (annotated) {
... ...
@@ -1019,7 +1019,8 @@
1019 1019
         if (!compression_type %in% .supported_fread_compression_formats()) {
1020 1020
             ### If not, switch to classic for reading
1021 1021
             mode <- "classic"
1022
-            if (call_mode == "EXTERNAL" && getOption("ISAnalytics.verbose") == TRUE) {
1022
+            if (call_mode == "EXTERNAL" &&
1023
+                getOption("ISAnalytics.verbose", TRUE) == TRUE) {
1023 1024
                 rlang::inform(.unsupported_comp_format_inf(),
1024 1025
                     class = "unsup_comp_format"
1025 1026
                 )
... ...
@@ -1043,7 +1044,8 @@
1043 1044
     additional_cols <- additional_cols[names(additional_cols) %in%
1044 1045
         colnames(peek_headers)]
1045 1046
     ## - Start reading
1046
-    if (call_mode == "EXTERNAL" && getOption("ISAnalytics.verbose") == TRUE) {
1047
+    if (call_mode == "EXTERNAL" &&
1048
+        getOption("ISAnalytics.verbose", TRUE) == TRUE) {
1047 1049
         rlang::inform(c("Reading file...", i = paste0("Mode: ", mode)))
1048 1050
     }
1049 1051
     df <- if (mode == "fread") {
... ...
@@ -1075,7 +1077,8 @@
1075 1077
             verbose = FALSE
1076 1078
         )
1077 1079
     }
1078
-    if (call_mode == "EXTERNAL" && getOption("ISAnalytics.verbose") == TRUE) {
1080
+    if (call_mode == "EXTERNAL" &&
1081
+        getOption("ISAnalytics.verbose", TRUE) == TRUE) {
1079 1082
         rlang::inform("Reshaping...")
1080 1083
     }
1081 1084
     max_workers <- trunc(BiocParallel::snowWorkers() / 3)
... ...
@@ -1087,7 +1090,7 @@
1087 1090
             BiocParallel::SnowParam(
1088 1091
                 workers = max_workers,
1089 1092
                 tasks = trunc(nrow(df) / max_workers),
1090
-                progressbar = getOption("ISAnalytics.verbose"),
1093
+                progressbar = getOption("ISAnalytics.verbose", TRUE),
1091 1094
                 exportglobals = TRUE,
1092 1095
                 stop.on.error = TRUE
1093 1096
             )
... ...
@@ -1095,7 +1098,7 @@
1095 1098
             BiocParallel::MulticoreParam(
1096 1099
                 workers = max_workers,
1097 1100
                 tasks = trunc(nrow(df) / max_workers),
1098
-                progressbar = getOption("ISAnalytics.verbose"),
1101
+                progressbar = getOption("ISAnalytics.verbose", TRUE),
1099 1102
                 exportglobals = FALSE,
1100 1103
                 stop.on.error = TRUE
1101 1104
             )
... ...
@@ -1130,7 +1133,7 @@
1130 1133
     tidy <- tidy[val_col_name > 0]
1131 1134
     ## Transform cols
1132 1135
     if (call_mode == "EXTERNAL" &&
1133
-        getOption("ISAnalytics.verbose") == TRUE &&
1136
+        getOption("ISAnalytics.verbose", TRUE) == TRUE &&
1134 1137
         !is.null(transformations)) {
1135 1138
         rlang::inform("Applying column transformations...")
1136 1139
     }
... ...
@@ -1138,7 +1141,8 @@
1138 1141
         tidy <- transform_columns(tidy, transf_list = transformations)
1139 1142
     }
1140 1143
     ## - Report summary
1141
-    if (call_mode == "EXTERNAL" && getOption("ISAnalytics.verbose") == TRUE) {
1144
+    if (call_mode == "EXTERNAL" &&
1145
+        getOption("ISAnalytics.verbose", TRUE) == TRUE) {
1142 1146
         rlang::inform(.summary_ism_import_msg(
1143 1147
             is_annotated,
1144 1148
             df_dim,
... ...
@@ -1184,7 +1188,7 @@
1184 1188
         if (!requireNamespace("readxl", quietly = TRUE)) {
1185 1189
             rlang::abort(.missing_pkg_error("readxl"))
1186 1190
         }
1187
-        if (getOption("ISAnalytics.verbose") == TRUE) {
1191
+        if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
1188 1192
             rlang::inform(.xls_file_warning(),
1189 1193
                 class = "xls_file"
1190 1194
             )
... ...
@@ -1226,7 +1230,7 @@
1226 1230
                 na = c("NONE", "NA", "NULL", "NaN", ""),
1227 1231
                 col_types = do.call(readr::cols, col_types),
1228 1232
                 trim_ws = TRUE,
1229
-                progress = getOption("ISAnalytics.verbose")
1233
+                progress = getOption("ISAnalytics.verbose", TRUE)
1230 1234
             )
1231 1235
             problems <- readr::problems(df)
1232 1236
             df
... ...
@@ -1235,7 +1239,7 @@
1235 1239
                 col_types = col_types,
1236 1240
                 na = c("NONE", "NA", "NULL", "NaN", ""),
1237 1241
                 trim_ws = TRUE,
1238
-                progress = getOption("ISAnalytics.verbose")
1242
+                progress = getOption("ISAnalytics.verbose", TRUE)
1239 1243
             )
1240 1244
             problems <- NULL
1241 1245
             df
... ...
@@ -1250,7 +1254,7 @@
1250 1254
                 false = .data$types
1251 1255
             ))
1252 1256
         dates <- dates %>%
1253
-          dplyr::filter(.data$names %in% colnames(as_file))
1257
+            dplyr::filter(.data$names %in% colnames(as_file))
1254 1258
         date_failures <- NULL
1255 1259
         if (nrow(dates) > 0) {
1256 1260
             before <- as_file %>%
... ...
@@ -1355,7 +1359,7 @@
1355 1359
                 fs::path(cur[[concat_pool_col]])
1356 1360
             ), "$")
1357 1361
         } else {
1358
-            if (getOption("ISAnalytics.verbose") == TRUE) {
1362
+            if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
1359 1363
                 msg <- c(paste(
1360 1364
                     "Warning: found NA",
1361 1365
                     paste0(concat_pool_col, " field")
... ...
@@ -1447,7 +1451,7 @@
1447 1451
     if (!is.null(filter)) {
1448 1452
         # Pre-filtering of association file
1449 1453
         if (!all(names(filter) %in% colnames(association_file))) {
1450
-            if (getOption("ISAnalytics.verbose") == TRUE) {
1454
+            if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
1451 1455
                 missing_filt <- c(
1452 1456
                     "Some or all the names in the filter not found",
1453 1457
                     i = paste(
... ...
@@ -1729,13 +1733,13 @@
1729 1733
         p <- BiocParallel::SnowParam(
1730 1734
             stop.on.error = FALSE,
1731 1735
             tasks = length(stats_paths$stats_files),
1732
-            progressbar = getOption("ISAnalytics.verbose")
1736
+            progressbar = getOption("ISAnalytics.verbose", TRUE)
1733 1737
         )
1734 1738
     } else {
1735 1739
         p <- BiocParallel::MulticoreParam(
1736 1740
             stop.on.error = FALSE,
1737 1741
             tasks = length(stats_paths$stats_files),
1738
-            progressbar = getOption("ISAnalytics.verbose")
1742
+            progressbar = getOption("ISAnalytics.verbose", TRUE)
1739 1743
         )
1740 1744
     }
1741 1745
     FUN <- function(x, req_cols) {
... ...
@@ -1872,7 +1876,7 @@
1872 1876
         # Keep asking user until the choice is valid
1873 1877
         repeat {
1874 1878
             cat("Your choice: ")
1875
-            connection <- getOption("ISAnalytics.connection")
1879
+            connection <- getOption("ISAnalytics.connection", stdin())
1876 1880
             if (length(connection) > 1) {
1877 1881
                 con <- connection[1]
1878 1882
             } else {
... ...
@@ -2012,7 +2016,7 @@
2012 2016
     cat("[1] ALL", "[2] ONLY SOME", "[0] QUIT", sep = "\n")
2013 2017
     repeat {
2014 2018
         cat("Your choice: ")
2015
-        connection <- getOption("ISAnalytics.connection")
2019
+        connection <- getOption("ISAnalytics.connection", stdin())
2016 2020
         if (length(connection) > 1) {
2017 2021
             connection <- connection[4]
2018 2022
         }
... ...
@@ -2044,7 +2048,7 @@
2044 2048
 .pool_choices_IN <- function(indexes) {
2045 2049
     repeat {
2046 2050
         cat("\nYour choice: ")
2047
-        connection <- getOption("ISAnalytics.connection")
2051
+        connection <- getOption("ISAnalytics.connection", stdin())
2048 2052
         if (length(connection) > 1) {
2049 2053
             connection <- connection[5]
2050 2054
         }
... ...
@@ -2134,7 +2138,7 @@
2134 2138
             print(pools_to_import)
2135 2139
             cat("\nConfirm your choices? [y/n]", sep = "")
2136 2140
             repeat {
2137
-                connection <- getOption("ISAnalytics.connection")
2141
+                connection <- getOption("ISAnalytics.connection", stdin())
2138 2142
                 if (length(connection) > 1) {
2139 2143
                     connection <- connection[6]
2140 2144
                 }
... ...
@@ -2167,7 +2171,7 @@
2167 2171
                 dplyr::distinct())
2168 2172
             cat("\nConfirm your choices? [y/n]", sep = "")
2169 2173
             repeat {
2170
-                connection <- getOption("ISAnalytics.connection")
2174
+                connection <- getOption("ISAnalytics.connection", stdin())
2171 2175
                 if (length(connection) > 1) {
2172 2176
                     connection <- connection[6]
2173 2177
                 }
... ...
@@ -2339,7 +2343,7 @@
2339 2343
         )
2340 2344
         repeat {
2341 2345
             cat("\n\nType the index number: ")
2342
-            connection <- getOption("ISAnalytics.connection")
2346
+            connection <- getOption("ISAnalytics.connection", stdin())
2343 2347
             if (length(connection) > 1) {
2344 2348
                 connection <- connection[7]
2345 2349
             }
... ...
@@ -2453,7 +2457,7 @@
2453 2457
         ), sep = "\n\n")
2454 2458
         cat("\nConfirm your choices? [y/n]", sep = "")
2455 2459
         repeat {
2456
-            connection <- getOption("ISAnalytics.connection")
2460
+            connection <- getOption("ISAnalytics.connection", stdin())
2457 2461
             if (length(connection) > 1) {
2458 2462
                 connection <- connection[8]
2459 2463
             }
... ...
@@ -2781,7 +2785,7 @@
2781 2785
     reported_anomalies <- purrr::map_df(files_to_import, ~ .x$reported)
2782 2786
     files_to_import <- purrr::map_df(files_to_import, ~ .x$to_import)
2783 2787
     # Report missing or duplicated
2784
-    if (getOption("ISAnalytics.verbose") == TRUE &&
2788
+    if (getOption("ISAnalytics.verbose", TRUE) == TRUE &&
2785 2789
         nrow(reported_anomalies) > 0) {
2786 2790
         if (any(reported_anomalies$Missing)) {
2787 2791
             missing_msg <- c("Some files are missing and will be ignored",
... ...
@@ -2896,7 +2900,8 @@
2896 2900
         if (.y) {
2897 2901
             .x %>%
2898 2902
                 dplyr::distinct(dplyr::across(
2899
-                  dplyr::all_of(mandatory_IS_vars()))) %>%
2903
+                    dplyr::all_of(mandatory_IS_vars())
2904
+                )) %>%
2900 2905
                 nrow()
2901 2906
         } else {
2902 2907
             NA_integer_
... ...
@@ -2942,14 +2947,14 @@
2942 2947
         p <- BiocParallel::SnowParam(
2943 2948
             workers = workers,
2944 2949
             stop.on.error = FALSE,
2945
-            progressbar = getOption("ISAnalytics.verbose"),
2950
+            progressbar = getOption("ISAnalytics.verbose", TRUE),
2946 2951
             tasks = nrow(files_to_import)
2947 2952
         )
2948 2953
     } else {
2949 2954
         p <- BiocParallel::MulticoreParam(
2950 2955
             workers = workers,
2951 2956
             stop.on.error = FALSE,
2952
-            progressbar = getOption("ISAnalytics.verbose"),
2957
+            progressbar = getOption("ISAnalytics.verbose", TRUE),
2953 2958
             tasks = nrow(files_to_import)
2954 2959
         )
2955 2960
     }
... ...
@@ -3107,17 +3112,17 @@
3107 3112
     indep_sample_id) {
3108 3113
     data.table::setDT(req_tag_cols)
3109 3114
     joined <- association_file %>%
3110
-      dplyr::left_join(df, by = pcr_id_column())
3115
+        dplyr::left_join(df, by = pcr_id_column())
3111 3116
     proj_col_name <- req_tag_cols[eval(rlang::expr(
3112 3117
         !!sym("tag") == "project_id"
3113 3118
     ))][["names"]]
3114 3119
     projects <- joined %>%
3115
-      dplyr::filter(!dplyr::if_all(
3116
-        .cols = dplyr::all_of(mandatory_IS_vars()),
3117
-        .fns = is.na
3118
-      )) %>%
3119
-      dplyr::distinct(.data[[proj_col_name]]) %>%
3120
-      dplyr::pull(.data[[proj_col_name]])
3120
+        dplyr::filter(!dplyr::if_all(
3121
+            .cols = dplyr::all_of(mandatory_IS_vars()),
3122
+            .fns = is.na
3123
+        )) %>%
3124
+        dplyr::distinct(.data[[proj_col_name]]) %>%
3125
+        dplyr::pull(.data[[proj_col_name]])
3121 3126
     wanted_tags <- c(
3122 3127
         "subject", "tissue", "cell_marker", "tp_days"
3123 3128
     )
... ...
@@ -3127,19 +3132,19 @@
3127 3132
         eval(sym("names")) %in% colnames(association_file), ]
3128 3133
     wanted <- data.table::rbindlist(list(req_tag_cols, wanted))
3129 3134
     missing_in_df <- joined %>%
3130
-      dplyr::filter(
3131
-        dplyr::if_all(
3132
-          .cols = dplyr::all_of(mandatory_IS_vars()),
3133
-          .fns = is.na
3134
-          ) & .data[[proj_col_name]] %in% projects
3135
-      ) %>%
3136
-      dplyr::distinct(dplyr::across(
3137
-        dplyr::all_of(c(wanted$names, indep_sample_id))
3135
+        dplyr::filter(
3136
+            dplyr::if_all(
3137
+                .cols = dplyr::all_of(mandatory_IS_vars()),
3138
+                .fns = is.na
3139
+            ) & .data[[proj_col_name]] %in% projects
3140
+        ) %>%
3141
+        dplyr::distinct(dplyr::across(
3142
+            dplyr::all_of(c(wanted$names, indep_sample_id))
3138 3143
         ))
3139 3144
     reduced_af <- association_file %>%
3140
-      dplyr::filter(
3141
-        .data[[proj_col_name]] %in% projects
3142
-      )
3145
+        dplyr::filter(
3146
+            .data[[proj_col_name]] %in% projects
3147
+        )
3143 3148
     data.table::setDT(reduced_af)
3144 3149
     data.table::setDT(missing_in_df)
3145 3150
     list(miss = missing_in_df, reduced_af = reduced_af)
... ...
@@ -3155,17 +3160,19 @@
3155 3160
 .identify_independent_samples <- function(joined, indep_sample_id) {
3156 3161
     indep_syms <- purrr::map(indep_sample_id, rlang::sym)
3157 3162
     temp <- joined %>%
3158
-      dplyr::select(dplyr::all_of(
3159
-        c(mandatory_IS_vars(), indep_sample_id)
3160
-      )) %>%
3161
-      dplyr::group_by(dplyr::across(dplyr::all_of(mandatory_IS_vars()))) %>%
3162
-      dplyr::summarise(N = dplyr::n_distinct(!!!indep_syms),
3163
-                       .groups = "drop") %>%
3164
-      dplyr::filter(.data$N > 1)
3163
+        dplyr::select(dplyr::all_of(
3164
+            c(mandatory_IS_vars(), indep_sample_id)
3165
+        )) %>%
3166
+        dplyr::group_by(dplyr::across(dplyr::all_of(mandatory_IS_vars()))) %>%
3167
+        dplyr::summarise(
3168
+            N = dplyr::n_distinct(!!!indep_syms),
3169
+            .groups = "drop"
3170
+        ) %>%
3171
+        dplyr::filter(.data$N > 1)
3165 3172
     non_collisions <- joined %>%
3166
-      dplyr::anti_join(temp, by = mandatory_IS_vars())
3173
+        dplyr::anti_join(temp, by = mandatory_IS_vars())
3167 3174
     collisions <- joined %>%
3168
-      dplyr::semi_join(temp, by = mandatory_IS_vars())
3175
+        dplyr::semi_join(temp, by = mandatory_IS_vars())
3169 3176
     data.table::setDT(non_collisions)
3170 3177
     data.table::setDT(collisions)
3171 3178
     list(collisions = collisions, non_collisions = non_collisions)
... ...
@@ -3203,7 +3210,7 @@
3203 3210
     winning_sample <- x[1, mget(ind_sample_key)]
3204 3211
     # Filter the winning rows
3205 3212
     x <- x %>%
3206
-      dplyr::semi_join(winning_sample, by = ind_sample_key)
3213
+        dplyr::semi_join(winning_sample, by = ind_sample_key)
3207 3214
     data.table::setDT(x)
3208 3215
     return(list(data = x, check = TRUE))
3209 3216
 }
... ...
@@ -3229,16 +3236,16 @@
3229 3236
 # not (and therefore there is the need to perform the next step)
3230 3237
 .discriminate_by_replicate <- function(x, repl_col, ind_sample_key) {
3231 3238
     temp <- x %>%
3232
-      dplyr::group_by(dplyr::across(dplyr::all_of(ind_sample_key))) %>%
3233
-      dplyr::summarise(N = dplyr::n(), .groups = "drop") %>%
3234
-      dplyr::arrange(dplyr::desc(.data$N))
3239
+        dplyr::group_by(dplyr::across(dplyr::all_of(ind_sample_key))) %>%
3240
+        dplyr::summarise(N = dplyr::n(), .groups = "drop") %>%
3241
+        dplyr::arrange(dplyr::desc(.data$N))
3235 3242
     data.table::setDT(temp)
3236 3243
     if (length(temp$N) != 1 & !temp$N[1] > temp$N[2]) {
3237 3244
         return(list(data = x, check = FALSE))
3238 3245
     }
3239 3246
     temp <- temp[1, mget(ind_sample_key)]
3240 3247
     x <- x %>%
3241
-      dplyr::semi_join(temp, by = ind_sample_key)
3248
+        dplyr::semi_join(temp, by = ind_sample_key)
3242 3249
     data.table::setDT(x)
3243 3250
     return(list(data = x, check = TRUE))
3244 3251
 }
... ...
@@ -3269,9 +3276,9 @@
3269 3276
     seqCount_col,
3270 3277
     ind_sample_key) {
3271 3278
     temp <- x %>%
3272
-      dplyr::group_by(dplyr::across(dplyr::all_of(ind_sample_key))) %>%
3273
-      dplyr::summarise(sum = sum(.data[[seqCount_col]]), .groups = "drop") %>%
3274
-      dplyr::arrange(dplyr::desc(.data$sum))
3279
+        dplyr::group_by(dplyr::across(dplyr::all_of(ind_sample_key))) %>%
3280
+        dplyr::summarise(sum = sum(.data[[seqCount_col]]), .groups = "drop") %>%
3281
+        dplyr::arrange(dplyr::desc(.data$sum))
3275 3282
     data.table::setDT(temp)
3276 3283
     ratio <- temp$sum[1] / temp$sum[2]
3277 3284
     if (!ratio > reads_ratio) {
... ...
@@ -3279,7 +3286,7 @@
3279 3286
     }
3280 3287
     temp <- temp[1, mget(ind_sample_key)]
3281 3288
     x <- x %>%
3282
-      dplyr::semi_join(temp, by = ind_sample_key)
3289
+        dplyr::semi_join(temp, by = ind_sample_key)
3283 3290
     data.table::setDT(x)
3284 3291
     return(list(data = x, check = TRUE))
3285 3292
 }
... ...
@@ -3367,14 +3374,14 @@
3367 3374
     if (.Platform$OS.type == "windows") {
3368 3375
         p <- BiocParallel::SnowParam(
3369 3376
             stop.on.error = TRUE,
3370
-            progressbar = getOption("ISAnalytics.verbose"),
3377
+            progressbar = getOption("ISAnalytics.verbose", TRUE),
3371 3378
             tasks = length(split_data),
3372 3379
             workers = max_workers
3373 3380
         )
3374 3381
     } else {
3375 3382
         p <- BiocParallel::MulticoreParam(
3376 3383
             stop.on.error = TRUE,
3377
-            progressbar = getOption("ISAnalytics.verbose"),
3384
+            progressbar = getOption("ISAnalytics.verbose", TRUE),
3378 3385
             tasks = length(split_data),
3379 3386
             workers = max_workers
3380 3387
         )
... ...
@@ -4078,7 +4085,7 @@
4078 4085
                 "csv", paste("csv", .compressed_formats(), sep = "."),
4079 4086
                 "txt", paste("txt", .compressed_formats(), sep = ".")
4080 4087
             )) {
4081
-                if (getOption("ISAnalytics.verbose") == TRUE) {
4088
+                if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
4082 4089
                     warn <- c("Recalibration file format unsupported",
4083 4090
                         i = "Writing file in 'tsv.gz' format"
4084 4091
                     )
... ...
@@ -4102,7 +4109,7 @@
4102 4109
                 "Recalibration map saved to: ",
4103 4110
                 tmp_filename
4104 4111
             )
4105
-            if (getOption("ISAnalytics.verbose") == TRUE) {
4112
+            if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
4106 4113
                 rlang::inform(saved_msg)
4107 4114
             }
4108 4115
         },
... ...
@@ -4518,10 +4525,182 @@
4518 4525
     count_by_gene
4519 4526
 }
4520 4527
 
4521
-#---- USED IN : CIS_grubbs ----
4528
+#---- USED IN : CIS_grubbs, CIS_grubbs_overtime ----
4529
+# Param check for CIS functions
4530
+.cis_param_check <- function(x, genomic_annotation_file,
4531
+    grubbs_flanking_gene_bp,
4532
+    threshold_alpha,
4533
+    return_missing_as_df) {
4534
+    ## Check x has the correct structure
4535
+    stopifnot(is.data.frame(x))
4536
+    check_res <- list()
4537
+    ## Check dyn vars for required tags
4538
+    check_res$req_mand_vars <- .check_required_cols(c("chromosome", "locus"),
4539
+        vars_df = mandatory_IS_vars(TRUE),
4540
+        duplicate_politic = "error"
4541
+    )
4542
+    check_res$chrom_col <- check_res$req_mand_vars %>%
4543
+        dplyr::filter(.data$tag == "chromosome") %>%
4544
+        dplyr::pull(.data$names)
4545
+    check_res$locus_col <- check_res$req_mand_vars %>%
4546
+        dplyr::filter(.data$tag == "locus") %>%
4547
+        dplyr::pull(.data$names)
4548
+    check_res$strand_col <- if ("is_strand" %in% mandatory_IS_vars(TRUE)$tag) {
4549
+        col <- mandatory_IS_vars(TRUE) %>%
4550
+            dplyr::filter(.data$tag == "is_strand") %>%
4551
+            dplyr::pull(.data$names)
4552
+        if (col %in% colnames(x)) {
4553
+            col
4554
+        } else {
4555
+            NULL
4556
+        }
4557
+    } else {
4558
+        NULL
4559
+    }
4560
+    check_res$req_annot_col <- .check_required_cols(
4561
+        list(gene_symbol = "char", gene_strand = "char"),
4562
+        vars_df = annotation_IS_vars(TRUE),
4563
+        "error"
4564
+    )
4565
+    check_res$gene_symbol_col <- check_res$req_annot_col %>%
4566
+        dplyr::filter(.data$tag == "gene_symbol") %>%
4567
+        dplyr::pull(.data$names)
4568
+    check_res$gene_strand_col <- check_res$req_annot_col %>%
4569
+        dplyr::filter(.data$tag == "gene_strand") %>%
4570
+        dplyr::pull(.data$names)
4571
+    check_res$cols_required <- c(
4572
+        check_res$chrom_col, check_res$locus_col,
4573
+        check_res$gene_symbol_col,
4574
+        check_res$gene_strand_col
4575
+    )
4576
+    if (!all(check_res$cols_required %in% colnames(x))) {
4577
+        rlang::abort(.missing_user_cols_error(
4578
+            check_res$cols_required[!check_res$cols_required %in% colnames(x)]
4579
+        ))
4580
+    }
4581
+    # Check other parameters
4582
+    stopifnot(is.data.frame(genomic_annotation_file) ||
4583
+        is.character(genomic_annotation_file))
4584
+    genomic_annotation_file <- genomic_annotation_file[1]
4585
+    if (is.character(genomic_annotation_file) &&
4586
+        !genomic_annotation_file %in% c("hg19", "mm9")) {
4587
+        err_msg <- c("Genomic annotation file unknown",
4588
+            x = paste(
4589
+                "Since ISAnalytics 1.5.4, if provided as",
4590
+                "character vector, `genomic_annotation_file`",
4591
+                "parameter must be one of 'hg19' or 'mm9'"
4592
+            ),
4593
+            i = paste(
4594
+                "For using other genome reference files",
4595
+                "import them in the R environment and pass",
4596
+                "them to the function"
4597
+            )
4598
+        )
4599
+        rlang::abort(err_msg, class = "genomic_file_char")
4600
+    }
4601
+    if (is.character(genomic_annotation_file)) {
4602
+        gen_file <- paste0("refGenes_", genomic_annotation_file)
4603
+        utils::data(list = gen_file, envir = rlang::current_env())
4604
+        check_res$refgenes <- rlang::eval_tidy(rlang::sym(gen_file))
4605
+    } else {
4606
+        # Check annotation file format
4607
+        refgenes <- genomic_annotation_file
4608
+        if (!all(refGene_table_cols() %in% colnames(refgenes))) {
4609
+            rlang::abort(.non_standard_annotation_structure())
4610
+        }
4611
+        check_res$refgenes <- tibble::as_tibble(refgenes) %>%
4612
+            dplyr::mutate(chrom = stringr::str_replace_all(
4613
+                .data$chrom,
4614
+                "chr", ""
4615
+            ))
4616
+    }
4617
+    stopifnot(is.numeric(grubbs_flanking_gene_bp) ||
4618
+        is.integer(grubbs_flanking_gene_bp))
4619
+    check_res$grubbs_flanking_gene_bp <- grubbs_flanking_gene_bp[1]
4620
+    stopifnot(is.numeric(threshold_alpha))
4621
+    check_res$threshold_alpha <- threshold_alpha[1]
4622
+    stopifnot(is.logical(return_missing_as_df))
4623
+    check_res$return_missing_as_df <- return_missing_as_df[1]
4624
+    return(check_res)
4625
+}
4626
+
4627
+# Join with refgenes
4628
+.cis_join_ref <- function(x, res_checks) {
4629
+    result <- list()
4630
+    # Join input with refgenes - inner join only
4631
+    result$joint_ref <- x %>%
4632
+        dplyr::inner_join(res_checks$refgenes %>%
4633
+            dplyr::select(
4634
+                dplyr::all_of(
4635
+                    c("name2", "chrom", "strand", "average_TxLen")
4636
+                )
4637
+            ), by = setNames(
4638
+            object = c("chrom", "strand", "name2"),
4639
+            nm = c(
4640
+                res_checks$chrom_col,
4641
+                res_checks$gene_strand_col,
4642
+                res_checks$gene_symbol_col
4643
+            )
4644
+        ))
4645
+    # Retrieve eventual missing genes
4646
+    missing_genes <- x %>%
4647
+        dplyr::anti_join(result$joint_ref,
4648
+            by = c(
4649
+                res_checks$gene_symbol_col,
4650
+                res_checks$gene_strand_col, res_checks$chrom_col
4651
+            )
4652
+        )
4653
+    missing_is_tot <- nrow(missing_genes)
4654
+    missing_genes <- missing_genes %>%
4655
+        dplyr::distinct(dplyr::across(dplyr::all_of(
4656
+            c(
4657
+                res_checks$gene_symbol_col, res_checks$gene_strand_col,
4658
+                res_checks$chrom_col
4659
+            )
4660
+        )))
4661
+    if (nrow(missing_genes) > 0 &
4662
+        getOption("ISAnalytics.verbose", TRUE) == TRUE) {
4663
+        warn_miss <- c("Warning: missing genes in refgenes table",
4664
+            i = paste(paste(
4665
+                "A total of", nrow(missing_genes),
4666
+                "genes",
4667
+                "were found in the input data but not",
4668
+                "in the refgene table. This may be caused by",
4669
+                "a mismatch in the annotation phase of",
4670
+                "the matrix. Here is a summary: "
4671
+            ),
4672
+            paste0(utils::capture.output({
4673
+                print(missing_genes, n = Inf)
4674
+            }), collapse = "\n"),
4675
+            sep = "\n"
4676
+            ),
4677
+            i = paste(
4678
+                "NOTE: missing genes will be removed from",
4679
+                "the final output! Review results carefully"
4680
+            ),
4681
+            i = paste(
4682
+                "A total of", missing_is_tot,
4683
+                "IS will be removed because of missing genes (",
4684
+                round((missing_is_tot / nrow(x)) * 100, 2), "% of",
4685
+                "total IS in input)"
4686
+            )
4687
+        )
4688
+        rlang::warn(warn_miss, class = "warn_miss_genes")
4689
+    }
4690
+    if (res_checks$return_missing_as_df) {
4691
+        result$missing_genes <- missing_genes
4692
+        result$missing_is <- list(
4693
+            absolute = missing_is_tot,
4694
+            perc = round(
4695
+                (missing_is_tot / nrow(x)) * 100, 2
4696
+            )
4697
+        )
4698
+    }
4699
+    return(result)
4700
+}
4701
+
4522 4702
 # Internal computation of CIS_grubbs test
4523 4703
 .cis_grubb_calc <- function(x,
4524
-    refgenes,
4525 4704
     grubbs_flanking_gene_bp,
4526 4705
     threshold_alpha,
4527 4706
     gene_symbol_col,
... ...
@@ -4530,73 +4709,90 @@
4530 4709
     locus_col,
4531 4710
     strand_col) {
4532 4711
     ## -- Grouping by gene
4533
-    df_by_gene <- x %>%
4534
-        dplyr::group_by(
4535
-            dplyr::across(
4536
-                dplyr::all_of(c(gene_symbol_col, gene_strand_col, chr_col))
4537
-            )
4538
-        ) %>%
4539
-        dplyr::summarise(
4540
-            n_IS_perGene = dplyr::n_distinct(
4541
-                .data[[locus_col]]
4542
-            ),
4543
-            min_bp_integration_locus =
4544
-                min(.data[[locus_col]]),
4545
-            max_bp_integration_locus =
4546
-                max(.data[[locus_col]]),
4547
-            IS_span_bp = (max(.data[[locus_col]]) -
4548
-                min(.data[[locus_col]])),
4549
-            avg_bp_integration_locus =
4550
-                mean(.data[[locus_col]]),
4551
-            median_bp_integration_locus =
4552
-                stats::median(.data[[locus_col]]),
4553
-            distinct_orientations = dplyr::if_else(
4554
-                condition = is.null(strand_col),
4555
-                true = NA_integer_,
4556
-                false = dplyr::n_distinct(.data[[strand_col]])
4557
-            ),
4558
-            .groups = "drop"
4559
-        )
4560
-    ## --- Add describe if package available
4561
-    if (requireNamespace("psych", quietly = TRUE)) {
4562
-        desc <- x %>%
4712
+    df_by_gene <- if (requireNamespace("psych", quietly = TRUE)) {
4713
+        x %>%
4563 4714
             dplyr::group_by(
4564 4715
                 dplyr::across(
4565 4716
                     dplyr::all_of(c(gene_symbol_col, gene_strand_col, chr_col))
4566 4717
                 )
4567 4718
             ) %>%
4568 4719
             dplyr::summarise(
4569
-                describe = list(tibble::as_tibble(
4570
-                    psych::describe(.data[[locus_col]])
4571
-                )), .groups = "drop"
4720
+                n = dplyr::n(),
4721
+                mean = mean(.data[[locus_col]]),
4722
+                sd = stats::sd(.data[[locus_col]], na.rm = TRUE),
4723
+                median = stats::median(.data[[locus_col]]),
4724
+                trimmed = mean(.data[[locus_col]], trim = .1),
4725
+                mad = stats::mad(.data[[locus_col]]),
4726
+                min = min(.data[[locus_col]]),
4727
+                max = max(.data[[locus_col]]),
4728
+                range = max(.data[[locus_col]]) - min(.data[[locus_col]]),
4729
+                skew = psych::skew(.data[[locus_col]]),
4730
+                kurtosis = psych::kurtosi(.data[[locus_col]]),
4731
+                n_IS_perGene = dplyr::n_distinct(
4732
+                    .data[[locus_col]]
4733
+                ),
4734
+                min_bp_integration_locus =
4735
+                    min(.data[[locus_col]]),
4736
+                max_bp_integration_locus =
4737
+                    max(.data[[locus_col]]),
4738
+                IS_span_bp = (max(.data[[locus_col]]) -
4739
+                    min(.data[[locus_col]])),
4740
+                avg_bp_integration_locus =
4741
+                    mean(.data[[locus_col]]),
4742
+                median_bp_integration_locus =
4743
+                    stats::median(.data[[locus_col]]),
4744
+                distinct_orientations = dplyr::if_else(
4745
+                    condition = is.null(strand_col),
4746
+                    true = NA_integer_,
4747
+                    false = dplyr::n_distinct(.data[[strand_col]])
4748
+                ),
4749
+                average_TxLen = .data$average_TxLen[1],
4750
+                .groups = "drop"
4751
+            )
4752
+    } else {
4753
+        x %>%
4754
+            dplyr::group_by(
4755
+                dplyr::across(
4756
+                    dplyr::all_of(c(gene_symbol_col, gene_strand_col, chr_col))
4757
+                )
4572 4758
             ) %>%
4573
-            tidyr::unnest(cols = .data$describe)
4574
-        df_by_gene <- df_by_gene %>%
4575
-            dplyr::left_join(desc,
4576
-                by = c(gene_symbol_col, gene_strand_col, chr_col)
4759
+            dplyr::summarise(
4760
+                n = dplyr::n(),
4761
+                mean = mean(.data[[locus_col]]),
4762
+                sd = stats::sd(.data[[locus_col]], na.rm = TRUE),
4763
+                median = stats::median(.data[[locus_col]]),
4764
+                trimmed = mean(.data[[locus_col]], trim = .1),
4765
+                mad = stats::mad(.data[[locus_col]]),
4766
+                min = min(.data[[locus_col]]),
4767
+                max = max(.data[[locus_col]]),
4768
+                range = max(.data[[locus_col]]) - min(.data[[locus_col]]),
4769
+                n_IS_perGene = dplyr::n_distinct(
4770
+                    .data[[locus_col]]
4771
+                ),
4772
+                min_bp_integration_locus =
4773
+                    min(.data[[locus_col]]),
4774
+                max_bp_integration_locus =
4775
+                    max(.data[[locus_col]]),
4776
+                IS_span_bp = (max(.data[[locus_col]]) -
4777
+                    min(.data[[locus_col]])),
4778
+                avg_bp_integration_locus =
4779
+                    mean(.data[[locus_col]]),
4780
+                median_bp_integration_locus =
4781
+                    stats::median(.data[[locus_col]]),
4782
+                distinct_orientations = dplyr::if_else(
4783
+                    condition = is.null(strand_col),
4784
+                    true = NA_integer_,
4785
+                    false = dplyr::n_distinct(.data[[strand_col]])
4786
+                ),
4787
+                average_TxLen = .data$average_TxLen[1],
4788
+                .groups = "drop"
4577 4789
             )
4578 4790
     }
4579
-    df_bygene_withannotation <- df_by_gene %>%
4580
-        dplyr::inner_join(refgenes, by = setNames(
4581
-            object = c("chrom", "strand", "name2"),
4582
-            nm = c(chr_col, gene_strand_col, gene_symbol_col)
4583
-        )) %>%
4584
-        dplyr::select(c(
4585
-            dplyr::all_of(colnames(df_by_gene)),
4586
-            .data$average_TxLen
4587
-        ))
4588
-    missing_genes <- df_by_gene %>%
4589
-        dplyr::anti_join(df_bygene_withannotation,
4590
-            by = c(gene_symbol_col, gene_strand_col, chr_col)
4591
-        ) %>%
4592
-        dplyr::distinct(dplyr::across(dplyr::all_of(
4593
-            c(gene_symbol_col, gene_strand_col, chr_col)
4594
-        )))
4595 4791
 
4596
-    n_elements <- nrow(df_bygene_withannotation)
4792
+    n_elements <- nrow(df_by_gene)
4597 4793
     ### Grubbs test
4598 4794
     ### --- Gene Frequency
4599
-    df_bygene_withannotation <- df_bygene_withannotation %>%
4795
+    df_bygene_withannotation <- df_by_gene %>%
4600 4796
         dplyr::mutate(
4601 4797
             raw_gene_integration_frequency =
4602 4798
                 .data$n_IS_perGene / .data$average_TxLen,
... ...
@@ -4683,7 +4879,7 @@
4683 4879
                     NA
4684 4880
                 )
4685 4881
         )
4686
-    return(list(df = df_bygene_withannotation, missing = missing_genes))
4882
+    return(df_bygene_withannotation)
4687 4883
 }
4688 4884
 
4689 4885
 #---- USED IN : CIS_volcano_plot ----
... ...
@@ -4736,7 +4932,7 @@
4736 4932
             "Mus musculus (Mouse)"
4737 4933
         )
4738 4934
     )
4739
-    if (getOption("ISAnalytics.verbose") == TRUE) {
4935
+    if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
4740 4936
         load_onco_msg <- c(
4741 4937
             "Loading annotated genes -  species selected: ",
4742 4938
             paste(c(specie_name), collapse = ", ")
... ...
@@ -4747,7 +4943,7 @@
4747 4943
     onco_df <- .filter_db(onco_db, specie_name, "OncoGene")
4748 4944
     tumsup_df <- .filter_db(tumsup_db, specie_name, "TumorSuppressor")
4749 4945
     oncots_df_to_use <- .merge_onco_tumsup(onco_df, tumsup_df)
4750
-    if (getOption("ISAnalytics.verbose") == TRUE) {
4946
+    if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
4751 4947
         rlang::inform(c(
4752 4948
             "Loading annotated genes -  done"
4753 4949
         ))
... ...
@@ -4886,7 +5082,7 @@
4886 5082
                 env = rlang::caller_env(), nm = symbol_name,
4887 5083
                 value = flag_logic_new
4888 5084
             )
4889
-            if (getOption("ISAnalytics.verbose") == TRUE) {
5085
+            if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
4890 5086
                 flag_msg <- c(paste0(
4891 5087
                     "'", symbol_name,
4892 5088
                     "' has more elements than expected"
... ...
@@ -4906,7 +5102,7 @@
4906 5102
                 env = rlang::caller_env(), nm = symbol_name,
4907 5103
                 value = flag_logic_new
4908 5104
             )
4909
-            if (getOption("ISAnalytics.verbose") == TRUE) {
5105
+            if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
4910 5106
                 flag_logic_err <- c(paste(
4911 5107
                     "'", symbol_name, "' has an incorrect",
4912 5108
                     "amount of elements"
... ...
@@ -4957,7 +5153,7 @@
4957 5153
     log2_removed_report <- NULL
4958 5154
     if (log2) {
4959 5155
         ## discard values < = 0 and report removed
4960
-        if (getOption("ISAnalytics.verbose") == TRUE) {
5156
+        if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
4961 5157
             rlang::inform("Log2 transformation, removing values <= 0")
4962 5158
         }
4963 5159
         old_meta <- meta
... ...
@@ -5832,7 +6028,7 @@
5832 6028
     # Check n and k
5833 6029
     if (nrow(temp) < n_comp) {
5834 6030
         n_comp <- nrow(temp)
5835
-        if (getOption("ISAnalytics.verbose") == TRUE) {
6031
+        if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
5836 6032
             warn_msg <- c("Number of requested comparisons too big",
5837 6033
                 i = paste(
5838 6034
                     "The number of requested comparisons",
... ...
@@ -5844,7 +6040,7 @@
5844 6040
             rlang::inform(warn_msg)
5845 6041
         }
5846 6042
     }
5847
-    if (getOption("ISAnalytics.verbose") == TRUE) {
6043
+    if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
5848 6044
         rlang::inform("Calculating combinations...")
5849 6045
     }
5850 6046
     group_comb <- gtools::combinations(
... ...
@@ -5878,7 +6074,7 @@
5878 6074
 
5879 6075
     if (include_self_comp) {
5880 6076
         ## Calculate groups with equal components
5881
-        if (getOption("ISAnalytics.verbose") == TRUE) {
6077
+        if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
5882 6078
             rlang::inform("Calculating self groups (requested)...")
5883 6079
         }
5884 6080
         self_rows <- purrr::map2_df(
... ...
@@ -5928,7 +6124,7 @@
5928 6124
         )
5929 6125
     }
5930 6126
     if (!minimal) {
5931
-        if (getOption("ISAnalytics.verbose") == TRUE) {
6127
+        if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
5932 6128
             rlang::inform("Calculating permutations (requested)...")
5933 6129
         }
5934 6130
         sharing_df <- purrr::pmap_df(sharing_df, .sh_row_permut,
... ...
@@ -6023,7 +6219,7 @@
6023 6219
         )
6024 6220
     }
6025 6221
     if (!minimal) {
6026
-        if (getOption("ISAnalytics.verbose") == TRUE) {
6222
+        if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
6027 6223
             rlang::inform("Calculating permutations (requested)...")
6028 6224
         }
6029 6225
         sharing_df <- purrr::pmap_df(sharing_df, .sh_row_permut,
... ...
@@ -6114,7 +6310,7 @@
6114 6310
         )
6115 6311
     }
6116 6312
     if (!minimal) {
6117
-        if (getOption("ISAnalytics.verbose") == TRUE) {
6313
+        if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
6118 6314
             rlang::inform("Calculating permutations (requested)...")
6119 6315
         }
6120 6316
         sharing_df <- purrr::pmap_df(sharing_df, .sh_row_permut,
... ...
@@ -6201,7 +6397,7 @@
6201 6397
         )
6202 6398
     }
6203 6399
     if (!minimal) {
6204
-        if (getOption("ISAnalytics.verbose") == TRUE) {
6400
+        if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
6205 6401
             rlang::inform("Calculating permutations (requested)...")
6206 6402
         }
6207 6403
         sharing_df <- purrr::pmap_df(sharing_df, .sh_row_permut,
... ...
@@ -6290,7 +6486,7 @@
6290 6486
             )
6291 6487
             rlang::abort(no_common_err)
6292 6488
         }
6293
-        if (getOption("ISAnalytics.verbose") == TRUE &&
6489
+        if (getOption("ISAnalytics.verbose", TRUE) == TRUE &&
6294 6490
             (length(common_names) < length(names(ref)) ||
6295 6491
                 length(common_names) < length(names(sel)))) {
6296 6492
             all_names <- union(names(ref), names(sel))
... ...
@@ -6313,13 +6509,13 @@
6313 6509
         if (.Platform$OS.type == "windows") {
6314 6510
             p <- BiocParallel::SnowParam(
6315 6511
                 tasks = length(common_names),
6316
-                progressbar = getOption("ISAnalytics.verbose"),
6512
+                progressbar = getOption("ISAnalytics.verbose", TRUE),
6317 6513
                 exportglobals = TRUE
6318 6514
             )
6319 6515
         } else {
6320 6516
             p <- BiocParallel::MulticoreParam(
6321 6517
                 tasks = length(common_names),
6322
-                progressbar = getOption("ISAnalytics.verbose"),
6518
+                progressbar = getOption("ISAnalytics.verbose", TRUE),
6323 6519
                 exportglobals = FALSE
6324 6520
             )
6325 6521
         }
... ...
@@ -138,10 +138,12 @@ outlier_filter <- function(metadata,
138 138
             test_args_names <- do.call(rlang::fn_fmls_names, args = list(test))
139 139
             test_args_names <- test_args_names[test_args_names != "metadata"]
140 140
             test_args <- if ("report_path" %in% test_args_names) {
141
-              append(vargs[names(vargs) %in% test_args_names],
142
-                     list(report_path = report_path))
141
+                append(
142
+                    vargs[names(vargs) %in% test_args_names],
143
+                    list(report_path = report_path)
144
+                )
143 145
             } else {
144
-              vargs[names(vargs) %in% test_args_names]
146
+                vargs[names(vargs) %in% test_args_names]
145 147
             }
146 148
             f_call <- rlang::call2(test, metadata = metadata, !!!test_args)
147 149
             result <- rlang::eval_tidy(f_call)
... ...
@@ -228,7 +230,8 @@ outlier_filter <- function(metadata,
228 230
             !c("to_remove")
229 231
         ]
230 232
     }
231
-    if (getOption("ISAnalytics.reports") == TRUE && !is.null(report_path)) {
233
+    if (getOption("ISAnalytics.reports", TRUE) == TRUE &&
234
+        !is.null(report_path)) {
232 235
         withCallingHandlers(
233 236
             {
234 237
                 .produce_report(
... ...
@@ -352,7 +355,7 @@ outliers_by_pool_fragments <- function(metadata,
352 355
     )
353 356
     .outlier_test_verify_logiop(key, flag_logic, "flag_logic")
354 357
     ## Cleaning
355
-    if (getOption("ISAnalytics.verbose") == TRUE) {
358
+    if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
356 359
         rlang::inform("Removing NAs from data...")
357 360
     }
358 361
     metadata <- data.table::setDT(metadata)
... ...
@@ -419,7 +422,8 @@ outliers_by_pool_fragments <- function(metadata,
419 422
     }
420 423
     calc_res[, c("to_remove") := to_rem]
421 424
     ## Produce report
422
-    if (getOption("ISAnalytics.reports") == TRUE && !is.null(report_path)) {
425
+    if (getOption("ISAnalytics.reports", TRUE) == TRUE &&
426
+        !is.null(report_path)) {
423 427
         withCallingHandlers(
424 428
             {
425 429
                 af_cols_specs <- data.table::setDT(
... ...
@@ -97,10 +97,10 @@ CIS_volcano_plot <- function(x,
97 97
         "neg_zscore_minus_log2_int_freq_tolerance"
98 98
     )
99 99
     cis_grubbs_df <- if (!all(min_cis_col %in% colnames(x))) {
100
-        if (getOption("ISAnalytics.verbose") == TRUE) {
100
+        if (getOption("ISAnalytics.verbose", TRUE) == TRUE) {
101 101
             rlang::inform("Calculating CIS_grubbs for x...")
102 102
         }
103
-        CIS_grubbs(x, return_missing_as_df = FALSE)
103
+        (CIS_grubbs(x, return_missing_as_df = FALSE))$cis
104 104
     } else {
105 105
         x
106 106
     }
... ...
@@ -274,6 +274,540 @@ clinical_relevant_suspicious_genes <- function() {
274 274
     )
275 275
 }
276 276
 
277
+#' Heatmaps for the top N common insertion sites over time.
278
+#'
279
+#' @description
280
+#' `r lifecycle::badge("experimental")`
281
+#' This function computes the visualization of the results of the function
282
+#' `CIS_grubbs_overtime()` in the form of heatmaps for the top N selected
283
+#' genes over time.
284
+#'
285
+#' @template genes_db
286
+#' @family Plotting functions
287
+#'
288
+#' @inheritParams CIS_volcano_plot
289
+#' @param x Output of the function `CIS_grubbs_overtime()`, either in single
290
+#' data frame form or nested lists
291
+#' @param n_genes Number of top genes to consider
292
+#' @param timepoint_col The name of the time point column in `x`
293
+#' @param group_col The name of the group column in `x`
294
+#' @param plot_values Which kind of values should be plotted? Can either be
295
+#' `"p"` for the p-value or `"minus_log_p"` for a scaled p-value of the
296
+#' Grubbs test
297
+#' @param p_value_correction One among `"bonferroni"` and `"fdr"`
298
+#' @param prune_tp_treshold Minimum number of genes to retain a time point.
299
+#' See details.
300
+#' @param gene_selection_param The descriptive statistic measure to decide
301
+#' which genes to plot, possible choices are
302
+#' `"trimmed", "n", "mean", "sd", "median","mad", "min", "max"`. See details.
303
+#' @param fill_0_selection Fill NA values with 0s before computing statistics
304
+#' for each gene? (TRUE/FALSE)
305
+#' @param fill_NA_in_heatmap Fill NA values with 0 when plotting the heatmap?
306
+#' (TRUE/FALSE)
307
+#' @param heatmap_color_palette Colors for values in the heatmaps,
308
+#' either `"default"` or a function producing
309
+#' a color palette, obtainable via `grDevices::colorRampPalette`.
310
+#' @param title_generator Either `NULL` or a function. See details.
311
+#' @param save_as_files Should heatmaps be saved to files on disk? (TRUE/FALSE)
312
+#' @param files_format The extension of the files produced, supported
313
+#' formats are `"pdf", "png", "tiff", "bmp", "jpg"`. Relevant only if
314
+#' `files_format = TRUE`
315
+#' @param folder_path Path to the folder where files will be saved
316
+#' @param ... Other params to pass to `pheatmap::pheatmap`
317
+#'
318
+#' @details
319
+#' ## Top N gene selection
320
+#' Since the genes present in different time point slices are likely different,
321
+#' the decision process to select the final top N genes to represent in the
322
+#' heatmap follows this logic:
323
+#'
324
+#' * Each time point slice is arranged either in ascending order (if we want to
325
+#' plot the p-value) or in descending order (if we want to plot the scaled
326
+#' p-value) and the top n genes are selected
327
+#' * A series of statistics are computed over the union set of genes on ALL
328
+#' time points (min, max, mean, ...)
329
+#' * A decision is taken by considering the ordered `gene_selection_param`
330
+#' (order depends once again if the values are scaled or not), and the first
331
+#' N genes are selected for plotting.
332
+#'
333
+#' ### Filling NA values prior calculations
334
+#' It is possible to fill NA values (aka missing combinations of GENE/TP) with
335
+#' 0s prior computing the descriptive statistics on which gene selection is
336
+#' based. Please keep in mind that this has an impact on the final result,
337
+#' since for computing metrics such as the mean, NA values are usually removed,
338
+#' decreasing the overall number of values considered - this does not hold
339
+#' when NA values are substituted with 0s.
340
+#'
341
+#' ### The statistics
342
+#' Statistics are computed for each gene over all time points of each group.
343
+#' More in detail, `n`: counts the number of instances (rows)
344
+#' in which the genes appears, aka it counts the time points in which the gene
345
+#' is present. NOTE: if
346
+#' `fill_0_selection` option is set to `TRUE` this value will be equal for
347
+#' all genes! All other statistics as per the argument `gene_selection_param`
348
+#' map to the corresponding R functions with the exception of `trimmed` which
349
+#' is a simple call to the `mean` function with the argument `trimmed = 0.1`.
350
+#'
351
+#' ## Aesthetics
352
+#' It is possible to customise the appearence of the plot through different
353
+#' parameters.
354
+#'
355
+#' * `fill_NA_in_heatmap` tells the function whether missing combinations of
356
+#' GENE/TP should be plotted as NA or filled with a value (1 if p-value, 0
357
+#' if scaled p-value)
358
+#' * A title generator function can be provided to dynamically create a title
359
+#' for the plots: the function can accept two positional arguments for
360
+#' the group identifier and the number of selected genes respectively. If one or
361
+#' none of the arguments are of interest, they can be absorbed with `...`.
362
+#' * `heatmap_color_palette` can be used to specify a function from which
363
+#' colors are sampled (refers to the colors of values only)
364
+#' * To change the colors associated with annotations instead, use the
365
+#' argument `annotation_colors` of `pheatmap::pheatmap()` - it must be set to a
366
+#' list with this format:
367
+#' ```
368
+#' list(
369
+#'   KnownGeneClass = c("OncoGene" = color_spec,
370
+#'                      "Other" = color_spec,
371
+#'                      "TumSuppressor" = color_spec),
372
+#'   ClinicalRelevance = c("TRUE" = color_spec,
373
+#'                         "FALSE" = color_spec),
374
+#'   CriticalForInsMut = c("TRUE" = color_spec,
375
+#'                         "FALSE" = color_spec)
376
+#' )
377
+#' ```
378
+#'
379
+#' @return Either a list of graphical objects or a list of paths where
380
+#' plots were saved
381
+#' @export
382
+#'
383
+#' @examples
384
+#' data("integration_matrices", package = "ISAnalytics")
385
+#' data("association_file", package = "ISAnalytics")
386
+#' aggreg <- aggregate_values_by_key(
387
+#'     x = integration_matrices,
388
+#'     association_file = association_file,
389
+#'     value_cols = c("seqCount", "fragmentEstimate")
390
+#' )
391
+#' cis_overtime <- CIS_grubbs_overtime(aggreg)
392
+#' hmaps <- top_cis_overtime_heatmap(cis_overtime$cis,
393
+#'     fill_NA_in_heatmap = TRUE
394
+#' )
395
+#'
396
+#' # To re-plot:
397
+#' # grid::grid.newpage()
398
+#' # grid::grid.draw(hmaps$PT001$gtable)
399
+top_cis_overtime_heatmap <- function(x,
400
+    n_genes = 20,
401
+    timepoint_col = "TimePoint",
402
+    group_col = "group",
403
+    onco_db_file = "proto_oncogenes",
404
+    tumor_suppressors_db_file = "tumor_suppressors",
405
+    species = "human",
406
+    known_onco = known_clinical_oncogenes(),
407
+    suspicious_genes =
408
+        clinical_relevant_suspicious_genes(),
409
+    significance_threshold = 0.05,
410
+    plot_values = c("minus_log_p", "p"),
411
+    p_value_correction = c("fdr", "bonferroni"),
412
+    prune_tp_treshold = 20,
413
+    gene_selection_param = c(
414
+        "trimmed", "n", "mean", "sd", "median",
415
+        "mad", "min", "max"
416
+    ),
417
+    fill_0_selection = TRUE,
418
+    fill_NA_in_heatmap = FALSE,
419
+    heatmap_color_palette = "default",
420
+    title_generator = NULL,
421
+    save_as_files = FALSE,
422
+    files_format = c("pdf", "png", "tiff", "bmp", "jpg"),
423
+    folder_path = NULL,
424
+    ...) {
425
+    # --- Preliminary checks
426
+    if (!requireNamespace("pheatmap", quietly = TRUE)) {
427
+        rlang::abort(.missing_pkg_error("pheatmap"))
428
+    }
429
+    stopifnot(is.list(x))
430
+    stopifnot(is.numeric(n_genes))
431
+    if (is.list(x) & !is.data.frame(x) & is.null(names(x))) {
432
+        err_named <- c("Input list must have names",
433
+            x = paste(
434
+                "Input should follow the output format of",
435
+                "`CIS_grubbs_overtime()`"
436
+            )
437
+        )
438
+        rlang::abort(err_named)
439
+    } else if (is.data.frame(x) &
440
+        !all(c(timepoint_col, group_col) %in% colnames(x))) {
441
+        err_df <- c("Input df is missing columns",
442
+            x = paste(
443
+                "Input should follow the output format of",
444
+                "`CIS_grubbs_overtime()`"
445
+            ),
446
+            x = paste(
447
+                "Time point column ('", timepoint_col,
448
+                "') and/or group column ('", group_col,
449
+                "') are missing"
450
+            )
451
+        )
452
+        rlang::abort(err_df)
453
+    }
454
+    p_value_correction <- rlang::arg_match(p_value_correction)
455
+    plot_values <- rlang::arg_match(plot_values)
456
+    values_to_plot <- if (plot_values == "p") {
457
+        paste0("tdist_", p_value_correction)
458
+    } else {
459
+        paste0("minus_log_p_", p_value_correction)
460
+    }
461
+    gene_selection_param <- rlang::arg_match(gene_selection_param)
462
+    stopifnot(is.numeric(prune_tp_treshold))
463
+    stopifnot(is.logical(fill_0_selection))
464
+    fill_0_selection <- fill_0_selection[1]
465
+    stopifnot(is.logical(fill_NA_in_heatmap))
466
+    fill_NA_in_heatmap <- fill_NA_in_heatmap[1]
467
+    stopifnot(is.logical(save_as_files))
468
+    save_as_files <- save_as_files[1]
469
+    stopifnot(is.null(folder_path) || is.character(folder_path))
470
+    if (is.character(folder_path[1])) {
471
+        fs::dir_create(folder_path[1])
472
+    }
473
+    stopifnot(
474
+        (is.character(heatmap_color_palette) &
475
+            heatmap_color_palette == "default") ||
476
+            is.function(heatmap_color_palette)
477
+    )
478
+    files_format <- rlang::arg_match(files_format)
479
+    if (save_as_files == TRUE & is.null(folder_path) &
480
+        getOption("ISAnalytics.verbose", TRUE) == TRUE) {
481
+        warn_msg <- c("Warning: you did not set a folder for saving files",
482
+            i = "Returning heatmaps as a list in R env"
483
+        )
484
+        rlang::inform(warn_msg, class = "no_fold_files")
485
+    }
486
+    stopifnot(is.null(title_generator) || is.function(title_generator))
487
+    dots <- rlang::dots_list(..., .named = TRUE)
488
+    # --- If input is in list form, convert in single df
489
+    if (is.list(x) & !is.data.frame(x)) {
490
+        x <- purrr::map2_df(x, names(x), ~ {
491
+            tmp <- .x
492
+            if (is.list(tmp) & !is.data.frame(tmp)) {
493
+                purrr::map2_df(
494
+                    tmp, names(tmp),
495
+                    ~ .x %>% dplyr::mutate(!!timepoint_col := .y)
496
+                ) %>%
497
+                    dplyr::mutate(!!group_col := .y)
498
+            } else if (is.data.frame(tmp)) {
499
+                temp %>% dplyr::mutate(!!group_col := .y)
500
+            } else {
501
+                non_list_el <- c("Element is not list or df",
502
+                    x = paste(
503
+                        "Element", .y, "in x is not a list or a",
504
+                        "data frame"
505
+                    )
506
+                )
507
+                rlang::abort(non_list_el)
508
+            }
509
+        })
510
+    }
511
+    gene_sym_col <- annotation_IS_vars(TRUE) %>%
512
+        dplyr::filter(.data$tag == "gene_symbol") %>%
513
+        dplyr::pull(.data$names)
514
+    # --- Expand the df with gene info
515
+    expanded <- .expand_cis_df(
516
+        x, gene_sym_col,
517
+        onco_db_file, tumor_suppressors_db_file,
518
+        species, known_onco, suspicious_genes
519
+    )
520
+    # --- Add log conversions if needed
521
+    if (plot_values == "minus_log_p") {
522
+        expanded <- expanded %>%
523
+            dplyr::mutate(
524
+                minus_log_p_bonferroni = -log(.data$tdist_bonferroni,
525
+                    base = 10
526
+                ),
527
+                minus_log_p_fdr = -log(.data$tdist_fdr, base = 10)
528
+            )
529
+    }
530
+    # --- For each combo (group, tp) arrange and slice the top n
531
+    arrange_slice_top <- function(group_df, ...) {
532
+        if (nrow(group_df) < prune_tp_treshold) {
533
+            return(NULL)
534
+        }
535
+        if (plot_values == "p") {
536
+            return(
537
+                group_df %>%
538
+                    dplyr::arrange(.data[[values_to_plot]]) %>%
539
+                    dplyr::slice_head(n = n_genes)
540
+            )
541
+        } else {
542
+            return(
543
+                group_df %>%
544
+                    dplyr::arrange(dplyr::desc(.data[[values_to_plot]])) %>%
545
+                    dplyr::slice_head(n = n_genes)
546
+            )
547
+        }
548
+    }
549
+    slice_groups_tps <- expanded %>%
550
+        dplyr::select(
551
+            dplyr::all_of(c(
552
+                gene_sym_col, timepoint_col,
553
+                group_col, values_to_plot
554
+            ))
555
+        ) %>%
556
+        dplyr::group_by(dplyr::across(dplyr::all_of(
557
+            c(group_col, timepoint_col)
558
+        ))) %>%
559
+        dplyr::group_modify(.f = arrange_slice_top) %>%
560
+        dplyr::ungroup()
561
+
562
+    groups <- unique(slice_groups_tps[[group_col]])
563
+
564
+    # --- Evaluate statistics for genes in top n slices
565
+    eval_candidates <- function(group_id) {
566
+        df <- slice_groups_tps %>%
567
+            dplyr::filter(.data[[group_col]] == group_id) %>%
568
+            dplyr::select(-.data[[group_col]])
569
+        if (fill_0_selection == TRUE) {
570
+            value_fill <- list(0)
571
+            names(value_fill) <- values_to_plot
572
+            df <- df %>%
573
+                tidyr::complete(.data[[timepoint_col]], .data[[gene_sym_col]],
574
+                    fill = value_fill
575
+                )
576
+        }
577
+        df %>%
578
+            dplyr::group_by(dplyr::across(dplyr::all_of(gene_sym_col))) %>%
579
+            dplyr::summarise(
580
+                n = dplyr::n(),
581
+                mean = mean(.data[[values_to_plot]], na.rm = TRUE),
582
+                sd = stats::sd(.data[[values_to_plot]], na.rm = TRUE),
583
+                median = stats::median(.data[[values_to_plot]], na.rm = TRUE),
584
+                trimmed = mean(.data[[values_to_plot]],
585
+                    na.rm = TRUE,
586
+                    trim = 0.1
587
+                ),
588
+                mad = stats::mad(.data[[values_to_plot]], na.rm = TRUE),
589
+                min = min(.data[[values_to_plot]], na.rm = TRUE),
590
+                max = max(.data[[values_to_plot]], na.rm = TRUE),
591
+                .groups = "drop"
592
+            )
593
+    }
594
+    candidates <- purrr::map(groups, eval_candidates) %>%
595
+        purrr::set_names(groups)
596
+
597
+    # --- Select from candidates according to gene_selection_param
598
+    select_from_candidates <- function(group_df) {
599
+        if (gene_selection_param == "n") {
600
+            return(
601
+                group_df %>%
602
+                    dplyr::arrange(dplyr::desc(.data$n)) %>%
603
+                    dplyr::slice_head(n = n_genes) %>%
604
+                    dplyr::pull(.data[[gene_sym_col]])
605
+            )
606
+        }
607
+        if (plot_values == "p") {
608
+            ## Order ascending
609
+            return(
610
+                group_df %>%
611
+                    dplyr::arrange(.data[[gene_selection_param]]) %>%
612
+                    dplyr::slice_head(n = n_genes) %>%
613
+                    dplyr::pull(.data[[gene_sym_col]])
614
+            )
615
+        } else {
616
+            ## Order descending
617
+            return(
618
+                group_df %>%
619
+                    dplyr::arrange(dplyr::desc(
620
+                        .data[[gene_selection_param]]
621
+                    )) %>%
622
+                    dplyr::slice_head(n = n_genes) %>%
623
+                    dplyr::pull(.data[[gene_sym_col]])
624
+            )
625
+        }
626
+    }
627
+    gene_selection <- purrr::map(candidates, select_from_candidates)
628
+
629
+    # --- Extract only relevant genes from input
630
+    genes_to_map <- purrr::map(groups, ~ expanded %>%
631
+        dplyr::filter(group == .x) %>%
632
+        dplyr::filter(.data[[timepoint_col]] %in%
633
+            unique((slice_groups_tps %>%
634
+                dplyr::filter(group == .x))[[timepoint_col]])) %>%
635
+        dplyr::filter(.data[[gene_sym_col]] %in%
636
+            gene_selection[[.x]]) %>%
637
+        dplyr::select(
638
+            dplyr::all_of(c(
639
+                gene_sym_col, timepoint_col,
640
+                group_col, values_to_plot
641
+            )),
642
+            .data$CriticalForInsMut, .data$KnownGeneClass,
643
+            .data$ClinicalRelevance
644
+        )) %>%