Browse code

[UPDATE] Upgrade to v 1.1.11

Giulia Pais authored on 11/05/2021 12:50:41
Showing51 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: ISAnalytics
2 2
 Title: Analyze gene therapy vector insertion sites data identified from genomics next generation sequencing reads for clonal tracking studies
3
-Version: 1.1.10
3
+Version: 1.1.11
4 4
 Date: 2020-07-03
5 5
 Authors@R: c(
6 6
   person(given = "Andrea",
... ...
@@ -32,7 +32,6 @@ Imports:
32 32
     tidyr,
33 33
     purrr,
34 34
     rlang,
35
-    forcats,
36 35
     tibble,
37 36
     BiocParallel,
38 37
     stringr,
... ...
@@ -48,7 +47,9 @@ Imports:
48 47
     grDevices,
49 48
     data.table,
50 49
     readxl,
51
-    tools
50
+    tools,
51
+    Rcapture,
52
+    plotly
52 53
 Encoding: UTF-8
53 54
 LazyData: false
54 55
 Roxygen: list(markdown = TRUE)
... ...
@@ -63,7 +64,8 @@ Suggests:
63 64
     rmarkdown,
64 65
     roxygen2,
65 66
     vegan,
66
-    withr
67
+    withr,
68
+    extraDistr
67 69
 VignetteBuilder: knitr
68 70
 RdMacros: 
69 71
     lifecycle
... ...
@@ -2,11 +2,15 @@
2 2
 
3 3
 export(CIS_grubbs)
4 4
 export(CIS_volcano_plot)
5
+export(HSC_population_plot)
6
+export(HSC_population_size_estimate)
5 7
 export(aggregate_metadata)
6 8
 export(aggregate_values_by_key)
7 9
 export(annotation_IS_vars)
8 10
 export(as_sparse_matrix)
9 11
 export(association_file_columns)
12
+export(available_outlier_tests)
13
+export(blood_lineages_default)
10 14
 export(clinical_relevant_suspicious_genes)
11 15
 export(comparison_matrix)
12 16
 export(compute_abundance)
... ...
@@ -14,9 +18,12 @@ export(compute_near_integrations)
14 18
 export(cumulative_count_union)
15 19
 export(date_columns_coll)
16 20
 export(date_formats)
21
+export(default_iss_file_prefixes)
22
+export(default_meta_agg)
17 23
 export(default_stats)
18 24
 export(generate_Vispa2_launch_AF)
19 25
 export(generate_blank_association_file)
26
+export(import_Vispa2_stats)
20 27
 export(import_association_file)
21 28
 export(import_parallel_Vispa2Matrices_auto)
22 29
 export(import_parallel_Vispa2Matrices_interactive)
... ...
@@ -24,6 +31,8 @@ export(import_single_Vispa2Matrix)
24 31
 export(known_clinical_oncogenes)
25 32
 export(mandatory_IS_vars)
26 33
 export(matching_options)
34
+export(outlier_filter)
35
+export(outliers_by_pool_fragments)
27 36
 export(quantification_types)
28 37
 export(realign_after_collisions)
29 38
 export(reduced_AF_columns)
... ...
@@ -36,6 +45,7 @@ export(unzip_file_system)
36 45
 import(BiocParallel)
37 46
 import(dplyr)
38 47
 import(ggplot2)
48
+import(htmltools)
39 49
 import(lifecycle)
40 50
 import(upsetjs)
41 51
 importFrom(BiocParallel,MulticoreParam)
... ...
@@ -44,6 +54,8 @@ importFrom(BiocParallel,bplapply)
44 54
 importFrom(BiocParallel,bpok)
45 55
 importFrom(BiocParallel,bpstop)
46 56
 importFrom(BiocParallel,bptry)
57
+importFrom(Rcapture,closedp.0)
58
+importFrom(Rcapture,closedp.bc)
47 59
 importFrom(data.table,fread)
48 60
 importFrom(data.table,melt.data.table)
49 61
 importFrom(data.table,rbindlist)
... ...
@@ -60,13 +72,12 @@ importFrom(dplyr,group_by)
60 72
 importFrom(dplyr,group_split)
61 73
 importFrom(dplyr,inner_join)
62 74
 importFrom(dplyr,intersect)
75
+importFrom(dplyr,left_join)
63 76
 importFrom(dplyr,mutate)
64 77
 importFrom(dplyr,rename)
65 78
 importFrom(dplyr,select)
66 79
 importFrom(dplyr,semi_join)
67 80
 importFrom(dplyr,slice)
68
-importFrom(forcats,as_factor)
69
-importFrom(forcats,fct_inseq)
70 81
 importFrom(fs,as_fs_path)
71 82
 importFrom(fs,dir_create)
72 83
 importFrom(fs,dir_exists)
... ...
@@ -91,13 +102,18 @@ importFrom(htmltools,tagList)
91 102
 importFrom(htmltools,tags)
92 103
 importFrom(lubridate,parse_date_time)
93 104
 importFrom(magrittr,`%>%`)
105
+importFrom(plotly,plot_ly)
94 106
 importFrom(psych,describe)
107
+importFrom(purrr,cross_df)
108
+importFrom(purrr,detect_index)
95 109
 importFrom(purrr,flatten)
110
+importFrom(purrr,flatten_chr)
96 111
 importFrom(purrr,is_empty)
97 112
 importFrom(purrr,is_formula)
98 113
 importFrom(purrr,is_function)
99 114
 importFrom(purrr,map)
100 115
 importFrom(purrr,map2)
116
+importFrom(purrr,map2_chr)
101 117
 importFrom(purrr,map2_dfr)
102 118
 importFrom(purrr,map2_lgl)
103 119
 importFrom(purrr,map_chr)
... ...
@@ -108,6 +124,7 @@ importFrom(purrr,pmap)
108 124
 importFrom(purrr,pmap_chr)
109 125
 importFrom(purrr,pmap_df)
110 126
 importFrom(purrr,pmap_dfr)
127
+importFrom(purrr,pmap_lgl)
111 128
 importFrom(purrr,reduce)
112 129
 importFrom(purrr,set_names)
113 130
 importFrom(purrr,walk)
... ...
@@ -126,25 +143,33 @@ importFrom(rlang,.data)
126 143
 importFrom(rlang,`:=`)
127 144
 importFrom(rlang,abort)
128 145
 importFrom(rlang,arg_match)
146
+importFrom(rlang,as_function)
129 147
 importFrom(rlang,call2)
130 148
 importFrom(rlang,dots_list)
149
+importFrom(rlang,enexpr)
131 150
 importFrom(rlang,env_bind)
132 151
 importFrom(rlang,eval_tidy)
152
+importFrom(rlang,exec)
133 153
 importFrom(rlang,expr)
134 154
 importFrom(rlang,fn_fmls_names)
135 155
 importFrom(rlang,inform)
156
+importFrom(rlang,is_formula)
157
+importFrom(rlang,is_function)
158
+importFrom(rlang,list2)
136 159
 importFrom(rlang,parse_expr)
160
+importFrom(stats,dt)
137 161
 importFrom(stats,median)
138 162
 importFrom(stats,na.omit)
139 163
 importFrom(stats,p.adjust)
140 164
 importFrom(stats,pt)
165
+importFrom(stats,setNames)
166
+importFrom(stats,shapiro.test)
141 167
 importFrom(stringr,str_detect)
142
-importFrom(stringr,str_extract)
143
-importFrom(stringr,str_extract_all)
144 168
 importFrom(stringr,str_pad)
145 169
 importFrom(stringr,str_replace)
146 170
 importFrom(stringr,str_replace_all)
147 171
 importFrom(stringr,str_split)
172
+importFrom(stringr,str_to_upper)
148 173
 importFrom(tibble,add_column)
149 174
 importFrom(tibble,add_row)
150 175
 importFrom(tibble,as_tibble)
... ...
@@ -153,12 +178,12 @@ importFrom(tibble,as_tibble_row)
153 178
 importFrom(tibble,is_tibble)
154 179
 importFrom(tibble,tibble)
155 180
 importFrom(tibble,tibble_row)
181
+importFrom(tibble,tribble)
156 182
 importFrom(tidyr,everything)
157 183
 importFrom(tidyr,nest)
158 184
 importFrom(tidyr,pivot_longer)
159 185
 importFrom(tidyr,pivot_wider)
160 186
 importFrom(tidyr,separate)
161
-importFrom(tidyr,unite)
162 187
 importFrom(tidyr,unnest)
163 188
 importFrom(tools,file_path_sans_ext)
164 189
 importFrom(utils,read.csv)
... ...
@@ -1,5 +1,35 @@
1 1
 \title{ISAnalytics News}
2 2
 
3
+# ISAnalytics 1.1.11 (2021-05-11)
4
+
5
+## NEW FUNCTIONALITY
6
+
7
+* `HSC_population_size_estimate` and `HSC_population_plot` allow estimates
8
+on hematopoietic stem cell population size
9
+* Importing of Vispa2 stats per pool now has a dedicated function,
10
+`import_Vispa2_stats`
11
+* `outlier_filter` and `outliers_by_pool_fragments` offer a mean to filter
12
+poorly represented samples based on custom outliers tests
13
+
14
+## VISIBLE USER CHANGES
15
+
16
+* The argument `import_stats` of `aggregate_metadata` is officially deprecated
17
+in favor of `import_Vispa2_stats`
18
+* `aggregate_metadata` is now a lot more flexible on what operations can be
19
+performed on columns via the new argument `aggregating_functions`
20
+* `import_association_file` allows directly for the import of Vispa2 stats
21
+and converts time points to months and years where not already present
22
+* File system alignment of `import_association_file` now produces 3 separate
23
+columns for paths
24
+* `separate_quant_matrices` and `comparison_matrix` now do not require 
25
+mandatory columns other than the quantifications - this allows for separation
26
+or joining also for aggregated matrices
27
+
28
+## FIXES
29
+
30
+* Fixed a minor issue in `CIS_volcano_plot` that caused duplication of some
31
+labels if highlighted genes were provided in input
32
+
3 33
 # ISAnalytics 1.1.10 (2021-04-08)
4 34
 
5 35
 ## FIXES
... ...
@@ -26,6 +26,7 @@
26 26
 #' * Import functions:
27 27
 #'   * \code{\link{import_single_Vispa2Matrix}}
28 28
 #'   * \code{\link{import_association_file}}
29
+#'   * \code{\link{import_Vispa2_stats}}
29 30
 #'   * \code{\link{import_parallel_Vispa2Matrices_interactive}}
30 31
 #'   * \code{\link{import_parallel_Vispa2Matrices_auto}}
31 32
 #' * Aggregation functions:
... ...
@@ -34,6 +35,9 @@
34 35
 #' * Collision removal functions:
35 36
 #'   * \code{\link{remove_collisions}}
36 37
 #'   * \code{\link{realign_after_collisions}}
38
+#' * Removal of outliers from raw reads
39
+#'   * \code{\link{outlier_filter}}
40
+#'   * \code{\link{outliers_by_pool_fragments}}
37 41
 #' * Recalibration functions:
38 42
 #'   * \code{\link{compute_near_integrations}}
39 43
 #' * Analysis functions:
... ...
@@ -45,8 +49,11 @@
45 49
 #'   * \code{\link{sample_statistics}}
46 50
 #'   * \code{\link{CIS_grubbs}}
47 51
 #'   * \code{\link{cumulative_count_union}}
52
+#' * HSC population size estimate:
53
+#'   * \code{\link{HSC_population_size_estimate}}
48 54
 #' * Plotting functions:
49 55
 #'   * \code{\link{CIS_volcano_plot}}
56
+#'   * \code{\link{HSC_population_plot}}
50 57
 #' * Utility functions:
51 58
 #'   * \code{\link{generate_blank_association_file}}
52 59
 #'   * \code{\link{generate_Vispa2_launch_AF}}
... ...
@@ -4,20 +4,28 @@
4 4
 
5 5
 #' Performs aggregation on metadata contained in the association file.
6 6
 #'
7
-#' \lifecycle{experimental}
8
-#' Groups metadata by grouping_keys and returns a summary of info for each
9
-#' group. For more details on how to use this function:
7
+#' \lifecycle{maturing}
8
+#' Groups metadata by the specified grouping keys and returns a
9
+#' summary of info for each group. For more details on how to use this function:
10 10
 #' \code{vignette("Working with aggregate functions", package = "ISAnalytics")}
11 11
 #'
12 12
 #' @param association_file The imported association file
13
-#' (via `import_association_file`)
13
+#' (via link{import_association_file})
14 14
 #' @param grouping_keys A character vector of column names to form a group
15
-#' @param import_stats Should Vispa2 stats files be imported and included?
15
+#' @param aggregating_functions A data frame containing specifications
16
+#' of the functions to be applied to columns in the association file during
17
+#' aggregation. It defaults to \link{default_meta_agg}. The structure of
18
+#' this data frame should be maintained if the user wishes to change the
19
+#' defaults.
20
+#' @param import_stats `r lifecycle::badge("deprecated")` The import
21
+#' of VISPA2 stats has been moved to its dedicated function,
22
+#' see \link{import_Vispa2_stats}.
16 23
 #' @family Aggregate functions
24
+#' @importFrom rlang abort inform
17 25
 #' @importFrom purrr is_empty
18
-#' @importFrom tibble is_tibble
26
+#' @import lifecycle
19 27
 #'
20
-#' @return A tibble
28
+#' @return An aggregated data frame
21 29
 #' @export
22 30
 #'
23 31
 #' @examples
... ...
@@ -30,7 +38,7 @@
30 38
 #' association_file <- import_association_file(path_AF, root_correct,
31 39
 #'     dates_format = "dmy"
32 40
 #' )
33
-#' aggregated_meta <- aggregate_metadata(association_file, import_stats = FALSE)
41
+#' aggregated_meta <- aggregate_metadata(association_file)
34 42
 #' options(op)
35 43
 aggregate_metadata <- function(association_file,
36 44
     grouping_keys = c(
... ...
@@ -39,88 +47,99 @@ aggregate_metadata <- function(association_file,
39 47
         "Tissue",
40 48
         "TimePoint"
41 49
     ),
42
-    import_stats = TRUE) {
50
+    aggregating_functions = default_meta_agg(),
51
+    import_stats = lifecycle::deprecated()
52
+    ) {
43 53
     # Check parameters
44
-    stopifnot(tibble::is_tibble(association_file))
45
-    min_missing <- setdiff(.min_var_set(), colnames(association_file))
46
-    if (!purrr::is_empty(min_missing)) {
47
-        stop(paste(c(
48
-            "Association file is missing some of the mandatory columns:",
49
-            min_missing
50
-        ), collapse = "\n"))
51
-    }
54
+    stopifnot(is.data.frame(association_file))
52 55
     stopifnot(!is.null(grouping_keys))
53 56
     stopifnot(is.character(grouping_keys))
54
-    keys_missing <- setdiff(grouping_keys, colnames(association_file))
57
+    keys_missing <- grouping_keys[!grouping_keys %in%
58
+                                      colnames(association_file)]
55 59
     if (!purrr::is_empty(keys_missing)) {
56
-        stop(paste(c(
57
-            "Some of the grouping keys you provided were not found:",
58
-            keys_missing
59
-        ), collapse = "\n"))
60
+        rlang::abort(.missing_user_cols_error(keys_missing))
60 61
     }
61
-    stopifnot(is.logical(import_stats) & length(import_stats) == 1)
62
-    # Import if true
63
-    stats <- NULL
64
-    if (import_stats == TRUE) {
65
-        stats <- .import_stats_iss(association_file)
66
-        if (is.null(stats)) {
67
-            if (getOption("ISAnalytics.verbose") == TRUE) {
68
-                message(paste("No Vispa2 stats files found for import,
69
-                    ignoring this step"))
70
-            }
71
-        } else {
72
-            if (getOption("ISAnalytics.widgets") == TRUE) {
73
-                withCallingHandlers(
74
-                    {
75
-                        withRestarts(
76
-                            {
77
-                                report <- stats[[2]]
78
-                                stats <- stats[[1]]
79
-                                widg <- .iss_import_widget(report)
80
-                                print(widg)
81
-                            },
82
-                            print_err = function(cnd) {
83
-                                message(.widgets_error())
84
-                                if (getOption("ISAnalytics.verbose") == TRUE) {
85
-                                    print(paste0(
86
-                                        "--- REPORT IMPORT VISPA2",
87
-                                        "STATS: FILES IMPORTED ---"
88
-                                    ))
89
-                                    print(stats[[2]],
90
-                                        width = Inf,
91
-                                        n = nrow(stats[[2]])
92
-                                    )
93
-                                }
94
-                            }
95
-                        )
96
-                    },
97
-                    error = function(cnd) {
98
-                        message(conditionMessage(cnd))
99
-                        invokeRestart("print_err")
100
-                    }
101
-                )
102
-            } else {
103
-                if (getOption("ISAnalytics.verbose") == TRUE) {
104
-                    print(paste0(
105
-                        "--- REPORT IMPORT VISPA2",
106
-                        "STATS: FILES IMPORTED ---"
107
-                    ))
108
-                    print(stats[[2]],
109
-                        width = Inf,
110
-                        n = nrow(stats[[2]])
111
-                    )
112
-                }
113
-                stats <- stats[[1]]
114
-            }
115
-        }
62
+    if (lifecycle::is_present(import_stats)) {
63
+        lifecycle::deprecate_warn(
64
+            when = "1.1.11",
65
+            what = "aggregate_metadata(import_stats)",
66
+            details = c("Import Vispa2 stats functionality moved",
67
+                        i = paste("Please use `import_Vispa2_stats()`",
68
+                                  "or",
69
+                                  "`import_association_file(import_iss = TRUE)`",
70
+                                  "instead."))
71
+        )
72
+    }
73
+    aggregated <- .aggregate_meta(association_file = association_file,
74
+                                  grouping_keys = grouping_keys,
75
+                                  function_tbl = aggregating_functions)
76
+    if (is.null(aggregated)) {
77
+        rlang::inform(paste("No columns in `aggregating_functions$Column`",
78
+                            "was found in column names of the association",
79
+                            "file. Nothing to return."))
116 80
     }
117
-    aggregated <- .join_and_aggregate(association_file, stats, grouping_keys)
118 81
     aggregated
119 82
 }
120 83
 
84
+
85
+#' Default metadata aggregation function table
86
+#'
87
+#' A default columns-function specifications for \link{aggregate_metadata}
88
+#'
89
+#' @details
90
+#' This data frame contains four columns:
91
+#'
92
+#' * `Column`: holds the name of the column in the association file that
93
+#' should be processed
94
+#' * `Function`: contains either the name of a function (e.g. mean)
95
+#' or a purrr-style lambda (e.g. `~ mean(.x, na.rm = TRUE)`). This function
96
+#' will be applied to the corresponding column specified in `Column`
97
+#' * `Args`: optional additional arguments to pass to the corresponding
98
+#' function. This is relevant ONLY if the corresponding `Function` is a
99
+#' simple function and not a purrr-style lambda.
100
+#' * `Output_colname`: a `glue` specification that will be used to determine
101
+#' a unique output column name. See \link[glue]{glue} for more details.
102
+#'
103
+#' @importFrom tibble tribble
104
+#' @return A data frame
105
+#' @family Aggregate functions
106
+#' @export
107
+#'
108
+#' @examples
109
+#' default_meta_agg()
110
+default_meta_agg <- function() {
111
+    tibble::tribble(
112
+        ~ Column, ~ Function, ~ Args, ~ Output_colname,
113
+        "FusionPrimerPCRDate", ~ suppressWarnings(min(.x, na.rm = TRUE)),
114
+        NA, "{.col}_min",
115
+        "LinearPCRDate", ~ suppressWarnings(min(.x, na.rm = TRUE)),
116
+        NA, "{.col}_min",
117
+        "VCN", ~ suppressWarnings(mean(.x, na.rm = TRUE)),
118
+        NA, "{.col}_avg",
119
+        "ng DNA corrected", ~ suppressWarnings(mean(.x, na.rm = TRUE)),
120
+        NA, "{.col}_avg",
121
+        "Kapa", ~ suppressWarnings(mean(.x, na.rm = TRUE)),
122
+        NA, "{.col}_avg",
123
+        "ng DNA corrected", ~ sum(.x, na.rm = TRUE),
124
+        NA, "{.col}_sum",
125
+        "ulForPool", ~ sum(.x, na.rm = TRUE),
126
+        NA, "{.col}_sum",
127
+        "BARCODE_MUX", ~ sum(.x, na.rm = TRUE),
128
+        NA, "{.col}_sum",
129
+        "TRIMMING_FINAL_LTRLC", ~ sum(.x, na.rm = TRUE),
130
+        NA, "{.col}_sum",
131
+        "LV_MAPPED", ~ sum(.x, na.rm = TRUE),
132
+        NA, "{.col}_sum",
133
+        "BWA_MAPPED_OVERALL", ~ sum(.x, na.rm = TRUE),
134
+        NA, "{.col}_sum",
135
+        "ISS_MAPPED_PP", ~ sum(.x, na.rm = TRUE),
136
+        NA, "{.col}_sum"
137
+    )
138
+}
139
+
121 140
 #' Aggregates matrices values based on specified key.
122 141
 #'
123
-#' \lifecycle{experimental}
142
+#' \lifecycle{maturing}
124 143
 #' Performs aggregation on values contained in the integration matrices based
125 144
 #' on the key and the specified lambda. For more details on how to use this
126 145
 #' function:
... ...
@@ -177,12 +196,16 @@ aggregate_metadata <- function(association_file,
177 196
 #' See details section.
178 197
 #' @param group Other variables to include in the grouping besides `key`,
179 198
 #' can be set to NULL
199
+#' @param join_af_by A character vector representing the joining key
200
+#' between the matrix and the metadata. Useful to re-aggregate already
201
+#' aggregated matrices.
180 202
 #' @family Aggregate functions
181 203
 #'
182
-#' @importFrom purrr walk
204
+#' @importFrom purrr walk set_names map_lgl
183 205
 #' @importFrom rlang expr eval_tidy abort
184 206
 #'
185
-#' @return A list of tibbles or a single tibble according to input
207
+#' @return A list of tibbles or a single tibble aggregated according to
208
+#' the specified arguments
186 209
 #' @export
187 210
 #'
188 211
 #' @examples
... ...
@@ -219,7 +242,8 @@ aggregate_values_by_key <- function(x,
219 242
     group = c(
220 243
         mandatory_IS_vars(),
221 244
         annotation_IS_vars()
222
-    )) {
245
+    ),
246
+    join_af_by = "CompleteAmplificationID") {
223 247
     stopifnot(is.data.frame(x) || is.list(x))
224 248
     if (!is.data.frame(x)) {
225 249
         purrr::walk(x, function(df) {
... ...
@@ -227,8 +251,13 @@ aggregate_values_by_key <- function(x,
227 251
             if (.check_mandatory_vars(df) == FALSE) {
228 252
                 rlang::abort(.non_ISM_error())
229 253
             }
230
-            if (.check_complAmpID(df) == FALSE) {
231
-                rlang::abort(.missing_complAmpID_error())
254
+            if (!all(join_af_by %in% colnames(df))) {
255
+                rlang::abort(c(x = paste("Missing common columns",
256
+                                         "to join metadata"),
257
+                               i = paste("Missing: ",
258
+                                         paste0(join_af_by[!join_af_by %in%
259
+                                                               colnames(df)],
260
+                                                collapse = ", "))))
232 261
             }
233 262
             if (!all(value_cols %in% colnames(df))) {
234 263
                 rlang::abort(.missing_user_cols_error(
... ...
@@ -253,8 +282,13 @@ aggregate_values_by_key <- function(x,
253 282
         if (.check_mandatory_vars(x) == FALSE) {
254 283
             rlang::abort(.non_ISM_error())
255 284
         }
256
-        if (.check_complAmpID(x) == FALSE) {
257
-            rlang::abort(.missing_complAmpID_error())
285
+        if (!all(join_af_by %in% colnames(x))) {
286
+            rlang::abort(c(x = paste("Missing common columns",
287
+                                     "to join metadata"),
288
+                           i = paste("Missing: ",
289
+                                     paste0(join_af_by[!join_af_by %in%
290
+                                                           colnames(x)],
291
+                                            collapse = ", "))))
258 292
         }
259 293
         if (!all(value_cols %in% colnames(x))) {
260 294
             rlang::abort(.missing_user_cols_error(
... ...
@@ -280,7 +314,7 @@ aggregate_values_by_key <- function(x,
280 314
     # Check key
281 315
     stopifnot(is.character(key))
282 316
     if (!all(key %in% colnames(association_file))) {
283
-        stop("Key fields are missing from association file")
317
+        rlang::abort(c(x = "Key fields are missing from association file"))
284 318
     }
285 319
     # Check lambda
286 320
     stopifnot(is.list(lambda))
... ...
@@ -288,24 +322,24 @@ aggregate_values_by_key <- function(x,
288 322
     stopifnot(is.character(group) | is.null(group))
289 323
     if (is.data.frame(x)) {
290 324
         if (!all(group %in% c(colnames(association_file), colnames(x)))) {
291
-            stop(paste("Grouping variables not found"))
325
+            rlang::abort(paste("Grouping variables not found"))
292 326
         }
293 327
     } else {
294 328
         purrr::walk(x, function(df) {
295 329
             if (!all(group %in% c(colnames(association_file), colnames(df)))) {
296
-                stop(paste("Grouping variables not found"))
330
+                rlang::abort(paste("Grouping variables not found"))
297 331
             }
298 332
         })
299 333
     }
300 334
     if (is.data.frame(x)) {
301 335
         x <- list(x)
302 336
         agg_matrix <- .aggregate_lambda(
303
-            x, association_file, key, value_cols, lambda, group
337
+            x, association_file, key, value_cols, lambda, group, join_af_by
304 338
         )
305 339
         return(agg_matrix[[1]])
306 340
     }
307 341
     agg_matrix <- .aggregate_lambda(
308
-        x, association_file, key, value_cols, lambda, group
342
+        x, association_file, key, value_cols, lambda, group, join_af_by
309 343
     )
310 344
     agg_matrix
311 345
 }
... ...
@@ -194,14 +194,6 @@ comparison_matrix <- function(x,
194 194
     ShsCount = "ShsCount") {
195 195
     stopifnot(is.list(x) & !is.data.frame(x))
196 196
     stopifnot(all(names(x) %in% quantification_types()))
197
-    purrr::walk(x, function(m) {
198
-        mand <- .check_mandatory_vars(m)
199
-        amp <- .check_complAmpID(m)
200
-        val <- .check_value_col(m)
201
-        if (any(c(mand, amp, val) == FALSE)) {
202
-            stop(.non_ISM_error())
203
-        }
204
-    })
205 197
     stopifnot(is.character(fragmentEstimate) & length(fragmentEstimate) == 1)
206 198
     stopifnot(is.character(seqCount) & length(seqCount) == 1)
207 199
     stopifnot(is.character(barcodeCount) & length(barcodeCount) == 1)
... ...
@@ -221,8 +213,11 @@ comparison_matrix <- function(x,
221 213
         matrix1 %>%
222 214
             dplyr::full_join(matrix2, by = commoncols)
223 215
     })
224
-    if (any(is.na(result)) & getOption("ISAnalytics.verbose") == TRUE) {
225
-        message(.nas_introduced_msg())
216
+    na_introduced <- purrr::map_lgl(param_names, function(p) {
217
+        any(is.na(result[[p]]))
218
+    })
219
+    if (any(na_introduced) & getOption("ISAnalytics.verbose") == TRUE) {
220
+        rlang::inform(.nas_introduced_msg())
226 221
     }
227 222
     result
228 223
 }
... ...
@@ -248,6 +243,7 @@ comparison_matrix <- function(x,
248 243
 #' in input
249 244
 #' @param ShsCount Name of the shs count values column
250 245
 #' in input
246
+#' @param key Key columns to perform the joining operation
251 247
 #'
252 248
 #' @importFrom purrr is_empty map set_names
253 249
 #' @importFrom dplyr rename
... ...
@@ -278,21 +274,24 @@ comparison_matrix <- function(x,
278 274
 #' )
279 275
 #' separated_matrix <- separate_quant_matrices(matrices)
280 276
 #' options(op)
281
-separate_quant_matrices <- function(x, fragmentEstimate = "fragmentEstimate",
277
+separate_quant_matrices <- function(
278
+    x,
279
+    fragmentEstimate = "fragmentEstimate",
282 280
     seqCount = "seqCount",
283 281
     barcodeCount = "barcodeCount",
284 282
     cellCount = "cellCount",
285
-    ShsCount = "ShsCount") {
283
+    ShsCount = "ShsCount",
284
+    key = c(mandatory_IS_vars(),
285
+            annotation_IS_vars(),
286
+            "CompleteAmplificationID")
287
+    ) {
286 288
     stopifnot(is.data.frame(x))
287
-    if (.check_mandatory_vars(x) == FALSE) {
288
-        stop(.non_ISM_error())
289
+    if (!all(key %in% colnames(x))) {
290
+        rlang::abort(.missing_user_cols_error(key[!key %in% colnames(x)]))
289 291
     }
290
-    if (.check_complAmpID(x) == FALSE) {
291
-        stop(.missing_complAmpID_error())
292
-    }
293
-    num_cols <- .find_exp_cols(x)
292
+    num_cols <- .find_exp_cols(x, key)
294 293
     if (purrr::is_empty(num_cols)) {
295
-        stop(.missing_num_cols_error())
294
+        rlang::abort(.missing_num_cols_error())
296 295
     }
297 296
     stopifnot(is.character(fragmentEstimate) & length(fragmentEstimate) == 1)
298 297
     stopifnot(is.character(seqCount) & length(seqCount) == 1)
... ...
@@ -307,24 +306,16 @@ separate_quant_matrices <- function(x, fragmentEstimate = "fragmentEstimate",
307 306
     )
308 307
     to_copy <- if (any(!num_cols %in% param_col)) {
309 308
         if (all(!num_cols %in% param_col)) {
310
-            stop(.non_quant_cols_error())
309
+            rlang::abort(.non_quant_cols_error())
311 310
         }
312 311
         num_cols[!num_cols %in% param_col]
313 312
     }
314 313
     num_cols <- param_col[param_col %in% num_cols]
315
-    annot <- if (.is_annotated(x)) {
316
-        annotation_IS_vars()
317
-    } else {
318
-        character(0)
319
-    }
320 314
     if (!purrr::is_empty(to_copy) & getOption("ISAnalytics.verbose") == TRUE) {
321
-        message(.non_quant_cols_msg(to_copy))
315
+        rlang::inform(.non_quant_cols_msg(to_copy))
322 316
     }
323 317
     separated <- purrr::map(num_cols, function(quant) {
324
-        x[c(
325
-            mandatory_IS_vars(), annot, "CompleteAmplificationID",
326
-            to_copy, quant
327
-        )] %>% dplyr::rename(Value = quant)
318
+        x[c(key, to_copy, quant)] %>% dplyr::rename(Value = quant)
328 319
     }) %>% purrr::set_names(names(num_cols))
329 320
     separated
330 321
 }
... ...
@@ -91,7 +91,10 @@ remove_collisions <- function(x,
91 91
         ## Check if it contains the "Value" column. If not find all numeric
92 92
         ## columns that are not default columns
93 93
         quantifications_cols <- if (.check_value_col(x) == FALSE) {
94
-            found <- .find_exp_cols(x)
94
+            found <- .find_exp_cols(x,
95
+                                    c(mandatory_IS_vars(),
96
+                                      annotation_IS_vars(),
97
+                                      "CompleteAmplificationID"))
95 98
             if (purrr::is_empty(found)) {
96 99
                 stop(.missing_num_cols_error())
97 100
             }
... ...
@@ -98,6 +98,8 @@ association_file_columns <- function() {
98 98
     )
99 99
 }
100 100
 
101
+# Internal: associates column types with column names for a more precise
102
+# import
101 103
 .af_col_types <- function(mode) {
102 104
     if (mode == "fread") {
103 105
         types <- list(
... ...
@@ -137,9 +139,6 @@ association_file_columns <- function() {
137 139
         return(types)
138 140
     }
139 141
     if (mode == "readr") {
140
-        # date_format <- unlist(strsplit(date_format))
141
-        # date_format <- paste0("%", date_format)
142
-        # date_format <- paste0(date_format, collapse = "%.")
143 142
         types <- list(
144 143
             ProjectID = "c", FUSIONID = "c", PoolID = "c", TagSequence = "c",
145 144
             SubjectID = "c", VectorType = "c", VectorID = "c",
... ...
@@ -180,6 +179,23 @@ association_file_columns <- function() {
180 179
     }
181 180
 }
182 181
 
182
+# Internal: used for file system alignment in import_association_file,
183
+# gives the names of the columns that respectively contain:
184
+# - the absolute path on disk of the project
185
+# - the path to the quantification folder
186
+# - the path to the iss folder
187
+.path_cols_names <- function() {
188
+    list(project = "Path", quant = "Path_quant", iss = "Path_iss")
189
+}
190
+
191
+.matrix_annotated_suffixes <- function() {
192
+    c(".no0.annotated")
193
+}
194
+
195
+.matrix_not_annotated_suffixes <- function() {
196
+    c("")
197
+}
198
+
183 199
 #' Names of the columns of the association file to consider for
184 200
 #' Vispa2 launch.
185 201
 #'
... ...
@@ -221,3 +237,8 @@ reduced_AF_columns <- function() {
221 237
 .supported_fread_compression_formats <- function() {
222 238
     c("gz", "bz2")
223 239
 }
240
+
241
+
242
+flag_logics <- function() {
243
+    c("AND", "OR", "XOR", "NAND", "NOR", "XNOR")
244
+}
... ...
@@ -196,10 +196,13 @@ import_single_Vispa2Matrix <- function(path,
196 196
 #' for which the value of the column "ProjectID" is one of the specified
197 197
 #' values. If multiple columns are present in the list all filtering
198 198
 #' conditions are applied as a logical AND.
199
+#' @param import_iss Import Vispa2 stats and merge them with the
200
+#' association file?
199 201
 #' @param export_widget_path A path on disk to save produced widgets or NULL
200 202
 #' if the user doesn't wish to save the html file
203
+#' @param ... Additional arguments to pass to \code{\link{import_Vispa2_stats}}
201 204
 #' @family Import functions
202
-#' @return A tibble with the contents of the association file plus a column
205
+#' @return A tibble with the contents of the association file plus columns
203 206
 #' containing the path in the file system for every project and pool if found.
204 207
 #' @details The import series of functions is designed to work in combination
205 208
 #' with the use of Vispa2 pipeline, please refer to this article for more
... ...
@@ -229,7 +232,11 @@ import_single_Vispa2Matrix <- function(path,
229 232
 #' @export
230 233
 #'
231 234
 #' @importFrom purrr map_lgl set_names is_empty
232
-#' @importFrom rlang inform
235
+#' @importFrom rlang inform abort dots_list exec
236
+#' @importFrom htmltools tagList browsable
237
+#' @importFrom tibble add_column
238
+#' @import dplyr
239
+#' @importFrom stringr str_pad
233 240
 #' @importFrom magrittr `%>%`
234 241
 #' @seealso \code{\link{date_formats}}
235 242
 #' @examples
... ...
@@ -245,7 +252,9 @@ import_association_file <- function(path,
245 252
     root = NULL, tp_padding = 4, dates_format = "ymd",
246 253
     separator = "\t",
247 254
     filter_for = NULL,
248
-    export_widget_path = NULL) {
255
+    import_iss = FALSE,
256
+    export_widget_path = NULL,
257
+    ...) {
249 258
     # Check parameters
250 259
     stopifnot(is.character(path) & length(path) == 1)
251 260
     stopifnot((is.character(root) & length(root) == 1) || (is.null(root)))
... ...
@@ -257,6 +266,14 @@ import_association_file <- function(path,
257 266
         is.integer(tp_padding)) & length(tp_padding) == 1)
258 267
     stopifnot(length(dates_format) == 1 & dates_format %in% date_formats())
259 268
     stopifnot(is.character(separator) && length(separator) == 1)
269
+    stopifnot(is.logical(import_iss) && length(import_iss) == 1)
270
+    if (import_iss & is.null(root)) {
271
+        rlang::abort(paste(
272
+            "Can't import Vispa2 stats files without",
273
+            "file system alignment. Provide the appropriate",
274
+            "root."
275
+        ))
276
+    }
260 277
     # Check filter
261 278
     stopifnot(is.null(filter_for) ||
262 279
         (is.list(filter_for) && !is.null(names(filter_for))))
... ...
@@ -292,7 +309,57 @@ import_association_file <- function(path,
292 309
         col_probs,
293 310
         missing_dates
294 311
     )))
295
-    if (something_to_report) {
312
+    ## Fix timepoints
313
+    if (!"TimepointMonths" %in% colnames(as_file)) {
314
+        as_file <- as_file %>%
315
+            tibble::add_column(TimepointMonths = NA_real_)
316
+    }
317
+    if (!"TimepointYears" %in% colnames(as_file)) {
318
+        as_file <- as_file %>%
319
+            tibble::add_column(TimepointYears = NA_real_)
320
+    }
321
+    as_file <- as_file %>%
322
+        dplyr::mutate(
323
+            TimepointMonths = dplyr::if_else(
324
+                condition = is.na(.data$TimepointMonths),
325
+                false = .data$TimepointMonths,
326
+                true = dplyr::if_else(
327
+                    condition = as.numeric(.data$TimePoint) == 0,
328
+                    true = 0,
329
+                    false = dplyr::if_else(
330
+                        condition = as.numeric(.data$TimePoint) > 0 &
331
+                            as.numeric(.data$TimePoint) < 30,
332
+                        true = ceiling(as.numeric(.data$TimePoint) / 30),
333
+                        false = round(as.numeric(.data$TimePoint) / 30)
334
+                    )
335
+                )
336
+            ),
337
+            TimepointYears = dplyr::if_else(
338
+                condition = is.na(.data$TimepointYears),
339
+                false = .data$TimepointYears,
340
+                true = dplyr::if_else(
341
+                    condition = as.numeric(.data$TimePoint) == 0,
342
+                    true = 0,
343
+                    false = dplyr::if_else(
344
+                        condition = as.numeric(.data$TimePoint) > 0 &
345
+                            as.numeric(.data$TimePoint) < 360,
346
+                        true = ceiling(as.numeric(.data$TimePoint) / 360),
347
+                        false = round(as.numeric(.data$TimePoint) / 360)
348
+                    )
349
+                )
350
+            )
351
+        ) %>%
352
+        dplyr::mutate(
353
+            TimepointMonths = stringr::str_pad(
354
+                as.character(.data$TimepointMonths),
355
+                pad = "0", side = "left", width = 2
356
+            ),
357
+            TimepointYears = stringr::str_pad(
358
+                as.character(.data$TimepointYears),
359
+                pad = "0", side = "left", width = 2
360
+            ),
361
+        )
362
+    widget <- if (something_to_report) {
296 363
         summary_report <- .summary_af_import_msg(
297 364
             pars_prob = parsing_problems, dates_prob = date_problems,
298 365
             cols_prob = col_probs, crit_na = missing_dates,
... ...
@@ -305,49 +372,261 @@ import_association_file <- function(path,
305 372
             )
306 373
         )
307 374
         if (getOption("ISAnalytics.widgets") == TRUE) {
308
-            withCallingHandlers(
309
-                expr = {
310
-                    withRestarts(
311
-                        {
312
-                            widg <- .checker_widget(
313
-                                parsing_problems,
314
-                                date_problems,
315
-                                checks,
316
-                                col_probs,
317
-                                missing_dates
318
-                            )
319
-                            print(widg)
320
-                        },
321
-                        print_err = function() {
322
-                            rlang::inform(.widgets_error())
323
-                            if (getOption("ISAnalytics.verbose") == TRUE) {
324
-                                rlang::inform(summary_report,
325
-                                    class = "summary_report"
326
-                                )
327
-                            }
328
-                        }
329
-                    )
330
-                },
331
-                error = function(cnd) {
332
-                    rlang::inform(conditionMessage(cnd))
333
-                    invokeRestart("print_err")
334
-                }
375
+            .produce_widget(".checker_widget",
376
+                parsing_probs = parsing_problems,
377
+                date_probs = date_problems,
378
+                checker_df = checks,
379
+                col_probs = col_probs,
380
+                critical_nas = missing_dates
335 381
             )
336
-            if (!is.null(export_widget_path)) {
337
-                .export_widget_file(
338
-                    widg,
339
-                    export_widget_path, "af_import_report.html"
340
-                )
341
-            }
342 382
         } else if (getOption("ISAnalytics.verbose") == TRUE) {
343 383
             rlang::inform(summary_report,
344 384
                 class = "summary_report"
345 385
             )
386
+            NULL
387
+        } else {
388
+            NULL
346 389
         }
347 390
     }
391
+    if (import_iss) {
392
+        dots <- rlang::dots_list(.named = TRUE)
393
+        dots <- dots[!names(dots) %in% c(
394
+            "association_file",
395
+            "export_widget_path",
396
+            "join_with_af"
397
+        )]
398
+        stats <- withCallingHandlers(
399
+            {
400
+                withRestarts(
401
+                    {
402
+                        rlang::exec(import_Vispa2_stats,
403
+                            association_file = as_file,
404
+                            export_widget_path = "INTERNAL",
405
+                            join_with_af = TRUE,
406
+                            !!!dots
407
+                        )
408
+                    },
409
+                    fail_stats = function() {
410
+                        rlang::inform("Issues in importing stats files, skipping")
411
+                    }
412
+                )
413
+            },
414
+            error = function(err) {
415
+                rlang::inform(err$message)
416
+                invokeRestart("fail_stats")
417
+            }
418
+        )
419
+        if (!is.null(stats)) {
420
+            as_file <- stats$stats
421
+            if (getOption("ISAnalytics.widgets") == TRUE) {
422
+                widget <- htmltools::browsable(
423
+                    htmltools::tagList(
424
+                        widget, stats$report_w
425
+                    )
426
+                )
427
+            }
428
+        }
429
+    }
430
+    if (!is.null(widget)) {
431
+        .print_widget(widget, else_verbose = {
432
+            rlang::inform(summary_report,
433
+                class = "summary_report"
434
+            )
435
+        })
436
+    } else if (getOption("ISAnalytics.verbose") == TRUE) {
437
+        rlang::inform(summary_report,
438
+            class = "summary_report"
439
+        )
440
+    }
441
+    if (!is.null(export_widget_path)) {
442
+        .export_widget_file(widget,
443
+            path = export_widget_path,
444
+            def_file_name = "af_import_report.html"
445
+        )
446
+    }
348 447
     as_file
349 448
 }
350 449
 
450
+#' Import Vispa2 stats given the aligned association file.
451
+#'
452
+#' \lifecycle{experimental}
453
+#' Imports all the Vispa2 stats files for each pool provided the association
454
+#' file has been aligned with the file system
455
+#' (see \code{\link{import_association_file}}).
456
+#'
457
+#' @param association_file The file system aligned association file
458
+#' (contains columns with absolute paths to the 'iss' folder)
459
+#' @param file_prefixes A character vector with known file prefixes
460
+#'  to match on file names. NOTE: the elements represent regular expressions.
461
+#'  For defaults see \link{default_iss_file_prefixes}.
462
+#' @param join_with_af Logical, if TRUE the imported stats files will be
463
+#' merged with the association file, if false a single data frame holding
464
+#' only the stats will be returned.
465
+#' @param pool_col A single string. What is the name of the pool column
466
+#' used in the Vispa2 run? This will be used as a key to perform a join
467
+#' operation with the stats files `POOL` column.
468
+#' @param export_widget_path Either NULL or the path on disk where the
469
+#' widget report should be saved.
470
+#'
471
+#' @family Import functions
472
+#' @importFrom rlang inform abort
473
+#' @importFrom stats setNames
474
+#' @import dplyr
475
+#' @importFrom htmltools browsable tagList
476
+#'
477
+#' @return A data frame
478
+#' @export
479
+#'
480
+#' @examples
481
+#' op <- options("ISAnalytics.widgets" = FALSE)
482
+#' path <- system.file("extdata", "ex_association_file.tsv",
483
+#'     package = "ISAnalytics"
484
+#' )
485
+#' root_pth <- system.file("extdata", "fs.zip", package = "ISAnalytics")
486
+#' root <- unzip_file_system(root_pth, "fs")
487
+#' association_file <- import_association_file(path, root, dates_format = "dmy")
488
+#' af_with_stats <- import_Vispa2_stats(association_file)
489
+#' options(op)
490
+import_Vispa2_stats <- function(association_file,
491
+    file_prefixes = default_iss_file_prefixes(),
492
+    join_with_af = TRUE,
493
+    pool_col = "concatenatePoolIDSeqRun",
494
+    export_widget_path = NULL) {
495
+    ## Check param
496
+    if (!is.data.frame(association_file)) {
497
+        rlang::abort(.af_not_imported_err())
498
+    }
499
+    path_cols <- .path_cols_names()
500
+    if (!path_cols$iss %in% colnames(association_file)) {
501
+        rlang::abort(.af_not_aligned_err())
502
+    }
503
+    min_cols <- c("ProjectID", "concatenatePoolIDSeqRun", path_cols$iss)
504
+    if (!all(min_cols %in% colnames(association_file))) {
505
+        rlang::abort(
506
+            .missing_needed_cols(
507
+                min_cols[!min_cols %in% colnames(association_file)]
508
+            )
509
+        )
510
+    }
511
+    stopifnot(is.character(file_prefixes))
512
+    stopifnot(is.logical(join_with_af))
513
+    join_with_af <- join_with_af[1]
514
+    if (join_with_af) {
515
+        stopifnot(is.character(pool_col) & length(pool_col) == 1)
516
+        stopifnot(pool_col %in% colnames(association_file))
517
+    }
518
+    ## export path has a special placeholder "INTERNAL" for calling
519
+    ## the function inside import_association_file
520
+    stopifnot(is.null(export_widget_path) || is.character(export_widget_path))
521
+    ## Import
522
+    stats <- .import_stats_iss(
523
+        association_file = association_file,
524
+        prefixes = file_prefixes
525
+    )
526
+    report <- stats$report
527
+    stats <- stats$stats
528
+    ## Produce widget report if requested
529
+    widget_stats_import <- if (getOption("ISAnalytics.widgets") == TRUE) {
530
+        .produce_widget(".iss_import_widget", report = report)
531
+    } else {
532
+        NULL
533
+    }
534
+    ## - IF NO STATS IMPORTED (STATS ARE NULL)
535
+    if (is.null(stats)) {
536
+        if (getOption("ISAnalytics.verbose") == TRUE) {
537
+            rlang::inform(paste("No stats files imported"))
538
+        }
539
+        if (!is.null(widget_stats_import)) {
540
+            if (is.null(export_widget_path)) {
541
+                .print_widget(widget_stats_import)
542
+            } else if (export_widget_path != "INTERNAL") {
543
+                .print_widget(widget_stats_import)
544
+            }
545
+        }
546
+        if (!is.null(export_widget_path) && export_widget_path != "INTERNAL") {
547
+            .export_widget_file(
548
+                widget_stats_import,
549
+                export_widget_path,
550
+                "vispa2_stats_import_report.html"
551
+            )
552
+        }
553
+        if (!is.null(export_widget_path) && export_widget_path == "INTERNAL") {
554
+            return(list(stats = NULL, report_w = widget_stats_import))
555
+        } else {
556
+            return(NULL)
557
+        }
558
+    }
559
+    ## - IF STATS NOT NULL
560
+    ## Merge if requested
561
+    if (join_with_af) {
562
+        association_file <- association_file %>%
563
+            dplyr::left_join(stats, by = c(
564
+                stats::setNames("POOL", pool_col),
565
+                "TagSequence" = "TAG"
566
+            ))
567
+        ## Detect potential problems
568
+        missing_stats <- association_file %>%
569
+            dplyr::filter(is.na(.data$RUN_NAME)) %>%
570
+            dplyr::select(
571
+                .data$ProjectID,
572
+                .data$concatenatePoolIDSeqRun,
573
+                dplyr::all_of(pool_col),
574
+                .data$CompleteAmplificationID
575
+            ) %>%
576
+            dplyr::distinct()
577
+        widget_stats_import <- if (getOption("ISAnalytics.widgets") == TRUE) {
578
+            miss_widget <- .produce_widget(".missing_iss_widget",
579
+                missing_iss = missing_stats
580
+            )
581
+            htmltools::browsable(
582
+                htmltools::tagList(widget_stats_import, miss_widget)
583
+            )
584
+        } else {
585
+            NULL
586
+        }
587
+        if (!is.null(widget_stats_import)) {
588
+            if (is.null(export_widget_path)) {
589
+                .print_widget(widget_stats_import)
590
+            } else if (export_widget_path != "INTERNAL") {
591
+                .print_widget(widget_stats_import)
592
+            }
593
+        }
594
+        if (!is.null(export_widget_path) && export_widget_path != "INTERNAL") {
595
+            .export_widget_file(
596
+                widget_stats_import,
597
+                export_widget_path,
598
+                "vispa2_stats_import_report.html"
599
+            )
600
+        }
601
+        if (!is.null(export_widget_path) && export_widget_path == "INTERNAL") {
602
+            return(list(
603
+                stats = association_file,
604
+                report_w = widget_stats_import
605
+            ))
606
+        } else {
607
+            return(association_file)
608
+        }
609
+    }
610
+    if (!is.null(widget_stats_import)) {
611
+        if (is.null(export_widget_path)) {
612
+            .print_widget(widget_stats_import)
613
+        } else if (export_widget_path != "INTERNAL") {
614
+            .print_widget(widget_stats_import)
615
+        }
616
+    }
617
+    if (!is.null(export_widget_path) && export_widget_path != "INTERNAL") {
618
+        .export_widget_file(
619
+            widget_stats_import,
620
+            export_widget_path,
621
+            "vispa2_stats_import_report.html"
622
+        )
623
+    }
624
+    if (!is.null(export_widget_path) && export_widget_path == "INTERNAL") {
625
+        return(list(stats = stats, report_w = widget_stats_import))
626
+    } else {
627
+        return(stats)
628
+    }
629
+}
351 630
 
352 631
 #' Import integration matrices based on the association file.
353 632
 #'
... ...
@@ -817,3 +1096,17 @@ matching_options <- function() {
817 1096
 date_formats <- function() {
818 1097
     c("ymd", "ydm", "mdy", "myd", "dmy", "dym", "yq")
819 1098
 }
1099
+
1100
+
1101
+#' Default regex prefixes for Vispa2 stats files.
1102
+#'
1103
+#' Note that each element is a regular expression.
1104
+#'
1105
+#' @return A character vector of regexes
1106
+#' @export
1107
+#'
1108
+#' @examples
1109
+#' default_iss_file_prefixes()
1110
+default_iss_file_prefixes <- function() {
1111
+    c("stats\\.sequence.", "stats\\.matrix.")
1112
+}
... ...
@@ -94,13 +94,9 @@
94 94
 #' @keywords internal
95 95
 #
96 96
 # @return A character vector of column names
97
-.find_exp_cols <- function(x) {
97
+.find_exp_cols <- function(x, to_exclude) {
98 98
     stopifnot(is.data.frame(x))
99
-    default_cols <- c(
100
-        mandatory_IS_vars(), annotation_IS_vars(),
101
-        "CompleteAmplificationID"
102
-    )
103
-    remaining <- colnames(x)[!colnames(x) %in% default_cols]
99
+    remaining <- colnames(x)[!colnames(x) %in% to_exclude]
104 100
     remaining_numeric <- purrr::map_lgl(remaining, function(y) {
105 101
         exp <- rlang::expr(`$`(x, !!y))
106 102
         exp <- rlang::eval_tidy(exp)
... ...
@@ -128,37 +124,6 @@
128 124
 
129 125
 #---- USED IN : import_single_Vispa2Matrix ----
130 126
 
131
-# Internal function to convert a messy matrix to a tidy data frame
132
-#
133
-# @description Uses the suite of functions provided by the
134
-# tidyverse to produce a more dense and ordered structure.
135
-# This function is not exported and should be called in other importing
136
-# functions.
137
-#
138
-# @param df Messy tibble to convert to tidy
139
-# @keywords internal
140
-#
141
-# @return a tidy tibble
142
-#' @importFrom rlang .data
143
-#' @importFrom tidyr pivot_longer
144
-#' @importFrom dplyr arrange all_of filter
145
-#' @importFrom forcats fct_inseq as_factor
146
-.messy_to_tidy <- function(df) {
147
-    exp_cols <- which(!(colnames(df) %in% c(
148
-        mandatory_IS_vars(),
149
-        annotation_IS_vars()
150
-    )))
151
-    isadf_tidy <- df %>%
152
-        tidyr::pivot_longer(
153
-            cols = dplyr::all_of(exp_cols),
154
-            names_to = "CompleteAmplificationID",
155
-            values_to = "Value",
156
-            values_drop_na = TRUE
157
-        ) %>%
158
-        dplyr::filter(.data$Value > 0)
159
-    isadf_tidy
160
-}
161
-
162 127
 # Internal function to auto-detect the type of IS based on the headers.
163 128
 #
164 129
 # @param df the data frame to inspect
... ...
@@ -353,9 +318,9 @@
353 318
         before <- as_file %>% dplyr::select(dplyr::all_of(date_cols))
354 319
         as_file <- as_file %>%
355 320
             dplyr::mutate(dplyr::across(date_cols,
356
-                .fns = ~ lubridate::parse_date_time(.x,
321
+                .fns = ~ lubridate::as_date(lubridate::parse_date_time(.x,
357 322
                     orders = date_format
358
-                )
323
+                ))
359 324
             ))
360 325
         date_failures <- purrr::map_dfr(date_cols, function(col) {
361 326
             before_col <- purrr::pluck(before, col)
... ...
@@ -388,10 +353,10 @@
388 353
 # @param df The imported association file (data.frame or tibble)
389 354
 # @param root_folder Path to the root folder
390 355
 # @keywords internal
391
-#' @importFrom dplyr select distinct mutate bind_rows
392
-#' @importFrom fs dir_ls
393
-#' @importFrom purrr pmap is_empty reduce map_dbl
394
-#' @importFrom stringr str_replace_all str_extract_all
356
+#' @importFrom dplyr select distinct mutate
357
+#' @importFrom fs dir_ls path dir_exists
358
+#' @importFrom purrr pmap_dfr is_empty
359
+#' @importFrom stringr str_replace_all
395 360
 #' @importFrom tibble tibble
396 361
 #
397 362
 # @return A data frame containing, for each ProjectID and
... ...
@@ -408,89 +373,70 @@
408 373
             .data$PathToFolderProjectID
409 374
         ) %>%
410 375
         dplyr::distinct()
411
-    root_regexp <- stringr::str_replace_all(root_folder, "\\/", "\\\\/")
412
-    if (root_folder == "") {
413
-        results_df <- purrr::pmap(
414
-            temp_df,
415
-            function(...) {
416
-                cur <- tibble::tibble(...)
417
-                pattern <- paste0(
418
-                    "^", root_regexp,
419
-                    stringr::str_replace_all(
420
-                        cur$PathToFolderProjectID, "\\/", "\\\\/"
421
-                    ),
422
-                    "\\/quantification\\/",
423
-                    cur$concatenatePoolIDSeqRun, "$"
424
-                )
425
-                pattern <- stringr::str_replace_all(pattern,
426
-                    pattern = "\\\\\\/\\\\\\/",
427
-                    replacement = "\\\\/"
428
-                )
429
-                dirExists <- file.exists(cur$PathToFolderProjectID)
430
-                value <- if (!dirExists) {
431
-                    NA_character_
432
-                } else {
433
-                    tree_struct <- fs::dir_ls(
434
-                        path = cur$PathToFolderProjectID, recurse = TRUE,
435
-                        type = "directory", fail = FALSE
436
-                    )
437
-                    found <- stringr::str_extract_all(tree_struct, pattern)
438
-                    found <- unlist(found)
439
-                    if (purrr::is_empty(found)) {
440
-                        NA_character_
441
-                    } else {
442
-                        found
443
-                    }
444
-                }
445
-                tibble::tibble(
446
-                    ProjectID = cur$ProjectID,
447
-                    concatenatePoolIDSeqRun =
448
-                        cur$concatenatePoolIDSeqRun,
449
-                    Path = value
450
-                )
376
+    path_cols <- .path_cols_names()
377
+    results_df <- purrr::pmap_dfr(
378
+        temp_df,
379
+        function(...) {
380
+            cur <- tibble::tibble(...)
381
+            if (is.na(cur$PathToFolderProjectID)) {
382
+                return(cur %>%
383
+                    dplyr::mutate(
384
+                        !!path_cols$project := NA_character_,
385
+                        !!path_cols$quant := NA_character_,
386
+                        !!path_cols$iss := NA_character_
387
+                    ))
451 388
             }
452
-        )
453
-    } else {
454
-        tree_struct <- fs::dir_ls(
455
-            path = root_folder, recurse = TRUE,
456
-            type = "directory", fail = FALSE
457
-        )
458
-        results_df <- purrr::pmap(
459
-            temp_df,
460
-            function(...) {
461
-                cur <- tibble::tibble(...)
462
-                pattern <- paste0(
463
-                    "^", root_regexp,
464
-                    stringr::str_replace_all(
465
-                        cur$PathToFolderProjectID, "\\/", "\\\\/"
466
-                    ),
467
-                    "\\/quantification\\/",
468
-                    cur$concatenatePoolIDSeqRun, "$"
469
-                )
470
-                pattern <- stringr::str_replace_all(pattern,
471
-                    pattern = "\\\\\\/\\\\\\/",
472
-                    replacement = "\\\\/"
473
-                )
474
-                found <- stringr::str_extract_all(tree_struct, pattern)
475
-                found <- unlist(found)
476
-                value <- if (purrr::is_empty(found)) {
477
-                    NA_character_
478
-                } else {
479
-                    found
480
-                }
481
-                tibble::tibble(
482
-                    ProjectID = cur$ProjectID,
483
-                    concatenatePoolIDSeqRun =
484
-                        cur$concatenatePoolIDSeqRun,
485
-                    Path = value
486
-                )
389
+            project_folder <- fs::path(
390
+                fs::path(root_folder),
391
+                cur$PathToFolderProjectID
392
+            )
393
+            quant_folder <- fs::path(
394
+                "quantification",
395
+                fs::path(cur$concatenatePoolIDSeqRun)
396
+            )
397
+            iss_folder <- fs::path(
398
+                "iss",
399
+                fs::path(cur$concatenatePoolIDSeqRun)
400
+            )
401
+            dirExists <- fs::dir_exists(project_folder)
402
+            if (!dirExists) {
403
+                return(cur %>%
404
+                    dplyr::mutate(
405
+                        !!path_cols$project := NA_character_,
406
+                        !!path_cols$quant := NA_character_,
407
+                        !!path_cols$iss := NA_character_
408
+                    ))
487 409
             }
488
-        )
489
-    }
490
-    checker_df <- purrr::reduce(results_df, dplyr::bind_rows) %>%
410
+            quant_found <- fs::dir_ls(
411
+                path = project_folder, recurse = TRUE,
412
+                type = "directory", fail = FALSE,
413
+                regexp = quant_folder
414
+            )
415
+            if (length(quant_found) == 0) {
416
+                quant_found <- NA_character_
417
+            }
418
+            iss_found <- fs::dir_ls(
419
+                path = project_folder, recurse = TRUE,
420
+                type = "directory", fail = FALSE,
421
+                regexp = iss_folder
422
+            )
423
+            if (length(iss_found) == 0) {
424
+                iss_found <- NA_character_
425
+            }
426
+            return(
427
+                cur %>%
428
+                    dplyr::mutate(
429
+                        !!path_cols$project := project_folder,
430
+                        !!path_cols$quant := quant_found,
431
+                        !!path_cols$iss := iss_found
432
+                    )
433
+            )
434
+        }
435
+    )
436
+    checker_df <- results_df %>%
491 437
         dplyr::mutate(
492
-            Found = ifelse(!is.na(.data$Path), TRUE, FALSE),
493
-            .before = .data$Path
438
+            Found = ifelse(!is.na(.data[[path_cols$project]]), TRUE, FALSE),
439
+            .before = .data[[path_cols$project]]
494 440
         )
495 441
     checker_df
496 442
 }
... ...
@@ -508,21 +454,257 @@
508 454
 # @keywords internal
509 455
 #
510 456
 # @return An updated association file with absolute paths
457
+#' @importFrom dplyr left_join select
511 458
 .update_af_after_alignment <- function(as_file, checks, root) {
512
-    # Finally import modified association file
513
-    checks <- checks %>%
514
-        dplyr::select(
515
-            .data$ProjectID, .data$concatenatePoolIDSeqRun,
516
-            .data$Path
517
-        )
518 459
     as_file <- as_file %>%
519
-        dplyr::left_join(checks, by = c("ProjectID", "concatenatePoolIDSeqRun"))
460
+        dplyr::left_join(checks %>%
461
+            dplyr::select(
462
+                -.data$PathToFolderProjectID,
463
+                -.data$Found
464
+            ),
465
+        by = c("ProjectID", "concatenatePoolIDSeqRun")
466
+        )
520 467
     as_file
521 468
 }
522 469
 
470
+#---- USED IN : import_Vispa2_stats ----
471
+
472
+# Finds automatically the path on disk to each stats file.
473
+#
474
+#' @import dplyr
475
+#' @importFrom purrr pmap_dfr detect_index
476
+#' @importFrom tibble tibble
477
+#' @importFrom fs dir_ls
478
+#' @importFrom rlang .data
479
+# @keywords internal
480
+# @return A tibble with columns: ProjectID, concatenatePoolIDSeqRun,
481
+# Path_iss (or designated dynamic name), stats_files, info
482
+.stats_report <- function(association_file, prefixes) {
483
+    path_col_names <- .path_cols_names()
484
+    temp <- association_file %>%
485
+        dplyr::select(
486
+            .data$ProjectID,
487
+            .data$concatenatePoolIDSeqRun,
488
+            .data[[path_col_names$iss]]
489
+        ) %>%
490
+        dplyr::distinct()
491
+    # If paths are all NA return
492
+    if (all(is.na(temp[[path_col_names$iss]]))) {
493
+        return(temp %>% dplyr::mutate(
494
+            stats_files = NA_character_,
495
+            info = NULL
496
+        ))
497
+    }
498
+    match_pattern <- function(pattern, temp_row) {
499
+        if (is.na(temp_row[[path_col_names$iss]])) {
500
+            return(tibble::tibble(pattern = pattern, file = NA_character_))
501
+        }
502
+        # For each prefix pattern search in the iss folder
503
+        # Note: there can be
504
+        # - a single file matching the prefix (ideal)
505
+        # - multiple files matching
506
+        # - no file matching
507
+        files <- fs::dir_ls(temp_row[[path_col_names$iss]],
508
+            type = "file", fail = FALSE,
509
+            regexp = pattern
510
+        )
511
+        if (length(files) == 0) {
512
+            files <- NA_character_
513
+        }
514
+        tibble::tibble(pattern = pattern, file = files)
515
+    }
516
+    stats_paths <- purrr::pmap_dfr(temp, function(...) {
517
+        temp_row <- tibble::tibble(...)
518
+        matches <- purrr::map_dfr(prefixes, ~ match_pattern(.x, temp_row))
519
+        if (all(is.na(matches$file))) {
520
+            # No stats files found for all prefixes
521
+            return(temp_row %>%
522
+                dplyr::mutate(
523
+                    stats_files = NA_character_,
524
+                    info = list("NOT FOUND")
525
+                ))
526
+        }
527
+        if (length(which(!is.na(matches$file))) == 1) {
528
+            return(temp_row %>%
529
+                dplyr::mutate(
530
+                    stats_files = (matches %>%
531
+                        dplyr::filter(!is.na(.data$file)) %>%
532
+                        dplyr::pull(.data$file)),
533
+                    info = NA
534
+                ))
535
+        }
536
+        # Get the count of files found for each pattern and order them
537
+        # as preference - preferred pattern is the first in the prefixes
538
+        # parameter
539
+        pattern_counts <- matches %>%
540
+            dplyr::mutate(pattern = factor(.data$pattern,
541
+                levels = prefixes
542
+            )) %>%
543
+            dplyr::filter(!is.na(.data$file)) %>%
544
+            dplyr::group_by(.data$pattern) %>%
545
+            dplyr::tally() %>%
546
+            dplyr::arrange(.data$pattern)
547
+        one_index <- purrr::detect_index(pattern_counts$n, ~ .x == 1)
548
+        if (one_index == 0) {
549
+            ## Means there are duplicates for all patterns, gets reported
550
+            ## but no files will be imported
551
+            return(temp_row %>%
552
+                dplyr::mutate(
553
+                    stats_files = NA_character_,
554
+                    info = list(matches %>%
555
+                        dplyr::filter(!is.na(.data$file)))
556
+                ))
557
+        } else {
558
+            ## Pick the file with the pattern that has a count of 1 (no
559
+            ## duplicates)
560
+            chosen_pattern <- pattern_counts$pattern[one_index]
561
+            return(temp_row %>%
562
+                dplyr::mutate(
563
+                    stats_files = matches %>%
564
+                        dplyr::filter(.data$pattern == chosen_pattern) %>%
565
+                        dplyr::pull(.data$file),
566
+                    info = NA
567
+                ))
568
+        }
569
+    })
570
+    stats_paths
571
+}
572
+
573
+# Imports all found Vispa2 stats files.
574
+#
575
+#' @import BiocParallel
576
+#' @importFrom purrr map_chr pmap_dfr reduce is_empty
577
+#' @import dplyr
578
+#' @importFrom tibble tibble
579
+#' @importFrom data.table fread
580
+# @keywords internal
581
+#
582
+# @return A list with the imported stats and a report of imported files. If no
583
+# files were imported returns NULL instead
584
+.import_stats_iss <- function(association_file, prefixes) {
585
+    # Obtain paths
586
+    stats_paths <- .stats_report(association_file, prefixes)
587
+    if (all(is.na(stats_paths$stats_files))) {
588
+        stats_paths <- stats_paths %>% dplyr::mutate(Imported = FALSE)
589
+        evaluate <- function(x) {
590
+            if (is.list(x)) {
591
+                if (is.data.frame(x)[[1]]) {
592
+                    return("DUPLICATES")
593
+                }
594
+                if (is.na(x[[1]]) | is.null(x[[1]])) {
595
+                    return(NA_character_)
596
+                }
597
+                return(x[[1]])
598
+            }
599
+            if (is.na(x)) {
600
+                return(NA_character_)
601
+            }
602
+            return(x)
603
+        }
604
+        condition <- purrr::map_chr(stats_paths$info, evaluate)
605
+        stats_paths <- stats_paths %>% dplyr::mutate(reason = condition)
606
+        return(list(stats = NULL, report = stats_paths))
607
+    }
608
+    # Setup parallel workers and import
609
+    # Register backend according to platform
610
+    if (.Platform$OS.type == "windows") {
611
+        p <- BiocParallel::SnowParam(
612
+            stop.on.error = FALSE,
613
+            tasks = length(stats_paths$stats_files),
614
+            progressbar = getOption("ISAnalytics.verbose"),
615
+            exportglobals = FALSE
616
+        )
617
+    } else {
618
+        p <- BiocParallel::MulticoreParam(
619
+            stop.on.error = FALSE,
620
+            tasks = length(stats_paths$stats_files),
621
+            progressbar = getOption("ISAnalytics.verbose"),
622
+            exportglobals = FALSE
623
+        )
624
+    }
625
+    FUN <- function(x) {
626
+        if (is.na(x)) {
627
+            return(NULL)
628
+        }
629
+        stats <- data.table::fread(
630
+            file = x, sep = "\t",
631
+            na.strings = c("", "NA", "na", "NONE"),
632
+            data.table = TRUE
633
+        )
634
+        ok <- .check_stats(stats)
635
+        if (ok == TRUE) {
636
+            return(stats %>%
637
+                dplyr::mutate(TAG = stringr::str_replace_all(
638
+                    .data$TAG,
639
+                    pattern = "\\.", replacement = ""
640
+                )))
641
+        } else {
642
+            return("MALFORMED")
643
+        }
644
+    }
645
+    stats_dfs <- BiocParallel::bptry(
646
+        BiocParallel::bplapply(stats_paths$stats_files,
647
+            FUN,
648
+            BPPARAM = p
649
+        )
650
+    )
651
+    BiocParallel::bpstop(p)
652
+    correct <- purrr::map_chr(stats_dfs, function(x) {
653
+        if (all(is.data.frame(x))) {
654
+            "TRUE"
655
+        } else if (is.null(x)) {
656
+            "FALSE"
657
+        } else {
658
+            x
659
+        }
660
+    })
661
+    stats_paths <- stats_paths %>%
662
+        dplyr::mutate(Imported = correct)
663
+    stats_paths <- purrr::pmap_dfr(stats_paths, function(...) {
664
+        row <- tibble::tibble(...)
665
+        condition <- if (row$Imported == "MALFORMED") {
666
+            "MALFORMED"
667
+        } else if (row$Imported == "TRUE") {
668
+            NA_character_
669
+        } else {
670
+            if (!is.na(row$info) & is.list(row$info)) {
671
+                if (is.data.frame(row$info[[1]])) {
672
+                    "DUPLICATES"
673
+                } else if (is.null(row$info[[1]])) {
674
+                    NA_character_
675
+                } else {
676
+                    row$info[[1]]
677
+                }
678
+            } else {
679
+                row$info
680
+            }
681
+        }
682
+        row %>%
683
+            dplyr::mutate(reason = condition) %>%
684
+            dplyr::mutate(Imported = dplyr::if_else(
685
+                condition = (.data$Imported == "MALFORMED"),
686
+                true = FALSE,
687
+                false = as.logical(.data$Imported)
688
+            ))
689
+    })
690
+    stats_dfs <- stats_dfs[stats_paths$Imported]
691
+    # Bind rows in single tibble for all files
692
+    if (purrr::is_empty(stats_dfs)) {
693
+        return(list(stats = NULL, report = stats_paths))
694
+    }
695
+    stats_dfs <- purrr::reduce(stats_dfs, function(x, y) {
696
+        x %>%
697
+            dplyr::bind_rows(y) %>%
698
+            dplyr::distinct()
699
+    })
700
+    list(stats = stats_dfs, report = stats_paths)
701
+}
702
+
523 703
 #---- USED IN : import_parallel_Vispa2Matrices_interactive ----
524 704
 
525 705
 # Helper function to be used internally to treat association file.
706
+#' @importFrom rlang inform enexpr eval_tidy parse_expr
707
+#' @importFrom purrr is_empty map2_chr
526 708
 .manage_association_file <- function(association_file,
527 709
     root, padding, format,
528 710
     delimiter, filter) {
... ...
@@ -988,94 +1170,89 @@
988 1170
 # @keywords internal
989 1171
 #' @importFrom tibble tibble
990 1172
 #' @importFrom fs dir_ls as_fs_path
991
-#' @importFrom purrr map reduce map_dbl
992
-#' @importFrom stringr str_detect
993
-#' @importFrom dplyr select distinct bind_rows mutate
1173
+#' @importFrom purrr pmap_dfr cross_df
1174
+#' @importFrom stringr str_replace_all
1175
+#' @import dplyr
994 1176
 #' @importFrom tidyr nest
995 1177
 #
996 1178
 # @return A tibble containing all found files, including duplicates and missing
997 1179
 .lookup_matrices <- function(association_file,
998 1180
     quantification_type,
999 1181
     matrix_type) {
1182
+    path_col_names <- .path_cols_names()
1000 1183
     temp <- association_file %>%
1001 1184
         dplyr::select(
1002 1185
             .data$ProjectID,
1003 1186
             .data$concatenatePoolIDSeqRun,
1004
-            .data$Path
1187
+            .data[[path_col_names$quant]]
1005 1188
         ) %>%
1006 1189
         dplyr::distinct()
1007
-    # Regex for matrix type matching
1008
-    matrix_type_regexp <- paste0("\\.no0\\.annotated")
1009
-    # Map, for each row in temp
1010
-    lups <- purrr::pmap(temp, function(...) {
1011
-        # Obtain a tibble with all the columns in temp but a single row
1012
-        current <- tibble::tibble(...)
1013
-        # Scan file system starting from Path
1014
-        files_found <- fs::dir_ls(path = current$Path)
1015
-        # Map for each quantification type requested
1016
-        matching <- purrr::map(quantification_type, function(x) {
1017
-            # Are there files with the requested quantification type in the
1018
-            # scanned folder?
1019
-            file_regexp <- paste0("\\_", x, "\\_", "matrix")
1020
-            detected <- unlist(stringr::str_detect(files_found,
1021
-                pattern = file_regexp
1022
-            ))
1023
-            found <- if (length(files_found[detected]) > 0) {
1024
-                # If yes subset and in the subset find only the ones that match
1025
-                # the matrix type regex
1026
-                files_found <- files_found[detected]
1027
-                detected <- unlist(stringr::str_detect(files_found,
1028
-                    pattern = matrix_type_regexp
1029
-                ))
1030
-                type_match <- if (matrix_type == "annotated") {
1031
-                    # If the wanted matrix type is annotated
1032
-                    if (length(detected) > 0) {
1033
-                        # If some paths were found to contain the type regex
1034
-                        # then return the subsetted vector of file paths
1035
-                        files_found[detected]
1036
-                    } else {
1037
-                        # If no paths were found to contain the type regex
1038
-                        # return NA
1039
-                        NA_character_
1040
-                    }
1041
-                } else {
1042
-                    # If the wanted matrix is NOT annotated
1043
-                    if (length(files_found[detected]) > 0) {
1044
-                        # If some paths were found to contain the type regex
1045
-                        if (length(files_found[!detected]) > 0) {
1046
-                            # If the files_found MINUS the detected ones is
1047
-                            # not an empty set then return the set
1048
-                            files_found[!detected]
1049
-                        } else {
1050
-                            # If the files_found MINUS the detected ones is an
1051
-                            # empty set then return NA
1052
-                            NA_character_
1053
-                        }
1054
-                    } else {
1055
-                        # If there were no paths that matched the type regex
1056
-                        # simply return the original files_found
1057
-                        files_found
1058
-                    }
1059
-                }
1060
-            } else {
1061
-                NA_character_
1190
+    ## Obtain a df with all possible combination of suffixes for each
1191
+    ## quantification
1192
+    ms <- if (matrix_type == "annotated") {
1193
+        .matrix_annotated_suffixes()
1194
+    } else {
1195
+        .matrix_not_annotated_suffixes()
1196
+    }
1197
+    cross <- purrr::cross_df(list(
1198
+        quant = quantification_type,
1199
+        ms = ms
1200
+    ))
1201
+    cross <- cross %>%
1202
+        dplyr::mutate(suffix = paste0(
1203
+            .data$quant,
1204
+            "_matrix",
1205
+            .data$ms,
1206
+            ".tsv"
1207
+        )) %>%
1208
+        dplyr::mutate(suffix = stringr::str_replace_all(
1209
+            .data$suffix,
1210
+            "\\.",
1211
+            "\\\\."
1212
+        ))
1213
+    ## For each row in temp (aka for each ProjectID and
1214
+    ## concatenatePoolIDSeqRun) scan the quantification folder
1215
+    lups <- purrr::pmap_dfr(temp, function(...) {
1216
+        temp_row <- tibble::tibble(...)
1217
+        found <- purrr::pmap_dfr(cross, function(...) {
1218
+            ## For each quantification scan the folder for the
1219
+            ## corresponding suffixes
1220
+            cross_row <- tibble::tibble(...)
1221
+            matches <- fs::dir_ls(temp_row[[path_col_names$quant]],
1222
+                type = "file", fail = FALSE,
1223
+                regexp = cross_row$suffix
1224
+            )
1225
+            if (length(matches) == 0) {
1226
+                matches <- NA_character_
1062 1227
             }
1063 1228
             tibble::tibble(
1064
-                Quantification_type = x,
1065
-                Files_found = fs::as_fs_path(found)
1229
+                Quantification_type = cross_row$quant,
1230
+                Files_found = matches
1066 1231
             )
1067 1232
         })
1068
-        matching <- purrr::reduce(matching, dplyr::bind_rows) %>%
1069
-            dplyr::mutate(Files_found = fs::as_fs_path(.data$Files_found))
1070
-
1233
+        found <- found %>%
1234
+            dplyr::group_by(.data$Quantification_type) %>%
1235
+            dplyr::distinct() %>%
1236
+            dplyr::group_modify(~ {
1237
+                if (nrow(.x) > 1) {
1238
+                    if (any(is.na(.x$Files_found)) &
1239
+                        !all(is.na(.x$Files_found))) {
1240
+                        .x %>%
1241
+                            dplyr::filter(!is.na(.data$Files_found))
1242
+                    } else {
1243
+                        .x
1244
+                    }
1245
+                } else {
1246
+                    .x
1247
+                }
1248
+            })
1071 1249
         tibble::tibble(
1072
-            ProjectID = current$ProjectID,
1073
-            concatenatePoolIDSeqRun = current$concatenatePoolIDSeqRun,
1074
-            matching
1250
+            ProjectID = temp_row$ProjectID,
1251
+            concatenatePoolIDSeqRun = temp_row$concatenatePoolIDSeqRun,
1252
+            found
1075 1253
         )
1076 1254
     })
1077
-
1078
-    lups <- purrr::reduce(lups, dplyr::bind_rows) %>%
1255
+    lups <- lups %>%
1079 1256
         tidyr::nest(Files = c(.data$Quantification_type, .data$Files_found))
1080 1257
     lups <- .trace_anomalies(lups)
1081 1258
     lups
... ...
@@ -2243,20 +2420,6 @@
2243 2420
 
2244 2421
 #---- USED IN : aggregate_metadata ----
2245 2422
 
2246
-# Minimal association_file variable set.
2247
-#
2248
-# Contains the names of the columns of the association file that are a minimum
2249
-# requirement to perform aggregation.
2250
-# @keywords internal
2251
-#
2252
-# @return A character vector
2253
-.min_var_set <- function() {
2254
-    c(
2255
-        "FusionPrimerPCRDate", "LinearPCRDate", "VCN", "DNAngUsed", "Kapa",
2256
-        "ulForPool", "Path"
2257
-    )
2258
-}
2259
-
2260 2423
 # Minimal stats column set.
2261 2424
 #
2262 2425
 # Contains the name of the columns that are a minimum requirement for
... ...
@@ -2285,197 +2448,68 @@
2285 2448
     }
2286 2449
 }
2287 2450
 
2288
-# Finds automatically the path on disk to each stats file.
2289
-#
2290
-# @param association_file The association file
2451
+# Aggregates the association file based on the function table.
2291 2452
 #' @import dplyr
2292
-#' @importFrom purrr pmap_dfr pmap_df
2293
-#' @importFrom tibble as_tibble tibble add_column
2294
-#' @importFrom stringr str_extract str_extract_all str_detect
2295
-#' @importFrom fs dir_exists dir_ls
2296
-#' @importFrom tidyr unnest unite
2297
-#' @importFrom rlang .data
2298
-# @keywords internal
2299
-# @return A tibble with ProjectID and the absolute path on disk to each file
2300
-# if found
2301
-.stats_report <- function(association_file) {
2302
-    # Obtain unique projectID and path
2303
-    temp <- association_file %>%
2304
-        dplyr::select(.data$ProjectID, .data$Path) %>%
2305
-        dplyr::distinct()
2306
-    # If paths are all NA return
2307
-    if (all(is.na(temp$Path))) {
2308
-        return(NULL)
2309
-    }
2310
-    pattern <- "stats\\.sequence*"
2311
-    # Obtain paths to iss folder (as ../iss/pool)
2312
-    stats_paths <- purrr::pmap_dfr(temp, function(...) {
2313
-        current <- tibble::tibble(...)
2314
-        if (is.na(current$Path)) {
2315
-            l <- list(ProjectID = current$ProjectID, stats_path = NA_character_)
2316
-            tibble::as_tibble(l)
2317
-        } else {
2318
-            path_split <- unlist(fs::path_split(current$Path))
2319
-            path_split[path_split == "quantification"] <- "iss"
2320
-            pj_path <- fs::path_join(path_split)
2321
-            l <- list(ProjectID = current$ProjectID, stats_path = pj_path)
2322
-            tibble::as_tibble(l)
2323
-        }
2324
-    })
2325
-    stats_paths <- stats_paths %>% dplyr::distinct()
2326
-    # Find all stats files in iss folders
2327
-    stats_paths <- purrr::pmap_df(stats_paths, function(...) {
2328
-        cur <- tibble::tibble(...)
2329
-        # Check if folder exists
2330
-        # Set to NA the iss folders not found
2331
-        if (!fs::dir_exists(cur$stats_path)) {
2332
-            cur$stats_path <- NA_character_
2333
-            cur %>% tibble::add_column(stats_files = list(NA_character_))
2334
-        } else {
2335
-            files_in_iss <- unlist(fs::dir_ls(cur$stats_path,
2336
-                regexp = pattern,
2337
-                type = "file"
2338
-            ))
2339
-            cur %>% tibble::add_column(stats_files = list(files_in_iss))
2340
-        }
2341
-    })
2342
-    stats_paths <- stats_paths %>%
2343
-        tidyr::unnest(.data$stats_files)
2344
-}
2345
-
2346
-# Imports all found Vispa2 stats files.
2347
-#
2348
-# @param association_file The association file
2349
-#' @import BiocParallel
2350
-#' @importFrom tibble as_tibble
2351
-#' @importFrom purrr map2_lgl reduce is_empty
2352
-#' @importFrom dplyr mutate bind_rows distinct
2453
+#' @importFrom rlang .data is_function is_formula
2454
+#' @importFrom tibble tibble
2455
+#' @importFrom purrr pmap_df
2353 2456
 # @keywords internal
2354 2457
 #
2355
-# @return A list with the imported stats and a report of imported files. If no
2356
-# files were imported returns NULL instead
2357
-.import_stats_iss <- function(association_file) {
2358
-    # Obtain paths
2359
-    stats_paths <- .stats_report(association_file)
2360
-    if (is.null(stats_paths)) {
2458
+# @return A tibble
2459
+.aggregate_meta <- function(association_file, grouping_keys, function_tbl) {
2460
+    ## Discard columns that are not present in the af
2461
+    function_tbl <- function_tbl %>%
2462
+        dplyr::filter(.data$Column %in% colnames(association_file))
2463
+    ## If no columns are left return
2464
+    if (nrow(function_tbl) == 0) {
2361 2465
         return(NULL)
2362 2466
     }
2363
-    # Setup parallel workers and import
2364
-    # Register backend according to platform
2365
-    if (.Platform$OS.type == "windows") {
2366
-        p <- BiocParallel::SnowParam(stop.on.error = FALSE)
2367
-    } else {
2368
-        p <- BiocParallel::MulticoreParam(
2369
-            stop.on.error = FALSE
2467
+    function_tbl <- function_tbl %>%
2468
+        tidyr::nest(cols = .data$Column)
2469
+    apply_function <- function(Function, Args, Output_colname, cols) {
2470
+        Function <- list(Function)
2471
+        Args <- list(Args)
2472
+        row <- tibble::tibble(Function, Args,
2473
+            Output_colname,
2474
+            cols = list(cols)
2370 2475
         )
2371
-    }
2372
-    FUN <- function(x) {
2373
-        stats <- utils::read.csv(
2374
-            file = x, sep = "\t", stringsAsFactors = FALSE,
2375
-            check.names = FALSE
2376
-        )
2377
-        stats <- tibble::as_tibble(stats)
2378
-        ok <- .check_stats(stats)
2379
-        if (ok == TRUE) {
2380
-            return(stats)
2381
-        } else {
2382
-            return(NULL)
2476
+        if (rlang::is_formula(row$Function[[1]])) {
2477
+            res <- association_file %>%
2478
+                dplyr::group_by(dplyr::across(dplyr::all_of(grouping_keys))) %>%
2479
+                dplyr::summarise(
2480
+                    dplyr::across(
2481
+                        dplyr::all_of(row$cols[[1]]$Column),
2482
+                        .fns = row$Function[[1]],
2483
+                        .names = row$Output_colname
2484
+                    ),
2485
+                    .groups = "drop"
2486
+                )
2487
+            return(res)
2383 2488
         }
2384
-    }
2385
-    suppressMessages(suppressWarnings({
2386
-        stats_dfs <- BiocParallel::bptry(
2387
-            BiocParallel::bplapply(stats_paths$stats_files, FUN, BPPARAM = p)