Browse code

[UPDATE] Upgrade to v 1.1.11

Giulia Pais authored on 11/05/2021 12:50:41
Showing 51 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: ISAnalytics
2 2
 Title: Analyze gene therapy vector insertion sites data identified from genomics next generation sequencing reads for clonal tracking studies
3
-Version: 1.1.10
3
+Version: 1.1.11
4 4
 Date: 2020-07-03
5 5
 Authors@R: c(
6 6
   person(given = "Andrea",
... ...
@@ -32,7 +32,6 @@ Imports:
32 32
     tidyr,
33 33
     purrr,
34 34
     rlang,
35
-    forcats,
36 35
     tibble,
37 36
     BiocParallel,
38 37
     stringr,
... ...
@@ -48,7 +47,9 @@ Imports:
48 47
     grDevices,
49 48
     data.table,
50 49
     readxl,
51
-    tools
50
+    tools,
51
+    Rcapture,
52
+    plotly
52 53
 Encoding: UTF-8
53 54
 LazyData: false
54 55
 Roxygen: list(markdown = TRUE)
... ...
@@ -63,7 +64,8 @@ Suggests:
63 64
     rmarkdown,
64 65
     roxygen2,
65 66
     vegan,
66
-    withr
67
+    withr,
68
+    extraDistr
67 69
 VignetteBuilder: knitr
68 70
 RdMacros: 
69 71
     lifecycle
... ...
@@ -2,11 +2,15 @@
2 2
 
3 3
 export(CIS_grubbs)
4 4
 export(CIS_volcano_plot)
5
+export(HSC_population_plot)
6
+export(HSC_population_size_estimate)
5 7
 export(aggregate_metadata)
6 8
 export(aggregate_values_by_key)
7 9
 export(annotation_IS_vars)
8 10
 export(as_sparse_matrix)
9 11
 export(association_file_columns)
12
+export(available_outlier_tests)
13
+export(blood_lineages_default)
10 14
 export(clinical_relevant_suspicious_genes)
11 15
 export(comparison_matrix)
12 16
 export(compute_abundance)
... ...
@@ -14,9 +18,12 @@ export(compute_near_integrations)
14 18
 export(cumulative_count_union)
15 19
 export(date_columns_coll)
16 20
 export(date_formats)
21
+export(default_iss_file_prefixes)
22
+export(default_meta_agg)
17 23
 export(default_stats)
18 24
 export(generate_Vispa2_launch_AF)
19 25
 export(generate_blank_association_file)
26
+export(import_Vispa2_stats)
20 27
 export(import_association_file)
21 28
 export(import_parallel_Vispa2Matrices_auto)
22 29
 export(import_parallel_Vispa2Matrices_interactive)
... ...
@@ -24,6 +31,8 @@ export(import_single_Vispa2Matrix)
24 31
 export(known_clinical_oncogenes)
25 32
 export(mandatory_IS_vars)
26 33
 export(matching_options)
34
+export(outlier_filter)
35
+export(outliers_by_pool_fragments)
27 36
 export(quantification_types)
28 37
 export(realign_after_collisions)
29 38
 export(reduced_AF_columns)
... ...
@@ -36,6 +45,7 @@ export(unzip_file_system)
36 45
 import(BiocParallel)
37 46
 import(dplyr)
38 47
 import(ggplot2)
48
+import(htmltools)
39 49
 import(lifecycle)
40 50
 import(upsetjs)
41 51
 importFrom(BiocParallel,MulticoreParam)
... ...
@@ -44,6 +54,8 @@ importFrom(BiocParallel,bplapply)
44 54
 importFrom(BiocParallel,bpok)
45 55
 importFrom(BiocParallel,bpstop)
46 56
 importFrom(BiocParallel,bptry)
57
+importFrom(Rcapture,closedp.0)
58
+importFrom(Rcapture,closedp.bc)
47 59
 importFrom(data.table,fread)
48 60
 importFrom(data.table,melt.data.table)
49 61
 importFrom(data.table,rbindlist)
... ...
@@ -60,13 +72,12 @@ importFrom(dplyr,group_by)
60 72
 importFrom(dplyr,group_split)
61 73
 importFrom(dplyr,inner_join)
62 74
 importFrom(dplyr,intersect)
75
+importFrom(dplyr,left_join)
63 76
 importFrom(dplyr,mutate)
64 77
 importFrom(dplyr,rename)
65 78
 importFrom(dplyr,select)
66 79
 importFrom(dplyr,semi_join)
67 80
 importFrom(dplyr,slice)
68
-importFrom(forcats,as_factor)
69
-importFrom(forcats,fct_inseq)
70 81
 importFrom(fs,as_fs_path)
71 82
 importFrom(fs,dir_create)
72 83
 importFrom(fs,dir_exists)
... ...
@@ -91,13 +102,18 @@ importFrom(htmltools,tagList)
91 102
 importFrom(htmltools,tags)
92 103
 importFrom(lubridate,parse_date_time)
93 104
 importFrom(magrittr,`%>%`)
105
+importFrom(plotly,plot_ly)
94 106
 importFrom(psych,describe)
107
+importFrom(purrr,cross_df)
108
+importFrom(purrr,detect_index)
95 109
 importFrom(purrr,flatten)
110
+importFrom(purrr,flatten_chr)
96 111
 importFrom(purrr,is_empty)
97 112
 importFrom(purrr,is_formula)
98 113
 importFrom(purrr,is_function)
99 114
 importFrom(purrr,map)
100 115
 importFrom(purrr,map2)
116
+importFrom(purrr,map2_chr)
101 117
 importFrom(purrr,map2_dfr)
102 118
 importFrom(purrr,map2_lgl)
103 119
 importFrom(purrr,map_chr)
... ...
@@ -108,6 +124,7 @@ importFrom(purrr,pmap)
108 124
 importFrom(purrr,pmap_chr)
109 125
 importFrom(purrr,pmap_df)
110 126
 importFrom(purrr,pmap_dfr)
127
+importFrom(purrr,pmap_lgl)
111 128
 importFrom(purrr,reduce)
112 129
 importFrom(purrr,set_names)
113 130
 importFrom(purrr,walk)
... ...
@@ -126,25 +143,33 @@ importFrom(rlang,.data)
126 143
 importFrom(rlang,`:=`)
127 144
 importFrom(rlang,abort)
128 145
 importFrom(rlang,arg_match)
146
+importFrom(rlang,as_function)
129 147
 importFrom(rlang,call2)
130 148
 importFrom(rlang,dots_list)
149
+importFrom(rlang,enexpr)
131 150
 importFrom(rlang,env_bind)
132 151
 importFrom(rlang,eval_tidy)
152
+importFrom(rlang,exec)
133 153
 importFrom(rlang,expr)
134 154
 importFrom(rlang,fn_fmls_names)
135 155
 importFrom(rlang,inform)
156
+importFrom(rlang,is_formula)
157
+importFrom(rlang,is_function)
158
+importFrom(rlang,list2)
136 159
 importFrom(rlang,parse_expr)
160
+importFrom(stats,dt)
137 161
 importFrom(stats,median)
138 162
 importFrom(stats,na.omit)
139 163
 importFrom(stats,p.adjust)
140 164
 importFrom(stats,pt)
165
+importFrom(stats,setNames)
166
+importFrom(stats,shapiro.test)
141 167
 importFrom(stringr,str_detect)
142
-importFrom(stringr,str_extract)
143
-importFrom(stringr,str_extract_all)
144 168
 importFrom(stringr,str_pad)
145 169
 importFrom(stringr,str_replace)
146 170
 importFrom(stringr,str_replace_all)
147 171
 importFrom(stringr,str_split)
172
+importFrom(stringr,str_to_upper)
148 173
 importFrom(tibble,add_column)
149 174
 importFrom(tibble,add_row)
150 175
 importFrom(tibble,as_tibble)
... ...
@@ -153,12 +178,12 @@ importFrom(tibble,as_tibble_row)
153 178
 importFrom(tibble,is_tibble)
154 179
 importFrom(tibble,tibble)
155 180
 importFrom(tibble,tibble_row)
181
+importFrom(tibble,tribble)
156 182
 importFrom(tidyr,everything)
157 183
 importFrom(tidyr,nest)
158 184
 importFrom(tidyr,pivot_longer)
159 185
 importFrom(tidyr,pivot_wider)
160 186
 importFrom(tidyr,separate)
161
-importFrom(tidyr,unite)
162 187
 importFrom(tidyr,unnest)
163 188
 importFrom(tools,file_path_sans_ext)
164 189
 importFrom(utils,read.csv)
... ...
@@ -1,5 +1,35 @@
1 1
 \title{ISAnalytics News}
2 2
 
3
+# ISAnalytics 1.1.11 (2021-05-11)
4
+
5
+## NEW FUNCTIONALITY
6
+
7
+* `HSC_population_size_estimate` and `HSC_population_plot` allow estimates
8
+on hematopoietic stem cell population size
9
+* Importing of Vispa2 stats per pool now has a dedicated function,
10
+`import_Vispa2_stats`
11
+* `outlier_filter` and `outliers_by_pool_fragments` offer a mean to filter
12
+poorly represented samples based on custom outliers tests
13
+
14
+## VISIBLE USER CHANGES
15
+
16
+* The argument `import_stats` of `aggregate_metadata` is officially deprecated
17
+in favor of `import_Vispa2_stats`
18
+* `aggregate_metadata` is now a lot more flexible on what operations can be
19
+performed on columns via the new argument `aggregating_functions`
20
+* `import_association_file` allows directly for the import of Vispa2 stats
21
+and converts time points to months and years where not already present
22
+* File system alignment of `import_association_file` now produces 3 separate
23
+columns for paths
24
+* `separate_quant_matrices` and `comparison_matrix` now do not require 
25
+mandatory columns other than the quantifications - this allows for separation
26
+or joining also for aggregated matrices
27
+
28
+## FIXES
29
+
30
+* Fixed a minor issue in `CIS_volcano_plot` that caused duplication of some
31
+labels if highlighted genes were provided in input
32
+
3 33
 # ISAnalytics 1.1.10 (2021-04-08)
4 34
 
5 35
 ## FIXES
... ...
@@ -26,6 +26,7 @@
26 26
 #' * Import functions:
27 27
 #'   * \code{\link{import_single_Vispa2Matrix}}
28 28
 #'   * \code{\link{import_association_file}}
29
+#'   * \code{\link{import_Vispa2_stats}}
29 30
 #'   * \code{\link{import_parallel_Vispa2Matrices_interactive}}
30 31
 #'   * \code{\link{import_parallel_Vispa2Matrices_auto}}
31 32
 #' * Aggregation functions:
... ...
@@ -34,6 +35,9 @@
34 35
 #' * Collision removal functions:
35 36
 #'   * \code{\link{remove_collisions}}
36 37
 #'   * \code{\link{realign_after_collisions}}
38
+#' * Removal of outliers from raw reads
39
+#'   * \code{\link{outlier_filter}}
40
+#'   * \code{\link{outliers_by_pool_fragments}}
37 41
 #' * Recalibration functions:
38 42
 #'   * \code{\link{compute_near_integrations}}
39 43
 #' * Analysis functions:
... ...
@@ -45,8 +49,11 @@
45 49
 #'   * \code{\link{sample_statistics}}
46 50
 #'   * \code{\link{CIS_grubbs}}
47 51
 #'   * \code{\link{cumulative_count_union}}
52
+#' * HSC population size estimate:
53
+#'   * \code{\link{HSC_population_size_estimate}}
48 54
 #' * Plotting functions:
49 55
 #'   * \code{\link{CIS_volcano_plot}}
56
+#'   * \code{\link{HSC_population_plot}}
50 57
 #' * Utility functions:
51 58
 #'   * \code{\link{generate_blank_association_file}}
52 59
 #'   * \code{\link{generate_Vispa2_launch_AF}}
... ...
@@ -4,20 +4,28 @@
4 4
 
5 5
 #' Performs aggregation on metadata contained in the association file.
6 6
 #'
7
-#' \lifecycle{experimental}
8
-#' Groups metadata by grouping_keys and returns a summary of info for each
9
-#' group. For more details on how to use this function:
7
+#' \lifecycle{maturing}
8
+#' Groups metadata by the specified grouping keys and returns a
9
+#' summary of info for each group. For more details on how to use this function:
10 10
 #' \code{vignette("Working with aggregate functions", package = "ISAnalytics")}
11 11
 #'
12 12
 #' @param association_file The imported association file
13
-#' (via `import_association_file`)
13
+#' (via link{import_association_file})
14 14
 #' @param grouping_keys A character vector of column names to form a group
15
-#' @param import_stats Should Vispa2 stats files be imported and included?
15
+#' @param aggregating_functions A data frame containing specifications
16
+#' of the functions to be applied to columns in the association file during
17
+#' aggregation. It defaults to \link{default_meta_agg}. The structure of
18
+#' this data frame should be maintained if the user wishes to change the
19
+#' defaults.
20
+#' @param import_stats `r lifecycle::badge("deprecated")` The import
21
+#' of VISPA2 stats has been moved to its dedicated function,
22
+#' see \link{import_Vispa2_stats}.
16 23
 #' @family Aggregate functions
24
+#' @importFrom rlang abort inform
17 25
 #' @importFrom purrr is_empty
18
-#' @importFrom tibble is_tibble
26
+#' @import lifecycle
19 27
 #'
20
-#' @return A tibble
28
+#' @return An aggregated data frame
21 29
 #' @export
22 30
 #'
23 31
 #' @examples
... ...
@@ -30,7 +38,7 @@
30 38
 #' association_file <- import_association_file(path_AF, root_correct,
31 39
 #'     dates_format = "dmy"
32 40
 #' )
33
-#' aggregated_meta <- aggregate_metadata(association_file, import_stats = FALSE)
41
+#' aggregated_meta <- aggregate_metadata(association_file)
34 42
 #' options(op)
35 43
 aggregate_metadata <- function(association_file,
36 44
     grouping_keys = c(
... ...
@@ -39,88 +47,99 @@ aggregate_metadata <- function(association_file,
39 47
         "Tissue",
40 48
         "TimePoint"
41 49
     ),
42
-    import_stats = TRUE) {
50
+    aggregating_functions = default_meta_agg(),
51
+    import_stats = lifecycle::deprecated()
52
+    ) {
43 53
     # Check parameters
44
-    stopifnot(tibble::is_tibble(association_file))
45
-    min_missing <- setdiff(.min_var_set(), colnames(association_file))
46
-    if (!purrr::is_empty(min_missing)) {
47
-        stop(paste(c(
48
-            "Association file is missing some of the mandatory columns:",
49
-            min_missing
50
-        ), collapse = "\n"))
51
-    }
54
+    stopifnot(is.data.frame(association_file))
52 55
     stopifnot(!is.null(grouping_keys))
53 56
     stopifnot(is.character(grouping_keys))
54
-    keys_missing <- setdiff(grouping_keys, colnames(association_file))
57
+    keys_missing <- grouping_keys[!grouping_keys %in%
58
+                                      colnames(association_file)]
55 59
     if (!purrr::is_empty(keys_missing)) {
56
-        stop(paste(c(
57
-            "Some of the grouping keys you provided were not found:",
58
-            keys_missing
59
-        ), collapse = "\n"))
60
+        rlang::abort(.missing_user_cols_error(keys_missing))
60 61
     }
61
-    stopifnot(is.logical(import_stats) & length(import_stats) == 1)
62
-    # Import if true
63
-    stats <- NULL
64
-    if (import_stats == TRUE) {
65
-        stats <- .import_stats_iss(association_file)
66
-        if (is.null(stats)) {
67
-            if (getOption("ISAnalytics.verbose") == TRUE) {
68
-                message(paste("No Vispa2 stats files found for import,
69
-                    ignoring this step"))
70
-            }
71
-        } else {
72
-            if (getOption("ISAnalytics.widgets") == TRUE) {
73
-                withCallingHandlers(
74
-                    {
75
-                        withRestarts(
76
-                            {
77
-                                report <- stats[[2]]
78
-                                stats <- stats[[1]]
79
-                                widg <- .iss_import_widget(report)
80
-                                print(widg)
81
-                            },
82
-                            print_err = function(cnd) {
83
-                                message(.widgets_error())
84
-                                if (getOption("ISAnalytics.verbose") == TRUE) {
85
-                                    print(paste0(
86
-                                        "--- REPORT IMPORT VISPA2",
87
-                                        "STATS: FILES IMPORTED ---"
88
-                                    ))
89
-                                    print(stats[[2]],
90
-                                        width = Inf,
91
-                                        n = nrow(stats[[2]])
92
-                                    )
93
-                                }
94
-                            }
95
-                        )
96
-                    },
97
-                    error = function(cnd) {
98
-                        message(conditionMessage(cnd))
99
-                        invokeRestart("print_err")
100
-                    }
101
-                )
102
-            } else {
103
-                if (getOption("ISAnalytics.verbose") == TRUE) {
104
-                    print(paste0(
105
-                        "--- REPORT IMPORT VISPA2",
106
-                        "STATS: FILES IMPORTED ---"
107
-                    ))
108
-                    print(stats[[2]],
109
-                        width = Inf,
110
-                        n = nrow(stats[[2]])
111
-                    )
112
-                }
113
-                stats <- stats[[1]]
114
-            }
115
-        }
62
+    if (lifecycle::is_present(import_stats)) {
63
+        lifecycle::deprecate_warn(
64
+            when = "1.1.11",
65
+            what = "aggregate_metadata(import_stats)",
66
+            details = c("Import Vispa2 stats functionality moved",
67
+                        i = paste("Please use `import_Vispa2_stats()`",
68
+                                  "or",
69
+                                  "`import_association_file(import_iss = TRUE)`",
70
+                                  "instead."))
71
+        )
72
+    }
73
+    aggregated <- .aggregate_meta(association_file = association_file,
74
+                                  grouping_keys = grouping_keys,
75
+                                  function_tbl = aggregating_functions)
76
+    if (is.null(aggregated)) {
77
+        rlang::inform(paste("No columns in `aggregating_functions$Column`",
78
+                            "was found in column names of the association",
79
+                            "file. Nothing to return."))
116 80
     }
117
-    aggregated <- .join_and_aggregate(association_file, stats, grouping_keys)
118 81
     aggregated
119 82
 }
120 83
 
84
+
85
+#' Default metadata aggregation function table
86
+#'
87
+#' A default columns-function specifications for \link{aggregate_metadata}
88
+#'
89
+#' @details
90
+#' This data frame contains four columns:
91
+#'
92
+#' * `Column`: holds the name of the column in the association file that
93
+#' should be processed
94
+#' * `Function`: contains either the name of a function (e.g. mean)
95
+#' or a purrr-style lambda (e.g. `~ mean(.x, na.rm = TRUE)`). This function
96
+#' will be applied to the corresponding column specified in `Column`
97
+#' * `Args`: optional additional arguments to pass to the corresponding
98
+#' function. This is relevant ONLY if the corresponding `Function` is a
99
+#' simple function and not a purrr-style lambda.
100
+#' * `Output_colname`: a `glue` specification that will be used to determine
101
+#' a unique output column name. See \link[glue]{glue} for more details.
102
+#'
103
+#' @importFrom tibble tribble
104
+#' @return A data frame
105
+#' @family Aggregate functions
106
+#' @export
107
+#'
108
+#' @examples
109
+#' default_meta_agg()
110
+default_meta_agg <- function() {
111
+    tibble::tribble(
112
+        ~ Column, ~ Function, ~ Args, ~ Output_colname,
113
+        "FusionPrimerPCRDate", ~ suppressWarnings(min(.x, na.rm = TRUE)),
114
+        NA, "{.col}_min",
115
+        "LinearPCRDate", ~ suppressWarnings(min(.x, na.rm = TRUE)),
116
+        NA, "{.col}_min",
117
+        "VCN", ~ suppressWarnings(mean(.x, na.rm = TRUE)),
118
+        NA, "{.col}_avg",
119
+        "ng DNA corrected", ~ suppressWarnings(mean(.x, na.rm = TRUE)),
120
+        NA, "{.col}_avg",
121
+        "Kapa", ~ suppressWarnings(mean(.x, na.rm = TRUE)),
122
+        NA, "{.col}_avg",
123
+        "ng DNA corrected", ~ sum(.x, na.rm = TRUE),
124
+        NA, "{.col}_sum",
125
+        "ulForPool", ~ sum(.x, na.rm = TRUE),
126
+        NA, "{.col}_sum",
127
+        "BARCODE_MUX", ~ sum(.x, na.rm = TRUE),
128
+        NA, "{.col}_sum",
129
+        "TRIMMING_FINAL_LTRLC", ~ sum(.x, na.rm = TRUE),
130
+        NA, "{.col}_sum",
131
+        "LV_MAPPED", ~ sum(.x, na.rm = TRUE),
132
+        NA, "{.col}_sum",
133
+        "BWA_MAPPED_OVERALL", ~ sum(.x, na.rm = TRUE),
134
+        NA, "{.col}_sum",
135
+        "ISS_MAPPED_PP", ~ sum(.x, na.rm = TRUE),
136
+        NA, "{.col}_sum"
137
+    )
138
+}
139
+
121 140
 #' Aggregates matrices values based on specified key.
122 141
 #'
123
-#' \lifecycle{experimental}
142
+#' \lifecycle{maturing}
124 143
 #' Performs aggregation on values contained in the integration matrices based
125 144
 #' on the key and the specified lambda. For more details on how to use this
126 145
 #' function:
... ...
@@ -177,12 +196,16 @@ aggregate_metadata <- function(association_file,
177 196
 #' See details section.
178 197
 #' @param group Other variables to include in the grouping besides `key`,
179 198
 #' can be set to NULL
199
+#' @param join_af_by A character vector representing the joining key
200
+#' between the matrix and the metadata. Useful to re-aggregate already
201
+#' aggregated matrices.
180 202
 #' @family Aggregate functions
181 203
 #'
182
-#' @importFrom purrr walk
204
+#' @importFrom purrr walk set_names map_lgl
183 205
 #' @importFrom rlang expr eval_tidy abort
184 206
 #'
185
-#' @return A list of tibbles or a single tibble according to input
207
+#' @return A list of tibbles or a single tibble aggregated according to
208
+#' the specified arguments
186 209
 #' @export
187 210
 #'
188 211
 #' @examples
... ...
@@ -219,7 +242,8 @@ aggregate_values_by_key <- function(x,
219 242
     group = c(
220 243
         mandatory_IS_vars(),
221 244
         annotation_IS_vars()
222
-    )) {
245
+    ),
246
+    join_af_by = "CompleteAmplificationID") {
223 247
     stopifnot(is.data.frame(x) || is.list(x))
224 248
     if (!is.data.frame(x)) {
225 249
         purrr::walk(x, function(df) {
... ...
@@ -227,8 +251,13 @@ aggregate_values_by_key <- function(x,
227 251
             if (.check_mandatory_vars(df) == FALSE) {
228 252
                 rlang::abort(.non_ISM_error())
229 253
             }
230
-            if (.check_complAmpID(df) == FALSE) {
231
-                rlang::abort(.missing_complAmpID_error())
254
+            if (!all(join_af_by %in% colnames(df))) {
255
+                rlang::abort(c(x = paste("Missing common columns",
256
+                                         "to join metadata"),
257
+                               i = paste("Missing: ",
258
+                                         paste0(join_af_by[!join_af_by %in%
259
+                                                               colnames(df)],
260
+                                                collapse = ", "))))
232 261
             }
233 262
             if (!all(value_cols %in% colnames(df))) {
234 263
                 rlang::abort(.missing_user_cols_error(
... ...
@@ -253,8 +282,13 @@ aggregate_values_by_key <- function(x,
253 282
         if (.check_mandatory_vars(x) == FALSE) {
254 283
             rlang::abort(.non_ISM_error())
255 284
         }
256
-        if (.check_complAmpID(x) == FALSE) {
257
-            rlang::abort(.missing_complAmpID_error())
285
+        if (!all(join_af_by %in% colnames(x))) {
286
+            rlang::abort(c(x = paste("Missing common columns",
287
+                                     "to join metadata"),
288
+                           i = paste("Missing: ",
289
+                                     paste0(join_af_by[!join_af_by %in%
290
+                                                           colnames(x)],
291
+                                            collapse = ", "))))
258 292
         }
259 293
         if (!all(value_cols %in% colnames(x))) {
260 294
             rlang::abort(.missing_user_cols_error(
... ...
@@ -280,7 +314,7 @@ aggregate_values_by_key <- function(x,
280 314
     # Check key
281 315
     stopifnot(is.character(key))
282 316
     if (!all(key %in% colnames(association_file))) {
283
-        stop("Key fields are missing from association file")
317
+        rlang::abort(c(x = "Key fields are missing from association file"))
284 318
     }
285 319
     # Check lambda
286 320
     stopifnot(is.list(lambda))
... ...
@@ -288,24 +322,24 @@ aggregate_values_by_key <- function(x,
288 322
     stopifnot(is.character(group) | is.null(group))
289 323
     if (is.data.frame(x)) {
290 324
         if (!all(group %in% c(colnames(association_file), colnames(x)))) {
291
-            stop(paste("Grouping variables not found"))
325
+            rlang::abort(paste("Grouping variables not found"))
292 326
         }
293 327
     } else {
294 328
         purrr::walk(x, function(df) {
295 329
             if (!all(group %in% c(colnames(association_file), colnames(df)))) {
296
-                stop(paste("Grouping variables not found"))
330
+                rlang::abort(paste("Grouping variables not found"))
297 331
             }
298 332
         })
299 333
     }
300 334
     if (is.data.frame(x)) {
301 335
         x <- list(x)
302 336
         agg_matrix <- .aggregate_lambda(
303
-            x, association_file, key, value_cols, lambda, group
337
+            x, association_file, key, value_cols, lambda, group, join_af_by
304 338
         )
305 339
         return(agg_matrix[[1]])
306 340
     }
307 341
     agg_matrix <- .aggregate_lambda(
308
-        x, association_file, key, value_cols, lambda, group
342
+        x, association_file, key, value_cols, lambda, group, join_af_by
309 343
     )
310 344
     agg_matrix
311 345
 }
... ...
@@ -194,14 +194,6 @@ comparison_matrix <- function(x,
194 194
     ShsCount = "ShsCount") {
195 195
     stopifnot(is.list(x) & !is.data.frame(x))
196 196
     stopifnot(all(names(x) %in% quantification_types()))
197
-    purrr::walk(x, function(m) {
198
-        mand <- .check_mandatory_vars(m)
199
-        amp <- .check_complAmpID(m)
200
-        val <- .check_value_col(m)
201
-        if (any(c(mand, amp, val) == FALSE)) {
202
-            stop(.non_ISM_error())
203
-        }
204
-    })
205 197
     stopifnot(is.character(fragmentEstimate) & length(fragmentEstimate) == 1)
206 198
     stopifnot(is.character(seqCount) & length(seqCount) == 1)
207 199
     stopifnot(is.character(barcodeCount) & length(barcodeCount) == 1)
... ...
@@ -221,8 +213,11 @@ comparison_matrix <- function(x,
221 213
         matrix1 %>%
222 214
             dplyr::full_join(matrix2, by = commoncols)
223 215
     })
224
-    if (any(is.na(result)) & getOption("ISAnalytics.verbose") == TRUE) {
225
-        message(.nas_introduced_msg())
216
+    na_introduced <- purrr::map_lgl(param_names, function(p) {
217
+        any(is.na(result[[p]]))
218
+    })
219
+    if (any(na_introduced) & getOption("ISAnalytics.verbose") == TRUE) {
220
+        rlang::inform(.nas_introduced_msg())
226 221
     }
227 222
     result
228 223
 }
... ...
@@ -248,6 +243,7 @@ comparison_matrix <- function(x,
248 243
 #' in input
249 244
 #' @param ShsCount Name of the shs count values column
250 245
 #' in input
246
+#' @param key Key columns to perform the joining operation
251 247
 #'
252 248
 #' @importFrom purrr is_empty map set_names
253 249
 #' @importFrom dplyr rename
... ...
@@ -278,21 +274,24 @@ comparison_matrix <- function(x,
278 274
 #' )
279 275
 #' separated_matrix <- separate_quant_matrices(matrices)
280 276
 #' options(op)
281
-separate_quant_matrices <- function(x, fragmentEstimate = "fragmentEstimate",
277
+separate_quant_matrices <- function(
278
+    x,
279
+    fragmentEstimate = "fragmentEstimate",
282 280
     seqCount = "seqCount",
283 281
     barcodeCount = "barcodeCount",
284 282
     cellCount = "cellCount",
285
-    ShsCount = "ShsCount") {
283
+    ShsCount = "ShsCount",
284
+    key = c(mandatory_IS_vars(),
285
+            annotation_IS_vars(),
286
+            "CompleteAmplificationID")
287
+    ) {
286 288
     stopifnot(is.data.frame(x))
287
-    if (.check_mandatory_vars(x) == FALSE) {
288
-        stop(.non_ISM_error())
289
+    if (!all(key %in% colnames(x))) {
290
+        rlang::abort(.missing_user_cols_error(key[!key %in% colnames(x)]))
289 291
     }
290
-    if (.check_complAmpID(x) == FALSE) {
291
-        stop(.missing_complAmpID_error())
292
-    }
293
-    num_cols <- .find_exp_cols(x)
292
+    num_cols <- .find_exp_cols(x, key)
294 293
     if (purrr::is_empty(num_cols)) {
295
-        stop(.missing_num_cols_error())
294
+        rlang::abort(.missing_num_cols_error())
296 295
     }
297 296
     stopifnot(is.character(fragmentEstimate) & length(fragmentEstimate) == 1)
298 297
     stopifnot(is.character(seqCount) & length(seqCount) == 1)
... ...
@@ -307,24 +306,16 @@ separate_quant_matrices <- function(x, fragmentEstimate = "fragmentEstimate",
307 306
     )
308 307
     to_copy <- if (any(!num_cols %in% param_col)) {
309 308
         if (all(!num_cols %in% param_col)) {
310
-            stop(.non_quant_cols_error())
309
+            rlang::abort(.non_quant_cols_error())
311 310
         }
312 311
         num_cols[!num_cols %in% param_col]
313 312
     }
314 313
     num_cols <- param_col[param_col %in% num_cols]
315
-    annot <- if (.is_annotated(x)) {
316
-        annotation_IS_vars()
317
-    } else {
318
-        character(0)
319
-    }
320 314
     if (!purrr::is_empty(to_copy) & getOption("ISAnalytics.verbose") == TRUE) {
321
-        message(.non_quant_cols_msg(to_copy))
315
+        rlang::inform(.non_quant_cols_msg(to_copy))
322 316
     }
323 317
     separated <- purrr::map(num_cols, function(quant) {
324
-        x[c(
325
-            mandatory_IS_vars(), annot, "CompleteAmplificationID",
326
-            to_copy, quant
327
-        )] %>% dplyr::rename(Value = quant)
318
+        x[c(key, to_copy, quant)] %>% dplyr::rename(Value = quant)
328 319
     }) %>% purrr::set_names(names(num_cols))
329 320
     separated
330 321
 }
... ...
@@ -91,7 +91,10 @@ remove_collisions <- function(x,
91 91
         ## Check if it contains the "Value" column. If not find all numeric
92 92
         ## columns that are not default columns
93 93
         quantifications_cols <- if (.check_value_col(x) == FALSE) {
94
-            found <- .find_exp_cols(x)
94
+            found <- .find_exp_cols(x,
95
+                                    c(mandatory_IS_vars(),
96
+                                      annotation_IS_vars(),
97
+                                      "CompleteAmplificationID"))
95 98
             if (purrr::is_empty(found)) {
96 99
                 stop(.missing_num_cols_error())
97 100
             }
... ...
@@ -98,6 +98,8 @@ association_file_columns <- function() {
98 98
     )
99 99
 }
100 100
 
101
+# Internal: associates column types with column names for a more precise
102
+# import
101 103
 .af_col_types <- function(mode) {
102 104
     if (mode == "fread") {
103 105
         types <- list(
... ...
@@ -137,9 +139,6 @@ association_file_columns <- function() {
137 139
         return(types)
138 140
     }
139 141
     if (mode == "readr") {
140
-        # date_format <- unlist(strsplit(date_format))
141
-        # date_format <- paste0("%", date_format)
142
-        # date_format <- paste0(date_format, collapse = "%.")
143 142
         types <- list(
144 143
             ProjectID = "c", FUSIONID = "c", PoolID = "c", TagSequence = "c",
145 144
             SubjectID = "c", VectorType = "c", VectorID = "c",
... ...
@@ -180,6 +179,23 @@ association_file_columns <- function() {
180 179
     }
181 180
 }
182 181
 
182
+# Internal: used for file system alignment in import_association_file,
183
+# gives the names of the columns that respectively contain:
184
+# - the absolute path on disk of the project
185
+# - the path to the quantification folder
186
+# - the path to the iss folder
187
+.path_cols_names <- function() {
188
+    list(project = "Path", quant = "Path_quant", iss = "Path_iss")
189
+}
190
+
191
+.matrix_annotated_suffixes <- function() {
192
+    c(".no0.annotated")
193
+}
194
+
195
+.matrix_not_annotated_suffixes <- function() {
196
+    c("")
197
+}
198
+
183 199
 #' Names of the columns of the association file to consider for
184 200
 #' Vispa2 launch.
185 201
 #'
... ...
@@ -221,3 +237,8 @@ reduced_AF_columns <- function() {
221 237
 .supported_fread_compression_formats <- function() {
222 238
     c("gz", "bz2")
223 239
 }
240
+
241
+
242
+flag_logics <- function() {
243
+    c("AND", "OR", "XOR", "NAND", "NOR", "XNOR")
244
+}
... ...
@@ -196,10 +196,13 @@ import_single_Vispa2Matrix <- function(path,
196 196
 #' for which the value of the column "ProjectID" is one of the specified
197 197
 #' values. If multiple columns are present in the list all filtering
198 198
 #' conditions are applied as a logical AND.
199
+#' @param import_iss Import Vispa2 stats and merge them with the
200
+#' association file?
199 201
 #' @param export_widget_path A path on disk to save produced widgets or NULL
200 202
 #' if the user doesn't wish to save the html file
203
+#' @param ... Additional arguments to pass to \code{\link{import_Vispa2_stats}}
201 204
 #' @family Import functions
202
-#' @return A tibble with the contents of the association file plus a column
205
+#' @return A tibble with the contents of the association file plus columns
203 206
 #' containing the path in the file system for every project and pool if found.
204 207
 #' @details The import series of functions is designed to work in combination
205 208
 #' with the use of Vispa2 pipeline, please refer to this article for more
... ...
@@ -229,7 +232,11 @@ import_single_Vispa2Matrix <- function(path,
229 232
 #' @export
230 233
 #'
231 234
 #' @importFrom purrr map_lgl set_names is_empty
232
-#' @importFrom rlang inform
235
+#' @importFrom rlang inform abort dots_list exec
236
+#' @importFrom htmltools tagList browsable
237
+#' @importFrom tibble add_column
238
+#' @import dplyr
239
+#' @importFrom stringr str_pad
233 240
 #' @importFrom magrittr `%>%`
234 241
 #' @seealso \code{\link{date_formats}}
235 242
 #' @examples
... ...
@@ -245,7 +252,9 @@ import_association_file <- function(path,
245 252
     root = NULL, tp_padding = 4, dates_format = "ymd",
246 253
     separator = "\t",
247 254
     filter_for = NULL,
248
-    export_widget_path = NULL) {
255
+    import_iss = FALSE,
256
+    export_widget_path = NULL,
257
+    ...) {
249 258
     # Check parameters
250 259
     stopifnot(is.character(path) & length(path) == 1)
251 260
     stopifnot((is.character(root) & length(root) == 1) || (is.null(root)))
... ...
@@ -257,6 +266,14 @@ import_association_file <- function(path,
257 266
         is.integer(tp_padding)) & length(tp_padding) == 1)
258 267
     stopifnot(length(dates_format) == 1 & dates_format %in% date_formats())
259 268
     stopifnot(is.character(separator) && length(separator) == 1)
269
+    stopifnot(is.logical(import_iss) && length(import_iss) == 1)
270
+    if (import_iss & is.null(root)) {
271
+        rlang::abort(paste(
272
+            "Can't import Vispa2 stats files without",
273
+            "file system alignment. Provide the appropriate",
274
+            "root."
275
+        ))
276
+    }
260 277
     # Check filter
261 278
     stopifnot(is.null(filter_for) ||
262 279
         (is.list(filter_for) && !is.null(names(filter_for))))
... ...
@@ -292,7 +309,57 @@ import_association_file <- function(path,
292 309
         col_probs,
293 310
         missing_dates
294 311
     )))
295
-    if (something_to_report) {
312
+    ## Fix timepoints
313
+    if (!"TimepointMonths" %in% colnames(as_file)) {
314
+        as_file <- as_file %>%
315
+            tibble::add_column(TimepointMonths = NA_real_)
316
+    }
317
+    if (!"TimepointYears" %in% colnames(as_file)) {
318
+        as_file <- as_file %>%
319
+            tibble::add_column(TimepointYears = NA_real_)
320
+    }
321
+    as_file <- as_file %>%
322
+        dplyr::mutate(
323
+            TimepointMonths = dplyr::if_else(
324
+                condition = is.na(.data$TimepointMonths),
325
+                false = .data$TimepointMonths,
326
+                true = dplyr::if_else(
327
+                    condition = as.numeric(.data$TimePoint) == 0,
328
+                    true = 0,
329
+                    false = dplyr::if_else(
330
+                        condition = as.numeric(.data$TimePoint) > 0 &
331
+                            as.numeric(.data$TimePoint) < 30,
332
+                        true = ceiling(as.numeric(.data$TimePoint) / 30),
333
+                        false = round(as.numeric(.data$TimePoint) / 30)
334
+                    )
335
+                )
336
+            ),
337
+            TimepointYears = dplyr::if_else(
338
+                condition = is.na(.data$TimepointYears),
339
+                false = .data$TimepointYears,
340
+                true = dplyr::if_else(
341
+                    condition = as.numeric(.data$TimePoint) == 0,
342
+                    true = 0,
343
+                    false = dplyr::if_else(
344
+                        condition = as.numeric(.data$TimePoint) > 0 &
345
+                            as.numeric(.data$TimePoint) < 360,
346
+                        true = ceiling(as.numeric(.data$TimePoint) / 360),
347
+                        false = round(as.numeric(.data$TimePoint) / 360)
348
+                    )
349
+                )
350
+            )
351
+        ) %>%
352
+        dplyr::mutate(
353
+            TimepointMonths = stringr::str_pad(
354
+                as.character(.data$TimepointMonths),
355
+                pad = "0", side = "left", width = 2
356
+            ),
357
+            TimepointYears = stringr::str_pad(
358
+                as.character(.data$TimepointYears),
359
+                pad = "0", side = "left", width = 2
360
+            ),
361
+        )
362
+    widget <- if (something_to_report) {
296 363
         summary_report <- .summary_af_import_msg(
297 364
             pars_prob = parsing_problems, dates_prob = date_problems,
298 365
             cols_prob = col_probs, crit_na = missing_dates,
... ...
@@ -305,49 +372,261 @@ import_association_file <- function(path,
305 372
             )
306 373
         )
307 374
         if (getOption("ISAnalytics.widgets") == TRUE) {
308
-            withCallingHandlers(
309
-                expr = {
310
-                    withRestarts(
311
-                        {
312
-                            widg <- .checker_widget(
313
-                                parsing_problems,
314
-                                date_problems,
315
-                                checks,
316
-                                col_probs,
317
-                                missing_dates
318
-                            )
319
-                            print(widg)
320
-                        },
321
-                        print_err = function() {
322
-                            rlang::inform(.widgets_error())
323
-                            if (getOption("ISAnalytics.verbose") == TRUE) {
324
-                                rlang::inform(summary_report,
325
-                                    class = "summary_report"
326
-                                )
327
-                            }
328
-                        }
329
-                    )
330
-                },
331
-                error = function(cnd) {
332
-                    rlang::inform(conditionMessage(cnd))
333
-                    invokeRestart("print_err")
334
-                }
375
+            .produce_widget(".checker_widget",
376
+                parsing_probs = parsing_problems,
377
+                date_probs = date_problems,
378
+                checker_df = checks,
379
+                col_probs = col_probs,
380
+                critical_nas = missing_dates
335 381
             )
336
-            if (!is.null(export_widget_path)) {
337
-                .export_widget_file(
338
-                    widg,
339
-                    export_widget_path, "af_import_report.html"
340
-                )
341
-            }
342 382
         } else if (getOption("ISAnalytics.verbose") == TRUE) {
343 383
             rlang::inform(summary_report,
344 384
                 class = "summary_report"
345 385
             )
386
+            NULL
387
+        } else {
388
+            NULL
346 389
         }
347 390
     }
391
+    if (import_iss) {
392
+        dots <- rlang::dots_list(.named = TRUE)
393
+        dots <- dots[!names(dots) %in% c(
394
+            "association_file",
395
+            "export_widget_path",
396
+            "join_with_af"
397
+        )]
398
+        stats <- withCallingHandlers(
399
+            {
400
+                withRestarts(
401
+                    {
402
+                        rlang::exec(import_Vispa2_stats,
403
+                            association_file = as_file,
404
+                            export_widget_path = "INTERNAL",
405
+                            join_with_af = TRUE,
406
+                            !!!dots
407
+                        )
408
+                    },
409
+                    fail_stats = function() {
410
+                        rlang::inform("Issues in importing stats files, skipping")
411
+                    }
412
+                )
413
+            },
414
+            error = function(err) {
415
+                rlang::inform(err$message)
416
+                invokeRestart("fail_stats")
417
+            }
418
+        )
419
+        if (!is.null(stats)) {
420
+            as_file <- stats$stats
421
+            if (getOption("ISAnalytics.widgets") == TRUE) {
422
+                widget <- htmltools::browsable(
423
+                    htmltools::tagList(
424
+                        widget, stats$report_w
425
+                    )
426
+                )
427
+            }
428
+        }
429
+    }
430
+    if (!is.null(widget)) {
431
+        .print_widget(widget, else_verbose = {
432
+            rlang::inform(summary_report,
433
+                class = "summary_report"
434
+            )
435
+        })
436
+    } else if (getOption("ISAnalytics.verbose") == TRUE) {
437
+        rlang::inform(summary_report,
438
+            class = "summary_report"
439
+        )
440
+    }
441
+    if (!is.null(export_widget_path)) {
442
+        .export_widget_file(widget,
443
+            path = export_widget_path,
444
+            def_file_name = "af_import_report.html"
445
+        )
446
+    }
348 447
     as_file
349 448
 }
350 449
 
450
+#' Import Vispa2 stats given the aligned association file.
451
+#'
452
+#' \lifecycle{experimental}
453
+#' Imports all the Vispa2 stats files for each pool provided the association
454
+#' file has been aligned with the file system
455
+#' (see \code{\link{import_association_file}}).
456
+#'
457
+#' @param association_file The file system aligned association file
458
+#' (contains columns with absolute paths to the 'iss' folder)
459
+#' @param file_prefixes A character vector with known file prefixes
460
+#'  to match on file names. NOTE: the elements represent regular expressions.
461
+#'  For defaults see \link{default_iss_file_prefixes}.
462
+#' @param join_with_af Logical, if TRUE the imported stats files will be
463
+#' merged with the association file, if false a single data frame holding
464
+#' only the stats will be returned.
465
+#' @param pool_col A single string. What is the name of the pool column
466
+#' used in the Vispa2 run? This will be used as a key to perform a join
467
+#' operation with the stats files `POOL` column.
468
+#' @param export_widget_path Either NULL or the path on disk where the
469
+#' widget report should be saved.
470
+#'
471
+#' @family Import functions
472
+#' @importFrom rlang inform abort
473
+#' @importFrom stats setNames
474
+#' @import dplyr
475
+#' @importFrom htmltools browsable tagList
476
+#'
477
+#' @return A data frame
478
+#' @export
479
+#'
480
+#' @examples
481
+#' op <- options("ISAnalytics.widgets" = FALSE)
482
+#' path <- system.file("extdata", "ex_association_file.tsv",
483
+#'     package = "ISAnalytics"
484
+#' )
485
+#' root_pth <- system.file("extdata", "fs.zip", package = "ISAnalytics")
486
+#' root <- unzip_file_system(root_pth, "fs")
487
+#' association_file <- import_association_file(path, root, dates_format = "dmy")
488
+#' af_with_stats <- import_Vispa2_stats(association_file)
489
+#' options(op)
490
+import_Vispa2_stats <- function(association_file,
491
+    file_prefixes = default_iss_file_prefixes(),
492
+    join_with_af = TRUE,
493
+    pool_col = "concatenatePoolIDSeqRun",
494
+    export_widget_path = NULL) {
495
+    ## Check param
496
+    if (!is.data.frame(association_file)) {
497
+        rlang::abort(.af_not_imported_err())
498
+    }
499
+    path_cols <- .path_cols_names()
500
+    if (!path_cols$iss %in% colnames(association_file)) {
501
+        rlang::abort(.af_not_aligned_err())
502
+    }
503
+    min_cols <- c("ProjectID", "concatenatePoolIDSeqRun", path_cols$iss)
504
+    if (!all(min_cols %in% colnames(association_file))) {
505
+        rlang::abort(
506
+            .missing_needed_cols(
507
+                min_cols[!min_cols %in% colnames(association_file)]
508
+            )
509
+        )
510
+    }
511
+    stopifnot(is.character(file_prefixes))
512
+    stopifnot(is.logical(join_with_af))
513
+    join_with_af <- join_with_af[1]
514
+    if (join_with_af) {
515
+        stopifnot(is.character(pool_col) & length(pool_col) == 1)
516
+        stopifnot(pool_col %in% colnames(association_file))
517
+    }
518
+    ## export path has a special placeholder "INTERNAL" for calling
519
+    ## the function inside import_association_file
520
+    stopifnot(is.null(export_widget_path) || is.character(export_widget_path))
521
+    ## Import
522
+    stats <- .import_stats_iss(
523
+        association_file = association_file,
524
+        prefixes = file_prefixes
525
+    )
526
+    report <- stats$report
527
+    stats <- stats$stats
528
+    ## Produce widget report if requested
529
+    widget_stats_import <- if (getOption("ISAnalytics.widgets") == TRUE) {
530
+        .produce_widget(".iss_import_widget", report = report)
531
+    } else {
532
+        NULL
533
+    }
534
+    ## - IF NO STATS IMPORTED (STATS ARE NULL)
535
+    if (is.null(stats)) {
536
+        if (getOption("ISAnalytics.verbose") == TRUE) {
537
+            rlang::inform(paste("No stats files imported"))
538
+        }
539
+        if (!is.null(widget_stats_import)) {
540
+            if (is.null(export_widget_path)) {
541
+                .print_widget(widget_stats_import)
542
+            } else if (export_widget_path != "INTERNAL") {
543
+                .print_widget(widget_stats_import)
544
+            }
545
+        }
546
+        if (!is.null(export_widget_path) && export_widget_path != "INTERNAL") {
547
+            .export_widget_file(
548
+                widget_stats_import,
549
+                export_widget_path,
550
+                "vispa2_stats_import_report.html"
551
+            )
552
+        }
553
+        if (!is.null(export_widget_path) && export_widget_path == "INTERNAL") {
554
+            return(list(stats = NULL, report_w = widget_stats_import))
555
+        } else {
556
+            return(NULL)
557
+        }
558
+    }
559
+    ## - IF STATS NOT NULL
560
+    ## Merge if requested
561
+    if (join_with_af) {
562
+        association_file <- association_file %>%
563
+            dplyr::left_join(stats, by = c(
564
+                stats::setNames("POOL", pool_col),
565
+                "TagSequence" = "TAG"
566
+            ))
567
+        ## Detect potential problems
568
+        missing_stats <- association_file %>%
569
+            dplyr::filter(is.na(.data$RUN_NAME)) %>%
570
+            dplyr::select(
571
+                .data$ProjectID,
572
+                .data$concatenatePoolIDSeqRun,
573
+                dplyr::all_of(pool_col),
574
+                .data$CompleteAmplificationID
575
+            ) %>%
576
+            dplyr::distinct()
577
+        widget_stats_import <- if (getOption("ISAnalytics.widgets") == TRUE) {
578
+            miss_widget <- .produce_widget(".missing_iss_widget",
579
+                missing_iss = missing_stats
580
+            )
581
+            htmltools::browsable(
582
+                htmltools::tagList(widget_stats_import, miss_widget)
583
+            )
584
+        } else {
585
+            NULL
586
+        }
587
+        if (!is.null(widget_stats_import)) {
588
+            if (is.null(export_widget_path)) {
589
+                .print_widget(widget_stats_import)
590
+            } else if (export_widget_path != "INTERNAL") {
591
+                .print_widget(widget_stats_import)
592
+            }
593
+        }
594
+        if (!is.null(export_widget_path) && export_widget_path != "INTERNAL") {
595
+            .export_widget_file(
596
+                widget_stats_import,
597
+                export_widget_path,
598
+                "vispa2_stats_import_report.html"
599
+            )
600
+        }
601
+        if (!is.null(export_widget_path) && export_widget_path == "INTERNAL") {
602
+            return(list(
603
+                stats = association_file,
604
+                report_w = widget_stats_import
605
+            ))
606
+        } else {
607
+            return(association_file)
608
+        }
609
+    }
610
+    if (!is.null(widget_stats_import)) {
611
+        if (is.null(export_widget_path)) {
612
+            .print_widget(widget_stats_import)
613
+        } else if (export_widget_path != "INTERNAL") {
614
+            .print_widget(widget_stats_import)
615
+        }
616
+    }
617
+    if (!is.null(export_widget_path) && export_widget_path != "INTERNAL") {
618
+        .export_widget_file(
619
+            widget_stats_import,
620
+            export_widget_path,
621
+            "vispa2_stats_import_report.html"
622
+        )
623
+    }
624
+    if (!is.null(export_widget_path) && export_widget_path == "INTERNAL") {
625
+        return(list(stats = stats, report_w = widget_stats_import))
626
+    } else {
627
+        return(stats)
628
+    }
629
+}
351 630
 
352 631
 #' Import integration matrices based on the association file.
353 632
 #'
... ...
@@ -817,3 +1096,17 @@ matching_options <- function() {
817 1096
 date_formats <- function() {
818 1097
     c("ymd", "ydm", "mdy", "myd", "dmy", "dym", "yq")
819 1098
 }
1099
+
1100
+
1101
+#' Default regex prefixes for Vispa2 stats files.
1102
+#'
1103
+#' Note that each element is a regular expression.
1104
+#'
1105
+#' @return A character vector of regexes
1106
+#' @export
1107
+#'
1108
+#' @examples
1109
+#' default_iss_file_prefixes()
1110
+default_iss_file_prefixes <- function() {
1111
+    c("stats\\.sequence.", "stats\\.matrix.")
1112
+}
... ...
@@ -94,13 +94,9 @@
94 94
 #' @keywords internal
95 95
 #
96 96
 # @return A character vector of column names
97
-.find_exp_cols <- function(x) {
97
+.find_exp_cols <- function(x, to_exclude) {
98 98
     stopifnot(is.data.frame(x))
99
-    default_cols <- c(
100
-        mandatory_IS_vars(), annotation_IS_vars(),
101
-        "CompleteAmplificationID"
102
-    )
103
-    remaining <- colnames(x)[!colnames(x) %in% default_cols]
99
+    remaining <- colnames(x)[!colnames(x) %in% to_exclude]
104 100
     remaining_numeric <- purrr::map_lgl(remaining, function(y) {
105 101
         exp <- rlang::expr(`$`(x, !!y))
106 102
         exp <- rlang::eval_tidy(exp)
... ...
@@ -128,37 +124,6 @@
128 124
 
129 125
 #---- USED IN : import_single_Vispa2Matrix ----
130 126
 
131
-# Internal function to convert a messy matrix to a tidy data frame
132
-#
133
-# @description Uses the suite of functions provided by the
134
-# tidyverse to produce a more dense and ordered structure.
135
-# This function is not exported and should be called in other importing
136
-# functions.
137
-#
138
-# @param df Messy tibble to convert to tidy
139
-# @keywords internal
140
-#
141
-# @return a tidy tibble
142
-#' @importFrom rlang .data
143
-#' @importFrom tidyr pivot_longer
144
-#' @importFrom dplyr arrange all_of filter
145
-#' @importFrom forcats fct_inseq as_factor
146
-.messy_to_tidy <- function(df) {
147
-    exp_cols <- which(!(colnames(df) %in% c(
148
-        mandatory_IS_vars(),
149
-        annotation_IS_vars()
150
-    )))
151
-    isadf_tidy <- df %>%
152
-        tidyr::pivot_longer(
153
-            cols = dplyr::all_of(exp_cols),
154
-            names_to = "CompleteAmplificationID",
155
-            values_to = "Value",
156
-            values_drop_na = TRUE
157
-        ) %>%
158
-        dplyr::filter(.data$Value > 0)
159
-    isadf_tidy
160
-}
161
-
162 127
 # Internal function to auto-detect the type of IS based on the headers.
163 128
 #
164 129
 # @param df the data frame to inspect
... ...
@@ -353,9 +318,9 @@
353 318
         before <- as_file %>% dplyr::select(dplyr::all_of(date_cols))
354 319
         as_file <- as_file %>%
355 320
             dplyr::mutate(dplyr::across(date_cols,
356
-                .fns = ~ lubridate::parse_date_time(.x,
321
+                .fns = ~ lubridate::as_date(lubridate::parse_date_time(.x,
357 322
                     orders = date_format
358
-                )
323
+                ))
359 324
             ))
360 325
         date_failures <- purrr::map_dfr(date_cols, function(col) {
361 326
             before_col <- purrr::pluck(before, col)
... ...
@@ -388,10 +353,10 @@
388 353
 # @param df The imported association file (data.frame or tibble)
389 354
 # @param root_folder Path to the root folder
390 355
 # @keywords internal
391
-#' @importFrom dplyr select distinct mutate bind_rows
392
-#' @importFrom fs dir_ls
393
-#' @importFrom purrr pmap is_empty reduce map_dbl
394
-#' @importFrom stringr str_replace_all str_extract_all
356
+#' @importFrom dplyr select distinct mutate
357
+#' @importFrom fs dir_ls path dir_exists
358
+#' @importFrom purrr pmap_dfr is_empty
359
+#' @importFrom stringr str_replace_all
395 360
 #' @importFrom tibble tibble
396 361
 #
397 362
 # @return A data frame containing, for each ProjectID and
... ...
@@ -408,89 +373,70 @@
408 373
             .data$PathToFolderProjectID
409 374
         ) %>%
410 375
         dplyr::distinct()
411
-    root_regexp <- stringr::str_replace_all(root_folder, "\\/", "\\\\/")
412
-    if (root_folder == "") {
413
-        results_df <- purrr::pmap(
414
-            temp_df,
415
-            function(...) {
416
-                cur <- tibble::tibble(...)
417
-                pattern <- paste0(
418
-                    "^", root_regexp,
419
-                    stringr::str_replace_all(
420
-                        cur$PathToFolderProjectID, "\\/", "\\\\/"
421
-                    ),
422
-                    "\\/quantification\\/",
423
-                    cur$concatenatePoolIDSeqRun, "$"
424
-                )
425
-                pattern <- stringr::str_replace_all(pattern,
426
-                    pattern = "\\\\\\/\\\\\\/",
427
-                    replacement = "\\\\/"
428
-                )
429
-                dirExists <- file.exists(cur$PathToFolderProjectID)
430
-                value <- if (!dirExists) {
431
-                    NA_character_
432
-                } else {
433
-                    tree_struct <- fs::dir_ls(
434
-                        path = cur$PathToFolderProjectID, recurse = TRUE,
435
-                        type = "directory", fail = FALSE
436
-                    )
437
-                    found <- stringr::str_extract_all(tree_struct, pattern)
438
-                    found <- unlist(found)
439
-                    if (purrr::is_empty(found)) {
440
-                        NA_character_
441
-                    } else {
442
-                        found
443
-                    }
444
-                }
445
-                tibble::tibble(
446
-                    ProjectID = cur$ProjectID,
447
-                    concatenatePoolIDSeqRun =
448
-                        cur$concatenatePoolIDSeqRun,
449
-                    Path = value
450
-                )
376
+    path_cols <- .path_cols_names()
377
+    results_df <- purrr::pmap_dfr(
378
+        temp_df,
379
+        function(...) {
380
+            cur <- tibble::tibble(...)
381
+            if (is.na(cur$PathToFolderProjectID)) {
382
+                return(cur %>%
383
+                    dplyr::mutate(
384
+                        !!path_cols$project := NA_character_,
385
+                        !!path_cols$quant := NA_character_,
386
+                        !!path_cols$iss := NA_character_
387
+                    ))
451 388
             }
452
-        )
453
-    } else {
454
-        tree_struct <- fs::dir_ls(
455
-            path = root_folder, recurse = TRUE,
456
-            type = "directory", fail = FALSE
457
-        )
458
-        results_df <- purrr::pmap(
459
-            temp_df,
460
-            function(...) {
461
-                cur <- tibble::tibble(...)
462
-                pattern <- paste0(
463
-                    "^", root_regexp,
464
-                    stringr::str_replace_all(
465
-                        cur$PathToFolderProjectID, "\\/", "\\\\/"
466
-                    ),
467
-                    "\\/quantification\\/",
468
-                    cur$concatenatePoolIDSeqRun, "$"
469
-                )
470
-                pattern <- stringr::str_replace_all(pattern,
471
-                    pattern = "\\\\\\/\\\\\\/",
472
-                    replacement = "\\\\/"
473
-                )
474
-                found <- stringr::str_extract_all(tree_struct, pattern)
475
-                found <- unlist(found)
476
-                value <- if (purrr::is_empty(found)) {
477
-                    NA_character_
478
-                } else {
479
-                    found
480
-                }
481
-                tibble::tibble(
482
-                    ProjectID = cur$ProjectID,
483
-                    concatenatePoolIDSeqRun =
484
-                        cur$concatenatePoolIDSeqRun,
485
-                    Path = value
486
-                )
389
+            project_folder <- fs::path(
390
+                fs::path(root_folder),
391
+                cur$PathToFolderProjectID
392
+            )
393
+            quant_folder <- fs::path(
394
+                "quantification",
395
+                fs::path(cur$concatenatePoolIDSeqRun)
396
+            )
397
+            iss_folder <- fs::path(
398
+                "iss",
399
+                fs::path(cur$concatenatePoolIDSeqRun)
400
+            )
401
+            dirExists <- fs::dir_exists(project_folder)
402
+            if (!dirExists) {
403
+                return(cur %>%
404
+                    dplyr::mutate(
405
+                        !!path_cols$project := NA_character_,
406
+                        !!path_cols$quant := NA_character_,
407
+                        !!path_cols$iss := NA_character_
408
+                    ))
487 409
             }
488
-        )
489
-    }
490
-    checker_df <- purrr::reduce(results_df, dplyr::bind_rows) %>%
410
+            quant_found <- fs::dir_ls(
411
+                path = project_folder, recurse = TRUE,
412
+                type = "directory", fail = FALSE,
413
+                regexp = quant_folder
414
+            )
415
+            if (length(quant_found) == 0) {
416
+                quant_found <- NA_character_
417
+            }
418
+            iss_found <- fs::dir_ls(
419
+                path = project_folder, recurse = TRUE,
420
+                type = "directory", fail = FALSE,
421
+                regexp = iss_folder
422
+            )
423
+            if (length(iss_found) == 0) {
424
+                iss_found <- NA_character_
425
+            }
426
+            return(
427
+                cur %>%
428
+                    dplyr::mutate(
429
+                        !!path_cols$project := project_folder,
430
+                        !!path_cols$quant := quant_found,
431
+                        !!path_cols$iss := iss_found
432
+                    )
433
+            )
434
+        }
435
+    )
436
+    checker_df <- results_df %>%
491 437
         dplyr::mutate(
492
-            Found = ifelse(!is.na(.data$Path), TRUE, FALSE),
493
-            .before = .data$Path
438
+            Found = ifelse(!is.na(.data[[path_cols$project]]), TRUE, FALSE),
439
+            .before = .data[[path_cols$project]]
494 440
         )
495 441
     checker_df
496 442
 }
... ...
@@ -508,21 +454,257 @@
508 454
 # @keywords internal
509 455
 #
510 456
 # @return An updated association file with absolute paths
457
+#' @importFrom dplyr left_join select
511 458
 .update_af_after_alignment <- function(as_file, checks, root) {
512
-    # Finally import modified association file
513
-    checks <- checks %>%
514
-        dplyr::select(
515
-            .data$ProjectID, .data$concatenatePoolIDSeqRun,
516
-            .data$Path
517
-        )
518 459
     as_file <- as_file %>%
519