Browse code

[UPDATE] Partial update for v. 1.5.4 - Updated logics for recalibration + dynamic vars

Giulia Pais authored on 28/03/2022 13:01:11
Showing 4 changed files

... ...
@@ -695,6 +695,9 @@
695 695
         dplyr::group_split()
696 696
     single_type_eval <- function(sub_df) {
697 697
         tag_type <- types[[sub_df$tag[1]]]
698
+        if (is.null(tag_type) || purrr::is_empty(tag_type)) {
699
+          return(list(type = "DF", result = sub_df))
700
+        }
698 701
         out <- sub_df %>%
699 702
             dplyr::filter(.data$types %in% tag_type)
700 703
         if (nrow(out) == 0) {
... ...
@@ -3613,15 +3616,6 @@
3613 3616
 # **NOTE: this function is meant to be called on a SINGLE GROUP,
3614 3617
 # meaning a subset of an integration matrix in which all rows
3615 3618
 # share the same chr (and optionally same strand).**
3616
-#
3617
-# @param x An integration matrix subset (see description)
3618
-# @param threshold The numeric value representing an absolute number
3619
-# of bases for which two integrations are considered distinct
3620
-# @param keep_criteria The string with selecting criteria
3621
-# @param annotated Is `x` annotated? Logical value
3622
-# @param num_cols A character vector with the names of the numeric columns
3623
-# @param max_val_col The column to consider if criteria is max_value
3624
-# @param produce_map Produce recalibration map?
3625 3619
 #' @importFrom dplyr arrange
3626 3620
 #' @importFrom purrr is_empty map_dfr
3627 3621
 #' @importFrom rlang expr eval_tidy .data
... ...
@@ -3635,20 +3629,30 @@
3635 3629
     annotated,
3636 3630
     num_cols,
3637 3631
     max_val_col,
3632
+    sample_col,
3633
+    req_tags,
3634
+    add_col_lambdas,
3638 3635
     produce_map) {
3636
+    locus_col <- req_tags %>%
3637
+      dplyr::filter(.data$tag == "locus") %>%
3638
+      dplyr::pull(.data$names)
3639 3639
     ## Order by integration_locus
3640
-    x <- x %>% dplyr::arrange(.data$integration_locus)
3640
+    x <- x %>% dplyr::arrange(.data[[locus_col]])
3641 3641
     x <- data.table::setDT(x)
3642 3642
     map_recalibr <- if (produce_map == TRUE) {
3643
-        mand_vars <- mandatory_IS_vars()
3644
-        rec_map <- unique(x[, ..mand_vars])
3643
+        req_vars <- req_tags$names
3644
+        rec_map <- unique(x[, mget(req_vars)])
3645 3645
         data.table::setnames(
3646 3646
             x = rec_map,
3647
-            old = mandatory_IS_vars(),
3648
-            new = paste0(mandatory_IS_vars(), "_before")
3647
+            old = req_vars,
3648
+            new = paste0(req_vars, "_before")
3649 3649
         )
3650
-        rec_map[, c("chr_after", "integration_locus_after", "strand_after") :=
3651
-            list(NA_character_, NA_real_, NA_character_)]
3650
+        na_type <- purrr::map(req_vars, ~ {
3651
+          col_type <- typeof(x[, get(.x)])
3652
+          fun_name <- paste0("as.", col_type)
3653
+          return(do.call(what = fun_name, args = list(NA)))
3654
+        })
3655
+        rec_map[, c(paste(req_vars, "after", sep = "_")) := na_type]
3652 3656
     } else {
3653 3657
         NULL
3654 3658
     }
... ...
@@ -3657,27 +3661,27 @@
3657 3661
         # If index is the last row in the data frame return
3658 3662
         if (index == nrow(x)) {
3659 3663
             if (produce_map == TRUE) {
3664
+              is_predicate <- purrr::map(req_vars, ~ {
3665
+                var_before <- sym(paste0(.x, "_before"))
3666
+                value <- x[index, get(.x)]
3667
+                rlang::expr(!!var_before == !!value)
3668
+              }) %>%
3669
+                purrr::reduce(~ rlang::expr(!!.x & !!.y))
3660 3670
                 map_recalibr[
3661
-                    chr_before == x[index, ]$chr &
3662
-                        integration_locus_before ==
3663
-                            x[index, ]$integration_locus &
3664
-                        strand_before == x[index, ]$strand,
3671
+                    eval(is_predicate),
3665 3672
                     c(
3666
-                        "chr_after",
3667
-                        "integration_locus_after",
3668
-                        "strand_after"
3673
+                      paste0(req_vars, "_after")
3669 3674
                     ) :=
3670
-                        list(
3671
-                            chr_before,
3672
-                            integration_locus_before,
3673
-                            strand_before
3675
+                        purrr::map(
3676
+                          req_vars,
3677
+                          ~ eval(sym(paste0(.x, "_before")))
3674 3678
                         )
3675 3679
                 ]
3676 3680
             }
3677 3681
             return(list(recalibrated_matrix = x, map = map_recalibr))
3678 3682
         }
3679 3683
         ## Compute interval for row
3680
-        interval <- x[index, ]$integration_locus + 1 + threshold
3684
+        interval <- x[index, ][[locus_col]] + 1 + threshold
3681 3685
         ## Look ahead for every integration that falls in the interval
3682 3686
         near <- numeric()
3683 3687
         k <- index
... ...
@@ -3686,7 +3690,7 @@
3686 3690
                 break
3687 3691
             }
3688 3692
             k <- k + 1
3689
-            if (x[k, ]$integration_locus < interval) {
3693
+            if (x[k, ][[locus_col]] < interval) {
3690 3694
                 # Saves the indexes of the rows that are in the interval
3691 3695
                 near <- append(near, k)
3692 3696
             } else {
... ...
@@ -3694,10 +3698,10 @@
3694 3698
             }
3695 3699
         }
3696 3700
         window <- c(index, near)
3701
+        row_to_keep <- index
3697 3702
         if (!purrr::is_empty(near)) {
3698 3703
             ## Change loci according to criteria
3699 3704
             ######## CRITERIA PROCESSING
3700
-            row_to_keep <- index
3701 3705
             if (keep_criteria == "max_value") {
3702 3706
                 expr <- rlang::expr(`$`(x[window, ], !!max_val_col))
3703 3707
                 to_check <- rlang::eval_tidy(expr)
... ...
@@ -3709,54 +3713,68 @@
3709 3713
             }
3710 3714
             # Fill map if needed
3711 3715
             if (produce_map == TRUE) {
3712
-                map_recalibr[
3713
-                    chr_before %chin% x[window, ]$chr &
3714
-                        integration_locus_before %in%
3715
-                            x[window, ]$integration_locus &
3716
-                        strand_before %chin% x[window, ]$strand,
3717
-                    c(
3718
-                        "chr_after",
3719
-                        "integration_locus_after",
3720
-                        "strand_after"
3721
-                    ) :=
3722
-                        list(
3723
-                            x[row_to_keep, ]$chr,
3724
-                            x[row_to_keep, ]$integration_locus,
3725
-                            x[row_to_keep, ]$strand
3726
-                        )
3727
-                ]
3716
+              is_predicate <- purrr::map(req_vars, ~ {
3717
+                var_before <- sym(paste0(.x, "_before"))
3718
+                value <- x[window, get(.x)]
3719
+                if (!is.character(value)) {
3720
+                  return(rlang::expr(!!var_before %in% !!value))
3721
+                }
3722
+                rlang::expr(!!var_before %chin% !!value)
3723
+              }) %>%
3724
+                purrr::reduce(~ rlang::expr(!!.x & !!.y))
3725
+              map_recalibr[
3726
+                eval(is_predicate),
3727
+                c(
3728
+                  paste0(req_vars, "_after")
3729
+                ) :=
3730
+                  purrr::map(
3731
+                    req_vars,
3732
+                    ~ x[row_to_keep][[.x]]
3733
+                  )
3734
+              ]
3728 3735
             }
3729
-            # Change loci and strand of near integrations
3736
+            # Change loci and other ids of near integrations
3737
+            fields_to_change <- req_tags$names
3730 3738
             if (annotated) {
3731
-                x[near, c(
3732
-                    "integration_locus",
3733
-                    "strand",
3734
-                    annotation_IS_vars()
3735
-                ) := list(
3736
-                    x[row_to_keep, ]$integration_locus,
3737
-                    x[row_to_keep, ]$strand,
3738
-                    x[row_to_keep, ]$GeneName,
3739
-                    x[row_to_keep, ]$GeneStrand
3740
-                )]
3739
+              fields_to_change <- c(fields_to_change, annotation_IS_vars())
3740
+              x[near, c(
3741
+                req_tags$names,
3742
+                annotation_IS_vars()
3743
+              ) := x[row_to_keep,
3744
+                     mget(fields_to_change)]]
3741 3745
             } else {
3742
-                x[near, c("integration_locus", "strand") := list(
3743
-                    x[row_to_keep, ]$integration_locus,
3744
-                    x[row_to_keep, ]$strand
3745
-                )]
3746
+              x[near, c(
3747
+                req_tags$names
3748
+              ) := x[row_to_keep,
3749
+                     mget(fields_to_change)]]
3746 3750
             }
3747 3751
             ## Aggregate same IDs
3748 3752
             starting_rows <- nrow(x)
3749 3753
             repeat {
3750
-                t <- x$CompleteAmplificationID[window]
3754
+                t <- x[[sample_col]][window]
3751 3755
                 d <- unique(t[duplicated(t)])
3752 3756
                 if (purrr::is_empty(d)) {
3753 3757
                     break
3754 3758
                 }
3755 3759
                 dupl_indexes <- which(t == d[1])
3756
-                values_sum <- colSums(x[window[dupl_indexes], ..num_cols],
3760
+                values_sum <- colSums(x[window[dupl_indexes], mget(num_cols)],
3757 3761
                     na.rm = TRUE
3758 3762
                 )
3759 3763
                 x[window[dupl_indexes[1]], c(num_cols) := as.list(values_sum)]
3764
+                ### Aggregate additional cols
3765
+                if (!is.null(add_col_lambdas) &&
3766
+                    !purrr::is_empty(add_col_lambdas)) {
3767
+                  agg_cols <- purrr::map(names(add_col_lambdas), ~ {
3768
+                    if (is.null(add_col_lambdas[[.x]])) {
3769
+                      return(x[window[dupl_indexes[1]], ][[.x]])
3770
+                    }
3771
+                    values <- x[window[dupl_indexes], ][[.x]]
3772
+                    return(rlang::as_function(add_col_lambdas[[.x]])(values))
3773
+                  })
3774
+                  x[window[dupl_indexes[1]], c(names(add_col_lambdas)) :=
3775
+                      agg_cols]
3776
+                }
3777
+
3760 3778
                 x <- x[-window[dupl_indexes[-1]], ]
3761 3779
                 to_drop <- seq(
3762 3780
                     from = length(window),
... ...
@@ -3773,22 +3791,25 @@
3773 3791
             }
3774 3792
         } else {
3775 3793
             if (produce_map == TRUE) {
3776
-                map_recalibr[
3777
-                    chr_before == x[index, ]$chr &
3778
-                        integration_locus_before ==
3779
-                            x[index, ]$integration_locus &
3780
-                        strand_before == x[index, ]$strand,
3781
-                    c(
3782
-                        "chr_after",
3783
-                        "integration_locus_after",
3784
-                        "strand_after"
3785
-                    ) :=
3786
-                        list(
3787
-                            chr_before,
3788
-                            integration_locus_before,
3789
-                            strand_before
3790
-                        )
3791
-                ]
3794
+              is_predicate <- purrr::map(req_vars, ~ {
3795
+                var_before <- sym(paste0(.x, "_before"))
3796
+                value <- x[window, get(.x)]
3797
+                if (!is.character(value)) {
3798
+                  return(rlang::expr(!!var_before %in% !!value))
3799
+                }
3800
+                rlang::expr(!!var_before %chin% !!value)
3801
+              }) %>%
3802
+                purrr::reduce(~ rlang::expr(!!.x & !!.y))
3803
+              map_recalibr[
3804
+                eval(is_predicate),
3805
+                c(
3806
+                  paste0(req_vars, "_after")
3807
+                ) :=
3808
+                  purrr::map(
3809
+                    req_vars,
3810
+                    ~ x[row_to_keep][[.x]]
3811
+                  )
3812
+              ]
3792 3813
             }
3793 3814
             index <- index + 1
3794 3815
         }
... ...
@@ -3820,23 +3841,43 @@
3820 3841
 #
3821 3842
 # @return Nothing
3822 3843
 .write_recalibr_map <- function(map, file_path) {
3823
-    if (fs::is_dir(file_path)) {
3824
-        if (!fs::dir_exists(file_path)) {
3825
-            fs::dir_create(file_path)
3826
-        }
3827
-        gen_filename <- .generate_rec_map_filename()
3828
-        file_path <- fs::path(file_path, gen_filename)
3844
+  if (!fs::file_exists(file_path)) {
3845
+    ext <- fs::path_ext(file_path)
3846
+    ## if ext is empty assume it's a folder
3847
+    if (ext == "") {
3848
+      fs::dir_create(file_path)
3849
+      gen_filename <- .generate_rec_map_filename()
3850
+      tmp_filename <- fs::path(file_path, gen_filename)
3829 3851
     } else {
3830
-        if (fs::path_ext(file_path) == "") {
3831
-            file_path <- fs::path_ext_set(file_path, "tsv.gz")
3852
+      tmp_filename <- fs::path_ext_remove(file_path)
3853
+      if (ext %in% .compressed_formats()) {
3854
+        ext <- paste(fs::path_ext(tmp_filename), ext, sep = ".")
3855
+        tmp_filename <- fs::path_ext_remove(tmp_filename)
3856
+      }
3857
+      if (!ext %in% c("tsv", paste("tsv", .compressed_formats(), sep = "."),
3858
+                      "csv", paste("csv", .compressed_formats(), sep = "."),
3859
+                      "txt", paste("txt", .compressed_formats(), sep = ".")
3860
+      )) {
3861
+        if (getOption("ISAnalytics.verbose") == TRUE) {
3862
+          warn <- c("Recalibration file format unsupported",
3863
+                    i = "Writing file in 'tsv.gz' format")
3864
+          rlang::inform(warn, class = "rec_unsupp_ext")
3832 3865
         }
3866
+        tmp_filename <- paste0(tmp_filename, ".tsv.gz")
3867
+      } else {
3868
+        tmp_filename <- paste0(tmp_filename, ".", ext)
3869
+      }
3833 3870
     }
3871
+  } else if (fs::is_dir(file_path)) {
3872
+    gen_filename <- .generate_rec_map_filename()
3873
+    tmp_filename <- fs::path(file_path, gen_filename)
3874
+  }
3834 3875
     withRestarts(
3835 3876
         {
3836
-            data.table::fwrite(map, file = file_path, sep = "\t", na = "")
3877
+            data.table::fwrite(map, file = tmp_filename, sep = "\t", na = "")
3837 3878
             saved_msg <- paste(
3838 3879
                 "Recalibration map saved to: ",
3839
-                file_path
3880
+                tmp_filename
3840 3881
             )
3841 3882
             if (getOption("ISAnalytics.verbose") == TRUE) {
3842 3883
                 rlang::inform(saved_msg)
... ...
@@ -71,121 +71,189 @@
71 71
 #' head(rec)
72 72
 compute_near_integrations <- function(x,
73 73
     threshold = 4,
74
-    keep_criteria = "max_value",
75
-    strand_specific = TRUE,
74
+    is_identity_tags = c("chromosome", "is_strand"),
75
+    keep_criteria = c("max_value", "keep_first"),
76 76
     value_columns = c("seqCount", "fragmentEstimate"),
77 77
     max_value_column = "seqCount",
78
+    sample_id_column = pcr_id_column(),
79
+    additional_agg_lambda = list(.default = default_rec_agg_lambdas()),
80
+    max_workers = 4,
78 81
     map_as_file = TRUE,
79
-    file_path = default_report_path()) {
80
-    # Check parameters
82
+    file_path = default_report_path(),
83
+    strand_specific = lifecycle::deprecated()) {
84
+    ## --- Check parameters
81 85
     stopifnot(is.data.frame(x))
82
-    ## Check tibble is an integration matrix
83
-    if (!.check_mandatory_vars(x)) {
84
-        rlang::abort(.missing_mand_vars())
85
-    }
86
-    ## Check it contains CompleteAmplificationID column
87
-    if (!.check_complAmpID(x)) {
88
-        rlang::abort(.missing_cAmp_sub_msg())
89
-    }
90
-    ## Check numeric columns
86
+    stopifnot(is.numeric(threshold) || is.integer(threshold))
87
+    threshold <- threshold[1]
91 88
     stopifnot(is.character(value_columns))
92 89
     stopifnot(is.character(max_value_column))
93 90
     num_cols <- unique(c(value_columns, max_value_column[1]))
94 91
     if (!all(num_cols %in% colnames(x))) {
95
-        rlang::abort(.missing_user_cols_error(
96
-            num_cols[!num_cols %in% colnames(x)]
97
-        ))
92
+      rlang::abort(.missing_needed_cols(
93
+        num_cols[!num_cols %in% colnames(x)]
94
+      ))
98 95
     }
99
-    stopifnot(is.numeric(threshold) || is.integer(threshold))
100
-    threshold <- threshold[1]
101
-    criteria <- rlang::arg_match(keep_criteria, values = c(
102
-        "max_value",
103
-        "keep_first"
104
-    ))
105
-    stopifnot(is.logical(strand_specific))
106
-    strand_specific <- strand_specific[1]
96
+    criteria <- rlang::arg_match(keep_criteria)
97
+    stopifnot(is.character(sample_id_column))
98
+    sample_id_column <- sample_id_column[1]
107 99
     stopifnot(is.logical(map_as_file))
108 100
     map_as_file <- map_as_file[1]
109 101
     if (map_as_file == TRUE) {
110
-        stopifnot(is.character(file_path))
111
-        file_path <- file_path[1]
102
+      stopifnot(is.character(file_path))
103
+      file_path <- file_path[1]
104
+    }
105
+    if (lifecycle::is_present(strand_specific)) {
106
+      lifecycle::deprecate_warn(
107
+        when = "1.5.4",
108
+        what = "compute_near_integrations(strand_specific)",
109
+        with = "compute_near_integrations(is_identity_tags)",
110
+        details = paste("See the documentation for details,",
111
+                        "the argument will be likely dropped in the next",
112
+                        "release cycle.")
113
+      )
114
+      stopifnot(is.logical(strand_specific))
115
+      strand_specific <- strand_specific[1]
116
+      if (!"is_strand" %in% is_identity_tags) {
117
+        is_identity_tags <- c(is_identity_tags, "is_strand")
118
+      }
119
+    }
120
+    ## --- Check tags
121
+    stopifnot(is.null(is_identity_tags) || is.character(is_identity_tags))
122
+    required_tags <- list("locus" = c("int", "numeric"))
123
+    if (!is.null(is_identity_tags)) {
124
+      is_identity_tags <- is_identity_tags[is_identity_tags != "locus"]
125
+      id_tags_types <- purrr::map(is_identity_tags, ~ NULL) %>%
126
+        purrr::set_names(is_identity_tags)
127
+      required_tags <- append(required_tags, id_tags_types)
128
+    }
129
+    required_tag_cols <- .check_required_cols(
130
+      required_tags = required_tags,
131
+      vars_df = mandatory_IS_vars(TRUE),
132
+      duplicate_politic = "error")
133
+    if (!all(required_tag_cols$names %in% colnames(x))) {
134
+      rlang::abort(.missing_needed_cols(required_tag_cols$names[
135
+        !required_tag_cols$names %in% colnames(x)
136
+      ]))
137
+    }
138
+    if (!sample_id_column %in% colnames(x)) {
139
+      rlang::abort(.missing_needed_cols(sample_id_column))
112 140
     }
113 141
     ## Is x annotated?
114 142
     annotated <- .is_annotated(x)
115
-    # Split data for parallel execution
116
-    split <- if (strand_specific == TRUE) {
117
-        x %>%
118
-            dplyr::group_by(.data$chr, .data$strand) %>%
119
-            dplyr::group_split()
120
-    } else {
121
-        x %>%
122
-            dplyr::group_by(.data$chr) %>%
123
-            dplyr::group_split()
143
+    ## Any additional columns present?
144
+    additional_cols <- colnames(x)[!colnames(x) %in% c(
145
+      required_tag_cols$names, sample_id_column, value_columns,
146
+      annotation_IS_vars()
147
+    )]
148
+    find_lambda <- function(.x) {
149
+      if (.x %in% names(additional_agg_lambda)) {
150
+        return(additional_agg_lambda[[.x]])
151
+      }
152
+      defaults <- additional_agg_lambda[[".defaults"]]
153
+      if (is.null(defaults)) {
154
+        return(NULL)
155
+      }
156
+      col_type <- typeof(x[[.x]])
157
+      return(defaults[[col_type]])
124 158
     }
125
-    ## Select only groups with 2 or more rows
126
-    split_to_process <- split[purrr::map_lgl(split, ~ nrow(.x) > 1)]
127
-    # # Set up parallel workers
128
-    if (.Platform$OS.type == "windows") {
159
+    add_cols_lambdas <- purrr::map(additional_cols, find_lambda) %>%
160
+      purrr::set_names(additional_cols)
161
+    add_cols_lambdas <- add_cols_lambdas[purrr::map_lgl(add_cols_lambdas,
162
+                                                        ~ !is.null(.x))]
163
+    # Process
164
+    if (is.null(is_identity_tags) || purrr::is_empty(is_identity_tags)) {
165
+      result <- .sliding_window(
166
+        x = x,
167
+        threshold = threshold,
168
+        keep_criteria = criteria,
169
+        annotated = annotated,
170
+        num_cols = num_cols,
171
+        max_val_col = max_value_column,
172
+        sample_col = sample_id_column,
173
+        req_tags = required_tag_cols,
174
+        add_col_lambdas = add_cols_lambdas,
175
+        produce_map = map_as_file
176
+      )
177
+      recalibr_m <- result$recalibrated_matrix
178
+      maps <- result$map
179
+
180
+    } else {
181
+      stopifnot(is.numeric(max_workers))
182
+      max_workers <- max_workers[1]
183
+      is_identity_names <- required_tag_cols %>%
184
+        dplyr::filter(.data$tag != "locus") %>%
185
+        dplyr::pull(.data$names)
186
+      # Split data for parallel execution
187
+      split <- x %>%
188
+          dplyr::group_by(dplyr::across(dplyr::all_of(is_identity_names))) %>%
189
+          dplyr::group_split()
190
+      ## Select only groups with 2 or more rows
191
+      split_to_process <- split[purrr::map_lgl(split, ~ nrow(.x) > 1)]
192
+      # Set up parallel workers
193
+      if (.Platform$OS.type == "windows") {
129 194
         p <- BiocParallel::SnowParam(
130
-            stop.on.error = FALSE,
131
-            progressbar = TRUE,
132
-            tasks = length(split_to_process),
133
-            exportglobals = TRUE
195
+          workers = max_workers,
196
+          progressbar = getOption("ISAnalytics.verbose"),
197
+          tasks = length(split_to_process),
198
+          exportglobals = TRUE
134 199
         )
135
-    } else {
200
+      } else {
136 201
         p <- BiocParallel::MulticoreParam(
137
-            stop.on.error = FALSE,
138
-            progressbar = TRUE,
139
-            tasks = length(split_to_process),
140
-            exportglobals = FALSE
141
-        )
142
-    }
143
-    FUN <- function(x) {
144
-        .sliding_window(x,
145
-            threshold = threshold,
146
-            keep_criteria = criteria,
147
-            num_cols = num_cols,
148
-            annotated = annotated,
149
-            max_val_col = max_value_column,
150
-            produce_map = map_as_file
202
+          workers = max_workers,
203
+          progressbar = getOption("ISAnalytics.verbose"),
204
+          tasks = length(split_to_process),
205
+          exportglobals = FALSE
151 206
         )
152
-    }
153
-    ## Obtain result: list of lists
154
-    result <- BiocParallel::bptry(
155
-        BiocParallel::bplapply(
156
-            split_to_process,
157
-            FUN = FUN,
158
-            BPPARAM = p
159
-        )
160
-    )
161
-    BiocParallel::bpstop(p)
162
-    ## Obtain single list
163
-    recalibr_m <- purrr::map(result, ~ .x$recalibrated_matrix)
164
-    recalibr_m <- data.table::rbindlist(recalibr_m)
165
-    maps <- purrr::map(result, ~ .x$map)
166
-    maps <- data.table::rbindlist(maps)
167
-    ## Add all rows that were not part of recalibration
168
-    split_fine <- split[purrr::map_lgl(split, ~ !nrow(.x) > 1)]
169
-    if (length(split_fine) > 0) {
207
+      }
208
+      ## Obtain result: list of lists
209
+      result <- BiocParallel::bplapply(
210
+        split_to_process,
211
+        FUN = .sliding_window,
212
+        threshold = threshold,
213
+        keep_criteria = criteria,
214
+        annotated = annotated,
215
+        num_cols = num_cols,
216
+        max_val_col = max_value_column,
217
+        sample_col = sample_id_column,
218
+        req_tags = required_tag_cols,
219
+        add_col_lambdas = add_cols_lambdas,
220
+        produce_map = map_as_file,
221
+        BPPARAM = p
222
+      )
223
+      BiocParallel::bpstop(p)
224
+      ## Obtain single list
225
+      recalibr_m <- purrr::map(result, ~ .x$recalibrated_matrix)
226
+      recalibr_m <- data.table::rbindlist(recalibr_m)
227
+      maps <- purrr::map(result, ~ .x$map)
228
+      maps <- data.table::rbindlist(maps)
229
+      ## Add all rows that were not part of recalibration
230
+      split_fine <- split[purrr::map_lgl(split, ~ !nrow(.x) > 1)]
231
+      if (length(split_fine) > 0) {
170 232
         split_fine <- purrr::reduce(
171
-            split_fine,
172
-            function(g1, g2) {
173
-                g1 <- data.table::setDT(g1)
174
-                g2 <- data.table::setDT(g2)
175
-                data.table::rbindlist(list(g1, g2))
176
-            }
177
-        ) %>% data.table::setDT()
178
-        mand_vars <- mandatory_IS_vars()
179
-        map_fine <- unique(split_fine[, ..mand_vars])
233
+          split_fine,
234
+          function(g1, g2) {
235
+            g1 <- data.table::setDT(g1)
236
+            g2 <- data.table::setDT(g2)
237
+            data.table::rbindlist(list(g1, g2))
238
+          }
239
+        )
240
+        split_fine <- data.table::setDT(split_fine)
241
+        map_fine <- unique(split_fine[, mget(required_tag_cols$names)])
180 242
         data.table::setnames(
181
-            x = map_fine,
182
-            old = mandatory_IS_vars(),
183
-            new = paste0(mandatory_IS_vars(), "_before")
243
+          x = map_fine,
244
+          old = mget(required_tag_cols$names),
245
+          new = paste0(mget(required_tag_cols$names), "_before")
184 246
         )
185
-        map_fine[, c("chr_after", "integration_locus_after", "strand_after") :=
186
-            list(chr_before, integration_locus_before, strand_before)]
247
+        map_fine[, c(
248
+          paste0(required_tag_cols$names, "_after")
249
+        ) :=
250
+          purrr::map(
251
+            required_tag_cols$names,
252
+            ~ eval(sym(paste0(.x, "_before")))
253
+          )]
187 254
         recalibr_m <- data.table::rbindlist(list(recalibr_m, split_fine))
188 255
         maps <- data.table::rbindlist(list(maps, map_fine))
256
+      }
189 257
     }
190 258
     if (map_as_file) {
191 259
         ### Manage file
... ...
@@ -204,3 +272,12 @@ compute_near_integrations <- function(x,
204 272
     }
205 273
     return(recalibr_m)
206 274
 }
275
+
276
+default_rec_agg_lambdas <- function() {
277
+  list(
278
+    character = ~ paste0(.x, collapse = ";"),
279
+    integer = ~ sum(.x, na.rm = TRUE),
280
+    double = ~ sum(.x, na.rm = TRUE),
281
+    logical = ~ all(.x)
282
+  )
283
+}
... ...
@@ -1059,12 +1059,12 @@ test_that(paste(func_name[17], "produces report in right location"), {
1059 1059
 #------------------------------------------------------------------------------#
1060 1060
 # Tests import_parallel_Vispa2Matrices
1061 1061
 #------------------------------------------------------------------------------#
1062
-test_that(paste(func_name[17], "executes correctly"), {
1062
+test_that(paste(func_name[18], "executes correctly"), {
1063 1063
     matrices <- import_parallel_Vispa2Matrices(
1064 1064
         association_file = local_af_corr,
1065 1065
         quantification_type = c("seqCount", "fragmentEstimate"),
1066 1066
         mode = "AUTO", report_path = NULL
1067 1067
     )
1068
-    expect_true(nrow(matrices) == 5904 & ncol(matrices) == 8)
1068
+    expect_true(nrow(matrices) == 1689 & ncol(matrices) == 8)
1069 1069
     expect_true(all(c("seqCount", "fragmentEstimate") %in% colnames(matrices)))
1070 1070
 })
... ...
@@ -43,6 +43,7 @@ sample_group2 <- tibble::tibble(
43 43
         48
44 44
     )
45 45
 )
46
+
46 47
 sample_group_mult1 <- tibble::tibble(
47 48
     chr = c(rep_len("1", 6)),
48 49
     integration_locus = c(
... ...
@@ -275,10 +276,14 @@ expected_for_smplmult2_mv <- tibble::tibble(
275 276
 )
276 277
 
277 278
 test_that(".sliding_window produces correct output for sample1", {
279
+  withr::local_options(list(ISAnalytics.mandatory_is_vars = "default"))
278 280
     result <- .sliding_window(
279 281
         x = sample_group1, threshold = 4,
280 282
         keep_criteria = "keep_first", annotated = FALSE,
281 283
         num_cols = "Value", max_val_col = "Value",
284
+        sample_col = "CompleteAmplificationID",
285
+        req_tags = mandatory_IS_vars(TRUE),
286
+        add_col_lambdas = NULL,
282 287
         produce_map = TRUE
283 288
     )
284 289
     expect_equal(result$recalibrated_matrix, expected_for_smpl1_kf,
... ...
@@ -291,6 +296,9 @@ test_that(".sliding_window produces correct output for sample1", {
291 296
         x = sample_group1, threshold = 4,
292 297
         keep_criteria = "max_value", annotated = FALSE,
293 298
         num_cols = "Value", max_val_col = "Value",
299
+        sample_col = "CompleteAmplificationID",
300
+        req_tags = mandatory_IS_vars(TRUE),
301
+        add_col_lambdas = NULL,
294 302
         produce_map = TRUE
295 303
     )
296 304
     expect_equal(result$recalibrated_matrix, expected_for_smpl1_kf,
... ...
@@ -302,10 +310,14 @@ test_that(".sliding_window produces correct output for sample1", {
302 310
 })
303 311
 
304 312
 test_that(".sliding_window produces correct output for sample2", {
313
+  withr::local_options(list(ISAnalytics.mandatory_is_vars = "default"))
305 314
     result <- .sliding_window(
306 315
         x = sample_group2, threshold = 4,
307 316
         keep_criteria = "keep_first", annotated = FALSE,
308 317
         num_cols = "Value", max_val_col = "Value",
318
+        sample_col = "CompleteAmplificationID",
319
+        req_tags = mandatory_IS_vars(TRUE),
320
+        add_col_lambdas = NULL,
309 321
         produce_map = TRUE
310 322
     )
311 323
     expect_equal(result$recalibrated_matrix, expected_for_smpl2_kf,
... ...
@@ -318,6 +330,9 @@ test_that(".sliding_window produces correct output for sample2", {
318 330
         x = sample_group2, threshold = 4,
319 331
         keep_criteria = "max_value", annotated = FALSE,
320 332
         num_cols = "Value", max_val_col = "Value",
333
+        sample_col = "CompleteAmplificationID",
334
+        req_tags = mandatory_IS_vars(TRUE),
335
+        add_col_lambdas = NULL,
321 336
         produce_map = TRUE
322 337
     )
323 338
     expect_equal(result$recalibrated_matrix, expected_for_smpl2_mv,
... ...
@@ -329,11 +344,15 @@ test_that(".sliding_window produces correct output for sample2", {
329 344
 })
330 345
 
331 346
 test_that(".sliding_window produces correct output for sample1 - mult column", {
347
+  withr::local_options(list(ISAnalytics.mandatory_is_vars = "default"))
332 348
     result <- .sliding_window(
333 349
         x = sample_group_mult1, threshold = 4,
334 350
         keep_criteria = "keep_first", annotated = FALSE,
335 351
         num_cols = c("seqCount", "fragmentEstimate"),
336 352
         max_val_col = "seqCount",
353
+        sample_col = "CompleteAmplificationID",
354
+        req_tags = mandatory_IS_vars(TRUE),
355
+        add_col_lambdas = NULL,
337 356
         produce_map = TRUE
338 357
     )
339 358
     expect_equal(result$recalibrated_matrix, expected_for_smplmult1_kf,
... ...
@@ -347,6 +366,9 @@ test_that(".sliding_window produces correct output for sample1 - mult column", {
347 366
         keep_criteria = "max_value", annotated = FALSE,
348 367
         num_cols = c("seqCount", "fragmentEstimate"),
349 368
         max_val_col = "seqCount",
369
+        sample_col = "CompleteAmplificationID",
370
+        req_tags = mandatory_IS_vars(TRUE),
371
+        add_col_lambdas = NULL,
350 372
         produce_map = TRUE
351 373
     )
352 374
     expect_equal(result$recalibrated_matrix, expected_for_smplmult1_kf,
... ...
@@ -358,11 +380,15 @@ test_that(".sliding_window produces correct output for sample1 - mult column", {
358 380
 })
359 381
 
360 382
 test_that(".sliding_window produces correct output for sample2 - mult column", {
383
+  withr::local_options(list(ISAnalytics.mandatory_is_vars = "default"))
361 384
     result <- .sliding_window(
362 385
         x = sample_group_mult2, threshold = 4,
363 386
         keep_criteria = "keep_first", annotated = FALSE,
364 387
         num_cols = c("seqCount", "fragmentEstimate"),
365 388
         max_val_col = "seqCount",
389
+        sample_col = "CompleteAmplificationID",
390
+        req_tags = mandatory_IS_vars(TRUE),
391
+        add_col_lambdas = NULL,
366 392
         produce_map = TRUE
367 393
     )
368 394
     expect_equal(result$recalibrated_matrix, expected_for_smplmult2_kf,
... ...
@@ -376,6 +402,9 @@ test_that(".sliding_window produces correct output for sample2 - mult column", {
376 402
         keep_criteria = "max_value", annotated = FALSE,
377 403
         num_cols = c("seqCount", "fragmentEstimate"),
378 404
         max_val_col = "seqCount",
405
+        sample_col = "CompleteAmplificationID",
406
+        req_tags = mandatory_IS_vars(TRUE),
407
+        add_col_lambdas = NULL,
379 408
         produce_map = TRUE
380 409
     )
381 410
     expect_equal(result$recalibrated_matrix, expected_for_smplmult2_mv,
... ...
@@ -386,11 +415,212 @@ test_that(".sliding_window produces correct output for sample2 - mult column", {
386 415
     )
387 416
 })
388 417
 
418
+test_that(".sliding_window works on annotated", {
419
+  withr::local_options(list(ISAnalytics.mandatory_is_vars = "default"))
420
+  annot_group1 <- sample_group1 %>%
421
+    dplyr::mutate(GeneName = paste0("GENE", seq_len(nrow(sample_group1))),
422
+                  GeneStrand = "+", .after = "strand")
423
+  expected_matrix <- expected_for_smpl1_kf %>%
424
+    dplyr::mutate(GeneName = c("GENE4", "GENE4", "GENE4", "GENE4",
425
+                               "GENE5", "GENE5"),
426
+                  GeneStrand = "+", .after = "strand")
427
+  result <- .sliding_window(
428
+    x = annot_group1, threshold = 4,
429
+    keep_criteria = "max_value", annotated = TRUE,
430
+    num_cols = "Value",
431
+    max_val_col = "Value",
432
+    sample_col = "CompleteAmplificationID",
433
+    req_tags = mandatory_IS_vars(TRUE),
434
+    add_col_lambdas = NULL,
435
+    produce_map = TRUE
436
+  )
437
+  expect_equal(result$recalibrated_matrix, expected_matrix,
438
+               ignore_attr = TRUE
439
+  )
440
+  expect_equal(result$map, recalibr_map_smpl1_kf,
441
+               ignore_attr = TRUE
442
+  )
443
+  annot_group2 <- sample_group2 %>%
444
+    dplyr::mutate(GeneName = paste0("GENE", seq_len(nrow(sample_group2))),
445
+                  GeneStrand = "-", .after = "strand")
446
+  expected_matrix <- expected_for_smpl2_mv %>%
447
+    dplyr::mutate(GeneName = c("GENE1", "GENE7", "GENE6", "GENE6", "GENE3"),
448
+                  GeneStrand = "-", .after = "strand")
449
+  result <- .sliding_window(
450
+    x = annot_group2, threshold = 4,
451
+    keep_criteria = "max_value", annotated = TRUE,
452
+    num_cols = "Value",
453
+    max_val_col = "Value",
454
+    sample_col = "CompleteAmplificationID",
455
+    req_tags = mandatory_IS_vars(TRUE),
456
+    add_col_lambdas = NULL,
457
+    produce_map = TRUE
458
+  )
459
+  expect_equal(result$recalibrated_matrix, expected_matrix,
460
+               ignore_attr = TRUE
461
+  )
462
+  expect_equal(result$map, recalibr_map_smpl2_mv,
463
+               ignore_attr = TRUE
464
+  )
465
+})
466
+
467
+test_that(".sliding_window aggregates add columns correctly", {
468
+  withr::local_options(list(ISAnalytics.mandatory_is_vars = "default"))
469
+  s2_add_cols <- sample_group2 %>%
470
+    dplyr::mutate(ann1 = seq_len(nrow(sample_group2)),
471
+                  ann2 = c("a", "b", "c", "d", "e", "f", "g"),
472
+                  ann3 = c(TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE),
473
+                  .after = "strand")
474
+  result <- .sliding_window(
475
+    x = s2_add_cols, threshold = 4,
476
+    keep_criteria = "max_value", annotated = FALSE,
477
+    num_cols = "Value",
478
+    max_val_col = "Value",
479
+    sample_col = "CompleteAmplificationID",
480
+    req_tags = mandatory_IS_vars(TRUE),
481
+    add_col_lambdas = list(ann1 = ~sum(.x, na.rm = TRUE),
482
+                           ann2 = ~paste0(.x, collapse = ";"),
483
+                           ann3 = ~any(.x)),
484
+    produce_map = TRUE
485
+  )
486
+  expected_v1 <- expected_for_smpl2_mv %>%
487
+    dplyr::mutate(ann1 = c(6, 7, 6, 6, 3),
488
+                  ann2 = c("e;a", "g", "d;b", "f", "c"),
489
+                  ann3 = c(TRUE, FALSE, TRUE, TRUE, FALSE),
490
+                  .after = "strand")
491
+  expect_equal(result$recalibrated_matrix, expected_v1,
492
+               ignore_attr = TRUE
493
+  )
494
+  expect_equal(result$map, recalibr_map_smpl2_mv,
495
+               ignore_attr = TRUE
496
+  )
497
+  ## NULL lambdas correspond to keep any value
498
+  result <- .sliding_window(
499
+    x = s2_add_cols, threshold = 4,
500
+    keep_criteria = "max_value", annotated = FALSE,
501
+    num_cols = "Value",
502
+    max_val_col = "Value",
503
+    sample_col = "CompleteAmplificationID",
504
+    req_tags = mandatory_IS_vars(TRUE),
505
+    add_col_lambdas = list(ann1 = NULL,
506
+                           ann2 = NULL,
507
+                           ann3 = NULL),
508
+    produce_map = TRUE
509
+  )
510
+  expected_v2 <- expected_for_smpl2_mv %>%
511
+    dplyr::mutate(ann1 = c(5, 7, 4, 6, 3),
512
+                  ann2 = c("e", "g", "d", "f", "c"),
513
+                  ann3 = c(FALSE, FALSE, TRUE, TRUE, FALSE),
514
+                  .after = "strand")
515
+  expect_equal(result$recalibrated_matrix, expected_v2,
516
+               ignore_attr = TRUE
517
+  )
518
+  expect_equal(result$map, recalibr_map_smpl2_mv,
519
+               ignore_attr = TRUE
520
+  )
521
+})
522
+
523
+test_that(".sliding_window works with custom vars", {
524
+  withr::local_options(list(ISAnalytics.mandatory_is_vars = "default"))
525
+  customized <- sample_group2 %>%
526
+    dplyr::rename(chrom = "chr", locus = "integration_locus")
527
+  temp_vars <- mandatory_IS_vars(TRUE)
528
+  temp_vars[1, ]$names <- "chrom"
529
+  temp_vars[2, ]$names <- "locus"
530
+  withr::with_options(list(ISAnalytics.mandatory_is_vars = temp_vars),
531
+                      {
532
+                        result <- .sliding_window(
533
+                          x = customized, threshold = 4,
534
+                          keep_criteria = "max_value", annotated = FALSE,
535
+                          num_cols = "Value", max_val_col = "Value",
536
+                          sample_col = "CompleteAmplificationID",
537
+                          req_tags = mandatory_IS_vars(TRUE),
538
+                          add_col_lambdas = NULL,
539
+                          produce_map = TRUE
540
+                        )
541
+                      })
542
+  expected_matrix <- expected_for_smpl2_mv %>%
543
+    dplyr::rename(chrom = "chr", locus = "integration_locus")
544
+  expect_equal(result$recalibrated_matrix, expected_matrix,
545
+               ignore_attr = TRUE
546
+  )
547
+  expect_equal(result$map, recalibr_map_smpl2_mv %>%
548
+                 dplyr::rename(chrom_before = "chr_before",
549
+                               locus_before = "integration_locus_before",
550
+                               chrom_after = "chr_after",
551
+                               locus_after = "integration_locus_after"),
552
+               ignore_attr = TRUE
553
+  )
554
+})
555
+
556
+#------------------------------------------------------------------------------#
557
+# Tests .write_recalibr_map
558
+#------------------------------------------------------------------------------#
559
+test_that(".write_recalibr_map works if path provided is dir", {
560
+  tmp_dir <- fs::path(tempdir(), "ISAtest")
561
+  ## Works if folder doesn't exist (creates it and writes the file in it)
562
+  withr::with_file(tmp_dir, {
563
+    .write_recalibr_map(recalibr_map_smpl1_kf, tmp_dir)
564
+    expect_true(fs::dir_exists(tmp_dir))
565
+    expect_true(length(fs::dir_ls(tmp_dir)) == 1)
566
+  })
567
+  ## Works if folder already exists
568
+  withr::with_file(tmp_dir, {
569
+    fs::dir_create(tmp_dir)
570
+    .write_recalibr_map(recalibr_map_smpl1_kf, tmp_dir)
571
+    expect_true(fs::dir_exists(tmp_dir))
572
+    expect_true(length(fs::dir_ls(tmp_dir)) == 1)
573
+  })
574
+})
575
+
576
+test_that(".write_recalibr_map works if path provided is file", {
577
+  ## Works for accepted formats
578
+  for (ext in c("tsv", "csv", "txt",
579
+                      paste("tsv", .compressed_formats(), sep = "."),
580
+                      paste("csv", .compressed_formats(), sep = "."),
581
+                      paste("txt", .compressed_formats(), sep = "."))) {
582
+    file_name <- paste0("recalibration_map.", ext)
583
+    tmp_file <- fs::path(tempdir(), file_name)
584
+    withr::with_file(tmp_file, {
585
+      .write_recalibr_map(recalibr_map_smpl1_kf, tmp_file)
586
+      expect_true(fs::file_exists(tmp_file))
587
+    })
588
+  }
589
+  ## Changes extension for unsupported extension
590
+  file_name <- "recalibration_map.xslx"
591
+  expected_filename <- "recalibration_map.tsv.gz"
592
+  tmp_file <- fs::path(tempdir(), file_name)
593
+  withr::with_options(list(ISAnalytics.verbose = TRUE), {
594
+    expect_message({
595
+      expect_message({
596
+        withr::with_file(tmp_file, {
597
+          .write_recalibr_map(recalibr_map_smpl1_kf, tmp_file)
598
+          expect_false(fs::file_exists(tmp_file))
599
+          expect_true(fs::file_exists(fs::path(tempdir(), expected_filename)))
600
+        })
601
+      }, class = "rec_unsupp_ext")
602
+    })
603
+  })
604
+  file_name <- "recalibration_map.gz"
605
+  tmp_file <- fs::path(tempdir(), file_name)
606
+  withr::with_options(list(ISAnalytics.verbose = TRUE), {
607
+    expect_message({
608
+    expect_message({
609
+      withr::with_file(tmp_file, {
610
+        .write_recalibr_map(recalibr_map_smpl1_kf, tmp_file)
611
+        expect_false(fs::file_exists(tmp_file))
612
+        expect_true(fs::file_exists(fs::path(tempdir(), expected_filename)))
613
+      })
614
+    }, class = "rec_unsupp_ext")
615
+    })
616
+  })
617
+})
618
+
389 619
 #------------------------------------------------------------------------------#
390 620
 # Tests compute_near_integrations
391 621
 #------------------------------------------------------------------------------#
392
-## TEST VALUES
393 622
 test_that("compute_near_integrations produces correct output for total", {
623
+  withr::local_options(list(ISAnalytics.mandatory_is_vars = "default"))
394 624
     total_simple <- sample_group1 %>% dplyr::bind_rows(sample_group2)
395 625
     total_mult <- sample_group_mult1 %>% dplyr::bind_rows(sample_group_mult2)
396 626
     res <- compute_near_integrations(
... ...
@@ -420,3 +650,16 @@ test_that("compute_near_integrations produces correct output for total", {
420 650
         ignore_attr = TRUE
421 651
     )
422 652
 })
653
+
654
+test_that("compute_near_integrations warns deprecation", {
655
+  expect_deprecated({
656
+    res <- compute_near_integrations(
657
+      x = sample_group1,
658
+      keep_criteria = "keep_first",
659
+      max_value_column = "Value",
660
+      value_columns = "Value",
661
+      strand_specific = TRUE,
662
+      map_as_file = FALSE
663
+    )
664
+  })
665
+})