... | ... |
@@ -695,6 +695,9 @@ |
695 | 695 |
dplyr::group_split() |
696 | 696 |
single_type_eval <- function(sub_df) { |
697 | 697 |
tag_type <- types[[sub_df$tag[1]]] |
698 |
+ if (is.null(tag_type) || purrr::is_empty(tag_type)) { |
|
699 |
+ return(list(type = "DF", result = sub_df)) |
|
700 |
+ } |
|
698 | 701 |
out <- sub_df %>% |
699 | 702 |
dplyr::filter(.data$types %in% tag_type) |
700 | 703 |
if (nrow(out) == 0) { |
... | ... |
@@ -3613,15 +3616,6 @@ |
3613 | 3616 |
# **NOTE: this function is meant to be called on a SINGLE GROUP, |
3614 | 3617 |
# meaning a subset of an integration matrix in which all rows |
3615 | 3618 |
# share the same chr (and optionally same strand).** |
3616 |
-# |
|
3617 |
-# @param x An integration matrix subset (see description) |
|
3618 |
-# @param threshold The numeric value representing an absolute number |
|
3619 |
-# of bases for which two integrations are considered distinct |
|
3620 |
-# @param keep_criteria The string with selecting criteria |
|
3621 |
-# @param annotated Is `x` annotated? Logical value |
|
3622 |
-# @param num_cols A character vector with the names of the numeric columns |
|
3623 |
-# @param max_val_col The column to consider if criteria is max_value |
|
3624 |
-# @param produce_map Produce recalibration map? |
|
3625 | 3619 |
#' @importFrom dplyr arrange |
3626 | 3620 |
#' @importFrom purrr is_empty map_dfr |
3627 | 3621 |
#' @importFrom rlang expr eval_tidy .data |
... | ... |
@@ -3635,20 +3629,30 @@ |
3635 | 3629 |
annotated, |
3636 | 3630 |
num_cols, |
3637 | 3631 |
max_val_col, |
3632 |
+ sample_col, |
|
3633 |
+ req_tags, |
|
3634 |
+ add_col_lambdas, |
|
3638 | 3635 |
produce_map) { |
3636 |
+ locus_col <- req_tags %>% |
|
3637 |
+ dplyr::filter(.data$tag == "locus") %>% |
|
3638 |
+ dplyr::pull(.data$names) |
|
3639 | 3639 |
## Order by integration_locus |
3640 |
- x <- x %>% dplyr::arrange(.data$integration_locus) |
|
3640 |
+ x <- x %>% dplyr::arrange(.data[[locus_col]]) |
|
3641 | 3641 |
x <- data.table::setDT(x) |
3642 | 3642 |
map_recalibr <- if (produce_map == TRUE) { |
3643 |
- mand_vars <- mandatory_IS_vars() |
|
3644 |
- rec_map <- unique(x[, ..mand_vars]) |
|
3643 |
+ req_vars <- req_tags$names |
|
3644 |
+ rec_map <- unique(x[, mget(req_vars)]) |
|
3645 | 3645 |
data.table::setnames( |
3646 | 3646 |
x = rec_map, |
3647 |
- old = mandatory_IS_vars(), |
|
3648 |
- new = paste0(mandatory_IS_vars(), "_before") |
|
3647 |
+ old = req_vars, |
|
3648 |
+ new = paste0(req_vars, "_before") |
|
3649 | 3649 |
) |
3650 |
- rec_map[, c("chr_after", "integration_locus_after", "strand_after") := |
|
3651 |
- list(NA_character_, NA_real_, NA_character_)] |
|
3650 |
+ na_type <- purrr::map(req_vars, ~ { |
|
3651 |
+ col_type <- typeof(x[, get(.x)]) |
|
3652 |
+ fun_name <- paste0("as.", col_type) |
|
3653 |
+ return(do.call(what = fun_name, args = list(NA))) |
|
3654 |
+ }) |
|
3655 |
+ rec_map[, c(paste(req_vars, "after", sep = "_")) := na_type] |
|
3652 | 3656 |
} else { |
3653 | 3657 |
NULL |
3654 | 3658 |
} |
... | ... |
@@ -3657,27 +3661,27 @@ |
3657 | 3661 |
# If index is the last row in the data frame return |
3658 | 3662 |
if (index == nrow(x)) { |
3659 | 3663 |
if (produce_map == TRUE) { |
3664 |
+ is_predicate <- purrr::map(req_vars, ~ { |
|
3665 |
+ var_before <- sym(paste0(.x, "_before")) |
|
3666 |
+ value <- x[index, get(.x)] |
|
3667 |
+ rlang::expr(!!var_before == !!value) |
|
3668 |
+ }) %>% |
|
3669 |
+ purrr::reduce(~ rlang::expr(!!.x & !!.y)) |
|
3660 | 3670 |
map_recalibr[ |
3661 |
- chr_before == x[index, ]$chr & |
|
3662 |
- integration_locus_before == |
|
3663 |
- x[index, ]$integration_locus & |
|
3664 |
- strand_before == x[index, ]$strand, |
|
3671 |
+ eval(is_predicate), |
|
3665 | 3672 |
c( |
3666 |
- "chr_after", |
|
3667 |
- "integration_locus_after", |
|
3668 |
- "strand_after" |
|
3673 |
+ paste0(req_vars, "_after") |
|
3669 | 3674 |
) := |
3670 |
- list( |
|
3671 |
- chr_before, |
|
3672 |
- integration_locus_before, |
|
3673 |
- strand_before |
|
3675 |
+ purrr::map( |
|
3676 |
+ req_vars, |
|
3677 |
+ ~ eval(sym(paste0(.x, "_before"))) |
|
3674 | 3678 |
) |
3675 | 3679 |
] |
3676 | 3680 |
} |
3677 | 3681 |
return(list(recalibrated_matrix = x, map = map_recalibr)) |
3678 | 3682 |
} |
3679 | 3683 |
## Compute interval for row |
3680 |
- interval <- x[index, ]$integration_locus + 1 + threshold |
|
3684 |
+ interval <- x[index, ][[locus_col]] + 1 + threshold |
|
3681 | 3685 |
## Look ahead for every integration that falls in the interval |
3682 | 3686 |
near <- numeric() |
3683 | 3687 |
k <- index |
... | ... |
@@ -3686,7 +3690,7 @@ |
3686 | 3690 |
break |
3687 | 3691 |
} |
3688 | 3692 |
k <- k + 1 |
3689 |
- if (x[k, ]$integration_locus < interval) { |
|
3693 |
+ if (x[k, ][[locus_col]] < interval) { |
|
3690 | 3694 |
# Saves the indexes of the rows that are in the interval |
3691 | 3695 |
near <- append(near, k) |
3692 | 3696 |
} else { |
... | ... |
@@ -3694,10 +3698,10 @@ |
3694 | 3698 |
} |
3695 | 3699 |
} |
3696 | 3700 |
window <- c(index, near) |
3701 |
+ row_to_keep <- index |
|
3697 | 3702 |
if (!purrr::is_empty(near)) { |
3698 | 3703 |
## Change loci according to criteria |
3699 | 3704 |
######## CRITERIA PROCESSING |
3700 |
- row_to_keep <- index |
|
3701 | 3705 |
if (keep_criteria == "max_value") { |
3702 | 3706 |
expr <- rlang::expr(`$`(x[window, ], !!max_val_col)) |
3703 | 3707 |
to_check <- rlang::eval_tidy(expr) |
... | ... |
@@ -3709,54 +3713,68 @@ |
3709 | 3713 |
} |
3710 | 3714 |
# Fill map if needed |
3711 | 3715 |
if (produce_map == TRUE) { |
3712 |
- map_recalibr[ |
|
3713 |
- chr_before %chin% x[window, ]$chr & |
|
3714 |
- integration_locus_before %in% |
|
3715 |
- x[window, ]$integration_locus & |
|
3716 |
- strand_before %chin% x[window, ]$strand, |
|
3717 |
- c( |
|
3718 |
- "chr_after", |
|
3719 |
- "integration_locus_after", |
|
3720 |
- "strand_after" |
|
3721 |
- ) := |
|
3722 |
- list( |
|
3723 |
- x[row_to_keep, ]$chr, |
|
3724 |
- x[row_to_keep, ]$integration_locus, |
|
3725 |
- x[row_to_keep, ]$strand |
|
3726 |
- ) |
|
3727 |
- ] |
|
3716 |
+ is_predicate <- purrr::map(req_vars, ~ { |
|
3717 |
+ var_before <- sym(paste0(.x, "_before")) |
|
3718 |
+ value <- x[window, get(.x)] |
|
3719 |
+ if (!is.character(value)) { |
|
3720 |
+ return(rlang::expr(!!var_before %in% !!value)) |
|
3721 |
+ } |
|
3722 |
+ rlang::expr(!!var_before %chin% !!value) |
|
3723 |
+ }) %>% |
|
3724 |
+ purrr::reduce(~ rlang::expr(!!.x & !!.y)) |
|
3725 |
+ map_recalibr[ |
|
3726 |
+ eval(is_predicate), |
|
3727 |
+ c( |
|
3728 |
+ paste0(req_vars, "_after") |
|
3729 |
+ ) := |
|
3730 |
+ purrr::map( |
|
3731 |
+ req_vars, |
|
3732 |
+ ~ x[row_to_keep][[.x]] |
|
3733 |
+ ) |
|
3734 |
+ ] |
|
3728 | 3735 |
} |
3729 |
- # Change loci and strand of near integrations |
|
3736 |
+ # Change loci and other ids of near integrations |
|
3737 |
+ fields_to_change <- req_tags$names |
|
3730 | 3738 |
if (annotated) { |
3731 |
- x[near, c( |
|
3732 |
- "integration_locus", |
|
3733 |
- "strand", |
|
3734 |
- annotation_IS_vars() |
|
3735 |
- ) := list( |
|
3736 |
- x[row_to_keep, ]$integration_locus, |
|
3737 |
- x[row_to_keep, ]$strand, |
|
3738 |
- x[row_to_keep, ]$GeneName, |
|
3739 |
- x[row_to_keep, ]$GeneStrand |
|
3740 |
- )] |
|
3739 |
+ fields_to_change <- c(fields_to_change, annotation_IS_vars()) |
|
3740 |
+ x[near, c( |
|
3741 |
+ req_tags$names, |
|
3742 |
+ annotation_IS_vars() |
|
3743 |
+ ) := x[row_to_keep, |
|
3744 |
+ mget(fields_to_change)]] |
|
3741 | 3745 |
} else { |
3742 |
- x[near, c("integration_locus", "strand") := list( |
|
3743 |
- x[row_to_keep, ]$integration_locus, |
|
3744 |
- x[row_to_keep, ]$strand |
|
3745 |
- )] |
|
3746 |
+ x[near, c( |
|
3747 |
+ req_tags$names |
|
3748 |
+ ) := x[row_to_keep, |
|
3749 |
+ mget(fields_to_change)]] |
|
3746 | 3750 |
} |
3747 | 3751 |
## Aggregate same IDs |
3748 | 3752 |
starting_rows <- nrow(x) |
3749 | 3753 |
repeat { |
3750 |
- t <- x$CompleteAmplificationID[window] |
|
3754 |
+ t <- x[[sample_col]][window] |
|
3751 | 3755 |
d <- unique(t[duplicated(t)]) |
3752 | 3756 |
if (purrr::is_empty(d)) { |
3753 | 3757 |
break |
3754 | 3758 |
} |
3755 | 3759 |
dupl_indexes <- which(t == d[1]) |
3756 |
- values_sum <- colSums(x[window[dupl_indexes], ..num_cols], |
|
3760 |
+ values_sum <- colSums(x[window[dupl_indexes], mget(num_cols)], |
|
3757 | 3761 |
na.rm = TRUE |
3758 | 3762 |
) |
3759 | 3763 |
x[window[dupl_indexes[1]], c(num_cols) := as.list(values_sum)] |
3764 |
+ ### Aggregate additional cols |
|
3765 |
+ if (!is.null(add_col_lambdas) && |
|
3766 |
+ !purrr::is_empty(add_col_lambdas)) { |
|
3767 |
+ agg_cols <- purrr::map(names(add_col_lambdas), ~ { |
|
3768 |
+ if (is.null(add_col_lambdas[[.x]])) { |
|
3769 |
+ return(x[window[dupl_indexes[1]], ][[.x]]) |
|
3770 |
+ } |
|
3771 |
+ values <- x[window[dupl_indexes], ][[.x]] |
|
3772 |
+ return(rlang::as_function(add_col_lambdas[[.x]])(values)) |
|
3773 |
+ }) |
|
3774 |
+ x[window[dupl_indexes[1]], c(names(add_col_lambdas)) := |
|
3775 |
+ agg_cols] |
|
3776 |
+ } |
|
3777 |
+ |
|
3760 | 3778 |
x <- x[-window[dupl_indexes[-1]], ] |
3761 | 3779 |
to_drop <- seq( |
3762 | 3780 |
from = length(window), |
... | ... |
@@ -3773,22 +3791,25 @@ |
3773 | 3791 |
} |
3774 | 3792 |
} else { |
3775 | 3793 |
if (produce_map == TRUE) { |
3776 |
- map_recalibr[ |
|
3777 |
- chr_before == x[index, ]$chr & |
|
3778 |
- integration_locus_before == |
|
3779 |
- x[index, ]$integration_locus & |
|
3780 |
- strand_before == x[index, ]$strand, |
|
3781 |
- c( |
|
3782 |
- "chr_after", |
|
3783 |
- "integration_locus_after", |
|
3784 |
- "strand_after" |
|
3785 |
- ) := |
|
3786 |
- list( |
|
3787 |
- chr_before, |
|
3788 |
- integration_locus_before, |
|
3789 |
- strand_before |
|
3790 |
- ) |
|
3791 |
- ] |
|
3794 |
+ is_predicate <- purrr::map(req_vars, ~ { |
|
3795 |
+ var_before <- sym(paste0(.x, "_before")) |
|
3796 |
+ value <- x[window, get(.x)] |
|
3797 |
+ if (!is.character(value)) { |
|
3798 |
+ return(rlang::expr(!!var_before %in% !!value)) |
|
3799 |
+ } |
|
3800 |
+ rlang::expr(!!var_before %chin% !!value) |
|
3801 |
+ }) %>% |
|
3802 |
+ purrr::reduce(~ rlang::expr(!!.x & !!.y)) |
|
3803 |
+ map_recalibr[ |
|
3804 |
+ eval(is_predicate), |
|
3805 |
+ c( |
|
3806 |
+ paste0(req_vars, "_after") |
|
3807 |
+ ) := |
|
3808 |
+ purrr::map( |
|
3809 |
+ req_vars, |
|
3810 |
+ ~ x[row_to_keep][[.x]] |
|
3811 |
+ ) |
|
3812 |
+ ] |
|
3792 | 3813 |
} |
3793 | 3814 |
index <- index + 1 |
3794 | 3815 |
} |
... | ... |
@@ -3820,23 +3841,43 @@ |
3820 | 3841 |
# |
3821 | 3842 |
# @return Nothing |
3822 | 3843 |
.write_recalibr_map <- function(map, file_path) { |
3823 |
- if (fs::is_dir(file_path)) { |
|
3824 |
- if (!fs::dir_exists(file_path)) { |
|
3825 |
- fs::dir_create(file_path) |
|
3826 |
- } |
|
3827 |
- gen_filename <- .generate_rec_map_filename() |
|
3828 |
- file_path <- fs::path(file_path, gen_filename) |
|
3844 |
+ if (!fs::file_exists(file_path)) { |
|
3845 |
+ ext <- fs::path_ext(file_path) |
|
3846 |
+ ## if ext is empty assume it's a folder |
|
3847 |
+ if (ext == "") { |
|
3848 |
+ fs::dir_create(file_path) |
|
3849 |
+ gen_filename <- .generate_rec_map_filename() |
|
3850 |
+ tmp_filename <- fs::path(file_path, gen_filename) |
|
3829 | 3851 |
} else { |
3830 |
- if (fs::path_ext(file_path) == "") { |
|
3831 |
- file_path <- fs::path_ext_set(file_path, "tsv.gz") |
|
3852 |
+ tmp_filename <- fs::path_ext_remove(file_path) |
|
3853 |
+ if (ext %in% .compressed_formats()) { |
|
3854 |
+ ext <- paste(fs::path_ext(tmp_filename), ext, sep = ".") |
|
3855 |
+ tmp_filename <- fs::path_ext_remove(tmp_filename) |
|
3856 |
+ } |
|
3857 |
+ if (!ext %in% c("tsv", paste("tsv", .compressed_formats(), sep = "."), |
|
3858 |
+ "csv", paste("csv", .compressed_formats(), sep = "."), |
|
3859 |
+ "txt", paste("txt", .compressed_formats(), sep = ".") |
|
3860 |
+ )) { |
|
3861 |
+ if (getOption("ISAnalytics.verbose") == TRUE) { |
|
3862 |
+ warn <- c("Recalibration file format unsupported", |
|
3863 |
+ i = "Writing file in 'tsv.gz' format") |
|
3864 |
+ rlang::inform(warn, class = "rec_unsupp_ext") |
|
3832 | 3865 |
} |
3866 |
+ tmp_filename <- paste0(tmp_filename, ".tsv.gz") |
|
3867 |
+ } else { |
|
3868 |
+ tmp_filename <- paste0(tmp_filename, ".", ext) |
|
3869 |
+ } |
|
3833 | 3870 |
} |
3871 |
+ } else if (fs::is_dir(file_path)) { |
|
3872 |
+ gen_filename <- .generate_rec_map_filename() |
|
3873 |
+ tmp_filename <- fs::path(file_path, gen_filename) |
|
3874 |
+ } |
|
3834 | 3875 |
withRestarts( |
3835 | 3876 |
{ |
3836 |
- data.table::fwrite(map, file = file_path, sep = "\t", na = "") |
|
3877 |
+ data.table::fwrite(map, file = tmp_filename, sep = "\t", na = "") |
|
3837 | 3878 |
saved_msg <- paste( |
3838 | 3879 |
"Recalibration map saved to: ", |
3839 |
- file_path |
|
3880 |
+ tmp_filename |
|
3840 | 3881 |
) |
3841 | 3882 |
if (getOption("ISAnalytics.verbose") == TRUE) { |
3842 | 3883 |
rlang::inform(saved_msg) |
... | ... |
@@ -71,121 +71,189 @@ |
71 | 71 |
#' head(rec) |
72 | 72 |
compute_near_integrations <- function(x, |
73 | 73 |
threshold = 4, |
74 |
- keep_criteria = "max_value", |
|
75 |
- strand_specific = TRUE, |
|
74 |
+ is_identity_tags = c("chromosome", "is_strand"), |
|
75 |
+ keep_criteria = c("max_value", "keep_first"), |
|
76 | 76 |
value_columns = c("seqCount", "fragmentEstimate"), |
77 | 77 |
max_value_column = "seqCount", |
78 |
+ sample_id_column = pcr_id_column(), |
|
79 |
+ additional_agg_lambda = list(.default = default_rec_agg_lambdas()), |
|
80 |
+ max_workers = 4, |
|
78 | 81 |
map_as_file = TRUE, |
79 |
- file_path = default_report_path()) { |
|
80 |
- # Check parameters |
|
82 |
+ file_path = default_report_path(), |
|
83 |
+ strand_specific = lifecycle::deprecated()) { |
|
84 |
+ ## --- Check parameters |
|
81 | 85 |
stopifnot(is.data.frame(x)) |
82 |
- ## Check tibble is an integration matrix |
|
83 |
- if (!.check_mandatory_vars(x)) { |
|
84 |
- rlang::abort(.missing_mand_vars()) |
|
85 |
- } |
|
86 |
- ## Check it contains CompleteAmplificationID column |
|
87 |
- if (!.check_complAmpID(x)) { |
|
88 |
- rlang::abort(.missing_cAmp_sub_msg()) |
|
89 |
- } |
|
90 |
- ## Check numeric columns |
|
86 |
+ stopifnot(is.numeric(threshold) || is.integer(threshold)) |
|
87 |
+ threshold <- threshold[1] |
|
91 | 88 |
stopifnot(is.character(value_columns)) |
92 | 89 |
stopifnot(is.character(max_value_column)) |
93 | 90 |
num_cols <- unique(c(value_columns, max_value_column[1])) |
94 | 91 |
if (!all(num_cols %in% colnames(x))) { |
95 |
- rlang::abort(.missing_user_cols_error( |
|
96 |
- num_cols[!num_cols %in% colnames(x)] |
|
97 |
- )) |
|
92 |
+ rlang::abort(.missing_needed_cols( |
|
93 |
+ num_cols[!num_cols %in% colnames(x)] |
|
94 |
+ )) |
|
98 | 95 |
} |
99 |
- stopifnot(is.numeric(threshold) || is.integer(threshold)) |
|
100 |
- threshold <- threshold[1] |
|
101 |
- criteria <- rlang::arg_match(keep_criteria, values = c( |
|
102 |
- "max_value", |
|
103 |
- "keep_first" |
|
104 |
- )) |
|
105 |
- stopifnot(is.logical(strand_specific)) |
|
106 |
- strand_specific <- strand_specific[1] |
|
96 |
+ criteria <- rlang::arg_match(keep_criteria) |
|
97 |
+ stopifnot(is.character(sample_id_column)) |
|
98 |
+ sample_id_column <- sample_id_column[1] |
|
107 | 99 |
stopifnot(is.logical(map_as_file)) |
108 | 100 |
map_as_file <- map_as_file[1] |
109 | 101 |
if (map_as_file == TRUE) { |
110 |
- stopifnot(is.character(file_path)) |
|
111 |
- file_path <- file_path[1] |
|
102 |
+ stopifnot(is.character(file_path)) |
|
103 |
+ file_path <- file_path[1] |
|
104 |
+ } |
|
105 |
+ if (lifecycle::is_present(strand_specific)) { |
|
106 |
+ lifecycle::deprecate_warn( |
|
107 |
+ when = "1.5.4", |
|
108 |
+ what = "compute_near_integrations(strand_specific)", |
|
109 |
+ with = "compute_near_integrations(is_identity_tags)", |
|
110 |
+ details = paste("See the documentation for details,", |
|
111 |
+ "the argument will be likely dropped in the next", |
|
112 |
+ "release cycle.") |
|
113 |
+ ) |
|
114 |
+ stopifnot(is.logical(strand_specific)) |
|
115 |
+ strand_specific <- strand_specific[1] |
|
116 |
+ if (!"is_strand" %in% is_identity_tags) { |
|
117 |
+ is_identity_tags <- c(is_identity_tags, "is_strand") |
|
118 |
+ } |
|
119 |
+ } |
|
120 |
+ ## --- Check tags |
|
121 |
+ stopifnot(is.null(is_identity_tags) || is.character(is_identity_tags)) |
|
122 |
+ required_tags <- list("locus" = c("int", "numeric")) |
|
123 |
+ if (!is.null(is_identity_tags)) { |
|
124 |
+ is_identity_tags <- is_identity_tags[is_identity_tags != "locus"] |
|
125 |
+ id_tags_types <- purrr::map(is_identity_tags, ~ NULL) %>% |
|
126 |
+ purrr::set_names(is_identity_tags) |
|
127 |
+ required_tags <- append(required_tags, id_tags_types) |
|
128 |
+ } |
|
129 |
+ required_tag_cols <- .check_required_cols( |
|
130 |
+ required_tags = required_tags, |
|
131 |
+ vars_df = mandatory_IS_vars(TRUE), |
|
132 |
+ duplicate_politic = "error") |
|
133 |
+ if (!all(required_tag_cols$names %in% colnames(x))) { |
|
134 |
+ rlang::abort(.missing_needed_cols(required_tag_cols$names[ |
|
135 |
+ !required_tag_cols$names %in% colnames(x) |
|
136 |
+ ])) |
|
137 |
+ } |
|
138 |
+ if (!sample_id_column %in% colnames(x)) { |
|
139 |
+ rlang::abort(.missing_needed_cols(sample_id_column)) |
|
112 | 140 |
} |
113 | 141 |
## Is x annotated? |
114 | 142 |
annotated <- .is_annotated(x) |
115 |
- # Split data for parallel execution |
|
116 |
- split <- if (strand_specific == TRUE) { |
|
117 |
- x %>% |
|
118 |
- dplyr::group_by(.data$chr, .data$strand) %>% |
|
119 |
- dplyr::group_split() |
|
120 |
- } else { |
|
121 |
- x %>% |
|
122 |
- dplyr::group_by(.data$chr) %>% |
|
123 |
- dplyr::group_split() |
|
143 |
+ ## Any additional columns present? |
|
144 |
+ additional_cols <- colnames(x)[!colnames(x) %in% c( |
|
145 |
+ required_tag_cols$names, sample_id_column, value_columns, |
|
146 |
+ annotation_IS_vars() |
|
147 |
+ )] |
|
148 |
+ find_lambda <- function(.x) { |
|
149 |
+ if (.x %in% names(additional_agg_lambda)) { |
|
150 |
+ return(additional_agg_lambda[[.x]]) |
|
151 |
+ } |
|
152 |
+ defaults <- additional_agg_lambda[[".defaults"]] |
|
153 |
+ if (is.null(defaults)) { |
|
154 |
+ return(NULL) |
|
155 |
+ } |
|
156 |
+ col_type <- typeof(x[[.x]]) |
|
157 |
+ return(defaults[[col_type]]) |
|
124 | 158 |
} |
125 |
- ## Select only groups with 2 or more rows |
|
126 |
- split_to_process <- split[purrr::map_lgl(split, ~ nrow(.x) > 1)] |
|
127 |
- # # Set up parallel workers |
|
128 |
- if (.Platform$OS.type == "windows") { |
|
159 |
+ add_cols_lambdas <- purrr::map(additional_cols, find_lambda) %>% |
|
160 |
+ purrr::set_names(additional_cols) |
|
161 |
+ add_cols_lambdas <- add_cols_lambdas[purrr::map_lgl(add_cols_lambdas, |
|
162 |
+ ~ !is.null(.x))] |
|
163 |
+ # Process |
|
164 |
+ if (is.null(is_identity_tags) || purrr::is_empty(is_identity_tags)) { |
|
165 |
+ result <- .sliding_window( |
|
166 |
+ x = x, |
|
167 |
+ threshold = threshold, |
|
168 |
+ keep_criteria = criteria, |
|
169 |
+ annotated = annotated, |
|
170 |
+ num_cols = num_cols, |
|
171 |
+ max_val_col = max_value_column, |
|
172 |
+ sample_col = sample_id_column, |
|
173 |
+ req_tags = required_tag_cols, |
|
174 |
+ add_col_lambdas = add_cols_lambdas, |
|
175 |
+ produce_map = map_as_file |
|
176 |
+ ) |
|
177 |
+ recalibr_m <- result$recalibrated_matrix |
|
178 |
+ maps <- result$map |
|
179 |
+ |
|
180 |
+ } else { |
|
181 |
+ stopifnot(is.numeric(max_workers)) |
|
182 |
+ max_workers <- max_workers[1] |
|
183 |
+ is_identity_names <- required_tag_cols %>% |
|
184 |
+ dplyr::filter(.data$tag != "locus") %>% |
|
185 |
+ dplyr::pull(.data$names) |
|
186 |
+ # Split data for parallel execution |
|
187 |
+ split <- x %>% |
|
188 |
+ dplyr::group_by(dplyr::across(dplyr::all_of(is_identity_names))) %>% |
|
189 |
+ dplyr::group_split() |
|
190 |
+ ## Select only groups with 2 or more rows |
|
191 |
+ split_to_process <- split[purrr::map_lgl(split, ~ nrow(.x) > 1)] |
|
192 |
+ # Set up parallel workers |
|
193 |
+ if (.Platform$OS.type == "windows") { |
|
129 | 194 |
p <- BiocParallel::SnowParam( |
130 |
- stop.on.error = FALSE, |
|
131 |
- progressbar = TRUE, |
|
132 |
- tasks = length(split_to_process), |
|
133 |
- exportglobals = TRUE |
|
195 |
+ workers = max_workers, |
|
196 |
+ progressbar = getOption("ISAnalytics.verbose"), |
|
197 |
+ tasks = length(split_to_process), |
|
198 |
+ exportglobals = TRUE |
|
134 | 199 |
) |
135 |
- } else { |
|
200 |
+ } else { |
|
136 | 201 |
p <- BiocParallel::MulticoreParam( |
137 |
- stop.on.error = FALSE, |
|
138 |
- progressbar = TRUE, |
|
139 |
- tasks = length(split_to_process), |
|
140 |
- exportglobals = FALSE |
|
141 |
- ) |
|
142 |
- } |
|
143 |
- FUN <- function(x) { |
|
144 |
- .sliding_window(x, |
|
145 |
- threshold = threshold, |
|
146 |
- keep_criteria = criteria, |
|
147 |
- num_cols = num_cols, |
|
148 |
- annotated = annotated, |
|
149 |
- max_val_col = max_value_column, |
|
150 |
- produce_map = map_as_file |
|
202 |
+ workers = max_workers, |
|
203 |
+ progressbar = getOption("ISAnalytics.verbose"), |
|
204 |
+ tasks = length(split_to_process), |
|
205 |
+ exportglobals = FALSE |
|
151 | 206 |
) |
152 |
- } |
|
153 |
- ## Obtain result: list of lists |
|
154 |
- result <- BiocParallel::bptry( |
|
155 |
- BiocParallel::bplapply( |
|
156 |
- split_to_process, |
|
157 |
- FUN = FUN, |
|
158 |
- BPPARAM = p |
|
159 |
- ) |
|
160 |
- ) |
|
161 |
- BiocParallel::bpstop(p) |
|
162 |
- ## Obtain single list |
|
163 |
- recalibr_m <- purrr::map(result, ~ .x$recalibrated_matrix) |
|
164 |
- recalibr_m <- data.table::rbindlist(recalibr_m) |
|
165 |
- maps <- purrr::map(result, ~ .x$map) |
|
166 |
- maps <- data.table::rbindlist(maps) |
|
167 |
- ## Add all rows that were not part of recalibration |
|
168 |
- split_fine <- split[purrr::map_lgl(split, ~ !nrow(.x) > 1)] |
|
169 |
- if (length(split_fine) > 0) { |
|
207 |
+ } |
|
208 |
+ ## Obtain result: list of lists |
|
209 |
+ result <- BiocParallel::bplapply( |
|
210 |
+ split_to_process, |
|
211 |
+ FUN = .sliding_window, |
|
212 |
+ threshold = threshold, |
|
213 |
+ keep_criteria = criteria, |
|
214 |
+ annotated = annotated, |
|
215 |
+ num_cols = num_cols, |
|
216 |
+ max_val_col = max_value_column, |
|
217 |
+ sample_col = sample_id_column, |
|
218 |
+ req_tags = required_tag_cols, |
|
219 |
+ add_col_lambdas = add_cols_lambdas, |
|
220 |
+ produce_map = map_as_file, |
|
221 |
+ BPPARAM = p |
|
222 |
+ ) |
|
223 |
+ BiocParallel::bpstop(p) |
|
224 |
+ ## Obtain single list |
|
225 |
+ recalibr_m <- purrr::map(result, ~ .x$recalibrated_matrix) |
|
226 |
+ recalibr_m <- data.table::rbindlist(recalibr_m) |
|
227 |
+ maps <- purrr::map(result, ~ .x$map) |
|
228 |
+ maps <- data.table::rbindlist(maps) |
|
229 |
+ ## Add all rows that were not part of recalibration |
|
230 |
+ split_fine <- split[purrr::map_lgl(split, ~ !nrow(.x) > 1)] |
|
231 |
+ if (length(split_fine) > 0) { |
|
170 | 232 |
split_fine <- purrr::reduce( |
171 |
- split_fine, |
|
172 |
- function(g1, g2) { |
|
173 |
- g1 <- data.table::setDT(g1) |
|
174 |
- g2 <- data.table::setDT(g2) |
|
175 |
- data.table::rbindlist(list(g1, g2)) |
|
176 |
- } |
|
177 |
- ) %>% data.table::setDT() |
|
178 |
- mand_vars <- mandatory_IS_vars() |
|
179 |
- map_fine <- unique(split_fine[, ..mand_vars]) |
|
233 |
+ split_fine, |
|
234 |
+ function(g1, g2) { |
|
235 |
+ g1 <- data.table::setDT(g1) |
|
236 |
+ g2 <- data.table::setDT(g2) |
|
237 |
+ data.table::rbindlist(list(g1, g2)) |
|
238 |
+ } |
|
239 |
+ ) |
|
240 |
+ split_fine <- data.table::setDT(split_fine) |
|
241 |
+ map_fine <- unique(split_fine[, mget(required_tag_cols$names)]) |
|
180 | 242 |
data.table::setnames( |
181 |
- x = map_fine, |
|
182 |
- old = mandatory_IS_vars(), |
|
183 |
- new = paste0(mandatory_IS_vars(), "_before") |
|
243 |
+ x = map_fine, |
|
244 |
+ old = mget(required_tag_cols$names), |
|
245 |
+ new = paste0(mget(required_tag_cols$names), "_before") |
|
184 | 246 |
) |
185 |
- map_fine[, c("chr_after", "integration_locus_after", "strand_after") := |
|
186 |
- list(chr_before, integration_locus_before, strand_before)] |
|
247 |
+ map_fine[, c( |
|
248 |
+ paste0(required_tag_cols$names, "_after") |
|
249 |
+ ) := |
|
250 |
+ purrr::map( |
|
251 |
+ required_tag_cols$names, |
|
252 |
+ ~ eval(sym(paste0(.x, "_before"))) |
|
253 |
+ )] |
|
187 | 254 |
recalibr_m <- data.table::rbindlist(list(recalibr_m, split_fine)) |
188 | 255 |
maps <- data.table::rbindlist(list(maps, map_fine)) |
256 |
+ } |
|
189 | 257 |
} |
190 | 258 |
if (map_as_file) { |
191 | 259 |
### Manage file |
... | ... |
@@ -204,3 +272,12 @@ compute_near_integrations <- function(x, |
204 | 272 |
} |
205 | 273 |
return(recalibr_m) |
206 | 274 |
} |
275 |
+ |
|
276 |
+default_rec_agg_lambdas <- function() { |
|
277 |
+ list( |
|
278 |
+ character = ~ paste0(.x, collapse = ";"), |
|
279 |
+ integer = ~ sum(.x, na.rm = TRUE), |
|
280 |
+ double = ~ sum(.x, na.rm = TRUE), |
|
281 |
+ logical = ~ all(.x) |
|
282 |
+ ) |
|
283 |
+} |
... | ... |
@@ -1059,12 +1059,12 @@ test_that(paste(func_name[17], "produces report in right location"), { |
1059 | 1059 |
#------------------------------------------------------------------------------# |
1060 | 1060 |
# Tests import_parallel_Vispa2Matrices |
1061 | 1061 |
#------------------------------------------------------------------------------# |
1062 |
-test_that(paste(func_name[17], "executes correctly"), { |
|
1062 |
+test_that(paste(func_name[18], "executes correctly"), { |
|
1063 | 1063 |
matrices <- import_parallel_Vispa2Matrices( |
1064 | 1064 |
association_file = local_af_corr, |
1065 | 1065 |
quantification_type = c("seqCount", "fragmentEstimate"), |
1066 | 1066 |
mode = "AUTO", report_path = NULL |
1067 | 1067 |
) |
1068 |
- expect_true(nrow(matrices) == 5904 & ncol(matrices) == 8) |
|
1068 |
+ expect_true(nrow(matrices) == 1689 & ncol(matrices) == 8) |
|
1069 | 1069 |
expect_true(all(c("seqCount", "fragmentEstimate") %in% colnames(matrices))) |
1070 | 1070 |
}) |
... | ... |
@@ -43,6 +43,7 @@ sample_group2 <- tibble::tibble( |
43 | 43 |
48 |
44 | 44 |
) |
45 | 45 |
) |
46 |
+ |
|
46 | 47 |
sample_group_mult1 <- tibble::tibble( |
47 | 48 |
chr = c(rep_len("1", 6)), |
48 | 49 |
integration_locus = c( |
... | ... |
@@ -275,10 +276,14 @@ expected_for_smplmult2_mv <- tibble::tibble( |
275 | 276 |
) |
276 | 277 |
|
277 | 278 |
test_that(".sliding_window produces correct output for sample1", { |
279 |
+ withr::local_options(list(ISAnalytics.mandatory_is_vars = "default")) |
|
278 | 280 |
result <- .sliding_window( |
279 | 281 |
x = sample_group1, threshold = 4, |
280 | 282 |
keep_criteria = "keep_first", annotated = FALSE, |
281 | 283 |
num_cols = "Value", max_val_col = "Value", |
284 |
+ sample_col = "CompleteAmplificationID", |
|
285 |
+ req_tags = mandatory_IS_vars(TRUE), |
|
286 |
+ add_col_lambdas = NULL, |
|
282 | 287 |
produce_map = TRUE |
283 | 288 |
) |
284 | 289 |
expect_equal(result$recalibrated_matrix, expected_for_smpl1_kf, |
... | ... |
@@ -291,6 +296,9 @@ test_that(".sliding_window produces correct output for sample1", { |
291 | 296 |
x = sample_group1, threshold = 4, |
292 | 297 |
keep_criteria = "max_value", annotated = FALSE, |
293 | 298 |
num_cols = "Value", max_val_col = "Value", |
299 |
+ sample_col = "CompleteAmplificationID", |
|
300 |
+ req_tags = mandatory_IS_vars(TRUE), |
|
301 |
+ add_col_lambdas = NULL, |
|
294 | 302 |
produce_map = TRUE |
295 | 303 |
) |
296 | 304 |
expect_equal(result$recalibrated_matrix, expected_for_smpl1_kf, |
... | ... |
@@ -302,10 +310,14 @@ test_that(".sliding_window produces correct output for sample1", { |
302 | 310 |
}) |
303 | 311 |
|
304 | 312 |
test_that(".sliding_window produces correct output for sample2", { |
313 |
+ withr::local_options(list(ISAnalytics.mandatory_is_vars = "default")) |
|
305 | 314 |
result <- .sliding_window( |
306 | 315 |
x = sample_group2, threshold = 4, |
307 | 316 |
keep_criteria = "keep_first", annotated = FALSE, |
308 | 317 |
num_cols = "Value", max_val_col = "Value", |
318 |
+ sample_col = "CompleteAmplificationID", |
|
319 |
+ req_tags = mandatory_IS_vars(TRUE), |
|
320 |
+ add_col_lambdas = NULL, |
|
309 | 321 |
produce_map = TRUE |
310 | 322 |
) |
311 | 323 |
expect_equal(result$recalibrated_matrix, expected_for_smpl2_kf, |
... | ... |
@@ -318,6 +330,9 @@ test_that(".sliding_window produces correct output for sample2", { |
318 | 330 |
x = sample_group2, threshold = 4, |
319 | 331 |
keep_criteria = "max_value", annotated = FALSE, |
320 | 332 |
num_cols = "Value", max_val_col = "Value", |
333 |
+ sample_col = "CompleteAmplificationID", |
|
334 |
+ req_tags = mandatory_IS_vars(TRUE), |
|
335 |
+ add_col_lambdas = NULL, |
|
321 | 336 |
produce_map = TRUE |
322 | 337 |
) |
323 | 338 |
expect_equal(result$recalibrated_matrix, expected_for_smpl2_mv, |
... | ... |
@@ -329,11 +344,15 @@ test_that(".sliding_window produces correct output for sample2", { |
329 | 344 |
}) |
330 | 345 |
|
331 | 346 |
test_that(".sliding_window produces correct output for sample1 - mult column", { |
347 |
+ withr::local_options(list(ISAnalytics.mandatory_is_vars = "default")) |
|
332 | 348 |
result <- .sliding_window( |
333 | 349 |
x = sample_group_mult1, threshold = 4, |
334 | 350 |
keep_criteria = "keep_first", annotated = FALSE, |
335 | 351 |
num_cols = c("seqCount", "fragmentEstimate"), |
336 | 352 |
max_val_col = "seqCount", |
353 |
+ sample_col = "CompleteAmplificationID", |
|
354 |
+ req_tags = mandatory_IS_vars(TRUE), |
|
355 |
+ add_col_lambdas = NULL, |
|
337 | 356 |
produce_map = TRUE |
338 | 357 |
) |
339 | 358 |
expect_equal(result$recalibrated_matrix, expected_for_smplmult1_kf, |
... | ... |
@@ -347,6 +366,9 @@ test_that(".sliding_window produces correct output for sample1 - mult column", { |
347 | 366 |
keep_criteria = "max_value", annotated = FALSE, |
348 | 367 |
num_cols = c("seqCount", "fragmentEstimate"), |
349 | 368 |
max_val_col = "seqCount", |
369 |
+ sample_col = "CompleteAmplificationID", |
|
370 |
+ req_tags = mandatory_IS_vars(TRUE), |
|
371 |
+ add_col_lambdas = NULL, |
|
350 | 372 |
produce_map = TRUE |
351 | 373 |
) |
352 | 374 |
expect_equal(result$recalibrated_matrix, expected_for_smplmult1_kf, |
... | ... |
@@ -358,11 +380,15 @@ test_that(".sliding_window produces correct output for sample1 - mult column", { |
358 | 380 |
}) |
359 | 381 |
|
360 | 382 |
test_that(".sliding_window produces correct output for sample2 - mult column", { |
383 |
+ withr::local_options(list(ISAnalytics.mandatory_is_vars = "default")) |
|
361 | 384 |
result <- .sliding_window( |
362 | 385 |
x = sample_group_mult2, threshold = 4, |
363 | 386 |
keep_criteria = "keep_first", annotated = FALSE, |
364 | 387 |
num_cols = c("seqCount", "fragmentEstimate"), |
365 | 388 |
max_val_col = "seqCount", |
389 |
+ sample_col = "CompleteAmplificationID", |
|
390 |
+ req_tags = mandatory_IS_vars(TRUE), |
|
391 |
+ add_col_lambdas = NULL, |
|
366 | 392 |
produce_map = TRUE |
367 | 393 |
) |
368 | 394 |
expect_equal(result$recalibrated_matrix, expected_for_smplmult2_kf, |
... | ... |
@@ -376,6 +402,9 @@ test_that(".sliding_window produces correct output for sample2 - mult column", { |
376 | 402 |
keep_criteria = "max_value", annotated = FALSE, |
377 | 403 |
num_cols = c("seqCount", "fragmentEstimate"), |
378 | 404 |
max_val_col = "seqCount", |
405 |
+ sample_col = "CompleteAmplificationID", |
|
406 |
+ req_tags = mandatory_IS_vars(TRUE), |
|
407 |
+ add_col_lambdas = NULL, |
|
379 | 408 |
produce_map = TRUE |
380 | 409 |
) |
381 | 410 |
expect_equal(result$recalibrated_matrix, expected_for_smplmult2_mv, |
... | ... |
@@ -386,11 +415,212 @@ test_that(".sliding_window produces correct output for sample2 - mult column", { |
386 | 415 |
) |
387 | 416 |
}) |
388 | 417 |
|
418 |
+test_that(".sliding_window works on annotated", { |
|
419 |
+ withr::local_options(list(ISAnalytics.mandatory_is_vars = "default")) |
|
420 |
+ annot_group1 <- sample_group1 %>% |
|
421 |
+ dplyr::mutate(GeneName = paste0("GENE", seq_len(nrow(sample_group1))), |
|
422 |
+ GeneStrand = "+", .after = "strand") |
|
423 |
+ expected_matrix <- expected_for_smpl1_kf %>% |
|
424 |
+ dplyr::mutate(GeneName = c("GENE4", "GENE4", "GENE4", "GENE4", |
|
425 |
+ "GENE5", "GENE5"), |
|
426 |
+ GeneStrand = "+", .after = "strand") |
|
427 |
+ result <- .sliding_window( |
|
428 |
+ x = annot_group1, threshold = 4, |
|
429 |
+ keep_criteria = "max_value", annotated = TRUE, |
|
430 |
+ num_cols = "Value", |
|
431 |
+ max_val_col = "Value", |
|
432 |
+ sample_col = "CompleteAmplificationID", |
|
433 |
+ req_tags = mandatory_IS_vars(TRUE), |
|
434 |
+ add_col_lambdas = NULL, |
|
435 |
+ produce_map = TRUE |
|
436 |
+ ) |
|
437 |
+ expect_equal(result$recalibrated_matrix, expected_matrix, |
|
438 |
+ ignore_attr = TRUE |
|
439 |
+ ) |
|
440 |
+ expect_equal(result$map, recalibr_map_smpl1_kf, |
|
441 |
+ ignore_attr = TRUE |
|
442 |
+ ) |
|
443 |
+ annot_group2 <- sample_group2 %>% |
|
444 |
+ dplyr::mutate(GeneName = paste0("GENE", seq_len(nrow(sample_group2))), |
|
445 |
+ GeneStrand = "-", .after = "strand") |
|
446 |
+ expected_matrix <- expected_for_smpl2_mv %>% |
|
447 |
+ dplyr::mutate(GeneName = c("GENE1", "GENE7", "GENE6", "GENE6", "GENE3"), |
|
448 |
+ GeneStrand = "-", .after = "strand") |
|
449 |
+ result <- .sliding_window( |
|
450 |
+ x = annot_group2, threshold = 4, |
|
451 |
+ keep_criteria = "max_value", annotated = TRUE, |
|
452 |
+ num_cols = "Value", |
|
453 |
+ max_val_col = "Value", |
|
454 |
+ sample_col = "CompleteAmplificationID", |
|
455 |
+ req_tags = mandatory_IS_vars(TRUE), |
|
456 |
+ add_col_lambdas = NULL, |
|
457 |
+ produce_map = TRUE |
|
458 |
+ ) |
|
459 |
+ expect_equal(result$recalibrated_matrix, expected_matrix, |
|
460 |
+ ignore_attr = TRUE |
|
461 |
+ ) |
|
462 |
+ expect_equal(result$map, recalibr_map_smpl2_mv, |
|
463 |
+ ignore_attr = TRUE |
|
464 |
+ ) |
|
465 |
+}) |
|
466 |
+ |
|
467 |
+test_that(".sliding_window aggregates add columns correctly", { |
|
468 |
+ withr::local_options(list(ISAnalytics.mandatory_is_vars = "default")) |
|
469 |
+ s2_add_cols <- sample_group2 %>% |
|
470 |
+ dplyr::mutate(ann1 = seq_len(nrow(sample_group2)), |
|
471 |
+ ann2 = c("a", "b", "c", "d", "e", "f", "g"), |
|
472 |
+ ann3 = c(TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE), |
|
473 |
+ .after = "strand") |
|
474 |
+ result <- .sliding_window( |
|
475 |
+ x = s2_add_cols, threshold = 4, |
|
476 |
+ keep_criteria = "max_value", annotated = FALSE, |
|
477 |
+ num_cols = "Value", |
|
478 |
+ max_val_col = "Value", |
|
479 |
+ sample_col = "CompleteAmplificationID", |
|
480 |
+ req_tags = mandatory_IS_vars(TRUE), |
|
481 |
+ add_col_lambdas = list(ann1 = ~sum(.x, na.rm = TRUE), |
|
482 |
+ ann2 = ~paste0(.x, collapse = ";"), |
|
483 |
+ ann3 = ~any(.x)), |
|
484 |
+ produce_map = TRUE |
|
485 |
+ ) |
|
486 |
+ expected_v1 <- expected_for_smpl2_mv %>% |
|
487 |
+ dplyr::mutate(ann1 = c(6, 7, 6, 6, 3), |
|
488 |
+ ann2 = c("e;a", "g", "d;b", "f", "c"), |
|
489 |
+ ann3 = c(TRUE, FALSE, TRUE, TRUE, FALSE), |
|
490 |
+ .after = "strand") |
|
491 |
+ expect_equal(result$recalibrated_matrix, expected_v1, |
|
492 |
+ ignore_attr = TRUE |
|
493 |
+ ) |
|
494 |
+ expect_equal(result$map, recalibr_map_smpl2_mv, |
|
495 |
+ ignore_attr = TRUE |
|
496 |
+ ) |
|
497 |
+ ## NULL lambdas correspond to keep any value |
|
498 |
+ result <- .sliding_window( |
|
499 |
+ x = s2_add_cols, threshold = 4, |
|
500 |
+ keep_criteria = "max_value", annotated = FALSE, |
|
501 |
+ num_cols = "Value", |
|
502 |
+ max_val_col = "Value", |
|
503 |
+ sample_col = "CompleteAmplificationID", |
|
504 |
+ req_tags = mandatory_IS_vars(TRUE), |
|
505 |
+ add_col_lambdas = list(ann1 = NULL, |
|
506 |
+ ann2 = NULL, |
|
507 |
+ ann3 = NULL), |
|
508 |
+ produce_map = TRUE |
|
509 |
+ ) |
|
510 |
+ expected_v2 <- expected_for_smpl2_mv %>% |
|
511 |
+ dplyr::mutate(ann1 = c(5, 7, 4, 6, 3), |
|
512 |
+ ann2 = c("e", "g", "d", "f", "c"), |
|
513 |
+ ann3 = c(FALSE, FALSE, TRUE, TRUE, FALSE), |
|
514 |
+ .after = "strand") |
|
515 |
+ expect_equal(result$recalibrated_matrix, expected_v2, |
|
516 |
+ ignore_attr = TRUE |
|
517 |
+ ) |
|
518 |
+ expect_equal(result$map, recalibr_map_smpl2_mv, |
|
519 |
+ ignore_attr = TRUE |
|
520 |
+ ) |
|
521 |
+}) |
|
522 |
+ |
|
523 |
+test_that(".sliding_window works with custom vars", { |
|
524 |
+ withr::local_options(list(ISAnalytics.mandatory_is_vars = "default")) |
|
525 |
+ customized <- sample_group2 %>% |
|
526 |
+ dplyr::rename(chrom = "chr", locus = "integration_locus") |
|
527 |
+ temp_vars <- mandatory_IS_vars(TRUE) |
|
528 |
+ temp_vars[1, ]$names <- "chrom" |
|
529 |
+ temp_vars[2, ]$names <- "locus" |
|
530 |
+ withr::with_options(list(ISAnalytics.mandatory_is_vars = temp_vars), |
|
531 |
+ { |
|
532 |
+ result <- .sliding_window( |
|
533 |
+ x = customized, threshold = 4, |
|
534 |
+ keep_criteria = "max_value", annotated = FALSE, |
|
535 |
+ num_cols = "Value", max_val_col = "Value", |
|
536 |
+ sample_col = "CompleteAmplificationID", |
|
537 |
+ req_tags = mandatory_IS_vars(TRUE), |
|
538 |
+ add_col_lambdas = NULL, |
|
539 |
+ produce_map = TRUE |
|
540 |
+ ) |
|
541 |
+ }) |
|
542 |
+ expected_matrix <- expected_for_smpl2_mv %>% |
|
543 |
+ dplyr::rename(chrom = "chr", locus = "integration_locus") |
|
544 |
+ expect_equal(result$recalibrated_matrix, expected_matrix, |
|
545 |
+ ignore_attr = TRUE |
|
546 |
+ ) |
|
547 |
+ expect_equal(result$map, recalibr_map_smpl2_mv %>% |
|
548 |
+ dplyr::rename(chrom_before = "chr_before", |
|
549 |
+ locus_before = "integration_locus_before", |
|
550 |
+ chrom_after = "chr_after", |
|
551 |
+ locus_after = "integration_locus_after"), |
|
552 |
+ ignore_attr = TRUE |
|
553 |
+ ) |
|
554 |
+}) |
|
555 |
+ |
|
556 |
+#------------------------------------------------------------------------------# |
|
557 |
+# Tests .write_recalibr_map |
|
558 |
+#------------------------------------------------------------------------------# |
|
559 |
+test_that(".write_recalibr_map works if path provided is dir", { |
|
560 |
+ tmp_dir <- fs::path(tempdir(), "ISAtest") |
|
561 |
+ ## Works if folder doesn't exist (creates it and writes the file in it) |
|
562 |
+ withr::with_file(tmp_dir, { |
|
563 |
+ .write_recalibr_map(recalibr_map_smpl1_kf, tmp_dir) |
|
564 |
+ expect_true(fs::dir_exists(tmp_dir)) |
|
565 |
+ expect_true(length(fs::dir_ls(tmp_dir)) == 1) |
|
566 |
+ }) |
|
567 |
+ ## Works if folder already exists |
|
568 |
+ withr::with_file(tmp_dir, { |
|
569 |
+ fs::dir_create(tmp_dir) |
|
570 |
+ .write_recalibr_map(recalibr_map_smpl1_kf, tmp_dir) |
|
571 |
+ expect_true(fs::dir_exists(tmp_dir)) |
|
572 |
+ expect_true(length(fs::dir_ls(tmp_dir)) == 1) |
|
573 |
+ }) |
|
574 |
+}) |
|
575 |
+ |
|
576 |
+test_that(".write_recalibr_map works if path provided is file", { |
|
577 |
+ ## Works for accepted formats |
|
578 |
+ for (ext in c("tsv", "csv", "txt", |
|
579 |
+ paste("tsv", .compressed_formats(), sep = "."), |
|
580 |
+ paste("csv", .compressed_formats(), sep = "."), |
|
581 |
+ paste("txt", .compressed_formats(), sep = "."))) { |
|
582 |
+ file_name <- paste0("recalibration_map.", ext) |
|
583 |
+ tmp_file <- fs::path(tempdir(), file_name) |
|
584 |
+ withr::with_file(tmp_file, { |
|
585 |
+ .write_recalibr_map(recalibr_map_smpl1_kf, tmp_file) |
|
586 |
+ expect_true(fs::file_exists(tmp_file)) |
|
587 |
+ }) |
|
588 |
+ } |
|
589 |
+ ## Changes extension for unsupported extension |
|
590 |
+ file_name <- "recalibration_map.xslx" |
|
591 |
+ expected_filename <- "recalibration_map.tsv.gz" |
|
592 |
+ tmp_file <- fs::path(tempdir(), file_name) |
|
593 |
+ withr::with_options(list(ISAnalytics.verbose = TRUE), { |
|
594 |
+ expect_message({ |
|
595 |
+ expect_message({ |
|
596 |
+ withr::with_file(tmp_file, { |
|
597 |
+ .write_recalibr_map(recalibr_map_smpl1_kf, tmp_file) |
|
598 |
+ expect_false(fs::file_exists(tmp_file)) |
|
599 |
+ expect_true(fs::file_exists(fs::path(tempdir(), expected_filename))) |
|
600 |
+ }) |
|
601 |
+ }, class = "rec_unsupp_ext") |
|
602 |
+ }) |
|
603 |
+ }) |
|
604 |
+ file_name <- "recalibration_map.gz" |
|
605 |
+ tmp_file <- fs::path(tempdir(), file_name) |
|
606 |
+ withr::with_options(list(ISAnalytics.verbose = TRUE), { |
|
607 |
+ expect_message({ |
|
608 |
+ expect_message({ |
|
609 |
+ withr::with_file(tmp_file, { |
|
610 |
+ .write_recalibr_map(recalibr_map_smpl1_kf, tmp_file) |
|
611 |
+ expect_false(fs::file_exists(tmp_file)) |
|
612 |
+ expect_true(fs::file_exists(fs::path(tempdir(), expected_filename))) |
|
613 |
+ }) |
|
614 |
+ }, class = "rec_unsupp_ext") |
|
615 |
+ }) |
|
616 |
+ }) |
|
617 |
+}) |
|
618 |
+ |
|
389 | 619 |
#------------------------------------------------------------------------------# |
390 | 620 |
# Tests compute_near_integrations |
391 | 621 |
#------------------------------------------------------------------------------# |
392 |
-## TEST VALUES |
|
393 | 622 |
test_that("compute_near_integrations produces correct output for total", { |
623 |
+ withr::local_options(list(ISAnalytics.mandatory_is_vars = "default")) |
|
394 | 624 |
total_simple <- sample_group1 %>% dplyr::bind_rows(sample_group2) |
395 | 625 |
total_mult <- sample_group_mult1 %>% dplyr::bind_rows(sample_group_mult2) |
396 | 626 |
res <- compute_near_integrations( |
... | ... |
@@ -420,3 +650,16 @@ test_that("compute_near_integrations produces correct output for total", { |
420 | 650 |
ignore_attr = TRUE |
421 | 651 |
) |
422 | 652 |
}) |
653 |
+ |
|
654 |
+test_that("compute_near_integrations warns deprecation", { |
|
655 |
+ expect_deprecated({ |
|
656 |
+ res <- compute_near_integrations( |
|
657 |
+ x = sample_group1, |
|
658 |
+ keep_criteria = "keep_first", |
|
659 |
+ max_value_column = "Value", |
|
660 |
+ value_columns = "Value", |
|
661 |
+ strand_specific = TRUE, |
|
662 |
+ map_as_file = FALSE |
|
663 |
+ ) |
|
664 |
+ }) |
|
665 |
+}) |