... | ... |
@@ -2,6 +2,20 @@ |
2 | 2 |
title: "NEWS" |
3 | 3 |
output: github_document |
4 | 4 |
--- |
5 |
+# ISAnalytics <version> |
|
6 |
+ |
|
7 |
+## BUG FIXES AND MINOR CHANGES |
|
8 |
+ |
|
9 |
+* Fixed minor issue in `compute_near_integrations()` - function errored when |
|
10 |
+`report_path` argument was set to `NULL` |
|
11 |
+* Fixed dplyr warning in `integration_alluvial_plot()` internals |
|
12 |
+* Fixed issue with report of VISPA2 stats - report failed due to minor error |
|
13 |
+in rmd fragment |
|
14 |
+* Internals of `remove_collisions()` use again dplyr internally for joining |
|
15 |
+and grouping operations - needed because of performance issues with data.table |
|
16 |
+* `fisher_scatterplot()` has 2 new arguments that allow the disabling of |
|
17 |
+highlighting for some genes even if their p-value is under the threshold |
|
18 |
+ |
|
5 | 19 |
# ISAnalytics 1.5.4 (2022-04-20) |
6 | 20 |
|
7 | 21 |
## MAJOR CHANGES |
... | ... |
@@ -146,11 +146,14 @@ remove_collisions <- function(x, |
146 | 146 |
replicate_n_col <- req_tag_cols[eval(sym("tag")) == |
147 | 147 |
"pcr_replicate"][["names"]] |
148 | 148 |
pool_col <- req_tag_cols[eval(sym("tag")) == "pool_id"][["names"]] |
149 |
- joined <- association_file[pre_process, on = pcr_col] |
|
150 |
- joined <- joined[, mget(c( |
|
151 |
- colnames(pre_process), date_col, |
|
152 |
- replicate_n_col, independent_sample_id, pool_col |
|
153 |
- ))] |
|
149 |
+ joined <- pre_process %>% |
|
150 |
+ dplyr::left_join(association_file, by = pcr_col) %>% |
|
151 |
+ dplyr::select(dplyr::all_of( |
|
152 |
+ c( |
|
153 |
+ colnames(pre_process), date_col, |
|
154 |
+ replicate_n_col, independent_sample_id, pool_col |
|
155 |
+ ) |
|
156 |
+ )) |
|
154 | 157 |
if (verbose) { |
155 | 158 |
rlang::inform("Identifying collisions...") |
156 | 159 |
} |
... | ... |
@@ -2893,7 +2893,8 @@ |
2893 | 2893 |
distinct_is <- purrr::map2_int(matrices, correct, ~ { |
2894 | 2894 |
if (.y) { |
2895 | 2895 |
.x %>% |
2896 |
- dplyr::distinct(dplyr::across(dplyr::all_of(mandatory_IS_vars()))) %>% |
|
2896 |
+ dplyr::distinct(dplyr::across( |
|
2897 |
+ dplyr::all_of(mandatory_IS_vars()))) %>% |
|
2897 | 2898 |
nrow() |
2898 | 2899 |
} else { |
2899 | 2900 |
NA_integer_ |
... | ... |
@@ -3102,17 +3103,19 @@ |
3102 | 3103 |
#' @importFrom rlang .data sym |
3103 | 3104 |
.check_same_info <- function(association_file, df, req_tag_cols, |
3104 | 3105 |
indep_sample_id) { |
3105 |
- association_file <- data.table::setDT(association_file) |
|
3106 |
- df <- data.table::setDT(df) |
|
3107 |
- req_tag_cols <- data.table::setDT(req_tag_cols) |
|
3108 |
- joined <- df[association_file, on = pcr_id_column()] |
|
3109 |
- predicate <- purrr::map(mandatory_IS_vars(), ~ { |
|
3110 |
- rlang::expr(!is.na(!!sym(.x))) |
|
3111 |
- }) %>% purrr::reduce(~ rlang::expr(!!.x & !!.y)) |
|
3106 |
+ data.table::setDT(req_tag_cols) |
|
3107 |
+ joined <- association_file %>% |
|
3108 |
+ dplyr::left_join(df, by = pcr_id_column()) |
|
3112 | 3109 |
proj_col_name <- req_tag_cols[eval(rlang::expr( |
3113 | 3110 |
!!sym("tag") == "project_id" |
3114 | 3111 |
))][["names"]] |
3115 |
- projects <- unique(joined[eval(predicate), ][[proj_col_name]]) |
|
3112 |
+ projects <- joined %>% |
|
3113 |
+ dplyr::filter(!dplyr::if_all( |
|
3114 |
+ .cols = dplyr::all_of(mandatory_IS_vars()), |
|
3115 |
+ .fns = is.na |
|
3116 |
+ )) %>% |
|
3117 |
+ dplyr::distinct(.data[[proj_col_name]]) %>% |
|
3118 |
+ dplyr::pull(.data[[proj_col_name]]) |
|
3116 | 3119 |
wanted_tags <- c( |
3117 | 3120 |
"subject", "tissue", "cell_marker", "tp_days" |
3118 | 3121 |
) |
... | ... |
@@ -3121,15 +3124,22 @@ |
3121 | 3124 |
wanted <- wanted[eval(sym("tag")) %in% wanted_tags & |
3122 | 3125 |
eval(sym("names")) %in% colnames(association_file), ] |
3123 | 3126 |
wanted <- data.table::rbindlist(list(req_tag_cols, wanted)) |
3124 |
- predicate <- purrr::map(mandatory_IS_vars(), ~ { |
|
3125 |
- rlang::expr(is.na(!!sym(.x))) |
|
3126 |
- }) %>% purrr::reduce(~ rlang::expr(!!.x & !!.y)) |
|
3127 |
- predicate <- rlang::expr(!!predicate & !!sym(proj_col_name) %in% projects) |
|
3128 |
- missing_in_df <- unique(joined[ |
|
3129 |
- eval(predicate), |
|
3130 |
- mget(unique(c(wanted$names, indep_sample_id))) |
|
3131 |
- ]) |
|
3132 |
- reduced_af <- association_file[eval(sym(proj_col_name)) %in% projects] |
|
3127 |
+ missing_in_df <- joined %>% |
|
3128 |
+ dplyr::filter( |
|
3129 |
+ dplyr::if_all( |
|
3130 |
+ .cols = dplyr::all_of(mandatory_IS_vars()), |
|
3131 |
+ .fns = is.na |
|
3132 |
+ ) & .data[[proj_col_name]] %in% projects |
|
3133 |
+ ) %>% |
|
3134 |
+ dplyr::distinct(dplyr::across( |
|
3135 |
+ dplyr::all_of(c(wanted$names, indep_sample_id)) |
|
3136 |
+ )) |
|
3137 |
+ reduced_af <- association_file %>% |
|
3138 |
+ dplyr::filter( |
|
3139 |
+ .data[[proj_col_name]] %in% projects |
|
3140 |
+ ) |
|
3141 |
+ data.table::setDT(reduced_af) |
|
3142 |
+ data.table::setDT(missing_in_df) |
|
3133 | 3143 |
list(miss = missing_in_df, reduced_af = reduced_af) |
3134 | 3144 |
} |
3135 | 3145 |
|
... | ... |
@@ -3141,14 +3151,21 @@ |
3141 | 3151 |
# @return A named list containing the splitted joined_df for collisions and |
3142 | 3152 |
# non-collisions |
3143 | 3153 |
.identify_independent_samples <- function(joined, indep_sample_id) { |
3144 |
- temp <- joined[, mget(c(mandatory_IS_vars(), indep_sample_id))] |
|
3145 |
- temp <- temp[, unique(.SD, by = indep_sample_id), |
|
3146 |
- by = eval(mandatory_IS_vars()) |
|
3147 |
- ] |
|
3148 |
- temp <- temp[, .N, by = eval(mandatory_IS_vars())] |
|
3149 |
- temp <- temp[eval(sym("N")) > 1, mget(mandatory_IS_vars())] |
|
3150 |
- non_collisions <- joined[!temp, on = mandatory_IS_vars()] |
|
3151 |
- collisions <- joined[temp, on = mandatory_IS_vars()] |
|
3154 |
+ indep_syms <- purrr::map(indep_sample_id, rlang::sym) |
|
3155 |
+ temp <- joined %>% |
|
3156 |
+ dplyr::select(dplyr::all_of( |
|
3157 |
+ c(mandatory_IS_vars(), indep_sample_id) |
|
3158 |
+ )) %>% |
|
3159 |
+ dplyr::group_by(dplyr::across(dplyr::all_of(mandatory_IS_vars()))) %>% |
|
3160 |
+ dplyr::summarise(N = dplyr::n_distinct(!!!indep_syms), |
|
3161 |
+ .groups = "drop") %>% |
|
3162 |
+ dplyr::filter(.data$N > 1) |
|
3163 |
+ non_collisions <- joined %>% |
|
3164 |
+ dplyr::anti_join(temp, by = mandatory_IS_vars()) |
|
3165 |
+ collisions <- joined %>% |
|
3166 |
+ dplyr::semi_join(temp, by = mandatory_IS_vars()) |
|
3167 |
+ data.table::setDT(non_collisions) |
|
3168 |
+ data.table::setDT(collisions) |
|
3152 | 3169 |
list(collisions = collisions, non_collisions = non_collisions) |
3153 | 3170 |
} |
3154 | 3171 |
|
... | ... |
@@ -3183,7 +3200,9 @@ |
3183 | 3200 |
} |
3184 | 3201 |
winning_sample <- x[1, mget(ind_sample_key)] |
3185 | 3202 |
# Filter the winning rows |
3186 |
- x <- x[winning_sample, on = ind_sample_key] |
|
3203 |
+ x <- x %>% |
|
3204 |
+ dplyr::semi_join(winning_sample, by = ind_sample_key) |
|
3205 |
+ data.table::setDT(x) |
|
3187 | 3206 |
return(list(data = x, check = TRUE)) |
3188 | 3207 |
} |
3189 | 3208 |
|
... | ... |
@@ -3207,14 +3226,18 @@ |
3207 | 3226 |
# * $check: a logical value indicating whether the analysis was successful or |
3208 | 3227 |
# not (and therefore there is the need to perform the next step) |
3209 | 3228 |
.discriminate_by_replicate <- function(x, repl_col, ind_sample_key) { |
3210 |
- temp <- x[, .N, by = eval(ind_sample_key)] |
|
3211 |
- temp <- dplyr::arrange(temp, dplyr::desc(.data$N)) |
|
3212 |
- |
|
3229 |
+ temp <- x %>% |
|
3230 |
+ dplyr::group_by(dplyr::across(dplyr::all_of(ind_sample_key))) %>% |
|
3231 |
+ dplyr::summarise(N = dplyr::n(), .groups = "drop") %>% |
|
3232 |
+ dplyr::arrange(dplyr::desc(.data$N)) |
|
3233 |
+ data.table::setDT(temp) |
|
3213 | 3234 |
if (length(temp$N) != 1 & !temp$N[1] > temp$N[2]) { |
3214 | 3235 |
return(list(data = x, check = FALSE)) |
3215 | 3236 |
} |
3216 | 3237 |
temp <- temp[1, mget(ind_sample_key)] |
3217 |
- x <- x[temp, on = ind_sample_key] |
|
3238 |
+ x <- x %>% |
|
3239 |
+ dplyr::semi_join(temp, by = ind_sample_key) |
|
3240 |
+ data.table::setDT(x) |
|
3218 | 3241 |
return(list(data = x, check = TRUE)) |
3219 | 3242 |
} |
3220 | 3243 |
|
... | ... |
@@ -3243,14 +3266,19 @@ |
3243 | 3266 |
reads_ratio, |
3244 | 3267 |
seqCount_col, |
3245 | 3268 |
ind_sample_key) { |
3246 |
- temp <- x[, list(sum = sum(.SD[[seqCount_col]])), by = eval(ind_sample_key)] |
|
3247 |
- temp <- dplyr::arrange(temp, dplyr::desc(.data$sum)) |
|
3269 |
+ temp <- x %>% |
|
3270 |
+ dplyr::group_by(dplyr::across(dplyr::all_of(ind_sample_key))) %>% |
|
3271 |
+ dplyr::summarise(sum = sum(.data[[seqCount_col]]), .groups = "drop") %>% |
|
3272 |
+ dplyr::arrange(dplyr::desc(.data$sum)) |
|
3273 |
+ data.table::setDT(temp) |
|
3248 | 3274 |
ratio <- temp$sum[1] / temp$sum[2] |
3249 | 3275 |
if (!ratio > reads_ratio) { |
3250 | 3276 |
return(list(data = x, check = FALSE)) |
3251 | 3277 |
} |
3252 | 3278 |
temp <- temp[1, mget(ind_sample_key)] |
3253 |
- x <- x[temp, on = ind_sample_key] |
|
3279 |
+ x <- x %>% |
|
3280 |
+ dplyr::semi_join(temp, by = ind_sample_key) |
|
3281 |
+ data.table::setDT(x) |
|
3254 | 3282 |
return(list(data = x, check = TRUE)) |
3255 | 3283 |
} |
3256 | 3284 |
|
... | ... |
@@ -137,7 +137,12 @@ outlier_filter <- function(metadata, |
137 | 137 |
call_test <- function(test, vargs, df, test_name) { |
138 | 138 |
test_args_names <- do.call(rlang::fn_fmls_names, args = list(test)) |
139 | 139 |
test_args_names <- test_args_names[test_args_names != "metadata"] |
140 |
- test_args <- vargs[names(vargs) %in% test_args_names] |
|
140 |
+ test_args <- if ("report_path" %in% test_args_names) { |
|
141 |
+ append(vargs[names(vargs) %in% test_args_names], |
|
142 |
+ list(report_path = report_path)) |
|
143 |
+ } else { |
|
144 |
+ vargs[names(vargs) %in% test_args_names] |
|
145 |
+ } |
|
141 | 146 |
f_call <- rlang::call2(test, metadata = metadata, !!!test_args) |
142 | 147 |
result <- rlang::eval_tidy(f_call) |
143 | 148 |
.validate_outlier_output_format( |
... | ... |
@@ -281,6 +281,31 @@ clinical_relevant_suspicious_genes <- function() { |
281 | 281 |
#' Plots results of Fisher's exact test on gene frequency obtained via |
282 | 282 |
#' `gene_frequency_fisher()` as a scatterplot. |
283 | 283 |
#' |
284 |
+#' @details |
|
285 |
+#' ## Specifying genes to avoid highlighting |
|
286 |
+#' |
|
287 |
+#' In some cases, users might want to avoid highlighting certain genes |
|
288 |
+#' even if their p-value is below the threshold. To do so, use the |
|
289 |
+#' argument `do_not_highlight`: character vectors are appropriate for specific |
|
290 |
+#' genes that are to be excluded, expressions or lambdas allow a finer control. |
|
291 |
+#' For example we can supply: |
|
292 |
+#' ```{r eval=FALSE} |
|
293 |
+#' expr <- rlang::expr(!stringr::str_starts(GeneName, "MIR") & |
|
294 |
+#' average_TxLen_1 >= 300) |
|
295 |
+#' ``` |
|
296 |
+#' |
|
297 |
+#' with this expression, genes that have a p-value < threshold and start with |
|
298 |
+#' "MIR" or have an average_TxLen_1 lower than 300 are excluded from the |
|
299 |
+#' highlighted points. |
|
300 |
+#' NOTE: keep in mind that expressions are evaluated inside a `dplyr::filter` |
|
301 |
+#' context. |
|
302 |
+#' |
|
303 |
+#' Similarly, lambdas are passed to the filtering function but only operate |
|
304 |
+#' on the column containing the gene symbol. |
|
305 |
+#' ```{r eval=FALSE} |
|
306 |
+#' lambda <- ~ stringr::str_starts(.x, "MIR") |
|
307 |
+#' ``` |
|
308 |
+#' |
|
284 | 309 |
#' @param fisher_df Test results obtained via `gene_frequency_fisher()` |
285 | 310 |
#' @param p_value_col Name of the column containing the p-value to consider |
286 | 311 |
#' @param annot_threshold Annotate with a different color if a point is below |
... | ... |
@@ -288,6 +313,14 @@ clinical_relevant_suspicious_genes <- function() { |
288 | 313 |
#' @param annot_color The color in which points below the threshold should be |
289 | 314 |
#' annotated |
290 | 315 |
#' @param gene_sym_col The name of the column containing the gene symbol |
316 |
+#' @param do_not_highlight Either `NULL`, a character vector, an expression |
|
317 |
+#' or a purrr-style lambda. Tells the function to ignore the highlighting |
|
318 |
+#' and labeling of these genes even if their p-value is below the threshold. |
|
319 |
+#' See details. |
|
320 |
+#' @param keep_not_highlighted If present, how should not highlighted genes |
|
321 |
+#' be treated? If set to `TRUE` points are plotted and colored with the |
|
322 |
+#' chosen color scale. If set to `FALSE` the points are removed entirely from |
|
323 |
+#' the plot. |
|
291 | 324 |
#' |
292 | 325 |
#' @family Plotting functions |
293 | 326 |
#' @return A plot |
... | ... |
@@ -310,11 +343,51 @@ fisher_scatterplot <- function(fisher_df, |
310 | 343 |
p_value_col = "Fisher_p_value_fdr", |
311 | 344 |
annot_threshold = 0.05, |
312 | 345 |
annot_color = "red", |
313 |
- gene_sym_col = "GeneName") { |
|
314 |
- below_threshold <- fisher_df %>% |
|
346 |
+ gene_sym_col = "GeneName", |
|
347 |
+ do_not_highlight = NULL, |
|
348 |
+ keep_not_highlighted = TRUE) { |
|
349 |
+ stopifnot(is.null(do_not_highlight) || is.character(do_not_highlight) |
|
350 |
+ || rlang::is_expression(do_not_highlight)) |
|
351 |
+ if (is.null(do_not_highlight)) { |
|
352 |
+ below_threshold <- fisher_df %>% |
|
315 | 353 |
dplyr::filter(.data[[p_value_col]] < annot_threshold) |
316 |
- above_threshold <- fisher_df %>% |
|
354 |
+ above_threshold <- fisher_df %>% |
|
317 | 355 |
dplyr::filter(.data[[p_value_col]] >= annot_threshold) |
356 |
+ } else if (is.character(do_not_highlight)) { |
|
357 |
+ below_threshold <- fisher_df %>% |
|
358 |
+ dplyr::filter(.data[[p_value_col]] < annot_threshold & |
|
359 |
+ !.data[[gene_sym_col]] %in% do_not_highlight) |
|
360 |
+ if (keep_not_highlighted) { |
|
361 |
+ above_threshold <- fisher_df %>% |
|
362 |
+ dplyr::anti_join(below_threshold, by = gene_sym_col) |
|
363 |
+ } else { |
|
364 |
+ above_threshold <- fisher_df %>% |
|
365 |
+ dplyr::filter(.data[[p_value_col]] >= annot_threshold) |
|
366 |
+ } |
|
367 |
+ } else if (rlang::is_formula(do_not_highlight)) { |
|
368 |
+ below_threshold <- fisher_df %>% |
|
369 |
+ dplyr::filter(.data[[p_value_col]] < annot_threshold & |
|
370 |
+ !rlang::as_function(do_not_highlight)( |
|
371 |
+ .data[[gene_sym_col]])) |
|
372 |
+ if (keep_not_highlighted) { |
|
373 |
+ above_threshold <- fisher_df %>% |
|
374 |
+ dplyr::anti_join(below_threshold, by = gene_sym_col) |
|
375 |
+ } else { |
|
376 |
+ above_threshold <- fisher_df %>% |
|
377 |
+ dplyr::filter(.data[[p_value_col]] >= annot_threshold) |
|
378 |
+ } |
|
379 |
+ } else { |
|
380 |
+ below_threshold <- fisher_df %>% |
|
381 |
+ dplyr::filter(.data[[p_value_col]] < annot_threshold & |
|
382 |
+ (rlang::eval_tidy(do_not_highlight))) |
|
383 |
+ if (keep_not_highlighted) { |
|
384 |
+ above_threshold <- fisher_df %>% |
|
385 |
+ dplyr::anti_join(below_threshold, by = gene_sym_col) |
|
386 |
+ } else { |
|
387 |
+ above_threshold <- fisher_df %>% |
|
388 |
+ dplyr::filter(.data[[p_value_col]] >= annot_threshold) |
|
389 |
+ } |
|
390 |
+ } |
|
318 | 391 |
plot <- ggplot2::ggplot( |
319 | 392 |
above_threshold, |
320 | 393 |
ggplot2::aes_( |
... | ... |
@@ -323,7 +396,7 @@ fisher_scatterplot <- function(fisher_df, |
323 | 396 |
color = ggplot2::sym(p_value_col) |
324 | 397 |
) |
325 | 398 |
) + |
326 |
- ggplot2::geom_point() + |
|
399 |
+ ggplot2::geom_point(alpha = 0.65) + |
|
327 | 400 |
ggplot2::geom_point(data = below_threshold, color = annot_color) + |
328 | 401 |
ggplot2::theme_bw() + |
329 | 402 |
ggplot2::labs( |
... | ... |
@@ -879,9 +952,7 @@ top_abund_tableGrob <- function(df, |
879 | 952 |
tops_by_x <- purrr::map(distinct_x, function(x) { |
880 | 953 |
tmp <- top %>% |
881 | 954 |
dplyr::filter( |
882 |
- dplyr::across( |
|
883 |
- dplyr::all_of(by), ~ .x == x |
|
884 |
- ) |
|
955 |
+ dplyr::if_all(.cols = dplyr::all_of(by), .fns = ~ .x == x) |
|
885 | 956 |
) %>% |
886 | 957 |
dplyr::select(-dplyr::all_of(by)) |
887 | 958 |
if (nrow(tmp) < top_n) { |
... | ... |
@@ -6,7 +6,6 @@ output: html_document |
6 | 6 |
```{r include=FALSE} |
7 | 7 |
knitr::opts_chunk$set(echo = FALSE, warning = FALSE) |
8 | 8 |
options(DT.warn.size = FALSE) |
9 |
-utils::globalVariables("where") |
|
10 | 9 |
``` |
11 | 10 |
|
12 | 11 |
VISPA2 stats info {data-orientation=rows} |
... | ... |
@@ -62,7 +61,8 @@ cat("*Nothing to report*") |
62 | 61 |
|
63 | 62 |
```{r eval=!is.null(iss_stats_miss) && nrow(iss_stats_miss) > 0, echo=FALSE} |
64 | 63 |
iss_stats_miss <- iss_stats_miss %>% |
65 |
- dplyr::mutate(dplyr::across(where(is.character), as.factor)) |
|
64 |
+ dplyr::mutate(dplyr::across( |
|
65 |
+ tidyselect::vars_select_helpers$where(is.character), as.factor)) |
|
66 | 66 |
|
67 | 67 |
datatable(iss_stats_miss, |
68 | 68 |
class = "stripe", |
... | ... |
@@ -9,7 +9,9 @@ fisher_scatterplot( |
9 | 9 |
p_value_col = "Fisher_p_value_fdr", |
10 | 10 |
annot_threshold = 0.05, |
11 | 11 |
annot_color = "red", |
12 |
- gene_sym_col = "GeneName" |
|
12 |
+ gene_sym_col = "GeneName", |
|
13 |
+ do_not_highlight = NULL, |
|
14 |
+ keep_not_highlighted = TRUE |
|
13 | 15 |
) |
14 | 16 |
} |
15 | 17 |
\arguments{ |
... | ... |
@@ -24,6 +26,16 @@ the significance threshold. Single numerical value.} |
24 | 26 |
annotated} |
25 | 27 |
|
26 | 28 |
\item{gene_sym_col}{The name of the column containing the gene symbol} |
29 |
+ |
|
30 |
+\item{do_not_highlight}{Either \code{NULL}, a character vector, an expression |
|
31 |
+or a purrr-style lambda. Tells the function to ignore the highlighting |
|
32 |
+and labeling of these genes even if their p-value is below the threshold. |
|
33 |
+See details.} |
|
34 |
+ |
|
35 |
+\item{keep_not_highlighted}{If present, how should not highlighted genes |
|
36 |
+be treated? If set to \code{TRUE} points are plotted and colored with the |
|
37 |
+chosen color scale. If set to \code{FALSE} the points are removed entirely from |
|
38 |
+the plot.} |
|
27 | 39 |
} |
28 | 40 |
\value{ |
29 | 41 |
A plot |
... | ... |
@@ -33,6 +45,28 @@ A plot |
33 | 45 |
Plots results of Fisher's exact test on gene frequency obtained via |
34 | 46 |
\code{gene_frequency_fisher()} as a scatterplot. |
35 | 47 |
} |
48 |
+\details{ |
|
49 |
+\subsection{Specifying genes to avoid highlighting}{ |
|
50 |
+ |
|
51 |
+In some cases, users might want to avoid highlighting certain genes |
|
52 |
+even if their p-value is below the threshold. To do so, use the |
|
53 |
+argument \code{do_not_highlight}: character vectors are appropriate for specific |
|
54 |
+genes that are to be excluded, expressions or lambdas allow a finer control. |
|
55 |
+For example we can supply:\if{html}{\out{<div class="sourceCode r">}}\preformatted{expr <- rlang::expr(!stringr::str_starts(GeneName, "MIR") & |
|
56 |
+ average_TxLen_1 >= 300) |
|
57 |
+}\if{html}{\out{</div>}} |
|
58 |
+ |
|
59 |
+with this expression, genes that have a p-value < threshold and start with |
|
60 |
+"MIR" or have an average_TxLen_1 lower than 300 are excluded from the |
|
61 |
+highlighted points. |
|
62 |
+NOTE: keep in mind that expressions are evaluated inside a \code{dplyr::filter} |
|
63 |
+context. |
|
64 |
+ |
|
65 |
+Similarly, lambdas are passed to the filtering function but only operate |
|
66 |
+on the column containing the gene symbol.\if{html}{\out{<div class="sourceCode r">}}\preformatted{lambda <- ~ stringr::str_starts(.x, "MIR") |
|
67 |
+}\if{html}{\out{</div>}} |
|
68 |
+} |
|
69 |
+} |
|
36 | 70 |
\examples{ |
37 | 71 |
data("integration_matrices", package = "ISAnalytics") |
38 | 72 |
data("association_file", package = "ISAnalytics") |
... | ... |
@@ -113,13 +113,13 @@ test_that(".check_same_info reports only projects of interest", { |
113 | 113 |
# Tests .identify_independent_samples |
114 | 114 |
#------------------------------------------------------------------------------# |
115 | 115 |
test_that(".identify_independent_samples splits joined_df", { |
116 |
- joined <- minimal_test_coll_meta[minimal_test_coll, |
|
117 |
- on = "CompleteAmplificationID" |
|
118 |
- ] |
|
119 |
- joined <- joined[, mget(c( |
|
116 |
+ joined <- minimal_test_coll %>% |
|
117 |
+ dplyr::left_join(minimal_test_coll_meta, |
|
118 |
+ by = "CompleteAmplificationID") %>% |
|
119 |
+ dplyr::select(dplyr::all_of(c( |
|
120 | 120 |
colnames(minimal_test_coll), "SequencingDate", |
121 | 121 |
"ReplicateNumber", "ProjectID", "SubjectID" |
122 |
- ))] |
|
122 |
+ ))) |
|
123 | 123 |
|
124 | 124 |
split <- .identify_independent_samples(joined, |
125 | 125 |
indep_sample_id = c( |
... | ... |
@@ -795,7 +795,8 @@ test_that(".collisions_check_input_af works as expected", { |
795 | 795 |
expect_error( |
796 | 796 |
{ |
797 | 797 |
checks <- .collisions_check_input_af(minimal_test_coll_meta %>% |
798 |
- dplyr::mutate(SequencingDate = as.character(.data$SequencingDate)), |
|
798 |
+ dplyr::mutate(SequencingDate = as.character( |
|
799 |
+ .data$SequencingDate)), |
|
799 | 800 |
date_col = "SequencingDate", |
800 | 801 |
independent_sample_id = c("SubjectID") |
801 | 802 |
) |