Also fixed some minor issues in docs and functions. Added site with pkgdown.
8 | 11 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,46 @@ |
1 |
+on: |
|
2 |
+ push: |
|
3 |
+ branches: master |
|
4 |
+ |
|
5 |
+name: pkgdown |
|
6 |
+ |
|
7 |
+jobs: |
|
8 |
+ pkgdown: |
|
9 |
+ runs-on: macOS-latest |
|
10 |
+ env: |
|
11 |
+ GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} |
|
12 |
+ steps: |
|
13 |
+ - uses: actions/checkout@v2 |
|
14 |
+ |
|
15 |
+ - uses: r-lib/actions/setup-r@master |
|
16 |
+ |
|
17 |
+ - uses: r-lib/actions/setup-pandoc@master |
|
18 |
+ |
|
19 |
+ - name: Query dependencies |
|
20 |
+ run: | |
|
21 |
+ install.packages('remotes') |
|
22 |
+ saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) |
|
23 |
+ writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") |
|
24 |
+ shell: Rscript {0} |
|
25 |
+ |
|
26 |
+ - name: Cache R packages |
|
27 |
+ uses: actions/cache@v2 |
|
28 |
+ with: |
|
29 |
+ path: ${{ env.R_LIBS_USER }} |
|
30 |
+ key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} |
|
31 |
+ restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- |
|
32 |
+ |
|
33 |
+ - name: Install dependencies |
|
34 |
+ run: | |
|
35 |
+ remotes::install_deps(dependencies = TRUE) |
|
36 |
+ install.packages("pkgdown") |
|
37 |
+ shell: Rscript {0} |
|
38 |
+ |
|
39 |
+ - name: Install package |
|
40 |
+ run: R CMD INSTALL . |
|
41 |
+ |
|
42 |
+ - name: Deploy package |
|
43 |
+ run: | |
|
44 |
+ git config --local user.email "actions@github.com" |
|
45 |
+ git config --local user.name "GitHub Actions" |
|
46 |
+ Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' |
... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
Package: ISAnalytics |
2 | 2 |
Title: Analyze gene therapy vector insertion sites data identified from genomics next generation sequencing reads for clonal tracking studies |
3 |
-Version: 0.99.10 |
|
3 |
+Version: 0.99.11 |
|
4 | 4 |
Date: 2020-07-03 |
5 | 5 |
Authors@R: c( |
6 | 6 |
person(given = "Andrea", |
... | ... |
@@ -53,7 +53,8 @@ Suggests: |
53 | 53 |
sessioninfo, |
54 | 54 |
rmarkdown, |
55 | 55 |
roxygen2, |
56 |
- psych |
|
56 |
+ psych, |
|
57 |
+ vegan |
|
57 | 58 |
VignetteBuilder: knitr |
58 | 59 |
RdMacros: |
59 | 60 |
lifecycle |
... | ... |
@@ -23,6 +23,8 @@ export(realign_after_collisions) |
23 | 23 |
export(reduced_AF_columns) |
24 | 24 |
export(remove_collisions) |
25 | 25 |
export(separate_quant_matrices) |
26 |
+export(threshold_filter) |
|
27 |
+export(top_integrations) |
|
26 | 28 |
export(unzip_file_system) |
27 | 29 |
import(BiocParallel) |
28 | 30 |
import(dplyr) |
... | ... |
@@ -40,6 +42,7 @@ importFrom(dplyr,arrange) |
40 | 42 |
importFrom(dplyr,bind_cols) |
41 | 43 |
importFrom(dplyr,bind_rows) |
42 | 44 |
importFrom(dplyr,contains) |
45 |
+importFrom(dplyr,desc) |
|
43 | 46 |
importFrom(dplyr,distinct) |
44 | 47 |
importFrom(dplyr,filter) |
45 | 48 |
importFrom(dplyr,full_join) |
... | ... |
@@ -47,13 +50,12 @@ importFrom(dplyr,group_by) |
47 | 50 |
importFrom(dplyr,group_split) |
48 | 51 |
importFrom(dplyr,inner_join) |
49 | 52 |
importFrom(dplyr,intersect) |
50 |
-importFrom(dplyr,left_join) |
|
51 | 53 |
importFrom(dplyr,mutate) |
52 | 54 |
importFrom(dplyr,rename) |
53 | 55 |
importFrom(dplyr,select) |
54 | 56 |
importFrom(dplyr,semi_join) |
55 | 57 |
importFrom(dplyr,slice) |
56 |
-importFrom(dplyr,summarise) |
|
58 |
+importFrom(dplyr,slice_head) |
|
57 | 59 |
importFrom(forcats,as_factor) |
58 | 60 |
importFrom(forcats,fct_inseq) |
59 | 61 |
importFrom(fs,as_fs_path) |
... | ... |
@@ -82,6 +84,7 @@ importFrom(purrr,map_dbl) |
82 | 84 |
importFrom(purrr,map_dfr) |
83 | 85 |
importFrom(purrr,map_lgl) |
84 | 86 |
importFrom(purrr,pmap) |
87 |
+importFrom(purrr,pmap_chr) |
|
85 | 88 |
importFrom(purrr,pmap_df) |
86 | 89 |
importFrom(purrr,pmap_dfr) |
87 | 90 |
importFrom(purrr,reduce) |
... | ... |
@@ -100,10 +103,12 @@ importFrom(rlang,eval_tidy) |
100 | 103 |
importFrom(rlang,expr) |
101 | 104 |
importFrom(rlang,is_function) |
102 | 105 |
importFrom(rlang,is_installed) |
106 |
+importFrom(rlang,parse_expr) |
|
103 | 107 |
importFrom(stringr,str_detect) |
104 | 108 |
importFrom(stringr,str_extract) |
105 | 109 |
importFrom(stringr,str_extract_all) |
106 | 110 |
importFrom(stringr,str_pad) |
111 |
+importFrom(stringr,str_replace) |
|
107 | 112 |
importFrom(stringr,str_replace_all) |
108 | 113 |
importFrom(stringr,str_split) |
109 | 114 |
importFrom(tibble,add_column) |
... | ... |
@@ -1,5 +1,17 @@ |
1 | 1 |
# ISAnalytics News |
2 | 2 |
|
3 |
+## Changes in version 0.99.11 (2020-09-21) |
|
4 |
+ |
|
5 |
+#### NEW FEATURES |
|
6 |
+ |
|
7 |
+* Added analysis functions `threshold_filter`, `top_integrations` |
|
8 |
+* Added support for multi-quantification matrices in `compute_abundance` |
|
9 |
+ |
|
10 |
+#### MINOR FIXES |
|
11 |
+ |
|
12 |
+* Fixed bug in `comparison_matrix` that ignored custom column names |
|
13 |
+* Fixed issues in some documentation pages |
|
14 |
+ |
|
3 | 15 |
## Changes in version 0.99.10 (2020-09-14) |
4 | 16 |
|
5 | 17 |
ISanalytics is officially on bioconductor! |
... | ... |
@@ -40,6 +40,8 @@ |
40 | 40 |
#' * \code{\link{compute_abundance}} |
41 | 41 |
#' * \code{\link{comparison_matrix}} |
42 | 42 |
#' * \code{\link{separate_quant_matrices}} |
43 |
+#' * \code{\link{threshold_filter}} |
|
44 |
+#' * \code{\link{top_integrations}} |
|
43 | 45 |
#' * Utility functions: |
44 | 46 |
#' * \code{\link{generate_blank_association_file}} |
45 | 47 |
#' * \code{\link{generate_Vispa2_launch_AF}} |
... | ... |
@@ -7,14 +7,24 @@ |
7 | 7 |
#' Abundance is obtained for every row by calculating the ratio |
8 | 8 |
#' between the single value and the total value for the sample. |
9 | 9 |
#' |
10 |
-#' @param x An integration matrix |
|
10 |
+#' @details Abundance will be computed upon the user selected columns |
|
11 |
+#' in the `columns` parameter. For each column a corresponding |
|
12 |
+#' relative abundance column (and optionally a percentage abundance |
|
13 |
+#' column) will be produced. |
|
14 |
+#' |
|
15 |
+#' @param x An integration matrix - aka a data frame that includes |
|
16 |
+#' the `mandatory_IS_vars()` as columns |
|
17 |
+#' @param columns A character vector of column names to process, |
|
18 |
+#' must be numeric or integer columns |
|
11 | 19 |
#' @param percentage Add abundance as percentage? |
20 |
+#' |
|
12 | 21 |
#' @family Analysis functions |
13 | 22 |
#' |
14 | 23 |
#' @importFrom magrittr `%>%` |
15 | 24 |
#' @importFrom tibble is_tibble |
16 |
-#' @importFrom dplyr group_by summarise left_join mutate select |
|
17 |
-#' @importFrom rlang .data |
|
25 |
+#' @import dplyr |
|
26 |
+#' @importFrom rlang .data eval_tidy parse_expr |
|
27 |
+#' @importFrom stringr str_replace |
|
18 | 28 |
#' @return An integration matrix |
19 | 29 |
#' @export |
20 | 30 |
#' |
... | ... |
@@ -24,33 +34,63 @@ |
24 | 34 |
#' ) |
25 | 35 |
#' matrix <- import_single_Vispa2Matrix(path) |
26 | 36 |
#' abundance <- compute_abundance(matrix) |
27 |
-compute_abundance <- function(x, percentage = TRUE) { |
|
37 |
+compute_abundance <- function(x, columns = "Value", percentage = TRUE) { |
|
28 | 38 |
## Check parameters |
29 | 39 |
stopifnot(tibble::is_tibble(x)) |
40 |
+ stopifnot(is.character(columns)) |
|
30 | 41 |
if (.check_mandatory_vars(x) == FALSE) { |
31 | 42 |
stop(.non_ISM_error()) |
32 | 43 |
} |
33 | 44 |
if (.check_complAmpID(x) == FALSE) { |
34 | 45 |
stop(.missing_complAmpID_error()) |
35 | 46 |
} |
36 |
- if (.check_value_col(x) == FALSE) { |
|
37 |
- stop(.missing_value_col_error()) |
|
38 |
- } |
|
39 | 47 |
stopifnot(is.logical(percentage) & length(percentage) == 1) |
48 |
+ if (!all(columns %in% colnames(x))) { |
|
49 |
+ stop(.missing_user_cols_error()) |
|
50 |
+ } |
|
51 |
+ purrr::walk(columns, function(col) { |
|
52 |
+ expr <- rlang::expr(`$`(x, !!col)) |
|
53 |
+ if (!is.numeric(rlang::eval_tidy(expr)) & |
|
54 |
+ !is.numeric(rlang::eval_tidy(expr))) { |
|
55 |
+ stop(.non_num_user_cols_error()) |
|
56 |
+ } |
|
57 |
+ }) |
|
40 | 58 |
## Computation |
41 | 59 |
totals <- x %>% |
42 | 60 |
dplyr::group_by(.data$CompleteAmplificationID) %>% |
43 | 61 |
dplyr::summarise( |
44 |
- QuantificationSum = sum(.data$Value) |
|
62 |
+ dplyr::across(dplyr::all_of(columns), |
|
63 |
+ sum, |
|
64 |
+ .names = "{.col}_sum" |
|
65 |
+ ), |
|
66 |
+ .groups = "drop" |
|
45 | 67 |
) |
46 | 68 |
abundance_df <- x %>% |
47 | 69 |
dplyr::left_join(totals, by = "CompleteAmplificationID") %>% |
48 |
- dplyr::mutate(AbsAbundance = .data$Value / .data$QuantificationSum) %>% |
|
49 |
- dplyr::select(-c(.data$QuantificationSum)) |
|
70 |
+ dplyr::mutate(dplyr::across(dplyr::all_of(columns), |
|
71 |
+ list(ab = ~ .x / rlang::eval_tidy( |
|
72 |
+ rlang::parse_expr( |
|
73 |
+ paste( |
|
74 |
+ dplyr::cur_column(), |
|
75 |
+ "sum", |
|
76 |
+ sep = "_" |
|
77 |
+ ) |
|
78 |
+ ) |
|
79 |
+ )), |
|
80 |
+ .names = "{.col}_RelAbundance" |
|
81 |
+ )) %>% |
|
82 |
+ dplyr::select(-c(dplyr::all_of(paste(columns, "sum", sep = "_")))) %>% |
|
83 |
+ dplyr::distinct() |
|
50 | 84 |
if (percentage == TRUE) { |
51 | 85 |
abundance_df <- abundance_df %>% |
52 | 86 |
dplyr::mutate( |
53 |
- PercAbundance = .data$AbsAbundance * 100 |
|
87 |
+ dplyr::across(dplyr::contains("RelAbundance"), ~ .x * 100, |
|
88 |
+ .names = "{.col}_PercAbundance" |
|
89 |
+ ) |
|
90 |
+ ) %>% |
|
91 |
+ dplyr::rename_with( |
|
92 |
+ ~ stringr::str_replace(.x, "_RelAbundance", ""), |
|
93 |
+ dplyr::contains("PercAbundance") |
|
54 | 94 |
) |
55 | 95 |
} |
56 | 96 |
abundance_df |
... | ... |
@@ -125,8 +165,14 @@ comparison_matrix <- function(x, |
125 | 165 |
stopifnot(is.character(barcodeCount) & length(barcodeCount) == 1) |
126 | 166 |
stopifnot(is.character(cellCount) & length(cellCount) == 1) |
127 | 167 |
stopifnot(is.character(ShsCount) & length(ShsCount) == 1) |
168 |
+ param_names <- c( |
|
169 |
+ fragmentEstimate = fragmentEstimate, |
|
170 |
+ seqCount = seqCount, barcodeCount = barcodeCount, |
|
171 |
+ cellCount = cellCount, ShsCount = ShsCount |
|
172 |
+ ) |
|
128 | 173 |
x <- purrr::map2(x, names(x), function(matrix, quant_type) { |
129 |
- matrix %>% dplyr::rename({{ quant_type }} := .data$Value) |
|
174 |
+ quant_name <- param_names[names(param_names) %in% quant_type] |
|
175 |
+ matrix %>% dplyr::rename(!!quant_name := .data$Value) |
|
130 | 176 |
}) |
131 | 177 |
result <- purrr::reduce(x, function(matrix1, matrix2) { |
132 | 178 |
commoncols <- dplyr::intersect(colnames(matrix1), colnames(matrix2)) |
... | ... |
@@ -207,8 +253,10 @@ separate_quant_matrices <- function(x, fragmentEstimate = "fragmentEstimate", |
207 | 253 |
stopifnot(is.character(cellCount) & length(cellCount) == 1) |
208 | 254 |
stopifnot(is.character(ShsCount) & length(ShsCount) == 1) |
209 | 255 |
param_col <- c( |
210 |
- fragmentEstimate, seqCount, barcodeCount, cellCount, |
|
211 |
- ShsCount |
|
256 |
+ fragmentEstimate = fragmentEstimate, |
|
257 |
+ seqCount = seqCount, barcodeCount = barcodeCount, |
|
258 |
+ cellCount = cellCount, |
|
259 |
+ ShsCount = ShsCount |
|
212 | 260 |
) |
213 | 261 |
to_copy <- if (any(!num_cols %in% param_col)) { |
214 | 262 |
if (all(!num_cols %in% param_col)) { |
... | ... |
@@ -216,7 +264,7 @@ separate_quant_matrices <- function(x, fragmentEstimate = "fragmentEstimate", |
216 | 264 |
} |
217 | 265 |
num_cols[!num_cols %in% param_col] |
218 | 266 |
} |
219 |
- num_cols <- num_cols[num_cols %in% param_col] |
|
267 |
+ num_cols <- param_col[param_col %in% num_cols] |
|
220 | 268 |
annot <- if (.is_annotated(x)) { |
221 | 269 |
annotation_IS_vars() |
222 | 270 |
} else { |
... | ... |
@@ -230,6 +278,247 @@ separate_quant_matrices <- function(x, fragmentEstimate = "fragmentEstimate", |
230 | 278 |
mandatory_IS_vars(), annot, "CompleteAmplificationID", |
231 | 279 |
to_copy, quant |
232 | 280 |
)] %>% dplyr::rename(Value = quant) |
233 |
- }) %>% purrr::set_names(num_cols) |
|
281 |
+ }) %>% purrr::set_names(names(num_cols)) |
|
234 | 282 |
separated |
235 | 283 |
} |
284 |
+ |
|
285 |
+ |
|
286 |
+#' Filter data frames with custom predicates |
|
287 |
+#' |
|
288 |
+#' @description |
|
289 |
+#' \lifecycle{experimental} |
|
290 |
+#' Filter a single data frame or a list of data frames with custom |
|
291 |
+#' predicates assembled from the function parameters. |
|
292 |
+#' |
|
293 |
+#' @details |
|
294 |
+#' ## A single data frame as input |
|
295 |
+#' |
|
296 |
+#' If the user chooses to operate on a single data frame, the other parameters |
|
297 |
+#' should only be vectors: numeric vector for `threshold` and character |
|
298 |
+#' vectors for both `cols_to_compare` and `comparators`. |
|
299 |
+#' A filtering condition is obtained by combining element by element |
|
300 |
+#' `cols_to_compare` + `comparators` + `threshold` (similarly to the |
|
301 |
+#' `paste` function). For example: |
|
302 |
+#' |
|
303 |
+#' \verb{ |
|
304 |
+#' threshold = c(20, 35, 50) |
|
305 |
+#' cols_to_compare = c("a", "b", "c") |
|
306 |
+#' comparators = "<" |
|
307 |
+#' } |
|
308 |
+#' |
|
309 |
+#' given these vectors, the input data frame |
|
310 |
+#' will be filtered by checking which values in column "a" are less |
|
311 |
+#' than 20 **AND** which values in column "b" are less than 35 **AND** |
|
312 |
+#' which values in column "c" are less than 50. |
|
313 |
+#' Things the user should keep in mind are: |
|
314 |
+#' * The vectors of length 1 are going to be recycled if one or |
|
315 |
+#' more parameters are longer (in the example, the `comparators` value) |
|
316 |
+#' * If vectors are not of length 1 they must have the same length |
|
317 |
+#' * Columns to compare, of course, need to be included in the |
|
318 |
+#' input data frame and need to be numeric/integer |
|
319 |
+#' * The filtering will perform a logical "AND" on all the conditions, |
|
320 |
+#' only rows that satisfy ALL the conditions are preserved |
|
321 |
+#' |
|
322 |
+#' ## A list of data frames as input |
|
323 |
+#' |
|
324 |
+#' The input for the function may also be a list of data frames, |
|
325 |
+#' either named or unnamed. |
|
326 |
+#' |
|
327 |
+#' ### Unnamed list |
|
328 |
+#' If the input is a simple unnamed list, the other parameters should |
|
329 |
+#' be simple vectors (as for data frames). All the predicates will |
|
330 |
+#' simply be applied to every data frame in the list: this is useful |
|
331 |
+#' if it's desirable to filter for the same conditions different data frames |
|
332 |
+#' that have the same structure but different data. |
|
333 |
+#' |
|
334 |
+#' ### Named list |
|
335 |
+#' It is also possible to filter different data frames with different |
|
336 |
+#' sets of conditions. Besides having the possibility of defining the |
|
337 |
+#' other parameters as simple vector, which has the same results as |
|
338 |
+#' operating on an unnamed list, the user can define the parameters as |
|
339 |
+#' named lists containing vectors. For example: |
|
340 |
+#' |
|
341 |
+#' ```{r} |
|
342 |
+#' |
|
343 |
+#' example_df <- tibble::tibble(a = c(20, 30, 40), |
|
344 |
+#' b = c(40, 50, 60), |
|
345 |
+#' c = c("a", "b", "c"), |
|
346 |
+#' d = c(3L, 4L, 5L)) |
|
347 |
+#' example_list <- list(first = example_df, |
|
348 |
+#' second = example_df, |
|
349 |
+#' third = example_df) |
|
350 |
+#' print(example_list) |
|
351 |
+#' |
|
352 |
+#' filtered <- threshold_filter(example_list, |
|
353 |
+#' threshold = list(first = c(20, 60), |
|
354 |
+#' third = c(25)), |
|
355 |
+#' cols_to_compare = list(first = c("a", "b"), |
|
356 |
+#' third = c("a")), |
|
357 |
+#' comparators = list(first = c(">", "<"), |
|
358 |
+#' third = c(">="))) |
|
359 |
+#' print(filtered) |
|
360 |
+#' |
|
361 |
+#' ``` |
|
362 |
+#' The above signature will roughly be translated as: |
|
363 |
+#' * Filter the element "first" in the list by checking that values in |
|
364 |
+#' column "a" are bigger than 20 AND values in column "b" are less than |
|
365 |
+#' 60 |
|
366 |
+#' * Don't apply any filter to the element "second" (returns the |
|
367 |
+#' data frame as is) |
|
368 |
+#' * Filter the element "third" by checking that values in column "a" |
|
369 |
+#' are equal or bigger than 25. |
|
370 |
+#' |
|
371 |
+#' It is also possible to use some parameters as vectors and some as |
|
372 |
+#' lists: vectors will be recycled for every element filtered. |
|
373 |
+#' |
|
374 |
+#' ```r |
|
375 |
+#' filtered <- threshold_filter(example_list, |
|
376 |
+#' threshold = list(first = c(20, 60), |
|
377 |
+#' third = c(25, 65)), |
|
378 |
+#' cols_to_compare = c("a", "b"), |
|
379 |
+#' comparators = list(first = c(">", "<"), |
|
380 |
+#' third = c(">=", "<="))) |
|
381 |
+#' ``` |
|
382 |
+#' In this example, different threshold and comparators will be applied |
|
383 |
+#' to the same columns in all data frames. |
|
384 |
+#' |
|
385 |
+#' Things the user should keep in mind are: |
|
386 |
+#' * Names for the list parameters must be the same names in the |
|
387 |
+#' input list |
|
388 |
+#' * Only elements explicited in list parameters as names will |
|
389 |
+#' be filtered |
|
390 |
+#' * Lengths of both vectors and lists must be consistent |
|
391 |
+#' |
|
392 |
+#' @param x A data frame or a list of data frames |
|
393 |
+#' @param threshold A numeric/integer vector or a named list of |
|
394 |
+#' numeric/integer vectors |
|
395 |
+#' @param cols_to_compare A character vector or a named list of |
|
396 |
+#' character vectors |
|
397 |
+#' @param comparators A character vector or a named list of |
|
398 |
+#' character vectors. Must be one of the allowed values between |
|
399 |
+#' `c("<", ">", "==", "!=", ">=", "<=")` |
|
400 |
+#' |
|
401 |
+#' @family Analysis functions |
|
402 |
+#' |
|
403 |
+#' @return A data frame or a list of data frames |
|
404 |
+#' @export |
|
405 |
+#' |
|
406 |
+#' @examples |
|
407 |
+#' example_df <- tibble::tibble( |
|
408 |
+#' a = c(20, 30, 40), |
|
409 |
+#' b = c(40, 50, 60), |
|
410 |
+#' c = c("a", "b", "c"), |
|
411 |
+#' d = c(3L, 4L, 5L) |
|
412 |
+#' ) |
|
413 |
+#' example_list <- list( |
|
414 |
+#' first = example_df, |
|
415 |
+#' second = example_df, |
|
416 |
+#' third = example_df |
|
417 |
+#' ) |
|
418 |
+#' |
|
419 |
+#' filtered <- threshold_filter(example_list, |
|
420 |
+#' threshold = list( |
|
421 |
+#' first = c(20, 60), |
|
422 |
+#' third = c(25) |
|
423 |
+#' ), |
|
424 |
+#' cols_to_compare = list( |
|
425 |
+#' first = c("a", "b"), |
|
426 |
+#' third = c("a") |
|
427 |
+#' ), |
|
428 |
+#' comparators = list( |
|
429 |
+#' first = c(">", "<"), |
|
430 |
+#' third = c(">=") |
|
431 |
+#' ) |
|
432 |
+#' ) |
|
433 |
+threshold_filter <- function(x, |
|
434 |
+ threshold, |
|
435 |
+ cols_to_compare = "Value", |
|
436 |
+ comparators = ">") { |
|
437 |
+ stopifnot(is.list(x)) |
|
438 |
+ ### ---- If x is a data frame ---- ### |
|
439 |
+ if (is.data.frame(x)) { |
|
440 |
+ return(.tf_data_frame(x, threshold, cols_to_compare, comparators)) |
|
441 |
+ } |
|
442 |
+ ### ---- If x is a list ---- ### |
|
443 |
+ return(.tf_list(x, threshold, cols_to_compare, comparators)) |
|
444 |
+} |
|
445 |
+ |
|
446 |
+ |
|
447 |
+#' Sorts and keeps the top n integration sites in a data frame. |
|
448 |
+#' |
|
449 |
+#' \lifecycle{experimental} |
|
450 |
+#' The input data frame will be sorted by the highest values in |
|
451 |
+#' the columns specified and the top n rows will be returned as output. |
|
452 |
+#' The user can choose to keep additional columns in the output |
|
453 |
+#' by passing a vector of column names or passing 2 "shortcuts": |
|
454 |
+#' * `keep` = "everything" keeps all columns in the original data frame |
|
455 |
+#' * `keep` = "nothing" only keeps the mandatory columns |
|
456 |
+#' (`mandatory_IS_vars()`) plus the columns in the `columns` parameter. |
|
457 |
+#' |
|
458 |
+#' @param x An integration matrix (data frame containing |
|
459 |
+#' `mandatory_IS_vars()`) |
|
460 |
+#' @param n How many rows should the output have? Must be numeric |
|
461 |
+#' or integer and greater than 0 |
|
462 |
+#' @param columns Columns to use for the sorting. If more than a column |
|
463 |
+#' is supplied primary ordering is done on the first column, |
|
464 |
+#' secondary ordering on all other columns |
|
465 |
+#' @param keep Names of the columns to keep besides `mandatory_IS_vars()` |
|
466 |
+#' and `columns` |
|
467 |
+#' |
|
468 |
+#' @family Analysis functions |
|
469 |
+#' |
|
470 |
+#' @importFrom dplyr arrange across all_of desc slice_head select |
|
471 |
+#' @importFrom magrittr `%>%` |
|
472 |
+#' |
|
473 |
+#' @return A data frame with `n` rows |
|
474 |
+#' @export |
|
475 |
+#' |
|
476 |
+#' @examples |
|
477 |
+#' smpl <- tibble::tibble( |
|
478 |
+#' chr = c("1", "2", "3", "4", "5", "6"), |
|
479 |
+#' integration_locus = c(14536, 14544, 14512, 14236, 14522, 14566), |
|
480 |
+#' strand = c("+", "+", "-", "+", "-", "+"), |
|
481 |
+#' CompleteAmplificationID = c("ID1", "ID2", "ID1", "ID1", "ID3", "ID2"), |
|
482 |
+#' Value = c(3, 10, 40, 2, 15, 150), |
|
483 |
+#' Value2 = c(456, 87, 87, 9, 64, 96), |
|
484 |
+#' Value3 = c("a", "b", "c", "d", "e", "f") |
|
485 |
+#' ) |
|
486 |
+#' top <- top_integrations(smpl, |
|
487 |
+#' n = 3, |
|
488 |
+#' columns = c("Value", "Value2"), |
|
489 |
+#' keep = "nothing" |
|
490 |
+#' ) |
|
491 |
+top_integrations <- function(x, n = 50, columns = "RelAbundance", |
|
492 |
+ keep = "everything") { |
|
493 |
+ stopifnot(is.data.frame(x)) |
|
494 |
+ stopifnot(is.numeric(n) & length(n) == 1 & n > 0) |
|
495 |
+ stopifnot(is.character(keep)) |
|
496 |
+ stopifnot(is.character(columns)) |
|
497 |
+ if (!.check_mandatory_vars(x)) { |
|
498 |
+ stop(.non_ISM_error()) |
|
499 |
+ } |
|
500 |
+ if (!all(columns %in% colnames(x))) { |
|
501 |
+ stop(.missing_user_cols_error()) |
|
502 |
+ } |
|
503 |
+ if (!(all(keep == "everything") || all(keep == "nothing"))) { |
|
504 |
+ if (any(!keep %in% colnames(x))) { |
|
505 |
+ stop(.missing_user_cols_error()) |
|
506 |
+ } |
|
507 |
+ } |
|
508 |
+ essential_cols <- c(mandatory_IS_vars(), columns) |
|
509 |
+ to_keep <- if (all(keep == "everything")) { |
|
510 |
+ colnames(x)[!colnames(x) %in% essential_cols] |
|
511 |
+ } else if (all(keep == "nothing")) { |
|
512 |
+ character(0) |
|
513 |
+ } else { |
|
514 |
+ keep[!keep %in% essential_cols] |
|
515 |
+ } |
|
516 |
+ result <- x %>% |
|
517 |
+ dplyr::arrange(dplyr::across( |
|
518 |
+ dplyr::all_of(columns), |
|
519 |
+ dplyr::desc |
|
520 |
+ )) %>% |
|
521 |
+ dplyr::slice_head(n = n) %>% |
|
522 |
+ dplyr::select(dplyr::all_of(c(essential_cols, to_keep))) |
|
523 |
+ return(result) |
|
524 |
+} |
... | ... |
@@ -3,113 +3,17 @@ |
3 | 3 |
#------------------------------------------------------------------------------# |
4 | 4 |
## All functions in this file are NOT exported, to be used internally only. |
5 | 5 |
|
6 |
-### Convenience functions for errors and warnings ### |
|
7 |
-#' @keywords internal |
|
8 |
-.malformed_ISmatrix_warning <- function() { |
|
9 |
- paste(c( |
|
10 |
- "Mandatory integration matrix variables, ", mandatory_IS_vars(), |
|
11 |
- ", were not detected" |
|
12 |
- ), collapse = " ") |
|
13 |
-} |
|
14 |
-#' @keywords internal |
|
15 |
-.non_ISM_error <- function() { |
|
16 |
- paste( |
|
17 |
- "One or more elements in x are not integration matrices.", |
|
18 |
- "Aborting." |
|
19 |
- ) |
|
20 |
-} |
|
21 |
- |
|
22 |
-#' @keywords internal |
|
23 |
-.missing_value_col_error <- function() { |
|
24 |
- paste( |
|
25 |
- "The `Value` column is missing or it contains non-numeric data.", |
|
26 |
- "The column is needed for this operation.", |
|
27 |
- "Aborting." |
|
28 |
- ) |
|
29 |
-} |
|
30 |
- |
|
31 |
-#' @keywords internal |
|
32 |
-.missing_complAmpID_error <- function() { |
|
33 |
- paste( |
|
34 |
- "The `CompleteAmplificationID` column is missing.", |
|
35 |
- "The column is needed for this operation.", |
|
36 |
- "Aborting." |
|
37 |
- ) |
|
38 |
-} |
|
39 |
- |
|
40 |
-#' @keywords internal |
|
41 |
-.quant_types_error <- function() { |
|
42 |
- paste( |
|
43 |
- "The list names must be quantification types", |
|
44 |
- ", see quantification_types() for reference" |
|
45 |
- ) |
|
46 |
-} |
|
47 |
- |
|
48 |
-#' @keywords internal |
|
49 |
-.missing_num_cols_error <- function() { |
|
50 |
- paste("No numeric columns found") |
|
51 |
-} |
|
52 |
- |
|
53 |
-#' @keywords internal |
|
54 |
-.non_quant_cols_msg <- function(x) { |
|
55 |
- paste(c( |
|
56 |
- "Found numeric columns that are not quantification values:", |
|
57 |
- "these columns will be copied in all resulting matrices.", |
|
58 |
- "Found: ", x |
|
59 |
- ), collapse = "\n") |
|
60 |
-} |
|
61 |
- |
|
62 |
-#' @keywords internal |
|
63 |
-.non_quant_cols_error <- function() { |
|
64 |
- paste( |
|
65 |
- "No quantification values columns found. Did you set the function", |
|
66 |
- "parameters correctly?" |
|
67 |
- ) |
|
68 |
-} |
|
69 |
- |
|
70 |
-#' @keywords internal |
|
71 |
-.max_val_col_warning <- function(x) { |
|
72 |
- paste0("Column for max value `", x, "` not found in numeric columns.") |
|
73 |
-} |
|
74 |
- |
|
75 |
-#' @keywords internal |
|
76 |
-.using_val_col_warning <- function(x) { |
|
77 |
- paste(c( |
|
78 |
- .max_val_col_warning(x), |
|
79 |
- "Using `Value` column as reference instead." |
|
80 |
- ), collapse = "\n") |
|
81 |
-} |
|
82 |
- |
|
83 |
-#' @keywords internal |
|
84 |
-.max_val_stop_error <- function(x) { |
|
85 |
- paste(c( |
|
86 |
- .max_val_col_warning(x), |
|
87 |
- "Did you set `max_value_column` parameter correctly?" |
|
88 |
- ), |
|
89 |
- collapse = "\n" |
|
90 |
- ) |
|
91 |
-} |
|
92 |
- |
|
93 |
-#' @keywords internal |
|
94 |
-.nas_introduced_msg <- function() { |
|
95 |
- paste("NAs were introduced while producing the data frame.", |
|
96 |
- "The possible cause for this is:", |
|
97 |
- "some quantification matrices were not imported for all pools", |
|
98 |
- sep = "\n" |
|
99 |
- ) |
|
100 |
-} |
|
101 |
- |
|
102 | 6 |
#### ---- Internals for checks on integration matrices----#### |
103 | 7 |
|
104 |
-#' Internal helper function for checking mandatory vars presence in x. |
|
105 |
-#' |
|
106 |
-#' Checks if the elements of `mandatory_IS_vars` are present as column names |
|
107 |
-#' in the data frame. |
|
108 |
-#' @param x A data.frame object (or any extending class) |
|
109 |
-#' @keywords internal |
|
110 |
-#' |
|
111 |
-#' @return FALSE if all or some elements are not found in the data frame, TRUE |
|
112 |
-#' otherwise |
|
8 |
+# Internal helper function for checking mandatory vars presence in x. |
|
9 |
+# |
|
10 |
+# Checks if the elements of `mandatory_IS_vars` are present as column names |
|
11 |
+# in the data frame. |
|
12 |
+# @param x A data.frame object (or any extending class) |
|
13 |
+# @keywords internal |
|
14 |
+# |
|
15 |
+# @return FALSE if all or some elements are not found in the data frame, TRUE |
|
16 |
+# otherwise |
|
113 | 17 |
.check_mandatory_vars <- function(x) { |
114 | 18 |
stopifnot(is.data.frame(x)) |
115 | 19 |
res <- if (all(mandatory_IS_vars() %in% colnames(x))) { |
... | ... |
@@ -120,14 +24,14 @@ |
120 | 24 |
return(res) |
121 | 25 |
} |
122 | 26 |
|
123 |
-#' Internal helper function for checking `Value` column presence in x. |
|
124 |
-#' |
|
125 |
-#' Checks if the column `Value` is present in the data frame and also |
|
126 |
-#' checks if the column is numeric or integer. |
|
127 |
-#' @param x A data.frame object (or any extending class) |
|
128 |
-#' @keywords internal |
|
129 |
-#' |
|
130 |
-#' @return FALSE if not found or contains non-numeric data, TRUE otherwise |
|
27 |
+# Internal helper function for checking `Value` column presence in x. |
|
28 |
+# |
|
29 |
+# Checks if the column `Value` is present in the data frame and also |
|
30 |
+# checks if the column is numeric or integer. |
|
31 |
+# @param x A data.frame object (or any extending class) |
|
32 |
+# @keywords internal |
|
33 |
+# |
|
34 |
+# @return FALSE if not found or contains non-numeric data, TRUE otherwise |
|
131 | 35 |
.check_value_col <- function(x) { |
132 | 36 |
stopifnot(is.data.frame(x)) |
133 | 37 |
present <- if ("Value" %in% colnames(x)) { |
... | ... |
@@ -142,15 +46,15 @@ |
142 | 46 |
} |
143 | 47 |
} |
144 | 48 |
|
145 |
-#' Internal helper function for checking `CompleteAmplifcationID` |
|
146 |
-#' column presence in x. |
|
147 |
-#' |
|
148 |
-#' Checks if the column `CompleteAmplifcationID` is present in the data frame. |
|
149 |
-#' |
|
150 |
-#' @param x A data.frame object (or any extending class) |
|
151 |
-#' @keywords internal |
|
152 |
-#' |
|
153 |
-#' @return FALSE if not found, TRUE otherwise |
|
49 |
+# Internal helper function for checking `CompleteAmplifcationID` |
|
50 |
+# column presence in x. |
|
51 |
+# |
|
52 |
+# Checks if the column `CompleteAmplifcationID` is present in the data frame. |
|
53 |
+# |
|
54 |
+# @param x A data.frame object (or any extending class) |
|
55 |
+# @keywords internal |
|
56 |
+# |
|
57 |
+# @return FALSE if not found, TRUE otherwise |
|
154 | 58 |
.check_complAmpID <- function(x) { |
155 | 59 |
stopifnot(is.data.frame(x)) |
156 | 60 |
if ("CompleteAmplificationID" %in% colnames(x)) { |
... | ... |
@@ -160,17 +64,17 @@ |
160 | 64 |
} |
161 | 65 |
} |
162 | 66 |
|
163 |
-#' Finds experimental columns in an integration matrix. |
|
164 |
-#' |
|
165 |
-#' The function checks if there are numeric columns which are not |
|
166 |
-#' standard integration matrix columns, if there are returns their names. |
|
167 |
-#' |
|
168 |
-#' @param x A data.frame |
|
67 |
+# Finds experimental columns in an integration matrix. |
|
68 |
+# |
|
69 |
+# The function checks if there are numeric columns which are not |
|
70 |
+# standard integration matrix columns, if there are returns their names. |
|
71 |
+# |
|
72 |
+# @param x A data.frame |
|
169 | 73 |
#' @importFrom purrr map_lgl |
170 | 74 |
#' @importFrom rlang expr eval_tidy |
171 | 75 |
#' @keywords internal |
172 |
-#' |
|
173 |
-#' @return A character vector of column names |
|
76 |
+# |
|
77 |
+# @return A character vector of column names |
|
174 | 78 |
.find_exp_cols <- function(x) { |
175 | 79 |
stopifnot(is.data.frame(x)) |
176 | 80 |
default_cols <- c( |
... | ... |
@@ -186,12 +90,12 @@ |
186 | 90 |
remaining[remaining_numeric] |
187 | 91 |
} |
188 | 92 |
|
189 |
-#' Checks if the integration matrix is annotated or not. |
|
190 |
-#' |
|
191 |
-#' @param x A data.frame |
|
192 |
-#' @keywords internal |
|
193 |
-#' |
|
194 |
-#' @return A logical value |
|
93 |
+# Checks if the integration matrix is annotated or not. |
|
94 |
+# |
|
95 |
+# @param x A data.frame |
|
96 |
+# @keywords internal |
|
97 |
+# |
|
98 |
+# @return A logical value |
|
195 | 99 |
.is_annotated <- function(x) { |
196 | 100 |
stopifnot(is.data.frame(x)) |
197 | 101 |
if (all(annotation_IS_vars() %in% colnames(x))) { |
... | ... |
@@ -205,17 +109,17 @@ |
205 | 109 |
|
206 | 110 |
#---- USED IN : import_single_Vispa2Matrix ---- |
207 | 111 |
|
208 |
-#' Internal function to convert a messy matrix to a tidy data frame |
|
209 |
-#' |
|
210 |
-#' @description Uses the suite of functions provided by the |
|
211 |
-#' tidyverse to produce a more dense and ordered structure. |
|
212 |
-#' This function is not exported and should be called in other importing |
|
213 |
-#' functions. |
|
214 |
-#' |
|
215 |
-#' @param df Messy tibble to convert to tidy |
|
216 |
-#' @keywords internal |
|
217 |
-#' |
|
218 |
-#' @return a tidy tibble |
|
112 |
+# Internal function to convert a messy matrix to a tidy data frame |
|
113 |
+# |
|
114 |
+# @description Uses the suite of functions provided by the |
|
115 |
+# tidyverse to produce a more dense and ordered structure. |
|
116 |
+# This function is not exported and should be called in other importing |
|
117 |
+# functions. |
|
118 |
+# |
|
119 |
+# @param df Messy tibble to convert to tidy |
|
120 |
+# @keywords internal |
|
121 |
+# |
|
122 |
+# @return a tidy tibble |
|
219 | 123 |
#' @importFrom rlang .data |
220 | 124 |
#' @importFrom tidyr pivot_longer |
221 | 125 |
#' @importFrom dplyr arrange all_of filter |
... | ... |
@@ -237,16 +141,16 @@ |
237 | 141 |
isadf_tidy |
238 | 142 |
} |
239 | 143 |
|
240 |
-#' Internal function to auto-detect the type of IS based on the headers. |
|
241 |
-#' |
|
242 |
-#' @param df the data frame to inspect |
|
243 |
-#' @keywords internal |
|
244 |
-#' |
|
245 |
-#' @return one value among: |
|
246 |
-#' * "OLD" : for old-style matrices that had only one column holding |
|
247 |
-#' all genomic coordinates |
|
248 |
-#' * "NEW" : for the classic Vispa2 annotated/not annotated matrices |
|
249 |
-#' * "MALFORMED" : in any other case |
|
144 |
+# Internal function to auto-detect the type of IS based on the headers. |
|
145 |
+# |
|
146 |
+# @param df the data frame to inspect |
|
147 |
+# @keywords internal |
|
148 |
+# |
|
149 |
+# @return one value among: |
|
150 |
+# * "OLD" : for old-style matrices that had only one column holding |
|
151 |
+# all genomic coordinates |
|
152 |
+# * "NEW" : for the classic Vispa2 annotated/not annotated matrices |
|
153 |
+# * "MALFORMED" : in any other case |
|
250 | 154 |
.auto_detect_type <- function(df) { |
251 | 155 |
if ("IS_genomicID" %in% colnames(df) & |
252 | 156 |
all(!mandatory_IS_vars() %in% colnames(df))) { |
... | ... |
@@ -260,12 +164,12 @@ |
260 | 164 |
|
261 | 165 |
#---- USED IN : import_association_file ---- |
262 | 166 |
|
263 |
-#' Checks if the association file has the right format (standard headers). |
|
264 |
-#' |
|
265 |
-#' @param df The imported association file |
|
266 |
-#' @keywords internal |
|
267 |
-#' |
|
268 |
-#' @return TRUE if the check passes, FALSE otherwise |
|
167 |
+# Checks if the association file has the right format (standard headers). |
|
168 |
+# |
|
169 |
+# @param df The imported association file |
|
170 |
+# @keywords internal |
|
171 |
+# |
|
172 |
+# @return TRUE if the check passes, FALSE otherwise |
|
269 | 173 |
.check_af_correctness <- function(df) { |
270 | 174 |
if (all(association_file_columns() %in% colnames(df))) { |
271 | 175 |
return(TRUE) |
... | ... |
@@ -275,19 +179,19 @@ |
275 | 179 |
} |
276 | 180 |
|
277 | 181 |
|
278 |
-#' Reads association file and checks if it's correct or not. |
|
279 |
-#' |
|
280 |
-#' @param path The path to the association file on disk |
|
281 |
-#' @param padding The padding for TimePoint field |
|
282 |
-#' @param date_format The date format of date columns |
|
283 |
-#' @keywords internal |
|
182 |
+# Reads association file and checks if it's correct or not. |
|
183 |
+# |
|
184 |
+# @param path The path to the association file on disk |
|
185 |
+# @param padding The padding for TimePoint field |
|
186 |
+# @param date_format The date format of date columns |
|
187 |
+# @keywords internal |
|
284 | 188 |
#' @importFrom tibble as_tibble |
285 | 189 |
#' @importFrom dplyr mutate across contains |
286 | 190 |
#' @importFrom rlang .data |
287 | 191 |
#' @importFrom stringr str_pad |
288 | 192 |
#' @import lubridate |
289 |
-#' |
|
290 |
-#' @return A tibble containing the association file. |
|
193 |
+# |
|
194 |
+# @return A tibble containing the association file. |
|
291 | 195 |
.read_and_correctness_af <- function(path, padding, date_format) { |
292 | 196 |
stopifnot(is.character(path)) |
293 | 197 |
stopifnot(file.exists(path)) |
... | ... |
@@ -320,21 +224,21 @@ |
320 | 224 |
as_file |
321 | 225 |
} |
322 | 226 |
|
323 |
-#' Internal function to check alignment between association file and file |
|
324 |
-#' system starting from the root. The alignment is checked at folder level. |
|
325 |
-#' |
|
326 |
-#' @param df The imported association file (data.frame or tibble) |
|
327 |
-#' @param root_folder Path to the root folder |
|
328 |
-#' @keywords internal |
|
227 |
+# Internal function to check alignment between association file and file |
|
228 |
+# system starting from the root. The alignment is checked at folder level. |
|
229 |
+# |
|
230 |
+# @param df The imported association file (data.frame or tibble) |
|
231 |
+# @param root_folder Path to the root folder |
|
232 |
+# @keywords internal |
|
329 | 233 |
#' @importFrom dplyr select distinct mutate bind_rows |
330 | 234 |
#' @importFrom fs dir_ls |
331 | 235 |
#' @importFrom purrr pmap is_empty reduce map_dbl |
332 | 236 |
#' @importFrom stringr str_replace_all str_extract_all |
333 | 237 |
#' @importFrom tibble tibble |
334 |
-#' |
|
335 |
-#' @return A data frame containing, for each ProjectID and |
|
336 |
-#' concatenatePoolIDSeqRun the |
|
337 |
-#' corresponding path on disk if found, NA otherwise. |
|
238 |
+# |
|
239 |
+# @return A data frame containing, for each ProjectID and |
|
240 |
+# concatenatePoolIDSeqRun the |
|
241 |
+# corresponding path on disk if found, NA otherwise. |
|
338 | 242 |
.check_file_system_alignment <- function(df, root_folder) { |
339 | 243 |
temp_df <- df %>% |
340 | 244 |
dplyr::select( |
... | ... |
@@ -387,19 +291,19 @@ |
387 | 291 |
checker_df |
388 | 292 |
} |
389 | 293 |
|
390 |
-#' Updates the association file after the alignment check. |
|
391 |
-#' |
|
392 |
-#' The function checks if there are missing folders and updates the association |
|
393 |
-#' file by adding a column `Path` where the absolute path on disk for the |
|
394 |
-#' project and pool is found, if no path is found NA is inserted instead. |
|
395 |
-#' |
|
396 |
-#' @param as_file The tibble representing the read association_file |
|
397 |
-#' @param checks The tibble representing the results |
|
398 |
-#' of `.check_file_system_alignment` |
|
399 |
-#' @param root The root folder |
|
400 |
-#' @keywords internal |
|
401 |
-#' |
|
402 |
-#' @return An updated association file with absolute paths |
|
294 |
+# Updates the association file after the alignment check. |
|
295 |
+# |
|
296 |
+# The function checks if there are missing folders and updates the association |
|
297 |
+# file by adding a column `Path` where the absolute path on disk for the |
|
298 |
+# project and pool is found, if no path is found NA is inserted instead. |
|
299 |
+# |
|
300 |
+# @param as_file The tibble representing the read association_file |
|
301 |
+# @param checks The tibble representing the results |
|
302 |
+# of `.check_file_system_alignment` |
|
303 |
+# @param root The root folder |
|
304 |
+# @keywords internal |
|
305 |
+# |
|
306 |
+# @return An updated association file with absolute paths |
|
403 | 307 |
.update_af_after_alignment <- function(as_file, checks, root) { |
404 | 308 |
# If some some folders are missing a warning is thrown |
405 | 309 |
not_found <- checks %>% dplyr::filter(.data$Found == FALSE) |
... | ... |
@@ -427,20 +331,20 @@ |
427 | 331 |
|
428 | 332 |
#---- USED IN : import_parallel_Vispa2Matrices_interactive ---- |
429 | 333 |
|
430 |
-#' Helper function to be used when importing matrices in parallel. |
|
431 |
-#' |
|
432 |
-#' @param association_file Either the path to the association file or the tibble |
|
433 |
-#' representing the imported association file (done via |
|
434 |
-#' `import_association_file`) |
|
435 |
-#' @param root If `association_file` is the path to the file, root is a single |
|
436 |
-#' string holding the path to the root folder, otherwise root is `NULL` |
|
437 |
-#' @param padding The padding for TimePoint field |
|
438 |
-#' @param format The date format of date columns |
|
439 |
-#' @keywords internal |
|
440 |
-#' |
|
441 |
-#' @return A list of two elements: the first element is an updated version of |
|
442 |
-#' the association file with NAs removed, the second element is a widget showing |
|
443 |
-#' results of alignment checks. |
|
334 |
+# Helper function to be used when importing matrices in parallel. |
|
335 |
+# |
|
336 |
+# @param association_file Either the path to the association file or the tibble |
|
337 |
+# representing the imported association file (done via |
|
338 |
+# `import_association_file`) |
|
339 |
+# @param root If `association_file` is the path to the file, root is a single |
|
340 |
+# string holding the path to the root folder, otherwise root is `NULL` |
|
341 |
+# @param padding The padding for TimePoint field |
|
342 |
+# @param format The date format of date columns |
|
343 |
+# @keywords internal |
|
344 |
+# |
|
345 |
+# @return A list of two elements: the first element is an updated version of |
|
346 |
+# the association file with NAs removed, the second element is a widget showing |
|
347 |
+# results of alignment checks. |
|
444 | 348 |
.manage_association_file <- function(association_file, root, padding, format) { |
445 | 349 |
# Manage association file |
446 | 350 |
if (is.character(association_file)) { |
... | ... |
@@ -476,18 +380,18 @@ |
476 | 380 |
} |
477 | 381 |
} |
478 | 382 |
|
479 |
-#' Allows the user to choose interactively the projects |
|
480 |
-#' to consider for import. |
|
481 |
-#' |
|
482 |
-#' @param association_file The tibble representing the imported association file |
|
483 |
-#' @keywords internal |
|
383 |
+# Allows the user to choose interactively the projects |
|
384 |
+# to consider for import. |
|
385 |
+# |
|
386 |
+# @param association_file The tibble representing the imported association file |
|
387 |
+# @keywords internal |
|
484 | 388 |
#' @importFrom dplyr distinct select filter |
485 | 389 |
#' @importFrom rlang .data |
486 | 390 |
#' @importFrom stringr str_split |
487 | 391 |
#' @importFrom purrr map_dbl |
488 |
-#' |
|
489 |
-#' @return A modified version of the association file where only selected |
|
490 |
-#' projects are present |
|
392 |
+# |
|
393 |
+# @return A modified version of the association file where only selected |
|
394 |
+# projects are present |
|
491 | 395 |
.interactive_select_projects_import <- function(association_file) { |
492 | 396 |
repeat { |
493 | 397 |
cat("Which projects would you like to import?\n") |
... | ... |
@@ -621,12 +525,12 @@ |
621 | 525 |
} |
622 | 526 |
} |
623 | 527 |
|
624 |
-#' Simple internal helper function to handle user input for selection |
|
625 |
-#' of number of pools. |
|
626 |
-#' @keywords internal |
|
627 |
-#' |
|
628 |
-#' @return Numeric representing user selection (1 for all pools, 2 for |
|
629 |
-#' only some pools, 0 to exit) |
|
528 |
+# Simple internal helper function to handle user input for selection |
|
529 |
+# of number of pools. |
|
530 |
+# @keywords internal |
|
531 |
+# |
|
532 |
+# @return Numeric representing user selection (1 for all pools, 2 for |
|
533 |
+# only some pools, 0 to exit) |
|
630 | 534 |
.pool_number_IN <- function() { |
631 | 535 |
cat("Which pools for each project would you like to import?\n") |
632 | 536 |
cat("[1] ALL", "[2] ONLY SOME", "[0] QUIT", sep = "\n") |
... | ... |
@@ -652,15 +556,15 @@ |
652 | 556 |
n_pools_to_import |
653 | 557 |
} |
654 | 558 |
|
655 |
-#' Simple helper interal function to handle user input for actual |
|
656 |
-#' pool choices. |
|
657 |
-#' |
|
658 |
-#' @param indexes A vector of integer indexes available |
|
659 |
-#' @keywords internal |
|
559 |
+# Simple helper interal function to handle user input for actual |
|
560 |
+# pool choices. |
|
561 |
+# |
|
562 |
+# @param indexes A vector of integer indexes available |
|
563 |
+# @keywords internal |
|
660 | 564 |
#' @importFrom stringr str_split |
661 | 565 |
#' @importFrom purrr map_dbl |
662 |
-#' |
|
663 |
-#' @return The user selection as a numeric vector |
|
566 |
+# |
|
567 |
+# @return The user selection as a numeric vector |
|
664 | 568 |
.pool_choices_IN <- function(indexes) { |
665 | 569 |
repeat { |
666 | 570 |
cat("\nYour choice: ") |
... | ... |
@@ -700,20 +604,20 @@ |
700 | 604 |
to_imp |
701 | 605 |
} |
702 | 606 |
|
703 |
-#' Allows the user to choose interactively |
|
704 |
-#' the pools to consider for import. |
|
705 |
-#' |
|
706 |
-#' @param association_file The tibble representing the imported |
|
707 |
-#' association file |
|
607 |
+# Allows the user to choose interactively |
|
608 |
+# the pools to consider for import. |
|
609 |
+# |
|
610 |
+# @param association_file The tibble representing the imported |
|
611 |
+# association file |
|
708 | 612 |
#' @importFrom dplyr select distinct group_by bind_rows inner_join |
709 | 613 |
#' @importFrom tibble tibble |
710 | 614 |
#' @importFrom tidyr nest |
711 | 615 |
#' @importFrom purrr map pmap reduce |
712 | 616 |
#' @importFrom rlang .data |
713 |
-#' @keywords internal |
|
714 |
-#' |
|
715 |
-#' @return A modified version of the association file where only selected |
|
716 |
-#' pools for each project are present |
|
617 |
+# @keywords internal |
|
618 |
+# |
|
619 |
+# @return A modified version of the association file where only selected |
|
620 |
+# pools for each project are present |
|
717 | 621 |
.interactive_select_pools_import <- function(association_file) { |
718 | 622 |
repeat { |
719 | 623 |
n_pools_to_import <- .pool_number_IN() |
... | ... |
@@ -817,13 +721,13 @@ |
817 | 721 |
} |
818 | 722 |
} |
819 | 723 |
|
820 |
-#' Updates a files_found tibble with Files_count and Anomalies column. |
|
821 |
-#' |
|
822 |
-#' @param lups A files_found tibble obtained in a lookup function. Must contain |
|
823 |
-#' the Files column (nested table quantification type and files) |
|
824 |
-#' @keywords internal |
|
825 |
-#' |
|
826 |
-#' @return Updated files_found with Anomalies and Files_count columns |
|
724 |
+# Updates a files_found tibble with Files_count and Anomalies column. |
|
725 |
+# |
|
726 |
+# @param lups A files_found tibble obtained in a lookup function. Must contain |
|
727 |
+# the Files column (nested table quantification type and files) |
|
728 |
+# @keywords internal |
|
729 |
+# |
|
730 |
+# @return Updated files_found with Anomalies and Files_count columns |
|
827 | 731 |
.trace_anomalies <- function(lups) { |
828 | 732 |
files_count <- purrr::pmap(lups, function(...) { |
829 | 733 |
temp <- tibble::tibble(...) |
... | ... |
@@ -849,23 +753,23 @@ |
849 | 753 |
lups |
850 | 754 |
} |
851 | 755 |
|
852 |
-#' Looks up matrices to import given the association file and the |
|
853 |
-#' root of the file system. |
|
854 |
-#' |
|
855 |
-#' @param association_file Tibble representing the association file |
|
856 |
-#' @param quantification_type The type of quantification matrices to look for |
|
857 |
-#' (one in `quantification_types()`) |
|
858 |
-#' @param matrix_type The matrix_type to lookup (one between "annotated" or |
|
859 |
-#' "not_annotated") |
|
860 |
-#' @keywords internal |
|
756 |
+# Looks up matrices to import given the association file and the |
|
757 |
+# root of the file system. |
|
758 |
+# |
|
759 |
+# @param association_file Tibble representing the association file |
|
760 |
+# @param quantification_type The type of quantification matrices to look for |
|
761 |
+# (one in `quantification_types()`) |
|
762 |
+# @param matrix_type The matrix_type to lookup (one between "annotated" or |
|
763 |
+# "not_annotated") |
|
764 |
+# @keywords internal |
|
861 | 765 |
#' @importFrom tibble tibble |
862 | 766 |
#' @importFrom fs dir_ls as_fs_path |
863 | 767 |
#' @importFrom purrr map reduce map_dbl |
864 | 768 |
#' @importFrom stringr str_detect |
865 | 769 |
#' @importFrom dplyr select distinct bind_rows mutate |
866 | 770 |
#' @importFrom tidyr nest |
867 |
-#' |
|
868 |
-#' @return A tibble containing all found files, including duplicates and missing |
|
771 |
+# |
|
772 |
+# @return A tibble containing all found files, including duplicates and missing |
|
869 | 773 |
.lookup_matrices <- function(association_file, |
870 | 774 |
quantification_type, |
871 | 775 |
matrix_type) { |
... | ... |
@@ -954,20 +858,20 @@ |
954 | 858 |
} |
955 | 859 |
|
956 | 860 |
|
957 |
-#' Simple function to manage user input for duplicate file choices for each |
|
958 |
-#' quantification type in a single project/pool pair (use internally in |
|
959 |
-#' `.manage_anomalies_interactive`). |
|
960 |
-#' |
|
961 |
-#' @param q_types Vector of characters containing the unique quantification |
|
962 |
-#' types that are detected as duplicates |
|
963 |
-#' @param dupl The tibble containing quantification types and path to the files |
|
964 |
-#' found for a single project/pool pair |
|
965 |
-#' @keywords internal |
|
861 |
+# Simple function to manage user input for duplicate file choices for each |
|
862 |
+# quantification type in a single project/pool pair (use internally in |
|
863 |
+# `.manage_anomalies_interactive`). |
|
864 |
+# |
|
865 |
+# @param q_types Vector of characters containing the unique quantification |
|
866 |
+# types that are detected as duplicates |
|
867 |
+# @param dupl The tibble containing quantification types and path to the files |
|
868 |
+# found for a single project/pool pair |
|
869 |
+# @keywords internal |
|
966 | 870 |
#' @importFrom dplyr filter slice bind_rows |
967 | 871 |
#' @importFrom purrr map reduce |
968 | 872 |
#' @importFrom rlang .data |
969 |
-#' |
|
970 |
-#' @return An updated tibble containing files chosen for each type |
|
873 |
+# |
|
874 |
+# @return An updated tibble containing files chosen for each type |
|
971 | 875 |
.choose_duplicates_files_interactive <- function(q_types, dupl) { |
972 | 876 |
non_dup <- dupl %>% dplyr::filter(!.data$Quantification_type %in% q_types) |
973 | 877 |
type_choice <- purrr::map(q_types, function(x) { |
... | ... |
@@ -1004,25 +908,25 @@ |
1004 | 908 |
type_choice <- purrr::reduce(type_choice, dplyr::bind_rows) |
1005 | 909 |
} |
1006 | 910 |
|
1007 |
-#' Manages anomalies for files found. |
|
1008 |
-#' |
|
1009 |
-#' The function manages anomalies found for files after scanning appropriate |
|
1010 |
-#' folders by: |
|
1011 |
-#' * Removing files not found (files for which Files_count$Found == 0 and Path |
|
1012 |
-#' is NA) and printing a message to notify user |
|
1013 |
-#' * Removing duplicates by asking the user which files to keep for each |
|
1014 |
-#' quantification type, pool and project |
|
1015 |
-#' |
|
1016 |
-#' @param files_found The tibble obtained via calling `.lookup_matrices` |
|
1017 |
-#' @keywords internal |
|
911 |
+# Manages anomalies for files found. |
|
912 |
+# |
|
913 |
+# The function manages anomalies found for files after scanning appropriate |
|
914 |
+# folders by: |
|
915 |
+# * Removing files not found (files for which Files_count$Found == 0 and Path |
|
916 |
+# is NA) and printing a message to notify user |
|
917 |
+# * Removing duplicates by asking the user which files to keep for each |
|
918 |
+# quantification type, pool and project |
|
919 |
+# |
|
920 |
+# @param files_found The tibble obtained via calling `.lookup_matrices` |
|
921 |
+# @keywords internal |
|
1018 | 922 |
#' @importFrom dplyr filter select rename bind_rows arrange |
1019 | 923 |
#' @importFrom tidyr unnest |
1020 | 924 |
#' @importFrom tibble as_tibble tibble |
1021 | 925 |
#' @importFrom purrr pmap flatten reduce |
1022 |
-#' |
|
1023 |
-#' @return A tibble containing for each project, pool and quantification type |
|
1024 |
-#' the files chosen (ideally 1 for each quantification type if found, no more |
|
1025 |
-#' than 1 per type) |
|
926 |
+# |
|
927 |
+# @return A tibble containing for each project, pool and quantification type |
|
928 |
+# the files chosen (ideally 1 for each quantification type if found, no more |
|
929 |
+# than 1 per type) |
|
1026 | 930 |
.manage_anomalies_interactive <- function(files_found) { |
1027 | 931 |
# Isolate anomalies in files found |
1028 | 932 |
anomalies <- files_found %>% dplyr::filter(.data$Anomalies == TRUE) |
... | ... |
@@ -1131,20 +1035,20 @@ |
1131 | 1035 |
} |
1132 | 1036 |
} |
1133 | 1037 |
|
1134 |
-#' Internal function for parallel import of a single quantification |
|
1135 |
-#' type files. |
|
1136 |
-#' |
|
1137 |
-#' @param q_type The quantification type (single string) |
|
1138 |
-#' @param files Files_found table were absolute paths of chosen files |
|
1139 |
-#' are stored |
|
1140 |
-#' @param workers Number of parallel workers |
|
1141 |
-#' @keywords internal |
|
1038 |
+# Internal function for parallel import of a single quantification |
|
1039 |
+# type files. |
|
1040 |
+# |
|
1041 |
+# @param q_type The quantification type (single string) |
|
1042 |
+# @param files Files_found table were absolute paths of chosen files |
|
1043 |
+# are stored |
|
1044 |
+# @param workers Number of parallel workers |
|
1045 |
+# @keywords internal |
|
1142 | 1046 |
#' @importFrom dplyr filter mutate bind_rows distinct |
1143 | 1047 |
#' @importFrom BiocParallel SnowParam MulticoreParam bptry bplapply bpstop bpok |
1144 | 1048 |
#' @importFrom purrr is_empty reduce |
1145 |
-#' |
|
1146 |
-#' @return A single tibble with all data from matrices of same quantification |
|
1147 |
-#' type in tidy format |
|
1049 |
+# |
|
1050 |
+# @return A single tibble with all data from matrices of same quantification |
|
1051 |
+# type in tidy format |
|
1148 | 1052 |
.import_type <- function(q_type, files, workers) { |
1149 | 1053 |
files <- files %>% dplyr::filter(.data$Quantification_type == q_type) |
1150 | 1054 |
# Register backend according to platform |
... | ... |
@@ -1181,16 +1085,16 @@ |
1181 | 1085 |
list(matrices, imported_files) |
1182 | 1086 |
} |
1183 | 1087 |
|
1184 |
-#' Internal function for importing all files for each quantification type. |
|
1185 |
-#' |
|
1186 |
-#' @param files_to_import The tibble containing the files to import |
|
1187 |
-#' @param workers Number of parallel workers |
|
1188 |
-#' @keywords internal |
|
1088 |
+# Internal function for importing all files for each quantification type. |
|
1089 |
+# |
|
1090 |
+# @param files_to_import The tibble containing the files to import |
|
1091 |
+# @param workers Number of parallel workers |
|
1092 |
+# @keywords internal |
|
1189 | 1093 |
#' @importFrom dplyr select distinct bind_rows |
1190 | 1094 |
#' @importFrom purrr map set_names reduce flatten |
1191 | 1095 |
#' @importFrom tibble as_tibble |
1192 |
-#' |
|
1193 |
-#' @return A named list of tibbles |
|
1096 |
+# |
|
1097 |
+# @return A named list of tibbles |
|
1194 | 1098 |
.parallel_import_merge <- function(files_to_import, workers) { |
1195 | 1099 |
# Find the actual quantification types included |
1196 | 1100 |
q_types <- files_to_import %>% |
... | ... |
@@ -1218,23 +1122,23 @@ |
1218 | 1122 |
|
1219 | 1123 |
#---- USED IN : import_parallel_Vispa2Matrices_auto ---- |
1220 | 1124 |
|
1221 |
-#' Internal function to match user defined patterns on a vector |
|
1222 |
-#' of file names. |
|
1223 |
-#' |
|
1224 |
-#' For each pattern specified by the user, the function tries to find a match |
|
1225 |
-#' on all the file names and combines the results as a tibble in which column |
|
1226 |
-#' names are the patterns and the values are TRUE if the pattern matched on the |
|
1227 |
-#' element at that index or FALSE otherwise. |
|
1228 |
-#' |
|
1229 |
-#' @param filenames A character vector of file names |
|
1230 |
-#' @param patterns A character vector of patterns to be matched |
|
1231 |
-#' @keywords internal |
|
1125 |
+# Internal function to match user defined patterns on a vector |
|
1126 |
+# of file names. |
|
1127 |
+# |
|
1128 |
+# For each pattern specified by the user, the function tries to find a match |
|
1129 |
+# on all the file names and combines the results as a tibble in which column |
|
1130 |
+# names are the patterns and the values are TRUE if the pattern matched on the |
|
1131 |
+# element at that index or FALSE otherwise. |
|
1132 |
+# |
|
1133 |
+# @param filenames A character vector of file names |
|
1134 |
+# @param patterns A character vector of patterns to be matched |
|
1135 |
+# @keywords internal |
|
1232 | 1136 |
#' @importFrom tibble as_tibble_col |
1233 | 1137 |
#' @importFrom stringr str_detect |
1234 | 1138 |
#' @importFrom purrr map reduce |
1235 | 1139 |
#' @importFrom dplyr bind_cols |
1236 |
-#' |
|
1237 |
-#' @return A tibble |
|
1140 |
+# |
|
1141 |
+# @return A tibble |
|
1238 | 1142 |
.pattern_matching <- function(filenames, patterns) { |
1239 | 1143 |
p_matches <- purrr::map(patterns, function(x) { |
1240 | 1144 |
mtc <- stringr::str_detect(filenames, x) |
... | ... |
@@ -1246,42 +1150,42 @@ |
1246 | 1150 |
p_matches |
1247 | 1151 |
} |
1248 | 1152 |
|
1249 |
-#' Helper function for checking if any of the elements of the list is true. |
|
1250 |
-#' |
|
1251 |
-#' @param ... A list of logical values |
|
1252 |
-#' @keywords internal |
|
1253 |
-#' |
|
1254 |
-#' @return TRUE if any of the parameters is true |
|
1153 |
+# Helper function for checking if any of the elements of the list is true. |
|
1154 |
+# |
|
1155 |
+# @param ... A list of logical values |
|
1156 |
+# @keywords internal |
|
1157 |
+# |
|
1158 |
+# @return TRUE if any of the parameters is true |
|
1255 | 1159 |
.any_match <- function(...) { |
1256 | 1160 |
l <- unlist(list(...)) |
1257 | 1161 |
any(l) |
1258 | 1162 |
} |
1259 | 1163 |
|
1260 |
-#' Helper function for checking if all of the elements of the list is true. |
|
1261 |
-#' |
|
1262 |
-#' @param ... A list of logical values |
|
1263 |
-#' @keywords internal |
|
1264 |
-#' |
|
1265 |
-#' @return TRUE if all of the parameters is true |
|
1164 |
+# Helper function for checking if all of the elements of the list is true. |
|
1165 |
+# |
|
1166 |
+# @param ... A list of logical values |
|
1167 |
+# @keywords internal |
|
1168 |
+# |
|
1169 |
+# @return TRUE if all of the parameters is true |
|
1266 | 1170 |
.all_match <- function(...) { |
1267 | 1171 |
l <- unlist(list(...)) |
1268 | 1172 |
all(l) |
1269 | 1173 |
} |
1270 | 1174 |
|
1271 |
-#' Updates files_found tibble according to pattern and matching options. |
|
1272 |
-#' |
|
1273 |
-#' @param files_nested The tibble containing Quantification_type and Files_found |
|
1274 |
-#' columns relative to a single project/pool pair |
|
1275 |
-#' @param p_matches The tibble representing the pattern matchings resulting from |
|
1276 |
-#' `pattern_matching` |
|
1277 |
-#' @param matching_opt The matching option |
|
1278 |
-#' @keywords internal |
|
1175 |
+# Updates files_found tibble according to pattern and matching options. |
|
1176 |
+# |
|
1177 |
+# @param files_nested The tibble containing Quantification_type and Files_found |
|
1178 |
+# columns relative to a single project/pool pair |
|
1179 |
+# @param p_matches The tibble representing the pattern matchings resulting from |
|
1180 |
+# `pattern_matching` |
|
1181 |
+# @param matching_opt The matching option |
|
1182 |
+# @keywords internal |
|
1279 | 1183 |
#' @import dplyr |
1280 | 1184 |
#' @importFrom purrr map reduce |
1281 | 1185 |
#' @importFrom rlang .data |
1282 | 1186 |
#' @importFrom fs as_fs_path |
1283 |
-#' |
|
1284 |
-#' @return An updated files_found tibble according to the matching option |
|
1187 |
+# |
|
1188 |
+# @return An updated files_found tibble according to the matching option |
|
1285 | 1189 |
.update_as_option <- function(files_nested, p_matches, matching_opt) { |
1286 | 1190 |
patterns <- colnames(p_matches) |
1287 | 1191 |
# Bind columns of Files nested tbl with pattern matches results |
... | ... |
@@ -1372,21 +1276,21 @@ |
1372 | 1276 |
to_keep |
1373 | 1277 |
} |
1374 | 1278 |
|
1375 |
-#' Looks up matrices to import given the association file and the |
|
1376 |
-#' root of the file system. |
|
1377 |
-#' |
|
1378 |
-#' @inheritParams .lookup_matrices |
|
1379 |
-#' @param patterns A character vector of patterns to be matched |
|
1380 |
-#' @param matching_opt A single character representing the matching option (one |
|
1381 |
-#' of "ANY", "ALL" or "OPTIONAL") |
|
1382 |
-#' @keywords internal |
|
1279 |
+# Looks up matrices to import given the association file and the |
|
1280 |
+# root of the file system. |
|
1281 |
+# |
|
1282 |
+# @inheritParams .lookup_matrices |
|
1283 |
+# @param patterns A character vector of patterns to be matched |
|
1284 |
+# @param matching_opt A single character representing the matching option (one |
|
1285 |
+# of "ANY", "ALL" or "OPTIONAL") |
|
1286 |
+# @keywords internal |
|
1383 | 1287 |
#' @importFrom purrr pmap flatten map |
1384 | 1288 |
#' @importFrom tibble as_tibble |
1385 | 1289 |
#' @importFrom stringr str_split |
1386 | 1290 |
#' @importFrom dplyr mutate select |
1387 | 1291 |
#' @importFrom utils tail |
1388 |
-#' |
|
1389 |
-#' @return A tibble containing all found files, including duplicates and missing |
|
1292 |
+# |
|
1293 |
+# @return A tibble containing all found files, including duplicates and missing |
|
1390 | 1294 |
.lookup_matrices_auto <- function(association_file, |
1391 | 1295 |
quantification_type, |
1392 | 1296 |
matrix_type, |
... | ... |
@@ -1419,24 +1323,24 @@ |
1419 | 1323 |
files_found |
1420 | 1324 |
} |
1421 | 1325 |
|
1422 |