1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,48 @@ |
1 |
+on: |
|
2 |
+ push: |
|
3 |
+ branches: |
|
4 |
+ - RELEASE_3_12 |
|
5 |
+ - master |
|
6 |
+ |
|
7 |
+name: pkgdown |
|
8 |
+ |
|
9 |
+jobs: |
|
10 |
+ pkgdown: |
|
11 |
+ runs-on: macOS-latest |
|
12 |
+ env: |
|
13 |
+ GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} |
|
14 |
+ steps: |
|
15 |
+ - uses: actions/checkout@v2 |
|
16 |
+ |
|
17 |
+ - uses: r-lib/actions/setup-r@v1 |
|
18 |
+ |
|
19 |
+ - uses: r-lib/actions/setup-pandoc@v1 |
|
20 |
+ |
|
21 |
+ - name: Query dependencies |
|
22 |
+ run: | |
|
23 |
+ install.packages('remotes') |
|
24 |
+ saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) |
|
25 |
+ writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") |
|
26 |
+ shell: Rscript {0} |
|
27 |
+ |
|
28 |
+ - name: Cache R packages |
|
29 |
+ uses: actions/cache@v2 |
|
30 |
+ with: |
|
31 |
+ path: ${{ env.R_LIBS_USER }} |
|
32 |
+ key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} |
|
33 |
+ restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- |
|
34 |
+ |
|
35 |
+ - name: Install dependencies |
|
36 |
+ run: | |
|
37 |
+ remotes::install_deps(dependencies = TRUE) |
|
38 |
+ install.packages("pkgdown", type = "binary") |
|
39 |
+ shell: Rscript {0} |
|
40 |
+ |
|
41 |
+ - name: Install package |
|
42 |
+ run: R CMD INSTALL . |
|
43 |
+ |
|
44 |
+ - name: Deploy package |
|
45 |
+ run: | |
|
46 |
+ git config --local user.email "actions@github.com" |
|
47 |
+ git config --local user.name "GitHub Actions" |
|
48 |
+ Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' |
... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
Package: ISAnalytics |
2 | 2 |
Title: Analyze gene therapy vector insertion sites data identified from genomics next generation sequencing reads for clonal tracking studies |
3 |
-Version: 1.1.9 |
|
3 |
+Version: 1.1.10 |
|
4 | 4 |
Date: 2020-07-03 |
5 | 5 |
Authors@R: c( |
6 | 6 |
person(given = "Andrea", |
... | ... |
@@ -45,7 +45,10 @@ Imports: |
45 | 45 |
stats, |
46 | 46 |
upsetjs, |
47 | 47 |
psych, |
48 |
- grDevices |
|
48 |
+ grDevices, |
|
49 |
+ data.table, |
|
50 |
+ readxl, |
|
51 |
+ tools |
|
49 | 52 |
Encoding: UTF-8 |
50 | 53 |
LazyData: false |
51 | 54 |
Roxygen: list(markdown = TRUE) |
... | ... |
@@ -59,7 +62,9 @@ Suggests: |
59 | 62 |
sessioninfo, |
60 | 63 |
rmarkdown, |
61 | 64 |
roxygen2, |
62 |
- vegan |
|
65 |
+ vegan, |
|
66 |
+ withr |
|
63 | 67 |
VignetteBuilder: knitr |
64 | 68 |
RdMacros: |
65 | 69 |
lifecycle |
70 |
+Config/testthat/edition: 3 |
... | ... |
@@ -37,7 +37,6 @@ import(BiocParallel) |
37 | 37 |
import(dplyr) |
38 | 38 |
import(ggplot2) |
39 | 39 |
import(lifecycle) |
40 |
-import(lubridate) |
|
41 | 40 |
import(upsetjs) |
42 | 41 |
importFrom(BiocParallel,MulticoreParam) |
43 | 42 |
importFrom(BiocParallel,SnowParam) |
... | ... |
@@ -45,13 +44,15 @@ importFrom(BiocParallel,bplapply) |
45 | 44 |
importFrom(BiocParallel,bpok) |
46 | 45 |
importFrom(BiocParallel,bpstop) |
47 | 46 |
importFrom(BiocParallel,bptry) |
47 |
+importFrom(data.table,fread) |
|
48 |
+importFrom(data.table,melt.data.table) |
|
49 |
+importFrom(data.table,rbindlist) |
|
50 |
+importFrom(data.table,setDT) |
|
48 | 51 |
importFrom(dplyr,across) |
49 | 52 |
importFrom(dplyr,all_of) |
50 | 53 |
importFrom(dplyr,arrange) |
51 | 54 |
importFrom(dplyr,bind_cols) |
52 | 55 |
importFrom(dplyr,bind_rows) |
53 |
-importFrom(dplyr,contains) |
|
54 |
-importFrom(dplyr,desc) |
|
55 | 56 |
importFrom(dplyr,distinct) |
56 | 57 |
importFrom(dplyr,filter) |
57 | 58 |
importFrom(dplyr,full_join) |
... | ... |
@@ -64,7 +65,6 @@ importFrom(dplyr,rename) |
64 | 65 |
importFrom(dplyr,select) |
65 | 66 |
importFrom(dplyr,semi_join) |
66 | 67 |
importFrom(dplyr,slice) |
67 |
-importFrom(dplyr,slice_head) |
|
68 | 68 |
importFrom(forcats,as_factor) |
69 | 69 |
importFrom(forcats,fct_inseq) |
70 | 70 |
importFrom(fs,as_fs_path) |
... | ... |
@@ -75,6 +75,7 @@ importFrom(fs,file_exists) |
75 | 75 |
importFrom(fs,is_dir) |
76 | 76 |
importFrom(fs,path) |
77 | 77 |
importFrom(fs,path_dir) |
78 |
+importFrom(fs,path_ext) |
|
78 | 79 |
importFrom(fs,path_wd) |
79 | 80 |
importFrom(ggrepel,geom_label_repel) |
80 | 81 |
importFrom(grDevices,colorRamp) |
... | ... |
@@ -88,6 +89,7 @@ importFrom(htmltools,h4) |
88 | 89 |
importFrom(htmltools,save_html) |
89 | 90 |
importFrom(htmltools,tagList) |
90 | 91 |
importFrom(htmltools,tags) |
92 |
+importFrom(lubridate,parse_date_time) |
|
91 | 93 |
importFrom(magrittr,`%>%`) |
92 | 94 |
importFrom(psych,describe) |
93 | 95 |
importFrom(purrr,flatten) |
... | ... |
@@ -98,6 +100,7 @@ importFrom(purrr,map) |
98 | 100 |
importFrom(purrr,map2) |
99 | 101 |
importFrom(purrr,map2_dfr) |
100 | 102 |
importFrom(purrr,map2_lgl) |
103 |
+importFrom(purrr,map_chr) |
|
101 | 104 |
importFrom(purrr,map_dbl) |
102 | 105 |
importFrom(purrr,map_dfr) |
103 | 106 |
importFrom(purrr,map_lgl) |
... | ... |
@@ -113,14 +116,23 @@ importFrom(reactable,colDef) |
113 | 116 |
importFrom(reactable,colFormat) |
114 | 117 |
importFrom(reactable,reactable) |
115 | 118 |
importFrom(reactable,reactableTheme) |
119 |
+importFrom(readr,cols) |
|
116 | 120 |
importFrom(readr,parse_factor) |
121 |
+importFrom(readr,problems) |
|
122 |
+importFrom(readr,read_delim) |
|
117 | 123 |
importFrom(readr,write_tsv) |
124 |
+importFrom(readxl,read_excel) |
|
118 | 125 |
importFrom(rlang,.data) |
119 | 126 |
importFrom(rlang,`:=`) |
127 |
+importFrom(rlang,abort) |
|
120 | 128 |
importFrom(rlang,arg_match) |
129 |
+importFrom(rlang,call2) |
|
130 |
+importFrom(rlang,dots_list) |
|
121 | 131 |
importFrom(rlang,env_bind) |
122 | 132 |
importFrom(rlang,eval_tidy) |
123 | 133 |
importFrom(rlang,expr) |
134 |
+importFrom(rlang,fn_fmls_names) |
|
135 |
+importFrom(rlang,inform) |
|
124 | 136 |
importFrom(rlang,parse_expr) |
125 | 137 |
importFrom(stats,median) |
126 | 138 |
importFrom(stats,na.omit) |
... | ... |
@@ -148,6 +160,7 @@ importFrom(tidyr,pivot_wider) |
148 | 160 |
importFrom(tidyr,separate) |
149 | 161 |
importFrom(tidyr,unite) |
150 | 162 |
importFrom(tidyr,unnest) |
163 |
+importFrom(tools,file_path_sans_ext) |
|
151 | 164 |
importFrom(utils,read.csv) |
152 | 165 |
importFrom(utils,read.delim) |
153 | 166 |
importFrom(utils,tail) |
... | ... |
@@ -1,5 +1,50 @@ |
1 | 1 |
\title{ISAnalytics News} |
2 | 2 |
|
3 |
+# ISAnalytics 1.1.10 (2021-04-08) |
|
4 |
+ |
|
5 |
+## FIXES |
|
6 |
+ |
|
7 |
+* Fixed issue in `compute_near_integrations`: when provided recalibration |
|
8 |
+map export path as a folder now the function works correctly and produces |
|
9 |
+an automatically generated file name |
|
10 |
+* Fixed issue in `aggregate_metadata`: now paths to folder that contains |
|
11 |
+Vispa2 stats is looked up correctly. Also, VISPA2 stats columns are aggregated |
|
12 |
+if found in the input data frame independently from the parameter |
|
13 |
+`import_stats`. |
|
14 |
+ |
|
15 |
+## IMPROVEMENTS |
|
16 |
+ |
|
17 |
+* `compute_abundance` can now take as input aggregated matrices and has |
|
18 |
+additional parameters to offer more flexibility to the user. Major updates |
|
19 |
+and improvements also on documentation and reproducible examples. |
|
20 |
+* Major improvements in function `import_single_Vispa2Matrix`: import is |
|
21 |
+now preferentially carried out using `data.table::fread` greatly speeding up |
|
22 |
+the process - where not possible `readr::read_delim` is used instead |
|
23 |
+* Major improvements in function `import_association_file`: greatly improved |
|
24 |
+parsing precision (each column has a dedicated type), import report now |
|
25 |
+signals parsing problems and their location and signals also |
|
26 |
+problems in parsing dates. |
|
27 |
+Report also includes potential problems in column names and signals missing |
|
28 |
+data in important columns. Added also the possibility to give various file |
|
29 |
+formats in input including `*.xls(x)` formats. |
|
30 |
+* Function `top_integrations` can now take additional parameters to compute |
|
31 |
+top n genes for each specified group |
|
32 |
+* Removed faceting parameters in `CIS_volcano_plot` due to poor precision |
|
33 |
+(easier to add faceting manually) and added parameters to return the |
|
34 |
+data frame that generated the plot as an additional result. Also, it is |
|
35 |
+now possible to specify a vector of gene names to highlight even if they're |
|
36 |
+not above the annotation threshold. |
|
37 |
+ |
|
38 |
+## MINOR |
|
39 |
+ |
|
40 |
+* ISAnalytics website has improved graphic theme and has an additional button |
|
41 |
+on the right that leads to the devel (or release) version of the website |
|
42 |
+* Updated vignettes |
|
43 |
+ |
|
44 |
+## FOR DEVS ONLY |
|
45 |
+ |
|
46 |
+* Complete rework of test suite to be compliant to testthat v.3 |
|
47 |
+ |
|
3 | 48 |
# ISAnalytics 1.1.9 (2021-02-17) |
4 | 49 |
|
5 | 50 |
## FIXES |
... | ... |
@@ -21,7 +21,7 @@ |
21 | 21 |
#' @export |
22 | 22 |
#' |
23 | 23 |
#' @examples |
24 |
-#' op <- options("ISAnalytics.widgets" = FALSE) |
|
24 |
+#' op <- options("ISAnalytics.widgets" = FALSE, "ISAnalytics.verbose" = FALSE) |
|
25 | 25 |
#' path_AF <- system.file("extdata", "ex_association_file.tsv", |
26 | 26 |
#' package = "ISAnalytics" |
27 | 27 |
#' ) |
... | ... |
@@ -180,13 +180,13 @@ aggregate_metadata <- function(association_file, |
180 | 180 |
#' @family Aggregate functions |
181 | 181 |
#' |
182 | 182 |
#' @importFrom purrr walk |
183 |
-#' @importFrom rlang expr eval_tidy |
|
183 |
+#' @importFrom rlang expr eval_tidy abort |
|
184 | 184 |
#' |
185 | 185 |
#' @return A list of tibbles or a single tibble according to input |
186 | 186 |
#' @export |
187 | 187 |
#' |
188 | 188 |
#' @examples |
189 |
-#' op <- options("ISAnalytics.widgets" = FALSE) |
|
189 |
+#' op <- options("ISAnalytics.widgets" = FALSE, "ISAnalytics.verbose" = FALSE) |
|
190 | 190 |
#' path_AF <- system.file("extdata", "ex_association_file.tsv", |
191 | 191 |
#' package = "ISAnalytics" |
192 | 192 |
#' ) |
... | ... |
@@ -201,12 +201,12 @@ aggregate_metadata <- function(association_file, |
201 | 201 |
#' matrix_type = "annotated", workers = 2, matching_opt = "ANY" |
202 | 202 |
#' ) |
203 | 203 |
#' agg <- aggregate_values_by_key( |
204 |
-#' x = matrices$seqCount, |
|
205 |
-#' association_file = association_file |
|
204 |
+#' x = matrices, |
|
205 |
+#' association_file = association_file, |
|
206 |
+#' value_cols = c("fragmentEstimate", "seqCount") |
|
206 | 207 |
#' ) |
207 | 208 |
#' options(op) |
208 |
-aggregate_values_by_key <- function( |
|
209 |
- x, |
|
209 |
+aggregate_values_by_key <- function(x, |
|
210 | 210 |
association_file, |
211 | 211 |
value_cols = "Value", |
212 | 212 |
key = c( |
... | ... |
@@ -225,39 +225,55 @@ aggregate_values_by_key <- function( |
225 | 225 |
purrr::walk(x, function(df) { |
226 | 226 |
stopifnot(is.data.frame(df)) |
227 | 227 |
if (.check_mandatory_vars(df) == FALSE) { |
228 |
- stop(.non_ISM_error()) |
|
228 |
+ rlang::abort(.non_ISM_error()) |
|
229 | 229 |
} |
230 | 230 |
if (.check_complAmpID(df) == FALSE) { |
231 |
- stop(.missing_complAmpID_error()) |
|
231 |
+ rlang::abort(.missing_complAmpID_error()) |
|
232 | 232 |
} |
233 | 233 |
if (!all(value_cols %in% colnames(df))) { |
234 |
- stop(.missing_user_cols_error()) |
|
234 |
+ rlang::abort(.missing_user_cols_error( |
|
235 |
+ value_cols[!value_cols %in% colnames(df)] |
|
236 |
+ )) |
|
235 | 237 |
} |
236 |
- purrr::walk(value_cols, function(col) { |
|
237 |
- expr <- rlang::expr(`$`(df, !!col)) |
|
238 |
- if (!is.numeric(rlang::eval_tidy(expr)) && |
|
239 |
- !is.integer(rlang::eval_tidy(expr))) { |
|
240 |
- stop(.non_num_user_cols_error()) |
|
238 |
+ is_numeric_col <- purrr::map_lgl(value_cols, function(col) { |
|
239 |
+ if (!is.double(df[[col]]) && |
|
240 |
+ !is.integer(df[[col]])) { |
|
241 |
+ FALSE |
|
242 |
+ } else { |
|
243 |
+ TRUE |
|
241 | 244 |
} |
242 |
- }) |
|
245 |
+ }) %>% purrr::set_names(value_cols) |
|
246 |
+ if (any(!is_numeric_col)) { |
|
247 |
+ rlang::abort(.non_num_user_cols_error( |
|
248 |
+ names(is_numeric_col)[!is_numeric_col] |
|
249 |
+ )) |
|
250 |
+ } |
|
243 | 251 |
}) |
244 | 252 |
} else { |
245 | 253 |
if (.check_mandatory_vars(x) == FALSE) { |
246 |
- stop(.non_ISM_error()) |
|
254 |
+ rlang::abort(.non_ISM_error()) |
|
247 | 255 |
} |
248 | 256 |
if (.check_complAmpID(x) == FALSE) { |
249 |
- stop(.missing_complAmpID_error()) |
|
257 |
+ rlang::abort(.missing_complAmpID_error()) |
|
250 | 258 |
} |
251 | 259 |
if (!all(value_cols %in% colnames(x))) { |
252 |
- stop(.missing_user_cols_error()) |
|
260 |
+ rlang::abort(.missing_user_cols_error( |
|
261 |
+ value_cols[!value_cols %in% colnames(x)] |
|
262 |
+ )) |
|
253 | 263 |
} |
254 |
- purrr::walk(value_cols, function(col) { |
|
255 |
- expr <- rlang::expr(`$`(x, !!col)) |
|
256 |
- if (!is.numeric(rlang::eval_tidy(expr)) && |
|
257 |
- !is.integer(rlang::eval_tidy(expr))) { |
|
258 |
- stop(.non_num_user_cols_error()) |
|
264 |
+ is_numeric_col <- purrr::map_lgl(value_cols, function(col) { |
|
265 |
+ if (!is.double(x[[col]]) && |
|
266 |
+ !is.integer(x[[col]])) { |
|
267 |
+ FALSE |
|
268 |
+ } else { |
|
269 |
+ TRUE |
|
259 | 270 |
} |
260 |
- }) |
|
271 |
+ }) %>% purrr::set_names(value_cols) |
|
272 |
+ if (any(!is_numeric_col)) { |
|
273 |
+ rlang::abort(.non_num_user_cols_error( |
|
274 |
+ names(is_numeric_col)[!is_numeric_col] |
|
275 |
+ )) |
|
276 |
+ } |
|
261 | 277 |
} |
262 | 278 |
# Check association file |
263 | 279 |
stopifnot(is.data.frame(association_file)) |
... | ... |
@@ -1,11 +1,12 @@ |
1 | 1 |
#------------------------------------------------------------------------------# |
2 | 2 |
# Analysis functions |
3 | 3 |
#------------------------------------------------------------------------------# |
4 |
-#' Computes the abundance of every integration in the sample. |
|
4 |
+ |
|
5 |
+#' Computes the abundance for every integration event in the input data frame. |
|
5 | 6 |
#' |
6 | 7 |
#' \lifecycle{maturing} |
7 |
-#' Abundance is obtained for every row by calculating the ratio |
|
8 |
-#' between the single value and the total value for the sample. |
|
8 |
+#' Abundance is obtained for every integration event by calculating the ratio |
|
9 |
+#' between the single value and the total value for the given group. |
|
9 | 10 |
#' |
10 | 11 |
#' @details Abundance will be computed upon the user selected columns |
11 | 12 |
#' in the `columns` parameter. For each column a corresponding |
... | ... |
@@ -13,19 +14,28 @@ |
13 | 14 |
#' column) will be produced. |
14 | 15 |
#' |
15 | 16 |
#' @param x An integration matrix - aka a data frame that includes |
16 |
-#' the `mandatory_IS_vars()` as columns |
|
17 |
+#' the `mandatory_IS_vars()` as columns. The matrix can either be aggregated |
|
18 |
+#' (via `aggregate_values_by_key()`) or not. |
|
17 | 19 |
#' @param columns A character vector of column names to process, |
18 | 20 |
#' must be numeric or integer columns |
19 | 21 |
#' @param percentage Add abundance as percentage? |
22 |
+#' @param key The key to group by when calculating totals |
|
23 |
+#' @param keep_totals A value between `TRUE`, `FALSE` or `df`. If `TRUE`, |
|
24 |
+#' the intermediate totals for each group will be kept in the output |
|
25 |
+#' data frame as a dedicated column with a trailing "_tot". If `FALSE`, |
|
26 |
+#' totals won't be included in the output data frame. If `df`, the totals |
|
27 |
+#' are returned to the user as a separate data frame, together with the |
|
28 |
+#' abundance data frame. |
|
20 | 29 |
#' |
21 | 30 |
#' @family Analysis functions |
22 | 31 |
#' |
23 | 32 |
#' @importFrom magrittr `%>%` |
24 |
-#' @importFrom tibble is_tibble |
|
25 | 33 |
#' @import dplyr |
26 | 34 |
#' @importFrom rlang .data eval_tidy parse_expr |
35 |
+#' @importFrom purrr map_lgl |
|
27 | 36 |
#' @importFrom stringr str_replace |
28 |
-#' @return An integration matrix |
|
37 |
+#' @return Either a single data frame with computed abundance values or |
|
38 |
+#' a list of 2 data frames (abundance_df, quant_totals) |
|
29 | 39 |
#' @export |
30 | 40 |
#' |
31 | 41 |
#' @examples |
... | ... |
@@ -33,54 +43,79 @@ |
33 | 43 |
#' package = "ISAnalytics" |
34 | 44 |
#' ) |
35 | 45 |
#' matrix <- import_single_Vispa2Matrix(path) |
36 |
-#' abundance <- compute_abundance(matrix) |
|
37 |
-compute_abundance <- function(x, columns = "Value", percentage = TRUE) { |
|
46 |
+#' |
|
47 |
+#' # Simple integration matrix - grouping by CompleteAmplificationID |
|
48 |
+#' abundance1 <- compute_abundance(matrix) |
|
49 |
+#' abundance1 |
|
50 |
+#' |
|
51 |
+#' # Keeping totals as a separate data frame |
|
52 |
+#' abundance2 <- compute_abundance(matrix, keep_totals = "df") |
|
53 |
+#' abundance2 |
|
54 |
+compute_abundance <- function(x, |
|
55 |
+ columns = "Value", |
|
56 |
+ percentage = TRUE, |
|
57 |
+ key = "CompleteAmplificationID", |
|
58 |
+ keep_totals = FALSE) { |
|
38 | 59 |
## Check parameters |
39 |
- stopifnot(tibble::is_tibble(x)) |
|
60 |
+ stopifnot(is.data.frame(x)) |
|
40 | 61 |
stopifnot(is.character(columns)) |
62 |
+ stopifnot(is.character(key)) |
|
41 | 63 |
if (.check_mandatory_vars(x) == FALSE) { |
42 | 64 |
stop(.non_ISM_error()) |
43 | 65 |
} |
44 |
- if (.check_complAmpID(x) == FALSE) { |
|
45 |
- stop(.missing_complAmpID_error()) |
|
46 |
- } |
|
47 | 66 |
stopifnot(is.logical(percentage) & length(percentage) == 1) |
48 |
- if (!all(columns %in% colnames(x))) { |
|
49 |
- stop(.missing_user_cols_error()) |
|
67 |
+ if (!all(columns %in% colnames(x)) | !all(key %in% colnames(x))) { |
|
68 |
+ missing_cols <- c( |
|
69 |
+ columns[!columns %in% colnames(x)], |
|
70 |
+ key[!key %in% colnames(x)] |
|
71 |
+ ) |
|
72 |
+ rlang::abort(.missing_user_cols_error(missing_cols)) |
|
50 | 73 |
} |
51 |
- purrr::walk(columns, function(col) { |
|
74 |
+ non_num_cols <- purrr::map_lgl(columns, function(col) { |
|
52 | 75 |
expr <- rlang::expr(`$`(x, !!col)) |
53 |
- if (!is.numeric(rlang::eval_tidy(expr)) & |
|
54 |
- !is.numeric(rlang::eval_tidy(expr))) { |
|
55 |
- stop(.non_num_user_cols_error()) |
|
76 |
+ if (is.numeric(rlang::eval_tidy(expr)) | |
|
77 |
+ is.integer(rlang::eval_tidy(expr))) { |
|
78 |
+ return(FALSE) |
|
79 |
+ } else { |
|
80 |
+ return(TRUE) |
|
56 | 81 |
} |
57 | 82 |
}) |
83 |
+ if (any(non_num_cols)) { |
|
84 |
+ stop(.non_num_user_cols_error(columns[non_num_cols])) |
|
85 |
+ } |
|
86 |
+ stopifnot(is.logical(keep_totals) || keep_totals == "df") |
|
58 | 87 |
## Computation |
88 |
+ ### Computes totals for each group defined by key |
|
59 | 89 |
totals <- x %>% |
60 |
- dplyr::group_by(.data$CompleteAmplificationID) %>% |
|
90 |
+ dplyr::group_by(dplyr::across(dplyr::all_of(key))) %>% |
|
61 | 91 |
dplyr::summarise( |
62 | 92 |
dplyr::across(dplyr::all_of(columns), |
63 | 93 |
sum, |
64 |
- .names = "{.col}_sum" |
|
94 |
+ .names = "{.col}_tot" |
|
65 | 95 |
), |
66 | 96 |
.groups = "drop" |
67 | 97 |
) |
98 |
+ ### Computes abundance as value (for each col) / total of the corresponding |
|
99 |
+ ### group (defined by key) |
|
68 | 100 |
abundance_df <- x %>% |
69 |
- dplyr::left_join(totals, by = "CompleteAmplificationID") %>% |
|
101 |
+ dplyr::left_join(totals, by = key) %>% |
|
70 | 102 |
dplyr::mutate(dplyr::across(dplyr::all_of(columns), |
71 | 103 |
list(ab = ~ .x / rlang::eval_tidy( |
72 | 104 |
rlang::parse_expr( |
73 | 105 |
paste( |
74 | 106 |
dplyr::cur_column(), |
75 |
- "sum", |
|
107 |
+ "tot", |
|
76 | 108 |
sep = "_" |
77 | 109 |
) |
78 | 110 |
) |
79 | 111 |
)), |
80 | 112 |
.names = "{.col}_RelAbundance" |
81 | 113 |
)) %>% |
82 |
- dplyr::select(-c(dplyr::all_of(paste(columns, "sum", sep = "_")))) %>% |
|
83 | 114 |
dplyr::distinct() |
115 |
+ if (keep_totals == FALSE || keep_totals == "df") { |
|
116 |
+ abundance_df <- abundance_df %>% |
|
117 |
+ dplyr::select(-c(dplyr::all_of(paste(columns, "tot", sep = "_")))) |
|
118 |
+ } |
|
84 | 119 |
if (percentage == TRUE) { |
85 | 120 |
abundance_df <- abundance_df %>% |
86 | 121 |
dplyr::mutate( |
... | ... |
@@ -93,7 +128,11 @@ compute_abundance <- function(x, columns = "Value", percentage = TRUE) { |
93 | 128 |
dplyr::contains("PercAbundance") |
94 | 129 |
) |
95 | 130 |
} |
96 |
- abundance_df |
|
131 |
+ if (keep_totals == "df") { |
|
132 |
+ return(list(abundance_df = abundance_df, quant_totals = totals)) |
|
133 |
+ } else { |
|
134 |
+ return(abundance_df) |
|
135 |
+ } |
|
97 | 136 |
} |
98 | 137 |
|
99 | 138 |
|
... | ... |
@@ -139,9 +178,11 @@ compute_abundance <- function(x, columns = "Value", percentage = TRUE) { |
139 | 178 |
#' root_pth <- system.file("extdata", "fs.zip", package = "ISAnalytics") |
140 | 179 |
#' root <- unzip_file_system(root_pth, "fs") |
141 | 180 |
#' matrices <- import_parallel_Vispa2Matrices_auto( |
142 |
-#' path, root, |
|
143 |
-#' c("fragmentEstimate", "seqCount"), "annotated", 2, NULL, "ANY", |
|
144 |
-#' dates_format = "dmy" |
|
181 |
+#' association_file = path, root = root, |
|
182 |
+#' quantification_type = c("fragmentEstimate", "seqCount"), |
|
183 |
+#' matrix_type = "annotated", workers = 2, patterns = NULL, |
|
184 |
+#' matching_opt = "ANY", |
|
185 |
+#' dates_format = "dmy", multi_quant_matrix = FALSE |
|
145 | 186 |
#' ) |
146 | 187 |
#' total_matrix <- comparison_matrix(matrices) |
147 | 188 |
#' options(op) |
... | ... |
@@ -225,13 +266,17 @@ comparison_matrix <- function(x, |
225 | 266 |
#' ) |
226 | 267 |
#' root_pth <- system.file("extdata", "fs.zip", package = "ISAnalytics") |
227 | 268 |
#' root <- unzip_file_system(root_pth, "fs") |
228 |
-#' matrices <- import_parallel_Vispa2Matrices_auto( |
|
229 |
-#' path, root, |
|
230 |
-#' c("fragmentEstimate", "seqCount"), "annotated", 2, NULL, "ANY", |
|
269 |
+#' association_file <- import_association_file( |
|
270 |
+#' path = path, root = root, |
|
231 | 271 |
#' dates_format = "dmy" |
232 | 272 |
#' ) |
233 |
-#' total_matrix <- comparison_matrix(matrices) |
|
234 |
-#' separated_matrix <- separate_quant_matrices(total_matrix) |
|
273 |
+#' matrices <- import_parallel_Vispa2Matrices_auto( |
|
274 |
+#' association_file = association_file, |
|
275 |
+#' quantification_type = c("seqCount", "fragmentEstimate"), |
|
276 |
+#' matrix_type = "annotated", workers = 2, patterns = NULL, |
|
277 |
+#' matching_opt = "ANY" |
|
278 |
+#' ) |
|
279 |
+#' separated_matrix <- separate_quant_matrices(matrices) |
|
235 | 280 |
#' options(op) |
236 | 281 |
separate_quant_matrices <- function(x, fragmentEstimate = "fragmentEstimate", |
237 | 282 |
seqCount = "seqCount", |
... | ... |
@@ -446,33 +491,40 @@ threshold_filter <- function(x, |
446 | 491 |
} |
447 | 492 |
|
448 | 493 |
|
449 |
-#' Sorts and keeps the top n integration sites in a data frame. |
|
494 |
+#' Sorts and keeps the top n integration sites based on the values |
|
495 |
+#' in a given column. |
|
450 | 496 |
#' |
451 | 497 |
#' \lifecycle{experimental} |
452 | 498 |
#' The input data frame will be sorted by the highest values in |
453 | 499 |
#' the columns specified and the top n rows will be returned as output. |
454 | 500 |
#' The user can choose to keep additional columns in the output |
455 | 501 |
#' by passing a vector of column names or passing 2 "shortcuts": |
456 |
-#' * `keep` = "everything" keeps all columns in the original data frame |
|
457 |
-#' * `keep` = "nothing" only keeps the mandatory columns |
|
502 |
+#' * `keep = "everything"` keeps all columns in the original data frame |
|
503 |
+#' * `keep = "nothing"` only keeps the mandatory columns |
|
458 | 504 |
#' (`mandatory_IS_vars()`) plus the columns in the `columns` parameter. |
459 | 505 |
#' |
460 | 506 |
#' @param x An integration matrix (data frame containing |
461 | 507 |
#' `mandatory_IS_vars()`) |
462 |
-#' @param n How many rows should the output have? Must be numeric |
|
508 |
+#' @param n How many integrations should be sliced (in total or |
|
509 |
+#' for each group)? Must be numeric |
|
463 | 510 |
#' or integer and greater than 0 |
464 | 511 |
#' @param columns Columns to use for the sorting. If more than a column |
465 | 512 |
#' is supplied primary ordering is done on the first column, |
466 | 513 |
#' secondary ordering on all other columns |
467 | 514 |
#' @param keep Names of the columns to keep besides `mandatory_IS_vars()` |
468 | 515 |
#' and `columns` |
516 |
+#' @param key Either `NULL` or a character vector of column names to group |
|
517 |
+#' by. If not `NULL` the input will be grouped and the top fraction will |
|
518 |
+#' be extracted from each group. |
|
469 | 519 |
#' |
470 | 520 |
#' @family Analysis functions |
471 | 521 |
#' |
472 |
-#' @importFrom dplyr arrange across all_of desc slice_head select |
|
522 |
+#' @import dplyr |
|
473 | 523 |
#' @importFrom magrittr `%>%` |
524 |
+#' @importFrom rlang abort |
|
474 | 525 |
#' |
475 |
-#' @return A data frame with `n` rows |
|
526 |
+#' @return Either a data frame with at most n rows or |
|
527 |
+#' a data frames with at most n*(number of groups) rows. |
|
476 | 528 |
#' @export |
477 | 529 |
#' |
478 | 530 |
#' @examples |
... | ... |
@@ -490,21 +542,40 @@ threshold_filter <- function(x, |
490 | 542 |
#' columns = c("Value", "Value2"), |
491 | 543 |
#' keep = "nothing" |
492 | 544 |
#' ) |
493 |
-top_integrations <- function(x, n = 50, columns = "RelAbundance", |
|
494 |
- keep = "everything") { |
|
545 |
+#' top_key <- top_integrations(smpl, |
|
546 |
+#' n = 3, |
|
547 |
+#' columns = "Value", |
|
548 |
+#' keep = "Value2", |
|
549 |
+#' key = "CompleteAmplificationID" |
|
550 |
+#' ) |
|
551 |
+top_integrations <- function(x, n = 50, |
|
552 |
+ columns = "fragmentEstimate_sum_RelAbundance", |
|
553 |
+ keep = "everything", key = NULL) { |
|
495 | 554 |
stopifnot(is.data.frame(x)) |
496 | 555 |
stopifnot(is.numeric(n) & length(n) == 1 & n > 0) |
497 | 556 |
stopifnot(is.character(keep)) |
498 | 557 |
stopifnot(is.character(columns)) |
558 |
+ stopifnot(is.null(key) || is.character(key)) |
|
499 | 559 |
if (!.check_mandatory_vars(x)) { |
500 |
- stop(.non_ISM_error()) |
|
560 |
+ rlang::abort(.non_ISM_error()) |
|
501 | 561 |
} |
502 | 562 |
if (!all(columns %in% colnames(x))) { |
503 |
- stop(.missing_user_cols_error()) |
|
563 |
+ rlang::abort(.missing_user_cols_error( |
|
564 |
+ columns[!columns %in% colnames(x)] |
|
565 |
+ )) |
|
504 | 566 |
} |
505 | 567 |
if (!(all(keep == "everything") || all(keep == "nothing"))) { |
506 | 568 |
if (any(!keep %in% colnames(x))) { |
507 |
- stop(.missing_user_cols_error()) |
|
569 |
+ rlang::abort(.missing_user_cols_error( |
|
570 |
+ keep[!keep %in% colnames(x)] |
|
571 |
+ )) |
|
572 |
+ } |
|
573 |
+ } |
|
574 |
+ if (!is.null(key)) { |
|
575 |
+ if (!all(key %in% colnames(x))) { |
|
576 |
+ rlang::abort(.missing_user_cols_error( |
|
577 |
+ key[!key %in% colnames(x)] |
|
578 |
+ )) |
|
508 | 579 |
} |
509 | 580 |
} |
510 | 581 |
essential_cols <- c(mandatory_IS_vars(), columns) |
... | ... |
@@ -515,6 +586,18 @@ top_integrations <- function(x, n = 50, columns = "RelAbundance", |
515 | 586 |
} else { |
516 | 587 |
keep[!keep %in% essential_cols] |
517 | 588 |
} |
589 |
+ if (!is.null(key)) { |
|
590 |
+ result <- x %>% |
|
591 |
+ dplyr::group_by(dplyr::across(dplyr::all_of(key))) %>% |
|
592 |
+ dplyr::arrange(dplyr::across( |
|
593 |
+ dplyr::all_of(columns), |
|
594 |
+ dplyr::desc |
|
595 |
+ ), .by_group = TRUE) %>% |
|
596 |
+ dplyr::slice_head(n = n) %>% |
|
597 |
+ dplyr::select(dplyr::all_of(c(key, essential_cols, to_keep))) %>% |
|
598 |
+ dplyr::ungroup() |
|
599 |
+ return(result) |
|
600 |
+ } |
|
518 | 601 |
result <- x %>% |
519 | 602 |
dplyr::arrange(dplyr::across( |
520 | 603 |
dplyr::all_of(columns), |
... | ... |
@@ -602,7 +685,7 @@ top_integrations <- function(x, n = 50, columns = "RelAbundance", |
602 | 685 |
#' association_file = association_file, root = NULL, |
603 | 686 |
#' quantification_type = c("seqCount", "fragmentEstimate"), |
604 | 687 |
#' matrix_type = "annotated", workers = 2, patterns = NULL, |
605 |
-#' matching_opt = "ANY" |
|
688 |
+#' matching_opt = "ANY", multi_quant_matrix = FALSE |
|
606 | 689 |
#' ) |
607 | 690 |
#' |
608 | 691 |
#' stats <- sample_statistics(matrices$seqCount, association_file) |
... | ... |
@@ -719,6 +802,7 @@ sample_statistics <- function(x, metadata, |
719 | 802 |
#' @importFrom rlang .data |
720 | 803 |
#' @importFrom magrittr `%>%` |
721 | 804 |
#' @importFrom stats median pt p.adjust |
805 |
+#' @importFrom utils read.csv |
|
722 | 806 |
#' |
723 | 807 |
#' @return A data frame |
724 | 808 |
#' @export |
... | ... |
@@ -742,7 +826,7 @@ sample_statistics <- function(x, metadata, |
742 | 826 |
#' dates_format = "dmy" |
743 | 827 |
#' ) |
744 | 828 |
#' |
745 |
-#' cis <- CIS_grubbs(matrices$seqCount) |
|
829 |
+#' cis <- CIS_grubbs(matrices) |
|
746 | 830 |
#' |
747 | 831 |
#' options(op) |
748 | 832 |
CIS_grubbs <- function(x, |
... | ... |
@@ -776,28 +860,34 @@ CIS_grubbs <- function(x, |
776 | 860 |
|
777 | 861 |
# Try to import annotation file |
778 | 862 |
if (ext == "tsv") { |
779 |
- refgenes <- read.csv( |
|
863 |
+ refgenes <- utils::read.csv( |
|
780 | 864 |
file = genomic_annotation_file, |
781 | 865 |
header = TRUE, fill = TRUE, sep = "\t", |
782 | 866 |
check.names = FALSE, |
783 | 867 |
na.strings = c("NONE", "NA", "NULL", "NaN", "") |
784 | 868 |
) |
785 | 869 |
refgenes <- tibble::as_tibble(refgenes) %>% |
786 |
- dplyr::mutate(chrom = stringr::str_replace_all(.data$chrom, |
|
787 |
- "chr", "")) |
|
870 |
+ dplyr::mutate(chrom = stringr::str_replace_all( |
|
871 |
+ .data$chrom, |
|
872 |
+ "chr", "" |
|
873 |
+ )) |
|
788 | 874 |
} else if (ext == "csv") { |
789 |
- refgenes <- read.csv( |
|
875 |
+ refgenes <- utils::read.csv( |
|
790 | 876 |
file = genomic_annotation_file, |
791 | 877 |
header = TRUE, fill = TRUE, |
792 | 878 |
check.names = FALSE, |
793 | 879 |
na.strings = c("NONE", "NA", "NULL", "NaN", "") |
794 | 880 |
) |
795 | 881 |
refgenes <- tibble::as_tibble(refgenes) %>% |
796 |
- dplyr::mutate(chrom = stringr::str_replace_all(.data$chrom, |
|
797 |
- "chr", "")) |
|
882 |
+ dplyr::mutate(chrom = stringr::str_replace_all( |
|
883 |
+ .data$chrom, |
|
884 |
+ "chr", "" |
|
885 |
+ )) |
|
798 | 886 |
} else { |
799 |
- stop(paste("The genomic annotation file must be either in", |
|
800 |
- ".tsv or .csv format (compressed or not)")) |
|
887 |
+ stop(paste( |
|
888 |
+ "The genomic annotation file must be either in", |
|
889 |
+ ".tsv or .csv format (compressed or not)" |
|
890 |
+ )) |
|
801 | 891 |
} |
802 | 892 |
|
803 | 893 |
# Check annotation file format |
... | ... |
@@ -832,7 +922,8 @@ CIS_grubbs <- function(x, |
832 | 922 |
stats::median(.data$integration_locus), |
833 | 923 |
distinct_orientations = dplyr::n_distinct(.data$strand), |
834 | 924 |
describe = list(tibble::as_tibble( |
835 |
- psych::describe(.data$integration_locus))), |
|
925 |
+ psych::describe(.data$integration_locus) |
|
926 |
+ )), |
|
836 | 927 |
.groups = "drop" |
837 | 928 |
) %>% |
838 | 929 |
tidyr::unnest(.data$describe, keep_empty = TRUE, names_sep = "_") |
... | ... |
@@ -1028,7 +1119,7 @@ CIS_grubbs <- function(x, |
1028 | 1119 |
#' association_file = association_file, root = NULL, |
1029 | 1120 |
#' quantification_type = c("seqCount", "fragmentEstimate"), |
1030 | 1121 |
#' matrix_type = "annotated", workers = 2, patterns = NULL, |
1031 |
-#' matching_opt = "ANY" |
|
1122 |
+#' matching_opt = "ANY", multi_quant_matrix = FALSE |
|
1032 | 1123 |
#' ) |
1033 | 1124 |
#' |
1034 | 1125 |
#' #### EXTERNAL AGGREGATION |
... | ... |
@@ -60,8 +60,11 @@ |
60 | 60 |
#' dates_format = "dmy" |
61 | 61 |
#' ) |
62 | 62 |
#' matrices <- import_parallel_Vispa2Matrices_auto( |
63 |
-#' association_file, NULL, |
|
64 |
-#' c("fragmentEstimate", "seqCount"), "annotated", 2, NULL, "ANY" |
|
63 |
+#' association_file = association_file, root = NULL, |
|
64 |
+#' quantification_type = c("fragmentEstimate", "seqCount"), |
|
65 |
+#' matrix_type = "annotated", workers = 2, |
|
66 |
+#' patterns = NULL, matching_opt = "ANY", |
|
67 |
+#' multi_quant_matrix = FALSE |
|
65 | 68 |
#' ) |
66 | 69 |
#' matrices <- remove_collisions(matrices, association_file) |
67 | 70 |
#' options(op) |
... | ... |
@@ -406,8 +409,11 @@ remove_collisions <- function(x, |
406 | 409 |
#' dates_format = "dmy" |
407 | 410 |
#' ) |
408 | 411 |
#' matrices <- import_parallel_Vispa2Matrices_auto( |
409 |
-#' association_file, NULL, |
|
410 |
-#' c("fragmentEstimate", "seqCount"), "annotated", 2, NULL, "ANY" |
|
412 |
+#' association_file = association_file, root = NULL, |
|
413 |
+#' quantification_type = c("fragmentEstimate", "seqCount"), |
|
414 |
+#' matrix_type = "annotated", workers = 2, |
|
415 |
+#' patterns = NULL, matching_opt = "ANY", |
|
416 |
+#' multi_quant_matrix = FALSE |
|
411 | 417 |
#' ) |
412 | 418 |
#' sc_matrix <- remove_collisions(matrices$seqCount, association_file) |
413 | 419 |
#' others <- matrices[!names(matrices) %in% "seqCount"] |
... | ... |
@@ -1,3 +1,7 @@ |
1 |
+#------------------------------------------------------------------------------# |
|
2 |
+# Exported/Internal variables |
|
3 |
+#------------------------------------------------------------------------------# |
|
4 |
+ |
|
1 | 5 |
#' Names of mandatory variables for an integration matrix. |
2 | 6 |
#' |
3 | 7 |
#' Contains the names of the columns that need to be present in order for a |
... | ... |
@@ -12,6 +16,25 @@ mandatory_IS_vars <- function() { |
12 | 16 |
c("chr", "integration_locus", "strand") |
13 | 17 |
} |
14 | 18 |
|
19 |
+# Internal: associates column types with column names for a more precise |
|
20 |
+# import |
|
21 |
+.mandatory_IS_types <- function(mode) { |
|
22 |
+ if (mode == "fread") { |
|
23 |
+ return(list( |
|
24 |
+ character = c("chr", "strand"), |
|
25 |
+ integer = "integration_locus" |
|
26 |
+ )) |
|
27 |
+ } else { |
|
28 |
+ return( |
|
29 |
+ list( |
|
30 |
+ chr = "c", |
|
31 |
+ integration_locus = "i", |
|
32 |
+ strand = "c" |
|
33 |
+ ) |
|
34 |
+ ) |
|
35 |
+ } |
|
36 |
+} |
|
37 |
+ |
|
15 | 38 |
#' Names of the annotation variables for an integration matrix. |
16 | 39 |
#' |
17 | 40 |
#' Contains the names of the columns that are present if the integration matrix |
... | ... |
@@ -26,6 +49,19 @@ annotation_IS_vars <- function() { |
26 | 49 |
c("GeneName", "GeneStrand") |
27 | 50 |
} |
28 | 51 |
|
52 |
+# Internal: associates column types with column names for a more precise |
|
53 |
+# import |
|
54 |
+.annotation_IS_types <- function(mode) { |
|
55 |
+ if (mode == "fread") { |
|
56 |
+ return(list(character = c("GeneName", "GeneStrand"))) |
|
57 |
+ } else { |
|
58 |
+ return(list( |
|
59 |
+ GeneName = "c", |
|
60 |
+ GeneStrand = "c" |
|
61 |
+ )) |
|
62 |
+ } |
|
63 |
+} |
|
64 |
+ |
|
29 | 65 |
#' Names of the columns in the association file. |
30 | 66 |
#' |
31 | 67 |
#' All the names of the columns present in the association file. |
... | ... |
@@ -55,10 +91,95 @@ association_file_columns <- function() { |
55 | 91 |
"Kapa", "ulForPool", "CompleteAmplificationID", "UniqueID", |
56 | 92 |
"StudyTestID", |
57 | 93 |
"StudyTestGroup", "MouseID", "Tigroup", "Tisource", |
58 |
- "PathToFolderProjectID" |
|
94 |
+ "PathToFolderProjectID", |
|
95 |
+ "SamplesNameCheck", |
|
96 |
+ "TimepointDays", "TimepointMonths", |
|
97 |
+ "TimepointYears", "ng DNA corrected" |
|
59 | 98 |
) |
60 | 99 |
} |
61 | 100 |
|
101 |
+.af_col_types <- function(mode) { |
|
102 |
+ if (mode == "fread") { |
|
103 |
+ types <- list( |
|
104 |
+ character = c( |
|
105 |
+ "ProjectID", "FUSIONID", "PoolID", "TagSequence", |
|
106 |
+ "SubjectID", "VectorType", "VectorID", "ExperimentID", |
|
107 |
+ "Tissue", "TimePoint", "DNAFragmentation", |
|
108 |
+ "PCRMethod", "TagIDextended", "Keywords", |
|
109 |
+ "CellMarker", "TagID", "NGSProvider", "NGSTechnology", |
|
110 |
+ "ConverrtedFilesDir", "ConverrtedFilesName", |
|
111 |
+ "SourceFileFolder", "SourceFileNameR1", |
|
112 |
+ "SourceFileNameR2", "DNAnumber", "LinearPCRID", |
|
113 |
+ "1stExpoPCRID", "2ndExpoID", "FusionPrimerPCRID", |
|
114 |
+ "Genome", "Genotype", "Notes", "AddedField1", |
|
115 |
+ "AddedField2", "AddedField3", "AddedField4", |
|
116 |
+ "concatenatePoolIDSeqRun", "CompleteAmplificationID", |
|
117 |
+ "UniqueID", "StudyTestID", "Tigroup", "Tisource", |
|
118 |
+ "PathToFolderProjectID", "SamplesNameCheck", |
|
119 |
+ "DNAextractionDate", "LinearPCRDate", |
|
120 |
+ "SonicationDate", "LigationDate", |
|
121 |
+ "FusionPrimerPCRDate", "PoolDate", "SequencingDate", |
|
122 |
+ "MOI", "AddedField6_RelativeBloodPercentage", |
|
123 |
+ "TestGroup" |
|
124 |
+ ), |
|
125 |
+ double = c( |
|
126 |
+ "DNAngUsed", "VCN", "Engraftment", "Transduction", |
|
127 |
+ "AddedField7_PurityTestFeasibility", |
|
128 |
+ "AddedField8_FacsSeparationPurity", "Kapa", |
|
129 |
+ "ulForPool", "TimepointMonths", "TimepointYears", |
|
130 |
+ "ng DNA corrected" |
|
131 |
+ ), |
|
132 |
+ integer = c( |
|
133 |
+ "ReplicateNumber", "SequencingRound", |
|
134 |
+ "StudyTestGroup", "MouseID", "TimepointDays" |
|
135 |
+ ) |
|
136 |
+ ) |
|
137 |
+ return(types) |
|
138 |
+ } |
|
139 |
+ if (mode == "readr") { |
|
140 |
+ # date_format <- unlist(strsplit(date_format)) |
|
141 |
+ # date_format <- paste0("%", date_format) |
|
142 |
+ # date_format <- paste0(date_format, collapse = "%.") |
|
143 |
+ types <- list( |
|
144 |
+ ProjectID = "c", FUSIONID = "c", PoolID = "c", TagSequence = "c", |
|
145 |
+ SubjectID = "c", VectorType = "c", VectorID = "c", |
|
146 |
+ ExperimentID = "c", Tissue = "c", TimePoint = "c", |
|
147 |
+ DNAFragmentation = "c", PCRMethod = "c", TagIDextended = "c", |
|
148 |
+ Keywords = "c", CellMarker = "c", TagID = "c", |
|
149 |
+ NGSProvider = "c", NGSTechnology = "c", |
|
150 |
+ ConverrtedFilesDir = "c", ConverrtedFilesName = "c", |
|
151 |
+ SourceFileFolder = "c", SourceFileNameR1 = "c", |
|
152 |
+ SourceFileNameR2 = "c", DNAnumber = "c", LinearPCRID = "c", |
|
153 |
+ `1stExpoPCRID` = "c", `2ndExpoID` = "c", |
|
154 |
+ FusionPrimerPCRID = "c", Genome = "c", Genotype = "c", |
|
155 |
+ Notes = "c", AddedField1 = "c", |
|
156 |
+ AddedField2 = "c", AddedField3 = "c", AddedField4 = "c", |
|
157 |
+ concatenatePoolIDSeqRun = "c", CompleteAmplificationID = "c", |
|
158 |
+ UniqueID = "c", StudyTestID = "c", Tigroup = "c", Tisource = "c", |
|
159 |
+ PathToFolderProjectID = "c", SamplesNameCheck = "c", |
|
160 |
+ DNAextractionDate = "c", |
|
161 |
+ LinearPCRDate = "c", |
|
162 |
+ SonicationDate = "c", |
|
163 |
+ LigationDate = "c", |
|
164 |
+ FusionPrimerPCRDate = "c", |
|
165 |
+ PoolDate = "c", |
|
166 |
+ SequencingDate = "c", |
|
167 |
+ MOI = "c", AddedField6_RelativeBloodPercentage = "c", |
|
168 |
+ DNAngUsed = "d", VCN = "d", Engraftment = "d", Transduction = "d", |
|
169 |
+ AddedField7_PurityTestFeasibility = "d", |
|
170 |
+ AddedField8_FacsSeparationPurity = "d", Kapa = "d", |
|
171 |
+ ulForPool = "d", TimepointMonths = "d", TimepointYears = "d", |
|
172 |
+ `ng DNA corrected` = "d", |
|
173 |
+ ReplicateNumber = "i", SequencingRound = "i", TestGroup = "c", |
|
174 |
+ MouseID = "i", TimepointDays = "i", |
|
175 |
+ `1stExpoPCRDate` = "c", |
|
176 |
+ `2ndExpoDate` = "c", |
|
177 |
+ StudyTestGroup = "i" |
|
178 |
+ ) |
|
179 |
+ return(types) |
|
180 |
+ } |
|
181 |
+} |
|
182 |
+ |
|
62 | 183 |
#' Names of the columns of the association file to consider for |
63 | 184 |
#' Vispa2 launch. |
64 | 185 |
#' |
... | ... |
@@ -80,3 +201,23 @@ reduced_AF_columns <- function() { |
80 | 201 |
"PoolID" |
81 | 202 |
) |
82 | 203 |
} |
204 |
+ |
|
205 |
+# Names of the columns of iss stats considered for aggregation |
|
206 |
+# USED IN: |
|
207 |
+# - .join_and_aggregate |
|
208 |
+.agg_iss_cols <- function() { |
|
209 |
+ c( |
|
210 |
+ "BARCODE_MUX", "TRIMMING_FINAL_LTRLC", |
|
211 |
+ "LV_MAPPED", |
|
212 |
+ "BWA_MAPPED_OVERALL", |
|
213 |
+ "ISS_MAPPED_PP" |
|
214 |
+ ) |
|
215 |
+} |
|
216 |
+ |
|
217 |
+.compressed_formats <- function() { |
|
218 |
+ c("gz", "bz2", "xz", "zip") |
|
219 |
+} |
|
220 |
+ |
|
221 |
+.supported_fread_compression_formats <- function() { |
|
222 |
+ c("gz", "bz2") |
|
223 |
+} |
... | ... |
@@ -1,20 +1,30 @@ |
1 | 1 |
#------------------------------------------------------------------------------# |
2 | 2 |
# Importing functions |
3 | 3 |
#------------------------------------------------------------------------------# |
4 |
+ |
|
4 | 5 |
#' Import a single integration matrix from file |
5 | 6 |
#' |
6 | 7 |
#' @description \lifecycle{stable} |
7 | 8 |
#' This function allows to read and import an integration matrix |
8 | 9 |
#' produced as the output of Vispa2 pipeline and converts it to a tidy |
9 |
-#' tibble. |
|
10 |
+#' format. |
|
10 | 11 |
#' |
11 | 12 |
#' @param path The path to the file on disk |
13 |
+#' @param to_exclude Either NULL or a character vector of column names that |
|
14 |
+#' should be ignored when importing |
|
15 |
+#' @param separator The column delimiter used |
|
12 | 16 |
#' |
13 |
-#' @return A tidy tibble |
|
17 |
+#' @return A data.table object in tidy format |
|
14 | 18 |
#' @family Import functions |
19 |
+#' @importFrom rlang abort inform |
|
20 |
+#' @importFrom fs path_ext |
|
21 |
+#' @importFrom readr read_delim cols |
|
15 | 22 |
#' @importFrom tidyr separate |
16 |
-#' @importFrom utils read.csv |
|
17 | 23 |
#' @importFrom magrittr `%>%` |
24 |
+#' @importFrom dplyr mutate |
|
25 |
+#' @importFrom stringr str_replace |
|
26 |
+#' @importFrom BiocParallel SnowParam MulticoreParam bplapply bpstop |
|
27 |
+#' @importFrom data.table melt.data.table rbindlist |
|
18 | 28 |
#' @details The import series of functions is designed to work in combination |
19 | 29 |
#' with the use of Vispa2 pipeline, please refer to this article for more |
20 | 30 |
#' details: \href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5702242/}{VISPA2: |
... | ... |
@@ -29,30 +39,139 @@ |
29 | 39 |
#' package = "ISAnalytics" |
30 | 40 |
#' ) |
31 | 41 |
#' isa_dataframe <- import_single_Vispa2Matrix(path_to_file) |
32 |
-import_single_Vispa2Matrix <- function(path) { |
|
42 |
+import_single_Vispa2Matrix <- function(path, |
|
43 |
+ to_exclude = NULL, |
|
44 |
+ separator = "\t") { |
|
33 | 45 |
stopifnot(!missing(path) & is.character(path)) |
34 | 46 |
if (!file.exists(path)) { |
35 |
- stop(paste("File not found at", path)) |
|
47 |
+ rlang::abort(paste("File not found at", path)) |
|
48 |
+ } |
|
49 |
+ if (!fs::is_file(path)) { |
|
50 |
+ rlang::abort(paste("Path exists but is not a file")) |
|
36 | 51 |
} |
37 |
- df <- read.csv(path, |
|
38 |
- sep = "\t", header = TRUE, fill = TRUE, |
|
39 |
- check.names = FALSE, stringsAsFactors = FALSE |
|
52 |
+ mode <- "fread" |
|
53 |
+ ## Is the file compressed? |
|
54 |
+ is_compressed <- fs::path_ext(path) %in% .compressed_formats() |
|
55 |
+ if (is_compressed) { |
|
56 |
+ ## The compression type is supported by data.table::fread? |
|
57 |
+ compression_type <- fs::path_ext(path) |
|
58 |
+ if (!compression_type %in% .supported_fread_compression_formats()) { |
|
59 |
+ ### If not, switch to classic for reading |
|
60 |
+ mode <- "classic" |
|
61 |
+ if (getOption("ISAnalytics.verbose") == TRUE) { |
|
62 |
+ rlang::inform(.unsupported_comp_format_inf(), |
|
63 |
+ class = "unsup_comp_format" |
|
64 |
+ ) |
|
65 |
+ } |
|
66 |
+ } |
|
67 |
+ } |
|
68 |
+ ### Peak headers |
|
69 |
+ peek_headers <- readr::read_delim(path, |
|
70 |
+ delim = separator, n_max = 0, |
|
71 |
+ col_types = readr::cols() |
|
40 | 72 |
) |
41 |
- df <- tibble::as_tibble(df) |
|
42 |
- df_type <- .auto_detect_type(df) |
|
73 |
+ ## - Detect type |
|
74 |
+ df_type <- .auto_detect_type(peek_headers) |
|
75 |
+ if (df_type == "MALFORMED") { |
|
76 |
+ rlang::abort(.malformed_ISmatrix_error(), |
|
77 |
+ class = "malformed_ism" |
|
78 |
+ ) |
|
79 |
+ } |
|
80 |
+ is_annotated <- .is_annotated(peek_headers) |
|
81 |
+ ## - Start reading |
|
82 |
+ if (getOption("ISAnalytics.verbose") == TRUE) { |
|
83 |
+ rlang::inform(c("Reading file...", i = paste0("Mode: ", mode))) |
|
84 |
+ } |
|
85 |
+ df <- if (mode == "fread") { |
|
86 |
+ .read_with_fread( |
|
87 |
+ path = path, to_drop = to_exclude, |
|
88 |
+ df_type = df_type, annotated = is_annotated, |
|
89 |
+ sep = separator |
|
90 |
+ ) |
|
91 |
+ } else { |
|
92 |
+ .read_with_readr( |
|
93 |
+ path = path, to_drop = to_exclude, |
|
94 |
+ df_type = df_type, annotated = is_annotated, |
|
95 |
+ sep = separator |
|
96 |
+ ) |
|
97 |
+ } |
|
98 |
+ ## - Report summary |
|
99 |
+ if (getOption("ISAnalytics.verbose") == TRUE) { |
|
100 |
+ rlang::inform(.summary_ism_import_msg( |
|
101 |
+ df_type, |
|
102 |
+ .is_annotated(df), |
|
103 |
+ dim(df), |
|
104 |
+ mode |
|
105 |
+ ), |
|
106 |
+ class = "ism_import_summary" |
|
107 |
+ ) |
|
108 |
+ } |
|
43 | 109 |
if (df_type == "OLD") { |
44 |
- df <- df %>% tidyr::separate( |
|
45 |
- col = .data$IS_genomicID, |
|
46 |
- into = mandatory_IS_vars(), |
|
47 |
- sep = "_", remove = TRUE, |
|
48 |
- convert = TRUE |
|
110 |
+ df <- df %>% |
|
111 |
+ tidyr::separate( |
|
112 |
+ col = .data$IS_genomicID, |
|
113 |
+ into = mandatory_IS_vars(), |
|
114 |
+ sep = "_", remove = TRUE, |
|
115 |
+ convert = TRUE |
|
116 |
+ ) %>% |
|
117 |
+ dplyr::mutate(chr = stringr::str_replace( |
|
118 |
+ .data$chr, "chr", "" |
|
119 |
+ )) |
|
120 |
+ } |
|
121 |
+ ## - Split in chunks |
|
122 |
+ if (getOption("ISAnalytics.verbose") == TRUE) { |
|
123 |
+ rlang::inform("Reshaping...") |
|
124 |
+ } |
|
125 |
+ chunks <- split(df, |
|
126 |
+ by = c("chr"), |
|
127 |
+ verbose = FALSE |
|
128 |
+ ) |
|
129 |
+ ## - Melt in parallel |
|
130 |
+ p <- if (.Platform$OS.type == "windows") { |
|
131 |
+ BiocParallel::SnowParam( |
|
132 |
+ tasks = length(chunks), |
|
133 |
+ progressbar = getOption("ISAnalytics.verbose"), |
|
134 |
+ exportglobals = FALSE, |
|
135 |
+ stop.on.error = TRUE |
|
136 |
+ ) |
|
137 |
+ } else { |
|
138 |
+ BiocParallel::MulticoreParam( |
|
139 |
+ tasks = length(chunks), |
|
140 |
+ progressbar = getOption("ISAnalytics.verbose"), |
|
141 |
+ exportglobals = FALSE, |
|
142 |
+ stop.on.error = TRUE |
|
49 | 143 |
) |
50 | 144 |
} |
51 |
- if (df_type == "MALFORMED") { |
|
52 |
- warning(.malformed_ISmatrix_warning()) |
|
145 |
+ mt <- function(data, annot) { |
|
146 |
+ id_vars <- if (annot) { |
|
147 |
+ c( |
|
148 |
+ mandatory_IS_vars(), |
|
149 |
+ annotation_IS_vars() |
|
150 |
+ ) |
|
151 |
+ } else { |
|
152 |
+ mandatory_IS_vars() |
|
153 |
+ } |
|
154 |
+ data.table::melt.data.table(data, |
|
155 |
+ id.vars = id_vars, |
|
156 |
+ variable.name = "CompleteAmplificationID", |
|
157 |
+ value.name = "Value", |
|
158 |
+ na.rm = TRUE, |
|
159 |
+ verbose = FALSE |
|
160 |
+ ) |
|
161 |
+ } |
|
162 |
+ tidy_chunks <- BiocParallel::bplapply( |
|
163 |
+ X = chunks, |
|
164 |
+ FUN = mt, |
|
165 |
+ annot = is_annotated, |
|
166 |
+ BPPARAM = p |
|
167 |
+ ) |
|
168 |
+ BiocParallel::bpstop(p) |
|
169 |
+ tidy <- data.table::rbindlist(tidy_chunks) |
|
170 |
+ tidy <- tidy["Value" > 0] |
|
171 |
+ if (getOption("ISAnalytics.verbose") == TRUE) { |
|
172 |
+ rlang::inform("Done!") |
|
53 | 173 |
} |
54 |
- isadf <- .messy_to_tidy(df) |
|
55 |
- isadf |
|
174 |
+ return(tidy) |
|
56 | 175 |
} |
57 | 176 |
|
58 | 177 |
|
... | ... |
@@ -70,6 +189,13 @@ import_single_Vispa2Matrix <- function(path) { |
70 | 189 |
#' specified (ex: 1 becomes 0001 with a tp_padding of 4) |
71 | 190 |
#' @param dates_format A single string indicating how dates should be parsed. |
72 | 191 |
#' Must be a value in: \code{date_formats()} |
192 |
+#' @param separator The column separator used in the file |
|
193 |
+#' @param filter_for A named list where names represent column names that |
|
194 |
+#' must be filtered. For example: `list(ProjectID = c("PROJECT1", "PROJECT2))` |
|
195 |
+#' will filter the association file so that it contains only those rows |
|
196 |
+#' for which the value of the column "ProjectID" is one of the specified |
|
197 |
+#' values. If multiple columns are present in the list all filtering |
|
198 |
+#' conditions are applied as a logical AND. |
|
73 | 199 |
#' @param export_widget_path A path on disk to save produced widgets or NULL |
74 | 200 |
#' if the user doesn't wish to save the html file |
75 | 201 |
#' @family Import functions |
... | ... |
@@ -102,6 +228,9 @@ import_single_Vispa2Matrix <- function(path) { |
102 | 228 |
#' If 'NULL' the file system alignment step is skipped. |
103 | 229 |
#' @export |
104 | 230 |
#' |
231 |
+#' @importFrom purrr map_lgl set_names is_empty |
|
232 |
+#' @importFrom rlang inform |
|
233 |
+#' @importFrom magrittr `%>%` |
|
105 | 234 |
#' @seealso \code{\link{date_formats}} |
106 | 235 |
#' @examples |
107 | 236 |
#' op <- options("ISAnalytics.widgets" = FALSE) |
... | ... |
@@ -114,6 +243,8 @@ import_single_Vispa2Matrix <- function(path) { |
114 | 243 |
#' options(op) |
115 | 244 |
import_association_file <- function(path, |
116 | 245 |
root = NULL, tp_padding = 4, dates_format = "ymd", |
246 |
+ separator = "\t", |
|
247 |
+ filter_for = NULL, |
|
117 | 248 |
export_widget_path = NULL) { |
118 | 249 |
# Check parameters |
119 | 250 |
stopifnot(is.character(path) & length(path) == 1) |
... | ... |
@@ -125,32 +256,80 @@ import_association_file <- function(path, |
125 | 256 |
stopifnot((is.numeric(tp_padding) | |
126 | 257 |
is.integer(tp_padding)) & length(tp_padding) == 1) |
127 | 258 |
stopifnot(length(dates_format) == 1 & dates_format %in% date_formats()) |
128 |
- |
|
259 |
+ stopifnot(is.character(separator) && length(separator) == 1) |
|
260 |
+ # Check filter |
|
261 |
+ stopifnot(is.null(filter_for) || |
|
262 |
+ (is.list(filter_for) && !is.null(names(filter_for)))) |
|
129 | 263 |
# Read file and check the correctness |
130 |
- af_checks <- .manage_association_file(path, root, tp_padding, dates_format) |
|
264 |
+ af_checks <- .manage_association_file( |
|
265 |
+ path, root, tp_padding, dates_format, |
|
266 |
+ separator, filter_for |
|
267 |
+ ) |
|
131 | 268 |
as_file <- af_checks$af |
269 |
+ parsing_problems <- af_checks$parsing_probs |
|
270 |
+ date_problems <- af_checks$date_probs |
|
132 | 271 |
checks <- af_checks$check |
133 |
- |
|
134 |
- if (!is.null(checks)) { |
|
135 |
- # Checks if the association file and the file system are aligned |
|
272 |
+ col_probs <- NULL |
|
273 |
+ if (!.check_af_correctness(as_file)) { |
|
274 |
+ col_probs[["missing"]] <- association_file_columns()[ |
|
275 |
+ !association_file_columns() %in% colnames(as_file) |
|
276 |
+ ] |
|
277 |
+ } |
|
278 |
+ non_standard <- colnames(as_file)[ |
|
279 |
+ !colnames(as_file) %in% c(association_file_columns(), "Path") |
|
280 |
+ ] |
|
281 |
+ if (!purrr::is_empty(non_standard)) { |
|
282 |
+ col_probs[["non_standard"]] <- non_standard |
|
283 |
+ } |
|
284 |
+ missing_dates <- purrr::map_lgl(date_columns_coll(), function(date_col) { |
|
285 |
+ any(is.na(as_file[[date_col]])) |
|
286 |
+ }) %>% purrr::set_names(date_columns_coll()) |
|
287 |
+ missing_dates <- names(missing_dates)[missing_dates == TRUE] |
|
288 |
+ something_to_report <- any(!is.null(c( |
|
289 |
+ parsing_problems, |
|
290 |
+ date_problems, |
|
291 |
+ checks, |
|
292 |
+ col_probs, |
|
293 |
+ missing_dates |
|
294 |
+ ))) |
|
295 |
+ if (something_to_report) { |
|
296 |
+ summary_report <- .summary_af_import_msg( |
|
297 |
+ pars_prob = parsing_problems, dates_prob = date_problems, |
|
298 |
+ cols_prob = col_probs, crit_na = missing_dates, |
|
299 |
+ checks = ifelse(is.null(checks), |
|
300 |
+ yes = "skipped", |
|
301 |
+ no = ifelse(any(!checks$Found), |
|
302 |
+ "problems detected", |
|
303 |
+ "no problems detected" |
|
304 |
+ ) |
|
305 |
+ ) |
|
306 |
+ ) |
|
136 | 307 |
if (getOption("ISAnalytics.widgets") == TRUE) { |
137 | 308 |
withCallingHandlers( |
138 | 309 |
expr = { |
139 | 310 |
withRestarts( |
140 | 311 |
{ |
141 |
- widg <- .checker_widget(checks) |
|
312 |
+ widg <- .checker_widget( |
|
313 |
+ parsing_problems, |
|
314 |
+ date_problems, |
|
315 |
+ checks, |
|
316 |
+ col_probs, |
|
317 |
+ missing_dates |
|
318 |
+ ) |
|
142 | 319 |
print(widg) |
143 | 320 |
}, |
144 | 321 |
print_err = function() { |
145 |
- message(.widgets_error()) |
|
322 |
+ rlang::inform(.widgets_error()) |
|
146 | 323 |
if (getOption("ISAnalytics.verbose") == TRUE) { |
147 |
- print(checks, n = nrow(checks)) |
|
324 |
+ rlang::inform(summary_report, |
|
325 |
+ class = "summary_report" |
|
326 |
+ ) |
|
148 | 327 |
} |
149 | 328 |
} |
150 | 329 |
) |
151 | 330 |
}, |
152 | 331 |
error = function(cnd) { |
153 |
- message(conditionMessage(cnd)) |
|
332 |
+ rlang::inform(conditionMessage(cnd)) |
|
154 | 333 |
invokeRestart("print_err") |
155 | 334 |
} |
156 | 335 |
) |
... | ... |
@@ -161,7 +340,9 @@ import_association_file <- function(path, |
161 | 340 |
) |
162 | 341 |
} |
163 | 342 |
} else if (getOption("ISAnalytics.verbose") == TRUE) { |
164 |
- print(checks, n = nrow(checks), width = Inf) |
|
343 |
+ rlang::inform(summary_report, |
|
344 |
+ class = "summary_report" |
|
345 |
+ ) |
|
165 | 346 |
} |
166 | 347 |
} |
167 | 348 |
as_file |
... | ... |
@@ -188,38 +369,34 @@ import_association_file <- function(path, |
188 | 369 |
#' if necessary, duplicate files. During the execution, a series of reports is |
189 | 370 |
#' shown in html format. |
190 | 371 |
#' @param association_file A single string containing the path to the |
191 |
-#' association file on disk, or a tibble resulting from the previous call of |
|
372 |
+#' association file on disk, or a data frame resulting from a previous call to |
|
192 | 373 |
#' `import_association_file` |
193 |
-#' @param root A single string containing the path to the root folder containing |
|
194 |
-#' Vispa2 output. Can be NULL if association_file parameter is a tibble |
|
195 | 374 |
#' @param quantification_type A vector of requested quantification_types. Must |
196 | 375 |
#' be one in `quantification_types()` |
197 | 376 |
#' @param matrix_type A single string representing the type of matrices to |
198 | 377 |
#' be imported. Can only be one in `"annotated"` or `"not_annotated"` |
199 |
-#' @param workers A single number representing the number of parallel workers to |
|
200 |
-#' use for the import |
|
201 |
-#' @param tp_padding Timepoint padding, indicates the number of digits of the |
|
202 |
-#' "Timepoint" column once imported. Fills the content with 0s up to the length |
|
203 |
-#' specified (ex: 1 becomes 0001 with a tp_padding of 4) |
|
204 |
-#' @param dates_format A single string indicating how dates should be parsed. |
|
205 |
-#' Must be a value in: \code{date_formats()} |
|
378 |
+#' @param workers A single integer representing the number |
|
379 |
+#' of parallel workers to use for the import |
|
206 | 380 |
#' @param multi_quant_matrix If set to TRUE will produce a |
207 | 381 |
#' multi-quantification matrix (data frame) through `comparison_matrix` |
208 | 382 |
#' instead of a list. |
209 |
-#' @param export_widget_path A path on disk to save produced widgets or NULL |
|
210 |
-#' if the user doesn't wish to save the html file |
|
211 |
-#' @param ... Additional arguments to pass to `comparison_matrix` |
|
383 |
+#' @param export_report_path A path on disk to save produced import report |
|
384 |
+#' or NULL if the user doesn't wish to save the html file |
|
385 |
+#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Additional named arguments |
|
386 |
+#' to pass to `ìmport_association_file` and `comparison_matrix` |
|
212 | 387 |
#' |
213 |
-#' @seealso \code{\link{comparison_matrix}} |
|
388 |
+#' @seealso \code{\link{comparison_matrix}}, |
|
389 |
+#' \code{\link{import_association_file}} |
|
214 | 390 |
#' |
215 | 391 |
#' @importFrom htmltools browsable tagList |
216 |
-#' @importFrom tibble is_tibble |
|
217 |
-#' @importFrom tidyr unnest |
|
218 |
-#' @importFrom dplyr select |
|
392 |
+#' @importFrom dplyr filter |
|
393 |
+#' @importFrom rlang dots_list inform abort call2 eval_tidy fn_fmls_names |
|
394 |
+#' @importFrom magrittr `%>%` |
|
219 | 395 |
#' @family Import functions |
220 | 396 |
#' |
221 |
-#' @return A named list of tibbles containing data from all imported integration |
|
222 |
-#' matrices, divided by quantification type |
|
397 |
+#' @return A named list of data frames containing data from |
|
398 |
+#' all imported integration |
|
399 |
+#' matrices, divided by quantification type or a multi-quantification matrix |
|
223 | 400 |
#' @export |
224 | 401 |
#' |
225 | 402 |
#' @examples |
... | ... |
@@ -227,28 +404,24 @@ import_association_file <- function(path, |
227 | 404 |
#' # Can't run because it's interactive and requires user input |
228 | 405 |
#' matrices <- import_parallel_Vispa2Matrices_interactive( |
229 | 406 |
#' association_file, |
230 |
-#' root, quantification_type, matrix_type, workers, |
|
231 |
-#' dates_format = "dmy" |
|
407 |
+#' quantification_type, |
|
408 |
+#' matrix_type = "annotated", |
|
409 |
+#' workers = 2, |
|
410 |
+#' multi_quant_matrix = FALSE, |
|
411 |
+#' export_report_path = NULL, |
|
232 | 412 |
#' ) |
233 | 413 |
#' } |
234 | 414 |
import_parallel_Vispa2Matrices_interactive <- function(association_file, |
235 |
- root, |
|
236 | 415 |
quantification_type, |
237 | 416 |
matrix_type = "annotated", |
238 | 417 |
workers = 2, |
239 |
- tp_padding = 4, |
|
240 |
- dates_format = "ymd", |
|
241 |
- multi_quant_matrix = FALSE, |
|
242 |
- export_widget_path = NULL, |
|
418 |
+ multi_quant_matrix = TRUE, |
|
419 |
+ export_report_path = NULL, |
|
243 | 420 |
...) { |
244 | 421 |
# Check parameters |
245 |
- stopifnot(!missing(association_file)) |
|
246 |
- stopifnot(is.character(association_file) | |
|
422 |
+ stopifnot((is.character(association_file) & |
|
423 |
+ length(association_file) == 1) || |
|
247 | 424 |
is.data.frame(association_file)) |
248 |
- stopifnot((is.character(root) && length(root) == 1) | is.null(root)) |
|
249 |
- if (is.character(association_file)) { |
|
250 |
- stopifnot(length(association_file) == 1) |
|
251 |
- } |
|
252 | 425 |
stopifnot(is.numeric(workers) & length(workers) == 1) |
253 | 426 |
workers <- floor(workers) |
254 | 427 |
stopifnot(!missing(quantification_type)) |
... | ... |
@@ -257,18 +430,49 @@ import_parallel_Vispa2Matrices_interactive <- function(association_file, |
257 | 430 |
"annotated", |
258 | 431 |
"not_annotated" |
259 | 432 |
)) |
260 |
- stopifnot((is.numeric(tp_padding) | |
|
261 |
- is.integer(tp_padding)) & length(tp_padding) == 1) |
|
262 |
- stopifnot(length(dates_format) == 1 & dates_format %in% date_formats()) |
|
263 | 433 |
stopifnot(is.logical(multi_quant_matrix) & length(multi_quant_matrix) == 1) |
264 |
- |
|
265 |
- # Manage association file |
|
266 |
- association_file <- .manage_association_file( |
|
267 |
- association_file, root, |
|
268 |
- tp_padding, dates_format |
|
269 |
- ) |
|
270 |
- checker_widg <- association_file[[2]] |
|
271 |
- association_file <- association_file[[1]] |
|
434 |
+ ## Collect dot args |
|
435 |
+ if (is.character(association_file) || isTRUE(multi_quant_matrix)) { |
|
436 |
+ dots_args <- rlang::dots_list(..., .named = TRUE, .homonyms = "first") |
|
437 |
+ if (is.character(association_file)) { |
|
438 |
+ import_af_arg_names <- rlang::fn_fmls_names(import_association_file) |
|
439 |
+ import_af_arg_names <- import_af_arg_names[ |
|
440 |
+ import_af_arg_names != "path" |
|
441 |
+ ] |
|
442 |
+ import_af_args <- dots_args[names(dots_args) %in% |
|
443 |
+ import_af_arg_names] |
|
444 |
+ } |
|
445 |
+ if (isTRUE(multi_quant_matrix)) { |
|
446 |
+ mult_arg_names <- rlang::fn_fmls_names(comparison_matrix) |
|
447 |
+ mult_arg_names <- mult_arg_names[mult_arg_names != "x"] |
|
448 |
+ mult_args <- dots_args[names(dots_args) %in% |
|
449 |
+ mult_arg_names] |
|
450 |
+ } |
|
451 |
+ } |
|
452 |
+ ## Import association file if provided a path |
|
453 |
+ if (is.character(association_file)) { |
|
454 |
+ association_file <- rlang::eval_tidy( |
|
455 |
+ rlang::call2("import_association_file", |
|
456 |
+ path = association_file, |
|
457 |
+ !!!import_af_args |
|
458 |
+ ) |
|
459 |
+ ) |
|
460 |
+ } |
|