... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
Package: ISAnalytics |
2 | 2 |
Title: Analyze gene therapy vector insertion sites data identified from genomics next generation sequencing reads for clonal tracking studies |
3 |
-Version: 1.3.2 |
|
3 |
+Version: 1.3.3 |
|
4 | 4 |
Date: 2020-07-03 |
5 | 5 |
Authors@R: c( |
6 | 6 |
person(given = "Andrea", |
... | ... |
@@ -25,8 +25,6 @@ Depends: |
25 | 25 |
magrittr |
26 | 26 |
Imports: |
27 | 27 |
utils, |
28 |
- reactable, |
|
29 |
- htmltools, |
|
30 | 28 |
dplyr, |
31 | 29 |
readr, |
32 | 30 |
tidyr, |
... | ... |
@@ -36,20 +34,18 @@ Imports: |
36 | 34 |
BiocParallel, |
37 | 35 |
stringr, |
38 | 36 |
fs, |
39 |
- zip, |
|
40 | 37 |
lubridate, |
41 | 38 |
lifecycle, |
42 | 39 |
ggplot2, |
43 | 40 |
ggrepel, |
44 | 41 |
stats, |
45 |
- upsetjs, |
|
46 | 42 |
psych, |
47 |
- grDevices, |
|
48 | 43 |
data.table, |
49 | 44 |
readxl, |
50 | 45 |
tools, |
51 | 46 |
Rcapture, |
52 |
- plotly |
|
47 |
+ grDevices, |
|
48 |
+ zip |
|
53 | 49 |
Encoding: UTF-8 |
54 | 50 |
LazyData: false |
55 | 51 |
Roxygen: list(markdown = TRUE) |
... | ... |
@@ -59,7 +55,6 @@ Suggests: |
59 | 55 |
covr, |
60 | 56 |
knitr, |
61 | 57 |
BiocStyle, |
62 |
- knitcitations, |
|
63 | 58 |
sessioninfo, |
64 | 59 |
rmarkdown, |
65 | 60 |
roxygen2, |
... | ... |
@@ -69,7 +64,13 @@ Suggests: |
69 | 64 |
ggalluvial, |
70 | 65 |
scales, |
71 | 66 |
gridExtra, |
72 |
- R.utils |
|
67 |
+ R.utils, |
|
68 |
+ RefManageR, |
|
69 |
+ flexdashboard, |
|
70 |
+ DT, |
|
71 |
+ circlize, |
|
72 |
+ plotly, |
|
73 |
+ ggvenn |
|
73 | 74 |
VignetteBuilder: knitr |
74 | 75 |
RdMacros: |
75 | 76 |
lifecycle |
... | ... |
@@ -11,6 +11,7 @@ export(as_sparse_matrix) |
11 | 11 |
export(association_file_columns) |
12 | 12 |
export(available_outlier_tests) |
13 | 13 |
export(blood_lineages_default) |
14 |
+export(circos_genomic_density) |
|
14 | 15 |
export(clinical_relevant_suspicious_genes) |
15 | 16 |
export(comparison_matrix) |
16 | 17 |
export(compute_abundance) |
... | ... |
@@ -20,11 +21,13 @@ export(date_columns_coll) |
20 | 21 |
export(date_formats) |
21 | 22 |
export(default_iss_file_prefixes) |
22 | 23 |
export(default_meta_agg) |
24 |
+export(default_report_path) |
|
23 | 25 |
export(default_stats) |
24 | 26 |
export(generate_Vispa2_launch_AF) |
25 | 27 |
export(generate_blank_association_file) |
26 | 28 |
export(import_Vispa2_stats) |
27 | 29 |
export(import_association_file) |
30 |
+export(import_parallel_Vispa2Matrices) |
|
28 | 31 |
export(import_parallel_Vispa2Matrices_auto) |
29 | 32 |
export(import_parallel_Vispa2Matrices_interactive) |
30 | 33 |
export(import_single_Vispa2Matrix) |
... | ... |
@@ -38,6 +41,7 @@ export(outliers_by_pool_fragments) |
38 | 41 |
export(quantification_types) |
39 | 42 |
export(realign_after_collisions) |
40 | 43 |
export(reduced_AF_columns) |
44 |
+export(refGene_table_cols) |
|
41 | 45 |
export(remove_collisions) |
42 | 46 |
export(sample_statistics) |
43 | 47 |
export(separate_quant_matrices) |
... | ... |
@@ -46,12 +50,8 @@ export(threshold_filter) |
46 | 50 |
export(top_abund_tableGrob) |
47 | 51 |
export(top_integrations) |
48 | 52 |
export(unzip_file_system) |
49 |
-import(BiocParallel) |
|
50 |
-import(dplyr) |
|
51 | 53 |
import(ggplot2) |
52 |
-import(htmltools) |
|
53 | 54 |
import(lifecycle) |
54 |
-import(upsetjs) |
|
55 | 55 |
importFrom(BiocParallel,MulticoreParam) |
56 | 56 |
importFrom(BiocParallel,SnowParam) |
57 | 57 |
importFrom(BiocParallel,bplapply) |
... | ... |
@@ -61,28 +61,48 @@ importFrom(BiocParallel,bptry) |
61 | 61 |
importFrom(Rcapture,closedp.0) |
62 | 62 |
importFrom(Rcapture,closedp.bc) |
63 | 63 |
importFrom(data.table,.SD) |
64 |
+importFrom(data.table,`%chin%`) |
|
64 | 65 |
importFrom(data.table,fread) |
65 | 66 |
importFrom(data.table,melt.data.table) |
66 | 67 |
importFrom(data.table,rbindlist) |
67 | 68 |
importFrom(data.table,setDT) |
69 |
+importFrom(data.table,setnames) |
|
68 | 70 |
importFrom(dplyr,across) |
69 | 71 |
importFrom(dplyr,all_of) |
72 |
+importFrom(dplyr,anti_join) |
|
70 | 73 |
importFrom(dplyr,arrange) |
71 | 74 |
importFrom(dplyr,bind_cols) |
72 | 75 |
importFrom(dplyr,bind_rows) |
76 |
+importFrom(dplyr,contains) |
|
77 |
+importFrom(dplyr,cur_column) |
|
78 |
+importFrom(dplyr,desc) |
|
73 | 79 |
importFrom(dplyr,distinct) |
80 |
+importFrom(dplyr,everything) |
|
74 | 81 |
importFrom(dplyr,filter) |
82 |
+importFrom(dplyr,first) |
|
75 | 83 |
importFrom(dplyr,full_join) |
76 | 84 |
importFrom(dplyr,group_by) |
85 |
+importFrom(dplyr,group_keys) |
|
86 |
+importFrom(dplyr,group_modify) |
|
77 | 87 |
importFrom(dplyr,group_split) |
88 |
+importFrom(dplyr,if_else) |
|
78 | 89 |
importFrom(dplyr,inner_join) |
79 | 90 |
importFrom(dplyr,intersect) |
80 | 91 |
importFrom(dplyr,left_join) |
81 | 92 |
importFrom(dplyr,mutate) |
93 |
+importFrom(dplyr,n) |
|
94 |
+importFrom(dplyr,n_distinct) |
|
95 |
+importFrom(dplyr,pull) |
|
82 | 96 |
importFrom(dplyr,rename) |
97 |
+importFrom(dplyr,rename_with) |
|
83 | 98 |
importFrom(dplyr,select) |
84 | 99 |
importFrom(dplyr,semi_join) |
85 | 100 |
importFrom(dplyr,slice) |
101 |
+importFrom(dplyr,slice_head) |
|
102 |
+importFrom(dplyr,starts_with) |
|
103 |
+importFrom(dplyr,summarise) |
|
104 |
+importFrom(dplyr,transmute) |
|
105 |
+importFrom(dplyr,ungroup) |
|
86 | 106 |
importFrom(fs,as_fs_path) |
87 | 107 |
importFrom(fs,dir_create) |
88 | 108 |
importFrom(fs,dir_exists) |
... | ... |
@@ -92,28 +112,34 @@ importFrom(fs,is_dir) |
92 | 112 |
importFrom(fs,path) |
93 | 113 |
importFrom(fs,path_dir) |
94 | 114 |
importFrom(fs,path_ext) |
115 |
+importFrom(fs,path_ext_set) |
|
116 |
+importFrom(fs,path_home) |
|
95 | 117 |
importFrom(fs,path_wd) |
118 |
+importFrom(ggplot2,aes) |
|
96 | 119 |
importFrom(ggplot2,aes_) |
120 |
+importFrom(ggplot2,element_blank) |
|
121 |
+importFrom(ggplot2,element_text) |
|
122 |
+importFrom(ggplot2,geom_hline) |
|
123 |
+importFrom(ggplot2,geom_point) |
|
124 |
+importFrom(ggplot2,geom_raster) |
|
97 | 125 |
importFrom(ggplot2,geom_text) |
98 | 126 |
importFrom(ggplot2,ggplot) |
127 |
+importFrom(ggplot2,ggplot_build) |
|
128 |
+importFrom(ggplot2,labs) |
|
129 |
+importFrom(ggplot2,rel) |
|
130 |
+importFrom(ggplot2,scale_alpha_continuous) |
|
131 |
+importFrom(ggplot2,scale_fill_gradientn) |
|
99 | 132 |
importFrom(ggplot2,scale_fill_viridis_d) |
133 |
+importFrom(ggplot2,scale_x_continuous) |
|
134 |
+importFrom(ggplot2,scale_y_continuous) |
|
100 | 135 |
importFrom(ggplot2,sym) |
136 |
+importFrom(ggplot2,theme) |
|
137 |
+importFrom(ggplot2,unit) |
|
101 | 138 |
importFrom(ggrepel,geom_label_repel) |
102 |
-importFrom(grDevices,colorRamp) |
|
103 |
-importFrom(grDevices,rgb) |
|
104 |
-importFrom(htmltools,browsable) |
|
105 |
-importFrom(htmltools,div) |
|
106 |
-importFrom(htmltools,h1) |
|
107 |
-importFrom(htmltools,h2) |
|
108 |
-importFrom(htmltools,h3) |
|
109 |
-importFrom(htmltools,h4) |
|
110 |
-importFrom(htmltools,save_html) |
|
111 |
-importFrom(htmltools,tagList) |
|
112 |
-importFrom(htmltools,tags) |
|
139 |
+importFrom(lifecycle,deprecate_warn) |
|
113 | 140 |
importFrom(lubridate,parse_date_time) |
141 |
+importFrom(lubridate,today) |
|
114 | 142 |
importFrom(magrittr,`%>%`) |
115 |
-importFrom(plotly,plot_ly) |
|
116 |
-importFrom(psych,describe) |
|
117 | 143 |
importFrom(purrr,cross_df) |
118 | 144 |
importFrom(purrr,detect_index) |
119 | 145 |
importFrom(purrr,flatten) |
... | ... |
@@ -129,9 +155,11 @@ importFrom(purrr,map2_lgl) |
129 | 155 |
importFrom(purrr,map_chr) |
130 | 156 |
importFrom(purrr,map_dbl) |
131 | 157 |
importFrom(purrr,map_dfr) |
158 |
+importFrom(purrr,map_int) |
|
132 | 159 |
importFrom(purrr,map_lgl) |
133 | 160 |
importFrom(purrr,pmap) |
134 | 161 |
importFrom(purrr,pmap_chr) |
162 |
+importFrom(purrr,pmap_dbl) |
|
135 | 163 |
importFrom(purrr,pmap_df) |
136 | 164 |
importFrom(purrr,pmap_dfr) |
137 | 165 |
importFrom(purrr,pmap_lgl) |
... | ... |
@@ -139,22 +167,18 @@ importFrom(purrr,reduce) |
139 | 167 |
importFrom(purrr,set_names) |
140 | 168 |
importFrom(purrr,walk) |
141 | 169 |
importFrom(purrr,walk2) |
142 |
-importFrom(reactable,colDef) |
|
143 |
-importFrom(reactable,colFormat) |
|
144 |
-importFrom(reactable,reactable) |
|
145 |
-importFrom(reactable,reactableTheme) |
|
146 | 170 |
importFrom(readr,cols) |
147 |
-importFrom(readr,parse_factor) |
|
148 | 171 |
importFrom(readr,problems) |
149 | 172 |
importFrom(readr,read_delim) |
150 | 173 |
importFrom(readr,write_tsv) |
151 |
-importFrom(readxl,read_excel) |
|
152 | 174 |
importFrom(rlang,.data) |
175 |
+importFrom(rlang,`!!!`) |
|
153 | 176 |
importFrom(rlang,`:=`) |
154 | 177 |
importFrom(rlang,abort) |
155 | 178 |
importFrom(rlang,arg_match) |
156 | 179 |
importFrom(rlang,as_function) |
157 | 180 |
importFrom(rlang,call2) |
181 |
+importFrom(rlang,current_env) |
|
158 | 182 |
importFrom(rlang,dots_list) |
159 | 183 |
importFrom(rlang,enexpr) |
160 | 184 |
importFrom(rlang,env_bind) |
... | ... |
@@ -167,11 +191,9 @@ importFrom(rlang,is_formula) |
167 | 191 |
importFrom(rlang,is_function) |
168 | 192 |
importFrom(rlang,list2) |
169 | 193 |
importFrom(rlang,parse_expr) |
194 |
+importFrom(rlang,sym) |
|
170 | 195 |
importFrom(stats,dt) |
171 |
-importFrom(stats,median) |
|
172 | 196 |
importFrom(stats,na.omit) |
173 |
-importFrom(stats,p.adjust) |
|
174 |
-importFrom(stats,pt) |
|
175 | 197 |
importFrom(stats,setNames) |
176 | 198 |
importFrom(stats,shapiro.test) |
177 | 199 |
importFrom(stringr,str_detect) |
... | ... |
@@ -179,17 +201,14 @@ importFrom(stringr,str_pad) |
179 | 201 |
importFrom(stringr,str_replace) |
180 | 202 |
importFrom(stringr,str_replace_all) |
181 | 203 |
importFrom(stringr,str_split) |
204 |
+importFrom(stringr,str_to_lower) |
|
182 | 205 |
importFrom(stringr,str_to_upper) |
206 |
+importFrom(tibble,add_case) |
|
183 | 207 |
importFrom(tibble,add_column) |
184 |
-importFrom(tibble,add_row) |
|
185 | 208 |
importFrom(tibble,as_tibble) |
186 | 209 |
importFrom(tibble,as_tibble_col) |
187 |
-importFrom(tibble,as_tibble_row) |
|
188 |
-importFrom(tibble,is_tibble) |
|
189 | 210 |
importFrom(tibble,tibble) |
190 |
-importFrom(tibble,tibble_row) |
|
191 | 211 |
importFrom(tibble,tribble) |
192 |
-importFrom(tidyr,everything) |
|
193 | 212 |
importFrom(tidyr,nest) |
194 | 213 |
importFrom(tidyr,pivot_longer) |
195 | 214 |
importFrom(tidyr,pivot_wider) |
... | ... |
@@ -2,6 +2,45 @@ |
2 | 2 |
title: "NEWS" |
3 | 3 |
output: github_document |
4 | 4 |
--- |
5 |
+# ISAnalytics 1.3.3 (TBD) |
|
6 |
+ |
|
7 |
+## MAJOR CHANGES |
|
8 |
+* Completely reworked interactive HTML report system, for details take |
|
9 |
+a look at the new vignette `vignette("report_system", package = "ISAnalytics")` |
|
10 |
+* Old `ISAnalytics.widgets` option has been replaced by `ISAnalytics.reports` |
|
11 |
+* In `remove_collisions()`, removed arguments `seq_count_col`, |
|
12 |
+`max_rows_reports` and `save_widget_path`, added arguments `quant_cols` |
|
13 |
+and `report_path` (see documentation for details) |
|
14 |
+ |
|
15 |
+## MINOR CHANGES |
|
16 |
+* `import_single_Vispa2Matrix()` now allows keeping additional non-standard |
|
17 |
+columns |
|
18 |
+* `compute_near_integrations()` is now faster on bigger data sets |
|
19 |
+* Changed default values for arguments `columns` and `key` in |
|
20 |
+`compute_abundance()` |
|
21 |
+* `compute_near_integrations()` now produces only re-calibration map in *.tsv |
|
22 |
+format |
|
23 |
+* `CIS_grubbs()` now supports calculations for each group specified in argument |
|
24 |
+`by` |
|
25 |
+* In `sample_statistics()` now there is the option to include the calculation |
|
26 |
+of distinct integration sites for each group (if mandatory vars are present) |
|
27 |
+ |
|
28 |
+## NEW FUNCTIONALITY |
|
29 |
+* Added new plotting function `circos_genomic_density()` |
|
30 |
+ |
|
31 |
+## FIXES |
|
32 |
+* Fixed minor issue with NA values in alluvial plots |
|
33 |
+ |
|
34 |
+## DEPRECATIONS |
|
35 |
+* `import_parallel_Vispa2Matrices_interactive()` and |
|
36 |
+`import_parallel_Vispa2Matrices_auto()` are officially deprecated in favor |
|
37 |
+of `import_parallel_Vispa2Matrices()` |
|
38 |
+ |
|
39 |
+## OTHER |
|
40 |
+* The package has now a more complete and functional example data set |
|
41 |
+for executable examples |
|
42 |
+* Reworked documentation |
|
43 |
+ |
|
5 | 44 |
# ISAnalytics 1.3.2 (2021-06-28) |
6 | 45 |
|
7 | 46 |
## FIXES |
... | ... |
@@ -1,6 +1,54 @@ |
1 | 1 |
NEWS |
2 | 2 |
================ |
3 | 3 |
|
4 |
+# ISAnalytics 1.3.3 (TBD) |
|
5 |
+ |
|
6 |
+## MAJOR CHANGES |
|
7 |
+ |
|
8 |
+- Completely reworked interactive HTML report system, for details take |
|
9 |
+ a look at the new vignette |
|
10 |
+ `vignette("report_system", package = "ISAnalytics")` |
|
11 |
+- Old `ISAnalytics.widgets` option has been replaced by |
|
12 |
+ `ISAnalytics.reports` |
|
13 |
+- In `remove_collisions()`, removed arguments `seq_count_col`, |
|
14 |
+ `max_rows_reports` and `save_widget_path`, added arguments |
|
15 |
+ `quant_cols` and `report_path` (see documentation for details) |
|
16 |
+ |
|
17 |
+## MINOR CHANGES |
|
18 |
+ |
|
19 |
+- `import_single_Vispa2Matrix()` now allows keeping additional |
|
20 |
+ non-standard columns |
|
21 |
+- `compute_near_integrations()` is now faster on bigger data sets |
|
22 |
+- Changed default values for arguments `columns` and `key` in |
|
23 |
+ `compute_abundance()` |
|
24 |
+- `compute_near_integrations()` now produces only re-calibration map |
|
25 |
+ in \*.tsv format |
|
26 |
+- `CIS_grubbs()` now supports calculations for each group specified in |
|
27 |
+ argument `by` |
|
28 |
+- In `sample_statistics()` now there is the option to include the |
|
29 |
+ calculation of distinct integration sites for each group (if |
|
30 |
+ mandatory vars are present) |
|
31 |
+ |
|
32 |
+## NEW FUNCTIONALITY |
|
33 |
+ |
|
34 |
+- Added new plotting function `circos_genomic_density()` |
|
35 |
+ |
|
36 |
+## FIXES |
|
37 |
+ |
|
38 |
+- Fixed minor issue with NA values in alluvial plots |
|
39 |
+ |
|
40 |
+## DEPRECATIONS |
|
41 |
+ |
|
42 |
+- `import_parallel_Vispa2Matrices_interactive()` and |
|
43 |
+ `import_parallel_Vispa2Matrices_auto()` are officially deprecated in |
|
44 |
+ favor of `import_parallel_Vispa2Matrices()` |
|
45 |
+ |
|
46 |
+## OTHER |
|
47 |
+ |
|
48 |
+- The package has now a more complete and functional example data set |
|
49 |
+ for executable examples |
|
50 |
+- Reworked documentation |
|
51 |
+ |
|
4 | 52 |
# ISAnalytics 1.3.2 (2021-06-28) |
5 | 53 |
|
6 | 54 |
## FIXES |
7 | 55 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,15 @@ |
1 |
+ |
|
2 |
+#' @title Deprecated functions in package \pkg{ISAnalytics}. |
|
3 |
+#' @description These functions are provided for compatibility |
|
4 |
+#' with older versions of \sQuote{ISAnalytics} only, |
|
5 |
+#' and will be defunct at the next release. |
|
6 |
+#' @name ISAnalytics-deprecated |
|
7 |
+#' @details The following functions are deprecated and will be made defunct; |
|
8 |
+#' use the replacement indicated below: |
|
9 |
+#' |
|
10 |
+#' * import_parallel_Vispa2Matrices_auto: |
|
11 |
+#' \code{\link{import_parallel_Vispa2Matrices}} |
|
12 |
+#' * import_parallel_Vispa2Matrices_interactive: |
|
13 |
+#' \code{\link{import_parallel_Vispa2Matrices}} |
|
14 |
+#' @keywords internal |
|
15 |
+NULL |
... | ... |
@@ -2,7 +2,7 @@ |
2 | 2 |
#' identified from genomics next generation sequencing reads for |
3 | 3 |
#' clonal tracking studies |
4 | 4 |
#' |
5 |
-#' @description \lifecycle{maturing} |
|
5 |
+#' @description \lifecycle{stable} |
|
6 | 6 |
#' In gene therapy, stem cells are modified using viral |
7 | 7 |
#' vectors to deliver the therapeutic transgene and replace functional |
8 | 8 |
#' properties since the genetic modification is stable and inherited in |
... | ... |
@@ -27,8 +27,7 @@ |
27 | 27 |
#' * \code{\link{import_single_Vispa2Matrix}} |
28 | 28 |
#' * \code{\link{import_association_file}} |
29 | 29 |
#' * \code{\link{import_Vispa2_stats}} |
30 |
-#' * \code{\link{import_parallel_Vispa2Matrices_interactive}} |
|
31 |
-#' * \code{\link{import_parallel_Vispa2Matrices_auto}} |
|
30 |
+#' * \code{\link{import_parallel_Vispa2Matrices}} |
|
32 | 31 |
#' * Aggregation functions: |
33 | 32 |
#' * \code{\link{aggregate_metadata}} |
34 | 33 |
#' * \code{\link{aggregate_values_by_key}} |
... | ... |
@@ -58,6 +57,7 @@ |
58 | 57 |
#' * \code{\link{sharing_heatmap}} |
59 | 58 |
#' * \code{\link{integration_alluvial_plot}} |
60 | 59 |
#' * \code{\link{top_abund_tableGrob}} |
60 |
+#' * \code{\link{circos_genomic_density}} |
|
61 | 61 |
#' * Utility functions: |
62 | 62 |
#' * \code{\link{generate_blank_association_file}} |
63 | 63 |
#' * \code{\link{generate_Vispa2_launch_AF}} |
... | ... |
@@ -65,16 +65,17 @@ |
65 | 65 |
#' * \code{\link{as_sparse_matrix}} |
66 | 66 |
#' |
67 | 67 |
#' @section Vignettes: |
68 |
-#' * \code{vignette("How to use import functions", package = "ISAnalytics")} |
|
69 |
-#' * \code{vignette("Collision removal functionality", |
|
68 |
+#' * \code{vignette("how_to_import_functions", package = "ISAnalytics")} |
|
69 |
+#' * \code{vignette("collision_removal", |
|
70 | 70 |
#' package = "ISAnalytics")} |
71 |
-#' * \code{vignette("Working with aggregate functions", |
|
71 |
+#' * \code{vignette("aggregate_function_usage", |
|
72 | 72 |
#' package = "ISAnalytics")} |
73 |
-#' * \code{vignette("Using ISAnalytics without RStudio support", |
|
73 |
+#' * \code{vignette("report_system", |
|
74 | 74 |
#' package = "ISAnalytics")} |
75 | 75 |
#' |
76 | 76 |
#' @docType package |
77 | 77 |
#' @name ISAnalytics |
78 |
+#' @keywords internal |
|
78 | 79 |
NULL |
79 | 80 |
|
80 | 81 |
## usethis namespace: start |
... | ... |
@@ -1,17 +1,17 @@ |
1 | 1 |
#------------------------------------------------------------------------------# |
2 | 2 |
# Aggregate functions |
3 | 3 |
#------------------------------------------------------------------------------# |
4 |
- |
|
5 | 4 |
#' Performs aggregation on metadata contained in the association file. |
6 | 5 |
#' |
7 |
-#' \lifecycle{maturing} |
|
6 |
+#' \lifecycle{stable} |
|
8 | 7 |
#' Groups metadata by the specified grouping keys and returns a |
9 | 8 |
#' summary of info for each group. For more details on how to use this function: |
10 |
-#' \code{vignette("Working with aggregate functions", package = "ISAnalytics")} |
|
9 |
+#' \code{vignette("aggregate_function_usage", package = "ISAnalytics")} |
|
11 | 10 |
#' |
12 | 11 |
#' @param association_file The imported association file |
13 |
-#' (via link{import_association_file}) |
|
14 |
-#' @param grouping_keys A character vector of column names to form a group |
|
12 |
+#' (via \link{import_association_file}) |
|
13 |
+#' @param grouping_keys A character vector of column names to form a grouping |
|
14 |
+#' operation |
|
15 | 15 |
#' @param aggregating_functions A data frame containing specifications |
16 | 16 |
#' of the functions to be applied to columns in the association file during |
17 | 17 |
#' aggregation. It defaults to \link{default_meta_agg}. The structure of |
... | ... |
@@ -29,17 +29,11 @@ |
29 | 29 |
#' @export |
30 | 30 |
#' |
31 | 31 |
#' @examples |
32 |
-#' op <- options("ISAnalytics.widgets" = FALSE, "ISAnalytics.verbose" = FALSE) |
|
33 |
-#' path_AF <- system.file("extdata", "ex_association_file.tsv", |
|
34 |
-#' package = "ISAnalytics" |
|
35 |
-#' ) |
|
36 |
-#' root_correct <- system.file("extdata", "fs.zip", package = "ISAnalytics") |
|
37 |
-#' root_correct <- unzip_file_system(root_correct, "fs") |
|
38 |
-#' association_file <- import_association_file(path_AF, root_correct, |
|
39 |
-#' dates_format = "dmy" |
|
32 |
+#' data("association_file", package = "ISAnalytics") |
|
33 |
+#' aggreg_meta <- aggregate_metadata( |
|
34 |
+#' association_file = association_file |
|
40 | 35 |
#' ) |
41 |
-#' aggregated_meta <- aggregate_metadata(association_file) |
|
42 |
-#' options(op) |
|
36 |
+#' head(aggreg_meta) |
|
43 | 37 |
aggregate_metadata <- function(association_file, |
44 | 38 |
grouping_keys = c( |
45 | 39 |
"SubjectID", |
... | ... |
@@ -78,11 +72,12 @@ aggregate_metadata <- function(association_file, |
78 | 72 |
function_tbl = aggregating_functions |
79 | 73 |
) |
80 | 74 |
if (is.null(aggregated)) { |
81 |
- rlang::inform(paste( |
|
75 |
+ msg <- paste( |
|
82 | 76 |
"No columns in `aggregating_functions$Column`", |
83 | 77 |
"was found in column names of the association", |
84 | 78 |
"file. Nothing to return." |
85 |
- )) |
|
79 |
+ ) |
|
80 |
+ rlang::inform(msg) |
|
86 | 81 |
} |
87 | 82 |
aggregated |
88 | 83 |
} |
... | ... |
@@ -145,11 +140,11 @@ default_meta_agg <- function() { |
145 | 140 |
|
146 | 141 |
#' Aggregates matrices values based on specified key. |
147 | 142 |
#' |
148 |
-#' \lifecycle{maturing} |
|
143 |
+#' \lifecycle{stable} |
|
149 | 144 |
#' Performs aggregation on values contained in the integration matrices based |
150 | 145 |
#' on the key and the specified lambda. For more details on how to use this |
151 | 146 |
#' function: |
152 |
-#' \code{vignette("Working with aggregate functions", package = "ISAnalytics")} |
|
147 |
+#' \code{vignette("aggregate_function_usage", package = "ISAnalytics")} |
|
153 | 148 |
#' |
154 | 149 |
#' @details |
155 | 150 |
#' ## Setting the lambda parameter |
... | ... |
@@ -190,8 +185,8 @@ default_meta_agg <- function() { |
190 | 185 |
#' * Function should return a single value or a list/data frame: |
191 | 186 |
#' if a list or a data frame is returned as a result, all the columns |
192 | 187 |
#' will be added to the final data frame. |
193 |
-#' @param x A single integration matrix (tibble) or a list of imported |
|
194 |
-#' integration matrices (tibble) |
|
188 |
+#' @param x A single integration matrix or a list of imported |
|
189 |
+#' integration matrices |
|
195 | 190 |
#' @param association_file The imported association file |
196 | 191 |
#' @param value_cols A character vector containing the names of the |
197 | 192 |
#' columns to apply the given lambdas. Must be numeric or integer |
... | ... |
@@ -215,26 +210,14 @@ default_meta_agg <- function() { |
215 | 210 |
#' @export |
216 | 211 |
#' |
217 | 212 |
#' @examples |
218 |
-#' op <- options("ISAnalytics.widgets" = FALSE, "ISAnalytics.verbose" = FALSE) |
|
219 |
-#' path_AF <- system.file("extdata", "ex_association_file.tsv", |
|
220 |
-#' package = "ISAnalytics" |
|
221 |
-#' ) |
|
222 |
-#' root_correct <- system.file("extdata", "fs.zip", package = "ISAnalytics") |
|
223 |
-#' root_correct <- unzip_file_system(root_correct, "fs") |
|
224 |
-#' association_file <- import_association_file(path_AF, root_correct, |
|
225 |
-#' dates_format = "dmy" |
|
226 |
-#' ) |
|
227 |
-#' matrices <- import_parallel_Vispa2Matrices_auto( |
|
228 |
-#' association_file = association_file, root = NULL, |
|
229 |
-#' quantification_type = c("fragmentEstimate", "seqCount"), |
|
230 |
-#' matrix_type = "annotated", workers = 2, matching_opt = "ANY" |
|
231 |
-#' ) |
|
232 |
-#' agg <- aggregate_values_by_key( |
|
233 |
-#' x = matrices, |
|
213 |
+#' data("integration_matrices", package = "ISAnalytics") |
|
214 |
+#' data("association_file", package = "ISAnalytics") |
|
215 |
+#' aggreg <- aggregate_values_by_key( |
|
216 |
+#' x = integration_matrices, |
|
234 | 217 |
#' association_file = association_file, |
235 |
-#' value_cols = c("fragmentEstimate", "seqCount") |
|
218 |
+#' value_cols = c("seqCount", "fragmentEstimate") |
|
236 | 219 |
#' ) |
237 |
-#' options(op) |
|
220 |
+#' head(aggreg) |
|
238 | 221 |
aggregate_values_by_key <- function(x, |
239 | 222 |
association_file, |
240 | 223 |
value_cols = "Value", |
... | ... |
@@ -255,7 +238,7 @@ aggregate_values_by_key <- function(x, |
255 | 238 |
purrr::walk(x, function(df) { |
256 | 239 |
stopifnot(is.data.frame(df)) |
257 | 240 |
if (.check_mandatory_vars(df) == FALSE) { |
258 |
- rlang::abort(.non_ISM_error()) |
|
241 |
+ rlang::abort(.missing_mand_vars()) |
|
259 | 242 |
} |
260 | 243 |
if (!all(join_af_by %in% colnames(df))) { |
261 | 244 |
rlang::abort(c( |
... | ... |
@@ -293,7 +276,7 @@ aggregate_values_by_key <- function(x, |
293 | 276 |
}) |
294 | 277 |
} else { |
295 | 278 |
if (.check_mandatory_vars(x) == FALSE) { |
296 |
- rlang::abort(.non_ISM_error()) |
|
279 |
+ rlang::abort(.missing_mand_vars()) |
|
297 | 280 |
} |
298 | 281 |
if (!all(join_af_by %in% colnames(x))) { |
299 | 282 |
rlang::abort(c( |
... | ... |
@@ -4,7 +4,7 @@ |
4 | 4 |
|
5 | 5 |
#' Computes the abundance for every integration event in the input data frame. |
6 | 6 |
#' |
7 |
-#' \lifecycle{maturing} |
|
7 |
+#' \lifecycle{stable} |
|
8 | 8 |
#' Abundance is obtained for every integration event by calculating the ratio |
9 | 9 |
#' between the single value and the total value for the given group. |
10 | 10 |
#' |
... | ... |
@@ -30,8 +30,9 @@ |
30 | 30 |
#' @family Analysis functions |
31 | 31 |
#' |
32 | 32 |
#' @importFrom magrittr `%>%` |
33 |
-#' @import dplyr |
|
34 |
-#' @importFrom rlang .data eval_tidy parse_expr |
|
33 |
+#' @importFrom dplyr group_by across all_of summarise left_join mutate |
|
34 |
+#' @importFrom dplyr cur_column distinct select contains rename_with |
|
35 |
+#' @importFrom rlang .data eval_tidy parse_expr abort |
|
35 | 36 |
#' @importFrom purrr map_lgl |
36 | 37 |
#' @importFrom stringr str_replace |
37 | 38 |
#' @return Either a single data frame with computed abundance values or |
... | ... |
@@ -39,29 +40,24 @@ |
39 | 40 |
#' @export |
40 | 41 |
#' |
41 | 42 |
#' @examples |
42 |
-#' path <- system.file("extdata", "ex_annotated_ISMatrix.tsv.xz", |
|
43 |
-#' package = "ISAnalytics" |
|
43 |
+#' data("integration_matrices", package = "ISAnalytics") |
|
44 |
+#' abund <- compute_abundance( |
|
45 |
+#' x = integration_matrices, |
|
46 |
+#' columns = "fragmentEstimate", |
|
47 |
+#' key = "CompleteAmplificationID" |
|
44 | 48 |
#' ) |
45 |
-#' matrix <- import_single_Vispa2Matrix(path) |
|
46 |
-#' |
|
47 |
-#' # Simple integration matrix - grouping by CompleteAmplificationID |
|
48 |
-#' abundance1 <- compute_abundance(matrix) |
|
49 |
-#' abundance1 |
|
50 |
-#' |
|
51 |
-#' # Keeping totals as a separate data frame |
|
52 |
-#' abundance2 <- compute_abundance(matrix, keep_totals = "df") |
|
53 |
-#' abundance2 |
|
49 |
+#' head(abund) |
|
54 | 50 |
compute_abundance <- function(x, |
55 |
- columns = "Value", |
|
51 |
+ columns = c("fragmentEstimate_sum"), |
|
56 | 52 |
percentage = TRUE, |
57 |
- key = "CompleteAmplificationID", |
|
53 |
+ key = c("SubjectID", "CellMarker", "Tissue", "TimePoint"), |
|
58 | 54 |
keep_totals = FALSE) { |
59 | 55 |
## Check parameters |
60 | 56 |
stopifnot(is.data.frame(x)) |
61 | 57 |
stopifnot(is.character(columns)) |
62 | 58 |
stopifnot(is.character(key)) |
63 | 59 |
if (.check_mandatory_vars(x) == FALSE) { |
64 |
- stop(.non_ISM_error()) |
|
60 |
+ rlang::abort(.missing_mand_vars()) |
|
65 | 61 |
} |
66 | 62 |
stopifnot(is.logical(percentage) & length(percentage) == 1) |
67 | 63 |
if (!all(columns %in% colnames(x)) | !all(key %in% colnames(x))) { |
... | ... |
@@ -81,7 +77,7 @@ compute_abundance <- function(x, |
81 | 77 |
} |
82 | 78 |
}) |
83 | 79 |
if (any(non_num_cols)) { |
84 |
- stop(.non_num_user_cols_error(columns[non_num_cols])) |
|
80 |
+ rlang::abort(.non_num_user_cols_error(columns[non_num_cols])) |
|
85 | 81 |
} |
86 | 82 |
stopifnot(is.logical(keep_totals) || keep_totals == "df") |
87 | 83 |
## Computation |
... | ... |
@@ -139,8 +135,8 @@ compute_abundance <- function(x, |
139 | 135 |
#' obtain a single integration matrix from individual quantification |
140 | 136 |
#' matrices. |
141 | 137 |
#' |
142 |
-#' \lifecycle{maturing} |
|
143 |
-#' Takes a list of integration matrices referring to different qunatification |
|
138 |
+#' \lifecycle{stable} |
|
139 |
+#' Takes a list of integration matrices referring to different quantification |
|
144 | 140 |
#' types and merges them in a single data frame that has multiple |
145 | 141 |
#' value columns, each renamed according to their quantification type |
146 | 142 |
#' of reference. |
... | ... |
@@ -171,21 +167,22 @@ compute_abundance <- function(x, |
171 | 167 |
#' @export |
172 | 168 |
#' |
173 | 169 |
#' @examples |
174 |
-#' op <- options("ISAnalytics.widgets" = FALSE) |
|
175 |
-#' path <- system.file("extdata", "ex_association_file.tsv", |
|
170 |
+#' fs_path <- system.file("extdata", "fs.zip", package = "ISAnalytics") |
|
171 |
+#' fs <- unzip_file_system(fs_path, "fs") |
|
172 |
+#' af_path <- system.file("extdata", "asso.file.tsv.gz", |
|
176 | 173 |
#' package = "ISAnalytics" |
177 | 174 |
#' ) |
178 |
-#' root_pth <- system.file("extdata", "fs.zip", package = "ISAnalytics") |
|
179 |
-#' root <- unzip_file_system(root_pth, "fs") |
|
180 |
-#' matrices <- import_parallel_Vispa2Matrices_auto( |
|
181 |
-#' association_file = path, root = root, |
|
182 |
-#' quantification_type = c("fragmentEstimate", "seqCount"), |
|
183 |
-#' matrix_type = "annotated", workers = 2, patterns = NULL, |
|
184 |
-#' matching_opt = "ANY", |
|
185 |
-#' dates_format = "dmy", multi_quant_matrix = FALSE |
|
175 |
+#' af <- import_association_file(af_path, |
|
176 |
+#' root = fs, |
|
177 |
+#' import_iss = FALSE, |
|
178 |
+#' report_path = NULL |
|
186 | 179 |
#' ) |
187 |
-#' total_matrix <- comparison_matrix(matrices) |
|
188 |
-#' options(op) |
|
180 |
+#' matrices <- import_parallel_Vispa2Matrices(af, |
|
181 |
+#' c("seqCount", "fragmentEstimate"), |
|
182 |
+#' mode = "AUTO", report_path = NULL, multi_quant_matrix = FALSE |
|
183 |
+#' ) |
|
184 |
+#' multi_quant <- comparison_matrix(matrices) |
|
185 |
+#' head(multi_quant) |
|
189 | 186 |
comparison_matrix <- function(x, |
190 | 187 |
fragmentEstimate = "fragmentEstimate", |
191 | 188 |
seqCount = "seqCount", |
... | ... |
@@ -226,7 +223,7 @@ comparison_matrix <- function(x, |
226 | 223 |
#' Separate a multiple-quantification matrix into single quantification |
227 | 224 |
#' matrices. |
228 | 225 |
#' |
229 |
-#' \lifecycle{maturing} |
|
226 |
+#' \lifecycle{stable} |
|
230 | 227 |
#' The function separates a single multi-quantification integration |
231 | 228 |
#' matrix, obtained via \link{comparison_matrix}, into single |
232 | 229 |
#' quantification matrices as a named list of tibbles. |
... | ... |
@@ -256,24 +253,11 @@ comparison_matrix <- function(x, |
256 | 253 |
#' @export |
257 | 254 |
#' |
258 | 255 |
#' @examples |
259 |
-#' op <- options("ISAnalytics.widgets" = FALSE) |
|
260 |
-#' path <- system.file("extdata", "ex_association_file.tsv", |
|
261 |
-#' package = "ISAnalytics" |
|
256 |
+#' data("integration_matrices", package = "ISAnalytics") |
|
257 |
+#' separated <- separate_quant_matrices( |
|
258 |
+#' integration_matrices |
|
262 | 259 |
#' ) |
263 |
-#' root_pth <- system.file("extdata", "fs.zip", package = "ISAnalytics") |
|
264 |
-#' root <- unzip_file_system(root_pth, "fs") |
|
265 |
-#' association_file <- import_association_file( |
|
266 |
-#' path = path, root = root, |
|
267 |
-#' dates_format = "dmy" |
|
268 |
-#' ) |
|
269 |
-#' matrices <- import_parallel_Vispa2Matrices_auto( |
|
270 |
-#' association_file = association_file, |
|
271 |
-#' quantification_type = c("seqCount", "fragmentEstimate"), |
|
272 |
-#' matrix_type = "annotated", workers = 2, patterns = NULL, |
|
273 |
-#' matching_opt = "ANY" |
|
274 |
-#' ) |
|
275 |
-#' separated_matrix <- separate_quant_matrices(matrices) |
|
276 |
-#' options(op) |
|
260 |
+#' separated |
|
277 | 261 |
separate_quant_matrices <- function(x, |
278 | 262 |
fragmentEstimate = "fragmentEstimate", |
279 | 263 |
seqCount = "seqCount", |
... | ... |
@@ -315,7 +299,9 @@ separate_quant_matrices <- function(x, |
315 | 299 |
rlang::inform(.non_quant_cols_msg(to_copy)) |
316 | 300 |
} |
317 | 301 |
separated <- purrr::map(num_cols, function(quant) { |
318 |
- x[c(key, to_copy, quant)] %>% dplyr::rename(Value = quant) |
|
302 |
+ x %>% |
|
303 |
+ dplyr::select(dplyr::all_of(c(key, to_copy, quant))) %>% |
|
304 |
+ dplyr::rename(Value = quant) |
|
319 | 305 |
}) %>% purrr::set_names(names(num_cols)) |
320 | 306 |
separated |
321 | 307 |
} |
... | ... |
@@ -388,12 +374,12 @@ separate_quant_matrices <- function(x, |
388 | 374 |
#' print(example_list) |
389 | 375 |
#' |
390 | 376 |
#' filtered <- threshold_filter(example_list, |
391 |
-#' threshold = list(first = c(20, 60), |
|
392 |
-#' third = c(25)), |
|
393 |
-#' cols_to_compare = list(first = c("a", "b"), |
|
394 |
-#' third = c("a")), |
|
395 |
-#' comparators = list(first = c(">", "<"), |
|
396 |
-#' third = c(">="))) |
|
377 |
+#' threshold = list(first = c(20, 60), |
|
378 |
+#' third = c(25)), |
|
379 |
+#' cols_to_compare = list(first = c("a", "b"), |
|
380 |
+#' third = c("a")), |
|
381 |
+#' comparators = list(first = c(">", "<"), |
|
382 |
+#' third = c(">="))) |
|
397 | 383 |
#' print(filtered) |
398 | 384 |
#' |
399 | 385 |
#' ``` |
... | ... |
@@ -411,11 +397,11 @@ separate_quant_matrices <- function(x, |
411 | 397 |
#' |
412 | 398 |
#' ```r |
413 | 399 |
#' filtered <- threshold_filter(example_list, |
414 |
-#' threshold = list(first = c(20, 60), |
|
415 |
-#' third = c(25, 65)), |
|
416 |
-#' cols_to_compare = c("a", "b"), |
|
417 |
-#' comparators = list(first = c(">", "<"), |
|
418 |
-#' third = c(">=", "<="))) |
|
400 |
+#' threshold = list(first = c(20, 60), |
|
401 |
+#' third = c(25, 65)), |
|
402 |
+#' cols_to_compare = c("a", "b"), |
|
403 |
+#' comparators = list(first = c(">", "<"), |
|
404 |
+#' third = c(">=", "<="))) |
|
419 | 405 |
#' ``` |
420 | 406 |
#' In this example, different threshold and comparators will be applied |
421 | 407 |
#' to the same columns in all data frames. |
... | ... |
@@ -468,6 +454,7 @@ separate_quant_matrices <- function(x, |
468 | 454 |
#' third = c(">=") |
469 | 455 |
#' ) |
470 | 456 |
#' ) |
457 |
+#' filtered |
|
471 | 458 |
threshold_filter <- function(x, |
472 | 459 |
threshold, |
473 | 460 |
cols_to_compare = "Value", |
... | ... |
@@ -510,7 +497,6 @@ threshold_filter <- function(x, |
510 | 497 |
#' |
511 | 498 |
#' @family Analysis functions |
512 | 499 |
#' |
513 |
-#' @import dplyr |
|
514 | 500 |
#' @importFrom magrittr `%>%` |
515 | 501 |
#' @importFrom rlang abort |
516 | 502 |
#' |
... | ... |
@@ -539,16 +525,18 @@ threshold_filter <- function(x, |
539 | 525 |
#' keep = "Value2", |
540 | 526 |
#' key = "CompleteAmplificationID" |
541 | 527 |
#' ) |
542 |
-top_integrations <- function(x, n = 50, |
|
528 |
+top_integrations <- function(x, |
|
529 |
+ n = 20, |
|
543 | 530 |
columns = "fragmentEstimate_sum_RelAbundance", |
544 |
- keep = "everything", key = NULL) { |
|
531 |
+ keep = "everything", |
|
532 |
+ key = NULL) { |
|
545 | 533 |
stopifnot(is.data.frame(x)) |
546 | 534 |
stopifnot(is.numeric(n) & length(n) == 1 & n > 0) |
547 | 535 |
stopifnot(is.character(keep)) |
548 | 536 |
stopifnot(is.character(columns)) |
549 | 537 |
stopifnot(is.null(key) || is.character(key)) |
550 | 538 |
if (!.check_mandatory_vars(x)) { |
551 |
- rlang::abort(.non_ISM_error()) |
|
539 |
+ rlang::abort(.missing_mand_vars()) |
|
552 | 540 |
} |
553 | 541 |
if (!all(columns %in% colnames(x))) { |
554 | 542 |
rlang::abort(.missing_user_cols_error( |
... | ... |
@@ -616,28 +604,18 @@ top_integrations <- function(x, n = 50, |
616 | 604 |
#' For example: |
617 | 605 |
#' |
618 | 606 |
#' ```r |
619 |
-#' ### Importing association file and matrices |
|
620 |
-#' path_AF <- system.file("extdata", "ex_association_file.tsv", |
|
621 |
-#' package = "ISAnalytics") |
|
622 |
-#' root_correct <- system.file("extdata", "fs.zip", |
|
623 |
-#' package = "ISAnalytics") |
|
624 |
-#' root_correct <- unzip_file_system(root_correct, "fs") |
|
625 |
-#' |
|
626 |
-#' association_file <- import_association_file(path_AF, root_correct) |
|
627 |
-#' matrices <- import_parallel_Vispa2Matrices_auto( |
|
628 |
-#' association_file = association_file , root = NULL, |
|
629 |
-#' quantification_type = c("seqCount","fragmentEstimate"), |
|
630 |
-#' matrix_type = "annotated", workers = 2, patterns = NULL, |
|
631 |
-#' matching_opt = "ANY", dates_format = "dmy") |
|
632 |
-#' |
|
633 |
-#' ### Aggregating data (both by same key) |
|
634 |
-#' aggreggated_x <- aggregate_values_by_key(matrices$seqCount, |
|
635 |
-#' association_file) |
|
636 |
-#' aggregated_meta <- aggregate_metadata(association_file) |
|
637 |
-#' |
|
638 |
-#' ### Sample statistics |
|
639 |
-#' sample_stats <- sample_statistics(x = aggregated_x, |
|
640 |
-#' metadata = aggregated_meta, |
|
607 |
+#' data("integration_matrices", package = "ISAnalytics") |
|
608 |
+#' data("association_file", package = "ISAnalytics") |
|
609 |
+#' aggreg <- aggregate_values_by_key( |
|
610 |
+#' x = integration_matrices, |
|
611 |
+#' association_file = association_file, |
|
612 |
+#' value_cols = c("seqCount", "fragmentEstimate") |
|
613 |
+#' ) |
|
614 |
+#' aggreg_meta <- aggregate_metadata(association_file = association_file) |
|
615 |
+#' |
|
616 |
+#' sample_stats <- sample_statistics(x = aggreg, |
|
617 |
+#' metadata = aggreg_meta, |
|
618 |
+#' value_columns = c("seqCount", "fragmentEstimate"), |
|
641 | 619 |
#' sample_key = c("SubjectID", "CellMarker","Tissue", "TimePoint")) |
642 | 620 |
#' |
643 | 621 |
#' ``` |
... | ... |
@@ -645,74 +623,64 @@ top_integrations <- function(x, n = 50, |
645 | 623 |
#' @param metadata The metadata data frame |
646 | 624 |
#' @param sample_key Character vector representing the key for identifying |
647 | 625 |
#' a sample |
648 |
-#' @param value_columns THe name of the columns to be computed, |
|
626 |
+#' @param value_columns The name of the columns to be computed, |
|
649 | 627 |
#' must be numeric or integer |
650 | 628 |
#' @param functions A named list of function or purrr-style lambdas |
629 |
+#' @param add_integrations_count Add the count of distinct integration sites |
|
630 |
+#' for each group? Can be computed only if `x` contains the mandatory columns |
|
631 |
+#' `chr`, `integration_locus`, `strand` |
|
651 | 632 |
#' |
652 | 633 |
#' @family Analysis functions |
653 |
-#' @importFrom rlang eval_tidy expr |
|
654 |
-#' @importFrom purrr is_function is_formula |
|
655 |
-#' @import dplyr |
|
634 |
+#' @importFrom rlang eval_tidy expr abort .data sym inform |
|
635 |
+#' @importFrom purrr is_function is_formula map_lgl walk map set_names |
|
636 |
+#' @importFrom dplyr group_by across all_of summarise rename_with bind_cols |
|
637 |
+#' @importFrom dplyr n_distinct left_join |
|
656 | 638 |
#' @importFrom magrittr `%>%` |
657 | 639 |
#' |
658 | 640 |
#' @return A list with modified x and metadata data frames |
659 | 641 |
#' @export |
660 | 642 |
#' |
661 | 643 |
#' @examples |
662 |
-#' op <- options(ISAnalytics.widgets = FALSE) |
|
663 |
-#' |
|
664 |
-#' path_AF <- system.file("extdata", "ex_association_file.tsv", |
|
665 |
-#' package = "ISAnalytics" |
|
666 |
-#' ) |
|
667 |
-#' root_correct <- system.file("extdata", "fs.zip", |
|
668 |
-#' package = "ISAnalytics" |
|
644 |
+#' data("integration_matrices", package = "ISAnalytics") |
|
645 |
+#' data("association_file", package = "ISAnalytics") |
|
646 |
+#' stats <- sample_statistics( |
|
647 |
+#' x = integration_matrices, |
|
648 |
+#' metadata = association_file, |
|
649 |
+#' value_columns = c("seqCount", "fragmentEstimate") |
|
669 | 650 |
#' ) |
670 |
-#' root_correct <- unzip_file_system(root_correct, "fs") |
|
671 |
-#' |
|
672 |
-#' association_file <- import_association_file(path_AF, root_correct, |
|
673 |
-#' dates_format = "dmy" |
|
674 |
-#' ) |
|
675 |
-#' matrices <- import_parallel_Vispa2Matrices_auto( |
|
676 |
-#' association_file = association_file, root = NULL, |
|
677 |
-#' quantification_type = c("seqCount", "fragmentEstimate"), |
|
678 |
-#' matrix_type = "annotated", workers = 2, patterns = NULL, |
|
679 |
-#' matching_opt = "ANY", multi_quant_matrix = FALSE |
|
680 |
-#' ) |
|
681 |
-#' |
|
682 |
-#' stats <- sample_statistics(matrices$seqCount, association_file) |
|
683 |
-#' options(op) |
|
651 |
+#' stats |
|
684 | 652 |
sample_statistics <- function(x, metadata, |
685 | 653 |
sample_key = "CompleteAmplificationID", |
686 | 654 |
value_columns = "Value", |
687 |
- functions = default_stats()) { |
|
655 |
+ functions = default_stats(), |
|
656 |
+ add_integrations_count = TRUE) { |
|
688 | 657 |
stopifnot(is.data.frame(x)) |
689 | 658 |
stopifnot(is.data.frame(metadata)) |
690 | 659 |
stopifnot(is.character(sample_key)) |
691 | 660 |
stopifnot(is.character(value_columns)) |
692 | 661 |
stopifnot(is.list(functions)) |
693 |
- if (!all(sample_key %in% colnames(x))) { |
|
694 |
- stop(paste("Key columns not found in the data frame")) |
|
662 |
+ stopifnot(is.logical(add_integrations_count)) |
|
663 |
+ if (!all(c(sample_key, value_columns) %in% colnames(x))) { |
|
664 |
+ rlang::abort(.missing_user_cols_error(c(sample_key, value_columns)[ |
|
665 |
+ !c(sample_key, value_columns) %in% colnames(x) |
|
666 |
+ ])) |
|
695 | 667 |
} |
696 | 668 |
if (!all(sample_key %in% colnames(metadata))) { |
697 |
- stop(paste("Key columns not found in metadata")) |
|
669 |
+ rlang::abort(.missing_user_cols_meta_error(sample_key[ |
|
670 |
+ !sample_key %in% colnames(x) |
|
671 |
+ ])) |
|
698 | 672 |
} |
699 |
- if (!all(value_columns %in% colnames(x))) { |
|
700 |
- stop(paste("Value columns not found in the data frame")) |
|
701 |
- } |
|
702 |
- purrr::walk(value_columns, function(col) { |
|
673 |
+ vcols_are_numeric <- purrr::map_lgl(value_columns, function(col) { |
|
703 | 674 |
expr <- rlang::expr(`$`(x, !!col)) |
704 |
- if (!is.numeric(rlang::eval_tidy(expr)) && |
|
705 |
- !is.integer(rlang::eval_tidy(expr))) { |
|
706 |
- stop(paste("Some or all of value columns are not numeric")) |
|
707 |
- } |
|
675 |
+ is.numeric(rlang::eval_tidy(expr)) || |
|
676 |
+ is.integer(rlang::eval_tidy(expr)) |
|
708 | 677 |
}) |
678 |
+ if (any(vcols_are_numeric == FALSE)) { |
|
679 |
+ rlang::abort(.non_num_user_cols_error(value_columns[!vcols_are_numeric])) |
|
680 |
+ } |
|
709 | 681 |
purrr::walk(functions, function(f) { |
710 | 682 |
if (!(purrr::is_function(f) | purrr::is_formula(f))) { |
711 |
- stop(paste( |
|
712 |
- "The function parameter should contain a list", |
|
713 |
- "of either functions or formulas.", |
|
714 |
- "See ?sample_statistics for details" |
|
715 |
- )) |
|
683 |
+ rlang::abort(.non_function_elem_error()) |
|
716 | 684 |
} |
717 | 685 |
}) |
718 | 686 |
|
... | ... |
@@ -743,6 +711,24 @@ sample_statistics <- function(x, metadata, |
743 | 711 |
} |
744 | 712 |
} |
745 | 713 |
|
714 |
+ if (add_integrations_count) { |
|
715 |
+ if (all(mandatory_IS_vars() %in% colnames(x))) { |
|
716 |
+ mand_sym <- purrr::map(mandatory_IS_vars(), rlang::sym) |
|
717 |
+ nIS <- x %>% |
|
718 |
+ dplyr::group_by(dplyr::across(dplyr::all_of(sample_key))) %>% |
|
719 |
+ dplyr::summarise( |
|
720 |
+ nIS = dplyr::n_distinct(!!!mand_sym), |
|
721 |
+ .groups = "drop" |
|
722 |
+ ) |
|
723 |
+ result <- result %>% |
|
724 |
+ dplyr::left_join(nIS, by = sample_key) |
|
725 |
+ } else { |
|
726 |
+ if (getOption("ISAnalytics.verbose")) { |
|
727 |
+ rlang::inform(.inform_skip_count_is()) |
|
728 |
+ } |
|
729 |
+ } |
|
730 |
+ } |
|
731 |
+ |
|
746 | 732 |
updated_meta <- metadata %>% dplyr::left_join(result, by = sample_key) |
747 | 733 |
return(list(x = result, metadata = updated_meta)) |
748 | 734 |
} |
... | ... |
@@ -750,7 +736,7 @@ sample_statistics <- function(x, metadata, |
750 | 736 |
|
751 | 737 |
#' Grubbs test for Common Insertion Sites (CIS). |
752 | 738 |
#' |
753 |
-#' \lifecycle{experimental} |
|
739 |
+#' \lifecycle{stable} |
|
754 | 740 |
#' Statistical approach for the validation of common insertion sites |
755 | 741 |
#' significance based on the comparison of the integration frequency |
756 | 742 |
#' at the CIS gene with respect to other genes contained in the |
... | ... |
@@ -762,279 +748,145 @@ sample_statistics <- function(x, metadata, |
762 | 748 |
#' ## Genomic annotation file |
763 | 749 |
#' This file is a data base, or more simply a .tsv file to import, with |
764 | 750 |
#' genes annotation for the specific genome. The annotations for the |
765 |
-#' human genome (hg19) is already included in this package. |
|
751 |
+#' human genome (hg19) and murine genome (mm9 and mm10) are already |
|
752 |
+#' included in this package: to use one of them just |
|
753 |
+#' set the argument `genomic_annotation_file` to either `"hg19"`, |
|
754 |
+#' `"mm9"` or `"mm10"`. |
|
766 | 755 |
#' If for any reason the user is performing an analysis on another genome, |
767 | 756 |
#' this file needs to be changed respecting the USCS Genome Browser |
768 |
-#' format, meaning the input file headers should be: |
|
769 |
-#' |
|
770 |
-#' ```{r echo=FALSE, tidy=TRUE} |
|
771 |
-#' cat(c( |
|
772 |
-#' paste(c("name2", "chrom", "strand"), collapse = ", "), "\n", |
|
773 |
-#' paste(c("min_txStart","max_txEnd", "minmax_TxLen"), collapse = ", "), |
|
774 |
-#' "\n", |
|
775 |
-#' paste(c("average_TxLen", "name", "min_cdsStart"), collapse = ", "), |
|
776 |
-#' "\n", |
|
777 |
-#' paste(c("max_cdsEnd","minmax_CdsLen", "average_CdsLen"), collapse = ", ") |
|
778 |
-#' )) |
|
757 |
+#' format, meaning the input file headers should include: |
|
779 | 758 |
#' |
780 |
-#' ``` |
|
759 |
+#' `r refGene_table_cols()` |
|
781 | 760 |
#' |
782 | 761 |
#' @param x An integration matrix, must include the `mandatory_IS_vars()` |
783 | 762 |
#' columns and the `annotation_IS_vars()` columns |
784 | 763 |
#' @param genomic_annotation_file Database file for gene annotation, |
785 |
-#' see details |
|
764 |
+#' see details. |
|
786 | 765 |
#' @param grubbs_flanking_gene_bp Number of base pairs flanking a gene |
787 | 766 |
#' @param threshold_alpha Significance threshold |
788 |
-#' @param add_standard_padjust Compute the standard padjust? |
|
767 |
+#' @param by Either `NULL` or a character vector of column names. If not |
|
768 |
+#' NULL, the function will perform calculations for each group and return |
|
769 |
+#' a list of data frames with the results. E.g. for `by = "SubjectID"`, |
|
770 |
+#' CIS will be computed for each distinct SubjectID found in the table |
|
771 |
+#' (of course, "SubjectID" column must be included in the input data frame). |
|
789 | 772 |
#' |
790 | 773 |
#' @family Analysis functions |
791 | 774 |
#' |
792 |
-#' @import dplyr |
|
793 | 775 |
#' @importFrom tibble as_tibble |
794 |
-#' @importFrom rlang .data |
|
776 |
+#' @importFrom rlang .data abort current_env eval_tidy sym |
|
795 | 777 |
#' @importFrom magrittr `%>%` |
796 |
-#' @importFrom stats median pt p.adjust |
|
797 | 778 |
#' @importFrom utils read.csv |
779 |
+#' @importFrom stringr str_replace_all |
|
780 |
+#' @importFrom tidyr unite |
|
781 |
+#' @importFrom purrr set_names map |
|
798 | 782 |
#' |
799 | 783 |
#' @return A data frame |
800 | 784 |
#' @export |
801 | 785 |
#' |
802 | 786 |
#' @examples |
803 |
-#' op <- options(ISAnalytics.widgets = FALSE) |
|
804 |
-#' |
|
805 |
-#' path_AF <- system.file("extdata", "ex_association_file.tsv", |
|
806 |
-#' package = "ISAnalytics" |
|
807 |
-#' ) |
|
808 |
-#' root_correct <- system.file("extdata", "fs.zip", |
|
809 |
-#' package = "ISAnalytics" |
|
810 |
-#' ) |
|
811 |
-#' root_correct <- unzip_file_system(root_correct, "fs") |
|
812 |
-#' |
|
813 |
-#' matrices <- import_parallel_Vispa2Matrices_auto( |
|
814 |
-#' association_file = path_AF, root = root_correct, |
|
815 |
-#' quantification_type = c("seqCount", "fragmentEstimate"), |
|
816 |
-#' matrix_type = "annotated", workers = 2, patterns = NULL, |
|
817 |
-#' matching_opt = "ANY", |
|
818 |
-#' dates_format = "dmy" |
|
819 |
-#' ) |
|
820 |
-#' |
|
821 |
-#' cis <- CIS_grubbs(matrices) |
|
822 |
-#' |
|
823 |
-#' options(op) |
|
787 |
+#' data("integration_matrices", package = "ISAnalytics") |
|
788 |
+#' cis <- CIS_grubbs(integration_matrices) |
|
789 |
+#' head(cis) |
|
824 | 790 |
CIS_grubbs <- function(x, |
825 |
- genomic_annotation_file = |
|
826 |
- system.file("extdata", "hg19.refGene.oracle.tsv.xz", |
|
827 |
- package = "ISAnalytics" |
|
828 |
- ), |
|
791 |
+ genomic_annotation_file = "hg19", |
|
829 | 792 |
grubbs_flanking_gene_bp = 100000, |
830 | 793 |
threshold_alpha = 0.05, |
831 |
- add_standard_padjust = TRUE) { |
|
794 |
+ by = NULL) { |
|
832 | 795 |
# Check x has the correct structure |
833 | 796 |
stopifnot(is.data.frame(x)) |
834 | 797 |
if (!all(mandatory_IS_vars() %in% colnames(x))) { |
835 |
- stop(.non_ISM_error()) |
|
798 |
+ rlang::abort(.missing_mand_vars()) |
|
836 | 799 |
} |
837 | 800 |
if (!.is_annotated(x)) { |
838 |
- stop(.missing_annot()) |
|
801 |
+ rlang::abort(.missing_annot()) |
|
839 | 802 |
} |
840 | 803 |
# Check other parameters |
841 |
- stopifnot(is.character(genomic_annotation_file) & |
|
842 |
- length(genomic_annotation_file) == 1) |
|
843 |
- stopifnot(is.numeric(grubbs_flanking_gene_bp) | |
|
844 |
- is.integer(grubbs_flanking_gene_bp)) |
|
845 |
- stopifnot(length(grubbs_flanking_gene_bp) == 1) |
|
846 |
- stopifnot(is.numeric(threshold_alpha) & length(threshold_alpha) == 1) |
|
847 |
- stopifnot(is.logical(add_standard_padjust) & |
|
848 |
- length(add_standard_padjust) == 1) |
|
849 |
- stopifnot(file.exists(genomic_annotation_file)) |
|
850 |
- # Determine file extension |
|
851 |
- ext <- .check_file_extension(genomic_annotation_file) |
|
852 |
- |
|
853 |
- # Try to import annotation file |
|
854 |
- if (ext == "tsv") { |
|
855 |
- refgenes <- utils::read.csv( |
|
856 |
- file = genomic_annotation_file, |
|
857 |
- header = TRUE, fill = TRUE, sep = "\t", |
|
858 |
- check.names = FALSE, |
|
859 |
- na.strings = c("NONE", "NA", "NULL", "NaN", "") |
|
860 |
- ) |
|
861 |
- refgenes <- tibble::as_tibble(refgenes) %>% |
|
862 |
- dplyr::mutate(chrom = stringr::str_replace_all( |
|
863 |
- .data$chrom, |
|
864 |
- "chr", "" |
|
865 |
- )) |
|
866 |
- } else if (ext == "csv") { |
|
867 |
- refgenes <- utils::read.csv( |
|
868 |
- file = genomic_annotation_file, |
|
869 |
- header = TRUE, fill = TRUE, |
|
870 |
- check.names = FALSE, |
|
871 |
- na.strings = c("NONE", "NA", "NULL", "NaN", "") |
|
872 |
- ) |
|
873 |
- refgenes <- tibble::as_tibble(refgenes) %>% |
|
874 |
- dplyr::mutate(chrom = stringr::str_replace_all( |
|
875 |
- .data$chrom, |
|
876 |
- "chr", "" |
|
877 |
- )) |
|
804 |
+ stopifnot(is.character(genomic_annotation_file)) |
|
805 |
+ genomic_annotation_file <- genomic_annotation_file[1] |
|
806 |
+ if (genomic_annotation_file %in% c("hg19", "mm9", "mm10")) { |
|
807 |
+ gen_file <- paste0("refGenes_", genomic_annotation_file) |
|
808 |
+ utils::data(list = gen_file, envir = rlang::current_env()) |
|
809 |
+ refgenes <- rlang::eval_tidy(rlang::sym(gen_file)) |
|
878 | 810 |
} else { |
879 |
- stop(paste( |
|
880 |
- "The genomic annotation file must be either in", |
|
881 |
- ".tsv or .csv format (compressed or not)" |
|
882 |
- )) |
|
883 |
- } |
|
884 |
- |
|
885 |
- # Check annotation file format |
|
886 |
- if (!all(c( |
|
887 |
- "name2", "chrom", "strand", "min_txStart", "max_txEnd", |
|
888 |
- "minmax_TxLen", "average_TxLen", "name", "min_cdsStart", |
|
889 |
- "max_cdsEnd", "minmax_CdsLen", "average_CdsLen" |
|
890 |
- ) %in% |
|
891 |
- colnames(refgenes))) { |
|
892 |
- stop(.non_standard_annotation_structure()) |
|
893 |
- } |
|
894 |
- ### Begin - init phase |
|
895 |
- df_by_gene <- x %>% |
|
896 |
- dplyr::group_by( |
|
897 |
- .data$GeneName, |
|
898 |
- .data$GeneStrand, |
|
899 |
- .data$chr |
|
900 |
- ) %>% |
|
901 |
- dplyr::summarise( |
|
902 |
- n_IS_perGene = dplyr::n_distinct( |
|
903 |
- .data$integration_locus |
|
904 |
- ), |
|
905 |
- min_bp_integration_locus = |
|
906 |
- min(.data$integration_locus), |
|
907 |
- max_bp_integration_locus = |
|
908 |
- max(.data$integration_locus), |
|
909 |
- IS_span_bp = (max(.data$integration_locus) - |
|
910 |
- min(.data$integration_locus)), |
|
911 |
- avg_bp_integration_locus = |
|
912 |
- mean(.data$integration_locus), |
|
913 |
- median_bp_integration_locus = |
|
914 |
- stats::median(.data$integration_locus), |
|
915 |
- distinct_orientations = dplyr::n_distinct(.data$strand), |
|
916 |
- describe = list(tibble::as_tibble( |
|
917 |
- psych::describe(.data$integration_locus) |
|
918 |
- )), |
|
919 |
- .groups = "drop" |
|
920 |
- ) %>% |
|
921 |
- tidyr::unnest(.data$describe, keep_empty = TRUE, names_sep = "_") |
|
922 |
- |
|
923 |
- df_bygene_withannotation <- df_by_gene %>% |
|
924 |
- dplyr::inner_join(refgenes, by = c( |
|
925 |
- "chr" = "chrom", |
|
926 |
- "GeneStrand" = "strand", |
|
927 |
- "GeneName" = "name2" |
|
928 |
- )) %>% |
|
929 |
- dplyr::select(c( |
|
930 |
- dplyr::all_of(colnames(df_by_gene)), |
|
931 |
- .data$average_TxLen |
|
932 |
- )) |
|
933 |
- n_elements <- nrow(df_bygene_withannotation) |
|
934 |
- |
|
935 |
- df_bygene_withannotation <- df_bygene_withannotation %>% |
|
936 |
- dplyr::mutate( |
|
937 |
- geneIS_frequency_byHitIS = .data$n_IS_perGene / n_elements |
|
938 |
- ) |
|
939 |
- |
|
940 |
- ### Grubbs test |
|
941 |
- ### --- Gene Frequency |
|
942 |
- df_bygene_withannotation <- df_bygene_withannotation %>% |
|
943 |
- dplyr::mutate( |
|
944 |
- raw_gene_integration_frequency = |
|
945 |
- .data$n_IS_perGene / .data$average_TxLen, |
|
946 |
- integration_frequency_withtolerance = .data$n_IS_perGene / |
|
947 |
- (.data$average_TxLen + grubbs_flanking_gene_bp) * 1000, |
|
948 |
- minus_log2_integration_freq_withtolerance = |
|
949 |
- -log(x = .data$integration_frequency_withtolerance, base = 2) |
|
950 |
- ) |
|
951 |
- ### --- z score |
|
952 |
- z_mlif <- function(x) { |
|
953 |
- sqrt((n_elements * (n_elements - 2) * x^2) / |
|
954 |
- (((n_elements - 1)^2) - (n_elements * x^2))) |
|
955 |
- } |
|
956 |
- df_bygene_withannotation <- df_bygene_withannotation %>% |
|
957 |
- dplyr::mutate( |
|
958 |
- zscore_minus_log2_int_freq_tolerance = |
|
959 |
- scale(-log( |
|
960 |
- x = .data$integration_frequency_withtolerance, |
|
961 |
- base = 2 |
|
962 |
- )), |
|
963 |
- neg_zscore_minus_log2_int_freq_tolerance = |
|
964 |
- -.data$zscore_minus_log2_int_freq_tolerance, |
|
965 |
- t_z_mlif = z_mlif( |
|
966 |
- .data$neg_zscore_minus_log2_int_freq_tolerance |
|
811 |
+ stopifnot(file.exists(genomic_annotation_file)) |
|
812 |
+ # Determine file extension |
|
813 |
+ ext <- .check_file_extension(genomic_annotation_file) |
|
814 |
+ # Try to import annotation file |
|
815 |
+ if (ext == "tsv") { |
|
816 |
+ refgenes <- utils::read.csv( |
|
817 |
+ file = genomic_annotation_file, |
|
818 |
+ header = TRUE, fill = TRUE, sep = "\t", |
|
819 |
+ check.names = FALSE, |
|
820 |
+ na.strings = c("NONE", "NA", "NULL", "NaN", "") |
|
967 | 821 |
) |
968 |
- ) |
|
969 |
- ### --- tdist |
|
970 |
- t_dist_2t <- function(x, deg) { |
|
971 |
- return((1 - stats::pt(x, deg)) * 2) |
|
972 |
- } |
|
973 |
- df_bygene_withannotation <- df_bygene_withannotation %>% |
|
974 |
- dplyr::mutate( |
|
975 |
- tdist2t = t_dist_2t(.data$t_z_mlif, n_elements - 2), |
|
976 |
- tdist_pt = pt( |
|
977 |
- q = .data$t_z_mlif, |
|
978 |
- df = n_elements - 2 |
|
979 |
- ), |
|
980 |
- tdist_bonferroni_default = ifelse( |
|
981 |
- .data$tdist2t * n_elements > 1, 1, |
|
982 |
- .data$tdist2t * n_elements |
|
822 |
+ # Check annotation file format |
|
823 |
+ if (!all(refGene_table_cols() %in% colnames(refgenes))) { |
|
824 |
+ rlang::abort(.non_standard_annotation_structure()) |
|
825 |
+ } |
|
826 |
+ refgenes <- tibble::as_tibble(refgenes) %>% |
|
827 |
+ dplyr::mutate(chrom = stringr::str_replace_all( |
|
828 |
+ .data$chrom, |
|
829 |
+ "chr", "" |
|
830 |
+ )) |
|
831 |
+ } else if (ext == "csv") { |
|
832 |
+ refgenes <- utils::read.csv( |
|
833 |
+ file = genomic_annotation_file, |
|
834 |
+ header = TRUE, fill = TRUE, |
|
835 |
+ check.names = FALSE, |
|
836 |
+ na.strings = c("NONE", "NA", "NULL", "NaN", "") |
|
983 | 837 |
) |
984 |
- ) |
|
985 |
- if (add_standard_padjust == TRUE) { |
|
986 |
- df_bygene_withannotation <- df_bygene_withannotation %>% |
|
987 |
- dplyr::mutate( |
|
988 |
- tdist_bonferroni = stats::p.adjust( |
|
989 |
- .data$tdist2t, |
|
990 |
- method = "bonferroni", |
|
991 |
- n = length(.data$tdist2t) |
|
992 |
- ), |
|
993 |
- tdist_fdr = stats::p.adjust( |
|
994 |
- .data$tdist2t, |
|
995 |
- method = "fdr", |
|
996 |
- n = length(.data$tdist2t) |
|
997 |
- ), |
|
998 |
- tdist_benjamini = stats::p.adjust( |
|
999 |
- .data$tdist2t, |
|
1000 |
- method = "BY", |
|
1001 |
- n = length(.data$tdist2t) |
|
1002 |
- ) |
|
838 |
+ # Check annotation file format |
|
839 |
+ if (!all(refGene_table_cols() %in% colnames(refgenes))) { |
|
840 |
+ rlang::abort(.non_standard_annotation_structure()) |
|
841 |
+ } |
|
842 |
+ refgenes <- tibble::as_tibble(refgenes) %>% |
|
843 |
+ dplyr::mutate(chrom = stringr::str_replace_all( |
|
844 |
+ .data$chrom, |
|
845 |
+ "chr", "" |
|
846 |
+ )) |
|
847 |
+ } else { |
|
848 |
+ gen_file_err <- paste( |
|
849 |
+ "The genomic annotation file must be either in", |
|
850 |
+ ".tsv or .csv format (compressed or not)" |
|
1003 | 851 |
) |
852 |
+ rlang::abort(gen_file_err) |
|
853 |
+ } |
|
1004 | 854 |
} |
1005 |
- df_bygene_withannotation <- df_bygene_withannotation %>% |
|
1006 |
- dplyr::mutate( |
|
1007 |
- tdist_positive_and_corrected = |
|
1008 |
- ifelse( |
|
1009 |
- (.data$tdist_bonferroni_default < threshold_alpha & |
|
1010 |
- .data$neg_zscore_minus_log2_int_freq_tolerance > 0), |
|
1011 |
- .data$tdist_bonferroni_default, |
|
1012 |
- NA |
|
1013 |
- ), |
|
1014 |
- tdist_positive = ifelse( |
|
1015 |
- (.data$tdist2t < threshold_alpha & |
|
1016 |
- .data$neg_zscore_minus_log2_int_freq_tolerance > 0), |
|
1017 |
- .data$tdist2t, |
|
1018 |
- NA |
|
1019 |
- ) |
|
1020 |
- ) |
|
1021 |
- EM_correction_N <- length( |
|
1022 |
- df_bygene_withannotation$tdist_positive[ |
|
1023 |
- !is.na(df_bygene_withannotation$tdist_positive) |
|
1024 |
- ] |
|
1025 |
- ) |
|
1026 |
- df_bygene_withannotation <- df_bygene_withannotation %>% |
|
1027 |
- dplyr::mutate( |
|
1028 |
- tdist_positive_and_correctedEM = |
|
1029 |
- ifelse( |
|
1030 |
- (.data$tdist2t * EM_correction_N < |
|
1031 |
- threshold_alpha & |
|
1032 |
- .data$neg_zscore_minus_log2_int_freq_tolerance > 0), |
|
1033 |
- .data$tdist2t * EM_correction_N, |
|
1034 |
- NA |
|
1035 |
- ) |
|
855 |
+ stopifnot(is.numeric(grubbs_flanking_gene_bp) | |
|
856 |
+ is.integer(grubbs_flanking_gene_bp)) |
|
857 |
+ grubbs_flanking_gene_bp <- grubbs_flanking_gene_bp[1] |
|
858 |
+ stopifnot(is.numeric(threshold_alpha)) |
|
859 |
+ threshold_alpha <- threshold_alpha[1] |
|
860 |
+ stopifnot(is.null(by) || is.character(by)) |
|
861 |
+ if (!all(by %in% colnames(x))) { |
|
862 |
+ rlang::abort(.missing_user_cols_error(by[!by %in% colnames(x)])) |
|
863 |
+ } |
|
864 |
+ result <- if (is.null(by)) { |
|
865 |
+ .cis_grubb_calc( |
|
866 |
+ x = x, |
|
867 |
+ refgenes = refgenes, |
|
868 |
+ grubbs_flanking_gene_bp = grubbs_flanking_gene_bp, |
|
869 |
+ threshold_alpha = threshold_alpha |
|
1036 | 870 |
) |
1037 |
- return(df_bygene_withannotation) |
|
871 |
+ } else { |
|
872 |
+ grouped <- x %>% |
|
873 |
+ dplyr::group_by(dplyr::across(dplyr::all_of(by))) |
|
874 |
+ group_keys <- grouped %>% |
|
875 |
+ dplyr::group_keys() %>% |
|
876 |
+ tidyr::unite(col = "id", dplyr::everything()) %>% |
|
877 |
+ dplyr::pull(.data$id) |
|
878 |
+ split <- x %>% |
|
879 |
+ dplyr::group_by(dplyr::across(dplyr::all_of(by))) %>% |
|
880 |
+ dplyr::group_split() %>% |
|
881 |
+ purrr::set_names(group_keys) |
|
882 |
+ purrr::map(split, ~ .cis_grubb_calc( |
|
883 |
+ x = .x, |
|