Browse code

[NEW] Update to version 1.3.1

Giulia Pais authored on 24/06/2021 15:28:27
Showing 49 changed files

... ...
@@ -9,3 +9,4 @@
9 9
 ^docs$
10 10
 ^pkgdown$
11 11
 ^doc$
12
+^Design$
... ...
@@ -2,20 +2,20 @@ on:
2 2
   push:
3 3
     branches:
4 4
       - master
5
+    tags:
6
+      - '*'
5 7
 
6 8
 name: pkgdown
7 9
 
8 10
 jobs:
9 11
   pkgdown:
10
-    runs-on: ubuntu-latest
12
+    runs-on: macOS-latest
11 13
     env:
12 14
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
13 15
     steps:
14 16
       - uses: actions/checkout@v2
15 17
 
16 18
       - uses: r-lib/actions/setup-r@v1
17
-        with:
18
-          r-version: 'release'
19 19
 
20 20
       - uses: r-lib/actions/setup-pandoc@v1
21 21
 
... ...
@@ -26,7 +26,7 @@ jobs:
26 26
           writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
27 27
         shell: Rscript {0}
28 28
 
29
-      - name: Cache R packages
29
+      - name: Restore R package cache
30 30
         uses: actions/cache@v2
31 31
         with:
32 32
           path: ${{ env.R_LIBS_USER }}
... ...
@@ -1,6 +1,6 @@
1 1
 Package: ISAnalytics
2 2
 Title: Analyze gene therapy vector insertion sites data identified from genomics next generation sequencing reads for clonal tracking studies
3
-Version: 1.3.0
3
+Version: 1.3.1
4 4
 Date: 2020-07-03
5 5
 Authors@R: c(
6 6
   person(given = "Andrea",
... ...
@@ -65,7 +65,10 @@ Suggests:
65 65
     roxygen2,
66 66
     vegan,
67 67
     withr,
68
-    extraDistr
68
+    extraDistr,
69
+    ggalluvial,
70
+    scales,
71
+    gridExtra
69 72
 VignetteBuilder: knitr
70 73
 RdMacros: 
71 74
     lifecycle
... ...
@@ -28,6 +28,8 @@ export(import_association_file)
28 28
 export(import_parallel_Vispa2Matrices_auto)
29 29
 export(import_parallel_Vispa2Matrices_interactive)
30 30
 export(import_single_Vispa2Matrix)
31
+export(integration_alluvial_plot)
32
+export(is_sharing)
31 33
 export(known_clinical_oncogenes)
32 34
 export(mandatory_IS_vars)
33 35
 export(matching_options)
... ...
@@ -39,7 +41,9 @@ export(reduced_AF_columns)
39 41
 export(remove_collisions)
40 42
 export(sample_statistics)
41 43
 export(separate_quant_matrices)
44
+export(sharing_heatmap)
42 45
 export(threshold_filter)
46
+export(top_abund_tableGrob)
43 47
 export(top_integrations)
44 48
 export(unzip_file_system)
45 49
 import(BiocParallel)
... ...
@@ -56,6 +60,7 @@ importFrom(BiocParallel,bpstop)
56 60
 importFrom(BiocParallel,bptry)
57 61
 importFrom(Rcapture,closedp.0)
58 62
 importFrom(Rcapture,closedp.bc)
63
+importFrom(data.table,.SD)
59 64
 importFrom(data.table,fread)
60 65
 importFrom(data.table,melt.data.table)
61 66
 importFrom(data.table,rbindlist)
... ...
@@ -88,6 +93,11 @@ importFrom(fs,path)
88 93
 importFrom(fs,path_dir)
89 94
 importFrom(fs,path_ext)
90 95
 importFrom(fs,path_wd)
96
+importFrom(ggplot2,aes_)
97
+importFrom(ggplot2,geom_text)
98
+importFrom(ggplot2,ggplot)
99
+importFrom(ggplot2,scale_fill_viridis_d)
100
+importFrom(ggplot2,sym)
91 101
 importFrom(ggrepel,geom_label_repel)
92 102
 importFrom(grDevices,colorRamp)
93 103
 importFrom(grDevices,rgb)
... ...
@@ -184,6 +194,7 @@ importFrom(tidyr,nest)
184 194
 importFrom(tidyr,pivot_longer)
185 195
 importFrom(tidyr,pivot_wider)
186 196
 importFrom(tidyr,separate)
197
+importFrom(tidyr,unite)
187 198
 importFrom(tidyr,unnest)
188 199
 importFrom(tools,file_path_sans_ext)
189 200
 importFrom(utils,read.csv)
190 201
new file mode 100644
... ...
@@ -0,0 +1,268 @@
1
+---
2
+title: "NEWS"
3
+output: github_document
4
+---
5
+# ISAnalytics 1.3.1 (2021-06-24)
6
+
7
+## NEW FUNCTIONALITY
8
+
9
+* `is_sharing` computes the sharing of IS between groups
10
+* `sharing_heatmap` allows visualization of sharing data through heatmaps
11
+* `integration_alluvial_plot` allows visualization of integration sites
12
+distribution in groups over time.
13
+* `top_abund_tableGrob` can be used in combination with the previous function
14
+or by itself to obtain a summary of top abundant integrations as an R graphic
15
+(tableGrob) object that can be combined with plots.
16
+
17
+## MINOR UPDATES
18
+* Added more default stats functions to `default_stats`
19
+* Added optional automatic conversion of time points in months and years
20
+when importing association file
21
+* Minor fixes in `generate_Vispa2_launch_AF`
22
+
23
+# ISAnalytics 1.1.11 (2021-05-11)
24
+
25
+## NEW FUNCTIONALITY
26
+
27
+* `HSC_population_size_estimate` and `HSC_population_plot` allow estimates
28
+on hematopoietic stem cell population size
29
+* Importing of Vispa2 stats per pool now has a dedicated function,
30
+`import_Vispa2_stats`
31
+* `outlier_filter` and `outliers_by_pool_fragments` offer a means to filter
32
+poorly represented samples based on custom outliers tests
33
+
34
+## VISIBLE USER CHANGES
35
+
36
+* The argument `import_stats` of `aggregate_metadata` is officially deprecated
37
+in favor of `import_Vispa2_stats`
38
+* `aggregate_metadata` is now a lot more flexible on what operations can be
39
+performed on columns via the new argument `aggregating_functions`
40
+* `import_association_file` allows directly for the import of Vispa2 stats
41
+and converts time points to months and years where not already present
42
+* File system alignment of `import_association_file` now produces 3 separate
43
+columns for paths
44
+* `separate_quant_matrices` and `comparison_matrix` now do not require 
45
+mandatory columns other than the quantifications - this allows for separation
46
+or joining also for aggregated matrices
47
+
48
+## FIXES
49
+
50
+* Fixed a minor issue in `CIS_volcano_plot` that caused duplication of some
51
+labels if highlighted genes were provided in input
52
+
53
+# ISAnalytics 1.1.10 (2021-04-08)
54
+
55
+## FIXES
56
+
57
+* Fixed issue in `compute_near_integrations`: when provided recalibration
58
+map export path as a folder now the function works correctly and produces
59
+an automatically generated file name
60
+* Fixed issue in `aggregate_metadata`: now the path to the folder that contains
61
+Vispa2 stats is looked up correctly. Also, VISPA2 stats columns are aggregated
62
+if found in the input data frame independently from the parameter 
63
+`import_stats`.
64
+
65
+## IMPROVEMENTS
66
+
67
+* `compute_abundance` can now take as input aggregated matrices and has
68
+additional parameters to offer more flexibility to the user. Major updates
69
+and improvements also on documentation and reproducible examples.
70
+* Major improvements in function `import_single_Vispa2Matrix`: import is 
71
+now preferentially carried out using `data.table::fread` greatly speeding up 
72
+the process - where not possible `readr::read_delim` is used instead
73
+* Major improvements in function `import_association_file`: greatly improved
74
+parsing precision (each column has a dedicated type), import report now 
75
+signals parsing problems and their location and signals also
76
+problems in parsing dates.
77
+Report also includes potential problems in column names and signals missing
78
+data in important columns. Added also the possibility to give various file 
79
+formats in input including `*.xls(x)` formats.
80
+* Function `top_integrations` can now take additional parameters to compute
81
+top n genes for each specified group
82
+* Removed faceting parameters in `CIS_volcano_plot` due to poor precision
83
+(easier to add faceting manually) and added parameters to return the 
84
+data frame that generated the plot as an additional result. Also, it is 
85
+now possible to specify a vector of gene names to highlight even if they're
86
+not above the annotation threshold.
87
+
88
+## MINOR
89
+
90
+* ISAnalytics website has improved graphic theme and has an additional button
91
+on the right that leads to the devel (or release) version of the website
92
+* Updated vignettes
93
+
94
+## FOR DEVS ONLY
95
+
96
+* Complete rework of test suite to be compliant to testthat v.3
97
+
98
+# ISAnalytics 1.1.9 (2021-02-17)
99
+
100
+## FIXES
101
+
102
+* Fixed minor issues in internal functions with absolute file paths & corrected
103
+typos
104
+
105
+# ISAnalytics 1.1.8 (2021-02-15)
106
+
107
+## FIXES
108
+
109
+* Fixed minor issues in internal functions to optimize file system alignment
110
+
111
+
112
+# ISAnalytics 1.1.7 (2021-02-10)
113
+
114
+## FIXES
115
+
116
+* Fixed minor issues in import_association_file when checking parameters
117
+
118
+# ISAnalytics 1.1.6 (2021-02-06)
119
+
120
+## UPGRADES
121
+
122
+* It is now possible to save html reports to file from 
123
+import_parallel_Vispa2Matrices_auto and 
124
+import_parallel_Vispa2Matrices_interactive, remove_collisions and 
125
+compute_near_integrations
126
+
127
+## FIXES
128
+
129
+* Fixed sample_statistics: now functions that have data frame output do not
130
+produce nested tables. Flat tables are ready to be saved to file or can be
131
+nested.
132
+* Simplified association file check logic in remove_collisions: now 
133
+function blocks only if the af doesn't contain the needed columns
134
+
135
+# ISAnalytics 1.1.5 (2021-02-03)
136
+
137
+## UPGRADES
138
+
139
+* Upgraded import_association_file function: now file alignment is not
140
+mandatory anymore and it is possible to save the html report to file
141
+* Updated vignettes and documentation
142
+
143
+# ISAnalytics 1.1.4 (2020-11-16)
144
+
145
+## UPGRADES
146
+
147
+* Greatly improved reports for collision removal function
148
+* General improvements for all widget reports
149
+
150
+# ISAnalytics 1.1.3 (2020-11-10)
151
+
152
+## FIXES
153
+
154
+* Further fixes for printing reports when widgets not available
155
+* Added progress bar to collision processing in `remove_collisions`
156
+* Updated vignettes
157
+
158
+## NEW
159
+
160
+* Added vignette "Using ISAnalytics without RStudio support"
161
+
162
+# ISAnalytics 1.1.2 (2020-11-05)
163
+
164
+## FIXES
165
+
166
+* Fixed missing restarts for non-blocking widgets
167
+
168
+# ISAnalytics 1.1.1 (2020-11-04)
169
+
170
+## FIXES
171
+
172
+* Functions that make use of widgets do not interrupt execution anymore if 
173
+errors are thrown while producing or printing the widgets
174
+* Optimized widget printing for importing functions
175
+* If widgets can't be printed and verbose option is active, reports are now 
176
+displayed on console instead (needed for usage in environments that do not 
177
+have access to a browser)
178
+* Other minor fixes (typos)
179
+* Bug fixes: fixed a few bugs in importing and recalibration functions
180
+* Minor fix in import_association_file function: added multiple strings
181
+to be translated as NA
182
+
183
+## IMPORTANT NOTES
184
+
185
+* Vignette building might fail due to the fact that package "knitcitations" 
186
+is temporarily unavailable through CRAN
187
+* ISAnalytics is finally in release on bioconductor!
188
+
189
+# ISAnalytics 0.99.14 (2020-10-21)
190
+
191
+* Minor fixes in tests
192
+
193
+# ISAnalytics 0.99.13 (2020-10-19)
194
+
195
+## NEW FEATURES
196
+
197
+* Added analysis functions `CIS_grubbs` and `cumulative_count_union`
198
+* Added plotting function `CIS_volcano_plot`
199
+
200
+# ISAnalytics 0.99.12 (2020-10-04)
201
+
202
+## NEW FEATURES
203
+
204
+* Added analysis function `sample_statistics`
205
+
206
+## SIGNIFICANT USER-VISIBLE CHANGES
207
+
208
+* `aggregate_values_by_key` has a simplified interface and supports
209
+multi-quantification matrices
210
+
211
+## MINOR CHANGES
212
+
213
+* Updated vignettes
214
+* `import_parallel_Vispa2Matrices_interactive` and
215
+`import_parallel_Vispa2Matrices_auto` now have an option to return 
216
+a multi-quantification matrix directly after import instead of a list
217
+
218
+# ISAnalytics 0.99.11 (2020-09-21)
219
+
220
+## NEW FEATURES
221
+
222
+* Added analysis functions `threshold_filter`, `top_integrations`
223
+* Added support for multi-quantification matrices in `compute_abundance`
224
+
225
+## MINOR FIXES
226
+
227
+* Fixed bug in `comparison_matrix` that ignored custom column names
228
+* Fixed issues in some documentation pages
229
+
230
+# ISAnalytics 0.99.10 (2020-09-14)
231
+
232
+ISAnalytics is officially on Bioconductor!
233
+
234
+## NEW FEATURES
235
+
236
+* Added analysis functions `comparison_matrix` and `separate_quant_matrices`
237
+* Added utility function `as_sparse_matrix`
238
+* Added package logo
239
+
240
+## SIGNIFICANT USER-VISIBLE CHANGES
241
+
242
+* Changed algorithm for `compute_near_integrations`
243
+* Added support for multi-quantification matrices to `remove_collisions`
244
+* Added usage of lifecycle badges in documentation: users can now see if 
245
+a feature is experimental/maturing/stable etc
246
+
247
+## MINOR FIXES
248
+
249
+* Added fix for `import_single_Vispa2Matrix` to remove non significant 
250
+0 values
251
+
252
+# ISAnalytics 0.99.9 (2020-09-01)
253
+
254
+## NEW FEATURES
255
+
256
+* Added functionality: aggregate functions
257
+* Added vignette on aggregate functions
258
+* Added recalibration functions
259
+* Added first analysis function (compute_abundance)
260
+
261
+## SIGNIFICANT USER-VISIBLE CHANGES
262
+
263
+* Dropped structure `ISADataFrame`: now the package only uses standard tibbles
264
+* Modified package documentation
265
+
266
+# ISAnalytics 0.99.8 (2020-08-12)
267
+
268
+* Submitted to Bioconductor
... ...
@@ -1,211 +1,241 @@
1
-\title{ISAnalytics News}
1
+NEWS
2
+================
3
+
4
+# ISAnalytics 1.3.1 (2021-06-24)
5
+
6
+## NEW FUNCTIONALITY
7
+
8
+-   `is_sharing` computes the sharing of IS between groups
9
+-   `sharing_heatmap` allows visualization of sharing data through
10
+    heatmaps
11
+-   `integration_alluvial_plot` allows visualization of integration
12
+    sites distribution in groups over time.
13
+-   `top_abund_tableGrob` can be used in combination with the previous
14
+    function or by itself to obtain a summary of top abundant
15
+    integrations as an R graphic (tableGrob) object that can be combined
16
+    with plots.
17
+
18
+## MINOR UPDATES
19
+
20
+-   Added more default stats functions to `default_stats`
21
+-   Added optional automatic conversion of time points in months and
22
+    years when importing association file
23
+-   Minor fixes in `generate_Vispa2_launch_AF`
2 24
 
3 25
 # ISAnalytics 1.1.11 (2021-05-11)
4 26
 
5 27
 ## NEW FUNCTIONALITY
6 28
 
7
-* `HSC_population_size_estimate` and `HSC_population_plot` allow estimates
8
-on hematopoietic stem cell population size
9
-* Importing of Vispa2 stats per pool now has a dedicated function,
10
-`import_Vispa2_stats`
11
-* `outlier_filter` and `outliers_by_pool_fragments` offer a mean to filter
12
-poorly represented samples based on custom outliers tests
29
+-   `HSC_population_size_estimate` and `HSC_population_plot` allow
30
+    estimates on hematopoietic stem cell population size
31
+-   Importing of Vispa2 stats per pool now has a dedicated function,
32
+    `import_Vispa2_stats`
33
+-   `outlier_filter` and `outliers_by_pool_fragments` offer a means to
34
+    filter poorly represented samples based on custom outliers tests
13 35
 
14 36
 ## VISIBLE USER CHANGES
15 37
 
16
-* The argument `import_stats` of `aggregate_metadata` is officially deprecated
17
-in favor of `import_Vispa2_stats`
18
-* `aggregate_metadata` is now a lot more flexible on what operations can be
19
-performed on columns via the new argument `aggregating_functions`
20
-* `import_association_file` allows directly for the import of Vispa2 stats
21
-and converts time points to months and years where not already present
22
-* File system alignment of `import_association_file` now produces 3 separate
23
-columns for paths
24
-* `separate_quant_matrices` and `comparison_matrix` now do not require 
25
-mandatory columns other than the quantifications - this allows for separation
26
-or joining also for aggregated matrices
38
+-   The argument `import_stats` of `aggregate_metadata` is officially
39
+    deprecated in favor of `import_Vispa2_stats`
40
+-   `aggregate_metadata` is now a lot more flexible on what operations
41
+    can be performed on columns via the new argument
42
+    `aggregating_functions`
43
+-   `import_association_file` allows directly for the import of Vispa2
44
+    stats and converts time points to months and years where not already
45
+    present
46
+-   File system alignment of `import_association_file` now produces 3
47
+    separate columns for paths
48
+-   `separate_quant_matrices` and `comparison_matrix` now do not require
49
+    mandatory columns other than the quantifications - this allows for
50
+    separation or joining also for aggregated matrices
27 51
 
28 52
 ## FIXES
29 53
 
30
-* Fixed a minor issue in `CIS_volcano_plot` that caused duplication of some
31
-labels if highlighted genes were provided in input
54
+-   Fixed a minor issue in `CIS_volcano_plot` that caused duplication of
55
+    some labels if highlighted genes were provided in input
32 56
 
33 57
 # ISAnalytics 1.1.10 (2021-04-08)
34 58
 
35 59
 ## FIXES
36 60
 
37
-* Fixed issue in `compute_near_integrations`: when provided recalibration
38
-map export path as a folder now the function works correctly and produces
39
-an automatically generated file name
40
-* Fixed issue in `aggregate_metadata`: now paths to folder that contains
41
-Vispa2 stats is looked up correctly. Also, VISPA2 stats columns are aggregated
42
-if found in the input data frame independently from the parameter 
43
-`import_stats`.
61
+-   Fixed issue in `compute_near_integrations`: when provided
62
+    recalibration map export path as a folder now the function works
63
+    correctly and produces an automatically generated file name
64
+-   Fixed issue in `aggregate_metadata`: now the path to the folder that
65
+    contains Vispa2 stats is looked up correctly. Also, VISPA2 stats
66
+    columns are aggregated if found in the input data frame
67
+    independently from the parameter `import_stats`.
44 68
 
45 69
 ## IMPROVEMENTS
46 70
 
47
-* `compute_abundance` can now take as input aggregated matrices and has
48
-additional parameters to offer more flexibility to the user. Major updates
49
-and improvements also on documentation and reproducible examples.
50
-* Major improvements in function `import_single_Vispa2Matrix`: import is 
51
-now preferentially carried out using `data.table::fread` greatly speeding up 
52
-the process - where not possible `readr::read_delim` is used instead
53
-* Major improvements in function `import_association_file`: greatly improved
54
-parsing precision (each column has a dedicated type), import report now 
55
-signals parsing problems and their location and signals also
56
-problems in parsing dates.
57
-Report also includes potential problems in column names and signals missing
58
-data in important columns. Added also the possibility to give various file 
59
-formats in input including `*.xls(x)` formats.
60
-* Function `top_integrations` can now take additional parameters to compute
61
-top n genes for each specified group
62
-* Removed faceting parameters in `CIS_volcano_plot` due to poor precision
63
-(easier to add faceting manually) and added parameters to return the 
64
-data frame that generated the plot as an additional result. Also, it is 
65
-now possible to specify a vector of gene names to highlight even if they're
66
-not above the annotation threshold.
71
+-   `compute_abundance` can now take as input aggregated matrices and
72
+    has additional parameters to offer more flexibility to the user.
73
+    Major updates and improvements also on documentation and
74
+    reproducible examples.
75
+-   Major improvements in function `import_single_Vispa2Matrix`: import
76
+    is now preferentially carried out using `data.table::fread` greatly
77
+    speeding up the process - where not possible `readr::read_delim` is
78
+    used instead
79
+-   Major improvements in function `import_association_file`: greatly
80
+    improved parsing precision (each column has a dedicated type),
81
+    import report now signals parsing problems and their location and
82
+    signals also problems in parsing dates. Report also includes
83
+    potential problems in column names and signals missing data in
84
+    important columns. Added also the possibility to give various file
85
+    formats in input including `*.xls(x)` formats.
86
+-   Function `top_integrations` can now take additional parameters to
87
+    compute top n genes for each specified group
88
+-   Removed faceting parameters in `CIS_volcano_plot` due to poor
89
+    precision (easier to add faceting manually) and added parameters to
90
+    return the data frame that generated the plot as an additional
91
+    result. Also, it is now possible to specify a vector of gene names
92
+    to highlight even if they’re not above the annotation threshold.
67 93
 
68 94
 ## MINOR
69 95
 
70
-* ISAnalytics website has improved graphic theme and has an additional button
71
-on the right that leads to the devel (or release) version of the website
72
-* Updated vignettes
96
+-   ISAnalytics website has improved graphic theme and has an additional
97
+    button on the right that leads to the devel (or release) version of
98
+    the website
99
+-   Updated vignettes
73 100
 
74 101
 ## FOR DEVS ONLY
75 102
 
76
-* Complete rework of test suite to be compliant to testthat v.3
103
+-   Complete rework of test suite to be compliant to testthat v.3
77 104
 
78 105
 # ISAnalytics 1.1.9 (2021-02-17)
79 106
 
80 107
 ## FIXES
81 108
 
82
-* Fixed minor issues in internal functions with absolute file paths & corrected
83
-typos
109
+-   Fixed minor issues in internal functions with absolute file paths &
110
+    corrected typos
84 111
 
85 112
 # ISAnalytics 1.1.8 (2021-02-15)
86 113
 
87 114
 ## FIXES
88 115
 
89
-* Fixed minor issues in internal functions to optimize file system alignment
90
-
116
+-   Fixed minor issues in internal functions to optimize file system
117
+    alignment
91 118
 
92 119
 # ISAnalytics 1.1.7 (2021-02-10)
93 120
 
94 121
 ## FIXES
95 122
 
96
-* Fixed minor issues in import_association_file when checking parameters
123
+-   Fixed minor issues in import\_association\_file when checking
124
+    parameters
97 125
 
98 126
 # ISAnalytics 1.1.6 (2021-02-06)
99 127
 
100 128
 ## UPGRADES
101 129
 
102
-* It is now possible to save html reports to file from 
103
-import_parallel_Vispa2Matrices_auto and 
104
-import_parallel_Vispa2Matrices_interactive, remove_collisions and 
105
-compute_near_integrations
130
+-   It is now possible to save html reports to file from
131
+    import\_parallel\_Vispa2Matrices\_auto and
132
+    import\_parallel\_Vispa2Matrices\_interactive, remove\_collisions
133
+    and compute\_near\_integrations
106 134
 
107 135
 ## FIXES
108 136
 
109
-* Fixed sample_statistics: now functions that have data frame output do not
110
-produce nested tables. Flat tables are ready to be saved to file or can be
111
-nested.
112
-* Simplified association file check logic in remove_collisions: now 
113
-function blocks only if the af doesn't contain the needed columns
137
+-   Fixed sample\_statistics: now functions that have data frame output
138
+    do not produce nested tables. Flat tables are ready to be saved to
139
+    file or can be nested.
140
+-   Simplified association file check logic in remove\_collisions: now
141
+    function blocks only if the af doesn’t contain the needed columns
114 142
 
115 143
 # ISAnalytics 1.1.5 (2021-02-03)
116 144
 
117 145
 ## UPGRADES
118 146
 
119
-* Upgraded import_association_file function: now file alignment is not
120
-mandatory anymore and it is possible to save the html report to file
121
-* Updated vignettes and documentation
147
+-   Upgraded import\_association\_file function: now file alignment is
148
+    not mandatory anymore and it is possible to save the html report to
149
+    file
150
+-   Updated vignettes and documentation
122 151
 
123 152
 # ISAnalytics 1.1.4 (2020-11-16)
124 153
 
125 154
 ## UPGRADES
126 155
 
127
-* Greatly improved reports for collision removal function
128
-* General improvements for all widget reports
156
+-   Greatly improved reports for collision removal function
157
+-   General improvements for all widget reports
129 158
 
130 159
 # ISAnalytics 1.1.3 (2020-11-10)
131 160
 
132 161
 ## FIXES
133 162
 
134
-* Further fixes for printing reports when widgets not available
135
-* Added progress bar to collision processing in `remove_collisions`
136
-* Updated vignettes
163
+-   Further fixes for printing reports when widgets not available
164
+-   Added progress bar to collision processing in `remove_collisions`
165
+-   Updated vignettes
137 166
 
138 167
 ## NEW
139 168
 
140
-* Added vignette "Using ISAnalytics without RStudio support"
169
+-   Added vignette “Using ISAnalytics without RStudio support”
141 170
 
142 171
 # ISAnalytics 1.1.2 (2020-11-05)
143 172
 
144 173
 ## FIXES
145 174
 
146
-* Fixed missing restarts for non-blocking widgets
175
+-   Fixed missing restarts for non-blocking widgets
147 176
 
148 177
 # ISAnalytics 1.1.1 (2020-11-04)
149 178
 
150 179
 ## FIXES
151 180
 
152
-* Functions that make use of widgets do not interrupt execution anymore if 
153
-errors are thrown while producing or printing the widgets
154
-* Optimized widget printing for importing functions
155
-* If widgets can't be printed and verbose option is active, reports are now 
156
-displayed on console instead (needed for usage in environments that do not 
157
-have access to a browser)
158
-* Other minor fixes (typos)
159
-* Bug fixes: fixed a few bugs in importing and recalibration functions
160
-* Minor fix in import_association_file file function: added multiple strings
161
-to be translated as NA
181
+-   Functions that make use of widgets do not interrupt execution
182
+    anymore if errors are thrown while producing or printing the widgets
183
+-   Optimized widget printing for importing functions
184
+-   If widgets can’t be printed and verbose option is active, reports
185
+    are now displayed on console instead (needed for usage in
186
+    environments that do not have access to a browser)
187
+-   Other minor fixes (typos)
188
+-   Bug fixes: fixed a few bugs in importing and recalibration functions
189
+-   Minor fix in import\_association\_file function: added multiple
190
+    strings to be translated as NA
162 191
 
163 192
 ## IMPORTANT NOTES
164 193
 
165
-* Vignette building might fail due to the fact that package "knitcitations" 
166
-is temporarily unavailable through CRAN
167
-* ISAnalytics is finally in release on bioconductor!
194
+-   Vignette building might fail due to the fact that package
195
+    “knitcitations” is temporarily unavailable through CRAN
196
+-   ISAnalytics is finally in release on bioconductor!
168 197
 
169 198
 # ISAnalytics 0.99.14 (2020-10-21)
170 199
 
171
-* Minor fixes in tests
200
+-   Minor fixes in tests
172 201
 
173 202
 # ISAnalytics 0.99.13 (2020-10-19)
174 203
 
175 204
 ## NEW FEATURES
176 205
 
177
-* Added analysis functions `CIS_grubbs` and `cumulative_count_union`
178
-* Added plotting functions `CIS_volcano_plot`
206
+-   Added analysis functions `CIS_grubbs` and `cumulative_count_union`
207
+-   Added plotting function `CIS_volcano_plot`
179 208
 
180 209
 # ISAnalytics 0.99.12 (2020-10-04)
181 210
 
182 211
 ## NEW FEATURES
183 212
 
184
-* Added analysis function `sample_statistics`
213
+-   Added analysis function `sample_statistics`
185 214
 
186 215
 ## SIGNIFICANT USER-VISIBLE CHANGES
187 216
 
188
-* `aggregate_values_by_key` has a simplified interface and supports
189
-multi-quantification matrices
217
+-   `aggregate_values_by_key` has a simplified interface and supports
218
+    multi-quantification matrices
190 219
 
191 220
 ## MINOR CHANGES
192 221
 
193
-* Updated vignettes
194
-* `import_parallel_Vispa2Matrices_interactive` and
195
-`import_parallel_Vispa2Matrices_auto` now have an option to return 
196
-a multi-quantification matrix directly after import instead of a list
222
+-   Updated vignettes
223
+-   `import_parallel_Vispa2Matrices_interactive` and
224
+    `import_parallel_Vispa2Matrices_auto` now have an option to return a
225
+    multi-quantification matrix directly after import instead of a list
197 226
 
198 227
 # ISAnalytics 0.99.11 (2020-09-21)
199 228
 
200 229
 ## NEW FEATURES
201 230
 
202
-* Added analysis functions `threshold_filter`, `top_integrations`
203
-* Added support for multi-quantification matrices in `compute_abundance`
231
+-   Added analysis functions `threshold_filter`, `top_integrations`
232
+-   Added support for multi-quantification matrices in
233
+    `compute_abundance`
204 234
 
205 235
 ## MINOR FIXES
206 236
 
207
-* Fixed bug in `comparison_matrix` that ignored custom column names
208
-* Fixed issues in some documentation pages
237
+-   Fixed bug in `comparison_matrix` that ignored custom column names
238
+-   Fixed issues in some documentation pages
209 239
 
210 240
 # ISAnalytics 0.99.10 (2020-09-14)
211 241
 
... ...
@@ -213,36 +243,39 @@ ISanalytics is officially on bioconductor!
213 243
 
214 244
 ## NEW FEATURES
215 245
 
216
-* Added analysis functions `comparison_matrix` and `separate_quant_matrices`
217
-* Added utility function `as_sparse_matrix`
218
-* Added package logo
246
+-   Added analysis functions `comparison_matrix` and
247
+    `separate_quant_matrices`
248
+-   Added utility function `as_sparse_matrix`
249
+-   Added package logo
219 250
 
220 251
 ## SIGNIFICANT USER-VISIBLE CHANGES
221 252
 
222
-* Changed algorithm for `compute_near_integrations`
223
-* Added support for multi-quantification matrices to `remove_collisions`
224
-* Added usage of lifecycle badges in documentation: users can now see if 
225
-a feature is experimental/maturing/stable etc
253
+-   Changed algorithm for `compute_near_integrations`
254
+-   Added support for multi-quantification matrices to
255
+    `remove_collisions`
256
+-   Added usage of lifecycle badges in documentation: users can now see
257
+    if a feature is experimental/maturing/stable etc
226 258
 
227 259
 ## MINOR FIXES
228 260
 
229
-* Added fix for `import_single_Vispa2Matrix` to remove non significant 
230
-0 values
261
+-   Added fix for `import_single_Vispa2Matrix` to remove non significant
262
+    0 values
231 263
 
232 264
 # ISAnalytics 0.99.9 (2020-09-01)
233 265
 
234 266
 ## NEW FEATURES
235 267
 
236
-* Added functionality: aggregate functions
237
-* Added vignette on aggregate functions
238
-* Added recalibration functions
239
-* Added first analysis function (compute_abundance)
268
+-   Added functionality: aggregate functions
269
+-   Added vignette on aggregate functions
270
+-   Added recalibration functions
271
+-   Added first analysis function (compute\_abundance)
240 272
 
241 273
 ## SIGNIFICANT USER-VISIBLE CHANGES
242 274
 
243
-* Dropped structure `ISADataFrame`: now the package only uses standard tibbles
244
-* Modified package documentation
275
+-   Dropped structure `ISADataFrame`: now the package only uses standard
276
+    tibbles
277
+-   Modified package documentation
245 278
 
246 279
 # ISAnalytics 0.99.8 (2020-08-12)
247 280
 
248
-* Submitted to Bioconductor
281
+-   Submitted to Bioconductor
... ...
@@ -49,11 +49,15 @@
49 49
 #'   * \code{\link{sample_statistics}}
50 50
 #'   * \code{\link{CIS_grubbs}}
51 51
 #'   * \code{\link{cumulative_count_union}}
52
+#'   * \code{\link{is_sharing}}
52 53
 #' * HSC population size estimate:
53 54
 #'   * \code{\link{HSC_population_size_estimate}}
54 55
 #' * Plotting functions:
55 56
 #'   * \code{\link{CIS_volcano_plot}}
56 57
 #'   * \code{\link{HSC_population_plot}}
58
+#'   * \code{\link{sharing_heatmap}}
59
+#'   * \code{\link{integration_alluvial_plot}}
60
+#'   * \code{\link{top_abund_tableGrob}}
57 61
 #' * Utility functions:
58 62
 #'   * \code{\link{generate_blank_association_file}}
59 63
 #'   * \code{\link{generate_Vispa2_launch_AF}}
... ...
@@ -66,6 +70,8 @@
66 70
 #' package = "ISAnalytics")}
67 71
 #' * \code{vignette("Working with aggregate functions",
68 72
 #' package = "ISAnalytics")}
73
+#' * \code{vignette("Using ISAnalytics without RStudio support",
74
+#' package = "ISAnalytics")}
69 75
 #'
70 76
 #' @docType package
71 77
 #' @name ISAnalytics
... ...
@@ -48,14 +48,13 @@ aggregate_metadata <- function(association_file,
48 48
         "TimePoint"
49 49
     ),
50 50
     aggregating_functions = default_meta_agg(),
51
-    import_stats = lifecycle::deprecated()
52
-    ) {
51
+    import_stats = lifecycle::deprecated()) {
53 52
     # Check parameters
54 53
     stopifnot(is.data.frame(association_file))
55 54
     stopifnot(!is.null(grouping_keys))
56 55
     stopifnot(is.character(grouping_keys))
57 56
     keys_missing <- grouping_keys[!grouping_keys %in%
58
-                                      colnames(association_file)]
57
+        colnames(association_file)]
59 58
     if (!purrr::is_empty(keys_missing)) {
60 59
         rlang::abort(.missing_user_cols_error(keys_missing))
61 60
     }
... ...
@@ -64,19 +63,26 @@ aggregate_metadata <- function(association_file,
64 63
             when = "1.1.11",
65 64
             what = "aggregate_metadata(import_stats)",
66 65
             details = c("Import Vispa2 stats functionality moved",
67
-                        i = paste("Please use `import_Vispa2_stats()`",
68
-                                  "or",
69
-                                  "`import_association_file(import_iss = TRUE)`",
70
-                                  "instead."))
66
+                i = paste(
67
+                    "Please use `import_Vispa2_stats()`",
68
+                    "or",
69
+                    "`import_association_file(import_iss = TRUE)`",
70
+                    "instead."
71
+                )
72
+            )
71 73
         )
72 74
     }
73
-    aggregated <- .aggregate_meta(association_file = association_file,
74
-                                  grouping_keys = grouping_keys,
75
-                                  function_tbl = aggregating_functions)
75
+    aggregated <- .aggregate_meta(
76
+        association_file = association_file,
77
+        grouping_keys = grouping_keys,
78
+        function_tbl = aggregating_functions
79
+    )
76 80
     if (is.null(aggregated)) {
77
-        rlang::inform(paste("No columns in `aggregating_functions$Column`",
78
-                            "was found in column names of the association",
79
-                            "file. Nothing to return."))
81
+        rlang::inform(paste(
82
+            "No columns in `aggregating_functions$Column`",
83
+            "was found in column names of the association",
84
+            "file. Nothing to return."
85
+        ))
80 86
     }
81 87
     aggregated
82 88
 }
... ...
@@ -109,7 +115,7 @@ aggregate_metadata <- function(association_file,
109 115
 #' default_meta_agg()
110 116
 default_meta_agg <- function() {
111 117
     tibble::tribble(
112
-        ~ Column, ~ Function, ~ Args, ~ Output_colname,
118
+        ~Column, ~Function, ~Args, ~Output_colname,
113 119
         "FusionPrimerPCRDate", ~ suppressWarnings(min(.x, na.rm = TRUE)),
114 120
         NA, "{.col}_min",
115 121
         "LinearPCRDate", ~ suppressWarnings(min(.x, na.rm = TRUE)),
... ...
@@ -252,12 +258,19 @@ aggregate_values_by_key <- function(x,
252 258
                 rlang::abort(.non_ISM_error())
253 259
             }
254 260
             if (!all(join_af_by %in% colnames(df))) {
255
-                rlang::abort(c(x = paste("Missing common columns",
256
-                                         "to join metadata"),
257
-                               i = paste("Missing: ",
258
-                                         paste0(join_af_by[!join_af_by %in%
259
-                                                               colnames(df)],
260
-                                                collapse = ", "))))
261
+                rlang::abort(c(
262
+                    x = paste(
263
+                        "Missing common columns",
264
+                        "to join metadata"
265
+                    ),
266
+                    i = paste(
267
+                        "Missing: ",
268
+                        paste0(join_af_by[!join_af_by %in%
269
+                            colnames(df)],
270
+                        collapse = ", "
271
+                        )
272
+                    )
273
+                ))
261 274
             }
262 275
             if (!all(value_cols %in% colnames(df))) {
263 276
                 rlang::abort(.missing_user_cols_error(
... ...
@@ -283,12 +296,19 @@ aggregate_values_by_key <- function(x,
283 296
             rlang::abort(.non_ISM_error())
284 297
         }
285 298
         if (!all(join_af_by %in% colnames(x))) {
286
-            rlang::abort(c(x = paste("Missing common columns",
287
-                                     "to join metadata"),
288
-                           i = paste("Missing: ",
289
-                                     paste0(join_af_by[!join_af_by %in%
290
-                                                           colnames(x)],
291
-                                            collapse = ", "))))
299
+            rlang::abort(c(
300
+                x = paste(
301
+                    "Missing common columns",
302
+                    "to join metadata"
303
+                ),
304
+                i = paste(
305
+                    "Missing: ",
306
+                    paste0(join_af_by[!join_af_by %in%
307
+                        colnames(x)],
308
+                    collapse = ", "
309
+                    )
310
+                )
311
+            ))
292 312
         }
293 313
         if (!all(value_cols %in% colnames(x))) {
294 314
             rlang::abort(.missing_user_cols_error(
... ...
@@ -274,17 +274,17 @@ comparison_matrix <- function(x,
274 274
 #' )
275 275
 #' separated_matrix <- separate_quant_matrices(matrices)
276 276
 #' options(op)
277
-separate_quant_matrices <- function(
278
-    x,
277
+separate_quant_matrices <- function(x,
279 278
     fragmentEstimate = "fragmentEstimate",
280 279
     seqCount = "seqCount",
281 280
     barcodeCount = "barcodeCount",
282 281
     cellCount = "cellCount",
283 282
     ShsCount = "ShsCount",
284
-    key = c(mandatory_IS_vars(),
285
-            annotation_IS_vars(),
286
-            "CompleteAmplificationID")
287
-    ) {
283
+    key = c(
284
+        mandatory_IS_vars(),
285
+        annotation_IS_vars(),
286
+        "CompleteAmplificationID"
287
+    )) {
288 288
     stopifnot(is.data.frame(x))
289 289
     if (!all(key %in% colnames(x))) {
290 290
         rlang::abort(.missing_user_cols_error(key[!key %in% colnames(x)]))
... ...
@@ -715,6 +715,7 @@ sample_statistics <- function(x, metadata,
715 715
             ))
716 716
         }
717 717
     })
718
+
718 719
     result <- x %>%
719 720
         dplyr::group_by(dplyr::across(dplyr::all_of(sample_key))) %>%
720 721
         dplyr::summarise(dplyr::across(
... ...
@@ -1245,6 +1246,201 @@ cumulative_count_union <- function(x,
1245 1246
     return(res)
1246 1247
 }
1247 1248
 
1249
+#' Sharing of integration sites between given groups.
1250
+#'
1251
+#' \lifecycle{experimental}
1252
+#' Computes the amount of integrations shared between the groups identified
1253
+#' by the fields in the `group_key` argument.
1254
+#' An integration site is always identified by the triple
1255
+#' `(chr, integration_locus, strand)`, thus these columns must be present
1256
+#' in the input data frame.
1257
+#'
1258
+#' @details
1259
+#' ## Input data frame
1260
+#' The data frame provided in input must be in a suitable format for the
1261
+#' calculations to be accurate. Please note that this function does not
1262
+#' perform any sort of aggregation, it only relies on counts of
1263
+#' distinct integration sites.
1264
+#'
1265
+#' ## Outputs
1266
+#' By default the function outputs a list of 2 data frames:
1267
+#' * The classical sharing data frame with absolute values.
1268
+#' If the argument `relative_is_sharing` is set to TRUE it also contains
1269
+#' the relative sharing (see below).
1270
+#' * The count of distinct IS for each group
1271
+#'
1272
+#' The relative sharing is calculated, for each pair of groups (A, B) as
1273
+#' 3 separate columns
1274
+#' \itemize{
1275
+#'   \item Shared over A: (|∩(A,B)| / |A|) * 100
1276
+#'   \item Shared over B: (|∩(A,B)| / |B|) * 100
1277
+#'   \item Shared over union: (|∩(A,B)| / |∪(A,B)|) * 100
1278
+#' }
1279
+#'
1280
+#' ## Plotting sharing
1281
+#' The sharing data obtained can be easily plotted in a heatmap via the
1282
+#' function \code{\link{sharing_heatmap}}.
1283
+#'
1284
+#' @param x An integration matrix, aka a data frame containing the columns
1285
+#' `r mandatory_IS_vars()`. See details.
1286
+#' @param group_key Character vector of column names which identify a
1287
+#' single group. An associated group id will be derived by concatenating
1288
+#' the values of these fields, separated by "_"
1289
+#' @param is_count Logical, if TRUE also returns the count of IS for
1290
+#' each group
1291
+#' @param relative_is_sharing Logical, if TRUE also returns the relative
1292
+#' sharing. See details.
1293
+#'
1294
+#' @family Analysis functions
1295
+#' @return A named list of data frames or a single data frame
1296
+#' @export
1297
+#'
1298
+#' @examples
1299
+#' path <- system.file("extdata", "ex_annotated_ISMatrix.tsv.xz",
1300
+#'     package = "ISAnalytics"
1301
+#' )
1302
+#' matrix <- import_single_Vispa2Matrix(path)
1303
+#' sharing <- is_sharing(matrix, group_key = "CompleteAmplificationID")
1304
+is_sharing <- function(x,
1305
+    group_key = c(
1306
+        "SubjectID",
1307
+        "CellMarker",
1308
+        "Tissue",
1309
+        "TimePoint"
1310
+    ),
1311
+    is_count = TRUE,
1312
+    relative_is_sharing = TRUE) {
1313
+    ## Checks
1314
+    stopifnot(is.data.frame(x))
1315
+    stopifnot(is.character(group_key))
1316
+    stopifnot(is.logical(is_count))
1317
+    stopifnot(is.logical(relative_is_sharing))
1318
+    if (!all(group_key %in% colnames(x))) {
1319
+        rlang::abort(
1320
+            .missing_user_cols_error(
1321
+                group_key[!group_key %in% colnames(x)]
1322
+            )
1323
+        )
1324
+    }
1325
+    if (!all(mandatory_IS_vars() %in% colnames(x))) {
1326
+        rlang::abort(
1327
+            .missing_needed_cols(
1328
+                mandatory_IS_vars()[!mandatory_IS_vars() %in% colnames(x)]
1329
+            )
1330
+        )
1331
+    }
1332
+
1333
+    ## --- Nest
1334
+    nested <- x %>%
1335
+        dplyr::select(dplyr::all_of(c(mandatory_IS_vars(), group_key))) %>%
1336
+        dplyr::distinct() %>%
1337
+        tidyr::nest(is_set = mandatory_IS_vars()) %>%
1338
+        tidyr::unite(col = "group_id", group_key)
1339
+
1340
+    ## --- Number of IS for each group
1341
+    is_n <- nested %>%
1342
+        dplyr::transmute(
1343
+            group_id = .data$group_id,
1344
+            num_IS = purrr::map_int(.data$is_set, nrow)
1345
+        )
1346
+
1347
+    ## --- Absolute number of IS shared
1348
+    abs_shared_df <- tibble::tibble(
1349
+        group1 = character(0),
1350
+        group2 = character(0),
1351
+        shared = integer(0)
1352
+    )
1353
+    group_ids <- unique(nested$group_id)
1354
+    groups2 <- group_ids
1355
+    for (i in seq_along(group_ids)) {
1356
+        id1 <- group_ids[i]
1357
+        if (i > 1) {
1358
+            groups2 <- groups2[-1]
1359
+        }
1360
+        for (id2 in groups2) {
1361
+            if (id1 != id2) {
1362
+                shared_n <- dplyr::inner_join(
1363
+                    x = (dplyr::filter(nested, .data$group_id == id1) %>%
1364
+                        dplyr::pull(.data$is_set))[[1]],
1365
+                    y = (dplyr::filter(nested, .data$group_id == id2) %>%
1366
+                        dplyr::pull(.data$is_set))[[1]],
1367
+                    by = mandatory_IS_vars()
1368
+                ) %>% nrow()
1369
+                abs_shared_df <- abs_shared_df %>%
1370
+                    tibble::add_case(
1371
+                        group1 = c(id1, id2),
1372
+                        group2 = c(id2, id1),
1373
+                        shared = shared_n
1374
+                    )
1375
+            } else {
1376
+                shared_n <- nrow((dplyr::filter(
1377
+                    nested,
1378
+                    .data$group_id == id1
1379
+                ) %>%
1380
+                    dplyr::pull(.data$is_set))[[1]])
1381
+                abs_shared_df <- abs_shared_df %>%
1382
+                    tibble::add_case(
1383
+                        group1 = id1,
1384
+                        group2 = id2,
1385
+                        shared = shared_n
1386
+                    )
1387
+            }
1388
+        }
1389
+    }
1390
+    ### --- Relative number of IS shared
1391
+    if (relative_is_sharing) {
1392
+        abs_shared_df <- abs_shared_df %>%
1393
+            dplyr::mutate(
1394
+                on_g1 = purrr::pmap_dbl(
1395
+                    list(.data$group1, .data$group2, .data$shared),
1396
+                    function(x, y, s) {
1397
+                        if (x == y) {
1398
+                            100
1399
+                        } else {
1400
+                            x_count <- dplyr::filter(
1401
+                                is_n,
1402
+                                .data$group_id == x
1403
+                            ) %>%
1404
+                                dplyr::pull(.data$num_IS)
1405
+                            (s / x_count) * 100
1406
+                        }
1407
+                    }
1408
+                ),
1409
+                on_g2 = purrr::pmap_dbl(
1410
+                    list(.data$group1, .data$group2, .data$shared),
1411
+                    function(x, y, s) {
1412
+                        if (x == y) {
1413
+                            100
1414
+                        } else {
1415
+                            y_count <- dplyr::filter(
1416
+                                is_n,
1417
+                                .data$group_id == y
1418
+                            ) %>%
1419
+                                dplyr::pull(.data$num_IS)
1420
+                            (s / y_count) * 100
1421
+                        }
1422
+                    }
1423
+                ),
1424
+                on_union = purrr::pmap_dbl(
1425
+                    list(.data$group1, .data$group2, .data$shared),
1426
+                    function(x, y, s) {
1427
+                        x_count <- dplyr::filter(is_n, .data$group_id == x) %>%
1428
+                            dplyr::pull(.data$num_IS)
1429
+                        y_count <- dplyr::filter(is_n, .data$group_id == y) %>%
1430
+                            dplyr::pull(.data$num_IS)
1431
+                        union_count <- (s / (x_count + y_count - s)) * 100
1432
+                    }
1433
+                )
1434
+            )
1435
+    }
1436
+
1437
+    if (!is_count) {
1438
+        return(abs_shared_df)
1439
+    }
1440
+
1441
+    return(list(is_count = is_n, sharing = abs_shared_df))
1442
+}
1443
+
1248 1444
 
1249 1445
 #' A set of pre-defined functions for `sample_statistics`.
1250 1446
 #'
... ...
@@ -325,21 +325,21 @@ import_association_file <- function(path,
325 325
         as_file <- as_file %>%
326 326
             dplyr::mutate(
327 327
                 TimepointMonths = dplyr::if_else(
328
-                        condition = as.numeric(.data$TimePoint) == 0,
329
-                        true = 0,
330
-                        false = dplyr::if_else(
331
-                            condition = as.numeric(.data$TimePoint) > 0 &
332
-                                as.numeric(.data$TimePoint) < 30,
333
-                            true = ceiling(as.numeric(.data$TimePoint) / 30),
334
-                            false = round(as.numeric(.data$TimePoint) / 30)
335
-                        )
336
-                    ),
337
-                TimepointYears = dplyr::if_else(
338
-                        condition = as.numeric(.data$TimePoint) == 0,
339
-                        true = 0,
340
-                        false = ceiling(as.numeric(.data$TimePoint) / 360)
328
+                    condition = as.numeric(.data$TimePoint) == 0,
329
+                    true = 0,
330
+                    false = dplyr::if_else(
331
+                        condition = as.numeric(.data$TimePoint) > 0 &
332
+                            as.numeric(.data$TimePoint) < 30,
333
+                        true = ceiling(as.numeric(.data$TimePoint) / 30),
334
+                        false = round(as.numeric(.data$TimePoint) / 30)
341 335
                     )
342
-                ) %>%
336
+                ),
337
+                TimepointYears = dplyr::if_else(
338
+                    condition = as.numeric(.data$TimePoint) == 0,
339
+                    true = 0,
340
+                    false = ceiling(as.numeric(.data$TimePoint) / 360)
341
+                )
342
+            ) %>%
343 343
             dplyr::mutate(
344 344
                 TimepointMonths = stringr::str_pad(
345 345
                     as.character(.data$TimepointMonths),
... ...
@@ -34,8 +34,10 @@
34 34
     checks) {
35 35
     c(
36 36
         "*** Association file import summary ***",
37
-        i = paste("For detailed report please set option",
38
-                  "'ISAnalytics.widgets' to TRUE"),
37
+        i = paste(
38
+            "For detailed report please set option",
39
+            "'ISAnalytics.widgets' to TRUE"
40
+        ),
39 41
         paste0("Parsing problems detected: ", !is.null(pars_prob)),
40 42
         paste0("Date parsing problems: ", !is.null(dates_prob)),
41 43
         paste0("Column problems detected: ", !is.null(cols_prob)),
... ...
@@ -64,10 +66,12 @@
64 66
 # - import_Vispa2_stats
65 67
 .af_not_imported_err <- function() {
66 68
     c("The association file must be a data frame",
67
-      paste("Import the association file via",
68
-            "`import_association_file()` with file system alignment"),
69
-      i = "See `?import_association_file` and `?import_Vispa2_stats`"
70
-      )
69
+        paste(
70
+            "Import the association file via",
71
+            "`import_association_file()` with file system alignment"
72
+        ),
73
+        i = "See `?import_association_file` and `?import_Vispa2_stats`"
74
+    )
71 75
 }
72 76
 
73 77
 # Signals that the association file must be aligned with fs.
... ...
@@ -75,9 +79,11 @@
75 79
 # - import_Vispa2_stats
76 80
 .af_not_aligned_err <- function() {
77 81
     c("The association file has been imported without file system alignment",
78
-      paste("Import the association file via",
79
-            "`import_association_file()` with file system alignment"),
80
-      i = "See `?import_association_file` and `?import_Vispa2_stats`"
82
+        paste(
83
+            "Import the association file via",
84
+            "`import_association_file()` with file system alignment"
85
+        ),
86
+        i = "See `?import_association_file` and `?import_Vispa2_stats`"
81 87
     )
82 88
 }
83 89
 
... ...
@@ -85,8 +91,8 @@
85 91
 # - import_Vispa2_stats
86 92
 .missing_needed_cols <- function(missing) {
87 93
     c("Some required columns are missing",
88
-      i = paste("Missing columns:", paste0(missing, collapse = ", "))
89
-      )
94
+        i = paste("Missing columns:", paste0(missing, collapse = ", "))
95
+    )
90 96
 }
91 97
 
92 98
 .widgets_error <- function() {
... ...
@@ -94,7 +100,7 @@
94 100
 }
95 101
 
96 102
 .widgets_print_error <- function() {
97
-  paste("Unable to print widget report, skipping this step")
103
+    paste("Unable to print widget report, skipping this step")
98 104
 }
99 105
 
100 106
 .widgets_save_error <- function() {
... ...
@@ -204,9 +210,11 @@
204 210
 
205 211
 # @keywords internal
206 212
 .non_quant_cols_msg <- function(x) {
207
-    c(paste("Found numeric columns that are not quantification values -",
208
-        "these columns will be copied in all resulting matrices."),
209
-        i = paste0("Found: ", paste0(x, collapse = ", "))
213
+    c(paste(
214
+        "Found numeric columns that are not quantification values -",
215
+        "these columns will be copied in all resulting matrices."
216
+    ),
217
+    i = paste0("Found: ", paste0(x, collapse = ", "))
210 218
     )
211 219
 }
212 220
 
... ...
@@ -244,8 +252,10 @@
244 252
 # @keywords internal
245 253
 .nas_introduced_msg <- function() {
246 254
     c("NAs were introduced while producing the data frame.",
247
-      i = paste("The possible cause for this is:",
248
-        "some quantification matrices were not imported for all pools")
255
+        i = paste(
256
+            "The possible cause for this is:",
257
+            "some quantification matrices were not imported for all pools"
258
+        )
249 259
     )
250 260
 }
251 261
 
... ...
@@ -400,17 +410,31 @@
400 410
 }
401 411
 
402 412
 .not_min_key_err <- function(missing) {
403
-  c("The aggregation key must contain the minimal required key",
404
-    x = paste("Missing columns:",
405
-              paste0(missing, collapse = ", ")))
413
+    c("The aggregation key must contain the minimal required key",
414
+        x = paste(
415
+            "Missing columns:",
416
+            paste0(missing, collapse = ", ")
417
+        )
418
+    )
406 419
 }
407 420
 
408 421
 .agg_key_not_found_err <- function(df, key) {
409
-  c(paste("The aggregation key was not found in", df),
410
-    i = paste("Aggregation key used:", paste0(key, collapse = ", ")))
422
+    c(paste("The aggregation key was not found in", df),
423
+        i = paste("Aggregation key used:", paste0(key, collapse = ", "))
424
+    )
411 425
 }
412 426
 
413 427
 .meta_not_agg_err <- function() {
414
-  c("Metadata appears to not be aggregated by the provided aggregation key",
415
-    i = paste("See `?aggregate_metadata`"))
428
+    c("Metadata appears to not be aggregated by the provided aggregation key",
429
+        i = paste("See `?aggregate_metadata`")
430
+    )
431
+}
432
+
433
+# Error message displayed for suggested packages that are not installed
434
+# but required by the called function
435
+.missing_pkg_error <- function(pkg) {
436
+    c("Missing package",
437
+        x = paste("Package", pkg, "is required for this functionality."),
438
+        i = paste0('To install: `install.packages("', pkg, '")`')
439
+    )
416 440
 }
... ...
@@ -68,222 +68,222 @@
68 68
 #' op <- options(ISAnalytics.widgets = FALSE)
69 69
 #'
70 70
 #' path_AF <- system.file("extdata", "ex_association_file.tsv",
71
-#'   package = "ISAnalytics"
71
+#'     package = "ISAnalytics"
72 72
 #' )
73 73
 #' root_correct <- system.file("extdata", "fs.zip",
74
-#'   package = "ISAnalytics"
74
+#'     package = "ISAnalytics"
75 75
 #' )
76 76
 #' root_correct <- unzip_file_system(root_correct, "fs")
77 77
 #'
78 78
 #' matrices <- import_parallel_Vispa2Matrices_auto(
79
-#'   association_file = path_AF, root = root_correct,
80
-#'   quantification_type = c("seqCount", "fragmentEstimate"),
81
-#'   matrix_type = "annotated", workers = 2, patterns = NULL,
82
-#'   matching_opt = "ANY",
83
-#'   dates_format = "dmy"
79
+#'     association_file = path_AF, root = root_correct,
80
+#'     quantification_type = c("seqCount", "fragmentEstimate"),
81
+#'     matrix_type = "annotated", workers = 2, patterns = NULL,
82
+#'     matching_opt = "ANY",
83
+#'     dates_format = "dmy"
84 84
 #' )
85 85
 #'
86 86
 #' cis <- CIS_grubbs(matrices)
87 87
 #' plot <- CIS_volcano_plot(cis)
88 88
 #' options(op)
89 89
 CIS_volcano_plot <- function(x,
90
-                             onco_db_file = system.file("extdata",
91
-                               "201806_uniprot-Proto-oncogene.tsv.xz",
92
-                               package = "ISAnalytics"
93
-                             ),
94
-                             tumor_suppressors_db_file = system.file("extdata",
95
-                               "201806_uniprot-Tumor-suppressor.tsv.xz",
96
-                               package = "ISAnalytics"
97
-                             ),
98
-                             species = "human",
99
-                             known_onco = known_clinical_oncogenes(),
100
-                             suspicious_genes =
101
-                               clinical_relevant_suspicious_genes(),
102
-                             significance_threshold = 0.05,
103
-                             annotation_threshold_ontots = 0.1,
104
-                             highlight_genes = NULL,
105
-                             title_prefix = NULL,
106
-                             return_df = FALSE) {
107
-  ## Check params
108
-  stopifnot(is.data.frame(x))
109
-  stopifnot(is.character(onco_db_file) & length(onco_db_file) == 1)
110
-  stopifnot(is.character(tumor_suppressors_db_file) &
111
-    length(tumor_suppressors_db_file) == 1)
112
-  stopifnot(is.character(species))
113
-  stopifnot(is.data.frame(known_onco))
114
-  stopifnot(is.data.frame(suspicious_genes))
115
-  stopifnot(is.numeric(significance_threshold) |
116
-    is.integer(significance_threshold) &
117
-      length(significance_threshold) == 1)
118
-  stopifnot(is.numeric(annotation_threshold_ontots) |
119
-    is.integer(annotation_threshold_ontots) &
120
-      length(annotation_threshold_ontots) == 1)
121
-  stopifnot(is.null(title_prefix) || (is.character(title_prefix) &
122
-    length(title_prefix == 1)))
123
-  stopifnot(is.null(highlight_genes) || is.character(highlight_genes))
124
-  stopifnot(is.logical(return_df))
125
-  if (is.null(title_prefix)) {
126
-    title_prefix <- ""
127
-  }
128
-  ## Load onco and ts
129
-  oncots_to_use <- .load_onco_ts_genes(
130
-    onco_db_file,
131
-    tumor_suppressors_db_file,
132
-    species
133
-  )
134
-  ## Check if CIS function was already called
135
-  min_cis_col <- c(
136
-    "tdist_bonferroni_default", "tdist_fdr",
137
-    "neg_zscore_minus_log2_int_freq_tolerance"
138
-  )
139
-  cis_grubbs_df <- if (!all(min_cis_col %in% colnames(x))) {
140
-    if (getOption("ISAnalytics.verbose") == TRUE) {
141
-      message(paste("Calculating CIS_grubbs for x..."))
90
+    onco_db_file = system.file("extdata",
91
+        "201806_uniprot-Proto-oncogene.tsv.xz",
92
+        package = "ISAnalytics"
93
+    ),
94
+    tumor_suppressors_db_file = system.file("extdata",
95
+        "201806_uniprot-Tumor-suppressor.tsv.xz",
96
+        package = "ISAnalytics"
97
+    ),
98
+    species = "human",
99
+    known_onco = known_clinical_oncogenes(),
100
+    suspicious_genes =
101
+        clinical_relevant_suspicious_genes(),
102
+    significance_threshold = 0.05,
103
+    annotation_threshold_ontots = 0.1,
104
+    highlight_genes = NULL,
105
+    title_prefix = NULL,
106
+    return_df = FALSE) {
107
+    ## Check params
108
+    stopifnot(is.data.frame(x))
109
+    stopifnot(is.character(onco_db_file) & length(onco_db_file) == 1)
110
+    stopifnot(is.character(tumor_suppressors_db_file) &
111
+        length(tumor_suppressors_db_file) == 1)
112
+    stopifnot(is.character(species))
113
+    stopifnot(is.data.frame(known_onco))
114
+    stopifnot(is.data.frame(suspicious_genes))
115
+    stopifnot(is.numeric(significance_threshold) |
116
+        is.integer(significance_threshold) &
117
+            length(significance_threshold) == 1)
118
+    stopifnot(is.numeric(annotation_threshold_ontots) |
119
+        is.integer(annotation_threshold_ontots) &
120
+            length(annotation_threshold_ontots) == 1)
121
+    stopifnot(is.null(title_prefix) || (is.character(title_prefix) &
122
+        length(title_prefix == 1)))
123
+    stopifnot(is.null(highlight_genes) || is.character(highlight_genes))
124
+    stopifnot(is.logical(return_df))
125
+    if (is.null(title_prefix)) {
126
+        title_prefix <- ""
142 127
     }
143
-    CIS_grubbs(x)
144
-  } else {
145
-    x
146
-  }
147
-  ## Join all dfs by gene
148
-  cis_grubbs_df <- cis_grubbs_df %>%
149
-    dplyr::left_join(oncots_to_use, by = "GeneName") %>%
150
-    dplyr::left_join(known_onco, by = "GeneName") %>%
151
-    dplyr::left_join(suspicious_genes, by = "GeneName")
152
-  ## Add info to CIS
153
-  cis_grubbs_df <- cis_grubbs_df %>%
154
-    dplyr::mutate(minus_log_p = -log(.data$tdist_bonferroni_default,
155
-      base = 10
156
-    ))
157
-  cis_grubbs_df <- cis_grubbs_df %>%
158
-    dplyr::mutate(
159
-      minus_log_p_fdr = -log(.data$tdist_fdr, base = 10),
160
-      positive_outlier_and_significant = ifelse(
161
-        test = !is.na(.data$tdist_fdr) &
162
-          .data$tdist_fdr < significance_threshold,
163
-        yes = TRUE,
164
-        no = FALSE
165
-      )
128
+    ## Load onco and ts
129
+    oncots_to_use <- .load_onco_ts_genes(
130
+        onco_db_file,
131
+        tumor_suppressors_db_file,
132
+        species
166 133
     )
167
-  cis_grubbs_df <- cis_grubbs_df %>%
168
-    dplyr::mutate(
169
-      KnownGeneClass = ifelse(
170
-        is.na(.data$Onco1_TS2),
171
-        yes = "Other",
172
-        no = ifelse(.data$Onco1_TS2 == 1,
173
-          yes = "OncoGene",
174
-          no = "TumSuppressor"
134
+    ## Check if CIS function was already called
135
+    min_cis_col <- c(
136
+        "tdist_bonferroni_default", "tdist_fdr",
137
+        "neg_zscore_minus_log2_int_freq_tolerance"
138
+    )
139
+    cis_grubbs_df <- if (!all(min_cis_col %in% colnames(x))) {
140
+        if (getOption("ISAnalytics.verbose") == TRUE) {
141
+            message(paste("Calculating CIS_grubbs for x..."))
142
+        }
143
+        CIS_grubbs(x)
144
+    } else {
145
+        x
146
+    }
147
+    ## Join all dfs by gene
148
+    cis_grubbs_df <- cis_grubbs_df %>%
149
+        dplyr::left_join(oncots_to_use, by = "GeneName") %>%
150
+        dplyr::left_join(known_onco, by = "GeneName") %>%
151
+        dplyr::left_join(suspicious_genes, by = "GeneName")
152
+    ## Add info to CIS
153
+    cis_grubbs_df <- cis_grubbs_df %>%
154
+        dplyr::mutate(minus_log_p = -log(.data$tdist_bonferroni_default,
155
+            base = 10
156
+        ))
157
+    cis_grubbs_df <- cis_grubbs_df %>%
158
+        dplyr::mutate(
159
+            minus_log_p_fdr = -log(.data$tdist_fdr, base = 10),
160
+            positive_outlier_and_significant = ifelse(
161
+                test = !is.na(.data$tdist_fdr) &
162
+                    .data$tdist_fdr < significance_threshold,
163
+                yes = TRUE,
164
+                no = FALSE
165
+            )
175 166
         )
176
-      ),
177
-      CriticalForInsMut = ifelse(!is.na(.data$KnownClonalExpansion),
178
-        yes = TRUE, no = FALSE
179
-      )
167
+    cis_grubbs_df <- cis_grubbs_df %>%
168
+        dplyr::mutate(
169
+            KnownGeneClass = ifelse(
170
+                is.na(.data$Onco1_TS2),
171
+                yes = "Other",
172
+                no = ifelse(.data$Onco1_TS2 == 1,
173
+                    yes = "OncoGene",
174
+                    no = "TumSuppressor"
175
+                )
176
+            ),
177
+            CriticalForInsMut = ifelse(!is.na(.data$KnownClonalExpansion),
178
+                yes = TRUE, no = FALSE
179
+            )
180
+        )
181
+    significance_threshold_minus_log_p <- -log(significance_threshold,
182
+        base = 10
180 183
     )
181
-  significance_threshold_minus_log_p <- -log(significance_threshold,
182
-    base = 10
183
-  )
184
-  annotation_threshold_ontots_log <- -log(annotation_threshold_ontots,
185
-    base = 10
186
-  )
187
-  ## Trace plot
188
-  plot_cis_fdr_slice <- ggplot2::ggplot(
189
-    data = cis_grubbs_df,
190
-    ggplot2::aes_(
191
-      y = ~minus_log_p_fdr,
192
-      x = ~neg_zscore_minus_log2_int_freq_tolerance,
193
-      color = ~KnownGeneClass,
194
-      fill = ~KnownGeneClass
195
-    ),
196
-    na.rm = TRUE, se = TRUE
197
-  ) +
198
-    ggplot2::geom_point(alpha = .5, size = 3) +
199
-    ggplot2::geom_hline(
200
-      yintercept = significance_threshold_minus_log_p,
201
-      color = "black", size = 1, show.legend = TRUE, linetype = "dashed"
202
-    ) +
203
-    ggplot2::scale_y_continuous(limits = c(0, max(c(
204
-      (significance_threshold_minus_log_p + 0.5),
205
-      max(cis_grubbs_df$minus_log_p_fdr, na.rm = TRUE)
206
-    ), na.rm = TRUE))) +
207
-    ggplot2::scale_x_continuous(breaks = seq(-4, 4, 2)) +
208
-    ggrepel::geom_label_repel(
209
-      data = dplyr::filter(
210
-        cis_grubbs_df,
211
-        .data$tdist_fdr < significance_threshold
212
-      ),
213
-      ggplot2::aes_(label = ~GeneName),
214
-      box.padding = ggplot2::unit(0.35, "lines"),
215
-      point.padding = ggplot2::unit(0.3, "lines"),
216
-      color = "white",
217
-      segment.color = "black",
218
-      max.overlaps = Inf
219
-    ) +
220
-    ggplot2::theme(
221
-      strip.text.y = ggplot2::element_text(
222
-        size = 16,
223
-        colour = "blue",
224
-        angle = 270
225
-      ),
226
-      strip.text.x = ggplot2::element_text(
227
-        size = 16,
228
-        colour = "blue",
229
-        angle = 0
230
-      )
231
-    ) +
232
-    ggplot2::theme(strip.text = ggplot2::element_text(
233
-      face = "bold",
234
-      size = 16
235
-    )) +
236
-    ggplot2::theme(
237
-      axis.text.x = ggplot2::element_text(size = 16),
238
-      axis.text.y = ggplot2::element_text(size = 16),
239
-      axis.title = ggplot2::element_text(size = 16),
240
-      plot.title = ggplot2::element_text(size = 20)
241
-    ) +
242
-    ggplot2::labs(
243
-      title = paste(
244
-        title_prefix,
245
-        "- Volcano plot of IS gene frequency and",
246
-        "CIS results"
247
-      ),
248
-      y = "P-value Grubbs test (-log10(p))",
249
-      x = "Integration frequency (log2)",
250
-      size = "Avg Transcr. Len",
251
-      color = "Onco TumSupp Genes",
252
-      subtitle = paste0(
253
-        "Significance threshold for annotation",
254
-        " labeling: P-value < ", significance_threshold,
255
-        "(FDR adjusted; ",
256
-        "-log = ", (round(-log(significance_threshold, base = 10), 3)),
257
-        ").\nOnco/TS genes source: UniProt (other genes ",
258
-        "labeled as 'Other'). Annotated if P-value > ",
259
-        round(annotation_threshold_ontots_log, 3), "\nexcept ",
260
-        "selected genes to be highlighted"
261
-      )
184
+    annotation_threshold_ontots_log <- -log(annotation_threshold_ontots,
185
+        base = 10
262 186
     )
263
-  if (!is.null(highlight_genes) && !purrr::is_empty(highlight_genes)) {
264
-    ## Look for the genes (case insensitive)
265
-    to_highlight <- cis_grubbs_df %>%
266
-      dplyr::filter(
267
-        stringr::str_to_lower(.data$GeneName) %in%
268
-          stringr::str_to_lower(highlight_genes),
269
-        .data$tdist_fdr >= significance_threshold
270
-      )
271
-    plot_cis_fdr_slice <- plot_cis_fdr_slice +
272
-      ggrepel::geom_label_repel(
273
-        data = to_highlight,
274
-        ggplot2::aes_(label = ~GeneName),
275
-        box.padding = ggplot2::unit(0.35, "lines"),
276
-        point.padding = ggplot2::unit(0.3, "lines"),
277
-        color = "white",
278
-        segment.color = "black",
279
-        max.overlaps = Inf
280
-      )
281
-  }
282
-  if (return_df) {
283
-    return(list(plot = plot_cis_fdr_slice, df = cis_grubbs_df))
284
-  } else {
285
-    return(plot_cis_fdr_slice)
286
-  }
187
+    ## Trace plot
188
+    plot_cis_fdr_slice <- ggplot2::ggplot(
189
+        data = cis_grubbs_df,
190
+        ggplot2::aes_(
191
+            y = ~minus_log_p_fdr,
192
+            x = ~neg_zscore_minus_log2_int_freq_tolerance,
193
+            color = ~KnownGeneClass,
194
+            fill = ~KnownGeneClass
195
+        ),
196
+        na.rm = TRUE, se = TRUE
197
+    ) +
198
+        ggplot2::geom_point(alpha = .5, size = 3) +
199
+        ggplot2::geom_hline(
200
+            yintercept = significance_threshold_minus_log_p,
201
+            color = "black", size = 1, show.legend = TRUE, linetype = "dashed"
202
+        ) +
203
+        ggplot2::scale_y_continuous(limits = c(0, max(c(
204
+            (significance_threshold_minus_log_p + 0.5),
205
+            max(cis_grubbs_df$minus_log_p_fdr, na.rm = TRUE)
206
+        ), na.rm = TRUE))) +
207
+        ggplot2::scale_x_continuous(breaks = seq(-4, 4, 2)) +
208
+        ggrepel::geom_label_repel(
209
+            data = dplyr::filter(
210
+                cis_grubbs_df,
211
+                .data$tdist_fdr < significance_threshold
212
+            ),
213
+            ggplot2::aes_(label = ~GeneName),
214
+            box.padding = ggplot2::unit(0.35, "lines"),
215
+            point.padding = ggplot2::unit(0.3, "lines"),
216
+            color = "white",
217
+            segment.color = "black",
218
+            max.overlaps = Inf
219
+        ) +
220
+        ggplot2::theme(
221
+            strip.text.y = ggplot2::element_text(
222
+                size = 16,
223
+                colour = "blue",
224
+                angle = 270
225
+            ),
226
+            strip.text.x = ggplot2::element_text(
227
+                size = 16,
228
+                colour = "blue",
229
+                angle = 0
230
+            )
231
+        ) +
232
+        ggplot2::theme(strip.text = ggplot2::element_text(
233
+            face = "bold",
234
+            size = 16
235
+        )) +
236
+        ggplot2::theme(
237
+            axis.text.x = ggplot2::element_text(size = 16),
238
+            axis.text.y = ggplot2::element_text(size = 16),
239
+            axis.title = ggplot2::element_text(size = 16),
240
+            plot.title = ggplot2::element_text(size = 20)
241
+        ) +
242
+        ggplot2::labs(
243
+            title = paste(
244
+                title_prefix,
245
+                "- Volcano plot of IS gene frequency and",
246
+                "CIS results"
247
+            ),
248
+            y = "P-value Grubbs test (-log10(p))",
249
+            x = "Integration frequency (log2)",
250
+            size = "Avg Transcr. Len",
251
+            color = "Onco TumSupp Genes",
252
+            subtitle = paste0(
253
+                "Significance threshold for annotation",
254
+                " labeling: P-value < ", significance_threshold,
255
+                "(FDR adjusted; ",
256
+                "-log = ", (round(-log(significance_threshold, base = 10), 3)),
257
+                ").\nOnco/TS genes source: UniProt (other genes ",
258
+                "labeled as 'Other'). Annotated if P-value > ",
259
+                round(annotation_threshold_ontots_log, 3), "\nexcept ",
260
+                "selected genes to be highlighted"
261
+            )
262
+        )
263
+    if (!is.null(highlight_genes) && !purrr::is_empty(highlight_genes)) {
264
+        ## Look for the genes (case insensitive)
265
+        to_highlight <- cis_grubbs_df %>%
266
+            dplyr::filter(
267
+                stringr::str_to_lower(.data$GeneName) %in%
268
+                    stringr::str_to_lower(highlight_genes),
269
+                .data$tdist_fdr >= significance_threshold
270
+            )
271
+        plot_cis_fdr_slice <- plot_cis_fdr_slice +
272
+            ggrepel::geom_label_repel(
273
+                data = to_highlight,
274
+                ggplot2::aes_(label = ~GeneName),
275
+                box.padding = ggplot2::unit(0.35, "lines"),
276
+                point.padding = ggplot2::unit(0.3, "lines"),
277
+                color = "white",
278
+                segment.color = "black",
279
+                max.overlaps = Inf
280
+            )
281
+    }
282
+    if (return_df) {
283
+        return(list(plot = plot_cis_fdr_slice, df = cis_grubbs_df))
284
+    } else {
285
+        return(plot_cis_fdr_slice)
286
+    }
287 287
 }
288 288
 
289 289
 #' Known clinical oncogenes (for mouse and human).
... ...
@@ -297,10 +297,10 @@ CIS_volcano_plot <- function(x,
297 297
 #' @examples
298 298
 #' known_clinical_oncogenes()
299 299
 known_clinical_oncogenes <- function() {
300
-  tibble::tibble(
301
-    GeneName = c("MECOM", "CCND2", "TAL1", "LMO2", "HMGA2"),
302
-    KnownClonalExpansion = TRUE
303
-  )
300
+    tibble::tibble(
301
+        GeneName = c("MECOM", "CCND2", "TAL1", "LMO2", "HMGA2"),
302
+        KnownClonalExpansion = TRUE
303
+    )
304 304
 }
305 305
 
306 306
 #' Clinical relevant suspicious genes (for mouse and human).
... ...
@@ -314,15 +314,15 @@ known_clinical_oncogenes <- function() {
314 314
 #' @examples
315 315
 #' clinical_relevant_suspicious_genes()
316 316
 clinical_relevant_suspicious_genes <- function() {
317
-  tibble::tibble(
318
-    GeneName = c(
319
-      "DNMT3A", "TET2", "ASXL1",
320
-      "JAK2", "CBL", "TP53"
321
-    ),
322
-    ClinicalRelevance = TRUE,
323
-    DOIReference =
324
-      "https://doi.org/10.1182/blood-2018-01-829937"
325
-  )
317
+    tibble::tibble(
318
+        GeneName = c(
319
+            "DNMT3A", "TET2", "ASXL1",
320
+            "JAK2", "CBL", "TP53"
321
+        ),
322
+        ClinicalRelevance = TRUE,
323
+        DOIReference =
324
+            "https://doi.org/10.1182/blood-2018-01-829937"
325
+    )
326 326
 }
327 327
 
328 328
 #' Plot of the estimated HSC population size for each patient.
... ...
@@ -364,59 +364,707 @@ clinical_relevant_suspicious_genes <- function() {
364 364
 #'     association_file = association_file,
365 365
 #'     value_cols = "seqCount"
366 366
 #' )
367
-#' estimate <- HSC_population_size_estimate(x = agg,
368
-#' metadata = aggregated_meta,
369
-#' stable_timepoints = NULL)
367
+#' estimate <- HSC_population_size_estimate(
368
+#'     x = agg,
369
+#'     metadata = aggregated_meta,
370
+#'     stable_timepoints = NULL
371
+#' )
370 372
 #' p <- HSC_population_plot(estimate, "PROJECT1")
371 373
 #' options(op)
372 374
 HSC_population_plot <- function(estimates,
373
-                                project_name,
374
-                                timepoints = "Consecutive",
375
-                                models = "Mth Chao (LB)") {
375
+    project_name,
376
+    timepoints = "Consecutive",
377
+    models = "Mth Chao (LB)") {
376 378
     if (is.null(estimates)) {
377 379
         return(NULL)
378 380
     }
379
-  ## Pre-filter
380
-  df <- estimates %>%
381
-    dplyr::filter(
382
-      .data$Timepoints %in% timepoints,
383
-      .data$Model %in% models
384
-    )
385
-  p <- ggplot2::ggplot(
386
-    data = df,
387
-    ggplot2::aes_(
388
-      y = ~PopSize,
389
-      x = ~TimePoint_to,
390
-      color = ~SubjectID
391
-    ),
392
-    na.rm = TRUE, se = TRUE
393
-  ) +
394
-    ggplot2::geom_point(alpha = .5) +
395
-    ggplot2::geom_line(size = 2, alpha = .7) +
396
-    ggplot2::theme(
397
-      axis.text.x = ggplot2::element_text(size = 14),
398
-      axis.text.y = ggplot2::element_text(size = 14),
399
-      axis.title = ggplot2::element_text(size = 16),
400
-      plot.title = ggplot2::element_text(size = 20),
401
-      strip.text.x = ggplot2::element_text(
402
-        size = 14,
403
-        colour = "darkblue",
404
-        angle = 0,
405
-        face = "bold"
406
-      ),
407
-      strip.text.y = ggplot2::element_text(
408
-        size = 14,
409
-        colour = "darkred",
410
-        angle = 270,
411
-        face = "bold"
412
-      )
381
+    ## Pre-filter
382
+    df <- estimates %>%
383
+        dplyr::filter(
384
+            .data$Timepoints %in% timepoints,
385
+            .data$Model %in% models
386
+        )
387
+    p <- ggplot2::ggplot(
388
+        data = df,
389
+        ggplot2::aes_(
390
+            y = ~PopSize,
391
+            x = ~TimePoint_to,
392
+            color = ~SubjectID
393
+        ),
394
+        na.rm = TRUE, se = TRUE
413 395
     ) +
414
-    ggplot2::labs(
415
-      title = paste(project_name, "- HSC Population size"),
416
-      x = "Time Point (months after GT)",
417
-      y = "HSC size (Chao model with bias correction)",
418
-      colour = "Patient",
419
-      subtitle = "IS from Myeloid PB cells as surrogate of HSC."
396
+        ggplot2::geom_point(alpha = .5) +
397
+        ggplot2::geom_line(size = 2, alpha = .7) +
398
+        ggplot2::theme(
399
+            axis.text.x = ggplot2::element_text(size = 14),
400
+            axis.text.y = ggplot2::element_text(size = 14),
401
+            axis.title = ggplot2::element_text(size = 16),
402
+            plot.title = ggplot2::element_text(size = 20),
403
+            strip.text.x = ggplot2::element_text(
404
+                size = 14,
405
+                colour = "darkblue",
406
+                angle = 0,
407
+                face = "bold"
408
+            ),
409
+            strip.text.y = ggplot2::element_text(
410
+                size = 14,
411
+                colour = "darkred",
412
+                angle = 270,
413
+                face = "bold"
414
+            )
415
+        ) +
416
+        ggplot2::labs(
417
+            title = paste(project_name, "- HSC Population size"),
418
+            x = "Time Point (months after GT)",
419
+            y = "HSC size (Chao model with bias correction)",
420
+            colour = "Patient",
421
+            subtitle = "IS from Myeloid PB cells as surrogate of HSC."
422
+        )
423
+    p
424
+}
425
+
426
+## --- Alluvial plots --- ##
427
+
428
+#' Alluvial plots for IS distribution in time.
429
+#'
430
+#' \lifecycle{experimental}
431
+#' Alluvial plots allow the visualization of integration site distribution
432
+#' at different points in time within the same group.
433
+#' This functionality requires the suggested package
434
+#' [ggalluvial](https://corybrunson.github.io/ggalluvial/).
435
+#'
436
+#' @details
437
+#' ### Input data frame
438
+#' The input data frame must contain all the columns specified in the
439
+#' arguments `group`, `plot_x`, `plot_y` and `alluvia`. The standard
440
+#' input for this function is the data frame obtained via the
441
+#' \link{compute_abundance} function.
442
+#'
443
+#' ### Plotting threshold on y
444
+#' The plotting threshold on the quantification on the y axis serves
445
+#' to highlight only relevant information on the plot and to reduce
446
+#' computation time. The default value is 1, which applies to the default
447
+#' column plotted on the y axis, which holds a percentage value. In
448
+#' natural language this roughly translates to "highlight with colours only
449
+#' those integrations (alluvia) that have an abundance value >= 1 % in at
450
+#' least 1 point in time". The remaining integrations will be plotted in
451
+#' grey in the strata.
452
+#'
453
+#' @param x A data frame. See details.
454
+#' @param group Character vector containing the column names that identify
455
+#' unique groups.
456
+#' @param plot_x Column name to plot on the x axis
457
+#' @param plot_y Column name to plot on the y axis
458
+#' @param alluvia Character vector of column names that uniquely identify
459
+#' alluvia
460
+#' @param alluvia_plot_y_threshold Numeric value. Everything below this
461
+#' threshold on y will be plotted in grey and aggregated. See details.
462
+#' @param top_abundant_tbl Logical. Produce the summary top abundant tables
463
+#' via \link{top_abund_tableGrob}?
464
+#' @param ... Additional arguments to pass on to \link{top_abund_tableGrob}
465
+#'
466
+#' @family Plotting functions
467
+#' @importFrom rlang abort eval_tidy call2 inform .data fn_fmls_names dots_list
468
+#' @import dplyr
469
+#' @import BiocParallel
470
+#' @importFrom purrr set_names
471
+#' @importFrom tidyr unite
472
+#'
473
+#' @return For each group a list with the associated plot and optionally
474
+#' the summary tableGrob
475
+#' @export
476
+#'
477
+#' @examples
478
+#' op <- options("ISAnalytics.widgets" = FALSE, "ISAnalytics.verbose" = FALSE)
479
+#' path_AF <- system.file("extdata", "ex_association_file.tsv",
480
+#'     package = "ISAnalytics"
481
+#' )
482
+#' root_correct <- system.file("extdata", "fs.zip", package = "ISAnalytics")
483
+#' root_correct <- unzip_file_system(root_correct, "fs")
484
+#' association_file <- import_association_file(path_AF, root_correct,
485
+#'     dates_format = "dmy"
486
+#' )
487
+#' matrices <- import_parallel_Vispa2Matrices_auto(
488
+#'     association_file = association_file, root = NULL,
489
+#'     quantification_type = c("fragmentEstimate", "seqCount"),
490
+#'     matrix_type = "annotated", workers = 2, matching_opt = "ANY"
491
+#' )
492
+#' agg <- aggregate_values_by_key(
493
+#'     x = matrices,
494
+#'     association_file = association_file,
495
+#'     value_cols = c("fragmentEstimate", "seqCount")
496
+#' )
497
+#' abundance <- compute_abundance(agg,
498
+#'     columns = "fragmentEstimate_sum",
499