Browse code

[NEW+FIX] Added funs aggregation, recalibration, fixed issues

GiuliaPais authored on 01/09/2020 19:46:55
Showing 137 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: ISAnalytics
2 2
 Title: Analyze gene therapy vector insertion sites data identified from genomics next generation sequencing reads for clonal tracking studies
3
-Version: 0.99.8
3
+Version: 0.99.9
4 4
 Date: 2020-07-03
5 5
 Authors@R: c(
6 6
   person(given = "Andrea",
... ...
@@ -18,13 +18,12 @@ Authors@R: c(
18 18
 Description: In gene therapy, stem cells are modified using viral vectors to deliver the therapeutic transgene and replace functional properties since the genetic modification is stable and inherited in all cell progeny. The retrieval and mapping of the sequences flanking the virus-host DNA junctions allows the identification of insertion sites (IS), essential for monitoring the evolution of genetically modified cells in vivo. A comprehensive toolkit for the analysis of IS is required to foster clonal tracking studies and supporting the assessment of safety and long term efficacy in vivo. This package is aimed at (1) supporting automation of IS workflow, (2) performing base and advance analysis for IS tracking (clonal abundance, clonal expansions and statistics for insertional mutagenesis, etc.), (3) providing basic biology insights of transduced stem cells in vivo.
19 19
 License: CC BY 4.0
20 20
 URL: https://github.com/calabrialab/isanalytics
21
-BugReports: https://github.com//calabrialab/isanalytics
22
-biocViews: BiomedicalInformatics
21
+BugReports: https://github.com/calabrialab/ISAnalytics/issues
22
+biocViews: BiomedicalInformatics, Sequencing, SingleCell
23 23
 Depends: 
24
-    R (>= 4.0)
24
+    R (>= 4.0),
25
+    magrittr
25 26
 Imports: 
26
-    methods,
27
-    vctrs,
28 27
     utils,
29 28
     reactable,
30 29
     htmltools,
... ...
@@ -36,7 +35,6 @@ Imports:
36 35
     forcats,
37 36
     tibble,
38 37
     BiocParallel,
39
-    magrittr,
40 38
     stringr,
41 39
     fs,
42 40
     zip,
... ...
@@ -53,5 +51,6 @@ Suggests:
53 51
     knitcitations,
54 52
     sessioninfo,
55 53
     rmarkdown,
56
-    roxygen2
54
+    roxygen2,
55
+    psych
57 56
 VignetteBuilder: knitr
58 57
new file mode 100644
... ...
@@ -0,0 +1,21 @@
1
+Version: 1.0
2
+
3
+RestoreWorkspace: No
4
+SaveWorkspace: No
5
+AlwaysSaveHistory: Default
6
+
7
+EnableCodeIndexing: Yes
8
+UseSpacesForTab: Yes
9
+NumSpacesForTab: 2
10
+Encoding: UTF-8
11
+
12
+RnwWeave: Sweave
13
+LaTeX: pdfLaTeX
14
+
15
+AutoAppendNewline: Yes
16
+StripTrailingWhitespace: Yes
17
+
18
+BuildType: Package
19
+PackageUseDevtools: Yes
20
+PackageInstallArgs: --no-multiarch --with-keep.source
21
+PackageRoxygenize: rd,collate,namespace
... ...
@@ -1,44 +1,26 @@
1 1
 # Generated by roxygen2: do not edit by hand
2 2
 
3
-S3method(anti_join,ISADataFrame)
4
-S3method(inner_join,ISADataFrame)
5
-S3method(left_join,ISADataFrame)
6
-S3method(pivot_longer,ISADataFrame)
7
-S3method(print,ISADataFrame)
8
-S3method(select,ISADataFrame)
9
-S3method(semi_join,ISADataFrame)
10
-export("%>%")
11
-export(":=")
12
-export(.data)
13
-export(ISADataFrame)
14
-export(add_metadata)
15
-export(as_label)
16
-export(as_name)
3
+export(aggregate_metadata)
4
+export(aggregate_values_by_key)
5
+export(annotation_IS_vars)
6
+export(association_file_columns)
7
+export(compute_abundance)
8
+export(compute_near_integrations)
17 9
 export(date_columns_coll)
18 10
 export(date_formats)
19
-export(enquo)
20
-export(enquos)
21
-export(expr)
22 11
 export(generate_Vispa2_launch_AF)
23 12
 export(generate_blank_association_file)
24 13
 export(import_association_file)
25 14
 export(import_parallel_Vispa2Matrices_auto)
26 15
 export(import_parallel_Vispa2Matrices_interactive)
27 16
 export(import_single_Vispa2Matrix)
28
-export(is.ISADataFrame)
29
-export(mandatoryVars)
17
+export(mandatory_IS_vars)
30 18
 export(matching_options)
31
-export(metadata)
32
-export(new_ISADataFrame)
33 19
 export(quantification_types)
34 20
 export(realign_after_collisions)
21
+export(reduced_AF_columns)
35 22
 export(remove_collisions)
36
-export(remove_metadata)
37
-export(sym)
38
-export(syms)
39 23
 export(unzip_file_system)
40
-export(validate_ISADataFrame)
41
-export(vec_restore.ISADataFrame)
42 24
 import(BiocParallel)
43 25
 import(dplyr)
44 26
 import(lubridate)
... ...
@@ -50,7 +32,6 @@ importFrom(BiocParallel,bpstop)
50 32
 importFrom(BiocParallel,bptry)
51 33
 importFrom(dplyr,across)
52 34
 importFrom(dplyr,all_of)
53
-importFrom(dplyr,anti_join)
54 35
 importFrom(dplyr,arrange)
55 36
 importFrom(dplyr,bind_cols)
56 37
 importFrom(dplyr,bind_rows)
... ...
@@ -66,6 +47,7 @@ importFrom(dplyr,rename)
66 47
 importFrom(dplyr,select)
67 48
 importFrom(dplyr,semi_join)
68 49
 importFrom(dplyr,slice)
50
+importFrom(dplyr,summarise)
69 51
 importFrom(forcats,as_factor)
70 52
 importFrom(forcats,fct_inseq)
71 53
 importFrom(fs,as_fs_path)
... ...
@@ -84,14 +66,16 @@ importFrom(htmltools,h4)
84 66
 importFrom(htmltools,span)
85 67
 importFrom(htmltools,tagList)
86 68
 importFrom(magrittr,`%>%`)
87
-importFrom(methods,is)
88 69
 importFrom(purrr,flatten)
89 70
 importFrom(purrr,is_empty)
90 71
 importFrom(purrr,map)
91 72
 importFrom(purrr,map2)
73
+importFrom(purrr,map2_lgl)
92 74
 importFrom(purrr,map_dbl)
93 75
 importFrom(purrr,map_lgl)
94 76
 importFrom(purrr,pmap)
77
+importFrom(purrr,pmap_df)
78
+importFrom(purrr,pmap_dfr)
95 79
 importFrom(purrr,reduce)
96 80
 importFrom(purrr,set_names)
97 81
 importFrom(purrr,walk2)
... ...
@@ -99,19 +83,15 @@ importFrom(reactable,colDef)
99 83
 importFrom(reactable,reactable)
100 84
 importFrom(reactable,reactableTheme)
101 85
 importFrom(readr,write_tsv)
102
-importFrom(rlang,":=")
103 86
 importFrom(rlang,.data)
104
-importFrom(rlang,as_label)
105
-importFrom(rlang,as_name)
106
-importFrom(rlang,enquo)
107
-importFrom(rlang,enquos)
108 87
 importFrom(rlang,env_bind)
109
-importFrom(rlang,env_parent)
88
+importFrom(rlang,env_get)
110 89
 importFrom(rlang,eval_tidy)
111 90
 importFrom(rlang,expr)
112
-importFrom(rlang,sym)
113
-importFrom(rlang,syms)
91
+importFrom(rlang,is_function)
92
+importFrom(rlang,is_installed)
114 93
 importFrom(stringr,str_detect)
94
+importFrom(stringr,str_extract)
115 95
 importFrom(stringr,str_extract_all)
116 96
 importFrom(stringr,str_pad)
117 97
 importFrom(stringr,str_replace_all)
... ...
@@ -120,11 +100,11 @@ importFrom(tibble,add_column)
120 100
 importFrom(tibble,as_tibble)
121 101
 importFrom(tibble,as_tibble_col)
122 102
 importFrom(tibble,is_tibble)
123
-importFrom(tibble,new_tibble)
124 103
 importFrom(tibble,tibble)
125 104
 importFrom(tidyr,nest)
126 105
 importFrom(tidyr,pivot_longer)
127 106
 importFrom(tidyr,separate)
107
+importFrom(tidyr,unite)
128 108
 importFrom(tidyr,unnest)
129 109
 importFrom(utils,read.csv)
130 110
 importFrom(utils,tail)
... ...
@@ -1,3 +1,19 @@
1
-# ISAnalytics 0.0.0.9000
1
+# ISAnalytics News
2
+
3
+## Changes in version 0.99.8 (2020-08-12)
4
+* Submitted to Bioconductor
5
+
6
+## Changes in version 0.99.9 (2020-09-01)
7
+
8
+#### NEW FEATURES
9
+
10
+* Added functionality: aggregate functions
11
+* Added vignette on aggregate functions
12
+* Added recalibration functions
13
+* Added first analysis function (compute_abundance)
14
+
15
+#### SIGNIFICANT USER-VISIBLE CHANGES
16
+
17
+* Dropped structure `ISADataFrame`: now the package only uses standard tibbles
18
+* Modified package documentation
2 19
 
3
-* Added a `NEWS.md` file to track changes to the package.
4 20
deleted file mode 100644
... ...
@@ -1,400 +0,0 @@
1
-#------------------------------------------------------------------------------#
2
-# ISADataFrame S3 class
3
-#------------------------------------------------------------------------------#
4
-
5
-#' Low-level efficient constructor for ISADataFrame objects.
6
-#'
7
-#' @description This is a devel function and should **not** be used
8
-#' interactively.\cr\cr
9
-#' ISADataFrame is a sub-class of tbl_df (tibble): it supports all of tibble
10
-#' functionality and adds two attributes, `mandatoryVars` and `metadata` which
11
-#' represent respectively the columns which are mandatory in an ISADataFrame
12
-#' (chr, integration_locus, strand) and various annotations which are not
13
-#' experimental data (for example GeneName, GeneStrand...).\cr
14
-#' **NOTE**: the aim of this function is to be efficient, therefore no formal
15
-#' correctness check of the produced data frame is performed and this is why
16
-#' this function should never be called interactively.\cr\cr
17
-#' From a devel perspective you could directly use this function in those
18
-#' pieces of code where there is a certainty of having the correct input
19
-#' parameters.
20
-#' For more insight on this topic take a look at
21
-#' [Hadley Wickham - Advanced R](https://adv-r.hadley.nz/s3.html#s3-classes).
22
-#' @param x a named list, a tibble or a data.frame
23
-#' @param mandVars a character vector containing the names of the mandatory
24
-#' vars that must be present in the data frame
25
-#' @param meta a character vector containing the names of the variables
26
-#' representing metadata or annotations (optional)
27
-#' @param ... optional arguments, to be used for those who want to extend
28
-#' ISADataFrame
29
-#' @param class character vector representing all the classes
30
-#'
31
-#' @return a new object of S3 class ISADataFrame
32
-#' @importFrom tibble new_tibble
33
-#' @details Note that if the constructor is supplied with a named list with
34
-#' elements having different length, the resulting
35
-#' ISADataFrame will have truncated length equal to the minimum of the lengths
36
-#' of the elements in the list. Appropriate checks
37
-#' should be performed in validators and/or helpers.
38
-#' @seealso [new_tibble],
39
-#' [Hadley Wickham - Advanced R](https://adv-r.hadley.nz/s3.html#s3-classes)
40
-#' @export
41
-#' @examples
42
-#' # Specifying the named list only returns an ISAdf where mandatoryVars
43
-#' # are as defaults (chr, integration_locus, strand),
44
-#' # and empty metadata
45
-#' isaDf <- new_ISADataFrame(list(a = 1:10, b = 10:1))
46
-#'
47
-#' # You can change the mandatory variables by explicitly specifying the names
48
-#' isaDf <- new_ISADataFrame(list(a = 1:10, b = 10:1),
49
-#'     mandVars = c("myvar1", "myvar2")
50
-#' )
51
-#'
52
-#' # You can specify the metadata columns also
53
-#' isaDf <- new_ISADataFrame(list(a = 1:10, b = 10:1),
54
-#'     mandVars = c("myvar1", "myvar2"),
55
-#'     meta = c("m1", "m2", "m3")
56
-#' )
57
-new_ISADataFrame <- function(x,
58
-    mandVars = c("chr", "integration_locus", "strand"),
59
-    meta = character(), ..., class = character()) {
60
-    stopifnot(is.list(x))
61
-    minLength <- min(vapply(x, length, FUN.VALUE = numeric(1)))
62
-
63
-    tibble::new_tibble(x,
64
-        mandatoryVars = mandVars,
65
-        metadata = meta, ...,
66
-        nrow = minLength, class = c(class, "ISADataFrame")
67
-    )
68
-}
69
-
70
-#' Validator for ISADataFrame objects.
71
-#' @description This is a devel function and should **not** be used
72
-#' interactively.
73
-#' The validator takes an ISADataFrame as input to check if the object was built
74
-#' correctly. More specifically:\cr
75
-#' * Checks if the data frame contains the mandatory variables specified by
76
-#' the `mandatoryVars` attribute
77
-#' * Checks if the data frame contains the metadata variables specified by
78
-#' the `metadata` attribute (if not, generates only a warning)
79
-#' * Checks if there is at least one experimental data column: a column is
80
-#' considered experimental data if its name is not contained
81
-#' both in `mandatoryVars` and `metadata` and the column contains numeric
82
-#' values.
83
-#'
84
-#' If all checks pass the function returns TRUE, otherwise some kind of error
85
-#' is shown.
86
-#'
87
-#' @param x the ISADataFrame object to validate
88
-#'
89
-#' @return 'TRUE' if all checks pass, error otherwise
90
-#' @export
91
-#'
92
-#' @examples
93
-#' isadf <- new_ISADataFrame(list(
94
-#'     chr = c(as.character(1:10)),
95
-#'     integration_locus = runif(10, min = 100, max = 10000),
96
-#'     strand = sample(c("+", "-"), 10, replace = TRUE),
97
-#'     exp_1 = runif(10, min = 0, max = 10000),
98
-#'     exp_2 = runif(10, min = 0, max = 10000),
99
-#'     exp_3 = runif(10, min = 0, max = 10000)
100
-#' ))
101
-#'
102
-#' validate_ISADataFrame(isadf)
103
-validate_ISADataFrame <- function(x) {
104
-    stopifnot(is.ISADataFrame(x))
105
-    # checks if ISAdf contains the mandatory vars columns
106
-    if (!all(vapply(
107
-        X = mandatoryVars(x), FUN = is.element,
108
-        set = colnames(x), FUN.VALUE = logical(1)
109
-    ))) {
110
-        stop(paste(
111
-            "Validation of ISADataFrame failed: the input data",
112
-            "doesn't contain the mandatory variables"
113
-        ))
114
-    }
115
-    # checks if there is at least one experimental data column (column type
116
-    # must be numeric)
117
-    checknonnum <- .check_nonNumdata(x)
118
-    if (checknonnum == FALSE) {
119
-        stop(paste(
120
-            "Validation of ISADataFrame failed: no experimental",
121
-            "variables found"
122
-        ))
123
-    }
124
-    if (checknonnum == "Warning") {
125
-        warning(paste(
126
-            "Validation of ISADataFrame - warning: found",
127
-            "experimental columns with non numeric type"
128
-        ))
129
-    }
130
-    # checks if the specified metadata are present in the data frame
131
-    checkM <- .check_metadata(x)
132
-    if (checkM == FALSE) {
133
-        warning(paste(
134
-            "Validation of ISADataFrame - warning: the input",
135
-            "data doesn't contain the specified metadata columns"
136
-        ))
137
-    }
138
-
139
-    return(TRUE)
140
-}
141
-
142
-
143
-
144
-
145
-#' Helper function to obtain ISADataFrame object.
146
-#'
147
-#' @description This function is intended to be used interactively and should be
148
-#' used to build correct ISADataFrames.
149
-#' If called with parameter `try.correct = TRUE` the function is able to catch
150
-#' and correct minor issues such as:
151
-#' * When provided with a named list as a parameter, if the elements do not have
152
-#' the same length the shortest are filled with NAs to match the longest element
153
-#' * When there are metadata attributes declared that are not present in the
154
-#' data frame they're removed
155
-#' * When non-numeric columns are detected but are not declared as metadata
156
-#' they're added to the metadata attribute.
157
-#'
158
-#' Errors will be thrown in at least 2 cases:
159
-#' * The mandatoryVars are not included in the data frame
160
-#' * There are no experimental data columns. Note that experimental data columns
161
-#' must be numeric.
162
-#' @param x a named list, a tibble or a data.frame
163
-#' @param metadata the metadata fields that are present in the table (should be
164
-#' all variables that are not mandatory and are not experimental data)
165
-#' @param try.correct if set to TRUE is able to fix minor issues
166
-#'
167
-#' @return a properly built ISADataFrame
168
-#' @export
169
-#' @importFrom rlang env_bind env_parent
170
-#' @importFrom tibble is_tibble
171
-#' @examples
172
-#' aListWithSomeIssues <- list(
173
-#'     chr = c(as.character(1:10)),
174
-#'     integration_locus = runif(10, min = 100, max = 10000),
175
-#'     strand = sample(c("+", "-"), 10, replace = TRUE),
176
-#'     meta1 = rep_len("m1", 10),
177
-#'     nonNumericdata = rep_len("random", 10),
178
-#'     exp_1 = runif(5, min = 0, max = 10000),
179
-#'     exp_2 = runif(10, min = 0, max = 10000),
180
-#'     exp_3 = runif(8, min = 0, max = 10000)
181
-#' )
182
-#'
183
-#' isadf <- ISADataFrame(aListWithSomeIssues,
184
-#'     metadata = c("meta1"),
185
-#'     try.correct = TRUE
186
-#' )
187
-#' head(isadf)
188
-ISADataFrame <- function(x, metadata = character(), try.correct = TRUE) {
189
-    stopifnot(is.list(x) | is.data.frame(x) | is_tibble(x) | is.ISADataFrame(x))
190
-    if (is.list(x)) {
191
-        lengths <- vapply(x, length, FUN.VALUE = numeric(1))
192
-        equalLeng <- (lengths == lengths[[1]])
193
-        if (!all(equalLeng)) {
194
-            if (!try.correct) {
195
-                stop(paste(
196
-                    "Error in ISADataFrame(): list provided as input",
197
-                    "has elements with different lengths.",
198
-                    "Try try.correct = TRUE"
199
-                ))
200
-            } else {
201
-                max <- max(lengths)
202
-                x <- lapply(x, FUN = function(x) {
203
-                    if (length(x) < max) {
204
-                        append(x, rep_len(NA, max - length(x)))
205
-                    } else {
206
-                        x
207
-                    }
208
-                })
209
-                message(paste(
210
-                    "Warning - introduced NAs to fix issues in",
211
-                    "provided list"
212
-                ))
213
-            }
214
-        }
215
-    }
216
-    isaDf <- new_ISADataFrame(x, meta = metadata)
217
-    resultValidation <- withCallingHandlers(
218
-        {
219
-            validate_ISADataFrame(isaDf)
220
-        },
221
-        error = function(cond) {
222
-            stop(paste(
223
-                "Couldn't build ISADataFrame from provided input.",
224
-                "Aborting.", conditionMessage(cond)
225
-            ))
226
-        },
227
-        warning = function(cond) {
228
-            if (try.correct) {
229
-                if (conditionMessage(cond) == paste(
230
-                    "Validation of ISADataFrame",
231
-                    "- warning: the input data doesn't contain the specified",
232
-                    "metadata columns"
233
-                )) {
234
-                    present <- which(vapply(
235
-                        X = metadata(isaDf),
236
-                        FUN = is.element,
237
-                        set = colnames(isaDf),
238
-                        FUN.VALUE = logical(1)
239
-                    ))
240
-                    attr(isaDf, "metadata") <- names(present)
241
-                    rlang::env_bind(env_parent(), isaDf = isaDf)
242
-                    message(paste(
243
-                        "Auto-corrected: the input data does not",
244
-                        "contain the specified metadata columns"
245
-                    ))
246
-                    invokeRestart("muffleWarning")
247
-                }
248
-                if (conditionMessage(cond) == paste(
249
-                    "Validation of ISADataFrame",
250
-                    "- warning: found experimental columns with non numeric",
251
-                    "type"
252
-                )) {
253
-                    nnum <- .find_nonNumData(isaDf)
254
-                    attr(isaDf, "metadata") <- c(metadata(isaDf), names(nnum))
255
-                    if (.check_atLeastOneExp(isaDf)) {
256
-                        rlang::env_bind(env_parent(), isaDf = isaDf)
257
-                        message(paste(
258
-                            "Auto-corrected: found experimental",
259
-                            "columns with non numeric type"
260
-                        ))
261
-                        invokeRestart("muffleWarning")
262
-                    } else {
263
-                        stop(paste(
264
-                            "Validation of ISADataFrame failed:",
265
-                            "no experimental variables found"
266
-                        ))
267
-                    }
268
-                }
269
-            } else {
270
-                stop(paste(
271
-                    "Could not build ISADataFrame - warnings thrown:",
272
-                    conditionMessage(cond),
273
-                    paste(
274
-                        "\n", "Try auto-correct function by using",
275
-                        "try.correct = TRUE"
276
-                    )
277
-                ))
278
-            }
279
-        }
280
-    )
281
-    isaDf
282
-}
283
-
284
-
285
-#' Gets the value of the attribute mandatoryVars.
286
-#'
287
-#' @param x an ISADataFrame object
288
-#'
289
-#' @return a character vector
290
-#' @export
291
-#'
292
-#' @examples
293
-#' expList <- list(
294
-#'     chr = c(as.character(1:10)),
295
-#'     integration_locus = runif(10, min = 100, max = 10000),
296
-#'     strand = sample(c("+", "-"), 10, replace = TRUE),
297
-#'     meta1 = rep_len("m1", 10),
298
-#'     exp_1 = runif(10, min = 0, max = 10000),
299
-#'     exp_2 = runif(10, min = 0, max = 10000),
300
-#'     exp_3 = runif(10, min = 0, max = 10000)
301
-#' )
302
-#' isadf <- ISADataFrame(expList, metadata = c("meta1"))
303
-#' mandatory <- mandatoryVars(isadf)
304
-mandatoryVars <- function(x) {
305
-    stopifnot(is.ISADataFrame(x))
306
-    attr(x, "mandatoryVars")
307
-}
308
-
309
-#' Gets the value of the attribute metadata.
310
-#'
311
-#' @param x An ISADataFrame object
312
-#'
313
-#' @return A character vector
314
-#' @export
315
-#'
316
-#' @examples
317
-#' expList <- list(
318
-#'     chr = c(as.character(1:10)),
319
-#'     integration_locus = runif(10, min = 100, max = 10000),
320
-#'     strand = sample(c("+", "-"), 10, replace = TRUE),
321
-#'     meta1 = rep_len("m1", 10),
322
-#'     exp_1 = runif(10, min = 0, max = 10000),
323
-#'     exp_2 = runif(10, min = 0, max = 10000),
324
-#'     exp_3 = runif(10, min = 0, max = 10000)
325
-#' )
326
-#' isadf <- ISADataFrame(expList, metadata = c("meta1"))
327
-#' meta <- metadata(isadf)
328
-metadata <- function(x) {
329
-    stopifnot(is.ISADataFrame(x))
330
-    attr(x, "metadata")
331
-}
332
-
333
-#' Adds new metadata to the metadata attribute.
334
-#'
335
-#' The metadata has to be present in the data frame in order to be added.
336
-#'
337
-#' @param x The ISADataFrame object
338
-#' @param meta A character vector containing the names of the columns to
339
-#' register as metadata
340
-#'
341
-#' @return An ISADataFrame object
342
-#' @export
343
-#'
344
-#' @examples
345
-#' example_matrix_ann <- system.file("extdata", "ex_annotated_ISMatrix.tsv.xz",
346
-#'     package = "ISAnalytics"
347
-#' )
348
-#' isadf_ann <- import_single_Vispa2Matrix(example_matrix_ann)
349
-#' isadf_ann <- add_metadata(isadf_ann, c("GeneName"))
350
-add_metadata <- function(x, meta) {
351
-    stopifnot(is.ISADataFrame(x))
352
-    stopifnot(is.character(meta))
353
-    found <- meta %in% colnames(x)
354
-    if (all(found == FALSE)) {
355
-        stop(paste("None of metadata specified found in x"))
356
-    }
357
-    if (any(found == FALSE)) {
358
-        meta <- meta[found]
359
-        current <- metadata(x)
360
-        attr(x, "metadata") <- c(current, meta)
361
-        message(paste(
362
-            "Some of the metadata specified were not found in the data",
363
-            "frame and were ignored."
364
-        ))
365
-        return(x)
366
-    }
367
-    current <- metadata(x)
368
-    attr(x, "metadata") <- c(current, meta)
369
-    return(x)
370
-}
371
-
372
-#' Removes metadata from the metadata attribute.
373
-#'
374
-#' The metadata has to be present in the attribute in order to be removed.
375
-#'
376
-#' @param x The ISADataFrame object
377
-#' @param meta A character vector containing the names of the columns to
378
-#' remove from the metadata attribute
379
-#'
380
-#' @return An ISADataFrame object
381
-#' @export
382
-#'
383
-#' @examples
384
-#' example_matrix_ann <- system.file("extdata", "ex_annotated_ISMatrix.tsv.xz",
385
-#'     package = "ISAnalytics"
386
-#' )
387
-#' isadf_ann <- import_single_Vispa2Matrix(example_matrix_ann)
388
-#' isadf_ann <- remove_metadata(isadf_ann, c("GeneName"))
389
-remove_metadata <- function(x, meta) {
390
-    stopifnot(is.ISADataFrame(x))
391
-    stopifnot(is.character(meta))
392
-    found <- meta %in% metadata(x)
393
-    if (all(found == FALSE)) {
394
-        stop(paste("None of metadata specified found in x"))
395
-    }
396
-    meta <- meta[found]
397
-    current <- metadata(x)
398
-    attr(x, "metadata") <- current[!current %in% meta]
399
-    return(x)
400
-}
... ...
@@ -1,8 +1,54 @@
1
-#' @keywords internal
2
-"_PACKAGE"
3
-
4
-# The following block is used by usethis to automatically manage
5
-# roxygen namespace tags. Modify with care!
6
-## usethis namespace: start
7
-## usethis namespace: end
1
+#' ISAnalytics: Analyze gene therapy vector insertion sites data
2
+#' identified from genomics next generation sequencing reads for
3
+#' clonal tracking studies
4
+#'
5
+#' @description In gene therapy, stem cells are modified using viral
6
+#' vectors to deliver the therapeutic transgene and replace functional
7
+#' properties since the genetic modification is stable and inherited in
8
+#' all cell progeny. The retrieval and mapping of the sequences flanking
9
+#' the virus-host DNA junctions allows the identification of insertion
10
+#' sites (IS), essential for monitoring the evolution of genetically
11
+#' modified cells in vivo. A comprehensive toolkit for the analysis of
12
+#' IS is required to foster clonal tracking studies and supporting the
13
+#' assessment of safety and long term efficacy in vivo. This package
14
+#' is aimed at (1) supporting automation of IS workflow, (2) performing
15
+#' base and advance analysis for IS tracking (clonal abundance, clonal
16
+#' expansions and statistics for insertional mutagenesis, etc.),
17
+#' (3) providing basic biology insights of transduced stem cells in vivo.
18
+#'
19
+#' @section Useful resources:
20
+#' * \href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5702242/}{VISPA2:
21
+#' A Scalable Pipeline for High-Throughput Identification
22
+#' and Annotation of Vector Integration Sites}
23
+#'
24
+#' @section ISAnalytics function families:
25
+#' * Import functions:
26
+#'   * \code{\link{import_single_Vispa2Matrix}}
27
+#'   * \code{\link{import_association_file}}
28
+#'   * \code{\link{import_parallel_Vispa2Matrices_interactive}}
29
+#'   * \code{\link{import_parallel_Vispa2Matrices_auto}}
30
+#' * Aggregation functions:
31
+#'   * \code{\link{aggregate_metadata}}
32
+#'   * \code{\link{aggregate_values_by_key}}
33
+#' * Collision removal functions:
34
+#'   * \code{\link{remove_collisions}}
35
+#'   * \code{\link{realign_after_collisions}}
36
+#' * Recalibration functions:
37
+#'   * \code{\link{compute_near_integrations}}
38
+#' * Analysis functions:
39
+#'   * \code{\link{compute_abundance}}
40
+#' * Utility functions:
41
+#'   * \code{\link{generate_blank_association_file}}
42
+#'   * \code{\link{generate_Vispa2_launch_AF}}
43
+#'   * \code{\link{unzip_file_system}}
44
+#'
45
+#' @section Vignettes:
46
+#' * \code{vignette("How to use import functions", package = "ISAnalytics")}
47
+#' * \code{vignette("Collision removal functionality",
48
+#' package = "ISAnalytics")}
49
+#' * \code{vignette("Working with aggregate functions",
50
+#' package = "ISAnalytics")}
51
+#'
52
+#' @docType package
53
+#' @name ISAnalytics
8 54
 NULL
9 55
new file mode 100644
... ...
@@ -0,0 +1,239 @@
1
+#------------------------------------------------------------------------------#
2
+# Aggregate functions
3
+#------------------------------------------------------------------------------#
4
+
5
+#' Performs aggregation on metadata contained in the association file.
6
+#'
7
+#' Groups metadata by grouping_keys and returns a summary of info for each
8
+#' group. For more details on how to use this function:
9
+#' \code{vignette("Working with aggregate functions", package = "ISAnalytics")}
10
+#'
11
+#' @param association_file The imported association file
12
+#' (via `import_association_file`)
13
+#' @param grouping_keys A character vector of column names to form a group
14
+#' @param import_stats Should Vispa2 stats files be imported and included?
15
+#' @family Aggregate functions
16
+#' @importFrom purrr is_empty
17
+#' @importFrom tibble is_tibble
18
+#'
19
+#' @return A tibble
20
+#' @export
21
+#'
22
+#' @examples
23
+#' op <- options("ISAnalytics.widgets" = FALSE)
24
+#' path_AF <- system.file("extdata", "ex_association_file.tsv",
25
+#'     package = "ISAnalytics"
26
+#' )
27
+#' root_correct <- system.file("extdata", "fs.zip", package = "ISAnalytics")
28
+#' root_correct <- unzip_file_system(root_correct, "fs")
29
+#' association_file <- import_association_file(path_AF, root_correct)
30
+#' aggregated_meta <- aggregate_metadata(association_file, import_stats = FALSE)
31
+#' options(op)
32
+aggregate_metadata <- function(association_file,
33
+    grouping_keys = c(
34
+        "SubjectID", "CellMarker",
35
+        "Tissue", "TimePoint"
36
+    ),
37
+    import_stats = TRUE) {
38
+    # Check parameters
39
+    stopifnot(tibble::is_tibble(association_file))
40
+    min_missing <- setdiff(.min_var_set(), colnames(association_file))
41
+    if (!purrr::is_empty(min_missing)) {
42
+        stop(paste(c(
43
+            "Association file is missing some of the mandatory columns:",
44
+            min_missing
45
+        ), collapse = "\n"))
46
+    }
47
+    stopifnot(!is.null(grouping_keys))
48
+    stopifnot(is.character(grouping_keys))
49
+    keys_missing <- setdiff(grouping_keys, colnames(association_file))
50
+    if (!purrr::is_empty(keys_missing)) {
51
+        stop(paste(c(
52
+            "Some of the grouping keys you provided were not found:",
53
+            keys_missing
54
+        ), collapse = "\n"))
55
+    }
56
+    stopifnot(is.logical(import_stats) & length(import_stats) == 1)
57
+    # Import if true
58
+    stats <- NULL
59
+    if (import_stats == TRUE) {
60
+        stats <- .import_stats_iss(association_file)
61
+        if (is.null(stats) & getOption("ISAnalytics.verbose") == TRUE) {
62
+            message(paste("No Vispa2 stats files found for import,
63
+                    ignoring this step"))
64
+        } else {
65
+            if (getOption("ISAnalytics.widgets") == TRUE) {
66
+                report <- stats[[2]]
67
+                stats <- stats[[1]]
68
+                widg <- .iss_import_widget(report)
69
+                print(widg)
70
+            } else {
71
+                stats <- stats[[1]]
72
+            }
73
+        }
74
+    }
75
+    aggregated <- .join_and_aggregate(association_file, stats, grouping_keys)
76
+    aggregated
77
+}
78
+
79
+#' Aggregates matrix values based on the specified key.
80
+#'
81
+#' Performs aggregation on values contained in the integration matrices based
82
+#' on the key and the specified lambda. For more details on how to use this
83
+#' function:
84
+#' \code{vignette("Working with aggregate functions", package = "ISAnalytics")}
85
+#'
86
+#' @param x A single integration matrix (tibble) or a list of imported
87
+#' integration matrices (tibbles)
88
+#' @param association_file The imported association file
89
+#' @param key A string or a character vector with column names of the
90
+#' association file to take as keys
91
+#' @param lambda The name of the aggregating function to apply to values
92
+#' @param args Other arguments to pass to lambda, can be set to NULL
93
+#' @param group Other variables to include in the grouping besides `key`,
94
+#' can be set to NULL
95
+#' @param namespace The namespace that exports `lambda`. Can be set to NULL if
96
+#' lambda is not an exported object but rather a user-defined function in some
97
+#' environment.
98
+#' @param env The environment in which the function should look for symbols
99
+#' @family Aggregate functions
100
+#'
101
+#' @importFrom rlang is_installed is_function eval_tidy env_get
102
+#' @importFrom tibble is_tibble
103
+#' @importFrom purrr map_lgl
104
+#'
105
+#' @return A list of tibbles or a single tibble according to input
106
+#' @export
107
+#'
108
+#' @examples
109
+#' op <- options("ISAnalytics.widgets" = FALSE)
110
+#' path_AF <- system.file("extdata", "ex_association_file.tsv",
111
+#'     package = "ISAnalytics"
112
+#' )
113
+#' root_correct <- system.file("extdata", "fs.zip", package = "ISAnalytics")
114
+#' root_correct <- unzip_file_system(root_correct, "fs")
115
+#' association_file <- import_association_file(path_AF, root_correct)
116
+#' matrices <- import_parallel_Vispa2Matrices_auto(
117
+#'     association_file = association_file, root = NULL,
118
+#'     quantification_type = c("fragmentEstimate", "seqCount"),
119
+#'     matrix_type = "annotated", workers = 2, matching_opt = "ANY"
120
+#' )
121
+#' agg <- aggregate_values_by_key(
122
+#'     x = matrices$seqCount,
123
+#'     association_file = association_file,
124
+#'     key = "SubjectID", args = list(na.rm = TRUE)
125
+#' )
126
+#' options(op)
127
+aggregate_values_by_key <- function(x,
128
+    association_file,
129
+    key = "SubjectID",
130
+    lambda = "sum",
131
+    args = NULL,
132
+    group = c(
133
+        mandatory_IS_vars(),
134
+        annotation_IS_vars()
135
+    ),
136
+    namespace = "base",
137
+    env = .GlobalEnv) {
138
+    stopifnot(tibble::is_tibble(x) || is.list(x))
139
+    if (!tibble::is_tibble(x)) {
140
+        all_int_mat <- purrr::map_lgl(x, function(y) {
141
+            if (tibble::is_tibble(y)) {
142
+                mand <- .check_mandatory_vars(y)
143
+                mand
144
+            } else {
145
+                FALSE
146
+            }
147
+        })
148
+        value_present <- purrr::map_lgl(x, .check_value_col)
149
+    } else {
150
+        all_int_mat <- .check_mandatory_vars(x)
151
+        value_present <- .check_value_col(x)
152
+    }
153
+    if (!all(all_int_mat == TRUE)) {
154
+        stop(.non_ISM_error())
155
+    }
156
+    if (!all(value_present == TRUE)) {
157
+        stop(.missing_value_col_error())
158
+    }
159
+    # Check association file
160
+    stopifnot(tibble::is_tibble(association_file))
161
+    # Check key
162
+    stopifnot(is.character(key))
163
+    if (!all(key %in% colnames(association_file))) {
164
+        stop("Key fields are missing from association file")
165
+    }
166
+    # Check lambda
167
+    stopifnot(is.character(lambda) & length(lambda) == 1)
168
+    # Check group
169
+    stopifnot(is.character(group) | is.null(group))
170
+    matrix_cols <- if (tibble::is_tibble(x)) {
171
+        colnames(x)
172
+    } else {
173
+        colnames(x[[1]])
174
+    }
175
+    if (!all(group %in% c(colnames(association_file), matrix_cols))) {
176
+        stop(paste("Grouping variables not found"))
177
+    }
178
+    # Check args
179
+    stopifnot((is.list(args) & !is.null(names(args)) | is.null(args)))
180
+    # Check namespace
181
+    stopifnot((is.character(namespace) & length(namespace) == 1) |
182
+        is.null(namespace))
183
+    # Check env
184
+    stopifnot(is.environment(env))
185
+    # Check if lambda is an exported function of namespace (only if namespace is
186
+    # not null)
187
+    if (!is.null(namespace)) {
188
+        if (!rlang::is_installed(namespace)) {
189
+            stop(paste("Namespace", namespace, " is not installed"))
190
+        }
191
+        tryCatch(
192
+            expr = {
193
+                fn <- getExportedValue(
194
+                    rlang::eval_tidy(namespace),
195
+                    rlang::eval_tidy(lambda)
196
+                )
197
+                if (!rlang::is_function(fn)) {
198
+                    stop("Provided lambda is not a function")
199
+                }
200
+            },
201
+            error = function(cond) {
202
+                stop(paste(
203
+                    lambda, "is not an exported object from namespace",
204
+                    namespace
205
+                ))
206
+            }
207
+        )
208
+    } else {
209
+        # If no namespace check if the lambda is correctly defined in the
210
+        # environment and it's actually a function
211
+        fn <- rlang::env_get(env = env, nm = lambda, default = NULL)
212
+        if (is.null(fn)) {
213
+            stop(paste(
214
+                "No binding found for", lambda, "in the specified",
215
+                "environment"
216
+            ))
217
+        }
218
+        if (!rlang::is_function(fn)) {
219
+            stop(paste(
220
+                lambda, "defined in the specified environment is not a",
221
+                "function"
222
+            ))
223
+        }
224
+    }
225
+    if (tibble::is_tibble(x)) {
226
+        x <- list(x)
227
+        agg_matrix <- .aggregate_lambda(
228
+            x, association_file, key, lambda, group,
229
+            args, namespace, env
230
+        )
231
+        return(agg_matrix[[1]])
232
+    }
233
+    agg_matrix <- .aggregate_lambda(
234
+        x, association_file, key, lambda, group,
235
+        args, namespace, env
236
+    )
237
+    agg_matrix
238
+}
239
+
0 240
deleted file mode 100644
... ...
@@ -1,209 +0,0 @@
1
-
2
-#' Pivot_longer implementation for ISADataFrame.
3
-#' @inheritParams tidyr::pivot_longer
4
-#' @export
5
-#' @importFrom tidyr pivot_longer
6
-#' @return An ISADataFrame
7
-#' @examples
8
-#' path <- system.file("extdata", "ex_annotated_ISMatrix.tsv.xz",
9
-#'     package = "ISAnalytics"
10
-#' )
11
-#' df <- read.csv(path,
12
-#'     sep = "\t", check.names = FALSE,
13
-#'     stringsAsFactors = FALSE
14
-#' )
15
-#' isadf <- ISADataFrame(df, metadata = c("GeneName", "GeneStrand"))
16
-#' isadf <- tidyr::pivot_longer(isadf,
17
-#'     cols = 6:10, names_to = "ExpID",
18
-#'     values_to = "Val", values_drop_na = TRUE
19
-#' )
20
-pivot_longer.ISADataFrame <- function(data,
21
-    cols,
22
-    names_to = "name",
23
-    names_prefix = NULL,
24
-    names_sep = NULL,
25
-    names_pattern = NULL,
26
-    names_ptypes = list(),
27
-    names_transform = list(),
28
-    names_repair = "check_unique",
29
-    values_to = "value",
30
-    values_drop_na = FALSE,
31
-    values_ptypes = list(),
32
-    values_transform = list(),
33
-    ...) {
34
-    df <- vctrs::vec_restore(NextMethod(), data)
35
-    df <- .fix_metadata(df)
36
-    df
37
-}
38
-
39
-#' inner_join implementation for ISADataFrame.
40
-#' @inheritParams dplyr::inner_join
41
-#' @importFrom dplyr inner_join
42
-#'
43
-#' @return An ISADataFrame
44
-#' @export
45
-#'
46
-#' @examples
47
-#' path <- system.file("extdata", "ex_annotated_ISMatrix.tsv.xz",
48
-#'     package = "ISAnalytics"
49
-#' )
50
-#' isadf1 <- import_single_Vispa2Matrix(path)
51
-#' isadf2 <- import_single_Vispa2Matrix(path)
52
-#' isadf1 %>% dplyr::inner_join(isadf2)
53
-inner_join.ISADataFrame <- function(x,
54
-    y,
55
-    by = NULL,
56
-    copy = FALSE,
57
-    suffix = c(".x", ".y"), ...) {
58
-    df <- vctrs::vec_restore(NextMethod(), x)
59
-    df <- .fix_metadata(df)
60
-    df
61
-}
62
-
63
-#' left_join implementation for ISADataFrame.
64
-#' @inheritParams dplyr::left_join
65
-#' @importFrom dplyr left_join
66
-#'
67
-#' @return An ISADataFrame
68
-#' @export
69
-#'
70
-#' @examples
71
-#' path <- system.file("extdata", "ex_annotated_ISMatrix.tsv.xz",
72
-#'     package = "ISAnalytics"
73
-#' )
74
-#' isadf1 <- import_single_Vispa2Matrix(path)
75
-#' isadf2 <- import_single_Vispa2Matrix(path)
76
-#' isadf1 %>% dplyr::left_join(isadf2)
77
-left_join.ISADataFrame <- function(x,
78
-    y,
79
-    by = NULL,
80
-    copy = FALSE,
81
-    suffix = c(".x", ".y"), ...,
82
-    keep = FALSE) {
83
-    df <- vctrs::vec_restore(NextMethod(), x)
84
-    df <- .fix_metadata(df)
85
-    df
86
-}
87
-#' semi_join implementation for ISADataFrame.
88
-#' @inheritParams dplyr::semi_join
89
-#' @importFrom dplyr semi_join
90
-#'
91
-#' @return An ISADataFrame
92
-#' @export
93
-#'
94
-#' @examples
95
-#' path <- system.file("extdata", "ex_annotated_ISMatrix.tsv.xz",
96
-#'     package = "ISAnalytics"
97
-#' )
98
-#' isadf1 <- import_single_Vispa2Matrix(path)
99
-#' isadf2 <- import_single_Vispa2Matrix(path)
100
-#' isadf1 %>% dplyr::semi_join(isadf2)
101
-semi_join.ISADataFrame <- function(x,
102
-    y,
103
-    by = NULL,
104
-    copy = FALSE, ...) {
105
-    df <- vctrs::vec_restore(NextMethod(), x)
106
-    df <- .fix_metadata(df)
107
-    df
108
-}
109
-
110
-#' anti_join implementation for ISADataFrame.
111
-#' @inheritParams dplyr::anti_join
112
-#' @importFrom dplyr anti_join
113
-#'
114
-#' @return An ISADataFrame
115
-#' @export
116
-#'
117
-#' @examples
118
-#' path <- system.file("extdata", "ex_annotated_ISMatrix.tsv.xz",
119
-#'     package = "ISAnalytics"
120
-#' )
121
-#' isadf1 <- import_single_Vispa2Matrix(path)
122
-#' isadf2 <- import_single_Vispa2Matrix(path)
123
-#' isadf1 %>% dplyr::anti_join(isadf2)
124
-anti_join.ISADataFrame <- function(x, y, by = NULL, copy = FALSE, ...) {
125
-    df <- vctrs::vec_restore(NextMethod(), x)
126
-    df <- .fix_metadata(df)
127
-    df
128
-}
129
-
130
-#' select implementation for ISADataFrame.
131
-#' @inheritParams dplyr::select
132
-#' @importFrom dplyr select
133
-#'
134
-#' @return An ISADataFrame
135
-#' @export
136
-#'
137
-#' @examples
138
-#' path <- system.file("extdata", "ex_annotated_ISMatrix.tsv.xz",
139
-#'     package = "ISAnalytics"
140
-#' )
141
-#' isadf1 <- import_single_Vispa2Matrix(path)
142
-#' isadf1 %>% dplyr::select(chr)
143
-select.ISADataFrame <- function(.data, ...) {
144
-    df <- vctrs::vec_restore(NextMethod(), .data)
145
-    df <- .fix_metadata(df)
146
-    df
147
-}
148
-
149
-#' Is the object an ISADataFrame?
150
-#'
151
-#' @param x an object
152
-#'
153
-#' @importFrom methods is
154
-#' @return TRUE or FALSE
155
-#' @export
156
-#'
157
-#' @examples
158
-#' is.ISADataFrame(1:10)
159
-is.ISADataFrame <- function(x) {
160
-    is(x, "ISADataFrame")
161
-}
162
-
163
-#' Printing ISADataFrames
164
-#'
165
-#' @param x an ISADataFrame object
166
-#' @param ... optional arguments to print
167
-#'
168
-#' @export
169
-#' @return Nothing
170
-#'
171
-#' @examples
172
-#' expList <- list(
173
-#'     chr = c(as.character(1:10)),
174
-#'     integration_locus = runif(10, min = 100, max = 10000),
175
-#'     strand = sample(c("+", "-"), 10, replace = TRUE),
176
-#'     meta1 = rep_len("m1", 10),
177
-#'     exp_1 = runif(10, min = 0, max = 10000),
178
-#'     exp_2 = runif(10, min = 0, max = 10000),
179
-#'     exp_3 = runif(10, min = 0, max = 10000)
180
-#' )
181
-#' isadf <- ISADataFrame(expList, metadata = c("meta1"))
182
-#' print(isadf)
183
-print.ISADataFrame <- function(x, ...) {
184
-    cat(
185
-        "mandatoryVars: ",
186
-        paste0(mandatoryVars(x)[seq_len(length(mandatoryVars(x)) - 1)], ", "),
187
-        mandatoryVars(x)[length(mandatoryVars(x))],
188
-        "\n"
189
-    )
190
-    if (length(metadata(x)) > 1) {
191
-        cat("metadata: ", paste0(
192
-            metadata(x)[seq_len(length(metadata(x)) - 1)],
193
-            ", "
194
-        ), metadata(x)[length(metadata(x))], "\n")
195
-    } else {
196
-        cat("metadata: ", metadata(x), "\n")
197
-    }
198
-    NextMethod(...)
199
-}
200
-
201
-
202
-#' Implementation of vec_restore for ISADataFrame.
203
-#' @inheritParams vctrs::vec_restore
204
-#' @return See official documentation at \code{
205
-#' \link[vctrs:vec_proxy]{vec_restore}}
206
-#' @export
207
-vec_restore.ISADataFrame <- function(x, to, ...) {
208
-    new_ISADataFrame(x, meta = attr(to, "metadata"))
209
-}
210 0
new file mode 100644
... ...
@@ -0,0 +1,55 @@
1
+#------------------------------------------------------------------------------#
2
+# Analysis functions
3
+#------------------------------------------------------------------------------#
4
+#' Computes the abundance of every integration in the sample.
5
+#'
6
+#' Abundance is obtained for every row by calculating the ratio
7
+#' between the single value and the total value for the sample.
8
+#'
9
+#' @param x An integration matrix
10
+#' @param percentage Add abundance as percentage?
11
+#' @family Analysis functions
12
+#'
13
+#' @importFrom magrittr `%>%`
14
+#' @importFrom tibble is_tibble
15
+#' @importFrom dplyr group_by summarise left_join mutate select
16
+#' @importFrom rlang .data
17
+#' @return An integration matrix
18
+#' @export
19
+#'
20
+#' @examples
21
+#' path <- system.file("extdata", "ex_annotated_ISMatrix.tsv.xz",
22
+#' package = "ISAnalytics")
23
+#' matrix <- import_single_Vispa2Matrix(path)
24
+#' abundance <- compute_abundance(matrix)
25
+compute_abundance <- function(x, percentage = TRUE) {
26
+    ## Check parameters
27
+    stopifnot(tibble::is_tibble(x))
28
+    if (.check_mandatory_vars(x) == FALSE) {
29
+        stop(.non_ISM_error())
30
+    }
31
+    if (.check_complAmpID(x) == FALSE) {
32
+        stop(.missing_complAmpID_error())
33
+    }
34
+    if (.check_value_col(x) == FALSE) {
35
+        stop(.missing_value_col_error())
36
+    }
37
+    stopifnot(is.logical(percentage) & length(percentage) == 1)
38
+    ## Computation
39
+    totals <- x %>%
40
+        dplyr::group_by(.data$CompleteAmplificationID) %>%
41
+        dplyr::summarise(
42
+            QuantificationSum = sum(.data$Value)
43
+        )
44
+    abundance_df <- x %>%
45
+        dplyr::left_join(totals, by = "CompleteAmplificationID") %>%
46
+        dplyr::mutate(AbsAbundance = .data$Value/.data$QuantificationSum) %>%
47
+        dplyr::select(-c(.data$QuantificationSum))
48
+    if (percentage == TRUE) {
49
+        abundance_df <- abundance_df %>%
50
+            dplyr::mutate(
51
+                PercAbundance = .data$AbsAbundance * 100
52
+            )
53
+    }
54
+    abundance_df
55
+}
... ...
@@ -19,7 +19,8 @@
19 19
 #' \code{vignette("Collision removal functionality", package = "ISAnalytics")}
20 20
 #'
21 21
 #' @param x A named list of matrices (names must be quantification types), or
22
-#' a single ISADataFrame representing the sequence count matrix of interest.
22
+#' a single integration matrix representing the sequence count matrix of
23
+#' interest.
23 24
 #' @param association_file The association file imported via
24 25
 #' `import_association_file`
25 26
 #' @param date_col The date column that should be considered for the analysis.
... ...
@@ -28,12 +29,15 @@
28 29
 #' to be considered when deciding between seqCount value.
29 30
 #' @family Collision removal
30 31
 #' @importFrom dplyr bind_rows all_of select
32
+#' @importFrom tibble is_tibble
33
+#' @importFrom magrittr `%>%`
31 34
 #' @seealso \code{\link{date_columns_coll}}
32 35
 #'
33
-#' @return A list of ISADataframes with removed collisions
36
+#' @return A list of tibbles with removed collisions
34 37
 #' @export
35 38
 #'
36 39
 #' @examples
40
+#' op <- options("ISAnalytics.widgets" = FALSE)
37 41
 #' path <- system.file("extdata", "ex_association_file.tsv",
38 42
 #'     package = "ISAnalytics"
39 43
 #' )
... ...
@@ -45,19 +49,46 @@
45 49
 #'     c("fragmentEstimate", "seqCount"), "annotated", 2, NULL, "ANY"
46 50
 #' )
47 51
 #' matrices <- remove_collisions(matrices, association_file)
52
+#' options(op)
48 53
 remove_collisions <- function(x,
49 54
     association_file,
50 55
     date_col = "SequencingDate",
51 56
     reads_ratio = 10) {
52 57
     # Check basic parameter correctness
53 58
     stopifnot(is.list(x) & !is.null(names(x)))
54
-    if (is.ISADataFrame(x)) {
59
+    if (tibble::is_tibble(x)) {
60
+        if (.check_mandatory_vars(x) == FALSE) {
61
+            stop(.non_ISM_error())
62
+        }
63
+        if (.check_complAmpID(x) == FALSE) {
64
+            stop(.missing_complAmpID_error())
65
+        }
66
+        if (.check_value_col(x) == FALSE) {
67
+            stop(.missing_value_col_error())
68
+        }
55 69
         x <- list(seqCount = x)
56
-    }
57
-    stopifnot(all(names(x) %in% quantification_types()))
58
-    all_ISAdf <- purrr::map_lgl(x, is.ISADataFrame)
59
-    if (!all(all_ISAdf)) {
60
-        stop("x contains elements that are not ISADataFrame objects")
70
+    } else {
71
+        stopifnot(all(names(x) %in% quantification_types()))
72
+        ## remove_collisions requires seqCount matrix, check if the list
73
+        ## contains one
74
+        if ((!"seqCount" %in% names(x)) ||
75
+            nrow(x$seqCount) == 0) {
76
+            stop(paste(
77
+                "Sequence count data frame is required for collision removal",
78
+                "but none was detected in x"
79
+            ))
80
+        }
81
+        all_ISm <- purrr::map_lgl(x, .check_mandatory_vars)
82
+        if (!all(all_ISm)) {
83
+            stop(.non_ISM_error())
84
+        }
85
+        all_campid <- purrr::map_lgl(x, .check_complAmpID)
86
+        if (!all(all_campid)) {
87
+            stop(.missing_complAmpID_error())
88
+        }
89
+        if (.check_value_col(x$seqCount) == FALSE) {
90
+            stop(.missing_value_col_error())
91
+        }
61 92
     }
62 93
     stopifnot(tibble::is_tibble(association_file))
63 94
     stopifnot(is.character(date_col) & length(date_col) == 1)
... ...
@@ -79,15 +110,6 @@ remove_collisions <- function(x,
79 110
     }
80 111
 
81 112
     # Check imported matrices vs association file
82
-    ## remove_collisions requires seqCount matrix, check if the list contains
83
-    ## one
84
-    if ((!"seqCount" %in% names(x)) ||
85
-        nrow(x$seqCount) == 0) {
86
-        stop(paste(
87
-            "Sequence count data frame is required for collision removal",
88
-            "but none was detected in x"
89
-        ))
90
-    }
91 113
     seq_count_df <- x$seqCount
92 114
     ## Check if association file contains all info relative to the content of
93 115
     ## the matrix
... ...
@@ -106,8 +128,8 @@ remove_collisions <- function(x,
106 128
         not_included <- .check_same_info(association_file, seq_count_df)
107 129
         if (nrow(not_included) > 0) {
108 130
             message(paste("Found additional data relative to some projects",
109
-                          "that are not included in the imported matrices.",
110
-                          "Here is a summary",
131
+                "that are not included in the imported matrices.",
132
+                "Here is a summary",
111 133
                 collapse = "\n"
112 134
             ))
113 135
             print(not_included)
... ...
@@ -136,7 +158,7 @@ remove_collisions <- function(x,
136 158
     final_matr <- fixed_collisions %>%
137 159
         dplyr::bind_rows(splitted_df$non_collisions) %>%
138 160
         dplyr::select(dplyr::all_of(colnames(seq_count_df)))
139
-    if (verbose == TRUE) {
161
+    if (getOption("ISAnalytics.widgets") == TRUE) {
140 162
         summary_tbl <- .summary_table(
141 163
             before = joined, after = final_matr,
142 164
             association_file = association_file
... ...
@@ -169,8 +191,8 @@ remove_collisions <- function(x,
169 191
                 "to re-align other related matrices see",
170 192
                 "?realign_after_collisions"
171 193
             ))
172
-            return(list(seqCount = final_matr))
173 194
         }
195
+        return(list(seqCount = final_matr))
174 196
     }
175 197
 }
176 198
 
... ...
@@ -194,6 +216,7 @@ remove_collisions <- function(x,
194 216
 #' "seqCount".
195 217
 #' @importFrom dplyr semi_join
196 218
 #' @importFrom purrr map_lgl
219
+#' @importFrom magrittr `%>%`
197 220
 #' @family Collision removal
198 221
 #' @seealso \code{\link{remove_collisions}}
199 222
 #'
... ...
@@ -201,6 +224,7 @@ remove_collisions <- function(x,
201 224
 #' @export
202 225
 #'
203 226
 #' @examples
227
+#' op <- options("ISAnalytics.widgets" = FALSE)
204 228
 #' path <- system.file("extdata", "ex_association_file.tsv",
205 229
 #'     package = "ISAnalytics"
206 230
 #' )
... ...
@@ -214,21 +238,23 @@ remove_collisions <- function(x,
214 238
 #' sc_matrix <- remove_collisions(matrices$seqCount, association_file)
215 239
 #' others <- matrices[!names(matrices) %in% "seqCount"]
216 240
 #' aligned_matrices <- realign_after_collisions(sc_matrix$seqCount, others)
241
+#' options(op)
217 242
 realign_after_collisions <- function(sc_matrix, other_matrices) {
218 243
     stopifnot(is.list(other_matrices) & !is.null(names(other_matrices)))
219 244
     stopifnot(all(names(other_matrices) %in% quantification_types()))
220
-    all_ISAdf <- purrr::map_lgl(other_matrices, is.ISADataFrame)
221
-    if (!all(all_ISAdf)) {
222
-        stop(paste(
223
-            "other_matrices list contains elements that are not",
224
-            "ISADataFrame objects"
225
-        ))
245
+    all_ISm <- purrr::map_lgl(other_matrices, .check_mandatory_vars)
246
+    if (!all(all_ISm)) {
247
+        stop(.non_ISM_error())
248
+    }
249
+    all_campid <- purrr::map_lgl(other_matrices, .check_complAmpID)
250
+    if (!all(all_campid)) {
251
+        stop(.missing_complAmpID_error())
226 252
     }
227 253
     realigned <- purrr::map(other_matrices, function(x) {
228 254
         x %>% dplyr::semi_join(sc_matrix,
229 255
             by = c(
230
-                "chr", "integration_locus",
231
-                "strand", "CompleteAmplificationID"
256
+                mandatory_IS_vars(),
257
+                "CompleteAmplificationID"
232 258
             )
233 259
         )
234 260
     })
235 261
new file mode 100644
... ...
@@ -0,0 +1,82 @@
1
+#' Names of mandatory variables for an integration matrix.
2
+#'
3
+#' Contains the names of the columns that need to be present in order for a
4
+#' tibble to be considered an integration matrix.
5
+#'
6
+#' @return A character vector
7
+#' @export
8
+#'
9
+#' @examples
10