Browse code

[UPGRADE] Upgrade to v 1.3.5

Giulia Pais authored on 21/09/2021 15:15:06
Showing48 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: ISAnalytics
2 2
 Title: Analyze gene therapy vector insertion sites data identified from genomics next generation sequencing reads for clonal tracking studies
3
-Version: 1.3.4
3
+Version: 1.3.5
4 4
 Date: 2020-07-03
5 5
 Authors@R: c(
6 6
   person(given = "Andrea",
... ...
@@ -70,7 +70,8 @@ Suggests:
70 70
     DT,
71 71
     circlize,
72 72
     plotly,
73
-    ggvenn
73
+    gtools,
74
+    eulerr
74 75
 VignetteBuilder: knitr
75 76
 RdMacros: 
76 77
     lifecycle
... ...
@@ -17,6 +17,7 @@ export(comparison_matrix)
17 17
 export(compute_abundance)
18 18
 export(compute_near_integrations)
19 19
 export(cumulative_count_union)
20
+export(cumulative_is)
20 21
 export(date_columns_coll)
21 22
 export(date_formats)
22 23
 export(default_iss_file_prefixes)
... ...
@@ -46,6 +47,7 @@ export(remove_collisions)
46 47
 export(sample_statistics)
47 48
 export(separate_quant_matrices)
48 49
 export(sharing_heatmap)
50
+export(sharing_venn)
49 51
 export(threshold_filter)
50 52
 export(top_abund_tableGrob)
51 53
 export(top_integrations)
... ...
@@ -101,7 +103,6 @@ importFrom(dplyr,slice)
101 103
 importFrom(dplyr,slice_head)
102 104
 importFrom(dplyr,starts_with)
103 105
 importFrom(dplyr,summarise)
104
-importFrom(dplyr,transmute)
105 106
 importFrom(dplyr,ungroup)
106 107
 importFrom(fs,as_fs_path)
107 108
 importFrom(fs,dir_create)
... ...
@@ -155,11 +156,9 @@ importFrom(purrr,map2_lgl)
155 156
 importFrom(purrr,map_chr)
156 157
 importFrom(purrr,map_dbl)
157 158
 importFrom(purrr,map_dfr)
158
-importFrom(purrr,map_int)
159 159
 importFrom(purrr,map_lgl)
160 160
 importFrom(purrr,pmap)
161 161
 importFrom(purrr,pmap_chr)
162
-importFrom(purrr,pmap_dbl)
163 162
 importFrom(purrr,pmap_df)
164 163
 importFrom(purrr,pmap_dfr)
165 164
 importFrom(purrr,pmap_lgl)
... ...
@@ -203,7 +202,6 @@ importFrom(stringr,str_replace_all)
203 202
 importFrom(stringr,str_split)
204 203
 importFrom(stringr,str_to_lower)
205 204
 importFrom(stringr,str_to_upper)
206
-importFrom(tibble,add_case)
207 205
 importFrom(tibble,add_column)
208 206
 importFrom(tibble,as_tibble)
209 207
 importFrom(tibble,as_tibble_col)
... ...
@@ -2,6 +2,24 @@
2 2
 title: "NEWS"
3 3
 output: github_document
4 4
 ---
5
+# ISAnalytics 1.3.5 (2021-09-21)
6
+
7
+## MAJOR CHANGES
8
+
9
+* Reworked `is_sharing()` function, detailed usage in vignette `vignette("sharing_analyses", package = "ISAnalytics")`
10
+
11
+## NEW
12
+
13
+* New function `cumulative_is()`
14
+* New function for plotting sharing as venn/euler diagrams `sharing_venn()`
15
+
16
+
17
+# ISAnalytics 1.3.4 (2021-08 -03)
18
+
19
+## FIXES/MINOR UPDATES
20
+* Fixed issue in tests that lead to broken build
21
+* Slightly modified included data set for better examples
22
+
5 23
 # ISAnalytics 1.3.3 (2021-07-30)
6 24
 
7 25
 ## MAJOR CHANGES
... ...
@@ -1,6 +1,19 @@
1 1
 NEWS
2 2
 ================
3 3
 
4
+# ISAnalytics 1.3.5 (2021-09-21)
5
+
6
+## MAJOR CHANGES
7
+
8
+-   Reworked `is_sharing()` function, detailed usage in vignette
9
+    `vignette("sharing_analyses", package = "ISAnalytics")`
10
+
11
+## NEW
12
+
13
+-   New function `cumulative_is()`
14
+-   New function for plotting sharing as venn/euler diagrams
15
+    `sharing_venn()`
16
+
4 17
 # ISAnalytics 1.3.4 (2021-08 -03)
5 18
 
6 19
 ## FIXES/MINOR UPDATES
... ...
@@ -49,6 +49,7 @@
49 49
 #'   * \code{\link{CIS_grubbs}}
50 50
 #'   * \code{\link{cumulative_count_union}}
51 51
 #'   * \code{\link{is_sharing}}
52
+#'   * \code{\link{cumulative_is}}
52 53
 #' * HSC population size estimate:
53 54
 #'   * \code{\link{HSC_population_size_estimate}}
54 55
 #' * Plotting functions:
... ...
@@ -58,6 +59,7 @@
58 59
 #'   * \code{\link{integration_alluvial_plot}}
59 60
 #'   * \code{\link{top_abund_tableGrob}}
60 61
 #'   * \code{\link{circos_genomic_density}}
62
+#'   * \code{\link{sharing_venn}}
61 63
 #' * Utility functions:
62 64
 #'   * \code{\link{generate_blank_association_file}}
63 65
 #'   * \code{\link{generate_Vispa2_launch_AF}}
... ...
@@ -72,6 +74,8 @@
72 74
 #' package = "ISAnalytics")}
73 75
 #' * \code{vignette("report_system",
74 76
 #' package = "ISAnalytics")}
77
+#' * \code{vignette("sharing_analyses",
78
+#' package = "ISAnalytics")}
75 79
 #'
76 80
 #' @docType package
77 81
 #' @name ISAnalytics
... ...
@@ -1081,60 +1081,158 @@ cumulative_count_union <- function(x,
1081 1081
     return(res)
1082 1082
 }
1083 1083
 
1084
+#' Expands integration matrix with the cumulative is union over time.
1085
+#'
1086
+#' @description \lifecycle{experimental}
1087
+#' Given an input integration matrix that can be grouped over time,
1088
+#' this function adds integrations in groups assuming that
1089
+#' if an integration is observed at time point "t" then it is also observed in
1090
+#' time point "t+1".
1091
+#'
1092
+#' @param x An integration matrix, ideally aggregated via
1093
+#' `aggregate_values_by_key()`
1094
+#' @param key The aggregation key used
1095
+#' @param timepoint_col The name of the time point column
1096
+#' @param include_tp_zero Should time point 0 be included?
1097
+#' @param keep_og_is Keep original set of integrations as a separate column?
1098
+#' @param expand If `FALSE`, for each group, the set of integration sites is
1099
+#' returned in a separate column as a nested table, otherwise the resulting
1100
+#' column is unnested.
1101
+#'
1102
+#' @family Analysis functions
1103
+#' @return A data frame
1104
+#' @export
1105
+#'
1106
+#' @importFrom rlang .data
1107
+#' @importFrom data.table .SD
1108
+#'
1109
+#' @examples
1110
+#' data("integration_matrices", package = "ISAnalytics")
1111
+#' data("association_file", package = "ISAnalytics")
1112
+#' aggreg <- aggregate_values_by_key(
1113
+#'     x = rlang::current_env()$integration_matrices,
1114
+#'     association_file = rlang::current_env()$association_file,
1115
+#'     value_cols = c("seqCount", "fragmentEstimate")
1116
+#' )
1117
+#' cumulated_is <- cumulative_is(aggreg)
1118
+#' cumulated_is
1119
+cumulative_is <- function(x,
1120
+    key = c(
1121
+        "SubjectID",
1122
+        "CellMarker",
1123
+        "Tissue",
1124
+        "TimePoint"
1125
+    ),
1126
+    timepoint_col = "TimePoint",
1127
+    include_tp_zero = FALSE,
1128
+    keep_og_is = TRUE,
1129
+    expand = FALSE) {
1130
+    stopifnot(is.data.frame(x))
1131
+    stopifnot(is.character(key))
1132
+    stopifnot(is.character(timepoint_col))
1133
+    timepoint_col <- timepoint_col[1]
1134
+    stopifnot(is.logical(include_tp_zero))
1135
+    include_tp_zero <- include_tp_zero[1]
1136
+    stopifnot(is.logical(keep_og_is))
1137
+    stopifnot(is.logical(expand))
1138
+    if (!timepoint_col %in% key) {
1139
+        rlang::abort(.key_without_tp_err())
1140
+    }
1141
+    if (!all(key %in% colnames(x))) {
1142
+        rlang::abort(.missing_user_cols_error(key[!key %in% colnames(x)]))
1143
+    }
1144
+    is_vars <- if (.is_annotated(x)) {
1145
+        c(mandatory_IS_vars(), annotation_IS_vars())
1146
+    } else {
1147
+        mandatory_IS_vars()
1148
+    }
1149
+    temp <- x %>%
1150
+        dplyr::select(dplyr::all_of(c(key, is_vars))) %>%
1151
+        dplyr::mutate(!!timepoint_col := as.numeric(.data[[timepoint_col]]))
1152
+    if (!include_tp_zero) {
1153
+        temp <- temp %>%
1154
+            dplyr::filter(.data[[timepoint_col]] != 0)
1155
+        if (nrow(temp) == 0) {
1156
+            rlang::inform(.only_zero_tp())
1157
+            return(NULL)
1158
+        }
1159
+    }
1160
+    temp <- temp %>%
1161
+        dplyr::group_by(dplyr::across({{ key }})) %>%
1162
+        dplyr::arrange(.data[[timepoint_col]], .by_group = TRUE) %>%
1163
+        dplyr::distinct(dplyr::across(dplyr::all_of(is_vars)),
1164
+            .keep_all = TRUE
1165
+        )
1166
+    temp <- data.table::setDT(temp)
1167
+    temp <- temp[, .(is = list(.SD)), by = key]
1168
+    no_tp_key <- key[key != timepoint_col]
1169
+    splitted <- split(temp, by = no_tp_key)
1170
+    cumulate <- purrr::map(splitted, function(x) {
1171
+        x[, cumulative_is := purrr::accumulate(
1172
+            is,
1173
+            ~ data.table::funion(.x, .y)
1174
+        )]
1175
+    })
1176
+    cumulate <- data.table::rbindlist(cumulate)
1177
+    if (!keep_og_is) {
1178
+        cumulate[, is := NULL]
1179
+    }
1180
+    if (expand) {
1181
+        cumulate <- tidyr::unnest(cumulate,
1182
+            cols = "cumulative_is"
1183
+        )
1184
+        cumulate <- data.table::setDT(cumulate)
1185
+    }
1186
+    cumulate
1187
+}
1188
+
1084 1189
 #' Sharing of integration sites between given groups.
1085 1190
 #'
1086 1191
 #' \lifecycle{experimental}
1087
-#' Computes the amount of integrations shared between the groups identified
1088
-#' by the fields in the `group_key` argument.
1192
+#' Computes the amount of integration sites shared between the groups identified
1193
+#' in the input data.
1194
+#'
1195
+#' @details
1089 1196
 #' An integration site is always identified by the triple
1090 1197
 #' `(chr, integration_locus, strand)`, thus these columns must be present
1091
-#' in the input data frame.
1198
+#' in the input(s).
1092 1199
 #'
1093
-#' @details
1094
-#' ## Input data frame
1095
-#' The data frame provided in input must be in a suitable format for the
1096
-#' calculations to be accurate. Please note that this function does not
1097
-#' perform any sort of aggregation, it only relies on counts of
1098
-#' distinct integration sites.
1099
-#'
1100
-#' ## Outputs
1101
-#' By default the function outputs a list of 2 data frames:
1102
-#' * The classical sharing data frame with absolute values.
1103
-#' If the argument `relative_is_sharing` is set to TRUE it also contains
1104
-#' the relative sharing (see below).
1105
-#' * The count of distinct IS for each group
1106
-#'
1107
-#' The relative sharing is calculated, for each pair of groups (A, B) as
1108
-#' 3 separate columns
1109
-#' \itemize{
1110
-#'   \item Shared over A: (intersection(A,B) / |A|) * 100
1111
-#'   \item Shared over B: (intersection(A,B) / |B|) * 100
1112
-#'   \item Shared over union: (intersection(A,B) / |union(A,B)|) * 100
1113
-#' }
1200
+#' The function accepts multiple inputs for different scenarios, please refer
1201
+#' to the vignette
1202
+#' \code{vignette("sharing_analyses", package = "ISAnalytics")}
1203
+#' for a more in-depth explanation.
1204
+#'
1205
+#' ## Output
1206
+#' The function outputs a single data frame containing all requested
1207
+#' comparisons and optionally individual group counts, genomic coordinates
1208
+#' of the shared integration sites and truth tables for plotting venn diagrams.
1114 1209
 #'
1115 1210
 #' ## Plotting sharing
1116 1211
 #' The sharing data obtained can be easily plotted in a heatmap via the
1117
-#' function \code{\link{sharing_heatmap}}.
1212
+#' function \code{\link{sharing_heatmap}} or via the function
1213
+#' \code{\link{sharing_venn}}
1118 1214
 #'
1119
-#' @param x An integration matrix, aka a data frame containing the columns
1120
-#' `mandatory_IS_vars()`. See details.
1215
+#' @param ... One or more integration matrices
1121 1216
 #' @param group_key Character vector of column names which identify a
1122 1217
 #' single group. An associated group id will be derived by concatenating
1123 1218
 #' the values of these fields, separated by "_"
1124
-#' @param is_count Logical, if TRUE returns also the count of IS for
1125
-#' each group
1126
-#' @param relative_is_sharing Logical, if TRUE also returns the relative
1127
-#' sharing. See details.
1128
-#'
1129
-#' @importFrom rlang abort .data
1130
-#' @importFrom dplyr select all_of distinct transmute inner_join filter
1131
-#' @importFrom dplyr pull mutate
1132
-#' @importFrom tidyr nest unite
1133
-#' @importFrom purrr map_int pmap_dbl
1134
-#' @importFrom tibble tibble add_case
1219
+#' @param group_keys A list of keys for asymmetric grouping.
1220
+#' If not NULL the argument `group_key` is ignored
1221
+#' @param n_comp Number of comparisons to compute. This argument is relevant
1222
+#' only if provided a single data frame and a single key.
1223
+#' @param is_count Logical, if `TRUE` returns also the count of IS for
1224
+#' each group and the count for the union set
1225
+#' @param relative_is_sharing Logical, if `TRUE` also returns the relative
1226
+#' sharing.
1227
+#' @param minimal Compute only combinations instead of all possible
1228
+#' permutations? If `TRUE` saves time and excludes redundant comparisons.
1229
+#' @param include_self_comp Include comparisons with the same group?
1230
+#' @param keep_genomic_coord If `TRUE` keeps the genomic coordinates of the
1231
+#' shared integration sites in a dedicated column (as a nested table)
1232
+#' @param table_for_venn Add column with truth tables for venn plots?
1135 1233
 #'
1136 1234
 #' @family Analysis functions
1137
-#' @return A named list of data frames or a single data frame
1235
+#' @return A data frame
1138 1236
 #' @export
1139 1237
 #'
1140 1238
 #' @examples
... ...
@@ -1147,144 +1245,220 @@ cumulative_count_union <- function(x,
1147 1245
 #' )
1148 1246
 #' sharing <- is_sharing(aggreg)
1149 1247
 #' sharing
1150
-is_sharing <- function(x,
1248
+is_sharing <- function(...,
1151 1249
     group_key = c(
1152 1250
         "SubjectID",
1153 1251
         "CellMarker",
1154 1252
         "Tissue",
1155 1253
         "TimePoint"
1156 1254
     ),
1255
+    group_keys = NULL,
1256
+    n_comp = 2,
1157 1257
     is_count = TRUE,
1158
-    relative_is_sharing = TRUE) {
1258
+    relative_is_sharing = TRUE,
1259
+    minimal = TRUE,
1260
+    include_self_comp = FALSE,
1261
+    keep_genomic_coord = FALSE,
1262
+    table_for_venn = FALSE) {
1159 1263
     ## Checks
1160
-    stopifnot(is.data.frame(x))
1161
-    stopifnot(is.character(group_key))
1264
+    if (!requireNamespace("gtools", quietly = TRUE)) {
1265
+        rlang::abort(.missing_pkg_error("gtools"))
1266
+    }
1267
+    dots <- rlang::list2(...)
1268
+    if (is.null(dots) || purrr::is_empty(dots)) {
1269
+        rlang::abort(.no_data_supp())
1270
+    }
1271
+    all_dfs <- purrr::map_lgl(dots, ~ is.data.frame(.x))
1272
+    if (!all(all_dfs)) {
1273
+        rlang::abort(.non_df_input_err())
1274
+    }
1275
+    stopifnot(is.null(group_keys) || is.list(group_keys))
1276
+    stopifnot(is.null(group_key) || is.character(group_key))
1277
+    stopifnot(is.logical(minimal))
1278
+    stopifnot(is.logical(keep_genomic_coord))
1279
+    stopifnot(is.logical(table_for_venn))
1280
+    key_mode <- if (!is.null(group_keys)) {
1281
+        if (any(purrr::map_lgl(
1282
+            group_keys,
1283
+            ~ all(is.character(.x))
1284
+        ) == FALSE)) {
1285
+            rlang::abort(.keys_not_char_err())
1286
+        }
1287
+        if (length(unique(group_keys)) == 1) {
1288
+            if (getOption("ISAnalytics.verbose") == TRUE) {
1289
+                one_key_list <- c("Single key in list",
1290
+                    i = paste(
1291
+                        "Provided a single key in list,",
1292
+                        "automatically performing",
1293
+                        "group comparisons"
1294
+                    )
1295
+                )
1296
+                rlang::inform(one_key_list, class = "one_key_list")
1297
+            }
1298
+            group_key <- group_keys[[1]]
1299
+            "SINGLE_KEY"
1300
+        } else {
1301
+            if (is.null(names(group_keys))) {
1302
+                rlang::inform(.unnamed_keys_warn())
1303
+                def_keys <- paste0("g", seq_along(group_keys))
1304
+                names(group_keys) <- def_keys
1305
+            }
1306
+            "MULT_KEY"
1307
+        }
1308
+    } else {
1309
+        if (!is.character(group_key)) {
1310
+            rlang::abort(.keys_not_char_err())
1311
+        }
1312
+        "SINGLE_KEY"
1313
+    }
1314
+    if (key_mode == "SINGLE_KEY") {
1315
+        stopifnot(is.logical(include_self_comp))
1316
+    }
1317
+    df_mode <- if (length(dots) == 1) {
1318
+        "SINGLE_DF"
1319
+    } else {
1320
+        "MULT_DF"
1321
+    }
1162 1322
     stopifnot(is.logical(is_count))
1163 1323
     stopifnot(is.logical(relative_is_sharing))
1164
-    if (!all(group_key %in% colnames(x))) {
1165
-        rlang::abort(
1166
-            .missing_user_cols_error(
1167
-                group_key[!group_key %in% colnames(x)]
1324
+    if (df_mode == "SINGLE_DF") {
1325
+        ## Single dataframe provided
1326
+        if (!all(mandatory_IS_vars() %in% colnames(dots[[1]]))) {
1327
+            rlang::abort(
1328
+                .missing_mand_vars()
1168 1329
             )
1330
+        }
1331
+        if (key_mode == "SINGLE_KEY") {
1332
+            ## Single df - Single key
1333
+            if (!all(group_key %in% colnames(dots[[1]]))) {
1334
+                rlang::abort(
1335
+                    .missing_user_cols_error(
1336
+                        group_key[!group_key %in% colnames(dots[[1]])]
1337
+                    )
1338
+                )
1339
+            }
1340
+            stopifnot(is.numeric(n_comp) || is.integer(n_comp))
1341
+            n_comp <- n_comp[1]
1342
+            if (n_comp < 2) {
1343
+                rlang::abort("`n_comp` must be at least 2")
1344
+            }
1345
+        } else {
1346
+            ## Single df - multiple keys
1347
+            all_cols <- unique(unlist(group_keys))
1348
+            if (!all(all_cols %in% colnames(dots[[1]]))) {
1349
+                rlang::abort(
1350
+                    .missing_user_cols_error(
1351
+                        all_cols[!all_cols %in% colnames(dots[[1]])]
1352
+                    )
1353
+                )
1354
+            }
1355
+        }
1356
+    } else {
1357
+        all_mand_vars <- purrr::map_lgl(
1358
+            dots,
1359
+            ~ all(mandatory_IS_vars() %in%
1360
+                colnames(.x))
1169 1361
         )
1170
-    }
1171
-    if (!all(mandatory_IS_vars() %in% colnames(x))) {
1172
-        rlang::abort(
1173
-            .missing_needed_cols(
1174
-                mandatory_IS_vars()[!mandatory_IS_vars() %in% colnames(x)]
1362
+        if (!all(all_mand_vars)) {
1363
+            missing_mand_at <- c("Missing mandatory vars in data frames",
1364
+                i = paste(
1365
+                    "At positions: ",
1366
+                    paste0(which(!all_mand_vars),
1367
+                        collapse = ", "
1368
+                    )
1369
+                )
1175 1370
             )
1176
-        )
1177
-    }
1178
-
1179
-    ## --- Nest
1180
-    nested <- x %>%
1181
-        dplyr::select(dplyr::all_of(c(mandatory_IS_vars(), group_key))) %>%
1182
-        dplyr::distinct() %>%
1183
-        tidyr::nest(is_set = mandatory_IS_vars()) %>%
1184
-        tidyr::unite(col = "group_id", group_key)
1185
-
1186
-    ## --- Number of IS for each group
1187
-    is_n <- nested %>%
1188
-        dplyr::transmute(
1189
-            group_id = .data$group_id,
1190
-            num_IS = purrr::map_int(.data$is_set, nrow)
1191
-        )
1192
-
1193
-    ## --- Absolute numeber of IS shared
1194
-    abs_shared_df <- tibble::tibble(
1195
-        group1 = character(0),
1196
-        group2 = character(0),
1197
-        shared = integer(0)
1198
-    )
1199
-    group_ids <- unique(nested$group_id)
1200
-    groups2 <- group_ids
1201
-    for (i in seq_along(group_ids)) {
1202
-        id1 <- group_ids[i]
1203
-        if (i > 1) {
1204
-            groups2 <- groups2[-1]
1371
+            rlang::abort(missing_mand_at)
1205 1372
         }
1206
-        for (id2 in groups2) {
1207
-            if (id1 != id2) {
1208
-                shared_n <- dplyr::inner_join(
1209
-                    x = (dplyr::filter(nested, .data$group_id == id1) %>%
1210
-                        dplyr::pull(.data$is_set))[[1]],
1211
-                    y = (dplyr::filter(nested, .data$group_id == id2) %>%
1212
-                        dplyr::pull(.data$is_set))[[1]],
1213
-                    by = mandatory_IS_vars()
1214
-                ) %>% nrow()
1215
-                abs_shared_df <- abs_shared_df %>%
1216
-                    tibble::add_case(
1217
-                        group1 = c(id1, id2),
1218
-                        group2 = c(id2, id1),
1219
-                        shared = shared_n
1373
+        if (key_mode == "SINGLE_KEY") {
1374
+            ## Multiple df - single key
1375
+            key_found_df <- purrr::map_lgl(
1376
+                dots,
1377
+                ~ all(group_key %in% colnames(.x))
1378
+            )
1379
+            if (!all(key_found_df)) {
1380
+                err_msg_key_not_found <- paste(
1381
+                    "Key not found in data frames",
1382
+                    paste0(which(!key_found_df),
1383
+                        collapse = ", "
1384
+                    )
1385
+                )
1386
+                rlang::abort(err_msg_key_not_found)
1387
+            }
1388
+        } else {
1389
+            ## Multiple df - multiple keys
1390
+            if (length(dots) != length(group_keys)) {
1391
+                keys_length_err <- c("Wrong key length",
1392
+                    i = paste(
1393
+                        "When providing multiple",
1394
+                        "input data frames,",
1395
+                        "`group_keys` must have",
1396
+                        "the same length"
1220 1397
                     )
1221
-            } else {
1222
-                shared_n <- nrow((dplyr::filter(
1223
-                    nested,
1224
-                    .data$group_id == id1
1225
-                ) %>%
1226
-                    dplyr::pull(.data$is_set))[[1]])
1227
-                abs_shared_df <- abs_shared_df %>%
1228
-                    tibble::add_case(
1229
-                        group1 = id1,
1230
-                        group2 = id2,
1231
-                        shared = shared_n
1398
+                )
1399
+                rlang::abort(keys_length_err)
1400
+            }
1401
+            keys_ok <- purrr::map2_lgl(
1402
+                dots, group_keys,
1403
+                ~ all(.y %in% colnames(.x))
1404
+            )
1405
+            if (!all(keys_ok)) {
1406
+                mult_key_err <- c("Some keys not found in corresponding df",
1407
+                    x = paste(
1408
+                        "Issues identified at positions:",
1409
+                        paste0(which(!keys_ok),
1410
+                            collapse = ", "
1411
+                        )
1232 1412
                     )
1413
+                )
1414
+                rlang::abort(mult_key_err)
1233 1415
             }
1234 1416
         }
1235 1417
     }
1236
-    ### --- Relative number of IS shared
1237
-    if (relative_is_sharing) {
1238
-        abs_shared_df <- abs_shared_df %>%
1239
-            dplyr::mutate(
1240
-                on_g1 = purrr::pmap_dbl(
1241
-                    list(.data$group1, .data$group2, .data$shared),
1242
-                    function(x, y, s) {
1243
-                        if (x == y) {
1244
-                            100
1245
-                        } else {
1246
-                            x_count <- dplyr::filter(
1247
-                                is_n,
1248
-                                .data$group_id == x
1249
-                            ) %>%
1250
-                                dplyr::pull(.data$num_IS)
1251
-                            (s / x_count) * 100
1252
-                        }
1253
-                    }
1254
-                ),
1255
-                on_g2 = purrr::pmap_dbl(
1256
-                    list(.data$group1, .data$group2, .data$shared),
1257
-                    function(x, y, s) {
1258
-                        if (x == y) {
1259
-                            100
1260
-                        } else {
1261
-                            y_count <- dplyr::filter(
1262
-                                is_n,
1263
-                                .data$group_id == y
1264
-                            ) %>%
1265
-                                dplyr::pull(.data$num_IS)
1266
-                            (s / y_count) * 100
1267
-                        }
1268
-                    }
1269
-                ),
1270
-                on_union = purrr::pmap_dbl(
1271
-                    list(.data$group1, .data$group2, .data$shared),
1272
-                    function(x, y, s) {
1273
-                        x_count <- dplyr::filter(is_n, .data$group_id == x) %>%
1274
-                            dplyr::pull(.data$num_IS)
1275
-                        y_count <- dplyr::filter(is_n, .data$group_id == y) %>%
1276
-                            dplyr::pull(.data$num_IS)
1277
-                        union_count <- (s / (x_count + y_count - s)) * 100
1278
-                    }
1279
-                )
1280
-            )
1418
+    sharing <- if (key_mode == "SINGLE_KEY" & df_mode == "SINGLE_DF") {
1419
+        .sharing_singledf_single_key(
1420
+            df = dots[[1]],
1421
+            key = group_key,
1422
+            minimal = minimal,
1423
+            n_comp = n_comp,
1424
+            is_count = is_count,
1425
+            rel_sharing = relative_is_sharing,
1426
+            include_self_comp = include_self_comp,
1427
+            keep_genomic_coord = keep_genomic_coord,
1428
+            venn = table_for_venn
1429
+        )
1430
+    } else if (key_mode == "SINGLE_KEY" & df_mode == "MULT_DF") {
1431
+        .sharing_multdf_single_key(
1432
+            dfs = dots, key = group_key,
1433
+            minimal = minimal, is_count = is_count,
1434
+            rel_sharing = relative_is_sharing,
1435
+            keep_genomic_coord = keep_genomic_coord,
1436
+            venn = table_for_venn
1437
+        )
1438
+    } else if (key_mode == "MULT_KEY" & df_mode == "SINGLE_DF") {
1439
+        .sharing_singledf_mult_key(
1440
+            df = dots[[1]],
1441
+            keys = group_keys,
1442
+            minimal = minimal,
1443
+            is_count = is_count,
1444
+            rel_sharing = relative_is_sharing,
1445
+            keep_genomic_coord = keep_genomic_coord,
1446
+            venn = table_for_venn
1447
+        )
1448
+    } else {
1449
+        .sharing_multdf_mult_key(
1450
+            dfs = dots, keys = group_keys,
1451
+            minimal = minimal,
1452
+            is_count = is_count,
1453
+            rel_sharing = relative_is_sharing,
1454
+            keep_genomic_coord = keep_genomic_coord,
1455
+            venn = table_for_venn
1456
+        )
1281 1457
     }
1282
-
1283
-    if (!is_count) {
1284
-        return(abs_shared_df)
1458
+    if (getOption("ISAnalytics.verbose") == TRUE) {
1459
+        rlang::inform("Done!")
1285 1460
     }
1286
-
1287
-    return(list(is_count = is_n, sharing = abs_shared_df))
1461
+    return(sharing)
1288 1462
 }
1289 1463
 
1290 1464
 
... ...
@@ -233,7 +233,7 @@ remove_collisions <- function(x,
233 233
     final_matr <- fixed_collisions %>%
234 234
         dplyr::bind_rows(splitted_df$non_collisions) %>%
235 235
         dplyr::select(dplyr::all_of(colnames(pre_process)))
236
-    if (getOption("ISAnalytics.reports") == TRUE) {
236
+    if (getOption("ISAnalytics.reports") == TRUE & !is.null(report_path)) {
237 237
         input_summary <- .summary_input(x, quant_cols)
238 238
         missing_smpl <- if (!is.null(missing_ind)) {
239 239
             x[missing_ind, ] %>%
... ...
@@ -259,7 +259,10 @@ remove_collisions <- function(x,
259 259
             group_key = c(
260 260
                 "ProjectID",
261 261
                 "SubjectID"
262
-            )
262
+            ), n_comp = 2,
263
+            is_count = FALSE,
264
+            minimal = FALSE,
265
+            include_self_comp = TRUE
263 266
         )
264 267
         coll_info <- list(
265 268
             coll_n = splitted_df$collisions %>%
... ...
@@ -285,7 +288,10 @@ remove_collisions <- function(x,
285 288
             group_key = c(
286 289
                 "ProjectID",
287 290
                 "SubjectID"
288
-            )
291
+            ), n_comp = 2,
292
+            is_count = FALSE,
293
+            minimal = FALSE,
294
+            include_self_comp = TRUE
289 295
         )
290 296
         summary_tbl <- .summary_table(
291 297
             before = joined, after = post_joined,
... ...
@@ -345,7 +351,7 @@ remove_collisions <- function(x,
345 351
 #' \code{vignette("collision_removal", package = "ISAnalytics")}
346 352
 #'
347 353
 #' @param sc_matrix The sequence count matrix already processed for collisions
348
-#' via `remove_collisions`
354
+#' via `remove_collisions()`
349 355
 #' @param other_matrices A named list of matrices to re-align. Names in the list
350 356
 #' must be quantification types (\code{quantification_types()}) except
351 357
 #' "seqCount".
... ...
@@ -4160,3 +4160,578 @@
4160 4160
         }
4161 4161
     )
4162 4162
 }
4163
+
4164
+#---- USED IN : is_sharing ----
4165
+## Internal to find absolute shared number of is between an arbitrary
4166
+## number of groups
4167
+## Dots are group names in sharing df (actually a data.table)
4168
+.find_in_common <- function(..., lookup_tbl, keep_genomic_coord) {
4169
+    groups <- as.list(...)
4170
+    in_common <- purrr::pmap(groups, function(...) {
4171
+        grps <- list(...)
4172
+        filt <- lookup_tbl[group_id %chin% grps, ]
4173
+        common <- purrr::reduce(filt$is, function(l, r) {
4174
+            l[r, on = mandatory_IS_vars(), nomatch = 0]
4175
+        })
4176
+        common
4177
+    })
4178
+    if (!keep_genomic_coord) {
4179
+        return(purrr::map(in_common, ~ nrow(.x)))
4180
+    }
4181
+    return(list(purrr::map(in_common, ~ nrow(.x)), in_common))
4182
+}
4183
+
4184
+## Internal, to use on each row of the combinations df.
4185
+## Expands the row with all its permutations keeping same absolute shared is
4186
+## and counts if present
4187
+## - ... : row passed as a list
4188
+## - g_names: names of the groups (g1, g2...)
4189
+## - counts: are counts present? TRUE/FALSE
4190
+.sh_row_permut <- function(..., g_names, counts) {
4191
+    og_row <- list(...)
4192
+    coord <- if ("is_coord" %in% names(og_row)) {
4193
+        og_row$is_coord <- list(og_row$is_coord)
4194
+        TRUE
4195
+    } else {
4196
+        FALSE
4197
+    }
4198
+    ids <- unlist(og_row[g_names])
4199
+    og_row <- data.table::setDT(og_row)
4200
+    # If row of all equal elements no need for permutations
4201
+    if (length(unique(ids)) == 1) {
4202
+        return(og_row)
4203
+    }
4204
+    # If elements are different
4205
+    shared_is <- og_row$shared
4206
+    if (counts) {
4207
+        count_union <- og_row$count_union
4208
+    }
4209
+    perm <- gtools::permutations(
4210
+        n = length(g_names),
4211
+        r = length(g_names),
4212
+        v = g_names,
4213
+        set = TRUE,
4214
+        repeats.allowed = FALSE
4215
+    )
4216
+    colnames(perm) <- g_names
4217
+    perm <- data.table::setDT(as.data.frame(perm))
4218
+    perm[, shared := shared_is]
4219
+    if (coord) {
4220
+        perm[, is_coord := list(og_row$is_coord)]
4221
+    }
4222
+    if (counts) {
4223
+        for (g in g_names) {
4224
+            count_col <- paste0("count_", g)
4225
+            perm[, c(count_col) := paste0("count_", get(g))]
4226
+        }
4227
+        perm[, count_union := count_union]
4228
+    }
4229
+    sub_with_val <- function(val) {
4230
+        unlist(purrr::map(val, ~ og_row[[.x]]))
4231
+    }
4232
+    for (g in g_names) {
4233
+        perm[, c(g) := .(sub_with_val(get(g)))]
4234
+        if (counts) {
4235
+            count_col <- paste0("count_", g)
4236
+            perm[, c(count_col) := .(sub_with_val(get(count_col)))]
4237
+        }
4238
+    }
4239
+    return(perm)
4240
+}
4241
+
4242
+# Counts the number of integrations in the union of all groups of a row
4243
+.count_group_union <- function(..., col_groups, lookup_tbl) {
4244
+    dots <- list(...)
4245
+    if ("is_coord" %in% names(dots)) {
4246
+        dots$is_coord <- list(dots$is_coord)
4247
+    }
4248
+    row <- data.table::setDT(dots)
4249
+    groups_in_row <- row[1, ..col_groups]
4250
+    sub_lookup <- lookup_tbl[group_id %chin% groups_in_row]
4251
+    count_union <- nrow(purrr::reduce(sub_lookup$is, data.table::funion))
4252
+    row[, count_union := count_union]
4253
+    return(row)
4254
+}
4255
+
4256
+# Obtains lookup table for groups
4257
+.sh_obtain_lookup <- function(key, df) {
4258
+    temp <- df %>%
4259
+        dplyr::select(dplyr::all_of(c(key, mandatory_IS_vars()))) %>%
4260
+        dplyr::group_by(dplyr::across({{ key }})) %>%
4261
+        dplyr::distinct(dplyr::across(dplyr::all_of(mandatory_IS_vars())),
4262
+            .keep_all = TRUE
4263
+        ) %>%
4264
+        tidyr::unite(col = "group_id", dplyr::all_of(key))
4265
+    temp <- data.table::setDT(temp)
4266
+    temp <- temp[, .(is = list(.SD)), by = "group_id"]
4267
+    return(temp)
4268
+}
4269
+
4270
+# Obtains truth table for venn diagrams. To apply to each row with pmap.
4271
+# - groups : g1, g2, g3...
4272
+# - lookup : either list (mult/mult) or df
4273
+.sh_truth_tbl_venn <- function(..., lookup, groups) {
4274
+    row <- list(...)
4275
+    if (!is.data.frame(lookup)) {
4276
+        # If lookup is a list and not single df
4277
+        retrieve_is <- function(group_name) {
4278
+            label <- row[[group_name]] # a string
4279
+            retrieved <- (lookup[[group_name]])[group_id == label]
4280
+            retrieved[, group_id := paste0(group_id, "(", group_name, ")")]
4281
+            retrieved <- retrieved %>%
4282
+                tidyr::unnest(.data$is) %>%
4283
+                tidyr::unite(
4284
+                    col = "int_id",
4285
+                    dplyr::all_of(mandatory_IS_vars())
4286
+                ) %>%
4287
+                data.table::setDT()
4288
+        }
4289
+        retrieved_iss <- purrr::map(groups, retrieve_is)
4290
+        retrieved_iss <- data.table::rbindlist(retrieved_iss)
4291
+        retrieved_iss[, observed := TRUE]
4292
+        truth_tbl <- data.table::dcast(retrieved_iss, int_id ~ group_id,
4293
+            value.var = "observed",
4294
+            fill = FALSE
4295
+        )
4296
+        return(truth_tbl)
4297
+    } else {
4298
+        labels <- unlist(row[groups])
4299
+        retrieved <- lookup[group_id %in% labels]
4300
+        retrieved <- retrieved %>%
4301
+            tidyr::unnest(.data$is) %>%
4302
+            tidyr::unite(
4303
+                col = "int_id",
4304
+                dplyr::all_of(mandatory_IS_vars())
4305
+            ) %>%
4306
+            data.table::setDT()
4307
+        retrieved[, observed := TRUE]
4308
+        truth_tbl <- data.table::dcast(retrieved, int_id ~ group_id,
4309
+            value.var = "observed",
4310
+            fill = FALSE
4311
+        )
4312
+        return(truth_tbl)
4313
+    }
4314
+}
4315
+
4316
+## Computes sharing table for single input df and single key
4317
+## -key: name of the columns to do a group by
4318
+## -minimal: true or false, if true computes ONLY combinations and not
4319
+## all permutations
4320
+## -n_comp: number of comparisons (2-way sharing, 3-way sharing...).
4321
+## Should be less or equal to the distinct number of groups
4322
+## -is_count: keep the counts in the table?
4323
+## -rel_sharing: compute relative sharing? (on g_i & on_union)
4324
+## -include_self_comp: include rows with the same group for each comparison?
4325
+## useful for heatmaps. Sharing for this rows is always 100%
4326
+.sharing_singledf_single_key <- function(df, key, minimal, n_comp,
4327
+    is_count, rel_sharing,
4328
+    include_self_comp,
4329
+    keep_genomic_coord,
4330
+    venn) {
4331
+    temp <- .sh_obtain_lookup(key, df)
4332
+    # Check n and k
4333
+    if (nrow(temp) < n_comp) {
4334
+        n_comp <- nrow(temp)
4335
+        if (getOption("ISAnalytics.verbose") == TRUE) {
4336
+            warn_msg <- c("Number of requested comparisons too big",
4337
+                i = paste(
4338
+                    "The number of requested comparisons",
4339
+                    "is greater than the number of groups.",
4340
+                    "Reducing comparisons to the biggest value",
4341
+                    "allowed"
4342
+                )
4343
+            )
4344
+            rlang::inform(warn_msg)
4345
+        }
4346
+    }
4347
+    if (getOption("ISAnalytics.verbose") == TRUE) {
4348
+        rlang::inform("Calculating combinations...")
4349
+    }
4350
+    group_comb <- gtools::combinations(
4351
+        n = length(temp$group_id),
4352
+        r = n_comp,
4353
+        v = temp$group_id,
4354
+        set = TRUE,
4355
+        repeats.allowed = FALSE
4356
+    )
4357
+    cols <- paste0("g", seq_len(ncol(group_comb)))
4358
+    colnames(group_comb) <- cols
4359
+    group_comb <- data.table::setDT(as.data.frame(group_comb))
4360
+    is_counts <- temp %>%
4361
+        dplyr::mutate(count = purrr::map_int(is, ~ nrow(.x))) %>%
4362
+        dplyr::select(-.data$is)
4363
+    sharing_df <- if (!keep_genomic_coord) {
4364
+        group_comb[, shared := .find_in_common(.SD,
4365
+            lookup_tbl = temp,
4366
+            keep_genomic_coord = keep_genomic_coord
4367
+        ),
4368
+        .SDcols = cols
4369
+        ]
4370
+    } else {
4371
+        group_comb[, c("shared", "is_coord") := .find_in_common(.SD,
4372
+            lookup_tbl = temp,
4373
+            keep_genomic_coord = keep_genomic_coord
4374
+        ),
4375
+        .SDcols = cols
4376
+        ]
4377
+    }
4378
+
4379
+    if (include_self_comp) {
4380
+        ## Calculate groups with equal components
4381
+        if (getOption("ISAnalytics.verbose") == TRUE) {
4382
+            rlang::inform("Calculating self groups (requested)...")
4383
+        }
4384
+        self_rows <- purrr::map2_df(
4385
+            is_counts$group_id, is_counts$count,
4386
+            function(x, y) {
4387
+                row_ls <- as.list(setNames(rep_len(
4388
+                    x,
4389
+                    length.out = n_comp
4390
+                ), nm = cols))
4391
+                row <- data.table::setDT(row_ls)
4392
+                row[, shared := y]
4393
+                if (keep_genomic_coord) {
4394
+                    row[, is_coord := list(temp[group_id == x]$is)]
4395
+                }
4396
+                return(row)
4397
+            }
4398
+        )
4399
+        sharing_df <- data.table::rbindlist(list(
4400
+            self_rows,
4401
+            sharing_df
4402
+        ))
4403
+    }
4404
+    group_cols <- colnames(sharing_df)[!colnames(sharing_df) %in% c(
4405
+        "shared",
4406
+        "is_coord"
4407
+    )]
4408
+    if (is_count || rel_sharing) {
4409
+        ## Add counts -groups
4410
+        for (col in group_cols) {
4411
+            count_col_name <- paste0("count_", col)
4412
+            sharing_df <- sharing_df[
4413
+                dplyr::rename(
4414
+                    is_counts,
4415
+                    !!col := .data$group_id,
4416
+                    !!count_col_name := .data$count
4417
+                ),
4418
+                on = col,
4419
+                nomatch = 0
4420
+            ]
4421
+        }
4422
+        ## Add counts -union
4423
+        sharing_df <- purrr::pmap_df(sharing_df, .count_group_union,
4424
+            col_groups = group_cols,
4425
+            lookup_tbl = temp
4426
+        )
4427
+    }
4428
+    if (!minimal) {
4429
+        if (getOption("ISAnalytics.verbose") == TRUE) {
4430
+            rlang::inform("Calculating permutations (requested)...")
4431
+        }
4432
+        sharing_df <- purrr::pmap_df(sharing_df, .sh_row_permut,
4433
+            g_names = group_cols,
4434
+            counts = is_count || rel_sharing
4435
+        )
4436
+    }
4437
+    if (rel_sharing) {
4438
+        # for groups
4439
+        for (col in group_cols) {
4440
+            rel_col_name <- paste0("on_", col)
4441
+            count_col_name <- paste0("count_", col)
4442
+            sharing_df <- sharing_df %>%
4443
+                dplyr::mutate(!!rel_col_name := (.data$shared /
4444
+                    .data[[count_col_name]]) * 100)
4445
+        }
4446
+        # for union
4447
+        sharing_df <- sharing_df %>%
4448
+            dplyr::mutate(on_union = (.data$shared /
4449
+                .data$count_union) * 100)
4450
+    }
4451
+    if (!is_count) {
4452
+        sharing_df <- sharing_df %>%
4453
+            dplyr::select(!dplyr::contains("count_"))
4454
+    }
4455
+    if (venn) {
4456
+        sharing_df <- sharing_df %>%
4457
+            dplyr::mutate(truth_tbl_venn = purrr::pmap(.,
4458
+                .sh_truth_tbl_venn,
4459
+                lookup = temp,
4460
+                groups = group_cols
4461
+            ))
4462
+    }
4463
+    sharing_df
4464
+}
4465
+
4466
+## Computes sharing table for single input df and multiple keys
4467
+## -keys: name of the columns to do a group by (it is a named list)
4468
+## -minimal: true or false, if true computes ONLY combinations and not
4469
+## all permutations
4470
+## -is_count: keep the counts in the table?
4471
+## -rel_sharing: compute relative sharing? (on g_i & on_union)
4472
+.sharing_singledf_mult_key <- function(df, keys,
4473
+    minimal, is_count,
4474
+    rel_sharing, keep_genomic_coord,
4475
+    venn) {
4476
+    g_names <- names(keys)
4477
+    ## Obtain lookup table for each key
4478
+    lookup <- purrr::map(keys, ~ .sh_obtain_lookup(.x, df))
4479
+    group_labels <- purrr::map(lookup, ~ .x$group_id)
4480
+    unique_keys <- names(keys[!duplicated(keys)])
4481
+    lookup <- data.table::rbindlist(lookup[unique_keys])
4482
+    ## Obtain combinations
4483
+    combin <- data.table::setDT(purrr::cross_df(group_labels))
4484
+    sharing_df <- if (!keep_genomic_coord) {
4485
+        combin[, shared := .find_in_common(.SD,
4486
+            lookup_tbl = lookup,
4487
+            keep_genomic_coord = keep_genomic_coord
4488
+        ),
4489
+        .SDcols = g_names
4490
+        ]
4491
+    } else {
4492
+        combin[, c("shared", "is_coord") := .find_in_common(.SD,
4493
+            lookup_tbl = lookup,
4494
+            keep_genomic_coord = keep_genomic_coord
4495
+        ),
4496
+        .SDcols = g_names
4497
+        ]
4498
+    }
4499
+
4500
+    if (is_count || rel_sharing) {
4501
+        is_counts <- lookup %>%
4502
+            dplyr::mutate(count = purrr::map_int(is, ~ nrow(.x))) %>%
4503
+            dplyr::select(-.data$is)
4504
+        ## Add counts -groups
4505
+        for (col in g_names) {
4506
+            count_col_name <- paste0("count_", col)
4507
+            sharing_df <- sharing_df[
4508
+                dplyr::rename(
4509
+                    is_counts,
4510
+                    !!col := .data$group_id,
4511
+                    !!count_col_name := .data$count
4512
+                ),
4513
+                on = col,
4514
+                nomatch = 0
4515
+            ]
4516
+        }
4517
+        ## Add counts -union
4518
+        sharing_df <- purrr::pmap_df(sharing_df, .count_group_union,
4519
+            col_groups = g_names,
4520
+            lookup_tbl = lookup
4521
+        )
4522
+    }
4523
+    if (!minimal) {
4524
+        if (getOption("ISAnalytics.verbose") == TRUE) {
4525
+            rlang::inform("Calculating permutations (requested)...")
4526
+        }
4527
+        sharing_df <- purrr::pmap_df(sharing_df, .sh_row_permut,
4528
+            g_names = g_names,
4529
+            counts = is_count || rel_sharing
4530
+        )
4531
+    }
4532
+    if (rel_sharing) {
4533
+        # for groups
4534
+        for (col in g_names) {
4535
+            rel_col_name <- paste0("on_", col)
4536
+            count_col_name <- paste0("count_", col)
4537
+            sharing_df <- sharing_df %>%
4538
+                dplyr::mutate(!!rel_col_name := (.data$shared /
4539
+                    .data[[count_col_name]]) * 100)
4540
+        }
4541
+        # for union
4542
+        sharing_df <- sharing_df %>%
4543
+            dplyr::mutate(on_union = (.data$shared /
4544
+                .data$count_union) * 100)
4545
+    }
4546
+    if (!is_count) {
4547
+        sharing_df <- sharing_df %>%
4548
+            dplyr::select(!dplyr::contains("count_"))
4549
+    }
4550
+    if (venn) {
4551
+        sharing_df <- sharing_df %>%
4552
+            dplyr::mutate(truth_tbl_venn = purrr::pmap(.,
4553
+                .sh_truth_tbl_venn,
4554
+                lookup = lookup,
4555
+                groups = g_names
4556
+            ))
4557
+    }
4558
+    sharing_df
4559
+}
4560
+
4561
+## Computes sharing table for mult input dfs and single key
4562
+.sharing_multdf_single_key <- function(dfs, key, minimal,
4563
+    is_count, rel_sharing, keep_genomic_coord, venn) {
4564
+    if (is.null(names(dfs))) {
4565
+        g_names <- paste0("g", seq_len(length(dfs)))
4566
+        names(dfs) <- g_names
4567
+    }
4568
+    lookups <- purrr::map(dfs, ~ .sh_obtain_lookup(key, .x))
4569
+    group_labels <- purrr::map(lookups, ~ .x$group_id)
4570
+    lookup <- data.table::rbindlist(lookups)
4571
+    ## Obtain combinations
4572
+    combin <- data.table::setDT(purrr::cross_df(group_labels))
4573
+    sharing_df <- if (!keep_genomic_coord) {
4574
+        combin[, shared := .find_in_common(.SD,
4575
+            lookup_tbl = lookup,
4576
+            keep_genomic_coord = keep_genomic_coord
4577
+        ),
4578
+        .SDcols = names(dfs)
4579
+        ]
4580
+    } else {
4581
+        combin[, c("shared", "is_coord") := .find_in_common(.SD,
4582
+            lookup_tbl = lookup,
4583
+            keep_genomic_coord = keep_genomic_coord
4584
+        ),
4585
+        .SDcols = names(dfs)
4586
+        ]
4587
+    }
4588
+
4589
+    if (is_count || rel_sharing) {
4590
+        is_counts <- purrr::map2(lookups, names(lookups), function(x, y) {
4591
+            count_col_name <- paste0("count_", y)
4592
+            x %>%
4593
+                dplyr::mutate(!!count_col_name := purrr::map_int(
4594
+                    is, ~ nrow(.x)
4595
+                )) %>%
4596
+                dplyr::select(-.data$is) %>%
4597
+                dplyr::rename(!!y := .data$group_id)
4598
+        })
4599
+        ## Add counts -groups
4600
+        for (col in names(dfs)) {
4601
+            cnt_tbl <- is_counts[[col]]
4602
+            sharing_df <- sharing_df[
4603
+                cnt_tbl,
4604
+                on = col,
4605
+                nomatch = 0
4606
+            ]
4607
+        }
4608
+        ## Add counts -union
4609
+        sharing_df <- purrr::pmap_df(sharing_df, .count_group_union,
4610
+            col_groups = names(dfs),
4611
+            lookup_tbl = lookup
4612
+        )
4613
+    }
4614
+    if (!minimal) {
4615
+        if (getOption("ISAnalytics.verbose") == TRUE) {
4616
+            rlang::inform("Calculating permutations (requested)...")
4617
+        }
4618
+        sharing_df <- purrr::pmap_df(sharing_df, .sh_row_permut,
4619
+            g_names = names(dfs),
4620
+            counts = is_count || rel_sharing
4621
+        )
4622
+    }
4623
+    if (rel_sharing) {
4624
+        # for groups
4625
+        for (col in names(dfs)) {
4626
+            rel_col_name <- paste0("on_", col)
4627
+            count_col_name <- paste0("count_", col)
4628
+            sharing_df <- sharing_df %>%
4629
+                dplyr::mutate(!!rel_col_name := (.data$shared /
4630
+                    .data[[count_col_name]]) * 100)
4631
+        }
4632
+        # for union
4633
+        sharing_df <- sharing_df %>%
4634
+            dplyr::mutate(on_union = (.data$shared /
4635
+                .data$count_union) * 100)
4636
+    }
4637
+    if (!is_count) {
4638
+        sharing_df <- sharing_df %>%
4639
+            dplyr::select(!dplyr::contains("count_"))
4640
+    }
4641
+    if (venn) {
4642
+        sharing_df <- sharing_df %>%
4643
+            dplyr::mutate(truth_tbl_venn = purrr::pmap(.,
4644
+                .sh_truth_tbl_venn,
4645
+                lookup = lookups,
4646
+                groups = names(dfs)
4647
+            ))
4648
+    }
4649
+    sharing_df
4650
+}
4651
+
4652
+## Computes sharing table for mult input dfs and mult key
4653
+.sharing_multdf_mult_key <- function(dfs, keys, minimal,
4654
+    is_count, rel_sharing, keep_genomic_coord, venn) {
4655
+    lookups <- purrr::map2(dfs, keys, ~ .sh_obtain_lookup(.y, .x)) %>%
4656
+        purrr::set_names(names(keys))
4657
+    group_labels <- purrr::map(lookups, ~ .x$group_id)
4658
+    lookup <- data.table::rbindlist(lookups)
4659
+    ## Obtain combinations
4660
+    combin <- data.table::setDT(purrr::cross_df(group_labels))
4661
+    sharing_df <- if (!keep_genomic_coord) {
4662
+        combin[, shared := .find_in_common(.SD,
4663
+            lookup_tbl = lookup,
4664
+            keep_genomic_coord = keep_genomic_coord
4665
+        ),
4666
+        .SDcols = names(keys)
4667
+        ]
4668
+    } else {
4669
+        combin[, c("shared", "is_coord") := .find_in_common(.SD,
4670
+            lookup_tbl = lookup,
4671
+            keep_genomic_coord = keep_genomic_coord
4672
+        ),
4673
+        .SDcols = names(keys)
4674
+        ]
4675
+    }
4676
+    if (is_count || rel_sharing) {
4677
+        is_counts <- purrr::map2(lookups, names(lookups), function(x, y) {
4678
+            count_col_name <- paste0("count_", y)
4679
+            x %>%
4680
+                dplyr::mutate(!!count_col_name := purrr::map_int(
4681
+                    is, ~ nrow(.x)
4682
+                )) %>%
4683
+                dplyr::select(-.data$is) %>%
4684
+                dplyr::rename(!!y := .data$group_id)
4685
+        })
4686
+        ## Add counts -groups
4687
+        for (col in names(keys)) {
4688
+            cnt_tbl <- is_counts[[col]]
4689
+            sharing_df <- sharing_df[
4690
+                cnt_tbl,
4691
+                on = col,
4692
+                nomatch = 0
4693
+            ]
4694
+        }
4695
+        ## Add counts -union
4696
+        sharing_df <- purrr::pmap_df(sharing_df, .count_group_union,
4697
+            col_groups = names(keys),
4698
+            lookup_tbl = lookup
4699
+        )
4700
+    }
4701
+    if (!minimal) {
4702
+        if (getOption("ISAnalytics.verbose") == TRUE) {
4703
+            rlang::inform("Calculating permutations (requested)...")
4704
+        }
4705
+        sharing_df <- purrr::pmap_df(sharing_df, .sh_row_permut,
4706
+            g_names = names(keys),
4707
+            counts = is_count || rel_sharing
4708
+        )
4709
+    }
4710
+    if (rel_sharing) {
4711
+        # for groups
4712
+        for (col in names(keys)) {
4713
+            rel_col_name <- paste0("on_", col)
4714
+            count_col_name <- paste0("count_", col)
4715
+            sharing_df <- sharing_df %>%
4716
+                dplyr::mutate(!!rel_col_name := (.data$shared /
4717
+                    .data[[count_col_name]]) * 100)
4718
+        }
4719
+        # for union
4720
+        sharing_df <- sharing_df %>%
4721
+            dplyr::mutate(on_union = (.data$shared /
4722
+                .data$count_union) * 100)
4723
+    }
4724
+    if (!is_count) {
4725
+        sharing_df <- sharing_df %>%
4726
+            dplyr::select(!dplyr::contains("count_"))
4727
+    }
4728
+    if (venn) {
4729
+        sharing_df <- sharing_df %>%
4730
+            dplyr::mutate(truth_tbl_venn = purrr::pmap(.,
4731
+                .sh_truth_tbl_venn,
4732
+                lookup = lookups,
4733
+                groups = names(keys)
4734
+            ))
4735
+    }
4736
+    sharing_df
4737
+}
... ...
@@ -279,6 +279,7 @@
279 279
 # - outliers_by_pool_fragments
280 280
 # - sample_statistics
281 281
 # - compute_near_integrations
282
+# - cumulative_is
282 283
 .missing_user_cols_error <- function(missing_cols) {
283 284
     c(paste(
284 285
         "Some or all of the input column names were not found",
... ...
@@ -569,3 +570,44 @@
569 570
         )
570 571
     )
571 572
 }
573
+
574
+# USED IN:
575
+# - is_sharing
576
+.non_df_input_err <- function() {
577
+    c("Non-data frame found",
578
+        x = paste("Input list contains objects that are not data.frame")
579
+    )
580
+}
581
+
582
+# USED IN:
583
+# - is_sharing
584
+.unnamed_keys_warn <- function() {
585
+    c("Unnamed keys",
586
+        i = paste("Group keys should be named, using default names")
587
+    )
588
+}
589
+
590
+# USED IN:
591
+# - is_sharing
592
+.keys_not_char_err <- function() {
593
+    c("Keys need to be character vectors")
594
+}
595
+
596
+# USED IN:
597
+# - is_sharing
598
+.no_data_supp <- function() {
599
+    c("No data supplied",
600
+        x = paste("You must provide at least 1 data frame to the function")
601
+    )
602
+}
603
+
604
+# USED IN:
605
+# - cumulative_is
606
+.only_zero_tp <- function() {
607
+    c("Data frame empty after filtering",
608
+        i = paste(
609
+            "Data frame has 0 rows after filtering",
610
+            "out zero-timepoints. Nothing to do."
611
+        )
612
+    )
613
+}
... ...
@@ -719,6 +719,7 @@ integration_alluvial_plot <- function(x,
719 719
 #' @param perc_symbol Logical. Show percentage symbol in the quantification
720 720
 #' column?
721 721
 #'
722
+#' @family Plotting functions
722 723
 #' @return A tableGrob object
723 724
 #' @export
724 725
 #'
... ...
@@ -889,6 +890,7 @@ top_abund_tableGrob <- function(df,
889 890
 #' [plotly](https://plotly.com/r/getting-started/) is required for this
890 891
 #' functionality. Returns the heatmaps as interactive HTML widgets.
891 892
 #'
893
+#' @family Plotting functions
892 894
 #' @return A list of plots or widgets
893 895
 #' @seealso \link{is_sharing}
894 896
 #' @export
... ...
@@ -908,14 +910,17 @@ top_abund_tableGrob <- function(df,
908 910
 #'     association_file = association_file,
909 911
 #'     value_cols = c("seqCount", "fragmentEstimate")
910 912
 #' )
911
-#' sharing <- is_sharing(aggreg)
912
-#' sharing_heatmaps <- sharing_heatmap(sharing_df = sharing$sharing)
913
+#' sharing <- is_sharing(aggreg,
914
+#'     minimal = FALSE,
915
+#'     include_self_comp = TRUE
916
+#' )
917
+#' sharing_heatmaps <- sharing_heatmap(sharing_df = sharing)
913 918
 #' sharing_heatmaps$absolute
914 919
 #' sharing_heatmaps$on_g1
915 920
 #' sharing_heatmaps$on_union
916 921
 sharing_heatmap <- function(sharing_df,
917
-    show_on_x = "group1",
918
-    show_on_y = "group2",
922
+    show_on_x = "g1",
923
+    show_on_y = "g2",
919 924
     absolute_sharing_col = "shared",
920 925
     title_annot = NULL,
921 926
     plot_relative_sharing = TRUE,
... ...
@@ -1061,6 +1066,100 @@ sharing_heatmap <- function(sharing_df,
1061 1066
     return(result)
1062 1067
 }
1063 1068
 
1069
+
1070
+#' Produce tables to plot sharing venn or euler diagrams.
1071
+#'
1072
+#' @description \lifecycle{experimental}
1073
+#' This function processes a sharing data frame obtained via `is_sharing()`
1074
+#' with the option `table_for_venn = TRUE` to obtain a list of objects
1075
+#' that can be plotted as venn or euler diagrams.
1076
+#'
1077
+#' @details
1078
+#' The functions requires the package
1079
+#' [eulerr](https://jolars.github.io/eulerr/index.html). Each row of the
1080
+#' input data frame is representable as a venn/euler diagram. The function
1081
+#' allows to specify a range of row indexes to obtain a list of plottable
1082
+#' objects all at once, leave it to NULL to process all rows.
1083
+#'
1084
+#' To actually plot the data it is sufficient to call the function `plot()`
1085
+#' and specify optional customization arguments. See
1086
+#' [eulerr docs](https://jolars.github.io/eulerr/reference/plot.euler.html)
1087
+#' for more detail on this.
1088
+#'
1089
+#' @param sharing_df The sharing data frame
1090
+#' @param row_range Either `NULL` or a numeric vector of row indexes (e.g.
1091
+#' `c(1, 4, 5)` will produce tables only for rows 1, 4 and 5)
1092
+#' @param euler If `TRUE` will produce tables for euler diagrams, otherwise
1093
+#' will produce tables for venn diagrams
1094
+#'
1095
+#' @family Plotting functions
1096
+#'
1097
+#' @return A list of data frames
1098
+#' @export
1099
+#'
1100
+#' @examples
1101
+#' data("integration_matrices", package = "ISAnalytics")
1102
+#' data("association_file", package = "ISAnalytics")
1103
+#' aggreg <- aggregate_values_by_key(
1104
+#'     x = integration_matrices,
1105
+#'     association_file = association_file,
1106
+#'     value_cols = c("seqCount", "fragmentEstimate")
1107
+#' )
1108
+#' sharing <- is_sharing(aggreg, n_comp = 3, table_for_venn = TRUE)
1109
+#' venn_tbls <- sharing_venn(sharing, row_range = 1:3, euler = FALSE)
1110
+#' venn_tbls
1111
+#' plot(venn_tbls[[1]])
1112
+sharing_venn <- function(sharing_df,
1113
+    row_range = NULL,
1114
+    euler = TRUE) {
1115
+    if (!requireNamespace("eulerr", quietly = TRUE)) {
1116
+        rlang::abort(.missing_pkg_error("eulerr"))
1117
+    }
1118
+    stopifnot(is.data.frame(sharing_df))
1119
+    stopifnot(is.null(row_range) ||
1120
+        is.numeric(row_range) || is.integer(row_range))
1121
+    stopifnot(is.logical(euler))
1122
+    # Check row range
1123
+    if (is.null(row_range)) {
1124
+        row_range <- seq_len(nrow(sharing_df))
1125
+    }
1126
+    # Check truth table
1127
+    if (!"truth_tbl_venn" %in% colnames(sharing_df)) {
1128
+        no_truth_tbl_msg <- c("No truth table column",
1129
+            x = paste(
1130
+                "The column 'truth_tbl_venn'",
1131
+                "is required but seems to be missing"
1132
+            ),
1133
+            i = paste(
1134
+                "Did you forget to call",
1135
+                "`is_sharing(..., table_for_venn",
1136
+                "= TRUE)`?"
1137
+            )
1138
+        )
1139
+        rlang::abort(no_truth_tbl_msg)
1140
+    }
1141
+    # Filter data
1142
+    filtered_df <- sharing_df[row_range]
1143
+    if (nrow(filtered_df) == 0) {
1144
+        rlang::inform("Empty table, nothing to compute")
1145
+        return(NULL)
1146
+    }
1147
+    fixed_tbls <- if (euler) {
1148
+        purrr::map(filtered_df$truth_tbl_venn, function(x) {
1149
+            as_matrix <- as.matrix(x, rownames = "int_id")
1150
+            eul <- eulerr::euler(as_matrix)
1151
+            eul
1152
+        })
1153
+    } else {
1154
+        purrr::map(filtered_df$truth_tbl_venn, function(x) {
1155
+            as_matrix <- as.matrix(x, rownames = "int_id")
1156
+            eul <- eulerr::venn(as_matrix)
1157
+            eul
1158
+        })
1159
+    }
1160
+    fixed_tbls
1161
+}
1162
+
1064 1163
 #' Trace a circos plot of genomic densities.
1065 1164
 #'
1066 1165
 #' @description \lifecycle{experimental}
... ...
@@ -1109,6 +1208,7 @@ sharing_heatmap <- function(sharing_df,
1109 1208
 #' @importFrom fs is_dir dir_exists dir_create path path_ext_set path_ext
1110 1209
 #' @importFrom lubridate today
1111 1210
 #'
1211
+#' @family Plotting functions
1112 1212
 #' @return `NULL`
1113 1213
 #' @export
1114 1214
 #'
... ...
@@ -10,7 +10,7 @@
10 10
     list(
11 11
         collisions = list(
12 12
             template_name = "collision-report.Rmd",
13
-            required_pkgs = c("flexdashboard", "plotly", "DT", "ggvenn"),
13
+            required_pkgs = c("flexdashboard", "plotly", "DT", "eulerr"),
14 14
             def_filename = "collision_removal_report.html"
15 15
         ),
16 16
         vispa2_stats = list(
... ...
@@ -137,6 +137,19 @@ options("ISAnalytics.reports" = TRUE)
137 137
 Show more
138 138
 </summary>
139 139
 
140
+# ISAnalytics 1.3.5 (2021-09-21)
141
+
142
+## MAJOR CHANGES
143
+
144
+-   Reworked `is_sharing()` function, detailed usage in vignette
145
+    `vignette("sharing_analyses", package = "ISAnalytics")`
146
+
147
+## NEW
148
+
149
+-   New function `cumulative_is()`
150
+-   New function for plotting sharing as venn/euler diagrams
151
+    `sharing_venn()`
152
+
140 153
 # ISAnalytics 1.3.4 (2021-08 -03)
141 154
 
142 155
 ## FIXES/MINOR UPDATES
... ...
@@ -41,6 +41,8 @@ navbar:
41 41
         href: articles/how_to_import_functions.html
42 42
       - text: ISAnalytics report system
43 43
         href: articles/report_system.html
44
+      - text: Sharing analyses with ISAnalytics
45
+        href: articles/sharing_analyses.html
44 46
     release-v:
45 47
       icon: fas fa-code-branch
46 48
       text: RELEASE
... ...
@@ -79,6 +81,7 @@ reference:
79 81
   - sample_statistics
80 82
   - CIS_grubbs
81 83
   - cumulative_count_union
84
+  - cumulative_is
82 85
   - is_sharing
83 86
 - title: "HSC population estimate"
84 87
 - contents:
... ...
@@ -90,6 +93,7 @@ reference:
90 93
   - integration_alluvial_plot
91 94
   - top_abund_tableGrob
92 95
   - sharing_heatmap
96
+  - sharing_venn
93 97
   - circos_genomic_density
94 98
 - title: "Utility functions"
95 99
 - contents:
... ...
@@ -15,3 +15,5 @@ coverage:
15 15
 ignore:
16 16
   - "R/report-utilities.R"
17 17
   - "R/plotting-functions.R"
18
+  - "R/internal-msgs.R"
19
+  - "R/exported-vars.R"
... ...
@@ -79,18 +79,7 @@ This page contains summary information regarding the input matrix as is
79 79
 Additional info {data-orientation=rows}
80 80
 ===============================================================================
81 81
 ```{r echo=FALSE}
82
-.venn_diag <- function(info) {
83
-  all_camps <- union(info$MATRIX, info$AF)
84
-  info_df <- tibble::tibble(Sample = all_camps)
85
-  info_df <- info_df %>% 
86
-    dplyr::mutate(MATRIX = .data$Sample %in% info$MATRIX,
87
-                  AF = .data$Sample %in% info$AF)
88
-    venn_plot <- ggvenn::ggvenn(data = info_df, c("MATRIX", "AF"))
89
-    plotly::ggplotly(p = venn_plot, width = 4, height = 4) %>%
90
-        plotly::layout(xaxis = list(autorange = TRUE),
91
-               yaxis = list(autorange = TRUE))
92
-}
93
-venn_diag <- .venn_diag(samples_info)
82
+venn_diag <- eulerr::venn(samples_info)
94 83
 ```
95 84
 
96 85
 Row1 {data-height=500}
... ...
@@ -98,7 +87,7 @@ Row1 {data-height=500}
98 87
 ### Overview of samples contained in matrix and association file
99 88
 <!-- Venn diagram for shared samples between AF and matrix  -->
100 89
 ```{r}
101
-venn_diag
90
+plot(venn_diag, fills = list(fill = c("gold", "navyblue"), alpha = 0.5))
102 91
 ```
103 92
 
104 93
 ### About this page
... ...
@@ -214,7 +203,7 @@ Row4 {data-height=800 .tabset .tabset-fade}
214 203
 -------------------------------------------------------------------------------
215 204
 <!-- Sharing heatmaps -->
216 205
 ```{r echo=FALSE}
217
-heatmaps <- ISAnalytics::sharing_heatmap(pre_sharing$sharing, interactive = TRUE)
206
+heatmaps <- ISAnalytics::sharing_heatmap(pre_sharing, interactive = TRUE)
218 207
 ```
219 208
 
220 209
 ### IS sharing: absolute values
... ...
@@ -377,7 +366,7 @@ Row5 {data-height=800 .tabset .tabset-fade}
377 366
 -------------------------------------------------------------------------------
378 367
 <!-- Sharing heatmaps -->
379 368
 ```{r echo=FALSE}
380
-heatmaps_post <- ISAnalytics::sharing_heatmap(post_sharing$sharing, interactive = TRUE)
369
+heatmaps_post <- ISAnalytics::sharing_heatmap(post_sharing, interactive = TRUE)
381 370
 ```
382 371
 
383 372
 ### IS sharing: absolute values
... ...
@@ -78,6 +78,7 @@ Other Analysis functions:
78 78
 \code{\link{comparison_matrix}()},
79 79
 \code{\link{compute_abundance}()},
80 80
 \code{\link{cumulative_count_union}()},
81
+\code{\link{cumulative_is}()},
81 82
 \code{\link{is_sharing}()},
82 83
 \code{\link{sample_statistics}()},
83 84
 \code{\link{separate_quant_matrices}()},
... ...
@@ -81,7 +81,7 @@ For more details on how this files were generated use the help
81 81
 
82 82
 The default values are included in this package and
83 83
 it can be accessed by doing:\if{html}{\out{<div class="r">}}\preformatted{head(known_clinical_oncogenes())
84
-}\if{html}{\out{</div>}}\preformatted{## # A tibble: 5 x 2
84
+}\if{html}{\out{</div>}}\preformatted{## # A tibble: 5 × 2
85 85
 ##   GeneName KnownClonalExpansion
86 86
 ##   <chr>    <lgl>               
87 87
 ## 1 MECOM    TRUE                
... ...
@@ -94,7 +94,7 @@ it can be accessed by doing:\if{html}{\out{<div class="r">}}\preformatted{head(k
94 94
 If the user wants to change this parameter the input data frame must
95 95
 preserve the column structure. The same goes for the \code{suspicious_genes}
96 96
 parameter (DOIReference column is optional):\if{html}{\out{<div class="r">}}\preformatted{head(clinical_relevant_suspicious_genes())
97
-}\if{html}{\out{</div>}}\preformatted{## # A tibble: 6 x 3
97
+}\if{html}{\out{</div>}}\preformatted{## # A tibble: 6 × 3
98 98
 ##   GeneName ClinicalRelevance DOIReference                                
99 99
 ##   <chr>    <lgl>             <chr>                                       
100 100
 ## 1 DNMT3A   TRUE              https://doi.org/10.1182/blood-2018-01-829937
... ...
@@ -109,13 +109,17 @@ parameter (DOIReference column is optional):\if{html}{\out{<div class="r">}}\pre
109 109
 \examples{
110 110
 data("integration_matrices", package = "ISAnalytics")
111 111
 cis_plot <- CIS_volcano_plot(integration_matrices,
112
-    title_prefix = "PJ01"
112
+  title_prefix = "PJ01"
113 113
 )
114 114
 cis_plot
115 115
 }
116 116
 \seealso{
117 117
 Other Plotting functions: 
118 118
 \code{\link{HSC_population_plot}()},
119
-\code{\link{integration_alluvial_plot}()}
119
+\code{\link{circos_genomic_density}()},
120
+\code{\link{integration_alluvial_plot}()},
121
+\code{\link{sharing_heatmap}()},
122
+\code{\link{sharing_venn}()},
123
+\code{\link{top_abund_tableGrob}()}
120 124
 }
121 125
 \concept{Plotting functions}
... ...
@@ -33,18 +33,18 @@ Plot of the estimated HSC population size for each patient.
33 33
 data("integration_matrices", package = "ISAnalytics")
34 34
 data("association_file", package = "ISAnalytics")
35 35
 aggreg <- aggregate_values_by_key(
36
-    x = integration_matrices,
37
-    association_file = association_file,
38
-    value_cols = c("seqCount", "fragmentEstimate")
36
+  x = integration_matrices,
37
+  association_file = association_file,
38
+  value_cols = c("seqCount", "fragmentEstimate")
39 39
 )
40 40
 aggreg_meta <- aggregate_metadata(
41
-    association_file = association_file
41
+  association_file = association_file
42 42
 )
43 43
 estimate <- HSC_population_size_estimate(
44
-    x = aggreg,
45
-    metadata = aggreg_meta,
46
-    stable_timepoints = c(90, 180, 360),
47
-    cell_type = "Other"
44
+  x = aggreg,
45
+  metadata = aggreg_meta,
46
+  stable_timepoints = c(90, 180, 360),
47
+  cell_type = "Other"
48 48
 )
49 49
 p <- HSC_population_plot(estimate, "PJ01")
50 50
 p
... ...
@@ -52,6 +52,10 @@ p
52 52
 \seealso{
53 53
 Other Plotting functions: 
54 54
 \code{\link{CIS_volcano_plot}()},
55
-\code{\link{integration_alluvial_plot}()}
55
+\code{\link{circos_genomic_density}()},
56
+\code{\link{integration_alluvial_plot}()},
57
+\code{\link{sharing_heatmap}()},
58
+\code{\link{sharing_venn}()},
59
+\code{\link{top_abund_tableGrob}()}
56 60
 }
57 61
 \concept{Plotting functions}
... ...
@@ -85,16 +85,16 @@ distinct non-zero time points.
85 85
 data("integration_matrices", package = "ISAnalytics")
86 86
 data("association_file", package = "ISAnalytics")
87 87
 aggreg <- aggregate_values_by_key(
88
-    x = integration_matrices,
89
-    association_file = association_file,
90
-    value_cols = c("seqCount", "fragmentEstimate")
88
+  x = integration_matrices,
89
+  association_file = association_file,
90
+  value_cols = c("seqCount", "fragmentEstimate")
91 91
 )
92 92
 aggreg_meta <- aggregate_metadata(association_file = association_file)
93 93
 estimate <- HSC_population_size_estimate(
94
-    x = aggreg,
95
-    metadata = aggreg_meta,
96
-    stable_timepoints = c(90, 180, 360),
97
-    cell_type = "Other"
94
+  x = aggreg,
95
+  metadata = aggreg_meta,
96
+  stable_timepoints = c(90, 180, 360),
97
+  cell_type = "Other"
98 98
 )
99 99
 }
100 100
 \concept{Population estimates}
... ...
@@ -71,6 +71,7 @@ and Annotation of Vector Integration Sites}
71 71
 \item \code{\link{CIS_grubbs}}
72 72
 \item \code{\link{cumulative_count_union}}
73 73
 \item \code{\link{is_sharing}}
74
+\item \code{\link{cumulative_is}}
74 75
 }
75 76
 \item HSC population size estimate:
76 77
 \itemize{
... ...
@@ -84,6 +85,7 @@ and Annotation of Vector Integration Sites}
84 85
 \item \code{\link{integration_alluvial_plot}}
85 86
 \item \code{\link{top_abund_tableGrob}}
86 87
 \item \code{\link{circos_genomic_density}}
88
+\item \code{\link{sharing_venn}}
87 89
 }
88 90
 \item Utility functions:
89 91
 \itemize{
... ...
@@ -105,6 +107,8 @@ package = "ISAnalytics")}
105 107
 package = "ISAnalytics")}
106 108
 \item \code{vignette("report_system",
107 109
 package = "ISAnalytics")}
110
+\item \code{vignette("sharing_analyses",
111
+package = "ISAnalytics")}
108 112
 }
109 113
 }
110 114
 
... ...
@@ -40,7 +40,7 @@ summary of info for each group. For more details on how to use this function:
40 40
 \examples{
41 41
 data("association_file", package = "ISAnalytics")
42 42
 aggreg_meta <- aggregate_metadata(
43
-    association_file = association_file
43
+  association_file = association_file
44 44
 )
45 45
 head(aggreg_meta)
46 46
 }
... ...
@@ -92,9 +92,9 @@ will be added to the final data frame.
92 92
 data("integration_matrices", package = "ISAnalytics")
93 93
 data("association_file", package = "ISAnalytics")
94 94
 aggreg <- aggregate_values_by_key(
95
-    x = integration_matrices,
96
-    association_file = association_file,
97
-    value_cols = c("seqCount", "fragmentEstimate")
95
+  x = integration_matrices,
96
+  association_file = association_file,
97
+  value_cols = c("seqCount", "fragmentEstimate")
98 98
 )
99 99
 head(aggreg)
100 100
 }
... ...
@@ -73,16 +73,26 @@ otherwise an error message is thrown.
73 73
 data("integration_matrices", package = "ISAnalytics")
74 74
 data("association_file", package = "ISAnalytics")
75 75
 aggreg <- aggregate_values_by_key(
76
-    x = integration_matrices,
77
-    association_file = association_file,
78
-    value_cols = c("seqCount", "fragmentEstimate")
76
+  x = integration_matrices,
77
+  association_file = association_file,
78
+  value_cols = c("seqCount", "fragmentEstimate")
79 79
 )
80 80
 by_subj <- aggreg \%>\%
81
-    dplyr::group_by(.data$SubjectID) \%>\%
82
-    dplyr::group_split()
81
+  dplyr::group_by(.data$SubjectID) \%>\%
82
+  dplyr::group_split()
83 83
 circos_genomic_density(by_subj,
84
-    track_colors = c("navyblue", "gold"),
85
-    grDevice = "default", track.height = 0.1
84
+  track_colors = c("navyblue", "gold"),
85
+  grDevice = "default", track.height = 0.1
86 86
 )
87 87
 }
88 88
 }
89
+\seealso{
90
+Other Plotting functions: 
91
+\code{\link{CIS_volcano_plot}()},
92
+\code{\link{HSC_population_plot}()},
93
+\code{\link{integration_alluvial_plot}()},
94
+\code{\link{sharing_heatmap}()},
95
+\code{\link{sharing_venn}()},
96
+\code{\link{top_abund_tableGrob}()}
97
+}
98
+\concept{Plotting functions}
... ...
@@ -47,16 +47,16 @@ of reference.
47 47
 fs_path <- system.file("extdata", "fs.zip", package = "ISAnalytics")
48 48
 fs <- unzip_file_system(fs_path, "fs")
49 49
 af_path <- system.file("extdata", "asso.file.tsv.gz",
50
-    package = "ISAnalytics"
50
+  package = "ISAnalytics"
51 51
 )
52 52
 af <- import_association_file(af_path,
53
-    root = fs,
54
-    import_iss = FALSE,
55
-    report_path = NULL
53
+  root = fs,
54
+  import_iss = FALSE,
55
+  report_path = NULL
56 56
 )
57 57
 matrices <- import_parallel_Vispa2Matrices(af,
58
-    c("seqCount", "fragmentEstimate"),
59
-    mode = "AUTO", report_path = NULL, multi_quant_matrix = FALSE
58
+  c("seqCount", "fragmentEstimate"),
59
+  mode = "AUTO", report_path = NULL, multi_quant_matrix = FALSE
60 60
 )
61 61
 multi_quant <- comparison_matrix(matrices)
62 62
 head(multi_quant)
... ...
@@ -68,6 +68,7 @@ Other Analysis functions:
68 68
 \code{\link{CIS_grubbs}()},
69 69
 \code{\link{compute_abundance}()},
70 70
 \code{\link{cumulative_count_union}()},
71
+\code{\link{cumulative_is}()},
71 72
 \code{\link{is_sharing}()},
72 73
 \code{\link{sample_statistics}()},
73 74
 \code{\link{separate_quant_matrices}()},
... ...
@@ -49,9 +49,9 @@ column) will be produced.
49 49
 \examples{
50 50
 data("integration_matrices", package = "ISAnalytics")
51 51
 abund <- compute_abundance(
52
-    x = integration_matrices,
53
-    columns = "fragmentEstimate",
54
-    key = "CompleteAmplificationID"
52
+  x = integration_matrices,
53
+  columns = "fragmentEstimate",
54
+  key = "CompleteAmplificationID"
55 55
 )
56 56
 head(abund)
57 57
 }
... ...
@@ -60,6 +60,7 @@ Other Analysis functions:
60 60
 \code{\link{CIS_grubbs}()},
61 61
 \code{\link{comparison_matrix}()},
62 62
 \code{\link{cumulative_count_union}()},
63
+\code{\link{cumulative_is}()},
63 64
 \code{\link{is_sharing}()},
64 65
 \code{\link{sample_statistics}()},
65 66
 \code{\link{separate_quant_matrices}()},
... ...
@@ -83,7 +83,7 @@ all quantification matrices.
83 83
 \examples{
84 84
 data("integration_matrices", package = "ISAnalytics")
85 85
 rec <- compute_near_integrations(
86
-    x = integration_matrices, map_as_file = FALSE
86
+  x = integration_matrices, map_as_file = FALSE
87 87
 )
88 88
 head(rec)
89 89
 }
... ...
@@ -77,9 +77,9 @@ the chosen column to avoid undesired results.
77 77
 data("integration_matrices", package = "ISAnalytics")
78 78
 data("association_file", package = "ISAnalytics")
79 79
 aggreg <- aggregate_values_by_key(
80
-    x = integration_matrices,
81
-    association_file = association_file,
82
-    value_cols = c("seqCount", "fragmentEstimate")
80
+  x = integration_matrices,
81
+  association_file = association_file,
82
+  value_cols = c("seqCount", "fragmentEstimate")
83 83
 )
84 84
 cumulative_count <- cumulative_count_union(aggreg)
85 85
 cumulative_count
... ...
@@ -89,6 +89,7 @@ Other Analysis functions:
89 89
 \code{\link{CIS_grubbs}()},
90 90
 \code{\link{comparison_matrix}()},
91 91
 \code{\link{compute_abundance}()},
92
+\code{\link{cumulative_is}()},
92 93
 \code{\link{is_sharing}()},
93 94
 \code{\link{sample_statistics}()},
94 95
 \code{\link{separate_quant_matrices}()},
95 96
new file mode 100644
... ...
@@ -0,0 +1,65 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/analysis-functions.R
3
+\name{cumulative_is}
4
+\alias{cumulative_is}
5
+\title{Expands integration matrix with the cumulative is union over time.}
6
+\usage{
7
+cumulative_is(
8
+  x,
9
+  key = c("SubjectID", "CellMarker", "Tissue", "TimePoint"),
10
+  timepoint_col = "TimePoint",
11
+  include_tp_zero = FALSE,
12
+  keep_og_is = TRUE,
13
+  expand = FALSE
14
+)
15
+}
16
+\arguments{
17
+\item{x}{An integration matrix, ideally aggregated via
18
+\code{aggregate_values_by_key()}}
19
+
20
+\item{key}{The aggregation key used}
21
+
22
+\item{timepoint_col}{The name of the time point column}
23
+
24
+\item{include_tp_zero}{Should time point 0 be included?}
25
+
26
+\item{keep_og_is}{Keep original set of integrations as a separate column?}
27
+
28
+\item{expand}{If \code{FALSE}, for each group, the set of integration sites is
29
+returned in a separate column as a nested table, otherwise the resulting
30
+column is unnested.}
31
+}
32
+\value{
33
+A data frame
34
+}
35
+\description{
36
+\lifecycle{experimental}
37
+Given an input integration matrix that can be grouped over time,
38
+this function adds integrations in groups assuming that
39
+if an integration is observed at time point "t" then it is also observed in
40
+time point "t+1".
41
+}
42
+\examples{
43
+data("integration_matrices", package = "ISAnalytics")
44
+data("association_file", package = "ISAnalytics")
45
+aggreg <- aggregate_values_by_key(
46
+  x = rlang::current_env()$integration_matrices,
47
+  association_file = rlang::current_env()$association_file,
48
+  value_cols = c("seqCount", "fragmentEstimate")
49
+)
50
+cumulated_is <- cumulative_is(aggreg)
51
+cumulated_is
52
+}
53
+\seealso{
54
+Other Analysis functions: 
55
+\code{\link{CIS_grubbs}()},
56
+\code{\link{comparison_matrix}()},
57
+\code{\link{compute_abundance}()},
58
+\code{\link{cumulative_count_union}()},
59
+\code{\link{is_sharing}()},
60
+\code{\link{sample_statistics}()},
61
+\code{\link{separate_quant_matrices}()},
62
+\code{\link{threshold_filter}()},
63
+\code{\link{top_integrations}()}
64
+}
65
+\concept{Analysis functions}
... ...
@@ -45,16 +45,16 @@ file has been aligned with the file system
45 45
 fs_path <- system.file("extdata", "fs.zip", package = "ISAnalytics")
46 46
 fs <- unzip_file_system(fs_path, "fs")
47 47
 af_path <- system.file("extdata", "asso.file.tsv.gz",
48
-    package = "ISAnalytics"
48
+  package = "ISAnalytics"
49 49
 )
50 50
 af <- import_association_file(af_path,
51
-    root = fs,
52
-    import_iss = FALSE,
53
-    report_path = NULL
51
+  root = fs,
52
+  import_iss = FALSE,
53
+  report_path = NULL
54 54
 )
55 55
 stats_files <- import_Vispa2_stats(af,
56
-    join_w