Browse code

[UPDATE] Update to version 1.3.7

Giulia Pais authored on 20/10/2021 17:04:55
Showing 22 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: ISAnalytics
2 2
 Title: Analyze gene therapy vector insertion sites data identified from genomics next generation sequencing reads for clonal tracking studies
3
-Version: 1.3.6
3
+Version: 1.3.7
4 4
 Date: 2020-07-03
5 5
 Authors@R: c(
6 6
   person(given = "Andrea",
... ...
@@ -34,6 +34,7 @@ export(import_parallel_Vispa2Matrices_interactive)
34 34
 export(import_single_Vispa2Matrix)
35 35
 export(integration_alluvial_plot)
36 36
 export(is_sharing)
37
+export(iss_source)
37 38
 export(known_clinical_oncogenes)
38 39
 export(mandatory_IS_vars)
39 40
 export(matching_options)
... ...
@@ -2,6 +2,16 @@
2 2
 title: "NEWS"
3 3
 output: github_document
4 4
 ---
5
+# ISAnalytics 1.3.7 (2021-10-20)
6
+
7
+## NEW
8
+
9
+* Added new feature `iss_source()`
10
+
11
+## FIXES
12
+
13
+* Fixed minor issues in data files `refGenes_mm9` and function `compute_near_integrations()`
14
+
5 15
 # ISAnalytics 1.3.6 (2021-10-05)
6 16
 
7 17
 ## NEW
... ...
@@ -1,6 +1,17 @@
1 1
 NEWS
2 2
 ================
3 3
 
4
+# ISAnalytics 1.3.7 (2021-10-20)
5
+
6
+## NEW
7
+
8
+-   Added new feature `iss_source()`
9
+
10
+## FIXES
11
+
12
+-   Fixed minor issues in data files `refGenes_mm9` and function
13
+    `compute_near_integrations()`
14
+
4 15
 # ISAnalytics 1.3.6 (2021-10-05)
5 16
 
6 17
 ## NEW
... ...
@@ -1545,21 +1545,25 @@ is_sharing <- function(...,
1545 1545
 #'     association_file = rlang::current_env()$association_file,
1546 1546
 #'     value_cols = c("seqCount", "fragmentEstimate")
1547 1547
 #' )
1548
-#' filtered_by_purity <- purity_filter(x = aggreg,
1549
-#' value_column = "seqCount_sum")
1548
+#' filtered_by_purity <- purity_filter(
1549
+#'     x = aggreg,
1550
+#'     value_column = "seqCount_sum"
1551
+#' )
1550 1552
 #' head(filtered_by_purity)
1551 1553
 purity_filter <- function(x,
1552
-                          lineages = blood_lineages_default(),
1553
-                          aggregation_key = c("SubjectID", "CellMarker",
1554
-                                              "Tissue", "TimePoint"),
1555
-                          group_key = c("CellMarker", "Tissue"),
1556
-                          selected_groups = NULL,
1557
-                          join_on = "CellMarker",
1558
-                          min_value = 3,
1559
-                          impurity_threshold = 10,
1560
-                          by_timepoint = TRUE,
1561
-                          timepoint_column = "TimePoint",
1562
-                          value_column = "seqCount_sum") {
1554
+    lineages = blood_lineages_default(),
1555
+    aggregation_key = c(
1556
+        "SubjectID", "CellMarker",
1557
+        "Tissue", "TimePoint"
1558
+    ),
1559
+    group_key = c("CellMarker", "Tissue"),
1560
+    selected_groups = NULL,
1561
+    join_on = "CellMarker",
1562
+    min_value = 3,
1563
+    impurity_threshold = 10,
1564
+    by_timepoint = TRUE,
1565
+    timepoint_column = "TimePoint",
1566
+    value_column = "seqCount_sum") {
1563 1567
     ## Checks
1564 1568
     #### - Base
1565 1569
     stopifnot(is.data.frame(x))
... ...
@@ -1570,12 +1574,12 @@ purity_filter <- function(x,
1570 1574
     stopifnot(is.numeric(impurity_threshold) || is.integer(impurity_threshold))
1571 1575
     stopifnot(is.character(value_column))
1572 1576
     stopifnot(is.null(selected_groups) || is.character(selected_groups) ||
1573
-                  is.data.frame(selected_groups))
1577
+        is.data.frame(selected_groups))
1574 1578
     #### - Keys
1575 1579
     if (!all(aggregation_key %in% colnames(x))) {
1576 1580
         rlang::abort(.missing_user_cols_error(
1577 1581
             aggregation_key[!aggregation_key %in% colnames(x)]
1578
-            ))
1582
+        ))
1579 1583
     }
1580 1584
     if (!value_column[1] %in% colnames(x)) {
1581 1585
         rlang::abort(.missing_user_cols_error(value_column))
... ...
@@ -1588,23 +1592,28 @@ purity_filter <- function(x,
1588 1592
         ### If lineages info is needed
1589 1593
         stopifnot(is.data.frame(lineages))
1590 1594
         if (!all(group_key %in% unique(c(colnames(x), colnames(lineages))))) {
1591
-            missing_cols <- group_key[!group_key %in% unique(c(colnames(x),
1592
-                                               colnames(lineages)))]
1595
+            missing_cols <- group_key[!group_key %in% unique(c(
1596
+                colnames(x),
1597
+                colnames(lineages)
1598
+            ))]
1593 1599
             rlang::abort(.missing_user_cols_error(missing_cols))
1594 1600
         }
1595 1601
         if (!(all(join_on %in% colnames(x)) &
1596
-              all(join_on %in% colnames(lineages)))
1597
-            ) {
1602
+            all(join_on %in% colnames(lineages)))
1603
+        ) {
1598 1604
             missing_common <- c("Missing common column(s) to join on",
1599
-                                i = paste("The column(s) provided in argument",
1600
-                                          "`join_on` is missing from one or",
1601
-                                          "both data frames, aborting"))
1602
-           rlang::abort(missing_common)
1605
+                i = paste(
1606
+                    "The column(s) provided in argument",
1607
+                    "`join_on` is missing from one or",
1608
+                    "both data frames, aborting"
1609
+                )
1610
+            )
1611
+            rlang::abort(missing_common)
1603 1612
         }
1604 1613
         TRUE
1605 1614
     }
1606 1615
     if (by_timepoint) {
1607
-       stopifnot(is.character(timepoint_column))
1616
+        stopifnot(is.character(timepoint_column))
1608 1617
         timepoint_column <- timepoint_column[1]
1609 1618
         if (!timepoint_column %in% colnames(x)) {
1610 1619
             rlang::abort(.missing_user_cols_error(timepoint_column))
... ...
@@ -1625,13 +1634,17 @@ purity_filter <- function(x,
1625 1634
     }
1626 1635
     grouped <- x %>%
1627 1636
         dplyr::group_by(dplyr::across(dplyr::all_of(c(is_vars, group_key)))) %>%
1628
-        dplyr::summarise(Value = sum(.data[[value_column]]),
1629
-                         .groups = "drop")
1637
+        dplyr::summarise(
1638
+            Value = sum(.data[[value_column]]),
1639
+            .groups = "drop"
1640
+        )
1630 1641
     #### - value filter
1631
-    filtered_value <- threshold_filter(x = grouped,
1632
-                                 threshold = min_value,
1633
-                                 cols_to_compare = "Value",
1634
-                                 comparators = ">=")
1642
+    filtered_value <- threshold_filter(
1643
+        x = grouped,
1644
+        threshold = min_value,
1645
+        cols_to_compare = "Value",
1646
+        comparators = ">="
1647
+    )
1635 1648
     #### - Separating IS 1: group filtering
1636 1649
     pre_filt <- list()
1637 1650
     if (is.null(selected_groups) || purrr::is_empty(selected_groups)) {
... ...
@@ -1644,7 +1657,7 @@ purity_filter <- function(x,
1644 1657
             dplyr::filter(!.data[[group_key[1]]] %in% selected_groups)
1645 1658
     } else {
1646 1659
         ok_cols <- colnames(selected_groups)[colnames(selected_groups) %in%
1647
-                                                 group_key]
1660
+            group_key]
1648 1661
         selected_groups <- selected_groups %>%
1649 1662
             dplyr::select(dplyr::all_of(ok_cols)) %>%
1650 1663
             dplyr::distinct()
... ...
@@ -1654,11 +1667,13 @@ purity_filter <- function(x,
1654 1667
             pre_filt[["keep"]] <- filtered_value[0, ]
1655 1668
         } else {
1656 1669
             pre_filt[["process"]] <- dplyr::inner_join(filtered_value,
1657
-                                                       selected_groups,
1658
-                                                       by = ok_cols)
1670
+                selected_groups,
1671
+                by = ok_cols
1672
+            )
1659 1673
             pre_filt[["keep"]] <- dplyr::anti_join(filtered_value,
1660
-                                                   selected_groups,
1661
-                                                   by = ok_cols)
1674
+                selected_groups,
1675
+                by = ok_cols
1676
+            )
1662 1677
         }
1663 1678
     }
1664 1679
     if (nrow(pre_filt$process) == 0) {
... ...
@@ -1695,7 +1710,7 @@ purity_filter <- function(x,
1695 1710
         max_val <- max(group$Value)
1696 1711
         processed <- group %>%
1697 1712
             dplyr::mutate(remove = (max_val / .data$Value) >
1698
-                              impurity_threshold) %>%
1713
+                impurity_threshold) %>%
1699 1714
             dplyr::filter(remove == FALSE) %>%
1700 1715
             dplyr::select(-.data$remove)
1701 1716
         processed
... ...
@@ -1711,6 +1726,163 @@ purity_filter <- function(x,
1711 1726
     final
1712 1727
 }
1713 1728
 
1729
+#' Find the source of IS by evaluating sharing.
1730
+#'
1731
+#' @description \lifecycle{experimental}
1732
+#' The function computes the sharing between a reference group of interest
1733
+#' for each time point and a selection of groups of interest. In this way
1734
+#' it is possible to observe the percentage of shared integration sites between
1735
+#' reference and each group and identify in which time point a certain IS was
1736
+#' observed for the first time.
1737
+#'
1738
+#' @param reference A data frame containing one or more groups of reference.
1739
+#' Groups are identified by `ref_group_key`
1740
+#' @param selection A data frame containing one or more groups of interest
1741
+#' to compare.
1742
+#' Groups are identified by `selection_group_key`
1743
+#' @param ref_group_key Character vector of column names that identify a
1744
+#' unique group in the `reference` data frame
1745
+#' @param selection_group_key Character vector of column names that identify a
1746
+#' unique group in the `selection` data frame
1747
+#' @param timepoint_column Name of the column holding time point
1748
+#' info?
1749
+#' @param by_subject Should calculations be performed for each subject
1750
+#' separately?
1751
+#' @param subject_column Name of the column holding subjects information.
1752
+#' Relevant only if `by_subject = TRUE`
1753
+#'
1754
+#' @return A list of data frames or a data frame
1755
+#' @family Analysis functions
1756
+#' @export
1757
+#'
1758
+#' @examples
1759
+#' data("integration_matrices", package = "ISAnalytics")
1760
+#' data("association_file", package = "ISAnalytics")
1761
+#' aggreg <- aggregate_values_by_key(
1762
+#'     x = rlang::current_env()$integration_matrices,
1763
+#'     association_file = rlang::current_env()$association_file,
1764
+#'     value_cols = c("seqCount", "fragmentEstimate")
1765
+#' )
1766
+#' df1 <- aggreg %>%
1767
+#'     dplyr::filter(.data$Tissue == "BM")
1768
+#' df2 <- aggreg %>%
1769
+#'     dplyr::filter(.data$Tissue == "PB")
1770
+#' source <- iss_source(df1, df2)
1771
+#' source
1772
+#' ggplot2::ggplot(source$PT001, ggplot2::aes(
1773
+#'     x = as.factor(g2_TimePoint),
1774
+#'     y = sharing_perc, fill = g1
1775
+#' )) +
1776
+#'     ggplot2::geom_col() +
1777
+#'     ggplot2::labs(
1778
+#'         x = "Time point", y = "Shared IS % with MNC BM",
1779
+#'         title = "Source of is MNC BM vs MNC PB"
1780
+#'     )
1781
+iss_source <- function(reference,
1782
+    selection,
1783
+    ref_group_key = c(
1784
+        "SubjectID", "CellMarker",
1785
+        "Tissue", "TimePoint"
1786
+    ),
1787
+    selection_group_key = c(
1788
+        "SubjectID", "CellMarker",
1789
+        "Tissue", "TimePoint"
1790
+    ),
1791
+    timepoint_column = "TimePoint",
1792
+    by_subject = TRUE,
1793
+    subject_column = "SubjectID") {
1794
+    ## Checks
1795
+    stopifnot(is.data.frame(reference) & is.data.frame(selection))
1796
+    stopifnot(is.character(ref_group_key) & is.character(selection_group_key))
1797
+    stopifnot(is.character(timepoint_column))
1798
+    stopifnot(is.logical(by_subject))
1799
+    by_subject <- by_subject[1]
1800
+    if (!all(ref_group_key %in% colnames(reference))) {
1801
+        rlang::abort(.missing_user_cols_error(
1802
+            ref_group_key[!ref_group_key %in% colnames(reference)]
1803
+        ))
1804
+    }
1805
+    if (!all(selection_group_key %in% colnames(selection))) {
1806
+        rlang::abort(.missing_user_cols_error(
1807
+            selection_group_key[!selection_group_key %in% colnames(selection)]
1808
+        ))
1809
+    }
1810
+    timepoint_column <- timepoint_column[1]
1811
+    if (!timepoint_column %in% colnames(reference) |
1812
+        !timepoint_column %in% colnames(selection)) {
1813
+        rlang::abort(.missing_needed_cols(timepoint_column))
1814
+    }
1815
+    ## Workflow choice
1816
+    if (by_subject) {
1817
+        stopifnot(is.character(subject_column))
1818
+        subject_column <- subject_column[1]
1819
+        ref_split <- reference %>%
1820
+            dplyr::group_by(dplyr::across({{ subject_column }}))
1821
+        ref_subjs <- ref_split %>%
1822
+            dplyr::group_keys() %>%
1823
+            dplyr::pull(.data[[subject_column]])
1824
+        ref_split <- ref_split %>%
1825
+            dplyr::group_split() %>%
1826
+            purrr::set_names(ref_subjs)
1827
+        sel_split <- selection %>%
1828
+            dplyr::group_by(dplyr::across({{ subject_column }}))
1829
+        sel_subjs <- sel_split %>%
1830
+            dplyr::group_keys() %>%
1831
+            dplyr::pull(.data[[subject_column]])
1832
+        sel_split <- sel_split %>%
1833
+            dplyr::group_split() %>%
1834
+            purrr::set_names(sel_subjs)
1835
+        shared <- .sharing_for_source(ref_split,
1836
+            sel_split,
1837
+            ref_key = ref_group_key,
1838
+            sel_key = selection_group_key,
1839
+            tp_col = timepoint_column,
1840
+            subj_col = subject_column
1841
+        )
1842
+        shared <- purrr::map(
1843
+            shared,
1844
+            ~ .x %>%
1845
+                dplyr::select(
1846
+                    -.data$count_g1, -.data$count_g2,
1847
+                    -.data$count_union
1848
+                ) %>%
1849
+                tidyr::unnest(.data$is_coord, keep_empty = TRUE) %>%
1850
+                dplyr::mutate(sharing_perc = dplyr::if_else(
1851
+                    shared == 0, 0, .data$on_g2 / .data$shared
1852
+                )) %>%
1853
+                dplyr::select(
1854
+                    -.data$shared, -.data$on_g1, -.data$on_g2,
1855
+                    -.data$on_union
1856
+                )
1857
+        )
1858
+    } else {
1859
+        shared <- .sharing_for_source(reference,
1860
+            selection,
1861
+            ref_key = ref_group_key,
1862
+            sel_key = selection_group_key,
1863
+            tp_col = timepoint_column,
1864
+            subj_col = subject_column
1865
+        )
1866
+        shared <- shared %>%
1867
+            dplyr::select(
1868
+                -.data$count_g1, -.data$count_g2,
1869
+                -.data$count_union
1870
+            ) %>%
1871
+            tidyr::unnest(.data$is_coord, keep_empty = TRUE) %>%
1872
+            dplyr::mutate(sharing_perc = dplyr::if_else(shared == 0,
1873
+                0,
1874
+                .data$on_g2 /
1875
+                    .data$shared
1876
+            )) %>%
1877
+            dplyr::select(
1878
+                -.data$shared, -.data$on_g1, -.data$on_g2,
1879
+                -.data$on_union
1880
+            )
1881
+    }
1882
+
1883
+    return(shared)
1884
+}
1885
+
1714 1886
 #' A set of pre-defined functions for `sample_statistics`.
1715 1887
 #'
1716 1888
 #' @return A named list of functions/purrr-style lambdas
... ...
@@ -4735,3 +4735,170 @@
4735 4735
     }
4736 4736
     sharing_df
4737 4737
 }
4738
+
4739
+#---- USED IN : iss_source ----
4740
+.assign_iss_by_tp <- function(df, timepoint_column) {
4741
+    is_vars <- if (.is_annotated(df)) {
4742
+        c(mandatory_IS_vars(), annotation_IS_vars())
4743
+    } else {
4744
+        mandatory_IS_vars()
4745
+    }
4746
+    return(df %>%
4747
+        dplyr::mutate(!!timepoint_column := as.numeric(
4748
+            .data[[timepoint_column]]
4749
+        )) %>%
4750
+        dplyr::group_by(dplyr::across(dplyr::all_of(is_vars))) %>%
4751
+        dplyr::group_modify(~ {
4752
+            if (nrow(.x) == 1) {
4753
+                return(.x)
4754
+            }
4755
+            min_tp <- min(.x[[timepoint_column]])
4756
+            return(.x %>%
4757
+                dplyr::filter(.data[[timepoint_column]] == min_tp))
4758
+        }) %>%
4759
+        dplyr::ungroup())
4760
+}
4761
+
4762
+.sharing_for_source <- function(ref,
4763
+    sel,
4764
+    ref_key,
4765
+    sel_key,
4766
+    tp_col,
4767
+    subj_col) {
4768
+    if (!is.data.frame(ref)) {
4769
+        ### Workflow by subject
4770
+        common_names <- if (!all(names(ref) == names(sel))) {
4771
+            intersect(names(ref), names(sel))
4772
+        } else {
4773
+            names(ref)
4774
+        }
4775
+        if (length(common_names) == 0) {
4776
+            no_common_err <- c("No common subjects",
4777
+                x = paste(
4778
+                    "No common subjects between",
4779
+                    "reference and selection"
4780
+                ),
4781
+                i = paste(
4782
+                    "In reference:",
4783
+                    paste0(names(ref), collapse = ", "),
4784
+                    "\n",
4785
+                    "In selection: ",
4786
+                    paste0(names(sel), collapse = ", ")
4787
+                )
4788
+            )
4789
+            rlang::abort(no_common_err)
4790
+        }
4791
+        if (getOption("ISAnalytics.verbose") == TRUE &&
4792
+            (length(common_names) < length(names(ref)) ||
4793
+                length(common_names) < length(names(sel)))) {
4794
+            all_names <- union(names(ref), names(sel))
4795
+            common_warn_msg <- c("Mismatch in subjects found",
4796
+                i = paste(
4797
+                    "Some subjects were excluded from",
4798
+                    "computations because they were",
4799
+                    "absent from reference or from",
4800
+                    "selection"
4801
+                ),
4802
+                paste(
4803
+                    "Excluded: ",
4804
+                    paste0(all_names[!all_names %in%
4805
+                        common_names],
4806
+                    collapse = ", "
4807
+                    )
4808
+                )
4809
+            )
4810
+        }
4811
+        if (.Platform$OS.type == "windows") {
4812
+            p <- BiocParallel::SnowParam(
4813
+                tasks = length(common_names),
4814
+                progressbar = getOption("ISAnalytics.verbose"),
4815
+                exportglobals = FALSE
4816
+            )
4817
+        } else {
4818
+            p <- BiocParallel::MulticoreParam(
4819
+                tasks = length(common_names),
4820
+                progressbar = getOption("ISAnalytics.verbose"),
4821
+                exportglobals = FALSE
4822
+            )
4823
+        }
4824
+        FUN <- function(subj,
4825
+    ref, sel, ref_key, sel_key,
4826
+    tp_col) {
4827
+            ref_df <- ref[[subj]]
4828
+            sel_df <- sel[[subj]]
4829
+            ref_key_min <- ref_key[ref_key != tp_col]
4830
+            ref_split <- ref_df %>%
4831
+                dplyr::group_by(dplyr::across(dplyr::all_of(ref_key_min))) %>%
4832
+                dplyr::group_split()
4833
+            quiet_sharing <- purrr::quietly(is_sharing)
4834
+            sharing <- purrr::map_df(ref_split, ~ {
4835
+                assigned_ref <- .assign_iss_by_tp(.x,
4836
+                    timepoint_column = tp_col
4837
+                )
4838
+                sh <- (quiet_sharing(assigned_ref,
4839
+                    sel_df,
4840
+                    group_keys = list(
4841
+                        g1 = ref_key,
4842
+                        g2 = sel_key
4843
+                    ),
4844
+                    keep_genomic_coord = TRUE
4845
+                ))$result
4846
+                sh <- sh %>%
4847
+                    tidyr::separate(
4848
+                        col = "g1",
4849
+                        into = paste0("g1_", ref_key),
4850
+                        remove = FALSE,
4851
+                        convert = TRUE
4852
+                    ) %>%
4853
+                    tidyr::separate(
4854
+                        col = "g2",
4855
+                        into = paste0("g2_", sel_key),
4856
+                        remove = FALSE,
4857
+                        convert = TRUE
4858
+                    )
4859
+            })
4860
+        }
4861
+
4862
+        shared <- BiocParallel::bplapply(common_names, FUN,
4863
+            ref = ref,
4864
+            sel = sel,
4865
+            ref_key = ref_key,
4866
+            sel_key = sel_key,
4867
+            tp_col = tp_col,
4868
+            BPPARAM = p
4869
+        ) %>%
4870
+            purrr::set_names(common_names)
4871
+        return(shared)
4872
+    }
4873
+    ref_key_min <- ref_key[ref_key != tp_col]
4874
+    ref_split <- ref %>%
4875
+        dplyr::group_by(dplyr::across(dplyr::all_of(ref_key_min))) %>%
4876
+        dplyr::group_split()
4877
+    quiet_sharing <- purrr::quietly(is_sharing)
4878
+    sharing <- purrr::map_df(ref_split, ~ {
4879
+        assigned_ref <- .assign_iss_by_tp(.x,
4880
+            timepoint_column = tp_col
4881
+        )
4882
+        (quiet_sharing(assigned_ref,
4883
+            sel,
4884
+            group_keys = list(
4885
+                g1 = ref_key,
4886
+                g2 = sel_key
4887
+            ),
4888
+            keep_genomic_coord = TRUE
4889
+        ))$result
4890
+    })
4891
+    sharing %>%
4892
+        tidyr::separate(
4893
+            col = "g1",
4894
+            into = paste0("g1_", ref_key),
4895
+            remove = FALSE,
4896
+            convert = TRUE
4897
+        ) %>%
4898
+        tidyr::separate(
4899
+            col = "g2",
4900
+            into = paste0("g2_", sel_key),
4901
+            remove = FALSE,
4902
+            convert = TRUE
4903
+        )
4904
+}
... ...
@@ -137,6 +137,17 @@ options("ISAnalytics.reports" = TRUE)
137 137
 Show more
138 138
 </summary>
139 139
 
140
+# ISAnalytics 1.3.7 (2021-10-20)
141
+
142
+## NEW
143
+
144
+-   Added new feature `iss_source()`
145
+
146
+## FIXES
147
+
148
+-   Fixed minor issues in data files `refGenes_mm9` and function
149
+    `compute_near_integrations()`
150
+
140 151
 # ISAnalytics 1.3.6 (2021-10-05)
141 152
 
142 153
 ## NEW
... ...
@@ -82,6 +82,7 @@ reference:
82 82
   - cumulative_is
83 83
   - is_sharing
84 84
   - purity_filter
85
+  - iss_source
85 86
 - title: "HSC population estimate"
86 87
 - contents:
87 88
   - HSC_population_size_estimate
... ...
@@ -80,6 +80,7 @@ Other Analysis functions:
80 80
 \code{\link{cumulative_count_union}()},
81 81
 \code{\link{cumulative_is}()},
82 82
 \code{\link{is_sharing}()},
83
+\code{\link{iss_source}()},
83 84
 \code{\link{purity_filter}()},
84 85
 \code{\link{sample_statistics}()},
85 86
 \code{\link{separate_quant_matrices}()},
... ...
@@ -70,6 +70,7 @@ Other Analysis functions:
70 70
 \code{\link{cumulative_count_union}()},
71 71
 \code{\link{cumulative_is}()},
72 72
 \code{\link{is_sharing}()},
73
+\code{\link{iss_source}()},
73 74
 \code{\link{purity_filter}()},
74 75
 \code{\link{sample_statistics}()},
75 76
 \code{\link{separate_quant_matrices}()},
... ...
@@ -62,6 +62,7 @@ Other Analysis functions:
62 62
 \code{\link{cumulative_count_union}()},
63 63
 \code{\link{cumulative_is}()},
64 64
 \code{\link{is_sharing}()},
65
+\code{\link{iss_source}()},
65 66
 \code{\link{purity_filter}()},
66 67
 \code{\link{sample_statistics}()},
67 68
 \code{\link{separate_quant_matrices}()},
... ...
@@ -91,6 +91,7 @@ Other Analysis functions:
91 91
 \code{\link{compute_abundance}()},
92 92
 \code{\link{cumulative_is}()},
93 93
 \code{\link{is_sharing}()},
94
+\code{\link{iss_source}()},
94 95
 \code{\link{purity_filter}()},
95 96
 \code{\link{sample_statistics}()},
96 97
 \code{\link{separate_quant_matrices}()},
... ...
@@ -57,6 +57,7 @@ Other Analysis functions:
57 57
 \code{\link{compute_abundance}()},
58 58
 \code{\link{cumulative_count_union}()},
59 59
 \code{\link{is_sharing}()},
60
+\code{\link{iss_source}()},
60 61
 \code{\link{purity_filter}()},
61 62
 \code{\link{sample_statistics}()},
62 63
 \code{\link{separate_quant_matrices}()},
... ...
@@ -95,6 +95,7 @@ Other Analysis functions:
95 95
 \code{\link{compute_abundance}()},
96 96
 \code{\link{cumulative_count_union}()},
97 97
 \code{\link{cumulative_is}()},
98
+\code{\link{iss_source}()},
98 99
 \code{\link{purity_filter}()},
99 100
 \code{\link{sample_statistics}()},
100 101
 \code{\link{separate_quant_matrices}()},
101 102
new file mode 100644
... ...
@@ -0,0 +1,85 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/analysis-functions.R
3
+\name{iss_source}
4
+\alias{iss_source}
5
+\title{Find the source of IS by evaluating sharing.}
6
+\usage{
7
+iss_source(
8
+  reference,
9
+  selection,
10
+  ref_group_key = c("SubjectID", "CellMarker", "Tissue", "TimePoint"),
11
+  selection_group_key = c("SubjectID", "CellMarker", "Tissue", "TimePoint"),
12
+  timepoint_column = "TimePoint",
13
+  by_subject = TRUE,
14
+  subject_column = "SubjectID"
15
+)
16
+}
17
+\arguments{
18
+\item{reference}{A data frame containing one or more groups of reference.
19
+Groups are identified by \code{ref_group_key}}
20
+
21
+\item{selection}{A data frame containing one or more groups of interest
22
+to compare.
23
+Groups are identified by \code{selection_group_key}}
24
+
25
+\item{ref_group_key}{Character vector of column names that identify a
26
+unique group in the \code{reference} data frame}
27
+
28
+\item{selection_group_key}{Character vector of column names that identify a
29
+unique group in the \code{selection} data frame}
30
+
31
+\item{timepoint_column}{Name of the column holding time point
32
+info?}
33
+
34
+\item{by_subject}{Should calculations be performed for each subject
35
+separately?}
36
+
37
+\item{subject_column}{Name of the column holding subjects information.
38
+Relevant only if \code{by_subject = TRUE}}
39
+}
40
+\value{
41
+A list of data frames or a data frame
42
+}
43
+\description{
44
+\lifecycle{experimental}
45
+The function computes the sharing between a reference group of interest
46
+for each time point and a selection of groups of interest. In this way
47
+it is possible to observe the percentage of shared integration sites between
48
+reference and each group and identify in which time point a certain IS was
49
+observed for the first time.
50
+}
51
+\examples{
52
+data("integration_matrices", package = "ISAnalytics")
53
+data("association_file", package = "ISAnalytics")
54
+aggreg <- aggregate_values_by_key(
55
+    x = rlang::current_env()$integration_matrices,
56
+    association_file = rlang::current_env()$association_file,
57
+    value_cols = c("seqCount", "fragmentEstimate")
58
+)
59
+df1 <- aggreg \%>\%
60
+    dplyr::filter(.data$Tissue == "BM")
61
+df2 <- aggreg \%>\%
62
+    dplyr::filter(.data$Tissue == "PB")
63
+source <- iss_source(df1, df2)
64
+source
65
+ggplot2::ggplot(source$PT001, ggplot2::aes(x = as.factor(g2_TimePoint),
66
+                                           y = sharing_perc, fill = g1)) +
67
+    ggplot2::geom_col() +
68
+    ggplot2::labs(x = "Time point", y = "Shared IS \% with MNC BM",
69
+                  title = "Source of is MNC BM vs MNC PB")
70
+}
71
+\seealso{
72
+Other Analysis functions: 
73
+\code{\link{CIS_grubbs}()},
74
+\code{\link{comparison_matrix}()},
75
+\code{\link{compute_abundance}()},
76
+\code{\link{cumulative_count_union}()},
77
+\code{\link{cumulative_is}()},
78
+\code{\link{is_sharing}()},
79
+\code{\link{purity_filter}()},
80
+\code{\link{sample_statistics}()},
81
+\code{\link{separate_quant_matrices}()},
82
+\code{\link{threshold_filter}()},
83
+\code{\link{top_integrations}()}
84
+}
85
+\concept{Analysis functions}
... ...
@@ -108,8 +108,10 @@ aggreg <- aggregate_values_by_key(
108 108
     association_file = rlang::current_env()$association_file,
109 109
     value_cols = c("seqCount", "fragmentEstimate")
110 110
 )
111
-filtered_by_purity <- purity_filter(x = aggreg,
112
-value_column = "seqCount_sum")
111
+filtered_by_purity <- purity_filter(
112
+    x = aggreg,
113
+    value_column = "seqCount_sum"
114
+)
113 115
 head(filtered_by_purity)
114 116
 }
115 117
 \seealso{
... ...
@@ -120,6 +122,7 @@ Other Analysis functions:
120 122
 \code{\link{cumulative_count_union}()},
121 123
 \code{\link{cumulative_is}()},
122 124
 \code{\link{is_sharing}()},
125
+\code{\link{iss_source}()},
123 126
 \code{\link{sample_statistics}()},
124 127
 \code{\link{separate_quant_matrices}()},
125 128
 \code{\link{threshold_filter}()},
... ...
@@ -78,6 +78,7 @@ Other Analysis functions:
78 78
 \code{\link{cumulative_count_union}()},
79 79
 \code{\link{cumulative_is}()},
80 80
 \code{\link{is_sharing}()},
81
+\code{\link{iss_source}()},
81 82
 \code{\link{purity_filter}()},
82 83
 \code{\link{separate_quant_matrices}()},
83 84
 \code{\link{threshold_filter}()},
... ...
@@ -62,6 +62,7 @@ Other Analysis functions:
62 62
 \code{\link{cumulative_count_union}()},
63 63
 \code{\link{cumulative_is}()},
64 64
 \code{\link{is_sharing}()},
65
+\code{\link{iss_source}()},
65 66
 \code{\link{purity_filter}()},
66 67
 \code{\link{sample_statistics}()},
67 68
 \code{\link{threshold_filter}()},
... ...
@@ -211,6 +211,7 @@ Other Analysis functions:
211 211
 \code{\link{cumulative_count_union}()},
212 212
 \code{\link{cumulative_is}()},
213 213
 \code{\link{is_sharing}()},
214
+\code{\link{iss_source}()},
214 215
 \code{\link{purity_filter}()},
215 216
 \code{\link{sample_statistics}()},
216 217
 \code{\link{separate_quant_matrices}()},
... ...
@@ -78,6 +78,7 @@ Other Analysis functions:
78 78
 \code{\link{cumulative_count_union}()},
79 79
 \code{\link{cumulative_is}()},
80 80
 \code{\link{is_sharing}()},
81
+\code{\link{iss_source}()},
81 82
 \code{\link{purity_filter}()},
82 83
 \code{\link{sample_statistics}()},
83 84
 \code{\link{separate_quant_matrices}()},
... ...
@@ -947,3 +947,572 @@ test_that("cumulative_is produces correct output", {
947 947
     expect_true(all(mandatory_IS_vars() %in% colnames(c_is)))
948 948
     expect_true(nrow(c_is) == sum(counts))
949 949
 })
950
+
951
+#------------------------------------------------------------------------------#
952
+# Test .assign_iss_by_tp
953
+#------------------------------------------------------------------------------#
954
+test_that(".assign_iss_by_tp assigns iss correctly", {
955
+    test_df <- tibble::tribble(
956
+        ~chr, ~integration_locus, ~strand, ~CellMarker, ~Tissue, ~TimePoint,
957
+        "1", 12345, "+", "CD34", "BM", "03",
958
+        "1", 12345, "+", "CD34", "BM", "01",
959
+        "1", 12345, "+", "CD34", "BM", "06",
960
+        "2", 54321, "-", "CD34", "BM", "01",
961
+        "3", 62135, "+", "CD34", "BM", "02",
962
+        "3", 62135, "+", "CD34", "BM", "08"
963
+    )
964
+
965
+    expected_assign <- tibble::tribble(
966
+        ~chr, ~integration_locus, ~strand, ~CellMarker, ~Tissue, ~TimePoint,
967
+        "1", 12345, "+", "CD34", "BM", 1,
968
+        "2", 54321, "-", "CD34", "BM", 1,
969
+        "3", 62135, "+", "CD34", "BM", 2
970
+    )
971
+    re_assigned <- .assign_iss_by_tp(
972
+        df = test_df,
973
+        timepoint_column = "TimePoint"
974
+    )
975
+    expect_equal(re_assigned, expected_assign)
976
+})
977
+
978
+#------------------------------------------------------------------------------#
979
+# Test iss_source
980
+#------------------------------------------------------------------------------#
981
+test_df2 <- tibble::tibble(
982
+    chr = c(
983
+        "1", "1", "1", "1", "2", "2", "3", "3",
984
+        "1", "1", "1", "1", "1", "2", "3", "3"
985
+    ),
986
+    integration_locus = c(
987
+        12345, 43524, 12345, 12345, 54321,
988
+        76835, 62135, 62135, 12345, 12345,
989
+        56832, 12345, 12345, 54321, 62135,
990
+        62135
991
+    ),
992
+    strand = c(
993
+        "+", "+", "+", "+", "-", "-", "+", "+", "+",
994
+        "+", "-", "+", "+", "-", "+", "+"
995
+    ),
996
+    SubjectID = c(
997
+        "PT01", "PT02", "PT01", "PT01", "PT01",
998
+        "PT02", "PT01", "PT01", "PT01", "PT01",
999
+        "PT02", "PT01", "PT02", "PT01", "PT01",
1000
+        "PT01"
1001
+    ),
1002
+    CellMarker = c(
1003
+        "CD34", "CD34", "CD34", "CD34", "CD34",
1004
+        "CD34", "CD34", "CD34", "Whole", "Whole",
1005
+        "Whole", "Whole", "Whole", "Whole", "Whole",
1006
+        "Whole"
1007
+    ),
1008
+    Tissue = c(
1009
+        "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM",
1010
+        "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM"
1011
+    ),
1012
+    TimePoint = c(
1013
+        "03", "03", "01", "06", "01", "01", "02",
1014
+        "08", "03", "01", "01", "06", "06", "01",
1015
+        "02", "08"
1016
+    )
1017
+)
1018
+
1019
+test_df3 <- tibble::tibble(
1020
+    chr = c(
1021
+        "1", "1", "1", "1", "2", "2", "3", "3", "1",
1022
+        "1", "1", "1", "1", "2", "3", "3"
1023
+    ),
1024
+    integration_locus = c(
1025
+        56252, 43524, 12345, 86435, 54321,
1026
+        76835, 62135, 432245, 12345, 534587,
1027
+        56832, 12345, 12345, 578635, 62135,
1028
+        62135
1029
+    ),
1030
+    strand = c(
1031
+        "+", "+", "+", "+", "-", "-", "+", "+", "+",
1032
+        "+", "-", "+", "+", "-", "+", "+"
1033
+    ),
1034
+    SubjectID = c(
1035
+        "PT02", "PT02", "PT01", "PT01", "PT01",
1036
+        "PT02", "PT01", "PT01", "PT01", "PT01",
1037
+        "PT02", "PT01", "PT02", "PT01", "PT02",
1038
+        "PT01"
1039
+    ),
1040
+    CellMarker = c(
1041
+        "CD14", "CD13", "CD15", "CD14", "CD14",
1042
+        "CD13", "CD14", "CD13", "CD14", "CD15",
1043
+        "CD15", "CD14", "CD15", "CD13", "CD14",
1044
+        "CD13"
1045
+    ),
1046
+    Tissue = c(
1047
+        "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB",
1048
+        "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB"
1049
+    ),
1050
+    TimePoint = c(
1051
+        "03", "03", "01", "06", "01", "01", "02",
1052
+        "08", "03", "01", "01", "06", "06", "01",
1053
+        "02", "08"
1054
+    )
1055
+)
1056
+expected_res1 <- list(
1057
+    PT01 = tibble::tibble(
1058
+        g1 = c(
1059
+            "PT01_CD34_BM_1", "PT01_CD34_BM_2",
1060
+            "PT01_CD34_BM_1", "PT01_CD34_BM_2",
1061
+            "PT01_CD34_BM_1", "PT01_CD34_BM_2",
1062
+            "PT01_CD34_BM_1", "PT01_CD34_BM_2",
1063
+            "PT01_CD34_BM_1", "PT01_CD34_BM_2",
1064
+            "PT01_CD34_BM_1", "PT01_CD34_BM_2",
1065
+            "PT01_CD34_BM_1", "PT01_CD34_BM_2",
1066
+            "PT01_Whole_BM_1", "PT01_Whole_BM_2",
1067
+            "PT01_Whole_BM_1", "PT01_Whole_BM_2",
1068
+            "PT01_Whole_BM_1", "PT01_Whole_BM_2",
1069
+            "PT01_Whole_BM_1", "PT01_Whole_BM_2",
1070
+            "PT01_Whole_BM_1", "PT01_Whole_BM_2",
1071
+            "PT01_Whole_BM_1", "PT01_Whole_BM_2",
1072
+            "PT01_Whole_BM_1", "PT01_Whole_BM_2"
1073
+        ),
1074
+        g1_SubjectID = c(
1075
+            "PT01", "PT01", "PT01", "PT01",
1076
+            "PT01", "PT01", "PT01", "PT01",
1077
+            "PT01", "PT01", "PT01", "PT01",
1078
+            "PT01", "PT01", "PT01", "PT01",
1079
+            "PT01", "PT01", "PT01", "PT01",
1080
+            "PT01", "PT01", "PT01", "PT01",
1081
+            "PT01", "PT01", "PT01", "PT01"
1082
+        ),
1083
+        g1_CellMarker = c(
1084
+            "CD34", "CD34", "CD34", "CD34",
1085
+            "CD34", "CD34", "CD34", "CD34",
1086
+            "CD34", "CD34", "CD34", "CD34",
1087
+            "CD34", "CD34", "Whole", "Whole",
1088
+            "Whole", "Whole", "Whole", "Whole",
1089
+            "Whole", "Whole", "Whole", "Whole",
1090
+            "Whole", "Whole", "Whole", "Whole"
1091
+        ),
1092
+        g1_Tissue = c(
1093
+            "BM", "BM", "BM", "BM", "BM", "BM",
1094
+            "BM", "BM", "BM", "BM", "BM", "BM",
1095
+            "BM", "BM", "BM", "BM", "BM", "BM",
1096
+            "BM", "BM", "BM", "BM", "BM", "BM",
1097
+            "BM", "BM", "BM", "BM"
1098
+        ),
1099
+        g1_TimePoint = c(
1100
+            1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2,
1101
+            1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2,
1102
+            1, 2, 1, 2
1103
+        ),
1104
+        g2 = c(
1105
+            "PT01_CD15_PB_01", "PT01_CD15_PB_01",
1106
+            "PT01_CD14_PB_06", "PT01_CD14_PB_06",
1107
+            "PT01_CD14_PB_01", "PT01_CD14_PB_01",
1108
+            "PT01_CD14_PB_02", "PT01_CD14_PB_02",
1109
+            "PT01_CD13_PB_08", "PT01_CD13_PB_08",
1110
+            "PT01_CD14_PB_03", "PT01_CD14_PB_03",
1111
+            "PT01_CD13_PB_01", "PT01_CD13_PB_01",
1112
+            "PT01_CD15_PB_01", "PT01_CD15_PB_01",
1113
+            "PT01_CD14_PB_06", "PT01_CD14_PB_06",
1114
+            "PT01_CD14_PB_01", "PT01_CD14_PB_01",
1115
+            "PT01_CD14_PB_02", "PT01_CD14_PB_02",
1116
+            "PT01_CD13_PB_08", "PT01_CD13_PB_08",
1117
+            "PT01_CD14_PB_03", "PT01_CD14_PB_03",
1118
+            "PT01_CD13_PB_01", "PT01_CD13_PB_01"
1119
+        ),
1120
+        g2_SubjectID = c(
1121
+            "PT01", "PT01", "PT01", "PT01",
1122
+            "PT01", "PT01", "PT01", "PT01",
1123
+            "PT01", "PT01", "PT01", "PT01",
1124
+            "PT01", "PT01", "PT01", "PT01",
1125
+            "PT01", "PT01", "PT01", "PT01",
1126
+            "PT01", "PT01", "PT01", "PT01",
1127
+            "PT01", "PT01", "PT01", "PT01"
1128
+        ),
1129
+        g2_CellMarker = c(
1130
+            "CD15", "CD15", "CD14", "CD14",
1131
+            "CD14", "CD14", "CD14", "CD14",
1132
+            "CD13", "CD13", "CD14", "CD14",
1133
+            "CD13", "CD13", "CD15", "CD15",
1134
+            "CD14", "CD14", "CD14", "CD14",
1135
+            "CD14", "CD14", "CD13", "CD13",
1136
+            "CD14", "CD14", "CD13", "CD13"
1137
+        ),
1138
+        g2_Tissue = c(
1139
+            "PB", "PB", "PB", "PB", "PB", "PB",
1140
+            "PB", "PB", "PB", "PB", "PB", "PB",
1141
+            "PB", "PB", "PB", "PB", "PB", "PB",
1142
+            "PB", "PB", "PB", "PB", "PB", "PB",
1143
+            "PB", "PB", "PB", "PB"
1144
+        ),
1145
+        g2_TimePoint = c(
1146
+            1, 1, 6, 6, 1, 1, 2, 2, 8, 8, 3, 3,
1147
+            1, 1, 1, 1, 6, 6, 1, 1, 2, 2, 8, 8,
1148
+            3, 3, 1, 1
1149
+        ),
1150
+        chr = c(
1151
+            "1", NA, "1", NA, "2", NA, NA, "3",
1152
+            NA, "3", "1", NA, NA, NA, "1", NA,
1153
+            "1", NA, "2", NA, NA, "3", NA, "3",
1154
+            "1", NA, NA, NA
1155
+        ),
1156
+        integration_locus = c(
1157
+            12345, NA, 12345, NA, 54321,
1158
+            NA, NA, 62135, NA, 62135, 12345,
1159
+            NA, NA, NA, 12345, NA, 12345,
1160
+            NA, 54321, NA, NA, 62135, NA,
1161
+            62135, 12345, NA, NA, NA
1162
+        ),
1163
+        strand = c(
1164
+            "+", NA, "+", NA, "-", NA, NA,
1165
+            "+", NA, "+", "+", NA, NA, NA,
1166
+            "+", NA, "+", NA, "-", NA, NA,
1167
+            "+", NA, "+", "+", NA, NA, NA
1168
+        ),
1169
+        sharing_perc = c(
1170
+            50, 0, 50, 0, 100, 0, 0, 100, 0, 50,
1171
+            100, 0, 0, 0, 50, 0, 50, 0, 100, 0,
1172
+            0, 100, 0, 50, 100, 0, 0, 0
1173
+        )
1174
+    ) %>%
1175
+        dplyr::arrange(.data$g1),
1176
+    PT02 = tibble::tibble(
1177
+        g1 = c(
1178
+            "PT02_CD34_BM_3", "PT02_CD34_BM_1",
1179
+            "PT02_CD34_BM_3", "PT02_CD34_BM_1",
1180
+            "PT02_CD34_BM_3", "PT02_CD34_BM_1",
1181
+            "PT02_CD34_BM_3", "PT02_CD34_BM_1",
1182
+            "PT02_CD34_BM_3", "PT02_CD34_BM_1",
1183
+            "PT02_CD34_BM_3", "PT02_CD34_BM_1",
1184
+            "PT02_Whole_BM_6", "PT02_Whole_BM_1",
1185
+            "PT02_Whole_BM_6", "PT02_Whole_BM_1",
1186
+            "PT02_Whole_BM_6", "PT02_Whole_BM_1",
1187
+            "PT02_Whole_BM_6", "PT02_Whole_BM_1",
1188
+            "PT02_Whole_BM_6", "PT02_Whole_BM_1",
1189
+            "PT02_Whole_BM_6", "PT02_Whole_BM_1"
1190
+        ),
1191
+        g1_SubjectID = c(
1192
+            "PT02", "PT02", "PT02", "PT02",
1193
+            "PT02", "PT02", "PT02", "PT02",
1194
+            "PT02", "PT02", "PT02", "PT02",
1195
+            "PT02", "PT02", "PT02", "PT02",
1196
+            "PT02", "PT02", "PT02", "PT02",
1197
+            "PT02", "PT02", "PT02", "PT02"
1198
+        ),
1199
+        g1_CellMarker = c(
1200
+            "CD34", "CD34", "CD34", "CD34",
1201
+            "CD34", "CD34", "CD34", "CD34",
1202
+            "CD34", "CD34", "CD34", "CD34",
1203
+            "Whole", "Whole", "Whole", "Whole",
1204
+            "Whole", "Whole", "Whole", "Whole",
1205
+            "Whole", "Whole", "Whole", "Whole"
1206
+        ),
1207
+        g1_Tissue = c(
1208
+            "BM", "BM", "BM", "BM", "BM", "BM",
1209
+            "BM", "BM", "BM", "BM", "BM", "BM",
1210
+            "BM", "BM", "BM", "BM", "BM", "BM",
1211
+            "BM", "BM", "BM", "BM", "BM", "BM"
1212
+        ),
1213
+        g1_TimePoint = c(
1214
+            3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1,
1215
+            6, 1, 6, 1, 6, 1, 6, 1, 6, 1, 6, 1
1216
+        ),
1217
+        g2 = c(
1218
+            "PT02_CD14_PB_03", "PT02_CD14_PB_03",
1219
+            "PT02_CD13_PB_03", "PT02_CD13_PB_03",
1220
+            "PT02_CD13_PB_01", "PT02_CD13_PB_01",
1221
+            "PT02_CD15_PB_01", "PT02_CD15_PB_01",
1222
+            "PT02_CD15_PB_06", "PT02_CD15_PB_06",
1223
+            "PT02_CD14_PB_02", "PT02_CD14_PB_02",
1224
+            "PT02_CD14_PB_03", "PT02_CD14_PB_03",
1225
+            "PT02_CD13_PB_03", "PT02_CD13_PB_03",
1226
+            "PT02_CD13_PB_01", "PT02_CD13_PB_01",
1227
+            "PT02_CD15_PB_01", "PT02_CD15_PB_01",
1228
+            "PT02_CD15_PB_06", "PT02_CD15_PB_06",
1229
+            "PT02_CD14_PB_02", "PT02_CD14_PB_02"
1230
+        ),
1231
+        g2_SubjectID = c(
1232
+            "PT02", "PT02", "PT02", "PT02",
1233
+            "PT02", "PT02", "PT02", "PT02",
1234
+            "PT02", "PT02", "PT02", "PT02",
1235
+            "PT02", "PT02", "PT02", "PT02",
1236
+            "PT02", "PT02", "PT02", "PT02",
1237
+            "PT02", "PT02", "PT02", "PT02"
1238
+        ),
1239
+        g2_CellMarker = c(
1240
+            "CD14", "CD14", "CD13", "CD13",
1241
+            "CD13", "CD13", "CD15", "CD15",
1242
+            "CD15", "CD15", "CD14", "CD14",
1243
+            "CD14", "CD14", "CD13", "CD13",
1244
+            "CD13", "CD13", "CD15", "CD15",
1245
+            "CD15", "CD15", "CD14", "CD14"
1246
+        ),
1247
+        g2_Tissue = c(
1248
+            "PB", "PB", "PB", "PB", "PB", "PB",
1249
+            "PB", "PB", "PB", "PB", "PB", "PB",
1250
+            "PB", "PB", "PB", "PB", "PB", "PB",
1251
+            "PB", "PB", "PB", "PB", "PB", "PB"
1252
+        ),
1253
+        g2_TimePoint = c(
1254
+            3, 3, 3, 3, 1, 1, 1, 1, 6, 6, 2, 2,
1255
+            3, 3, 3, 3, 1, 1, 1, 1, 6, 6, 2, 2
1256
+        ),
1257
+        chr = c(
1258
+            NA, NA, "1", NA, NA, "2", NA, NA,
1259
+            NA, NA, NA, NA, NA, NA, NA,
1260
+            NA, NA, NA, NA, 1, 1, NA, NA,
1261
+            NA
1262
+        ),
1263
+        integration_locus = c(
1264
+            NA, NA, 43524, NA, NA, 76835,
1265
+            NA, NA, NA, NA, NA, NA, NA, NA,
1266
+            NA, NA, NA, NA, NA, 56832,
1267
+            12345, NA, NA, NA
1268
+        ),
1269
+        strand = c(
1270
+            NA, NA, "+", NA, NA, "-", NA,
1271
+            NA, NA, NA, NA, NA, NA,
1272
+            NA, NA, NA, NA, NA, NA,
1273
+            "-", "+", NA, NA, NA
1274
+        ),
1275
+        sharing_perc = c(
1276
+            0, 0, 100, 0, 0, 100, 0, 0, 0, 0,
1277
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 100,
1278
+            100, 0, 0, 0
1279
+        )
1280
+    ) %>%
1281
+        dplyr::arrange(.data$g1)
1282
+)
1283
+
1284
+expected_res2 <- tibble::tibble(
1285
+    g1 = c(
1286
+        "PT01_CD34_BM_1", "PT01_CD34_BM_2", "PT01_CD34_BM_1", "PT01_CD34_BM_2",
1287
+        "PT01_CD34_BM_1", "PT01_CD34_BM_2", "PT01_CD34_BM_1", "PT01_CD34_BM_2",
1288
+        "PT01_CD34_BM_1", "PT01_CD34_BM_2", "PT01_CD34_BM_1", "PT01_CD34_BM_2",
1289
+        "PT01_CD34_BM_1", "PT01_CD34_BM_2", "PT01_CD34_BM_1", "PT01_CD34_BM_2",
1290
+        "PT01_CD34_BM_1", "PT01_CD34_BM_2", "PT01_CD34_BM_1", "PT01_CD34_BM_2",
1291
+        "PT01_CD34_BM_1", "PT01_CD34_BM_2", "PT01_CD34_BM_1", "PT01_CD34_BM_2",
1292
+        "PT01_CD34_BM_1", "PT01_CD34_BM_2", "PT01_Whole_BM_1",
1293
+        "PT01_Whole_BM_2", "PT01_Whole_BM_1", "PT01_Whole_BM_2",
1294
+        "PT01_Whole_BM_1", "PT01_Whole_BM_2", "PT01_Whole_BM_1",
1295
+        "PT01_Whole_BM_2", "PT01_Whole_BM_1", "PT01_Whole_BM_2",
1296
+        "PT01_Whole_BM_1", "PT01_Whole_BM_2", "PT01_Whole_BM_1",
1297
+        "PT01_Whole_BM_2", "PT01_Whole_BM_1", "PT01_Whole_BM_2",
1298
+        "PT01_Whole_BM_1", "PT01_Whole_BM_2", "PT01_Whole_BM_1",
1299
+        "PT01_Whole_BM_2", "PT01_Whole_BM_1", "PT01_Whole_BM_2",
1300
+        "PT01_Whole_BM_1", "PT01_Whole_BM_2", "PT01_Whole_BM_1",
1301
+        "PT01_Whole_BM_2", "PT02_CD34_BM_3", "PT02_CD34_BM_1",
1302
+        "PT02_CD34_BM_3", "PT02_CD34_BM_1", "PT02_CD34_BM_3",
1303
+        "PT02_CD34_BM_1", "PT02_CD34_BM_3", "PT02_CD34_BM_1",
1304
+        "PT02_CD34_BM_3", "PT02_CD34_BM_1", "PT02_CD34_BM_3",
1305
+        "PT02_CD34_BM_1", "PT02_CD34_BM_3", "PT02_CD34_BM_1",
1306
+        "PT02_CD34_BM_3", "PT02_CD34_BM_1", "PT02_CD34_BM_3",
1307
+        "PT02_CD34_BM_1", "PT02_CD34_BM_3", "PT02_CD34_BM_1",
1308
+        "PT02_CD34_BM_3", "PT02_CD34_BM_1", "PT02_CD34_BM_3",
1309
+        "PT02_CD34_BM_1", "PT02_CD34_BM_3", "PT02_CD34_BM_1",
1310
+        "PT02_Whole_BM_6", "PT02_Whole_BM_1", "PT02_Whole_BM_6",
1311
+        "PT02_Whole_BM_1", "PT02_Whole_BM_6", "PT02_Whole_BM_1",
1312
+        "PT02_Whole_BM_6", "PT02_Whole_BM_1", "PT02_Whole_BM_6",
1313
+        "PT02_Whole_BM_1", "PT02_Whole_BM_6", "PT02_Whole_BM_1",
1314
+        "PT02_Whole_BM_6", "PT02_Whole_BM_1", "PT02_Whole_BM_6",
1315
+        "PT02_Whole_BM_1", "PT02_Whole_BM_6", "PT02_Whole_BM_1",
1316
+        "PT02_Whole_BM_6", "PT02_Whole_BM_1", "PT02_Whole_BM_6",
1317
+        "PT02_Whole_BM_1", "PT02_Whole_BM_6", "PT02_Whole_BM_1",
1318
+        "PT02_Whole_BM_6", "PT02_Whole_BM_1"
1319
+    ),
1320
+    g1_SubjectID = c(
1321
+        "PT01", "PT01", "PT01", "PT01", "PT01", "PT01", "PT01",
1322
+        "PT01", "PT01", "PT01", "PT01", "PT01", "PT01", "PT01",
1323
+        "PT01", "PT01", "PT01", "PT01", "PT01", "PT01", "PT01",
1324
+        "PT01", "PT01", "PT01", "PT01", "PT01", "PT01", "PT01",
1325
+        "PT01", "PT01", "PT01", "PT01", "PT01", "PT01", "PT01",
1326
+        "PT01", "PT01", "PT01", "PT01", "PT01", "PT01", "PT01",
1327
+        "PT01", "PT01", "PT01", "PT01", "PT01", "PT01", "PT01",
1328
+        "PT01", "PT01", "PT01", "PT02", "PT02", "PT02", "PT02",
1329
+        "PT02", "PT02", "PT02", "PT02", "PT02", "PT02", "PT02",
1330
+        "PT02", "PT02", "PT02", "PT02", "PT02", "PT02", "PT02",
1331
+        "PT02", "PT02", "PT02", "PT02", "PT02", "PT02", "PT02",
1332
+        "PT02", "PT02", "PT02", "PT02", "PT02", "PT02", "PT02",
1333
+        "PT02", "PT02", "PT02", "PT02", "PT02", "PT02", "PT02",
1334
+        "PT02", "PT02", "PT02", "PT02", "PT02", "PT02", "PT02",
1335
+        "PT02", "PT02", "PT02", "PT02", "PT02", "PT02"
1336
+    ),
1337
+    g1_CellMarker = c(
1338
+        "CD34", "CD34", "CD34", "CD34", "CD34", "CD34", "CD34",
1339
+        "CD34", "CD34", "CD34", "CD34", "CD34", "CD34", "CD34",
1340
+        "CD34", "CD34", "CD34", "CD34", "CD34", "CD34", "CD34",
1341
+        "CD34", "CD34", "CD34", "CD34", "CD34", "Whole", "Whole",
1342
+        "Whole", "Whole", "Whole", "Whole", "Whole", "Whole",
1343
+        "Whole", "Whole", "Whole", "Whole", "Whole", "Whole",
1344
+        "Whole", "Whole", "Whole", "Whole", "Whole", "Whole",
1345
+        "Whole", "Whole", "Whole", "Whole", "Whole", "Whole",
1346
+        "CD34", "CD34", "CD34", "CD34", "CD34", "CD34", "CD34",
1347
+        "CD34", "CD34", "CD34", "CD34", "CD34", "CD34", "CD34",
1348
+        "CD34", "CD34", "CD34", "CD34", "CD34", "CD34", "CD34",
1349
+        "CD34", "CD34", "CD34", "CD34", "CD34", "Whole", "Whole",
1350
+        "Whole", "Whole", "Whole", "Whole", "Whole", "Whole",
1351
+        "Whole", "Whole", "Whole", "Whole", "Whole", "Whole",
1352
+        "Whole", "Whole", "Whole", "Whole", "Whole", "Whole",
1353
+        "Whole", "Whole", "Whole", "Whole", "Whole", "Whole"
1354
+    ),
1355
+    g1_Tissue = c(
1356
+        "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM",
1357
+        "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM",
1358
+        "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM",
1359
+        "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM",
1360
+        "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM",
1361
+        "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM",
1362
+        "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM",
1363
+        "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM",
1364
+        "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM",
1365
+        "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM",
1366
+        "BM", "BM", "BM", "BM"
1367
+    ),
1368
+    g1_TimePoint = c(
1369
+        1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1,
1370
+        2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2,
1371
+        1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 3, 1, 3, 1, 3,
1372
+        1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1,
1373
+        3, 1, 6, 1, 6, 1, 6, 1, 6, 1, 6, 1, 6, 1, 6, 1, 6, 1, 6,
1374
+        1, 6, 1, 6, 1, 6, 1, 6, 1
1375
+    ),
1376
+    g2 = c(
1377
+        "PT02_CD14_PB_03", "PT02_CD14_PB_03", "PT02_CD13_PB_03",
1378
+        "PT02_CD13_PB_03", "PT01_CD15_PB_01", "PT01_CD15_PB_01",
1379
+        "PT01_CD14_PB_06", "PT01_CD14_PB_06", "PT01_CD14_PB_01",
1380
+        "PT01_CD14_PB_01", "PT02_CD13_PB_01", "PT02_CD13_PB_01",
1381
+        "PT01_CD14_PB_02", "PT01_CD14_PB_02", "PT01_CD13_PB_08",
1382
+        "PT01_CD13_PB_08", "PT01_CD14_PB_03", "PT01_CD14_PB_03",
1383
+        "PT02_CD15_PB_01", "PT02_CD15_PB_01", "PT02_CD15_PB_06",
1384
+        "PT02_CD15_PB_06", "PT01_CD13_PB_01", "PT01_CD13_PB_01",
1385
+        "PT02_CD14_PB_02", "PT02_CD14_PB_02", "PT02_CD14_PB_03",
1386
+        "PT02_CD14_PB_03", "PT02_CD13_PB_03", "PT02_CD13_PB_03",
1387
+        "PT01_CD15_PB_01", "PT01_CD15_PB_01", "PT01_CD14_PB_06",
1388
+        "PT01_CD14_PB_06", "PT01_CD14_PB_01", "PT01_CD14_PB_01",
1389
+        "PT02_CD13_PB_01", "PT02_CD13_PB_01", "PT01_CD14_PB_02",
1390
+        "PT01_CD14_PB_02", "PT01_CD13_PB_08", "PT01_CD13_PB_08",
1391
+        "PT01_CD14_PB_03", "PT01_CD14_PB_03", "PT02_CD15_PB_01",
1392
+        "PT02_CD15_PB_01", "PT02_CD15_PB_06", "PT02_CD15_PB_06",
1393
+        "PT01_CD13_PB_01", "PT01_CD13_PB_01", "PT02_CD14_PB_02",
1394
+        "PT02_CD14_PB_02", "PT02_CD14_PB_03", "PT02_CD14_PB_03",
1395
+        "PT02_CD13_PB_03", "PT02_CD13_PB_03", "PT01_CD15_PB_01",
1396
+        "PT01_CD15_PB_01", "PT01_CD14_PB_06", "PT01_CD14_PB_06",
1397
+        "PT01_CD14_PB_01", "PT01_CD14_PB_01", "PT02_CD13_PB_01",
1398
+        "PT02_CD13_PB_01", "PT01_CD14_PB_02", "PT01_CD14_PB_02",
1399
+        "PT01_CD13_PB_08", "PT01_CD13_PB_08", "PT01_CD14_PB_03",
1400
+        "PT01_CD14_PB_03", "PT02_CD15_PB_01", "PT02_CD15_PB_01",
1401
+        "PT02_CD15_PB_06", "PT02_CD15_PB_06", "PT01_CD13_PB_01",
1402
+        "PT01_CD13_PB_01", "PT02_CD14_PB_02", "PT02_CD14_PB_02",
1403
+        "PT02_CD14_PB_03", "PT02_CD14_PB_03", "PT02_CD13_PB_03",
1404
+        "PT02_CD13_PB_03", "PT01_CD15_PB_01", "PT01_CD15_PB_01",
1405
+        "PT01_CD14_PB_06", "PT01_CD14_PB_06", "PT01_CD14_PB_01",
1406
+        "PT01_CD14_PB_01", "PT02_CD13_PB_01", "PT02_CD13_PB_01",
1407
+        "PT01_CD14_PB_02", "PT01_CD14_PB_02", "PT01_CD13_PB_08",
1408
+        "PT01_CD13_PB_08", "PT01_CD14_PB_03", "PT01_CD14_PB_03",
1409
+        "PT02_CD15_PB_01", "PT02_CD15_PB_01", "PT02_CD15_PB_06",
1410
+        "PT02_CD15_PB_06", "PT01_CD13_PB_01", "PT01_CD13_PB_01",
1411
+        "PT02_CD14_PB_02", "PT02_CD14_PB_02"
1412
+    ),
1413
+    g2_SubjectID = c(
1414
+        "PT02", "PT02", "PT02", "PT02", "PT01", "PT01", "PT01",
1415
+        "PT01", "PT01", "PT01", "PT02", "PT02", "PT01", "PT01",
1416
+        "PT01", "PT01", "PT01", "PT01", "PT02", "PT02", "PT02",
1417
+        "PT02", "PT01", "PT01", "PT02", "PT02", "PT02", "PT02",
1418
+        "PT02", "PT02", "PT01", "PT01", "PT01", "PT01", "PT01",
1419
+        "PT01", "PT02", "PT02", "PT01", "PT01", "PT01", "PT01",
1420
+        "PT01", "PT01", "PT02", "PT02", "PT02", "PT02", "PT01",
1421
+        "PT01", "PT02", "PT02", "PT02", "PT02", "PT02", "PT02",
1422
+        "PT01", "PT01", "PT01", "PT01", "PT01", "PT01", "PT02",
1423
+        "PT02", "PT01", "PT01", "PT01", "PT01", "PT01", "PT01",
1424
+        "PT02", "PT02", "PT02", "PT02", "PT01", "PT01", "PT02",
1425
+        "PT02", "PT02", "PT02", "PT02", "PT02", "PT01", "PT01",
1426
+        "PT01", "PT01", "PT01", "PT01", "PT02", "PT02", "PT01",
1427
+        "PT01", "PT01", "PT01", "PT01", "PT01", "PT02", "PT02",
1428
+        "PT02", "PT02", "PT01", "PT01", "PT02", "PT02"
1429
+    ),
1430
+    g2_CellMarker = c(
1431
+        "CD14", "CD14", "CD13", "CD13", "CD15", "CD15", "CD14",
1432
+        "CD14", "CD14", "CD14", "CD13", "CD13", "CD14", "CD14",
1433
+        "CD13", "CD13", "CD14", "CD14", "CD15", "CD15", "CD15",
1434
+        "CD15", "CD13", "CD13", "CD14", "CD14", "CD14", "CD14",
1435
+        "CD13", "CD13", "CD15", "CD15", "CD14", "CD14", "CD14",
1436
+        "CD14", "CD13", "CD13", "CD14", "CD14", "CD13", "CD13",
1437
+        "CD14", "CD14", "CD15", "CD15", "CD15", "CD15", "CD13",
1438
+        "CD13", "CD14", "CD14", "CD14", "CD14", "CD13", "CD13",
1439
+        "CD15", "CD15", "CD14", "CD14", "CD14", "CD14", "CD13",
1440
+        "CD13", "CD14", "CD14", "CD13", "CD13", "CD14", "CD14",
1441
+        "CD15", "CD15", "CD15", "CD15", "CD13", "CD13", "CD14",
1442
+        "CD14", "CD14", "CD14", "CD13", "CD13", "CD15", "CD15",
1443
+        "CD14", "CD14", "CD14", "CD14", "CD13", "CD13", "CD14",
1444
+        "CD14", "CD13", "CD13", "CD14", "CD14", "CD15", "CD15",
1445
+        "CD15", "CD15", "CD13", "CD13", "CD14", "CD14"
1446
+    ),
1447
+    g2_Tissue = c(
1448
+        "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB",
1449
+        "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB",
1450
+        "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB",
1451
+        "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB",
1452
+        "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB",
1453
+        "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB",
1454
+        "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB",
1455
+        "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB",
1456
+        "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB",
1457
+        "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB",
1458
+        "PB", "PB", "PB", "PB"
1459
+    ),
1460
+    g2_TimePoint = c(
1461
+        3, 3, 3, 3, 1, 1, 6, 6, 1, 1, 1, 1, 2, 2, 8, 8, 3, 3, 1,
1462
+        1, 6, 6, 1, 1, 2, 2, 3, 3, 3, 3, 1, 1, 6, 6, 1, 1, 1, 1,
1463
+        2, 2, 8, 8, 3, 3, 1, 1, 6, 6, 1, 1, 2, 2, 3, 3, 3, 3, 1,
1464
+        1, 6, 6, 1, 1, 1, 1, 2, 2, 8, 8, 3, 3, 1, 1, 6, 6, 1, 1,
1465
+        2, 2, 3, 3, 3, 3, 1, 1, 6, 6, 1, 1, 1, 1, 2, 2, 8, 8, 3,
1466
+        3, 1, 1, 6, 6, 1, 1, 2, 2
1467
+    ),
1468
+    chr = c(
1469
+        NA, NA, NA, NA, "1", NA, "1", NA, "2", NA, NA, NA, NA, "3", NA,
1470
+        "3", "1", NA, NA, NA, "1", NA, NA, NA, NA, "3", NA, NA, NA, NA,
1471
+        "1", NA, "1", NA, "2", NA, NA, NA, NA, "3", NA, "3", "1", NA, NA,
1472
+        NA, "1", NA, NA, NA, NA, "3", NA, NA, "1", NA, NA, NA, NA, NA, NA,
1473
+        NA, NA, "2", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
1474
+        NA, NA, NA, NA, NA, "1", NA, "1", NA, NA, NA, NA, NA, NA, NA, NA,
1475
+        NA, "1", NA, NA, "1", "1", NA, NA, NA, NA, NA
1476
+    ),
1477
+    integration_locus = c(
1478
+        NA, NA, NA, NA, 12345, NA, 12345, NA, 54321, NA, NA,
1479
+        NA, NA, 62135, NA, 62135, 12345, NA, NA, NA, 12345,
1480
+        NA, NA, NA, NA, 62135, NA, NA, NA, NA, 12345, NA,
1481
+        12345, NA, 54321, NA, NA, NA, NA, 62135, NA, 62135,
1482
+        12345, NA, NA, NA, 12345, NA, NA, NA, NA, 62135, NA,
1483
+        NA, 43524, NA, NA, NA, NA, NA, NA, NA, NA, 76835, NA,
1484
+        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
1485
+        NA, NA, NA, NA, 12345, NA, 12345, NA, NA, NA, NA, NA,
1486
+        NA, NA, NA, NA, 12345, NA, NA, 56832, 12345, NA, NA,
1487
+        NA, NA, NA
1488
+    ),
1489
+    strand = c(
1490
+        NA, NA, NA, NA, "+", NA, "+", NA, "-", NA, NA, NA, NA, "+", NA,
1491
+        "+", "+", NA, NA, NA, "+", NA, NA, NA, NA, "+", NA, NA, NA, NA,
1492
+        "+", NA, "+", NA, "-", NA, NA, NA, NA, "+", NA, "+", "+", NA,
1493
+        NA, NA, "+", NA, NA, NA, NA, "+", NA, NA, "+", NA, NA, NA, NA,
1494
+        NA, NA, NA, NA, "-", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
1495
+        NA, NA, NA, NA, NA, NA, NA, "+", NA, "+", NA, NA, NA, NA, NA, NA,
1496
+        NA, NA, NA, "+", NA, NA, "-", "+", NA, NA, NA, NA, NA
1497
+    ),
1498
+    sharing_perc = c(
1499
+        0, 0, 0, 0, 50, 0, 50, 0, 100, 0, 0, 0, 0, 100, 0, 50,
1500
+        100, 0, 0, 0, 100, 0, 0, 0, 0, 100, 0, 0, 0, 0, 50, 0, 50,
1501
+        0, 100, 0, 0, 0, 0, 100, 0, 50, 100, 0, 0, 0, 100, 0, 0,
1502
+        0, 0, 100, 0, 0, 100, 0, 0, 0, 0, 0, 0, 0, 0, 100, 0, 0,
1503
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 0, 50,
1504
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 100, 0, 0, 100, 100, 0, 0, 0,
1505
+        0, 0
1506
+    )
1507
+)
1508
+
1509
+test_that("iss_source produces expected output - per patient", {
1510
+    res <- iss_source(test_df2, test_df3) %>%
1511
+        purrr::map(~ dplyr::arrange(.x, .data$g1))
1512
+    expect_equal(res, expected_res1)
1513
+})
1514
+
1515
+test_that("iss_source produces expected output - NO per patient", {
1516
+    res <- iss_source(test_df2, test_df3, by_subject = FALSE)
1517
+    expect_equal(res, expected_res2)
1518
+})
... ...
@@ -4,325 +4,445 @@ func_name <- "purity_filter"
4 4
 # Global vars
5 5
 #------------------------------------------------------------------------------#
6 6
 
7
-df <- tibble::tibble(chr = c('1', '1', '1', '1', '1', '1', '1', '1', '1', '1',
8
-                             '1', '1', '1', '1', '1', '1', '1', '1', '1', '1',
9
-                             '1', '1', '1', '1', '1', '1', '1', '1', '1', '1',
10
-                             '1', '1', '1', '1', '1', '1', '1', '1', '1', '1'),
11
-                     integration_locus = c(121249, 251227, 645551, 732938,
12
-                                           775536, 846681, 1029785, 1036835,
13
-                                           121249, 251227, 645551, 732938,
14
-                                           775536, 846681, 1029785, 1036835,
15
-                                           121249, 251227, 645551, 732938,
16
-                                           775536, 846681, 1029785, 1036835,
17
-                                           121249, 251227, 645551, 732938,
18
-                                           775536, 846681, 1029785, 1036835,
19
-                                           121249, 251227, 645551, 732938,
20
-                                           775536, 846681, 1029785, 1036835),
21
-                     strand = c('+', '+', '+', '+', '+', '+', '-', '+', '+',
22
-                                '+', '+', '+', '+', '+', '-', '+', '+', '+',
23
-                                '+', '+', '+', '+', '-', '+', '+', '+', '+',
24
-                                '+', '+', '+', '-', '+', '+', '+', '+', '+',
25
-                                '+', '+', '-', '+'),
26
-                     GeneName = c('LOC729737', 'LOC100132287', 'LOC100133331',
27
-                                  'LOC100288069', 'LINC01128', 'LOC100130417',
28
-                                  'C1orf159', 'C1orf159', 'LOC729737',
29
-                                  'LOC100132287', 'LOC100133331',
30
-                                  'LOC100288069', 'LINC01128', 'LOC100130417',
31
-                                  'C1orf159', 'C1orf159', 'LOC729737',
32
-                                  'LOC100132287', 'LOC100133331',
33
-                                  'LOC100288069', 'LINC01128', 'LOC100130417'