... | ... |
@@ -34,6 +34,7 @@ export(import_parallel_Vispa2Matrices_interactive) |
34 | 34 |
export(import_single_Vispa2Matrix) |
35 | 35 |
export(integration_alluvial_plot) |
36 | 36 |
export(is_sharing) |
37 |
+export(iss_source) |
|
37 | 38 |
export(known_clinical_oncogenes) |
38 | 39 |
export(mandatory_IS_vars) |
39 | 40 |
export(matching_options) |
... | ... |
@@ -2,6 +2,16 @@ |
2 | 2 |
title: "NEWS" |
3 | 3 |
output: github_document |
4 | 4 |
--- |
5 |
+# ISAnalytics 1.3.7 (2021-10-20) |
|
6 |
+ |
|
7 |
+## NEW |
|
8 |
+ |
|
9 |
+* Added new feature `iss_source()` |
|
10 |
+ |
|
11 |
+## FIXES |
|
12 |
+ |
|
13 |
+* Fixed minor issues in data files `refGenes_mm9` and function `compute_near_integrations()` |
|
14 |
+ |
|
5 | 15 |
# ISAnalytics 1.3.6 (2021-10-05) |
6 | 16 |
|
7 | 17 |
## NEW |
... | ... |
@@ -1,6 +1,17 @@ |
1 | 1 |
NEWS |
2 | 2 |
================ |
3 | 3 |
|
4 |
+# ISAnalytics 1.3.7 (2021-10-20) |
|
5 |
+ |
|
6 |
+## NEW |
|
7 |
+ |
|
8 |
+- Added new feature `iss_source()` |
|
9 |
+ |
|
10 |
+## FIXES |
|
11 |
+ |
|
12 |
+- Fixed minor issues in data files `refGenes_mm9` and function |
|
13 |
+ `compute_near_integrations()` |
|
14 |
+ |
|
4 | 15 |
# ISAnalytics 1.3.6 (2021-10-05) |
5 | 16 |
|
6 | 17 |
## NEW |
... | ... |
@@ -1545,21 +1545,25 @@ is_sharing <- function(..., |
1545 | 1545 |
#' association_file = rlang::current_env()$association_file, |
1546 | 1546 |
#' value_cols = c("seqCount", "fragmentEstimate") |
1547 | 1547 |
#' ) |
1548 |
-#' filtered_by_purity <- purity_filter(x = aggreg, |
|
1549 |
-#' value_column = "seqCount_sum") |
|
1548 |
+#' filtered_by_purity <- purity_filter( |
|
1549 |
+#' x = aggreg, |
|
1550 |
+#' value_column = "seqCount_sum" |
|
1551 |
+#' ) |
|
1550 | 1552 |
#' head(filtered_by_purity) |
1551 | 1553 |
purity_filter <- function(x, |
1552 |
- lineages = blood_lineages_default(), |
|
1553 |
- aggregation_key = c("SubjectID", "CellMarker", |
|
1554 |
- "Tissue", "TimePoint"), |
|
1555 |
- group_key = c("CellMarker", "Tissue"), |
|
1556 |
- selected_groups = NULL, |
|
1557 |
- join_on = "CellMarker", |
|
1558 |
- min_value = 3, |
|
1559 |
- impurity_threshold = 10, |
|
1560 |
- by_timepoint = TRUE, |
|
1561 |
- timepoint_column = "TimePoint", |
|
1562 |
- value_column = "seqCount_sum") { |
|
1554 |
+ lineages = blood_lineages_default(), |
|
1555 |
+ aggregation_key = c( |
|
1556 |
+ "SubjectID", "CellMarker", |
|
1557 |
+ "Tissue", "TimePoint" |
|
1558 |
+ ), |
|
1559 |
+ group_key = c("CellMarker", "Tissue"), |
|
1560 |
+ selected_groups = NULL, |
|
1561 |
+ join_on = "CellMarker", |
|
1562 |
+ min_value = 3, |
|
1563 |
+ impurity_threshold = 10, |
|
1564 |
+ by_timepoint = TRUE, |
|
1565 |
+ timepoint_column = "TimePoint", |
|
1566 |
+ value_column = "seqCount_sum") { |
|
1563 | 1567 |
## Checks |
1564 | 1568 |
#### - Base |
1565 | 1569 |
stopifnot(is.data.frame(x)) |
... | ... |
@@ -1570,12 +1574,12 @@ purity_filter <- function(x, |
1570 | 1574 |
stopifnot(is.numeric(impurity_threshold) || is.integer(impurity_threshold)) |
1571 | 1575 |
stopifnot(is.character(value_column)) |
1572 | 1576 |
stopifnot(is.null(selected_groups) || is.character(selected_groups) || |
1573 |
- is.data.frame(selected_groups)) |
|
1577 |
+ is.data.frame(selected_groups)) |
|
1574 | 1578 |
#### - Keys |
1575 | 1579 |
if (!all(aggregation_key %in% colnames(x))) { |
1576 | 1580 |
rlang::abort(.missing_user_cols_error( |
1577 | 1581 |
aggregation_key[!aggregation_key %in% colnames(x)] |
1578 |
- )) |
|
1582 |
+ )) |
|
1579 | 1583 |
} |
1580 | 1584 |
if (!value_column[1] %in% colnames(x)) { |
1581 | 1585 |
rlang::abort(.missing_user_cols_error(value_column)) |
... | ... |
@@ -1588,23 +1592,28 @@ purity_filter <- function(x, |
1588 | 1592 |
### If lineages info is needed |
1589 | 1593 |
stopifnot(is.data.frame(lineages)) |
1590 | 1594 |
if (!all(group_key %in% unique(c(colnames(x), colnames(lineages))))) { |
1591 |
- missing_cols <- group_key[!group_key %in% unique(c(colnames(x), |
|
1592 |
- colnames(lineages)))] |
|
1595 |
+ missing_cols <- group_key[!group_key %in% unique(c( |
|
1596 |
+ colnames(x), |
|
1597 |
+ colnames(lineages) |
|
1598 |
+ ))] |
|
1593 | 1599 |
rlang::abort(.missing_user_cols_error(missing_cols)) |
1594 | 1600 |
} |
1595 | 1601 |
if (!(all(join_on %in% colnames(x)) & |
1596 |
- all(join_on %in% colnames(lineages))) |
|
1597 |
- ) { |
|
1602 |
+ all(join_on %in% colnames(lineages))) |
|
1603 |
+ ) { |
|
1598 | 1604 |
missing_common <- c("Missing common column(s) to join on", |
1599 |
- i = paste("The column(s) provided in argument", |
|
1600 |
- "`join_on` is missing from one or", |
|
1601 |
- "both data frames, aborting")) |
|
1602 |
- rlang::abort(missing_common) |
|
1605 |
+ i = paste( |
|
1606 |
+ "The column(s) provided in argument", |
|
1607 |
+ "`join_on` is missing from one or", |
|
1608 |
+ "both data frames, aborting" |
|
1609 |
+ ) |
|
1610 |
+ ) |
|
1611 |
+ rlang::abort(missing_common) |
|
1603 | 1612 |
} |
1604 | 1613 |
TRUE |
1605 | 1614 |
} |
1606 | 1615 |
if (by_timepoint) { |
1607 |
- stopifnot(is.character(timepoint_column)) |
|
1616 |
+ stopifnot(is.character(timepoint_column)) |
|
1608 | 1617 |
timepoint_column <- timepoint_column[1] |
1609 | 1618 |
if (!timepoint_column %in% colnames(x)) { |
1610 | 1619 |
rlang::abort(.missing_user_cols_error(timepoint_column)) |
... | ... |
@@ -1625,13 +1634,17 @@ purity_filter <- function(x, |
1625 | 1634 |
} |
1626 | 1635 |
grouped <- x %>% |
1627 | 1636 |
dplyr::group_by(dplyr::across(dplyr::all_of(c(is_vars, group_key)))) %>% |
1628 |
- dplyr::summarise(Value = sum(.data[[value_column]]), |
|
1629 |
- .groups = "drop") |
|
1637 |
+ dplyr::summarise( |
|
1638 |
+ Value = sum(.data[[value_column]]), |
|
1639 |
+ .groups = "drop" |
|
1640 |
+ ) |
|
1630 | 1641 |
#### - value filter |
1631 |
- filtered_value <- threshold_filter(x = grouped, |
|
1632 |
- threshold = min_value, |
|
1633 |
- cols_to_compare = "Value", |
|
1634 |
- comparators = ">=") |
|
1642 |
+ filtered_value <- threshold_filter( |
|
1643 |
+ x = grouped, |
|
1644 |
+ threshold = min_value, |
|
1645 |
+ cols_to_compare = "Value", |
|
1646 |
+ comparators = ">=" |
|
1647 |
+ ) |
|
1635 | 1648 |
#### - Separating IS 1: group filtering |
1636 | 1649 |
pre_filt <- list() |
1637 | 1650 |
if (is.null(selected_groups) || purrr::is_empty(selected_groups)) { |
... | ... |
@@ -1644,7 +1657,7 @@ purity_filter <- function(x, |
1644 | 1657 |
dplyr::filter(!.data[[group_key[1]]] %in% selected_groups) |
1645 | 1658 |
} else { |
1646 | 1659 |
ok_cols <- colnames(selected_groups)[colnames(selected_groups) %in% |
1647 |
- group_key] |
|
1660 |
+ group_key] |
|
1648 | 1661 |
selected_groups <- selected_groups %>% |
1649 | 1662 |
dplyr::select(dplyr::all_of(ok_cols)) %>% |
1650 | 1663 |
dplyr::distinct() |
... | ... |
@@ -1654,11 +1667,13 @@ purity_filter <- function(x, |
1654 | 1667 |
pre_filt[["keep"]] <- filtered_value[0, ] |
1655 | 1668 |
} else { |
1656 | 1669 |
pre_filt[["process"]] <- dplyr::inner_join(filtered_value, |
1657 |
- selected_groups, |
|
1658 |
- by = ok_cols) |
|
1670 |
+ selected_groups, |
|
1671 |
+ by = ok_cols |
|
1672 |
+ ) |
|
1659 | 1673 |
pre_filt[["keep"]] <- dplyr::anti_join(filtered_value, |
1660 |
- selected_groups, |
|
1661 |
- by = ok_cols) |
|
1674 |
+ selected_groups, |
|
1675 |
+ by = ok_cols |
|
1676 |
+ ) |
|
1662 | 1677 |
} |
1663 | 1678 |
} |
1664 | 1679 |
if (nrow(pre_filt$process) == 0) { |
... | ... |
@@ -1695,7 +1710,7 @@ purity_filter <- function(x, |
1695 | 1710 |
max_val <- max(group$Value) |
1696 | 1711 |
processed <- group %>% |
1697 | 1712 |
dplyr::mutate(remove = (max_val / .data$Value) > |
1698 |
- impurity_threshold) %>% |
|
1713 |
+ impurity_threshold) %>% |
|
1699 | 1714 |
dplyr::filter(remove == FALSE) %>% |
1700 | 1715 |
dplyr::select(-.data$remove) |
1701 | 1716 |
processed |
... | ... |
@@ -1711,6 +1726,163 @@ purity_filter <- function(x, |
1711 | 1726 |
final |
1712 | 1727 |
} |
1713 | 1728 |
|
1729 |
+#' Find the source of IS by evaluating sharing. |
|
1730 |
+#' |
|
1731 |
+#' @description \lifecycle{experimental} |
|
1732 |
+#' The function computes the sharing between a reference group of interest |
|
1733 |
+#' for each time point and a selection of groups of interest. In this way |
|
1734 |
+#' it is possible to observe the percentage of shared integration sites between |
|
1735 |
+#' reference and each group and identify in which time point a certain IS was |
|
1736 |
+#' observed for the first time. |
|
1737 |
+#' |
|
1738 |
+#' @param reference A data frame containing one or more groups of reference. |
|
1739 |
+#' Groups are identified by `ref_group_key` |
|
1740 |
+#' @param selection A data frame containing one or more groups of interest |
|
1741 |
+#' to compare. |
|
1742 |
+#' Groups are identified by `selection_group_key` |
|
1743 |
+#' @param ref_group_key Character vector of column names that identify a |
|
1744 |
+#' unique group in the `reference` data frame |
|
1745 |
+#' @param selection_group_key Character vector of column names that identify a |
|
1746 |
+#' unique group in the `selection` data frame |
|
1747 |
+#' @param timepoint_column Name of the column holding time point |
|
1748 |
+#' info? |
|
1749 |
+#' @param by_subject Should calculations be performed for each subject |
|
1750 |
+#' separately? |
|
1751 |
+#' @param subject_column Name of the column holding subjects information. |
|
1752 |
+#' Relevant only if `by_subject = TRUE` |
|
1753 |
+#' |
|
1754 |
+#' @return A list of data frames or a data frame |
|
1755 |
+#' @family Analysis functions |
|
1756 |
+#' @export |
|
1757 |
+#' |
|
1758 |
+#' @examples |
|
1759 |
+#' data("integration_matrices", package = "ISAnalytics") |
|
1760 |
+#' data("association_file", package = "ISAnalytics") |
|
1761 |
+#' aggreg <- aggregate_values_by_key( |
|
1762 |
+#' x = rlang::current_env()$integration_matrices, |
|
1763 |
+#' association_file = rlang::current_env()$association_file, |
|
1764 |
+#' value_cols = c("seqCount", "fragmentEstimate") |
|
1765 |
+#' ) |
|
1766 |
+#' df1 <- aggreg %>% |
|
1767 |
+#' dplyr::filter(.data$Tissue == "BM") |
|
1768 |
+#' df2 <- aggreg %>% |
|
1769 |
+#' dplyr::filter(.data$Tissue == "PB") |
|
1770 |
+#' source <- iss_source(df1, df2) |
|
1771 |
+#' source |
|
1772 |
+#' ggplot2::ggplot(source$PT001, ggplot2::aes( |
|
1773 |
+#' x = as.factor(g2_TimePoint), |
|
1774 |
+#' y = sharing_perc, fill = g1 |
|
1775 |
+#' )) + |
|
1776 |
+#' ggplot2::geom_col() + |
|
1777 |
+#' ggplot2::labs( |
|
1778 |
+#' x = "Time point", y = "Shared IS % with MNC BM", |
|
1779 |
+#' title = "Source of is MNC BM vs MNC PB" |
|
1780 |
+#' ) |
|
1781 |
+iss_source <- function(reference, |
|
1782 |
+ selection, |
|
1783 |
+ ref_group_key = c( |
|
1784 |
+ "SubjectID", "CellMarker", |
|
1785 |
+ "Tissue", "TimePoint" |
|
1786 |
+ ), |
|
1787 |
+ selection_group_key = c( |
|
1788 |
+ "SubjectID", "CellMarker", |
|
1789 |
+ "Tissue", "TimePoint" |
|
1790 |
+ ), |
|
1791 |
+ timepoint_column = "TimePoint", |
|
1792 |
+ by_subject = TRUE, |
|
1793 |
+ subject_column = "SubjectID") { |
|
1794 |
+ ## Checks |
|
1795 |
+ stopifnot(is.data.frame(reference) & is.data.frame(selection)) |
|
1796 |
+ stopifnot(is.character(ref_group_key) & is.character(selection_group_key)) |
|
1797 |
+ stopifnot(is.character(timepoint_column)) |
|
1798 |
+ stopifnot(is.logical(by_subject)) |
|
1799 |
+ by_subject <- by_subject[1] |
|
1800 |
+ if (!all(ref_group_key %in% colnames(reference))) { |
|
1801 |
+ rlang::abort(.missing_user_cols_error( |
|
1802 |
+ ref_group_key[!ref_group_key %in% colnames(reference)] |
|
1803 |
+ )) |
|
1804 |
+ } |
|
1805 |
+ if (!all(selection_group_key %in% colnames(selection))) { |
|
1806 |
+ rlang::abort(.missing_user_cols_error( |
|
1807 |
+ selection_group_key[!selection_group_key %in% colnames(selection)] |
|
1808 |
+ )) |
|
1809 |
+ } |
|
1810 |
+ timepoint_column <- timepoint_column[1] |
|
1811 |
+ if (!timepoint_column %in% colnames(reference) | |
|
1812 |
+ !timepoint_column %in% colnames(selection)) { |
|
1813 |
+ rlang::abort(.missing_needed_cols(timepoint_column)) |
|
1814 |
+ } |
|
1815 |
+ ## Workflow choice |
|
1816 |
+ if (by_subject) { |
|
1817 |
+ stopifnot(is.character(subject_column)) |
|
1818 |
+ subject_column <- subject_column[1] |
|
1819 |
+ ref_split <- reference %>% |
|
1820 |
+ dplyr::group_by(dplyr::across({{ subject_column }})) |
|
1821 |
+ ref_subjs <- ref_split %>% |
|
1822 |
+ dplyr::group_keys() %>% |
|
1823 |
+ dplyr::pull(.data[[subject_column]]) |
|
1824 |
+ ref_split <- ref_split %>% |
|
1825 |
+ dplyr::group_split() %>% |
|
1826 |
+ purrr::set_names(ref_subjs) |
|
1827 |
+ sel_split <- selection %>% |
|
1828 |
+ dplyr::group_by(dplyr::across({{ subject_column }})) |
|
1829 |
+ sel_subjs <- sel_split %>% |
|
1830 |
+ dplyr::group_keys() %>% |
|
1831 |
+ dplyr::pull(.data[[subject_column]]) |
|
1832 |
+ sel_split <- sel_split %>% |
|
1833 |
+ dplyr::group_split() %>% |
|
1834 |
+ purrr::set_names(sel_subjs) |
|
1835 |
+ shared <- .sharing_for_source(ref_split, |
|
1836 |
+ sel_split, |
|
1837 |
+ ref_key = ref_group_key, |
|
1838 |
+ sel_key = selection_group_key, |
|
1839 |
+ tp_col = timepoint_column, |
|
1840 |
+ subj_col = subject_column |
|
1841 |
+ ) |
|
1842 |
+ shared <- purrr::map( |
|
1843 |
+ shared, |
|
1844 |
+ ~ .x %>% |
|
1845 |
+ dplyr::select( |
|
1846 |
+ -.data$count_g1, -.data$count_g2, |
|
1847 |
+ -.data$count_union |
|
1848 |
+ ) %>% |
|
1849 |
+ tidyr::unnest(.data$is_coord, keep_empty = TRUE) %>% |
|
1850 |
+ dplyr::mutate(sharing_perc = dplyr::if_else( |
|
1851 |
+ shared == 0, 0, .data$on_g2 / .data$shared |
|
1852 |
+ )) %>% |
|
1853 |
+ dplyr::select( |
|
1854 |
+ -.data$shared, -.data$on_g1, -.data$on_g2, |
|
1855 |
+ -.data$on_union |
|
1856 |
+ ) |
|
1857 |
+ ) |
|
1858 |
+ } else { |
|
1859 |
+ shared <- .sharing_for_source(reference, |
|
1860 |
+ selection, |
|
1861 |
+ ref_key = ref_group_key, |
|
1862 |
+ sel_key = selection_group_key, |
|
1863 |
+ tp_col = timepoint_column, |
|
1864 |
+ subj_col = subject_column |
|
1865 |
+ ) |
|
1866 |
+ shared <- shared %>% |
|
1867 |
+ dplyr::select( |
|
1868 |
+ -.data$count_g1, -.data$count_g2, |
|
1869 |
+ -.data$count_union |
|
1870 |
+ ) %>% |
|
1871 |
+ tidyr::unnest(.data$is_coord, keep_empty = TRUE) %>% |
|
1872 |
+ dplyr::mutate(sharing_perc = dplyr::if_else(shared == 0, |
|
1873 |
+ 0, |
|
1874 |
+ .data$on_g2 / |
|
1875 |
+ .data$shared |
|
1876 |
+ )) %>% |
|
1877 |
+ dplyr::select( |
|
1878 |
+ -.data$shared, -.data$on_g1, -.data$on_g2, |
|
1879 |
+ -.data$on_union |
|
1880 |
+ ) |
|
1881 |
+ } |
|
1882 |
+ |
|
1883 |
+ return(shared) |
|
1884 |
+} |
|
1885 |
+ |
|
1714 | 1886 |
#' A set of pre-defined functions for `sample_statistics`. |
1715 | 1887 |
#' |
1716 | 1888 |
#' @return A named list of functions/purrr-style lambdas |
... | ... |
@@ -4735,3 +4735,170 @@ |
4735 | 4735 |
} |
4736 | 4736 |
sharing_df |
4737 | 4737 |
} |
4738 |
+ |
|
4739 |
+#---- USED IN : iss_source ---- |
|
4740 |
+.assign_iss_by_tp <- function(df, timepoint_column) { |
|
4741 |
+ is_vars <- if (.is_annotated(df)) { |
|
4742 |
+ c(mandatory_IS_vars(), annotation_IS_vars()) |
|
4743 |
+ } else { |
|
4744 |
+ mandatory_IS_vars() |
|
4745 |
+ } |
|
4746 |
+ return(df %>% |
|
4747 |
+ dplyr::mutate(!!timepoint_column := as.numeric( |
|
4748 |
+ .data[[timepoint_column]] |
|
4749 |
+ )) %>% |
|
4750 |
+ dplyr::group_by(dplyr::across(dplyr::all_of(is_vars))) %>% |
|
4751 |
+ dplyr::group_modify(~ { |
|
4752 |
+ if (nrow(.x) == 1) { |
|
4753 |
+ return(.x) |
|
4754 |
+ } |
|
4755 |
+ min_tp <- min(.x[[timepoint_column]]) |
|
4756 |
+ return(.x %>% |
|
4757 |
+ dplyr::filter(.data[[timepoint_column]] == min_tp)) |
|
4758 |
+ }) %>% |
|
4759 |
+ dplyr::ungroup()) |
|
4760 |
+} |
|
4761 |
+ |
|
4762 |
+.sharing_for_source <- function(ref, |
|
4763 |
+ sel, |
|
4764 |
+ ref_key, |
|
4765 |
+ sel_key, |
|
4766 |
+ tp_col, |
|
4767 |
+ subj_col) { |
|
4768 |
+ if (!is.data.frame(ref)) { |
|
4769 |
+ ### Workflow by subject |
|
4770 |
+ common_names <- if (!all(names(ref) == names(sel))) { |
|
4771 |
+ intersect(names(ref), names(sel)) |
|
4772 |
+ } else { |
|
4773 |
+ names(ref) |
|
4774 |
+ } |
|
4775 |
+ if (length(common_names) == 0) { |
|
4776 |
+ no_common_err <- c("No common subjects", |
|
4777 |
+ x = paste( |
|
4778 |
+ "No common subjects between", |
|
4779 |
+ "reference and selection" |
|
4780 |
+ ), |
|
4781 |
+ i = paste( |
|
4782 |
+ "In reference:", |
|
4783 |
+ paste0(names(ref), collapse = ", "), |
|
4784 |
+ "\n", |
|
4785 |
+ "In selection: ", |
|
4786 |
+ paste0(names(sel), collapse = ", ") |
|
4787 |
+ ) |
|
4788 |
+ ) |
|
4789 |
+ rlang::abort(no_common_err) |
|
4790 |
+ } |
|
4791 |
+ if (getOption("ISAnalytics.verbose") == TRUE && |
|
4792 |
+ (length(common_names) < length(names(ref)) || |
|
4793 |
+ length(common_names) < length(names(sel)))) { |
|
4794 |
+ all_names <- union(names(ref), names(sel)) |
|
4795 |
+ common_warn_msg <- c("Mismatch in subjects found", |
|
4796 |
+ i = paste( |
|
4797 |
+ "Some subjects were excluded from", |
|
4798 |
+ "computations because they were", |
|
4799 |
+ "absent from reference or from", |
|
4800 |
+ "selection" |
|
4801 |
+ ), |
|
4802 |
+ paste( |
|
4803 |
+ "Excluded: ", |
|
4804 |
+ paste0(all_names[!all_names %in% |
|
4805 |
+ common_names], |
|
4806 |
+ collapse = ", " |
|
4807 |
+ ) |
|
4808 |
+ ) |
|
4809 |
+ ) |
|
4810 |
+ } |
|
4811 |
+ if (.Platform$OS.type == "windows") { |
|
4812 |
+ p <- BiocParallel::SnowParam( |
|
4813 |
+ tasks = length(common_names), |
|
4814 |
+ progressbar = getOption("ISAnalytics.verbose"), |
|
4815 |
+ exportglobals = FALSE |
|
4816 |
+ ) |
|
4817 |
+ } else { |
|
4818 |
+ p <- BiocParallel::MulticoreParam( |
|
4819 |
+ tasks = length(common_names), |
|
4820 |
+ progressbar = getOption("ISAnalytics.verbose"), |
|
4821 |
+ exportglobals = FALSE |
|
4822 |
+ ) |
|
4823 |
+ } |
|
4824 |
+ FUN <- function(subj, |
|
4825 |
+ ref, sel, ref_key, sel_key, |
|
4826 |
+ tp_col) { |
|
4827 |
+ ref_df <- ref[[subj]] |
|
4828 |
+ sel_df <- sel[[subj]] |
|
4829 |
+ ref_key_min <- ref_key[ref_key != tp_col] |
|
4830 |
+ ref_split <- ref_df %>% |
|
4831 |
+ dplyr::group_by(dplyr::across(dplyr::all_of(ref_key_min))) %>% |
|
4832 |
+ dplyr::group_split() |
|
4833 |
+ quiet_sharing <- purrr::quietly(is_sharing) |
|
4834 |
+ sharing <- purrr::map_df(ref_split, ~ { |
|
4835 |
+ assigned_ref <- .assign_iss_by_tp(.x, |
|
4836 |
+ timepoint_column = tp_col |
|
4837 |
+ ) |
|
4838 |
+ sh <- (quiet_sharing(assigned_ref, |
|
4839 |
+ sel_df, |
|
4840 |
+ group_keys = list( |
|
4841 |
+ g1 = ref_key, |
|
4842 |
+ g2 = sel_key |
|
4843 |
+ ), |
|
4844 |
+ keep_genomic_coord = TRUE |
|
4845 |
+ ))$result |
|
4846 |
+ sh <- sh %>% |
|
4847 |
+ tidyr::separate( |
|
4848 |
+ col = "g1", |
|
4849 |
+ into = paste0("g1_", ref_key), |
|
4850 |
+ remove = FALSE, |
|
4851 |
+ convert = TRUE |
|
4852 |
+ ) %>% |
|
4853 |
+ tidyr::separate( |
|
4854 |
+ col = "g2", |
|
4855 |
+ into = paste0("g2_", sel_key), |
|
4856 |
+ remove = FALSE, |
|
4857 |
+ convert = TRUE |
|
4858 |
+ ) |
|
4859 |
+ }) |
|
4860 |
+ } |
|
4861 |
+ |
|
4862 |
+ shared <- BiocParallel::bplapply(common_names, FUN, |
|
4863 |
+ ref = ref, |
|
4864 |
+ sel = sel, |
|
4865 |
+ ref_key = ref_key, |
|
4866 |
+ sel_key = sel_key, |
|
4867 |
+ tp_col = tp_col, |
|
4868 |
+ BPPARAM = p |
|
4869 |
+ ) %>% |
|
4870 |
+ purrr::set_names(common_names) |
|
4871 |
+ return(shared) |
|
4872 |
+ } |
|
4873 |
+ ref_key_min <- ref_key[ref_key != tp_col] |
|
4874 |
+ ref_split <- ref %>% |
|
4875 |
+ dplyr::group_by(dplyr::across(dplyr::all_of(ref_key_min))) %>% |
|
4876 |
+ dplyr::group_split() |
|
4877 |
+ quiet_sharing <- purrr::quietly(is_sharing) |
|
4878 |
+ sharing <- purrr::map_df(ref_split, ~ { |
|
4879 |
+ assigned_ref <- .assign_iss_by_tp(.x, |
|
4880 |
+ timepoint_column = tp_col |
|
4881 |
+ ) |
|
4882 |
+ (quiet_sharing(assigned_ref, |
|
4883 |
+ sel, |
|
4884 |
+ group_keys = list( |
|
4885 |
+ g1 = ref_key, |
|
4886 |
+ g2 = sel_key |
|
4887 |
+ ), |
|
4888 |
+ keep_genomic_coord = TRUE |
|
4889 |
+ ))$result |
|
4890 |
+ }) |
|
4891 |
+ sharing %>% |
|
4892 |
+ tidyr::separate( |
|
4893 |
+ col = "g1", |
|
4894 |
+ into = paste0("g1_", ref_key), |
|
4895 |
+ remove = FALSE, |
|
4896 |
+ convert = TRUE |
|
4897 |
+ ) %>% |
|
4898 |
+ tidyr::separate( |
|
4899 |
+ col = "g2", |
|
4900 |
+ into = paste0("g2_", sel_key), |
|
4901 |
+ remove = FALSE, |
|
4902 |
+ convert = TRUE |
|
4903 |
+ ) |
|
4904 |
+} |
... | ... |
@@ -137,6 +137,17 @@ options("ISAnalytics.reports" = TRUE) |
137 | 137 |
Show more |
138 | 138 |
</summary> |
139 | 139 |
|
140 |
+# ISAnalytics 1.3.7 (2021-10-20) |
|
141 |
+ |
|
142 |
+## NEW |
|
143 |
+ |
|
144 |
+- Added new feature `iss_source()` |
|
145 |
+ |
|
146 |
+## FIXES |
|
147 |
+ |
|
148 |
+- Fixed minor issues in data files `refGenes_mm9` and function |
|
149 |
+ `compute_near_integrations()` |
|
150 |
+ |
|
140 | 151 |
# ISAnalytics 1.3.6 (2021-10-05) |
141 | 152 |
|
142 | 153 |
## NEW |
... | ... |
@@ -80,6 +80,7 @@ Other Analysis functions: |
80 | 80 |
\code{\link{cumulative_count_union}()}, |
81 | 81 |
\code{\link{cumulative_is}()}, |
82 | 82 |
\code{\link{is_sharing}()}, |
83 |
+\code{\link{iss_source}()}, |
|
83 | 84 |
\code{\link{purity_filter}()}, |
84 | 85 |
\code{\link{sample_statistics}()}, |
85 | 86 |
\code{\link{separate_quant_matrices}()}, |
... | ... |
@@ -70,6 +70,7 @@ Other Analysis functions: |
70 | 70 |
\code{\link{cumulative_count_union}()}, |
71 | 71 |
\code{\link{cumulative_is}()}, |
72 | 72 |
\code{\link{is_sharing}()}, |
73 |
+\code{\link{iss_source}()}, |
|
73 | 74 |
\code{\link{purity_filter}()}, |
74 | 75 |
\code{\link{sample_statistics}()}, |
75 | 76 |
\code{\link{separate_quant_matrices}()}, |
... | ... |
@@ -62,6 +62,7 @@ Other Analysis functions: |
62 | 62 |
\code{\link{cumulative_count_union}()}, |
63 | 63 |
\code{\link{cumulative_is}()}, |
64 | 64 |
\code{\link{is_sharing}()}, |
65 |
+\code{\link{iss_source}()}, |
|
65 | 66 |
\code{\link{purity_filter}()}, |
66 | 67 |
\code{\link{sample_statistics}()}, |
67 | 68 |
\code{\link{separate_quant_matrices}()}, |
... | ... |
@@ -91,6 +91,7 @@ Other Analysis functions: |
91 | 91 |
\code{\link{compute_abundance}()}, |
92 | 92 |
\code{\link{cumulative_is}()}, |
93 | 93 |
\code{\link{is_sharing}()}, |
94 |
+\code{\link{iss_source}()}, |
|
94 | 95 |
\code{\link{purity_filter}()}, |
95 | 96 |
\code{\link{sample_statistics}()}, |
96 | 97 |
\code{\link{separate_quant_matrices}()}, |
... | ... |
@@ -57,6 +57,7 @@ Other Analysis functions: |
57 | 57 |
\code{\link{compute_abundance}()}, |
58 | 58 |
\code{\link{cumulative_count_union}()}, |
59 | 59 |
\code{\link{is_sharing}()}, |
60 |
+\code{\link{iss_source}()}, |
|
60 | 61 |
\code{\link{purity_filter}()}, |
61 | 62 |
\code{\link{sample_statistics}()}, |
62 | 63 |
\code{\link{separate_quant_matrices}()}, |
... | ... |
@@ -95,6 +95,7 @@ Other Analysis functions: |
95 | 95 |
\code{\link{compute_abundance}()}, |
96 | 96 |
\code{\link{cumulative_count_union}()}, |
97 | 97 |
\code{\link{cumulative_is}()}, |
98 |
+\code{\link{iss_source}()}, |
|
98 | 99 |
\code{\link{purity_filter}()}, |
99 | 100 |
\code{\link{sample_statistics}()}, |
100 | 101 |
\code{\link{separate_quant_matrices}()}, |
101 | 102 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,85 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/analysis-functions.R |
|
3 |
+\name{iss_source} |
|
4 |
+\alias{iss_source} |
|
5 |
+\title{Find the source of IS by evaluating sharing.} |
|
6 |
+\usage{ |
|
7 |
+iss_source( |
|
8 |
+ reference, |
|
9 |
+ selection, |
|
10 |
+ ref_group_key = c("SubjectID", "CellMarker", "Tissue", "TimePoint"), |
|
11 |
+ selection_group_key = c("SubjectID", "CellMarker", "Tissue", "TimePoint"), |
|
12 |
+ timepoint_column = "TimePoint", |
|
13 |
+ by_subject = TRUE, |
|
14 |
+ subject_column = "SubjectID" |
|
15 |
+) |
|
16 |
+} |
|
17 |
+\arguments{ |
|
18 |
+\item{reference}{A data frame containing one or more groups of reference. |
|
19 |
+Groups are identified by \code{ref_group_key}} |
|
20 |
+ |
|
21 |
+\item{selection}{A data frame containing one or more groups of interest |
|
22 |
+to compare. |
|
23 |
+Groups are identified by \code{selection_group_key}} |
|
24 |
+ |
|
25 |
+\item{ref_group_key}{Character vector of column names that identify a |
|
26 |
+unique group in the \code{reference} data frame} |
|
27 |
+ |
|
28 |
+\item{selection_group_key}{Character vector of column names that identify a |
|
29 |
+unique group in the \code{selection} data frame} |
|
30 |
+ |
|
31 |
+\item{timepoint_column}{Name of the column holding time point |
|
32 |
+info?} |
|
33 |
+ |
|
34 |
+\item{by_subject}{Should calculations be performed for each subject |
|
35 |
+separately?} |
|
36 |
+ |
|
37 |
+\item{subject_column}{Name of the column holding subjects information. |
|
38 |
+Relevant only if \code{by_subject = TRUE}} |
|
39 |
+} |
|
40 |
+\value{ |
|
41 |
+A list of data frames or a data frame |
|
42 |
+} |
|
43 |
+\description{ |
|
44 |
+\lifecycle{experimental} |
|
45 |
+The function computes the sharing between a reference group of interest |
|
46 |
+for each time point and a selection of groups of interest. In this way |
|
47 |
+it is possible to observe the percentage of shared integration sites between |
|
48 |
+reference and each group and identify in which time point a certain IS was |
|
49 |
+observed for the first time. |
|
50 |
+} |
|
51 |
+\examples{ |
|
52 |
+data("integration_matrices", package = "ISAnalytics") |
|
53 |
+data("association_file", package = "ISAnalytics") |
|
54 |
+aggreg <- aggregate_values_by_key( |
|
55 |
+ x = rlang::current_env()$integration_matrices, |
|
56 |
+ association_file = rlang::current_env()$association_file, |
|
57 |
+ value_cols = c("seqCount", "fragmentEstimate") |
|
58 |
+) |
|
59 |
+df1 <- aggreg \%>\% |
|
60 |
+ dplyr::filter(.data$Tissue == "BM") |
|
61 |
+df2 <- aggreg \%>\% |
|
62 |
+ dplyr::filter(.data$Tissue == "PB") |
|
63 |
+source <- iss_source(df1, df2) |
|
64 |
+source |
|
65 |
+ggplot2::ggplot(source$PT001, ggplot2::aes(x = as.factor(g2_TimePoint), |
|
66 |
+ y = sharing_perc, fill = g1)) + |
|
67 |
+ ggplot2::geom_col() + |
|
68 |
+ ggplot2::labs(x = "Time point", y = "Shared IS \% with MNC BM", |
|
69 |
+ title = "Source of is MNC BM vs MNC PB") |
|
70 |
+} |
|
71 |
+\seealso{ |
|
72 |
+Other Analysis functions: |
|
73 |
+\code{\link{CIS_grubbs}()}, |
|
74 |
+\code{\link{comparison_matrix}()}, |
|
75 |
+\code{\link{compute_abundance}()}, |
|
76 |
+\code{\link{cumulative_count_union}()}, |
|
77 |
+\code{\link{cumulative_is}()}, |
|
78 |
+\code{\link{is_sharing}()}, |
|
79 |
+\code{\link{purity_filter}()}, |
|
80 |
+\code{\link{sample_statistics}()}, |
|
81 |
+\code{\link{separate_quant_matrices}()}, |
|
82 |
+\code{\link{threshold_filter}()}, |
|
83 |
+\code{\link{top_integrations}()} |
|
84 |
+} |
|
85 |
+\concept{Analysis functions} |
... | ... |
@@ -108,8 +108,10 @@ aggreg <- aggregate_values_by_key( |
108 | 108 |
association_file = rlang::current_env()$association_file, |
109 | 109 |
value_cols = c("seqCount", "fragmentEstimate") |
110 | 110 |
) |
111 |
-filtered_by_purity <- purity_filter(x = aggreg, |
|
112 |
-value_column = "seqCount_sum") |
|
111 |
+filtered_by_purity <- purity_filter( |
|
112 |
+ x = aggreg, |
|
113 |
+ value_column = "seqCount_sum" |
|
114 |
+) |
|
113 | 115 |
head(filtered_by_purity) |
114 | 116 |
} |
115 | 117 |
\seealso{ |
... | ... |
@@ -120,6 +122,7 @@ Other Analysis functions: |
120 | 122 |
\code{\link{cumulative_count_union}()}, |
121 | 123 |
\code{\link{cumulative_is}()}, |
122 | 124 |
\code{\link{is_sharing}()}, |
125 |
+\code{\link{iss_source}()}, |
|
123 | 126 |
\code{\link{sample_statistics}()}, |
124 | 127 |
\code{\link{separate_quant_matrices}()}, |
125 | 128 |
\code{\link{threshold_filter}()}, |
... | ... |
@@ -78,6 +78,7 @@ Other Analysis functions: |
78 | 78 |
\code{\link{cumulative_count_union}()}, |
79 | 79 |
\code{\link{cumulative_is}()}, |
80 | 80 |
\code{\link{is_sharing}()}, |
81 |
+\code{\link{iss_source}()}, |
|
81 | 82 |
\code{\link{purity_filter}()}, |
82 | 83 |
\code{\link{separate_quant_matrices}()}, |
83 | 84 |
\code{\link{threshold_filter}()}, |
... | ... |
@@ -62,6 +62,7 @@ Other Analysis functions: |
62 | 62 |
\code{\link{cumulative_count_union}()}, |
63 | 63 |
\code{\link{cumulative_is}()}, |
64 | 64 |
\code{\link{is_sharing}()}, |
65 |
+\code{\link{iss_source}()}, |
|
65 | 66 |
\code{\link{purity_filter}()}, |
66 | 67 |
\code{\link{sample_statistics}()}, |
67 | 68 |
\code{\link{threshold_filter}()}, |
... | ... |
@@ -211,6 +211,7 @@ Other Analysis functions: |
211 | 211 |
\code{\link{cumulative_count_union}()}, |
212 | 212 |
\code{\link{cumulative_is}()}, |
213 | 213 |
\code{\link{is_sharing}()}, |
214 |
+\code{\link{iss_source}()}, |
|
214 | 215 |
\code{\link{purity_filter}()}, |
215 | 216 |
\code{\link{sample_statistics}()}, |
216 | 217 |
\code{\link{separate_quant_matrices}()}, |
... | ... |
@@ -78,6 +78,7 @@ Other Analysis functions: |
78 | 78 |
\code{\link{cumulative_count_union}()}, |
79 | 79 |
\code{\link{cumulative_is}()}, |
80 | 80 |
\code{\link{is_sharing}()}, |
81 |
+\code{\link{iss_source}()}, |
|
81 | 82 |
\code{\link{purity_filter}()}, |
82 | 83 |
\code{\link{sample_statistics}()}, |
83 | 84 |
\code{\link{separate_quant_matrices}()}, |
... | ... |
@@ -947,3 +947,572 @@ test_that("cumulative_is produces correct output", { |
947 | 947 |
expect_true(all(mandatory_IS_vars() %in% colnames(c_is))) |
948 | 948 |
expect_true(nrow(c_is) == sum(counts)) |
949 | 949 |
}) |
950 |
+ |
|
951 |
+#------------------------------------------------------------------------------# |
|
952 |
+# Test .assign_iss_by_tp |
|
953 |
+#------------------------------------------------------------------------------# |
|
954 |
+test_that(".assign_iss_by_tp assigns iss correctly", { |
|
955 |
+ test_df <- tibble::tribble( |
|
956 |
+ ~chr, ~integration_locus, ~strand, ~CellMarker, ~Tissue, ~TimePoint, |
|
957 |
+ "1", 12345, "+", "CD34", "BM", "03", |
|
958 |
+ "1", 12345, "+", "CD34", "BM", "01", |
|
959 |
+ "1", 12345, "+", "CD34", "BM", "06", |
|
960 |
+ "2", 54321, "-", "CD34", "BM", "01", |
|
961 |
+ "3", 62135, "+", "CD34", "BM", "02", |
|
962 |
+ "3", 62135, "+", "CD34", "BM", "08" |
|
963 |
+ ) |
|
964 |
+ |
|
965 |
+ expected_assign <- tibble::tribble( |
|
966 |
+ ~chr, ~integration_locus, ~strand, ~CellMarker, ~Tissue, ~TimePoint, |
|
967 |
+ "1", 12345, "+", "CD34", "BM", 1, |
|
968 |
+ "2", 54321, "-", "CD34", "BM", 1, |
|
969 |
+ "3", 62135, "+", "CD34", "BM", 2 |
|
970 |
+ ) |
|
971 |
+ re_assigned <- .assign_iss_by_tp( |
|
972 |
+ df = test_df, |
|
973 |
+ timepoint_column = "TimePoint" |
|
974 |
+ ) |
|
975 |
+ expect_equal(re_assigned, expected_assign) |
|
976 |
+}) |
|
977 |
+ |
|
978 |
+#------------------------------------------------------------------------------# |
|
979 |
+# Test iss_source |
|
980 |
+#------------------------------------------------------------------------------# |
|
981 |
+test_df2 <- tibble::tibble( |
|
982 |
+ chr = c( |
|
983 |
+ "1", "1", "1", "1", "2", "2", "3", "3", |
|
984 |
+ "1", "1", "1", "1", "1", "2", "3", "3" |
|
985 |
+ ), |
|
986 |
+ integration_locus = c( |
|
987 |
+ 12345, 43524, 12345, 12345, 54321, |
|
988 |
+ 76835, 62135, 62135, 12345, 12345, |
|
989 |
+ 56832, 12345, 12345, 54321, 62135, |
|
990 |
+ 62135 |
|
991 |
+ ), |
|
992 |
+ strand = c( |
|
993 |
+ "+", "+", "+", "+", "-", "-", "+", "+", "+", |
|
994 |
+ "+", "-", "+", "+", "-", "+", "+" |
|
995 |
+ ), |
|
996 |
+ SubjectID = c( |
|
997 |
+ "PT01", "PT02", "PT01", "PT01", "PT01", |
|
998 |
+ "PT02", "PT01", "PT01", "PT01", "PT01", |
|
999 |
+ "PT02", "PT01", "PT02", "PT01", "PT01", |
|
1000 |
+ "PT01" |
|
1001 |
+ ), |
|
1002 |
+ CellMarker = c( |
|
1003 |
+ "CD34", "CD34", "CD34", "CD34", "CD34", |
|
1004 |
+ "CD34", "CD34", "CD34", "Whole", "Whole", |
|
1005 |
+ "Whole", "Whole", "Whole", "Whole", "Whole", |
|
1006 |
+ "Whole" |
|
1007 |
+ ), |
|
1008 |
+ Tissue = c( |
|
1009 |
+ "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", |
|
1010 |
+ "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM" |
|
1011 |
+ ), |
|
1012 |
+ TimePoint = c( |
|
1013 |
+ "03", "03", "01", "06", "01", "01", "02", |
|
1014 |
+ "08", "03", "01", "01", "06", "06", "01", |
|
1015 |
+ "02", "08" |
|
1016 |
+ ) |
|
1017 |
+) |
|
1018 |
+ |
|
1019 |
+test_df3 <- tibble::tibble( |
|
1020 |
+ chr = c( |
|
1021 |
+ "1", "1", "1", "1", "2", "2", "3", "3", "1", |
|
1022 |
+ "1", "1", "1", "1", "2", "3", "3" |
|
1023 |
+ ), |
|
1024 |
+ integration_locus = c( |
|
1025 |
+ 56252, 43524, 12345, 86435, 54321, |
|
1026 |
+ 76835, 62135, 432245, 12345, 534587, |
|
1027 |
+ 56832, 12345, 12345, 578635, 62135, |
|
1028 |
+ 62135 |
|
1029 |
+ ), |
|
1030 |
+ strand = c( |
|
1031 |
+ "+", "+", "+", "+", "-", "-", "+", "+", "+", |
|
1032 |
+ "+", "-", "+", "+", "-", "+", "+" |
|
1033 |
+ ), |
|
1034 |
+ SubjectID = c( |
|
1035 |
+ "PT02", "PT02", "PT01", "PT01", "PT01", |
|
1036 |
+ "PT02", "PT01", "PT01", "PT01", "PT01", |
|
1037 |
+ "PT02", "PT01", "PT02", "PT01", "PT02", |
|
1038 |
+ "PT01" |
|
1039 |
+ ), |
|
1040 |
+ CellMarker = c( |
|
1041 |
+ "CD14", "CD13", "CD15", "CD14", "CD14", |
|
1042 |
+ "CD13", "CD14", "CD13", "CD14", "CD15", |
|
1043 |
+ "CD15", "CD14", "CD15", "CD13", "CD14", |
|
1044 |
+ "CD13" |
|
1045 |
+ ), |
|
1046 |
+ Tissue = c( |
|
1047 |
+ "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", |
|
1048 |
+ "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB" |
|
1049 |
+ ), |
|
1050 |
+ TimePoint = c( |
|
1051 |
+ "03", "03", "01", "06", "01", "01", "02", |
|
1052 |
+ "08", "03", "01", "01", "06", "06", "01", |
|
1053 |
+ "02", "08" |
|
1054 |
+ ) |
|
1055 |
+) |
|
1056 |
+expected_res1 <- list( |
|
1057 |
+ PT01 = tibble::tibble( |
|
1058 |
+ g1 = c( |
|
1059 |
+ "PT01_CD34_BM_1", "PT01_CD34_BM_2", |
|
1060 |
+ "PT01_CD34_BM_1", "PT01_CD34_BM_2", |
|
1061 |
+ "PT01_CD34_BM_1", "PT01_CD34_BM_2", |
|
1062 |
+ "PT01_CD34_BM_1", "PT01_CD34_BM_2", |
|
1063 |
+ "PT01_CD34_BM_1", "PT01_CD34_BM_2", |
|
1064 |
+ "PT01_CD34_BM_1", "PT01_CD34_BM_2", |
|
1065 |
+ "PT01_CD34_BM_1", "PT01_CD34_BM_2", |
|
1066 |
+ "PT01_Whole_BM_1", "PT01_Whole_BM_2", |
|
1067 |
+ "PT01_Whole_BM_1", "PT01_Whole_BM_2", |
|
1068 |
+ "PT01_Whole_BM_1", "PT01_Whole_BM_2", |
|
1069 |
+ "PT01_Whole_BM_1", "PT01_Whole_BM_2", |
|
1070 |
+ "PT01_Whole_BM_1", "PT01_Whole_BM_2", |
|
1071 |
+ "PT01_Whole_BM_1", "PT01_Whole_BM_2", |
|
1072 |
+ "PT01_Whole_BM_1", "PT01_Whole_BM_2" |
|
1073 |
+ ), |
|
1074 |
+ g1_SubjectID = c( |
|
1075 |
+ "PT01", "PT01", "PT01", "PT01", |
|
1076 |
+ "PT01", "PT01", "PT01", "PT01", |
|
1077 |
+ "PT01", "PT01", "PT01", "PT01", |
|
1078 |
+ "PT01", "PT01", "PT01", "PT01", |
|
1079 |
+ "PT01", "PT01", "PT01", "PT01", |
|
1080 |
+ "PT01", "PT01", "PT01", "PT01", |
|
1081 |
+ "PT01", "PT01", "PT01", "PT01" |
|
1082 |
+ ), |
|
1083 |
+ g1_CellMarker = c( |
|
1084 |
+ "CD34", "CD34", "CD34", "CD34", |
|
1085 |
+ "CD34", "CD34", "CD34", "CD34", |
|
1086 |
+ "CD34", "CD34", "CD34", "CD34", |
|
1087 |
+ "CD34", "CD34", "Whole", "Whole", |
|
1088 |
+ "Whole", "Whole", "Whole", "Whole", |
|
1089 |
+ "Whole", "Whole", "Whole", "Whole", |
|
1090 |
+ "Whole", "Whole", "Whole", "Whole" |
|
1091 |
+ ), |
|
1092 |
+ g1_Tissue = c( |
|
1093 |
+ "BM", "BM", "BM", "BM", "BM", "BM", |
|
1094 |
+ "BM", "BM", "BM", "BM", "BM", "BM", |
|
1095 |
+ "BM", "BM", "BM", "BM", "BM", "BM", |
|
1096 |
+ "BM", "BM", "BM", "BM", "BM", "BM", |
|
1097 |
+ "BM", "BM", "BM", "BM" |
|
1098 |
+ ), |
|
1099 |
+ g1_TimePoint = c( |
|
1100 |
+ 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, |
|
1101 |
+ 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, |
|
1102 |
+ 1, 2, 1, 2 |
|
1103 |
+ ), |
|
1104 |
+ g2 = c( |
|
1105 |
+ "PT01_CD15_PB_01", "PT01_CD15_PB_01", |
|
1106 |
+ "PT01_CD14_PB_06", "PT01_CD14_PB_06", |
|
1107 |
+ "PT01_CD14_PB_01", "PT01_CD14_PB_01", |
|
1108 |
+ "PT01_CD14_PB_02", "PT01_CD14_PB_02", |
|
1109 |
+ "PT01_CD13_PB_08", "PT01_CD13_PB_08", |
|
1110 |
+ "PT01_CD14_PB_03", "PT01_CD14_PB_03", |
|
1111 |
+ "PT01_CD13_PB_01", "PT01_CD13_PB_01", |
|
1112 |
+ "PT01_CD15_PB_01", "PT01_CD15_PB_01", |
|
1113 |
+ "PT01_CD14_PB_06", "PT01_CD14_PB_06", |
|
1114 |
+ "PT01_CD14_PB_01", "PT01_CD14_PB_01", |
|
1115 |
+ "PT01_CD14_PB_02", "PT01_CD14_PB_02", |
|
1116 |
+ "PT01_CD13_PB_08", "PT01_CD13_PB_08", |
|
1117 |
+ "PT01_CD14_PB_03", "PT01_CD14_PB_03", |
|
1118 |
+ "PT01_CD13_PB_01", "PT01_CD13_PB_01" |
|
1119 |
+ ), |
|
1120 |
+ g2_SubjectID = c( |
|
1121 |
+ "PT01", "PT01", "PT01", "PT01", |
|
1122 |
+ "PT01", "PT01", "PT01", "PT01", |
|
1123 |
+ "PT01", "PT01", "PT01", "PT01", |
|
1124 |
+ "PT01", "PT01", "PT01", "PT01", |
|
1125 |
+ "PT01", "PT01", "PT01", "PT01", |
|
1126 |
+ "PT01", "PT01", "PT01", "PT01", |
|
1127 |
+ "PT01", "PT01", "PT01", "PT01" |
|
1128 |
+ ), |
|
1129 |
+ g2_CellMarker = c( |
|
1130 |
+ "CD15", "CD15", "CD14", "CD14", |
|
1131 |
+ "CD14", "CD14", "CD14", "CD14", |
|
1132 |
+ "CD13", "CD13", "CD14", "CD14", |
|
1133 |
+ "CD13", "CD13", "CD15", "CD15", |
|
1134 |
+ "CD14", "CD14", "CD14", "CD14", |
|
1135 |
+ "CD14", "CD14", "CD13", "CD13", |
|
1136 |
+ "CD14", "CD14", "CD13", "CD13" |
|
1137 |
+ ), |
|
1138 |
+ g2_Tissue = c( |
|
1139 |
+ "PB", "PB", "PB", "PB", "PB", "PB", |
|
1140 |
+ "PB", "PB", "PB", "PB", "PB", "PB", |
|
1141 |
+ "PB", "PB", "PB", "PB", "PB", "PB", |
|
1142 |
+ "PB", "PB", "PB", "PB", "PB", "PB", |
|
1143 |
+ "PB", "PB", "PB", "PB" |
|
1144 |
+ ), |
|
1145 |
+ g2_TimePoint = c( |
|
1146 |
+ 1, 1, 6, 6, 1, 1, 2, 2, 8, 8, 3, 3, |
|
1147 |
+ 1, 1, 1, 1, 6, 6, 1, 1, 2, 2, 8, 8, |
|
1148 |
+ 3, 3, 1, 1 |
|
1149 |
+ ), |
|
1150 |
+ chr = c( |
|
1151 |
+ "1", NA, "1", NA, "2", NA, NA, "3", |
|
1152 |
+ NA, "3", "1", NA, NA, NA, "1", NA, |
|
1153 |
+ "1", NA, "2", NA, NA, "3", NA, "3", |
|
1154 |
+ "1", NA, NA, NA |
|
1155 |
+ ), |
|
1156 |
+ integration_locus = c( |
|
1157 |
+ 12345, NA, 12345, NA, 54321, |
|
1158 |
+ NA, NA, 62135, NA, 62135, 12345, |
|
1159 |
+ NA, NA, NA, 12345, NA, 12345, |
|
1160 |
+ NA, 54321, NA, NA, 62135, NA, |
|
1161 |
+ 62135, 12345, NA, NA, NA |
|
1162 |
+ ), |
|
1163 |
+ strand = c( |
|
1164 |
+ "+", NA, "+", NA, "-", NA, NA, |
|
1165 |
+ "+", NA, "+", "+", NA, NA, NA, |
|
1166 |
+ "+", NA, "+", NA, "-", NA, NA, |
|
1167 |
+ "+", NA, "+", "+", NA, NA, NA |
|
1168 |
+ ), |
|
1169 |
+ sharing_perc = c( |
|
1170 |
+ 50, 0, 50, 0, 100, 0, 0, 100, 0, 50, |
|
1171 |
+ 100, 0, 0, 0, 50, 0, 50, 0, 100, 0, |
|
1172 |
+ 0, 100, 0, 50, 100, 0, 0, 0 |
|
1173 |
+ ) |
|
1174 |
+ ) %>% |
|
1175 |
+ dplyr::arrange(.data$g1), |
|
1176 |
+ PT02 = tibble::tibble( |
|
1177 |
+ g1 = c( |
|
1178 |
+ "PT02_CD34_BM_3", "PT02_CD34_BM_1", |
|
1179 |
+ "PT02_CD34_BM_3", "PT02_CD34_BM_1", |
|
1180 |
+ "PT02_CD34_BM_3", "PT02_CD34_BM_1", |
|
1181 |
+ "PT02_CD34_BM_3", "PT02_CD34_BM_1", |
|
1182 |
+ "PT02_CD34_BM_3", "PT02_CD34_BM_1", |
|
1183 |
+ "PT02_CD34_BM_3", "PT02_CD34_BM_1", |
|
1184 |
+ "PT02_Whole_BM_6", "PT02_Whole_BM_1", |
|
1185 |
+ "PT02_Whole_BM_6", "PT02_Whole_BM_1", |
|
1186 |
+ "PT02_Whole_BM_6", "PT02_Whole_BM_1", |
|
1187 |
+ "PT02_Whole_BM_6", "PT02_Whole_BM_1", |
|
1188 |
+ "PT02_Whole_BM_6", "PT02_Whole_BM_1", |
|
1189 |
+ "PT02_Whole_BM_6", "PT02_Whole_BM_1" |
|
1190 |
+ ), |
|
1191 |
+ g1_SubjectID = c( |
|
1192 |
+ "PT02", "PT02", "PT02", "PT02", |
|
1193 |
+ "PT02", "PT02", "PT02", "PT02", |
|
1194 |
+ "PT02", "PT02", "PT02", "PT02", |
|
1195 |
+ "PT02", "PT02", "PT02", "PT02", |
|
1196 |
+ "PT02", "PT02", "PT02", "PT02", |
|
1197 |
+ "PT02", "PT02", "PT02", "PT02" |
|
1198 |
+ ), |
|
1199 |
+ g1_CellMarker = c( |
|
1200 |
+ "CD34", "CD34", "CD34", "CD34", |
|
1201 |
+ "CD34", "CD34", "CD34", "CD34", |
|
1202 |
+ "CD34", "CD34", "CD34", "CD34", |
|
1203 |
+ "Whole", "Whole", "Whole", "Whole", |
|
1204 |
+ "Whole", "Whole", "Whole", "Whole", |
|
1205 |
+ "Whole", "Whole", "Whole", "Whole" |
|
1206 |
+ ), |
|
1207 |
+ g1_Tissue = c( |
|
1208 |
+ "BM", "BM", "BM", "BM", "BM", "BM", |
|
1209 |
+ "BM", "BM", "BM", "BM", "BM", "BM", |
|
1210 |
+ "BM", "BM", "BM", "BM", "BM", "BM", |
|
1211 |
+ "BM", "BM", "BM", "BM", "BM", "BM" |
|
1212 |
+ ), |
|
1213 |
+ g1_TimePoint = c( |
|
1214 |
+ 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, |
|
1215 |
+ 6, 1, 6, 1, 6, 1, 6, 1, 6, 1, 6, 1 |
|
1216 |
+ ), |
|
1217 |
+ g2 = c( |
|
1218 |
+ "PT02_CD14_PB_03", "PT02_CD14_PB_03", |
|
1219 |
+ "PT02_CD13_PB_03", "PT02_CD13_PB_03", |
|
1220 |
+ "PT02_CD13_PB_01", "PT02_CD13_PB_01", |
|
1221 |
+ "PT02_CD15_PB_01", "PT02_CD15_PB_01", |
|
1222 |
+ "PT02_CD15_PB_06", "PT02_CD15_PB_06", |
|
1223 |
+ "PT02_CD14_PB_02", "PT02_CD14_PB_02", |
|
1224 |
+ "PT02_CD14_PB_03", "PT02_CD14_PB_03", |
|
1225 |
+ "PT02_CD13_PB_03", "PT02_CD13_PB_03", |
|
1226 |
+ "PT02_CD13_PB_01", "PT02_CD13_PB_01", |
|
1227 |
+ "PT02_CD15_PB_01", "PT02_CD15_PB_01", |
|
1228 |
+ "PT02_CD15_PB_06", "PT02_CD15_PB_06", |
|
1229 |
+ "PT02_CD14_PB_02", "PT02_CD14_PB_02" |
|
1230 |
+ ), |
|
1231 |
+ g2_SubjectID = c( |
|
1232 |
+ "PT02", "PT02", "PT02", "PT02", |
|
1233 |
+ "PT02", "PT02", "PT02", "PT02", |
|
1234 |
+ "PT02", "PT02", "PT02", "PT02", |
|
1235 |
+ "PT02", "PT02", "PT02", "PT02", |
|
1236 |
+ "PT02", "PT02", "PT02", "PT02", |
|
1237 |
+ "PT02", "PT02", "PT02", "PT02" |
|
1238 |
+ ), |
|
1239 |
+ g2_CellMarker = c( |
|
1240 |
+ "CD14", "CD14", "CD13", "CD13", |
|
1241 |
+ "CD13", "CD13", "CD15", "CD15", |
|
1242 |
+ "CD15", "CD15", "CD14", "CD14", |
|
1243 |
+ "CD14", "CD14", "CD13", "CD13", |
|
1244 |
+ "CD13", "CD13", "CD15", "CD15", |
|
1245 |
+ "CD15", "CD15", "CD14", "CD14" |
|
1246 |
+ ), |
|
1247 |
+ g2_Tissue = c( |
|
1248 |
+ "PB", "PB", "PB", "PB", "PB", "PB", |
|
1249 |
+ "PB", "PB", "PB", "PB", "PB", "PB", |
|
1250 |
+ "PB", "PB", "PB", "PB", "PB", "PB", |
|
1251 |
+ "PB", "PB", "PB", "PB", "PB", "PB" |
|
1252 |
+ ), |
|
1253 |
+ g2_TimePoint = c( |
|
1254 |
+ 3, 3, 3, 3, 1, 1, 1, 1, 6, 6, 2, 2, |
|
1255 |
+ 3, 3, 3, 3, 1, 1, 1, 1, 6, 6, 2, 2 |
|
1256 |
+ ), |
|
1257 |
+ chr = c( |
|
1258 |
+ NA, NA, "1", NA, NA, "2", NA, NA, |
|
1259 |
+ NA, NA, NA, NA, NA, NA, NA, |
|
1260 |
+ NA, NA, NA, NA, 1, 1, NA, NA, |
|
1261 |
+ NA |
|
1262 |
+ ), |
|
1263 |
+ integration_locus = c( |
|
1264 |
+ NA, NA, 43524, NA, NA, 76835, |
|
1265 |
+ NA, NA, NA, NA, NA, NA, NA, NA, |
|
1266 |
+ NA, NA, NA, NA, NA, 56832, |
|
1267 |
+ 12345, NA, NA, NA |
|
1268 |
+ ), |
|
1269 |
+ strand = c( |
|
1270 |
+ NA, NA, "+", NA, NA, "-", NA, |
|
1271 |
+ NA, NA, NA, NA, NA, NA, |
|
1272 |
+ NA, NA, NA, NA, NA, NA, |
|
1273 |
+ "-", "+", NA, NA, NA |
|
1274 |
+ ), |
|
1275 |
+ sharing_perc = c( |
|
1276 |
+ 0, 0, 100, 0, 0, 100, 0, 0, 0, 0, |
|
1277 |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 100, |
|
1278 |
+ 100, 0, 0, 0 |
|
1279 |
+ ) |
|
1280 |
+ ) %>% |
|
1281 |
+ dplyr::arrange(.data$g1) |
|
1282 |
+) |
|
1283 |
+ |
|
1284 |
+expected_res2 <- tibble::tibble( |
|
1285 |
+ g1 = c( |
|
1286 |
+ "PT01_CD34_BM_1", "PT01_CD34_BM_2", "PT01_CD34_BM_1", "PT01_CD34_BM_2", |
|
1287 |
+ "PT01_CD34_BM_1", "PT01_CD34_BM_2", "PT01_CD34_BM_1", "PT01_CD34_BM_2", |
|
1288 |
+ "PT01_CD34_BM_1", "PT01_CD34_BM_2", "PT01_CD34_BM_1", "PT01_CD34_BM_2", |
|
1289 |
+ "PT01_CD34_BM_1", "PT01_CD34_BM_2", "PT01_CD34_BM_1", "PT01_CD34_BM_2", |
|
1290 |
+ "PT01_CD34_BM_1", "PT01_CD34_BM_2", "PT01_CD34_BM_1", "PT01_CD34_BM_2", |
|
1291 |
+ "PT01_CD34_BM_1", "PT01_CD34_BM_2", "PT01_CD34_BM_1", "PT01_CD34_BM_2", |
|
1292 |
+ "PT01_CD34_BM_1", "PT01_CD34_BM_2", "PT01_Whole_BM_1", |
|
1293 |
+ "PT01_Whole_BM_2", "PT01_Whole_BM_1", "PT01_Whole_BM_2", |
|
1294 |
+ "PT01_Whole_BM_1", "PT01_Whole_BM_2", "PT01_Whole_BM_1", |
|
1295 |
+ "PT01_Whole_BM_2", "PT01_Whole_BM_1", "PT01_Whole_BM_2", |
|
1296 |
+ "PT01_Whole_BM_1", "PT01_Whole_BM_2", "PT01_Whole_BM_1", |
|
1297 |
+ "PT01_Whole_BM_2", "PT01_Whole_BM_1", "PT01_Whole_BM_2", |
|
1298 |
+ "PT01_Whole_BM_1", "PT01_Whole_BM_2", "PT01_Whole_BM_1", |
|
1299 |
+ "PT01_Whole_BM_2", "PT01_Whole_BM_1", "PT01_Whole_BM_2", |
|
1300 |
+ "PT01_Whole_BM_1", "PT01_Whole_BM_2", "PT01_Whole_BM_1", |
|
1301 |
+ "PT01_Whole_BM_2", "PT02_CD34_BM_3", "PT02_CD34_BM_1", |
|
1302 |
+ "PT02_CD34_BM_3", "PT02_CD34_BM_1", "PT02_CD34_BM_3", |
|
1303 |
+ "PT02_CD34_BM_1", "PT02_CD34_BM_3", "PT02_CD34_BM_1", |
|
1304 |
+ "PT02_CD34_BM_3", "PT02_CD34_BM_1", "PT02_CD34_BM_3", |
|
1305 |
+ "PT02_CD34_BM_1", "PT02_CD34_BM_3", "PT02_CD34_BM_1", |
|
1306 |
+ "PT02_CD34_BM_3", "PT02_CD34_BM_1", "PT02_CD34_BM_3", |
|
1307 |
+ "PT02_CD34_BM_1", "PT02_CD34_BM_3", "PT02_CD34_BM_1", |
|
1308 |
+ "PT02_CD34_BM_3", "PT02_CD34_BM_1", "PT02_CD34_BM_3", |
|
1309 |
+ "PT02_CD34_BM_1", "PT02_CD34_BM_3", "PT02_CD34_BM_1", |
|
1310 |
+ "PT02_Whole_BM_6", "PT02_Whole_BM_1", "PT02_Whole_BM_6", |
|
1311 |
+ "PT02_Whole_BM_1", "PT02_Whole_BM_6", "PT02_Whole_BM_1", |
|
1312 |
+ "PT02_Whole_BM_6", "PT02_Whole_BM_1", "PT02_Whole_BM_6", |
|
1313 |
+ "PT02_Whole_BM_1", "PT02_Whole_BM_6", "PT02_Whole_BM_1", |
|
1314 |
+ "PT02_Whole_BM_6", "PT02_Whole_BM_1", "PT02_Whole_BM_6", |
|
1315 |
+ "PT02_Whole_BM_1", "PT02_Whole_BM_6", "PT02_Whole_BM_1", |
|
1316 |
+ "PT02_Whole_BM_6", "PT02_Whole_BM_1", "PT02_Whole_BM_6", |
|
1317 |
+ "PT02_Whole_BM_1", "PT02_Whole_BM_6", "PT02_Whole_BM_1", |
|
1318 |
+ "PT02_Whole_BM_6", "PT02_Whole_BM_1" |
|
1319 |
+ ), |
|
1320 |
+ g1_SubjectID = c( |
|
1321 |
+ "PT01", "PT01", "PT01", "PT01", "PT01", "PT01", "PT01", |
|
1322 |
+ "PT01", "PT01", "PT01", "PT01", "PT01", "PT01", "PT01", |
|
1323 |
+ "PT01", "PT01", "PT01", "PT01", "PT01", "PT01", "PT01", |
|
1324 |
+ "PT01", "PT01", "PT01", "PT01", "PT01", "PT01", "PT01", |
|
1325 |
+ "PT01", "PT01", "PT01", "PT01", "PT01", "PT01", "PT01", |
|
1326 |
+ "PT01", "PT01", "PT01", "PT01", "PT01", "PT01", "PT01", |
|
1327 |
+ "PT01", "PT01", "PT01", "PT01", "PT01", "PT01", "PT01", |
|
1328 |
+ "PT01", "PT01", "PT01", "PT02", "PT02", "PT02", "PT02", |
|
1329 |
+ "PT02", "PT02", "PT02", "PT02", "PT02", "PT02", "PT02", |
|
1330 |
+ "PT02", "PT02", "PT02", "PT02", "PT02", "PT02", "PT02", |
|
1331 |
+ "PT02", "PT02", "PT02", "PT02", "PT02", "PT02", "PT02", |
|
1332 |
+ "PT02", "PT02", "PT02", "PT02", "PT02", "PT02", "PT02", |
|
1333 |
+ "PT02", "PT02", "PT02", "PT02", "PT02", "PT02", "PT02", |
|
1334 |
+ "PT02", "PT02", "PT02", "PT02", "PT02", "PT02", "PT02", |
|
1335 |
+ "PT02", "PT02", "PT02", "PT02", "PT02", "PT02" |
|
1336 |
+ ), |
|
1337 |
+ g1_CellMarker = c( |
|
1338 |
+ "CD34", "CD34", "CD34", "CD34", "CD34", "CD34", "CD34", |
|
1339 |
+ "CD34", "CD34", "CD34", "CD34", "CD34", "CD34", "CD34", |
|
1340 |
+ "CD34", "CD34", "CD34", "CD34", "CD34", "CD34", "CD34", |
|
1341 |
+ "CD34", "CD34", "CD34", "CD34", "CD34", "Whole", "Whole", |
|
1342 |
+ "Whole", "Whole", "Whole", "Whole", "Whole", "Whole", |
|
1343 |
+ "Whole", "Whole", "Whole", "Whole", "Whole", "Whole", |
|
1344 |
+ "Whole", "Whole", "Whole", "Whole", "Whole", "Whole", |
|
1345 |
+ "Whole", "Whole", "Whole", "Whole", "Whole", "Whole", |
|
1346 |
+ "CD34", "CD34", "CD34", "CD34", "CD34", "CD34", "CD34", |
|
1347 |
+ "CD34", "CD34", "CD34", "CD34", "CD34", "CD34", "CD34", |
|
1348 |
+ "CD34", "CD34", "CD34", "CD34", "CD34", "CD34", "CD34", |
|
1349 |
+ "CD34", "CD34", "CD34", "CD34", "CD34", "Whole", "Whole", |
|
1350 |
+ "Whole", "Whole", "Whole", "Whole", "Whole", "Whole", |
|
1351 |
+ "Whole", "Whole", "Whole", "Whole", "Whole", "Whole", |
|
1352 |
+ "Whole", "Whole", "Whole", "Whole", "Whole", "Whole", |
|
1353 |
+ "Whole", "Whole", "Whole", "Whole", "Whole", "Whole" |
|
1354 |
+ ), |
|
1355 |
+ g1_Tissue = c( |
|
1356 |
+ "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", |
|
1357 |
+ "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", |
|
1358 |
+ "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", |
|
1359 |
+ "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", |
|
1360 |
+ "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", |
|
1361 |
+ "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", |
|
1362 |
+ "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", |
|
1363 |
+ "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", |
|
1364 |
+ "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", |
|
1365 |
+ "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", "BM", |
|
1366 |
+ "BM", "BM", "BM", "BM" |
|
1367 |
+ ), |
|
1368 |
+ g1_TimePoint = c( |
|
1369 |
+ 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, |
|
1370 |
+ 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, |
|
1371 |
+ 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 3, 1, 3, 1, 3, |
|
1372 |
+ 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, |
|
1373 |
+ 3, 1, 6, 1, 6, 1, 6, 1, 6, 1, 6, 1, 6, 1, 6, 1, 6, 1, 6, |
|
1374 |
+ 1, 6, 1, 6, 1, 6, 1, 6, 1 |
|
1375 |
+ ), |
|
1376 |
+ g2 = c( |
|
1377 |
+ "PT02_CD14_PB_03", "PT02_CD14_PB_03", "PT02_CD13_PB_03", |
|
1378 |
+ "PT02_CD13_PB_03", "PT01_CD15_PB_01", "PT01_CD15_PB_01", |
|
1379 |
+ "PT01_CD14_PB_06", "PT01_CD14_PB_06", "PT01_CD14_PB_01", |
|
1380 |
+ "PT01_CD14_PB_01", "PT02_CD13_PB_01", "PT02_CD13_PB_01", |
|
1381 |
+ "PT01_CD14_PB_02", "PT01_CD14_PB_02", "PT01_CD13_PB_08", |
|
1382 |
+ "PT01_CD13_PB_08", "PT01_CD14_PB_03", "PT01_CD14_PB_03", |
|
1383 |
+ "PT02_CD15_PB_01", "PT02_CD15_PB_01", "PT02_CD15_PB_06", |
|
1384 |
+ "PT02_CD15_PB_06", "PT01_CD13_PB_01", "PT01_CD13_PB_01", |
|
1385 |
+ "PT02_CD14_PB_02", "PT02_CD14_PB_02", "PT02_CD14_PB_03", |
|
1386 |
+ "PT02_CD14_PB_03", "PT02_CD13_PB_03", "PT02_CD13_PB_03", |
|
1387 |
+ "PT01_CD15_PB_01", "PT01_CD15_PB_01", "PT01_CD14_PB_06", |
|
1388 |
+ "PT01_CD14_PB_06", "PT01_CD14_PB_01", "PT01_CD14_PB_01", |
|
1389 |
+ "PT02_CD13_PB_01", "PT02_CD13_PB_01", "PT01_CD14_PB_02", |
|
1390 |
+ "PT01_CD14_PB_02", "PT01_CD13_PB_08", "PT01_CD13_PB_08", |
|
1391 |
+ "PT01_CD14_PB_03", "PT01_CD14_PB_03", "PT02_CD15_PB_01", |
|
1392 |
+ "PT02_CD15_PB_01", "PT02_CD15_PB_06", "PT02_CD15_PB_06", |
|
1393 |
+ "PT01_CD13_PB_01", "PT01_CD13_PB_01", "PT02_CD14_PB_02", |
|
1394 |
+ "PT02_CD14_PB_02", "PT02_CD14_PB_03", "PT02_CD14_PB_03", |
|
1395 |
+ "PT02_CD13_PB_03", "PT02_CD13_PB_03", "PT01_CD15_PB_01", |
|
1396 |
+ "PT01_CD15_PB_01", "PT01_CD14_PB_06", "PT01_CD14_PB_06", |
|
1397 |
+ "PT01_CD14_PB_01", "PT01_CD14_PB_01", "PT02_CD13_PB_01", |
|
1398 |
+ "PT02_CD13_PB_01", "PT01_CD14_PB_02", "PT01_CD14_PB_02", |
|
1399 |
+ "PT01_CD13_PB_08", "PT01_CD13_PB_08", "PT01_CD14_PB_03", |
|
1400 |
+ "PT01_CD14_PB_03", "PT02_CD15_PB_01", "PT02_CD15_PB_01", |
|
1401 |
+ "PT02_CD15_PB_06", "PT02_CD15_PB_06", "PT01_CD13_PB_01", |
|
1402 |
+ "PT01_CD13_PB_01", "PT02_CD14_PB_02", "PT02_CD14_PB_02", |
|
1403 |
+ "PT02_CD14_PB_03", "PT02_CD14_PB_03", "PT02_CD13_PB_03", |
|
1404 |
+ "PT02_CD13_PB_03", "PT01_CD15_PB_01", "PT01_CD15_PB_01", |
|
1405 |
+ "PT01_CD14_PB_06", "PT01_CD14_PB_06", "PT01_CD14_PB_01", |
|
1406 |
+ "PT01_CD14_PB_01", "PT02_CD13_PB_01", "PT02_CD13_PB_01", |
|
1407 |
+ "PT01_CD14_PB_02", "PT01_CD14_PB_02", "PT01_CD13_PB_08", |
|
1408 |
+ "PT01_CD13_PB_08", "PT01_CD14_PB_03", "PT01_CD14_PB_03", |
|
1409 |
+ "PT02_CD15_PB_01", "PT02_CD15_PB_01", "PT02_CD15_PB_06", |
|
1410 |
+ "PT02_CD15_PB_06", "PT01_CD13_PB_01", "PT01_CD13_PB_01", |
|
1411 |
+ "PT02_CD14_PB_02", "PT02_CD14_PB_02" |
|
1412 |
+ ), |
|
1413 |
+ g2_SubjectID = c( |
|
1414 |
+ "PT02", "PT02", "PT02", "PT02", "PT01", "PT01", "PT01", |
|
1415 |
+ "PT01", "PT01", "PT01", "PT02", "PT02", "PT01", "PT01", |
|
1416 |
+ "PT01", "PT01", "PT01", "PT01", "PT02", "PT02", "PT02", |
|
1417 |
+ "PT02", "PT01", "PT01", "PT02", "PT02", "PT02", "PT02", |
|
1418 |
+ "PT02", "PT02", "PT01", "PT01", "PT01", "PT01", "PT01", |
|
1419 |
+ "PT01", "PT02", "PT02", "PT01", "PT01", "PT01", "PT01", |
|
1420 |
+ "PT01", "PT01", "PT02", "PT02", "PT02", "PT02", "PT01", |
|
1421 |
+ "PT01", "PT02", "PT02", "PT02", "PT02", "PT02", "PT02", |
|
1422 |
+ "PT01", "PT01", "PT01", "PT01", "PT01", "PT01", "PT02", |
|
1423 |
+ "PT02", "PT01", "PT01", "PT01", "PT01", "PT01", "PT01", |
|
1424 |
+ "PT02", "PT02", "PT02", "PT02", "PT01", "PT01", "PT02", |
|
1425 |
+ "PT02", "PT02", "PT02", "PT02", "PT02", "PT01", "PT01", |
|
1426 |
+ "PT01", "PT01", "PT01", "PT01", "PT02", "PT02", "PT01", |
|
1427 |
+ "PT01", "PT01", "PT01", "PT01", "PT01", "PT02", "PT02", |
|
1428 |
+ "PT02", "PT02", "PT01", "PT01", "PT02", "PT02" |
|
1429 |
+ ), |
|
1430 |
+ g2_CellMarker = c( |
|
1431 |
+ "CD14", "CD14", "CD13", "CD13", "CD15", "CD15", "CD14", |
|
1432 |
+ "CD14", "CD14", "CD14", "CD13", "CD13", "CD14", "CD14", |
|
1433 |
+ "CD13", "CD13", "CD14", "CD14", "CD15", "CD15", "CD15", |
|
1434 |
+ "CD15", "CD13", "CD13", "CD14", "CD14", "CD14", "CD14", |
|
1435 |
+ "CD13", "CD13", "CD15", "CD15", "CD14", "CD14", "CD14", |
|
1436 |
+ "CD14", "CD13", "CD13", "CD14", "CD14", "CD13", "CD13", |
|
1437 |
+ "CD14", "CD14", "CD15", "CD15", "CD15", "CD15", "CD13", |
|
1438 |
+ "CD13", "CD14", "CD14", "CD14", "CD14", "CD13", "CD13", |
|
1439 |
+ "CD15", "CD15", "CD14", "CD14", "CD14", "CD14", "CD13", |
|
1440 |
+ "CD13", "CD14", "CD14", "CD13", "CD13", "CD14", "CD14", |
|
1441 |
+ "CD15", "CD15", "CD15", "CD15", "CD13", "CD13", "CD14", |
|
1442 |
+ "CD14", "CD14", "CD14", "CD13", "CD13", "CD15", "CD15", |
|
1443 |
+ "CD14", "CD14", "CD14", "CD14", "CD13", "CD13", "CD14", |
|
1444 |
+ "CD14", "CD13", "CD13", "CD14", "CD14", "CD15", "CD15", |
|
1445 |
+ "CD15", "CD15", "CD13", "CD13", "CD14", "CD14" |
|
1446 |
+ ), |
|
1447 |
+ g2_Tissue = c( |
|
1448 |
+ "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", |
|
1449 |
+ "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", |
|
1450 |
+ "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", |
|
1451 |
+ "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", |
|
1452 |
+ "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", |
|
1453 |
+ "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", |
|
1454 |
+ "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", |
|
1455 |
+ "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", |
|
1456 |
+ "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", |
|
1457 |
+ "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", "PB", |
|
1458 |
+ "PB", "PB", "PB", "PB" |
|
1459 |
+ ), |
|
1460 |
+ g2_TimePoint = c( |
|
1461 |
+ 3, 3, 3, 3, 1, 1, 6, 6, 1, 1, 1, 1, 2, 2, 8, 8, 3, 3, 1, |
|
1462 |
+ 1, 6, 6, 1, 1, 2, 2, 3, 3, 3, 3, 1, 1, 6, 6, 1, 1, 1, 1, |
|
1463 |
+ 2, 2, 8, 8, 3, 3, 1, 1, 6, 6, 1, 1, 2, 2, 3, 3, 3, 3, 1, |
|
1464 |
+ 1, 6, 6, 1, 1, 1, 1, 2, 2, 8, 8, 3, 3, 1, 1, 6, 6, 1, 1, |
|
1465 |
+ 2, 2, 3, 3, 3, 3, 1, 1, 6, 6, 1, 1, 1, 1, 2, 2, 8, 8, 3, |
|
1466 |
+ 3, 1, 1, 6, 6, 1, 1, 2, 2 |
|
1467 |
+ ), |
|
1468 |
+ chr = c( |
|
1469 |
+ NA, NA, NA, NA, "1", NA, "1", NA, "2", NA, NA, NA, NA, "3", NA, |
|
1470 |
+ "3", "1", NA, NA, NA, "1", NA, NA, NA, NA, "3", NA, NA, NA, NA, |
|
1471 |
+ "1", NA, "1", NA, "2", NA, NA, NA, NA, "3", NA, "3", "1", NA, NA, |
|
1472 |
+ NA, "1", NA, NA, NA, NA, "3", NA, NA, "1", NA, NA, NA, NA, NA, NA, |
|
1473 |
+ NA, NA, "2", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, |
|
1474 |
+ NA, NA, NA, NA, NA, "1", NA, "1", NA, NA, NA, NA, NA, NA, NA, NA, |
|
1475 |
+ NA, "1", NA, NA, "1", "1", NA, NA, NA, NA, NA |
|
1476 |
+ ), |
|
1477 |
+ integration_locus = c( |
|
1478 |
+ NA, NA, NA, NA, 12345, NA, 12345, NA, 54321, NA, NA, |
|
1479 |
+ NA, NA, 62135, NA, 62135, 12345, NA, NA, NA, 12345, |
|
1480 |
+ NA, NA, NA, NA, 62135, NA, NA, NA, NA, 12345, NA, |
|
1481 |
+ 12345, NA, 54321, NA, NA, NA, NA, 62135, NA, 62135, |
|
1482 |
+ 12345, NA, NA, NA, 12345, NA, NA, NA, NA, 62135, NA, |
|
1483 |
+ NA, 43524, NA, NA, NA, NA, NA, NA, NA, NA, 76835, NA, |
|
1484 |
+ NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, |
|
1485 |
+ NA, NA, NA, NA, 12345, NA, 12345, NA, NA, NA, NA, NA, |
|
1486 |
+ NA, NA, NA, NA, 12345, NA, NA, 56832, 12345, NA, NA, |
|
1487 |
+ NA, NA, NA |
|
1488 |
+ ), |
|
1489 |
+ strand = c( |
|
1490 |
+ NA, NA, NA, NA, "+", NA, "+", NA, "-", NA, NA, NA, NA, "+", NA, |
|
1491 |
+ "+", "+", NA, NA, NA, "+", NA, NA, NA, NA, "+", NA, NA, NA, NA, |
|
1492 |
+ "+", NA, "+", NA, "-", NA, NA, NA, NA, "+", NA, "+", "+", NA, |
|
1493 |
+ NA, NA, "+", NA, NA, NA, NA, "+", NA, NA, "+", NA, NA, NA, NA, |
|
1494 |
+ NA, NA, NA, NA, "-", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, |
|
1495 |
+ NA, NA, NA, NA, NA, NA, NA, "+", NA, "+", NA, NA, NA, NA, NA, NA, |
|
1496 |
+ NA, NA, NA, "+", NA, NA, "-", "+", NA, NA, NA, NA, NA |
|
1497 |
+ ), |
|
1498 |
+ sharing_perc = c( |
|
1499 |
+ 0, 0, 0, 0, 50, 0, 50, 0, 100, 0, 0, 0, 0, 100, 0, 50, |
|
1500 |
+ 100, 0, 0, 0, 100, 0, 0, 0, 0, 100, 0, 0, 0, 0, 50, 0, 50, |
|
1501 |
+ 0, 100, 0, 0, 0, 0, 100, 0, 50, 100, 0, 0, 0, 100, 0, 0, |
|
1502 |
+ 0, 0, 100, 0, 0, 100, 0, 0, 0, 0, 0, 0, 0, 0, 100, 0, 0, |
|
1503 |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 0, 50, |
|
1504 |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 100, 0, 0, 100, 100, 0, 0, 0, |
|
1505 |
+ 0, 0 |
|
1506 |
+ ) |
|
1507 |
+) |
|
1508 |
+ |
|
1509 |
+test_that("iss_source produces expected output - per patient", { |
|
1510 |
+ res <- iss_source(test_df2, test_df3) %>% |
|
1511 |
+ purrr::map(~ dplyr::arrange(.x, .data$g1)) |
|
1512 |
+ expect_equal(res, expected_res1) |
|
1513 |
+}) |
|
1514 |
+ |
|
1515 |
+test_that("iss_source produces expected output - NO per patient", { |
|
1516 |
+ res <- iss_source(test_df2, test_df3, by_subject = FALSE) |
|
1517 |
+ expect_equal(res, expected_res2) |
|
1518 |
+}) |
... | ... |
@@ -4,325 +4,445 @@ func_name <- "purity_filter" |
4 | 4 |
# Global vars |
5 | 5 |
#------------------------------------------------------------------------------# |
6 | 6 |
|
7 |
-df <- tibble::tibble(chr = c('1', '1', '1', '1', '1', '1', '1', '1', '1', '1', |
|
8 |
- '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', |
|
9 |
- '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', |
|
10 |
- '1', '1', '1', '1', '1', '1', '1', '1', '1', '1'), |
|
11 |
- integration_locus = c(121249, 251227, 645551, 732938, |
|
12 |
- 775536, 846681, 1029785, 1036835, |
|
13 |
- 121249, 251227, 645551, 732938, |
|
14 |
- 775536, 846681, 1029785, 1036835, |
|
15 |
- 121249, 251227, 645551, 732938, |
|
16 |
- 775536, 846681, 1029785, 1036835, |
|
17 |
- 121249, 251227, 645551, 732938, |
|
18 |
- 775536, 846681, 1029785, 1036835, |
|
19 |
- 121249, 251227, 645551, 732938, |
|
20 |
- 775536, 846681, 1029785, 1036835), |
|
21 |
- strand = c('+', '+', '+', '+', '+', '+', '-', '+', '+', |
|
22 |
- '+', '+', '+', '+', '+', '-', '+', '+', '+', |
|
23 |
- '+', '+', '+', '+', '-', '+', '+', '+', '+', |
|
24 |
- '+', '+', '+', '-', '+', '+', '+', '+', '+', |
|
25 |
- '+', '+', '-', '+'), |
|
26 |
- GeneName = c('LOC729737', 'LOC100132287', 'LOC100133331', |
|
27 |
- 'LOC100288069', 'LINC01128', 'LOC100130417', |
|
28 |
- 'C1orf159', 'C1orf159', 'LOC729737', |
|
29 |
- 'LOC100132287', 'LOC100133331', |
|
30 |
- 'LOC100288069', 'LINC01128', 'LOC100130417', |
|
31 |
- 'C1orf159', 'C1orf159', 'LOC729737', |
|
32 |
- 'LOC100132287', 'LOC100133331', |
|
33 |
- 'LOC100288069', 'LINC01128', 'LOC100130417' |