Browse code

[NEW] Added new analysis functions and plotting functions

Added CIS_grubbs and cumulative_count_union + CIS_volcano_plot

GiuliaPais authored on 19/10/2020 19:01:13
Showing31 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: ISAnalytics
2 2
 Title: Analyze gene therapy vector insertion sites data identified from genomics next generation sequencing reads for clonal tracking studies
3
-Version: 0.99.12
3
+Version: 0.99.13
4 4
 Date: 2020-07-03
5 5
 Authors@R: c(
6 6
   person(given = "Andrea",
... ...
@@ -39,7 +39,10 @@ Imports:
39 39
     fs,
40 40
     zip,
41 41
     lubridate,
42
-    lifecycle
42
+    lifecycle,
43
+    ggplot2,
44
+    ggrepel,
45
+    stats
43 46
 Encoding: UTF-8
44 47
 LazyData: false
45 48
 Roxygen: list(markdown = TRUE)
... ...
@@ -1,13 +1,17 @@
1 1
 # Generated by roxygen2: do not edit by hand
2 2
 
3
+export(CIS_grubbs)
4
+export(CIS_volcano_plot)
3 5
 export(aggregate_metadata)
4 6
 export(aggregate_values_by_key)
5 7
 export(annotation_IS_vars)
6 8
 export(as_sparse_matrix)
7 9
 export(association_file_columns)
10
+export(clinical_relevant_suspicious_genes)
8 11
 export(comparison_matrix)
9 12
 export(compute_abundance)
10 13
 export(compute_near_integrations)
14
+export(cumulative_count_union)
11 15
 export(date_columns_coll)
12 16
 export(date_formats)
13 17
 export(default_stats)
... ...
@@ -17,6 +21,7 @@ export(import_association_file)
17 21
 export(import_parallel_Vispa2Matrices_auto)
18 22
 export(import_parallel_Vispa2Matrices_interactive)
19 23
 export(import_single_Vispa2Matrix)
24
+export(known_clinical_oncogenes)
20 25
 export(mandatory_IS_vars)
21 26
 export(matching_options)
22 27
 export(quantification_types)
... ...
@@ -30,6 +35,7 @@ export(top_integrations)
30 35
 export(unzip_file_system)
31 36
 import(BiocParallel)
32 37
 import(dplyr)
38
+import(ggplot2)
33 39
 import(lifecycle)
34 40
 import(lubridate)
35 41
 importFrom(BiocParallel,MulticoreParam)
... ...
@@ -68,6 +74,7 @@ importFrom(fs,file_exists)
68 74
 importFrom(fs,path)
69 75
 importFrom(fs,path_dir)
70 76
 importFrom(fs,path_wd)
77
+importFrom(ggrepel,geom_label_repel)
71 78
 importFrom(htmltools,browsable)
72 79
 importFrom(htmltools,css)
73 80
 importFrom(htmltools,div)
... ...
@@ -101,10 +108,15 @@ importFrom(reactable,reactableTheme)
101 108
 importFrom(readr,write_tsv)
102 109
 importFrom(rlang,.data)
103 110
 importFrom(rlang,`:=`)
111
+importFrom(rlang,arg_match)
104 112
 importFrom(rlang,env_bind)
105 113
 importFrom(rlang,eval_tidy)
106 114
 importFrom(rlang,expr)
107 115
 importFrom(rlang,parse_expr)
116
+importFrom(stats,median)
117
+importFrom(stats,na.omit)
118
+importFrom(stats,p.adjust)
119
+importFrom(stats,pt)
108 120
 importFrom(stringr,str_detect)
109 121
 importFrom(stringr,str_extract)
110 122
 importFrom(stringr,str_extract_all)
... ...
@@ -126,5 +138,6 @@ importFrom(tidyr,separate)
126 138
 importFrom(tidyr,unite)
127 139
 importFrom(tidyr,unnest)
128 140
 importFrom(utils,read.csv)
141
+importFrom(utils,read.delim)
129 142
 importFrom(utils,tail)
130 143
 importFrom(zip,unzip)
... ...
@@ -1,5 +1,12 @@
1 1
 \title{ISAnalytics News}
2 2
 
3
+# ISAnalytics 0.99.13 (2020-10-19)
4
+
5
+## NEW FEATURES
6
+
7
+* Added analysis functions `CIS_grubbs` and `cumulative_count_union`
8
+* Added plotting functions `CIS_volcano_plot`
9
+
3 10
 # ISAnalytics 0.99.12 (2020-10-04)
4 11
 
5 12
 ## NEW FEATURES
... ...
@@ -43,6 +43,10 @@
43 43
 #'   * \code{\link{threshold_filter}}
44 44
 #'   * \code{\link{top_integrations}}
45 45
 #'   * \code{\link{sample_statistics}}
46
+#'   * \code{\link{CIS_grubbs}}
47
+#'   * \code{\link{cumulative_count_union}}
48
+#' * Plotting functions:
49
+#'   * \code{\link{CIS_volcano_plot}}
46 50
 #' * Utility functions:
47 51
 #'   * \code{\link{generate_blank_association_file}}
48 52
 #'   * \code{\link{generate_Vispa2_launch_AF}}
... ...
@@ -651,6 +651,472 @@ sample_statistics <- function(x, metadata,
651 651
 }
652 652
 
653 653
 
654
+#' Grubbs test for Common Insertion Sites (CIS).
655
+#'
656
+#' \lifecycle{experimental}
657
+#' Statistical approach for the validation of common insertion sites
658
+#' significance based on the comparison of the integration frequency
659
+#' at the CIS gene with respect to other genes contained in the
660
+#' surrounding genomic regions. For more details please refer to
661
+#' this paper:
662
+#' <`r .lentiviral_CIS_paper()`>
663
+#'
664
+#' @details
665
+#' ## Genomic annotation file
666
+#' This file is a data base, or more simply a .tsv file to import, with
667
+#' genes annotation for the specific genome. The annotations for the
668
+#' human genome (hg19) is already included in this package.
669
+#' If for any reason the user is performing an analysis on another genome,
670
+#' this file needs to be changed respecting the USCS Genome Browser
671
+#' format, meaning the input file headers should be:
672
+#'
673
+#' ```{r echo=FALSE, tidy=TRUE}
674
+#' cat(c(
675
+#'   paste(c("name2", "chrom", "strand"), collapse = ", "), "\n",
676
+#'   paste(c("min_txStart","max_txEnd", "minmax_TxLen"), collapse = ", "),
677
+#'   "\n",
678
+#'   paste(c("average_TxLen", "name", "min_cdsStart"), collapse = ", "),
679
+#'   "\n",
680
+#'   paste(c("max_cdsEnd","minmax_CdsLen", "average_CdsLen"), collapse = ", ")
681
+#' ))
682
+#'
683
+#' ```
684
+#'
685
+#' @param x An integration matrix, must include the `mandatory_IS_vars()`
686
+#' columns and the `annotation_IS_vars()` columns
687
+#' @param genomic_annotation_file Database file for gene annotation,
688
+#' see details
689
+#' @param grubbs_flanking_gene_bp Number of base pairs flanking a gene
690
+#' @param threshold_alpha Significance threshold
691
+#' @param add_standard_padjust Compute the standard padjust?
692
+#'
693
+#' @family Analysis functions
694
+#'
695
+#' @import dplyr
696
+#' @importFrom tibble as_tibble
697
+#' @importFrom rlang .data
698
+#' @importFrom magrittr `%>%`
699
+#' @importFrom stats median pt p.adjust
700
+#'
701
+#' @return A data frame
702
+#' @export
703
+#'
704
+#' @examples
705
+#' op <- options(ISAnalytics.widgets = FALSE)
706
+#'
707
+#' path_AF <- system.file("extdata", "ex_association_file.tsv",
708
+#'     package = "ISAnalytics"
709
+#' )
710
+#' root_correct <- system.file("extdata", "fs.zip",
711
+#'     package = "ISAnalytics"
712
+#' )
713
+#' root_correct <- unzip_file_system(root_correct, "fs")
714
+#'
715
+#' matrices <- import_parallel_Vispa2Matrices_auto(
716
+#'     association_file = path_AF, root = root_correct,
717
+#'     quantification_type = c("seqCount", "fragmentEstimate"),
718
+#'     matrix_type = "annotated", workers = 2, patterns = NULL,
719
+#'     matching_opt = "ANY"
720
+#' )
721
+#'
722
+#' cis <- CIS_grubbs(matrices$seqCount)
723
+#'
724
+#' options(op)
725
+CIS_grubbs <- function(x,
726
+    genomic_annotation_file =
727
+        system.file("extdata", "hg19.refGene.oracle.tsv.xz",
728
+            package = "ISAnalytics"
729
+        ),
730
+    grubbs_flanking_gene_bp = 100000,
731
+    threshold_alpha = 0.05,
732
+    add_standard_padjust = TRUE) {
733
+    # Check x has the correct structure
734
+    stopifnot(is.data.frame(x))
735
+    if (!all(mandatory_IS_vars() %in% colnames(x))) {
736
+        stop(.non_ISM_error())
737
+    }
738
+    if (!.is_annotated(x)) {
739
+        stop(.missing_annot())
740
+    }
741
+    # Check other parameters
742
+    stopifnot(is.character(genomic_annotation_file) &
743
+        length(genomic_annotation_file) == 1)
744
+    stopifnot(is.numeric(grubbs_flanking_gene_bp) |
745
+        is.integer(grubbs_flanking_gene_bp))
746
+    stopifnot(length(grubbs_flanking_gene_bp) == 1)
747
+    stopifnot(is.numeric(threshold_alpha) & length(threshold_alpha) == 1)
748
+    stopifnot(is.logical(add_standard_padjust) &
749
+        length(add_standard_padjust) == 1)
750
+    stopifnot(file.exists(genomic_annotation_file))
751
+    # Try to import annotation file
752
+    refgenes <- read.csv(
753
+        file = genomic_annotation_file,
754
+        header = TRUE, fill = TRUE, sep = "\t",
755
+        check.names = FALSE,
756
+        na.strings = c("NONE", "NA", "NULL", "NaN", "")
757
+    )
758
+    refgenes <- tibble::as_tibble(refgenes) %>%
759
+        dplyr::mutate(chrom = stringr::str_replace_all(.data$chrom, "chr", ""))
760
+    # Check annotation file format
761
+    if (!all(c(
762
+        "name2", "chrom", "strand", "min_txStart", "max_txEnd",
763
+        "minmax_TxLen", "average_TxLen", "name", "min_cdsStart",
764
+        "max_cdsEnd", "minmax_CdsLen", "average_CdsLen"
765
+    ) %in%
766
+        colnames(refgenes))) {
767
+        stop(.non_standard_annotation_structure())
768
+    }
769
+    ### Begin - init phase
770
+    df_by_gene <- x %>%
771
+        dplyr::group_by(
772
+            .data$GeneName,
773
+            .data$GeneStrand,
774
+            .data$chr
775
+        ) %>%
776
+        dplyr::summarise(
777
+            n_IS_perGene = dplyr::n_distinct(
778
+                .data$integration_locus
779
+            ),
780
+            min_bp_integration_locus =
781
+                min(.data$integration_locus),
782
+            max_bp_integration_locus =
783
+                max(.data$integration_locus),
784
+            IS_span_bp = (max(.data$integration_locus) -
785
+                min(.data$integration_locus)),
786
+            avg_bp_integration_locus =
787
+                mean(.data$integration_locus),
788
+            median_bp_integration_locus =
789
+                stats::median(.data$integration_locus),
790
+            distinct_orientations = dplyr::n_distinct(.data$strand),
791
+            describe = psych::describe(.data$integration_locus),
792
+            .groups = "drop"
793
+        )
794
+
795
+    df_bygene_withannotation <- df_by_gene %>%
796
+        dplyr::inner_join(refgenes, by = c(
797
+            "chr" = "chrom",
798
+            "GeneStrand" = "strand",
799
+            "GeneName" = "name2"
800
+        )) %>%
801
+        dplyr::select(c(
802
+            dplyr::all_of(colnames(df_by_gene)),
803
+            .data$average_TxLen
804
+        ))
805
+    n_elements <- nrow(df_bygene_withannotation)
806
+
807
+    df_bygene_withannotation <- df_bygene_withannotation %>%
808
+        dplyr::mutate(
809
+            geneIS_frequency_byHitIS = .data$n_IS_perGene / n_elements
810
+        )
811
+
812
+    ### Grubbs test
813
+    ### --- Gene Frequency
814
+    df_bygene_withannotation <- df_bygene_withannotation %>%
815
+        dplyr::mutate(
816
+            raw_gene_integration_frequency =
817
+                .data$n_IS_perGene / .data$average_TxLen,
818
+            integration_frequency_withtolerance = .data$n_IS_perGene /
819
+                (.data$average_TxLen + grubbs_flanking_gene_bp) * 1000,
820
+            minus_log2_integration_freq_withtolerance =
821
+                -log(x = .data$integration_frequency_withtolerance, base = 2)
822
+        )
823
+    ### --- z score
824
+    z_mlif <- function(x) {
825
+        sqrt((n_elements * (n_elements - 2) * x^2) /
826
+            (((n_elements - 1)^2) - (n_elements * x^2)))
827
+    }
828
+    df_bygene_withannotation <- df_bygene_withannotation %>%
829
+        dplyr::mutate(
830
+            zscore_minus_log2_int_freq_tolerance =
831
+                scale(-log(
832
+                    x = .data$integration_frequency_withtolerance,
833
+                    base = 2
834
+                )),
835
+            neg_zscore_minus_log2_int_freq_tolerance =
836
+                -.data$zscore_minus_log2_int_freq_tolerance,
837
+            t_z_mlif = z_mlif(
838
+                .data$neg_zscore_minus_log2_int_freq_tolerance
839
+            )
840
+        )
841
+    ### --- tdist
842
+    t_dist_2t <- function(x, deg) {
843
+        return((1 - stats::pt(x, deg)) * 2)
844
+    }
845
+    df_bygene_withannotation <- df_bygene_withannotation %>%
846
+        dplyr::mutate(
847
+            tdist2t = t_dist_2t(.data$t_z_mlif, n_elements - 2),
848
+            tdist_pt = pt(
849
+                q = .data$t_z_mlif,
850
+                df = n_elements - 2
851
+            ),
852
+            tdist_bonferroni_default = ifelse(
853
+                .data$tdist2t * n_elements > 1, 1,
854
+                .data$tdist2t * n_elements
855
+            )
856
+        )
857
+    if (add_standard_padjust == TRUE) {
858
+        df_bygene_withannotation <- df_bygene_withannotation %>%
859
+            dplyr::mutate(
860
+                tdist_bonferroni = stats::p.adjust(
861
+                    .data$tdist2t,
862
+                    method = "bonferroni",
863
+                    n = length(.data$tdist2t)
864
+                ),
865
+                tdist_fdr = stats::p.adjust(
866
+                    .data$tdist2t,
867
+                    method = "fdr",
868
+                    n = length(.data$tdist2t)
869
+                ),
870
+                tdist_benjamini = stats::p.adjust(
871
+                    .data$tdist2t,
872
+                    method = "BY",
873
+                    n = length(.data$tdist2t)
874
+                )
875
+            )
876
+    }
877
+    df_bygene_withannotation <- df_bygene_withannotation %>%
878
+        dplyr::mutate(
879
+            tdist_positive_and_corrected =
880
+                ifelse(
881
+                    (.data$tdist_bonferroni_default < threshold_alpha &
882
+                        .data$neg_zscore_minus_log2_int_freq_tolerance > 0),
883
+                    .data$tdist_bonferroni_default,
884
+                    NA
885
+                ),
886
+            tdist_positive = ifelse(
887
+                (.data$tdist2t < threshold_alpha &
888
+                    .data$neg_zscore_minus_log2_int_freq_tolerance > 0),
889
+                .data$tdist2t,
890
+                NA
891
+            )
892
+        )
893
+    EM_correction_N <- length(
894
+        df_bygene_withannotation$tdist_positive[
895
+            !is.na(df_bygene_withannotation$tdist_positive)
896
+        ]
897
+    )
898
+    df_bygene_withannotation <- df_bygene_withannotation %>%
899
+        dplyr::mutate(
900
+            tdist_positive_and_correctedEM =
901
+                ifelse(
902
+                    (.data$tdist2t * EM_correction_N <
903
+                        threshold_alpha &
904
+                        .data$neg_zscore_minus_log2_int_freq_tolerance > 0),
905
+                    .data$tdist2t * EM_correction_N,
906
+                    NA
907
+                )
908
+        )
909
+    return(df_bygene_withannotation)
910
+}
911
+
912
+#' Integrations cumulative count in time by sample
913
+#'
914
+#' \lifecycle{experimental}
915
+#' This function computes the cumulative number of integrations
916
+#' observed in each sample at different time points by assuming that
917
+#' if an integration is observed at time point "t" then it is also observed in
918
+#' time point "t+1".
919
+#'
920
+#' @details
921
+#' ## Input data frame
922
+#' The user can provide as input for the `x` parameter both a simple
923
+#' integration matrix AND setting the `aggregate` parameter to TRUE,
924
+#' or provide an already aggregated matrix via
925
+#' \link{aggregate_values_by_key}.
926
+#' If the user supplies a matrix to be aggregated the `association_file`
927
+#' parameter must not be NULL: aggregation will be done by an internal
928
+#' call to the aggregation function.
929
+#' If the user supplies an already aggregated matrix, the `key` parameter
930
+#' is the key used for aggregation -
931
+#' **NOTE: for this operation is mandatory
932
+#' that the time point column is included in the key.**
933
+#' ## Assumptions on time point format
934
+#' By using the functions provided by this package, when imported,
935
+#' an association file will be correctly formatted for future usage.
936
+#' In the formatting process there is also a padding operation performed on
937
+#' time points: this means the functions expects the time point column to
938
+#' be of type character and to be correctly padded with 0s. If the
939
+#' chosen column for time point is detected as numeric the function will
940
+#' attempt the conversion to character and automatic padding.
941
+#' If you choose to import the association file not using the
942
+#' \link{import_association_file} function, be sure to check the format of
943
+#' the chosen column to avoid undesired results.
944
+#'
945
+#' @param x A simple integration matrix or an aggregated matrix (see details)
946
+#' @param association_file NULL or the association file for x if `aggregate`
947
+#' is set to TRUE
948
+#' @param timepoint_column What is the name of the time point column?
949
+#' @param key The aggregation key - must always contain the `timepoint_column`
950
+#' @param include_tp_zero Include timepoint 0?
951
+#' @param zero How is 0 coded in the data frame?
952
+#' @param aggregate Should x be aggregated?
953
+#' @param ... Additional parameters to pass to `aggregate_values_by_key`
954
+#'
955
+#' @family Analysis functions
956
+#'
957
+#' @import dplyr
958
+#' @importFrom magrittr `%>%`
959
+#' @importFrom rlang .data
960
+#' @importFrom stringr str_pad
961
+#' @importFrom purrr reduce is_empty
962
+#' @importFrom tidyr pivot_longer
963
+#' @importFrom stats na.omit
964
+#'
965
+#' @return A data frame
966
+#' @export
967
+#'
968
+#' @examples
969
+#' op <- options(ISAnalytics.widgets = FALSE)
970
+#'
971
+#' path_AF <- system.file("extdata", "ex_association_file.tsv",
972
+#'     package = "ISAnalytics"
973
+#' )
974
+#' root_correct <- system.file("extdata", "fs.zip",
975
+#'     package = "ISAnalytics"
976
+#' )
977
+#' root_correct <- unzip_file_system(root_correct, "fs")
978
+#'
979
+#' association_file <- import_association_file(path_AF, root_correct)
980
+#' matrices <- import_parallel_Vispa2Matrices_auto(
981
+#'     association_file = association_file, root = NULL,
982
+#'     quantification_type = c("seqCount", "fragmentEstimate"),
983
+#'     matrix_type = "annotated", workers = 2, patterns = NULL,
984
+#'     matching_opt = "ANY"
985
+#' )
986
+#'
987
+#' #### EXTERNAL AGGREGATION
988
+#' aggregated <- aggregate_values_by_key(matrices$seqCount, association_file)
989
+#' cumulative_count <- cumulative_count_union(aggregated)
990
+#'
991
+#' #### INTERNAL AGGREGATION
992
+#' cumulative_count_2 <- cumulative_count_union(matrices$seqCount,
993
+#'     association_file,
994
+#'     aggregate = TRUE
995
+#' )
996
+#'
997
+#' options(op)
998
+cumulative_count_union <- function(x,
999
+    association_file = NULL,
1000
+    timepoint_column = "TimePoint",
1001
+    key = c(
1002
+        "SubjectID",
1003
+        "CellMarker",
1004
+        "Tissue",
1005
+        "TimePoint"
1006
+    ),
1007
+    include_tp_zero = FALSE,
1008
+    zero = "0000",
1009
+    aggregate = FALSE,
1010
+    ...) {
1011
+    stopifnot(is.data.frame(x))
1012
+    stopifnot(is.data.frame(association_file) | is.null(association_file))
1013
+    stopifnot(is.character(timepoint_column) & length(timepoint_column) == 1)
1014
+    stopifnot(is.character(key))
1015
+    stopifnot(is.logical(include_tp_zero))
1016
+    stopifnot(is.character(zero) & length(zero) == 1)
1017
+    stopifnot(is.logical(aggregate))
1018
+    if (aggregate == TRUE & is.null(association_file)) {
1019
+        stop(.agg_with_null_meta_err())
1020
+    }
1021
+    if (!all(timepoint_column %in% key)) {
1022
+        stop(.key_without_tp_err())
1023
+    }
1024
+    if (aggregate == FALSE) {
1025
+        if (!all(key %in% colnames(x))) {
1026
+            stop(.key_not_found())
1027
+        }
1028
+    } else {
1029
+        x <- aggregate_values_by_key(
1030
+            x = x,
1031
+            association_file = association_file,
1032
+            key = key, ...
1033
+        )
1034
+        if (is.numeric(association_file[[timepoint_column]]) |
1035
+            is.integer(association_file[[timepoint_column]])) {
1036
+            max <- max(association_file[[timepoint_column]])
1037
+            digits <- floor(log10(x)) + 1
1038
+            association_file <- association_file %>%
1039
+                dplyr::mutate({{ timepoint_column }} := stringr::str_pad(
1040
+                    as.character(.data$TimePoint),
1041
+                    digits,
1042
+                    side = "left",
1043
+                    pad = "0"
1044
+                ))
1045
+            zero <- paste0(rep_len("0", digits), collapse = "")
1046
+        }
1047
+    }
1048
+    if (include_tp_zero == FALSE) {
1049
+        x <- x %>%
1050
+            dplyr::filter(dplyr::across(
1051
+                dplyr::all_of(timepoint_column),
1052
+                ~ .x != zero
1053
+            ))
1054
+        if (nrow(x) == 0) {
1055
+            message(paste(
1056
+                "All time points zeros were excluded, the data",
1057
+                "frame is empty."
1058
+            ))
1059
+            return(x)
1060
+        }
1061
+    }
1062
+    annot <- if (.is_annotated(x)) {
1063
+        annotation_IS_vars()
1064
+    } else {
1065
+        character(0)
1066
+    }
1067
+    x <- x %>% dplyr::select(dplyr::all_of(c(
1068
+        mandatory_IS_vars(),
1069
+        annot,
1070
+        key
1071
+    )))
1072
+    cols_for_join <- colnames(x)[!colnames(x) %in% timepoint_column]
1073
+    key_minus_tp <- key[!key %in% timepoint_column]
1074
+    distinct_tp_for_each <- x %>%
1075
+        dplyr::arrange(dplyr::across(dplyr::all_of(timepoint_column))) %>%
1076
+        dplyr::group_by(dplyr::across(dplyr::all_of(key_minus_tp))) %>%
1077
+        dplyr::summarise(
1078
+            Distinct_tp = unique(
1079
+                `$`(.data, !!timepoint_column)
1080
+            ),
1081
+            .groups = "drop"
1082
+        ) %>%
1083
+        dplyr::rename({{ timepoint_column }} := "Distinct_tp")
1084
+
1085
+    splitted <- x %>%
1086
+        dplyr::arrange(dplyr::across(dplyr::all_of(timepoint_column))) %>%
1087
+        dplyr::group_by(dplyr::across(dplyr::all_of(timepoint_column))) %>%
1088
+        dplyr::group_split()
1089
+
1090
+    mult_tp <- purrr::reduce(splitted, dplyr::full_join, by = cols_for_join)
1091
+    tp_indexes <- grep(timepoint_column, colnames(mult_tp))
1092
+    tp_indexes <- tp_indexes[-1]
1093
+    res <- if (!purrr::is_empty(tp_indexes)) {
1094
+        for (i in tp_indexes) {
1095
+            mod_col <- mult_tp[[i]]
1096
+            mod_col_prec <- mult_tp[[i - 1]]
1097
+            val <- dplyr::first(stats::na.omit(mod_col))
1098
+            mod_col[!is.na(mod_col_prec)] <- val
1099
+            mult_tp[i] <- mod_col
1100
+        }
1101
+        mult_tp %>%
1102
+            tidyr::pivot_longer(
1103
+                cols = dplyr::starts_with(timepoint_column),
1104
+                values_to = timepoint_column,
1105
+                values_drop_na = TRUE
1106
+            ) %>%
1107
+            dplyr::select(-c("name")) %>%
1108
+            dplyr::distinct()
1109
+    } else {
1110
+        mult_tp
1111
+    }
1112
+    res <- res %>% dplyr::semi_join(distinct_tp_for_each, by = key)
1113
+    res <- res %>%
1114
+        dplyr::group_by(dplyr::across(dplyr::all_of(key))) %>%
1115
+        dplyr::summarise(count = dplyr::n(), .groups = "drop")
1116
+    return(res)
1117
+}
1118
+
1119
+
654 1120
 #' A set of pre-defined functions for `sample_statistics`.
655 1121
 #'
656 1122
 #' @return A named list of functions/purrr-style lambdas
... ...
@@ -2525,6 +2525,14 @@
2525 2525
 }
2526 2526
 
2527 2527
 #### ---- Internals for analysis functions ----####
2528
+#---- USED IN : CIS_grubbs ----
2529
+### link to article in documentation
2530
+.lentiviral_CIS_paper <- function() {
2531
+    paste0(
2532
+        "https://ashpublications.org/blood/article/117/20/5332/21206/",
2533
+        "Lentiviral-vector-common-integration-sites-in"
2534
+    )
2535
+}
2528 2536
 
2529 2537
 #---- USED IN : threshold_filter ----
2530 2538
 
... ...
@@ -2884,3 +2892,104 @@
2884 2892
     })
2885 2893
     return(filtered)
2886 2894
 }
2895
+
2896
+#---- USED IN : CIS_volcano_plot ----
2897
+#' @importFrom rlang arg_match
2898
+#' @importFrom utils read.delim
2899
+.load_onco_ts_genes <- function(onco_db_file,
2900
+    tumor_suppressors_db_file,
2901
+    species) {
2902
+    if (!file.exists(onco_db_file)) {
2903
+        stop(paste(
2904
+            "`onco_db_file` was not found, check you provided the",
2905
+            "correct path for the file"
2906
+        ))
2907
+    }
2908
+    if (!file.exists(tumor_suppressors_db_file)) {
2909
+        stop(paste(
2910
+            "`tumor_suppressors_db_file` was not found,",
2911
+            "check you provided the",
2912
+            "correct path for the file"
2913
+        ))
2914
+    }
2915
+    specie <- rlang::arg_match(species, values = c("human", "mouse", "all"))
2916
+    specie_name <- switch(specie,
2917
+        "human" = "Homo sapiens (Human)",
2918
+        "mouse" = "Mus musculus (Mouse)",
2919
+        "all" = c(
2920
+            "Homo sapiens (Human)",
2921
+            "Mus musculus (Mouse)"
2922
+        )
2923
+    )
2924
+    # Acquire DB
2925
+    onco_db <- utils::read.delim(
2926
+        file = onco_db_file, header = TRUE,
2927
+        fill = TRUE, sep = "\t", check.names = FALSE
2928
+    )
2929
+    tumsup_db <- utils::read.delim(
2930
+        file = tumor_suppressors_db_file,
2931
+        header = TRUE,
2932
+        fill = TRUE, sep = "\t",
2933
+        check.names = FALSE
2934
+    )
2935
+    if (getOption("ISAnalytics.verbose") == TRUE) {
2936
+        message(paste(c(
2937
+            "Loading annotated genes -  species selected: ",
2938
+            paste(c(specie_name), collapse = ", ")
2939
+        )))
2940
+    }
2941
+    # Filter and merge
2942
+    onco_df <- .filter_db(onco_db, specie_name, "OncoGene")
2943
+    tumsup_df <- .filter_db(tumsup_db, specie_name, "TumorSuppressor")
2944
+    oncots_df_to_use <- .merge_onco_tumsup(onco_df, tumsup_df)
2945
+    if (getOption("ISAnalytics.verbose") == TRUE) {
2946
+        message(paste(c(
2947
+            "Loading annotated genes -  done"
2948
+        )))
2949
+    }
2950
+    return(oncots_df_to_use)
2951
+}
2952
+
2953
+#' @import dplyr
2954
+#' @importFrom rlang .data
2955
+#' @importFrom magrittr `%>%`
2956
+.filter_db <- function(onco_db, species, output_col_name) {
2957
+    filtered_db <- onco_db %>%
2958
+        dplyr::filter(.data$Organism %in% species) %>%
2959
+        dplyr::filter(.data$Status == "reviewed" &
2960
+            !is.na(.data$`Gene names`)) %>%
2961
+        dplyr::select(.data$`Gene names`) %>%
2962
+        dplyr::distinct()
2963
+
2964
+    filtered_db <- filtered_db %>%
2965
+        dplyr::mutate(`Gene names` = stringr::str_split(
2966
+            .data$`Gene names`, " "
2967
+        )) %>%
2968
+        tidyr::unnest("Gene names") %>%
2969
+        dplyr::mutate({{ output_col_name }} := .data$`Gene names`) %>%
2970
+        dplyr::rename("GeneName" = "Gene names")
2971
+
2972
+    return(filtered_db)
2973
+}
2974
+
2975
+#' @import dplyr
2976
+#' @importFrom rlang .data
2977
+#' @importFrom magrittr `%>%`
2978
+.merge_onco_tumsup <- function(onco_df, tum_df) {
2979
+    onco_tumsup <- onco_df %>%
2980
+        dplyr::full_join(tum_df, by = "GeneName") %>%
2981
+        dplyr::mutate(Onco1_TS2 = ifelse(
2982
+            !is.na(.data$OncoGene) & !is.na(.data$TumorSuppressor),
2983
+            yes = 3,
2984
+            no = ifelse(!is.na(.data$OncoGene),
2985
+                yes = 1,
2986
+                no = ifelse(!is.na(.data$TumorSuppressor),
2987
+                    yes = 2,
2988
+                    no = NA
2989
+                )
2990
+            )
2991
+        )) %>%
2992
+        dplyr::select(-c("OncoGene", "TumorSuppressor")) %>%
2993
+        dplyr::distinct()
2994
+    return(onco_tumsup)
2995
+}
... ...
@@ -18,7 +18,7 @@
18 18
 # @keywords internal
19 19
 .missing_value_col_error <- function() {
20 20
     paste(
21
-        "The `Value` column is missing or it contains non-numeric data.",
21
+        "The value column is missing or it contains non-numeric data.",
22 22
         "The column is needed for this operation.",
23 23
         "Aborting."
24 24
     )
... ...
@@ -191,3 +191,34 @@
191 191
         "See ?threshold_filter for details"
192 192
     )
193 193
 }
194
+
195
+#---- USED IN : CIS_grubbs ----
196
+.missing_annot <- function() {
197
+    paste(
198
+        "Annotation columns are missing but are required for",
199
+        "the function correct execution"
200
+    )
201
+}
202
+
203
+.non_standard_annotation_structure <- function() {
204
+    paste(
205
+        "The genomic annotation file must have the standard UCSC format,",
206
+        "see ?CIS_grubbs for details"
207
+    )
208
+}
209
+
210
+#---- USED IN : cumulative_count_union ----
211
+.agg_with_null_meta_err <- function() {
212
+    paste(
213
+        "Matrix aggregation can't be performed without the ",
214
+        "association file, please specify one in the `metadata` parameter"
215
+    )
216
+}
217
+
218
+.key_without_tp_err <- function() {
219
+    paste("The sample key must contain the time point column")
220
+}
221
+
222
+.key_not_found <- function() {
223
+    paste("One or more columns in the sample keys were not found in x")
224
+}
194 225
new file mode 100644
... ...
@@ -0,0 +1,287 @@
1
+#------------------------------------------------------------------------------#
2
+# Plotting functions
3
+#------------------------------------------------------------------------------#
4
+
5
+#' Trace volcano plot for computed CIS data.
6
+#'
7
+#' \lifecycle{experimental}
8
+#' Traces a volcano plot for IS frequency and CIS results.
9
+#'
10
+#' @details
11
+#' ## Input data frame
12
+#' Users can supply as `x` either a simple integration matrix or a
13
+#' data frame resulting from the call to \link{CIS_grubbs}
14
+#' with `add_standard_padjust = TRUE`. In the first case an internal call to
15
+#' the function `CIS_grubbs` is performed.
16
+#'
17
+#' ## Oncogene and tumor suppressor genes files
18
+#' These files are included in the package for user convenience and are
19
+#' simply UniProt files with gene annotations for human and mouse.
20
+#' For more details on how this files were generated use the help `?filname`
21
+#' function.
22
+#'
23
+#' ## Known oncogenes
24
+#' The default values are contained in a data frame exported by this package,
25
+#' it can be accessed by doing:
26
+#'
27
+#' ```{r}
28
+#' head(known_clinical_oncogenes())
29
+#'
30
+#' ```
31
+#' If the user wants to change this parameter the input data frame must
32
+#' preserve the column structure. The same goes for the `suspicious_genes`
33
+#' parameter (DOIReference column is optional):
34
+#'
35
+#' ```{r}
36
+#' head(clinical_relevant_suspicious_genes())
37
+#'
38
+#' ```
39
+#' @family Plotting functions
40
+#'
41
+#' @param x Either a simple integration matrix or a data frame resulting
42
+#' from the call to \link{CIS_grubbs} with `add_standard_padjust = TRUE`
43
+#' @param onco_db_file Uniprot file for proto-oncogenes (see details)
44
+#' @param tumor_suppressors_db_file Uniprot file for tumor-suppressor genes
45
+#' @param species One between "human", "mouse" and "all"
46
+#' @param known_onco Data frame with known oncogenes. See details.
47
+#' @param suspicious_genes Data frame with clinical relevant suspicious
48
+#' genes. See details.
49
+#' @param significance_threshold The significance threshold
50
+#' @param annotation_threshold_ontots Value above which genes are annotated
51
+#' @param facet_rows Column names to pass to the faceting function as rows
52
+#' (NULL for no faceting)
53
+#' @param facet_cols Column names to pass to the faceting function as columns
54
+#' (NULL for no faceting)
55
+#'
56
+#' @import ggplot2
57
+#' @importFrom ggrepel geom_label_repel
58
+#' @importFrom dplyr filter
59
+#'
60
+#' @return A plot
61
+#' @export
62
+#'
63
+#' @examples
64
+#' op <- options(ISAnalytics.widgets = FALSE)
65
+#'
66
+#' path_AF <- system.file("extdata", "ex_association_file.tsv",
67
+#'     package = "ISAnalytics"
68
+#' )
69
+#' root_correct <- system.file("extdata", "fs.zip",
70
+#'     package = "ISAnalytics"
71
+#' )
72
+#' root_correct <- unzip_file_system(root_correct, "fs")
73
+#'
74
+#' matrices <- import_parallel_Vispa2Matrices_auto(
75
+#'     association_file = path_AF, root = root_correct,
76
+#'     quantification_type = c("seqCount", "fragmentEstimate"),
77
+#'     matrix_type = "annotated", workers = 2, patterns = NULL,
78
+#'     matching_opt = "ANY"
79
+#' )
80
+#'
81
+#' cis <- CIS_grubbs(matrices$seqCount)
82
+#' plot <- CIS_volcano_plot(cis)
83
+#' options(op)
84
+CIS_volcano_plot <- function(x,
85
+    onco_db_file = system.file("extdata",
86
+        "201806_uniprot-Proto-oncogene.tsv.xz",
87
+        package = "ISAnalytics"
88
+    ),
89
+    tumor_suppressors_db_file = system.file("extdata",
90
+        "201806_uniprot-Tumor-suppressor.tsv.xz",
91
+        package = "ISAnalytics"
92
+    ),
93
+    species = "human",
94
+    known_onco = known_clinical_oncogenes(),
95
+    suspicious_genes =
96
+        clinical_relevant_suspicious_genes(),
97
+    significance_threshold = 0.05,
98
+    annotation_threshold_ontots = 0.1,
99
+    facet_rows = NULL,
100
+    facet_cols = NULL) {
101
+    stopifnot(is.data.frame(x))
102
+    stopifnot(is.character(onco_db_file) & length(onco_db_file) == 1)
103
+    stopifnot(is.character(tumor_suppressors_db_file) &
104
+        length(tumor_suppressors_db_file) == 1)
105
+    stopifnot(is.character(species))
106
+    stopifnot(is.data.frame(known_onco))
107
+    stopifnot(is.data.frame(suspicious_genes))
108
+    stopifnot(is.numeric(significance_threshold) |
109
+        is.integer(significance_threshold) &
110
+            length(significance_threshold) == 1)
111
+    stopifnot(is.numeric(annotation_threshold_ontots) |
112
+        is.integer(annotation_threshold_ontots) &
113
+            length(annotation_threshold_ontots) == 1)
114
+    ## Load onco and ts
115
+    oncots_to_use <- .load_onco_ts_genes(
116
+        onco_db_file,
117
+        tumor_suppressors_db_file,
118
+        species
119
+    )
120
+    ## Check if CIS function was already called
121
+    min_cis_col <- c(
122
+        "tdist_bonferroni_default", "tdist_fdr",
123
+        "neg_zscore_minus_log2_int_freq_tolerance"
124
+    )
125
+    cis_grubbs_df <- if (!all(min_cis_col %in% colnames(x))) {
126
+        if (getOption("ISAnalytics.verbose") == TRUE) {
127
+            message(paste("Calculating CIS_grubbs for x..."))
128
+        }
129
+        CIS_grubbs(x)
130
+    } else {
131
+        x
132
+    }
133
+    ## Join all dfs by gene
134
+    cis_grubbs_df <- cis_grubbs_df %>%
135
+        dplyr::left_join(oncots_to_use, by = "GeneName") %>%
136
+        dplyr::left_join(known_onco, by = "GeneName") %>%
137
+        dplyr::left_join(suspicious_genes, by = "GeneName")
138
+
139
+    ## Add infos to CIS
140
+    cis_grubbs_df <- cis_grubbs_df %>%
141
+        dplyr::mutate(minus_log_p = -log(.data$tdist_bonferroni_default,
142
+            base = 10
143
+        ))
144
+    cis_grubbs_df <- cis_grubbs_df %>%
145
+        dplyr::mutate(
146
+            minus_log_p_fdr = -log(.data$tdist_fdr, base = 10),
147
+            positive_outlier_and_significant = ifelse(
148
+                test = !is.na(.data$tdist_fdr) &
149
+                    .data$tdist_fdr < significance_threshold,
150
+                yes = TRUE,
151
+                no = FALSE
152
+            )
153
+        )
154
+    cis_grubbs_df <- cis_grubbs_df %>%
155
+        dplyr::mutate(
156
+            KnownGeneClass = ifelse(
157
+                is.na(.data$Onco1_TS2),
158
+                yes = "Other",
159
+                no = ifelse(.data$Onco1_TS2 == 1,
160
+                    yes = "OncoGene",
161
+                    no = "TumSuppressor"
162
+                )
163
+            ),
164
+            CriticalForInsMut = ifelse(!is.na(.data$KnownClonalExpansion),
165
+                yes = TRUE, no = FALSE
166
+            )
167
+        )
168
+    significance_threshold_minus_log_p <- -log(significance_threshold,
169
+        base = 10
170
+    )
171
+    annotation_threshold_ontots_log <- -log(annotation_threshold_ontots,
172
+        base = 10
173
+    )
174
+    ## Trace plot
175
+    plot_cis_fdr_slice <- ggplot2::ggplot(
176
+        data = cis_grubbs_df,
177
+        ggplot2::aes(
178
+            y = .data$minus_log_p_fdr,
179
+            x = .data$neg_zscore_minus_log2_int_freq_tolerance,
180
+            color = .data$KnownGeneClass,
181
+            fill = .data$KnownGeneClass
182
+        ),
183
+        na.rm = TRUE, se = TRUE
184
+    ) +
185
+        ggplot2::geom_point(alpha = .5, size = 3) +
186
+        ggplot2::geom_hline(
187
+            yintercept = significance_threshold_minus_log_p,
188
+            color = "black", size = 1, show.legend = TRUE, linetype = "dashed"
189
+        ) +
190
+        ggplot2::scale_y_continuous(limits = c(0, max(c(
191
+            (significance_threshold_minus_log_p + 0.5),
192
+            max(.data$minus_log_p_fdr, na.rm = TRUE)
193
+        )))) +
194
+        ggplot2::scale_x_continuous(breaks = seq(-4, 4, 2)) +
195
+        ggplot2::facet_grid(rows = {{ facet_rows }}, cols = {{ facet_cols }}) +
196
+        ggrepel::geom_label_repel(
197
+            data = dplyr::filter(cis_grubbs_df,
198
+                          .data$tdist_fdr < significance_threshold),
199
+            ggplot2::aes(label = .data$GeneName),
200
+            box.padding = ggplot2::unit(0.35, "lines"),
201
+            point.padding = ggplot2::unit(0.3, "lines"),
202
+            color = "white",
203
+            segment.color = "black"
204
+        ) +
205
+        ggplot2::theme(
206
+            strip.text.y = ggplot2::element_text(
207
+                size = 16,
208
+                colour = "blue",
209
+                angle = 270
210
+            ),
211
+            strip.text.x = ggplot2::element_text(
212
+                size = 16,
213
+                colour = "blue",
214
+                angle = 0
215
+            )
216
+        ) +
217
+        ggplot2::theme(strip.text = ggplot2::element_text(
218
+            face = "bold",
219
+            size = 16
220
+        )) +
221
+        ggplot2::theme(
222
+            axis.text.x = ggplot2::element_text(size = 16),
223
+            axis.text.y = ggplot2::element_text(size = 16),
224
+            axis.title = ggplot2::element_text(size = 16),
225
+            plot.title = ggplot2::element_text(size = 20)
226
+        ) +
227
+        ggplot2::labs(list(
228
+            title = paste(
229
+                .data$ProjectID,
230
+                "- Volcano plot of IS gene frequency and",
231
+                "CIS results"
232
+            ),
233
+            y = "P-value Grubbs test (-log10(p))",
234
+            x = "Integration frequency (log2)",
235
+            size = "Avg Transcr. Len",
236
+            color = "Onco TumSupp Genes",
237
+            subtitle = paste0(
238
+                "Significance threshold for annotation",
239
+                " labeling: P-value < ", significance_threshold,
240
+                "(FDR adjusted;",
241
+                "-log = ", (round(-log(significance_threshold, base = 10), 3)),
242
+                ").\nOnco/TS genes source: UniProt (other genes",
243
+                "labeled as 'Other'). Annotated if P-value > ",
244
+                round(annotation_threshold_ontots_log, 3)
245
+            )
246
+        ))
247
+    return(plot_cis_fdr_slice)
248
+}
249
+
250
+#' Known clinical oncogenes (for mouse and human).
251
+#'
252
+#' @return A data frame
253
+#' @importFrom tibble tibble
254
+#'
255
+#' @family Plotting function helpers
256
+#' @export
257
+#'
258
+#' @examples
259
+#' known_clinical_oncogenes()
260
+known_clinical_oncogenes <- function() {
261
+    tibble::tibble(
262
+        GeneName = c("MECOM", "CCND2", "TAL1", "LMO2", "HMGA2"),
263
+        KnownClonalExpansion = TRUE
264
+    )
265
+}
266
+
267
+#' Clinical relevant suspicious genes (for mouse and human).
268
+#'
269
+#' @return A data frame
270
+#' @importFrom tibble tibble
271
+#'
272
+#' @family Plotting function helpers
273
+#' @export
274
+#'
275
+#' @examples
276
+#' clinical_relevant_suspicious_genes()
277
+clinical_relevant_suspicious_genes <- function() {
278
+    tibble::tibble(
279
+        GeneName = c(
280
+            "DNMT3A", "TET2", "ASXL1",
281
+            "JAK2", "CBL", "TP53"
282
+        ),
283
+        ClinicalRelevance = TRUE,
284
+        DOIReference =
285
+            "https://doi.org/10.1182/blood-2018-01-829937"
286
+    )
287
+}
... ...
@@ -63,6 +63,7 @@ more at this link: [ISAnalytics Website](https://calabrialab.github.io/ISAnalyti
63 63
 * Aggregation: more info with `vignette("Working with aggregate functions", package = "ISAnalytics")`
64 64
 * Re-calibration functions: `compute_near_integrations`
65 65
 * Analysis functions: `compute_abundance`, `comparison_matrix` `separate_quant_matrices`, others
66
+* Plotting functions: `CIS_volcano_plot`
66 67
 * Utility functions
67 68
 
68 69
 # NEWS
... ...
@@ -74,10 +74,18 @@ Website](https://calabrialab.github.io/ISAnalytics/)
74 74
   - Re-calibration functions: `compute_near_integrations`
75 75
   - Analysis functions: `compute_abundance`, `comparison_matrix`
76 76
     `separate_quant_matrices`, others
77
+  - Plotting functions: `CIS_volcano_plot`
77 78
   - Utility functions
78 79
 
79 80
 # NEWS
80 81
 
82
+# ISAnalytics 0.99.13 (2020-10-19)
83
+
84
+## NEW FEATURES
85
+
86
+  - Added analysis functions `CIS_grubbs` and `cumulative_count_union`
87
+  - Added plotting functions `CIS_volcano_plot`
88
+
81 89
 # ISAnalytics 0.99.12 (2020-10-04)
82 90
 
83 91
 ## NEW FEATURES
... ...
@@ -29,6 +29,11 @@ reference:
29 29
   - threshold_filter
30 30
   - top_integrations
31 31
   - sample_statistics
32
+  - CIS_grubbs
33
+  - cumulative_count_union
34
+- title: "Plotting functions"
35
+- contents:
36
+  - CIS_volcano_plot
32 37
 - title: "Utility functions"
33 38
 - contents:
34 39
   - as_sparse_matrix
... ...
@@ -46,6 +51,8 @@ reference:
46 51
   - quantification_types
47 52
   - reduced_AF_columns
48 53
   - default_stats
54
+  - clinical_relevant_suspicious_genes
55
+  - known_clinical_oncogenes
49 56
 - title: "Package overview"
50 57
 - contents:
51 58
   - ISAnalytics
52 59
new file mode 100644
53 60
Binary files /dev/null and b/inst/extdata/201806_uniprot-Proto-oncogene.tsv.xz differ
54 61
new file mode 100644
55 62
Binary files /dev/null and b/inst/extdata/201806_uniprot-Tumor-suppressor.tsv.xz differ
56 63
new file mode 100644
57 64
Binary files /dev/null and b/inst/extdata/hg19.refGene.oracle.tsv.xz differ
... ...
@@ -102,3 +102,43 @@
102 102
 
103 103
 #' @describeIn "fs" Example of file system with issues
104 104
 "fserr"
105
+
106
+#' Gene annotation file for human hg19 genome.
107
+#'
108
+#' @description
109
+#' This file was obtained following this steps:
110
+#'
111
+#' 1. Download from {http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/}
112
+#' the refGene.sql, knownGene.sql, knownToRefSeq.sql, kgXref.sql tables
113
+#' 2. Import everything it in mysql
114
+#' 3. Generate views for annotation:
115
+#'
116
+#' ```
117
+#' SELECT kg.`chrom`, min(kg.cdsStart) as CDS_minStart,
118
+#' max(kg.`cdsEnd`) as CDS_maxEnd, k2a.geneSymbol,
119
+#' kg.`strand` as GeneStrand, min(kg.txStart) as TSS_minStart,
120
+#' max(kg.txEnd) as TSS_maxStart,
121
+#' kg.proteinID as ProteinID, k2a.protAcc as ProteinAcc, k2a.spDisplayID
122
+#' FROM `knownGene` AS kg JOIN kgXref AS k2a
123
+#' ON BINARY kg.name = k2a.kgID COLLATE latin1_bin
124
+#' -- latin1_swedish_ci
125
+#' -- WHERE k2a.spDisplayID IS NOT NULL and (k2a.`geneSymbol` LIKE 'Tcra%' or
126
+#' k2a.`geneSymbol` LIKE 'TCRA%')
127
+#' WHERE (k2a.spDisplayID IS NOT NULL or k2a.spDisplayID NOT LIKE '')
128
+#' and k2a.`geneSymbol` LIKE 'Tcra%'
129
+#' group by kg.`chrom`, k2a.geneSymbol
130
+#' ORDER BY kg.chrom ASC , kg.txStart ASC
131
+#' ```
132
+"hg19.refGene.oracle"
133
+
134
+## keywords `proto-oncogenes` `tumor suppressor`
135
+
136
+#' Data frames for proto-oncogenes (human and mouse)
137
+#' amd tumor-suppressor genes from UniProt.
138
+#'
139
+#' @description
140
+#' The file is simply a result of a research with the keywords
141
+#' "proto-oncogenes" and "tumor suppressor" for the target genomes
142
+#' on UniProt database.
143
+"201806_uniprot-Proto-oncogene"
144
+"201806_uniprot-Tumor-suppressor"
105 145
new file mode 100644
... ...
@@ -0,0 +1,88 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/analysis-functions.R
3
+\name{CIS_grubbs}
4
+\alias{CIS_grubbs}
5
+\title{Grubbs test for Common Insertion Sites (CIS).}
6
+\usage{
7
+CIS_grubbs(
8
+  x,
9
+  genomic_annotation_file = system.file("extdata", "hg19.refGene.oracle.tsv.xz",
10
+    package = "ISAnalytics"),
11
+  grubbs_flanking_gene_bp = 1e+05,
12
+  threshold_alpha = 0.05,
13
+  add_standard_padjust = TRUE
14
+)
15
+}
16
+\arguments{
17
+\item{x}{An integration matrix, must include the \code{mandatory_IS_vars()}
18
+columns and the \code{annotation_IS_vars()} columns}
19
+
20
+\item{genomic_annotation_file}{Database file for gene annotation,
21
+see details}
22
+
23
+\item{grubbs_flanking_gene_bp}{Number of base pairs flanking a gene}
24
+
25
+\item{threshold_alpha}{Significance threshold}
26
+
27
+\item{add_standard_padjust}{Compute the standard padjust?}
28
+}
29
+\value{
30
+A data frame
31
+}
32
+\description{
33
+\lifecycle{experimental}
34
+Statistical approach for the validation of common insertion sites
35
+significance based on the comparison of the integration frequency
36
+at the CIS gene with respect to other genes contained in the
37
+surrounding genomic regions. For more details please refer to
38
+this paper:
39
+\url{https://ashpublications.org/blood/article/117/20/5332/21206/Lentiviral-vector-common-integration-sites-in}
40
+}
41
+\details{
42
+\subsection{Genomic annotation file}{
43
+
44
+This file is a data base, or more simply a .tsv file to import, with
45
+genes annotation for the specific genome. The annotations for the
46
+human genome (hg19) is already included in this package.
47
+If for any reason the user is performing an analysis on another genome,
48
+this file needs to be changed respecting the USCS Genome Browser
49
+format, meaning the input file headers should be:\preformatted{## name2, chrom, strand 
50
+##  min_txStart, max_txEnd, minmax_TxLen 
51
+##  average_TxLen, name, min_cdsStart 
52
+##  max_cdsEnd, minmax_CdsLen, average_CdsLen
53
+}
54
+}
55
+}
56
+\examples{
57
+op <- options(ISAnalytics.widgets = FALSE)
58
+
59
+path_AF <- system.file("extdata", "ex_association_file.tsv",
60
+    package = "ISAnalytics"
61
+)
62
+root_correct <- system.file("extdata", "fs.zip",
63
+    package = "ISAnalytics"
64
+)
65
+root_correct <- unzip_file_system(root_correct, "fs")
66
+
67
+matrices <- import_parallel_Vispa2Matrices_auto(
68
+    association_file = path_AF, root = root_correct,
69
+    quantification_type = c("seqCount", "fragmentEstimate"),
70
+    matrix_type = "annotated", workers = 2, patterns = NULL,
71
+    matching_opt = "ANY"
72
+)
73
+
74
+cis <- CIS_grubbs(matrices$seqCount)
75
+
76
+options(op)
77
+}
78
+\seealso{
79
+Other Analysis functions: 
80
+\code{\link{comparison_matrix}()},
81
+\code{\link{compute_abundance}()},
82
+\code{\link{cumulative_count_union}()},
83
+\code{\link{sample_statistics}()},
84
+\code{\link{separate_quant_matrices}()},
85
+\code{\link{threshold_filter}()},
86
+\code{\link{top_integrations}()}
87
+}
88
+\concept{Analysis functions}
0 89
new file mode 100644
... ...
@@ -0,0 +1,122 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/plotting-functions.R
3
+\name{CIS_volcano_plot}
4
+\alias{CIS_volcano_plot}
5
+\title{Trace volcano plot for computed CIS data.}
6
+\usage{
7
+CIS_volcano_plot(
8
+  x,
9
+  onco_db_file = system.file("extdata", "201806_uniprot-Proto-oncogene.tsv.xz", package
10
+    = "ISAnalytics"),
11
+  tumor_suppressors_db_file = system.file("extdata",
12
+    "201806_uniprot-Tumor-suppressor.tsv.xz", package = "ISAnalytics"),
13
+  species = "human",
14
+  known_onco = known_clinical_oncogenes(),
15
+  suspicious_genes = clinical_relevant_suspicious_genes(),
16
+  significance_threshold = 0.05,
17
+  annotation_threshold_ontots = 0.1,
18
+  facet_rows = NULL,
19
+  facet_cols = NULL
20
+)
21
+}
22
+\arguments{
23
+\item{x}{Either a simple integration matrix or a data frame resulting
24
+from the call to \link{CIS_grubbs} with \code{add_standard_padjust = TRUE}}
25
+
26
+\item{onco_db_file}{Uniprot file for proto-oncogenes (see details)}
27
+
28
+\item{tumor_suppressors_db_file}{Uniprot file for tumor-suppressor genes}
29
+
30
+\item{species}{One between "human", "mouse" and "all"}
31
+
32
+\item{known_onco}{Data frame with known oncogenes. See details.}
33
+
34
+\item{suspicious_genes}{Data frame with clinical relevant suspicious
35
+genes. See details.}
36
+
37
+\item{significance_threshold}{The significance threshold}
38
+
39
+\item{annotation_threshold_ontots}{Value above which genes are annotated}
40
+
41
+\item{facet_rows}{Column names to pass to the faceting function as rows
42
+(NULL for no faceting)}
43
+
44
+\item{facet_cols}{Column names to pass to the faceting function as columns
45
+(NULL for no faceting)}
46
+}
47
+\value{
48
+A plot
49
+}
50
+\description{
51
+\lifecycle{experimental}
52
+Traces a volcano plot for IS frequency and CIS results.
53
+}
54
+\details{
55
+\subsection{Input data frame}{
56
+
57
+Users can supply as \code{x} either a simple integration matrix or a
58
+data frame resulting from the call to \link{CIS_grubbs}
59
+with \code{add_standard_padjust = TRUE}. In the first case an internal call to
60
+the function \code{CIS_grubbs} is performed.
61
+}
62
+
63
+\subsection{Oncogene and tumor suppressor genes files}{
64
+
65
+These files are included in the package for user convenience and are
66
+simply UniProt files with gene annotations for human and mouse.
67
+For more details on how this files were generated use the help \code{?filname}
68
+function.
69
+}
70
+
71
+\subsection{Known oncogenes}{
72
+
73
+The default values are contained in a data frame exported by this package,
74
+it can be accessed by doing:\if{html}{\out{<div class="r">}}\preformatted{head(known_clinical_oncogenes())
75
+}\if{html}{\out{</div>}}\preformatted{## # A tibble: 5 x 2
76
+##   GeneName KnownClonalExpansion
77
+##   <chr>    <lgl>               
78
+## 1 MECOM    TRUE                
79
+## 2 CCND2    TRUE                
80
+## 3 TAL1     TRUE                
81
+## 4 LMO2     TRUE                
82
+## 5 HMGA2    TRUE
83
+}
84
+
85
+If the user wants to change this parameter the input data frame must
86
+preserve the column structure. The same goes for the \code{suspicious_genes}
87
+parameter (DOIReference column is optional):\if{html}{\out{<div class="r">}}\preformatted{head(clinical_relevant_suspicious_genes())
88
+}\if{html}{\out{</div>}}\preformatted{## # A tibble: 6 x 3
89
+##   GeneName ClinicalRelevance DOIReference                                
90
+##   <chr>    <lgl>             <chr>                                       
91
+## 1 DNMT3A   TRUE              https://doi.org/10.1182/blood-2018-01-829937
92
+## 2 TET2     TRUE              https://doi.org/10.1182/blood-2018-01-829937
93
+## 3 ASXL1    TRUE              https://doi.org/10.1182/blood-2018-01-829937
94
+## 4 JAK2     TRUE              https://doi.org/10.1182/blood-2018-01-829937
95
+## 5 CBL      TRUE              https://doi.org/10.1182/blood-2018-01-829937
96
+## 6 TP53     TRUE              https://doi.org/10.1182/blood-2018-01-829937
97
+}
98
+}
99
+}
100
+\examples{
101
+op <- options(ISAnalytics.widgets = FALSE)
102
+
103
+path_AF <- system.file("extdata", "ex_association_file.tsv",
104
+    package = "ISAnalytics"
105
+)
106
+root_correct <- system.file("extdata", "fs.zip",
107
+    package = "ISAnalytics"
108
+)
109
+root_correct <- unzip_file_system(root_correct, "fs")
110
+
111
+matrices <- import_parallel_Vispa2Matrices_auto(
112
+    association_file = path_AF, root = root_correct,
113
+    quantification_type = c("seqCount", "fragmentEstimate"),
114
+    matrix_type = "annotated", workers = 2, patterns = NULL,
115
+    matching_opt = "ANY"
116
+)
117
+
118
+cis <- CIS_grubbs(matrices$seqCount)
119
+plot <- CIS_volcano_plot(cis)
120
+options(op)
121
+}
122
+\concept{Plotting functions}
... ...
@@ -63,6 +63,12 @@ and Annotation of Vector Integration Sites}
63 63
 \item \code{\link{threshold_filter}}
64 64
 \item \code{\link{top_integrations}}
65 65
 \item \code{\link{sample_statistics}}
66
+\item \code{\link{CIS_grubbs}}
67
+\item \code{\link{cumulative_count_union}}
68
+}
69
+\item Plotting functions:
70
+\itemize{
71
+\item \code{\link{CIS_volcano_plot}}
66 72
 }
67 73
 \item Utility functions:
68 74
 \itemize{
69 75
new file mode 100644
... ...
@@ -0,0 +1,22 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/plotting-functions.R
3
+\name{clinical_relevant_suspicious_genes}
4
+\alias{clinical_relevant_suspicious_genes}
5
+\title{Clinical relevant suspicious genes (for mouse and human).}
6
+\usage{
7
+clinical_relevant_suspicious_genes()
8
+}
9
+\value{
10
+A data frame
11
+}
12
+\description{
13
+Clinical relevant suspicious genes (for mouse and human).
14
+}
15
+\examples{
16
+clinical_relevant_suspicious_genes()
17
+}
18
+\seealso{
19
+Other Plotting function helpers: 
20
+\code{\link{known_clinical_oncogenes}()}
21
+}
22
+\concept{Plotting function helpers}
... ...
@@ -61,7 +61,9 @@ options(op)
61 61
 \link{quantification_types}
62 62
 
63 63
 Other Analysis functions: 
64
+\code{\link{CIS_grubbs}()},
64 65
 \code{\link{compute_abundance}()},
66
+\code{\link{cumulative_count_union}()},
65 67
 \code{\link{sample_statistics}()},
66 68
 \code{\link{separate_quant_matrices}()},
67 69
 \code{\link{threshold_filter}()},
... ...
@@ -38,7 +38,9 @@ abundance <- compute_abundance(matrix)
38 38
 }
39 39
 \seealso{
40 40
 Other Analysis functions: 
41
+\code{\link{CIS_grubbs}()},
41 42
 \code{\link{comparison_matrix}()},
43
+\code{\link{cumulative_count_union}()},
42 44
 \code{\link{sample_statistics}()},
43 45
 \code{\link{separate_quant_matrices}()},
44 46
 \code{\link{threshold_filter}()},
45 47
new file mode 100644
... ...
@@ -0,0 +1,117 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/analysis-functions.R
3
+\name{cumulative_count_union}
4
+\alias{cumulative_count_union}
5
+\title{Integrations cumulative count in time by sample}
6
+\usage{
7
+cumulative_count_union(
8
+  x,
9
+  association_file = NULL,
10
+  timepoint_column = "TimePoint",
11
+  key = c("SubjectID", "CellMarker", "Tissue", "TimePoint"),
12
+  include_tp_zero = FALSE,
13
+  zero = "0000",
14
+  aggregate = FALSE,
15
+  ...
16
+)
17
+}
18
+\arguments{
19
+\item{x}{A simple integration matrix or an aggregated matrix (see details)}
20
+
21
+\item{association_file}{NULL or the association file for x if \code{aggregate}
22
+is set to TRUE}
23
+
24
+\item{timepoint_column}{What is the name of the time point column?}
25
+
26
+\item{key}{The aggregation key - must always contain the \code{timepoint_column}}
27
+
28
+\item{include_tp_zero}{Include timepoint 0?}
29
+
30
+\item{zero}{How is 0 coded in the data frame?}
31
+
32
+\item{aggregate}{Should x be aggregated?}
33
+
34
+\item{...}{Additional parameters to pass to \code{aggregate_values_by_key}}
35
+}
36
+\value{
37
+A data frame
38
+}
39
+\description{
40
+\lifecycle{experimental}
41
+This function computes the cumulative number of integrations
42
+observed in each sample at different time points by assuming that
43
+if an integration is observed at time point "t" then it is also observed in
44
+time point "t+1".
45
+}
46
+\details{
47
+\subsection{Input data frame}{
48
+
49
+The user can provide as input for the \code{x} parameter both a simple
50
+integration matrix AND setting the \code{aggregate} parameter to TRUE,
51
+or provide an already aggregated matrix via
52
+\link{aggregate_values_by_key}.
53
+If the user supplies a matrix to be aggregated the \code{association_file}
54
+parameter must not be NULL: aggregation will be done by an internal
55
+call to the aggregation function.
56
+If the user supplies an already aggregated matrix, the \code{key} parameter
57
+is the key used for aggregation -
58
+\strong{NOTE: for this operation is mandatory
59
+that the time point column is included in the key.}
60
+}
61
+
62
+\subsection{Assumptions on time point format}{
63
+
64
+By using the functions provided by this package, when imported,
65
+an association file will be correctly formatted for future usage.
66
+In the formatting process there is also a padding operation performed on
67
+time points: this means the functions expects the time point column to
68
+be of type character and to be correctly padded with 0s. If the
69
+chosen column for time point is detected as numeric the function will
70
+attempt the conversion to character and automatic padding.
71
+If you choose to import the association file not using the
72
+\link{import_association_file} function, be sure to check the format of
73
+the chosen column to avoid undesired results.
74
+}
75
+}
76
+\examples{
77
+op <- options(ISAnalytics.widgets = FALSE)
78
+
79
+path_AF <- system.file("extdata", "ex_association_file.tsv",
80
+    package = "ISAnalytics"
81
+)
82
+root_correct <- system.file("extdata", "fs.zip",
83
+    package = "ISAnalytics"
84
+)
85
+root_correct <- unzip_file_system(root_correct, "fs")
86
+
87
+association_file <- import_association_file(path_AF, root_correct)
88
+matrices <- import_parallel_Vispa2Matrices_auto(
89
+    association_file = association_file, root = NULL,
90
+    quantification_type = c("seqCount", "fragmentEstimate"),
91
+    matrix_type = "annotated", workers = 2, patterns = NULL,
92
+    matching_opt = "ANY"
93
+)
94
+
95
+#### EXTERNAL AGGREGATION
96
+aggregated <- aggregate_values_by_key(matrices$seqCount, association_file)
97
+cumulative_count <- cumulative_count_union(aggregated)
98
+
99
+#### INTERNAL AGGREGATION
100
+cumulative_count_2 <- cumulative_count_union(matrices$seqCount,
101
+    association_file,
102
+    aggregate = TRUE
103
+)
104
+
105
+options(op)
106
+}
107
+\seealso{
108
+Other Analysis functions: 
109
+\code{\link{CIS_grubbs}()},
110
+\code{\link{comparison_matrix}()},
111
+\code{\link{compute_abundance}()},
112
+\code{\link{sample_statistics}()},
113
+\code{\link{separate_quant_matrices}()},
114
+\code{\link{threshold_filter}()},
115
+\code{\link{top_integrations}()}
116
+}
117
+\concept{Analysis functions}
0 118
new file mode 100644
... ...
@@ -0,0 +1,22 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/plotting-functions.R
3
+\name{known_clinical_oncogenes}
4
+\alias{known_clinical_oncogenes}
5
+\title{Known clinical oncogenes (for mouse and human).}
6
+\usage{
7
+known_clinical_oncogenes()
8
+}
9
+\value{
10
+A data frame
11
+}
12
+\description{
13
+Known clinical oncogenes (for mouse and human).
14
+}
15
+\examples{
16
+known_clinical_oncogenes()
17
+}
18
+\seealso{
19
+Other Plotting function helpers: 
20
+\code{\link{clinical_relevant_suspicious_genes}()}
21
+}
22
+\concept{Plotting function helpers}
... ...
@@ -89,8 +89,10 @@ options(op)
89 89
 }
90 90
 \seealso{
91 91
 Other Analysis functions: 
92
+\code{\link{CIS_grubbs}()},
92 93
 \code{\link{comparison_matrix}()},
93 94
 \code{\link{compute_abundance}()},
95
+\code{\link{cumulative_count_union}()},
94 96
 \code{\link{separate_quant_matrices}()},
95 97
 \code{\link{threshold_filter}()},
96 98
 \code{\link{top_integrations}()}
... ...
@@ -61,8 +61,10 @@ options(op)
61 61
 \link{quantification_types}
62 62
 
63 63
 Other Analysis functions: 
64
+\code{\link{CIS_grubbs}()},
64 65
 \code{\link{comparison_matrix}()},
65 66
 \code{\link{compute_abundance}()},
67
+\code{\link{cumulative_count_union}()},
66 68
 \code{\link{sample_statistics}()},
67 69
 \code{\link{threshold_filter}()},
68 70
 \code{\link{top_integrations}()}
... ...
@@ -204,8 +204,10 @@ filtered <- threshold_filter(example_list,
204 204
 }
205 205
 \seealso{
206 206
 Other Analysis functions: 
207
+\code{\link{CIS_grubbs}()},
207 208
 \code{\link{comparison_matrix}()},
208 209
 \code{\link{compute_abundance}()},
210
+\code{\link{cumulative_count_union}()},
209 211
 \code{\link{sample_statistics}()},
210 212
 \code{\link{separate_quant_matrices}()},
211 213
 \code{\link{top_integrations}()}
... ...
@@ -53,8 +53,10 @@ top <- top_integrations(smpl,
53 53
 }
54 54
 \seealso{
55 55
 Other Analysis functions: 
56
+\code{\link{CIS_grubbs}()},
56 57
 \code{\link{comparison_matrix}()},
57 58
 \code{\link{compute_abundance}()},
59
+\code{\link{cumulative_count_union}()},
58 60
 \code{\link{sample_statistics}()},
59 61
 \code{\link{separate_quant_matrices}()},
60 62
 \code{\link{threshold_filter}()}
... ...
@@ -1125,3 +1125,143 @@ test_that("sample_statistics returns correctly", {
1125 1125
         colnames(res$x)))
1126 1126
     expect_true("Value_sum" %in% colnames(res$metadata))
1127 1127
 })
1128
+
1129
+#------------------------------------------------------------------------------#
1130
+# Test CIS_grubbs
1131
+#------------------------------------------------------------------------------#
1132
+test_that("CIS_grubbs fails if x is not standard matrix", {
1133
+    wrong <- tibble::tibble(a = c(1, 2, 3), b = c(4, 5, 6))
1134
+    expect_error(
1135
+        {
1136
+            CIS_grubbs(wrong)
1137
+        },
1138
+        regexp = .non_ISM_error()
1139
+    )
1140
+    expect_error(
1141
+        {
1142
+            CIS_grubbs(smpl)
1143
+        },
1144
+        regexp = .missing_annot()
1145
+    )
1146
+})
1147
+
1148
+test_that("CIS_grubbs fails if file doesn't exist", {
1149
+    expect_error({
1150
+        CIS_grubbs(matrices_correct$seqCount,
1151
+            genomic_annotation_file = "myfile.tsv"
1152
+        )
1153
+    })
1154
+})
1155
+
1156
+test_that("CIS_grubbs produces correct df", {
1157
+    output_cols <- c(
1158
+        "GeneName", "GeneStrand", "chr", "n_IS_perGene",
1159
+        "min_bp_integration_locus", "max_bp_integration_locus",
1160
+        "IS_span_bp", "avg_bp_integration_locus",
1161
+        "median_bp_integration_locus", "distinct_orientations",
1162
+        "describe", "average_TxLen", "geneIS_frequency_byHitIS",
1163
+        "raw_gene_integration_frequency",
1164
+        "integration_frequency_withtolerance",
1165
+        "minus_log2_integration_freq_withtolerance",
1166
+        "zscore_minus_log2_int_freq_tolerance",
1167
+        "neg_zscore_minus_log2_int_freq_tolerance", "t_z_mlif",
1168
+        "tdist2t", "tdist_pt", "tdist_bonferroni_default",
1169
+        "tdist_bonferroni", "tdist_fdr", "tdist_benjamini",
1170
+        "tdist_positive_and_corrected", "tdist_positive",
1171
+        "tdist_positive_and_correctedEM"
1172
+    )
1173
+    result <- CIS_grubbs(matrices_correct$seqCount)
1174
+    expect_true(ncol(result) == 28)
1175
+    expect_true(all(output_cols %in% colnames(result)))
1176
+    result <- CIS_grubbs(matrices_correct$seqCount,
1177
+        add_standard_padjust = FALSE
1178
+    )
1179
+    expect_true(ncol(result) == 25)
1180
+    expect_true(all(output_cols[!output_cols %in% c(
1181
+        "tdist_bonferroni",
1182
+        "tdist_fdr",
1183
+        "tdist_benjamini"
1184
+    )] %in%
1185
+        colnames(result)))
1186
+})
1187
+
1188
+#------------------------------------------------------------------------------#
1189
+# Test cumulative_count_union
1190
+#------------------------------------------------------------------------------#
1191
+minimal_agg_example <- tibble::tibble(
1192
+    chr = c(1, 2, 2, 5, 5, 3, 3, 10, 11),
1193
+    integration_locus = c(
1194
+        14505,
1195
+        1005,
1196
+        15513,
1197
+        4561,
1198
+        10167,
1199
+        5247,
1200
+        10951,
1201
+        23403,
1202
+        25611
1203
+    ),
1204
+    strand = c(
1205
+        "+", "-", "+", "+", "-", "-",
1206
+        "+", "-", "-"
1207
+    ),
1208
+    SubjectID = c(
1209
+        rep_len("S1", 5),
1210
+        rep_len("S2", 4)
1211
+    ),
1212
+    CellMarker = c(
1213
+        rep_len("CM1", 5),
1214
+        rep_len("CM2", 4)
1215
+    ),
1216
+    Tissue = c(
1217
+        rep_len("T1", 5),
1218
+        rep_len("T2", 4)
1219
+    ),
1220
+    TimePoint = c(
1221
+        rep_len("0001", 3),
1222
+        "0010", "0015",
1223
+        rep_len("0001", 2),
1224
+        rep_len("0020", 2)
1225
+    ),
1226
+    Value_sum = c(
1227
+        50, 80, 650, 46, 79, 633,
1228
+        875, 99, 123
1229
+    )
1230
+)
1231
+
1232
+test_that("cumulative_count_union stops if tp not in key", {
1233
+    expect_error(
1234
+        {
1235
+            cumulative_count_union(minimal_agg_example, key = c(
1236
+                "SubjectID",
1237
+                "CellMarker",
1238
+                "Tissue"
1239
+            ))
1240
+        },
1241
+        regexp = .key_without_tp_err()
1242
+    )
1243
+})
1244
+
1245
+test_that("cumulative_count_union produces correct output", {
1246
+    cum_count <- cumulative_count_union(minimal_agg_example)
1247
+    expected <- tibble::tibble(
1248
+        SubjectID = c(
1249
+            rep_len("S1", 3),
1250
+            rep_len("S2", 2)
1251
+        ),
1252
+        CellMarker = c(
1253
+            rep_len("CM1", 3),
1254
+            rep_len("CM2", 2)
1255
+        ),
1256
+        Tissue = c(
1257
+            rep_len("T1", 3),
1258
+            rep_len("T2", 2)
1259
+        ),
1260
+        TimePoint = c(
1261
+            "0001", "0010", "0015",
1262
+            "0001", "0020"
1263
+        ),
1264
+        count = c(3, 4, 5, 2, 4)
1265
+    )
1266
+    expect_equal(cum_count, expected)
1267
+})
... ...
@@ -28,7 +28,7 @@
28 28
   author = {{calabrialab}},
29 29
   year = {2020},
30 30
   url = {http://www.bioconductor.org/packages/ISAnalytics},
31
-  note = {https://github.com/calabrialab/ISAnalytics - R package version 0.99.11},
31
+  note = {https://github.com/calabrialab/ISAnalytics - R package version 0.99.13},
32 32
   doi = {10.18129/B9.bioc.ISAnalytics},
33 33
 }
34 34
 
... ...
@@ -28,7 +28,7 @@
28 28
   author = {{calabrialab}},
29 29
   year = {2020},
30 30
   url = {http://www.bioconductor.org/packages/ISAnalytics},
31
-  note = {https://github.com/calabrialab/ISAnalytics - R package version 0.99.11},
31
+  note = {https://github.com/calabrialab/ISAnalytics - R package version 0.99.13},
32 32
   doi = {10.18129/B9.bioc.ISAnalytics},
33 33
 }
34 34
 
... ...
@@ -28,7 +28,7 @@
28 28
   author = {{calabrialab}},
29 29
   year = {2020},
30 30
   url = {http://www.bioconductor.org/packages/ISAnalytics},
31
-  note = {https://github.com/calabrialab/ISAnalytics - R package version 0.99.11},
31
+  note = {https://github.com/calabrialab/ISAnalytics - R package version 0.99.13},
32 32
   doi = {10.18129/B9.bioc.ISAnalytics},
33 33
 }
34 34