Browse code

Modified filterData function so that NULL values work for all parameters as documented

Christian Arnold authored on 19/07/2022 21:12:48
Showing 3 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: GRaNIE
2 2
 Title: GRaNIE: Reconstruction cell type specific gene regulatory networks including enhancers using chromatin accessibility and RNA-seq data
3
-Version: 1.1.8
3
+Version: 1.1.9
4 4
 Encoding: UTF-8
5 5
 Authors@R: c(person("Christian", "Arnold", email =
6 6
         "chrarnold@web.de", role = c("cre","aut")),
... ...
@@ -618,7 +618,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
618 618
 #' @param maxNormalizedMean_peaks Numeric or \code{NULL}. Default \code{NULL}. Maximum mean across all samples for a peak to be retained for the normalized counts table. Set to \code{NULL} for not applying the filter.
619 619
 #' @param minNormalizedMeanRNA Numeric or \code{NULL}. Default 5. Minimum mean across all samples for a gene to be retained for the normalized counts table. Set to \code{NULL} for not applying the filter.
620 620
 #' @param maxNormalizedMeanRNA Numeric or \code{NULL}. Default \code{NULL}. Maximum mean across all samples for a gene to be retained for the normalized counts table. Set to \code{NULL} for not applying the filter.
621
-#' @param chrToKeep_peaks Character vector. Default \code{c(paste0("chr", 1:22), "chrX", "chrY")}. Vector of chromosomes that peaks are allowed to come from. This filter can be used to filter sex chromosomes from the peaks, for example.
621
+#' @param chrToKeep_peaks Character vector or \code{NULL}. Default \code{c(paste0("chr", 1:22), "chrX", "chrY")}. Vector of chromosomes that peaks are allowed to come from. This filter can be used to filter sex chromosomes from the peaks, for example. Set to \code{NULL} for not applying the filter.
622 622
 #' @param minSize_peaks Integer or \code{NULL}. Default \code{NULL}. Minimum peak size (width, end - start) for a peak to be retained. Set to \code{NULL} for not applying the filter.
623 623
 #' @param maxSize_peaks Integer or \code{NULL}. Default 10000. Maximum peak size (width, end - start) for a peak to be retained. Set to \code{NULL} for not applying the filter.
624 624
 #' @param minCV_peaks Numeric or \code{NULL}. Default \code{NULL}. Minimum CV (coefficient of variation, a unitless measure of variation) for a peak to be retained. Set to \code{NULL} for not applying the filter.
... ...
@@ -643,11 +643,11 @@ filterData <- function (GRN,
643 643
   GRN = .addFunctionLogToObject(GRN) 
644 644
   
645 645
   checkmate::assertClass(GRN, "GRN")
646
-  checkmate::assertNumber(minNormalizedMean_peaks, lower = 0)
647
-  checkmate::assertNumber(minNormalizedMeanRNA, lower = 0)
646
+  checkmate::assertNumber(minNormalizedMean_peaks, lower = 0, null.ok = TRUE)
647
+  checkmate::assertNumber(minNormalizedMeanRNA, lower = 0, null.ok = TRUE)
648 648
   checkmate::assertNumber(maxNormalizedMean_peaks, lower = minNormalizedMean_peaks , null.ok = TRUE)
649 649
   checkmate::assertNumber(maxNormalizedMeanRNA, lower = minNormalizedMeanRNA, null.ok = TRUE)
650
-  checkmate::assertCharacter(chrToKeep_peaks, min.len = 1, any.missing = FALSE)
650
+  checkmate::assertCharacter(chrToKeep_peaks, min.len = 1, any.missing = FALSE, null.ok = TRUE)
651 651
   checkmate::assertIntegerish(minSize_peaks, lower = 1, null.ok = TRUE)
652 652
   checkmate::assertIntegerish(maxSize_peaks, lower = dplyr::if_else(is.null(minSize_peaks), 1, minSize_peaks), null.ok = TRUE)
653 653
   checkmate::assertNumber(minCV_peaks, lower = 0, null.ok = TRUE)
... ...
@@ -729,7 +729,13 @@ filterData <- function (GRN,
729 729
     minSize_peaks = 1
730 730
   }
731 731
   
732
-  futile.logger::flog.info(paste0("Filter and sort peaks and remain only those on the following chromosomes: ", paste0(chrToKeep, collapse = ",")))
732
+  if (is.null(chrToKeep)) {
733
+    chrToKeep = GRN@data$peaks$consensusPeaks %>% dplyr::pull(chr) %>% unique()
734
+  } else {
735
+    futile.logger::flog.info(paste0("Filter and sort peaks and remain only those on the following chromosomes: ", paste0(chrToKeep, collapse = ",")))
736
+  }
737
+  
738
+
733 739
   futile.logger::flog.info(paste0("Filter and sort peaks by size and remain only those smaller than : ", maxSize_peaks))
734 740
   futile.logger::flog.info(paste0(" Number of peaks before filtering: ", nrow(GRN@data$peaks$consensusPeaks)))
735 741
   ids = strsplit(GRN@data$peaks$consensusPeaks %>% dplyr::pull(!!(idColumn)), split = ":", fixed = TRUE)
... ...
@@ -798,7 +804,9 @@ filterData <- function (GRN,
798 804
   
799 805
   
800 806
   if (is.null(minMean)) {
801
-    minMean = 0
807
+    
808
+    # As data can be pre-normalized, set the minimum to a very small value so the filter is effectively off
809
+    minMean = -9e+99
802 810
   }
803 811
   
804 812
   if (is.null(maxMean)) {
... ...
@@ -838,19 +846,23 @@ filterData <- function (GRN,
838 846
     futile.logger::flog.info(paste0("  Filter genes by CV: Min = ", minCV, ", Max = ", maxCV))
839 847
   }
840 848
   
849
+  messageMean = paste0("  Filter genes by mean:")
841 850
   
842 851
   if (is.null(minMean)) {
843
-    minMean = 0
852
+    minMean = -9e+99
853
+  } else {
854
+    messageMean = paste0(messageMean, " Min = ", minMean)
844 855
   }
845 856
   
846
-  
857
+
847 858
   if (is.null(maxMean)) {
848
-    futile.logger::flog.info(paste0("  Filter genes by mean: Min = ", minMean))
849 859
     maxMean = 9e+99
850 860
   } else {
851
-    futile.logger::flog.info(paste0("  Filter genes by mean: Min = ", minMean, ", Max = ", maxMean))  
861
+    messageMean = paste0(messageMean, " Max = ", maxMean)
852 862
   }   
853 863
   
864
+  futile.logger::flog.info(messageMean)
865
+  
854 866
   
855 867
   genesFiltered = dplyr::filter(GRN@annotation$genes, 
856 868
                                 gene.CV >= minCV, gene.CV <= maxCV, 
... ...
@@ -2507,7 +2507,7 @@ plotCommunitiesStats <- function(GRN, outputFolder = NULL, basenameOutput = NULL
2507 2507
 #' Similarly to \code{\link{plotGeneralEnrichment}}, the results of the community-based enrichment analysis are plotted. By default, the results for the 10 largest communities are displayed. Additionally, if a general enrichment analysis was previously generated, this function plots an additional heatmap to compare the general enrichment with the community-based enrichment. A reduced version of this heatmap is also produced where terms are filtered out to improve visibility and display and highlight the most significant terms.
2508 2508
 #' 
2509 2509
 #' @inheritParams plotGeneralEnrichment
2510
-#' @param display Character. Default \code{"byRank"}. One of: \code{"byRank"}, \code{"byLabel"}. Specify whether the communities will by displayed based on their rank, where the largest community (with most vertices) would have a rank of 1, or by their label. Note that the label is independent of the rank.
2510
+#' @param display Character. Default \code{"byRank"}. One of: \code{"byRank"}, \code{"byLabel"}. Specify whether the communities will be displayed based on their rank, where the largest community (with most vertices) would have a rank of 1, or by their label. Note that the label is independent of the rank.
2511 2511
 #' @param communities \code{NULL} or numeric vector. Default \code{NULL}. If set to \code{NULL}, the default, all community enrichments that have been calculated before are plotted. If a numeric vector is specified: Depending on what was specified in the \code{display} parameter, this parameter indicates either the rank or the label of the communities to be plotted. For example, for \code{communities = c(1,4)}, if \code{display = "byRank"}, the results for the first and fourth largest communities are plotted. If \code{display = "byLabel"}, the results for the communities labeled \code{"1"} and \code{"4"} are plotted.
2512 2512
 #' @param nSignificant Numeric. Default 3. Threshold to filter out an ontology term with fewer than \code{nSignificant} overlapping genes.
2513 2513
 #' @param nID Numeric. Default 10. For the reduced heatmap, number of top terms to select per community.