Browse code

Merge pull request #42 from federicomarini/heatmap_winsorized

Heatmap winsorized

Federico Marini authored on 07/10/2022 13:26:45 • GitHub committed on 07/10/2022 13:26:45
Showing 5 changed files

... ...
@@ -9,7 +9,7 @@ Authors@R:
9 9
             given = "Federico", family = "Marini", role = c("aut", "cre"), 
10 10
             email = "marinif@uni-mainz.de", comment = c(ORCID = "0000-0003-3252-7758")
11 11
         ),
12
-		person(
12
+		    person(
13 13
             given = "Annekathrin", family = "Ludt", role = c("aut"), 
14 14
             email = "anneludt@uni-mainz.de", comment = c(ORCID = "0000-0002-2475-4945")
15 15
         )
... ...
@@ -1,5 +1,9 @@
1 1
 # GeneTonic 2.2.0
2 2
 
3
+## New features
4
+
5
+* `gs_heatmap` gains the `winsorize_threshold` parameter, to control the behavior of the geneset heatmap in the presence of extreme values, either negative or positive. If not specified, no winsorization is applied to the heatmap.
6
+
3 7
 ## Other notes
4 8
 
5 9
 * Fixed the behavior of the reactive elements after uploading the `GeneTonicList` object at runtime. 
... ...
@@ -28,6 +28,11 @@
28 28
 #' specified by [ComplexHeatmap::Heatmap()]
29 29
 #' @param center_mean Logical, whether to perform mean centering on the row-wise
30 30
 #' @param scale_row Logical, whether to standardize by row the expression values
31
+#' @param winsorize_threshold Numeric value, used as the threshold to winsorize 
32
+#' the extreme values of the heatmap. Should be a positive number. Defaults to 
33
+#' NULL, which corresponds to not applying any winsorization. Suggested values: 
34
+#' enter 2 or 3 if using row-standardized values (`scale_row` is TRUE), or visually
35
+#' inspect the range of the values if using simply mean centered values.
31 36
 #' @param anno_col_info A character vector of names in `colData(dds)` to use for
32 37
 #' decorating the heatmap as annotation.
33 38
 #' @param plot_title Character string, to specify the title of the plot,
... ...
@@ -95,6 +100,7 @@ gs_heatmap <- function(se,
95 100
                        cluster_columns = FALSE,
96 101
                        center_mean = TRUE,
97 102
                        scale_row = FALSE,
103
+                       winsorize_threshold = NULL,
98 104
                        anno_col_info = NULL,
99 105
                        plot_title = NULL,
100 106
                        ...) {
... ...
@@ -105,6 +111,11 @@ gs_heatmap <- function(se,
105 111
     res_enrich <- gtl$res_enrich
106 112
     annotation_obj <- gtl$annotation_obj
107 113
   }
114
+  
115
+  if (!is.null(winsorize_threshold)) {
116
+    stopifnot(is.numeric(winsorize_threshold))
117
+    stopifnot(winsorize_threshold >= 0)
118
+  }
108 119
 
109 120
   # check that the data would ideally be a DST, so that it is not the counts/normalized?
110 121
   mydata <- assay(se)
... ...
@@ -165,6 +176,12 @@ gs_heatmap <- function(se,
165 176
     mydata_sig <- mydata_sig[de_to_keep, , drop = FALSE]
166 177
   }
167 178
 
179
+  extreme_value <- max(abs(range(mydata_sig)))
180
+  if (!is.null(winsorize_threshold)) {
181
+    # winsorize: clamp values beyond +/- winsorize_threshold to the threshold
182
+    mydata_sig[mydata_sig < -winsorize_threshold] <- -winsorize_threshold
183
+    mydata_sig[mydata_sig > winsorize_threshold] <- winsorize_threshold
184
+  } 
168 185
   # dim(mydata_sig)
169 186
 
170 187
   if (is.null(plot_title)) {
... ...
@@ -18,6 +18,7 @@ gs_heatmap(
18 18
   cluster_columns = FALSE,
19 19
   center_mean = TRUE,
20 20
   scale_row = FALSE,
21
+  winsorize_threshold = NULL,
21 22
   anno_col_info = NULL,
22 23
   plot_title = NULL,
23 24
   ...
... ...
@@ -63,6 +64,12 @@ specified by \code{\link[ComplexHeatmap:Heatmap]{ComplexHeatmap::Heatmap()}}}
63 64
 
64 65
 \item{scale_row}{Logical, whether to standardize by row the expression values}
65 66
 
67
+\item{winsorize_threshold}{Numeric value, used as the threshold to winsorize
68
+the extreme values of the heatmap. Should be a positive number. Defaults to
69
+NULL, which corresponds to not applying any winsorization. Suggested values:
70
+enter 2 or 3 if using row-standardized values (\code{scale_row} is TRUE), or visually
71
+inspect the range of the values if using simply mean centered values.}
72
+
66 73
 \item{anno_col_info}{A character vector of names in \code{colData(dds)} to use for
67 74
 decorating the heatmap as annotation.}
68 75
 
... ...
@@ -47,6 +47,7 @@ test_that("Geneset heatmap is created", {
47 47
     cluster_columns = TRUE,
48 48
     center_mean = TRUE,
49 49
     scale_row = TRUE,
50
+    winsorize_threshold = 2,
50 51
     anno_col_info = c(
51 52
       "condition_char",
52 53
       "some_numbers",
... ...
@@ -91,6 +92,24 @@ test_that("Geneset heatmap is created", {
91 92
       anno_col_info = "condition"
92 93
     )
93 94
   )
95
+  
96
+  expect_error(
97
+    p5 <- gs_heatmap(
98
+      se = vst_macrophage,
99
+      res_de = res_macrophage_IFNg_vs_naive,
100
+      res_enrich = res_enrich_IFNg_vs_naive,
101
+      annotation_obj = anno_df,
102
+      genelist = mycustomlist,
103
+      FDR = 0.05,
104
+      de_only = FALSE,
105
+      cluster_rows = TRUE,
106
+      cluster_columns = TRUE,
107
+      center_mean = TRUE,
108
+      scale_row = TRUE,
109
+      winsorize_threshold = -3,
110
+      anno_col_info = "condition"
111
+    )
112
+  )
94 113
 
95 114
   file.remove("Rplots.pdf")
96 115
 })