Browse code

Merge pull request #32 from federicomarini/gsupset

Gsupset

Federico Marini authored on 30/03/2022 10:05:07 • GitHub committed on 30/03/2022 10:05:07
Showing 8 changed files

... ...
@@ -31,6 +31,7 @@ Imports:
31 31
     colorspace,
32 32
 	  colourpicker,
33 33
     ComplexHeatmap,
34
+    ComplexUpset,
34 35
     dendextend,
35 36
     DESeq2,
36 37
     dplyr,
... ...
@@ -9,6 +9,7 @@ export(checkup_gtl)
9 9
 export(cluster_markov)
10 10
 export(create_jaccard_matrix)
11 11
 export(create_kappa_matrix)
12
+export(create_upsetdata)
12 13
 export(describe_gtl)
13 14
 export(deseqresult2df)
14 15
 export(distill_enrichment)
... ...
@@ -38,6 +39,7 @@ export(gs_spider)
38 39
 export(gs_summary_heat)
39 40
 export(gs_summary_overview)
40 41
 export(gs_summary_overview_pair)
42
+export(gs_upset)
41 43
 export(gs_volcano)
42 44
 export(happy_hour)
43 45
 export(map2color)
... ...
@@ -66,6 +68,9 @@ importFrom(AnnotationDbi,Term)
66 68
 importFrom(ComplexHeatmap,Heatmap)
67 69
 importFrom(ComplexHeatmap,HeatmapAnnotation)
68 70
 importFrom(ComplexHeatmap,draw)
71
+importFrom(ComplexUpset,intersection_matrix)
72
+importFrom(ComplexUpset,intersection_size)
73
+importFrom(ComplexUpset,upset)
69 74
 importFrom(DESeq2,counts)
70 75
 importFrom(DESeq2,estimateSizeFactors)
71 76
 importFrom(DESeq2,normalizationFactors)
... ...
@@ -26,6 +26,7 @@
26 26
 #' @importFrom colorspace rainbow_hcl
27 27
 #' @importFrom colourpicker colourInput
28 28
 #' @importFrom ComplexHeatmap Heatmap HeatmapAnnotation draw
29
+#' @importFrom ComplexUpset upset intersection_matrix intersection_size
29 30
 #' @importFrom dendextend branches_attr_by_clusters set
30 31
 #' @importFrom DESeq2 vst counts estimateSizeFactors normalizationFactors sizeFactors
31 32
 #' @importFrom dplyr arrange desc group_by mutate pull slice select "%>%"
32 33
new file mode 100644
... ...
@@ -0,0 +1,243 @@
1
+#' Create a geneset upset dataset
2
+#'
3
+#' Create a data frame that can be fed to the upset function
4
+#'
5
+#' @param res_enrich A `data.frame` object, storing the result of the functional
6
+#' enrichment analysis. See more in the main function, [GeneTonic()], to see the
7
+#' formatting requirements.
8
+#' @param use_ids Logical - whether to use the `gs_id`entifiers as names, or the
9
+#' values provided as `gs_description`. Defaults to FALSE, using the full
10
+#' descriptions
11
+#'
12
+#' @return A data.frame to be used in `ComplexUpset::upset()`
13
+#' @export
14
+#'
15
+#' @examples
16
+#' # res_enrich object
17
+#' data(res_enrich_macrophage, package = "GeneTonic")
18
+#' res_enrich <- shake_topGOtableResult(topgoDE_macrophage_IFNg_vs_naive)
19
+#' res_enrich <- get_aggrscores(res_enrich, res_de, anno_df)
20
+#'
21
+#' create_upsetdata(res_enrich[1:20, ])
22
+#' dim(create_upsetdata(res_enrich[1:20, ]))
23
+#'
24
+#' create_upsetdata(res_enrich[1:5, ], use_ids = TRUE)
25
+create_upsetdata <- function(res_enrich, use_ids = FALSE) {
26
+  my_gs_ids <- res_enrich$gs_id
27
+  my_gs_desc <- res_enrich$gs_description
28
+  my_gs_genes <- res_enrich$gs_genes
29
+
30
+  gsg_list <- strsplit(my_gs_genes, ",")
31
+  if (use_ids) {
32
+    names(gsg_list) <- my_gs_ids
33
+  } else {
34
+    names(gsg_list) <- my_gs_desc
35
+  }
36
+
37
+  all_genes <- unique(unlist(gsg_list))
38
+  upgsg <- unlist(lapply(gsg_list, function(x) {
39
+    x <- as.vector(match(all_genes, x))
40
+  }))
41
+
42
+  upgsg[is.na(upgsg)] <- as.integer(0)
43
+  upgsg[upgsg != 0] <- as.integer(1)
44
+
45
+  upgsg <- data.frame(
46
+    matrix(upgsg, ncol = length(gsg_list), byrow = FALSE)
47
+  )
48
+
49
+  upgsg <- upgsg[which(rowSums(upgsg) != 0), ]
50
+  names(upgsg) <- names(gsg_list)
51
+  rownames(upgsg) <- all_genes
52
+
53
+  return(upgsg)
54
+}
55
+
56
+
57
+
58
+
59
+#' Upset plot for genesets
60
+#'
61
+#' Create an upset plot for genesets
62
+#'
63
+#' @param res_enrich A `data.frame` object, storing the result of the functional
64
+#' enrichment analysis. See more in the main function, [GeneTonic()], to check the
65
+#' formatting requirements (a minimal set of columns should be present).
66
+#' @param res_de A `DESeqResults` object.
67
+#' @param annotation_obj A `data.frame` object with the feature annotation
68
+#' information, with at least two columns, `gene_id` and `gene_name`.
69
+#' @param n_gs Integer value, corresponding to the maximal number of gene sets to
70
+#' be included
71
+#' @param gtl A `GeneTonic`-list object, containing in its slots the arguments
72
+#' specified above: `dds`, `res_de`, `res_enrich`, and `annotation_obj` - the names
73
+#' of the list _must_ be specified following the content they are expecting
74
+#' @param gs_ids Character vector, containing a subset of `gs_id` as they are
75
+#' available in `res_enrich`. Lists the gene sets to be included in addition to
76
+#' the top ones (via `n_gs`)
77
+#' @param add_de_direction Logical, whether to add an annotation with info on the
78
+#' DE direction of single genes
79
+#' @param add_de_gsgenes Logical, if set to TRUE adds an annotation with detail
80
+#' on the single components of each defined subset
81
+#' @param col_upDE Character, specifying the color value to be used to mark
82
+#' upregulated genes
83
+#' @param col_downDE Character, specifying the color value to be used to mark
84
+#' downregulated genes
85
+#' @param upset_geom A geom specification to be used in the upset chart. Defaults
86
+#' sensibly to `geom_point(size = 2)`
87
+#' @param return_upsetgsg Logical, controlling the returned value. If set to TRUE,
88
+#' this function will not generate the plot but only create the corresponding
89
+#' data.frame, in case the user wants to proceed with a custom call to create an
90
+#' upset plot.
91
+#'
92
+#' @return A `ggplot` object (if plotting), or alternatively a data.frame
93
+#' @export
94
+#'
95
+#' @examples
96
+#' library("macrophage")
97
+#' library("DESeq2")
98
+#' library("org.Hs.eg.db")
99
+#' library("AnnotationDbi")
100
+#'
101
+#' # dds object
102
+#' data("gse", package = "macrophage")
103
+#' dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition)
104
+#' rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15)
105
+#' dds_macrophage <- estimateSizeFactors(dds_macrophage)
106
+#'
107
+#' # annotation object
108
+#' anno_df <- data.frame(
109
+#'   gene_id = rownames(dds_macrophage),
110
+#'   gene_name = mapIds(org.Hs.eg.db,
111
+#'     keys = rownames(dds_macrophage),
112
+#'     column = "SYMBOL",
113
+#'     keytype = "ENSEMBL"
114
+#'   ),
115
+#'   stringsAsFactors = FALSE,
116
+#'   row.names = rownames(dds_macrophage)
117
+#' )
118
+#'
119
+#' # res object
120
+#' data(res_de_macrophage, package = "GeneTonic")
121
+#' res_de <- res_macrophage_IFNg_vs_naive
122
+#'
123
+#' # res_enrich object
124
+#' data(res_enrich_macrophage, package = "GeneTonic")
125
+#' res_enrich <- shake_topGOtableResult(topgoDE_macrophage_IFNg_vs_naive)
126
+#' res_enrich <- get_aggrscores(res_enrich, res_de, anno_df)
127
+#' gs_upset(res_enrich,
128
+#'          n_gs = 10)
129
+#'
130
+#' gs_upset(res_enrich, res_de = res_de, annotation_obj = anno_df,
131
+#'          n_gs = 8,
132
+#'          add_de_direction = TRUE, add_de_gsgenes = TRUE)
133
+#'
134
+#' # or using the practical gtl (GeneTonicList)
135
+#' gtl_macrophage <- GeneTonic_list(
136
+#'   dds = dds_macrophage,
137
+#'   res_de = res_de,
138
+#'   res_enrich = res_enrich,
139
+#'   annotation_obj = anno_df
140
+#' )
141
+#'
142
+#' gs_upset(gtl = gtl_macrophage,
143
+#'          n_gs = 15,
144
+#'          add_de_direction = TRUE, add_de_gsgenes = TRUE)
145
+gs_upset <- function(res_enrich,
146
+                     res_de = NULL,
147
+                     annotation_obj = NULL,
148
+                     n_gs = 10,
149
+                     gtl = NULL,
150
+                     gs_ids = NULL,
151
+                     add_de_direction = FALSE,
152
+                     add_de_gsgenes = FALSE,
153
+                     col_upDE = "#E41A1C",
154
+                     col_downDE = "#377EB8",
155
+                     upset_geom = geom_point(size = 2),
156
+                     return_upsetgsg = FALSE) {
157
+
158
+  if (!is.null(gtl)) {
159
+    checkup_gtl(gtl)
160
+    # dds <- gtl$dds
161
+    res_de <- gtl$res_de
162
+    res_enrich <- gtl$res_enrich
163
+    annotation_obj <- gtl$annotation_obj
164
+  }
165
+
166
+  stopifnot(is.numeric(n_gs))
167
+
168
+  n_gs <- min(n_gs, nrow(res_enrich))
169
+
170
+  gs_to_use <- unique(
171
+    c(
172
+      res_enrich$gs_id[seq_len(n_gs)], # the ones from the top
173
+      gs_ids[gs_ids %in% res_enrich$gs_id] # the ones specified from the custom list
174
+    )
175
+  )
176
+
177
+  re <- res_enrich[gs_to_use, ]
178
+
179
+  upgsg <- create_upsetdata(re)
180
+
181
+  if (add_de_direction) {
182
+    if (is.null(res_de) | is.null(annotation_obj))
183
+      stop("DE results and annotation required if `add_de_direction` is TRUE, please provide them as `res_de` and `annotation_obj`")
184
+    upgsg$logFC <- res_de[annotation_obj$gene_id[
185
+      match(rownames(upgsg), annotation_obj$gene_name)], "log2FoldChange"]
186
+    upgsg$logFCsign <- upgsg$logFC >= 0
187
+
188
+    param_upset_baseanno <- list(
189
+      'Intersection size' = intersection_size(
190
+        counts = FALSE,
191
+        mapping = aes_string(fill = "logFCsign")
192
+      ) +
193
+        scale_fill_manual(
194
+          values=c('FALSE' = col_downDE, 'TRUE' = col_upDE),
195
+          labels = c("logFC<0", "logFC>0"),
196
+          name = "") +
197
+        theme(legend.position="bottom")
198
+    )
199
+  } else {
200
+    param_upset_baseanno <- "auto"
201
+  }
202
+
203
+  if (add_de_gsgenes){
204
+    if (is.null(res_de) | is.null(annotation_obj))
205
+      stop("DE results and annotation required if `add_de_gsgenes` is TRUE, please provide them as `res_de` and `annotation_obj`")
206
+    upgsg$logFC <- res_de[annotation_obj$gene_id[
207
+      match(rownames(upgsg), annotation_obj$gene_name)], "log2FoldChange"]
208
+    upgsg$logFCsign <- upgsg$logFC >= 0
209
+
210
+    param_upset_anno <- list(
211
+      'logFC' = (
212
+        ggplot(mapping = aes_string(x = "intersection", y = "logFC")) +
213
+          geom_jitter(aes_string(color = "logFC"), na.rm = TRUE) +
214
+          # geom_violin(alpha = 0.5, na.rm = TRUE) +
215
+          theme(legend.position = "none") +
216
+          scale_colour_gradient2(low = col_downDE, high = col_upDE)
217
+      )
218
+    )
219
+  } else {
220
+    param_upset_anno <- NULL
221
+  }
222
+
223
+  # if desired to work on this separately
224
+  if (return_upsetgsg) {
225
+    return(upgsg)
226
+  }
227
+
228
+  # ready to plot
229
+  ComplexUpset::upset(
230
+    data = upgsg,
231
+    intersect = names(upgsg)[seq_len(length(gs_to_use))],
232
+    name = "gsg upset",
233
+    matrix = intersection_matrix(
234
+      geom = upset_geom
235
+    ) + scale_y_discrete(position="right"),
236
+    base_annotations = param_upset_baseanno,
237
+    annotations = param_upset_anno,
238
+    width_ratio=0.1
239
+  )
240
+
241
+}
242
+
243
+
0 244
new file mode 100644
... ...
@@ -0,0 +1,34 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/gs_upset.R
3
+\name{create_upsetdata}
4
+\alias{create_upsetdata}
5
+\title{Create a geneset upset dataset}
6
+\usage{
7
+create_upsetdata(res_enrich, use_ids = FALSE)
8
+}
9
+\arguments{
10
+\item{res_enrich}{A \code{data.frame} object, storing the result of the functional
11
+enrichment analysis. See more in the main function, \code{\link[=GeneTonic]{GeneTonic()}}, to see the
12
+formatting requirements.}
13
+
14
+\item{use_ids}{Logical - whether to use the \code{gs_id}entifiers as names, or the
15
+values provided as \code{gs_description}. Defaults to FALSE, using the full
16
+descriptions}
17
+}
18
+\value{
19
+A data.frame to be used in \code{ComplexUpset::upset()}
20
+}
21
+\description{
22
+Create a data frame that can be fed to the upset function
23
+}
24
+\examples{
25
+# res_enrich object
26
+data(res_enrich_macrophage, package = "GeneTonic")
27
+res_enrich <- shake_topGOtableResult(topgoDE_macrophage_IFNg_vs_naive)
28
+res_enrich <- get_aggrscores(res_enrich, res_de, anno_df)
29
+
30
+create_upsetdata(res_enrich[1:20, ])
31
+dim(create_upsetdata(res_enrich[1:20, ]))
32
+
33
+create_upsetdata(res_enrich[1:5, ], use_ids = TRUE)
34
+}
0 35
new file mode 100644
... ...
@@ -0,0 +1,119 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/gs_upset.R
3
+\name{gs_upset}
4
+\alias{gs_upset}
5
+\title{Upset plot for genesets}
6
+\usage{
7
+gs_upset(
8
+  res_enrich,
9
+  res_de = NULL,
10
+  annotation_obj = NULL,
11
+  n_gs = 10,
12
+  gtl = NULL,
13
+  gs_ids = NULL,
14
+  add_de_direction = FALSE,
15
+  add_de_gsgenes = FALSE,
16
+  col_upDE = "#E41A1C",
17
+  col_downDE = "#377EB8",
18
+  upset_geom = geom_point(size = 2),
19
+  return_upsetgsg = FALSE
20
+)
21
+}
22
+\arguments{
23
+\item{res_enrich}{A \code{data.frame} object, storing the result of the functional
24
+enrichment analysis. See more in the main function, \code{\link[=GeneTonic]{GeneTonic()}}, to check the
25
+formatting requirements (a minimal set of columns should be present).}
26
+
27
+\item{res_de}{A \code{DESeqResults} object.}
28
+
29
+\item{annotation_obj}{A \code{data.frame} object with the feature annotation
30
+information, with at least two columns, \code{gene_id} and \code{gene_name}.}
31
+
32
+\item{n_gs}{Integer value, corresponding to the maximal number of gene sets to
33
+be included}
34
+
35
+\item{gtl}{A \code{GeneTonic}-list object, containing in its slots the arguments
36
+specified above: \code{dds}, \code{res_de}, \code{res_enrich}, and \code{annotation_obj} - the names
37
+of the list \emph{must} be specified following the content they are expecting}
38
+
39
+\item{gs_ids}{Character vector, containing a subset of \code{gs_id} as they are
40
+available in \code{res_enrich}. Lists the gene sets to be included in addition to
41
+the top ones (via \code{n_gs})}
42
+
43
+\item{add_de_direction}{Logical, whether to add an annotation with info on the
44
+DE direction of single genes}
45
+
46
+\item{add_de_gsgenes}{Logical, if set to TRUE adds an annotation with detail
47
+on the single components of each defined subset}
48
+
49
+\item{col_upDE}{Character, specifying the color value to be used to mark
50
+upregulated genes}
51
+
52
+\item{col_downDE}{Character, specifying the color value to be used to mark
53
+downregulated genes}
54
+
55
+\item{upset_geom}{A geom specification to be used in the upset chart. Defaults
56
+sensibly to \code{geom_point(size = 2)}}
57
+
58
+\item{return_upsetgsg}{Logical, controlling the returned value. If set to TRUE,
59
+this function will not generate the plot but only create the corresponding
60
+data.frame, in case the user wants to proceed with a custom call to create an
61
+upset plot.}
62
+}
63
+\value{
64
+A \code{ggplot} object (if plotting), or alternatively a data.frame
65
+}
66
+\description{
67
+Create an upset plot for genesets
68
+}
69
+\examples{
70
+library("macrophage")
71
+library("DESeq2")
72
+library("org.Hs.eg.db")
73
+library("AnnotationDbi")
74
+
75
+# dds object
76
+data("gse", package = "macrophage")
77
+dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition)
78
+rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15)
79
+dds_macrophage <- estimateSizeFactors(dds_macrophage)
80
+
81
+# annotation object
82
+anno_df <- data.frame(
83
+  gene_id = rownames(dds_macrophage),
84
+  gene_name = mapIds(org.Hs.eg.db,
85
+    keys = rownames(dds_macrophage),
86
+    column = "SYMBOL",
87
+    keytype = "ENSEMBL"
88
+  ),
89
+  stringsAsFactors = FALSE,
90
+  row.names = rownames(dds_macrophage)
91
+)
92
+
93
+# res object
94
+data(res_de_macrophage, package = "GeneTonic")
95
+res_de <- res_macrophage_IFNg_vs_naive
96
+
97
+# res_enrich object
98
+data(res_enrich_macrophage, package = "GeneTonic")
99
+res_enrich <- shake_topGOtableResult(topgoDE_macrophage_IFNg_vs_naive)
100
+res_enrich <- get_aggrscores(res_enrich, res_de, anno_df)
101
+gs_upset(res_enrich,
102
+         n_gs = 10)
103
+
104
+gs_upset(res_enrich, res_de = res_de, annotation_obj = anno_df,
105
+         n_gs = 8,
106
+         add_de_direction = TRUE, add_de_gsgenes = TRUE)
107
+
108
+# or using the practical gtl (GeneTonicList)
109
+gtl_macrophage <- GeneTonic_list(
110
+  dds = dds_macrophage,
111
+  res_de = res_de,
112
+  res_enrich = res_enrich,
113
+  annotation_obj = anno_df
114
+)
115
+
116
+gs_upset(gtl = gtl_macrophage,
117
+         n_gs = 15,
118
+         add_de_direction = TRUE, add_de_gsgenes = TRUE)
119
+}
... ...
@@ -82,8 +82,8 @@ ego_IFNg_vs_naive <- enrichGO(
82 82
 
83 83
 message("--- Running gseGO...")
84 84
 sorted_genes <- sort(
85
-  setNames(res_macrophage_IFNg_vs_naive$log2FoldChange, 
86
-           res_macrophage_IFNg_vs_naive$SYMBOL), 
85
+  setNames(res_macrophage_IFNg_vs_naive$log2FoldChange,
86
+           res_macrophage_IFNg_vs_naive$SYMBOL),
87 87
   decreasing = TRUE
88 88
 )
89 89
 
... ...
@@ -108,4 +108,12 @@ assays(dds_unnormalized)[["normalizationFactors"]] <- NULL
108 108
 res_enrich_IFNg_vs_naive <- shake_topGOtableResult(topgoDE_macrophage_IFNg_vs_naive)[1:200, ]
109 109
 message("- Done!")
110 110
 
111
+# also creating the gtl container...
112
+gtl_macrophage <- GeneTonic_list(
113
+  dds = dds_macrophage,
114
+  res_de = res_macrophage_IFNg_vs_naive,
115
+  res_enrich = res_enrich_IFNg_vs_naive,
116
+  annotation_obj = anno_df
117
+)
118
+
111 119
 message("--- Test setup script completed!")
112 120
new file mode 100644
... ...
@@ -0,0 +1,57 @@
1
+context("Testing gs_upset and related functionality")
2
+
3
+test_that("upset data is generated", {
4
+  ugsg <- create_upsetdata(res_enrich_IFNg_vs_naive[1:20, ])
5
+  dim(ugsg)
6
+
7
+  expect_is(ugsg, "data.frame")
8
+  expect_equal(dim(ugsg), c(246, 20))
9
+
10
+  ugsg_ids <- create_upsetdata(res_enrich_IFNg_vs_naive[1:5, ], use_ids = TRUE)
11
+
12
+  expect_true(
13
+    all(grepl(pattern = "^GO", colnames(ugsg_ids)))
14
+  )
15
+})
16
+
17
+
18
+test_that("gs_upset functionality up and running", {
19
+
20
+  p <- gs_upset(res_enrich_IFNg_vs_naive,
21
+                n_gs = 10)
22
+  expect_is(p, "gg")
23
+
24
+  p_anno <- gs_upset(res_enrich_IFNg_vs_naive, res_de = res_macrophage_IFNg_vs_naive, annotation_obj = anno_df,
25
+                     n_gs = 8,
26
+                     add_de_direction = TRUE, add_de_gsgenes = TRUE)
27
+  expect_is(p_anno, "gg")
28
+
29
+  p_gtl <- gs_upset(gtl = gtl_macrophage,
30
+                    n_gs = 15,
31
+                    add_de_direction = TRUE, add_de_gsgenes = TRUE)
32
+  expect_is(p_gtl, "gg")
33
+
34
+  p_gtl_df <- gs_upset(gtl = gtl_macrophage,
35
+                    n_gs = 15,
36
+                    return_upsetgsg = TRUE)
37
+  expect_is(p_gtl_df, "data.frame")
38
+
39
+  # triggering exceptions
40
+  expect_error({
41
+    p_gtl <- gs_upset(res_enrich = gtl_macrophage,
42
+                      n_gs = 15,
43
+                      add_de_direction = TRUE, add_de_gsgenes = TRUE)
44
+  })
45
+
46
+  expect_error({
47
+    p_node <- gs_upset(res_enrich = res_enrich_IFNg_vs_naive,
48
+                      n_gs = 15,
49
+                      add_de_direction = TRUE, add_de_gsgenes = FALSE)
50
+  })
51
+
52
+  expect_error({
53
+    p_noanno <- gs_upset(res_enrich = res_enrich_IFNg_vs_naive,
54
+                      n_gs = 15,
55
+                      add_de_direction = FALSE, add_de_gsgenes = TRUE)
56
+  })
57
+})