Browse code

run multiple times k-means to get a consensus partition

jokergoo authored on 22/03/2019 10:40:01
Showing5 changed files

... ...
@@ -6,7 +6,7 @@ Date: 2019-03-15
6 6
 Author: Zuguang Gu
7 7
 Maintainer: Zuguang Gu <z.gu@dkfz.de>
8 8
 Depends: R (>= 3.1.2), methods, grid, graphics, stats, grDevices
9
-Imports: circlize (>= 0.4.5), GetoptLong, colorspace,
9
+Imports: circlize (>= 0.4.5), GetoptLong, colorspace, clue,
10 10
     RColorBrewer, GlobalOptions (>= 0.1.0), parallel
11 11
 Suggests: testthat (>= 1.0.0), knitr, markdown, dendsort, 
12 12
     Cairo, png, jpeg, tiff, fastcluster,
... ...
@@ -270,4 +270,7 @@ importFrom("utils", "combn")
270 270
 importFrom("utils", "getFromNamespace")
271 271
 importFrom("utils", "packageDescription")
272 272
 importFrom("utils", "str")
273
-
273
+importFrom("clue", as.cl_hard_partition)
274
+importFrom("clue", cl_ensemble)
275
+importFrom("clue", cl_consensus)
276
+importFrom("clue", cl_class_ids)
... ...
@@ -1,6 +1,7 @@
1 1
 CHANGES in VERSION 1.99.6
2 2
 
3 3
 * adjust the size of heatmap annotations and add testing scripts
4
+* run multiple times k-means to get a consensus partition
4 5
 
5 6
 ========================
6 7
 
... ...
@@ -152,8 +152,10 @@ Heatmap = setClass("Heatmap",
152 152
 # -split A vector or a data frame by which the rows are split. But if ``cluster_rows`` is a clustering object, ``split`` can be a single number
153 153
 #        indicating to split the dendrogram by `stats::cutree`.
154 154
 # -row_km Same as ``km``.
155
+# -row_km_repeats Number of k-means runs to get a consensus k-means clustering.
155 156
 # -row_split Same as ``split``.
156 157
 # -column_km K-means clustering on columns.
158
+# -column_km_repeats Number of k-means runs to get a consensus k-means clustering.
157 159
 # -column_split Split on columns. For heatmap splitting, please refer to https://jokergoo.github.io/ComplexHeatmap-reference/book/a-single-heatmap.html#heatmap-split .
158 160
 # -gap Gap between row slices if the heatmap is split by rows. The value should be a `grid::unit` object.
159 161
 # -row_gap Same as ``gap``.
... ...
@@ -254,8 +256,10 @@ Heatmap = function(matrix, col, name,
254 256
     km = 1, 
255 257
     split = NULL, 
256 258
     row_km = km,
259
+    row_km_repeats = 10,
257 260
     row_split = split,
258 261
     column_km = 1,
262
+    column_km_repeats = 10,
259 263
     column_split = NULL,
260 264
     gap = unit(1, "mm"),
261 265
     row_gap = unit(1, "mm"),
... ...
@@ -413,8 +417,10 @@ Heatmap = function(matrix, col, name,
413 417
     .Object@matrix = matrix
414 418
 
415 419
     .Object@matrix_param$row_km = row_km
420
+    .Object@matrix_param$row_km_repeats = row_km_repeats
416 421
     .Object@matrix_param$row_gap = row_gap
417 422
     .Object@matrix_param$column_km = column_km
423
+    .Object@matrix_param$column_km_repeats = column_km_repeats
418 424
     .Object@matrix_param$column_gap = column_gap
419 425
 
420 426
     ### check row_split and column_split ###
... ...
@@ -912,6 +918,7 @@ make_cluster = function(object, which = c("row", "column")) {
912 918
     method = slot(object, paste0(which, "_dend_param"))$method
913 919
     order = slot(object, paste0(which, "_order"))  # pre-defined row order
914 920
     km = getElement(object@matrix_param, paste0(which, "_km"))
921
+    km_repeats = getElement(object@matrix_param, paste0(which, "_km_repeats"))
915 922
     split = getElement(object@matrix_param, paste0(which, "_split"))
916 923
     reorder = slot(object, paste0(which, "_dend_param"))$reorder
917 924
     cluster = slot(object, paste0(which, "_dend_param"))$cluster
... ...
@@ -1108,16 +1115,26 @@ make_cluster = function(object, which = c("row", "column")) {
1108 1115
 
1109 1116
     if(verbose) qq("clustering object is not pre-defined, clustering is applied to each @{which} slice\n")
1110 1117
     # make k-means clustering to add a split column
1118
+    consensus_kmeans = function(mat, centers, km_repeats) {
1119
+        partition_list = lapply(seq_len(km_repeats), function(i) {
1120
+            as.cl_hard_partition(kmeans(mat, centers))
1121
+        })
1122
+        partition_list = cl_ensemble(list = partition_list)
1123
+        partition_consensus = cl_consensus(partition_list)
1124
+        as.vector(cl_class_ids(partition_consensus)) 
1125
+    }
1111 1126
     if(km > 1 && is.numeric(mat)) {
1112 1127
         if(which == "row") {
1113
-            km.fit = kmeans(mat, centers = km)
1114
-            cl = km.fit$cluster
1128
+            # km.fit = kmeans(mat, centers = km)
1129
+            # cl = km.fit$cluster
1130
+            cl = consensus_kmeans(mat, km, km_repeats)
1115 1131
             meanmat = lapply(sort(unique(cl)), function(i) {
1116 1132
                 colMeans(mat[cl == i, , drop = FALSE])
1117 1133
             })
1118 1134
         } else {
1119
-            km.fit = kmeans(t(mat), centers = km)
1120
-            cl = km.fit$cluster
1135
+            # km.fit = kmeans(t(mat), centers = km)
1136
+            # cl = km.fit$cluster
1137
+            cl = consensus_kmeans(t(mat), km, km_repeats)
1121 1138
             meanmat = lapply(sort(unique(cl)), function(i) {
1122 1139
                 rowMeans(mat[, cl == i, drop = FALSE])
1123 1140
             })
1124 1141
new file mode 100644