Browse code

add mc.cores in densityHeatmap()

Zuguang Gu authored on 13/03/2019 16:40:51
Showing2 changed files

... ...
@@ -7,6 +7,7 @@ CHANGES in VERSION 1.99.5
7 7
 * `default_col()`: if the fraction of positive values in the matrix is in (0.3, 0.7), the color mapping
8 8
   is symmetric to zero.
9 9
 * check `NA` values in `anno_boxplot()` and `anno_density()`.
10
+* add `mc.cores` in `densityHeatmap()`.
10 11
 
11 12
 ========================
12 13
 
... ...
@@ -26,6 +26,7 @@
26 26
 # -clustering_distance_columns There is a specific distance method ``ks`` which is the Kolmogorov-Smirnov statistic between two distributions.
27 27
 #          For other methods, the distance is calculated on the density matrix.
28 28
 # -clustering_method_columns Pass to `Heatmap`.
29
+# -mc.cores Multiple cores for calculating ks distance.
29 30
 # -... Pass to `Heatmap`.
30 31
 #
31 32
 # == details
... ...
@@ -83,6 +84,7 @@ densityHeatmap = function(data,
83 84
 	cluster_columns = FALSE,
84 85
 	clustering_distance_columns = "ks",
85 86
 	clustering_method_columns = "complete",
87
+	mc.cores = 1,
86 88
 
87 89
 	...) {
88 90
 
... ...
@@ -138,17 +140,7 @@ densityHeatmap = function(data,
138 140
 
139 141
 	if(cluster_columns) {
140 142
 		if(clustering_distance_columns == "ks") {
141
-			nc = length(data)
142
-		    d = matrix(NA, nrow = nc, ncol = nc)
143
-		    rownames(d) = colnames(d) = rownames(d)
144
-
145
-		    for(i in 2:nc) {
146
-		        for(j in 1:(nc-1)) {
147
-		            suppressWarnings(d[i, j] <- ks_dist(data[[i]], data[[j]]))
148
-		        }
149
-		    }
150
-
151
-		    d = as.dist(d)
143
+			d = ks_dist(data, mc.cores = mc.cores)
152 144
 
153 145
 			hc = hclust(d, clustering_method_columns)
154 146
 			cluster_columns = hc
... ...
@@ -259,7 +251,8 @@ densityHeatmap = function(data,
259 251
 	return(ht_list)
260 252
 }
261 253
 
262
-ks_dist = function(x, y) {
254
+# https://stackoverflow.com/a/29853834/3425904
255
+ks_dist_pair = function(x, y) {
263 256
 	# if(length(x) > 5000) x = sample(x, 5000)
264 257
 	# if(length(y) > 5000) y = sample(y, 5000)
265 258
 	n <- length(x)
... ...
@@ -270,3 +263,44 @@ ks_dist = function(x, y) {
270 263
     z <- cumsum(ifelse(order(w) <= n.x, 1/n.x, -1/n.y))
271 264
     max(abs(z))
272 265
 }
266
+
267
+ks_dist = function(mat, mc.cores = 1) {
268
+	nr = nrow(mat)
269
+    nc = ncol(mat)
270
+
271
+	ind_mat = expand.grid(seq_len(nc), seq_len(nc))
272
+	ind_mat = ind_mat[  ind_mat[, 1] > ind_mat[, 2], , drop = FALSE]
273
+	v = mclapply(seq_len(nrow(ind_mat)), function(ind) {
274
+		i = ind_mat[ind, 1]
275
+		j = ind_mat[ind, 2]
276
+		suppressWarnings(d <- ks_dist_pair(mat[, i], mat[, j]))
277
+		return(d)
278
+	}, mc.cores = mc.cores)
279
+	v = unlist(v)
280
+
281
+	i = ind_mat[, 1]
282
+	j = ind_mat[, 2]
283
+	
284
+    ind = (j - 1) * nr + i
285
+    d = matrix(0, nrow = nc, ncol = nc)
286
+    d[ind] = v
287
+    as.dist(d)
288
+}
289
+
290
+# m = matrix(rnorm(100), 10)
291
+# ks_dist(m, mc.cores = 1)
292
+# ks_dist(m, mc.cores = 2)
293
+# ks_dist_1(m)
294
+ks_dist_1 = function(mat) {
295
+	nc = ncol(mat)
296
+    d = matrix(NA, nrow = nc, ncol = nc)
297
+    rownames(d) = colnames(d) = rownames(d)
298
+
299
+    for(i in 2:nc) {
300
+        for(j in 1:(nc-1)) {
301
+            suppressWarnings(d[i, j] <- ks_dist_pair(mat[, i], mat[, j]))
302
+        }
303
+    }
304
+
305
+    as.dist(d)
306
+}