... | ... |
@@ -1,5 +1,7 @@ |
1 | 1 |
|
2 |
-make_comb_mat_from_matrix = function(x, mode, top_n_sets = Inf, min_set_size = -Inf, complement_size = NULL) { |
|
2 |
+make_comb_mat_from_matrix = function(x, mode, top_n_sets = Inf, min_set_size = -Inf, |
|
3 |
+ universal_set = NULL, complement_size = NULL) { |
|
4 |
+ |
|
3 | 5 |
# check whether x is a binary matrix |
4 | 6 |
if(is.data.frame(x)) { |
5 | 7 |
lc = sapply(x, function(x) { |
... | ... |
@@ -38,6 +40,18 @@ make_comb_mat_from_matrix = function(x, mode, top_n_sets = Inf, min_set_size = - |
38 | 40 |
set_size = set_size[l] |
39 | 41 |
x = x[, l, drop = FALSE] |
40 | 42 |
x = x[rowSums(x) > 0, , drop = FALSE] |
43 |
+ |
|
44 |
+ if(!is.null(universal_set)) { |
|
45 |
+ if(is.null(rownames(x))) { |
|
46 |
+ stop_wrap("`x` should have row names when `universal_set` is set.") |
|
47 |
+ } |
|
48 |
+ |
|
49 |
+ x = x[intersect(rownames(x), universal_set), , drop = FALSE] |
|
50 |
+ if(nrow(x) == 0) { |
|
51 |
+ stop_wrap("There is no combination set left after intersecting to `universal_set`.") |
|
52 |
+ } |
|
53 |
+ complement_size = length(setdiff(universal_set, rownames(x))) |
|
54 |
+ } |
|
41 | 55 |
|
42 | 56 |
comb_mat = unique(x) |
43 | 57 |
rn = apply(comb_mat, 1, binaryToInt) |
... | ... |
@@ -93,22 +107,47 @@ make_comb_mat_from_matrix = function(x, mode, top_n_sets = Inf, min_set_size = - |
93 | 107 |
}) |
94 | 108 |
} |
95 | 109 |
|
110 |
+ #normalize comb_mat to 2^n - 1 columns |
|
111 |
+ n = nrow(comb_mat) |
|
112 |
+ if(ncol(comb_mat) < 2^n - 1) { |
|
113 |
+ full_comb_mat = matrix(0, nrow = n, ncol = 2^n - 1) |
|
114 |
+ j = 1 |
|
115 |
+ for(k in seq_len(n)) { |
|
116 |
+ comb = combn(n, k) |
|
117 |
+ for(i in 1:ncol(comb)) { |
|
118 |
+ full_comb_mat[comb[, i], j] = TRUE |
|
119 |
+ j = j + 1 |
|
120 |
+ } |
|
121 |
+ } |
|
122 |
+ ind = which(! apply(full_comb_mat, 2, binaryToInt) %in% apply(comb_mat, 2, binaryToInt)) |
|
123 |
+ comb_mat = cbind(comb_mat, full_comb_mat[, ind, drop = FALSE]) |
|
124 |
+ comb_size = c(comb_size, rep(0, length(ind))) |
|
125 |
+ |
|
126 |
+ } |
|
127 |
+ |
|
96 | 128 |
if(!is.null(complement_size)) { |
97 | 129 |
comb_mat = cbind(rep(0, nrow(comb_mat)), comb_mat) |
98 | 130 |
comb_size = c(complement_size, comb_size) |
99 | 131 |
} |
100 | 132 |
|
133 |
+ od = order(apply(comb_mat, 2, function(x) sum(x*seq_along(x)))) |
|
134 |
+ comb_mat = comb_mat[, od, drop = FALSE] |
|
135 |
+ comb_size = comb_size[od] |
|
136 |
+ |
|
101 | 137 |
attr(comb_mat, "set_size") = set_size |
102 | 138 |
attr(comb_mat, "comb_size") = comb_size |
103 | 139 |
attr(comb_mat, "mode") = mode |
104 | 140 |
attr(comb_mat, "set_on_rows") = TRUE |
105 | 141 |
attr(comb_mat, "x") = x |
142 |
+ attr(comb_mat, "universal_set") = universal_set |
|
106 | 143 |
class(comb_mat) = c("comb_mat", "matrix") |
107 | 144 |
return(comb_mat) |
108 | 145 |
|
109 | 146 |
} |
110 | 147 |
|
111 |
-make_comb_mat_from_list = function(lt, mode, value_fun = length, top_n_sets = Inf, min_set_size = -Inf, complement_size = NULL) { |
|
148 |
+make_comb_mat_from_list = function(lt, mode, value_fun = length, top_n_sets = Inf, |
|
149 |
+ min_set_size = -Inf, universal_set = NULL, complement_size = NULL) { |
|
150 |
+ |
|
112 | 151 |
n = length(lt) |
113 | 152 |
nm = names(lt) |
114 | 153 |
if(is.null(nm)) { |
... | ... |
@@ -145,6 +184,15 @@ make_comb_mat_from_list = function(lt, mode, value_fun = length, top_n_sets = In |
145 | 184 |
lt = lt[l] |
146 | 185 |
n = length(lt) |
147 | 186 |
nm = names(lt) |
187 |
+ |
|
188 |
+ if(!is.null(universal_set)) { |
|
189 |
+ lt = lapply(lt, function(x) intersect(x, universal_set)) |
|
190 |
+ complement_set = universal_set |
|
191 |
+ for(i in seq_along(lt)) { |
|
192 |
+ complement_set = setdiff(complement_set, lt[[i]]) |
|
193 |
+ } |
|
194 |
+ complement_size = value_fun(complement_set) |
|
195 |
+ } |
|
148 | 196 |
|
149 | 197 |
comb_mat = matrix(FALSE, nrow = n, ncol = sum(choose(n, 1:n))) |
150 | 198 |
rownames(comb_mat) = nm |
... | ... |
@@ -199,6 +247,7 @@ make_comb_mat_from_list = function(lt, mode, value_fun = length, top_n_sets = In |
199 | 247 |
attr(comb_mat, "mode") = mode |
200 | 248 |
attr(comb_mat, "set_on_rows") = TRUE |
201 | 249 |
attr(comb_mat, "lt") = lt |
250 |
+ attr(comb_mat, "universal_set") = universal_set |
|
202 | 251 |
class(comb_mat) = c("comb_mat", "matrix") |
203 | 252 |
return(comb_mat) |
204 | 253 |
} |
... | ... |
@@ -208,21 +257,28 @@ make_comb_mat_from_list = function(lt, mode, value_fun = length, top_n_sets = In |
208 | 257 |
# |
209 | 258 |
# == param |
210 | 259 |
# -lt A list of vectors. |
260 |
+# -universal_set The universal set. |
|
211 | 261 |
# |
212 | 262 |
# == details |
213 | 263 |
# It converts the list which have m sets to a binary matrix with n rows and m columns |
214 |
-# where n is the number of union of all sets in the list. |
|
264 |
+# where n is the size of universal set. |
|
215 | 265 |
# |
216 | 266 |
# == example |
217 | 267 |
# set.seed(123) |
218 |
-# lt = list(a = sample(letters, 10), |
|
219 |
-# b = sample(letters, 15), |
|
220 |
-# c = sample(letters, 20)) |
|
268 |
+# lt = list(a = sample(letters, 5), |
|
269 |
+# b = sample(letters, 10), |
|
270 |
+# c = sample(letters, 15)) |
|
221 | 271 |
# list_to_matrix(lt) |
222 |
-list_to_matrix = function(lt) { |
|
223 |
- cn = unique(unlist(lt)) |
|
224 |
- mat = matrix(0, nrow = length(cn), ncol = length(lt)) |
|
225 |
- rownames(mat) = cn |
|
272 |
+# list_to_matrix(lt, universal_set = letters) |
|
273 |
+list_to_matrix = function(lt, universal_set = NULL) { |
|
274 |
+ if(!is.null(universal_set)) { |
|
275 |
+ lt = lapply(lt, function(x) intersect(x, universal_set)) |
|
276 |
+ } else { |
|
277 |
+ universal_set = unique(unlist(lt)) |
|
278 |
+ } |
|
279 |
+ |
|
280 |
+ mat = matrix(0, nrow = length(universal_set), ncol = length(lt)) |
|
281 |
+ rownames(mat) = sort(universal_set) |
|
226 | 282 |
colnames(mat) = names(lt) |
227 | 283 |
for(i in seq_along(lt)) { |
228 | 284 |
mat[unique(lt[[i]]), i] = 1 |
... | ... |
@@ -239,6 +295,7 @@ list_to_matrix = function(lt) { |
239 | 295 |
# -mode The mode for forming the combination set, see Mode section. |
240 | 296 |
# -top_n_sets Number of sets with largest size. |
241 | 297 |
# -min_set_size Ths minimal set size that is used for generating the combination matrix. |
298 |
+# -universal_set The universal set. It if is specified, ``complement_size`` is ignored. |
|
242 | 299 |
# -complement_size The size for the complement of all sets. If it is specified, the combination |
243 | 300 |
# set name will be like "00...". |
244 | 301 |
# -value_fun For each combination set, how to calculate the size? If it is a scalar set, |
... | ... |
@@ -329,7 +386,7 @@ list_to_matrix = function(lt) { |
329 | 386 |
# m = make_comb_mat(lt) |
330 | 387 |
# } |
331 | 388 |
make_comb_mat = function(..., mode = c("distinct", "intersect", "union"), |
332 |
- top_n_sets = Inf, min_set_size = -Inf, complement_size = NULL, value_fun) { |
|
389 |
+ top_n_sets = Inf, min_set_size = -Inf, universal_set = NULL, complement_size = NULL, value_fun) { |
|
333 | 390 |
|
334 | 391 |
lt = list(...) |
335 | 392 |
|
... | ... |
@@ -337,7 +394,8 @@ make_comb_mat = function(..., mode = c("distinct", "intersect", "union"), |
337 | 394 |
if(length(lt) == 1) { |
338 | 395 |
lt = lt[[1]] |
339 | 396 |
if(!is.null(dim(lt))) { |
340 |
- return(make_comb_mat_from_matrix(lt, mode = mode, top_n_sets = top_n_sets, min_set_size = min_set_size, complement_size = complement_size)) |
|
397 |
+ return(make_comb_mat_from_matrix(lt, mode = mode, top_n_sets = top_n_sets, |
|
398 |
+ min_set_size = min_set_size, universal_set = universal_set, complement_size = complement_size)) |
|
341 | 399 |
} |
342 | 400 |
} |
343 | 401 |
|
... | ... |
@@ -350,7 +408,8 @@ make_comb_mat = function(..., mode = c("distinct", "intersect", "union"), |
350 | 408 |
value_fun = length |
351 | 409 |
} |
352 | 410 |
} |
353 |
- make_comb_mat_from_list(lt, value_fun, mode = mode, top_n_sets = top_n_sets, min_set_size = min_set_size, complement_size = complement_size) |
|
411 |
+ make_comb_mat_from_list(lt, value_fun, mode = mode, top_n_sets = top_n_sets, min_set_size = min_set_size, |
|
412 |
+ universal_set = universal_set, complement_size = complement_size) |
|
354 | 413 |
} |
355 | 414 |
|
356 | 415 |
|
... | ... |
@@ -404,7 +463,7 @@ set_name = function(m) { |
404 | 463 |
# m = make_comb_mat(lt) |
405 | 464 |
# set_size(m) |
406 | 465 |
set_size = function(m) { |
407 |
- attr(m, "set_size") |
|
466 |
+ structure(attr(m, "set_size"), names = set_name(m)) |
|
408 | 467 |
} |
409 | 468 |
|
410 | 469 |
# == title |
... | ... |
@@ -424,7 +483,7 @@ set_size = function(m) { |
424 | 483 |
# m = make_comb_mat(lt) |
425 | 484 |
# comb_size(m) |
426 | 485 |
comb_size = function(m) { |
427 |
- attr(m, "comb_size") |
|
486 |
+ structure(attr(m, "comb_size"), names = comb_name(m)) |
|
428 | 487 |
} |
429 | 488 |
|
430 | 489 |
# == title |
... | ... |
@@ -481,10 +540,11 @@ comb_name = function(m) { |
481 | 540 |
comb_degree = function(m) { |
482 | 541 |
set_on_rows = attr(m, "set_on_rows") |
483 | 542 |
if(set_on_rows) { |
484 |
- colSums(m) |
|
543 |
+ d = colSums(m) |
|
485 | 544 |
} else { |
486 |
- rowSums(m) |
|
545 |
+ d = rowSums(m) |
|
487 | 546 |
} |
547 |
+ structure(d, names = comb_name(m)) |
|
488 | 548 |
} |
489 | 549 |
|
490 | 550 |
# == title |
... | ... |
@@ -506,10 +566,6 @@ comb_degree = function(m) { |
506 | 566 |
# extract_comb(m, "110") |
507 | 567 |
extract_comb = function(m, comb_name) { |
508 | 568 |
|
509 |
- if(grepl("^0+$", comb_name)) { |
|
510 |
- stop_wrap(qq("Cannot extract elements for the complement set '@{comb_name}'.")) |
|
511 |
- } |
|
512 |
- |
|
513 | 569 |
all_comb_names = comb_name(m) |
514 | 570 |
if(!comb_name %in% all_comb_names) { |
515 | 571 |
stop_wrap(paste0("Cannot find a combination name: ", comb_name, ", valid combination name should be in `comb_name(m)`.")) |
... | ... |
@@ -519,8 +575,22 @@ extract_comb = function(m, comb_name) { |
519 | 575 |
|
520 | 576 |
x = attr(m, "x") |
521 | 577 |
lt = attr(m, "lt") |
578 |
+ universal_set = attr(m, "universal_set") |
|
522 | 579 |
mode = attr(m, "mode") |
580 |
+ |
|
581 |
+ is_complement_set = function(comb_name) { |
|
582 |
+ grepl("^0+$", comb_name) |
|
583 |
+ } |
|
584 |
+ |
|
585 |
+ if(is.null(universal_set) && is_complement_set(comb_name)) { |
|
586 |
+ stop_wrap(qq("Cannot extract elements for the complement set '@{comb_name}' since universal set was not set.")) |
|
587 |
+ } |
|
588 |
+ |
|
523 | 589 |
if(!is.null(x)) { |
590 |
+ if(is_complement_set(comb_name)) { |
|
591 |
+ return(setdiff(universal_set, rownames(x))) |
|
592 |
+ } |
|
593 |
+ |
|
524 | 594 |
if(mode == "distinct") { |
525 | 595 |
l = apply(x, 1, function(y) all(y == query)) |
526 | 596 |
} else if(mode == "intersect") { |
... | ... |
@@ -556,6 +626,14 @@ extract_comb = function(m, comb_name) { |
556 | 626 |
setdiff = getFromNamespace("setdiff", ns = "BiocGenerics") |
557 | 627 |
} |
558 | 628 |
|
629 |
+ if(is_complement_set(comb_name)) { |
|
630 |
+ s = universal_set |
|
631 |
+ for(i in seq_along(lt)) { |
|
632 |
+ s = setdiff(s, lt[[i]]) |
|
633 |
+ } |
|
634 |
+ return(s) |
|
635 |
+ } |
|
636 |
+ |
|
559 | 637 |
do_comb = function(lt, mode, do = rep(TRUE, length(lt))) { |
560 | 638 |
set1_index = which(do) |
561 | 639 |
set2_index = which(!do) |
562 | 640 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,72 @@ |
1 |
+lt = list( |
|
2 |
+ a = c("h", "t", "j", "u", "w"), |
|
3 |
+ b = c("b", "n", "v", "m", "k", "u", "j", "w", "x", "z"), |
|
4 |
+ c = c("x", "g", "b", "h", "u", "s", "n", "m", "r", "l", "q", "i", "o", "d", "z") |
|
5 |
+) |
|
6 |
+ |
|
7 |
+ |
|
8 |
+test_that("test default list_to_matrix", { |
|
9 |
+ m = list_to_matrix(lt) |
|
10 |
+ expect_that(rownames(m), is_identical_to(sort(unique(unlist(lt))))) |
|
11 |
+ expect_that(colnames(m), is_identical_to(names(lt))) |
|
12 |
+ expect_that(unname(m["u", ]), is_identical_to(c(1, 1, 1))) |
|
13 |
+ expect_that(unname(m["j", ]), is_identical_to(c(1, 1, 0))) |
|
14 |
+}) |
|
15 |
+ |
|
16 |
+test_that("test list_to_matrix with universal_set", { |
|
17 |
+ m = list_to_matrix(lt, universal_set = letters) |
|
18 |
+ expect_that(rownames(m), is_identical_to(letters)) |
|
19 |
+ expect_that(unname(m["y", ]), is_identical_to(c(0, 0, 0))) |
|
20 |
+}) |
|
21 |
+ |
|
22 |
+test_that("test list_to_matrix with universal_set which is smaller than the input set", { |
|
23 |
+ m = list_to_matrix(lt, universal_set = letters[1:10]) |
|
24 |
+ expect_that(rownames(m), is_identical_to(letters[1:10])) |
|
25 |
+ expect_that(unname(m["a", ]), is_identical_to(c(0, 0, 0))) |
|
26 |
+}) |
|
27 |
+ |
|
28 |
+test_that("test default make_comb_mat", { |
|
29 |
+ m = make_comb_mat(lt) |
|
30 |
+ |
|
31 |
+ tb = table(table(unlist(lt))) |
|
32 |
+ expect_that(tb[names(tb) == 1][[1]], equals(sum(comb_size(m)[comb_degree(m) == 1]))) |
|
33 |
+ expect_that(tb[names(tb) == 2][[1]], equals(sum(comb_size(m)[comb_degree(m) == 2]))) |
|
34 |
+ expect_that(tb[names(tb) == 3][[1]], equals(sum(comb_size(m)[comb_degree(m) == 3]))) |
|
35 |
+ |
|
36 |
+ tb = table(unlist(lt)) |
|
37 |
+ expect_that(extract_comb(m, "111"), is_identical_to(names(tb[tb == 3]))) |
|
38 |
+ |
|
39 |
+ m1 = make_comb_mat(lt) |
|
40 |
+ m2 = make_comb_mat(list_to_matrix(lt)) |
|
41 |
+ attr(m1, "x") = NULL |
|
42 |
+ attr(m2, "x") = NULL |
|
43 |
+ attr(m1, "lt") = NULL |
|
44 |
+ attr(m2, "lt") = NULL |
|
45 |
+ expect_that(m1, equals(m2)) |
|
46 |
+ |
|
47 |
+ m1 = make_comb_mat(lt) |
|
48 |
+ m2 = make_comb_mat(list_to_matrix(lt)) |
|
49 |
+ expect_that(sort(extract_comb(m1, "111")), is_identical_to(sort(extract_comb(m2, "111")))) |
|
50 |
+ expect_that(sort(extract_comb(m1, "011")), is_identical_to(sort(extract_comb(m2, "011")))) |
|
51 |
+}) |
|
52 |
+ |
|
53 |
+ |
|
54 |
+test_that("test default make_comb_mat with universal_set", { |
|
55 |
+ m = make_comb_mat(lt, universal_set = letters) |
|
56 |
+ |
|
57 |
+ expect_that(length(comb_size(m)), is_identical_to(8)) |
|
58 |
+ expect_that("000" %in% comb_name(m), is_identical_to(TRUE)) |
|
59 |
+ expect_that(0 %in% comb_degree(m), is_identical_to(TRUE)) |
|
60 |
+ |
|
61 |
+}) |
|
62 |
+ |
|
63 |
+ |
|
64 |
+test_that("test default make_comb_mat with universal_set which is smaller than the input set", { |
|
65 |
+ m = make_comb_mat(lt, universal_set = letters[1:10]) |
|
66 |
+ |
|
67 |
+ expect_that("000" %in% comb_name(m), is_identical_to(TRUE)) |
|
68 |
+ expect_that(0 %in% comb_degree(m), is_identical_to(TRUE)) |
|
69 |
+ |
|
70 |
+}) |
|
71 |
+ |
|
72 |
+# test GRanges |
|
0 | 73 |
\ No newline at end of file |