git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/Rtreemix@28790 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,476 @@ |
1 |
+/* |
|
2 |
+ max_weight_branch.c |
|
3 |
+ |
|
4 |
+ Version : 1.1 |
|
5 |
+ Author : Niko Beerenwinkel |
|
6 |
+ |
|
7 |
+ (c) Max-Planck-Institut fuer Informatik, 2004 |
|
8 |
+*/ |
|
9 |
+ |
|
10 |
+ |
|
11 |
+#include "include/max_weight_branch.h" |
|
12 |
+ |
|
13 |
+ |
|
14 |
+// global variables: |
|
15 |
+ |
|
16 |
+static map<node,string> Mut; // node labels |
|
17 |
+static map<edge,double> c; // edge weights |
|
18 |
+ |
|
19 |
+static array< array<node> > cycle_contr; |
|
20 |
+// cycle_contr[i][k] == v_C <==> the k-th cycle C in B[i] has been contracted to v_C in G[i+1] |
|
21 |
+ |
|
22 |
+static array< array<edge> > cheapest_edge; |
|
23 |
+// cheapest_edge[i][k] is some cheapest edge on the k-th cycle in B[i] |
|
24 |
+ |
|
25 |
+static map<edge,edge> Phi; |
|
26 |
+// Phi[e'] == e <==> e in G[i] has been contracted to e' in G[i+1] |
|
27 |
+ |
|
28 |
+static array< array< map<edge,edge> > > alpha; |
|
29 |
+// e = (z, y), z not in C, y in C <==> alpha[e] == pred of y in C |
|
30 |
+ |
|
31 |
+ |
|
32 |
+ |
|
33 |
+// functions: |
|
34 |
+ |
|
35 |
+int compare_weights(const edge& e1, const edge& e2) |
|
36 |
+{ |
|
37 |
+ if (c[e1] < c[e2]) return -1; |
|
38 |
+ if (c[e1] > c[e2]) return 1; |
|
39 |
+ return 0; |
|
40 |
+} |
|
41 |
+ |
|
42 |
+ |
|
43 |
+ |
|
44 |
+double BRANCHING_WEIGHT(list<edge>& B, edge_array<double>& weight) |
|
45 |
+{ |
|
46 |
+ // total weight of a branching |
|
47 |
+ |
|
48 |
+ double total_weight = 0; |
|
49 |
+ edge e; |
|
50 |
+ |
|
51 |
+ forall(e, B) |
|
52 |
+ total_weight += weight[e]; |
|
53 |
+ |
|
54 |
+ return total_weight; |
|
55 |
+} |
|
56 |
+ |
|
57 |
+ |
|
58 |
+ |
|
59 |
+void UNCOVER_BRANCHING(graph& G, list<edge>& B) |
|
60 |
+{ |
|
61 |
+ // turn G into the branching defined by B (B \subset E(G)) |
|
62 |
+ |
|
63 |
+ edge e; |
|
64 |
+ |
|
65 |
+ edge_set E_B(G); // edge set of B |
|
66 |
+ |
|
67 |
+ forall(e, B) |
|
68 |
+ E_B.insert(e); |
|
69 |
+ |
|
70 |
+ edge_set T_D(G); //edges to delete |
|
71 |
+ |
|
72 |
+ forall_edges(e, G) |
|
73 |
+ if (! E_B.member(e)) |
|
74 |
+ T_D.insert(e); |
|
75 |
+ //G.del_edge(e); |
|
76 |
+ |
|
77 |
+ for(edge_set::iterator it = T_D.begin(); it != T_D.end(); ++it) |
|
78 |
+ G.del_edge(*it); |
|
79 |
+ |
|
80 |
+} |
|
81 |
+ |
|
82 |
+ |
|
83 |
+ |
|
84 |
+void DOT(graph& G, map<node,string>& mut, map<edge,double>& cond_prob, char *filestem) |
|
85 |
+{ |
|
86 |
+ // produces output in dot format (graphviz package) |
|
87 |
+ // see http://www.research.att.com/sw/tools/graphviz/ |
|
88 |
+ |
|
89 |
+ node v; edge e; |
|
90 |
+ |
|
91 |
+ char filename[128]; |
|
92 |
+ sprintf(filename, "%s.dot", filestem); |
|
93 |
+ |
|
94 |
+ std::ofstream dotout(filename); |
|
95 |
+ dotout << "digraph MWB {" << std::endl << std::endl; |
|
96 |
+ |
|
97 |
+ forall_nodes(v, G) |
|
98 |
+ { |
|
99 |
+ dotout << "\t \"" << v << "\""; |
|
100 |
+ dotout << " [ label=\"" << mut[v] << "\", shape=\"plaintext\", height=\"0.3\", fontsize=\"12\", style=\"filled\", fillcolor=\"white\" ];" << std::endl; |
|
101 |
+ } |
|
102 |
+ dotout << std::endl; |
|
103 |
+ |
|
104 |
+ forall_edges(e, G) |
|
105 |
+ { |
|
106 |
+ node s = G.source(e); |
|
107 |
+ node t = G.target(e); |
|
108 |
+ dotout.precision(2); |
|
109 |
+ dotout << std::showpoint; |
|
110 |
+ dotout << "\t \"" << s << "\" -> \"" << t << "\""; |
|
111 |
+ dotout << " [ fontsize=\"10\", label=\"" << cond_prob[e] << "\" ];" << std::endl; |
|
112 |
+ } |
|
113 |
+ |
|
114 |
+ dotout << "}" << std::endl; |
|
115 |
+ dotout.close(); |
|
116 |
+} |
|
117 |
+ |
|
118 |
+ |
|
119 |
+double branching_weight_intern(list<edge>& B) |
|
120 |
+{ |
|
121 |
+ // total weight of a branching |
|
122 |
+ |
|
123 |
+ double w = 0; |
|
124 |
+ |
|
125 |
+ edge e; |
|
126 |
+ forall(e, B) |
|
127 |
+ w += c[e]; |
|
128 |
+ |
|
129 |
+ return w; |
|
130 |
+} |
|
131 |
+ |
|
132 |
+ |
|
133 |
+ |
|
134 |
+list<edge> max_weight_subgraph_indeg_le_1(graph& G) |
|
135 |
+{ |
|
136 |
+ // find maximum weight subgraph of G with indeg <= 1 for all v in V(G) |
|
137 |
+ // a list of edges in G of this subgraph is returned |
|
138 |
+ |
|
139 |
+ node v; |
|
140 |
+ list<edge> B; |
|
141 |
+ |
|
142 |
+ forall_nodes(v, G) // choose heaviest inedge |
|
143 |
+ { |
|
144 |
+ list<edge> L = G.in_edges(v); |
|
145 |
+ if (! L.empty()) |
|
146 |
+ B.append(L.contents(L.max(*compare_weights))); |
|
147 |
+ } |
|
148 |
+ |
|
149 |
+ return B; |
|
150 |
+} |
|
151 |
+ |
|
152 |
+ |
|
153 |
+array< list<edge> > all_cycles(graph& G, list<edge>& B) |
|
154 |
+{ |
|
155 |
+ // find all cycles in B |
|
156 |
+ |
|
157 |
+ edge e; |
|
158 |
+ |
|
159 |
+ array< list<edge> > CA; // array of cycles |
|
160 |
+ int k = -1; // cycle index |
|
161 |
+ |
|
162 |
+ // construct subgraph: |
|
163 |
+ GRAPH<node,edge> G_B; // the subgraph of G induced by B |
|
164 |
+ map<node,node> proj; // proj : V(G) -->> V(G_B) |
|
165 |
+ |
|
166 |
+ forall(e, B) |
|
167 |
+ { |
|
168 |
+ node s = G.source(e); |
|
169 |
+ if (! proj.defined(s)) |
|
170 |
+ { |
|
171 |
+ node s_G = G_B.new_node(); |
|
172 |
+ G_B[s_G] = s; // needed? |
|
173 |
+ proj[s] = s_G; |
|
174 |
+ } |
|
175 |
+ node t = G.target(e); |
|
176 |
+ if (! proj.defined(t)) |
|
177 |
+ { |
|
178 |
+ node t_G = G_B.new_node(); |
|
179 |
+ G_B[t_G] = t; // needed? |
|
180 |
+ proj[t] = t_G; |
|
181 |
+ } |
|
182 |
+ G_B[G_B.new_edge(proj[s], proj[t])] = e; |
|
183 |
+ } |
|
184 |
+ |
|
185 |
+ // identify cycles in G_B: |
|
186 |
+ list<edge> CEL; // cycle edge list, |
|
187 |
+ // will contain one edge (in G_B) from each cycle after Is_Acyclic call |
|
188 |
+ |
|
189 |
+ if (! Is_Acyclic(G_B, CEL)) |
|
190 |
+ { |
|
191 |
+ edge first_e; |
|
192 |
+ forall(first_e, CEL) |
|
193 |
+ { |
|
194 |
+ list<edge> C; // a cycle is stored as the list of its edges in G |
|
195 |
+ |
|
196 |
+ // iterate over cycle following inedges: |
|
197 |
+ node s = G_B.target(first_e); |
|
198 |
+ node first_v = s; // starting node; |
|
199 |
+ edge e = NULL; //edge e = nil; |
|
200 |
+ while (s != first_v || e == NULL)//nil) |
|
201 |
+ { |
|
202 |
+ e = G_B.in_edges(s).head(); |
|
203 |
+ s = G_B.source(e); // move on |
|
204 |
+ C.push(G_B[e]); // store corresponding edge in G (!) |
|
205 |
+ // the order in C follows the edge directions |
|
206 |
+ } |
|
207 |
+ |
|
208 |
+ k++; CA.resize(k+1); |
|
209 |
+ CA[k] = C; |
|
210 |
+ } |
|
211 |
+ } |
|
212 |
+ |
|
213 |
+ return CA; |
|
214 |
+} |
|
215 |
+ |
|
216 |
+ |
|
217 |
+ |
|
218 |
+edge predecessor_in_cycle(node y, list<edge>& C) |
|
219 |
+{ |
|
220 |
+ // predecessor edge of y in C |
|
221 |
+ |
|
222 |
+ edge e; |
|
223 |
+ forall(e, C) |
|
224 |
+ { |
|
225 |
+ if (target(e) == y) |
|
226 |
+ return e; |
|
227 |
+ } |
|
228 |
+ |
|
229 |
+ return NULL; //nil; |
|
230 |
+} |
|
231 |
+ |
|
232 |
+ |
|
233 |
+void contract_cycle_edge(edge& e, double alpha_cost, double cheapest_cost, GRAPH<node,edge>& H, node& v_C, map<edge,edge>& edge_contr, node_set& VC) |
|
234 |
+{ |
|
235 |
+ // note: e \in E(H) |
|
236 |
+ |
|
237 |
+ node s = H.source(e); // \in G(H) |
|
238 |
+ node t = H.target(e); // \in G(H) |
|
239 |
+ |
|
240 |
+ if (VC.member(s) && (! VC.member(t))) // e leaves C |
|
241 |
+ { |
|
242 |
+ edge e_tilde = H.new_edge(v_C, t); // new edge |
|
243 |
+ c[e_tilde] = c[e]; |
|
244 |
+ edge_contr[H[e]] = e_tilde; |
|
245 |
+ |
|
246 |
+ H[e_tilde] = H[e]; // update |
|
247 |
+ Phi[e_tilde] = H[e]; |
|
248 |
+ |
|
249 |
+ H.del_edge(e); |
|
250 |
+ } |
|
251 |
+ |
|
252 |
+ if ((! VC.member(s)) && VC.member(t)) // e enters C |
|
253 |
+ { |
|
254 |
+ edge e_prime = H.new_edge(s, v_C); // new edge |
|
255 |
+ c[e_prime] = c[e] - alpha_cost + cheapest_cost; |
|
256 |
+ edge_contr[H[e]] = e_prime; |
|
257 |
+ |
|
258 |
+ H[e_prime] = H[e]; // update |
|
259 |
+ Phi[e_prime] = H[e]; |
|
260 |
+ |
|
261 |
+ H.del_edge(e); |
|
262 |
+ } |
|
263 |
+ |
|
264 |
+ if (VC.member(s) && VC.member(t)) // e is in C |
|
265 |
+ H.del_edge(e); |
|
266 |
+ |
|
267 |
+} |
|
268 |
+ |
|
269 |
+ |
|
270 |
+ |
|
271 |
+string contract_cycle_node(node& v, GRAPH<node,edge>& H, node& v_C, map<node,node>& node_contr, node_set& VC) |
|
272 |
+{ |
|
273 |
+ string label; |
|
274 |
+ |
|
275 |
+ if (VC.member(v)) // v is on C |
|
276 |
+ { |
|
277 |
+ label += Mut[v]; |
|
278 |
+ H.del_node(v); |
|
279 |
+ node_contr[H[v]] = v_C; |
|
280 |
+ } |
|
281 |
+ |
|
282 |
+ return label; |
|
283 |
+} |
|
284 |
+ |
|
285 |
+ |
|
286 |
+ |
|
287 |
+void contract_cycle(int i, array< list<edge> >& CA, int k, GRAPH<node,edge>& H, map<node,node>& node_contr, map<edge,edge>& edge_contr) |
|
288 |
+{ |
|
289 |
+ node v; edge e; |
|
290 |
+ |
|
291 |
+ list<edge> C = CA[k]; // cycle to be contracted (indexed by k) |
|
292 |
+ |
|
293 |
+ // node set in H indiced by cycle (in G): |
|
294 |
+ node_set VC(H); // V(C) \subset H |
|
295 |
+ forall(e, C) |
|
296 |
+ { |
|
297 |
+ VC.insert(H.source(edge_contr[e])); |
|
298 |
+ VC.insert(H.target(edge_contr[e])); |
|
299 |
+ } |
|
300 |
+ |
|
301 |
+ // some cheapest edge on C |
|
302 |
+ cheapest_edge.resize(i+1); cheapest_edge[i].resize(k+1); |
|
303 |
+ cheapest_edge[i][k] = C.contents(C.min(*compare_weights)); |
|
304 |
+ |
|
305 |
+ // new node v_C for contracted cycle C |
|
306 |
+ node v_C = H.new_node(); |
|
307 |
+ cycle_contr.resize(i+1); cycle_contr[i].resize(k+1); |
|
308 |
+ cycle_contr[i][k] = v_C; // in G[i+1] |
|
309 |
+ |
|
310 |
+ // contract edges in H |
|
311 |
+ list<edge> EL = H.all_edges(); |
|
312 |
+ forall(e, EL) |
|
313 |
+ { |
|
314 |
+ alpha.resize(i+1); alpha[i].resize(k+1); |
|
315 |
+ alpha[i][k][H[e]] = predecessor_in_cycle(target(H[e]), C); // predecessor edge of target(e) in C |
|
316 |
+ contract_cycle_edge(e, c[alpha[i][k][H[e]]], c[cheapest_edge[i][k]], H, v_C, edge_contr, VC); |
|
317 |
+ } |
|
318 |
+ |
|
319 |
+ // contract nodes in H |
|
320 |
+ string new_label("("); |
|
321 |
+ list<node> NL = H.all_nodes(); |
|
322 |
+ forall(v, NL) |
|
323 |
+ new_label += contract_cycle_node(v, H, v_C, node_contr, VC); |
|
324 |
+ Mut[v_C] = new_label + ")"; |
|
325 |
+ |
|
326 |
+} |
|
327 |
+ |
|
328 |
+ |
|
329 |
+ |
|
330 |
+void contract_all_cycles(graph& G, int i, array< list<edge> > CL, GRAPH<node,edge>& H) |
|
331 |
+{ |
|
332 |
+ // construct the contraction (G, c) --> (H, c) |
|
333 |
+ |
|
334 |
+ node v; edge e; |
|
335 |
+ |
|
336 |
+ // {node, edge}_contr : (G, c) -->> (H, c') |
|
337 |
+ map<node,node> node_contr; // node contraction map |
|
338 |
+ map<edge,edge> edge_contr; // edge contraction map |
|
339 |
+ |
|
340 |
+ // initialize as identity map |
|
341 |
+ forall_nodes(v, G) |
|
342 |
+ { |
|
343 |
+ node_contr[v] = H.new_node(); |
|
344 |
+ H[node_contr[v]] = v; |
|
345 |
+ Mut[node_contr[v]] = Mut[v]; // labels |
|
346 |
+ } |
|
347 |
+ forall_edges(e, G) |
|
348 |
+ { |
|
349 |
+ edge_contr[e] = H.new_edge(node_contr[G.source(e)], node_contr[G.target(e)]); |
|
350 |
+ H[edge_contr[e]] = e; |
|
351 |
+ c[edge_contr[e]] = c[e]; // weights |
|
352 |
+ } |
|
353 |
+ |
|
354 |
+ // contract cycles in H |
|
355 |
+ for (int k=CL.low(); k<=CL.high(); k++) |
|
356 |
+ { |
|
357 |
+ contract_cycle(i, CL, k, H, node_contr, edge_contr); |
|
358 |
+ list<edge> empty_list; |
|
359 |
+ } |
|
360 |
+} |
|
361 |
+ |
|
362 |
+ |
|
363 |
+ |
|
364 |
+void reconstruct_branching(int i, GRAPH<node,edge>& H, list<edge>& B_H, list<edge>& B_G, array< list<edge> >& CA) |
|
365 |
+{ |
|
366 |
+ // reconstruct branching B_G from the contraction |
|
367 |
+ // (G, B_G==B[i-1]) --> (H==G[i], B_H==B[i]) |
|
368 |
+ // CA is the array of cycles in G |
|
369 |
+ |
|
370 |
+ edge e; |
|
371 |
+ |
|
372 |
+ B_G.clear(); |
|
373 |
+ |
|
374 |
+ node_set CCS(H); // contracted cycles in H |
|
375 |
+ for (int k=CA.low(); k<=CA.high(); k++) |
|
376 |
+ CCS.insert(cycle_contr[i-1][k]); |
|
377 |
+ |
|
378 |
+ // E(B_G) := E(B_H) \ {e_prime=(z, v_C) | z \in V(B_H), C cycle in B_G} |
|
379 |
+ forall(e, B_H) |
|
380 |
+ if (! CCS.member(H.target(e))) |
|
381 |
+ B_G.append(H[e]); |
|
382 |
+ |
|
383 |
+ // check all cycles in B_G |
|
384 |
+ for (int k=CA.high(); k>=CA.low(); k--) |
|
385 |
+ { |
|
386 |
+ list<edge> C = CA[k]; // cycle edges |
|
387 |
+ |
|
388 |
+ // look for e'==(z, v_C) in B_H |
|
389 |
+ edge e_prime = NULL; //nil; |
|
390 |
+ forall(e, B_H) |
|
391 |
+ if (H.target(e) == cycle_contr[i-1][k]) |
|
392 |
+ e_prime = e; |
|
393 |
+ |
|
394 |
+ if (e_prime != NULL) //nil) // e' == (z, v_C) in B_H |
|
395 |
+ { |
|
396 |
+ B_G.append(Phi[e_prime]); |
|
397 |
+ forall(e, C) |
|
398 |
+ if (e != alpha[i-1][k][Phi[e_prime]]) |
|
399 |
+ B_G.append(e); |
|
400 |
+ } |
|
401 |
+ else // no edge (z, v_C) in B_H |
|
402 |
+ { |
|
403 |
+ forall(e, C) |
|
404 |
+ if (e != cheapest_edge[i-1][k]) |
|
405 |
+ B_G.append(e); |
|
406 |
+ } |
|
407 |
+ |
|
408 |
+ } |
|
409 |
+} |
|
410 |
+ |
|
411 |
+ |
|
412 |
+ |
|
413 |
+list<edge> MAX_WEIGHT_BRANCHING(graph& G0, map<node,string>& node_label, edge_array<double>& weight) |
|
414 |
+{ |
|
415 |
+ // Compute a maximum weight branching of the weighted digraph (G, weight) |
|
416 |
+ // The list of edges of the branching is returned. |
|
417 |
+ // The algorithm with all notation is taken from the book |
|
418 |
+ // Korte, Vygen: Combinatorial Optimization, Springer 2000, pp. 121-125 |
|
419 |
+ |
|
420 |
+ node v; edge e; |
|
421 |
+ |
|
422 |
+ // step 0: data structures: |
|
423 |
+ array< GRAPH<node,edge> > G(1); // G[i+1] is subgraph of G[i] |
|
424 |
+ array< list<edge> > B(1); // B[i] \subset E(G[i]) |
|
425 |
+ array< array< list<edge> > > CAA(1); // array of cycle arrays, each cycle is a subset of B[i] |
|
426 |
+ |
|
427 |
+ // step 1: initialization. |
|
428 |
+ CopyGraph(G[0], G0); |
|
429 |
+ forall_edges(e, G[0]) // edge weights |
|
430 |
+ c[e] = weight[G[0][e]]; |
|
431 |
+ |
|
432 |
+ forall_nodes(v, G[0]) // node labels |
|
433 |
+ Mut[v] = node_label[G[0][v]]; |
|
434 |
+ |
|
435 |
+ // step 2: construct greedy subgraph B[i]. |
|
436 |
+ B[0] = max_weight_subgraph_indeg_le_1(G[0]); |
|
437 |
+ |
|
438 |
+ // step 3: for all cycles in B[i]. |
|
439 |
+ CAA[0] = all_cycles(G[0], B[0]); |
|
440 |
+ |
|
441 |
+ int i = 0; |
|
442 |
+ while (CAA[i].size() > 0) |
|
443 |
+ { |
|
444 |
+ // step 4: construct (G[i+1], c[i+1]) from (G[i], c[i]). |
|
445 |
+ G.resize(i+2); B.resize(i+2); CAA.resize(i+2); |
|
446 |
+ |
|
447 |
+ contract_all_cycles(G[i], i, CAA[i], G[i+1]); |
|
448 |
+ |
|
449 |
+ i++; |
|
450 |
+ B[i] = max_weight_subgraph_indeg_le_1(G[i]); // (step 2) |
|
451 |
+ |
|
452 |
+ CAA[i] = all_cycles(G[i], B[i]); // (step 3) |
|
453 |
+ } |
|
454 |
+ |
|
455 |
+ // step 5+6: reconstruct the maximum branching B |
|
456 |
+ while (i > 0) |
|
457 |
+ { |
|
458 |
+ reconstruct_branching(i, G[i], B[i], B[i-1], CAA[i-1]); |
|
459 |
+ i--; |
|
460 |
+ } |
|
461 |
+ |
|
462 |
+ list<edge> B0; |
|
463 |
+ forall(e, B[i]){ |
|
464 |
+ B0.append(G[i][e]); |
|
465 |
+ } |
|
466 |
+ |
|
467 |
+ //clear the global static variables |
|
468 |
+ Mut.clear(); |
|
469 |
+ c.clear(); |
|
470 |
+ cycle_contr.clear(); |
|
471 |
+ cheapest_edge.clear(); |
|
472 |
+ Phi.clear(); |
|
473 |
+ alpha.clear(); |
|
474 |
+ |
|
475 |
+ return B0; |
|
476 |
+} |