Browse code

Refactor layoutDaylight part 2

- Cache frequent and expensive lookup such as `df$x`
- Exclude nodes in vectorized manner before for-loop
- Now it runs >2x faster with exactly the same results

Watal M. Iwasaki authored on 02/03/2018 10:59:47
Showing1 changed files

... ...
@@ -78,14 +78,9 @@ layoutEqualAngle <- function(tree, branch.length ){
78 78
         ## Get "start" and "end" angles of a segment for current node in the data.frame.
79 79
         start <- df[curNode, "start"]
80 80
         end <- df[curNode, "end"]
81
-
82
-        if (length(children) == 0) {
83
-            ## is a tip
84
-            next
85
-        }
86
-
87
-        for (i in seq_along(children)) {
88
-            child <- children[i]
81
+        cur_x = df[curNode, "x"]
82
+        cur_y = df[curNode, "y"]
83
+        for (child in children) {
89 84
             ## Get the number of tips for child node.
90 85
             ntip.child <- nb.sp[child]
91 86
 
... ...
@@ -96,16 +91,12 @@ layoutEqualAngle <- function(tree, branch.length ){
96 91
             ## beta = angle of line from parent node to i-th child.
97 92
             beta <- start + alpha / 2
98 93
 
99
-            ## if (branch.length == "none") {
100
-            ##     length.child <- 1
101
-            ## } else {
102 94
             length.child <- df[child, "branch.length"]
103
-            ##}
104 95
 
105 96
             ## update geometry of data.frame.
106 97
             ## Calculate (x,y) position of the i-th child node from current node.
107
-            df[child, "x"] <- df[curNode, "x"] + cospi(beta) * length.child
108
-            df[child, "y"] <- df[curNode, "y"] + sinpi(beta) * length.child
98
+            df[child, "x"] <- cur_x + cospi(beta) * length.child
99
+            df[child, "y"] <- cur_y + sinpi(beta) * length.child
109 100
             ## Calculate orthogonal angle to beta for tip label.
110 101
             df[child, "angle"] <- -90 - 180 * beta * sign(beta - 1)
111 102
             ## Update the start and end angles of the childs segment.
... ...
@@ -113,11 +104,8 @@ layoutEqualAngle <- function(tree, branch.length ){
113 104
             df[child, "end"] <- start + alpha
114 105
             start <- start + alpha
115 106
         }
116
-
117 107
     }
118
-
119
-    return(df)
120
-
108
+    df
121 109
 }
122 110
 
123 111
 ##' Equal daylight layout method for unrooted trees.
... ...
@@ -241,7 +229,7 @@ applyLayoutDaylight <- function(df, node_id){
241 229
   angle_list = purrr::map_dfr(subtrees, ~{
242 230
     getTreeArcAngles(df, node_id, .x) %>% dplyr::bind_rows()
243 231
   }) %>% dplyr::transmute(
244
-    .data$left,
232
+    left = .data$left,
245 233
     beta = .data$left - .data$right,
246 234
     beta = ifelse(.data$beta < 0, .data$beta + 2, .data$beta),
247 235
     subtree_id = seq_len(nrow(.))
... ...
@@ -289,30 +277,29 @@ applyLayoutDaylight <- function(df, node_id){
289 277
 ##' @param subtree named list of root id of subtree (node) and list of node ids for given subtree (subtree).
290 278
 ##' @return named list with right and left angles in range [0,2] i.e 1 = 180 degrees, 1.5 = 270 degrees.
291 279
 getTreeArcAngles <- function(df, origin_id, subtree) {
280
+    df_x = df$x
281
+    df_y = df$y
282
+    x_origin = df_x[origin_id]
283
+    y_origin = df_y[origin_id]
292 284
     ## Initialise variables
293 285
     theta_child <- 0.0
294 286
     subtree_root_id <- subtree$node
295 287
     subtree_node_ids <- subtree$subtree
296
-
297 288
     ## Initialise angle from origin node to parent node.
298 289
     ## If subtree_root_id is child of origin_id
299
-    if( any(subtree_root_id == getChild.df(df, origin_id)) ){
290
+    if (subtree_root_id %in% getChild.df(df, origin_id)) {
300 291
         ## get angle from original node to parent of subtree.
301
-        theta_left <- getNodeAngle.df(df, origin_id, subtree_root_id)
292
+        theta_left <- getNodeAngle.vector(x_origin, y_origin, df_x[subtree_root_id], df_y[subtree_root_id])
302 293
         theta_right <- theta_left
303 294
     } else if( subtree_root_id == origin_id ){
304 295
         ## Special case.
305 296
         ## get angle from parent of subtree to children
306 297
         children_ids <- getChild.df(df, subtree_root_id)
307
-
308 298
         if(length(children_ids) == 2){
309 299
             ## get angles from parent to it's two children.
310
-            theta1 <- getNodeAngle.df(df, origin_id, children_ids[1])
311
-            theta2 <- getNodeAngle.df(df, origin_id, children_ids[2])
312
-
300
+            theta1 <- getNodeAngle.vector(x_origin, y_origin, df_x[children_ids[1]], df_y[children_ids[1]])
301
+            theta2 <- getNodeAngle.vector(x_origin, y_origin, df_x[children_ids[2]], df_y[children_ids[2]])
313 302
             delta <- theta1 - theta2
314
-
315
-
316 303
             ## correct delta for points crossing 180/-180 quadrant.
317 304
             if(delta > 1){
318 305
                 delta_adj = delta - 2
... ...
@@ -321,7 +308,6 @@ getTreeArcAngles <- function(df, origin_id, subtree) {
321 308
             } else{
322 309
                 delta_adj <- delta
323 310
             }
324
-
325 311
             if(delta_adj >= 0){
326 312
                 theta_left = theta1
327 313
                 theta_right = theta2
... ...
@@ -331,80 +317,50 @@ getTreeArcAngles <- function(df, origin_id, subtree) {
331 317
             }
332 318
         }else{
333 319
             ## subtree only has one child node.
334
-            theta_left <- getNodeAngle.df(df, origin_id, children_ids[1])
320
+            theta_left <- getNodeAngle.vector(x_origin, y_origin, df_x[children_ids[1]], df_y[children_ids[1]])
335 321
             theta_right <- theta_left
336 322
         }
337
-
338 323
     } else {
339 324
         ## get the real root of df tree to initialise left and right angles.
340 325
         tree_root <- getRoot.df(df)
341 326
         if( !is.na(tree_root) & is.numeric(tree_root) ){
342
-            theta_left <- getNodeAngle.df(df, origin_id, tree_root)
327
+            theta_left <- getNodeAngle.vector(x_origin, y_origin, df_x[tree_root], df_y[tree_root])
343 328
             theta_right <- theta_left
344 329
         } else{
345 330
       print('ERROR: no root found!')
346 331
       theta_left <- NA
347 332
     }
348
-
349 333
   }
350
-
351 334
   # no parent angle found.
352
-  if (is.na(theta_left) ){
335
+  # Subtree has to have 1 or more nodes to compare.
336
+  if (is.na(theta_left) || (length(subtree_node_ids) == 0)){
353 337
     return(0)
354 338
   }
355
-
356
-
357 339
   # create vector with named columns
358 340
   # left-hand and right-hand angles between origin node and the extremities of the tree nodes.
359 341
   arc <- c('left' = theta_left, 'right' = theta_right)
360 342
 
361
-  # Subtree has to have 1 or more nodes to compare.
362
-  if (length(subtree_node_ids) == 0 ){
363
-    return(0)
364
-  }
365
-
366
-
367
-  # Remove tips from nodes list, but keeping order.
368
-  # internal_nodes <- df[!df$isTip,]$node
369
-  # subtree_node_ids <- intersect(subtree_node_ids, internal_nodes)
370
-
371
-
372 343
   # Calculate the angle from the origin node to each child node.
373 344
   # Moving from parent to children in depth-first traversal.
374
-  for( i in seq_along(subtree_node_ids) ){
375
-    parent_id <- subtree_node_ids[i]
345
+  # Skip if parent_id is a tip or parent and child node are the same.
346
+  subtree_node_ids = subtree_node_ids[subtree_node_ids %in% df$parent]
347
+  subtree_node_ids = subtree_node_ids[subtree_node_ids != origin_id]
348
+  for(parent_id in subtree_node_ids){
376 349
     # Get angle from origin node to parent node.
377
-    # Skip if parent_id is a tip or parent and child node are the same.
378
-    if(origin_id == parent_id | isTip.df(df, parent_id) ){
379
-      next
380
-    }
381
-
382
-    theta_parent <- getNodeAngle.df(df, origin_id, parent_id)
383
-
350
+    theta_parent <- getNodeAngle.vector(x_origin, y_origin, df_x[parent_id], df_y[parent_id])
384 351
     children_ids <- getChild.df(df, parent_id)
385
-
386
-    for( j in seq_along(children_ids)){
387
-      #delta_x <- df[subtree_node_id, 'x'] - df[origin_id, 'x']
388
-      #delta_y <- df[subtree_node_id, 'y'] - df[origin_id, 'y']
389
-      #angles[i] <- atan2(delta_y, delta_x) / pi
390
-      child_id <- children_ids[j]
391
-      # Skip if child is parent node of subtree.
392
-      if( child_id == origin_id ){
393
-        next
394
-      }
395
-
396
-      theta_child <- getNodeAngle.df(df, origin_id, child_id)
397
-
352
+    # Skip if child is parent node of subtree.
353
+    children_ids = children_ids[children_ids != origin_id]
354
+    for(child_id in children_ids){
355
+      theta_child <- getNodeAngle.vector(x_origin, y_origin, df_x[child_id], df_y[child_id])
398 356
       # Skip if child node is already inside arc.
399 357
       # if left < right angle (arc crosses 180/-180 quadrant) and child node is not inside arc of tree.
400 358
       # OR if left > right angle (arc crosses 0/360 quadrant) and child node is inside gap
401
-      if ( (arc['left'] < arc['right'] & !( theta_child > arc['left'] & theta_child < arc['right'])) |
402
-        (arc['left'] > arc['right'] & ( theta_child < arc['left'] & theta_child > arc['right'])) ){
359
+      if ((arc['left'] < arc['right'] & !(theta_child > arc['left'] & theta_child < arc['right'])) |
360
+          (arc['left'] > arc['right'] &  (theta_child < arc['left'] & theta_child > arc['right'])) ){
403 361
         # child node inside arc.
404 362
         next
405 363
       }
406
-
407
-
408 364
       delta <- theta_child - theta_parent
409 365
       delta_adj <- delta
410 366
       # Correct the delta if parent and child angles cross the 180/-180 half of circle.
... ...
@@ -415,9 +371,7 @@ getTreeArcAngles <- function(df, origin_id, subtree) {
415 371
       }else if( delta < -1){ # Edge between parent and child cross upper and lower quadrants of cirlce
416 372
         delta_adj <- delta + 2 # delta' = delta - 360
417 373
       }
418
-
419 374
       theta_child_adj <- theta_child
420
-
421 375
       # If angle change from parent to node is positive (anti-clockwise), check left angle
422 376
       if(delta_adj > 0){
423 377
         # If child/parent edges cross the -180/180 quadrant (angle between them is > 180),
... ...
@@ -429,8 +383,7 @@ getTreeArcAngles <- function(df, origin_id, subtree) {
429 383
             theta_child_adj <- theta_child - 2
430 384
           }
431 385
         }
432
-
433
-          # check if left angle of arc is less than angle of child. Update if true.
386
+        # check if left angle of arc is less than angle of child. Update if true.
434 387
         if( arc['left'] < theta_child_adj ){
435 388
           arc['left'] <- theta_child
436 389
         }
... ...
@@ -450,15 +403,12 @@ getTreeArcAngles <- function(df, origin_id, subtree) {
450 403
         if( arc['right'] > theta_child_adj  ){
451 404
           arc['right'] <- theta_child
452 405
         }
453
-
454 406
       }
455 407
     }
456
-
457 408
   }
458 409
   # Convert arc angles of [1, -1] to [2,0] domain.
459 410
   arc[arc<0] <- arc[arc<0] + 2
460
-  return(arc)
461
-
411
+  arc
462 412
 }
463 413
 
464 414
 ##' Rotate the points in a tree data.frame around a pivot node by the angle specified.
... ...
@@ -477,31 +427,24 @@ rotateTreePoints.df <- function(df, pivot_node, nodes, angle){
477 427
   sinpitheta <- sinpi(angle)
478 428
   pivot_x = df$x[pivot_node]
479 429
   pivot_y = df$y[pivot_node]
430
+  delta_x = df$x - pivot_x
431
+  delta_y = df$y - pivot_y
480 432
   df = dplyr::mutate(df,
481
-    delta_x = .data$x - pivot_x,
482
-    delta_y = .data$y - pivot_y,
483
-    x = ifelse(node %in% nodes, cospitheta * .data$delta_x - sinpitheta * .data$delta_y + pivot_x, .data$x),
484
-    y = ifelse(node %in% nodes, sinpitheta * .data$delta_x + cospitheta * .data$delta_y + pivot_y, .data$y),
485
-    delta_x = NULL,
486
-    delta_y = NULL
433
+    x = ifelse(.data$node %in% nodes, cospitheta * delta_x - sinpitheta * delta_y + pivot_x, .data$x),
434
+    y = ifelse(.data$node %in% nodes, sinpitheta * delta_x + cospitheta * delta_y + pivot_y, .data$y)
487 435
   )
436
+  x_parent = df$x[df$parent]
437
+  y_parent = df$y[df$parent]
488 438
   # Now update tip labels of rotated tree.
489 439
   # angle is in range [0, 360]
490 440
   # Update label angle of tipnode if not root node.
491 441
   nodes = nodes[! nodes %in% df$parent]
492
-  for(node in nodes){
493
-    # if (node %in% df$parent) next
494
-    # get parent
495
-    parent_id <- df$parent[df$node == node]
496
-    # if 'node' is not root, then update label angle.
497
-    theta_parent_child <- getNodeAngle.df(df, parent_id, node)
498
-    if(is.na(theta_parent_child)) next
499
-    # Update tip label angle, that is parallel to edge.
500
-    #df[node, 'angle'] <- -90 - 180 * theta_parent_child * sign(theta_parent_child - 1)
501
-    theta_parent_child = theta_parent_child + ifelse(theta_parent_child > 0, 0, 2)
502
-    df[node, 'angle'] = 180 * theta_parent_child
503
-  }
504
-  df
442
+  df %>% dplyr::mutate(
443
+    angle = ifelse(.data$node %in% nodes,
444
+       getNodeAngle.vector(x_parent, y_parent, .data$x, .data$y) %>%
445
+         {180 * ifelse(. < 0, 2 + ., .)},
446
+       .data$angle)
447
+  )
505 448
 }
506 449
 
507 450
 ##' Get the angle between the two nodes specified.
... ...
@@ -513,15 +456,18 @@ rotateTreePoints.df <- function(df, pivot_node, nodes, angle){
513 456
 ##' @return angle in range [-1, 1], i.e. degrees/180, radians/pi
514 457
 getNodeAngle.df <- function(df, origin_node_id, node_id){
515 458
   if (origin_node_id != node_id) {
516
-    delta_x <- df$x[node_id] - df$x[origin_node_id]
517
-    delta_y <- df$y[node_id] - df$y[origin_node_id]
518
-    angle <- atan2(delta_y, delta_x) / pi
519
-    return( angle )
459
+    df_x = df$x
460
+    df_y = df$y
461
+    atan2(df_y[node_id] - df_y[origin_node_id], df_x[node_id] - df_x[origin_node_id]) / pi
520 462
   }else{
521
-    return(NA)
463
+    NA
522 464
   }
523 465
 }
524 466
 
467
+getNodeAngle.vector <- function(x_origin, y_origin, x, y) {
468
+  atan2(y - y_origin, x - x_origin) / pi
469
+}
470
+
525 471
 euc.dist <- function(x1, x2) sqrt(sum((x1 - x2) ^ 2))
526 472
 
527 473
 ## Get the distances from the node to all other nodes in data.frame (including itself if in df)
... ...
@@ -624,27 +570,20 @@ getSubtreeUnrooted.df <- function(df, node){
624 570
   if (length(children_ids) == 0L) return(NULL)
625 571
   # if node leaf, return nothing.
626 572
 
627
-  # remaining_nodes <- getNodes_by_postorder(tree)
628
-  # Remove current node from remaining_nodes list.
629
-  remaining_nodes <- setdiff(df$node, node)
630
-
631
-  subtrees <- list()
632
-  for( child in children_ids ){
633
-    subtree <- getSubtree.df(df, child)
634
-    subtrees[[length(subtrees)+1]] <- list( node = child, subtree = subtree)
635
-    # remove subtree nodes from remaining nodes.
636
-    remaining_nodes <- setdiff(remaining_nodes, subtree)
637
-  }
573
+  subtrees = tibble::tibble(
574
+    node = children_ids,
575
+    subtree = purrr::map(.data$node, ~getSubtree.df(df, .x))
576
+  )
577
+  remaining_nodes = setdiff(df$node, purrr::flatten_int(subtrees$subtree))
638 578
 
639 579
   # The remaining nodes that are not found in the child subtrees are the remaining subtree nodes.
640 580
   # ie, parent node and all other nodes. We don't care how they are connected, just their id.
641 581
   parent_id <- getParent.df(df, node)
642 582
   # If node is not root.
643
-  if( length(parent_id) > 0 & length(remaining_nodes) >= 1){
644
-    subtrees[[length(subtrees)+1]] <- list( node = parent_id, subtree = remaining_nodes)
583
+  if ((length(parent_id) > 0) & (length(remaining_nodes) > 0)) {
584
+    subtrees = tibble::add_row(subtrees, node = parent_id, subtree = list(remaining_nodes))
645 585
   }
646
-
647
-  return(subtrees)
586
+  purrr::transpose(subtrees)
648 587
 }
649 588
 
650 589