Browse code

Added missing possibility to run different aggreation fucntions over user defined windows in DataTrack [bug]. Some code cleanup in methods.

Robert Ivánek authored on 18/01/2022 08:30:58
Showing 1 changed files

... ...
@@ -35,7 +35,7 @@ setMethod("seqnames", "SequenceBSgenomeTrack", function(x) as.character(seqnames
35 35
 setMethod("seqlevels", "RangeTrack", function(x) unique(seqnames(x)))
36 36
 setMethod("seqlevels", "SequenceDNAStringSetTrack", function(x) seqnames(x)[width(x@sequence) > 0])
37 37
 setMethod("seqlevels", "SequenceRNAStringSetTrack", function(x) seqnames(x)[width(x@sequence) > 0])
38
-setMethod("seqlevels", "SequenceBSgenomeTrack", function(x) seqnames(x)[bsapply(new("BSParams", X = x@sequence, FUN = length, simplify = T)) > 0]) # maybe seqnames only to speed-up
38
+setMethod("seqlevels", "SequenceBSgenomeTrack", function(x) seqnames(x)[bsapply(new("BSParams", X = x@sequence, FUN = length, simplify = TRUE)) > 0]) # maybe seqnames only, to speed-up
39 39
 setMethod("seqinfo", "RangeTrack", function(x) table(seqnames(x)))
40 40
 
41 41
 ## Min and max ranges
... ...
@@ -205,7 +205,7 @@ setMethod("subseq", "ReferenceSequenceTrack", function(x, start = NA, end = NA,
205 205
     if (!is.na(start[1] + end[1] + width[1])) {
206 206
         warning("All 'start', 'stop' and 'width' are provided, ignoring 'width'")
207 207
         width <- NA
208
-    } 
208
+    }
209 209
     ## We want start and end to be set if width is provided
210 210
     if (!is.na(width[1])) {
211 211
         if (is.na(start) && is.na(end)) {
... ...
@@ -982,32 +982,67 @@ setMethod(
982 982
 .aggregator <- function(GdObject) {
983 983
     agFun <- .dpOrDefault(GdObject, "aggregation", "mean")
984 984
     if (is.list(agFun)) {
985
-          agFun <- agFun[[1]]
986
-      }
985
+        agFun <- agFun[[1]]
986
+    }
987 987
     fun <- if (is.character(agFun)) {
988 988
         switch(agFun,
989
-            "mean" = rowMeans,
990
-            "sum" = rowSums,
991
-            "median" = rowMedians,
992
-            "extreme" = function(x) apply(x, 1, .extreme),
993
-            "min" = rowMin,
994
-            "max" = rowMax,
995
-            rowMeans
989
+               "mean" = rowMeans,
990
+               "sum" = rowSums,
991
+               "median" = rowMedians,
992
+               "extreme" = function(x) apply(x, 1, .extreme),
993
+               "min" = rowMin,
994
+               "max" = rowMax,
995
+               rowMeans
996 996
         )
997 997
     } else {
998 998
         if (is.function(agFun)) {
999 999
             function(x) apply(x, 1, agFun)
1000 1000
         } else {
1001 1001
             stop(
1002
-                "display parameter 'aggregation' has to be a function or a character",
1003
-                "scalar in c('mean', 'median', 'sum', 'extreme')"
1004
-            )
1002
+                "display parameter 'aggregation' has to be a function or a character ",
1003
+                "scalar in c('mean', 'median', 'sum', 'min', 'max')")
1005 1004
         }
1006 1005
     }
1007 1006
     return(fun)
1008 1007
 }
1009 1008
 
1010
-
1009
+.runaggregator <- function(GdObject) {
1010
+    agFun <- .dpOrDefault(GdObject, "aggregation", "mean")
1011
+    if (is.list(agFun)) {
1012
+        agFun <- agFun[[1]]
1013
+    }
1014
+    fun <- if (is.character(agFun)) {
1015
+        switch(agFun,
1016
+               "mean" = runmean,
1017
+               "sum" = runsum,
1018
+               "median" = runmed2 <-  function(x, k, na.rm=FALSE, ...) {
1019
+                   na.action <- if (na.rm) { "na.omit" } else { "+Big_alternate" }
1020
+                       runmed(x=as.numeric(x), k=k, na.action=na.action)
1021
+                   },
1022
+               "min" = runqmin <- function(x, k, i=1, ...) { runq(x=x, k=k, i=i, ...) },
1023
+               "max" = runqmax <- function(x, k, i=k, ...) { runq(x=x, k=k, i=i, ...) },
1024
+               runmean
1025
+        )
1026
+    } else {
1027
+        if (is.function(agFun)) {
1028
+            function(x, k, na.rm=FALSE, endrule="constant") {
1029
+                ans <- vapply(0:(length(x)-k), function(offset) agFun(x[seq_len(k) + offset], na.rm=na.rm), FUN.VALUE = numeric(1))
1030
+                ans <- Rle(ans)
1031
+                if (endrule == "constant") {
1032
+                    j <- (k + 1L)%/%2L
1033
+                    runLength(ans)[1L] <- runLength(ans)[1L] + (j - 1L)
1034
+                    runLength(ans)[nrun(ans)] <- runLength(ans)[nrun(ans)] + (j - 1L)
1035
+                }
1036
+                ans
1037
+            }
1038
+        } else {
1039
+            stop(
1040
+                "display parameter 'aggregation' has to be a function or a character ",
1041
+                "scalar in c('mean', 'median', 'sum', 'min', 'max')")
1042
+        }
1043
+    }
1044
+    return(fun)
1045
+}
1011 1046
 
1012 1047
 ## For DataTracks we want to collapse data values using the aggregation function provided by calling .aggregator().
1013 1048
 ## In addition values can be aggregated over fixed window slices when gpar 'window' is not NULL, and using a sliding
... ...
@@ -1067,7 +1102,8 @@ setMethod("collapseTrack", signature(GdObject = "DataTrack"), function(GdObject,
1067 1102
             }
1068 1103
             ind <- unlist(mapply(function(x, y) x:y, start(GdObject), end(GdObject))) - min(GdObject) + 1
1069 1104
             rm[ind] <- rep(sc[1, ], width(GdObject))
1070
-            runwin <- suppressWarnings(runmean(Rle(as.numeric(rm)), k = windowSize, endrule = "constant", na.rm = TRUE))
1105
+            runAgFun <- .runaggregator(GdObject)
1106
+            runwin <- suppressWarnings(runAgFun(Rle(as.numeric(rm)), k = windowSize, endrule = "constant", na.rm = TRUE))
1071 1107
             seqSel <- findRun(as.integer(position(GdObject)) - min(GdObject) + 1, runwin)
1072 1108
             newDat <- matrix(runValue(runwin)[seqSel], nrow = 1)
1073 1109
             if (nrow(sc) > 1) {
... ...
@@ -1075,7 +1111,7 @@ setMethod("collapseTrack", signature(GdObject = "DataTrack"), function(GdObject,
1075 1111
                     newDat,
1076 1112
                     do.call(rbind, lapply(2:nrow(sc), function(x) {
1077 1113
                         rm[ind] <- rep(sc[x, ], width(GdObject))
1078
-                        runwin <- suppressWarnings(runmean(Rle(as.numeric(rm)), k = windowSize, endrule = "constant", na.rm = TRUE))
1114
+                        runwin <- suppressWarnings(runAgFun(Rle(as.numeric(rm)), k = windowSize, endrule = "constant", na.rm = TRUE))
1079 1115
                         seqSel <- findRun(as.integer(position(GdObject)) - min(GdObject) + 1, runwin)
1080 1116
                         runValue(runwin)[seqSel]
1081 1117
                         # suppressWarnings(runValue(runmean(Rle(as.numeric(rm)), k = windowSize, endrule = "constant", na.rm = TRUE)))[seqSel]
... ...
@@ -1264,7 +1300,7 @@ setMethod("subset", signature(x = "OverlayTrack"), function(x, ...) {
1264 1300
     return(x)
1265 1301
 })
1266 1302
 
1267
-## For DataTracks we cut exactly, and also reduce to the current chromosome unless told explicitely not to
1303
+## For DataTracks we cut exactly, and also reduce to the current chromosome unless told explicitly not to
1268 1304
 setMethod("subset", signature(x = "DataTrack"), function(x, from = NULL, to = NULL, sort = FALSE, drop = TRUE, use.defaults = TRUE, ...) {
1269 1305
     ## Subset to a single chromosome first
1270 1306
     if (drop) {
... ...
@@ -2102,6 +2138,7 @@ setMethod("drawGD", signature("AnnotationTrack"), function(GdObject, minBase, ma
2102 2138
         }
2103 2139
         ## Plotting of the item labels
2104 2140
         if (.dpOrDefault(GdObject, "showFeatureId", FALSE)) {
2141
+            #grid.text(str2expression(box$text), box$textX, box$textY,
2105 2142
             grid.text(box$text, box$textX, box$textY,
2106 2143
                 rot = rotation, gp = .fontGp(GdObject, subtype = "item"),
2107 2144
                 default.units = "native", just = c("center", "center")
... ...
@@ -2377,13 +2414,13 @@ setMethod("drawGD", signature("AlignmentsTrack"), function(GdObject, minBase, ma
2377 2414
             )
2378 2415
             ## print the number of reads together with the connecting lines (currently no scaling/resolution)
2379 2416
             if (sashNumbers) {
2380
-                grid.rect(sash$x[c(F, T, F)], -sash$y[c(F, T, F)],
2417
+                grid.rect(sash$x[c(FALSE, TRUE, FALSE)], -sash$y[c(FALSE, TRUE, FALSE)],
2381 2418
                     width = convertUnit(stringWidth(sash$score) * 1.5, "inches"),
2382 2419
                     height = convertUnit(stringHeight(sash$score) * 1.5, "inches"),
2383 2420
                     default.units = "native", gp = gpar(col = gp$col, fill = gp$fill)
2384 2421
                 )
2385 2422
                 grid.text(
2386
-                    label = sash$score, sash$x[c(F, T, F)], -sash$y[c(F, T, F)],
2423
+                    label = sash$score, sash$x[c(FALSE, TRUE, FALSE)], -sash$y[c(FALSE, TRUE, FALSE)],
2387 2424
                     default.units = "native", gp = gpar(col = gp$col)
2388 2425
                 )
2389 2426
             }
... ...
@@ -2715,11 +2752,7 @@ setMethod("drawGD", signature("GenomeAxisTrack"), function(GdObject, minBase, ma
2715 2752
     if (!is.null(scaleLen)) {
2716 2753
         len <- (maxBase - minBase + 1)
2717 2754
         if (scaleLen > len) {
2718
-            warning(paste("scale (", scaleLen,
2719
-                ") cannot be larger than plotted region",
2720
-                len, " - setting to ~5%\n",
2721
-                sep = ""
2722
-            ))
2755
+            warning(sprintf("scale (%d) cannot be larger than plotted region %d - setting to ~5%\n", scaleLen, len))
2723 2756
             scaleLen <- 0.05
2724 2757
         }
2725 2758
         xoff <- len * 0.03 + minBase
... ...
@@ -3587,21 +3620,21 @@ setMethod("drawGD", signature("DataTrack"), function(GdObject, minBase, maxBase,
3587 3620
             na_idx <- which(is.na(upper))
3588 3621
             ## case 1. there are no error bars to plot at all
3589 3622
             if (length(na_idx) == length(upper)) {
3590
-                if (debugMode) cat("\t Case 1: all empty. returning\n")
3623
+                if (debugMode) message("\t Case 1: all empty. returning")
3591 3624
                 return(TRUE)
3592 3625
                 ## case 2. no missing points
3593 3626
             } else if (length(na_idx) < 1) {
3594
-                if (debugMode) cat("\t Case 2: one continuous polygon\n")
3627
+                if (debugMode) message("\t Case 2: one continuous polygon")
3595 3628
                 panel.polygon(c(x, rev(x)), c(upper, rev(lower)),
3596 3629
                     border = col, col = fill, alpha = alpha, ...
3597 3630
                 )
3598 3631
                 ## case 3. have complete data with some or no missing points
3599 3632
             } else {
3600 3633
                 curr_start <- min(which(!is.na(upper)))
3601
-                if (debugMode) cat(sprintf("\t Case 3: %i of %i NA\n", length(na_idx), length(upper)))
3634
+                if (debugMode) message(sprintf("\t Case 3: %i of %i NA", length(na_idx), length(upper)))
3602 3635
                 curr_na_pos <- 1
3603 3636
                 while (curr_na_pos <= length(na_idx)) {
3604
-                    if (debugMode) cat(sprintf("\t\tcurr_na_pos = %i, na_idx length= %i\n", curr_na_pos, length(na_idx)))
3637
+                    if (debugMode) message(sprintf("\t\tcurr_na_pos = %i, na_idx length= %i", curr_na_pos, length(na_idx)))
3605 3638
                     ## complete the current poly
3606 3639
                     idx <- curr_start:(na_idx[curr_na_pos] - 1)
3607 3640
                     panel.polygon(c(x[idx], rev(x[idx])), c(upper[idx], rev(lower[idx])),
... ...
@@ -3609,7 +3642,7 @@ setMethod("drawGD", signature("DataTrack"), function(GdObject, minBase, maxBase,
3609 3642
                     )
3610 3643
                     ## contiguous empty spots - skip
3611 3644
                     while ((na_idx[curr_na_pos + 1] == na_idx[curr_na_pos] + 1) && (curr_na_pos < length(na_idx))) {
3612
-                        if (debugMode) cat(sprintf("\t\ttight-loop:curr_na_pos = %i\n", curr_na_pos))
3645
+                        if (debugMode) message(sprintf("\t\ttight-loop:curr_na_pos = %i", curr_na_pos))
3613 3646
                         curr_na_pos <- curr_na_pos + 1
3614 3647
                     }
3615 3648
                     ## at this point, either we've finished NA spots or the next one is far away.
... ...
@@ -3619,7 +3652,7 @@ setMethod("drawGD", signature("DataTrack"), function(GdObject, minBase, maxBase,
3619 3652
                 }
3620 3653
                 ## there is one last polygon at the end of the view range
3621 3654
                 if (na_idx[length(na_idx)] < length(upper)) {
3622
-                    if (debugMode) cat("\tWrapping last polygon\n")
3655
+                    if (debugMode) message("\tWrapping last polygon")
3623 3656
                     idx <- curr_start:length(upper)
3624 3657
                     panel.polygon(c(x[idx], rev(x[idx])), c(upper[idx], rev(lower[idx])),
3625 3658
                         col = fill, border = col, alpha = alpha, ...
... ...
@@ -3662,7 +3695,7 @@ setMethod("drawGD", signature("DataTrack"), function(GdObject, minBase, maxBase,
3662 3695
             names(fill) <- NULL
3663 3696
             for (j in seq_along(by)) {
3664 3697
                 g <- names(by)[j]
3665
-                if (debugMode) print(g)
3698
+                if (debugMode) message(g)
3666 3699
                 df <- data.frame(
3667 3700
                     x = position(GdObject), y = mu[[j]],
3668 3701
                     low = mu[[j]] - confint[[j]], high = mu[[j]] + confint[[j]],
... ...
@@ -4744,8 +4777,8 @@ setMethod("show", signature(object = "OverlayTrack"), function(object) {
4744 4777
 
4745 4778
 ## A helper function to plot general information about a ReferenceTrack
4746 4779
 .referenceTrackInfo <- function(object, type) {
4747
-    cat(sprintf(
4748
-        "%s '%s'\n| genome: %s\n| active chromosome: %s\n| referenced file: %s\n",
4780
+    message(sprintf(
4781
+        "%s '%s'\n| genome: %s\n| active chromosome: %s\n| referenced file: %s",
4749 4782
         type,
4750 4783
         names(object),
4751 4784
         genome(object),
... ...
@@ -4753,7 +4786,7 @@ setMethod("show", signature(object = "OverlayTrack"), function(object) {
4753 4786
         object@reference
4754 4787
     ))
4755 4788
     if (length(object@mapping) && type != "ReferenceDataTrack") {
4756
-          cat(sprintf("| mapping: %s\n", paste(names(object@mapping), as.character(object@mapping), sep = "=", collapse = ", ")))
4789
+          message(sprintf("| mapping: %s\n", paste(names(object@mapping), as.character(object@mapping), sep = "=", collapse = ", ")))
4757 4790
       }
4758 4791
 }
4759 4792