Browse code

Added possibility to use different aggregation functions for running windows (DataTrack)

Robert Ivánek authored on 07/01/2022 17:43:20
Showing 2 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: Gviz
2 2
 Title: Plotting data and annotation information along genomic coordinates
3
-Version: 1.39.2
3
+Version: 1.39.3
4 4
 Authors@R: c(person("Florian", "Hahne", role="aut"),
5 5
 	 person("Steffen", "Durinck", role="aut"),
6 6
 	 person("Robert", "Ivanek", role=c("aut", "cre"), email="robert.ivanek@unibas.ch", comment=c(ORCID="0000-0002-8403-056X")),
... ...
@@ -982,32 +982,67 @@ setMethod(
982 982
 .aggregator <- function(GdObject) {
983 983
     agFun <- .dpOrDefault(GdObject, "aggregation", "mean")
984 984
     if (is.list(agFun)) {
985
-          agFun <- agFun[[1]]
986
-      }
985
+        agFun <- agFun[[1]]
986
+    }
987 987
     fun <- if (is.character(agFun)) {
988 988
         switch(agFun,
989
-            "mean" = rowMeans,
990
-            "sum" = rowSums,
991
-            "median" = rowMedians,
992
-            "extreme" = function(x) apply(x, 1, .extreme),
993
-            "min" = rowMin,
994
-            "max" = rowMax,
995
-            rowMeans
989
+               "mean" = rowMeans,
990
+               "sum" = rowSums,
991
+               "median" = rowMedians,
992
+               "extreme" = function(x) apply(x, 1, .extreme),
993
+               "min" = rowMin,
994
+               "max" = rowMax,
995
+               rowMeans
996 996
         )
997 997
     } else {
998 998
         if (is.function(agFun)) {
999 999
             function(x) apply(x, 1, agFun)
1000 1000
         } else {
1001 1001
             stop(
1002
-                "display parameter 'aggregation' has to be a function or a character",
1003
-                "scalar in c('mean', 'median', 'sum', 'extreme')"
1004
-            )
1002
+                "display parameter 'aggregation' has to be a function or a character ",
1003
+                "scalar in c('mean', 'median', 'sum', 'min', 'max')")
1005 1004
         }
1006 1005
     }
1007 1006
     return(fun)
1008 1007
 }
1009 1008
 
1010
-
1009
+.runaggregator <- function(GdObject) {
1010
+    agFun <- .dpOrDefault(GdObject, "aggregation", "mean")
1011
+    if (is.list(agFun)) {
1012
+        agFun <- agFun[[1]]
1013
+    }
1014
+    fun <- if (is.character(agFun)) {
1015
+        switch(agFun,
1016
+               "mean" = runmean,
1017
+               "sum" = runsum,
1018
+               "median" = runmed2 <-  function(x, k, na.rm=FALSE, ...) { 
1019
+                   na.action <- if (na.rm) { "na.omit" } else { "+Big_alternate" }
1020
+                       runmed(x=as.numeric(x), k=k, na.action=na.action) 
1021
+                   },
1022
+               "min" = runqmin <- function(x, k, i=1, ...) { runq(x=x, k=k, i=i, ...) },
1023
+               "max" = runqmax <- function(x, k, i=k, ...) { runq(x=x, k=k, i=i, ...) },
1024
+               runmean
1025
+        )
1026
+    } else {
1027
+        if (is.function(agFun)) {
1028
+            function(x, k, na.rm=FALSE, endrule="constant") {
1029
+                ans <- vapply(0:(length(x)-k), function(offset) agFun(x[1:k + offset], na.rm=na.rm), FUN.VALUE = numeric(1))
1030
+                ans <- Rle(ans)
1031
+                if (endrule == "constant") {
1032
+                    j <- (k + 1L)%/%2L
1033
+                    runLength(ans)[1L] <- runLength(ans)[1L] + (j - 1L)
1034
+                    runLength(ans)[nrun(ans)] <- runLength(ans)[nrun(ans)] + (j - 1L)
1035
+                }
1036
+                ans
1037
+            }
1038
+        } else {
1039
+            stop(
1040
+                "display parameter 'aggregation' has to be a function or a character ",
1041
+                "scalar in c('mean', 'median', 'sum', 'min', 'max')")
1042
+        }
1043
+    }
1044
+    return(fun)
1045
+}
1011 1046
 
1012 1047
 ## For DataTracks we want to collapse data values using the aggregation function provided by calling .aggregator().
1013 1048
 ## In addition values can be aggregated over fixed window slices when gpar 'window' is not NULL, and using a sliding
... ...
@@ -1067,7 +1102,8 @@ setMethod("collapseTrack", signature(GdObject = "DataTrack"), function(GdObject,
1067 1102
             }
1068 1103
             ind <- unlist(mapply(function(x, y) x:y, start(GdObject), end(GdObject))) - min(GdObject) + 1
1069 1104
             rm[ind] <- rep(sc[1, ], width(GdObject))
1070
-            runwin <- suppressWarnings(runmean(Rle(as.numeric(rm)), k = windowSize, endrule = "constant", na.rm = TRUE))
1105
+            runAgFun <- .runaggregator(GdObject)
1106
+            runwin <- suppressWarnings(runAgFun(Rle(as.numeric(rm)), k = windowSize, endrule = "constant", na.rm = TRUE))
1071 1107
             seqSel <- findRun(as.integer(position(GdObject)) - min(GdObject) + 1, runwin)
1072 1108
             newDat <- matrix(runValue(runwin)[seqSel], nrow = 1)
1073 1109
             if (nrow(sc) > 1) {
... ...
@@ -1075,7 +1111,7 @@ setMethod("collapseTrack", signature(GdObject = "DataTrack"), function(GdObject,
1075 1111
                     newDat,
1076 1112
                     do.call(rbind, lapply(2:nrow(sc), function(x) {
1077 1113
                         rm[ind] <- rep(sc[x, ], width(GdObject))
1078
-                        runwin <- suppressWarnings(runmean(Rle(as.numeric(rm)), k = windowSize, endrule = "constant", na.rm = TRUE))
1114
+                        runwin <- suppressWarnings(runAgFun(Rle(as.numeric(rm)), k = windowSize, endrule = "constant", na.rm = TRUE))
1079 1115
                         seqSel <- findRun(as.integer(position(GdObject)) - min(GdObject) + 1, runwin)
1080 1116
                         runValue(runwin)[seqSel]
1081 1117
                         # suppressWarnings(runValue(runmean(Rle(as.numeric(rm)), k = windowSize, endrule = "constant", na.rm = TRUE)))[seqSel]
... ...
@@ -1264,7 +1300,7 @@ setMethod("subset", signature(x = "OverlayTrack"), function(x, ...) {
1264 1300
     return(x)
1265 1301
 })
1266 1302
 
1267
-## For DataTracks we cut exactly, and also reduce to the current chromosome unless told explicitely not to
1303
+## For DataTracks we cut exactly, and also reduce to the current chromosome unless told explicitly not to
1268 1304
 setMethod("subset", signature(x = "DataTrack"), function(x, from = NULL, to = NULL, sort = FALSE, drop = TRUE, use.defaults = TRUE, ...) {
1269 1305
     ## Subset to a single chromosome first
1270 1306
     if (drop) {