Browse code

Support different types in colQuantiles

All types expect for type = 7L (ie. the default) are calculated
very inefficiently, but expanding each column to a dense vector

This fixes the last open bit of #7

const-ae authored on 28/09/2020 16:21:45
Showing 4 changed files

... ...
@@ -507,14 +507,27 @@ setMethod("colCollapse", signature(x = "xgCMatrix"),
507 507
 #' @inherit MatrixGenerics::colQuantiles
508 508
 #' @export
509 509
 setMethod("colQuantiles", signature(x = "xgCMatrix"),
510
-          function(x, rows = NULL, cols = NULL, probs = seq(from = 0, to = 1, by = 0.25), na.rm=FALSE, drop = TRUE){
510
+          function(x, rows = NULL, cols = NULL, probs = seq(from = 0, to = 1, by = 0.25), na.rm=FALSE, type = 7L, drop = TRUE){
511 511
   if(! is.null(rows)){
512 512
     x <- x[rows, , drop = FALSE]
513 513
   }
514 514
   if(! is.null(cols)){
515 515
     x <- x[, cols, drop = FALSE]
516 516
   }
517
-  mat <- dgCMatrix_colQuantiles(x, probs, na_rm = na.rm)
517
+  if(type == 7L){
518
+    mat <- dgCMatrix_colQuantiles(x, probs, na_rm = na.rm)
519
+  }else{
520
+    mat <- t(expand_and_reduce_sparse_matrix_to_matrix(x, n_result_rows = length(probs), function(values){
521
+      if(na.rm){
522
+        values <- values[!is.na(values)]
523
+        stats::quantile(values, probs = probs, na.rm = na.rm, names = FALSE, type = type)
524
+      }else if(any(is.na(values))){
525
+        rep(NA_real_, length(probs))
526
+      }else{
527
+        stats::quantile(values, probs = probs, na.rm = na.rm, names = FALSE, type = type)
528
+      }
529
+    }))
530
+  }
518 531
   # Add dim names
519 532
   digits <- max(2L, getOption("digits"))
520 533
   colnames(mat) <- sprintf("%.*g%%", digits, 100 * probs)
... ...
@@ -34,3 +34,26 @@ reduce_sparse_matrix_to_matrix <- function(sp_mat, n_result_rows, reduce_functio
34 34
     }
35 35
   }
36 36
 }
37
+
38
+
39
+expand_and_reduce_sparse_matrix_to_matrix <- function(sp_mat, n_result_rows, reduce_function = function(dense_values){ NA_real_}){
40
+  if(length(sp_mat@p) == 0){
41
+    numeric(0)
42
+  }else{
43
+    res <- vapply(seq_len(length(sp_mat@p)-1), function(index){
44
+      start_pos <- sp_mat@p[index]
45
+      end_pos <- sp_mat@p[index + 1]
46
+      number_of_zeros <- nrow(sp_mat) - (end_pos - start_pos)
47
+      values <- sp_mat@x[start_pos + seq_len(end_pos - start_pos)]
48
+      row_indices <- sp_mat@i[start_pos + seq_len(end_pos - start_pos)]
49
+      dense_values <- rep(0, nrow(sp_mat))
50
+      dense_values[row_indices + 1] <- values
51
+      reduce_function(dense_values)
52
+    }, FUN.VALUE = rep(0.0, n_result_rows))
53
+    if(n_result_rows == 1){
54
+      matrix(res, nrow=1, ncol=length(res))
55
+    }else{
56
+      res
57
+    }
58
+  }
59
+}
... ...
@@ -11,6 +11,7 @@
11 11
   cols = NULL,
12 12
   probs = seq(from = 0, to = 1, by = 0.25),
13 13
   na.rm = FALSE,
14
+  type = 7L,
14 15
   drop = TRUE
15 16
 )
16 17
 
... ...
@@ -39,6 +40,11 @@ done.}
39 40
 \item{na.rm}{If \code{\link[base:logical]{TRUE}}, \code{\link{NA}}s
40 41
 are excluded first, otherwise not.}
41 42
 
43
+\item{type}{An integer specifying the type of estimator. See
44
+\code{stats::\link[stats]{quantile}()}. for more details. Note, that this
45
+is not a generic argument and not all implementation of this function have
46
+to provide it.}
47
+
42 48
 \item{drop}{If \code{TRUE} a vector is returned if \code{J == 1}.
43 49
 Note, that this is not a generic argument and not all implementation of
44 50
 this function have to provide it.}
... ...
@@ -199,6 +199,17 @@ for(idx in seq_along(matrix_list)){
199 199
     expect_equal(colQuantiles(sp_mat), matrixStats::colQuantiles(mat))
200 200
     expect_equal(colQuantiles(sp_mat, na.rm=TRUE), matrixStats::colQuantiles(mat, na.rm=TRUE))
201 201
     expect_equal(colQuantiles(sp_mat, rows = row_subset, cols = col_subset), matrixStats::colQuantiles(mat, rows = row_subset, cols = col_subset))
202
+
203
+    expect_equal(colQuantiles(sp_mat, type = 1L), matrixStats::colQuantiles(mat, type = 1L))
204
+    expect_equal(colQuantiles(sp_mat, type = 2L), matrixStats::colQuantiles(mat, type = 2L))
205
+    expect_equal(colQuantiles(sp_mat, type = 3L), matrixStats::colQuantiles(mat, type = 3L))
206
+
207
+    expect_equal(colQuantiles(sp_mat, type = 4L), matrixStats::colQuantiles(mat, type = 4L))
208
+    expect_equal(colQuantiles(sp_mat, type = 5L), matrixStats::colQuantiles(mat, type = 5L))
209
+    expect_equal(colQuantiles(sp_mat, type = 6L), matrixStats::colQuantiles(mat, type = 6L))
210
+    expect_equal(colQuantiles(sp_mat, type = 7L), matrixStats::colQuantiles(mat, type = 7L))
211
+    expect_equal(colQuantiles(sp_mat, type = 8L), matrixStats::colQuantiles(mat, type = 8L))
212
+    expect_equal(colQuantiles(sp_mat, type = 9L), matrixStats::colQuantiles(mat, type = 9L))
202 213
   })
203 214
 
204 215