All types expect for type = 7L (ie. the default) are calculated
very inefficiently, but expanding each column to a dense vector
This fixes the last open bit of #7
... | ... |
@@ -507,14 +507,27 @@ setMethod("colCollapse", signature(x = "xgCMatrix"), |
507 | 507 |
#' @inherit MatrixGenerics::colQuantiles |
508 | 508 |
#' @export |
509 | 509 |
setMethod("colQuantiles", signature(x = "xgCMatrix"), |
510 |
- function(x, rows = NULL, cols = NULL, probs = seq(from = 0, to = 1, by = 0.25), na.rm=FALSE, drop = TRUE){ |
|
510 |
+ function(x, rows = NULL, cols = NULL, probs = seq(from = 0, to = 1, by = 0.25), na.rm=FALSE, type = 7L, drop = TRUE){ |
|
511 | 511 |
if(! is.null(rows)){ |
512 | 512 |
x <- x[rows, , drop = FALSE] |
513 | 513 |
} |
514 | 514 |
if(! is.null(cols)){ |
515 | 515 |
x <- x[, cols, drop = FALSE] |
516 | 516 |
} |
517 |
- mat <- dgCMatrix_colQuantiles(x, probs, na_rm = na.rm) |
|
517 |
+ if(type == 7L){ |
|
518 |
+ mat <- dgCMatrix_colQuantiles(x, probs, na_rm = na.rm) |
|
519 |
+ }else{ |
|
520 |
+ mat <- t(expand_and_reduce_sparse_matrix_to_matrix(x, n_result_rows = length(probs), function(values){ |
|
521 |
+ if(na.rm){ |
|
522 |
+ values <- values[!is.na(values)] |
|
523 |
+ stats::quantile(values, probs = probs, na.rm = na.rm, names = FALSE, type = type) |
|
524 |
+ }else if(any(is.na(values))){ |
|
525 |
+ rep(NA_real_, length(probs)) |
|
526 |
+ }else{ |
|
527 |
+ stats::quantile(values, probs = probs, na.rm = na.rm, names = FALSE, type = type) |
|
528 |
+ } |
|
529 |
+ })) |
|
530 |
+ } |
|
518 | 531 |
# Add dim names |
519 | 532 |
digits <- max(2L, getOption("digits")) |
520 | 533 |
colnames(mat) <- sprintf("%.*g%%", digits, 100 * probs) |
... | ... |
@@ -34,3 +34,26 @@ reduce_sparse_matrix_to_matrix <- function(sp_mat, n_result_rows, reduce_functio |
34 | 34 |
} |
35 | 35 |
} |
36 | 36 |
} |
37 |
+ |
|
38 |
+ |
|
39 |
+expand_and_reduce_sparse_matrix_to_matrix <- function(sp_mat, n_result_rows, reduce_function = function(dense_values){ NA_real_}){ |
|
40 |
+ if(length(sp_mat@p) == 0){ |
|
41 |
+ numeric(0) |
|
42 |
+ }else{ |
|
43 |
+ res <- vapply(seq_len(length(sp_mat@p)-1), function(index){ |
|
44 |
+ start_pos <- sp_mat@p[index] |
|
45 |
+ end_pos <- sp_mat@p[index + 1] |
|
46 |
+ number_of_zeros <- nrow(sp_mat) - (end_pos - start_pos) |
|
47 |
+ values <- sp_mat@x[start_pos + seq_len(end_pos - start_pos)] |
|
48 |
+ row_indices <- sp_mat@i[start_pos + seq_len(end_pos - start_pos)] |
|
49 |
+ dense_values <- rep(0, nrow(sp_mat)) |
|
50 |
+ dense_values[row_indices + 1] <- values |
|
51 |
+ reduce_function(dense_values) |
|
52 |
+ }, FUN.VALUE = rep(0.0, n_result_rows)) |
|
53 |
+ if(n_result_rows == 1){ |
|
54 |
+ matrix(res, nrow=1, ncol=length(res)) |
|
55 |
+ }else{ |
|
56 |
+ res |
|
57 |
+ } |
|
58 |
+ } |
|
59 |
+} |
... | ... |
@@ -11,6 +11,7 @@ |
11 | 11 |
cols = NULL, |
12 | 12 |
probs = seq(from = 0, to = 1, by = 0.25), |
13 | 13 |
na.rm = FALSE, |
14 |
+ type = 7L, |
|
14 | 15 |
drop = TRUE |
15 | 16 |
) |
16 | 17 |
|
... | ... |
@@ -39,6 +40,11 @@ done.} |
39 | 40 |
\item{na.rm}{If \code{\link[base:logical]{TRUE}}, \code{\link{NA}}s |
40 | 41 |
are excluded first, otherwise not.} |
41 | 42 |
|
43 |
+\item{type}{An integer specifying the type of estimator. See |
|
44 |
+\code{stats::\link[stats]{quantile}()}. for more details. Note, that this |
|
45 |
+is not a generic argument and not all implementation of this function have |
|
46 |
+to provide it.} |
|
47 |
+ |
|
42 | 48 |
\item{drop}{If \code{TRUE} a vector is returned if \code{J == 1}. |
43 | 49 |
Note, that this is not a generic argument and not all implementation of |
44 | 50 |
this function have to provide it.} |
... | ... |
@@ -199,6 +199,17 @@ for(idx in seq_along(matrix_list)){ |
199 | 199 |
expect_equal(colQuantiles(sp_mat), matrixStats::colQuantiles(mat)) |
200 | 200 |
expect_equal(colQuantiles(sp_mat, na.rm=TRUE), matrixStats::colQuantiles(mat, na.rm=TRUE)) |
201 | 201 |
expect_equal(colQuantiles(sp_mat, rows = row_subset, cols = col_subset), matrixStats::colQuantiles(mat, rows = row_subset, cols = col_subset)) |
202 |
+ |
|
203 |
+ expect_equal(colQuantiles(sp_mat, type = 1L), matrixStats::colQuantiles(mat, type = 1L)) |
|
204 |
+ expect_equal(colQuantiles(sp_mat, type = 2L), matrixStats::colQuantiles(mat, type = 2L)) |
|
205 |
+ expect_equal(colQuantiles(sp_mat, type = 3L), matrixStats::colQuantiles(mat, type = 3L)) |
|
206 |
+ |
|
207 |
+ expect_equal(colQuantiles(sp_mat, type = 4L), matrixStats::colQuantiles(mat, type = 4L)) |
|
208 |
+ expect_equal(colQuantiles(sp_mat, type = 5L), matrixStats::colQuantiles(mat, type = 5L)) |
|
209 |
+ expect_equal(colQuantiles(sp_mat, type = 6L), matrixStats::colQuantiles(mat, type = 6L)) |
|
210 |
+ expect_equal(colQuantiles(sp_mat, type = 7L), matrixStats::colQuantiles(mat, type = 7L)) |
|
211 |
+ expect_equal(colQuantiles(sp_mat, type = 8L), matrixStats::colQuantiles(mat, type = 8L)) |
|
212 |
+ expect_equal(colQuantiles(sp_mat, type = 9L), matrixStats::colQuantiles(mat, type = 9L)) |
|
202 | 213 |
}) |
203 | 214 |
|
204 | 215 |
|