Browse code

Make sparseMatrixStats compatible with matrixStats v0.58.0

* Add na.rm to colAvgsPerRowSet
* properly import matrixStats anyMissing
* Update documentation

const-ae authored on 02/02/2021 15:57:46
Showing 10 changed files

... ...
@@ -11,7 +11,7 @@ Description: High performance functions for row and column operations on sparse
11 11
 License: MIT + file LICENSE
12 12
 Encoding: UTF-8
13 13
 LazyData: true
14
-RoxygenNote: 7.1.0
14
+RoxygenNote: 7.1.1
15 15
 LinkingTo: 
16 16
     Rcpp
17 17
 Imports: 
... ...
@@ -864,7 +864,7 @@ function(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0){
864 864
 #' @aliases colAvgsPerRowSet
865 865
 #' @export
866 866
 setMethod("colAvgsPerRowSet", signature(X = "xgCMatrix"),
867
-function(X, W = NULL, cols = NULL, S, FUN = colMeans2, ..., tFUN = FALSE){
867
+function(X, W = NULL, cols = NULL, S, FUN = colMeans2, ..., na.rm = NA, tFUN = FALSE){
868 868
   if(! is.null(W)) stop("the W parameter is not supported.")
869 869
   nbrOfSets <- ncol(S)
870 870
   setNames <- colnames(S)
... ...
@@ -876,6 +876,10 @@ function(X, W = NULL, cols = NULL, S, FUN = colMeans2, ..., tFUN = FALSE){
876 876
   }
877 877
   dimX <- dim(X)
878 878
   tFUN <- as.logical(tFUN)
879
+
880
+  # Check if missing values have to be excluded while averaging
881
+  if (is.na(na.rm)) na.rm <- (base::anyNA(X@x) || matrixStats::anyMissing(S))
882
+
879 883
   colnamesX <- colnames(X)
880 884
   dimnames(X) <- list(NULL, NULL)
881 885
 
... ...
@@ -886,7 +890,11 @@ function(X, W = NULL, cols = NULL, S, FUN = colMeans2, ..., tFUN = FALSE){
886 890
     if (tFUN) {
887 891
       Zjj <- t(Zjj)
888 892
     }
889
-    Zjj <- FUN(Zjj, ...)
893
+    tryCatch({
894
+      Zjj <- FUN(Zjj, ..., na.rm = na.rm)
895
+    }, error = function(err){
896
+      Zjj <<- FUN(as.matrix(Zjj), ..., na.rm = na.rm)
897
+    })
890 898
     if (length(Zjj) != dimX[2L])
891 899
       stop("Internal error: length(Zjj) != dimX[1L]")
892 900
     Zjj
... ...
@@ -559,8 +559,8 @@ setMethod("rowIQRDiffs", signature(x = "dgCMatrix"),
559 559
 #' @rdname colAvgsPerRowSet-xgCMatrix-method
560 560
 #' @export
561 561
 setMethod("rowAvgsPerColSet", signature(X = "xgCMatrix"),
562
-          function(X, W = NULL, rows = NULL, S, FUN = rowMeans2, ..., tFUN = FALSE){
563
-  tZ <- colAvgsPerRowSet(t(X), W = W, cols = rows, S = S, FUN  = FUN, ..., tFUN = ! tFUN)
562
+          function(X, W = NULL, rows = NULL, S, FUN = rowMeans2, ..., na.rm = NA, tFUN = FALSE){
563
+  tZ <- colAvgsPerRowSet(t(X), W = W, cols = rows, S = S, FUN  = FUN, ..., na.rm = na.rm, tFUN = ! tFUN)
564 564
   t(tZ)
565 565
 })
566 566
 
... ...
@@ -52,10 +52,9 @@ mat <- matrix(rnorm(15), nrow = 5, ncol = 3)
52 52
 }
53 53
 \seealso{
54 54
 \itemize{
55
-\item \code{matrixStats::\link[matrixStats]{rowAlls}()} and
56
-\code{matrixStats::\link[matrixStats:rowAlls]{colAlls}()} which are used
57
-when the input is a \code{\link{matrix}}, \code{\link{array}},
58
-or \code{\link{numeric}} vector.
55
+\item \code{matrixStats::\link[matrixStats:rowAlls]{rowAlls}()} and
56
+\code{matrixStats::\link[matrixStats:rowAlls]{colAlls}()} which are
57
+used when the input is a \code{matrix} or \code{numeric} vector.
59 58
 \item For checks if \emph{any} element is equal to a value, see
60 59
 \code{\link[MatrixGenerics]{rowAnys}()}.
61 60
 \item \code{base::\link{all}()}.
... ...
@@ -53,8 +53,10 @@ mat <- matrix(rnorm(15), nrow = 5, ncol = 3)
53 53
 \seealso{
54 54
 \itemize{
55 55
 \item \code{matrixStats::\link[matrixStats:rowAlls]{rowAnys}()} and
56
-\code{matrixStats::\link[matrixStats:rowAlls]{colAnys}()} which are used
57
-when the input is a \code{matrix} or \code{numeric} vector.
56
+\code{matrixStats::\link[matrixStats:rowAlls]{colAnys}()} which are
57
+used when the input is a \code{matrix} or \code{numeric} vector.
58
+\item For checks if \emph{all} elements are equal to a value, see
59
+\code{\link[MatrixGenerics]{rowAlls}()}.
58 60
 \item \code{base::\link{any}()}.
59 61
 }
60 62
 }
... ...
@@ -13,6 +13,7 @@
13 13
   S,
14 14
   FUN = colMeans2,
15 15
   ...,
16
+  na.rm = NA,
16 17
   tFUN = FALSE
17 18
 )
18 19
 
... ...
@@ -23,6 +24,7 @@
23 24
   S,
24 25
   FUN = rowMeans2,
25 26
   ...,
27
+  na.rm = NA,
26 28
   tFUN = FALSE
27 29
 )
28 30
 }
... ...
@@ -44,6 +46,10 @@ applied to to each column (row) subset of \code{X} that is specified by \code{S}
44 46
 
45 47
 \item{...}{Additional arguments passed to \code{FUN}.}
46 48
 
49
+\item{na.rm}{(logical) Argument passed to \code{FUN()} as \code{na.rm = na.rm}.
50
+If \code{NA} (default), then \code{na.rm = TRUE} is used if \code{X} or \code{S} holds missing values,
51
+otherwise \code{na.rm = FALSE}.}
52
+
47 53
 \item{tFUN}{If \code{TRUE}, \code{X} is transposed before it is passed to \code{FUN}.}
48 54
 
49 55
 \item{rows}{A \code{\link{vector}} indicating the subset (and/or
... ...
@@ -78,8 +84,8 @@ mat <- matrix(rnorm(20), nrow = 5, ncol = 4)
78 84
 }
79 85
 \seealso{
80 86
 \itemize{
81
-\item \code{matrixStats::\link[matrixStats]{rowAvgsPerColSet}()} and
82
-\code{matrixStats::\link[matrixStats:rowAvgsPerColSet]{colAvgsPerRowSet}()}
87
+\item \code{matrixStats::\link[matrixStats:rowAvgsPerColSet]{rowAvgsPerColSet}()}
88
+and \code{matrixStats::\link[matrixStats:rowAvgsPerColSet]{colAvgsPerRowSet}()}
83 89
 which are used when the input is a \code{matrix} or \code{numeric} vector.
84 90
 }
85 91
 }
... ...
@@ -50,8 +50,8 @@ mat <- matrix(rnorm(15), nrow = 5, ncol = 3)
50 50
 }
51 51
 \seealso{
52 52
 \itemize{
53
-\item \code{matrixStats::\link[matrixStats]{rowCollapse}()} and
54
-\code{matrixStats::\link[matrixStats:rowCollapse]{colCollapse}()}
53
+\item \code{matrixStats::\link[matrixStats:rowCollapse]{rowCollapse}()}
54
+and \code{matrixStats::\link[matrixStats:rowCollapse]{colCollapse}()}
55 55
 which are used when the input is a \code{matrix} or \code{numeric} vector.
56 56
 }
57 57
 }
... ...
@@ -41,13 +41,9 @@ done.}
41 41
 are excluded first, otherwise not.}
42 42
 
43 43
 \item{type}{An integer specifying the type of estimator. See
44
-\code{stats::\link[stats]{quantile}()}. for more details. Note, that this
45
-is not a generic argument and not all implementation of this function have
46
-to provide it.}
44
+\code{stats::\link[stats]{quantile}()}. for more details.}
47 45
 
48
-\item{drop}{If \code{TRUE} a vector is returned if \code{J == 1}.
49
-Note, that this is not a generic argument and not all implementation of
50
-this function have to provide it.}
46
+\item{drop}{If \code{TRUE} a vector is returned if \code{J == 1}.}
51 47
 }
52 48
 \value{
53 49
 a \code{\link{numeric}} \code{NxJ} (\code{KxJ})
... ...
@@ -404,12 +404,9 @@ for(idx in seq_along(matrix_list)){
404 404
 
405 405
   test_that("colAvgsPerRowSet works", {
406 406
     S <-  suppressWarnings(matrix(seq_len(nrow(mat)), ncol = 2))
407
-    expect_equal(colAvgsPerRowSet(sp_mat, S = S, na.rm = TRUE), matrixStats::colAvgsPerRowSet(mat, S = S))
408
-    expect_equal(colAvgsPerRowSet(sp_mat, S = S, FUN = colVarDiffs, na.rm = TRUE), matrixStats::colAvgsPerRowSet(mat, S = S, FUN = colVarDiffs))
409
-    if(ncol(mat) == 1 || length(col_subset) == 1){
410
-      skip("matrixStats has a bug in colAvgsPerRowSet if the input has exactly one column")
411
-    }
412
-    expect_equal(colAvgsPerRowSet(sp_mat, S = S, na.rm = TRUE, cols = col_subset), matrixStats::colAvgsPerRowSet(mat, S = S, cols = col_subset))
407
+    expect_equal(colAvgsPerRowSet(sp_mat, S = S), matrixStats::colAvgsPerRowSet(mat, S = S))
408
+    expect_equal(colAvgsPerRowSet(sp_mat, S = S, FUN = colVarDiffs, na.rm = FALSE), matrixStats::colAvgsPerRowSet(mat, S = S, FUN = colVarDiffs, na.rm = FALSE))
409
+    expect_equal(colAvgsPerRowSet(sp_mat, S = S, na.rm = FALSE, cols = col_subset), matrixStats::colAvgsPerRowSet(mat, S = S, na.rm = FALSE, cols = col_subset))
413 410
   })
414 411
 
415 412
 }
... ...
@@ -285,9 +285,9 @@ test_that("rowCollapse works", {
285 285
 
286 286
 test_that("rowAvgsPerColSet works", {
287 287
   S <-  suppressWarnings(matrix(seq_len(ncol(mat)), ncol = 2))
288
-  expect_equal(rowAvgsPerColSet(sp_mat, S = S, na.rm = TRUE), matrixStats::rowAvgsPerColSet(mat, S = S))
289
-  expect_equal(rowAvgsPerColSet(sp_mat, S = S, FUN = rowVarDiffs, na.rm = TRUE), matrixStats::rowAvgsPerColSet(mat, S = S, FUN = rowVarDiffs))
290
-  expect_equal(rowAvgsPerColSet(sp_mat, S = S, na.rm = TRUE, rows = row_subset), matrixStats::rowAvgsPerColSet(mat, S = S, rows = row_subset))
288
+  expect_equal(rowAvgsPerColSet(sp_mat, S = S, na.rm = TRUE), matrixStats::rowAvgsPerColSet(mat, S = S, na.rm = TRUE))
289
+  expect_equal(rowAvgsPerColSet(sp_mat, S = S, FUN = rowVarDiffs, na.rm = FALSE), matrixStats::rowAvgsPerColSet(mat, S = S, FUN = rowVarDiffs, na.rm = FALSE))
290
+  expect_equal(rowAvgsPerColSet(sp_mat, S = S, na.rm = FALSE, rows = row_subset), matrixStats::rowAvgsPerColSet(mat, S = S, na.rm = FALSE, rows = row_subset))
291 291
 })
292 292
 
293 293