... | ... |
@@ -1,5 +1,10 @@ |
1 | 1 |
#include "BSseq.h" |
2 | 2 |
|
3 |
+#include "beachmat/integer_matrix.h" |
|
4 |
+#include "beachmat/numeric_matrix.h" |
|
5 |
+ |
|
6 |
+#include "utils.h" |
|
7 |
+ |
|
3 | 8 |
// NOTE: Returning Rcpp::CharacterVector rather than throwing an error because |
4 | 9 |
// this function is used within a validity method. |
5 | 10 |
|
- BSseq objects can once again use ordinary matrix objects as assays.
- Reimplement `BSmooth()` more-or-less from scratch:
- Switch from 'parallel' to 'BiocParallel' for parallelization. This brings some notable improvements:
- Smoothed results can now be written directly to an on-disk realization backend by the worker. This dramatically reduces memory usage compared to previous versions of 'bsseq' that required all results be retained in-memory.
- Parallelization is now supported on Windows through the use of a 'SnowParam' object as the value of `BPPARAM`.
- Improved error handling makes it possible to gracefully resume `BSmooth()` jobs that encountered errors by re-doing only the necessary tasks.
- Detailed and extensive job logging facilities.
- Fix bug in `BSmooth()` with the `maxGap` parameter.
- Re-factor BSseq() constructor and add fast, internal .BSseq() constructor.
- Re-factor collapseBSseq() and combine(). Should be much more performant.
- Use beachmat to implement fast validity checking of 'M' and 'Cov' matrices.
- Resave BS.chr22 (supplied data) using integer for storage mode of assays to reduce size.
- Switch from RUnit to testthat. testthat has better integration with code coverage tools that help when refactoring.
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,107 @@ |
1 |
+#include "BSseq.h" |
|
2 |
+ |
|
3 |
+// NOTE: Returning Rcpp::CharacterVector rather than throwing an error because |
|
4 |
+// this function is used within a validity method. |
|
5 |
+ |
|
6 |
+template <class M_column_class, class Cov_column_class, class M_class, |
|
7 |
+ class Cov_class> |
|
8 |
+Rcpp::RObject check_M_and_Cov_internal(M_class M_bm, Cov_class Cov_bm) { |
|
9 |
+ BEGIN_RCPP |
|
10 |
+ |
|
11 |
+ // Get the dimensions of 'M' and 'Cov' and check these are compatible. |
|
12 |
+ const size_t M_nrow = M_bm->get_nrow(); |
|
13 |
+ const size_t Cov_nrow = Cov_bm->get_nrow(); |
|
14 |
+ if (M_nrow != Cov_nrow) { |
|
15 |
+ return Rcpp::CharacterVector( |
|
16 |
+ "'M' and 'Cov' must have the same number of rows."); |
|
17 |
+ } |
|
18 |
+ const size_t M_ncol = M_bm->get_ncol(); |
|
19 |
+ const size_t Cov_ncol = Cov_bm->get_ncol(); |
|
20 |
+ if (M_ncol != Cov_ncol) { |
|
21 |
+ return Rcpp::CharacterVector( |
|
22 |
+ "'M' and 'Cov' must have the same number of columns."); |
|
23 |
+ } |
|
24 |
+ |
|
25 |
+ // Simultaneously loop over columns of 'M' and 'Cov', checking that |
|
26 |
+ // `all(0 <= M <= Cov) && !anyNA(M) && !anyNA(Cov)` && all(is.finite(Cov)). |
|
27 |
+ M_column_class M_column(M_nrow); |
|
28 |
+ Cov_column_class Cov_column(Cov_nrow); |
|
29 |
+ for (size_t j = 0; j < M_ncol; ++j) { |
|
30 |
+ // Copy the j-th column of M to M_column and the j-th column of Cov to |
|
31 |
+ // Cov_column |
|
32 |
+ M_bm->get_col(j, M_column.begin()); |
|
33 |
+ Cov_bm->get_col(j, Cov_column.begin()); |
|
34 |
+ // Construct iterators |
|
35 |
+ // NOTE: Iterators constructed outside of loop because they may be of |
|
36 |
+ // different type, which is not supported within a for loop |
|
37 |
+ // constructor. |
|
38 |
+ auto M_column_it = M_column.begin(); |
|
39 |
+ auto Cov_column_it = Cov_column.begin(); |
|
40 |
+ for (M_column_it = M_column.begin(), Cov_column_it = Cov_column.begin(); |
|
41 |
+ M_column_it != M_column.end(); |
|
42 |
+ ++M_column_it, ++Cov_column_it) { |
|
43 |
+ if (isNA(*M_column_it)) { |
|
44 |
+ return Rcpp::CharacterVector("'M' must not contain NAs."); |
|
45 |
+ } |
|
46 |
+ if (isNA(*Cov_column_it)) { |
|
47 |
+ return Rcpp::CharacterVector("'Cov' must not contain NAs."); |
|
48 |
+ } |
|
49 |
+ if (*M_column_it < 0) { |
|
50 |
+ return Rcpp::CharacterVector( |
|
51 |
+ "'M' must not contain negative values."); |
|
52 |
+ } |
|
53 |
+ if (*M_column_it > *Cov_column_it) { |
|
54 |
+ return Rcpp::CharacterVector( |
|
55 |
+ "All values of 'M' must be less than or equal to the corresponding value of 'Cov'."); |
|
56 |
+ } |
|
57 |
+ if (!R_FINITE(*Cov_column_it)) { |
|
58 |
+ return Rcpp::CharacterVector("All values of 'Cov' must be finite."); |
|
59 |
+ } |
|
60 |
+ } |
|
61 |
+ } |
|
62 |
+ |
|
63 |
+ return R_NilValue; |
|
64 |
+ END_RCPP |
|
65 |
+} |
|
66 |
+ |
|
67 |
+SEXP check_M_and_Cov(SEXP M, SEXP Cov) { |
|
68 |
+ BEGIN_RCPP |
|
69 |
+ |
|
70 |
+ // Get the type of 'M' and 'Cov', |
|
71 |
+ int M_type = beachmat::find_sexp_type(M); |
|
72 |
+ int Cov_type = beachmat::find_sexp_type(Cov); |
|
73 |
+ if (M_type == INTSXP && Cov_type == INTSXP) { |
|
74 |
+ auto M_bm = beachmat::create_integer_matrix(M); |
|
75 |
+ auto Cov_bm = beachmat::create_integer_matrix(Cov); |
|
76 |
+ return check_M_and_Cov_internal< |
|
77 |
+ Rcpp::IntegerVector, Rcpp::IntegerVector>(M_bm.get(), Cov_bm.get()); |
|
78 |
+ } else if (M_type == REALSXP && Cov_type == REALSXP) { |
|
79 |
+ auto M_bm = beachmat::create_numeric_matrix(M); |
|
80 |
+ auto Cov_bm = beachmat::create_numeric_matrix(Cov); |
|
81 |
+ return check_M_and_Cov_internal< |
|
82 |
+ Rcpp::NumericVector, Rcpp::NumericVector>(M_bm.get(), Cov_bm.get()); |
|
83 |
+ } else if (M_type == INTSXP && Cov_type == REALSXP) { |
|
84 |
+ auto M_bm = beachmat::create_integer_matrix(M); |
|
85 |
+ auto Cov_bm = beachmat::create_numeric_matrix(Cov); |
|
86 |
+ return check_M_and_Cov_internal< |
|
87 |
+ Rcpp::IntegerVector, Rcpp::NumericVector>(M_bm.get(), Cov_bm.get()); |
|
88 |
+ } else if (M_type == REALSXP && Cov_type == INTSXP) { |
|
89 |
+ auto M_bm = beachmat::create_numeric_matrix(M); |
|
90 |
+ auto Cov_bm = beachmat::create_integer_matrix(Cov); |
|
91 |
+ return check_M_and_Cov_internal< |
|
92 |
+ Rcpp::NumericVector, Rcpp::IntegerVector>(M_bm.get(), Cov_bm.get()); |
|
93 |
+ } |
|
94 |
+ else { |
|
95 |
+ return Rcpp::CharacterVector( |
|
96 |
+ "'M' and 'Cov' must contain integer or numeric values."); |
|
97 |
+ } |
|
98 |
+ END_RCPP |
|
99 |
+} |
|
100 |
+// TODOs ----------------------------------------------------------------------- |
|
101 |
+ |
|
102 |
+// TODO: Add code path to process ordinary R vectors (for use within |
|
103 |
+// read.bismark() funcionality)? |
|
104 |
+// TODO: Wasn't able to figure out how to use get_const_col. See |
|
105 |
+// https://gist.github.com/PeteHaitch/cf6cffd40d4f8bd7082eec1b0f330082 |
|
106 |
+// and note that `check_M_and_Cov_using_const_columns` never fails when |
|
107 |
+// matrix input contains NA |