- Rewrite the C++ code for peak data extraction with the pwiz backend.
- Fix unit tests.
... | ... |
@@ -2,6 +2,8 @@ CHANGES IN VERSION 2.19.5 |
2 | 2 |
------------------------- |
3 | 3 |
o header for the pwiz backend returns NA instead of 0 for not defined or |
4 | 4 |
missing information <2019-09-24 Tue>. |
5 |
+ o peaks for pwiz backend rewritten (small performance improvement) |
|
6 |
+ <2019-09-26 Thu>. |
|
5 | 7 |
|
6 | 8 |
CHANGES IN VERSION 2.19.4 |
7 | 9 |
------------------------- |
... | ... |
@@ -81,19 +81,12 @@ setMethod("spectra", "mzRpwiz", |
81 | 81 |
|
82 | 82 |
setMethod("peaksCount", c("mzRpwiz", "numeric"), |
83 | 83 |
function(object, scans) { |
84 |
- if (length(scans) == 1) { |
|
85 |
- return(object@backend$getPeakList(scans)$peaksCount) |
|
86 |
- } else { |
|
87 |
- return(sapply(scans, |
|
88 |
- function(x) |
|
89 |
- object@backend$getPeakList(x)$peaksCount)) |
|
90 |
- } |
|
84 |
+ lengths(object@backend$getPeakList(scans))/2 |
|
91 | 85 |
}) |
92 | 86 |
|
93 | 87 |
setMethod("peaksCount", c("mzRpwiz", "missing"), |
94 | 88 |
function(object) { |
95 |
- n <- length(object) |
|
96 |
- return(peaksCount(object, 1:n)) |
|
89 |
+ peaksCount(object, seq_along(object)) |
|
97 | 90 |
}) |
98 | 91 |
|
99 | 92 |
setMethod("runInfo", "mzRpwiz", |
... | ... |
@@ -18,18 +18,18 @@ setMethod("length", |
18 | 18 |
function(x) return(x@backend$getLastScan())) |
19 | 19 |
|
20 | 20 |
setMethod("peaks", "mzRramp", |
21 |
- function(object, scans) .peaks(object, scans)) |
|
21 |
+ function(object, scans) .peaks_ramp(object, scans)) |
|
22 | 22 |
|
23 | 23 |
setMethod("spectra", "mzRramp", |
24 |
- function(object, scans) .peaks(object, scans)) |
|
24 |
+ function(object, scans) .peaks_ramp(object, scans)) |
|
25 | 25 |
|
26 | 26 |
setMethod("peaksCount", |
27 | 27 |
signature=c("mzRramp","numeric"), |
28 | 28 |
function(object,scans) { |
29 | 29 |
if (length(scans)==1) { |
30 |
- return(object@backend$getPeakList(scans)$peaksCount) |
|
30 |
+ object@backend$getPeakList(scans)$peaksCount |
|
31 | 31 |
} else { |
32 |
- return(sapply(scans,function(x) object@backend$getPeakList(x)$peaksCount)) |
|
32 |
+ sapply(scans,function(x) object@backend$getPeakList(x)$peaksCount) |
|
33 | 33 |
} |
34 | 34 |
}) |
35 | 35 |
|
... | ... |
@@ -1,20 +1,26 @@ |
1 | 1 |
.peaks <- function(object, scans) { |
2 |
+ if (missing(scans)) |
|
3 |
+ scans <- 1:length(object) |
|
4 |
+ res <- object@backend$getPeakList(scans) |
|
5 |
+ if (length(res) == 1) |
|
6 |
+ res[[1]] |
|
7 |
+ else res |
|
8 |
+} |
|
9 |
+ |
|
10 |
+.peaks_ramp <- function(object, scans) { |
|
2 | 11 |
if (missing(scans)) |
3 | 12 |
scans <- 1:length(object) |
4 | 13 |
if (length(scans) == 1) { |
5 |
- return(object@backend$getPeakList(scans)$peaks) |
|
14 |
+ object@backend$getPeakList(scans)$peaks |
|
6 | 15 |
} else { |
7 |
- return(sapply(scans, |
|
8 |
- function(x) object@backend$getPeakList(x)$peaks, |
|
9 |
- simplify = FALSE)) |
|
16 |
+ sapply(scans, function(x) object@backend$getPeakList(x)$peaks, |
|
17 |
+ simplify = FALSE) |
|
10 | 18 |
} |
11 | 19 |
} |
12 | 20 |
|
13 |
- |
|
14 | 21 |
setMethod("isolationWindow", "character", |
15 | 22 |
function(object, ...) .isolationWindow(object, ...)) |
16 | 23 |
|
17 |
- |
|
18 | 24 |
.isolationWindow <- function(x, unique. = TRUE, simplify = TRUE) { |
19 | 25 |
stopifnot(all(file.exists(x))) |
20 | 26 |
if (!requireNamespace("XML")) |
... | ... |
@@ -128,7 +128,7 @@ |
128 | 128 |
\code{isolationWindowUpperOffset}, \code{scanWindowLowerLimit} and |
129 | 129 |
\code{scanWindowUpperLimit}. If multiple scans are queried, a |
130 | 130 |
\code{data.frame} is returned with the scans reported along the |
131 |
- rows. |
|
131 |
+ rows. For missing or not defined spectrum variables \code{NA} is reported. |
|
132 | 132 |
|
133 | 133 |
The \code{get3Dmap} function performs a simple resampling between |
134 | 134 |
\code{lowMz} and \code{highMz} with \code{reMz} resolution. A matrix |
... | ... |
@@ -38,43 +38,6 @@ void RcppPwiz::close() |
38 | 38 |
} |
39 | 39 |
} |
40 | 40 |
|
41 |
- |
|
42 |
-// void RcppPwiz::writeMSfile(const string& file, const string& format) |
|
43 |
-// { |
|
44 |
-// if (msd != NULL) |
|
45 |
-// { |
|
46 |
-// if(format == "mgf") |
|
47 |
-// { |
|
48 |
-// std::ofstream* mgfOutFileP = new std::ofstream(file.c_str()); |
|
49 |
-// Serializer_MGF serializerMGF; |
|
50 |
-// serializerMGF.write(*mgfOutFileP, *msd); |
|
51 |
-// mgfOutFileP->flush(); |
|
52 |
-// mgfOutFileP->close(); |
|
53 |
-// } |
|
54 |
-// else if(format == "mzxml") |
|
55 |
-// { |
|
56 |
-// std::ofstream mzXMLOutFileP(file.c_str()); |
|
57 |
-// Serializer_mzXML::Config config; |
|
58 |
-// config.binaryDataEncoderConfig.compression = BinaryDataEncoder::Compression_Zlib; |
|
59 |
-// Serializer_mzXML serializerMzXML(config); |
|
60 |
-// serializerMzXML.write(mzXMLOutFileP, *msd); |
|
61 |
-// } |
|
62 |
-// else if(format == "mzml") |
|
63 |
-// { |
|
64 |
-// std::ofstream mzXMLOutFileP(file.c_str()); |
|
65 |
-// Serializer_mzML::Config config; |
|
66 |
-// config.binaryDataEncoderConfig.compression = BinaryDataEncoder::Compression_Zlib; |
|
67 |
-// Serializer_mzML mzmlSerializer(config); |
|
68 |
-// mzmlSerializer.write(mzXMLOutFileP, *msd); |
|
69 |
-// } |
|
70 |
-// else |
|
71 |
-// Rcpp::Rcerr << format << " format not supported! Please try mgf, mzML, mzXML or mz5." << std::endl; |
|
72 |
-// } |
|
73 |
-// else |
|
74 |
-// Rcpp::Rcerr << "No pwiz object available! Please open a file first!" << std::endl; |
|
75 |
-// } |
|
76 |
- |
|
77 |
- |
|
78 | 41 |
string RcppPwiz::getFilename() { |
79 | 42 |
return filename; |
80 | 43 |
} |
... | ... |
@@ -400,34 +363,43 @@ Rcpp::DataFrame RcppPwiz::getAllScanHeaderInfo ( ) { |
400 | 363 |
return Rcpp::DataFrame::create( ); |
401 | 364 |
} |
402 | 365 |
|
403 |
-Rcpp::List RcppPwiz::getPeakList (int whichScan) { |
|
366 |
+Rcpp::List RcppPwiz::getPeakList(Rcpp::IntegerVector whichScan) { |
|
404 | 367 |
if (msd != NULL) { |
405 | 368 |
SpectrumListPtr slp = msd->run.spectrumListPtr; |
406 |
- if ((whichScan <= 0) || (whichScan > slp->size())) { |
|
407 |
- Rprintf("Index whichScan out of bounds [1 ... %d].\n", slp->size()); |
|
408 |
- return Rcpp::List::create( ); |
|
409 |
- } |
|
410 |
- SpectrumPtr s = slp->spectrum(whichScan - 1, true); |
|
411 |
- vector<MZIntensityPair> pairs; |
|
412 |
- s->getMZIntensityPairs(pairs); |
|
413 |
- |
|
414 |
- Rcpp::NumericMatrix peaks(pairs.size(), 2); |
|
415 |
- |
|
416 |
- if(pairs.size()!=0) { |
|
417 |
- for (int i = 0; i < pairs.size(); i++) { |
|
418 |
- MZIntensityPair p = pairs.at(i); |
|
419 |
- peaks(i,0) = p.mz; |
|
420 |
- peaks(i,1) = p.intensity; |
|
369 |
+ int n_scans = slp->size(); |
|
370 |
+ int n_want = whichScan.size(); |
|
371 |
+ int current_scan; |
|
372 |
+ SpectrumPtr sp; |
|
373 |
+ BinaryDataArrayPtr mzs,ints; |
|
374 |
+ std::vector<double> data; |
|
375 |
+ Rcpp::NumericVector data_matrix; |
|
376 |
+ Rcpp::List res(n_want); |
|
377 |
+ for (int i = 0; i < n_want; i++) { |
|
378 |
+ current_scan = whichScan[i]; |
|
379 |
+ if (current_scan < 1 || current_scan > n_scans) { |
|
380 |
+ Rprintf("Index whichScan out of bounds [1 ... %d].\n", n_scans); |
|
381 |
+ return Rcpp::List::create( ); |
|
421 | 382 |
} |
383 |
+ sp = slp->spectrum(current_scan - 1, true); |
|
384 |
+ mzs = sp->getMZArray(); |
|
385 |
+ ints = sp->getIntensityArray(); |
|
386 |
+ if (!mzs.get() || !ints.get()) { |
|
387 |
+ Rcpp::NumericMatrix pks(0, 2); |
|
388 |
+ res[i] = pks; |
|
389 |
+ continue; |
|
390 |
+ } |
|
391 |
+ if (mzs->data.size() != ints->data.size()) |
|
392 |
+ Rcpp::Rcerr << "Sizes of mz and intensity arrays don't match." << std::endl; |
|
393 |
+ data = mzs->data; |
|
394 |
+ data.insert(data.end(), ints->data.begin(), ints->data.end()); |
|
395 |
+ data_matrix = Rcpp::wrap(data); |
|
396 |
+ data_matrix.attr("dim") = Rcpp::Dimension(ints->data.size(), 2); |
|
397 |
+ res[i] = data_matrix; |
|
422 | 398 |
} |
423 |
- |
|
424 |
- return Rcpp::List::create( |
|
425 |
- Rcpp::_["peaksCount"] = pairs.size(), |
|
426 |
- Rcpp::_["peaks"] = peaks |
|
427 |
- ) ; |
|
399 |
+ return res; |
|
428 | 400 |
} |
429 | 401 |
Rf_warningcall(R_NilValue, "pwiz not yet initialized."); |
430 |
- return Rcpp::List::create( ); |
|
402 |
+ return Rcpp::List::create(); |
|
431 | 403 |
} |
432 | 404 |
|
433 | 405 |
/** |
... | ... |
@@ -98,7 +98,7 @@ public: |
98 | 98 |
|
99 | 99 |
Rcpp::DataFrame getAllChromatogramHeaderInfo(); |
100 | 100 |
|
101 |
- Rcpp::List getPeakList(int whichScan); |
|
101 |
+ Rcpp::List getPeakList(Rcpp::IntegerVector whichScan); |
|
102 | 102 |
|
103 | 103 |
Rcpp::NumericMatrix get3DMap(std::vector<int> scanNumbers, double whichMzLow, double whichMzHigh, double resMz); |
104 | 104 |
|