Browse code

refactor: rewrite the peak extraction function for pwiz

- Rewrite the C++ code for peak data extraction with the pwiz backend.
- Fix unit tests.

jorainer authored on 26/09/2019 10:09:04
Showing 7 changed files

... ...
@@ -2,6 +2,8 @@ CHANGES IN VERSION 2.19.5
2 2
 -------------------------
3 3
  o header for the pwiz backend returns NA instead of 0 for not defined or
4 4
    missing information <2019-09-24 Tue>.
5
+ o peaks for pwiz backend rewritten (small performance improvement)
6
+   <2019-09-26 Thu>.
5 7
 
6 8
 CHANGES IN VERSION 2.19.4
7 9
 -------------------------
... ...
@@ -81,19 +81,12 @@ setMethod("spectra", "mzRpwiz",
81 81
 
82 82
 setMethod("peaksCount", c("mzRpwiz", "numeric"),
83 83
           function(object, scans) {
84
-            if (length(scans) == 1) {
85
-              return(object@backend$getPeakList(scans)$peaksCount)
86
-            } else {
87
-                return(sapply(scans,
88
-                              function(x)
89
-                                  object@backend$getPeakList(x)$peaksCount))
90
-            }
84
+              lengths(object@backend$getPeakList(scans))/2
91 85
           })
92 86
 
93 87
 setMethod("peaksCount", c("mzRpwiz", "missing"),
94 88
           function(object) {
95
-            n <- length(object)
96
-            return(peaksCount(object, 1:n))
89
+            peaksCount(object, seq_along(object))
97 90
           })
98 91
 
99 92
 setMethod("runInfo", "mzRpwiz",
... ...
@@ -18,18 +18,18 @@ setMethod("length",
18 18
           function(x) return(x@backend$getLastScan()))
19 19
 
20 20
 setMethod("peaks", "mzRramp",
21
-          function(object, scans) .peaks(object, scans))
21
+          function(object, scans) .peaks_ramp(object, scans))
22 22
 
23 23
 setMethod("spectra", "mzRramp",
24
-          function(object, scans) .peaks(object, scans))
24
+          function(object, scans) .peaks_ramp(object, scans))
25 25
 
26 26
 setMethod("peaksCount",
27 27
           signature=c("mzRramp","numeric"),
28 28
           function(object,scans) {
29 29
               if (length(scans)==1) {
30
-                  return(object@backend$getPeakList(scans)$peaksCount)
30
+                  object@backend$getPeakList(scans)$peaksCount
31 31
               } else {
32
-                  return(sapply(scans,function(x) object@backend$getPeakList(x)$peaksCount))
32
+                  sapply(scans,function(x) object@backend$getPeakList(x)$peaksCount)
33 33
               }
34 34
           })
35 35
 
... ...
@@ -1,20 +1,26 @@
1 1
 .peaks <- function(object, scans) {
2
+    if (missing(scans))
3
+        scans <- 1:length(object)
4
+    res <- object@backend$getPeakList(scans)
5
+    if (length(res) == 1)
6
+        res[[1]]
7
+    else res
8
+}
9
+
10
+.peaks_ramp <- function(object, scans) {
2 11
     if (missing(scans))
3 12
         scans <- 1:length(object)
4 13
     if (length(scans) == 1) {
5
-        return(object@backend$getPeakList(scans)$peaks)
14
+        object@backend$getPeakList(scans)$peaks
6 15
     } else {
7
-        return(sapply(scans,
8
-                      function(x) object@backend$getPeakList(x)$peaks,
9
-                      simplify = FALSE))
16
+        sapply(scans, function(x) object@backend$getPeakList(x)$peaks,
17
+               simplify = FALSE)
10 18
     }
11 19
 }
12 20
 
13
-
14 21
 setMethod("isolationWindow", "character",
15 22
           function(object, ...) .isolationWindow(object, ...))
16 23
 
17
-
18 24
 .isolationWindow <- function(x, unique. = TRUE, simplify = TRUE) {
19 25
     stopifnot(all(file.exists(x)))
20 26
     if (!requireNamespace("XML"))
... ...
@@ -128,7 +128,7 @@
128 128
   \code{isolationWindowUpperOffset}, \code{scanWindowLowerLimit} and
129 129
   \code{scanWindowUpperLimit}. If multiple scans are queried, a
130 130
   \code{data.frame} is returned with the scans reported along the
131
-  rows.
131
+  rows. For missing or not defined spectrum variables \code{NA} is reported.
132 132
 
133 133
   The \code{get3Dmap} function performs a simple resampling between
134 134
   \code{lowMz} and \code{highMz} with \code{reMz} resolution. A matrix
... ...
@@ -38,43 +38,6 @@ void RcppPwiz::close()
38 38
     }
39 39
 }
40 40
 
41
-
42
-// void RcppPwiz::writeMSfile(const string& file, const string& format)
43
-// {
44
-//     if (msd != NULL)
45
-//     {
46
-//         if(format == "mgf")
47
-//         {
48
-//             std::ofstream* mgfOutFileP = new std::ofstream(file.c_str());
49
-//             Serializer_MGF serializerMGF;
50
-//             serializerMGF.write(*mgfOutFileP, *msd);
51
-//             mgfOutFileP->flush();
52
-//             mgfOutFileP->close();
53
-//         }
54
-//         else if(format == "mzxml")
55
-//         {
56
-//             std::ofstream mzXMLOutFileP(file.c_str());
57
-//             Serializer_mzXML::Config config;
58
-//             config.binaryDataEncoderConfig.compression = BinaryDataEncoder::Compression_Zlib;
59
-//             Serializer_mzXML serializerMzXML(config);
60
-//             serializerMzXML.write(mzXMLOutFileP, *msd);
61
-//         }
62
-//         else if(format == "mzml")
63
-//         {
64
-//             std::ofstream mzXMLOutFileP(file.c_str());
65
-//             Serializer_mzML::Config config;
66
-//             config.binaryDataEncoderConfig.compression = BinaryDataEncoder::Compression_Zlib;
67
-//             Serializer_mzML mzmlSerializer(config);
68
-//             mzmlSerializer.write(mzXMLOutFileP, *msd);
69
-//         }
70
-//         else
71
-//             Rcpp::Rcerr << format << " format not supported! Please try mgf, mzML, mzXML or mz5." << std::endl;
72
-//     }
73
-//     else
74
-//         Rcpp::Rcerr << "No pwiz object available! Please open a file first!" << std::endl;
75
-// }
76
-
77
-
78 41
 string RcppPwiz::getFilename() {
79 42
   return filename;
80 43
 }
... ...
@@ -400,34 +363,43 @@ Rcpp::DataFrame RcppPwiz::getAllScanHeaderInfo ( ) {
400 363
   return Rcpp::DataFrame::create( );
401 364
 }
402 365
 
403
-Rcpp::List RcppPwiz::getPeakList (int whichScan) {
366
+Rcpp::List RcppPwiz::getPeakList(Rcpp::IntegerVector whichScan) {
404 367
   if (msd != NULL) {
405 368
     SpectrumListPtr slp = msd->run.spectrumListPtr;
406
-    if ((whichScan <= 0) || (whichScan > slp->size())) {
407
-      Rprintf("Index whichScan out of bounds [1 ... %d].\n", slp->size());
408
-      return Rcpp::List::create( );
409
-    }
410
-    SpectrumPtr s = slp->spectrum(whichScan - 1, true);
411
-    vector<MZIntensityPair> pairs;
412
-    s->getMZIntensityPairs(pairs);
413
-
414
-    Rcpp::NumericMatrix peaks(pairs.size(), 2);
415
-
416
-    if(pairs.size()!=0) {
417
-      for (int i = 0; i < pairs.size(); i++) {
418
-	MZIntensityPair p = pairs.at(i);
419
-	peaks(i,0) = p.mz;
420
-	peaks(i,1) = p.intensity;
369
+    int n_scans = slp->size();
370
+    int n_want = whichScan.size();
371
+    int current_scan;
372
+    SpectrumPtr sp;
373
+    BinaryDataArrayPtr mzs,ints;
374
+    std::vector<double> data;
375
+    Rcpp::NumericVector data_matrix;
376
+    Rcpp::List res(n_want);
377
+    for (int i = 0; i < n_want; i++) {
378
+      current_scan = whichScan[i];
379
+      if (current_scan < 1 || current_scan > n_scans) {
380
+	Rprintf("Index whichScan out of bounds [1 ... %d].\n", n_scans);
381
+	return Rcpp::List::create( );
421 382
       }
383
+      sp = slp->spectrum(current_scan - 1, true);
384
+      mzs = sp->getMZArray();
385
+      ints = sp->getIntensityArray();
386
+      if (!mzs.get() || !ints.get()) {
387
+	Rcpp::NumericMatrix pks(0, 2);
388
+	res[i] = pks;
389
+	continue;
390
+      }
391
+      if (mzs->data.size() != ints->data.size())
392
+	Rcpp::Rcerr << "Sizes of mz and intensity arrays don't match." << std::endl;
393
+      data = mzs->data;
394
+      data.insert(data.end(), ints->data.begin(), ints->data.end());
395
+      data_matrix = Rcpp::wrap(data);
396
+      data_matrix.attr("dim") = Rcpp::Dimension(ints->data.size(), 2);
397
+      res[i] = data_matrix;
422 398
     }
423
-
424
-    return Rcpp::List::create(
425
-			      Rcpp::_["peaksCount"]  = pairs.size(),
426
-			      Rcpp::_["peaks"]  = peaks
427
-			      ) ;
399
+    return res;
428 400
   }
429 401
   Rf_warningcall(R_NilValue, "pwiz not yet initialized.");
430
-  return Rcpp::List::create( );
402
+  return Rcpp::List::create();
431 403
 }
432 404
 
433 405
 /**
... ...
@@ -98,7 +98,7 @@ public:
98 98
 
99 99
     Rcpp::DataFrame getAllChromatogramHeaderInfo();
100 100
 
101
-    Rcpp::List getPeakList(int whichScan);
101
+    Rcpp::List getPeakList(Rcpp::IntegerVector whichScan);
102 102
 
103 103
     Rcpp::NumericMatrix get3DMap(std::vector<int> scanNumbers, double whichMzLow, double whichMzHigh, double resMz);
104 104