Browse code

Add News

Steffen Neumann authored on 20/11/2021 11:19:02
Showing 5 changed files

... ...
@@ -2,7 +2,7 @@ Package: mzR
2 2
 Type: Package
3 3
 Title: parser for netCDF, mzXML, mzData and mzML and mzIdentML files
4 4
        (mass spectrometry data)
5
-Version: 2.27.2
5
+Version: 2.29.2
6 6
 Author: Bernd Fischer, Steffen Neumann, Laurent Gatto, Qiang Kou, Johannes Rainer
7 7
 Maintainer: Steffen Neumann <sneumann@ipb-halle.de>,
8 8
             Laurent Gatto <laurent.gatto@uclouvain.be>,
... ...
@@ -1,3 +1,13 @@
1
+CHANGES IN VERSION 2.29.2
2
+-------------------------
3
+ o Update to ProteoWizard 3_0_21263
4
+ o Removed RAMP backend, dropping ability to read mzData
5
+
6
+CHANGES IN VERSION 2.29.1
7
+-------------------------
8
+ o Pwiz backend partially re-written to avoid segfault on macOS
9
+   (https://github.com/sneumann/xcms/issues/422).
10
+
1 11
 CHANGES IN VERSION 2.27.2
2 12
 -------------------------
3 13
  o Remove support for the ramp backend.
... ...
@@ -50,10 +50,12 @@ setMethod("detector", "mzRpwiz",
50 50
 
51 51
 setMethod("header", c("mzRpwiz", "missing"),
52 52
           function(object) {
53
-              res <- object@backend$getAllScanHeaderInfo()
54
-              res$filterString <- as.character(res$filterString)
55
-              res$spectrumId <- as.character(res$spectrumId)
56
-              res
53
+              scans <- seq_len(object@backend$getLastScan())
54
+              ## res <- object@backend$getAllScanHeaderInfo()
55
+              ## res$filterString <- as.character(res$filterString)
56
+              ## res$spectrumId <- as.character(res$spectrumId)
57
+              ## res
58
+              header(object, scans)
57 59
           })
58 60
 
59 61
 setMethod("header", c("mzRpwiz", "numeric"),
... ...
@@ -3,6 +3,7 @@
3 3
 RcppPwiz::RcppPwiz()
4 4
 {
5 5
   msd = NULL;
6
+  nativeIdFormat = CVID_Unknown;
6 7
   instrumentInfo = Rcpp::List::create();
7 8
   chromatogramsInfo = Rcpp::DataFrame::create();
8 9
   isInCacheInstrumentInfo = FALSE;
... ...
@@ -15,12 +16,13 @@ RcppPwiz::~RcppPwiz()
15 16
   RcppPwiz::close();
16 17
 }
17 18
 
18
-void RcppPwiz::open(const string& fileName)
19
+// void RcppPwiz::open(const string& fileName)
20
+void RcppPwiz::open(Rcpp::StringVector fileName)
19 21
 {
20 22
 
21
-  filename = fileName;
22
-  msd = new MSDataFile(fileName);
23
-
23
+  filename = Rcpp::as<std::string>(fileName(0));
24
+  msd = new MSDataFile(filename);
25
+  nativeIdFormat = id::getDefaultNativeIDFormat(*msd);
24 26
 }
25 27
 
26 28
 /* Release all memory on close. */
... ...
@@ -30,6 +32,7 @@ void RcppPwiz::close()
30 32
     {
31 33
       delete msd;
32 34
       msd = NULL;
35
+      nativeIdFormat = CVID_Unknown;
33 36
       instrumentInfo = Rcpp::List::create();
34 37
       chromatogramsInfo = Rcpp::DataFrame::create();
35 38
       isInCacheInstrumentInfo = FALSE;
... ...
@@ -127,12 +130,45 @@ Rcpp::List RcppPwiz::getInstrumentInfo ( )
127 130
   return instrumentInfo;
128 131
 }
129 132
 
133
+int RcppPwiz::getAcquisitionNumber(string id, size_t index) const
134
+{
135
+  // const SpectrumIdentity& si = msd->run.spectrumListPtr->spectrumIdentity(index);
136
+  string scanNumber = id::translateNativeIDToScanNumber(nativeIdFormat, id);
137
+  if (scanNumber.empty())
138
+    return static_cast<int>(index) + 1;
139
+  else
140
+    return lexical_cast<int>(scanNumber);
141
+}
142
+
143
+// Disabled alternative that avoids pwiz translateNativeIDToScanNumber, which
144
+// randomly causes segfaults on macOS; the active version above still calls it.
145
+// int RcppPwiz::getAcquisitionNumber(string id, size_t index) const
146
+// {
147
+//   if (id.find("controllerType") != std::string::npos) {
148
+//     if (id.find("controllerType=0 controllerNumber=1") == std::string::npos)
149
+//       return static_cast<int>(index) + 1;
150
+//   }
151
+//   string e;
152
+//   std::smatch match;
153
+//   if (id.find("scan=") != std::string::npos)
154
+//     e ="scan=(\\d+)";
155
+//   else if (id.find("index=") != std::string::npos)
156
+//     e = "index=(\\d+)";
157
+//   else if (id.find("spectrum=") != std::string::npos)
158
+//     e = "spectrum=(\\d+)";
159
+//   else if (id.find("scanId=") != std::string::npos)
160
+//     e = "scanId=(\\d+)";
161
+//   else return static_cast<int>(index) + 1;
162
+//   if (std::regex_search(id, match, std::regex(e)))
163
+//     return lexical_cast<int>(match[1]);
164
+//   else return static_cast<int>(index) + 1;
165
+// }
166
+
130 167
 Rcpp::DataFrame RcppPwiz::getScanHeaderInfo (Rcpp::IntegerVector whichScan) {
131 168
   if (msd != NULL) {
132 169
     SpectrumListPtr slp = msd->run.spectrumListPtr;
133
-    int N = slp->size();
134
-    int N_scans = whichScan.size();
135
-    CVID nativeIdFormat = id::getDefaultNativeIDFormat(*msd);
170
+    size_t N = slp->size();
171
+    size_t N_scans = whichScan.size();
136 172
     Rcpp::IntegerVector seqNum(N_scans); // number in sequence observed file (1-based)
137 173
     Rcpp::IntegerVector acquisitionNum(N_scans); // scan number as declared in File (may be gaps)
138 174
     Rcpp::IntegerVector msLevel(N_scans);
... ...
@@ -165,23 +201,20 @@ Rcpp::DataFrame RcppPwiz::getScanHeaderInfo (Rcpp::IntegerVector whichScan) {
165 201
     Rcpp::NumericVector scanWindowLowerLimit(N_scans);
166 202
     Rcpp::NumericVector scanWindowUpperLimit(N_scans);
167 203
     
168
-    for (int i = 0; i < N_scans; i++) {
204
+    for (size_t i = 0; i < N_scans; i++) {
169 205
       int current_scan = whichScan[i];
170
-      SpectrumPtr sp = slp->spectrum(current_scan - 1, false);
206
+      size_t current_index = static_cast<size_t>(current_scan - 1);
207
+      // SpectrumPtr sp = slp->spectrum(current_index, false);
208
+      SpectrumPtr sp = slp->spectrum(current_index, DetailLevel_FullMetadata);
171 209
       Scan dummy;
172 210
       Scan& scan = sp->scanList.scans.empty() ? dummy : sp->scanList.scans[0];
211
+      if (scan.empty())
212
+	Rprintf("Scan with index %d empty.\n", current_scan);
173 213
       // seqNum
174 214
       seqNum[i] = current_scan;
175
-      // acquisitionNum
176
-      string id = sp->id;
177
-      string scanNumber = id::translateNativeIDToScanNumber(nativeIdFormat, id);
178
-      if (scanNumber.empty()) {
179
-	acquisitionNum[i] = current_scan;
180
-      } else {
181
-	acquisitionNum[i] = lexical_cast<int>(scanNumber);
182
-      }
215
+      acquisitionNum[i] = getAcquisitionNumber(sp->id, current_index);
183 216
       // spectrumId
184
-      spectrumId[i] = sp->id;
217
+      spectrumId[i] = Rcpp::String(sp->id);
185 218
       // msLevel
186 219
       msLevel[i] = sp->cvParam(MS_ms_level).valueAs<int>();
187 220
       // peaksCount
... ...
@@ -231,14 +264,8 @@ Rcpp::DataFrame RcppPwiz::getScanHeaderInfo (Rcpp::IntegerVector whichScan) {
231 264
 	collisionEnergy[i] = precursor.activation.cvParam(MS_collision_energy).valueAs<double>();
232 265
 	// precursorScanNum
233 266
 	size_t precursorIndex = slp->find(precursor.spectrumID);
234
-	if (precursorIndex < slp->size()) {
235
-	  const SpectrumIdentity& precursorSpectrum = slp->spectrumIdentity(precursorIndex);
236
-	  string precursorScanNumber = id::translateNativeIDToScanNumber(nativeIdFormat, precursorSpectrum.id);
237
-	  if (precursorScanNumber.empty()) {
238
-	    precursorScanNum[i] = precursorIndex + 1;
239
-	  } else {
240
-	    precursorScanNum[i] = lexical_cast<int>(precursorScanNumber);
241
-	  }
267
+	if (precursorIndex < N) {
268
+	  precursorScanNum[i] = getAcquisitionNumber(precursor.spectrumID, precursorIndex);
242 269
 	} else {
243 270
 	  precursorScanNum[i] = NA_INTEGER;
244 271
 	}
... ...
@@ -277,7 +304,7 @@ Rcpp::DataFrame RcppPwiz::getScanHeaderInfo (Rcpp::IntegerVector whichScan) {
277 304
     
278 305
     Rcpp::List header(31);
279 306
     std::vector<std::string> names;
280
-    int i = 0;
307
+    size_t i = 0;
281 308
     names.push_back("seqNum");
282 309
     header[i++] = Rcpp::wrap(seqNum);
283 310
     names.push_back("acquisitionNum");
... ...
@@ -352,7 +379,7 @@ Rcpp::DataFrame RcppPwiz::getAllScanHeaderInfo ( ) {
352 379
   if (msd != NULL) {
353 380
     if (!isInCacheAllScanHeaderInfo) {
354 381
       SpectrumListPtr slp = msd->run.spectrumListPtr;
355
-      int N = slp->size();
382
+      size_t N = slp->size();
356 383
       
357 384
       allScanHeaderInfo = getScanHeaderInfo(Rcpp::seq(1, N));
358 385
       isInCacheAllScanHeaderInfo = TRUE;	    
... ...
@@ -366,21 +393,23 @@ Rcpp::DataFrame RcppPwiz::getAllScanHeaderInfo ( ) {
366 393
 Rcpp::List RcppPwiz::getPeakList(Rcpp::IntegerVector whichScan) {
367 394
   if (msd != NULL) {
368 395
     SpectrumListPtr slp = msd->run.spectrumListPtr;
369
-    int n_scans = slp->size();
370
-    int n_want = whichScan.size();
396
+    size_t n_scans = slp->size();
397
+    size_t n_want = whichScan.size();
371 398
     int current_scan;
372 399
     SpectrumPtr sp;
373 400
     BinaryDataArrayPtr mzs,ints;
374 401
     std::vector<double> data;
375 402
     Rcpp::NumericVector data_matrix;
376 403
     Rcpp::List res(n_want);
377
-    for (int i = 0; i < n_want; i++) {
404
+    for (size_t i = 0; i < n_want; i++) {
378 405
       current_scan = whichScan[i];
379 406
       if (current_scan < 1 || current_scan > n_scans) {
380 407
 	Rprintf("Index whichScan out of bounds [1 ... %d].\n", n_scans);
381 408
 	return Rcpp::List::create( );
382 409
       }
383
-      sp = slp->spectrum(current_scan - 1, true);
410
+      size_t current_index = static_cast<size_t>(current_scan - 1);
411
+      // sp = slp->spectrum(current_index, true);
412
+      sp = slp->spectrum(current_index, DetailLevel_FullData);
384 413
       mzs = sp->getMZArray();
385 414
       ints = sp->getIntensityArray();
386 415
       if (!mzs.get() || !ints.get()) {
... ...
@@ -847,7 +876,7 @@ Rcpp::DataFrame RcppPwiz::getChromatogramsInfo( int whichChrom )
847 876
 Rcpp::DataFrame RcppPwiz::getChromatogramHeaderInfo (Rcpp::IntegerVector whichChrom)
848 877
 {
849 878
   if (msd != NULL) {
850
-    CVID nativeIdFormat_ = id::getDefaultNativeIDFormat(*msd);
879
+    // CVID nativeIdFormat_ = id::getDefaultNativeIDFormat(*msd);
851 880
     ChromatogramListPtr clp = msd->run.chromatogramListPtr;
852 881
     if (clp.get() == 0) {
853 882
       Rf_warningcall(R_NilValue, "The direct support for chromatogram info is only available in mzML format.");
... ...
@@ -980,7 +1009,7 @@ Rcpp::NumericMatrix RcppPwiz::get3DMap ( std::vector<int> scanNumbers, double wh
980 1009
       //Rprintf("%d\n",1);
981 1010
       for (int i = 0; i < scanNumbers.size(); i++)
982 1011
         {
983
-	  SpectrumPtr s = slp->spectrum(scanNumbers[i] - 1, true);
1012
+	  SpectrumPtr s = slp->spectrum(scanNumbers[i] - 1, DetailLevel_FullMetadata);
984 1013
 	  vector<MZIntensityPair> pairs;
985 1014
 	  s->getMZIntensityPairs(pairs);
986 1015
 
... ...
@@ -28,6 +28,7 @@
28 28
 #include <fstream>
29 29
 #include <string>
30 30
 #include <iostream>
31
+#include <regex>
31 32
 
32 33
 #if defined(__MINGW32__)
33 34
 #include <windows.h>
... ...
@@ -49,18 +50,21 @@ private:
49 50
     Rcpp::DataFrame allScanHeaderInfo;
50 51
     bool isInCacheAllScanHeaderInfo;
51 52
     string filename;
53
+    CVID nativeIdFormat;
52 54
     void addSpectrumList(MSData& msd,
53 55
 			 Rcpp::DataFrame& spctr_header,
54 56
 			 Rcpp::List& spctr_data,
55 57
 			 bool rtime_seconds);
56 58
     void addDataProcessing(MSData& msd, Rcpp::StringVector soft_proc);
57
-
59
+    int getAcquisitionNumber(string id, size_t index) const;
60
+  
58 61
 public:
59 62
 
60 63
     RcppPwiz();
61 64
     virtual ~RcppPwiz();
62 65
 
63
-    void open(const string& fileNames);
66
+    // void open(const string& fileNames);
67
+    void open(Rcpp::StringVector fileNames);
64 68
     void close();
65 69
     /* void writeMSfile(const string& filenames, const string& format); */
66 70
     void writeSpectrumList(const string& file, const string& format,