Browse code

fix: read full spectra data in get3DMap (issue #269)

jorainer authored on 24/05/2022 07:28:04
Showing 1 changed files
... ...
@@ -1013,7 +1013,7 @@ Rcpp::NumericMatrix RcppPwiz::get3DMap ( std::vector<int> scanNumbers, double wh
1013 1013
       //Rprintf("%d\n",1);
1014 1014
       for (int i = 0; i < scanNumbers.size(); i++)
1015 1015
         {
1016
-	  SpectrumPtr s = slp->spectrum(scanNumbers[i] - 1, DetailLevel_FullMetadata);
1016
+	  SpectrumPtr s = slp->spectrum(scanNumbers[i] - 1, DetailLevel_FullData);
1017 1017
 	  vector<MZIntensityPair> pairs;
1018 1018
 	  s->getMZIntensityPairs(pairs);
1019 1019
 
Browse code

fix: mzML export

- Don't export/write header variables with missing values (issue #266).

jorainer authored on 22/04/2022 13:50:18
Showing 1 changed files
... ...
@@ -725,9 +725,9 @@ void RcppPwiz::addSpectrumList(MSData& msd,
725 725
     Spectrum& spct = *spectrumList->spectra[i];
726 726
     spct.set(MS_ms_level, msLevel[i]);
727 727
     // centroided
728
-    if (centroided[i] != NA_LOGICAL && centroided[i] == TRUE)
728
+    if (!Rcpp::LogicalVector::is_na(centroided[i]) && centroided[i] == TRUE)
729 729
       spct.set(MS_centroid_spectrum);
730
-    if (centroided[i] != NA_LOGICAL && centroided[i] == FALSE)
730
+    if (!Rcpp::LogicalVector::is_na(centroided[i]) && centroided[i] == FALSE)
731 731
       spct.set(MS_profile_spectrum);
732 732
     // [X] polarity
733 733
     if (polarity[i] == 0)
... ...
@@ -765,12 +765,13 @@ void RcppPwiz::addSpectrumList(MSData& msd,
765 765
     if (!Rcpp::StringVector::is_na(filterString[i]))
766 766
       spct_scan.set(MS_filter_string, filterString[i]);
767 767
 
768
-    if (ionMobilityDriftTime[i] != NA_REAL)
768
+    if (!Rcpp::NumericVector::is_na(ionMobilityDriftTime[i]))
769 769
       spct_scan.set(MS_ion_mobility_drift_time, ionMobilityDriftTime[i],
770 770
 		    UO_millisecond);
771 771
 
772 772
     // scanWindow
773
-    if (scanWindowLowerLimit[i] != NA_REAL && scanWindowUpperLimit[i] != NA_REAL) {
773
+    if (!Rcpp::NumericVector::is_na(scanWindowLowerLimit[i]) &&
774
+	!Rcpp::NumericVector::is_na(scanWindowUpperLimit[i])) {
774 775
       spct_scan.scanWindows.push_back(ScanWindow(scanWindowLowerLimit[i], scanWindowUpperLimit[i], MS_m_z));
775 776
     }
776 777
     // MSn - precursor:
... ...
@@ -804,7 +805,7 @@ void RcppPwiz::addSpectrumList(MSData& msd,
804 805
 	prec.spectrumID = spectrumId[precursor_idx];
805 806
       }
806 807
       // isolation window
807
-      if (isolationWindowTargetMZ[i] != NA_REAL) {
808
+      if (!Rcpp::NumericVector::is_na(isolationWindowTargetMZ[i])) {
808 809
 	prec.isolationWindow.set(MS_isolation_window_target_m_z, isolationWindowTargetMZ[i]);
809 810
 	prec.isolationWindow.set(MS_isolation_window_lower_offset, isolationWindowLowerOffset[i]);
810 811
 	prec.isolationWindow.set(MS_isolation_window_upper_offset, isolationWindowUpperOffset[i]);
jorainer authored on 18/03/2022 13:47:56
Showing 0 changed files
Browse code

feat: pwiz returns peak matrices with column names

- `peaks` method for the proteowizard backend also peaks matrices with colnames
set.

jorainer authored on 18/03/2022 10:38:45
Showing 1 changed files
... ...
@@ -374,6 +374,7 @@ Rcpp::List RcppPwiz::getPeakList(Rcpp::IntegerVector whichScan) {
374 374
     std::vector<double> data;
375 375
     Rcpp::NumericVector data_matrix;
376 376
     Rcpp::List res(n_want);
377
+    Rcpp::CharacterVector cn = Rcpp::CharacterVector::create("mz", "intensity");
377 378
     for (int i = 0; i < n_want; i++) {
378 379
       current_scan = whichScan[i];
379 380
       if (current_scan < 1 || current_scan > n_scans) {
... ...
@@ -385,6 +386,7 @@ Rcpp::List RcppPwiz::getPeakList(Rcpp::IntegerVector whichScan) {
385 386
       ints = sp->getIntensityArray();
386 387
       if (!mzs.get() || !ints.get()) {
387 388
 	Rcpp::NumericMatrix pks(0, 2);
389
+	Rcpp::colnames(pks) = cn;
388 390
 	res[i] = pks;
389 391
 	continue;
390 392
       }
... ...
@@ -394,6 +396,7 @@ Rcpp::List RcppPwiz::getPeakList(Rcpp::IntegerVector whichScan) {
394 396
       data.insert(data.end(), ints->data.begin(), ints->data.end());
395 397
       data_matrix = Rcpp::wrap(data);
396 398
       data_matrix.attr("dim") = Rcpp::Dimension(ints->data.size(), 2);
399
+      Rcpp::colnames(data_matrix) = cn;
397 400
       res[i] = data_matrix;
398 401
     }
399 402
     return res;
Browse code

Fix Warning from Wparentheses

Steffen Neumann authored on 26/01/2022 20:17:58
Showing 1 changed files
... ...
@@ -70,7 +70,7 @@ Rcpp::List RcppPwiz::getInstrumentInfo ( )
70 70
       if (!isInCacheInstrumentInfo)
71 71
         {
72 72
 
73
-	  vector<InstrumentConfigurationPtr> icp = msd->instrumentConfigurationPtrs; // NULL for mzData	    
73
+	  vector<InstrumentConfigurationPtr> icp = msd->instrumentConfigurationPtrs; // NULL for mzData
74 74
 	  if (icp.size() != 0)
75 75
             {
76 76
 	      CVTranslator cvTranslator;
... ...
@@ -88,13 +88,13 @@ Rcpp::List RcppPwiz::getInstrumentInfo ( )
88 88
 	      // defined.
89 89
 	      // Have to use try-catch
90 90
 	      try {
91
-		ionisation = std::string(adapter.ionisation());		  
91
+		ionisation = std::string(adapter.ionisation());
92 92
 	      } catch(...) {}
93 93
 	      try {
94
-		analyzer = std::string(adapter.analyzer());		  
94
+		analyzer = std::string(adapter.analyzer());
95 95
 	      } catch(...) {}
96 96
 	      try {
97
-		detector = std::string(adapter.detector());		  
97
+		detector = std::string(adapter.detector());
98 98
 	      } catch(...) {}
99 99
 	      instrumentInfo = Rcpp::List::create(
100 100
 						  Rcpp::_["manufacturer"]  = std::string(adapter.manufacturer()),
... ...
@@ -200,7 +200,7 @@ Rcpp::DataFrame RcppPwiz::getScanHeaderInfo (Rcpp::IntegerVector whichScan) {
200 200
     Rcpp::NumericVector isolationWindowUpperOffset(N_scans);
201 201
     Rcpp::NumericVector scanWindowLowerLimit(N_scans);
202 202
     Rcpp::NumericVector scanWindowUpperLimit(N_scans);
203
-    
203
+
204 204
     for (size_t i = 0; i < N_scans; i++) {
205 205
       int current_scan = whichScan[i];
206 206
       size_t current_index = static_cast<size_t>(current_scan - 1);
... ...
@@ -236,7 +236,7 @@ Rcpp::DataFrame RcppPwiz::getScanHeaderInfo (Rcpp::IntegerVector whichScan) {
236 236
       polarity[i] = (param.cvid==MS_negative_scan ? 0 : (param.cvid==MS_positive_scan ? +1 : -1 ) );
237 237
       // centroided
238 238
       param = sp->cvParamChild(MS_spectrum_representation);
239
-      centroided[i] = (param.cvid==MS_centroid_spectrum ? TRUE : (param.cvid==MS_profile_spectrum ? FALSE : NA_LOGICAL));      
239
+      centroided[i] = (param.cvid==MS_centroid_spectrum ? TRUE : (param.cvid==MS_profile_spectrum ? FALSE : NA_LOGICAL));
240 240
       // retentionTime
241 241
       retentionTime[i] = scan.cvParam(MS_scan_start_time).timeInSeconds();
242 242
       // ionInjectionTime
... ...
@@ -301,7 +301,7 @@ Rcpp::DataFrame RcppPwiz::getScanHeaderInfo (Rcpp::IntegerVector whichScan) {
301 301
 	isolationWindowUpperOffset[i] = NA_REAL;
302 302
       }
303 303
     }
304
-    
304
+
305 305
     Rcpp::List header(31);
306 306
     std::vector<std::string> names;
307 307
     size_t i = 0;
... ...
@@ -356,19 +356,19 @@ Rcpp::DataFrame RcppPwiz::getScanHeaderInfo (Rcpp::IntegerVector whichScan) {
356 356
     names.push_back("centroided");
357 357
     header[i++] = Rcpp::wrap(centroided);
358 358
     names.push_back("ionMobilityDriftTime");
359
-    header[i++] = Rcpp::wrap(ionMobilityDriftTime);      
359
+    header[i++] = Rcpp::wrap(ionMobilityDriftTime);
360 360
     names.push_back("isolationWindowTargetMZ");
361
-    header[i++] = Rcpp::wrap(isolationWindowTargetMZ);      
361
+    header[i++] = Rcpp::wrap(isolationWindowTargetMZ);
362 362
     names.push_back("isolationWindowLowerOffset");
363
-    header[i++] = Rcpp::wrap(isolationWindowLowerOffset);      
363
+    header[i++] = Rcpp::wrap(isolationWindowLowerOffset);
364 364
     names.push_back("isolationWindowUpperOffset");
365
-    header[i++] = Rcpp::wrap(isolationWindowUpperOffset);      
365
+    header[i++] = Rcpp::wrap(isolationWindowUpperOffset);
366 366
     names.push_back("scanWindowLowerLimit");
367
-    header[i++] = Rcpp::wrap(scanWindowLowerLimit);      
367
+    header[i++] = Rcpp::wrap(scanWindowLowerLimit);
368 368
     names.push_back("scanWindowUpperLimit");
369
-    header[i++] = Rcpp::wrap(scanWindowUpperLimit);      
369
+    header[i++] = Rcpp::wrap(scanWindowUpperLimit);
370 370
     header.attr("names") = names;
371
-    
371
+
372 372
     return header;
373 373
   }
374 374
   Rf_warningcall(R_NilValue, "pwiz not yet initialized.");
... ...
@@ -380,9 +380,9 @@ Rcpp::DataFrame RcppPwiz::getAllScanHeaderInfo ( ) {
380 380
     if (!isInCacheAllScanHeaderInfo) {
381 381
       SpectrumListPtr slp = msd->run.spectrumListPtr;
382 382
       size_t N = slp->size();
383
-      
383
+
384 384
       allScanHeaderInfo = getScanHeaderInfo(Rcpp::seq(1, N));
385
-      isInCacheAllScanHeaderInfo = TRUE;	    
385
+      isInCacheAllScanHeaderInfo = TRUE;
386 386
     }
387 387
     return allScanHeaderInfo ;
388 388
   }
... ...
@@ -496,7 +496,7 @@ void RcppPwiz::copyWriteMSfile(const string& file, const string& format,
496 496
       addDataProcessing(newmsd, Rcpp::as<Rcpp::StringVector>(software_processing(sp)));
497 497
     }
498 498
   }
499
-  
499
+
500 500
   // o run
501 501
   // Initialize the run and fill with data from the original file.
502 502
   Run &original_run = msd->run;
... ...
@@ -510,7 +510,7 @@ void RcppPwiz::copyWriteMSfile(const string& file, const string& format,
510 510
   }
511 511
   // Now filling with new data
512 512
   addSpectrumList(newmsd, spctr_header, spctr_data, rtime_seconds);
513
-  
513
+
514 514
   if (format == "mgf") {
515 515
     std::ofstream* mgfOutFileP = new std::ofstream(file.c_str());
516 516
     Serializer_MGF serializerMGF;
... ...
@@ -536,7 +536,7 @@ void RcppPwiz::copyWriteMSfile(const string& file, const string& format,
536 536
   }
537 537
   else
538 538
     Rcpp::Rcerr << format << " format not supported! Please try mgf, mzML, mzXML or mz5." << std::endl;
539
-  
539
+
540 540
   // Cleanup.
541 541
   delete msd;
542 542
 }
... ...
@@ -570,7 +570,7 @@ void RcppPwiz::writeSpectrumList(const string& file, const string& format,
570 570
       addDataProcessing(newmsd, Rcpp::as<Rcpp::StringVector>(software_processing(sp)));
571 571
     }
572 572
   }
573
-  
573
+
574 574
   newmsd.run.id = "Experiment_1";
575 575
 
576 576
   // Now filling with new data
... ...
@@ -607,9 +607,9 @@ void RcppPwiz::writeSpectrumList(const string& file, const string& format,
607 607
  * o soft_proc: is supposed to be a character vector of length >= 2:
608 608
  *   soft_proc[0]: The software name (required).
609 609
  *   soft_proc[1]: The software version (required).
610
- *   soft_proc[2]: The CV ID of the software. Use "MS:-1" if not known, in 
610
+ *   soft_proc[2]: The CV ID of the software. Use "MS:-1" if not known, in
611 611
  *                 which case we are NOT writing the corresponding CV element.
612
- *   soft_proc[3-length]: CV IDs of the processing steps (optional). 
612
+ *   soft_proc[3-length]: CV IDs of the processing steps (optional).
613 613
  */
614 614
 void RcppPwiz::addDataProcessing(MSData& msd, Rcpp::StringVector soft_proc) {
615 615
   SoftwarePtr new_soft(new Software);
... ...
@@ -689,7 +689,7 @@ void RcppPwiz::addSpectrumList(MSData& msd,
689 689
   Rcpp::NumericVector isolationWindowUpperOffset = spctr_header["isolationWindowUpperOffset"];
690 690
   Rcpp::NumericVector scanWindowLowerLimit = spctr_header["scanWindowLowerLimit"];
691 691
   Rcpp::NumericVector scanWindowUpperLimit = spctr_header["scanWindowUpperLimit"];
692
-  
692
+
693 693
   // From MSnbase::Spectrum        Column in the header
694 694
   // msLevel integer               $msLevel
695 695
   // peaksCount integer
... ...
@@ -710,7 +710,7 @@ void RcppPwiz::addSpectrumList(MSData& msd,
710 710
   // precursorIntensity numeric    $precursorIntensity
711 711
   // precursorCharge integer       $precursorCharge
712 712
   // collisionEnergy numeric       $collisionEnergy
713
-  
713
+
714 714
   // Now filling with new data
715 715
   shared_ptr<SpectrumListSimple> spectrumList(new SpectrumListSimple);
716 716
   // Add the default Processing pointer (fix issue #151
... ...
@@ -771,7 +771,7 @@ void RcppPwiz::addSpectrumList(MSData& msd,
771 771
       spct_scan.scanWindows.push_back(ScanWindow(scanWindowLowerLimit[i], scanWindowUpperLimit[i], MS_m_z));
772 772
     }
773 773
     // MSn - precursor:
774
-    if (precursorScanNum[i] > 0 | precursorMZ[i] > 0) {
774
+    if ( (precursorScanNum[i] > 0) | (precursorMZ[i] > 0) ) {
775 775
       // Fill precursor data. This preserves the precursor data even if the
776 776
       // precursor scan is not available (e.g. after MS level filtering).
777 777
       spct.precursors.resize(1);
... ...
@@ -814,7 +814,7 @@ void RcppPwiz::addSpectrumList(MSData& msd,
814 814
     // [X] precursorCharge
815 815
     // [X] precursorIntensity
816 816
     // [ ] mergedScan
817
-    
817
+
818 818
     Rcpp::NumericMatrix spct_vals = spctr_data[i];
819 819
     // mz values
820 820
     Rcpp::NumericVector mz_vals = spct_vals( Rcpp::_, 0);
... ...
@@ -886,7 +886,7 @@ Rcpp::DataFrame RcppPwiz::getChromatogramHeaderInfo (Rcpp::IntegerVector whichCh
886 886
       return Rcpp::DataFrame::create();
887 887
     }
888 888
 
889
-    int N = clp->size();  
889
+    int N = clp->size();
890 890
     int N_chrom = whichChrom.size();
891 891
 
892 892
     Rcpp::StringVector chromatogramId(N_chrom); // the ID from the chrom
... ...
@@ -902,7 +902,7 @@ Rcpp::DataFrame RcppPwiz::getChromatogramHeaderInfo (Rcpp::IntegerVector whichCh
902 902
     Rcpp::NumericVector productIsolationWindowTargetMZ(N_chrom);
903 903
     Rcpp::NumericVector productIsolationWindowLowerOffset(N_chrom);
904 904
     Rcpp::NumericVector productIsolationWindowUpperOffset(N_chrom);
905
-        
905
+
906 906
     for (int i = 0; i < N_chrom; i++) {
907 907
       int current_chrom = whichChrom[i];
908 908
       if (current_chrom < 0 || current_chrom > N) {
... ...
@@ -918,7 +918,7 @@ Rcpp::DataFrame RcppPwiz::getChromatogramHeaderInfo (Rcpp::IntegerVector whichCh
918 918
 	precursorIsolationWindowTargetMZ[i] = ch->precursor.isolationWindow.cvParam(MS_isolation_window_target_m_z).value.empty() ? NA_REAL : ch->precursor.isolationWindow.cvParam(MS_isolation_window_target_m_z).valueAs<double>();
919 919
 	precursorIsolationWindowLowerOffset[i] = ch->precursor.isolationWindow.cvParam(MS_isolation_window_lower_offset).value.empty() ? NA_REAL : ch->precursor.isolationWindow.cvParam(MS_isolation_window_lower_offset).valueAs<double>();
920 920
 	precursorIsolationWindowUpperOffset[i] = ch->precursor.isolationWindow.cvParam(MS_isolation_window_upper_offset).value.empty() ? NA_REAL : ch->precursor.isolationWindow.cvParam(MS_isolation_window_upper_offset).valueAs<double>();
921
-	precursorCollisionEnergy[i] = ch->precursor.activation.cvParam(MS_collision_energy).value.empty() ? NA_REAL : ch->precursor.activation.cvParam(MS_collision_energy).valueAs<double>(); 
921
+	precursorCollisionEnergy[i] = ch->precursor.activation.cvParam(MS_collision_energy).value.empty() ? NA_REAL : ch->precursor.activation.cvParam(MS_collision_energy).valueAs<double>();
922 922
       } else {
923 923
 	precursorIsolationWindowTargetMZ[i] = NA_REAL;
924 924
 	precursorIsolationWindowLowerOffset[i] = NA_REAL;
... ...
@@ -958,7 +958,7 @@ Rcpp::DataFrame RcppPwiz::getChromatogramHeaderInfo (Rcpp::IntegerVector whichCh
958 958
     chromHeader[i++] = Rcpp::wrap(productIsolationWindowLowerOffset);
959 959
     names.push_back("productIsolationWindowUpperOffset");
960 960
     chromHeader[i++] = Rcpp::wrap(productIsolationWindowUpperOffset);
961
-    
961
+
962 962
     chromHeader.attr("names") = names;
963 963
     return chromHeader;
964 964
   }
Browse code

Use pwiz to extract acquisitionNum from scanId

jorainer authored on 27/10/2021 05:52:05
Showing 1 changed files
... ...
@@ -22,9 +22,7 @@ void RcppPwiz::open(Rcpp::StringVector fileName)
22 22
 
23 23
   filename = Rcpp::as<std::string>(fileName(0));
24 24
   msd = new MSDataFile(filename);
25
-  // Better not to guess the native ID format. For mzML/mzXML all should be fine
26
-  // with the default one.
27
-  // nativeIdFormat = id::getDefaultNativeIDFormat(*msd);
25
+  nativeIdFormat = id::getDefaultNativeIDFormat(*msd);
28 26
 }
29 27
 
30 28
 /* Release all memory on close. */
... ...
@@ -132,42 +130,40 @@ Rcpp::List RcppPwiz::getInstrumentInfo ( )
132 130
   return instrumentInfo;
133 131
 }
134 132
 
135
-// int RcppPwiz::getAcquisitionNumber(size_t index) const
136
-// {
137
-//   const SpectrumIdentity& si = msd->run.spectrumListPtr->spectrumIdentity(index);
138
-//   string scanNumber = id::translateNativeIDToScanNumber(nativeIdFormat, si.id);
139
-//   if (scanNumber.empty()) {
140
-//     return static_cast<int>(index) + 1;
141
-//   }
142
-//   else
143
-//     return lexical_cast<int>(scanNumber);
144
-//   // return static_cast<int>(index) + 1;
145
-// }
146
-
147
-// Using this function instead of pwiz translateNativeIDToScanNumber because
148
-// it randomly causes segfaults on macOS.
149 133
 int RcppPwiz::getAcquisitionNumber(string id, size_t index) const
150 134
 {
151
-  if (id.find("controllerType") != std::string::npos) {
152
-    if (id.find("controllerType=0 controllerNumber=1") == std::string::npos)
153
-      return static_cast<int>(index) + 1;
154
-  }
155
-  string e;
156
-  std::smatch match;
157
-  if (id.find("scan=") != std::string::npos)
158
-    e ="scan=(\\d+)";
159
-  else if (id.find("index=") != std::string::npos)
160
-    e = "index=(\\d+)";
161
-  else if (id.find("spectrum=") != std::string::npos)
162
-    e = "spectrum=(\\d+)";
163
-  else if (id.find("scanId=") != std::string::npos)
164
-    e = "scanId=(\\d+)";
165
-  else return static_cast<int>(index) + 1;
166
-  if (std::regex_search(id, match, std::regex(e)))
167
-    return lexical_cast<int>(match[1]);
168
-  else return static_cast<int>(index) + 1;
135
+  // const SpectrumIdentity& si = msd->run.spectrumListPtr->spectrumIdentity(index);
136
+  string scanNumber = id::translateNativeIDToScanNumber(nativeIdFormat, id);
137
+  if (scanNumber.empty())
138
+    return static_cast<int>(index) + 1;
139
+  else
140
+    return lexical_cast<int>(scanNumber);
169 141
 }
170 142
 
143
+// Using this function instead of pwiz translateNativeIDToScanNumber because
144
+// it randomly causes segfaults on macOS.
145
+// int RcppPwiz::getAcquisitionNumber(string id, size_t index) const
146
+// {
147
+//   if (id.find("controllerType") != std::string::npos) {
148
+//     if (id.find("controllerType=0 controllerNumber=1") == std::string::npos)
149
+//       return static_cast<int>(index) + 1;
150
+//   }
151
+//   string e;
152
+//   std::smatch match;
153
+//   if (id.find("scan=") != std::string::npos)
154
+//     e ="scan=(\\d+)";
155
+//   else if (id.find("index=") != std::string::npos)
156
+//     e = "index=(\\d+)";
157
+//   else if (id.find("spectrum=") != std::string::npos)
158
+//     e = "spectrum=(\\d+)";
159
+//   else if (id.find("scanId=") != std::string::npos)
160
+//     e = "scanId=(\\d+)";
161
+//   else return static_cast<int>(index) + 1;
162
+//   if (std::regex_search(id, match, std::regex(e)))
163
+//     return lexical_cast<int>(match[1]);
164
+//   else return static_cast<int>(index) + 1;
165
+// }
166
+
171 167
 Rcpp::DataFrame RcppPwiz::getScanHeaderInfo (Rcpp::IntegerVector whichScan) {
172 168
   if (msd != NULL) {
173 169
     SpectrumListPtr slp = msd->run.spectrumListPtr;
Browse code

refactor: patially rewrite code to exctract header info

- Create index list in R instead of C++.
- Use `size_t` instead of `int` where suitable.
- Ensure that correct data types are passed between R and C++.
- Use new (own) implementation to extract the acquisition number from the
spectrum ID instead of `id::translateNativeIDToScanNumber` from proteowizard
to avoid segfaults on macOS (https://github.com/sneumann/xcms/issues/422).

jorainer authored on 25/10/2021 08:58:52 • Steffen Neumann committed on 25/10/2021 15:08:38
Showing 1 changed files
... ...
@@ -3,6 +3,7 @@
3 3
 RcppPwiz::RcppPwiz()
4 4
 {
5 5
   msd = NULL;
6
+  nativeIdFormat = CVID_Unknown;
6 7
   instrumentInfo = Rcpp::List::create();
7 8
   chromatogramsInfo = Rcpp::DataFrame::create();
8 9
   isInCacheInstrumentInfo = FALSE;
... ...
@@ -15,12 +16,15 @@ RcppPwiz::~RcppPwiz()
15 16
   RcppPwiz::close();
16 17
 }
17 18
 
18
-void RcppPwiz::open(const string& fileName)
19
+// void RcppPwiz::open(const string& fileName)
20
+void RcppPwiz::open(Rcpp::StringVector fileName)
19 21
 {
20 22
 
21
-  filename = fileName;
22
-  msd = new MSDataFile(fileName);
23
-
23
+  filename = Rcpp::as<std::string>(fileName(0));
24
+  msd = new MSDataFile(filename);
25
+  // Better not to guess the native ID format. For mzML/mzXML all should be fine
26
+  // with the default one.
27
+  // nativeIdFormat = id::getDefaultNativeIDFormat(*msd);
24 28
 }
25 29
 
26 30
 /* Release all memory on close. */
... ...
@@ -30,6 +34,7 @@ void RcppPwiz::close()
30 34
     {
31 35
       delete msd;
32 36
       msd = NULL;
37
+      nativeIdFormat = CVID_Unknown;
33 38
       instrumentInfo = Rcpp::List::create();
34 39
       chromatogramsInfo = Rcpp::DataFrame::create();
35 40
       isInCacheInstrumentInfo = FALSE;
... ...
@@ -127,12 +132,47 @@ Rcpp::List RcppPwiz::getInstrumentInfo ( )
127 132
   return instrumentInfo;
128 133
 }
129 134
 
135
+// int RcppPwiz::getAcquisitionNumber(size_t index) const
136
+// {
137
+//   const SpectrumIdentity& si = msd->run.spectrumListPtr->spectrumIdentity(index);
138
+//   string scanNumber = id::translateNativeIDToScanNumber(nativeIdFormat, si.id);
139
+//   if (scanNumber.empty()) {
140
+//     return static_cast<int>(index) + 1;
141
+//   }
142
+//   else
143
+//     return lexical_cast<int>(scanNumber);
144
+//   // return static_cast<int>(index) + 1;
145
+// }
146
+
147
+// Using this function instead of pwiz translateNativeIDToScanNumber because
148
+// it randomly causes segfaults on macOS.
149
+int RcppPwiz::getAcquisitionNumber(string id, size_t index) const
150
+{
151
+  if (id.find("controllerType") != std::string::npos) {
152
+    if (id.find("controllerType=0 controllerNumber=1") == std::string::npos)
153
+      return static_cast<int>(index) + 1;
154
+  }
155
+  string e;
156
+  std::smatch match;
157
+  if (id.find("scan=") != std::string::npos)
158
+    e ="scan=(\\d+)";
159
+  else if (id.find("index=") != std::string::npos)
160
+    e = "index=(\\d+)";
161
+  else if (id.find("spectrum=") != std::string::npos)
162
+    e = "spectrum=(\\d+)";
163
+  else if (id.find("scanId=") != std::string::npos)
164
+    e = "scanId=(\\d+)";
165
+  else return static_cast<int>(index) + 1;
166
+  if (std::regex_search(id, match, std::regex(e)))
167
+    return lexical_cast<int>(match[1]);
168
+  else return static_cast<int>(index) + 1;
169
+}
170
+
130 171
 Rcpp::DataFrame RcppPwiz::getScanHeaderInfo (Rcpp::IntegerVector whichScan) {
131 172
   if (msd != NULL) {
132 173
     SpectrumListPtr slp = msd->run.spectrumListPtr;
133
-    int N = slp->size();
134
-    int N_scans = whichScan.size();
135
-    CVID nativeIdFormat = id::getDefaultNativeIDFormat(*msd);
174
+    size_t N = slp->size();
175
+    size_t N_scans = whichScan.size();
136 176
     Rcpp::IntegerVector seqNum(N_scans); // number in sequence observed file (1-based)
137 177
     Rcpp::IntegerVector acquisitionNum(N_scans); // scan number as declared in File (may be gaps)
138 178
     Rcpp::IntegerVector msLevel(N_scans);
... ...
@@ -165,23 +205,20 @@ Rcpp::DataFrame RcppPwiz::getScanHeaderInfo (Rcpp::IntegerVector whichScan) {
165 205
     Rcpp::NumericVector scanWindowLowerLimit(N_scans);
166 206
     Rcpp::NumericVector scanWindowUpperLimit(N_scans);
167 207
     
168
-    for (int i = 0; i < N_scans; i++) {
208
+    for (size_t i = 0; i < N_scans; i++) {
169 209
       int current_scan = whichScan[i];
170
-      SpectrumPtr sp = slp->spectrum(current_scan - 1, false);
210
+      size_t current_index = static_cast<size_t>(current_scan - 1);
211
+      // SpectrumPtr sp = slp->spectrum(current_index, false);
212
+      SpectrumPtr sp = slp->spectrum(current_index, DetailLevel_FullMetadata);
171 213
       Scan dummy;
172 214
       Scan& scan = sp->scanList.scans.empty() ? dummy : sp->scanList.scans[0];
215
+      if (scan.empty())
216
+	Rprintf("Scan with index %d empty.\n", current_scan);
173 217
       // seqNum
174 218
       seqNum[i] = current_scan;
175
-      // acquisitionNum
176
-      string id = sp->id;
177
-      string scanNumber = id::translateNativeIDToScanNumber(nativeIdFormat, id);
178
-      if (scanNumber.empty()) {
179
-	acquisitionNum[i] = current_scan;
180
-      } else {
181
-	acquisitionNum[i] = lexical_cast<int>(scanNumber);
182
-      }
219
+      acquisitionNum[i] = getAcquisitionNumber(sp->id, current_index);
183 220
       // spectrumId
184
-      spectrumId[i] = sp->id;
221
+      spectrumId[i] = Rcpp::String(sp->id);
185 222
       // msLevel
186 223
       msLevel[i] = sp->cvParam(MS_ms_level).valueAs<int>();
187 224
       // peaksCount
... ...
@@ -231,14 +268,8 @@ Rcpp::DataFrame RcppPwiz::getScanHeaderInfo (Rcpp::IntegerVector whichScan) {
231 268
 	collisionEnergy[i] = precursor.activation.cvParam(MS_collision_energy).valueAs<double>();
232 269
 	// precursorScanNum
233 270
 	size_t precursorIndex = slp->find(precursor.spectrumID);
234
-	if (precursorIndex < slp->size()) {
235
-	  const SpectrumIdentity& precursorSpectrum = slp->spectrumIdentity(precursorIndex);
236
-	  string precursorScanNumber = id::translateNativeIDToScanNumber(nativeIdFormat, precursorSpectrum.id);
237
-	  if (precursorScanNumber.empty()) {
238
-	    precursorScanNum[i] = precursorIndex + 1;
239
-	  } else {
240
-	    precursorScanNum[i] = lexical_cast<int>(precursorScanNumber);
241
-	  }
271
+	if (precursorIndex < N) {
272
+	  precursorScanNum[i] = getAcquisitionNumber(precursor.spectrumID, precursorIndex);
242 273
 	} else {
243 274
 	  precursorScanNum[i] = NA_INTEGER;
244 275
 	}
... ...
@@ -277,7 +308,7 @@ Rcpp::DataFrame RcppPwiz::getScanHeaderInfo (Rcpp::IntegerVector whichScan) {
277 308
     
278 309
     Rcpp::List header(31);
279 310
     std::vector<std::string> names;
280
-    int i = 0;
311
+    size_t i = 0;
281 312
     names.push_back("seqNum");
282 313
     header[i++] = Rcpp::wrap(seqNum);
283 314
     names.push_back("acquisitionNum");
... ...
@@ -352,7 +383,7 @@ Rcpp::DataFrame RcppPwiz::getAllScanHeaderInfo ( ) {
352 383
   if (msd != NULL) {
353 384
     if (!isInCacheAllScanHeaderInfo) {
354 385
       SpectrumListPtr slp = msd->run.spectrumListPtr;
355
-      int N = slp->size();
386
+      size_t N = slp->size();
356 387
       
357 388
       allScanHeaderInfo = getScanHeaderInfo(Rcpp::seq(1, N));
358 389
       isInCacheAllScanHeaderInfo = TRUE;	    
... ...
@@ -366,21 +397,23 @@ Rcpp::DataFrame RcppPwiz::getAllScanHeaderInfo ( ) {
366 397
 Rcpp::List RcppPwiz::getPeakList(Rcpp::IntegerVector whichScan) {
367 398
   if (msd != NULL) {
368 399
     SpectrumListPtr slp = msd->run.spectrumListPtr;
369
-    int n_scans = slp->size();
370
-    int n_want = whichScan.size();
400
+    size_t n_scans = slp->size();
401
+    size_t n_want = whichScan.size();
371 402
     int current_scan;
372 403
     SpectrumPtr sp;
373 404
     BinaryDataArrayPtr mzs,ints;
374 405
     std::vector<double> data;
375 406
     Rcpp::NumericVector data_matrix;
376 407
     Rcpp::List res(n_want);
377
-    for (int i = 0; i < n_want; i++) {
408
+    for (size_t i = 0; i < n_want; i++) {
378 409
       current_scan = whichScan[i];
379 410
       if (current_scan < 1 || current_scan > n_scans) {
380 411
 	Rprintf("Index whichScan out of bounds [1 ... %d].\n", n_scans);
381 412
 	return Rcpp::List::create( );
382 413
       }
383
-      sp = slp->spectrum(current_scan - 1, true);
414
+      size_t current_index = static_cast<size_t>(current_scan - 1);
415
+      // sp = slp->spectrum(current_index, true);
416
+      sp = slp->spectrum(current_index, DetailLevel_FullData);
384 417
       mzs = sp->getMZArray();
385 418
       ints = sp->getIntensityArray();
386 419
       if (!mzs.get() || !ints.get()) {
... ...
@@ -847,7 +880,7 @@ Rcpp::DataFrame RcppPwiz::getChromatogramsInfo( int whichChrom )
847 880
 Rcpp::DataFrame RcppPwiz::getChromatogramHeaderInfo (Rcpp::IntegerVector whichChrom)
848 881
 {
849 882
   if (msd != NULL) {
850
-    CVID nativeIdFormat_ = id::getDefaultNativeIDFormat(*msd);
883
+    // CVID nativeIdFormat_ = id::getDefaultNativeIDFormat(*msd);
851 884
     ChromatogramListPtr clp = msd->run.chromatogramListPtr;
852 885
     if (clp.get() == 0) {
853 886
       Rf_warningcall(R_NilValue, "The direct support for chromatogram info is only available in mzML format.");
... ...
@@ -980,7 +1013,7 @@ Rcpp::NumericMatrix RcppPwiz::get3DMap ( std::vector<int> scanNumbers, double wh
980 1013
       //Rprintf("%d\n",1);
981 1014
       for (int i = 0; i < scanNumbers.size(); i++)
982 1015
         {
983
-	  SpectrumPtr s = slp->spectrum(scanNumbers[i] - 1, true);
1016
+	  SpectrumPtr s = slp->spectrum(scanNumbers[i] - 1, DetailLevel_FullMetadata);
984 1017
 	  vector<MZIntensityPair> pairs;
985 1018
 	  s->getMZIntensityPairs(pairs);
986 1019
 
Browse code

refactor: rewrite the peak extraction function for pwiz

- Rewrite the C++ code for peak data extraction with the pwiz backend.
- Fix unit tests.

jorainer authored on 26/09/2019 10:09:04
Showing 1 changed files
... ...
@@ -38,43 +38,6 @@ void RcppPwiz::close()
38 38
     }
39 39
 }
40 40
 
41
-
42
-// void RcppPwiz::writeMSfile(const string& file, const string& format)
43
-// {
44
-//     if (msd != NULL)
45
-//     {
46
-//         if(format == "mgf")
47
-//         {
48
-//             std::ofstream* mgfOutFileP = new std::ofstream(file.c_str());
49
-//             Serializer_MGF serializerMGF;
50
-//             serializerMGF.write(*mgfOutFileP, *msd);
51
-//             mgfOutFileP->flush();
52
-//             mgfOutFileP->close();
53
-//         }
54
-//         else if(format == "mzxml")
55
-//         {
56
-//             std::ofstream mzXMLOutFileP(file.c_str());
57
-//             Serializer_mzXML::Config config;
58
-//             config.binaryDataEncoderConfig.compression = BinaryDataEncoder::Compression_Zlib;
59
-//             Serializer_mzXML serializerMzXML(config);
60
-//             serializerMzXML.write(mzXMLOutFileP, *msd);
61
-//         }
62
-//         else if(format == "mzml")
63
-//         {
64
-//             std::ofstream mzXMLOutFileP(file.c_str());
65
-//             Serializer_mzML::Config config;
66
-//             config.binaryDataEncoderConfig.compression = BinaryDataEncoder::Compression_Zlib;
67
-//             Serializer_mzML mzmlSerializer(config);
68
-//             mzmlSerializer.write(mzXMLOutFileP, *msd);
69
-//         }
70
-//         else
71
-//             Rcpp::Rcerr << format << " format not supported! Please try mgf, mzML, mzXML or mz5." << std::endl;
72
-//     }
73
-//     else
74
-//         Rcpp::Rcerr << "No pwiz object available! Please open a file first!" << std::endl;
75
-// }
76
-
77
-
78 41
 string RcppPwiz::getFilename() {
79 42
   return filename;
80 43
 }
... ...
@@ -400,34 +363,43 @@ Rcpp::DataFrame RcppPwiz::getAllScanHeaderInfo ( ) {
400 363
   return Rcpp::DataFrame::create( );
401 364
 }
402 365
 
403
-Rcpp::List RcppPwiz::getPeakList (int whichScan) {
366
+Rcpp::List RcppPwiz::getPeakList(Rcpp::IntegerVector whichScan) {
404 367
   if (msd != NULL) {
405 368
     SpectrumListPtr slp = msd->run.spectrumListPtr;
406
-    if ((whichScan <= 0) || (whichScan > slp->size())) {
407
-      Rprintf("Index whichScan out of bounds [1 ... %d].\n", slp->size());
408
-      return Rcpp::List::create( );
409
-    }
410
-    SpectrumPtr s = slp->spectrum(whichScan - 1, true);
411
-    vector<MZIntensityPair> pairs;
412
-    s->getMZIntensityPairs(pairs);
413
-
414
-    Rcpp::NumericMatrix peaks(pairs.size(), 2);
415
-
416
-    if(pairs.size()!=0) {
417
-      for (int i = 0; i < pairs.size(); i++) {
418
-	MZIntensityPair p = pairs.at(i);
419
-	peaks(i,0) = p.mz;
420
-	peaks(i,1) = p.intensity;
369
+    int n_scans = slp->size();
370
+    int n_want = whichScan.size();
371
+    int current_scan;
372
+    SpectrumPtr sp;
373
+    BinaryDataArrayPtr mzs,ints;
374
+    std::vector<double> data;
375
+    Rcpp::NumericVector data_matrix;
376
+    Rcpp::List res(n_want);
377
+    for (int i = 0; i < n_want; i++) {
378
+      current_scan = whichScan[i];
379
+      if (current_scan < 1 || current_scan > n_scans) {
380
+	Rprintf("Index whichScan out of bounds [1 ... %d].\n", n_scans);
381
+	return Rcpp::List::create( );
421 382
       }
383
+      sp = slp->spectrum(current_scan - 1, true);
384
+      mzs = sp->getMZArray();
385
+      ints = sp->getIntensityArray();
386
+      if (!mzs.get() || !ints.get()) {
387
+	Rcpp::NumericMatrix pks(0, 2);
388
+	res[i] = pks;
389
+	continue;
390
+      }
391
+      if (mzs->data.size() != ints->data.size())
392
+	Rcpp::Rcerr << "Sizes of mz and intensity arrays don't match." << std::endl;
393
+      data = mzs->data;
394
+      data.insert(data.end(), ints->data.begin(), ints->data.end());
395
+      data_matrix = Rcpp::wrap(data);
396
+      data_matrix.attr("dim") = Rcpp::Dimension(ints->data.size(), 2);
397
+      res[i] = data_matrix;
422 398
     }
423
-
424
-    return Rcpp::List::create(
425
-			      Rcpp::_["peaksCount"]  = pairs.size(),
426
-			      Rcpp::_["peaks"]  = peaks
427
-			      ) ;
399
+    return res;
428 400
   }
429 401
   Rf_warningcall(R_NilValue, "pwiz not yet initialized.");
430
-  return Rcpp::List::create( );
402
+  return Rcpp::List::create();
431 403
 }
432 404
 
433 405
 /**
Browse code

Fix unit tests and documentation

jorainer authored on 24/09/2019 16:50:18
Showing 1 changed files
... ...
@@ -391,7 +391,7 @@ Rcpp::DataFrame RcppPwiz::getAllScanHeaderInfo ( ) {
391 391
       SpectrumListPtr slp = msd->run.spectrumListPtr;
392 392
       int N = slp->size();
393 393
       
394
-      allScanHeaderInfo = getSpectrumHeader(Rcpp::seq(1, N));
394
+      allScanHeaderInfo = getScanHeaderInfo(Rcpp::seq(1, N));
395 395
       isInCacheAllScanHeaderInfo = TRUE;	    
396 396
     }
397 397
     return allScanHeaderInfo ;
Browse code

refactor: avoid usage of RAMPAdapter in pwiz header function

- RcppPwiz::getScanHeaderInfo does no longer use the pwiz RAMPadaptor to get
some spectra header information but extracts all information directly within
the function (making it also considerably faster). Comparison is provided in
issue #205.

jorainer authored on 24/09/2019 12:49:46
Showing 1 changed files
... ...
@@ -164,247 +164,268 @@ Rcpp::List RcppPwiz::getInstrumentInfo ( )
164 164
   return instrumentInfo;
165 165
 }
166 166
 
167
-
168
-Rcpp::DataFrame RcppPwiz::getScanHeaderInfo (Rcpp::IntegerVector whichScan)
169
-{
170
-  if (msd != NULL)
171
-    {
172
-      SpectrumListPtr slp = msd->run.spectrumListPtr;
173
-      int N = slp->size();
174
-      
175
-      int N_scans = whichScan.size();
176
-      
177
-      ScanHeaderStruct scanHeader;
178
-      RAMPAdapter * adapter = new  RAMPAdapter(filename);
179
-      Rcpp::IntegerVector seqNum(N_scans); // number in sequence observed file (1-based)
180
-      Rcpp::IntegerVector acquisitionNum(N_scans); // scan number as declared in File (may be gaps)
181
-      Rcpp::IntegerVector msLevel(N_scans);
182
-      Rcpp::IntegerVector polarity(N_scans);
183
-      Rcpp::IntegerVector peaksCount(N_scans);
184
-      Rcpp::NumericVector totIonCurrent(N_scans);
185
-      Rcpp::NumericVector retentionTime(N_scans);        /* in seconds */
186
-      Rcpp::NumericVector basePeakMZ(N_scans);
187
-      Rcpp::NumericVector basePeakIntensity(N_scans);
188
-      Rcpp::NumericVector collisionEnergy(N_scans);
189
-      Rcpp::NumericVector ionisationEnergy(N_scans);
190
-      Rcpp::NumericVector lowMZ(N_scans);
191
-      Rcpp::NumericVector highMZ(N_scans);
192
-      Rcpp::IntegerVector precursorScanNum(N_scans); /* only if MS level > 1 */
193
-      Rcpp::NumericVector precursorMZ(N_scans);  /* only if MS level > 1 */
194
-      Rcpp::IntegerVector precursorCharge(N_scans);  /* only if MS level > 1 */
195
-      Rcpp::NumericVector precursorIntensity(N_scans);  /* only if MS level > 1 */
196
-      //char scanType[SCANTYPE_LENGTH];
197
-      //char activationMethod[SCANTYPE_LENGTH];
198
-      //char possibleCharges[SCANTYPE_LENGTH];
199
-      //int numPossibleCharges;
200
-      //bool possibleChargesArray[CHARGEARRAY_LENGTH]; /* NOTE: does NOT include "precursorCharge" information; only from "possibleCharges" */
201
-      Rcpp::IntegerVector mergedScan(N_scans);  /* only if MS level > 1 */
202
-      Rcpp::IntegerVector mergedResultScanNum(N_scans); /* scan number of the resultant merged scan */
203
-      Rcpp::IntegerVector mergedResultStartScanNum(N_scans); /* smallest scan number of the scanOrigin for merged scan */
204
-      Rcpp::IntegerVector mergedResultEndScanNum(N_scans); /* largest scan number of the scanOrigin for merged scan */
205
-      Rcpp::NumericVector ionInjectionTime(N_scans); /* The time spent filling an ion trapping device*/
206
-      Rcpp::StringVector filterString(N_scans);
207
-      Rcpp::StringVector spectrumId(N_scans);
208
-      Rcpp::LogicalVector centroided(N_scans);
209
-      Rcpp::NumericVector ionMobilityDriftTime(N_scans);
210
-      Rcpp::NumericVector isolationWindowTargetMZ(N_scans);
211
-      Rcpp::NumericVector isolationWindowLowerOffset(N_scans);
212
-      Rcpp::NumericVector isolationWindowUpperOffset(N_scans);
213
-      Rcpp::NumericVector scanWindowLowerLimit(N_scans);
214
-      Rcpp::NumericVector scanWindowUpperLimit(N_scans);
215
-      
216
-      for (int i = 0; i < N_scans; i++)
217
-	{
218
-	  int current_scan = whichScan[i];
219
-	  adapter->getScanHeader(current_scan - 1, scanHeader, false);
220
-	  seqNum[i] = scanHeader.seqNum;
221
-	  acquisitionNum[i] = scanHeader.acquisitionNum;
222
-	  msLevel[i] = scanHeader.msLevel;
223
-	  
224
-	  SpectrumPtr sp = slp->spectrum(current_scan-1, false); // Is TRUE neccessary here ? 
225
-	  Scan dummy;
226
-	  Scan& scan = sp->scanList.scans.empty() ? dummy : sp->scanList.scans[0];
227
-	  CVParam param = sp->cvParamChild(MS_scan_polarity);
228
-	  polarity[i] = (param.cvid==MS_negative_scan ? 0 : (param.cvid==MS_positive_scan ? +1 : -1 ) );
229
-	  // ionInjectionTime[i] = scan.cvParam(MS_ion_injection_time).valueAs<double>();
230
-	  ionInjectionTime[i] = (scan.cvParam(MS_ion_injection_time).timeInSeconds() * 1000);
231
-	  filterString[i] = scan.cvParam(MS_filter_string).value.empty() ? NA_STRING : Rcpp::String(scan.cvParam(MS_filter_string).value);
232
-	  ionMobilityDriftTime[i] = scan.cvParam(MS_ion_mobility_drift_time).value.empty() ? NA_REAL : (scan.cvParam(MS_ion_mobility_drift_time).timeInSeconds() * 1000);
233
-
234
-	  if (!scan.scanWindows.empty()) {
235
-	    scanWindowLowerLimit[i] = scan.scanWindows[0].cvParam(MS_scan_window_lower_limit).valueAs<double>();
236
-	    scanWindowUpperLimit[i] = scan.scanWindows[0].cvParam(MS_scan_window_upper_limit).valueAs<double>();
237
-	  } else {
238
-	    scanWindowLowerLimit[i] = NA_REAL;
239
-	    scanWindowUpperLimit[i] = NA_REAL;
240
-	  }
241
-	  
242
-	  if (!sp->precursors.empty()) {
243
-	    IsolationWindow iwin = sp->precursors[0].isolationWindow;
244
-	    if (!iwin.empty()) {
245
-	      isolationWindowTargetMZ[i] = iwin.cvParam(MS_isolation_window_target_m_z).value.empty() ? NA_REAL : iwin.cvParam(MS_isolation_window_target_m_z).valueAs<double>();
246
-	      isolationWindowLowerOffset[i] = iwin.cvParam(MS_isolation_window_lower_offset).value.empty() ? NA_REAL : iwin.cvParam(MS_isolation_window_lower_offset).valueAs<double>();
247
-	      isolationWindowUpperOffset[i] = iwin.cvParam(MS_isolation_window_upper_offset).value.empty() ? NA_REAL : iwin.cvParam(MS_isolation_window_upper_offset).valueAs<double>();
248
-	    } else {
249
-	      isolationWindowTargetMZ[i] = NA_REAL;
250
-	      isolationWindowLowerOffset[i] = NA_REAL;
251
-	      isolationWindowUpperOffset[i] = NA_REAL;
252
-	    }
167
+Rcpp::DataFrame RcppPwiz::getScanHeaderInfo (Rcpp::IntegerVector whichScan) {
168
+  if (msd != NULL) {
169
+    SpectrumListPtr slp = msd->run.spectrumListPtr;
170
+    int N = slp->size();
171
+    int N_scans = whichScan.size();
172
+    CVID nativeIdFormat = id::getDefaultNativeIDFormat(*msd);
173
+    Rcpp::IntegerVector seqNum(N_scans); // number in sequence observed file (1-based)
174
+    Rcpp::IntegerVector acquisitionNum(N_scans); // scan number as declared in File (may be gaps)
175
+    Rcpp::IntegerVector msLevel(N_scans);
176
+    Rcpp::IntegerVector polarity(N_scans);
177
+    Rcpp::IntegerVector peaksCount(N_scans);
178
+    Rcpp::NumericVector totIonCurrent(N_scans);
179
+    Rcpp::NumericVector retentionTime(N_scans);        /* in seconds */
180
+    Rcpp::NumericVector basePeakMZ(N_scans);
181
+    Rcpp::NumericVector basePeakIntensity(N_scans);
182
+    Rcpp::NumericVector collisionEnergy(N_scans);
183
+    Rcpp::NumericVector ionisationEnergy(N_scans);
184
+    Rcpp::NumericVector lowMZ(N_scans);
185
+    Rcpp::NumericVector highMZ(N_scans);
186
+    Rcpp::IntegerVector precursorScanNum(N_scans); /* only if MS level > 1 */
187
+    Rcpp::NumericVector precursorMZ(N_scans);  /* only if MS level > 1 */
188
+    Rcpp::IntegerVector precursorCharge(N_scans);  /* only if MS level > 1 */
189
+    Rcpp::NumericVector precursorIntensity(N_scans);  /* only if MS level > 1 */
190
+    Rcpp::IntegerVector mergedScan(N_scans);  /* only if MS level > 1 */
191
+    Rcpp::IntegerVector mergedResultScanNum(N_scans); /* scan number of the resultant merged scan */
192
+    Rcpp::IntegerVector mergedResultStartScanNum(N_scans); /* smallest scan number of the scanOrigin for merged scan */
193
+    Rcpp::IntegerVector mergedResultEndScanNum(N_scans); /* largest scan number of the scanOrigin for merged scan */
194
+    Rcpp::NumericVector ionInjectionTime(N_scans); /* The time spent filling an ion trapping device*/
195
+    Rcpp::StringVector filterString(N_scans);
196
+    Rcpp::StringVector spectrumId(N_scans);
197
+    Rcpp::LogicalVector centroided(N_scans);
198
+    Rcpp::NumericVector ionMobilityDriftTime(N_scans);
199
+    Rcpp::NumericVector isolationWindowTargetMZ(N_scans);
200
+    Rcpp::NumericVector isolationWindowLowerOffset(N_scans);
201
+    Rcpp::NumericVector isolationWindowUpperOffset(N_scans);
202
+    Rcpp::NumericVector scanWindowLowerLimit(N_scans);
203
+    Rcpp::NumericVector scanWindowUpperLimit(N_scans);
204
+    
205
+    for (int i = 0; i < N_scans; i++) {
206
+      int current_scan = whichScan[i];
207
+      SpectrumPtr sp = slp->spectrum(current_scan - 1, false);
208
+      Scan dummy;
209
+      Scan& scan = sp->scanList.scans.empty() ? dummy : sp->scanList.scans[0];
210
+      // seqNum
211
+      seqNum[i] = current_scan;
212
+      // acquisitionNum
213
+      string id = sp->id;
214
+      string scanNumber = id::translateNativeIDToScanNumber(nativeIdFormat, id);
215
+      if (scanNumber.empty()) {
216
+	acquisitionNum[i] = current_scan;
217
+      } else {
218
+	acquisitionNum[i] = lexical_cast<int>(scanNumber);
219
+      }
220
+      // spectrumId
221
+      spectrumId[i] = sp->id;
222
+      // msLevel
223
+      msLevel[i] = sp->cvParam(MS_ms_level).valueAs<int>();
224
+      // peaksCount
225
+      peaksCount[i] = static_cast<int>(sp->defaultArrayLength);
226
+      // totIonCurrent
227
+      totIonCurrent[i] = sp->cvParam(MS_total_ion_current).valueAs<double>();
228
+      // basePeakMZ
229
+      basePeakMZ[i] = sp->cvParam(MS_base_peak_m_z).valueAs<double>();
230
+      // basePeakIntensity
231
+      basePeakIntensity[i] = sp->cvParam(MS_base_peak_intensity).valueAs<double>();
232
+      // ionisationEnerty
233
+      ionisationEnergy[i] = sp->cvParam(MS_ionization_energy_OBSOLETE).valueAs<double>();
234
+      // lowMZ
235
+      lowMZ[i] = sp->cvParam(MS_lowest_observed_m_z).valueAs<double>();
236
+      // highMZ
237
+      highMZ[i] = sp->cvParam(MS_highest_observed_m_z).valueAs<double>();
238
+      // polarity
239
+      CVParam param = sp->cvParamChild(MS_scan_polarity);
240
+      polarity[i] = (param.cvid==MS_negative_scan ? 0 : (param.cvid==MS_positive_scan ? +1 : -1 ) );
241
+      // centroided
242
+      param = sp->cvParamChild(MS_spectrum_representation);
243
+      centroided[i] = (param.cvid==MS_centroid_spectrum ? TRUE : (param.cvid==MS_profile_spectrum ? FALSE : NA_LOGICAL));      
244
+      // retentionTime
245
+      retentionTime[i] = scan.cvParam(MS_scan_start_time).timeInSeconds();
246
+      // ionInjectionTime
247
+      ionInjectionTime[i] = (scan.cvParam(MS_ion_injection_time).timeInSeconds() * 1000);
248
+      // filterString
249
+      filterString[i] = scan.cvParam(MS_filter_string).value.empty() ? NA_STRING : Rcpp::String(scan.cvParam(MS_filter_string).value);
250
+      // ionMobilityDriftTime
251
+      ionMobilityDriftTime[i] = scan.cvParam(MS_ion_mobility_drift_time).value.empty() ? NA_REAL : (scan.cvParam(MS_ion_mobility_drift_time).timeInSeconds() * 1000);
252
+      // scanWindowLowerLimit and scanWindowUpperLimit
253
+      if (!scan.scanWindows.empty()) {
254
+	scanWindowLowerLimit[i] = scan.scanWindows[0].cvParam(MS_scan_window_lower_limit).valueAs<double>();
255
+	scanWindowUpperLimit[i] = scan.scanWindows[0].cvParam(MS_scan_window_upper_limit).valueAs<double>();
256
+      } else {
257
+	scanWindowLowerLimit[i] = NA_REAL;
258
+	scanWindowUpperLimit[i] = NA_REAL;
259
+      }
260
+      // mergedScan - also not supported by RAMPAdapter
261
+      mergedScan[i] = NA_INTEGER;
262
+      mergedResultScanNum[i] = NA_INTEGER;
263
+      mergedResultStartScanNum[i] = NA_INTEGER;
264
+      mergedResultEndScanNum[i] = NA_INTEGER;
265
+      if (!sp->precursors.empty()) {
266
+	const Precursor& precursor = sp->precursors[0];
267
+	// collisionEnergy
268
+	collisionEnergy[i] = precursor.activation.cvParam(MS_collision_energy).valueAs<double>();
269
+	// precursorScanNum
270
+	size_t precursorIndex = slp->find(precursor.spectrumID);
271
+	if (precursorIndex < slp->size()) {
272
+	  const SpectrumIdentity& precursorSpectrum = slp->spectrumIdentity(precursorIndex);
273
+	  string precursorScanNumber = id::translateNativeIDToScanNumber(nativeIdFormat, precursorSpectrum.id);
274
+	  if (precursorScanNumber.empty()) {
275
+	    precursorScanNum[i] = precursorIndex + 1;
253 276
 	  } else {
254
-	      isolationWindowTargetMZ[i] = NA_REAL;
255
-	      isolationWindowLowerOffset[i] = NA_REAL;
256
-	      isolationWindowUpperOffset[i] = NA_REAL;
277
+	    precursorScanNum[i] = lexical_cast<int>(precursorScanNumber);
257 278
 	  }
258
-	  peaksCount[i] = scanHeader.peaksCount;
259
-	  totIonCurrent[i] = scanHeader.totIonCurrent;
260
-	  retentionTime[i] = scanHeader.retentionTime;
261
-	  basePeakMZ[i] = scanHeader.basePeakMZ;
262
-	  basePeakIntensity[i] = scanHeader.basePeakIntensity;
263
-	  collisionEnergy[i] = scanHeader.collisionEnergy;