Browse code

Add chromatogramHeader method

- Add a chromatogramHeader method to extract chromatogram header information
from an mzML file (issue #141).

jotsetung authored on 11/12/2017 09:26:45
Showing 10 changed files

... ...
@@ -2,7 +2,7 @@ Package: mzR
2 2
 Type: Package
3 3
 Title: parser for netCDF, mzXML, mzData and mzML and mzIdentML files
4 4
        (mass spectrometry data)
5
-Version: 2.13.1
5
+Version: 2.13.2
6 6
 Author: Bernd Fischer, Steffen Neumann, Laurent Gatto, Qiang Kou
7 7
 Maintainer: Bernd Fischer <b.fischer@dkfz.de>,
8 8
 	    Steffen Neumann <sneumann@ipb-halle.de>,
... ...
@@ -47,6 +47,7 @@ exportMethods(close,
47 47
               tic,
48 48
               chromatogram,
49 49
               chromatograms,
50
+              chromatogramHeader,
50 51
               writeMSData)
51 52
 
52 53
 exportClasses("mzR",
... ...
@@ -1,3 +1,8 @@
1
+CHANGES IN VERSION 2.13.2
2
+--------------------------
3
+ o Add chromatogramHeader method to read header information for chromatograms
4
+   from an mzML file.
5
+
1 6
 CHANGES IN VERSION 2.13.1
2 7
 --------------------------
3 8
  o Read filter string from mzML files and add it to the data.frame returned by
... ...
@@ -13,6 +13,8 @@ setGeneric("writeMSfile", function(object, filename, outformat) standardGeneric(
13 13
 ##            function(object, filename, outformat, header, data)
14 14
 ##                standardGeneric("writeSpectrumList"))
15 15
 setGeneric("chromatogramsInfo", function(object) standardGeneric("chromatogramsInfo"))
16
+setGeneric("chromatogramHeader", function(object, chrom)
17
+    standardGeneric("chromatogramHeader"))
16 18
 setGeneric("creationDate", function(object) standardGeneric("creationDate"))
17 19
 setGeneric("manufacturer", function(object) standardGeneric("manufacturer"))
18 20
 setGeneric("model", function(object) standardGeneric("model"))
... ...
@@ -198,3 +198,19 @@ setMethod("chromatogram", "mzRpwiz",
198 198
               }
199 199
               return(ans)
200 200
           })
201
+
202
+setMethod("chromatogramHeader", "mzRpwiz",
203
+          function(object, chrom) {
204
+              if (missing(chrom)) {
205
+                  res <- object@backend$getAllChromatogramHeaderInfo()
206
+              } else {
207
+                  stopifnot(is.numeric(chrom))
208
+                  n <- nChrom(object)
209
+                  if (min(chrom) < 1 || max(chrom) > n)
210
+                      stop("Index out of bound [", 1, ":", n, "]")
211
+                  chrom <- chrom -1L
212
+                  res <- object@backend$getChromatogramHeaderInfo(chrom)
213
+              }
214
+              res$chromatogramId <- as.character(res$chromatogramId)
215
+              res
216
+          })
... ...
@@ -21,3 +21,36 @@ test_chromatograms2 <- function() {
21 21
     checkIdentical(tic(x), chromatogram(x, 1L))
22 22
     checkIdentical(nrow(tic(x)), 7534L)
23 23
 }
24
+
25
+test_chromatogramHeader <- function() {
26
+    library(mzR)
27
+    library(RUnit)
28
+    library(msdata)
29
+    
30
+    f <- proteomics(full.names = TRUE, pattern = "MRM")
31
+    x <- openMSfile(f)
32
+
33
+    chrs <- chromatogram(x)
34
+    ch <- chromatogramHeader(x)
35
+    checkEquals(colnames(ch), c("chromatogramId", "chromatogramIndex", "polarity",
36
+                                "precursorIsolationWindowTargetMZ",
37
+                                "precursorIsolationWindowLowerOffset",
38
+                                "precursorIsolationWindowUpperOffset",
39
+                                "precursorCollisionEnergy",
40
+                                "productIsolationWindowTargetMZ",
41
+                                "productIsolationWindowLowerOffset",
42
+                                "productIsolationWindowUpperOffset"))
43
+    checkEquals(ch$chromatogramId[1], "TIC")
44
+    checkEquals(sum(is.na(ch$precursorIsolationWindowTargetMZ)), 1)
45
+    checkEquals(sum(is.na(ch$productIsolationWindowTargetMZ)), 1)    
46
+    checkEquals(length(chrs), nrow(ch))
47
+    close(x)
48
+
49
+    ## Should return only the TIC.
50
+    f <- proteomics(full.names = TRUE, pattern = "MS3")
51
+    x <- openMSfile(f)
52
+    ch <- chromatogramHeader(x)
53
+    checkEquals(nrow(ch), 1)
54
+    checkEquals(ch$chromatogramId, "TIC")
55
+    close(x)
56
+}
... ...
@@ -27,10 +27,12 @@
27 27
 \alias{get3Dmap,mzRpwiz-method}
28 28
 
29 29
 \alias{chromatogram,mzRpwiz-method}
30
+\alias{chromatogramHeader,mzRpwiz-method}
30 31
 \alias{chromatograms,mzRpwiz-method}
31 32
 \alias{tic,mzRpwiz-method}
32 33
 \alias{nChrom}
33 34
 \alias{chromatogram}
35
+\alias{chromatogramHeader}
34 36
 \alias{chromatograms}
35 37
 \alias{tic}
36 38
 
... ...
@@ -59,6 +61,8 @@
59 61
 
60 62
  chromatograms(object, ...) ## same as chromatogram
61 63
 
64
+ \S4method{chromatogramHeader}{mzRpwiz}(object, chrom)
65
+
62 66
  tic(object, ...)
63 67
 
64 68
  nChrom(object)
... ...
@@ -78,6 +82,12 @@
78 82
     returned.}
79 83
   
80 84
   \item{resMz}{a \code{numeric} defining the m/z resolution.}
85
+
86
+  \item{chrom}{
87
+    For \code{chromatogramHeader}: \code{numeric} specifying the index
88
+    of the chromatograms to be extracted from the file. If omitted, data
89
+    for all chromatograms is returned. 
90
+  }
81 91
   
82 92
   \item{...}{Other arguments. A \code{scan} parameter can be passed to
83 93
     \code{peaks}.}  }
... ...
@@ -128,6 +138,23 @@
128 138
   The \code{nChrom} function returns the number of chromatograms,
129 139
   including the total ion chromatogram.
130 140
 
141
+  The \code{chromatogramHeader} returns (similar to the \code{header}
142
+  function for spectra) a \code{data.frame} with metadata information
143
+  for the individual chromatograms. The \code{data.frame} has the
144
+  columns: \code{"chromatogramId"} (the ID of the chromatogram as
145
+  specified in the file), \code{"chromatogramIndex"} (the index of the
146
+  chromatogram within the file), \code{"polarity"} (the polarity for the
147
+  chromatogram, 0 for negative, +1 for positive and -1 for not set),
148
+  \code{"precursorIsolationWindowTargetMZ"} (the isolation window m/z of
149
+  the precursor), \code{"precursorIsolationWindowLowerOffset"},
150
+  \code{"precursorIsolationWindowUpperOffset"} (lower and upper offset
151
+  for the isolation window m/z), \code{"precursorCollisionEnergy"}
152
+  (collision energy),
153
+  \code{"productIsolationWindowTargetMZ"},
154
+  \code{"productIsolationWindowLowerOffset"} and
155
+  \code{"productIsolationWindowUpperOffset"} (definition of the m/z
156
+  isolation window for the product).
157
+  
131 158
   Note that access to chromatograms is only supported in the \code{pwiz}
132 159
   backend.
133 160
   
... ...
@@ -202,4 +229,8 @@
202 229
  head(tic(x))
203 230
  head(chromatogram(x, 1L)) ## same as tic(x)
204 231
  str(chromatogram(x, 10:12))  
232
+
233
+ ## get the header information for the chromatograms
234
+ ch <- chromatogramHeader(x)
235
+ head(ch)
205 236
 }
... ...
@@ -774,6 +774,111 @@ Rcpp::DataFrame RcppPwiz::getChromatogramsInfo( int whichChrom )
774 774
     return Rcpp::DataFrame::create( );
775 775
 }
776 776
 
777
+// get the header info for chromatograms.
778
+Rcpp::DataFrame RcppPwiz::getChromatogramHeaderInfo (Rcpp::IntegerVector whichChrom)
779
+{
780
+  if (msd != NULL) {
781
+    CVID nativeIdFormat_ = id::getDefaultNativeIDFormat(*msd); // Ask CHRIS if I'm correctly dereferencing this...
782
+    ChromatogramListPtr clp = msd->run.chromatogramListPtr;
783
+    if (clp.get() == 0) {
784
+      Rcpp::Rcerr << "The direct support for chromatogram info is only available in mzML format." << std::endl;
785
+      return Rcpp::DataFrame::create();
786
+    } else if (clp->size() == 0) {
787
+      Rcpp::Rcerr << "No available chromatogram info." << std::endl;
788
+      return Rcpp::DataFrame::create();
789
+    }
790
+
791
+    int N = clp->size();  
792
+    int N_chrom = whichChrom.size();
793
+
794
+    Rcpp::StringVector chromatogramId(N_chrom); // the ID from the chrom
795
+    Rcpp::IntegerVector chromatogramIndex(N_chrom);  // In contrast to the acquisitionNum we report here the index (1 based) of the chromatogram within the file.
796
+    Rcpp::IntegerVector polarity(N_chrom);
797
+    // MS:1000827: isolation window target m/z
798
+    // MS:1000828: isolation window lower offset
799
+    // MS:1000829: isolation window upper offset
800
+    Rcpp::NumericVector precursorIsolationWindowTargetMZ(N_chrom);
801
+    Rcpp::NumericVector precursorIsolationWindowLowerOffset(N_chrom);
802
+    Rcpp::NumericVector precursorIsolationWindowUpperOffset(N_chrom);
803
+    Rcpp::NumericVector precursorCollisionEnergy(N_chrom);
804
+    Rcpp::NumericVector productIsolationWindowTargetMZ(N_chrom);
805
+    Rcpp::NumericVector productIsolationWindowLowerOffset(N_chrom);
806
+    Rcpp::NumericVector productIsolationWindowUpperOffset(N_chrom);
807
+        
808
+    for (int i = 0; i < N_chrom; i++) {
809
+      int current_chrom = whichChrom[i];
810
+      if (current_chrom < 0 || current_chrom > N) {
811
+	Rcpp::Rcerr << "Provided index out of bounds" << std::endl;
812
+	return Rcpp::DataFrame::create();
813
+      }
814
+      ChromatogramPtr ch = clp->chromatogram(current_chrom - 1, false);
815
+      chromatogramId[i] = ch->id;
816
+      chromatogramIndex[i] = current_chrom;
817
+      CVParam param = ch->cvParamChild(MS_scan_polarity);
818
+      polarity[i] = (param.cvid==MS_negative_scan ? 0 : (param.cvid==MS_positive_scan ? +1 : -1 ) );
819
+      if (!ch->precursor.empty()) {
820
+	precursorIsolationWindowTargetMZ[i] = ch->precursor.isolationWindow.cvParam(MS_isolation_window_target_m_z).value.empty() ? NA_REAL : ch->precursor.isolationWindow.cvParam(MS_isolation_window_target_m_z).valueAs<double>();
821
+	precursorIsolationWindowLowerOffset[i] = ch->precursor.isolationWindow.cvParam(MS_isolation_window_lower_offset).value.empty() ? 0 : ch->precursor.isolationWindow.cvParam(MS_isolation_window_lower_offset).valueAs<double>();
822
+	precursorIsolationWindowUpperOffset[i] = ch->precursor.isolationWindow.cvParam(MS_isolation_window_upper_offset).value.empty() ? 0 : ch->precursor.isolationWindow.cvParam(MS_isolation_window_upper_offset).valueAs<double>();
823
+	precursorCollisionEnergy[i] = ch->precursor.activation.cvParam(MS_collision_energy).value.empty() ? NA_REAL : ch->precursor.activation.cvParam(MS_collision_energy).valueAs<double>(); 
824
+      } else {
825
+	precursorIsolationWindowTargetMZ[i] = NA_REAL;
826
+	precursorIsolationWindowLowerOffset[i] = NA_REAL;
827
+	precursorIsolationWindowUpperOffset[i] = NA_REAL;
828
+	precursorCollisionEnergy[i] = NA_REAL;
829
+      }
830
+      if (!ch->product.empty()) {
831
+	productIsolationWindowTargetMZ[i] = ch->product.isolationWindow.cvParam(MS_isolation_window_target_m_z).value.empty() ? NA_REAL : ch->product.isolationWindow.cvParam(MS_isolation_window_target_m_z).valueAs<double>();
832
+	productIsolationWindowLowerOffset[i] = ch->product.isolationWindow.cvParam(MS_isolation_window_lower_offset).value.empty() ? 0 : ch->product.isolationWindow.cvParam(MS_isolation_window_lower_offset).valueAs<double>();
833
+	productIsolationWindowUpperOffset[i] = ch->product.isolationWindow.cvParam(MS_isolation_window_upper_offset).value.empty() ? 0 : ch->product.isolationWindow.cvParam(MS_isolation_window_upper_offset).valueAs<double>();
834
+      } else {
835
+	productIsolationWindowTargetMZ[i] = NA_REAL;
836
+	productIsolationWindowLowerOffset[i] = NA_REAL;
837
+	productIsolationWindowUpperOffset[i] = NA_REAL;
838
+      }
839
+    }
840
+    Rcpp::List chromHeader(10);
841
+    std::vector<std::string> names;
842
+    int i = 0;
843
+    names.push_back("chromatogramId");
844
+    chromHeader[i++] = Rcpp::wrap(chromatogramId);
845
+    names.push_back("chromatogramIndex");
846
+    chromHeader[i++] = Rcpp::wrap(chromatogramIndex);
847
+    names.push_back("polarity");
848
+    chromHeader[i++] = Rcpp::wrap(polarity);
849
+    names.push_back("precursorIsolationWindowTargetMZ");
850
+    chromHeader[i++] = Rcpp::wrap(precursorIsolationWindowTargetMZ);
851
+    names.push_back("precursorIsolationWindowLowerOffset");
852
+    chromHeader[i++] = Rcpp::wrap(precursorIsolationWindowLowerOffset);
853
+    names.push_back("precursorIsolationWindowUpperOffset");
854
+    chromHeader[i++] = Rcpp::wrap(precursorIsolationWindowUpperOffset);
855
+    names.push_back("precursorCollisionEnergy");
856
+    chromHeader[i++] = Rcpp::wrap(precursorCollisionEnergy);
857
+    names.push_back("productIsolationWindowTargetMZ");
858
+    chromHeader[i++] = Rcpp::wrap(productIsolationWindowTargetMZ);
859
+    names.push_back("productIsolationWindowLowerOffset");
860
+    chromHeader[i++] = Rcpp::wrap(productIsolationWindowLowerOffset);
861
+    names.push_back("productIsolationWindowUpperOffset");
862
+    chromHeader[i++] = Rcpp::wrap(productIsolationWindowUpperOffset);
863
+    
864
+    chromHeader.attr("names") = names;
865
+    return chromHeader;
866
+  }
867
+  Rprintf("Warning: pwiz not yet initialized.\n ");
868
+  return Rcpp::DataFrame::create( );
869
+}
870
+
871
+Rcpp::DataFrame RcppPwiz::getAllChromatogramHeaderInfo ( ) {
872
+  if (msd != NULL) {
873
+    ChromatogramListPtr clp = msd->run.chromatogramListPtr;
874
+    int N = clp->size();
875
+    
876
+    return getChromatogramHeaderInfo(Rcpp::seq(1, N));
877
+  }
878
+  Rprintf("Warning: pwiz not yet initialized.\n ");
879
+  return Rcpp::DataFrame::create( );
880
+}
881
+
777 882
 Rcpp::NumericMatrix RcppPwiz::get3DMap ( std::vector<int> scanNumbers, double whichMzLow, double whichMzHigh, double resMz )
778 883
 {
779 884
     if (msd != NULL)
... ...
@@ -90,10 +90,14 @@ public:
90 90
      **/
91 91
     Rcpp::DataFrame getScanHeaderInfo(Rcpp::IntegerVector whichScan);
92 92
 
93
+    Rcpp::DataFrame getChromatogramHeaderInfo(Rcpp::IntegerVector whichScan);
94
+
93 95
     Rcpp::DataFrame getChromatogramsInfo(int whichChrom);
94 96
 
95 97
     Rcpp::DataFrame getAllScanHeaderInfo();
96 98
 
99
+    Rcpp::DataFrame getAllChromatogramHeaderInfo();
100
+
97 101
     Rcpp::List getPeakList(int whichScan);
98 102
 
99 103
     Rcpp::NumericMatrix get3DMap(std::vector<int> scanNumbers, double whichMzLow, double whichMzHigh, double resMz);
... ...
@@ -25,5 +25,7 @@ RCPP_MODULE(Pwiz)
25 25
     .method( "get3DMap", &RcppPwiz::get3DMap, "Reads all scans and returns them as a matrix." )
26 26
     .method( "getLastScan", &RcppPwiz::getLastScan, "Returns the last scan (not necessarily the number of scans because of missing scans)." )
27 27
     .method( "getLastChrom", &RcppPwiz::getLastChrom, "Returns the index of the last chromatogram." )
28
+    .method("getChromatogramHeaderInfo", &RcppPwiz::getChromatogramHeaderInfo, "Returns a data.frame with the chromatogram header information")
29
+    .method("getAllChromatogramHeaderInfo", &RcppPwiz::getAllChromatogramHeaderInfo, "Returns a data.frame with the header for all chromatograms")
28 30
     ;
29 31
 }