Browse code

Read/write centroiding information from and to files

- Read centroiding information from mzML/mzXML files and write it also to these
file types (only supported on pwiz backend).
- ncdf and ramp return centroided=NA.
- Update relevant documentation and unit tests.

jotsetung authored on 23/03/2018 05:37:29
Showing 9 changed files

... ...
@@ -36,7 +36,8 @@
36 36
                   mergedResultStartScanNum = "numeric",
37 37
                   mergedResultEndScanNum = "numeric",
38 38
                   injectionTime = "numeric",
39
-                  filterString = "character"
39
+                  filterString = "character",
40
+                  centroided = "logical"
40 41
                   )
41 42
     if (!is.data.frame(x))
42 43
         return("'x' is supposed to be a data.frame")
... ...
@@ -94,6 +94,7 @@ setMethod("header",
94 94
                             mergedResultEndScanNum=rep(-1, length(scans)),
95 95
                             injectionTime = rep(-1, length(scans)),
96 96
                             spectrumId = paste0("scan=", scans),
97
+                            centroided = NA,
97 98
                             stringsAsFactors = FALSE)
98 99
 
99 100
             return(result)
... ...
@@ -46,6 +46,7 @@ setMethod("header",
46 46
               res <- object@backend$getAllScanHeaderInfo()
47 47
               res$filterString <- NA_character_
48 48
               res$spectrumId <- paste0("scan=", res$acquisitionNum)
49
+              res$centroided <- NA
49 50
               res
50 51
 })
51 52
 
... ...
@@ -61,6 +62,7 @@ setMethod("header",
61 62
               }
62 63
               res$filterString <- NA_character_
63 64
               res$spectrumId <- paste0("scan=", res$acquisitionNum)
65
+              res$centroided <- NA
64 66
               res
65 67
           })
66 68
 
... ...
@@ -40,18 +40,20 @@ test_header <- function() {
40 40
   cdf <- openMSfile(file, backend="netCDF")        
41 41
 
42 42
   h <- header(cdf)
43
-  checkEquals(ncol(h), 21)
43
+  checkEquals(ncol(h), 22)
44 44
   checkEquals(nrow(h), 1278)
45
-
45
+  checkTrue(any(colnames(h) == "centroided"))
46
+  checkTrue(all(is.na(h$centroided)))
47
+  
46 48
   checkTrue(any(colnames(h) == "spectrumId"))
47 49
   checkEquals(h$spectrumId, paste0("scan=", h$acquisitionNum))
48 50
   
49 51
   h <- header(cdf, 1)
50
-  checkEquals(ncol(h), 21)
52
+  checkEquals(ncol(h), 22)
51 53
   checkEquals(nrow(h), 1)
52 54
 
53 55
   h <- header(cdf, 2:3)
54
-  checkEquals(ncol(h), 21)
56
+  checkEquals(ncol(h), 22)
55 57
   checkEquals(nrow(h), 2)
56 58
 
57 59
   close(cdf)
... ...
@@ -7,16 +7,21 @@ test_mzXML <- function() {
7 7
     length(mzxml)
8 8
     runInfo(mzxml)
9 9
     instrumentInfo(mzxml)
10
-    peaks(mzxml)
11
-    peaks(mzxml,1)
12
-    peaks(mzxml, 2:3)
10
+    checkTrue(is.list(peaks(mzxml)))
11
+    checkTrue(is.matrix(peaks(mzxml,1)))
12
+    pks <- peaks(mzxml, 2:3)
13
+    checkTrue(length(pks) == 2)
13 14
     peaksCount(mzxml)
14 15
     hdr <- header(mzxml)
15 16
     checkTrue(any(colnames(hdr) == "spectrumId"))
16
-    header(mzxml,1)
17
-    header(mzxml, 2:3)
17
+    checkTrue(all(hdr$centroided))
18
+    hdr <- header(mzxml,1)
19
+    checkTrue(is.list(hdr))
20
+    hdr <- header(mzxml, 2:3)
21
+    checkTrue(is.data.frame(hdr))
22
+    checkTrue(nrow(hdr) == 2)
18 23
     fileName(mzxml)
19
-    close(mzxml) 
24
+    close(mzxml)    
20 25
 }
21 26
 
22 27
 test_mzML <- function() {
... ...
@@ -33,9 +38,13 @@ test_mzML <- function() {
33 38
     peaksCount(mzml)
34 39
     hdr <- header(mzml)
35 40
     checkTrue(any(colnames(hdr) == "spectrumId"))
41
+    checkTrue(all(hdr$centroided))
36 42
     checkEquals(hdr$spectrumId, paste0("spectrum=", hdr$acquisitionNum))
37
-    header(mzml,1)
38
-    header(mzml,2:3)
43
+    hdr <- header(mzxml,1)
44
+    checkTrue(is.list(hdr))
45
+    hdr <- header(mzxml, 2:3)
46
+    checkTrue(is.data.frame(hdr))
47
+    checkTrue(nrow(hdr) == 2)
39 48
 
40 49
     checkTrue(ncol(header(mzml))>4)
41 50
     checkTrue(length(header(mzml,1))>4)
... ...
@@ -46,6 +55,15 @@ test_mzML <- function() {
46 55
 
47 56
     fileName(mzml)
48 57
     close(mzml)
58
+
59
+    file <- system.file("proteomics", "TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.mzML.gz",
60
+                        package = "msdata")
61
+    mzml <- openMSfile(file, backend="pwiz")
62
+    checkTrue(class(mzml)=="mzRpwiz")
63
+    hdr <- header(mzml)
64
+    checkTrue(hdr$centroided[2])
65
+    checkTrue(!hdr$centroided[1])
66
+    close(mzml)
49 67
 }
50 68
 
51 69
 ## Test the new implementation of the getScanHeaderInfo
... ...
@@ -62,18 +80,21 @@ test_getScanHeaderInfo <- function() {
62 80
     scan_3_ramp <- header(ramp, scans = 3)
63 81
     ## Ramp does not read polarity
64 82
     scan_3$polarity <- 0
83
+    scan_3$centroided <- NA
65 84
     checkEquals(scan_3[cn], scan_3_ramp[cn])
66 85
     
67 86
     ## Read all scan header
68 87
     all_scans <- header(mzml)
69 88
     all_scans_ramp <- header(ramp)
70 89
     all_scans$polarity <- 0
90
+    all_scans$centroided <- NA
71 91
     checkEquals(all_scans[, cn], all_scans_ramp[, cn])
72 92
     
73 93
     ## passing the index of all scan headers should return the same
74 94
     all_scans_2 <- header(mzml, scans = 1:nrow(all_scans))
75 95
     all_scans_ramp_2 <- header(ramp, scans = 1:nrow(all_scans))
76 96
     all_scans_2$polarity <- 0
97
+    all_scans_2$centroided <- NA
77 98
     checkEquals(all_scans, all_scans_2)
78 99
     checkEquals(as.list(all_scans[3, ]), scan_3)
79 100
     checkEquals(all_scans_2[, cn], all_scans_ramp_2[, cn])
... ...
@@ -83,6 +104,7 @@ test_getScanHeaderInfo <- function() {
83 104
     scan_3_ramp <- header(ramp, scans = c(3, 1, 14))
84 105
     ## Ramp does not read polarity
85 106
     scan_3$polarity <- 0
107
+    scan_3$centroided <- NA
86 108
     checkEquals(scan_3[, cn], scan_3_ramp[, cn])
87 109
 
88 110
     close(mzml)
... ...
@@ -95,11 +117,13 @@ test_getScanHeaderInfo <- function() {
95 117
     ramp <- openMSfile(file, backend = "Ramp")
96 118
     ## Read single scan header.
97 119
     scan_3 <- header(mzml, scans = 3)
120
+    scan_3$centroided <- NA
98 121
     scan_3_ramp <- header(ramp, scans = 3)
99 122
     checkEquals(scan_3[cn], scan_3_ramp[cn])
100 123
     
101 124
     ## Read all scan header
102 125
     all_scans <- header(mzml)
126
+    all_scans$centroided <- NA
103 127
     all_scans_ramp <- header(ramp)
104 128
     ## Ramp unable to read precursorScanNum from an mzXML file.
105 129
     all_scans$precursorScanNum <- 0
... ...
@@ -107,6 +131,7 @@ test_getScanHeaderInfo <- function() {
107 131
     
108 132
     ## passing the index of all scan headers should return the same
109 133
     all_scans_2 <- header(mzml, scans = 1:nrow(all_scans))
134
+    all_scans_2$centroided <- NA
110 135
     all_scans_ramp_2 <- header(ramp, scans = 1:nrow(all_scans))
111 136
     all_scans_2$precursorScanNum <- 0
112 137
     checkEquals(all_scans, all_scans_2)
... ...
@@ -117,6 +142,7 @@ test_getScanHeaderInfo <- function() {
117 142
     scan_3 <- header(mzml, scans = c(3, 1, 14))
118 143
     scan_3_ramp <- header(ramp, scans = c(3, 1, 14))
119 144
     scan_3$precursorScanNum <- 0
145
+    scan_3$centroided <- NA
120 146
     checkEquals(scan_3[, cn], scan_3_ramp[, cn])
121 147
 
122 148
     close(mzml)
... ...
@@ -134,6 +160,7 @@ test_getScanHeaderInfo <- function() {
134 160
     scan_3$polarity <- 0
135 161
     scan_3$injectionTime <- 0
136 162
     scan_3$filterString <- NA_character_
163
+    scan_3$centroided <- NA
137 164
     checkEquals(scan_3[cn], scan_3_ramp[cn])
138 165
     
139 166
     ## Read all scan header
... ...
@@ -143,6 +170,7 @@ test_getScanHeaderInfo <- function() {
143 170
     all_scans$polarity <- 0
144 171
     all_scans$injectionTime <- 0
145 172
     all_scans$filterString <- NA_character_
173
+    all_scans$centroided <- NA
146 174
     checkEquals(all_scans[, cn], all_scans_ramp[, cn])
147 175
     
148 176
     ## passing the index of all scan headers should return the same
... ...
@@ -152,6 +180,7 @@ test_getScanHeaderInfo <- function() {
152 180
     all_scans_2$polarity <- 0
153 181
     all_scans_2$injectionTime <- 0
154 182
     all_scans_2$filterString <- NA_character_
183
+    all_scans_2$centroided <- NA
155 184
     checkEquals(all_scans[, cn], all_scans_2[, cn])
156 185
     checkEquals(as.list(all_scans[3, cn]), scan_3[cn])
157 186
     checkEquals(all_scans_2[, cn], all_scans_ramp_2[, cn])
... ...
@@ -163,6 +192,7 @@ test_getScanHeaderInfo <- function() {
163 192
     scan_3$polarity <- 0
164 193
     scan_3$injectionTime <- 0
165 194
     scan_3$filterString <- NA_character_
195
+    scan_3$centroided <- NA
166 196
     checkEquals(scan_3[, cn], scan_3_ramp[, cn])
167 197
 
168 198
     close(mzml)
... ...
@@ -14,6 +14,9 @@ test_mzXML <- function() {
14 14
     header(mzxml,1)
15 15
     header(mzxml,2:3)
16 16
     fileName(mzxml)
17
+    hdr <- header(mzxml)
18
+    checkTrue(any(colnames(hdr) == "centroided"))
19
+    checkTrue(all(is.na(hdr$centroided)))
17 20
     close(mzxml)
18 21
 }
19 22
 
... ...
@@ -1,9 +1,6 @@
1 1
 ## Testing to write stuff.
2 2
 
3 3
 test_copyWriteMSData <- function() {
4
-    library(msdata)
5
-    library(mzR)
6
-    library(RUnit)
7 4
     test_folder = tempdir()
8 5
 
9 6
     ## INPUT: mzXML
... ...
@@ -77,11 +74,12 @@ test_copyWriteMSData <- function() {
77 74
     mzml_new <- openMSfile(fnew, backend = "pwiz")
78 75
     pks_new <- peaks(mzml_new)
79 76
     hdr_new <- header(mzml_new)
80
-    mzR::close(mzml_new)
81 77
     rownames(hdr_new) <- NULL
82 78
     rownames(hdr_sub) <- NULL
83 79
     checkEquals(pks_new, pks_sub)
84 80
     checkEquals(hdr_new, hdr_sub)
81
+    checkEquals(peaks(mzml_new, 2), pks[[3]])
82
+    mzR::close(mzml_new)
85 83
     ## mzXML
86 84
     mzR::copyWriteMSData(file = fnew, original_file = orig_file,
87 85
                          header = hdr_sub, object = pks_sub, backend = "pwiz",
... ...
@@ -149,11 +147,12 @@ test_copyWriteMSData <- function() {
149 147
     pks_new <- peaks(mzml_new)
150 148
     hdr_new <- header(mzml_new)
151 149
     ii_new <- mzR::instrumentInfo(mzml_new)
152
-    mzR::close(mzml_new)
153 150
     checkEquals(pks_new, pks)
154 151
     checkEquals(hdr_new, hdr)  ## polarity is OK here
155 152
     checkEquals(ii, ii_new)
156
-    
153
+
154
+    checkEquals(peaks(mzml_new, 12), pks[[12]])
155
+    mzR::close(mzml_new)
157 156
     ## OUTPUT: mzXML
158 157
     fnew <- paste0(test_folder, "test_copyWrite.mzXML")
159 158
     suppressWarnings(
... ...
@@ -276,11 +275,7 @@ test_copyWriteMSData <- function() {
276 275
     checkEquals(ii, ii_2)
277 276
 }
278 277
 
279
-test_writeMSData <- function() {
280
-    
281
-    library(msdata)
282
-    library(mzR)
283
-    library(RUnit)
278
+test_writeMSData <- function() {    
284 279
     test_folder = tempdir()
285 280
     ## Input: mzXML
286 281
     test_file <- system.file("threonine", "threonine_i2_e35_pH_tree.mzXML",
... ...
@@ -296,9 +291,10 @@ test_writeMSData <- function() {
296 291
     in_file <- openMSfile(out_file, backend = "pwiz")
297 292
     hdr_2 <- header(in_file)
298 293
     pks_2 <- peaks(in_file)
299
-    mzR::close(in_file)
300 294
     checkEquals(hdr, hdr_2)
301 295
     checkEquals(pks, pks_2)
296
+    checkEquals(peaks(in_file, 13), pks[[13]])
297
+    mzR::close(in_file)
302 298
 
303 299
     ## Test subsetting.
304 300
     hdr_sub <- hdr[c(1, 3, 5), ]
... ...
@@ -308,8 +304,9 @@ test_writeMSData <- function() {
308 304
     in_file <- openMSfile(out_file)
309 305
     hdr_sub_2 <- header(in_file)
310 306
     pks_sub_2 <- peaks(in_file)
311
-    mzR::close(in_file)
312 307
     checkEquals(pks_sub, pks_sub_2)
308
+    checkEquals(peaks(in_file, 3), pks[[5]])
309
+    mzR::close(in_file)
313 310
     ## mzXML does not support spectrumId, thus acquisitionNum, precursorScanNum
314 311
     ## and spectrumId will be different, but their order and mapping has to be
315 312
     ## the same.
... ...
@@ -395,8 +392,10 @@ test_writeMSData <- function() {
395 392
 
396 393
     ## mzXML output:
397 394
     out_file <- paste0(test_folder, "test_write.mzXML")
398
-    writeMSData(file = out_file, header = hdr, object = pks,
399
-                outformat = "mzXML")
395
+    suppressWarnings(
396
+        writeMSData(file = out_file, header = hdr, object = pks,
397
+                    outformat = "mzXML")
398
+    )
400 399
     in_file <- openMSfile(out_file, backend = "pwiz")
401 400
     hdr_2 <- header(in_file)
402 401
     pks_2 <- peaks(in_file)
... ...
@@ -456,8 +455,10 @@ test_writeMSData <- function() {
456 455
     hdr_sub$seqNum <- 1:nrow(hdr_sub)
457 456
     pks_sub <- pks[idx]
458 457
     fnew <- paste0(test_folder, "test_copyWrite.mzXML")
459
-    writeMSData(file = fnew, header = hdr_sub, object = pks_sub,
460
-                backend = "pwiz", outformat = "mzxml")
458
+    suppressWarnings(
459
+        writeMSData(file = fnew, header = hdr_sub, object = pks_sub,
460
+                    backend = "pwiz", outformat = "mzxml")
461
+    )
461 462
     ## Check content is same
462 463
     mzml_new <- openMSfile(fnew, backend = "pwiz")
463 464
     pks_new <- peaks(mzml_new)
... ...
@@ -535,5 +536,6 @@ test_writeMSData <- function() {
535 536
     pks_2 <- peaks(in_file)
536 537
     mzR::close(in_file)
537 538
     checkEquals(pks, pks_2)
539
+    hdr$centroided <- FALSE
538 540
     checkEquals(hdr, hdr_2)
539 541
 }
... ...
@@ -112,9 +112,10 @@
112 112
   \code{precursorCharge}, \code{precursorIntensity},
113 113
   \code{mergedScan}, \code{mergedResultScanNum},
114 114
   \code{mergedResultStartScanNum}, \code{mergedResultEndScanNum},
115
-  \code{spectrumId} and
116
-  \code{injectionTime} (ion injection time, in seconds) when available
117
-  in the original file. If multiple scans are queried, a
115
+  \code{spectrumId}, \code{centroided} (\code{logical} whether the data
116
+  of the spectrum is in centroid mode or profile mode; only for pwiz
117
+  backend) and \code{injectionTime} (ion injection time, in seconds)
118
+  when available in the original file. If multiple scans are queried, a
118 119
   \code{data.frame} is returned with the scans reported along the
119 120
   rows.
120 121
 
... ...
@@ -205,6 +205,7 @@ Rcpp::DataFrame RcppPwiz::getScanHeaderInfo (Rcpp::IntegerVector whichScan)
205 205
       Rcpp::NumericVector ionInjectionTime(N_scans); /* The time spent filling an ion trapping device*/
206 206
       Rcpp::StringVector filterString(N_scans);
207 207
       Rcpp::StringVector spectrumId(N_scans);
208
+      Rcpp::LogicalVector centroided(N_scans);
208 209
       
209 210
       for (int i = 0; i < N_scans; i++)
210 211
 	{
... ...
@@ -241,12 +242,14 @@ Rcpp::DataFrame RcppPwiz::getScanHeaderInfo (Rcpp::IntegerVector whichScan)
241 242
 	  mergedResultStartScanNum[i] = scanHeader.mergedResultStartScanNum;
242 243
 	  mergedResultEndScanNum[i] = scanHeader.mergedResultEndScanNum;
243 244
 	  spectrumId[i] = sp->id;
245
+	  CVParam prm = sp->cvParamChild(MS_spectrum_representation);
246
+	  centroided[i] = (prm.cvid==MS_centroid_spectrum ? TRUE : (prm.cvid==MS_profile_spectrum ? FALSE : NA_LOGICAL));
244 247
 	}
245 248
       // delete adapter issue #64
246 249
       delete adapter;
247 250
       adapter = NULL;
248 251
 
249
-      Rcpp::List header(24);
252
+      Rcpp::List header(25);
250 253
       std::vector<std::string> names;
251 254
       int i = 0;
252 255
       names.push_back("seqNum");
... ...
@@ -297,6 +300,8 @@ Rcpp::DataFrame RcppPwiz::getScanHeaderInfo (Rcpp::IntegerVector whichScan)
297 300
       header[i++] = Rcpp::wrap(filterString);
298 301
       names.push_back("spectrumId");
299 302
       header[i++] = Rcpp::wrap(spectrumId);
303
+      names.push_back("centroided");
304
+      header[i++] = Rcpp::wrap(centroided);
300 305
       
301 306
       header.attr("names") = names;
302 307
       
... ...
@@ -610,6 +615,7 @@ void RcppPwiz::addSpectrumList(MSData& msd,
610 615
   Rcpp::NumericVector ionInjectionTime = spctr_header["injectionTime"];
611 616
   Rcpp::StringVector filterString = spctr_header["filterString"];
612 617
   Rcpp::StringVector spectrumId = spctr_header["spectrumId"];
618
+  Rcpp::LogicalVector centroided = spctr_header["centroided"];
613 619
   
614 620
   // From MSnbase::Spectrum        Column in the header
615 621
   // msLevel integer               $msLevel
... ...
@@ -656,6 +662,11 @@ void RcppPwiz::addSpectrumList(MSData& msd,
656 662
     spct.set(MS_base_peak_m_z, basePeakMZ[i]);
657 663
     spct.set(MS_base_peak_intensity, basePeakIntensity[i]);
658 664
     spct.set(MS_total_ion_current, totIonCurrent[i]);
665
+    // centroided
666
+    if (centroided[i] != NA_LOGICAL && centroided[i] == TRUE)
667
+      spct.set(MS_centroid_spectrum);
668
+    if (centroided[i] != NA_LOGICAL && centroided[i] == FALSE)
669
+      spct.set(MS_profile_spectrum);
659 670
     // TODO:
660 671
     // [X] seqNum: number observed in file.
661 672
     spct.index = seqNum[i] - 1;	// Or just i?