- Read centroiding information from mzML/mzXML files and write it also to these
file types (only supported on pwiz backend).
- ncdf and ramp return centroided=NA.
- Update relevant documentation and unit tests.
... | ... |
@@ -36,7 +36,8 @@ |
36 | 36 |
mergedResultStartScanNum = "numeric", |
37 | 37 |
mergedResultEndScanNum = "numeric", |
38 | 38 |
injectionTime = "numeric", |
39 |
- filterString = "character" |
|
39 |
+ filterString = "character", |
|
40 |
+ centroided = "logical" |
|
40 | 41 |
) |
41 | 42 |
if (!is.data.frame(x)) |
42 | 43 |
return("'x' is supposed to be a data.frame") |
... | ... |
@@ -46,6 +46,7 @@ setMethod("header", |
46 | 46 |
res <- object@backend$getAllScanHeaderInfo() |
47 | 47 |
res$filterString <- NA_character_ |
48 | 48 |
res$spectrumId <- paste0("scan=", res$acquisitionNum) |
49 |
+ res$centroided <- NA |
|
49 | 50 |
res |
50 | 51 |
}) |
51 | 52 |
|
... | ... |
@@ -61,6 +62,7 @@ setMethod("header", |
61 | 62 |
} |
62 | 63 |
res$filterString <- NA_character_ |
63 | 64 |
res$spectrumId <- paste0("scan=", res$acquisitionNum) |
65 |
+ res$centroided <- NA |
|
64 | 66 |
res |
65 | 67 |
}) |
66 | 68 |
|
... | ... |
@@ -40,18 +40,20 @@ test_header <- function() { |
40 | 40 |
cdf <- openMSfile(file, backend="netCDF") |
41 | 41 |
|
42 | 42 |
h <- header(cdf) |
43 |
- checkEquals(ncol(h), 21) |
|
43 |
+ checkEquals(ncol(h), 22) |
|
44 | 44 |
checkEquals(nrow(h), 1278) |
45 |
- |
|
45 |
+ checkTrue(any(colnames(h) == "centroided")) |
|
46 |
+ checkTrue(all(is.na(h$centroided))) |
|
47 |
+ |
|
46 | 48 |
checkTrue(any(colnames(h) == "spectrumId")) |
47 | 49 |
checkEquals(h$spectrumId, paste0("scan=", h$acquisitionNum)) |
48 | 50 |
|
49 | 51 |
h <- header(cdf, 1) |
50 |
- checkEquals(ncol(h), 21) |
|
52 |
+ checkEquals(ncol(h), 22) |
|
51 | 53 |
checkEquals(nrow(h), 1) |
52 | 54 |
|
53 | 55 |
h <- header(cdf, 2:3) |
54 |
- checkEquals(ncol(h), 21) |
|
56 |
+ checkEquals(ncol(h), 22) |
|
55 | 57 |
checkEquals(nrow(h), 2) |
56 | 58 |
|
57 | 59 |
close(cdf) |
... | ... |
@@ -7,16 +7,21 @@ test_mzXML <- function() { |
7 | 7 |
length(mzxml) |
8 | 8 |
runInfo(mzxml) |
9 | 9 |
instrumentInfo(mzxml) |
10 |
- peaks(mzxml) |
|
11 |
- peaks(mzxml,1) |
|
12 |
- peaks(mzxml, 2:3) |
|
10 |
+ checkTrue(is.list(peaks(mzxml))) |
|
11 |
+ checkTrue(is.matrix(peaks(mzxml,1))) |
|
12 |
+ pks <- peaks(mzxml, 2:3) |
|
13 |
+ checkTrue(length(pks) == 2) |
|
13 | 14 |
peaksCount(mzxml) |
14 | 15 |
hdr <- header(mzxml) |
15 | 16 |
checkTrue(any(colnames(hdr) == "spectrumId")) |
16 |
- header(mzxml,1) |
|
17 |
- header(mzxml, 2:3) |
|
17 |
+ checkTrue(all(hdr$centroided)) |
|
18 |
+ hdr <- header(mzxml,1) |
|
19 |
+ checkTrue(is.list(hdr)) |
|
20 |
+ hdr <- header(mzxml, 2:3) |
|
21 |
+ checkTrue(is.data.frame(hdr)) |
|
22 |
+ checkTrue(nrow(hdr) == 2) |
|
18 | 23 |
fileName(mzxml) |
19 |
- close(mzxml) |
|
24 |
+ close(mzxml) |
|
20 | 25 |
} |
21 | 26 |
|
22 | 27 |
test_mzML <- function() { |
... | ... |
@@ -33,9 +38,13 @@ test_mzML <- function() { |
33 | 38 |
peaksCount(mzml) |
34 | 39 |
hdr <- header(mzml) |
35 | 40 |
checkTrue(any(colnames(hdr) == "spectrumId")) |
41 |
+ checkTrue(all(hdr$centroided)) |
|
36 | 42 |
checkEquals(hdr$spectrumId, paste0("spectrum=", hdr$acquisitionNum)) |
37 |
- header(mzml,1) |
|
38 |
- header(mzml,2:3) |
|
43 |
+ hdr <- header(mzxml,1) |
|
44 |
+ checkTrue(is.list(hdr)) |
|
45 |
+ hdr <- header(mzxml, 2:3) |
|
46 |
+ checkTrue(is.data.frame(hdr)) |
|
47 |
+ checkTrue(nrow(hdr) == 2) |
|
39 | 48 |
|
40 | 49 |
checkTrue(ncol(header(mzml))>4) |
41 | 50 |
checkTrue(length(header(mzml,1))>4) |
... | ... |
@@ -46,6 +55,15 @@ test_mzML <- function() { |
46 | 55 |
|
47 | 56 |
fileName(mzml) |
48 | 57 |
close(mzml) |
58 |
+ |
|
59 |
+ file <- system.file("proteomics", "TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.mzML.gz", |
|
60 |
+ package = "msdata") |
|
61 |
+ mzml <- openMSfile(file, backend="pwiz") |
|
62 |
+ checkTrue(class(mzml)=="mzRpwiz") |
|
63 |
+ hdr <- header(mzml) |
|
64 |
+ checkTrue(hdr$centroided[2]) |
|
65 |
+ checkTrue(!hdr$centroided[1]) |
|
66 |
+ close(mzml) |
|
49 | 67 |
} |
50 | 68 |
|
51 | 69 |
## Test the new implementation of the getScanHeaderInfo |
... | ... |
@@ -62,18 +80,21 @@ test_getScanHeaderInfo <- function() { |
62 | 80 |
scan_3_ramp <- header(ramp, scans = 3) |
63 | 81 |
## Ramp does not read polarity |
64 | 82 |
scan_3$polarity <- 0 |
83 |
+ scan_3$centroided <- NA |
|
65 | 84 |
checkEquals(scan_3[cn], scan_3_ramp[cn]) |
66 | 85 |
|
67 | 86 |
## Read all scan header |
68 | 87 |
all_scans <- header(mzml) |
69 | 88 |
all_scans_ramp <- header(ramp) |
70 | 89 |
all_scans$polarity <- 0 |
90 |
+ all_scans$centroided <- NA |
|
71 | 91 |
checkEquals(all_scans[, cn], all_scans_ramp[, cn]) |
72 | 92 |
|
73 | 93 |
## passing the index of all scan headers should return the same |
74 | 94 |
all_scans_2 <- header(mzml, scans = 1:nrow(all_scans)) |
75 | 95 |
all_scans_ramp_2 <- header(ramp, scans = 1:nrow(all_scans)) |
76 | 96 |
all_scans_2$polarity <- 0 |
97 |
+ all_scans_2$centroided <- NA |
|
77 | 98 |
checkEquals(all_scans, all_scans_2) |
78 | 99 |
checkEquals(as.list(all_scans[3, ]), scan_3) |
79 | 100 |
checkEquals(all_scans_2[, cn], all_scans_ramp_2[, cn]) |
... | ... |
@@ -83,6 +104,7 @@ test_getScanHeaderInfo <- function() { |
83 | 104 |
scan_3_ramp <- header(ramp, scans = c(3, 1, 14)) |
84 | 105 |
## Ramp does not read polarity |
85 | 106 |
scan_3$polarity <- 0 |
107 |
+ scan_3$centroided <- NA |
|
86 | 108 |
checkEquals(scan_3[, cn], scan_3_ramp[, cn]) |
87 | 109 |
|
88 | 110 |
close(mzml) |
... | ... |
@@ -95,11 +117,13 @@ test_getScanHeaderInfo <- function() { |
95 | 117 |
ramp <- openMSfile(file, backend = "Ramp") |
96 | 118 |
## Read single scan header. |
97 | 119 |
scan_3 <- header(mzml, scans = 3) |
120 |
+ scan_3$centroided <- NA |
|
98 | 121 |
scan_3_ramp <- header(ramp, scans = 3) |
99 | 122 |
checkEquals(scan_3[cn], scan_3_ramp[cn]) |
100 | 123 |
|
101 | 124 |
## Read all scan header |
102 | 125 |
all_scans <- header(mzml) |
126 |
+ all_scans$centroided <- NA |
|
103 | 127 |
all_scans_ramp <- header(ramp) |
104 | 128 |
## Ramp unable to read precursorScanNum from an mzXML file. |
105 | 129 |
all_scans$precursorScanNum <- 0 |
... | ... |
@@ -107,6 +131,7 @@ test_getScanHeaderInfo <- function() { |
107 | 131 |
|
108 | 132 |
## passing the index of all scan headers should return the same |
109 | 133 |
all_scans_2 <- header(mzml, scans = 1:nrow(all_scans)) |
134 |
+ all_scans_2$centroided <- NA |
|
110 | 135 |
all_scans_ramp_2 <- header(ramp, scans = 1:nrow(all_scans)) |
111 | 136 |
all_scans_2$precursorScanNum <- 0 |
112 | 137 |
checkEquals(all_scans, all_scans_2) |
... | ... |
@@ -117,6 +142,7 @@ test_getScanHeaderInfo <- function() { |
117 | 142 |
scan_3 <- header(mzml, scans = c(3, 1, 14)) |
118 | 143 |
scan_3_ramp <- header(ramp, scans = c(3, 1, 14)) |
119 | 144 |
scan_3$precursorScanNum <- 0 |
145 |
+ scan_3$centroided <- NA |
|
120 | 146 |
checkEquals(scan_3[, cn], scan_3_ramp[, cn]) |
121 | 147 |
|
122 | 148 |
close(mzml) |
... | ... |
@@ -134,6 +160,7 @@ test_getScanHeaderInfo <- function() { |
134 | 160 |
scan_3$polarity <- 0 |
135 | 161 |
scan_3$injectionTime <- 0 |
136 | 162 |
scan_3$filterString <- NA_character_ |
163 |
+ scan_3$centroided <- NA |
|
137 | 164 |
checkEquals(scan_3[cn], scan_3_ramp[cn]) |
138 | 165 |
|
139 | 166 |
## Read all scan header |
... | ... |
@@ -143,6 +170,7 @@ test_getScanHeaderInfo <- function() { |
143 | 170 |
all_scans$polarity <- 0 |
144 | 171 |
all_scans$injectionTime <- 0 |
145 | 172 |
all_scans$filterString <- NA_character_ |
173 |
+ all_scans$centroided <- NA |
|
146 | 174 |
checkEquals(all_scans[, cn], all_scans_ramp[, cn]) |
147 | 175 |
|
148 | 176 |
## passing the index of all scan headers should return the same |
... | ... |
@@ -152,6 +180,7 @@ test_getScanHeaderInfo <- function() { |
152 | 180 |
all_scans_2$polarity <- 0 |
153 | 181 |
all_scans_2$injectionTime <- 0 |
154 | 182 |
all_scans_2$filterString <- NA_character_ |
183 |
+ all_scans_2$centroided <- NA |
|
155 | 184 |
checkEquals(all_scans[, cn], all_scans_2[, cn]) |
156 | 185 |
checkEquals(as.list(all_scans[3, cn]), scan_3[cn]) |
157 | 186 |
checkEquals(all_scans_2[, cn], all_scans_ramp_2[, cn]) |
... | ... |
@@ -163,6 +192,7 @@ test_getScanHeaderInfo <- function() { |
163 | 192 |
scan_3$polarity <- 0 |
164 | 193 |
scan_3$injectionTime <- 0 |
165 | 194 |
scan_3$filterString <- NA_character_ |
195 |
+ scan_3$centroided <- NA |
|
166 | 196 |
checkEquals(scan_3[, cn], scan_3_ramp[, cn]) |
167 | 197 |
|
168 | 198 |
close(mzml) |
... | ... |
@@ -1,9 +1,6 @@ |
1 | 1 |
## Testing to write stuff. |
2 | 2 |
|
3 | 3 |
test_copyWriteMSData <- function() { |
4 |
- library(msdata) |
|
5 |
- library(mzR) |
|
6 |
- library(RUnit) |
|
7 | 4 |
test_folder = tempdir() |
8 | 5 |
|
9 | 6 |
## INPUT: mzXML |
... | ... |
@@ -77,11 +74,12 @@ test_copyWriteMSData <- function() { |
77 | 74 |
mzml_new <- openMSfile(fnew, backend = "pwiz") |
78 | 75 |
pks_new <- peaks(mzml_new) |
79 | 76 |
hdr_new <- header(mzml_new) |
80 |
- mzR::close(mzml_new) |
|
81 | 77 |
rownames(hdr_new) <- NULL |
82 | 78 |
rownames(hdr_sub) <- NULL |
83 | 79 |
checkEquals(pks_new, pks_sub) |
84 | 80 |
checkEquals(hdr_new, hdr_sub) |
81 |
+ checkEquals(peaks(mzml_new, 2), pks[[3]]) |
|
82 |
+ mzR::close(mzml_new) |
|
85 | 83 |
## mzXML |
86 | 84 |
mzR::copyWriteMSData(file = fnew, original_file = orig_file, |
87 | 85 |
header = hdr_sub, object = pks_sub, backend = "pwiz", |
... | ... |
@@ -149,11 +147,12 @@ test_copyWriteMSData <- function() { |
149 | 147 |
pks_new <- peaks(mzml_new) |
150 | 148 |
hdr_new <- header(mzml_new) |
151 | 149 |
ii_new <- mzR::instrumentInfo(mzml_new) |
152 |
- mzR::close(mzml_new) |
|
153 | 150 |
checkEquals(pks_new, pks) |
154 | 151 |
checkEquals(hdr_new, hdr) ## polarity is OK here |
155 | 152 |
checkEquals(ii, ii_new) |
156 |
- |
|
153 |
+ |
|
154 |
+ checkEquals(peaks(mzml_new, 12), pks[[12]]) |
|
155 |
+ mzR::close(mzml_new) |
|
157 | 156 |
## OUTPUT: mzXML |
158 | 157 |
fnew <- paste0(test_folder, "test_copyWrite.mzXML") |
159 | 158 |
suppressWarnings( |
... | ... |
@@ -276,11 +275,7 @@ test_copyWriteMSData <- function() { |
276 | 275 |
checkEquals(ii, ii_2) |
277 | 276 |
} |
278 | 277 |
|
279 |
-test_writeMSData <- function() { |
|
280 |
- |
|
281 |
- library(msdata) |
|
282 |
- library(mzR) |
|
283 |
- library(RUnit) |
|
278 |
+test_writeMSData <- function() { |
|
284 | 279 |
test_folder = tempdir() |
285 | 280 |
## Input: mzXML |
286 | 281 |
test_file <- system.file("threonine", "threonine_i2_e35_pH_tree.mzXML", |
... | ... |
@@ -296,9 +291,10 @@ test_writeMSData <- function() { |
296 | 291 |
in_file <- openMSfile(out_file, backend = "pwiz") |
297 | 292 |
hdr_2 <- header(in_file) |
298 | 293 |
pks_2 <- peaks(in_file) |
299 |
- mzR::close(in_file) |
|
300 | 294 |
checkEquals(hdr, hdr_2) |
301 | 295 |
checkEquals(pks, pks_2) |
296 |
+ checkEquals(peaks(in_file, 13), pks[[13]]) |
|
297 |
+ mzR::close(in_file) |
|
302 | 298 |
|
303 | 299 |
## Test subsetting. |
304 | 300 |
hdr_sub <- hdr[c(1, 3, 5), ] |
... | ... |
@@ -308,8 +304,9 @@ test_writeMSData <- function() { |
308 | 304 |
in_file <- openMSfile(out_file) |
309 | 305 |
hdr_sub_2 <- header(in_file) |
310 | 306 |
pks_sub_2 <- peaks(in_file) |
311 |
- mzR::close(in_file) |
|
312 | 307 |
checkEquals(pks_sub, pks_sub_2) |
308 |
+ checkEquals(peaks(in_file, 3), pks[[5]]) |
|
309 |
+ mzR::close(in_file) |
|
313 | 310 |
## mzXML does not support spectrumId, thus acquisitionNum, precursorScanNum |
314 | 311 |
## and spectrumId will be different, but their order and mapping has to be |
315 | 312 |
## the same. |
... | ... |
@@ -395,8 +392,10 @@ test_writeMSData <- function() { |
395 | 392 |
|
396 | 393 |
## mzXML output: |
397 | 394 |
out_file <- paste0(test_folder, "test_write.mzXML") |
398 |
- writeMSData(file = out_file, header = hdr, object = pks, |
|
399 |
- outformat = "mzXML") |
|
395 |
+ suppressWarnings( |
|
396 |
+ writeMSData(file = out_file, header = hdr, object = pks, |
|
397 |
+ outformat = "mzXML") |
|
398 |
+ ) |
|
400 | 399 |
in_file <- openMSfile(out_file, backend = "pwiz") |
401 | 400 |
hdr_2 <- header(in_file) |
402 | 401 |
pks_2 <- peaks(in_file) |
... | ... |
@@ -456,8 +455,10 @@ test_writeMSData <- function() { |
456 | 455 |
hdr_sub$seqNum <- 1:nrow(hdr_sub) |
457 | 456 |
pks_sub <- pks[idx] |
458 | 457 |
fnew <- paste0(test_folder, "test_copyWrite.mzXML") |
459 |
- writeMSData(file = fnew, header = hdr_sub, object = pks_sub, |
|
460 |
- backend = "pwiz", outformat = "mzxml") |
|
458 |
+ suppressWarnings( |
|
459 |
+ writeMSData(file = fnew, header = hdr_sub, object = pks_sub, |
|
460 |
+ backend = "pwiz", outformat = "mzxml") |
|
461 |
+ ) |
|
461 | 462 |
## Check content is same |
462 | 463 |
mzml_new <- openMSfile(fnew, backend = "pwiz") |
463 | 464 |
pks_new <- peaks(mzml_new) |
... | ... |
@@ -535,5 +536,6 @@ test_writeMSData <- function() { |
535 | 536 |
pks_2 <- peaks(in_file) |
536 | 537 |
mzR::close(in_file) |
537 | 538 |
checkEquals(pks, pks_2) |
539 |
+ hdr$centroided <- FALSE |
|
538 | 540 |
checkEquals(hdr, hdr_2) |
539 | 541 |
} |
... | ... |
@@ -112,9 +112,10 @@ |
112 | 112 |
\code{precursorCharge}, \code{precursorIntensity}, |
113 | 113 |
\code{mergedScan}, \code{mergedResultScanNum}, |
114 | 114 |
\code{mergedResultStartScanNum}, \code{mergedResultEndScanNum}, |
115 |
- \code{spectrumId} and |
|
116 |
- \code{injectionTime} (ion injection time, in seconds) when available |
|
117 |
- in the original file. If multiple scans are queried, a |
|
115 |
+ \code{spectrumId}, \code{centroided} (\code{logical} whether the data |
|
116 |
+ of the spectrum is in centroid mode or profile mode; only for pwiz |
|
117 |
+ backend) and \code{injectionTime} (ion injection time, in seconds) |
|
118 |
+ when available in the original file. If multiple scans are queried, a |
|
118 | 119 |
\code{data.frame} is returned with the scans reported along the |
119 | 120 |
rows. |
120 | 121 |
|
... | ... |
@@ -205,6 +205,7 @@ Rcpp::DataFrame RcppPwiz::getScanHeaderInfo (Rcpp::IntegerVector whichScan) |
205 | 205 |
Rcpp::NumericVector ionInjectionTime(N_scans); /* The time spent filling an ion trapping device*/ |
206 | 206 |
Rcpp::StringVector filterString(N_scans); |
207 | 207 |
Rcpp::StringVector spectrumId(N_scans); |
208 |
+ Rcpp::LogicalVector centroided(N_scans); |
|
208 | 209 |
|
209 | 210 |
for (int i = 0; i < N_scans; i++) |
210 | 211 |
{ |
... | ... |
@@ -241,12 +242,14 @@ Rcpp::DataFrame RcppPwiz::getScanHeaderInfo (Rcpp::IntegerVector whichScan) |
241 | 242 |
mergedResultStartScanNum[i] = scanHeader.mergedResultStartScanNum; |
242 | 243 |
mergedResultEndScanNum[i] = scanHeader.mergedResultEndScanNum; |
243 | 244 |
spectrumId[i] = sp->id; |
245 |
+ CVParam prm = sp->cvParamChild(MS_spectrum_representation); |
|
246 |
+ centroided[i] = (prm.cvid==MS_centroid_spectrum ? TRUE : (prm.cvid==MS_profile_spectrum ? FALSE : NA_LOGICAL)); |
|
244 | 247 |
} |
245 | 248 |
// delete adapter issue #64 |
246 | 249 |
delete adapter; |
247 | 250 |
adapter = NULL; |
248 | 251 |
|
249 |
- Rcpp::List header(24); |
|
252 |
+ Rcpp::List header(25); |
|
250 | 253 |
std::vector<std::string> names; |
251 | 254 |
int i = 0; |
252 | 255 |
names.push_back("seqNum"); |
... | ... |
@@ -297,6 +300,8 @@ Rcpp::DataFrame RcppPwiz::getScanHeaderInfo (Rcpp::IntegerVector whichScan) |
297 | 300 |
header[i++] = Rcpp::wrap(filterString); |
298 | 301 |
names.push_back("spectrumId"); |
299 | 302 |
header[i++] = Rcpp::wrap(spectrumId); |
303 |
+ names.push_back("centroided"); |
|
304 |
+ header[i++] = Rcpp::wrap(centroided); |
|
300 | 305 |
|
301 | 306 |
header.attr("names") = names; |
302 | 307 |
|
... | ... |
@@ -610,6 +615,7 @@ void RcppPwiz::addSpectrumList(MSData& msd, |
610 | 615 |
Rcpp::NumericVector ionInjectionTime = spctr_header["injectionTime"]; |
611 | 616 |
Rcpp::StringVector filterString = spctr_header["filterString"]; |
612 | 617 |
Rcpp::StringVector spectrumId = spctr_header["spectrumId"]; |
618 |
+ Rcpp::LogicalVector centroided = spctr_header["centroided"]; |
|
613 | 619 |
|
614 | 620 |
// From MSnbase::Spectrum Column in the header |
615 | 621 |
// msLevel integer $msLevel |
... | ... |
@@ -656,6 +662,11 @@ void RcppPwiz::addSpectrumList(MSData& msd, |
656 | 662 |
spct.set(MS_base_peak_m_z, basePeakMZ[i]); |
657 | 663 |
spct.set(MS_base_peak_intensity, basePeakIntensity[i]); |
658 | 664 |
spct.set(MS_total_ion_current, totIonCurrent[i]); |
665 |
+ // centroided |
|
666 |
+ if (centroided[i] != NA_LOGICAL && centroided[i] == TRUE) |
|
667 |
+ spct.set(MS_centroid_spectrum); |
|
668 |
+ if (centroided[i] != NA_LOGICAL && centroided[i] == FALSE) |
|
669 |
+ spct.set(MS_profile_spectrum); |
|
659 | 670 |
// TODO: |
660 | 671 |
// [X] seqNum: number observed in file. |
661 | 672 |
spct.index = seqNum[i] - 1; // Or just i? |