Browse code

Enable MS data write support

- export writeMSData and copyWriteMSData functions.
- Add related unit tests and documentation.

jotsetung authored on 23/06/2017 14:15:37
Showing 9 changed files

... ...
@@ -28,3 +28,4 @@ URL: https://github.com/sneumann/mzR/
28 28
 BugReports: https://github.com/sneumann/mzR/issues/
29 29
 biocViews: Infrastructure, DataImport, Proteomics, Metabolomics,
30 30
         MassSpectrometry
31
+RoxygenNote: 6.0.1
... ...
@@ -11,7 +11,9 @@ import(Rcpp)
11 11
 export(openMSfile,
12 12
        openIDfile,
13 13
        pwiz.version,
14
-       nChrom)
14
+       nChrom,
15
+       writeMSData,
16
+       copyWriteMSData)
15 17
 
16 18
 exportMethods(close,
17 19
               isInitialized,
... ...
@@ -1,6 +1,7 @@
1 1
 CHANGES IN VERSION 2.11.4
2 2
 -------------------------
3
- o nothing yet
3
+ o writeMSData and copyWriteMSData functions enabling to export MS data to
4
+   mzML or mzXML files.
4 5
  
5 6
 CHANGES IN VERSION 2.11.3
6 7
 -------------------------
... ...
@@ -73,7 +73,7 @@ openIDfile <- function(filename, verbose = FALSE) {
73 73
 #'     for writing. Currently only \code{"pwiz"} backend is supported.
74 74
 #'
75 75
 #' @param outformat \code{character(1)} the format of the output file. One of
76
-#'     \code{"mzml"}, \code{"mzxml"} and \code{"mgf"}.
76
+#'     \code{"mzml"} or \code{"mzxml"}..
77 77
 #'
78 78
 #' @param rtime_seconds \code{logical(1)} whether the retention time is provided
79 79
 #'     in seconds or minutes (defaults to \code{TRUE}).
... ...
@@ -89,13 +89,40 @@ openIDfile <- function(filename, verbose = FALSE) {
89 89
 #'     performed with the software.
90 90
 #' 
91 91
 #' @author Johannes Rainer
92
+#'
93
+#' @seealso \code{\link{copyWriteMSData}} for a function to copy general
94
+#'     information from a MS data file and writing eventually modified MS data
95
+#'     from that originating file.
96
+#' 
97
+#' @examples
98
+#'
99
+#' ## Open a MS file and read the spectrum and header information
100
+#' library(msdata)
101
+#' fl <- system.file("threonine", "threonine_i2_e35_pH_tree.mzXML",
102
+#'     package = "msdata")
103
+#' ms_fl <- openMSfile(fl, backend = "pwiz")
104
+#'
105
+#' ## Get the spectra
106
+#' pks <- spectra(ms_fl)
107
+#' ## Get the header
108
+#' hdr <- header(ms_fl)
109
+#'
110
+#' ## Modify the spectrum data adding 100 to each intensity.
111
+#' pks <- lapply(pks, function(z) {
112
+#'     z[, 2] <- z[, 2] + 100
113
+#'     z
114
+#' })
115
+#'
116
+#' ## Write the data to a mzML file.
117
+#' out_file <- tempfile()
118
+#' writeMSData(filename = out_file, header = hdr, data = pks)
92 119
 writeMSData <- function(filename, header, data, backend = "pwiz",
93 120
                         outformat = c("mzml"),
94 121
                         rtime_seconds = TRUE,
95 122
                         software_processing) {
96 123
     backend <- match.arg(backend)
97 124
     ## supp_formats <- c("mzml", "mgf", "mzxml")
98
-    supp_formats <- "mzml"
125
+    supp_formats <- c("mzml", "mzxml")
99 126
     outformat <- match.arg(tolower(outformat), supp_formats)
100 127
     if (missing(filename))
101 128
         stop("'filename' is a required parameter")
... ...
@@ -118,6 +145,8 @@ writeMSData <- function(filename, header, data, backend = "pwiz",
118 145
         mzR <- c(mzR, "MS:1000545")
119 146
     software_processing <- c(software_processing, list(mzR))
120 147
     if (backend == "pwiz") {
148
+        if (outformat == "mzxml" & any(header$injectionTime > 0))
149
+            warning("mzXML export does not support writing ion injection time")
121 150
         pwizModule <- new(Pwiz)
122 151
         pwizModule$writeSpectrumList(filename, outformat,
123 152
                                      header, data, rtime_seconds,
... ...
@@ -136,35 +165,64 @@ writeMSData <- function(filename, header, data, backend = "pwiz",
136 165
 #'     (eventually) manipulated spectra and header data with arguments
137 166
 #'     \code{header} and \code{data}.
138 167
 #'
168
+#' @note \code{copyWriteMSData} supports at present copying data from
169
+#'     \code{mzXML} and \code{mzML} and exporting to \code{mzML}. Export to
170
+#'     \code{mzXML} can fail for some input files.
171
+#'
139 172
 #' @note This function does not allow to write new MS files with new content.
140 173
 #'     Use the \code{\link{writeMSData}} function for that.
141 174
 #'
142 175
 #' @inheritParams writeMSData
143 176
 #' 
144
-#' @param originalFile \code{character(1)} with the name of the original file
177
+#' @param original_file \code{character(1)} with the name of the original file
145 178
 #'     from which the spectrum data was first read.
146 179
 #'
147 180
 #' @seealso \code{\link{writeMSData}} for a function to save MS data to a new
148 181
 #'     mzML or mzXML file.
149 182
 #' 
150 183
 #' @author Johannes Rainer
151
-copyWriteMSData <- function(filename, originalFile, header, data,
184
+#'
185
+#' @examples
186
+#' 
187
+#' ## Open a MS file and read the spectrum and header information
188
+#' library(msdata)
189
+#' fl <- system.file("threonine", "threonine_i2_e35_pH_tree.mzXML",
190
+#'     package = "msdata")
191
+#' ms_fl <- openMSfile(fl, backend = "pwiz")
192
+#'
193
+#' ## Get the spectra
194
+#' pks <- spectra(ms_fl)
195
+#' ## Get the header
196
+#' hdr <- header(ms_fl)
197
+#'
198
+#' ## Modify the spectrum data adding 100 to each intensity.
199
+#' pks <- lapply(pks, function(z) {
200
+#'     z[, 2] <- z[, 2] + 100
201
+#'     z
202
+#' })
203
+#'
204
+#' ## Copy metadata and additional information from the originating file
205
+#' ## and save it, along with the modified data, to a new mzML file.
206
+#' out_file <- tempfile()
207
+#' copyWriteMSData(filename = out_file, original_file = fl,
208
+#'     header = hdr, data = pks)
209
+copyWriteMSData <- function(filename, original_file, header, data,
152 210
                             backend = "pwiz",
153 211
                             outformat = "mzml",
154 212
                             rtime_seconds = TRUE,
155 213
                             software_processing) {
156 214
     backend <- match.arg(backend)
157 215
     ## supp_formats <- c("mzml", "mgf", "mzxml")
158
-    supp_formats <- "mzml"
216
+    supp_formats <- c("mzml", "mzxml")
159 217
     outformat <- match.arg(tolower(outformat), supp_formats)
160 218
     if (missing(filename))
161 219
         stop("'filename' is a required parameter")
162
-    if (missing(originalFile))
163
-        stop("'originalFile' is a required parameter")
220
+    if (missing(original_file))
221
+        stop("'original_file' is a required parameter")
164 222
     if (missing(header) | missing(data))
165 223
         stop("'header' and 'data' are required")
166
-    if (!file.exists(originalFile))
167
-        stop("Original file ", originalFile, " not found")
224
+    if (!file.exists(original_file))
225
+        stop("Original file ", original_file, " not found")
168 226
     ## Other checks:
169 227
     header <- .validateHeader(header)
170 228
     if (is(header, "character"))
... ...
@@ -182,8 +240,12 @@ copyWriteMSData <- function(filename, originalFile, header, data,
182 240
         mzR <- c(mzR, "MS:1000545")
183 241
     software_processing <- c(software_processing, list(mzR))
184 242
     if (backend == "pwiz") {
243
+        if (outformat == "mzxml" & any(header$injectionTime > 0)) {
244
+            warning("mzXML export does not support writing ion injection time")
245
+            header$injectionTime = 0
246
+        }
185 247
         pwizModule <- new(Pwiz)
186
-        pwizModule$copyWriteMSfile(filename, outformat, originalFile,
248
+        pwizModule$copyWriteMSfile(filename, outformat, original_file,
187 249
                                    header, data, rtime_seconds,
188 250
                                    software_processing)
189 251
     }
... ...
@@ -1,4 +1,4 @@
1
-BUILT_RCPP_VERSION = package_version("0.12.10")
1
+BUILT_RCPP_VERSION = package_version("0.12.11")
2 2
 
3 3
 .onLoad <-
4 4
     function(libname, pkgname) {
... ...
@@ -5,40 +5,48 @@ dontrun_copyWriteMSData <- function() {
5 5
     library(mzR)
6 6
     library(RUnit)
7 7
     test_folder = "/Users/jo/Desktop/"
8
+
9
+    ## INPUT: mzXML
8 10
     orig_file <- system.file("threonine", "threonine_i2_e35_pH_tree.mzXML",
9 11
                         package = "msdata")
10 12
     mzxml <- openMSfile(orig_file, backend = "pwiz")
11 13
     pks <- peaks(mzxml)
12 14
     hdr <- header(mzxml)
15
+    ii <- mzR::instrumentInfo(mzxml)
13 16
     mzR::close(mzxml)
14 17
 
18
+    ## OUTPUT: mzML
15 19
     fnew <- paste0(test_folder, "test_copyWrite.mzML")
16
-    mzR:::copyWriteMSData(filename = fnew, originalFile = orig_file,
20
+    mzR:::copyWriteMSData(filename = fnew, original_file = orig_file,
17 21
                           header = hdr, data = pks, backend = "pwiz")
18 22
     ## Check content is same
19 23
     mzml_new <- openMSfile(fnew, backend = "pwiz")
20 24
     pks_new <- peaks(mzml_new)
21 25
     hdr_new <- header(mzml_new)
26
+    ii_new <- mzR::instrumentInfo(mzml_new)
22 27
     mzR::close(mzml_new)
23 28
     checkEquals(pks_new, pks)
24
-    checkEquals(hdr_new, hdr)  ## polarity is OK here
25
-
26
-    ## Save as mzXML
27
-    ## fnew <- paste0(test_folder, "test_copyWrite.mzXML")
28
-    ## mzR:::copyWriteMSData(filename = fnew, originalFile = orig_file,
29
-    ##                       header = hdr, data = pks, backend = "pwiz",
30
-    ##                       outformat = "mzxml")
31
-    ## ## Check content is same
32
-    ## mzml_new <- openMSfile(fnew, backend = "pwiz")
33
-    ## pks_new <- peaks(mzml_new)
34
-    ## hdr_new <- header(mzml_new)
35
-    ## mzR::close(mzml_new)
36
-    ## checkEquals(pks_new, pks)
37
-    ## checkEquals(hdr_new, hdr)  ## polarity is OK here
29
+    checkEquals(hdr_new, hdr)
30
+    checkEquals(ii, ii_new)
31
+    
32
+    ## OUTPUT: mzXML
33
+    fnew <- paste0(test_folder, "test_copyWrite.mzXML")
34
+    mzR:::copyWriteMSData(filename = fnew, original_file = orig_file,
35
+                          header = hdr, data = pks, backend = "pwiz",
36
+                          outformat = "mzxml")
37
+    ## Check content is same
38
+    mzml_new <- openMSfile(fnew, backend = "pwiz")
39
+    pks_new <- peaks(mzml_new)
40
+    hdr_new <- header(mzml_new)
41
+    ii_new <- mzR::instrumentInfo(mzml_new)
42
+    mzR::close(mzml_new)
43
+    checkEquals(pks_new, pks)
44
+    checkEquals(hdr_new, hdr)
45
+    checkEquals(ii, ii_new)
38 46
 
39 47
     ## Save as mgf
40 48
     ## fnew <- paste0(test_folder, "test_copyWrite.mgf")
41
-    ## mzR:::copyWriteMSData(filename = fnew, originalFile = orig_file,
49
+    ## mzR:::copyWriteMSData(filename = fnew, original_file = orig_file,
42 50
     ##                       header = hdr, data = pks, backend = "pwiz",
43 51
     ##                       outformat = "mgf")
44 52
     ## ## Check content is same
... ...
@@ -55,13 +63,12 @@ dontrun_copyWriteMSData <- function() {
55 63
     fnew <- paste0(test_folder, "test_copyWrite.mzML")
56 64
     ## index is not OK after subsetting
57 65
     checkException(mzR:::copyWriteMSData(filename = fnew,
58
-                                         originalFile = orig_file,
66
+                                         original_file = orig_file,
59 67
                                          header = hdr_sub, data = pks_sub,
60 68
                                          backend = "pwiz"))
61 69
     hdr_sub$seqNum <- seq_len(nrow(hdr_sub))
62
-    mzR:::copyWriteMSData(filename = fnew, originalFile = orig_file,
70
+    mzR:::copyWriteMSData(filename = fnew, original_file = orig_file,
63 71
                           header = hdr_sub, data = pks_sub, backend = "pwiz")
64
-
65 72
     ## Check content is same
66 73
     mzml_new <- openMSfile(fnew, backend = "pwiz")
67 74
     pks_new <- peaks(mzml_new)
... ...
@@ -78,43 +85,46 @@ dontrun_copyWriteMSData <- function() {
78 85
     ## wrong spectra.
79 86
     ## wrong data processing.
80 87
     checkException(mzR:::copyWriteMSData(filename = fnew,
81
-                                         originalFile = orig_file,
88
+                                         original_file = orig_file,
82 89
                                          header = pks, data = hdr,
83 90
                                          backend = "pwiz"))
84 91
     checkException(mzR:::copyWriteMSData(filename = fnew,
85
-                                         originalFile = orig_file,
92
+                                         original_file = orig_file,
86 93
                                          header = hdr, data = hdr,
87 94
                                          backend = "pwiz"))
88 95
     checkException(mzR:::copyWriteMSData(filename = fnew,
89
-                                         originalFile = orig_file,
96
+                                         original_file = orig_file,
90 97
                                          header = hdr, data = pks,
91 98
                                          backend = "Ramp"))
92 99
     checkException(mzR:::copyWriteMSData(filename = fnew,
93
-                                         originalFile = "somefile",
100
+                                         original_file = "somefile",
94 101
                                          header = hdr, data = pks,
95 102
                                          backend = "pwiz"))
96 103
     checkException(mzR:::copyWriteMSData(filename = fnew,
97
-                                         originalFile = orig_file,
104
+                                         original_file = orig_file,
98 105
                                          header = hdr, data = pks,
99 106
                                          backend = "pwiz",
100 107
                                          software_processing = c("other")))
101 108
     
102
-    ## mzML input file.
109
+    ## INPUT: mzML
103 110
     orig_file <- system.file("proteomics",
104 111
                              "TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.mzML.gz",
105 112
                              package = "msdata")
106
-    mzxml <- openMSfile(orig_file, backend = "pwiz")
107
-    pks <- peaks(mzxml)
108
-    hdr <- header(mzxml)
109
-    mzR::close(mzxml)
113
+    fl <- openMSfile(orig_file, backend = "pwiz")
114
+    pks <- peaks(fl)
115
+    hdr <- header(fl)
116
+    ii <- mzR::instrumentInfo(fl)
117
+    mzR::close(fl)
110 118
 
119
+    ## OUTPUT: mzML
111 120
     fnew <- paste0(test_folder, "test_copyWrite.mzML")
112
-    mzR:::copyWriteMSData(filename = fnew, originalFile = orig_file,
121
+    mzR:::copyWriteMSData(filename = fnew, original_file = orig_file,
113 122
                           header = hdr, data = pks, backend = "pwiz")
114 123
     ## Check content is same
115 124
     mzml_new <- openMSfile(fnew, backend = "pwiz")
116 125
     pks_new <- peaks(mzml_new)
117 126
     hdr_new <- header(mzml_new)
127
+    ii_new <- mzR::instrumentInfo(mzml_new)
118 128
     mzR::close(mzml_new)
119 129
     checkEquals(pks_new, pks)
120 130
     ## acquisitionNum and precursorScanNum will be different, replace with
... ...
@@ -124,6 +134,66 @@ dontrun_copyWriteMSData <- function() {
124 134
     hdr_new$acquisitionNum <- as.integer(factor(hdr_new$acquisitionNum))
125 135
     hdr_new$precursorScanNum <- as.integer(factor(hdr_new$precursorScanNum))
126 136
     checkEquals(hdr_new, hdr)  ## polarity is OK here
137
+    checkEquals(ii, ii_new)
138
+    
139
+    ## OUTPUT: mzXML
140
+    fnew <- paste0(test_folder, "test_copyWrite.mzXML")
141
+    mzR:::copyWriteMSData(filename = fnew, original_file = orig_file,
142
+                          header = hdr, data = pks, backend = "pwiz",
143
+                          outformat = "mzxml")
144
+    ## Check content is same
145
+    mzml_new <- openMSfile(fnew, backend = "pwiz")
146
+    pks_new <- peaks(mzml_new)
147
+    hdr_new <- header(mzml_new)
148
+    ii_new <- mzR::instrumentInfo(mzml_new)
149
+    mzR::close(mzml_new)
150
+    checkEquals(pks_new, pks)
151
+    ## acquisitionNum and precursorScanNum will be different, replace with
152
+    ## factors - order and all has to be the same though.
153
+    hdr$acquisitionNum <- as.integer(factor(hdr$acquisitionNum))
154
+    hdr$precursorScanNum <- as.integer(factor(hdr$precursorScanNum))
155
+    hdr_new$acquisitionNum <- as.integer(factor(hdr_new$acquisitionNum))
156
+    hdr_new$precursorScanNum <- as.integer(factor(hdr_new$precursorScanNum))
157
+    rt_col <- which(colnames(hdr) == "retentionTime")
158
+    checkEquals(hdr[, rt_col], hdr_new[, rt_col], tolerance = 0.01)
159
+    hdr$injectionTime <- 0  ## injectionTime export not supported.
160
+    checkEquals(hdr[ , -rt_col], hdr_new[ , -rt_col])
161
+    ## checkEquals(ii, ii_new)
162
+
163
+    ## Other mzML:
164
+    test_file <- system.file("microtofq", "MM14.mzML", package = "msdata")
165
+    in_file <- openMSfile(test_file, backend = "pwiz")
166
+    hdr <- header(in_file)
167
+    pks <- peaks(in_file)
168
+    ii <- mzR::instrumentInfo(in_file)
169
+    mzR::close(in_file)
170
+    
171
+    ## mzML
172
+    out_file <- paste0(test_folder, "test_copyWrite.mzML")
173
+    mzR:::copyWriteMSData(filename = out_file, original_file = test_file,
174
+                          header = hdr, data = pks,
175
+                          software_processing = c("MSnbase", "2.3.8"))
176
+    in_file <- openMSfile(out_file, backend = "pwiz")
177
+    hdr_2 <- header(in_file)
178
+    pks_2 <- peaks(in_file)
179
+    ii_2 <- mzR::instrumentInfo(in_file)
180
+    mzR::close(in_file)
181
+    checkEquals(hdr, hdr_2)
182
+    checkEquals(pks, pks_2)
183
+    checkEquals(ii, ii_2)
184
+    
185
+    ## mzXML output:
186
+    out_file <- paste0(test_folder, "test_copyWrite.mzXML")
187
+    mzR:::copyWriteMSData(filename = out_file, original_file = test_file,
188
+                          header = hdr, data = pks, outformat = "mzXML",
189
+                          software_processing = c("MSnbase", "2.3.8"))
190
+    in_file <- openMSfile(out_file, backend = "pwiz")
191
+    hdr_2 <- header(in_file)
192
+    pks_2 <- peaks(in_file)
193
+    mzR::close(in_file)
194
+    checkEquals(hdr, hdr_2)
195
+    checkEquals(pks, pks_2)
196
+    checkEquals(ii, ii_2)
127 197
 }
128 198
 
129 199
 dontrun_test_writeMSData <- function() {
... ...
@@ -131,6 +201,7 @@ dontrun_test_writeMSData <- function() {
131 201
     library(mzR)
132 202
     library(RUnit)
133 203
     test_folder = "/Users/jo/Desktop/"
204
+    ## Input: mzXML
134 205
     test_file <- system.file("threonine", "threonine_i2_e35_pH_tree.mzXML",
135 206
                              package = "msdata")
136 207
     in_file <- openMSfile(test_file, backend = "pwiz")
... ...
@@ -138,7 +209,7 @@ dontrun_test_writeMSData <- function() {
138 209
     pks <- peaks(in_file)
139 210
     mzR::close(in_file)
140 211
 
141
-    ## Test writing the data.
212
+    ## mzML
142 213
     out_file <- paste0(test_folder, "test_write.mzML")
143 214
     mzR:::writeMSData(filename = out_file, header = hdr, data = pks)
144 215
     in_file <- openMSfile(out_file, backend = "pwiz")
... ...
@@ -161,13 +232,113 @@ dontrun_test_writeMSData <- function() {
161 232
     checkEquals(pks, pks_2)
162 233
     
163 234
     ## mzXML output:
164
-    ## out_file <- paste0(test_folder, "test_write.mzXML")
235
+    out_file <- paste0(test_folder, "test_write.mzXML")
236
+    mzR:::writeMSData(filename = out_file, header = hdr, data = pks,
237
+                      outformat = "mzXML")
238
+    in_file <- openMSfile(out_file, backend = "pwiz")
239
+    hdr_2 <- header(in_file)
240
+    pks_2 <- peaks(in_file)
241
+    mzR::close(in_file)
242
+    checkEquals(hdr, hdr_2)
243
+    checkEquals(pks, pks_2)
244
+
245
+    ## mgf output:
246
+    ## out_file <- paste0(test_folder, "test_write.mgf")
165 247
     ## mzR:::writeMSData(filename = out_file, header = hdr, data = pks,
166
-    ##                   outformat = "mzXML")
248
+    ##                   outformat = "mgf")
167 249
     ## in_file <- openMSfile(out_file, backend = "pwiz")
168 250
     ## hdr_2 <- header(in_file)
169 251
     ## pks_2 <- peaks(in_file)
170 252
     ## mzR::close(in_file)
171 253
     ## checkEquals(hdr, hdr_2)
172 254
     ## checkEquals(pks, pks_2)
255
+
256
+    ## Input: mzML
257
+    test_file <- system.file("proteomics",
258
+                             "TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.mzML.gz",
259
+                             package = "msdata")
260
+    in_file <- openMSfile(test_file, backend = "pwiz")
261
+    hdr <- header(in_file)
262
+    pks <- peaks(in_file)
263
+    mzR::close(in_file)
264
+    
265
+    ## mzML
266
+    out_file <- paste0(test_folder, "test_write.mzML")
267
+    mzR:::writeMSData(filename = out_file, header = hdr, data = pks)
268
+    in_file <- openMSfile(out_file, backend = "pwiz")
269
+    hdr_2 <- header(in_file)
270
+    pks_2 <- peaks(in_file)
271
+    mzR::close(in_file)
272
+    checkEquals(hdr, hdr_2)
273
+    checkEquals(pks, pks_2)
274
+
275
+    ## mzXML output:
276
+    out_file <- paste0(test_folder, "test_write.mzXML")
277
+    mzR:::writeMSData(filename = out_file, header = hdr, data = pks,
278
+                      outformat = "mzXML")
279
+    in_file <- openMSfile(out_file, backend = "pwiz")
280
+    hdr_2 <- header(in_file)
281
+    pks_2 <- peaks(in_file)
282
+    mzR::close(in_file)
283
+    rt_col <- which(colnames(hdr) == "retentionTime")
284
+    checkEquals(hdr[, rt_col], hdr_2[, rt_col], tolerance = 0.01)
285
+    hdr$injectionTime <- 0  ## injectionTime export not supported.
286
+    checkEquals(hdr[ , -rt_col], hdr_2[ , -rt_col])
287
+    checkEquals(pks, pks_2)
288
+
289
+    ## Other mzML:
290
+    test_file <- system.file("microtofq", "MM14.mzML", package = "msdata")
291
+    in_file <- openMSfile(test_file, backend = "pwiz")
292
+    hdr <- header(in_file)
293
+    pks <- peaks(in_file)
294
+    mzR::close(in_file)
295
+    
296
+    ## mzML
297
+    out_file <- paste0(test_folder, "test_write.mzML")
298
+    mzR:::writeMSData(filename = out_file, header = hdr, data = pks)
299
+    in_file <- openMSfile(out_file, backend = "pwiz")
300
+    hdr_2 <- header(in_file)
301
+    pks_2 <- peaks(in_file)
302
+    mzR::close(in_file)
303
+    checkEquals(hdr, hdr_2)
304
+    checkEquals(pks, pks_2)
305
+
306
+    ## mzXML output:
307
+    out_file <- paste0(test_folder, "test_write.mzXML")
308
+    mzR:::writeMSData(filename = out_file, header = hdr, data = pks,
309
+                      outformat = "mzXML")
310
+    in_file <- openMSfile(out_file, backend = "pwiz")
311
+    hdr_2 <- header(in_file)
312
+    pks_2 <- peaks(in_file)
313
+    mzR::close(in_file)
314
+    checkEquals(hdr, hdr_2)
315
+    checkEquals(pks, pks_2)
316
+
317
+    ## mzData:
318
+    test_file <- system.file("iontrap", "extracted.mzData", package = "msdata")
319
+    in_file <- openMSfile(test_file, backend = "Ramp")
320
+    hdr <- header(in_file)
321
+    pks <- peaks(in_file)
322
+    mzR::close(in_file)
323
+
324
+    ## mzML
325
+    out_file <- paste0(test_folder, "test_write.mzML")
326
+    mzR:::writeMSData(filename = out_file, header = hdr, data = pks)
327
+    in_file <- openMSfile(out_file, backend = "pwiz")
328
+    hdr_2 <- header(in_file)
329
+    pks_2 <- peaks(in_file)
330
+    mzR::close(in_file)
331
+    checkEquals(hdr, hdr_2)
332
+    checkEquals(pks, pks_2)
333
+
334
+    ## mzXML output:
335
+    out_file <- paste0(test_folder, "test_write.mzXML")
336
+    mzR:::writeMSData(filename = out_file, header = hdr, data = pks,
337
+                      outformat = "mzXML")
338
+    in_file <- openMSfile(out_file, backend = "pwiz")
339
+    hdr_2 <- header(in_file)
340
+    pks_2 <- peaks(in_file)
341
+    mzR::close(in_file)
342
+    checkEquals(hdr, hdr_2)
343
+    checkEquals(pks, pks_2)
173 344
 }
... ...
@@ -131,6 +131,9 @@
131 131
 \seealso{
132 132
   \code{\link{instrumentInfo}} for metadata access and the
133 133
   \code{"\linkS4class{mzR}"} class.
134
+
135
+  \code{\link{writeMSData}} and \code{\link{copyWriteMSData}} for
136
+  functions to write MS data in \emph{mzML} or \code{mzXML} format.
134 137
 }
135 138
 
136 139
 \author{
137 140
deleted file mode 100644
... ...
@@ -1,17 +0,0 @@
1
-#include <Rcpp.h>
2
-#include "pwiz/data/msdata/Version.hpp"
3
-
4
-using namespace Rcpp;
5
-
6
-//' Get Proteowizard Version
7
-//' 
8
-//' Description
9
-//' Return the pwiz::msdata::Version as String
10
-RcppExport SEXP mzR_pwiz_version() {
11
-BEGIN_RCPP
12
-    Rcpp::RObject __result;
13
-    Rcpp::RNGScope __rngScope;
14
-    __result = Rcpp::wrap(pwiz::msdata::Version::str());
15
-    return __result;
16
-END_RCPP
17
-}
... ...
@@ -366,33 +366,13 @@ void RcppPwiz::copyWriteMSfile(const string& file, const string& format,
366 366
   MSData newmsd;
367 367
   newmsd.cvs = defaultCVList();
368 368
 
369
-  // Break the header down into its elements/columns:
370
-  // Rcpp::IntegerVector seqNum = spctr_header["seqNum"];
371
-  // Rcpp::IntegerVector acquisitionNum = spctr_header["acquisitionNum"];
372 369
   Rcpp::IntegerVector msLevel = spctr_header["msLevel"];
373
-  // Rcpp::IntegerVector polarity = spctr_header["polarity"];
374
-  // Rcpp::IntegerVector peaksCount = spctr_header["peaksCount"];
375
-  // Rcpp::NumericVector totIonCurrent = spctr_header["totIonCurrent"];
376
-  // Rcpp::NumericVector retentionTime = spctr_header["retentionTime"];
377
-  // Rcpp::NumericVector basePeakMZ = spctr_header["basePeakMZ"];
378
-  // Rcpp::NumericVector basePeakIntensity = spctr_header["basePeakIntensity"];
379
-  // Rcpp::NumericVector collisionEnergy = spctr_header["collisionEnergy"];
380
-  // Rcpp::NumericVector ionisationEnergy = spctr_header["ionisationEnergy"];
381
-  // Rcpp::NumericVector lowMZ = spctr_header["lowMZ"];
382
-  // Rcpp::NumericVector highMZ = spctr_header["highMZ"];
383
-  // Rcpp::IntegerVector precursorScanNum = spctr_header["precursorScanNum"];
384
-  // Rcpp::NumericVector precursorMZ = spctr_header["precursorMZ"];
385
-  // Rcpp::IntegerVector precursorCharge = spctr_header["precursorCharge"];
386
-  // Rcpp::NumericVector precursorIntensity = spctr_header["precursorIntensity"];
387
-  // Rcpp::IntegerVector mergedScan = spctr_header["mergedScan"];
388
-  // // Skipping mergedResultScanNum, mergedResultStartScanNum and mergedResultEndScanNum
389
-  // Rcpp::NumericVector ionInjectionTime = spctr_header["injectionTime"];
390
-
391 370
   // Copy data from the original file.
392 371
   // o fileDescription with: fileContent, sourceFileList
393
-  //   TODO: if we did filtering on MS or centroiding we might have to adapt the
394
-  //   fileContent.
395
-  newmsd.fileDescription = msd->fileDescription;
372
+  //   NOTE: don't copy the file description for mzXML export - somehow the
373
+  //   spectra data will then not be written.
374
+  if (format != "mzxml")
375
+    newmsd.fileDescription = msd->fileDescription;
396 376
   bool is_ms1 = false;
397 377
   bool is_msn = false;
398 378
   for (int i = 0; i < msLevel.size(); i++) {
... ...
@@ -405,15 +385,13 @@ void RcppPwiz::copyWriteMSfile(const string& file, const string& format,
405 385
     newmsd.fileDescription.fileContent.set(MS_MS1_spectrum);
406 386
   if (is_msn)
407 387
     newmsd.fileDescription.fileContent.set(MS_MSn_spectrum);
408
-  // The serializer adds also the original file here AND the newly written file.
409 388
 
410 389
   // o paramGroupList
411
-  newmsd.paramGroupPtrs = msd->paramGroupPtrs;
390
+  if (format != "mzxml")
391
+    newmsd.paramGroupPtrs = msd->paramGroupPtrs;
412 392
   // o sampleList
413 393
   newmsd.samplePtrs = msd->samplePtrs;
414 394
   // o instrumentConfigurationList
415
-  // vector<InstrumentConfigurationPtr> icp = msd->instrumentConfigurationPtrs;
416
-  // newmsd.instrumentConfigurationPtrs = icp;
417 395
   newmsd.instrumentConfigurationPtrs = msd->instrumentConfigurationPtrs;
418 396
   // o softwareList
419 397
   newmsd.softwarePtrs = msd->softwarePtrs;
... ...
@@ -425,26 +403,28 @@ void RcppPwiz::copyWriteMSfile(const string& file, const string& format,
425 403
       addDataProcessing(newmsd, Rcpp::as<Rcpp::StringVector>(software_processing(sp)));
426 404
     }
427 405
   }
428
-
406
+  
429 407
   // o run
430 408
   // Initialize the run and fill with data from the original file.
431 409
   Run &original_run = msd->run;
432 410
   newmsd.run.id = original_run.id;
433
-  newmsd.run.defaultInstrumentConfigurationPtr =
434
-    original_run.defaultInstrumentConfigurationPtr;
435
-  newmsd.run.samplePtr = original_run.samplePtr;
436
-  newmsd.run.startTimeStamp = original_run.startTimeStamp;
437
-  newmsd.run.defaultSourceFilePtr = original_run.defaultSourceFilePtr;
411
+  if (format != "mzxml") {
412
+    newmsd.run.defaultInstrumentConfigurationPtr =
413
+      original_run.defaultInstrumentConfigurationPtr;
414
+    newmsd.run.samplePtr = original_run.samplePtr;
415
+    newmsd.run.startTimeStamp = original_run.startTimeStamp;
416
+    newmsd.run.defaultSourceFilePtr = original_run.defaultSourceFilePtr;
417
+  }
438 418
   // Now filling with new data
439 419
   addSpectrumList(newmsd, spctr_header, spctr_data, rtime_seconds);
440 420
   
441
-  if(format == "mgf") {
421
+  if (format == "mgf") {
442 422
     std::ofstream* mgfOutFileP = new std::ofstream(file.c_str());
443 423
     Serializer_MGF serializerMGF;
444 424
     serializerMGF.write(*mgfOutFileP, newmsd);
445 425
     mgfOutFileP->flush();
446 426
     mgfOutFileP->close();
447
-  } else if(format == "mzxml") {
427
+  } else if (format == "mzxml") {
448 428
     std::ofstream mzXMLOutFileP(file.c_str());
449 429
     Serializer_mzXML::Config config;
450 430
     config.binaryDataEncoderConfig.compression = BinaryDataEncoder::Compression_Zlib;
... ...
@@ -452,7 +432,7 @@ void RcppPwiz::copyWriteMSfile(const string& file, const string& format,
452 432
     serializerMzXML.write(mzXMLOutFileP, newmsd);
453 433
     mzXMLOutFileP.flush();
454 434
     mzXMLOutFileP.close();
455
-  } else if(format == "mzml") {
435
+  } else if (format == "mzml") {
456 436
     std::ofstream mzXMLOutFileP(file.c_str());
457 437
     Serializer_mzML::Config config;
458 438
     config.binaryDataEncoderConfig.compression = BinaryDataEncoder::Compression_Zlib;
... ...
@@ -477,28 +457,7 @@ void RcppPwiz::writeSpectrumList(const string& file, const string& format,
477 457
   MSData newmsd;
478 458
   newmsd.cvs = defaultCVList();
479 459
 
480
-  // Break the header down into its elements/columns:
481
-  // Rcpp::IntegerVector seqNum = spctr_header["seqNum"];
482
-  // Rcpp::IntegerVector acquisitionNum = spctr_header["acquisitionNum"];
483 460
   Rcpp::IntegerVector msLevel = spctr_header["msLevel"];
484
-  // Rcpp::IntegerVector polarity = spctr_header["polarity"];
485
-  // Rcpp::IntegerVector peaksCount = spctr_header["peaksCount"];
486
-  // Rcpp::NumericVector totIonCurrent = spctr_header["totIonCurrent"];
487
-  // Rcpp::NumericVector retentionTime = spctr_header["retentionTime"];
488
-  // Rcpp::NumericVector basePeakMZ = spctr_header["basePeakMZ"];
489
-  // Rcpp::NumericVector basePeakIntensity = spctr_header["basePeakIntensity"];
490
-  // Rcpp::NumericVector collisionEnergy = spctr_header["collisionEnergy"];
491
-  // Rcpp::NumericVector ionisationEnergy = spctr_header["ionisationEnergy"];
492
-  // Rcpp::NumericVector lowMZ = spctr_header["lowMZ"];
493
-  // Rcpp::NumericVector highMZ = spctr_header["highMZ"];
494
-  // Rcpp::IntegerVector precursorScanNum = spctr_header["precursorScanNum"];
495
-  // Rcpp::NumericVector precursorMZ = spctr_header["precursorMZ"];
496
-  // Rcpp::IntegerVector precursorCharge = spctr_header["precursorCharge"];
497
-  // Rcpp::NumericVector precursorIntensity = spctr_header["precursorIntensity"];
498
-  // Rcpp::IntegerVector mergedScan = spctr_header["mergedScan"];
499
-  // // Skipping mergedResultScanNum, mergedResultStartScanNum and mergedResultEndScanNum
500
-  // Rcpp::NumericVector ionInjectionTime = spctr_header["injectionTime"];
501
-
502 461
   bool is_ms1 = false;
503 462
   bool is_msn = false;
504 463
   for (int i = 0; i < msLevel.size(); i++) {
... ...
@@ -519,18 +478,18 @@ void RcppPwiz::writeSpectrumList(const string& file, const string& format,
519 478
     }
520 479
   }
521 480
   
522
-  newmsd.run.id = "Experiment 1";
481
+  newmsd.run.id = "Experiment_1";
523 482
 
524 483
   // Now filling with new data
525 484
   addSpectrumList(newmsd, spctr_header, spctr_data, rtime_seconds);
526 485
 
527
-  if(format == "mgf") {
486
+  if (format == "mgf") {
528 487
     std::ofstream* mgfOutFileP = new std::ofstream(file.c_str());
529 488
     Serializer_MGF serializerMGF;
530 489
     serializerMGF.write(*mgfOutFileP, newmsd);
531 490
     mgfOutFileP->flush();
532 491
     mgfOutFileP->close();
533
-  } else if(format == "mzxml") {
492
+  } else if (format == "mzxml") {
534 493
     std::ofstream mzXMLOutFileP(file.c_str());
535 494
     Serializer_mzXML::Config config;
536 495
     config.binaryDataEncoderConfig.compression = BinaryDataEncoder::Compression_Zlib;
... ...
@@ -538,7 +497,7 @@ void RcppPwiz::writeSpectrumList(const string& file, const string& format,
538 497
     serializerMzXML.write(mzXMLOutFileP, newmsd);
539 498
     mzXMLOutFileP.flush();
540 499
     mzXMLOutFileP.close();
541
-  } else if(format == "mzml") {
500
+  } else if (format == "mzml") {
542 501
     std::ofstream mzXMLOutFileP(file.c_str());
543 502
     Serializer_mzML::Config config;
544 503
     config.binaryDataEncoderConfig.compression = BinaryDataEncoder::Compression_Zlib;