Browse code

File extension and content dependent backend selection

- openMsFile selects the backend based on the file extension (and eventually
file content). This is the default behaviour if backend is not manually
specified.
- Add related unit tests and documentation.

jotsetung authored on 23/11/2017 13:19:46
Showing 4 changed files

... ...
@@ -2,7 +2,7 @@ Package: mzR
2 2
 Type: Package
3 3
 Title: parser for netCDF, mzXML, mzData and mzML and mzIdentML files
4 4
        (mass spectrometry data)
5
-Version: 2.13.0
5
+Version: 2.13.1
6 6
 Author: Bernd Fischer, Steffen Neumann, Laurent Gatto, Qiang Kou
7 7
 Maintainer: Bernd Fischer <b.fischer@dkfz.de>,
8 8
 	    Steffen Neumann <sneumann@ipb-halle.de>,
... ...
@@ -1,6 +1,7 @@
1
-CHANGES IN VERSION 2.11.12
1
+CHANGES IN VERSION 2.13.1
2 2
 --------------------------
3
- o Nothing yet
3
+ o openMsFile automatically determine the backend to use based on file extension
4
+   and content.
4 5
  
5 6
 CHANGES IN VERSION 2.11.11
6 7
 --------------------------
... ...
@@ -1,10 +1,12 @@
1 1
 openMSfile <- function(filename,
2
-                       backend=c("pwiz", "Ramp", "netCDF"),
2
+                       backend = NULL,
3 3
                        verbose = FALSE) {
4 4
     if (!file.exists(filename))
5 5
         stop("File ",filename," not found.\n")
6 6
     filename <- path.expand(filename)
7
-    backend <- match.arg(backend)
7
+    if (is.null(backend))
8
+        backend <- .mzRBackend(filename)
9
+    backend <- match.arg(backend, c("pwiz", "Ramp", "netCDF"))
8 10
     
9 11
     if (backend == "Ramp") {
10 12
         rampModule <- new( Ramp ) 
... ...
@@ -38,6 +40,56 @@ openMSfile <- function(filename,
38 40
     }  
39 41
 }
40 42
 
43
+#' @title Define the type of mzR backend to use based on the file name or
44
+#'     content
45
+#'
46
+#' @description Simple helper to define the mzR backend that should/can be used
47
+#'     to read the file.
48
+#'
49
+#' @param x \code{character(1)} representing the file name.
50
+#'
51
+#' @return A \code{character(1)} with the name of the backend (either
52
+#'     \code{"netCDF"}, \code{"Ramp"} or \code{"pwiz"}.
53
+#'
54
+#' @author Johannes Rainer, Sebastian Gibb
55
+#'
56
+#' @noRd
57
+.mzRBackend <- function(x = character()) {
58
+    if (length(x) != 1)
59
+        stop("parameter 'x' has to be of length 1")
60
+    ## Use if/else conditions based on a suggestion from sgibb to avoid loops.
61
+    if (grepl("\\.mzml($|\\.)|\\.mzxml($|\\.)", x, ignore.case = TRUE)) {
62
+        return("pwiz")
63
+    } else if (grepl("\\.mzdata($|\\.)", x, ignore.case = TRUE)) {
64
+        return("Ramp")
65
+    } else if (grepl("\\.cdf($|\\.)|\\.nc($|\\.)", x, ignore.case = TRUE)) {
66
+        return("netCDF")
67
+    } else {
68
+        return(.mzRBackendFromContent(x))
69
+    }
70
+}
71
+
72
+#' Determine the backend from the (first few lines of the) file content.
73
+#' 
74
+#' @author Johannes Rainer
75
+#'
76
+#' @noRd
77
+.mzRBackendFromContent <- function(x = character()) {
78
+    if (length(x) != 1)
79
+        stop("parameter 'x' has to be of length 1")
80
+    suppressWarnings(
81
+        first_lines <- readLines(x, n = 4)
82
+    )
83
+    if (any(grepl("<mz[X]?ML", first_lines))) {
84
+        return("pwiz")
85
+    } else if (any(grepl("<mzData", first_lines))) {
86
+        return("Ramp")
87
+    } else if (substr(readBin(x, character(), n = 1), 1, 3) == "CDF") {
88
+        return("netCDF")
89
+    } else
90
+        stop("Could not determine file type for ", x)        
91
+}
92
+
41 93
 openIDfile <- function(filename, verbose = FALSE) {
42 94
   if (!file.exists(filename))
43 95
     stop("File ",filename," not found.\n")
... ...
@@ -9,7 +9,7 @@
9 9
   Create and check mzR objects from netCDF, mzXML, mzData or mzML files.
10 10
 }
11 11
 \usage{
12
- openMSfile(filename, backend=c("pwiz", "Ramp", "netCDF"), verbose = FALSE)
12
+ openMSfile(filename, backend = NULL, verbose = FALSE)
13 13
 
14 14
  initializeRamp(object)
15 15
 
... ...
@@ -20,10 +20,17 @@
20 20
  openIDfile(filename, verbose = FALSE)
21 21
 }
22 22
 \arguments{
23
-  \item{filename}{ Path name of the netCDF, mzData, mzXML or mzML file to
24
-    read/write. }
25
-  \item{backend}{ A \code{character} specifiying with backend API to
26
-    use. Currently 'Ramp', 'netCDF'  and 'pwiz' (default) are available.}
23
+  \item{filename}{
24
+    Path name of the netCDF, mzData, mzXML or mzML file to
25
+    read/write.
26
+  }
27
+  \item{backend}{
28
+    A \code{character(1)} specifiying which backend API to
29
+    use. Currently 'Ramp', 'netCDF'  and 'pwiz' are supported. If
30
+    \code{backend = NULL} (the default), the function tries to determine
31
+    the backend to be used based on either the file extension of the
32
+    file content.
33
+  }
27 34
   \item{object}{ An instantiated mzR object. }
28 35
   \item{verbose}{ Enable verbose output. }
29 36
   \item{...}{ Additional arguments, currently ignored. }