Browse code

Explicitly gunzip to tempfile()

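Decompressing the gzipped file to a temporary file and reading that with fread(), instead of passing a 'zcat' command to fread(), avoids the error 'gzip: stdout: No space left on device' (see https://github.com/Rdatatable/data.table/issues/717#issuecomment-226038307).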

Peter Hickey authored on 04/07/2018 13:24:07
Showing 2 changed files

... ...
@@ -46,7 +46,7 @@ importFrom(Biostrings, "DNAString", "vmatchPattern", "reverseComplement")
46 46
 importFrom(utils, "read.delim")
47 47
 importFrom(BSgenome, "vmatchPattern")
48 48
 importFrom(tools, "file_path_as_absolute")
49
-importFrom(R.utils, "isGzipped")
49
+importFrom(R.utils, "isGzipped", "gunzip")
50 50
 
51 51
 ##
52 52
 ## Exporting
... ...
@@ -94,65 +94,72 @@
94 94
     }
95 95
     ptime1 <- proc.time()
96 96
     # TODO: Make copy of isGzipped() to avoid dependency on R.utils?
97
+    # TODO: Use isCompressedFile() and decompressFile().
97 98
     if (isGzipped(file)) {
98
-        sysname <- Sys.info()[["sysname"]]
99
-        if (sysname == "Linux") {
100
-            input <- sprintf("zcat %s", shQuote(file))
101
-        } else if (sysname == "Darwin") {
102
-            # macOS/OS X needs a different command; see
103
-            # https://github.com/Rdatatable/data.table/issues/717#issuecomment-140028670
104
-            input <- sprintf("zcat < %s", shQuote(file))
105
-        } else {
106
-            # TODO: Support other OSs (e.g, Windows, sunOS)
107
-            warning(
108
-                "Unable to find 'zcat' for use with 'data.table::fread()'.\n",
109
-                "Falling back to 'utils::read.table()'.")
110
-            is_zcat_available <- FALSE
111
-        }
112
-        # TODO: Gracefully handle situation where user incorrectly sets
113
-        #       `is_zcat_available = TRUE` and it consequently fails.
114
-        if (is_zcat_available) {
115
-            x <- fread(
116
-                input = input,
117
-                sep = "\t",
118
-                header = FALSE,
119
-                verbose = subverbose,
120
-                drop = drop,
121
-                colClasses = colClasses,
122
-                col.names = col.names,
123
-                # TODO: Check remainder of these arguments are optimal.
124
-                # TODO: Add `key` argument? Check other arguments available in
125
-                #       fread().
126
-                quote = "",
127
-                strip.white = FALSE,
128
-                showProgress = as.logical(verbose),
129
-                nThread = nThread)
130
-        } else {
131
-            x <- read.table(
132
-                file = file,
133
-                header = FALSE,
134
-                sep = "\t",
135
-                quote = "",
136
-                col.names = col.names,
137
-                colClasses = colClasses,
138
-                strip.white = FALSE)
139
-            setDT(x)
140
-        }
141
-    } else {
142
-        x <- fread(
143
-            file = file,
144
-            sep = "\t",
145
-            header = FALSE,
146
-            verbose = subverbose,
147
-            drop = drop,
148
-            colClasses = colClasses,
149
-            col.names = col.names,
150
-            # TODO: Check remainder of these arguments are optimal.
151
-            quote = "",
152
-            strip.white = FALSE,
153
-            showProgress = as.logical(verbose),
154
-            nThread = nThread)
99
+        message(
100
+            "[.readBismarkAsDT] Gunzipping file '", file, "' to tempfile().")
101
+
102
+        file <- gunzip(
103
+            filename = file,
104
+            destname = tempfile(),
105
+            remove = FALSE)
155 106
     }
107
+    # sysname <- Sys.info()[["sysname"]]
108
+    # if (sysname == "Linux") {
109
+    #     input <- sprintf("zcat %s", shQuote(file))
110
+    # } else if (sysname == "Darwin") {
111
+    #     # macOS/OS X needs a different command; see
112
+    #     # https://github.com/Rdatatable/data.table/issues/717#issuecomment-140028670
113
+    #     input <- sprintf("zcat < %s", shQuote(file))
114
+    # } else {
115
+    #     # TODO: Support other OSs (e.g, Windows, sunOS)
116
+    #     warning(
117
+    #         "Unable to find 'zcat' for use with 'data.table::fread()'.\n",
118
+    #         "Falling back to 'utils::read.table()'.")
119
+    #     is_zcat_available <- FALSE
120
+    # }
121
+    # TODO: Gracefully handle situation where user incorrectly sets
122
+    #       `is_zcat_available = TRUE` and it consequently fails.
123
+    # if (is_zcat_available) {
124
+    #     x <- fread(
125
+    #         input = input,
126
+    #         sep = "\t",
127
+    #         header = FALSE,
128
+    #         verbose = subverbose,
129
+    #         drop = drop,
130
+    #         colClasses = colClasses,
131
+    #         col.names = col.names,
132
+    #         # TODO: Check remainder of these arguments are optimal.
133
+    #         # TODO: Add `key` argument? Check other arguments available in
134
+    #         #       fread().
135
+    #         quote = "",
136
+    #         strip.white = FALSE,
137
+    #         showProgress = as.logical(verbose),
138
+    #         nThread = nThread)
139
+    # } else {
140
+    #     x <- read.table(
141
+    #         file = file,
142
+    #         header = FALSE,
143
+    #         sep = "\t",
144
+    #         quote = "",
145
+    #         col.names = col.names,
146
+    #         colClasses = colClasses,
147
+    #         strip.white = FALSE)
148
+    #     setDT(x)
149
+    # }
150
+    x <- fread(
151
+        file = file,
152
+        sep = "\t",
153
+        header = FALSE,
154
+        verbose = subverbose,
155
+        drop = drop,
156
+        colClasses = colClasses,
157
+        col.names = col.names,
158
+        # TODO: Check remainder of these arguments are optimal.
159
+        quote = "",
160
+        strip.white = FALSE,
161
+        showProgress = as.logical(verbose),
162
+        nThread = nThread)
156 163
 
157 164
     # Construct the result -----------------------------------------------------
158 165