Browse code

embed samtools sources directly, since Rsamtools has moved to htslib

Michael Lawrence authored on 11/02/2019 20:33:07
Showing1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,859 @@
1
+/*
2
+ * RAZF : Random Access compressed(Z) File
3
+ * Version: 1.0
4
+ * Release Date: 2008-10-27
5
+ *
6
+ * Copyright 2008, Jue Ruan <ruanjue@gmail.com>, Heng Li <lh3@sanger.ac.uk>
7
+ *
8
+ * All rights reserved.
9
+ *
10
+ * Redistribution and use in source and binary forms, with or without
11
+ * modification, are permitted provided that the following conditions
12
+ * are met:
13
+ * 1. Redistributions of source code must retain the above copyright
14
+ *    notice, this list of conditions and the following disclaimer.
15
+ * 2. Redistributions in binary form must reproduce the above copyright
16
+ *    notice, this list of conditions and the following disclaimer in the
17
+ *    documentation and/or other materials provided with the distribution.
18
+ *
19
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29
+ * SUCH DAMAGE.
30
+ */
31
+
32
+#ifndef _NO_RAZF
33
+
34
+#include <fcntl.h>
35
+#include <stdio.h>
36
+#include <stdlib.h>
37
+#include <string.h>
38
+#include <unistd.h>
39
+#include "razf.h"
40
+
41
+
42
+#if ZLIB_VERNUM < 0x1221
43
/*
 * Local stand-in for the gzip header record, compiled only when
 * ZLIB_VERNUM < 0x1221 (see the enclosing #if).  The member names match
 * the ones razf_open_w() assigns through rz->header (text, time, os,
 * extra, extra_len, name, comment, hcrc).
 * NOTE(review): presumably this mirrors zlib's own gz_header layout --
 * confirm against the zlib headers before relying on field order.
 */
struct _gz_header_s {
    int     text;
    uLong   time;
    int     xflags;
    int     os;
    Bytef   *extra;      /* extra-field bytes; razf_open_w stores "RAZF" + 3 bytes here */
    uInt    extra_len;   /* number of bytes held in `extra` */
    uInt    extra_max;
    Bytef   *name;
    uInt    name_max;
    Bytef   *comment;
    uInt    comm_max;
    int     hcrc;
    int     done;
};
58
+#warning "zlib < 1.2.2.1; RAZF writing is disabled."
59
+#endif
60
+
61
+#define DEF_MEM_LEVEL 8
62
+
63
/* Return `v` with its four bytes in reverse order. */
static inline uint32_t byte_swap_4(uint32_t v){
	/* First exchange the 16-bit halves, then the bytes inside each half. */
	const uint32_t halves = (v << 16) | (v >> 16);
	const uint32_t moved_up   = (halves & 0x00FF00FFU) << 8;
	const uint32_t moved_down = (halves >> 8) & 0x00FF00FFU;
	return moved_up | moved_down;
}
67
+
68
/* Return `v` with its eight bytes in reverse order. */
static inline uint64_t byte_swap_8(uint64_t v){
	/* Swap 32-bit halves, then 16-bit quarters, then adjacent bytes. */
	uint64_t r = (v << 32) | (v >> 32);
	r = ((r << 16) & 0xFFFF0000FFFF0000ULL) | ((r >> 16) & 0x0000FFFF0000FFFFULL);
	r = ((r << 8)  & 0xFF00FF00FF00FF00ULL) | ((r >> 8)  & 0x00FF00FF00FF00FFULL);
	return r;
}
73
+
74
/*
 * Report the host byte order: returns non-zero when the lowest-addressed
 * byte of an int holding 1 is not 1 (i.e. the host is not little-endian).
 */
static inline int is_big_endian(){
	const int probe = 1;
	const char *lowest = (const char *)&probe;
	return *lowest != 1;
}
79
+
80
+#ifndef _RZ_READONLY
81
+static void add_zindex(RAZF *rz, int64_t in, int64_t out){
82
+	if(rz->index->size == rz->index->cap){
83
+		rz->index->cap = rz->index->cap * 1.5 + 2;
84
+		rz->index->cell_offsets = realloc(rz->index->cell_offsets, sizeof(int) * rz->index->cap);
85
+		rz->index->bin_offsets  = realloc(rz->index->bin_offsets, sizeof(int64_t) * (rz->index->cap/RZ_BIN_SIZE + 1));
86
+	}
87
+	if(rz->index->size % RZ_BIN_SIZE == 0) rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE] = out;
88
+	rz->index->cell_offsets[rz->index->size] = out - rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE];
89
+	rz->index->size ++;
90
+}
91
+
92
+static void save_zindex(RAZF *rz, int fd){
93
+	int32_t i, v32;
94
+	int is_be;
95
+	is_be = is_big_endian();
96
+	if(is_be) write(fd, &rz->index->size, sizeof(int));
97
+	else {
98
+		v32 = byte_swap_4((uint32_t)rz->index->size);
99
+		write(fd, &v32, sizeof(uint32_t));
100
+	}
101
+	v32 = rz->index->size / RZ_BIN_SIZE + 1;
102
+	if(!is_be){
103
+		for(i=0;i<v32;i++) rz->index->bin_offsets[i]  = byte_swap_8((uint64_t)rz->index->bin_offsets[i]);
104
+		for(i=0;i<rz->index->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]);
105
+	}
106
+	write(fd, rz->index->bin_offsets, sizeof(int64_t) * v32);
107
+	write(fd, rz->index->cell_offsets, sizeof(int32_t) * rz->index->size);
108
+}
109
+#endif
110
+
111
+#ifdef _USE_KNETFILE
112
+static void load_zindex(RAZF *rz, knetFile *fp){
113
+#else
114
+static void load_zindex(RAZF *rz, int fd){
115
+#endif
116
+	int32_t i, v32;
117
+	int is_be;
118
+	if(!rz->load_index) return;
119
+	if(rz->index == NULL) rz->index = malloc(sizeof(ZBlockIndex));
120
+	is_be = is_big_endian();
121
+#ifdef _USE_KNETFILE
122
+	knet_read(fp, &rz->index->size, sizeof(int));
123
+#else
124
+	read(fd, &rz->index->size, sizeof(int));
125
+#endif
126
+	if(!is_be) rz->index->size = byte_swap_4((uint32_t)rz->index->size);
127
+	rz->index->cap = rz->index->size;
128
+	v32 = rz->index->size / RZ_BIN_SIZE + 1;
129
+	rz->index->bin_offsets  = malloc(sizeof(int64_t) * v32);
130
+#ifdef _USE_KNETFILE
131
+	knet_read(fp, rz->index->bin_offsets, sizeof(int64_t) * v32);
132
+#else
133
+	read(fd, rz->index->bin_offsets, sizeof(int64_t) * v32);
134
+#endif
135
+	rz->index->cell_offsets = malloc(sizeof(int) * rz->index->size);
136
+#ifdef _USE_KNETFILE
137
+	knet_read(fp, rz->index->cell_offsets, sizeof(int) * rz->index->size);
138
+#else
139
+	read(fd, rz->index->cell_offsets, sizeof(int) * rz->index->size);
140
+#endif
141
+	if(!is_be){
142
+		for(i=0;i<v32;i++) rz->index->bin_offsets[i] = byte_swap_8((uint64_t)rz->index->bin_offsets[i]);
143
+		for(i=0;i<rz->index->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]);
144
+	}
145
+}
146
+
147
+#ifdef _RZ_READONLY
148
+static RAZF* razf_open_w(int fd)
149
+{
150
+	fprintf(stderr, "[razf_open_w] Writing is not available with zlib ver < 1.2.2.1\n");
151
+	return 0;
152
+}
153
+#else
154
+static RAZF* razf_open_w(int fd){
155
+	RAZF *rz;
156
+#ifdef _WIN32
157
+	setmode(fd, O_BINARY);
158
+#endif
159
+	rz = calloc(1, sizeof(RAZF));
160
+	rz->mode = 'w';
161
+#ifdef _USE_KNETFILE
162
+    rz->x.fpw = fd;
163
+#else
164
+	rz->filedes = fd;
165
+#endif
166
+	rz->stream = calloc(sizeof(z_stream), 1);
167
+	rz->inbuf  = malloc(RZ_BUFFER_SIZE);
168
+	rz->outbuf = malloc(RZ_BUFFER_SIZE);
169
+	rz->index = calloc(sizeof(ZBlockIndex), 1);
170
+	deflateInit2(rz->stream, RZ_COMPRESS_LEVEL, Z_DEFLATED, WINDOW_BITS + 16, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
171
+	rz->stream->avail_out = RZ_BUFFER_SIZE;
172
+	rz->stream->next_out  = rz->outbuf;
173
+	rz->header = calloc(sizeof(gz_header), 1);
174
+	rz->header->os    = 0x03; //Unix
175
+	rz->header->text  = 0;
176
+	rz->header->time  = 0;
177
+	rz->header->extra = malloc(7);
178
+	strncpy((char*)rz->header->extra, "RAZF", 4);
179
+	rz->header->extra[4] = 1; // obsolete field
180
+	// block size = RZ_BLOCK_SIZE, Big-Endian
181
+	rz->header->extra[5] = RZ_BLOCK_SIZE >> 8;
182
+	rz->header->extra[6] = RZ_BLOCK_SIZE & 0xFF;
183
+	rz->header->extra_len = 7;
184
+	rz->header->name = rz->header->comment  = 0;
185
+	rz->header->hcrc = 0;
186
+	deflateSetHeader(rz->stream, rz->header);
187
+	rz->block_pos = rz->block_off = 0;
188
+	return rz;
189
+}
190
+
191
+static void _razf_write(RAZF* rz, const void *data, int size){
192
+	int tout;
193
+	rz->stream->avail_in = size;
194
+	rz->stream->next_in  = (void*)data;
195
+	while(1){
196
+		tout = rz->stream->avail_out;
197
+		deflate(rz->stream, Z_NO_FLUSH);
198
+		rz->out += tout - rz->stream->avail_out;
199
+		if(rz->stream->avail_out) break;
200
+#ifdef _USE_KNETFILE
201
+		write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
202
+#else
203
+		write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
204
+#endif
205
+		rz->stream->avail_out = RZ_BUFFER_SIZE;
206
+		rz->stream->next_out  = rz->outbuf;
207
+		if(rz->stream->avail_in == 0) break;
208
+	};
209
+	rz->in += size - rz->stream->avail_in;
210
+	rz->block_off += size - rz->stream->avail_in;
211
+}
212
+
213
+static void razf_flush(RAZF *rz){
214
+	uint32_t tout;
215
+	if(rz->buf_len){
216
+		_razf_write(rz, rz->inbuf, rz->buf_len);
217
+		rz->buf_off = rz->buf_len = 0;
218
+	}
219
+	if(rz->stream->avail_out){
220
+#ifdef _USE_KNETFILE    
221
+		write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
222
+#else        
223
+		write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
224
+#endif
225
+		rz->stream->avail_out = RZ_BUFFER_SIZE;
226
+		rz->stream->next_out  = rz->outbuf;
227
+	}
228
+	while(1){
229
+		tout = rz->stream->avail_out;
230
+		deflate(rz->stream, Z_FULL_FLUSH);
231
+		rz->out += tout - rz->stream->avail_out;
232
+		if(rz->stream->avail_out == 0){
233
+#ifdef _USE_KNETFILE    
234
+			write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
235
+#else            
236
+			write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
237
+#endif
238
+			rz->stream->avail_out = RZ_BUFFER_SIZE;
239
+			rz->stream->next_out  = rz->outbuf;
240
+		} else break;
241
+	}
242
+	rz->block_pos = rz->out;
243
+	rz->block_off = 0;
244
+}
245
+
246
+static void razf_end_flush(RAZF *rz){
247
+	uint32_t tout;
248
+	if(rz->buf_len){
249
+		_razf_write(rz, rz->inbuf, rz->buf_len);
250
+		rz->buf_off = rz->buf_len = 0;
251
+	}
252
+	while(1){
253
+		tout = rz->stream->avail_out;
254
+		deflate(rz->stream, Z_FINISH);
255
+		rz->out += tout - rz->stream->avail_out;
256
+		if(rz->stream->avail_out < RZ_BUFFER_SIZE){
257
+#ifdef _USE_KNETFILE        
258
+			write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
259
+#else            
260
+			write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
261
+#endif
262
+			rz->stream->avail_out = RZ_BUFFER_SIZE;
263
+			rz->stream->next_out  = rz->outbuf;
264
+		} else break;
265
+	}
266
+}
267
+
268
+static void _razf_buffered_write(RAZF *rz, const void *data, int size){
269
+	int i, n;
270
+	while(1){
271
+		if(rz->buf_len == RZ_BUFFER_SIZE){
272
+			_razf_write(rz, rz->inbuf, rz->buf_len);
273
+			rz->buf_len = 0;
274
+		}
275
+		if(size + rz->buf_len < RZ_BUFFER_SIZE){
276
+			for(i=0;i<size;i++) ((char*)rz->inbuf + rz->buf_len)[i] = ((char*)data)[i];
277
+			rz->buf_len += size;
278
+			return;
279
+		} else {
280
+			n = RZ_BUFFER_SIZE - rz->buf_len;
281
+			for(i=0;i<n;i++) ((char*)rz->inbuf + rz->buf_len)[i] = ((char*)data)[i];
282
+			size -= n;
283
+			data += n;
284
+			rz->buf_len += n;
285
+		}
286
+	}
287
+}
288
+
289
+int razf_write(RAZF* rz, const void *data, int size){
290
+	int ori_size, n;
291
+	int64_t next_block;
292
+	ori_size = size;
293
+	next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE;
294
+	while(rz->in + rz->buf_len + size >= next_block){
295
+		n = next_block - rz->in - rz->buf_len;
296
+		_razf_buffered_write(rz, data, n);
297
+		data += n;
298
+		size -= n;
299
+		razf_flush(rz);
300
+		add_zindex(rz, rz->in, rz->out);
301
+		next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE;
302
+	}
303
+	_razf_buffered_write(rz, data, size);
304
+	return ori_size;
305
+}
306
+#endif
307
+
308
+/* gzip flag byte */
309
+#define ASCII_FLAG   0x01 /* bit 0 set: file probably ascii text */
310
+#define HEAD_CRC     0x02 /* bit 1 set: header CRC present */
311
+#define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
312
+#define ORIG_NAME    0x08 /* bit 3 set: original file name present */
313
+#define COMMENT      0x10 /* bit 4 set: file comment present */
314
+#define RESERVED     0xE0 /* bits 5..7: reserved */
315
+
316
+static int _read_gz_header(unsigned char *data, int size, int *extra_off, int *extra_len){
317
+	int method, flags, n, len;
318
+	if(size < 2) return 0;
319
+	if(data[0] != 0x1f || data[1] != 0x8b) return 0;
320
+	if(size < 4) return 0;
321
+	method = data[2];
322
+	flags  = data[3];
323
+	if(method != Z_DEFLATED || (flags & RESERVED)) return 0;
324
+	n = 4 + 6; // Skip 6 bytes
325
+	*extra_off = n + 2;
326
+	*extra_len = 0;
327
+	if(flags & EXTRA_FIELD){
328
+		if(size < n + 2) return 0;
329
+		len = ((int)data[n + 1] << 8) | data[n];
330
+		n += 2;
331
+		*extra_off = n;
332
+		while(len){
333
+			if(n >= size) return 0;
334
+			n ++;
335
+			len --;
336
+		}
337
+		*extra_len = n - (*extra_off);
338
+	}
339
+	if(flags & ORIG_NAME) while(n < size && data[n++]);
340
+	if(flags & COMMENT) while(n < size && data[n++]);
341
+	if(flags & HEAD_CRC){
342
+		if(n + 2 > size) return 0;
343
+		n += 2;
344
+	}
345
+	return n;
346
+}
347
+
348
+#ifdef _USE_KNETFILE
349
+static RAZF* razf_open_r(knetFile *fp, int _load_index){
350
+#else
351
+static RAZF* razf_open_r(int fd, int _load_index){
352
+#endif
353
+	RAZF *rz;
354
+	int ext_off, ext_len;
355
+	int n, is_be, ret;
356
+	int64_t end;
357
+	unsigned char c[] = "RAZF";
358
+	rz = calloc(1, sizeof(RAZF));
359
+	rz->mode = 'r';
360
+#ifdef _USE_KNETFILE
361
+    rz->x.fpr = fp;
362
+#else
363
+#ifdef _WIN32
364
+	setmode(fd, O_BINARY);
365
+#endif
366
+	rz->filedes = fd;
367
+#endif
368
+	rz->stream = calloc(sizeof(z_stream), 1);
369
+	rz->inbuf  = malloc(RZ_BUFFER_SIZE);
370
+	rz->outbuf = malloc(RZ_BUFFER_SIZE);
371
+	rz->end = rz->src_end = 0x7FFFFFFFFFFFFFFFLL;
372
+#ifdef _USE_KNETFILE
373
+    n = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE);
374
+#else
375
+	n = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE);
376
+#endif
377
+	ret = _read_gz_header(rz->inbuf, n, &ext_off, &ext_len);
378
+	if(ret == 0){
379
+		PLAIN_FILE:
380
+		rz->in = n;
381
+		rz->file_type = FILE_TYPE_PLAIN;
382
+		memcpy(rz->outbuf, rz->inbuf, n);
383
+		rz->buf_len = n;
384
+		free(rz->stream);
385
+		rz->stream = NULL;
386
+		return rz;
387
+	}
388
+	rz->header_size = ret;
389
+	ret = inflateInit2(rz->stream, -WINDOW_BITS);
390
+	if(ret != Z_OK){ inflateEnd(rz->stream); goto PLAIN_FILE;}
391
+	rz->stream->avail_in = n - rz->header_size;
392
+	rz->stream->next_in  = rz->inbuf + rz->header_size;
393
+	rz->stream->avail_out = RZ_BUFFER_SIZE;
394
+	rz->stream->next_out  = rz->outbuf;
395
+	rz->file_type = FILE_TYPE_GZ;
396
+	rz->in = rz->header_size;
397
+	rz->block_pos = rz->header_size;
398
+	rz->next_block_pos = rz->header_size;
399
+	rz->block_off = 0;
400
+	if(ext_len < 7 || memcmp(rz->inbuf + ext_off, c, 4) != 0) return rz;
401
+	if(((((unsigned char*)rz->inbuf)[ext_off + 5] << 8) | ((unsigned char*)rz->inbuf)[ext_off + 6]) != RZ_BLOCK_SIZE){
402
+		fprintf(stderr, " -- WARNING: RZ_BLOCK_SIZE is not %d, treat source as gz file.  in %s -- %s:%d --\n", RZ_BLOCK_SIZE, __FUNCTION__, __FILE__, __LINE__);
403
+		return rz;
404
+	}
405
+	rz->load_index = _load_index;
406
+	rz->file_type = FILE_TYPE_RZ;
407
+#ifdef _USE_KNETFILE
408
+	if(knet_seek(fp, -16, SEEK_END) == -1){
409
+#else
410
+	if(lseek(fd, -16, SEEK_END) == -1){
411
+#endif
412
+		UNSEEKABLE:
413
+		rz->seekable = 0;
414
+		rz->index = NULL;
415
+		rz->src_end = rz->end = 0x7FFFFFFFFFFFFFFFLL;
416
+	} else {
417
+		is_be = is_big_endian();
418
+		rz->seekable = 1;
419
+#ifdef _USE_KNETFILE
420
+        knet_read(fp, &end, sizeof(int64_t));
421
+#else
422
+		read(fd, &end, sizeof(int64_t));
423
+#endif        
424
+		if(!is_be) rz->src_end = (int64_t)byte_swap_8((uint64_t)end);
425
+		else rz->src_end = end;
426
+
427
+#ifdef _USE_KNETFILE
428
+		knet_read(fp, &end, sizeof(int64_t));
429
+#else
430
+		read(fd, &end, sizeof(int64_t));
431
+#endif        
432
+		if(!is_be) rz->end = (int64_t)byte_swap_8((uint64_t)end);
433
+		else rz->end = end;
434
+		if(n > rz->end){
435
+			rz->stream->avail_in -= n - rz->end;
436
+			n = rz->end;
437
+		}
438
+		if(rz->end > rz->src_end){
439
+#ifdef _USE_KNETFILE
440
+            knet_seek(fp, rz->in, SEEK_SET);
441
+#else
442
+			lseek(fd, rz->in, SEEK_SET);
443
+#endif
444
+			goto UNSEEKABLE;
445
+		}
446
+#ifdef _USE_KNETFILE
447
+        knet_seek(fp, rz->end, SEEK_SET);
448
+		if(knet_tell(fp) != rz->end){
449
+			knet_seek(fp, rz->in, SEEK_SET);
450
+#else
451
+		if(lseek(fd, rz->end, SEEK_SET) != rz->end){
452
+			lseek(fd, rz->in, SEEK_SET);
453
+#endif
454
+			goto UNSEEKABLE;
455
+		}
456
+#ifdef _USE_KNETFILE
457
+		load_zindex(rz, fp);
458
+		knet_seek(fp, n, SEEK_SET);
459
+#else
460
+		load_zindex(rz, fd);
461
+		lseek(fd, n, SEEK_SET);
462
+#endif
463
+	}
464
+	return rz;
465
+}
466
+
467
+#ifdef _USE_KNETFILE
468
+RAZF* razf_dopen(int fd, const char *mode){
469
+    if (strstr(mode, "r")) fprintf(stderr,"[razf_dopen] implement me\n");
470
+    else if(strstr(mode, "w")) return razf_open_w(fd);
471
+	return NULL;
472
+}
473
+
474
+RAZF* razf_dopen2(int fd, const char *mode)
475
+{
476
+    fprintf(stderr,"[razf_dopen2] implement me\n");
477
+    return NULL;
478
+}
479
+#else
480
+RAZF* razf_dopen(int fd, const char *mode){
481
+	if(strstr(mode, "r")) return razf_open_r(fd, 1);
482
+	else if(strstr(mode, "w")) return razf_open_w(fd);
483
+	else return NULL;
484
+}
485
+
486
+RAZF* razf_dopen2(int fd, const char *mode)
487
+{
488
+	if(strstr(mode, "r")) return razf_open_r(fd, 0);
489
+	else if(strstr(mode, "w")) return razf_open_w(fd);
490
+	else return NULL;
491
+}
492
+#endif
493
+
494
+static inline RAZF* _razf_open(const char *filename, const char *mode, int _load_index){
495
+	int fd;
496
+	RAZF *rz;
497
+	if(strstr(mode, "r")){
498
+#ifdef _USE_KNETFILE
499
+        knetFile *fd = knet_open(filename, "r");
500
+        if (fd == 0) {
501
+            fprintf(stderr, "[_razf_open] fail to open %s\n", filename);
502
+            return NULL;
503
+        }
504
+#else
505
+#ifdef _WIN32
506
+		fd = open(filename, O_RDONLY | O_BINARY);
507
+#else
508
+		fd = open(filename, O_RDONLY);
509
+#endif
510
+#endif
511
+		if(fd < 0) return NULL;
512
+		rz = razf_open_r(fd, _load_index);
513
+	} else if(strstr(mode, "w")){
514
+#ifdef _WIN32
515
+		fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
516
+#else
517
+		fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0666);
518
+#endif
519
+		if(fd < 0) return NULL;
520
+		rz = razf_open_w(fd);
521
+	} else return NULL;
522
+	return rz;
523
+}
524
+
525
+RAZF* razf_open(const char *filename, const char *mode){
526
+	return _razf_open(filename, mode, 1);
527
+}
528
+
529
+RAZF* razf_open2(const char *filename, const char *mode){
530
+	return _razf_open(filename, mode, 0);
531
+}
532
+
533
+int razf_get_data_size(RAZF *rz, int64_t *u_size, int64_t *c_size){
534
+	int64_t n;
535
+	if(rz->mode != 'r' && rz->mode != 'R') return 0;
536
+	switch(rz->file_type){
537
+		case FILE_TYPE_PLAIN:
538
+			if(rz->end == 0x7fffffffffffffffLL){
539
+#ifdef _USE_KNETFILE
540
+				if(knet_seek(rz->x.fpr, 0, SEEK_CUR) == -1) return 0;
541
+                n = knet_tell(rz->x.fpr);
542
+				knet_seek(rz->x.fpr, 0, SEEK_END);
543
+                rz->end = knet_tell(rz->x.fpr);
544
+				knet_seek(rz->x.fpr, n, SEEK_SET);
545
+#else
546
+				if((n = lseek(rz->filedes, 0, SEEK_CUR)) == -1) return 0;
547
+				rz->end = lseek(rz->filedes, 0, SEEK_END);
548
+				lseek(rz->filedes, n, SEEK_SET);
549
+#endif                
550
+			}
551
+			*u_size = *c_size = rz->end;
552
+			return 1;
553
+		case FILE_TYPE_GZ:
554
+			return 0;
555
+		case FILE_TYPE_RZ:
556
+			if(rz->src_end == rz->end) return 0;
557
+			*u_size = rz->src_end;
558
+			*c_size = rz->end;
559
+			return 1;
560
+		default:
561
+			return 0;
562
+	}
563
+}
564
+
565
+static int _razf_read(RAZF* rz, void *data, int size){
566
+	int ret, tin;
567
+	if(rz->z_eof || rz->z_err) return 0;
568
+	if (rz->file_type == FILE_TYPE_PLAIN) {
569
+#ifdef _USE_KNETFILE
570
+		ret = knet_read(rz->x.fpr, data, size);
571
+#else
572
+		ret = read(rz->filedes, data, size);
573
+#endif        
574
+		if (ret == 0) rz->z_eof = 1;
575
+		return ret;
576
+	}
577
+	rz->stream->avail_out = size;
578
+	rz->stream->next_out  = data;
579
+	while(rz->stream->avail_out){
580
+		if(rz->stream->avail_in == 0){
581
+			if(rz->in >= rz->end){ rz->z_eof = 1; break; }
582
+			if(rz->end - rz->in < RZ_BUFFER_SIZE){
583
+#ifdef _USE_KNETFILE
584
+				rz->stream->avail_in = knet_read(rz->x.fpr, rz->inbuf, rz->end -rz->in);
585
+#else
586
+				rz->stream->avail_in = read(rz->filedes, rz->inbuf, rz->end -rz->in);
587
+#endif        
588
+			} else {
589
+#ifdef _USE_KNETFILE
590
+				rz->stream->avail_in = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE);
591
+#else
592
+				rz->stream->avail_in = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE);
593
+#endif        
594
+			}
595
+			if(rz->stream->avail_in == 0){
596
+				rz->z_eof = 1;
597
+				break;
598
+			}
599
+			rz->stream->next_in = rz->inbuf;
600
+		}
601
+		tin = rz->stream->avail_in;
602
+		ret = inflate(rz->stream, Z_BLOCK);
603
+		rz->in += tin - rz->stream->avail_in;
604
+		if(ret == Z_NEED_DICT || ret == Z_MEM_ERROR || ret == Z_DATA_ERROR){
605
+			fprintf(stderr, "[_razf_read] inflate error: %d %s (at %s:%d)\n", ret, rz->stream->msg ? rz->stream->msg : "", __FILE__, __LINE__);
606
+			rz->z_err = 1;
607
+			break;
608
+		}
609
+		if(ret == Z_STREAM_END){
610
+			rz->z_eof = 1;
611
+			break;
612
+		}
613
+		if ((rz->stream->data_type&128) && !(rz->stream->data_type&64)){
614
+			rz->buf_flush = 1;
615
+			rz->next_block_pos = rz->in;
616
+			break;
617
+		}
618
+	}
619
+	return size - rz->stream->avail_out;
620
+}
621
+
622
+int razf_read(RAZF *rz, void *data, int size){
623
+	int ori_size, i;
624
+	ori_size = size;
625
+	while(size > 0){
626
+		if(rz->buf_len){
627
+			if(size < rz->buf_len){
628
+				for(i=0;i<size;i++) ((char*)data)[i] = ((char*)rz->outbuf + rz->buf_off)[i];
629
+				rz->buf_off += size;
630
+				rz->buf_len -= size;
631
+				data += size;
632
+				rz->block_off += size;
633
+				size = 0;
634
+				break;
635
+			} else {
636
+				for(i=0;i<rz->buf_len;i++) ((char*)data)[i] = ((char*)rz->outbuf + rz->buf_off)[i];
637
+				data += rz->buf_len;
638
+				size -= rz->buf_len;
639
+				rz->block_off += rz->buf_len;
640
+				rz->buf_off = 0;
641
+				rz->buf_len = 0;
642
+				if(rz->buf_flush){
643
+					rz->block_pos = rz->next_block_pos;
644
+					rz->block_off = 0;
645
+					rz->buf_flush = 0;
646
+				}
647
+			}
648
+		} else if(rz->buf_flush){
649
+			rz->block_pos = rz->next_block_pos;
650
+			rz->block_off = 0;
651
+			rz->buf_flush = 0;
652
+		}
653
+		if(rz->buf_flush) continue;
654
+		rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE);
655
+		if((rz->z_eof && rz->buf_len == 0) || rz->z_err) break; /* MTM */
656
+	}
657
+	rz->out += ori_size - size;
658
+	return ori_size - size;
659
+}
660
+
661
+int razf_skip(RAZF* rz, int size){
662
+	int ori_size;
663
+	ori_size = size;
664
+	while(size > 0){
665
+		if(rz->buf_len){
666
+			if(size < rz->buf_len){
667
+				rz->buf_off += size;
668
+				rz->buf_len -= size;
669
+				rz->block_off += size;
670
+				size = 0;
671
+				break;
672
+			} else {
673
+				size -= rz->buf_len;
674
+				rz->buf_off = 0;
675
+				rz->buf_len = 0;
676
+				rz->block_off += rz->buf_len;
677
+				if(rz->buf_flush){
678
+					rz->block_pos = rz->next_block_pos;
679
+					rz->block_off = 0;
680
+					rz->buf_flush = 0;
681
+				}
682
+			}
683
+		} else if(rz->buf_flush){
684
+			rz->block_pos = rz->next_block_pos;
685
+			rz->block_off = 0;
686
+			rz->buf_flush = 0;
687
+		}
688
+		if(rz->buf_flush) continue;
689
+		rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE);
690
+		if((rz->z_eof && rz->buf_len == 0) || rz->z_err) break; /* MTM */
691
+	}
692
+	rz->out += ori_size - size;
693
+	return ori_size - size;
694
+}
695
+
696
+static void _razf_reset_read(RAZF *rz, int64_t in, int64_t out){
697
+#ifdef _USE_KNETFILE
698
+	knet_seek(rz->x.fpr, in, SEEK_SET);
699
+#else
700
+	lseek(rz->filedes, in, SEEK_SET);
701
+#endif
702
+	rz->in  = in;
703
+	rz->out = out;
704
+	rz->block_pos = in;
705
+	rz->next_block_pos = in;
706
+	rz->block_off = 0;
707
+	rz->buf_flush = 0;
708
+	rz->z_eof = rz->z_err = 0;
709
+	inflateReset(rz->stream);
710
+	rz->stream->avail_in = 0;
711
+	rz->buf_off = rz->buf_len = 0;
712
+}
713
+
714
+int64_t razf_jump(RAZF *rz, int64_t block_start, int block_offset){
715
+	int64_t pos;
716
+	rz->z_eof = 0;
717
+	if(rz->file_type == FILE_TYPE_PLAIN){
718
+		rz->buf_off = rz->buf_len = 0;
719
+		pos = block_start + block_offset;
720
+#ifdef _USE_KNETFILE
721
+		knet_seek(rz->x.fpr, pos, SEEK_SET);
722
+        pos = knet_tell(rz->x.fpr);
723
+#else
724
+		pos = lseek(rz->filedes, pos, SEEK_SET);
725
+#endif
726
+		rz->out = rz->in = pos;
727
+		return pos;
728
+	}
729
+	if(block_start == rz->block_pos && block_offset >= rz->block_off) {
730
+		block_offset -= rz->block_off;
731
+		goto SKIP; // Needn't reset inflate
732
+	}
733
+	if(block_start  == 0) block_start = rz->header_size; // Automaticly revist wrong block_start
734
+	_razf_reset_read(rz, block_start, 0);
735
+	SKIP:
736
+	if(block_offset) razf_skip(rz, block_offset);
737
+	return rz->block_off;
738
+}
739
+
740
+int64_t razf_seek(RAZF* rz, int64_t pos, int where){
741
+	int64_t idx;
742
+	int64_t seek_pos, new_out;
743
+	rz->z_eof = 0;
744
+	if (where == SEEK_CUR) pos += rz->out;
745
+	else if (where == SEEK_END) pos += rz->src_end;
746
+	if(rz->file_type == FILE_TYPE_PLAIN){
747
+#ifdef _USE_KNETFILE
748
+		knet_seek(rz->x.fpr, pos, SEEK_SET);
749
+        seek_pos = knet_tell(rz->x.fpr);
750
+#else
751
+		seek_pos = lseek(rz->filedes, pos, SEEK_SET);
752
+#endif
753
+		rz->buf_off = rz->buf_len = 0;
754
+		rz->out = rz->in = seek_pos;
755
+		return seek_pos;
756
+	} else if(rz->file_type == FILE_TYPE_GZ){
757
+		if(pos >= rz->out) goto SKIP;
758
+                /* MTM */
759
+                fprintf(stderr,
760
+                        ".gz files support sequential access only; uncompress");
761
+                abort();
762
+		return rz->out;
763
+	}
764
+	if(pos == rz->out) return pos;
765
+	if(pos > rz->src_end) return rz->out;
766
+	if(!rz->seekable || !rz->load_index){
767
+		if(pos >= rz->out) goto SKIP;
768
+	}
769
+	idx = pos / RZ_BLOCK_SIZE - 1;
770
+	seek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]);
771
+	new_out  = (idx + 1) * RZ_BLOCK_SIZE;
772
+	if(pos > rz->out && new_out <= rz->out) goto SKIP;
773
+	_razf_reset_read(rz, seek_pos, new_out);
774
+	SKIP:
775
+	razf_skip(rz, (int)(pos - rz->out));
776
+	return rz->out;
777
+}
778
+
779
+uint64_t razf_tell2(RAZF *rz)
780
+{
781
+	/*
782
+	if (rz->load_index) {
783
+		int64_t idx, seek_pos;
784
+		idx = rz->out / RZ_BLOCK_SIZE - 1;
785
+		seek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]);
786
+		if (seek_pos != rz->block_pos || rz->out%RZ_BLOCK_SIZE != rz->block_off)
787
+			fprintf(stderr, "[razf_tell2] inconsistent block offset: (%lld, %lld) != (%lld, %lld)\n",
788
+					(long long)seek_pos, (long long)rz->out%RZ_BLOCK_SIZE, (long long)rz->block_pos, (long long) rz->block_off);
789
+	}
790
+	*/
791
+	return (uint64_t)rz->block_pos<<16 | (rz->block_off&0xffff);
792
+}
793
+
794
+int64_t razf_seek2(RAZF *rz, uint64_t voffset, int where)
795
+{
796
+	if (where != SEEK_SET) return -1;
797
+	return razf_jump(rz, voffset>>16, voffset&0xffff);
798
+}
799
+
800
+void razf_close(RAZF *rz){
801
+	if(rz->mode == 'w'){
802
+#ifndef _RZ_READONLY
803
+		razf_end_flush(rz);
804
+		deflateEnd(rz->stream);
805
+		razf_flush(rz);                  /* MTM */
806
+		add_zindex(rz, rz->in, rz->out); /* MTM */
807
+#ifdef _USE_KNETFILE
808
+		save_zindex(rz, rz->x.fpw);
809
+		if(is_big_endian()){
810
+			write(rz->x.fpw, &rz->in, sizeof(int64_t));
811
+			write(rz->x.fpw, &rz->out, sizeof(int64_t));
812
+		} else {
813
+			uint64_t v64 = byte_swap_8((uint64_t)rz->in);
814
+			write(rz->x.fpw, &v64, sizeof(int64_t));
815
+			v64 = byte_swap_8((uint64_t)rz->out);
816
+			write(rz->x.fpw, &v64, sizeof(int64_t));
817
+		}
818
+#else
819
+		save_zindex(rz, rz->filedes);
820
+		if(is_big_endian()){
821
+			write(rz->filedes, &rz->in, sizeof(int64_t));
822
+			write(rz->filedes, &rz->out, sizeof(int64_t));
823
+		} else {
824
+			uint64_t v64 = byte_swap_8((uint64_t)rz->in);
825
+			write(rz->filedes, &v64, sizeof(int64_t));
826
+			v64 = byte_swap_8((uint64_t)rz->out);
827
+			write(rz->filedes, &v64, sizeof(int64_t));
828
+		}
829
+#endif
830
+#endif
831
+	} else if(rz->mode == 'r'){
832
+		if(rz->stream) inflateEnd(rz->stream);
833
+	}
834
+	if(rz->inbuf) free(rz->inbuf);
835
+	if(rz->outbuf) free(rz->outbuf);
836
+	if(rz->header){
837
+		free(rz->header->extra);
838
+		free(rz->header->name);
839
+		free(rz->header->comment);
840
+		free(rz->header);
841
+	}
842
+	if(rz->index){
843
+		free(rz->index->bin_offsets);
844
+		free(rz->index->cell_offsets);
845
+		free(rz->index);
846
+	}
847
+	free(rz->stream);
848
+#ifdef _USE_KNETFILE
849
+    if (rz->mode == 'r')
850
+        knet_close(rz->x.fpr);
851
+    if (rz->mode == 'w')
852
+        close(rz->x.fpw);
853
+#else
854
+	close(rz->filedes);
855
+#endif
856
+	free(rz);
857
+}
858
+
859
+#endif