Browse code

add seqTools package

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/seqTools@95415 bc3139a8-67e5-0310-9ffc-ced21a209358

Herve Pages authored on 13/10/2014 19:02:34
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,223 @@
1
+/*
2
+ * dna_astream.h
3
+ *
4
+ *  Created on: 07.10.2013
5
+ *      Author: wolfgang
6
+ */
7
+
8
+#ifndef DNA_ASTREAM_H_
9
+#define DNA_ASTREAM_H_
10
+
11
+#include "dna_fstream.h"
12
+
13
+///////////////////////////////////////////////////////////////////////////////////////////////////
14
+//
15
+// dna file stream
16
+//		Keeps two (equal sized) character buffers (raw and processed) and two iterators
17
+//
18
+//		Raw buffer can be (re-) filled from dna_fstream.
19
+//		Client struct can define functions for searching and copying content into
20
+//		processed buffer.
21
+//
22
+///////////////////////////////////////////////////////////////////////////////////////////////////
23
+
24
+
25
+// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + //
26
+//
27
+// Define buffer size and delimiting characters
28
+//
29
+// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + //
30
+
31
// Terminator written behind the last valid character of a buffer
static const char das_char_eos = '\0';
32
+
33
+// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + //
34
+//
35
+// Define status flags
36
+//
37
+// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + //
38
+
39
// Stream state values.
// das_ok has to stay 0 because it doubles as the "success" return value.
// Note that das_err (-1) has every bit set, das_empty occupies bit 0.
static const int das_err   = -1;	// Unrecoverable error state
static const int das_ok    =  0;	// Must be 0 !!!
static const int das_empty =  1;	// Future raw size =0
42
+
43
+// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + //
44
+
45
+
46
+// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + //
47
+//
48
+// 	Struct definition
49
+//
50
+// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + //
51
+
52
+typedef struct dna_astream
53
+{
54
+
55
+	dfStream *dnaf;
56
+
57
+	// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +  //
58
+	// Character array declarations
59
+
60
+	unsigned nchar;		// Maximal number of characters in rfc and pos
61
+						// (array size=nchar+1)
62
+
63
+	// Pointers to character arrays (= begin pointers)
64
+	char * rfc;			// Raw file content				: rfc-array
65
+	char * pos;			// Processed output sequence	: pos-array
66
+
67
+	// Iterators which reside inside arrays
68
+	char * r_iter;		// iterator for rfc
69
+	char * p_iter;		// iterator for pos
70
+
71
+	//  Point to past-the-end character of arrays ('\0')
72
+	char * r_end;		// rfc array
73
+	char * p_end;		// behind last retrievable character
74
+
75
+	int npPos;		// Number of character values in pos array (=strlen)
76
+	//
77
+	// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +  //
78
+
79
+	int state;	// Carries stream state flags
80
+
81
+	unsigned nFill;
82
+	unsigned nFillWhole;
83
+	unsigned nFillWholeIncomp;
84
+	unsigned nFillPart;
85
+	unsigned nFillPartIncomp;
86
+
87
+} daStream;
88
+
89
+// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + //
90
+//
91
+// Check routines
92
+//
93
+// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + //
94
+
95
+static inline int dasEmpty(daStream *das) 		{ return das->r_iter == das->r_end; }
96
+static inline int dasProcEmpty(daStream *das)	{ return das->p_iter == das->p_end; }
97
+static inline int dasIsError(daStream *das)		{ return das->state & das_err; }
98
+static inline int dasIsOpen(daStream *das)		{ return dfs_isOpen(das->dnaf); }
99
+static inline int dasIsEof(daStream *das)		{ return dfs_stream_eof(das->dnaf); }
100
+
101
+// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + //
102
+//
103
+// Constructing and file operations
104
+//
105
+// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + //
106
+
107
+void das_destroy(daStream *das)
108
+{
109
+	if(das)
110
+	{
111
+		dfs_destroy(das->dnaf);
112
+		das->dnaf=0;
113
+		free(das->rfc);
114
+		das->rfc=0;
115
+		free(das->pos);
116
+		das->pos=0;
117
+		free(das);
118
+	}
119
+}
120
+
121
+
122
+int das_fill(daStream *das)
123
+{
124
+	//printf("[das_fill] before: '%s'\n",das->rfc);
125
+	size_t count = 0;
126
+
127
+	int rhs= (int)(das->r_end  - das->r_iter);		// Number of unprocessed characters in array
128
+	int lhs= (int)(das->r_iter - das->rfc);			// Number of   processed characters in array
129
+
130
+	if(dfs_isOpen(das->dnaf))
131
+	{
132
+		if(rhs>0)
133
+		{
134
+			//printf("[das_fill] rhs= %u\n",rhs);
135
+			if(lhs<rhs)
136
+			{
137
+				// Enough space at begin of array?
138
+				//printf("[das_fill] Partial fill ERROR!\n");
139
+				das->state = das_err;
140
+				return das->state;
141
+			}
142
+			// Shift unused suffix to begin
143
+			memcpy(das->rfc, das->r_iter, (size_t) rhs);
144
+			count = dfs_read(das->dnaf, das->rfc + rhs, (unsigned) lhs);
145
+
146
+			/* lhs < 0 should not be possible */
147
+			if(count < ((size_t) lhs))
148
+				++das->nFillPartIncomp;
149
+			++das->nFillPart;
150
+		}
151
+		else
152
+		{	// Refill whole array
153
+			//printf("[das_fill] rhs== %u\n",rhs);
154
+			count = dfs_read(das->dnaf, das->rfc, das->nchar);
155
+			if(count < das->nchar)
156
+				++das->nFillWholeIncomp;
157
+			++das->nFillWhole;
158
+		}
159
+		das->r_end = das->rfc + count; // past-the-end
160
+		*das->r_end = das_char_eos;    // '\0'
161
+		das->r_iter = das->rfc;        // Re-init iter
162
+		++das->nFill;
163
+	}
164
+
165
+	if(count==0)
166
+	{
167
+		//printf("[das_fill] count==0.\n");
168
+		das->state |= das_empty;
169
+		return das->state;
170
+	}
171
+	// Return success
172
+	das->state &= (~das_empty);
173
+	return das_ok;
174
+}
175
+
176
+daStream * das_init(const char* filename, unsigned das_size)
177
+{
178
+	daStream *das = calloc(sizeof(daStream), 1);
179
+	if(!das)
180
+	{
181
+		//printf("[das_init] das calloc returned 0!\n");
182
+		return 0;
183
+	}
184
+
185
+	das->dnaf = dfs_stream_init(filename);
186
+
187
+	if(!das->dnaf)
188
+	{
189
+		//printf("[das_init] dfs_stream_init returned 0!\n");
190
+		das->state = das_err;
191
+		return das;
192
+	}
193
+
194
+	das->nchar = das_size;
195
+	das->rfc = calloc(das_size + 1, sizeof(char));
196
+	if(!das->rfc)
197
+	{
198
+		//printf("[das_init] rfc calloc returned 0!\n");
199
+		das->state = das_err;
200
+		return das;
201
+	}
202
+	das->pos = calloc(das_size + 1, sizeof(char));
203
+	if(!das->pos)
204
+	{
205
+		//printf("[das_init] pos calloc returned 0!\n");
206
+		das->state = das_err;
207
+		return das;
208
+	}
209
+
210
+	das->r_end = das->rfc + das_size;
211
+	// Indicates empty buffer
212
+	// -> first das_fill will
213
+	// read complete buffer
214
+	das->r_iter = das->r_end;
215
+
216
+	// Returns memory initialized structure
217
+	// but dfs file is possibly closed
218
+	// (e.g. file not found).
219
+	return das;
220
+}
221
+
222
+
223
+#endif /* DNA_ASTREAM_H_ */