git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/seqTools@95415 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,223 @@ |
1 |
+/* |
|
2 |
+ * dna_astream.h |
|
3 |
+ * |
|
4 |
+ * Created on: 07.10.2013 |
|
5 |
+ * Author: wolfgang |
|
6 |
+ */ |
|
7 |
+ |
|
8 |
+#ifndef DNA_ASTREAM_H_ |
|
9 |
+#define DNA_ASTREAM_H_ |
|
10 |
+ |
|
11 |
+#include "dna_fstream.h" |
|
12 |
+ |
|
13 |
+/////////////////////////////////////////////////////////////////////////////////////////////////// |
|
14 |
+// |
|
15 |
+// dna file stream |
|
16 |
+// Keeps two (equal sized) character buffers (raw and processed) and two iterators |
|
17 |
+// |
|
18 |
+// Raw buffer can be (re-) filled from dna_fstream. |
|
19 |
+// Client struct can define functions for searching and copying content into |
|
20 |
+// processed buffer. |
|
21 |
+// |
|
22 |
+/////////////////////////////////////////////////////////////////////////////////////////////////// |
|
23 |
+ |
|
24 |
+ |
|
25 |
+// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + // |
|
26 |
+// |
|
27 |
+// Define buffer size and delimiting characters |
|
28 |
+// |
|
29 |
+// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + // |
|
30 |
+ |
|
31 |
// Terminator character written behind the last valid character of a buffer
static const char das_char_eos = '\0';
|
32 |
+ |
|
33 |
+// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + // |
|
34 |
+// |
|
35 |
+// Define status flags |
|
36 |
+// |
|
37 |
+// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + // |
|
38 |
+ |
|
39 |
// Values stored in (and combined bitwise into) daStream.state

// Unrecoverable error state
static const int das_err   = -1;

// Success value returned by das_fill. Must be 0 !!!
static const int das_ok    =  0;

// Future raw size = 0: set by das_fill when no characters could be read
static const int das_empty =  1;
|
42 |
+ |
|
43 |
+// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + // |
|
44 |
+ |
|
45 |
+ |
|
46 |
+// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + // |
|
47 |
+// |
|
48 |
+// Struct definition |
|
49 |
+// |
|
50 |
+// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + // |
|
51 |
+ |
|
52 |
+typedef struct dna_astream |
|
53 |
+{ |
|
54 |
+ |
|
55 |
+ dfStream *dnaf; |
|
56 |
+ |
|
57 |
+ // + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + // |
|
58 |
+ // Character array declarations |
|
59 |
+ |
|
60 |
+ unsigned nchar; // Maximal number of characters in rfc and pos |
|
61 |
+ // (array size=nchar+1) |
|
62 |
+ |
|
63 |
+ // Pointers to character arrays (= begin pointers) |
|
64 |
+ char * rfc; // Raw file content : rfc-array |
|
65 |
+ char * pos; // Processed output sequence : pos-array |
|
66 |
+ |
|
67 |
+ // Iterators which reside inside arrays |
|
68 |
+ char * r_iter; // iterator for rfc |
|
69 |
+ char * p_iter; // iterator for pos |
|
70 |
+ |
|
71 |
+ // Point to past-the-end character of arrays ('\0') |
|
72 |
+ char * r_end; // rfc array |
|
73 |
+ char * p_end; // behind last retrievable character |
|
74 |
+ |
|
75 |
+ int npPos; // Number of character values in pos array (=strlen) |
|
76 |
+ // |
|
77 |
+ // + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + // |
|
78 |
+ |
|
79 |
+ int state; // Carries stream state flags |
|
80 |
+ |
|
81 |
+ unsigned nFill; |
|
82 |
+ unsigned nFillWhole; |
|
83 |
+ unsigned nFillWholeIncomp; |
|
84 |
+ unsigned nFillPart; |
|
85 |
+ unsigned nFillPartIncomp; |
|
86 |
+ |
|
87 |
+} daStream; |
|
88 |
+ |
|
89 |
+// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + // |
|
90 |
+// |
|
91 |
+// Check routines |
|
92 |
+// |
|
93 |
+// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + // |
|
94 |
+ |
|
95 |
+static inline int dasEmpty(daStream *das) { return das->r_iter == das->r_end; } |
|
96 |
+static inline int dasProcEmpty(daStream *das) { return das->p_iter == das->p_end; } |
|
97 |
+static inline int dasIsError(daStream *das) { return das->state & das_err; } |
|
98 |
+static inline int dasIsOpen(daStream *das) { return dfs_isOpen(das->dnaf); } |
|
99 |
+static inline int dasIsEof(daStream *das) { return dfs_stream_eof(das->dnaf); } |
|
100 |
+ |
|
101 |
+// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + // |
|
102 |
+// |
|
103 |
+// Construction and file operations |
|
104 |
+// |
|
105 |
+// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + // |
|
106 |
+ |
|
107 |
+void das_destroy(daStream *das) |
|
108 |
+{ |
|
109 |
+ if(das) |
|
110 |
+ { |
|
111 |
+ dfs_destroy(das->dnaf); |
|
112 |
+ das->dnaf=0; |
|
113 |
+ free(das->rfc); |
|
114 |
+ das->rfc=0; |
|
115 |
+ free(das->pos); |
|
116 |
+ das->pos=0; |
|
117 |
+ free(das); |
|
118 |
+ } |
|
119 |
+} |
|
120 |
+ |
|
121 |
+ |
|
122 |
+int das_fill(daStream *das) |
|
123 |
+{ |
|
124 |
+ //printf("[das_fill] before: '%s'\n",das->rfc); |
|
125 |
+ size_t count = 0; |
|
126 |
+ |
|
127 |
+ int rhs= (int)(das->r_end - das->r_iter); // Number of unprocessed characters in array |
|
128 |
+ int lhs= (int)(das->r_iter - das->rfc); // Number of processed characters in array |
|
129 |
+ |
|
130 |
+ if(dfs_isOpen(das->dnaf)) |
|
131 |
+ { |
|
132 |
+ if(rhs>0) |
|
133 |
+ { |
|
134 |
+ //printf("[das_fill] rhs= %u\n",rhs); |
|
135 |
+ if(lhs<rhs) |
|
136 |
+ { |
|
137 |
+ // Enough space at begin of array? |
|
138 |
+ //printf("[das_fill] Partial fill ERROR!\n"); |
|
139 |
+ das->state = das_err; |
|
140 |
+ return das->state; |
|
141 |
+ } |
|
142 |
+ // Shift unused suffix to begin |
|
143 |
+ memcpy(das->rfc, das->r_iter, (size_t) rhs); |
|
144 |
+ count = dfs_read(das->dnaf, das->rfc + rhs, (unsigned) lhs); |
|
145 |
+ |
|
146 |
+ /* lhs < 0 should not be possible */ |
|
147 |
+ if(count < ((size_t) lhs)) |
|
148 |
+ ++das->nFillPartIncomp; |
|
149 |
+ ++das->nFillPart; |
|
150 |
+ } |
|
151 |
+ else |
|
152 |
+ { // Refill whole array |
|
153 |
+ //printf("[das_fill] rhs== %u\n",rhs); |
|
154 |
+ count = dfs_read(das->dnaf, das->rfc, das->nchar); |
|
155 |
+ if(count < das->nchar) |
|
156 |
+ ++das->nFillWholeIncomp; |
|
157 |
+ ++das->nFillWhole; |
|
158 |
+ } |
|
159 |
+ das->r_end = das->rfc + count; // past-the-end |
|
160 |
+ *das->r_end = das_char_eos; // '\0' |
|
161 |
+ das->r_iter = das->rfc; // Re-init iter |
|
162 |
+ ++das->nFill; |
|
163 |
+ } |
|
164 |
+ |
|
165 |
+ if(count==0) |
|
166 |
+ { |
|
167 |
+ //printf("[das_fill] count==0.\n"); |
|
168 |
+ das->state |= das_empty; |
|
169 |
+ return das->state; |
|
170 |
+ } |
|
171 |
+ // Return success |
|
172 |
+ das->state &= (~das_empty); |
|
173 |
+ return das_ok; |
|
174 |
+} |
|
175 |
+ |
|
176 |
+daStream * das_init(const char* filename, unsigned das_size) |
|
177 |
+{ |
|
178 |
+ daStream *das = calloc(sizeof(daStream), 1); |
|
179 |
+ if(!das) |
|
180 |
+ { |
|
181 |
+ //printf("[das_init] das calloc returned 0!\n"); |
|
182 |
+ return 0; |
|
183 |
+ } |
|
184 |
+ |
|
185 |
+ das->dnaf = dfs_stream_init(filename); |
|
186 |
+ |
|
187 |
+ if(!das->dnaf) |
|
188 |
+ { |
|
189 |
+ //printf("[das_init] dfs_stream_init returned 0!\n"); |
|
190 |
+ das->state = das_err; |
|
191 |
+ return das; |
|
192 |
+ } |
|
193 |
+ |
|
194 |
+ das->nchar = das_size; |
|
195 |
+ das->rfc = calloc(das_size + 1, sizeof(char)); |
|
196 |
+ if(!das->rfc) |
|
197 |
+ { |
|
198 |
+ //printf("[das_init] rfc calloc returned 0!\n"); |
|
199 |
+ das->state = das_err; |
|
200 |
+ return das; |
|
201 |
+ } |
|
202 |
+ das->pos = calloc(das_size + 1, sizeof(char)); |
|
203 |
+ if(!das->pos) |
|
204 |
+ { |
|
205 |
+ //printf("[das_init] pos calloc returned 0!\n"); |
|
206 |
+ das->state = das_err; |
|
207 |
+ return das; |
|
208 |
+ } |
|
209 |
+ |
|
210 |
+ das->r_end = das->rfc + das_size; |
|
211 |
+ // Indicates empty buffer |
|
212 |
+ // -> first das_fill will |
|
213 |
+ // read complete buffer |
|
214 |
+ das->r_iter = das->r_end; |
|
215 |
+ |
|
216 |
+ // Returns memory initialized structure |
|
217 |
+ // but dfs file is possibly closed |
|
218 |
+ // (e.g. file not found). |
|
219 |
+ return das; |
|
220 |
+} |
|
221 |
+ |
|
222 |
+ |
|
223 |
+#endif /* DNA_ASTREAM_H_ */ |