1 | 1 |
new file mode 100755 |
... | ... |
@@ -0,0 +1,653 @@ |
1 |
+// Boost token_functions.hpp ------------------------------------------------// |
|
2 |
+ |
|
3 |
+// Copyright John R. Bandela 2001. |
|
4 |
+ |
|
5 |
+// Distributed under the Boost Software License, Version 1.0. (See |
|
6 |
+// accompanying file LICENSE_1_0.txt or copy at |
|
7 |
+// http://www.boost.org/LICENSE_1_0.txt) |
|
8 |
+ |
|
9 |
+// See http://www.boost.org/libs/tokenizer/ for documentation. |
|
10 |
+ |
|
11 |
+// Revision History: |
|
12 |
+// 01 Oct 2004 Joaquin M Lopez Munoz |
|
13 |
+// Workaround for a problem with string::assign in msvc-stlport |
|
14 |
+// 06 Apr 2004 John Bandela |
|
15 |
+// Fixed a bug involving using char_delimiter with a true input iterator |
|
16 |
+// 28 Nov 2003 Robert Zeh and John Bandela |
|
17 |
+// Converted into "fast" functions that avoid using += when |
|
18 |
+// the supplied iterator isn't an input_iterator; based on |
|
19 |
+// some work done at Archelon and a version that was checked into |
|
20 |
+// the boost CVS for a short period of time. |
|
21 |
+// 20 Feb 2002 John Maddock |
|
22 |
+// Removed using namespace std declarations and added |
|
23 |
+// workaround for BOOST_NO_STDC_NAMESPACE (the library |
|
24 |
+// can be safely mixed with regex). |
|
25 |
+// 06 Feb 2002 Jeremy Siek |
|
26 |
+// Added char_separator. |
|
27 |
+// 02 Feb 2002 Jeremy Siek |
|
28 |
+// Removed tabs and a little cleanup. |
|
29 |
+ |
|
30 |
+ |
|
31 |
+#ifndef BOOST_TOKEN_FUNCTIONS_JRB120303_HPP_ |
|
32 |
+#define BOOST_TOKEN_FUNCTIONS_JRB120303_HPP_ |
|
33 |
+ |
|
34 |
+#include <vector> |
|
35 |
+#include <stdexcept> |
|
36 |
+#include <string> |
|
37 |
+#include <cctype> |
|
38 |
+#include <algorithm> // for find_if |
|
39 |
+#include <boost/config.hpp> |
|
40 |
+#include <boost/assert.hpp> |
|
41 |
+#include <boost/type_traits/is_pointer.hpp> |
|
42 |
+#include <boost/detail/workaround.hpp> |
|
43 |
+#include <boost/mpl/if.hpp> |
|
44 |
+#include <boost/throw_exception.hpp> |
|
45 |
+#if !defined(BOOST_NO_CWCTYPE) |
|
46 |
+#include <cwctype> |
|
47 |
+#endif |
|
48 |
+ |
|
49 |
+// |
|
50 |
+// the following must not be macros if we are to prefix them |
|
51 |
+// with std:: (they shouldn't be macros anyway...) |
|
52 |
+// |
|
53 |
+#ifdef ispunct |
|
54 |
+# undef ispunct |
|
55 |
+#endif |
|
56 |
+#ifdef iswpunct |
|
57 |
+# undef iswpunct |
|
58 |
+#endif |
|
59 |
+#ifdef isspace |
|
60 |
+# undef isspace |
|
61 |
+#endif |
|
62 |
+#ifdef iswspace |
|
63 |
+# undef iswspace |
|
64 |
+#endif |
|
65 |
+// |
|
66 |
+// fix namespace problems: |
|
67 |
+// |
|
68 |
+#ifdef BOOST_NO_STDC_NAMESPACE |
|
69 |
+namespace std{ |
|
70 |
+ using ::ispunct; |
|
71 |
+ using ::isspace; |
|
72 |
+#if !defined(BOOST_NO_CWCTYPE) |
|
73 |
+ using ::iswpunct; |
|
74 |
+ using ::iswspace; |
|
75 |
+#endif |
|
76 |
+} |
|
77 |
+#endif |
|
78 |
+ |
|
79 |
+namespace boost{ |
|
80 |
+ //=========================================================================== |
|
81 |
+ // The escaped_list_separator class, which is a model of TokenizerFunction. |
|
82 |
+ // An escaped list is a super-set of what is commonly known as a comma |
|
83 |
+ // separated value (csv) list. It is separated into fields by a comma or |
|
84 |
+ // other character. If the delimiting character is inside quotes, then it is |
|
85 |
+ // counted as a regular character. To allow for embedded quotes in a field, |
|
86 |
+ // there can be escape sequences using the \ much like C. |
|
87 |
+ // The role of the comma, the quotation mark, and the escape |
|
88 |
+ // character (backslash \), can be assigned to other characters. |
|
89 |
+ |
|
90 |
+ struct escaped_list_error : public std::runtime_error{ |
|
91 |
+ escaped_list_error(const std::string& what_arg):std::runtime_error(what_arg) { } |
|
92 |
+ }; |
|
93 |
+ |
|
94 |
+ |
|
95 |
+// The out of the box GCC 2.95 on cygwin does not have a char_traits class. |
|
96 |
+// MSVC does not like the following typename |
|
97 |
+ template <class Char, |
|
98 |
+ class Traits = BOOST_DEDUCED_TYPENAME std::basic_string<Char>::traits_type > |
|
99 |
+ class escaped_list_separator { |
|
100 |
+ |
|
101 |
+ private: |
|
102 |
+ typedef std::basic_string<Char,Traits> string_type; |
|
103 |
+ struct char_eq { |
|
104 |
+ Char e_; |
|
105 |
+ char_eq(Char e):e_(e) { } |
|
106 |
+ bool operator()(Char c) { |
|
107 |
+ return Traits::eq(e_,c); |
|
108 |
+ } |
|
109 |
+ }; |
|
110 |
+ string_type escape_; |
|
111 |
+ string_type c_; |
|
112 |
+ string_type quote_; |
|
113 |
+ bool last_; |
|
114 |
+ |
|
115 |
+ bool is_escape(Char e) { |
|
116 |
+ char_eq f(e); |
|
117 |
+ return std::find_if(escape_.begin(),escape_.end(),f)!=escape_.end(); |
|
118 |
+ } |
|
119 |
+ bool is_c(Char e) { |
|
120 |
+ char_eq f(e); |
|
121 |
+ return std::find_if(c_.begin(),c_.end(),f)!=c_.end(); |
|
122 |
+ } |
|
123 |
+ bool is_quote(Char e) { |
|
124 |
+ char_eq f(e); |
|
125 |
+ return std::find_if(quote_.begin(),quote_.end(),f)!=quote_.end(); |
|
126 |
+ } |
|
127 |
+ template <typename iterator, typename Token> |
|
128 |
+ void do_escape(iterator& next,iterator end,Token& tok) { |
|
129 |
+ if (++next == end) |
|
130 |
+ BOOST_THROW_EXCEPTION(escaped_list_error(std::string("cannot end with escape"))); |
|
131 |
+ if (Traits::eq(*next,'n')) { |
|
132 |
+ tok+='\n'; |
|
133 |
+ return; |
|
134 |
+ } |
|
135 |
+ else if (is_quote(*next)) { |
|
136 |
+ tok+=*next; |
|
137 |
+ return; |
|
138 |
+ } |
|
139 |
+ else if (is_c(*next)) { |
|
140 |
+ tok+=*next; |
|
141 |
+ return; |
|
142 |
+ } |
|
143 |
+ else if (is_escape(*next)) { |
|
144 |
+ tok+=*next; |
|
145 |
+ return; |
|
146 |
+ } |
|
147 |
+ else |
|
148 |
+ BOOST_THROW_EXCEPTION(escaped_list_error(std::string("unknown escape sequence"))); |
|
149 |
+ } |
|
150 |
+ |
|
151 |
+ public: |
|
152 |
+ |
|
153 |
+ explicit escaped_list_separator(Char e = '\\', |
|
154 |
+ Char c = ',',Char q = '\"') |
|
155 |
+ : escape_(1,e), c_(1,c), quote_(1,q), last_(false) { } |
|
156 |
+ |
|
157 |
+ escaped_list_separator(string_type e, string_type c, string_type q) |
|
158 |
+ : escape_(e), c_(c), quote_(q), last_(false) { } |
|
159 |
+ |
|
160 |
+ void reset() {last_=false;} |
|
161 |
+ |
|
162 |
+ template <typename InputIterator, typename Token> |
|
163 |
+ bool operator()(InputIterator& next,InputIterator end,Token& tok) { |
|
164 |
+ bool bInQuote = false; |
|
165 |
+ tok = Token(); |
|
166 |
+ |
|
167 |
+ if (next == end) { |
|
168 |
+ if (last_) { |
|
169 |
+ last_ = false; |
|
170 |
+ return true; |
|
171 |
+ } |
|
172 |
+ else |
|
173 |
+ return false; |
|
174 |
+ } |
|
175 |
+ last_ = false; |
|
176 |
+ for (;next != end;++next) { |
|
177 |
+ if (is_escape(*next)) { |
|
178 |
+ do_escape(next,end,tok); |
|
179 |
+ } |
|
180 |
+ else if (is_c(*next)) { |
|
181 |
+ if (!bInQuote) { |
|
182 |
+ // If we are not in quote, then we are done |
|
183 |
+ ++next; |
|
184 |
+ // The last character was a c, that means there is |
|
185 |
+ // 1 more blank field |
|
186 |
+ last_ = true; |
|
187 |
+ return true; |
|
188 |
+ } |
|
189 |
+ else tok+=*next; |
|
190 |
+ } |
|
191 |
+ else if (is_quote(*next)) { |
|
192 |
+ bInQuote=!bInQuote; |
|
193 |
+ } |
|
194 |
+ else { |
|
195 |
+ tok += *next; |
|
196 |
+ } |
|
197 |
+ } |
|
198 |
+ return true; |
|
199 |
+ } |
|
200 |
+ }; |
|
201 |
+ |
|
202 |
+ //=========================================================================== |
|
203 |
+ // The classes here are used by offset_separator and char_separator to implement |
|
204 |
+ // faster assigning of tokens using assign instead of += |
|
205 |
+ |
|
206 |
+ namespace tokenizer_detail { |
|
207 |
+ //=========================================================================== |
|
208 |
+ // Tokenizer was broken for wide character separators, at least on Windows, since |
|
209 |
+ // CRT functions isspace etc only expect values in [0, 0xFF]. Debug build asserts |
|
210 |
+ // if higher values are passed in. The traits extension class should take care of this. |
|
211 |
+ // Assuming that the conditional will always get optimized out in the function |
|
212 |
+ // implementations, argument types are not a problem since both forms of character classifiers |
|
213 |
+ // expect an int. |
|
214 |
+ |
|
215 |
+#if !defined(BOOST_NO_CWCTYPE) |
|
216 |
+ template<typename traits, int N> |
|
217 |
+ struct traits_extension_details : public traits { |
|
218 |
+ typedef typename traits::char_type char_type; |
|
219 |
+ static bool isspace(char_type c) |
|
220 |
+ { |
|
221 |
+ return std::iswspace(c) != 0; |
|
222 |
+ } |
|
223 |
+ static bool ispunct(char_type c) |
|
224 |
+ { |
|
225 |
+ return std::iswpunct(c) != 0; |
|
226 |
+ } |
|
227 |
+ }; |
|
228 |
+ |
|
229 |
+ template<typename traits> |
|
230 |
+ struct traits_extension_details<traits, 1> : public traits { |
|
231 |
+ typedef typename traits::char_type char_type; |
|
232 |
+ static bool isspace(char_type c) |
|
233 |
+ { |
|
234 |
+ return std::isspace(c) != 0; |
|
235 |
+ } |
|
236 |
+ static bool ispunct(char_type c) |
|
237 |
+ { |
|
238 |
+ return std::ispunct(c) != 0; |
|
239 |
+ } |
|
240 |
+ }; |
|
241 |
+#endif |
|
242 |
+ |
|
243 |
+ |
|
244 |
+ // In case there is no cwctype header, we implement the checks manually. |
|
245 |
+ // We make use of the fact that the tested categories should fit in ASCII. |
|
246 |
+ template<typename traits> |
|
247 |
+ struct traits_extension : public traits { |
|
248 |
+ typedef typename traits::char_type char_type; |
|
249 |
+ static bool isspace(char_type c) |
|
250 |
+ { |
|
251 |
+#if !defined(BOOST_NO_CWCTYPE) |
|
252 |
+ return traits_extension_details<traits, sizeof(char_type)>::isspace(c); |
|
253 |
+#else |
|
254 |
+ return static_cast< unsigned >(c) <= 255 && std::isspace(c) != 0; |
|
255 |
+#endif |
|
256 |
+ } |
|
257 |
+ |
|
258 |
+ static bool ispunct(char_type c) |
|
259 |
+ { |
|
260 |
+#if !defined(BOOST_NO_CWCTYPE) |
|
261 |
+ return traits_extension_details<traits, sizeof(char_type)>::ispunct(c); |
|
262 |
+#else |
|
263 |
+ return static_cast< unsigned >(c) <= 255 && std::ispunct(c) != 0; |
|
264 |
+#endif |
|
265 |
+ } |
|
266 |
+ }; |
|
267 |
+ |
|
268 |
+ // The assign_or_plus_equal struct contains functions that implement |
|
269 |
+ // assign, +=, and clearing based on the iterator type. The |
|
270 |
+ // generic case does nothing for plus_equal and clearing, while |
|
271 |
+ // passing through the call for assign. |
|
272 |
+ // |
|
273 |
+ // When an input iterator is being used, the situation is reversed. |
|
274 |
+ // The assign method does nothing, plus_equal invokes operator +=, |
|
275 |
+ // and the clearing method sets the supplied token to the default |
|
276 |
+ // token constructor's result. |
|
277 |
+ // |
|
278 |
+ |
|
279 |
+ template<class IteratorTag> |
|
280 |
+ struct assign_or_plus_equal { |
|
281 |
+ template<class Iterator, class Token> |
|
282 |
+ static void assign(Iterator b, Iterator e, Token &t) { |
|
283 |
+ t.assign(b, e); |
|
284 |
+ } |
|
285 |
+ |
|
286 |
+ template<class Token, class Value> |
|
287 |
+ static void plus_equal(Token &, const Value &) { } |
|
288 |
+ |
|
289 |
+ // If we are doing an assign, there is no need for the |
|
290 |
+ // the clear. |
|
291 |
+ // |
|
292 |
+ template<class Token> |
|
293 |
+ static void clear(Token &) { } |
|
294 |
+ }; |
|
295 |
+ |
|
296 |
+ template <> |
|
297 |
+ struct assign_or_plus_equal<std::input_iterator_tag> { |
|
298 |
+ template<class Iterator, class Token> |
|
299 |
+ static void assign(Iterator , Iterator , Token &) { } |
|
300 |
+ template<class Token, class Value> |
|
301 |
+ static void plus_equal(Token &t, const Value &v) { |
|
302 |
+ t += v; |
|
303 |
+ } |
|
304 |
+ template<class Token> |
|
305 |
+ static void clear(Token &t) { |
|
306 |
+ t = Token(); |
|
307 |
+ } |
|
308 |
+ }; |
|
309 |
+ |
|
310 |
+ |
|
311 |
+ template<class Iterator> |
|
312 |
+ struct pointer_iterator_category{ |
|
313 |
+ typedef std::random_access_iterator_tag type; |
|
314 |
+ }; |
|
315 |
+ |
|
316 |
+ |
|
317 |
+ template<class Iterator> |
|
318 |
+ struct class_iterator_category{ |
|
319 |
+ typedef typename Iterator::iterator_category type; |
|
320 |
+ }; |
|
321 |
+ |
|
322 |
+ |
|
323 |
+ |
|
324 |
+ // This portably gets the iterator_tag without partial template specialization |
|
325 |
+ template<class Iterator> |
|
326 |
+ struct get_iterator_category{ |
|
327 |
+ typedef typename mpl::if_<is_pointer<Iterator>, |
|
328 |
+ pointer_iterator_category<Iterator>, |
|
329 |
+ class_iterator_category<Iterator> |
|
330 |
+ >::type cat; |
|
331 |
+ |
|
332 |
+ typedef typename cat::type iterator_category; |
|
333 |
+ }; |
|
334 |
+ |
|
335 |
+ |
|
336 |
+ } // namespace tokenizer_detail |
|
337 |
+ |
|
338 |
+ |
|
339 |
+ //=========================================================================== |
|
340 |
+ // The offset_separator class, which is a model of TokenizerFunction. |
|
341 |
+ // Offset breaks a string into tokens based on a range of offsets |
|
342 |
+ |
|
343 |
+ class offset_separator { |
|
344 |
+ private: |
|
345 |
+ |
|
346 |
+ std::vector<int> offsets_; |
|
347 |
+ unsigned int current_offset_; |
|
348 |
+ bool wrap_offsets_; |
|
349 |
+ bool return_partial_last_; |
|
350 |
+ |
|
351 |
+ public: |
|
352 |
+ template <typename Iter> |
|
353 |
+ offset_separator(Iter begin, Iter end, bool wrap_offsets = true, |
|
354 |
+ bool return_partial_last = true) |
|
355 |
+ : offsets_(begin,end), current_offset_(0), |
|
356 |
+ wrap_offsets_(wrap_offsets), |
|
357 |
+ return_partial_last_(return_partial_last) { } |
|
358 |
+ |
|
359 |
+ offset_separator() |
|
360 |
+ : offsets_(1,1), current_offset_(), |
|
361 |
+ wrap_offsets_(true), return_partial_last_(true) { } |
|
362 |
+ |
|
363 |
+ void reset() { |
|
364 |
+ current_offset_ = 0; |
|
365 |
+ } |
|
366 |
+ |
|
367 |
+ template <typename InputIterator, typename Token> |
|
368 |
+ bool operator()(InputIterator& next, InputIterator end, Token& tok) |
|
369 |
+ { |
|
370 |
+ typedef tokenizer_detail::assign_or_plus_equal< |
|
371 |
+ BOOST_DEDUCED_TYPENAME tokenizer_detail::get_iterator_category< |
|
372 |
+ InputIterator |
|
373 |
+ >::iterator_category |
|
374 |
+ > assigner; |
|
375 |
+ |
|
376 |
+ BOOST_ASSERT(!offsets_.empty()); |
|
377 |
+ |
|
378 |
+ assigner::clear(tok); |
|
379 |
+ InputIterator start(next); |
|
380 |
+ |
|
381 |
+ if (next == end) |
|
382 |
+ return false; |
|
383 |
+ |
|
384 |
+ if (current_offset_ == offsets_.size()) |
|
385 |
+ { |
|
386 |
+ if (wrap_offsets_) |
|
387 |
+ current_offset_=0; |
|
388 |
+ else |
|
389 |
+ return false; |
|
390 |
+ } |
|
391 |
+ |
|
392 |
+ int c = offsets_[current_offset_]; |
|
393 |
+ int i = 0; |
|
394 |
+ for (; i < c; ++i) { |
|
395 |
+ if (next == end)break; |
|
396 |
+ assigner::plus_equal(tok,*next++); |
|
397 |
+ } |
|
398 |
+ assigner::assign(start,next,tok); |
|
399 |
+ |
|
400 |
+ if (!return_partial_last_) |
|
401 |
+ if (i < (c-1) ) |
|
402 |
+ return false; |
|
403 |
+ |
|
404 |
+ ++current_offset_; |
|
405 |
+ return true; |
|
406 |
+ } |
|
407 |
+ }; |
|
408 |
+ |
|
409 |
+ |
|
410 |
+ //=========================================================================== |
|
411 |
+ // The char_separator class breaks a sequence of characters into |
|
412 |
+ // tokens based on the character delimiters (very much like bad old |
|
413 |
+ // strtok). A delimiter character can either be kept or dropped. A |
|
414 |
+ // kept delimiter shows up as an output token, whereas a dropped |
|
415 |
+ // delimiter does not. |
|
416 |
+ |
|
417 |
+ // This class replaces the char_delimiters_separator class. The |
|
418 |
+ // constructor for the char_delimiters_separator class was too |
|
419 |
+ // confusing and needed to be deprecated. However, because of the |
|
420 |
+ // default arguments to the constructor, adding the new constructor |
|
421 |
+ // would cause ambiguity, so instead I deprecated the whole class. |
|
422 |
+ // The implementation of the class was also simplified considerably. |
|
423 |
+ |
|
424 |
+ enum empty_token_policy { drop_empty_tokens, keep_empty_tokens }; |
|
425 |
+ |
|
426 |
+ // The out of the box GCC 2.95 on cygwin does not have a char_traits class. |
|
427 |
+ template <typename Char, |
|
428 |
+ typename Tr = BOOST_DEDUCED_TYPENAME std::basic_string<Char>::traits_type > |
|
429 |
+ class char_separator |
|
430 |
+ { |
|
431 |
+ typedef tokenizer_detail::traits_extension<Tr> Traits; |
|
432 |
+ typedef std::basic_string<Char,Tr> string_type; |
|
433 |
+ public: |
|
434 |
+ explicit |
|
435 |
+ char_separator(const Char* dropped_delims, |
|
436 |
+ const Char* kept_delims = 0, |
|
437 |
+ empty_token_policy empty_tokens = drop_empty_tokens) |
|
438 |
+ : m_dropped_delims(dropped_delims), |
|
439 |
+ m_use_ispunct(false), |
|
440 |
+ m_use_isspace(false), |
|
441 |
+ m_empty_tokens(empty_tokens), |
|
442 |
+ m_output_done(false) |
|
443 |
+ { |
|
444 |
+ // Borland workaround |
|
445 |
+ if (kept_delims) |
|
446 |
+ m_kept_delims = kept_delims; |
|
447 |
+ } |
|
448 |
+ |
|
449 |
+ // use ispunct() for kept delimiters and isspace for dropped. |
|
450 |
+ explicit |
|
451 |
+ char_separator() |
|
452 |
+ : m_use_ispunct(true), |
|
453 |
+ m_use_isspace(true), |
|
454 |
+ m_empty_tokens(drop_empty_tokens), |
|
455 |
+ m_output_done(false) { } |
|
456 |
+ |
|
457 |
+ void reset() { } |
|
458 |
+ |
|
459 |
+ template <typename InputIterator, typename Token> |
|
460 |
+ bool operator()(InputIterator& next, InputIterator end, Token& tok) |
|
461 |
+ { |
|
462 |
+ typedef tokenizer_detail::assign_or_plus_equal< |
|
463 |
+ BOOST_DEDUCED_TYPENAME tokenizer_detail::get_iterator_category< |
|
464 |
+ InputIterator |
|
465 |
+ >::iterator_category |
|
466 |
+ > assigner; |
|
467 |
+ |
|
468 |
+ assigner::clear(tok); |
|
469 |
+ |
|
470 |
+ // skip past all dropped_delims |
|
471 |
+ if (m_empty_tokens == drop_empty_tokens) |
|
472 |
+ for (; next != end && is_dropped(*next); ++next) |
|
473 |
+ { } |
|
474 |
+ |
|
475 |
+ InputIterator start(next); |
|
476 |
+ |
|
477 |
+ if (m_empty_tokens == drop_empty_tokens) { |
|
478 |
+ |
|
479 |
+ if (next == end) |
|
480 |
+ return false; |
|
481 |
+ |
|
482 |
+ |
|
483 |
+ // if we are on a kept_delims move past it and stop |
|
484 |
+ if (is_kept(*next)) { |
|
485 |
+ assigner::plus_equal(tok,*next); |
|
486 |
+ ++next; |
|
487 |
+ } else |
|
488 |
+ // append all the non delim characters |
|
489 |
+ for (; next != end && !is_dropped(*next) && !is_kept(*next); ++next) |
|
490 |
+ assigner::plus_equal(tok,*next); |
|
491 |
+ } |
|
492 |
+ else { // m_empty_tokens == keep_empty_tokens |
|
493 |
+ |
|
494 |
+ // Handle empty token at the end |
|
495 |
+ if (next == end) |
|
496 |
+ { |
|
497 |
+ if (m_output_done == false) |
|
498 |
+ { |
|
499 |
+ m_output_done = true; |
|
500 |
+ assigner::assign(start,next,tok); |
|
501 |
+ return true; |
|
502 |
+ } |
|
503 |
+ else |
|
504 |
+ return false; |
|
505 |
+ } |
|
506 |
+ |
|
507 |
+ if (is_kept(*next)) { |
|
508 |
+ if (m_output_done == false) |
|
509 |
+ m_output_done = true; |
|
510 |
+ else { |
|
511 |
+ assigner::plus_equal(tok,*next); |
|
512 |
+ ++next; |
|
513 |
+ m_output_done = false; |
|
514 |
+ } |
|
515 |
+ } |
|
516 |
+ else if (m_output_done == false && is_dropped(*next)) { |
|
517 |
+ m_output_done = true; |
|
518 |
+ } |
|
519 |
+ else { |
|
520 |
+ if (is_dropped(*next)) |
|
521 |
+ start=++next; |
|
522 |
+ for (; next != end && !is_dropped(*next) && !is_kept(*next); ++next) |
|
523 |
+ assigner::plus_equal(tok,*next); |
|
524 |
+ m_output_done = true; |
|
525 |
+ } |
|
526 |
+ } |
|
527 |
+ assigner::assign(start,next,tok); |
|
528 |
+ return true; |
|
529 |
+ } |
|
530 |
+ |
|
531 |
+ private: |
|
532 |
+ string_type m_kept_delims; |
|
533 |
+ string_type m_dropped_delims; |
|
534 |
+ bool m_use_ispunct; |
|
535 |
+ bool m_use_isspace; |
|
536 |
+ empty_token_policy m_empty_tokens; |
|
537 |
+ bool m_output_done; |
|
538 |
+ |
|
539 |
+ bool is_kept(Char E) const |
|
540 |
+ { |
|
541 |
+ if (m_kept_delims.length()) |
|
542 |
+ return m_kept_delims.find(E) != string_type::npos; |
|
543 |
+ else if (m_use_ispunct) { |
|
544 |
+ return Traits::ispunct(E) != 0; |
|
545 |
+ } else |
|
546 |
+ return false; |
|
547 |
+ } |
|
548 |
+ bool is_dropped(Char E) const |
|
549 |
+ { |
|
550 |
+ if (m_dropped_delims.length()) |
|
551 |
+ return m_dropped_delims.find(E) != string_type::npos; |
|
552 |
+ else if (m_use_isspace) { |
|
553 |
+ return Traits::isspace(E) != 0; |
|
554 |
+ } else |
|
555 |
+ return false; |
|
556 |
+ } |
|
557 |
+ }; |
|
558 |
+ |
|
559 |
+ //=========================================================================== |
|
560 |
+ // The following class is DEPRECATED, use class char_separator instead. |
|
561 |
+ // |
|
562 |
+ // The char_delimiters_separator class, which is a model of |
|
563 |
+ // TokenizerFunction. char_delimiters_separator breaks a string |
|
564 |
+ // into tokens based on character delimiters. There are 2 types of |
|
565 |
+ // delimiters. returnable delimiters can be returned as |
|
566 |
+ // tokens. These are often punctuation. nonreturnable delimiters |
|
567 |
+ // cannot be returned as tokens. These are often whitespace |
|
568 |
+ |
|
569 |
+ // The out of the box GCC 2.95 on cygwin does not have a char_traits class. |
|
570 |
+ template <class Char, |
|
571 |
+ class Tr = BOOST_DEDUCED_TYPENAME std::basic_string<Char>::traits_type > |
|
572 |
+ class char_delimiters_separator { |
|
573 |
+ private: |
|
574 |
+ |
|
575 |
+ typedef tokenizer_detail::traits_extension<Tr> Traits; |
|
576 |
+ typedef std::basic_string<Char,Tr> string_type; |
|
577 |
+ string_type returnable_; |
|
578 |
+ string_type nonreturnable_; |
|
579 |
+ bool return_delims_; |
|
580 |
+ bool no_ispunct_; |
|
581 |
+ bool no_isspace_; |
|
582 |
+ |
|
583 |
+ bool is_ret(Char E)const |
|
584 |
+ { |
|
585 |
+ if (returnable_.length()) |
|
586 |
+ return returnable_.find(E) != string_type::npos; |
|
587 |
+ else{ |
|
588 |
+ if (no_ispunct_) {return false;} |
|
589 |
+ else{ |
|
590 |
+ int r = Traits::ispunct(E); |
|
591 |
+ return r != 0; |
|
592 |
+ } |
|
593 |
+ } |
|
594 |
+ } |
|
595 |
+ bool is_nonret(Char E)const |
|
596 |
+ { |
|
597 |
+ if (nonreturnable_.length()) |
|
598 |
+ return nonreturnable_.find(E) != string_type::npos; |
|
599 |
+ else{ |
|
600 |
+ if (no_isspace_) {return false;} |
|
601 |
+ else{ |
|
602 |
+ int r = Traits::isspace(E); |
|
603 |
+ return r != 0; |
|
604 |
+ } |
|
605 |
+ } |
|
606 |
+ } |
|
607 |
+ |
|
608 |
+ public: |
|
609 |
+ explicit char_delimiters_separator(bool return_delims = false, |
|
610 |
+ const Char* returnable = 0, |
|
611 |
+ const Char* nonreturnable = 0) |
|
612 |
+ : returnable_(returnable ? returnable : string_type().c_str()), |
|
613 |
+ nonreturnable_(nonreturnable ? nonreturnable:string_type().c_str()), |
|
614 |
+ return_delims_(return_delims), no_ispunct_(returnable!=0), |
|
615 |
+ no_isspace_(nonreturnable!=0) { } |
|
616 |
+ |
|
617 |
+ void reset() { } |
|
618 |
+ |
|
619 |
+ public: |
|
620 |
+ |
|
621 |
+ template <typename InputIterator, typename Token> |
|
622 |
+ bool operator()(InputIterator& next, InputIterator end,Token& tok) { |
|
623 |
+ tok = Token(); |
|
624 |
+ |
|
625 |
+ // skip past all nonreturnable delims |
|
626 |
+ // skip past the returnable only if we are not returning delims |
|
627 |
+ for (;next!=end && ( is_nonret(*next) || (is_ret(*next) |
|
628 |
+ && !return_delims_ ) );++next) { } |
|
629 |
+ |
|
630 |
+ if (next == end) { |
|
631 |
+ return false; |
|
632 |
+ } |
|
633 |
+ |
|
634 |
+ // if we are to return delims and we are one a returnable one |
|
635 |
+ // move past it and stop |
|
636 |
+ if (is_ret(*next) && return_delims_) { |
|
637 |
+ tok+=*next; |
|
638 |
+ ++next; |
|
639 |
+ } |
|
640 |
+ else |
|
641 |
+ // append all the non delim characters |
|
642 |
+ for (;next!=end && !is_nonret(*next) && !is_ret(*next);++next) |
|
643 |
+ tok+=*next; |
|
644 |
+ |
|
645 |
+ |
|
646 |
+ return true; |
|
647 |
+ } |
|
648 |
+ }; |
|
649 |
+ |
|
650 |
+ |
|
651 |
+} //namespace boost |
|
652 |
+ |
|
653 |
+#endif |
From: Steffen Neumann <sneumann@ipb-halle.de>
git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/mzR@125184 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,665 +0,0 @@ |
1 |
-// Boost token_functions.hpp ------------------------------------------------// |
|
2 |
- |
|
3 |
-// Copyright John R. Bandela 2001. |
|
4 |
- |
|
5 |
-// Distributed under the Boost Software License, Version 1.0. (See |
|
6 |
-// accompanying file LICENSE_1_0.txt or copy at |
|
7 |
-// http://www.boost.org/LICENSE_1_0.txt) |
|
8 |
- |
|
9 |
-// See http://www.boost.org/libs/tokenizer/ for documentation. |
|
10 |
- |
|
11 |
-// Revision History: |
|
12 |
-// 01 Oct 2004 Joaquin M Lopez Munoz |
|
13 |
-// Workaround for a problem with string::assign in msvc-stlport |
|
14 |
-// 06 Apr 2004 John Bandela |
|
15 |
-// Fixed a bug involving using char_delimiter with a true input iterator |
|
16 |
-// 28 Nov 2003 Robert Zeh and John Bandela |
|
17 |
-// Converted into "fast" functions that avoid using += when |
|
18 |
-// the supplied iterator isn't an input_iterator; based on |
|
19 |
-// some work done at Archelon and a version that was checked into |
|
20 |
-// the boost CVS for a short period of time. |
|
21 |
-// 20 Feb 2002 John Maddock |
|
22 |
-// Removed using namespace std declarations and added |
|
23 |
-// workaround for BOOST_NO_STDC_NAMESPACE (the library |
|
24 |
-// can be safely mixed with regex). |
|
25 |
-// 06 Feb 2002 Jeremy Siek |
|
26 |
-// Added char_separator. |
|
27 |
-// 02 Feb 2002 Jeremy Siek |
|
28 |
-// Removed tabs and a little cleanup. |
|
29 |
- |
|
30 |
- |
|
31 |
-#ifndef BOOST_TOKEN_FUNCTIONS_JRB120303_HPP_ |
|
32 |
-#define BOOST_TOKEN_FUNCTIONS_JRB120303_HPP_ |
|
33 |
- |
|
34 |
-#include <vector> |
|
35 |
-#include <stdexcept> |
|
36 |
-#include <string> |
|
37 |
-#include <cctype> |
|
38 |
-#include <algorithm> // for find_if |
|
39 |
-#include <boost/config.hpp> |
|
40 |
-#include <boost/assert.hpp> |
|
41 |
-#include <boost/detail/workaround.hpp> |
|
42 |
-#include <boost/mpl/if.hpp> |
|
43 |
-#if !defined(BOOST_NO_CWCTYPE) |
|
44 |
-#include <cwctype> |
|
45 |
-#endif |
|
46 |
- |
|
47 |
-// |
|
48 |
-// the following must not be macros if we are to prefix them |
|
49 |
-// with std:: (they shouldn't be macros anyway...) |
|
50 |
-// |
|
51 |
-#ifdef ispunct |
|
52 |
-# undef ispunct |
|
53 |
-#endif |
|
54 |
-#ifdef iswpunct |
|
55 |
-# undef iswpunct |
|
56 |
-#endif |
|
57 |
-#ifdef isspace |
|
58 |
-# undef isspace |
|
59 |
-#endif |
|
60 |
-#ifdef iswspace |
|
61 |
-# undef iswspace |
|
62 |
-#endif |
|
63 |
-// |
|
64 |
-// fix namespace problems: |
|
65 |
-// |
|
66 |
-#ifdef BOOST_NO_STDC_NAMESPACE |
|
67 |
-namespace std{ |
|
68 |
- using ::ispunct; |
|
69 |
- using ::isspace; |
|
70 |
-#if !defined(BOOST_NO_CWCTYPE) |
|
71 |
- using ::iswpunct; |
|
72 |
- using ::iswspace; |
|
73 |
-#endif |
|
74 |
-} |
|
75 |
-#endif |
|
76 |
- |
|
77 |
-namespace boost{ |
|
78 |
- //=========================================================================== |
|
79 |
- // The escaped_list_separator class. Which is a model of TokenizerFunction |
|
80 |
- // An escaped list is a super-set of what is commonly known as a comma |
|
81 |
- // separated value (csv) list.It is separated into fields by a comma or |
|
82 |
- // other character. If the delimiting character is inside quotes, then it is |
|
83 |
- // counted as a regular character.To allow for embedded quotes in a field, |
|
84 |
- // there can be escape sequences using the \ much like C. |
|
85 |
- // The role of the comma, the quotation mark, and the escape |
|
86 |
- // character (backslash \), can be assigned to other characters. |
|
87 |
- |
|
88 |
- struct escaped_list_error : public std::runtime_error{ |
|
89 |
- escaped_list_error(const std::string& what_arg):std::runtime_error(what_arg) { } |
|
90 |
- }; |
|
91 |
- |
|
92 |
- |
|
93 |
-// The out of the box GCC 2.95 on cygwin does not have a char_traits class. |
|
94 |
-// MSVC does not like the following typename |
|
95 |
- template <class Char, |
|
96 |
- class Traits = BOOST_DEDUCED_TYPENAME std::basic_string<Char>::traits_type > |
|
97 |
- class escaped_list_separator { |
|
98 |
- |
|
99 |
- private: |
|
100 |
- typedef std::basic_string<Char,Traits> string_type; |
|
101 |
- struct char_eq { |
|
102 |
- Char e_; |
|
103 |
- char_eq(Char e):e_(e) { } |
|
104 |
- bool operator()(Char c) { |
|
105 |
- return Traits::eq(e_,c); |
|
106 |
- } |
|
107 |
- }; |
|
108 |
- string_type escape_; |
|
109 |
- string_type c_; |
|
110 |
- string_type quote_; |
|
111 |
- bool last_; |
|
112 |
- |
|
113 |
- bool is_escape(Char e) { |
|
114 |
- char_eq f(e); |
|
115 |
- return std::find_if(escape_.begin(),escape_.end(),f)!=escape_.end(); |
|
116 |
- } |
|
117 |
- bool is_c(Char e) { |
|
118 |
- char_eq f(e); |
|
119 |
- return std::find_if(c_.begin(),c_.end(),f)!=c_.end(); |
|
120 |
- } |
|
121 |
- bool is_quote(Char e) { |
|
122 |
- char_eq f(e); |
|
123 |
- return std::find_if(quote_.begin(),quote_.end(),f)!=quote_.end(); |
|
124 |
- } |
|
125 |
- template <typename iterator, typename Token> |
|
126 |
- void do_escape(iterator& next,iterator end,Token& tok) { |
|
127 |
- if (++next == end) |
|
128 |
- throw escaped_list_error(std::string("cannot end with escape")); |
|
129 |
- if (Traits::eq(*next,'n')) { |
|
130 |
- tok+='\n'; |
|
131 |
- return; |
|
132 |
- } |
|
133 |
- else if (is_quote(*next)) { |
|
134 |
- tok+=*next; |
|
135 |
- return; |
|
136 |
- } |
|
137 |
- else if (is_c(*next)) { |
|
138 |
- tok+=*next; |
|
139 |
- return; |
|
140 |
- } |
|
141 |
- else if (is_escape(*next)) { |
|
142 |
- tok+=*next; |
|
143 |
- return; |
|
144 |
- } |
|
145 |
- else |
|
146 |
- throw escaped_list_error(std::string("unknown escape sequence")); |
|
147 |
- } |
|
148 |
- |
|
149 |
- public: |
|
150 |
- |
|
151 |
- explicit escaped_list_separator(Char e = '\\', |
|
152 |
- Char c = ',',Char q = '\"') |
|
153 |
- : escape_(1,e), c_(1,c), quote_(1,q), last_(false) { } |
|
154 |
- |
|
155 |
- escaped_list_separator(string_type e, string_type c, string_type q) |
|
156 |
- : escape_(e), c_(c), quote_(q), last_(false) { } |
|
157 |
- |
|
158 |
- void reset() {last_=false;} |
|
159 |
- |
|
160 |
- template <typename InputIterator, typename Token> |
|
161 |
- bool operator()(InputIterator& next,InputIterator end,Token& tok) { |
|
162 |
- bool bInQuote = false; |
|
163 |
- tok = Token(); |
|
164 |
- |
|
165 |
- if (next == end) { |
|
166 |
- if (last_) { |
|
167 |
- last_ = false; |
|
168 |
- return true; |
|
169 |
- } |
|
170 |
- else |
|
171 |
- return false; |
|
172 |
- } |
|
173 |
- last_ = false; |
|
174 |
- for (;next != end;++next) { |
|
175 |
- if (is_escape(*next)) { |
|
176 |
- do_escape(next,end,tok); |
|
177 |
- } |
|
178 |
- else if (is_c(*next)) { |
|
179 |
- if (!bInQuote) { |
|
180 |
- // If we are not in quote, then we are done |
|
181 |
- ++next; |
|
182 |
- // The last character was a c, that means there is |
|
183 |
- // 1 more blank field |
|
184 |
- last_ = true; |
|
185 |
- return true; |
|
186 |
- } |
|
187 |
- else tok+=*next; |
|
188 |
- } |
|
189 |
- else if (is_quote(*next)) { |
|
190 |
- bInQuote=!bInQuote; |
|
191 |
- } |
|
192 |
- else { |
|
193 |
- tok += *next; |
|
194 |
- } |
|
195 |
- } |
|
196 |
- return true; |
|
197 |
- } |
|
198 |
- }; |
|
199 |
- |
|
200 |
- //=========================================================================== |
|
201 |
- // The classes here are used by offset_separator and char_separator to implement |
|
202 |
- // faster assigning of tokens using assign instead of += |
|
203 |
- |
|
204 |
- namespace tokenizer_detail { |
|
205 |
- //=========================================================================== |
|
206 |
- // Tokenizer was broken for wide character separators, at least on Windows, since |
|
207 |
- // CRT functions isspace etc only expect values in [0, 0xFF]. Debug build asserts |
|
208 |
- // if higher values are passed in. The traits extension class should take care of this. |
|
209 |
- // Assuming that the conditional will always get optimized out in the function |
|
210 |
- // implementations, argument types are not a problem since both forms of character classifiers |
|
211 |
- // expect an int. |
|
212 |
- |
|
213 |
-#if !defined(BOOST_NO_CWCTYPE) |
|
214 |
- template<typename traits, int N> |
|
215 |
- struct traits_extension_details : public traits { |
|
216 |
- typedef typename traits::char_type char_type; |
|
217 |
- static bool isspace(char_type c) |
|
218 |
- { |
|
219 |
- return std::iswspace(c) != 0; |
|
220 |
- } |
|
221 |
- static bool ispunct(char_type c) |
|
222 |
- { |
|
223 |
- return std::iswpunct(c) != 0; |
|
224 |
- } |
|
225 |
- }; |
|
226 |
- |
|
227 |
- template<typename traits> |
|
228 |
- struct traits_extension_details<traits, 1> : public traits { |
|
229 |
- typedef typename traits::char_type char_type; |
|
230 |
- static bool isspace(char_type c) |
|
231 |
- { |
|
232 |
- return std::isspace(c) != 0; |
|
233 |
- } |
|
234 |
- static bool ispunct(char_type c) |
|
235 |
- { |
|
236 |
- return std::ispunct(c) != 0; |
|
237 |
- } |
|
238 |
- }; |
|
239 |
-#endif |
|
240 |
- |
|
241 |
- |
|
242 |
- // In case there is no cwctype header, we implement the checks manually. |
|
243 |
- // We make use of the fact that the tested categories should fit in ASCII. |
|
244 |
- template<typename traits> |
|
245 |