Browse code

Updating pwiz to 3_0_21263

Steffen Neumann authored on 23/09/2021 12:34:25
Showing 1 changed files
1 1
new file mode 100755
... ...
@@ -0,0 +1,653 @@
1
+// Boost token_functions.hpp  ------------------------------------------------//
2
+
3
+// Copyright John R. Bandela 2001.
4
+
5
+// Distributed under the Boost Software License, Version 1.0. (See
6
+// accompanying file LICENSE_1_0.txt or copy at
7
+// http://www.boost.org/LICENSE_1_0.txt)
8
+
9
+// See http://www.boost.org/libs/tokenizer/ for documentation.
10
+
11
+// Revision History:
12
+// 01 Oct 2004   Joaquin M Lopez Munoz
13
+//      Workaround for a problem with string::assign in msvc-stlport
14
+// 06 Apr 2004   John Bandela
15
+//      Fixed a bug involving using char_delimiter with a true input iterator
16
+// 28 Nov 2003   Robert Zeh and John Bandela
17
+//      Converted into "fast" functions that avoid using += when
18
+//      the supplied iterator isn't an input_iterator; based on
19
+//      some work done at Archelon and a version that was checked into
20
+//      the boost CVS for a short period of time.
21
+// 20 Feb 2002   John Maddock
22
+//      Removed using namespace std declarations and added
23
+//      workaround for BOOST_NO_STDC_NAMESPACE (the library
24
+//      can be safely mixed with regex).
25
+// 06 Feb 2002   Jeremy Siek
26
+//      Added char_separator.
27
+// 02 Feb 2002   Jeremy Siek
28
+//      Removed tabs and a little cleanup.
29
+
30
+
31
+#ifndef BOOST_TOKEN_FUNCTIONS_JRB120303_HPP_
32
+#define BOOST_TOKEN_FUNCTIONS_JRB120303_HPP_
33
+
34
+#include <vector>
35
+#include <stdexcept>
36
+#include <string>
37
+#include <cctype>
38
+#include <algorithm> // for find_if
39
+#include <boost/config.hpp>
40
+#include <boost/assert.hpp>
41
+#include <boost/type_traits/is_pointer.hpp>
42
+#include <boost/detail/workaround.hpp>
43
+#include <boost/mpl/if.hpp>
44
+#include <boost/throw_exception.hpp>
45
+#if !defined(BOOST_NO_CWCTYPE)
46
+#include <cwctype>
47
+#endif
48
+
49
+//
50
+// the following must not be macros if we are to prefix them
51
+// with std:: (they shouldn't be macros anyway...)
52
+//
53
+#ifdef ispunct
54
+#  undef ispunct
55
+#endif
56
+#ifdef iswpunct
57
+#  undef iswpunct
58
+#endif
59
+#ifdef isspace
60
+#  undef isspace
61
+#endif
62
+#ifdef iswspace
63
+#  undef iswspace
64
+#endif
65
+//
66
+// fix namespace problems:
67
+//
68
+#ifdef BOOST_NO_STDC_NAMESPACE
69
+namespace std{
70
+ using ::ispunct;
71
+ using ::isspace;
72
+#if !defined(BOOST_NO_CWCTYPE)
73
+ using ::iswpunct;
74
+ using ::iswspace;
75
+#endif
76
+}
77
+#endif
78
+
79
+namespace boost{
80
+  //===========================================================================
81
+  // The escaped_list_separator class. Which is a model of TokenizerFunction
82
+  // An escaped list is a super-set of what is commonly known as a comma
83
+  // separated value (csv) list. It is separated into fields by a comma or
84
+  // other character. If the delimiting character is inside quotes, then it is
85
+  // counted as a regular character. To allow for embedded quotes in a field,
86
+  // there can be escape sequences using the \ much like C.
87
+  // The role of the comma, the quotation mark, and the escape
88
+  // character (backslash \), can be assigned to other characters.
89
+
90
+  struct escaped_list_error : public std::runtime_error{
91
+    escaped_list_error(const std::string& what_arg):std::runtime_error(what_arg) { }
92
+  };
93
+
94
+
95
+// The out of the box GCC 2.95 on cygwin does not have a char_traits class.
96
+// MSVC does not like the following typename
97
+  template <class Char,
98
+    class Traits = BOOST_DEDUCED_TYPENAME std::basic_string<Char>::traits_type >
99
+  class escaped_list_separator {
100
+
101
+  private:
102
+    typedef std::basic_string<Char,Traits> string_type;
103
+    struct char_eq {
104
+      Char e_;
105
+      char_eq(Char e):e_(e) { }
106
+      bool operator()(Char c) {
107
+        return Traits::eq(e_,c);
108
+      }
109
+    };
110
+    string_type  escape_;
111
+    string_type  c_;
112
+    string_type  quote_;
113
+    bool last_;
114
+
115
+    bool is_escape(Char e) {
116
+      char_eq f(e);
117
+      return std::find_if(escape_.begin(),escape_.end(),f)!=escape_.end();
118
+    }
119
+    bool is_c(Char e) {
120
+      char_eq f(e);
121
+      return std::find_if(c_.begin(),c_.end(),f)!=c_.end();
122
+    }
123
+    bool is_quote(Char e) {
124
+      char_eq f(e);
125
+      return std::find_if(quote_.begin(),quote_.end(),f)!=quote_.end();
126
+    }
127
+    template <typename iterator, typename Token>
128
+    void do_escape(iterator& next,iterator end,Token& tok) {
129
+      if (++next == end)
130
+        BOOST_THROW_EXCEPTION(escaped_list_error(std::string("cannot end with escape")));
131
+      if (Traits::eq(*next,'n')) {
132
+        tok+='\n';
133
+        return;
134
+      }
135
+      else if (is_quote(*next)) {
136
+        tok+=*next;
137
+        return;
138
+      }
139
+      else if (is_c(*next)) {
140
+        tok+=*next;
141
+        return;
142
+      }
143
+      else if (is_escape(*next)) {
144
+        tok+=*next;
145
+        return;
146
+      }
147
+      else
148
+        BOOST_THROW_EXCEPTION(escaped_list_error(std::string("unknown escape sequence")));
149
+    }
150
+
151
+    public:
152
+
153
+    explicit escaped_list_separator(Char  e = '\\',
154
+                                    Char c = ',',Char  q = '\"')
155
+      : escape_(1,e), c_(1,c), quote_(1,q), last_(false) { }
156
+
157
+    escaped_list_separator(string_type e, string_type c, string_type q)
158
+      : escape_(e), c_(c), quote_(q), last_(false) { }
159
+
160
+    void reset() {last_=false;}
161
+
162
+    template <typename InputIterator, typename Token>
163
+    bool operator()(InputIterator& next,InputIterator end,Token& tok) {
164
+      bool bInQuote = false;
165
+      tok = Token();
166
+
167
+      if (next == end) {
168
+        if (last_) {
169
+          last_ = false;
170
+          return true;
171
+        }
172
+        else
173
+          return false;
174
+      }
175
+      last_ = false;
176
+      for (;next != end;++next) {
177
+        if (is_escape(*next)) {
178
+          do_escape(next,end,tok);
179
+        }
180
+        else if (is_c(*next)) {
181
+          if (!bInQuote) {
182
+            // If we are not in quote, then we are done
183
+            ++next;
184
+            // The last character was a c, that means there is
185
+            // 1 more blank field
186
+            last_ = true;
187
+            return true;
188
+          }
189
+          else tok+=*next;
190
+        }
191
+        else if (is_quote(*next)) {
192
+          bInQuote=!bInQuote;
193
+        }
194
+        else {
195
+          tok += *next;
196
+        }
197
+      }
198
+      return true;
199
+    }
200
+  };
201
+
202
+  //===========================================================================
203
+  // The classes here are used by offset_separator and char_separator to implement
204
+  // faster assigning of tokens using assign instead of +=
205
+
206
+  namespace tokenizer_detail {
207
+  //===========================================================================
208
+  // Tokenizer was broken for wide character separators, at least on Windows, since
209
+  // CRT functions isspace etc only expect values in [0, 0xFF]. Debug build asserts
210
+  // if higher values are passed in. The traits extension class should take care of this.
211
+  // Assuming that the conditional will always get optimized out in the function
212
+  // implementations, argument types are not a problem since both forms of character classifiers
213
+  // expect an int.
214
+
215
+#if !defined(BOOST_NO_CWCTYPE)
216
+  template<typename traits, int N>
217
+  struct traits_extension_details : public traits {
218
+    typedef typename traits::char_type char_type;
219
+    static bool isspace(char_type c)
220
+    {
221
+       return std::iswspace(c) != 0;
222
+    }
223
+    static bool ispunct(char_type c)
224
+    {
225
+       return std::iswpunct(c) != 0;
226
+    }
227
+  };
228
+
229
+  template<typename traits>
230
+  struct traits_extension_details<traits, 1> : public traits {
231
+    typedef typename traits::char_type char_type;
232
+    static bool isspace(char_type c)
233
+    {
234
+       return std::isspace(c) != 0;
235
+    }
236
+    static bool ispunct(char_type c)
237
+    {
238
+       return std::ispunct(c) != 0;
239
+    }
240
+  };
241
+#endif
242
+
243
+
244
+  // In case there is no cwctype header, we implement the checks manually.
245
+  // We make use of the fact that the tested categories should fit in ASCII.
246
+  template<typename traits>
247
+  struct traits_extension : public traits {
248
+    typedef typename traits::char_type char_type;
249
+    static bool isspace(char_type c)
250
+    {
251
+#if !defined(BOOST_NO_CWCTYPE)
252
+      return traits_extension_details<traits, sizeof(char_type)>::isspace(c);
253
+#else
254
+      return static_cast< unsigned >(c) <= 255 && std::isspace(c) != 0;
255
+#endif
256
+    }
257
+
258
+    static bool ispunct(char_type c)
259
+    {
260
+#if !defined(BOOST_NO_CWCTYPE)
261
+      return traits_extension_details<traits, sizeof(char_type)>::ispunct(c);
262
+#else
263
+      return static_cast< unsigned >(c) <= 255 && std::ispunct(c) != 0;
264
+#endif
265
+    }
266
+  };
267
+
268
+  // The assign_or_plus_equal struct contains functions that implement
269
+  // assign, +=, and clearing based on the iterator type.  The
270
+  // generic case does nothing for plus_equal and clearing, while
271
+  // passing through the call for assign.
272
+  //
273
+  // When an input iterator is being used, the situation is reversed.
274
+  // The assign method does nothing, plus_equal invokes operator +=,
275
+  // and the clearing method sets the supplied token to the default
276
+  // token constructor's result.
277
+  //
278
+
279
+  template<class IteratorTag>
280
+  struct assign_or_plus_equal {
281
+    template<class Iterator, class Token>
282
+    static void assign(Iterator b, Iterator e, Token &t) {
283
+      t.assign(b, e);
284
+    }
285
+
286
+    template<class Token, class Value>
287
+    static void plus_equal(Token &, const Value &) { }
288
+
289
+    // If we are doing an assign, there is no need for the
290
+    // the clear.
291
+    //
292
+    template<class Token>
293
+    static void clear(Token &) { }
294
+  };
295
+
296
+  template <>
297
+  struct assign_or_plus_equal<std::input_iterator_tag> {
298
+    template<class Iterator, class Token>
299
+    static void assign(Iterator , Iterator , Token &) { }
300
+    template<class Token, class Value>
301
+    static void plus_equal(Token &t, const Value &v) {
302
+      t += v;
303
+    }
304
+    template<class Token>
305
+    static void clear(Token &t) {
306
+      t = Token();
307
+    }
308
+  };
309
+
310
+
311
+  template<class Iterator>
312
+  struct pointer_iterator_category{
313
+    typedef std::random_access_iterator_tag type;
314
+  };
315
+
316
+
317
+  template<class Iterator>
318
+  struct class_iterator_category{
319
+    typedef typename Iterator::iterator_category type;
320
+  };
321
+
322
+
323
+
324
+  // This portably gets the iterator_tag without partial template specialization
325
+  template<class Iterator>
326
+    struct get_iterator_category{
327
+    typedef typename mpl::if_<is_pointer<Iterator>,
328
+      pointer_iterator_category<Iterator>,
329
+      class_iterator_category<Iterator>
330
+    >::type cat;
331
+
332
+    typedef typename cat::type iterator_category;
333
+  };
334
+
335
+
336
+  } // namespace tokenizer_detail
337
+
338
+
339
+  //===========================================================================
340
+  // The offset_separator class, which is a model of TokenizerFunction.
341
+  // Offset breaks a string into tokens based on a range of offsets
342
+
343
+  class offset_separator {
344
+  private:
345
+
346
+    std::vector<int> offsets_;
347
+    unsigned int current_offset_;
348
+    bool wrap_offsets_;
349
+    bool return_partial_last_;
350
+
351
+  public:
352
+    template <typename Iter>
353
+    offset_separator(Iter begin, Iter end, bool wrap_offsets = true,
354
+                     bool return_partial_last = true)
355
+      : offsets_(begin,end), current_offset_(0),
356
+        wrap_offsets_(wrap_offsets),
357
+        return_partial_last_(return_partial_last) { }
358
+
359
+    offset_separator()
360
+      : offsets_(1,1), current_offset_(),
361
+        wrap_offsets_(true), return_partial_last_(true) { }
362
+
363
+    void reset() {
364
+      current_offset_ = 0;
365
+    }
366
+
367
+    template <typename InputIterator, typename Token>
368
+    bool operator()(InputIterator& next, InputIterator end, Token& tok)
369
+    {
370
+      typedef tokenizer_detail::assign_or_plus_equal<
371
+        BOOST_DEDUCED_TYPENAME tokenizer_detail::get_iterator_category<
372
+          InputIterator
373
+        >::iterator_category
374
+      > assigner;
375
+
376
+      BOOST_ASSERT(!offsets_.empty());
377
+
378
+      assigner::clear(tok);
379
+      InputIterator start(next);
380
+
381
+      if (next == end)
382
+        return false;
383
+
384
+      if (current_offset_ == offsets_.size())
385
+      {
386
+        if (wrap_offsets_)
387
+          current_offset_=0;
388
+        else
389
+          return false;
390
+      }
391
+
392
+      int c = offsets_[current_offset_];
393
+      int i = 0;
394
+      for (; i < c; ++i) {
395
+        if (next == end)break;
396
+        assigner::plus_equal(tok,*next++);
397
+      }
398
+      assigner::assign(start,next,tok);
399
+
400
+      if (!return_partial_last_)
401
+        if (i < (c-1) )
402
+          return false;
403
+
404
+      ++current_offset_;
405
+      return true;
406
+    }
407
+  };
408
+
409
+
410
+  //===========================================================================
411
+  // The char_separator class breaks a sequence of characters into
412
+  // tokens based on the character delimiters (very much like bad old
413
+  // strtok). A delimiter character can either be kept or dropped. A
414
+  // kept delimiter shows up as an output token, whereas a dropped
415
+  // delimiter does not.
416
+
417
+  // This class replaces the char_delimiters_separator class. The
418
+  // constructor for the char_delimiters_separator class was too
419
+  // confusing and needed to be deprecated. However, because of the
420
+  // default arguments to the constructor, adding the new constructor
421
+  // would cause ambiguity, so instead I deprecated the whole class.
422
+  // The implementation of the class was also simplified considerably.
423
+
424
+  enum empty_token_policy { drop_empty_tokens, keep_empty_tokens };
425
+
426
+  // The out of the box GCC 2.95 on cygwin does not have a char_traits class.
427
+  template <typename Char,
428
+    typename Tr = BOOST_DEDUCED_TYPENAME std::basic_string<Char>::traits_type >
429
+  class char_separator
430
+  {
431
+    typedef tokenizer_detail::traits_extension<Tr> Traits;
432
+    typedef std::basic_string<Char,Tr> string_type;
433
+  public:
434
+    explicit
435
+    char_separator(const Char* dropped_delims,
436
+                   const Char* kept_delims = 0,
437
+                   empty_token_policy empty_tokens = drop_empty_tokens)
438
+      : m_dropped_delims(dropped_delims),
439
+        m_use_ispunct(false),
440
+        m_use_isspace(false),
441
+        m_empty_tokens(empty_tokens),
442
+        m_output_done(false)
443
+    {
444
+      // Borland workaround
445
+      if (kept_delims)
446
+        m_kept_delims = kept_delims;
447
+    }
448
+
449
+                // use ispunct() for kept delimiters and isspace for dropped.
450
+    explicit
451
+    char_separator()
452
+      : m_use_ispunct(true),
453
+        m_use_isspace(true),
454
+        m_empty_tokens(drop_empty_tokens),
455
+        m_output_done(false) { }
456
+
457
+    void reset() { }
458
+
459
+    template <typename InputIterator, typename Token>
460
+    bool operator()(InputIterator& next, InputIterator end, Token& tok)
461
+    {
462
+      typedef tokenizer_detail::assign_or_plus_equal<
463
+        BOOST_DEDUCED_TYPENAME tokenizer_detail::get_iterator_category<
464
+          InputIterator
465
+        >::iterator_category
466
+      > assigner;
467
+
468
+      assigner::clear(tok);
469
+
470
+      // skip past all dropped_delims
471
+      if (m_empty_tokens == drop_empty_tokens)
472
+        for (; next != end  && is_dropped(*next); ++next)
473
+          { }
474
+
475
+      InputIterator start(next);
476
+
477
+      if (m_empty_tokens == drop_empty_tokens) {
478
+
479
+        if (next == end)
480
+          return false;
481
+
482
+
483
+        // if we are on a kept_delims move past it and stop
484
+        if (is_kept(*next)) {
485
+          assigner::plus_equal(tok,*next);
486
+          ++next;
487
+        } else
488
+          // append all the non delim characters
489
+          for (; next != end && !is_dropped(*next) && !is_kept(*next); ++next)
490
+            assigner::plus_equal(tok,*next);
491
+      }
492
+      else { // m_empty_tokens == keep_empty_tokens
493
+
494
+        // Handle empty token at the end
495
+        if (next == end)
496
+        {
497
+          if (m_output_done == false)
498
+          {
499
+            m_output_done = true;
500
+            assigner::assign(start,next,tok);
501
+            return true;
502
+          }
503
+          else
504
+            return false;
505
+        }
506
+
507
+        if (is_kept(*next)) {
508
+          if (m_output_done == false)
509
+            m_output_done = true;
510
+          else {
511
+            assigner::plus_equal(tok,*next);
512
+            ++next;
513
+            m_output_done = false;
514
+          }
515
+        }
516
+        else if (m_output_done == false && is_dropped(*next)) {
517
+          m_output_done = true;
518
+        }
519
+        else {
520
+          if (is_dropped(*next))
521
+            start=++next;
522
+          for (; next != end && !is_dropped(*next) && !is_kept(*next); ++next)
523
+            assigner::plus_equal(tok,*next);
524
+          m_output_done = true;
525
+        }
526
+      }
527
+      assigner::assign(start,next,tok);
528
+      return true;
529
+    }
530
+
531
+  private:
532
+    string_type m_kept_delims;
533
+    string_type m_dropped_delims;
534
+    bool m_use_ispunct;
535
+    bool m_use_isspace;
536
+    empty_token_policy m_empty_tokens;
537
+    bool m_output_done;
538
+
539
+    bool is_kept(Char E) const
540
+    {
541
+      if (m_kept_delims.length())
542
+        return m_kept_delims.find(E) != string_type::npos;
543
+      else if (m_use_ispunct) {
544
+        return Traits::ispunct(E) != 0;
545
+      } else
546
+        return false;
547
+    }
548
+    bool is_dropped(Char E) const
549
+    {
550
+      if (m_dropped_delims.length())
551
+        return m_dropped_delims.find(E) != string_type::npos;
552
+      else if (m_use_isspace) {
553
+        return Traits::isspace(E) != 0;
554
+      } else
555
+        return false;
556
+    }
557
+  };
558
+
559
+  //===========================================================================
560
+  // The following class is DEPRECATED, use class char_separator instead.
561
+  //
562
+  // The char_delimiters_separator class, which is a model of
563
+  // TokenizerFunction.  char_delimiters_separator breaks a string
564
+  // into tokens based on character delimiters. There are 2 types of
565
+  // delimiters. returnable delimiters can be returned as
566
+  // tokens. These are often punctuation. nonreturnable delimiters
567
+  // cannot be returned as tokens. These are often whitespace
568
+
569
+  // The out of the box GCC 2.95 on cygwin does not have a char_traits class.
570
+  template <class Char,
571
+    class Tr = BOOST_DEDUCED_TYPENAME std::basic_string<Char>::traits_type >
572
+  class char_delimiters_separator {
573
+  private:
574
+
575
+    typedef tokenizer_detail::traits_extension<Tr> Traits;
576
+    typedef std::basic_string<Char,Tr> string_type;
577
+    string_type returnable_;
578
+    string_type nonreturnable_;
579
+    bool return_delims_;
580
+    bool no_ispunct_;
581
+    bool no_isspace_;
582
+
583
+    bool is_ret(Char E)const
584
+    {
585
+      if (returnable_.length())
586
+        return  returnable_.find(E) != string_type::npos;
587
+      else{
588
+        if (no_ispunct_) {return false;}
589
+        else{
590
+          int r = Traits::ispunct(E);
591
+          return r != 0;
592
+        }
593
+      }
594
+    }
595
+    bool is_nonret(Char E)const
596
+    {
597
+      if (nonreturnable_.length())
598
+        return  nonreturnable_.find(E) != string_type::npos;
599
+      else{
600
+        if (no_isspace_) {return false;}
601
+        else{
602
+          int r = Traits::isspace(E);
603
+          return r != 0;
604
+        }
605
+      }
606
+    }
607
+
608
+  public:
609
+    explicit char_delimiters_separator(bool return_delims = false,
610
+                                       const Char* returnable = 0,
611
+                                       const Char* nonreturnable = 0)
612
+      : returnable_(returnable ? returnable : string_type().c_str()),
613
+        nonreturnable_(nonreturnable ? nonreturnable:string_type().c_str()),
614
+        return_delims_(return_delims), no_ispunct_(returnable!=0),
615
+        no_isspace_(nonreturnable!=0) { }
616
+
617
+    void reset() { }
618
+
619
+  public:
620
+
621
+     template <typename InputIterator, typename Token>
622
+     bool operator()(InputIterator& next, InputIterator end,Token& tok) {
623
+     tok = Token();
624
+
625
+     // skip past all nonreturnable delims
626
+     // skip past the returnable only if we are not returning delims
627
+     for (;next!=end && ( is_nonret(*next) || (is_ret(*next)
628
+       && !return_delims_ ) );++next) { }
629
+
630
+     if (next == end) {
631
+       return false;
632
+     }
633
+
634
+     // if we are to return delims and we are one a returnable one
635
+     // move past it and stop
636
+     if (is_ret(*next) && return_delims_) {
637
+       tok+=*next;
638
+       ++next;
639
+     }
640
+     else
641
+       // append all the non delim characters
642
+       for (;next!=end && !is_nonret(*next) && !is_ret(*next);++next)
643
+         tok+=*next;
644
+
645
+
646
+     return true;
647
+   }
648
+  };
649
+
650
+
651
+} //namespace boost
652
+
653
+#endif
Browse code

drop old boost

From: Steffen Neumann <sneumann@ipb-halle.de>

git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/mzR@125184 bc3139a8-67e5-0310-9ffc-ced21a209358

l.gatto authored on 15/12/2016 10:41:45
Showing 1 changed files
1 1
deleted file mode 100644
... ...
@@ -1,665 +0,0 @@
1
-// Boost token_functions.hpp  ------------------------------------------------//
2
-
3
-// Copyright John R. Bandela 2001.
4
-
5
-// Distributed under the Boost Software License, Version 1.0. (See
6
-// accompanying file LICENSE_1_0.txt or copy at
7
-// http://www.boost.org/LICENSE_1_0.txt)
8
-
9
-// See http://www.boost.org/libs/tokenizer/ for documentation.
10
-
11
-// Revision History:
12
-// 01 Oct 2004   Joaquin M Lopez Munoz
13
-//      Workaround for a problem with string::assign in msvc-stlport
14
-// 06 Apr 2004   John Bandela
15
-//      Fixed a bug involving using char_delimiter with a true input iterator
16
-// 28 Nov 2003   Robert Zeh and John Bandela
17
-//      Converted into "fast" functions that avoid using += when
18
-//      the supplied iterator isn't an input_iterator; based on
19
-//      some work done at Archelon and a version that was checked into
20
-//      the boost CVS for a short period of time.
21
-// 20 Feb 2002   John Maddock
22
-//      Removed using namespace std declarations and added
23
-//      workaround for BOOST_NO_STDC_NAMESPACE (the library
24
-//      can be safely mixed with regex).
25
-// 06 Feb 2002   Jeremy Siek
26
-//      Added char_separator.
27
-// 02 Feb 2002   Jeremy Siek
28
-//      Removed tabs and a little cleanup.
29
-
30
-
31
-#ifndef BOOST_TOKEN_FUNCTIONS_JRB120303_HPP_
32
-#define BOOST_TOKEN_FUNCTIONS_JRB120303_HPP_
33
-
34
-#include <vector>
35
-#include <stdexcept>
36
-#include <string>
37
-#include <cctype>
38
-#include <algorithm> // for find_if
39
-#include <boost/config.hpp>
40
-#include <boost/assert.hpp>
41
-#include <boost/detail/workaround.hpp>
42
-#include <boost/mpl/if.hpp>
43
-#if !defined(BOOST_NO_CWCTYPE)
44
-#include <cwctype>
45
-#endif
46
-
47
-//
48
-// the following must not be macros if we are to prefix them
49
-// with std:: (they shouldn't be macros anyway...)
50
-//
51
-#ifdef ispunct
52
-#  undef ispunct
53
-#endif
54
-#ifdef iswpunct
55
-#  undef iswpunct
56
-#endif
57
-#ifdef isspace
58
-#  undef isspace
59
-#endif
60
-#ifdef iswspace
61
-#  undef iswspace
62
-#endif
63
-//
64
-// fix namespace problems:
65
-//
66
-#ifdef BOOST_NO_STDC_NAMESPACE
67
-namespace std{
68
- using ::ispunct;
69
- using ::isspace;
70
-#if !defined(BOOST_NO_CWCTYPE)
71
- using ::iswpunct;
72
- using ::iswspace;
73
-#endif
74
-}
75
-#endif
76
-
77
-namespace boost{
78
-  //===========================================================================
79
-  // The escaped_list_separator class. Which is a model of TokenizerFunction
80
-  // An escaped list is a super-set of what is commonly known as a comma
81
-  // separated value (csv) list. It is separated into fields by a comma or
82
-  // other character. If the delimiting character is inside quotes, then it is
83
-  // counted as a regular character. To allow for embedded quotes in a field,
84
-  // there can be escape sequences using the \ much like C.
85
-  // The role of the comma, the quotation mark, and the escape
86
-  // character (backslash \), can be assigned to other characters.
87
-
88
-  struct escaped_list_error : public std::runtime_error{
89
-    escaped_list_error(const std::string& what_arg):std::runtime_error(what_arg) { }
90
-  };
91
-
92
-
93
-// The out of the box GCC 2.95 on cygwin does not have a char_traits class.
94
-// MSVC does not like the following typename
95
-  template <class Char,
96
-    class Traits = BOOST_DEDUCED_TYPENAME std::basic_string<Char>::traits_type >
97
-  class escaped_list_separator {
98
-
99
-  private:
100
-    typedef std::basic_string<Char,Traits> string_type;
101
-    struct char_eq {
102
-      Char e_;
103
-      char_eq(Char e):e_(e) { }
104
-      bool operator()(Char c) {
105
-        return Traits::eq(e_,c);
106
-      }
107
-    };
108
-    string_type  escape_;
109
-    string_type  c_;
110
-    string_type  quote_;
111
-    bool last_;
112
-
113
-    bool is_escape(Char e) {
114
-      char_eq f(e);
115
-      return std::find_if(escape_.begin(),escape_.end(),f)!=escape_.end();
116
-    }
117
-    bool is_c(Char e) {
118
-      char_eq f(e);
119
-      return std::find_if(c_.begin(),c_.end(),f)!=c_.end();
120
-    }
121
-    bool is_quote(Char e) {
122
-      char_eq f(e);
123
-      return std::find_if(quote_.begin(),quote_.end(),f)!=quote_.end();
124
-    }
125
-    template <typename iterator, typename Token>
126
-    void do_escape(iterator& next,iterator end,Token& tok) {
127
-      if (++next == end)
128
-        throw escaped_list_error(std::string("cannot end with escape"));
129
-      if (Traits::eq(*next,'n')) {
130
-        tok+='\n';
131
-        return;
132
-      }
133
-      else if (is_quote(*next)) {
134
-        tok+=*next;
135
-        return;
136
-      }
137
-      else if (is_c(*next)) {
138
-        tok+=*next;
139
-        return;
140
-      }
141
-      else if (is_escape(*next)) {
142
-        tok+=*next;
143
-        return;
144
-      }
145
-      else
146
-        throw escaped_list_error(std::string("unknown escape sequence"));
147
-    }
148
-
149
-    public:
150
-
151
-    explicit escaped_list_separator(Char  e = '\\',
152
-                                    Char c = ',',Char  q = '\"')
153
-      : escape_(1,e), c_(1,c), quote_(1,q), last_(false) { }
154
-
155
-    escaped_list_separator(string_type e, string_type c, string_type q)
156
-      : escape_(e), c_(c), quote_(q), last_(false) { }
157
-
158
-    void reset() {last_=false;}
159
-
160
-    template <typename InputIterator, typename Token>
161
-    bool operator()(InputIterator& next,InputIterator end,Token& tok) {
162
-      bool bInQuote = false;
163
-      tok = Token();
164
-
165
-      if (next == end) {
166
-        if (last_) {
167
-          last_ = false;
168
-          return true;
169
-        }
170
-        else
171
-          return false;
172
-      }
173
-      last_ = false;
174
-      for (;next != end;++next) {
175
-        if (is_escape(*next)) {
176
-          do_escape(next,end,tok);
177
-        }
178
-        else if (is_c(*next)) {
179
-          if (!bInQuote) {
180
-            // If we are not in quote, then we are done
181
-            ++next;
182
-            // The last character was a c, that means there is
183
-            // 1 more blank field
184
-            last_ = true;
185
-            return true;
186
-          }
187
-          else tok+=*next;
188
-        }
189
-        else if (is_quote(*next)) {
190
-          bInQuote=!bInQuote;
191
-        }
192
-        else {
193
-          tok += *next;
194
-        }
195
-      }
196
-      return true;
197
-    }
198
-  };
199
-
200
-  //===========================================================================
201
-  // The classes here are used by offset_separator and char_separator to implement
202
-  // faster assigning of tokens using assign instead of +=
203
-
204
-  namespace tokenizer_detail {
205
-  //===========================================================================
206
-  // Tokenizer was broken for wide character separators, at least on Windows, since
207
-  // CRT functions isspace etc only expect values in [0, 0xFF]. Debug build asserts
208
-  // if higher values are passed in. The traits extension class should take care of this.
209
-  // Assuming that the conditional will always get optimized out in the function
210
-  // implementations, argument types are not a problem since both forms of character classifiers
211
-  // expect an int.
212
-
213
-#if !defined(BOOST_NO_CWCTYPE)
214
-  template<typename traits, int N>
215
-  struct traits_extension_details : public traits {
216
-    typedef typename traits::char_type char_type;
217
-    static bool isspace(char_type c)
218
-    {
219
-       return std::iswspace(c) != 0;
220
-    }
221
-    static bool ispunct(char_type c)
222
-    {
223
-       return std::iswpunct(c) != 0;
224
-    }
225
-  };
226
-
227
-  template<typename traits>
228
-  struct traits_extension_details<traits, 1> : public traits {
229
-    typedef typename traits::char_type char_type;
230
-    static bool isspace(char_type c)
231
-    {
232
-       return std::isspace(c) != 0;
233
-    }
234
-    static bool ispunct(char_type c)
235
-    {
236
-       return std::ispunct(c) != 0;
237
-    }
238
-  };
239
-#endif
240
-
241
-
242
-  // In case there is no cwctype header, we implement the checks manually.
243
-  // We make use of the fact that the tested categories should fit in ASCII.
244
-  template<typename traits>
245