git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/seqbias@57223 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,8 +1,10 @@ |
1 |
-#pragma once |
|
2 |
- |
|
3 | 1 |
#ifndef REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66 |
4 | 2 |
#define REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66 |
5 | 3 |
|
4 |
+#if !defined(__GNUC__) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4) // GCC supports "pragma once" correctly since 3.4 |
|
5 |
+#pragma once |
|
6 |
+#endif |
|
7 |
+ |
|
6 | 8 |
|
7 | 9 |
#include "stream.h" |
8 | 10 |
#include "stringsource.h" |
... | ... |
@@ -58,7 +60,13 @@ namespace YAML |
58 | 60 |
template<> |
59 | 61 |
inline bool RegEx::IsValidSource<StringCharSource>(const StringCharSource&source) const |
60 | 62 |
{ |
61 |
- return source || m_op == REGEX_EMPTY; |
|
63 |
+ switch(m_op) { |
|
64 |
+ case REGEX_MATCH: |
|
65 |
+ case REGEX_RANGE: |
|
66 |
+ return source; |
|
67 |
+ default: |
|
68 |
+ return true; |
|
69 |
+ } |
|
62 | 70 |
} |
63 | 71 |
|
64 | 72 |
template <typename Source> |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/seqbias@52013 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,178 @@ |
1 |
+#pragma once |
|
2 |
+ |
|
3 |
+#ifndef REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66 |
|
4 |
+#define REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66 |
|
5 |
+ |
|
6 |
+ |
|
7 |
+#include "stream.h" |
|
8 |
+#include "stringsource.h" |
|
9 |
+#include "streamcharsource.h" |
|
10 |
+ |
|
11 |
+namespace YAML |
|
12 |
+{ |
|
13 |
+ // query matches |
|
14 |
+ inline bool RegEx::Matches(char ch) const { |
|
15 |
+ std::string str; |
|
16 |
+ str += ch; |
|
17 |
+ return Matches(str); |
|
18 |
+ } |
|
19 |
+ |
|
20 |
+ inline bool RegEx::Matches(const std::string& str) const { |
|
21 |
+ return Match(str) >= 0; |
|
22 |
+ } |
|
23 |
+ |
|
24 |
+ inline bool RegEx::Matches(const Stream& in) const { |
|
25 |
+ return Match(in) >= 0; |
|
26 |
+ } |
|
27 |
+ |
|
28 |
+ template <typename Source> |
|
29 |
+ inline bool RegEx::Matches(const Source& source) const { |
|
30 |
+ return Match(source) >= 0; |
|
31 |
+ } |
|
32 |
+ |
|
33 |
+ // Match |
|
34 |
+ // . Matches the given string against this regular expression. |
|
35 |
+ // . Returns the number of characters matched. |
|
36 |
+ // . Returns -1 if no characters were matched (the reason for |
|
37 |
+ // not returning zero is that we may have an empty regex |
|
38 |
+ // which is ALWAYS successful at matching zero characters). |
|
39 |
+ // . REMEMBER that we only match from the start of the buffer! |
|
40 |
+ inline int RegEx::Match(const std::string& str) const |
|
41 |
+ { |
|
42 |
+ StringCharSource source(str.c_str(), str.size()); |
|
43 |
+ return Match(source); |
|
44 |
+ } |
|
45 |
+ |
|
46 |
+ inline int RegEx::Match(const Stream& in) const |
|
47 |
+ { |
|
48 |
+ StreamCharSource source(in); |
|
49 |
+ return Match(source); |
|
50 |
+ } |
|
51 |
+ |
|
52 |
+ template <typename Source> |
|
53 |
+ inline bool RegEx::IsValidSource(const Source& source) const |
|
54 |
+ { |
|
55 |
+ return source; |
|
56 |
+ } |
|
57 |
+ |
|
58 |
+ template<> |
|
59 |
+ inline bool RegEx::IsValidSource<StringCharSource>(const StringCharSource&source) const |
|
60 |
+ { |
|
61 |
+ return source || m_op == REGEX_EMPTY; |
|
62 |
+ } |
|
63 |
+ |
|
64 |
+ template <typename Source> |
|
65 |
+ inline int RegEx::Match(const Source& source) const |
|
66 |
+ { |
|
67 |
+ return IsValidSource(source) ? MatchUnchecked(source) : -1; |
|
68 |
+ } |
|
69 |
+ |
|
70 |
+ template <typename Source> |
|
71 |
+ inline int RegEx::MatchUnchecked(const Source& source) const |
|
72 |
+ { |
|
73 |
+ switch(m_op) { |
|
74 |
+ case REGEX_EMPTY: |
|
75 |
+ return MatchOpEmpty(source); |
|
76 |
+ case REGEX_MATCH: |
|
77 |
+ return MatchOpMatch(source); |
|
78 |
+ case REGEX_RANGE: |
|
79 |
+ return MatchOpRange(source); |
|
80 |
+ case REGEX_OR: |
|
81 |
+ return MatchOpOr(source); |
|
82 |
+ case REGEX_AND: |
|
83 |
+ return MatchOpAnd(source); |
|
84 |
+ case REGEX_NOT: |
|
85 |
+ return MatchOpNot(source); |
|
86 |
+ case REGEX_SEQ: |
|
87 |
+ return MatchOpSeq(source); |
|
88 |
+ } |
|
89 |
+ |
|
90 |
+ return -1; |
|
91 |
+ } |
|
92 |
+ |
|
93 |
+ ////////////////////////////////////////////////////////////////////////////// |
|
94 |
+ // Operators |
|
95 |
+	// Note: the convention MatchOp*<Source> is that we can assume IsValidSource(source). |
|
96 |
+ // So we do all our checks *before* we call these functions |
|
97 |
+ |
|
98 |
+ // EmptyOperator |
|
99 |
+ template <typename Source> |
|
100 |
+ inline int RegEx::MatchOpEmpty(const Source& source) const { |
|
101 |
+ return source[0] == Stream::eof() ? 0 : -1; |
|
102 |
+ } |
|
103 |
+ |
|
104 |
+ template <> |
|
105 |
+ inline int RegEx::MatchOpEmpty<StringCharSource>(const StringCharSource& source) const { |
|
106 |
+ return !source ? 0 : -1; // the empty regex only is successful on the empty string |
|
107 |
+ } |
|
108 |
+ |
|
109 |
+ // MatchOperator |
|
110 |
+ template <typename Source> |
|
111 |
+ inline int RegEx::MatchOpMatch(const Source& source) const { |
|
112 |
+ if(source[0] != m_a) |
|
113 |
+ return -1; |
|
114 |
+ return 1; |
|
115 |
+ } |
|
116 |
+ |
|
117 |
+ // RangeOperator |
|
118 |
+ template <typename Source> |
|
119 |
+ inline int RegEx::MatchOpRange(const Source& source) const { |
|
120 |
+ if(m_a > source[0] || m_z < source[0]) |
|
121 |
+ return -1; |
|
122 |
+ return 1; |
|
123 |
+ } |
|
124 |
+ |
|
125 |
+ // OrOperator |
|
126 |
+ template <typename Source> |
|
127 |
+ inline int RegEx::MatchOpOr(const Source& source) const { |
|
128 |
+ for(std::size_t i=0;i<m_params.size();i++) { |
|
129 |
+ int n = m_params[i].MatchUnchecked(source); |
|
130 |
+ if(n >= 0) |
|
131 |
+ return n; |
|
132 |
+ } |
|
133 |
+ return -1; |
|
134 |
+ } |
|
135 |
+ |
|
136 |
+ // AndOperator |
|
137 |
+ // Note: 'AND' is a little funny, since we may be required to match things |
|
138 |
+ // of different lengths. If we find a match, we return the length of |
|
139 |
+ // the FIRST entry on the list. |
|
140 |
+ template <typename Source> |
|
141 |
+ inline int RegEx::MatchOpAnd(const Source& source) const { |
|
142 |
+ int first = -1; |
|
143 |
+ for(std::size_t i=0;i<m_params.size();i++) { |
|
144 |
+ int n = m_params[i].MatchUnchecked(source); |
|
145 |
+ if(n == -1) |
|
146 |
+ return -1; |
|
147 |
+ if(i == 0) |
|
148 |
+ first = n; |
|
149 |
+ } |
|
150 |
+ return first; |
|
151 |
+ } |
|
152 |
+ |
|
153 |
+ // NotOperator |
|
154 |
+ template <typename Source> |
|
155 |
+ inline int RegEx::MatchOpNot(const Source& source) const { |
|
156 |
+ if(m_params.empty()) |
|
157 |
+ return -1; |
|
158 |
+ if(m_params[0].MatchUnchecked(source) >= 0) |
|
159 |
+ return -1; |
|
160 |
+ return 1; |
|
161 |
+ } |
|
162 |
+ |
|
163 |
+ // SeqOperator |
|
164 |
+ template <typename Source> |
|
165 |
+ inline int RegEx::MatchOpSeq(const Source& source) const { |
|
166 |
+ int offset = 0; |
|
167 |
+ for(std::size_t i=0;i<m_params.size();i++) { |
|
168 |
+ int n = m_params[i].Match(source + offset); // note Match, not MatchUnchecked because we need to check validity after the offset |
|
169 |
+ if(n == -1) |
|
170 |
+ return -1; |
|
171 |
+ offset += n; |
|
172 |
+ } |
|
173 |
+ |
|
174 |
+ return offset; |
|
175 |
+ } |
|
176 |
+} |
|
177 |
+ |
|
178 |
+#endif // REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66 |