Skip to content

Commit 9a74099

Browse files
committed
Add string search utility
1 parent d0f3778 commit 9a74099

File tree

2 files changed

+113
-4
lines changed

2 files changed

+113
-4
lines changed

include/ctre/atoms.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ struct any { };
1919

2020
// actual AST of regexp
2121
// AST node for a literal character sequence; the characters are carried as non-type template arguments.
template <auto... Str> struct string { };
22+
// AST node carrying a character sequence as non-type template arguments.
// Its evaluate() overload locates the sequence with evaluate_search_string, i.e. it scans
// forward for an occurrence rather than comparing only at the current position.
template <auto... Str> struct string_search { };
2223
template <typename... Opts> struct select { };
2324
template <typename... Content> struct sequence { };
2425
struct empty { };

include/ctre/evaluation.hpp

+112-4
Original file line numberDiff line numberDiff line change
@@ -115,10 +115,17 @@ template <typename CharT, typename Iterator, typename EndIterator> constexpr CTR
115115
}
116116

117117
// Compares the literal String... against the input starting at `current`.
//
// Returns {position, match}:
//  - random-access path: `position` is one past the literal on success and the
//    unchanged `current` on failure;
//  - generic path: `position` is `current` after the compare_character fold
//    (compare_character is defined elsewhere; presumably it advances `current`
//    as it consumes characters - confirm against its definition).
//
// Idx... must be 0..sizeof...(String)-1 (callers pass std::make_index_sequence).
template <auto... String, size_t... Idx, typename Iterator, typename EndIterator> constexpr CTRE_FORCE_INLINE string_match_result<Iterator> evaluate_match_string(Iterator current, [[maybe_unused]] const EndIterator end, std::index_sequence<Idx...>) noexcept {
	// The fast path needs std::distance(current, end), which only compiles when both
	// arguments have the same type, so sentinel-style end iterators use the generic path.
	if constexpr (std::is_same_v<Iterator, EndIterator> && !std::is_same_v<Iterator, utf8_iterator> && is_random_accessible(typename std::iterator_traits<Iterator>::iterator_category{})) {
		// Length check first, so the indexed reads below stay inside [current, end).
		// The cast only makes the existing signed->unsigned conversion explicit.
		const bool long_enough = static_cast<size_t>(::std::distance(current, end)) >= sizeof...(String);
		// Non-short-circuit '&' keeps the comparison branch-free. The binary fold with a
		// `true` seed stays well-formed for an empty pack (a unary '&' fold is not).
		const bool same = long_enough && static_cast<bool>((true & ... & (String == *(current + Idx))));
		if (same) {
			current += sizeof...(String);
		}
		return {current, same};
	} else {
		bool same = (compare_character(String, current, end) && ... && true);
		return {current, same};
	}
}
123130

124131
template <typename R, typename Iterator, typename EndIterator, auto... String, typename... Tail>
@@ -132,6 +139,107 @@ constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, c
132139
return evaluate(begin, result.position, end, consumed_something(f, sizeof...(String) > 0), captures, ctll::list<Tail...>());
133140
}
134141

142+
// Tells whether the suffix of `word` that starts at index `pos` is also a prefix of `word`.
//   word    - first element of the pattern
//   wordlen - number of elements in the pattern
//   pos     - index where the suffix begins; pos == wordlen denotes the empty suffix,
//             for which the answer is true
template<typename Ty>
constexpr bool is_prefix(Ty* word, size_t wordlen, ptrdiff_t pos) {
	const ptrdiff_t tail_length = wordlen - pos;
	int offset = 0;
	while (offset < tail_length) {
		if (word[offset] != word[pos + offset]) {
			return false;
		}
		++offset;
	}
	return true;
}
152+
153+
// Counts how many elements, walking backwards from word[pos], agree with the elements
// walking backwards from the end of the word (word[wordlen - 1]).
// Counting stops at the first disagreement or once `pos` elements have been matched,
// so the result is never larger than `pos`.
template<typename Ty>
constexpr size_t suffix_length(Ty* word, size_t wordlen, ptrdiff_t pos) {
	const size_t limit = static_cast<size_t>(pos);
	size_t matched = 0;
	while (matched < limit && word[pos - matched] == word[wordlen - 1 - matched]) {
		++matched;
	}
	return matched;
}
160+
//MSVC workaround, array operator[] blows up in face if constexpr, use pointers instead
161+
template<typename Ty, auto... String>
162+
constexpr auto make_delta_2(string<String...>) {
163+
std::array<Ty, sizeof...(String)> chars{ String... };
164+
std::array<ptrdiff_t, sizeof...(String)> table;
165+
constexpr size_t patlen = sizeof...(String);
166+
size_t p = 0;
167+
size_t last_prefix_index = patlen - 1;
168+
169+
for (p = patlen - 1; p < patlen; p--) {
170+
if (is_prefix(chars.data(), patlen, p + 1)) {
171+
last_prefix_index = p + 1;
172+
}
173+
table.data()[p] = last_prefix_index + (patlen - 1 - p);
174+
}
175+
176+
for (p = 0; p < patlen - 1; p++) {
177+
size_t slen = suffix_length(chars.data(), patlen, p);
178+
if (chars.data()[p - slen] != chars.data()[patlen - 1 - slen]) {
179+
table.data()[patlen - 1 - slen] = patlen - 1 - p + slen;
180+
}
181+
}
182+
183+
return table;
184+
}
185+
186+
template <typename Iterator, typename EndIterator, auto... String>
187+
constexpr CTRE_FORCE_INLINE string_match_result<Iterator> evaluate_search_string(Iterator current, const EndIterator end, string<String...>) {
188+
if constexpr (sizeof...(String) > 2 && !std::is_same_v<Iterator, utf8_iterator> && is_random_accessible(typename std::iterator_traits<Iterator>::iterator_category{})) {
189+
constexpr std::array<typename ::std::iterator_traits<Iterator>::value_type, sizeof...(String)> chars{ String... };
190+
constexpr std::array<ptrdiff_t, sizeof...(String)> delta_2 = make_delta_2<typename ::std::iterator_traits<Iterator>::value_type>(string<String...>());
191+
192+
size_t str_size = std::distance(current, end);
193+
if (str_size < sizeof...(String)) { //quick exit no way to match
194+
return { current, false };
195+
}
196+
197+
size_t i = sizeof...(String) - 1; //index over to the starting location
198+
for (; i < str_size;) {
199+
size_t j = sizeof...(String) - 1;
200+
size_t m = i + 1;
201+
for (; *(current + i) == *(chars.data() + j); --i, --j) { //match string in reverse
202+
if (j == 0) {
203+
return { current + m, true };
204+
}
205+
}
206+
size_t shift = enumeration<String...>::match_char(*(current + i)) ? static_cast<size_t>(*(delta_2.data() + j)) : sizeof...(String);
207+
i += shift;
208+
}
209+
210+
return { current + str_size, false };
211+
} else if (sizeof...(String)) {
212+
//fallback to plain string matching
213+
constexpr std::array<typename ::std::iterator_traits<Iterator>::value_type, sizeof...(String)> chars{ String... };
214+
constexpr typename ::std::iterator_traits<Iterator>::value_type first_char = chars.data()[0];
215+
while (current != end) {
216+
while (current != end && *current != first_char) {
217+
current++;
218+
}
219+
auto result = evaluate_match_string<String...>(current, end, std::make_index_sequence<sizeof...(String)>());
220+
if (result.match) {
221+
return result;
222+
} else {
223+
++current;
224+
}
225+
}
226+
return { current, false };
227+
} else {
228+
return { current, true };
229+
}
230+
}
231+
232+
template <typename R, typename Iterator, typename EndIterator, auto... String, typename... Tail>
233+
constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, const EndIterator end, [[maybe_unused]] const flags& f, R captures, ctll::list<string_search<String...>, Tail...>) noexcept {
234+
auto result = evaluate_search_string(current, end, string<String...>());
235+
236+
if (!result.matched) {
237+
return not_matched;
238+
}
239+
240+
return evaluate(begin, std::advance(result.position, sizeof...(String)), end, consumed_something(f, sizeof...(String) > 0), captures, ctll::list<Tail...>());
241+
}
242+
135243
// matching select in patterns
136244
template <typename R, typename Iterator, typename EndIterator, typename HeadOptions, typename... TailOptions, typename... Tail>
137245
constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, const EndIterator end, const flags & f, R captures, ctll::list<select<HeadOptions, TailOptions...>, Tail...>) noexcept {

0 commit comments

Comments
 (0)