Skip to content

Commit 08d6249

Browse files
committed
Add needle search optimization
1 parent 9a37e55 commit 08d6249

File tree

2 files changed

+194
-12
lines changed

2 files changed

+194
-12
lines changed

include/ctre/evaluation.hpp

+168-4
Original file line numberDiff line numberDiff line change
@@ -116,10 +116,17 @@ template <typename CharT, typename Iterator, typename EndIterator> constexpr CTR
116116
}
117117

118118
/// Compares the compile-time character pack `String...` with the input starting at `current`.
///
/// Random-access, non-UTF-8 iterators take a fast path: one length check followed by a
/// branch-free comparison of every position. All other iterators compare character by
/// character through `compare_character`.
///
/// @param current  position in the input where the comparison starts
/// @param end      end of the input
/// @return `{position, matched}`; on the fast path `position` is advanced past the string
///         when it matched and left at `current` when it did not.
template <auto... String, size_t... Idx, typename Iterator, typename EndIterator> constexpr CTRE_FORCE_INLINE string_match_result<Iterator> evaluate_match_string(Iterator current, [[maybe_unused]] const EndIterator end, std::index_sequence<Idx...>) noexcept {
	if constexpr (!std::is_same_v<Iterator, utf8_iterator> && is_random_accessible(typename std::iterator_traits<Iterator>::iterator_category{})) {
		using difference_type = typename std::iterator_traits<Iterator>::difference_type;
		// Check the remaining length first so none of the indexed reads below can pass `end`.
		// The cast keeps the comparison signed-vs-signed.
		const bool long_enough = ::std::distance(current, end) >= static_cast<difference_type>(sizeof...(String));
		// Non-short-circuit `&` keeps the comparison branch-free. The binary fold with `true`
		// keeps it well-formed for an empty pack (a unary fold over `&` is not).
		const bool same = long_enough && ((String == *(current + Idx)) & ... & true);
		if (same) {
			return {current += sizeof...(String), same};
		}
		return {current, same};
	} else {
		// NOTE(review): compare_character is defined outside this view; it presumably advances
		// `current` by reference as it consumes characters - confirm.
		bool same = (compare_character(String, current, end) && ... && true);
		return {current, same};
	}
}
124131

125132
template <typename R, typename Iterator, typename EndIterator, auto... String, typename... Tail>
@@ -522,6 +529,163 @@ constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, c
522529
}
523530
}
524531

532+
/// Catch-all overload: an argument of arbitrary type is reported as "not a string".
template <typename T>
constexpr auto is_string(T) -> bool {
	return false;
}
536+
template <auto... String>
537+
constexpr bool is_string(string<String...>) {
538+
return true;
539+
}
540+
541+
/// Catch-all overload: an argument of arbitrary type is reported as "not string-like".
template <typename T>
constexpr auto is_string_like(T) -> bool {
	return false;
}
545+
template <auto... String>
546+
constexpr bool is_string_like(string<String...>) {
547+
return true;
548+
}
549+
/// Overload enabled only when `MatchesCharacter<CharacterLike>` accepts the type obtained by
/// dereferencing a `std::string_view` iterator; such arguments are reported as string-like.
// NOTE(review): when the enable_if succeeds, this overload and the unconstrained
// `is_string_like(T)` overload look equally viable, so a call may be ambiguous - confirm.
template <
	typename CharacterLike,
	typename = std::enable_if_t<MatchesCharacter<CharacterLike>::template value<decltype(*std::declval<std::string_view::iterator>())>>
>
constexpr auto is_string_like(CharacterLike) -> bool {
	return true;
}
553+
554+
template <typename... Content>
555+
constexpr auto extract_leading_string(ctll::list<Content...>) -> ctll::list<Content...> {
556+
return {};
557+
};
558+
template <typename... Content>
559+
constexpr auto extract_leading_string(sequence<Content...>) -> sequence<Content...> {
560+
return {};
561+
};
562+
563+
//concatenation
564+
template <auto C, auto... String, typename... Content>
565+
constexpr auto extract_leading_string(ctll::list<string<String...>, character<C>, Content...>) {
566+
return extract_leading_string(ctll::list<string<String..., C>, Content...>());
567+
}
568+
569+
template <auto... StringA, auto... StringB, typename... Content>
570+
constexpr auto extract_leading_string(ctll::list<string<StringA...>, string<StringB...>, Content...>) {
571+
return extract_leading_string(ctll::list<string<StringA..., StringB>, Content...>());
572+
}
573+
//move things up out of sequences
574+
template <typename... Content, typename... Tail>
575+
constexpr auto extract_leading_string(ctll::list<sequence<Content...>, Tail...>) {
576+
return extract_leading_string(ctll::list<Content..., Tail...>());
577+
}
578+
579+
template <typename T, typename... Content, typename... Tail>
580+
constexpr auto extract_leading_string(ctll::list<T, sequence<Content...>, Tail...>) {
581+
return extract_leading_string(ctll::list<T, Content..., Tail...>());
582+
}
583+
584+
template <typename... Content>
585+
constexpr auto make_into_sequence(ctll::list<Content...>) -> sequence<Content...> {
586+
return{};
587+
}
588+
template <typename... Content>
589+
constexpr auto make_into_sequence(sequence<Content...>) -> sequence<Content...> {
590+
return{};
591+
}
592+
593+
//boyer moore utils
594+
/// Reports whether the suffix of `word` that starts at index `pos` is also a prefix of `word`.
///
/// @param word     pointer to the first character of the pattern
/// @param wordlen  number of characters in the pattern
/// @param pos      index at which the suffix begins
/// @return true when word[pos..wordlen) equals word[0..wordlen-pos)
template<typename Ty>
constexpr bool is_prefix(Ty* word, size_t wordlen, ptrdiff_t pos) {
	const ptrdiff_t tail_len = wordlen - pos;
	int offset = 0;
	while (offset < tail_len) {
		if (word[offset] != word[pos + offset]) {
			return false;
		}
		++offset;
	}
	return true;
}
604+
605+
/// Counts how many characters, read backwards from word[pos], agree with the characters read
/// backwards from the end of `word`. Counting stops at the first mismatch or once the count
/// reaches `pos`.
///
/// @param word     pointer to the first character of the pattern
/// @param wordlen  number of characters in the pattern
/// @param pos      index at which the backwards comparison starts
template<typename Ty>
constexpr size_t suffix_length(Ty* word, size_t wordlen, ptrdiff_t pos) {
	size_t matched = 0;
	while ((word[pos - matched] == word[wordlen - 1 - matched]) && (matched < pos)) {
		++matched;
	}
	return matched;
}
612+
// MSVC workaround: std::array::operator[] fails during constexpr evaluation, so elements are accessed through data() pointers instead.
613+
template<typename Ty, auto... String>
614+
constexpr auto make_delta_2(string<String...>) {
615+
std::array<Ty, sizeof...(String)> chars{ String... };
616+
std::array<ptrdiff_t, sizeof...(String)> table;
617+
constexpr size_t patlen = sizeof...(String);
618+
size_t p = 0;
619+
size_t last_prefix_index = patlen - 1;
620+
621+
for (p = patlen - 1; p < patlen; p--) {
622+
if (is_prefix(chars.data(), patlen, p + 1)) {
623+
last_prefix_index = p + 1;
624+
}
625+
table.data()[p] = last_prefix_index + (patlen - 1 - p);
626+
}
627+
628+
for (p = 0; p < patlen - 1; p++) {
629+
size_t slen = suffix_length(chars.data(), patlen, p);
630+
if (chars.data()[p - slen] != chars.data()[patlen - 1 - slen]) {
631+
table.data()[patlen - 1 - slen] = patlen - 1 - p + slen;
632+
}
633+
}
634+
635+
return table;
636+
}
637+
638+
/// Result of searching for a fixed string in an input range.
template <typename Iterator>
struct string_search_result {
	// Where the match starts; set to the end of the searched range when nothing matched.
	Iterator position;
	// One past the last matched character; equal to `position` when nothing matched.
	Iterator end_position;
	// True when the string was found.
	bool match;
};
643+
644+
template <typename Iterator, typename EndIterator, auto... String>
645+
constexpr CTRE_FORCE_INLINE string_search_result<Iterator> search_for_string(Iterator current, const EndIterator end, string<String...>) {
646+
if constexpr (sizeof...(String) > 2 && !std::is_same_v<Iterator, utf8_iterator> && is_random_accessible(typename std::iterator_traits<Iterator>::iterator_category{})) {
647+
constexpr std::array<typename ::std::iterator_traits<Iterator>::value_type, sizeof...(String)> chars{ String... };
648+
constexpr std::array<ptrdiff_t, sizeof...(String)> delta_2 = make_delta_2<typename ::std::iterator_traits<Iterator>::value_type>(string<String...>());
649+
650+
size_t str_size = std::distance(current, end);
651+
if (str_size < sizeof...(String)) { //quick exit no way to match
652+
return { current + str_size, current + str_size, false };
653+
}
654+
655+
size_t i = sizeof...(String) - 1; //index over to the starting location
656+
for (; i < str_size;) {
657+
size_t j = sizeof...(String) - 1;
658+
size_t m = i + 1;
659+
for (; *(current + i) == *(chars.data() + j); --i, --j) { //match string in reverse
660+
if (j == 0) {
661+
return { current + i, current + m, true };
662+
}
663+
}
664+
size_t shift = enumeration<String...>::match_char(*(current + i)) ? static_cast<size_t>(*(delta_2.data() + j)) : sizeof...(String);
665+
i += shift;
666+
}
667+
668+
return { current + str_size, current + str_size, false };
669+
} else if (sizeof...(String)) {
670+
//fallback to plain string matching
671+
constexpr std::array<typename ::std::iterator_traits<Iterator>::value_type, sizeof...(String)> chars{ String... };
672+
constexpr typename ::std::iterator_traits<Iterator>::value_type first_char = chars.data()[0];
673+
while (current != end) {
674+
while (current != end && *current != first_char) {
675+
current++;
676+
}
677+
auto result = evaluate_match_string<String...>(current, end, std::make_index_sequence<sizeof...(String)>());
678+
if (result.match) {
679+
return { current, result.position, result.match };
680+
} else {
681+
++current;
682+
}
683+
}
684+
return { current, current, false };
685+
} else {
686+
return { current, current, true };
687+
}
688+
}
525689

526690
}
527691

include/ctre/wrapper.hpp

+26-8
Original file line numberDiff line numberDiff line change
@@ -62,19 +62,37 @@ struct match_method {
6262
struct search_method {
6363
/// Searches [begin, end) for the first position at which RE matches.
///
/// When the regex starts with a literal string, candidate start positions are located with
/// `search_for_string` and only those positions are handed to `evaluate`; otherwise every
/// position is tried in turn (or only the first one when the regex starts with an anchor).
///
/// @param orig_begin  start of the whole subject, passed through to `evaluate`
/// @param begin       position where the search starts
/// @param end         end of the subject
/// @return the result of `evaluate` for the first matching position, or for the final
///         position reached when nothing matched
template <typename Modifier = singleline, typename ResultIterator = void, typename RE, typename IteratorBegin, typename IteratorEnd> constexpr CTRE_FORCE_INLINE static auto exec(IteratorBegin orig_begin, IteratorBegin begin, IteratorEnd end, RE) noexcept {
	using result_iterator = std::conditional_t<std::is_same_v<ResultIterator, void>, IteratorBegin, ResultIterator>;
	using front_re = decltype(pop_and_get_front(extract_leading_string(ctll::list<RE>{})));
	// Only read in the generic branch below, hence maybe_unused.
	[[maybe_unused]] constexpr bool fixed = starts_with_anchor(Modifier{}, ctll::list<RE>{});

	auto it = begin;
	if constexpr (is_string(front_re{}.front) && size(front_re{}.list)) {
		// Leading literal followed by more pattern: jump between occurrences of the literal.
		it = search_for_string(it, end, front_re{}.front).position;
		for (; end != it;) {
			if (auto out = evaluate(orig_begin, it, end, Modifier{}, return_type<result_iterator, RE>{}, ctll::list<start_mark, sequence<decltype(front_re{}.front), decltype(make_into_sequence(front_re{}.list))>, end_mark, accept>())) {
				return out;
			}
			it = search_for_string(++it, end, front_re{}.front).position;
		}
		return evaluate(orig_begin, it, end, Modifier{}, return_type<result_iterator, RE>{}, ctll::list<start_mark, sequence<decltype(front_re{}.front), decltype(make_into_sequence(front_re{}.list))>, end_mark, accept>());
	} else if constexpr (is_string(front_re{}.front)) {
		// The whole pattern is one literal. This must be `if constexpr`: with a runtime `if`,
		// `search_for_string` would also be instantiated for fronts that are not strings.
		it = search_for_string(it, end, front_re{}.front).position;
		for (; end != it;) {
			if (auto out = evaluate(orig_begin, it, end, Modifier{}, return_type<result_iterator, RE>{}, ctll::list<start_mark, decltype(front_re{}.front), end_mark, accept>())) {
				return out;
			}
			it = search_for_string(++it, end, front_re{}.front).position;
		}
		return evaluate(orig_begin, it, end, Modifier{}, return_type<result_iterator, RE>{}, ctll::list<start_mark, decltype(front_re{}.front), end_mark, accept>());
	} else {
		// Generic search: try each start position unless the pattern is anchored.
		for (; end != it && !fixed; ++it) {
			if (auto out = evaluate(orig_begin, it, end, Modifier{}, return_type<result_iterator, RE>{}, ctll::list<start_mark, RE, end_mark, accept>())) {
				return out;
			}
		}
		// in case the RE is empty or fixed
		return evaluate(orig_begin, it, end, Modifier{}, return_type<result_iterator, RE>{}, ctll::list<start_mark, RE, end_mark, accept>());
	}
}
7997

8098
template <typename Modifier = singleline, typename ResultIterator = void, typename RE, typename IteratorBegin, typename IteratorEnd> constexpr CTRE_FORCE_INLINE static auto exec(IteratorBegin begin, IteratorEnd end, RE) noexcept {

0 commit comments

Comments
 (0)