From 08d6249dcc0228d50c30a8331c3877be40caa163 Mon Sep 17 00:00:00 2001 From: Andersama Date: Sun, 15 Nov 2020 14:20:44 -0800 Subject: [PATCH 1/3] Add needle search optimization --- include/ctre/evaluation.hpp | 172 +++++++++++++++++++++++++++++++++++- include/ctre/wrapper.hpp | 34 +++++-- 2 files changed, 194 insertions(+), 12 deletions(-) diff --git a/include/ctre/evaluation.hpp b/include/ctre/evaluation.hpp index 6701c6fa..8a3a4b08 100644 --- a/include/ctre/evaluation.hpp +++ b/include/ctre/evaluation.hpp @@ -116,10 +116,17 @@ template constexpr CTR } template constexpr CTRE_FORCE_INLINE string_match_result evaluate_match_string(Iterator current, [[maybe_unused]] const EndIterator end, std::index_sequence) noexcept { - - bool same = (compare_character(String, current, end) && ... && true); - - return {current, same}; + if constexpr (!std::is_same_v && is_random_accessible(typename std::iterator_traits::iterator_category{})) { + bool same = (::std::distance(current, end) >= sizeof...(String)) && ((String == *(current + Idx)) & ...); + if (same) { + return {current+=sizeof...(String), same}; + } else { + return {current, same}; + } + } else { + bool same = (compare_character(String, current, end) && ... && true); + return { current, same }; + } } template @@ -522,6 +529,163 @@ constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, c } } +template +constexpr bool is_string(T) { + return false; +} +template +constexpr bool is_string(string) { + return true; +} + +template +constexpr bool is_string_like(T) { + return false; +} +template +constexpr bool is_string_like(string) { + return true; +} +template ::template value())>>> +constexpr bool is_string_like(CharacterLike) { + return true; +} + +template +constexpr auto extract_leading_string(ctll::list) -> ctll::list { + return {}; +}; +template +constexpr auto extract_leading_string(sequence) -> sequence { + return {}; +}; + +//concatenation +template +constexpr auto extract_leading_string(ctll::list, character, Content...>) { + return extract_leading_string(ctll::list, Content...>()); +} + +template +constexpr auto extract_leading_string(ctll::list, string, Content...>) { + return extract_leading_string(ctll::list, Content...>()); +} +//move things up out of sequences +template +constexpr auto extract_leading_string(ctll::list, Tail...>) { + return extract_leading_string(ctll::list()); +} + +template +constexpr auto extract_leading_string(ctll::list, Tail...>) { + return extract_leading_string(ctll::list()); +} + +template +constexpr auto make_into_sequence(ctll::list) -> sequence { + return{}; +} +template +constexpr auto make_into_sequence(sequence) -> sequence { + return{}; +} + +//boyer moore utils +template +constexpr bool is_prefix(Ty* word, size_t wordlen, ptrdiff_t pos) { + ptrdiff_t suffixlen = wordlen - pos; + for (int i = 0; i < suffixlen; i++) { + if (word[i] != word[pos + i]) { + return false; + } + } + return true; +} + +template +constexpr size_t suffix_length(Ty* word, size_t wordlen, ptrdiff_t pos) { + size_t i = 0; + // increment suffix length i to the first mismatch or beginning of the word + for (; (word[pos - i] == word[wordlen - 1 - i]) && (i < pos); i++); + return i; +} +//MSVC workaround, array operator[] blows up in face if constexpr, use pointers instead +template +constexpr auto make_delta_2(string) { + std::array chars{ String... }; + std::array table; + constexpr size_t patlen = sizeof...(String); + size_t p = 0; + size_t last_prefix_index = patlen - 1; + + for (p = patlen - 1; p < patlen; p--) { + if (is_prefix(chars.data(), patlen, p + 1)) { + last_prefix_index = p + 1; + } + table.data()[p] = last_prefix_index + (patlen - 1 - p); + } + + for (p = 0; p < patlen - 1; p++) { + size_t slen = suffix_length(chars.data(), patlen, p); + if (chars.data()[p - slen] != chars.data()[patlen - 1 - slen]) { + table.data()[patlen - 1 - slen] = patlen - 1 - p + slen; + } + } + + return table; +} + +template struct string_search_result { + Iterator position; + Iterator end_position; + bool match; +}; + +template +constexpr CTRE_FORCE_INLINE string_search_result search_for_string(Iterator current, const EndIterator end, string) { + if constexpr (sizeof...(String) > 2 && !std::is_same_v && is_random_accessible(typename std::iterator_traits::iterator_category{})) { + constexpr std::array::value_type, sizeof...(String)> chars{ String... }; + constexpr std::array delta_2 = make_delta_2::value_type>(string()); + + size_t str_size = std::distance(current, end); + if (str_size < sizeof...(String)) { //quick exit no way to match + return { current + str_size, current + str_size, false }; + } + + size_t i = sizeof...(String) - 1; //index over to the starting location + for (; i < str_size;) { + size_t j = sizeof...(String) - 1; + size_t m = i + 1; + for (; *(current + i) == *(chars.data() + j); --i, --j) { //match string in reverse + if (j == 0) { + return { current + i, current + m, true }; + } + } + size_t shift = enumeration::match_char(*(current + i)) ? static_cast(*(delta_2.data() + j)) : sizeof...(String); + i += shift; + } + + return { current + str_size, current + str_size, false }; + } else if (sizeof...(String)) { + //fallback to plain string matching + constexpr std::array::value_type, sizeof...(String)> chars{ String... }; + constexpr typename ::std::iterator_traits::value_type first_char = chars.data()[0]; + while (current != end) { + while (current != end && *current != first_char) { + current++; + } + auto result = evaluate_match_string(current, end, std::make_index_sequence()); + if (result.match) { + return { current, result.position, result.match }; + } else { + ++current; + } + } + return { current, current, false }; + } else { + return { current, current, true }; + } +} } diff --git a/include/ctre/wrapper.hpp b/include/ctre/wrapper.hpp index e87b1be2..1f02b30a 100644 --- a/include/ctre/wrapper.hpp +++ b/include/ctre/wrapper.hpp @@ -62,19 +62,37 @@ struct match_method { struct search_method { template constexpr CTRE_FORCE_INLINE static auto exec(IteratorBegin orig_begin, IteratorBegin begin, IteratorEnd end, RE) noexcept { using result_iterator = std::conditional_t, IteratorBegin, ResultIterator>; - + using front_re = decltype(pop_and_get_front(extract_leading_string(ctll::list{}))); constexpr bool fixed = starts_with_anchor(Modifier{}, ctll::list{}); auto it = begin; - - for (; end != it && !fixed; ++it) { - if (auto out = evaluate(orig_begin, it, end, Modifier{}, return_type{}, ctll::list())) { - return out; + if constexpr (is_string(front_re{}.front) && size(front_re{}.list)) { + it = search_for_string(it, end, front_re{}.front).position; + for (; end != it;) { + if (auto out = evaluate(orig_begin, it, end, Modifier{}, return_type{}, ctll::list, end_mark, accept>())) { + return out; + } + it = search_for_string(++it, end, front_re{}.front).position; + } + return evaluate(orig_begin, it, end, Modifier{}, return_type{}, ctll::list, end_mark, accept>()); + } else if (is_string(front_re{}.front)) { + it = search_for_string(it, end, front_re{}.front).position; + for (; end != it;) { + if (auto out = evaluate(orig_begin, it, end, Modifier{}, return_type{}, ctll::list())) { + return out; + } + it = search_for_string(++it, end, front_re{}.front).position; } + return evaluate(orig_begin, it, end, Modifier{}, return_type{}, ctll::list()); + } else { + for (; end != it && !fixed; ++it) { + if (auto out = evaluate(orig_begin, it, end, Modifier{}, return_type{}, ctll::list())) { + return out; + } + } + // in case the RE is empty or fixed + return evaluate(orig_begin, it, end, Modifier{}, return_type{}, ctll::list()); } - - // in case the RE is empty or fixed - return evaluate(orig_begin, it, end, Modifier{}, return_type{}, ctll::list()); } template constexpr CTRE_FORCE_INLINE static auto exec(IteratorBegin begin, IteratorEnd end, RE) noexcept { From 8b3dccb55e1522244db0c0bef83983b81fd3948f Mon Sep 17 00:00:00 2001 From: Andersama Date: Sun, 15 Nov 2020 14:46:05 -0800 Subject: [PATCH 2/3] Use both iterators from string search (no need to double check string matches) --- include/ctre/wrapper.hpp | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/include/ctre/wrapper.hpp b/include/ctre/wrapper.hpp index 1f02b30a..4ba3fdce 100644 --- a/include/ctre/wrapper.hpp +++ b/include/ctre/wrapper.hpp @@ -67,23 +67,27 @@ struct search_method { auto it = begin; if constexpr (is_string(front_re{}.front) && size(front_re{}.list)) { - it = search_for_string(it, end, front_re{}.front).position; - for (; end != it;) { - if (auto out = evaluate(orig_begin, it, end, Modifier{}, return_type{}, ctll::list, end_mark, accept>())) { - return out; + auto it2 = search_for_string(it, end, front_re{}.front); + return_type result{}; + for (; end != it2.position;) { + result.set_start_mark(it2.position); + if (result = evaluate(orig_begin, it2.end_position, end, Modifier{}, result, ctll::list())) { + return result; } - it = search_for_string(++it, end, front_re{}.front).position; + result.unmatch(); + std::advance(it2.position, 1); + it2 = search_for_string(it2.position, end, front_re{}.front); } - return evaluate(orig_begin, it, end, Modifier{}, return_type{}, ctll::list, end_mark, accept>()); + result.set_start_mark(it2.position); + return result = evaluate(orig_begin, it2.end_position, end, Modifier{}, result, ctll::list()); } else if (is_string(front_re{}.front)) { - it = search_for_string(it, end, front_re{}.front).position; - for (; end != it;) { - if (auto out = evaluate(orig_begin, it, end, Modifier{}, return_type{}, ctll::list())) { - return out; - } - it = search_for_string(++it, end, front_re{}.front).position; - } - return evaluate(orig_begin, it, end, Modifier{}, return_type{}, ctll::list()); + auto it2 = search_for_string(it, end, front_re{}.front); + return_type result{}; + result.set_start_mark(it2.position); + result.set_end_mark(it2.end_position); + if (it2.match) + result.matched(); + return result; } else { for (; end != it && !fixed; ++it) { if (auto out = evaluate(orig_begin, it, end, Modifier{}, return_type{}, ctll::list())) { From 1797c516860ddb2d0d45628dec437459383f0cb8 Mon Sep 17 00:00:00 2001 From: Andersama Date: Sun, 15 Nov 2020 16:05:21 -0800 Subject: [PATCH 3/3] fixing errors... --- include/ctre/evaluation.hpp | 54 ++++++++++++++++++++++--------------- include/ctre/wrapper.hpp | 5 ++-- 2 files changed, 35 insertions(+), 24 deletions(-) diff --git a/include/ctre/evaluation.hpp b/include/ctre/evaluation.hpp index 8a3a4b08..3c608da8 100644 --- a/include/ctre/evaluation.hpp +++ b/include/ctre/evaluation.hpp @@ -115,13 +115,19 @@ template constexpr CTR return false; } +struct zero_terminated_string_end_iterator; template constexpr CTRE_FORCE_INLINE string_match_result evaluate_match_string(Iterator current, [[maybe_unused]] const EndIterator end, std::index_sequence) noexcept { - if constexpr (!std::is_same_v && is_random_accessible(typename std::iterator_traits::iterator_category{})) { - bool same = (::std::distance(current, end) >= sizeof...(String)) && ((String == *(current + Idx)) & ...); +#if __cpp_char8_t >= 201811 + if constexpr (sizeof...(String) && !std::is_same_v && is_random_accessible(typename std::iterator_traits::iterator_category{}) && !std::is_same_v) { +#else + if constexpr (sizeof...(String) && is_random_accessible(typename std::iterator_traits::iterator_category{}) && !std::is_same_v) { +#endif + using char_type = decltype(*current); + bool same = ((size_t)std::distance(current, end) >= sizeof...(String)) && ((static_cast(String) == *(current + Idx)) && ...); if (same) { - return {current+=sizeof...(String), same}; + return { current += sizeof...(String), same }; } else { - return {current, same}; + return { current, same }; } } else { bool same = (compare_character(String, current, end) && ... && true); @@ -530,63 +536,63 @@ constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, c } template -constexpr bool is_string(T) { +constexpr bool is_string(T) noexcept { return false; } template -constexpr bool is_string(string) { +constexpr bool is_string(string)noexcept { return true; } template -constexpr bool is_string_like(T) { +constexpr bool is_string_like(T) noexcept { return false; } template -constexpr bool is_string_like(string) { +constexpr bool is_string_like(string) noexcept { return true; } template ::template value())>>> -constexpr bool is_string_like(CharacterLike) { +constexpr bool is_string_like(CharacterLike) noexcept { return true; } template -constexpr auto extract_leading_string(ctll::list) -> ctll::list { +constexpr auto extract_leading_string(ctll::list) noexcept -> ctll::list { return {}; -}; +} template -constexpr auto extract_leading_string(sequence) -> sequence { +constexpr auto extract_leading_string(sequence) noexcept -> sequence { return {}; -}; +} //concatenation template -constexpr auto extract_leading_string(ctll::list, character, Content...>) { +constexpr auto extract_leading_string(ctll::list, character, Content...>) noexcept { return extract_leading_string(ctll::list, Content...>()); } template -constexpr auto extract_leading_string(ctll::list, string, Content...>) { - return extract_leading_string(ctll::list, Content...>()); +constexpr auto extract_leading_string(ctll::list, string, Content...>) noexcept { + return extract_leading_string(ctll::list, Content...>()); } //move things up out of sequences template -constexpr auto extract_leading_string(ctll::list, Tail...>) { +constexpr auto extract_leading_string(ctll::list, Tail...>) noexcept { return extract_leading_string(ctll::list()); } template -constexpr auto extract_leading_string(ctll::list, Tail...>) { +constexpr auto extract_leading_string(ctll::list, Tail...>) noexcept { return extract_leading_string(ctll::list()); } template -constexpr auto make_into_sequence(ctll::list) -> sequence { +constexpr auto make_into_sequence(ctll::list) noexcept -> sequence { return{}; } template -constexpr auto make_into_sequence(sequence) -> sequence { +constexpr auto make_into_sequence(sequence) noexcept -> sequence { return{}; } @@ -642,8 +648,12 @@ template struct string_search_result { }; template -constexpr CTRE_FORCE_INLINE string_search_result search_for_string(Iterator current, const EndIterator end, string) { +constexpr CTRE_FORCE_INLINE string_search_result search_for_string(Iterator current, const EndIterator end, string) noexcept { +#if __cpp_char8_t >= 201811 if constexpr (sizeof...(String) > 2 && !std::is_same_v && is_random_accessible(typename std::iterator_traits::iterator_category{})) { +#else + if constexpr (sizeof...(String) > 2 && is_random_accessible(typename std::iterator_traits::iterator_category{})) { +#endif constexpr std::array::value_type, sizeof...(String)> chars{ String... }; constexpr std::array delta_2 = make_delta_2::value_type>(string()); @@ -666,7 +676,7 @@ constexpr CTRE_FORCE_INLINE string_search_result search_for_string(Ite } return { current + str_size, current + str_size, false }; - } else if (sizeof...(String)) { + } else if constexpr (sizeof...(String)) { //fallback to plain string matching constexpr std::array::value_type, sizeof...(String)> chars{ String... }; constexpr typename ::std::iterator_traits::value_type first_char = chars.data()[0]; diff --git a/include/ctre/wrapper.hpp b/include/ctre/wrapper.hpp index 4ba3fdce..9475896d 100644 --- a/include/ctre/wrapper.hpp +++ b/include/ctre/wrapper.hpp @@ -71,7 +71,8 @@ struct search_method { return_type result{}; for (; end != it2.position;) { result.set_start_mark(it2.position); - if (result = evaluate(orig_begin, it2.end_position, end, Modifier{}, result, ctll::list())) { + result = evaluate(orig_begin, it2.end_position, end, Modifier{}, result, ctll::list()); + if (result) { return result; } result.unmatch(); @@ -80,7 +81,7 @@ struct search_method { } result.set_start_mark(it2.position); return result = evaluate(orig_begin, it2.end_position, end, Modifier{}, result, ctll::list()); - } else if (is_string(front_re{}.front)) { + } else if constexpr (is_string(front_re{}.front)) { auto it2 = search_for_string(it, end, front_re{}.front); return_type result{}; result.set_start_mark(it2.position);