diff --git a/.clang-format b/.clang-format index ee0513ae..101b23e3 100644 --- a/.clang-format +++ b/.clang-format @@ -25,4 +25,6 @@ BraceWrapping: AllowAllConstructorInitializersOnNextLine: true ConstructorInitializerAllOnOneLineOrOnePerLine: true AllowShortCaseLabelsOnASingleLine: true +IfMacros: + - RAPIDFUZZ_IF_CONSTEXPR IndentPPDirectives: AfterHash diff --git a/.github/workflows/linux-simple.yml b/.github/workflows/linux-simple.yml new file mode 100644 index 00000000..1b95a775 --- /dev/null +++ b/.github/workflows/linux-simple.yml @@ -0,0 +1,134 @@ +name: Linux builds (basic) + +on: [push, pull_request] + +jobs: + build: + name: ${{matrix.cxx}}, C++${{matrix.std}}, ${{matrix.build_type}} + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + cxx: # axis must be named cxx because the job name, the CXX env and the include entries all read matrix.cxx + - g++-5 + - g++-6 + - g++-7 + - g++-8 + - g++-9 + - g++-10 + - clang++-6.0 + - clang++-7 + - clang++-8 + - clang++-9 + - clang++-10 + build_type: [Debug, Release] + std: [11] + include: + - cxx: g++-5 + other_pkgs: g++-5 + - cxx: g++-6 + other_pkgs: g++-6 + - cxx: g++-7 + other_pkgs: g++-7 + - cxx: g++-8 + other_pkgs: g++-8 + - cxx: g++-9 + other_pkgs: g++-9 + - cxx: g++-10 + other_pkgs: g++-10 + - cxx: clang++-6.0 + other_pkgs: clang-6.0 + - cxx: clang++-7 + other_pkgs: clang-7 + - cxx: clang++-8 + other_pkgs: clang-8 + - cxx: clang++-9 + other_pkgs: clang-9 + - cxx: clang++-10 + other_pkgs: clang-10 + + - cxx: clang++-10 + other_pkgs: clang-10 + std: 14 + build_type: Debug + - cxx: clang++-10 + other_pkgs: clang-10 + std: 17 + build_type: Debug + - cxx: clang++-10 + other_pkgs: clang-10 + std: 20 + build_type: Debug + - cxx: g++-10 + other_pkgs: g++-10 + std: 14 + build_type: Debug + - cxx: g++-10 + other_pkgs: g++-10 + std: 17 + build_type: Debug + - cxx: g++-10 + other_pkgs: g++-10 + std: 20 + build_type: Debug + + - cxx: clang++-10 + other_pkgs: clang-10 + std: 14 + build_type: Release + - cxx: clang++-10 + other_pkgs: clang-10 + std: 17 + build_type: Release + - cxx: clang++-10 + other_pkgs: 
clang-10 + std: 20 + build_type: Release + - cxx: g++-10 + other_pkgs: g++-10 + std: 14 + build_type: Release + - cxx: g++-10 + other_pkgs: g++-10 + std: 17 + build_type: Release + - cxx: g++-10 + other_pkgs: g++-10 + std: 20 + build_type: Release + + steps: + - uses: actions/checkout@v4 + + - name: Add repositories for older GCC + run: | + sudo apt-add-repository 'deb http://azure.archive.ubuntu.com/ubuntu/ bionic main' + sudo apt-add-repository 'deb http://azure.archive.ubuntu.com/ubuntu/ bionic universe' + if: ${{ matrix.cxx == 'g++-5' || matrix.cxx == 'g++-6' }} + + - name: Prepare environment + run: | + sudo apt-get update + sudo apt-get install -y ninja-build ${{matrix.other_pkgs}} + + - name: Configure CMake + env: + CXX: ${{matrix.cxx}} + run: | + cmake -B build \ + -DCMAKE_BUILD_TYPE=${{matrix.build_type}} \ + -DCMAKE_CXX_STANDARD=${{matrix.std}} \ + -DCMAKE_CXX_STANDARD_REQUIRED=ON \ + -DCMAKE_CXX_EXTENSIONS=OFF \ + -DRAPIDFUZZ_BUILD_TESTING=1 \ + -DRAPIDFUZZ_ENABLE_LINTERS=1 \ + -G Ninja + + - name: Build + working-directory: build + run: ninja + + - name: Test + working-directory: build + run: ctest -C ${{matrix.build_type}} --rerun-failed --output-on-failure -j `nproc` + diff --git a/CHANGELOG.md b/CHANGELOG.md index b92b6632..b5a76179 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ ## Changelog +## [3.3.0] - 2025-01-18 +### Changed +- add C++11 and C++14 support + ## [3.2.0] - 2024-12-17 ### Performance - improve calculation of min score inside partial_ratio so it can skip more alignments diff --git a/CMakeLists.txt b/CMakeLists.txt index 5c1cead8..71f0ffca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,7 +32,7 @@ if (CMAKE_BINARY_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) message(FATAL_ERROR "Building in-source is not supported! 
Create a build dir and remove ${CMAKE_SOURCE_DIR}/CMakeCache.txt") endif() -project(rapidfuzz LANGUAGES CXX VERSION 3.2.0) +project(rapidfuzz LANGUAGES CXX VERSION 3.3.0) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake") include(GNUInstallDirs) @@ -50,7 +50,7 @@ add_library(rapidfuzz INTERFACE) # provide a namespaced alias for clients to 'link' against if RapidFuzz is included as a sub-project add_library(rapidfuzz::rapidfuzz ALIAS rapidfuzz) -target_compile_features(rapidfuzz INTERFACE cxx_std_17) +target_compile_features(rapidfuzz INTERFACE cxx_std_11) target_include_directories(rapidfuzz INTERFACE diff --git a/extras/rapidfuzz_amalgamated.hpp b/extras/rapidfuzz_amalgamated.hpp index 353088da..3932a3ef 100644 --- a/extras/rapidfuzz_amalgamated.hpp +++ b/extras/rapidfuzz_amalgamated.hpp @@ -1,7 +1,7 @@ // Licensed under the MIT License . // SPDX-License-Identifier: MIT // RapidFuzz v1.0.2 -// Generated: 2024-12-14 13:57:57.746331 +// Generated: 2024-12-25 11:44:52.213162 // ---------------------------------------------------------- // This file is an amalgamation of multiple different files. // You probably shouldn't edit it directly. 
@@ -20,7 +20,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { /* hashmap for integers which can only grow, but can't remove elements */ template @@ -213,7 +214,8 @@ struct HybridGrowingHashmap { std::array m_extendedAscii; }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include #include @@ -221,15 +223,16 @@ struct HybridGrowingHashmap { #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct BitMatrixView { using value_type = T; using size_type = size_t; - using pointer = std::conditional_t; - using reference = std::conditional_t; + using pointer = typename std::conditional::type; + using reference = typename std::conditional::type; BitMatrixView(pointer vector, size_type cols) noexcept : m_vector(vector), m_cols(cols) {} @@ -389,12 +392,12 @@ struct ShiftedBitMatrix { return bool(m_matrix[row][col_word] & col_mask); } - auto operator[](size_t row) noexcept + BitMatrixView operator[](size_t row) noexcept { return m_matrix[row]; } - auto operator[](size_t row) const noexcept + BitMatrixView operator[](size_t row) const noexcept { return m_matrix[row]; } @@ -409,7 +412,8 @@ struct ShiftedBitMatrix { std::vector m_offsets; }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include #include @@ -421,227 +425,23 @@ struct ShiftedBitMatrix { #include #include -namespace rapidfuzz::detail { +#include -static inline void assume(bool b) -{ -#if defined(__clang__) - __builtin_assume(b); -#elif defined(__GNUC__) || defined(__GNUG__) - if (!b) __builtin_unreachable(); -#elif defined(_MSC_VER) - __assume(b); +#if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) +# define RAPIDFUZZ_DEDUCTION_GUIDES +# define RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE 1 +# define RAPIDFUZZ_IF_CONSTEXPR if constexpr +#else +# define RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE 0 +# define RAPIDFUZZ_IF_CONSTEXPR if #endif 
-} - -template -CharT* to_begin(CharT* s) -{ - return s; -} - -template -auto to_begin(T& x) -{ - using std::begin; - return begin(x); -} - -template -CharT* to_end(CharT* s) -{ - assume(s != nullptr); - while (*s != 0) - ++s; - - return s; -} - -template -auto to_end(T& x) -{ - using std::end; - return end(x); -} - -template -class Range { - Iter _first; - Iter _last; - // todo we might not want to cache the size for iterators - // that can can retrieve the size in O(1) time - size_t _size; - -public: - using value_type = typename std::iterator_traits::value_type; - using iterator = Iter; - using reverse_iterator = std::reverse_iterator; - - constexpr Range(Iter first, Iter last) : _first(first), _last(last) - { - assert(std::distance(_first, _last) >= 0); - _size = static_cast(std::distance(_first, _last)); - } - - constexpr Range(Iter first, Iter last, size_t size) : _first(first), _last(last), _size(size) - {} - - template - constexpr Range(T& x) : _first(to_begin(x)), _last(to_end(x)) - { - assert(std::distance(_first, _last) >= 0); - _size = static_cast(std::distance(_first, _last)); - } - - constexpr iterator begin() const noexcept - { - return _first; - } - constexpr iterator end() const noexcept - { - return _last; - } - - constexpr reverse_iterator rbegin() const noexcept - { - return reverse_iterator(end()); - } - constexpr reverse_iterator rend() const noexcept - { - return reverse_iterator(begin()); - } - - constexpr size_t size() const - { - return _size; - } - - constexpr bool empty() const - { - return size() == 0; - } - explicit constexpr operator bool() const - { - return !empty(); - } - - template < - typename... 
Dummy, typename IterCopy = Iter, - typename = std::enable_if_t::iterator_category>>> - constexpr decltype(auto) operator[](size_t n) const - { - return _first[static_cast(n)]; - } - - constexpr void remove_prefix(size_t n) - { - if constexpr (std::is_base_of_v::iterator_category>) - _first += static_cast(n); - else - for (size_t i = 0; i < n; ++i) - _first++; - - _size -= n; - } - constexpr void remove_suffix(size_t n) - { - if constexpr (std::is_base_of_v::iterator_category>) - _last -= static_cast(n); - else - for (size_t i = 0; i < n; ++i) - _last--; - - _size -= n; - } - - constexpr Range subseq(size_t pos = 0, size_t count = std::numeric_limits::max()) - { - if (pos > size()) throw std::out_of_range("Index out of range in Range::substr"); - - Range res = *this; - res.remove_prefix(pos); - if (count < res.size()) res.remove_suffix(res.size() - count); - - return res; - } - - constexpr decltype(auto) front() const - { - return *(_first); - } - - constexpr decltype(auto) back() const - { - return *(_last - 1); - } - - constexpr Range reversed() const - { - return {rbegin(), rend(), _size}; - } - - friend std::ostream& operator<<(std::ostream& os, const Range& seq) - { - os << "["; - for (auto x : seq) - os << static_cast(x) << ", "; - os << "]"; - return os; - } -}; - -template -Range(T& x) -> Range; -template -inline bool operator==(const Range& a, const Range& b) -{ - return std::equal(a.begin(), a.end(), b.begin(), b.end()); -} - -template -inline bool operator!=(const Range& a, const Range& b) -{ - return !(a == b); -} - -template -inline bool operator<(const Range& a, const Range& b) -{ - return (std::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end())); -} - -template -inline bool operator>(const Range& a, const Range& b) -{ - return b < a; -} - -template -inline bool operator<=(const Range& a, const Range& b) -{ - return !(b < a); -} - -template -inline bool operator>=(const Range& a, const Range& b) -{ - return !(a < b); -} - -template -using 
RangeVec = std::vector>; - -} // namespace rapidfuzz::detail - -#include - -#include +#if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201402L) || __cplusplus >= 201402L) +# define RAPIDFUZZ_CONSTEXPR_CXX14 constexpr +#else +# define RAPIDFUZZ_CONSTEXPR_CXX14 +#endif -#include #include #include #include @@ -974,13 +774,10 @@ class Editops : private std::vector { inline bool operator==(const Editops& lhs, const Editops& rhs) { - if (lhs.get_src_len() != rhs.get_src_len() || lhs.get_dest_len() != rhs.get_dest_len()) { - return false; - } + if (lhs.get_src_len() != rhs.get_src_len() || lhs.get_dest_len() != rhs.get_dest_len()) return false; + + if (lhs.size() != rhs.size()) return false; - if (lhs.size() != rhs.size()) { - return false; - } return std::equal(lhs.begin(), lhs.end(), rhs.begin()); } @@ -1224,63 +1021,297 @@ struct ScoreAlignment { {} }; -template -inline bool operator==(const ScoreAlignment& a, const ScoreAlignment& b) +template +inline bool operator==(const ScoreAlignment& a, const ScoreAlignment& b) +{ + return (a.score == b.score) && (a.src_start == b.src_start) && (a.src_end == b.src_end) && + (a.dest_start == b.dest_start) && (a.dest_end == b.dest_end); +} + +} // namespace rapidfuzz + +#include +#include + +namespace rapidfuzz { + +namespace detail { +template +auto inner_type(T const*) -> T; + +template +auto inner_type(T const&) -> typename T::value_type; +} // namespace detail + +template +using char_type = decltype(detail::inner_type(std::declval())); + +/* backport of std::iter_value_t from C++20 + * This does not cover the complete functionality, but should be enough for + * the use cases in this library + */ +template +using iter_value_t = typename std::iterator_traits::value_type; + +// taken from +// https://stackoverflow.com/questions/16893992/check-if-type-can-be-explicitly-converted +template +struct is_explicitly_convertible { + template + static void f(T); + + template + static constexpr auto test(int /*unused*/) -> 
decltype(f(static_cast(std::declval())), true) + { + return true; + } + + template + static constexpr auto test(...) -> bool + { + return false; + } + + static bool const value = test(0); +}; + +template +using rf_enable_if_t = typename std::enable_if::type; + +} // namespace rapidfuzz + +namespace rapidfuzz { +namespace detail { + +static inline void assume(bool b) +{ +#if defined(__clang__) + __builtin_assume(b); +#elif defined(__GNUC__) || defined(__GNUG__) + if (!b) __builtin_unreachable(); +#elif defined(_MSC_VER) + __assume(b); +#endif +} + +namespace to_begin_detail { +using std::begin; + +template +CharT* to_begin(CharT* s) +{ + return s; +} + +template +auto to_begin(T& x) -> decltype(begin(x)) +{ + + return begin(x); +} +} // namespace to_begin_detail + +using to_begin_detail::to_begin; + +namespace to_end_detail { +using std::end; + +template +CharT* to_end(CharT* s) +{ + assume(s != nullptr); + while (*s != 0) + ++s; + + return s; +} + +template +auto to_end(T& x) -> decltype(end(x)) +{ + return end(x); +} +} // namespace to_end_detail + +using to_end_detail::to_end; + +template +class Range { + Iter _first; + Iter _last; + // todo we might not want to cache the size for iterators + // that can retrieve the size in O(1) time + size_t _size; + +public: + using value_type = typename std::iterator_traits::value_type; + using iterator = Iter; + using reverse_iterator = std::reverse_iterator; + + Range(Iter first, Iter last) : _first(first), _last(last) + { + assert(std::distance(_first, _last) >= 0); + _size = static_cast(std::distance(_first, _last)); + } + + Range(Iter first, Iter last, size_t size) : _first(first), _last(last), _size(size) + {} + + template + Range(T& x) : Range(to_begin(x), to_end(x)) + {} + + iterator begin() const noexcept + { + return _first; + } + iterator end() const noexcept + { + return _last; + } + + reverse_iterator rbegin() const noexcept + { + return reverse_iterator(end()); + } + reverse_iterator rend() const noexcept + 
{ + return reverse_iterator(begin()); + } + + size_t size() const + { + return _size; + } + + bool empty() const + { + return size() == 0; + } + explicit operator bool() const + { + return !empty(); + } + + template ::iterator_category>::value>> + auto operator[](size_t n) const -> decltype(*_first) + { + return _first[static_cast(n)]; + } + + void remove_prefix(size_t n) + { + std::advance(_first, static_cast(n)); + _size -= n; + } + + void remove_suffix(size_t n) + { + std::advance(_last, -static_cast(n)); + _size -= n; + } + + Range subseq(size_t pos = 0, size_t count = std::numeric_limits::max()) + { + if (pos > size()) throw std::out_of_range("Index out of range in Range::subseq"); + + Range res = *this; + res.remove_prefix(pos); + if (count < res.size()) res.remove_suffix(res.size() - count); + + return res; + } + + const value_type& front() const + { + return *_first; + } + + const value_type& back() const + { + return *(_last - 1); + } + + Range reversed() const + { + return {rbegin(), rend(), _size}; + } + + friend std::ostream& operator<<(std::ostream& os, const Range& seq) + { + os << "["; + for (auto x : seq) + os << static_cast(x) << ", "; + os << "]"; + return os; + } +}; + +template +auto make_range(Iter first, Iter last) -> Range { - return (a.score == b.score) && (a.src_start == b.src_start) && (a.src_end == b.src_end) && - (a.dest_start == b.dest_start) && (a.dest_end == b.dest_end); + return Range(first, last); } -} // namespace rapidfuzz +template +auto make_range(T& x) -> Range +{ + return {to_begin(x), to_end(x)}; +} -#include -#include +template +inline bool operator==(const Range& a, const Range& b) +{ + if (a.size() != b.size()) return false; -namespace rapidfuzz { + return std::equal(a.begin(), a.end(), b.begin()); +} -namespace detail { -template -auto inner_type(T const*) -> T; +template +inline bool operator!=(const Range& a, const Range& b) +{ + return !(a == b); +} -template -auto inner_type(T const&) -> typename T::value_type; -} // 
namespace detail +template +inline bool operator<(const Range& a, const Range& b) +{ + return (std::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end())); +} -template -using char_type = decltype(detail::inner_type(std::declval())); +template +inline bool operator>(const Range& a, const Range& b) +{ + return b < a; +} -/* backport of std::iter_value_t from C++20 - * This does not cover the complete functionality, but should be enough for - * the use cases in this library - */ -template -using iter_value_t = typename std::iterator_traits::value_type; +template +inline bool operator<=(const Range& a, const Range& b) +{ + return !(b < a); +} -// taken from -// https://stackoverflow.com/questions/16893992/check-if-type-can-be-explicitly-converted -template -struct is_explicitly_convertible { - template - static void f(T); +template +inline bool operator>=(const Range& a, const Range& b) +{ + return !(a < b); +} - template - static constexpr auto test(int /*unused*/) -> decltype(f(static_cast(std::declval())), true) - { - return true; - } +template +using RangeVec = std::vector>; - template - static constexpr auto test(...) 
-> bool - { - return false; - } +} // namespace detail +} // namespace rapidfuzz - static bool const value = test(0); -}; +#include -} // namespace rapidfuzz +#include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template class SplittedSentenceView { @@ -1288,7 +1319,7 @@ class SplittedSentenceView { using CharT = iter_value_t; SplittedSentenceView(RangeVec sentence) noexcept( - std::is_nothrow_move_constructible_v>) + std::is_nothrow_move_constructible>::value) : m_sentence(std::move(sentence)) {} @@ -1360,7 +1391,8 @@ auto SplittedSentenceView::join() const -> std::vector return joined; } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include #include @@ -1373,7 +1405,8 @@ auto SplittedSentenceView::join() const -> std::vector # include #endif -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template T bit_mask_lsb(size_t n) @@ -1409,7 +1442,7 @@ constexpr uint64_t shl64(uint64_t a, U shift) return (shift < 64) ? 
a << shift : 0; } -constexpr uint64_t addc64(uint64_t a, uint64_t b, uint64_t carryin, uint64_t* carryout) +RAPIDFUZZ_CONSTEXPR_CXX14 uint64_t addc64(uint64_t a, uint64_t b, uint64_t carryin, uint64_t* carryout) { /* todo should use _addcarry_u64 when available */ a += carryin; @@ -1420,7 +1453,7 @@ constexpr uint64_t addc64(uint64_t a, uint64_t b, uint64_t carryin, uint64_t* ca } template -constexpr T ceil_div(T a, U divisor) +RAPIDFUZZ_CONSTEXPR_CXX14 T ceil_div(T a, U divisor) { T _div = static_cast(divisor); return a / _div + static_cast(a % _div != 0); @@ -1456,7 +1489,7 @@ static inline size_t popcount(uint8_t x) } template -constexpr T rotl(T x, unsigned int n) +RAPIDFUZZ_CONSTEXPR_CXX14 T rotl(T x, unsigned int n) { unsigned int num_bits = std::numeric_limits::digits; assert(n < num_bits); @@ -1556,25 +1589,41 @@ static inline unsigned int countr_zero(uint8_t x) return countr_zero(static_cast(x)); } -template -constexpr void unroll_impl(std::integer_sequence, F&& f) -{ - (f(std::integral_constant{}), ...); -} +template +struct UnrollImpl; + +template +struct UnrollImpl { + template + static void call(F&& f) + { + f(Pos); + UnrollImpl::call(std::forward(f)); + } +}; -template -constexpr void unroll(F&& f) +template +struct UnrollImpl { + template + static void call(F&&) + {} +}; + +template +RAPIDFUZZ_CONSTEXPR_CXX14 void unroll(F&& f) { - unroll_impl(std::make_integer_sequence{}, std::forward(f)); + UnrollImpl::call(f); } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #if defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES) # include #endif -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct DecomposedSet { @@ -1597,10 +1646,8 @@ static inline size_t abs_diff(size_t a, size_t b) template TO opt_static_cast(const FROM& value) { - if constexpr (std::is_same_v) - return value; - else - return static_cast(value); + /* calling the cast through this template function somehow avoids 
useless cast warnings */ + return static_cast(value); } /** @@ -1657,13 +1704,15 @@ static inline void rf_aligned_free(void* ptr) /**@}*/ -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template DecomposedSet set_decomposition(SplittedSentenceView a, @@ -1691,6 +1740,15 @@ DecomposedSet set_decomposition(SplittedSentenceVi return {difference_ab, difference_ba, intersection}; } +template +std::pair rf_mismatch(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2) +{ + while (first1 != last1 && first2 != last2 && *first1 == *first2) + ++first1, ++first2; + + return std::make_pair(first1, first2); +} + /** * Removes common prefix of two string views */ @@ -1699,7 +1757,7 @@ size_t remove_common_prefix(Range& s1, Range& s2) { auto first1 = std::begin(s1); size_t prefix = static_cast( - std::distance(first1, std::mismatch(first1, std::end(s1), std::begin(s2), std::end(s2)).first)); + std::distance(first1, rf_mismatch(first1, std::end(s1), std::begin(s2), std::end(s2)).first)); s1.remove_prefix(prefix); s2.remove_prefix(prefix); return prefix; @@ -1711,9 +1769,9 @@ size_t remove_common_prefix(Range& s1, Range& s2) template size_t remove_common_suffix(Range& s1, Range& s2) { - auto rfirst1 = std::rbegin(s1); + auto rfirst1 = s1.rbegin(); size_t suffix = static_cast( - std::distance(rfirst1, std::mismatch(rfirst1, std::rend(s1), std::rbegin(s2), std::rend(s2)).first)); + std::distance(rfirst1, rf_mismatch(rfirst1, s1.rend(), s2.rbegin(), s2.rend()).first)); s1.remove_suffix(suffix); s2.remove_suffix(suffix); return suffix; @@ -1827,7 +1885,8 @@ SplittedSentenceView sorted_split(InputIt first, InputIt last) return SplittedSentenceView(splitted); } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include @@ -3089,42 +3148,43 @@ static inline native_simd operator<(const native_simd& a, const 
native_sim #endif #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct NormalizedMetricBase { template >> + typename = rapidfuzz::rf_enable_if_t::value>> static double normalized_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, double score_cutoff, double score_hint) { - return _normalized_distance(Range(first1, last1), Range(first2, last2), std::forward(args)..., - score_cutoff, score_hint); + return _normalized_distance(make_range(first1, last1), make_range(first2, last2), + std::forward(args)..., score_cutoff, score_hint); } template static double normalized_distance(const Sentence1& s1, const Sentence2& s2, Args... args, double score_cutoff, double score_hint) { - return _normalized_distance(Range(s1), Range(s2), std::forward(args)..., score_cutoff, + return _normalized_distance(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, score_hint); } template >> + typename = rapidfuzz::rf_enable_if_t::value>> static double normalized_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, double score_cutoff, double score_hint) { - return _normalized_similarity(Range(first1, last1), Range(first2, last2), std::forward(args)..., - score_cutoff, score_hint); + return _normalized_similarity(make_range(first1, last1), make_range(first2, last2), + std::forward(args)..., score_cutoff, score_hint); } template static double normalized_similarity(const Sentence1& s1, const Sentence2& s2, Args... 
args, double score_cutoff, double score_hint) { - return _normalized_similarity(Range(s1), Range(s2), std::forward(args)..., score_cutoff, - score_hint); + return _normalized_similarity(make_range(s1), make_range(s2), std::forward(args)..., + score_cutoff, score_hint); } protected: @@ -3162,11 +3222,11 @@ struct NormalizedMetricBase { template struct DistanceBase : public NormalizedMetricBase { template >> + typename = rapidfuzz::rf_enable_if_t::value>> static ResType distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { - return T::_distance(Range(first1, last1), Range(first2, last2), std::forward(args)..., + return T::_distance(make_range(first1, last1), make_range(first2, last2), std::forward(args)..., score_cutoff, score_hint); } @@ -3174,15 +3234,16 @@ struct DistanceBase : public NormalizedMetricBase { static ResType distance(const Sentence1& s1, const Sentence2& s2, Args... args, ResType score_cutoff, ResType score_hint) { - return T::_distance(Range(s1), Range(s2), std::forward(args)..., score_cutoff, score_hint); + return T::_distance(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, + score_hint); } template >> + typename = rapidfuzz::rf_enable_if_t::value>> static ResType similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { - return _similarity(Range(first1, last1), Range(first2, last2), std::forward(args)..., + return _similarity(make_range(first1, last1), make_range(first2, last2), std::forward(args)..., score_cutoff, score_hint); } @@ -3190,7 +3251,8 @@ struct DistanceBase : public NormalizedMetricBase { static ResType similarity(const Sentence1& s1, const Sentence2& s2, Args... 
args, ResType score_cutoff, ResType score_hint) { - return _similarity(Range(s1), Range(s2), std::forward(args)..., score_cutoff, score_hint); + return _similarity(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, + score_hint); } protected: @@ -3217,11 +3279,11 @@ struct DistanceBase : public NormalizedMetricBase { template struct SimilarityBase : public NormalizedMetricBase { template >> + typename = rapidfuzz::rf_enable_if_t::value>> static ResType distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { - return _distance(Range(first1, last1), Range(first2, last2), std::forward(args)..., + return _distance(make_range(first1, last1), make_range(first2, last2), std::forward(args)..., score_cutoff, score_hint); } @@ -3229,23 +3291,25 @@ struct SimilarityBase : public NormalizedMetricBase { static ResType distance(const Sentence1& s1, const Sentence2& s2, Args... args, ResType score_cutoff, ResType score_hint) { - return _distance(Range(s1), Range(s2), std::forward(args)..., score_cutoff, score_hint); + return _distance(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, + score_hint); } template >> + typename = rapidfuzz::rf_enable_if_t::value>> static ResType similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { - return T::_similarity(Range(first1, last1), Range(first2, last2), std::forward(args)..., - score_cutoff, score_hint); + return T::_similarity(make_range(first1, last1), make_range(first2, last2), + std::forward(args)..., score_cutoff, score_hint); } template static ResType similarity(const Sentence1& s1, const Sentence2& s2, Args... 
args, ResType score_cutoff, ResType score_hint) { - return T::_similarity(Range(s1), Range(s2), std::forward(args)..., score_cutoff, score_hint); + return T::_similarity(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, + score_hint); } protected: @@ -3260,11 +3324,21 @@ struct SimilarityBase : public NormalizedMetricBase { (maximum >= score_hint) ? maximum - score_hint : static_cast(WorstSimilarity); ResType sim = T::_similarity(s1, s2, std::forward(args)..., cutoff_similarity, hint_similarity); ResType dist = maximum - sim; + return _apply_distance_score_cutoff(dist, score_cutoff); + } - if constexpr (std::is_floating_point_v) - return (dist <= score_cutoff) ? dist : 1.0; - else - return (dist <= score_cutoff) ? dist : score_cutoff + 1; + template + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) + { + return (score <= score_cutoff) ? score : 1.0; + } + + template + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) + { + return (score <= score_cutoff) ? 
score : score_cutoff + 1; } SimilarityBase() @@ -3278,27 +3352,27 @@ struct CachedNormalizedMetricBase { double normalized_distance(InputIt2 first2, InputIt2 last2, double score_cutoff = 1.0, double score_hint = 1.0) const { - return _normalized_distance(Range(first2, last2), score_cutoff, score_hint); + return _normalized_distance(make_range(first2, last2), score_cutoff, score_hint); } template double normalized_distance(const Sentence2& s2, double score_cutoff = 1.0, double score_hint = 1.0) const { - return _normalized_distance(Range(s2), score_cutoff, score_hint); + return _normalized_distance(make_range(s2), score_cutoff, score_hint); } template double normalized_similarity(InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0, double score_hint = 0.0) const { - return _normalized_similarity(Range(first2, last2), score_cutoff, score_hint); + return _normalized_similarity(make_range(first2, last2), score_cutoff, score_hint); } template double normalized_similarity(const Sentence2& s2, double score_cutoff = 0.0, double score_hint = 0.0) const { - return _normalized_similarity(Range(s2), score_cutoff, score_hint); + return _normalized_similarity(make_range(s2), score_cutoff, score_hint); } protected: @@ -3339,7 +3413,7 @@ struct CachedDistanceBase : public CachedNormalizedMetricBase { ResType score_hint = static_cast(WorstDistance)) const { const T& derived = static_cast(*this); - return derived._distance(Range(first2, last2), score_cutoff, score_hint); + return derived._distance(make_range(first2, last2), score_cutoff, score_hint); } template @@ -3347,7 +3421,7 @@ struct CachedDistanceBase : public CachedNormalizedMetricBase { ResType score_hint = static_cast(WorstDistance)) const { const T& derived = static_cast(*this); - return derived._distance(Range(s2), score_cutoff, score_hint); + return derived._distance(make_range(s2), score_cutoff, score_hint); } template @@ -3355,14 +3429,14 @@ struct CachedDistanceBase : public CachedNormalizedMetricBase { 
ResType score_cutoff = static_cast(WorstSimilarity), ResType score_hint = static_cast(WorstSimilarity)) const { - return _similarity(Range(first2, last2), score_cutoff, score_hint); + return _similarity(make_range(first2, last2), score_cutoff, score_hint); } template ResType similarity(const Sentence2& s2, ResType score_cutoff = static_cast(WorstSimilarity), ResType score_hint = static_cast(WorstSimilarity)) const { - return _similarity(Range(s2), score_cutoff, score_hint); + return _similarity(make_range(s2), score_cutoff, score_hint); } protected: @@ -3393,14 +3467,14 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { ResType score_cutoff = static_cast(WorstDistance), ResType score_hint = static_cast(WorstDistance)) const { - return _distance(Range(first2, last2), score_cutoff, score_hint); + return _distance(make_range(first2, last2), score_cutoff, score_hint); } template ResType distance(const Sentence2& s2, ResType score_cutoff = static_cast(WorstDistance), ResType score_hint = static_cast(WorstDistance)) const { - return _distance(Range(s2), score_cutoff, score_hint); + return _distance(make_range(s2), score_cutoff, score_hint); } template @@ -3409,7 +3483,7 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { ResType score_hint = static_cast(WorstSimilarity)) const { const T& derived = static_cast(*this); - return derived._similarity(Range(first2, last2), score_cutoff, score_hint); + return derived._similarity(make_range(first2, last2), score_cutoff, score_hint); } template @@ -3417,7 +3491,7 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { ResType score_hint = static_cast(WorstSimilarity)) const { const T& derived = static_cast(*this); - return derived._similarity(Range(s2), score_cutoff, score_hint); + return derived._similarity(make_range(s2), score_cutoff, score_hint); } protected: @@ -3430,11 +3504,21 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { ResType hint_similarity = 
(maximum > score_hint) ? maximum - score_hint : 0; ResType sim = derived._similarity(s2, cutoff_similarity, hint_similarity); ResType dist = maximum - sim; + return _apply_distance_score_cutoff(dist, score_cutoff); + } - if constexpr (std::is_floating_point_v) - return (dist <= score_cutoff) ? dist : 1.0; - else - return (dist <= score_cutoff) ? dist : score_cutoff + 1; + template + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) + { + return (score <= score_cutoff) ? score : 1.0; + } + + template + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) + { + return (score <= score_cutoff) ? score : score_cutoff + 1; } CachedSimilarityBase() @@ -3448,28 +3532,28 @@ struct MultiNormalizedMetricBase { void normalized_distance(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, double score_cutoff = 1.0) const { - _normalized_distance(scores, score_count, Range(first2, last2), score_cutoff); + _normalized_distance(scores, score_count, make_range(first2, last2), score_cutoff); } template void normalized_distance(double* scores, size_t score_count, const Sentence2& s2, double score_cutoff = 1.0) const { - _normalized_distance(scores, score_count, Range(s2), score_cutoff); + _normalized_distance(scores, score_count, make_range(s2), score_cutoff); } template void normalized_similarity(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0) const { - _normalized_similarity(scores, score_count, Range(first2, last2), score_cutoff); + _normalized_similarity(scores, score_count, make_range(first2, last2), score_cutoff); } template void normalized_similarity(double* scores, size_t score_count, const Sentence2& s2, double score_cutoff = 0.0) const { - _normalized_similarity(scores, score_count, Range(s2), score_cutoff); + _normalized_similarity(scores, score_count, make_range(s2), score_cutoff); } protected: @@ -3483,7 +3567,8 
@@ struct MultiNormalizedMetricBase { // reinterpretation only works when the types have the same size ResType* scores_orig = nullptr; - if constexpr (sizeof(double) == sizeof(ResType)) + + RAPIDFUZZ_IF_CONSTEXPR (sizeof(double) == sizeof(ResType)) scores_orig = reinterpret_cast(scores); else scores_orig = new ResType[derived.result_count()]; @@ -3497,7 +3582,7 @@ struct MultiNormalizedMetricBase { scores[i] = (norm_dist <= score_cutoff) ? norm_dist : 1.0; } - if constexpr (sizeof(double) != sizeof(ResType)) delete[] scores_orig; + RAPIDFUZZ_IF_CONSTEXPR (sizeof(double) != sizeof(ResType)) delete[] scores_orig; } template @@ -3525,7 +3610,7 @@ struct MultiDistanceBase : public MultiNormalizedMetricBase { ResType score_cutoff = static_cast(WorstDistance)) const { const T& derived = static_cast(*this); - derived._distance(scores, score_count, Range(first2, last2), score_cutoff); + derived._distance(scores, score_count, make_range(first2, last2), score_cutoff); } template @@ -3533,21 +3618,21 @@ struct MultiDistanceBase : public MultiNormalizedMetricBase { ResType score_cutoff = static_cast(WorstDistance)) const { const T& derived = static_cast(*this); - derived._distance(scores, score_count, Range(s2), score_cutoff); + derived._distance(scores, score_count, make_range(s2), score_cutoff); } template void similarity(ResType* scores, size_t score_count, InputIt2 first2, InputIt2 last2, ResType score_cutoff = static_cast(WorstSimilarity)) const { - _similarity(scores, score_count, Range(first2, last2), score_cutoff); + _similarity(scores, score_count, make_range(first2, last2), score_cutoff); } template void similarity(ResType* scores, size_t score_count, const Sentence2& s2, ResType score_cutoff = static_cast(WorstSimilarity)) const { - _similarity(scores, score_count, Range(s2), score_cutoff); + _similarity(scores, score_count, make_range(s2), score_cutoff); } protected: @@ -3576,14 +3661,14 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { void 
distance(ResType* scores, size_t score_count, InputIt2 first2, InputIt2 last2, ResType score_cutoff = static_cast(WorstDistance)) const { - _distance(scores, score_count, Range(first2, last2), score_cutoff); + _distance(scores, score_count, make_range(first2, last2), score_cutoff); } template void distance(ResType* scores, size_t score_count, const Sentence2& s2, ResType score_cutoff = static_cast(WorstDistance)) const { - _distance(scores, score_count, Range(s2), score_cutoff); + _distance(scores, score_count, make_range(s2), score_cutoff); } template @@ -3591,7 +3676,7 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { ResType score_cutoff = static_cast(WorstSimilarity)) const { const T& derived = static_cast(*this); - derived._similarity(scores, score_count, Range(first2, last2), score_cutoff); + derived._similarity(scores, score_count, make_range(first2, last2), score_cutoff); } template @@ -3599,7 +3684,7 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { ResType score_cutoff = static_cast(WorstSimilarity)) const { const T& derived = static_cast(*this); - derived._similarity(scores, score_count, Range(s2), score_cutoff); + derived._similarity(scores, score_count, make_range(s2), score_cutoff); } protected: @@ -3612,22 +3697,34 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { for (size_t i = 0; i < derived.get_input_count(); ++i) { ResType maximum = derived.maximum(i, s2); ResType dist = maximum - scores[i]; - - if constexpr (std::is_floating_point_v) - scores[i] = (dist <= score_cutoff) ? dist : 1.0; - else - scores[i] = (dist <= score_cutoff) ? dist : score_cutoff + 1; + scores[i] = _apply_distance_score_cutoff(dist, score_cutoff); } } + template + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) + { + return (score <= score_cutoff) ? 
score : 1.0; + } + + template + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) + { + return (score <= score_cutoff) ? score : score_cutoff + 1; + } + MultiSimilarityBase() {} friend T; }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct RowId { @@ -3746,14 +3843,14 @@ class DamerauLevenshtein } template - static size_t _distance(const Range& s1, const Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) + static size_t _distance(const Range& s1, const Range& s2, size_t score_cutoff, size_t) { return damerau_levenshtein_distance(s1, s2, score_cutoff); } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz namespace rapidfuzz { /* the API will require a change when adding custom weights */ @@ -3884,8 +3981,7 @@ struct CachedDamerauLevenshtein : public detail::CachedDistanceBase - size_t _distance(const detail::Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _distance(const detail::Range& s2, size_t score_cutoff, size_t) const { return rapidfuzz::experimental::damerau_levenshtein_distance(s1, s2, score_cutoff); } @@ -3893,11 +3989,13 @@ struct CachedDamerauLevenshtein : public detail::CachedDistanceBase s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedDamerauLevenshtein(const Sentence1& s1_) -> CachedDamerauLevenshtein>; template CachedDamerauLevenshtein(InputIt1 first1, InputIt1 last1) -> CachedDamerauLevenshtein>; +#endif } // namespace experimental } // namespace rapidfuzz @@ -3906,7 +4004,8 @@ CachedDamerauLevenshtein(InputIt1 first1, InputIt1 last1) -> CachedDamerauLevens #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { class Hamming : public DistanceBase::max(), bool> { friend DistanceBase::max(), bool>; @@ -3920,7 +4019,7 @@ class Hamming : public 
DistanceBase static size_t _distance(const Range& s1, const Range& s2, bool pad, - size_t score_cutoff, [[maybe_unused]] size_t score_hint) + size_t score_cutoff, size_t) { if (!pad && s1.size() != s2.size()) throw std::invalid_argument("Sequences are not the same length."); @@ -3957,7 +4056,8 @@ Editops hamming_editops(const Range& s1, const Range& s2, bo return ops; } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz namespace rapidfuzz { @@ -4031,7 +4131,7 @@ template Editops hamming_editops(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, bool pad_ = true, size_t score_hint = std::numeric_limits::max()) { - return detail::hamming_editops(detail::Range(first1, last1), detail::Range(first2, last2), pad_, + return detail::hamming_editops(detail::make_range(first1, last1), detail::make_range(first2, last2), pad_, score_hint); } @@ -4039,7 +4139,7 @@ template Editops hamming_editops(const Sentence1& s1, const Sentence2& s2, bool pad_ = true, size_t score_hint = std::numeric_limits::max()) { - return detail::hamming_editops(detail::Range(s1), detail::Range(s2), pad_, score_hint); + return detail::hamming_editops(detail::make_range(s1), detail::make_range(s2), pad_, score_hint); } /** @@ -4104,8 +4204,7 @@ struct CachedHamming : public detail::CachedDistanceBase, } template - size_t _distance(const detail::Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _distance(const detail::Range& s2, size_t score_cutoff, size_t score_hint) const { return detail::Hamming::distance(s1, s2, pad, score_cutoff, score_hint); } @@ -4114,11 +4213,13 @@ struct CachedHamming : public detail::CachedDistanceBase, bool pad; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedHamming(const Sentence1& s1_, bool pad_ = true) -> CachedHamming>; template CachedHamming(InputIt1 first1, InputIt1 last1, bool pad_ = true) -> CachedHamming>; +#endif /**@}*/ @@ -4130,7 +4231,8 @@ CachedHamming(InputIt1 
first1, InputIt1 last1, bool pad_ = true) -> CachedHammin #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { struct BitvectorHashmap { BitvectorHashmap() : m_map() @@ -4338,14 +4440,16 @@ struct BlockPatternMatchVector { BitMatrix m_extendedAscii; }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct LCSseqResult; @@ -4362,6 +4466,20 @@ struct LCSseqResult { size_t sim; }; +template +LCSseqResult& getMatrixRef(LCSseqResult& res) +{ +#if RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE + return res; +#else + // this is a hack since the compiler doesn't know early enough that + // this is never called when the types differ. + // On C++17 this properly uses if constexpr + assert(RecordMatrix); + return reinterpret_cast&>(res); +#endif +} + /* * An encoded mbleven model table. * @@ -4475,12 +4593,12 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const size_t cur_vec = 0; for (; cur_vec + interleaveCount * vecs <= block.size(); cur_vec += interleaveCount * vecs) { std::array, interleaveCount> S; - unroll([&](auto j) { S[j] = static_cast(-1); }); + unroll([&](size_t j) { S[j] = static_cast(-1); }); for (const auto& ch : s2) { - unroll([&](auto j) { + unroll([&](size_t j) { alignas(32) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + j * vecs + i, ch); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + j * vecs + i, ch); }); native_simd Matches(stored.data()); native_simd u = S[j] & Matches; @@ -4488,9 +4606,9 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const }); } - unroll([&](auto j) { + unroll([&](size_t j) { auto counts = popcount(~S[j]); - unroll([&](auto i) { + unroll([&](size_t i) { *score_iter = (counts[i] >= score_cutoff) ? 
static_cast(counts[i]) : 0; score_iter++; }); @@ -4502,7 +4620,7 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const for (const auto& ch : s2) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, ch); }); native_simd Matches(stored.data()); native_simd u = S & Matches; @@ -4510,7 +4628,7 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const } auto counts = popcount(~S); - unroll([&](auto i) { + unroll([&](size_t i) { *score_iter = (counts[i] >= score_cutoff) ? static_cast(counts[i]) : 0; score_iter++; }); @@ -4527,7 +4645,10 @@ auto lcs_unroll(const PMV& block, const Range&, const Range& unroll([&](size_t i) { S[i] = ~UINT64_C(0); }); LCSseqResult res; - if constexpr (RecordMatrix) res.S = ShiftedBitMatrix(s2.size(), N, ~UINT64_C(0)); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.S = ShiftedBitMatrix(s2.size(), N, ~UINT64_C(0)); + } auto iter_s2 = s2.begin(); for (size_t i = 0; i < s2.size(); ++i) { @@ -4542,7 +4663,10 @@ auto lcs_unroll(const PMV& block, const Range&, const Range& uint64_t x = addc64(S[word], u, carry, &carry); S[word] = x | (S[word] - u); - if constexpr (RecordMatrix) res.S[i][word] = S[word]; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.S[i][word] = S[word]; + } }); } @@ -4553,7 +4677,10 @@ auto lcs_unroll(const PMV& block, const Range&, const Range& uint64_t x = addc64(S[word], u, carry, &carry); S[word] = x | (S[word] - u); - if constexpr (RecordMatrix) res.S[i][word] = S[word]; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.S[i][word] = S[word]; + } }); iter_s2++; @@ -4588,10 +4715,11 @@ auto lcs_blockwise(const PMV& PM, const Range& s1, const Range res; - if constexpr (RecordMatrix) { + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); size_t full_band = 
band_width_left + 1 + band_width_right; size_t full_band_words = std::min(words, full_band / word_size + 2); - res.S = ShiftedBitMatrix(s2.size(), full_band_words, ~UINT64_C(0)); + res_.S = ShiftedBitMatrix(s2.size(), full_band_words, ~UINT64_C(0)); } /* first_block is the index of the first block in Ukkonen band. */ @@ -4602,7 +4730,10 @@ auto lcs_blockwise(const PMV& PM, const Range& s1, const Range(first_block * word_size)); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.S.set_offset(row, static_cast(first_block * word_size)); + } for (size_t word = first_block; word < last_block; ++word) { const uint64_t Matches = PM.get(word, *iter_s2); @@ -4613,7 +4744,10 @@ auto lcs_blockwise(const PMV& PM, const Range& s1, const Range band_width_right) first_block = (row - band_width_right) / word_size; @@ -4685,8 +4819,7 @@ size_t lcs_seq_similarity(const BlockPatternMatchVector& block, Range size_t max_misses = len1 + len2 - 2 * score_cutoff; /* no edits are allowed */ - if (max_misses == 0 || (max_misses == 1 && len1 == len2)) - return std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()) ? len1 : 0; + if (max_misses == 0 || (max_misses == 1 && len1 == len2)) return s1 == s2 ? len1 : 0; if (max_misses < abs_diff(len1, len2)) return 0; @@ -4718,8 +4851,7 @@ size_t lcs_seq_similarity(Range s1, Range s2, size_t score_c size_t max_misses = len1 + len2 - 2 * score_cutoff; /* no edits are allowed */ - if (max_misses == 0 || (max_misses == 1 && len1 == len2)) - return std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()) ? len1 : 0; + if (max_misses == 0 || (max_misses == 1 && len1 == len2)) return s1 == s2 ? 
len1 : 0; if (max_misses < abs_diff(len1, len2)) return 0; @@ -4753,7 +4885,9 @@ Editops recover_alignment(const Range& s1, const Range& s2, if (dist == 0) return editops; - [[maybe_unused]] size_t band_width_right = s2.size() - matrix.sim; +#ifndef NDEBUG + size_t band_width_right = s2.size() - matrix.sim; +#endif auto col = len1; auto row = len2; @@ -4852,13 +4986,14 @@ class LCSseq : public SimilarityBase static size_t _similarity(const Range& s1, const Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) + size_t) { return lcs_seq_similarity(s1, s2, score_cutoff); } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include #include @@ -4921,13 +5056,13 @@ double lcs_seq_normalized_similarity(const Sentence1& s1, const Sentence2& s2, d template Editops lcs_seq_editops(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2) { - return detail::lcs_seq_editops(detail::Range(first1, last1), detail::Range(first2, last2)); + return detail::lcs_seq_editops(detail::make_range(first1, last1), detail::make_range(first2, last2)); } template Editops lcs_seq_editops(const Sentence1& s1, const Sentence2& s2) { - return detail::lcs_seq_editops(detail::Range(s1), detail::Range(s2)); + return detail::lcs_seq_editops(detail::make_range(s1), detail::make_range(s2)); } #ifdef RAPIDFUZZ_SIMD @@ -4939,26 +5074,26 @@ struct MultiLCSseq : public detail::MultiSimilarityBase, siz friend detail::MultiSimilarityBase, size_t, 0, std::numeric_limits::max()>; friend detail::MultiNormalizedMetricBase, size_t>; - constexpr static size_t get_vec_size() + RAPIDFUZZ_CONSTEXPR_CXX14 static size_t get_vec_size() { # ifdef RAPIDFUZZ_AVX2 using namespace detail::simd_avx2; # else using namespace detail::simd_sse2; # endif - if constexpr (MaxLen <= 8) + RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 8) return native_simd::size; - else if constexpr (MaxLen <= 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 16) return native_simd::size; - else if constexpr 
(MaxLen <= 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 32) return native_simd::size; - else if constexpr (MaxLen <= 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 64) return native_simd::size; - static_assert(MaxLen <= 64); + static_assert(MaxLen <= 64, "expected MaxLen <= 64"); } - constexpr static size_t find_block_count(size_t count) + static size_t find_block_count(size_t count) { size_t vec_size = get_vec_size(); size_t simd_vec_count = detail::ceil_div(count, vec_size); @@ -5020,14 +5155,14 @@ struct MultiLCSseq : public detail::MultiSimilarityBase, siz if (score_count < result_count()) throw std::invalid_argument("scores has to have >= result_count() elements"); - detail::Range scores_(scores, scores + score_count); - if constexpr (MaxLen == 8) + auto scores_ = detail::make_range(scores, scores + score_count); + RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 8) detail::lcs_simd(scores_, PM, s2, score_cutoff); - else if constexpr (MaxLen == 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 16) detail::lcs_simd(scores_, PM, s2, score_cutoff); - else if constexpr (MaxLen == 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 32) detail::lcs_simd(scores_, PM, s2, score_cutoff); - else if constexpr (MaxLen == 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 64) detail::lcs_simd(scores_, PM, s2, score_cutoff); } @@ -5058,7 +5193,7 @@ struct CachedLCSseq {} template - CachedLCSseq(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::Range(first1, last1)) + CachedLCSseq(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::make_range(first1, last1)) {} private: @@ -5072,25 +5207,27 @@ struct CachedLCSseq } template - size_t _similarity(const detail::Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _similarity(const detail::Range& s2, size_t score_cutoff, size_t) const { - return detail::lcs_seq_similarity(PM, detail::Range(s1), s2, score_cutoff); + return detail::lcs_seq_similarity(PM, detail::make_range(s1), s2, score_cutoff); } 
std::vector s1; detail::BlockPatternMatchVector PM; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedLCSseq(const Sentence1& s1_) -> CachedLCSseq>; template CachedLCSseq(InputIt1 first1, InputIt1 last1) -> CachedLCSseq>; +#endif } // namespace rapidfuzz -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template size_t indel_distance(const BlockPatternMatchVector& block, const Range& s1, @@ -5147,7 +5284,8 @@ class Indel : public DistanceBase } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz namespace rapidfuzz { @@ -5324,11 +5462,13 @@ struct CachedIndel CachedLCSseq scorer; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedIndel(const Sentence1& s1_) -> CachedIndel>; template CachedIndel(InputIt1 first1, InputIt1 last1) -> CachedIndel>; +#endif } // namespace rapidfuzz @@ -5336,7 +5476,8 @@ CachedIndel(InputIt1 first1, InputIt1 last1) -> CachedIndel #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { struct FlaggedCharsWord { uint64_t P_flag; @@ -5419,7 +5560,11 @@ static inline size_t count_common_chars(const FlaggedCharsMultiword& flagged) template static inline FlaggedCharsWord flag_similar_characters_word(const PM_Vec& PM, - [[maybe_unused]] const Range& P, +#ifdef NDEBUG + const Range&, +#else + const Range& P, +#endif const Range& T, size_t Bound) { assert(P.size() <= 64); @@ -5487,7 +5632,7 @@ static inline void flag_similar_characters_step(const BlockPatternMatchVector& P if (T_j >= 0 && T_j < 256) { for (; word + 3 < last_word - 1; word += 4) { uint64_t PM_j[4]; - unroll([&](auto i) { + unroll([&](size_t i) { PM_j[i] = PM.get(word + i, static_cast(T_j)) & (~flagged.P_flag[word + i]); }); @@ -5786,6 +5931,11 @@ struct JaroSimilaritySimdBounds { template static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_lengths, Range& s2) +# ifdef RAPIDFUZZ_AVX2 + -> JaroSimilaritySimdBounds> +# else + -> JaroSimilaritySimdBounds> 
+# endif { # ifdef RAPIDFUZZ_AVX2 using namespace simd_avx2; @@ -5793,7 +5943,9 @@ static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_leng using namespace simd_sse2; # endif - [[maybe_unused]] static constexpr size_t alignment = native_simd::alignment; +# ifndef RAPIDFUZZ_AVX2 + static constexpr size_t alignment = native_simd::alignment; +# endif static constexpr size_t vec_width = native_simd::size; assert(s2.size() <= sizeof(VecType) * 8); @@ -5853,6 +6005,11 @@ static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_leng template static inline auto jaro_similarity_prepare_bound_long_s2(const VecType* s1_lengths, Range& s2) +# ifdef RAPIDFUZZ_AVX2 + -> JaroSimilaritySimdBounds> +# else + -> JaroSimilaritySimdBounds> +# endif { # ifdef RAPIDFUZZ_AVX2 using namespace simd_avx2; @@ -5942,7 +6099,7 @@ jaro_similarity_simd_long_s2(Range scores, const detail::BlockPatternMa size_t j = 0; for (; j < std::min(bounds.maxBound, s2_cur.size()); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -5956,7 +6113,7 @@ jaro_similarity_simd_long_s2(Range scores, const detail::BlockPatternMa for (; j < s2_cur.size(); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -6059,7 +6216,7 @@ jaro_similarity_simd_short_s2(Range scores, const detail::BlockPatternM size_t j = 0; for (; j < std::min(bounds.maxBound, s2_cur.size()); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](size_t i) { stored[i] = 
block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -6072,7 +6229,7 @@ jaro_similarity_simd_short_s2(Range scores, const detail::BlockPatternM for (; j < s2_cur.size(); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -6163,13 +6320,14 @@ class Jaro : public SimilarityBase { template static double _similarity(const Range& s1, const Range& s2, double score_cutoff, - [[maybe_unused]] double score_hint) + double) { return jaro_similarity(s1, s2, score_cutoff); } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include @@ -6236,12 +6394,13 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, friend detail::MultiSimilarityBase, double, 0, 1>; friend detail::MultiNormalizedMetricBase, double>; - static_assert(MaxLen == 8 || MaxLen == 16 || MaxLen == 32 || MaxLen == 64); + static_assert(MaxLen == 8 || MaxLen == 16 || MaxLen == 32 || MaxLen == 64, "incorrect MaxLen used"); - using VecType = typename std::conditional_t< + using VecType = typename std::conditional< MaxLen == 8, uint8_t, - typename std::conditional_t>>; + typename std::conditional::type>::type>:: + type; constexpr static size_t get_vec_size() { @@ -6261,7 +6420,7 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, # endif } - constexpr static size_t find_block_count(size_t count) + static size_t find_block_count(size_t count) { size_t vec_size = get_vec_size(); size_t simd_vec_count = detail::ceil_div(count, vec_size); @@ -6332,12 +6491,12 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, if (score_count < result_count()) throw std::invalid_argument("scores has to have >= result_count() elements"); - detail::Range 
scores_(scores, scores + score_count); + auto scores_ = detail::make_range(scores, scores + score_count); detail::jaro_similarity_simd(scores_, PM, str_lens, str_lens_size, s2, score_cutoff); } template - double maximum([[maybe_unused]] size_t s1_idx, const detail::Range&) const + double maximum(size_t, const detail::Range&) const { return 1.0; } @@ -6364,7 +6523,7 @@ struct CachedJaro : public detail::CachedSimilarityBase, doub {} template - CachedJaro(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::Range(first1, last1)) + CachedJaro(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::make_range(first1, last1)) {} private: @@ -6378,25 +6537,27 @@ struct CachedJaro : public detail::CachedSimilarityBase, doub } template - double _similarity(const detail::Range& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double _similarity(const detail::Range& s2, double score_cutoff, double) const { - return detail::jaro_similarity(PM, detail::Range(s1), s2, score_cutoff); + return detail::jaro_similarity(PM, detail::make_range(s1), s2, score_cutoff); } std::vector s1; detail::BlockPatternMatchVector PM; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedJaro(const Sentence1& s1_) -> CachedJaro>; template CachedJaro(InputIt1 first1, InputIt1 last1) -> CachedJaro>; +#endif } // namespace rapidfuzz -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template double jaro_winkler_similarity(const Range& P, const Range& T, double prefix_weight, @@ -6474,18 +6635,19 @@ class JaroWinkler : public SimilarityBase { template static double _similarity(const Range& s1, const Range& s2, double prefix_weight, - double score_cutoff, [[maybe_unused]] double score_hint) + double score_cutoff, double) { return jaro_winkler_similarity(s1, s2, prefix_weight, score_cutoff); } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz namespace rapidfuzz { template >> + typename = 
rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 1.0) { @@ -6501,7 +6663,7 @@ double jaro_winkler_distance(const Sentence1& s1, const Sentence2& s2, double pr } template >> + typename = rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 0.0) { @@ -6517,7 +6679,7 @@ double jaro_winkler_similarity(const Sentence1& s1, const Sentence2& s2, double } template >> + typename = rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_normalized_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 1.0) { @@ -6533,7 +6695,7 @@ double jaro_winkler_normalized_distance(const Sentence1& s1, const Sentence2& s2 } template >> + typename = rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_normalized_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 0.0) { @@ -6621,7 +6783,7 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase - double maximum([[maybe_unused]] size_t s1_idx, const detail::Range&) const + double maximum(size_t, const detail::Range&) const { return 1.0; } @@ -6650,7 +6812,7 @@ struct CachedJaroWinkler : public detail::CachedSimilarityBase CachedJaroWinkler(InputIt1 first1, InputIt1 last1, double _prefix_weight = 0.1) - : prefix_weight(_prefix_weight), s1(first1, last1), PM(detail::Range(first1, last1)) + : prefix_weight(_prefix_weight), s1(first1, last1), PM(detail::make_range(first1, last1)) {} private: @@ -6664,10 +6826,9 @@ struct CachedJaroWinkler : public detail::CachedSimilarityBase - double _similarity(const detail::Range& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double _similarity(const detail::Range& s2, double 
score_cutoff, double) const { - return detail::jaro_winkler_similarity(PM, detail::Range(s1), s2, prefix_weight, score_cutoff); + return detail::jaro_winkler_similarity(PM, detail::make_range(s1), s2, prefix_weight, score_cutoff); } double prefix_weight; @@ -6675,6 +6836,7 @@ struct CachedJaroWinkler : public detail::CachedSimilarityBase explicit CachedJaroWinkler(const Sentence1& s1_, double _prefix_weight = 0.1) -> CachedJaroWinkler>; @@ -6682,6 +6844,7 @@ explicit CachedJaroWinkler(const Sentence1& s1_, template CachedJaroWinkler(InputIt1 first1, InputIt1 last1, double _prefix_weight = 0.1) -> CachedJaroWinkler>; +#endif } // namespace rapidfuzz @@ -6692,7 +6855,8 @@ CachedJaroWinkler(InputIt1 first1, InputIt1 last1, #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { struct LevenshteinRow { uint64_t VP; @@ -6731,6 +6895,34 @@ struct LevenshteinResult { size_t dist; }; +template +LevenshteinResult& getMatrixRef(LevenshteinResult& res) +{ +#if RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE + return res; +#else + // this is a hack since the compiler doesn't know early enough that + // this is never called when the types differ. + // On C++17 this properly uses if constexpr + assert(RecordMatrix); + return reinterpret_cast&>(res); +#endif +} + +template +LevenshteinResult& getBitRowRef(LevenshteinResult& res) +{ +#if RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE + return res; +#else + // this is a hack since the compiler doesn't know early enough that + // this is never called when the types differ. 
+ // On C++17 this properly uses if constexpr + assert(RecordBitRow); + return reinterpret_cast&>(res); +#endif +} + template size_t generalized_levenshtein_wagner_fischer(const Range& s1, const Range& s2, LevenshteinWeightTable weights, size_t max) @@ -6918,9 +7110,10 @@ auto levenshtein_hyrroe2003(const PM_Vec& PM, const Range& s1, const R LevenshteinResult res; res.dist = s1.size(); - if constexpr (RecordMatrix) { - res.VP = ShiftedBitMatrix(s2.size(), 1, ~UINT64_C(0)); - res.VN = ShiftedBitMatrix(s2.size(), 1, 0); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP = ShiftedBitMatrix(s2.size(), 1, ~UINT64_C(0)); + res_.VN = ShiftedBitMatrix(s2.size(), 1, 0); } /* mask used when computing D[m,j] in the paper 10^(m-1) */ @@ -6949,19 +7142,21 @@ auto levenshtein_hyrroe2003(const PM_Vec& PM, const Range& s1, const R VP = HN | ~(D0 | HP); VN = HP & D0; - if constexpr (RecordMatrix) { - res.VP[i][0] = VP; - res.VN[i][0] = VN; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP[i][0] = VP; + res_.VN[i][0] = VN; } } if (res.dist > max) res.dist = max + 1; - if constexpr (RecordBitRow) { - res.first_block = 0; - res.last_block = 0; - res.prev_score = s2.size(); - res.vecs.emplace_back(VP, VN); + RAPIDFUZZ_IF_CONSTEXPR (RecordBitRow) { + auto& res_ = getBitRowRef(res); + res_.first_block = 0; + res_.last_block = 0; + res_.prev_score = s2.size(); + res_.vecs.emplace_back(VP, VN); } return res; @@ -6993,12 +7188,12 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte native_simd VN(VecType(0)); alignas(alignment) std::array currDist_; - unroll( - [&](auto i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); + unroll( + [&](size_t i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); native_simd currDist(reinterpret_cast(currDist_.data())); /* mask used when computing D[m,j] in the paper 10^(m-1) */ alignas(alignment) std::array mask_; - unroll([&](auto i) { 
+ unroll([&](size_t i) { if (s1_lengths[result_index + i] == 0) mask_[i] = 0; else @@ -7009,7 +7204,7 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte for (const auto& ch : s2) { /* Step 1: Computing D0 */ alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, ch); }); native_simd X(stored.data()); auto D0 = (((X & VP) + VP) ^ VP) | X | VN; @@ -7033,7 +7228,7 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte alignas(alignment) std::array distances; currDist.store(distances.data()); - unroll([&](auto i) { + unroll([&](size_t i) { size_t score = 0; /* strings of length 0 are not handled correctly */ if (s1_lengths[result_index] == 0) { @@ -7041,7 +7236,9 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte } /* calculate score under consideration of wraparounds in parallel counter */ else { - if constexpr (std::numeric_limits::max() < std::numeric_limits::max()) { + RAPIDFUZZ_IF_CONSTEXPR (std::numeric_limits::max() < + std::numeric_limits::max()) + { size_t min_dist = abs_diff(s1_lengths[result_index], s2.size()); size_t wraparound_score = static_cast(std::numeric_limits::max()) + 1; @@ -7162,14 +7359,15 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range res; res.dist = max; - if constexpr (RecordMatrix) { - res.VP = ShiftedBitMatrix(s2.size(), 1, ~UINT64_C(0)); - res.VN = ShiftedBitMatrix(s2.size(), 1, 0); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP = ShiftedBitMatrix(s2.size(), 1, ~UINT64_C(0)); + res_.VN = ShiftedBitMatrix(s2.size(), 1, 0); ptrdiff_t start_offset = static_cast(max) + 2 - 64; for (size_t i = 0; i < s2.size(); ++i) { - res.VP.set_offset(i, start_offset + static_cast(i)); - res.VN.set_offset(i, start_offset + static_cast(i)); + res_.VP.set_offset(i, start_offset + static_cast(i)); + res_.VN.set_offset(i, 
start_offset + static_cast(i)); } } @@ -7223,9 +7421,10 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range> 1) | HP); VN = (D0 >> 1) & HP; - if constexpr (RecordMatrix) { - res.VP[i][0] = VP; - res.VN[i][0] = VN; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP[i][0] = VP; + res_.VN[i][0] = VN; } } @@ -7265,9 +7464,10 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range> 1) | HP); VN = (D0 >> 1) & HP; - if constexpr (RecordMatrix) { - res.VP[i][0] = VP; - res.VN[i][0] = VN; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP[i][0] = VP; + res_.VN[i][0] = VN; } } @@ -7302,17 +7502,19 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range scores[words - 1] = s1.size(); - if constexpr (RecordMatrix) { + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); size_t full_band = std::min(s1.size(), 2 * max + 1); size_t full_band_words = std::min(words, full_band / word_size + 2); - res.VP = ShiftedBitMatrix(s2.size(), full_band_words, ~UINT64_C(0)); - res.VN = ShiftedBitMatrix(s2.size(), full_band_words, 0); + res_.VP = ShiftedBitMatrix(s2.size(), full_band_words, ~UINT64_C(0)); + res_.VN = ShiftedBitMatrix(s2.size(), full_band_words, 0); } - if constexpr (RecordBitRow) { - res.first_block = 0; - res.last_block = 0; - res.prev_score = 0; + RAPIDFUZZ_IF_CONSTEXPR (RecordBitRow) { + auto& res_ = getBitRowRef(res); + res_.first_block = 0; + res_.last_block = 0; + res_.prev_score = 0; } max = std::min(max, std::max(s1.size(), s2.size())); @@ -7329,9 +7531,10 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range uint64_t HP_carry = 1; uint64_t HN_carry = 0; - if constexpr (RecordMatrix) { - res.VP.set_offset(row, static_cast(first_block * word_size)); - res.VN.set_offset(row, static_cast(first_block * word_size)); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + 
res_.VP.set_offset(row, static_cast(first_block * word_size)); + res_.VN.set_offset(row, static_cast(first_block * word_size)); } auto advance_block = [&](size_t word) { @@ -7365,9 +7568,10 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range vecs[word].VP = HN | ~(D0 | HP); vecs[word].VN = HP & D0; - if constexpr (RecordMatrix) { - res.VP[row][word - first_block] = vecs[word].VP; - res.VN[row][word - first_block] = vecs[word].VN; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP[row][word - first_block] = vecs[word].VP; + res_.VN[row][word - first_block] = vecs[word].VN; } return static_cast(HP_carry) - static_cast(HN_carry); @@ -7453,26 +7657,27 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range return res; } - if constexpr (RecordBitRow) { + RAPIDFUZZ_IF_CONSTEXPR (RecordBitRow) { if (row == stop_row) { + auto& res_ = getBitRowRef(res); if (first_block == 0) - res.prev_score = stop_row + 1; + res_.prev_score = stop_row + 1; else { /* count backwards to find score at last position in previous block */ size_t relevant_bits = std::min((first_block + 1) * 64, s1.size()) % 64; uint64_t mask = ~UINT64_C(0); if (relevant_bits) mask >>= 64 - relevant_bits; - res.prev_score = scores[first_block] + popcount(vecs[first_block].VN & mask) - - popcount(vecs[first_block].VP & mask); + res_.prev_score = scores[first_block] + popcount(vecs[first_block].VN & mask) - + popcount(vecs[first_block].VP & mask); } - res.first_block = first_block; - res.last_block = last_block; - res.vecs = std::move(vecs); + res_.first_block = first_block; + res_.last_block = last_block; + res_.vecs = std::move(vecs); /* unknown so make sure it is <= max */ - res.dist = 0; + res_.dist = 0; return res; } } @@ -7494,7 +7699,7 @@ size_t uniform_levenshtein_distance(const BlockPatternMatchVector& block, Range< if (score_hint < 31) score_hint = 31; // when no differences are allowed a direct comparision is 
sufficient - if (score_cutoff == 0) return !std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()); + if (score_cutoff == 0) return s1 != s2; if (score_cutoff < abs_diff(s1.size(), s2.size())) return score_cutoff + 1; @@ -7552,7 +7757,7 @@ size_t uniform_levenshtein_distance(Range s1, Range s2, size if (score_hint < 31) score_hint = 31; // when no differences are allowed a direct comparision is sufficient - if (score_cutoff == 0) return !std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()); + if (score_cutoff == 0) return s1 != s2; // at least length difference insertions/deletions required if (score_cutoff < (s1.size() - s2.size())) return score_cutoff + 1; @@ -7895,7 +8100,8 @@ Editops levenshtein_editops(const Range& s1, const Range& s2 return editops; } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz namespace rapidfuzz { @@ -8174,7 +8380,7 @@ template Editops levenshtein_editops(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, size_t score_hint = std::numeric_limits::max()) { - return detail::levenshtein_editops(detail::Range(first1, last1), detail::Range(first2, last2), + return detail::levenshtein_editops(detail::make_range(first1, last1), detail::make_range(first2, last2), score_hint); } @@ -8182,7 +8388,7 @@ template Editops levenshtein_editops(const Sentence1& s1, const Sentence2& s2, size_t score_hint = std::numeric_limits::max()) { - return detail::levenshtein_editops(detail::Range(s1), detail::Range(s2), score_hint); + return detail::levenshtein_editops(detail::make_range(s1), detail::make_range(s2), score_hint); } #ifdef RAPIDFUZZ_SIMD @@ -8195,26 +8401,26 @@ struct MultiLevenshtein : public detail::MultiDistanceBase::max()>; friend detail::MultiNormalizedMetricBase, size_t>; - constexpr static size_t get_vec_size() + RAPIDFUZZ_CONSTEXPR_CXX14 static size_t get_vec_size() { # ifdef RAPIDFUZZ_AVX2 using namespace detail::simd_avx2; # else using namespace detail::simd_sse2; # endif - if constexpr 
(MaxLen <= 8) + RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 8) return native_simd::size; - else if constexpr (MaxLen <= 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 16) return native_simd::size; - else if constexpr (MaxLen <= 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 32) return native_simd::size; - else if constexpr (MaxLen <= 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 64) return native_simd::size; - static_assert(MaxLen <= 64); + static_assert(MaxLen <= 64, "expected MaxLen <= 64"); } - constexpr static size_t find_block_count(size_t count) + static size_t find_block_count(size_t count) { size_t vec_size = get_vec_size(); size_t simd_vec_count = detail::ceil_div(count, vec_size); @@ -8278,14 +8484,14 @@ struct MultiLevenshtein : public detail::MultiDistanceBase= result_count() elements"); - detail::Range scores_(scores, scores + score_count); - if constexpr (MaxLen == 8) + auto scores_ = detail::make_range(scores, scores + score_count); + RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 8) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 16) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 32) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 64) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); } @@ -8319,7 +8525,7 @@ struct CachedLevenshtein : public detail::CachedDistanceBase CachedLevenshtein(InputIt1 first1, InputIt1 last1, LevenshteinWeightTable aWeights = {1, 1, 1}) - : s1(first1, last1), PM(detail::Range(first1, last1)), weights(aWeights) + : s1(first1, last1), PM(detail::make_range(first1, last1)), weights(aWeights) {} private: @@ -8345,7 +8551,7 @@ struct CachedLevenshtein : public detail::CachedDistanceBase= weights.insert_cost + 
weights.delete_cost) { // max can make use of the common divisor of the three weights size_t new_max = detail::ceil_div(score_cutoff, weights.insert_cost); - size_t dist = detail::indel_distance(PM, detail::Range(s1), s2, new_max); + size_t dist = detail::indel_distance(PM, detail::make_range(s1), s2, new_max); dist *= weights.insert_cost; return (dist <= score_cutoff) ? dist : score_cutoff + 1; } } - return detail::generalized_levenshtein_distance(detail::Range(s1), s2, weights, score_cutoff); + return detail::generalized_levenshtein_distance(detail::make_range(s1), s2, weights, score_cutoff); } std::vector s1; @@ -8372,6 +8578,7 @@ struct CachedLevenshtein : public detail::CachedDistanceBase explicit CachedLevenshtein(const Sentence1& s1_, LevenshteinWeightTable aWeights = { 1, 1, 1}) -> CachedLevenshtein>; @@ -8379,6 +8586,7 @@ explicit CachedLevenshtein(const Sentence1& s1_, LevenshteinWeightTable aWeights template CachedLevenshtein(InputIt1 first1, InputIt1 last1, LevenshteinWeightTable aWeights = {1, 1, 1}) -> CachedLevenshtein>; +#endif } // namespace rapidfuzz @@ -8386,7 +8594,8 @@ CachedLevenshtein(InputIt1 first1, InputIt1 last1, #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { /** * @brief Bitparallel implementation of the OSA distance. 
@@ -8476,12 +8685,12 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV native_simd PM_j_old(VecType(0)); alignas(alignment) std::array currDist_; - unroll( - [&](auto i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); + unroll( + [&](size_t i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); native_simd currDist(reinterpret_cast(currDist_.data())); /* mask used when computing D[m,j] in the paper 10^(m-1) */ alignas(alignment) std::array mask_; - unroll([&](auto i) { + unroll([&](size_t i) { if (s1_lengths[result_index + i] == 0) mask_[i] = 0; else @@ -8492,7 +8701,7 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV for (const auto& ch : s2) { /* Step 1: Computing D0 */ alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, ch); }); native_simd PM_j(stored.data()); auto TR = (andnot(PM_j, D0) << 1) & PM_j_old; @@ -8519,7 +8728,7 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV alignas(alignment) std::array distances; currDist.store(distances.data()); - unroll([&](auto i) { + unroll([&](size_t i) { size_t score = 0; /* strings of length 0 are not handled correctly */ if (s1_lengths[result_index] == 0) { @@ -8527,7 +8736,9 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV } /* calculate score under consideration of wraparounds in parallel counter */ else { - if constexpr (std::numeric_limits::max() < std::numeric_limits::max()) { + RAPIDFUZZ_IF_CONSTEXPR (std::numeric_limits::max() < + std::numeric_limits::max()) + { size_t min_dist = abs_diff(s1_lengths[result_index], s2.size()); size_t wraparound_score = static_cast(std::numeric_limits::max()) + 1; @@ -8646,7 +8857,8 @@ class OSA : public DistanceBase::ma } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz namespace rapidfuzz { @@ -8760,26 
+8972,26 @@ struct MultiOSA friend detail::MultiDistanceBase, size_t, 0, std::numeric_limits::max()>; friend detail::MultiNormalizedMetricBase, size_t>; - constexpr static size_t get_vec_size() + RAPIDFUZZ_CONSTEXPR_CXX14 static size_t get_vec_size() { # ifdef RAPIDFUZZ_AVX2 using namespace detail::simd_avx2; # else using namespace detail::simd_sse2; # endif - if constexpr (MaxLen <= 8) + RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 8) return native_simd::size; - else if constexpr (MaxLen <= 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 16) return native_simd::size; - else if constexpr (MaxLen <= 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 32) return native_simd::size; - else if constexpr (MaxLen <= 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 64) return native_simd::size; - static_assert(MaxLen <= 64); + static_assert(MaxLen <= 64, "expected MaxLen <= 64"); } - constexpr static size_t find_block_count(size_t count) + static size_t find_block_count(size_t count) { size_t vec_size = get_vec_size(); size_t simd_vec_count = detail::ceil_div(count, vec_size); @@ -8840,14 +9052,14 @@ struct MultiOSA if (score_count < result_count()) throw std::invalid_argument("scores has to have >= result_count() elements"); - detail::Range scores_(scores, scores + score_count); - if constexpr (MaxLen == 8) + auto scores_ = detail::make_range(scores, scores + score_count); + RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 8) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 16) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 32) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 64) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); } @@ -8878,7 +9090,7 @@ struct CachedOSA {} template - CachedOSA(InputIt1 
first1, InputIt1 last1) : s1(first1, last1), PM(detail::Range(first1, last1)) + CachedOSA(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::make_range(first1, last1)) {} private: @@ -8892,8 +9104,7 @@ struct CachedOSA } template - size_t _distance(const detail::Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _distance(const detail::Range& s2, size_t score_cutoff, size_t) const { size_t res; if (s1.empty()) @@ -8901,9 +9112,9 @@ struct CachedOSA else if (s2.empty()) res = s1.size(); else if (s1.size() < 64) - res = detail::osa_hyrroe2003(PM, detail::Range(s1), s2, score_cutoff); + res = detail::osa_hyrroe2003(PM, detail::make_range(s1), s2, score_cutoff); else - res = detail::osa_hyrroe2003_block(PM, detail::Range(s1), s2, score_cutoff); + res = detail::osa_hyrroe2003_block(PM, detail::make_range(s1), s2, score_cutoff); return (res <= score_cutoff) ? res : score_cutoff + 1; } @@ -8912,18 +9123,21 @@ struct CachedOSA detail::BlockPatternMatchVector PM; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template CachedOSA(const Sentence1& s1_) -> CachedOSA>; template CachedOSA(InputIt1 first1, InputIt1 last1) -> CachedOSA>; +#endif /**@}*/ } // namespace rapidfuzz #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { class Postfix : public SimilarityBase::max()> { friend SimilarityBase::max()>; @@ -8936,15 +9150,15 @@ class Postfix : public SimilarityBase - static size_t _similarity(Range s1, Range s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) + static size_t _similarity(Range s1, Range s2, size_t score_cutoff, size_t) { size_t dist = remove_common_suffix(s1, s2); return (dist >= score_cutoff) ? 
dist : 0; } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz namespace rapidfuzz { @@ -9024,8 +9238,7 @@ struct CachedPostfix : public detail::CachedSimilarityBase } template - size_t _similarity(detail::Range s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _similarity(detail::Range s2, size_t score_cutoff, size_t score_hint) const { return detail::Postfix::similarity(s1, s2, score_cutoff, score_hint); } @@ -9033,19 +9246,21 @@ struct CachedPostfix : public detail::CachedSimilarityBase std::vector s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPostfix(const Sentence1& s1_) -> CachedPostfix>; template CachedPostfix(InputIt1 first1, InputIt1 last1) -> CachedPostfix>; - +#endif /**@}*/ } // namespace rapidfuzz #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { class Prefix : public SimilarityBase::max()> { friend SimilarityBase::max()>; @@ -9058,15 +9273,15 @@ class Prefix : public SimilarityBase - static size_t _similarity(Range s1, Range s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) + static size_t _similarity(Range s1, Range s2, size_t score_cutoff, size_t) { size_t dist = remove_common_prefix(s1, s2); return (dist >= score_cutoff) ? 
dist : 0; } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz namespace rapidfuzz { @@ -9145,8 +9360,7 @@ struct CachedPrefix : public detail::CachedSimilarityBase, } template - size_t _similarity(detail::Range s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _similarity(detail::Range s2, size_t score_cutoff, size_t) const { return detail::Prefix::similarity(s1, s2, score_cutoff, score_cutoff); } @@ -9154,11 +9368,13 @@ struct CachedPrefix : public detail::CachedSimilarityBase, std::vector s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPrefix(const Sentence1& s1_) -> CachedPrefix>; template CachedPrefix(InputIt1 first1, InputIt1 last1) -> CachedPrefix>; +#endif /**@}*/ @@ -9318,7 +9534,8 @@ std::vector opcodes_apply_vec(const Opcodes& ops, const Sentence1& s1, co #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { /* * taken from https://stackoverflow.com/a/17251989/11335032 @@ -9380,9 +9597,11 @@ struct CharSet { } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz -namespace rapidfuzz::fuzz { +namespace rapidfuzz { +namespace fuzz { /** * @defgroup Fuzz Fuzz @@ -9449,7 +9668,7 @@ struct MultiRatio { void similarity(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0) const { - similarity(scores, score_count, detail::Range(first2, last2), score_cutoff); + similarity(scores, score_count, detail::make_range(first2, last2), score_cutoff); } template @@ -9490,11 +9709,13 @@ struct CachedRatio { CachedIndel cached_indel; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template CachedRatio(const Sentence1& s1) -> CachedRatio>; template CachedRatio(InputIt1 first1, InputIt1 last1) -> CachedRatio>; +#endif template ScoreAlignment partial_ratio_alignment(InputIt1 first1, InputIt1 last1, InputIt2 first2, @@ -9563,11 +9784,13 @@ struct CachedPartialRatio { CachedRatio cached_ratio; }; 
+#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPartialRatio(const Sentence1& s1) -> CachedPartialRatio>; template CachedPartialRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialRatio>; +#endif /** * @brief Sorts the words in the strings and calculates the fuzz::ratio between @@ -9672,11 +9895,13 @@ struct CachedTokenSortRatio { CachedRatio cached_ratio; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedTokenSortRatio(const Sentence1& s1) -> CachedTokenSortRatio>; template CachedTokenSortRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenSortRatio>; +#endif /** * @brief Sorts the words in the strings and calculates the fuzz::partial_ratio @@ -9730,6 +9955,7 @@ struct CachedPartialTokenSortRatio { CachedPartialRatio cached_partial_ratio; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPartialTokenSortRatio(const Sentence1& s1) -> CachedPartialTokenSortRatio>; @@ -9737,6 +9963,7 @@ explicit CachedPartialTokenSortRatio(const Sentence1& s1) template CachedPartialTokenSortRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialTokenSortRatio>; +#endif /** * @brief Compares the words in the strings based on unique and common words @@ -9798,11 +10025,13 @@ struct CachedTokenSetRatio { detail::SplittedSentenceView::iterator> tokens_s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedTokenSetRatio(const Sentence1& s1) -> CachedTokenSetRatio>; template CachedTokenSetRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenSetRatio>; +#endif /** * @brief Compares the words in the strings based on unique and common words @@ -9855,12 +10084,14 @@ struct CachedPartialTokenSetRatio { detail::SplittedSentenceView::iterator> tokens_s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPartialTokenSetRatio(const Sentence1& s1) -> CachedPartialTokenSetRatio>; template CachedPartialTokenSetRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialTokenSetRatio>; +#endif /** * @brief Helper method that returns the maximum of 
fuzz::token_set_ratio and @@ -9917,11 +10148,13 @@ struct CachedTokenRatio { CachedRatio cached_ratio_s1_sorted; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedTokenRatio(const Sentence1& s1) -> CachedTokenRatio>; template CachedTokenRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenRatio>; +#endif /** * @brief Helper method that returns the maximum of @@ -9978,11 +10211,13 @@ struct CachedPartialTokenRatio { std::vector s1_sorted; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPartialTokenRatio(const Sentence1& s1) -> CachedPartialTokenRatio>; template CachedPartialTokenRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialTokenRatio>; +#endif /** * @brief Calculates a weighted ratio based on the other ratio algorithms @@ -10038,11 +10273,13 @@ struct CachedWRatio { rapidfuzz::detail::BlockPatternMatchVector blockmap_s1_sorted; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedWRatio(const Sentence1& s1) -> CachedWRatio>; template CachedWRatio(InputIt1 first1, InputIt1 last1) -> CachedWRatio>; +#endif /** * @brief Calculates a quick ratio between two strings using fuzz.ratio @@ -10101,13 +10338,13 @@ struct MultiQRatio { void similarity(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0) const { - similarity(scores, score_count, detail::Range(first2, last2), score_cutoff); + similarity(scores, score_count, detail::make_range(first2, last2), score_cutoff); } template void similarity(double* scores, size_t score_count, const Sentence2& s2, double score_cutoff = 0) const { - rapidfuzz::detail::Range s2_(s2); + auto s2_ = detail::make_range(s2); if (s2_.empty()) { for (size_t i = 0; i < str_lens.size(); ++i) scores[i] = 0; @@ -10150,15 +10387,18 @@ struct CachedQRatio { CachedRatio cached_ratio; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedQRatio(const Sentence1& s1) -> CachedQRatio>; template CachedQRatio(InputIt1 first1, InputIt1 last1) -> CachedQRatio>; 
+#endif /**@}*/ -} // namespace rapidfuzz::fuzz +} // namespace fuzz +} // namespace rapidfuzz #include @@ -10168,7 +10408,8 @@ CachedQRatio(InputIt1 first1, InputIt1 last1) -> CachedQRatio #include -namespace rapidfuzz::fuzz { +namespace rapidfuzz { +namespace fuzz { /********************************************** * ratio @@ -10177,7 +10418,7 @@ namespace rapidfuzz::fuzz { template double ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double score_cutoff) { - return ratio(detail::Range(first1, last1), detail::Range(first2, last2), score_cutoff); + return ratio(detail::make_range(first1, last1), detail::make_range(first2, last2), score_cutoff); } template @@ -10191,7 +10432,7 @@ template double CachedRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, double score_hint) const { - return similarity(detail::Range(first2, last2), score_cutoff, score_hint); + return similarity(detail::make_range(first2, last2), score_cutoff, score_hint); } template @@ -10207,7 +10448,7 @@ double CachedRatio::similarity(const Sentence2& s2, double score_cutoff, namespace fuzz_detail { -static constexpr double norm_distance(size_t dist, size_t lensum, double score_cutoff = 0) +static RAPIDFUZZ_CONSTEXPR_CXX14 double norm_distance(size_t dist, size_t lensum, double score_cutoff = 0) { double score = (lensum > 0) ? 
(100.0 - 100.0 * static_cast(dist) / static_cast(lensum)) : 100.0; @@ -10247,8 +10488,10 @@ partial_ratio_impl(const detail::Range& s1, const detail::Range(window.first); auto subseq2_first = s2.begin() + static_cast(window.second); - detail::Range subseq1(subseq1_first, subseq1_first + static_cast(len1)); - detail::Range subseq2(subseq2_first, subseq2_first + static_cast(len1)); + auto subseq1 = + detail::make_range(subseq1_first, subseq1_first + static_cast(len1)); + auto subseq2 = + detail::make_range(subseq2_first, subseq2_first + static_cast(len1)); if (scores[window.first] == std::numeric_limits::max()) { scores[window.first] = cached_ratio.cached_indel.distance(subseq1); @@ -10302,7 +10545,7 @@ partial_ratio_impl(const detail::Range& s1, const detail::Range(i)); + auto subseq = rapidfuzz::detail::make_range(s2.begin(), s2.begin() + static_cast(i)); if (!s1_char_set.find(subseq.back())) continue; double ls_ratio = cached_ratio.similarity(subseq, score_cutoff); @@ -10315,7 +10558,7 @@ partial_ratio_impl(const detail::Range& s1, const detail::Range(i), s2.end()); + auto subseq = rapidfuzz::detail::make_range(s2.begin() + static_cast(i), s2.end()); if (!s1_char_set.find(subseq.front())) continue; double ls_ratio = cached_ratio.similarity(subseq, score_cutoff); @@ -10364,8 +10607,8 @@ ScoreAlignment partial_ratio_alignment(InputIt1 first1, InputIt1 last1, if (!len1 || !len2) return ScoreAlignment(static_cast(len1 == len2) * 100.0, 0, len1, 0, len1); - auto s1 = detail::Range(first1, last1); - auto s2 = detail::Range(first2, last2); + auto s1 = detail::make_range(first1, last1); + auto s2 = detail::make_range(first2, last2); auto alignment = fuzz_detail::partial_ratio_impl(s1, s2, score_cutoff); if (alignment.score != 100 && s1.size() == s2.size()) { @@ -10412,7 +10655,7 @@ CachedPartialRatio::CachedPartialRatio(InputIt1 first1, InputIt1 last1) template template double CachedPartialRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - 
[[maybe_unused]] double score_hint) const + double) const { size_t len1 = s1.size(); size_t len2 = static_cast(std::distance(first2, last2)); @@ -10424,8 +10667,8 @@ double CachedPartialRatio::similarity(InputIt2 first2, InputIt2 last2, d if (!len1 || !len2) return static_cast(len1 == len2) * 100.0; - auto s1_ = detail::Range(s1); - auto s2 = detail::Range(first2, last2); + auto s1_ = detail::make_range(s1); + auto s2 = detail::make_range(first2, last2); double score = fuzz_detail::partial_ratio_impl(s1_, s2, cached_ratio, s1_char_set, score_cutoff).score; if (score != 100 && s1_.size() == s2.size()) { @@ -10439,8 +10682,7 @@ double CachedPartialRatio::similarity(InputIt2 first2, InputIt2 last2, d template template -double CachedPartialRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedPartialRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -10467,7 +10709,7 @@ double token_sort_ratio(const Sentence1& s1, const Sentence2& s2, double score_c template template double CachedTokenSortRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { if (score_cutoff > 100) return 0; @@ -10476,8 +10718,7 @@ double CachedTokenSortRatio::similarity(InputIt2 first2, InputIt2 last2, template template -double CachedTokenSortRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedTokenSortRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -10506,7 +10747,7 @@ double partial_token_sort_ratio(const Sentence1& s1, const Sentence2& s2, double template template double CachedPartialTokenSortRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] 
double score_hint) const + double) const { if (score_cutoff > 100) return 0; @@ -10515,8 +10756,7 @@ double CachedPartialTokenSortRatio::similarity(InputIt2 first2, InputIt2 template template -double CachedPartialTokenSortRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedPartialTokenSortRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -10595,7 +10835,7 @@ double token_set_ratio(const Sentence1& s1, const Sentence2& s2, double score_cu template template double CachedTokenSetRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { if (score_cutoff > 100) return 0; @@ -10604,8 +10844,7 @@ double CachedTokenSetRatio::similarity(InputIt2 first2, InputIt2 last2, template template -double CachedTokenSetRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedTokenSetRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -10654,7 +10893,7 @@ double partial_token_set_ratio(const Sentence1& s1, const Sentence2& s2, double template template double CachedPartialTokenSetRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { if (score_cutoff > 100) return 0; @@ -10663,8 +10902,7 @@ double CachedPartialTokenSetRatio::similarity(InputIt2 first2, InputIt2 template template -double CachedPartialTokenSetRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedPartialTokenSetRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -10806,8 
+11044,9 @@ double token_ratio(const std::vector& s1_sorted, double result = 0; auto s2_sorted = tokens_b.join(); if (s1_sorted.size() < 65) { - double norm_sim = detail::indel_normalized_similarity(blockmap_s1_sorted, detail::Range(s1_sorted), - detail::Range(s2_sorted), score_cutoff / 100); + double norm_sim = + detail::indel_normalized_similarity(blockmap_s1_sorted, detail::make_range(s1_sorted), + detail::make_range(s2_sorted), score_cutoff / 100); result = norm_sim * 100; } else { @@ -10842,15 +11081,14 @@ double token_ratio(const std::vector& s1_sorted, template template double CachedTokenRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { return fuzz_detail::token_ratio(s1_tokens, cached_ratio_s1_sorted, first2, last2, score_cutoff); } template template -double CachedTokenRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedTokenRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -10928,15 +11166,14 @@ double partial_token_ratio(const std::vector& s1_sorted, template template double CachedPartialTokenRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { return fuzz_detail::partial_token_ratio(s1_sorted, tokens_s1, first2, last2, score_cutoff); } template template -double CachedPartialTokenRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedPartialTokenRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -10994,13 +11231,12 @@ CachedWRatio::CachedWRatio(InputIt1 first1, InputIt1 last1) cached_partial_ratio(first1, last1), tokens_s1(detail::sorted_split(std::begin(s1), 
std::end(s1))), s1_sorted(tokens_s1.join()), - blockmap_s1_sorted(detail::Range(s1_sorted)) + blockmap_s1_sorted(detail::make_range(s1_sorted)) {} template template -double CachedWRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedWRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, double) const { if (score_cutoff > 100) return 0; @@ -11039,8 +11275,7 @@ double CachedWRatio::similarity(InputIt2 first2, InputIt2 last2, double template template -double CachedWRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedWRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -11071,8 +11306,7 @@ double QRatio(const Sentence1& s1, const Sentence2& s2, double score_cutoff) template template -double CachedQRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedQRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, double) const { auto len2 = std::distance(first2, last2); @@ -11085,12 +11319,12 @@ double CachedQRatio::similarity(InputIt2 first2, InputIt2 last2, double template template -double CachedQRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedQRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } -} // namespace rapidfuzz::fuzz +} // namespace fuzz +} // namespace rapidfuzz #endif // RAPIDFUZZ_AMALGAMATED_HPP_INCLUDED diff --git a/fuzzing/CMakeLists.txt b/fuzzing/CMakeLists.txt index b3b5efcd..2a71733d 100644 --- a/fuzzing/CMakeLists.txt +++ b/fuzzing/CMakeLists.txt @@ -1,6 +1,6 @@ function(create_fuzzer fuzzer) add_executable(fuzz_${fuzzer} 
fuzz_${fuzzer}.cpp) - target_compile_features(fuzz_${fuzzer} PUBLIC cxx_std_17) + target_compile_features(fuzz_${fuzzer} PUBLIC cxx_std_11) target_link_libraries(fuzz_${fuzzer} PRIVATE rapidfuzz::rapidfuzz) target_compile_options(fuzz_${fuzzer} PRIVATE -g -O1 -fsanitize=fuzzer,address -march=native) diff --git a/fuzzing/fuzz_levenshtein_editops.cpp b/fuzzing/fuzz_levenshtein_editops.cpp index fe09cb5e..7a540eae 100644 --- a/fuzzing/fuzz_levenshtein_editops.cpp +++ b/fuzzing/fuzz_levenshtein_editops.cpp @@ -30,8 +30,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) validate_editops(s1, s2, score, score); if (s1.size() > 1 && s2.size() > 1) { - auto hpos = rapidfuzz::detail::find_hirschberg_pos(rapidfuzz::detail::Range(s1), - rapidfuzz::detail::Range(s2)); + auto hpos = rapidfuzz::detail::find_hirschberg_pos(rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2)); if (hpos.left_score + hpos.right_score != score) throw std::logic_error("find_hirschberg_pos failed"); } diff --git a/rapidfuzz/details/CharSet.hpp b/rapidfuzz/details/CharSet.hpp index a00e3ee1..c6792ab8 100644 --- a/rapidfuzz/details/CharSet.hpp +++ b/rapidfuzz/details/CharSet.hpp @@ -9,7 +9,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { /* * taken from https://stackoverflow.com/a/17251989/11335032 @@ -71,4 +72,5 @@ struct CharSet { } }; -} // namespace rapidfuzz::detail \ No newline at end of file +} // namespace detail +} // namespace rapidfuzz \ No newline at end of file diff --git a/rapidfuzz/details/GrowingHashmap.hpp b/rapidfuzz/details/GrowingHashmap.hpp index ba0edebc..db63ea0b 100644 --- a/rapidfuzz/details/GrowingHashmap.hpp +++ b/rapidfuzz/details/GrowingHashmap.hpp @@ -7,7 +7,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { /* hashmap for integers which can only grow, but can't remove elements */ template @@ -200,4 +201,5 @@ struct HybridGrowingHashmap { 
std::array m_extendedAscii; }; -} // namespace rapidfuzz::detail \ No newline at end of file +} // namespace detail +} // namespace rapidfuzz \ No newline at end of file diff --git a/rapidfuzz/details/Matrix.hpp b/rapidfuzz/details/Matrix.hpp index 7525f193..76c0c868 100644 --- a/rapidfuzz/details/Matrix.hpp +++ b/rapidfuzz/details/Matrix.hpp @@ -8,15 +8,16 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct BitMatrixView { using value_type = T; using size_type = size_t; - using pointer = std::conditional_t; - using reference = std::conditional_t; + using pointer = typename std::conditional::type; + using reference = typename std::conditional::type; BitMatrixView(pointer vector, size_type cols) noexcept : m_vector(vector), m_cols(cols) {} @@ -176,12 +177,12 @@ struct ShiftedBitMatrix { return bool(m_matrix[row][col_word] & col_mask); } - auto operator[](size_t row) noexcept + BitMatrixView operator[](size_t row) noexcept { return m_matrix[row]; } - auto operator[](size_t row) const noexcept + BitMatrixView operator[](size_t row) const noexcept { return m_matrix[row]; } @@ -196,4 +197,5 @@ struct ShiftedBitMatrix { std::vector m_offsets; }; -} // namespace rapidfuzz::detail \ No newline at end of file +} // namespace detail +} // namespace rapidfuzz \ No newline at end of file diff --git a/rapidfuzz/details/PatternMatchVector.hpp b/rapidfuzz/details/PatternMatchVector.hpp index 9c56a656..f60f2fa4 100644 --- a/rapidfuzz/details/PatternMatchVector.hpp +++ b/rapidfuzz/details/PatternMatchVector.hpp @@ -11,7 +11,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { struct BitvectorHashmap { BitvectorHashmap() : m_map() @@ -219,4 +220,5 @@ struct BlockPatternMatchVector { BitMatrix m_extendedAscii; }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/details/Range.hpp b/rapidfuzz/details/Range.hpp index 
d8ac443e..f0c3b10c 100644 --- a/rapidfuzz/details/Range.hpp +++ b/rapidfuzz/details/Range.hpp @@ -13,7 +13,10 @@ #include #include -namespace rapidfuzz::detail { +#include + +namespace rapidfuzz { +namespace detail { static inline void assume(bool b) { @@ -26,6 +29,9 @@ static inline void assume(bool b) #endif } +namespace to_begin_detail { +using std::begin; + template CharT* to_begin(CharT* s) { @@ -33,11 +39,17 @@ CharT* to_begin(CharT* s) } template -auto to_begin(T& x) +auto to_begin(T& x) -> decltype(begin(x)) { - using std::begin; + return begin(x); } +} // namespace to_begin_detail + +using to_begin_detail::to_begin; + +namespace to_end_detail { +using std::end; template CharT* to_end(CharT* s) @@ -50,11 +62,13 @@ CharT* to_end(CharT* s) } template -auto to_end(T& x) +auto to_end(T& x) -> decltype(end(x)) { - using std::end; return end(x); } +} // namespace to_end_detail + +using to_end_detail::to_end; template class Range { @@ -69,87 +83,73 @@ class Range { using iterator = Iter; using reverse_iterator = std::reverse_iterator; - constexpr Range(Iter first, Iter last) : _first(first), _last(last) + Range(Iter first, Iter last) : _first(first), _last(last) { assert(std::distance(_first, _last) >= 0); _size = static_cast(std::distance(_first, _last)); } - constexpr Range(Iter first, Iter last, size_t size) : _first(first), _last(last), _size(size) + Range(Iter first, Iter last, size_t size) : _first(first), _last(last), _size(size) {} template - constexpr Range(T& x) : _first(to_begin(x)), _last(to_end(x)) - { - assert(std::distance(_first, _last) >= 0); - _size = static_cast(std::distance(_first, _last)); - } + Range(T& x) : Range(to_begin(x), to_end(x)) + {} - constexpr iterator begin() const noexcept + iterator begin() const noexcept { return _first; } - constexpr iterator end() const noexcept + iterator end() const noexcept { return _last; } - constexpr reverse_iterator rbegin() const noexcept + reverse_iterator rbegin() const noexcept { return 
reverse_iterator(end()); } - constexpr reverse_iterator rend() const noexcept + reverse_iterator rend() const noexcept { return reverse_iterator(begin()); } - constexpr size_t size() const + size_t size() const { return _size; } - constexpr bool empty() const + bool empty() const { return size() == 0; } - explicit constexpr operator bool() const + explicit operator bool() const { return !empty(); } - template < - typename... Dummy, typename IterCopy = Iter, - typename = std::enable_if_t::iterator_category>>> - constexpr decltype(auto) operator[](size_t n) const + template ::iterator_category>::value>> + auto operator[](size_t n) const -> decltype(*_first) { return _first[static_cast(n)]; } - constexpr void remove_prefix(size_t n) + void remove_prefix(size_t n) { - if constexpr (std::is_base_of_v::iterator_category>) - _first += static_cast(n); - else - for (size_t i = 0; i < n; ++i) - _first++; - + std::advance(_first, static_cast(n)); _size -= n; } - constexpr void remove_suffix(size_t n) - { - if constexpr (std::is_base_of_v::iterator_category>) - _last -= static_cast(n); - else - for (size_t i = 0; i < n; ++i) - _last--; + void remove_suffix(size_t n) + { + std::advance(_last, -static_cast(n)); _size -= n; } - constexpr Range subseq(size_t pos = 0, size_t count = std::numeric_limits::max()) + Range subseq(size_t pos = 0, size_t count = std::numeric_limits::max()) { if (pos > size()) throw std::out_of_range("Index out of range in Range::substr"); @@ -160,17 +160,17 @@ class Range { return res; } - constexpr decltype(auto) front() const + const value_type& front() const { - return *(_first); + return *_first; } - constexpr decltype(auto) back() const + const value_type& back() const { return *(_last - 1); } - constexpr Range reversed() const + Range reversed() const { return {rbegin(), rend(), _size}; } @@ -185,13 +185,24 @@ class Range { } }; +template +auto make_range(Iter first, Iter last) -> Range +{ + return Range(first, last); +} + template -Range(T& x) -> 
Range; +auto make_range(T& x) -> Range +{ + return {to_begin(x), to_end(x)}; +} template inline bool operator==(const Range& a, const Range& b) { - return std::equal(a.begin(), a.end(), b.begin(), b.end()); + if (a.size() != b.size()) return false; + + return std::equal(a.begin(), a.end(), b.begin()); } template @@ -227,4 +238,5 @@ inline bool operator>=(const Range& a, const Range& b) template using RangeVec = std::vector>; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/details/SplittedSentenceView.hpp b/rapidfuzz/details/SplittedSentenceView.hpp index a6b06955..52653a37 100644 --- a/rapidfuzz/details/SplittedSentenceView.hpp +++ b/rapidfuzz/details/SplittedSentenceView.hpp @@ -3,7 +3,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template class SplittedSentenceView { @@ -11,7 +12,7 @@ class SplittedSentenceView { using CharT = iter_value_t; SplittedSentenceView(RangeVec sentence) noexcept( - std::is_nothrow_move_constructible_v>) + std::is_nothrow_move_constructible>::value) : m_sentence(std::move(sentence)) {} @@ -83,4 +84,5 @@ auto SplittedSentenceView::join() const -> std::vector return joined; } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/details/common.hpp b/rapidfuzz/details/common.hpp index f157a5ae..61733d8e 100644 --- a/rapidfuzz/details/common.hpp +++ b/rapidfuzz/details/common.hpp @@ -13,7 +13,8 @@ # include #endif -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct DecomposedSet { @@ -33,13 +34,11 @@ static inline size_t abs_diff(size_t a, size_t b) return a > b ? 
a - b : b - a; } -template -TO opt_static_cast(const FROM &value) +template +TO opt_static_cast(const FROM& value) { - if constexpr (std::is_same_v) - return value; - else - return static_cast(value); + /* calling the cast through this template function somehow avoids useless cast warnings */ + return static_cast(value); } /** @@ -96,6 +95,7 @@ static inline void rf_aligned_free(void* ptr) /**@}*/ -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include diff --git a/rapidfuzz/details/common_impl.hpp b/rapidfuzz/details/common_impl.hpp index 2d803442..7821c6d3 100644 --- a/rapidfuzz/details/common_impl.hpp +++ b/rapidfuzz/details/common_impl.hpp @@ -5,7 +5,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template DecomposedSet set_decomposition(SplittedSentenceView a, @@ -33,6 +34,15 @@ DecomposedSet set_decomposition(SplittedSentenceVi return {difference_ab, difference_ba, intersection}; } +template +std::pair rf_mismatch(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2) +{ + while (first1 != last1 && first2 != last2 && *first1 == *first2) + ++first1, ++first2; + + return std::make_pair(first1, first2); +} + /** * Removes common prefix of two string views */ @@ -41,7 +51,7 @@ size_t remove_common_prefix(Range& s1, Range& s2) { auto first1 = std::begin(s1); size_t prefix = static_cast( - std::distance(first1, std::mismatch(first1, std::end(s1), std::begin(s2), std::end(s2)).first)); + std::distance(first1, rf_mismatch(first1, std::end(s1), std::begin(s2), std::end(s2)).first)); s1.remove_prefix(prefix); s2.remove_prefix(prefix); return prefix; @@ -53,9 +63,9 @@ size_t remove_common_prefix(Range& s1, Range& s2) template size_t remove_common_suffix(Range& s1, Range& s2) { - auto rfirst1 = std::rbegin(s1); + auto rfirst1 = s1.rbegin(); size_t suffix = static_cast( - std::distance(rfirst1, std::mismatch(rfirst1, std::rend(s1), std::rbegin(s2), std::rend(s2)).first)); + 
std::distance(rfirst1, rf_mismatch(rfirst1, s1.rend(), s2.rbegin(), s2.rend()).first)); s1.remove_suffix(suffix); s2.remove_suffix(suffix); return suffix; @@ -169,4 +179,5 @@ SplittedSentenceView sorted_split(InputIt first, InputIt last) return SplittedSentenceView(splitted); } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/details/config.hpp b/rapidfuzz/details/config.hpp new file mode 100644 index 00000000..8196b3e1 --- /dev/null +++ b/rapidfuzz/details/config.hpp @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2020 Max Bachmann */ + +#pragma once + +#if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) +# define RAPIDFUZZ_DEDUCTION_GUIDES +# define RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE 1 +# define RAPIDFUZZ_IF_CONSTEXPR if constexpr +#else +# define RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE 0 +# define RAPIDFUZZ_IF_CONSTEXPR if +#endif + +#if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201402L) || __cplusplus >= 201402L) +# define RAPIDFUZZ_CONSTEXPR_CXX14 constexpr +#else +# define RAPIDFUZZ_CONSTEXPR_CXX14 +#endif diff --git a/rapidfuzz/details/distance.hpp b/rapidfuzz/details/distance.hpp index 263af7b2..fb87057d 100644 --- a/rapidfuzz/details/distance.hpp +++ b/rapidfuzz/details/distance.hpp @@ -9,42 +9,43 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct NormalizedMetricBase { template >> + typename = rapidfuzz::rf_enable_if_t::value>> static double normalized_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... 
args, double score_cutoff, double score_hint) { - return _normalized_distance(Range(first1, last1), Range(first2, last2), std::forward(args)..., - score_cutoff, score_hint); + return _normalized_distance(make_range(first1, last1), make_range(first2, last2), + std::forward(args)..., score_cutoff, score_hint); } template static double normalized_distance(const Sentence1& s1, const Sentence2& s2, Args... args, double score_cutoff, double score_hint) { - return _normalized_distance(Range(s1), Range(s2), std::forward(args)..., score_cutoff, + return _normalized_distance(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, score_hint); } template >> + typename = rapidfuzz::rf_enable_if_t::value>> static double normalized_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, double score_cutoff, double score_hint) { - return _normalized_similarity(Range(first1, last1), Range(first2, last2), std::forward(args)..., - score_cutoff, score_hint); + return _normalized_similarity(make_range(first1, last1), make_range(first2, last2), + std::forward(args)..., score_cutoff, score_hint); } template static double normalized_similarity(const Sentence1& s1, const Sentence2& s2, Args... args, double score_cutoff, double score_hint) { - return _normalized_similarity(Range(s1), Range(s2), std::forward(args)..., score_cutoff, - score_hint); + return _normalized_similarity(make_range(s1), make_range(s2), std::forward(args)..., + score_cutoff, score_hint); } protected: @@ -82,11 +83,11 @@ struct NormalizedMetricBase { template struct DistanceBase : public NormalizedMetricBase { template >> + typename = rapidfuzz::rf_enable_if_t::value>> static ResType distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... 
args, ResType score_cutoff, ResType score_hint) { - return T::_distance(Range(first1, last1), Range(first2, last2), std::forward(args)..., + return T::_distance(make_range(first1, last1), make_range(first2, last2), std::forward(args)..., score_cutoff, score_hint); } @@ -94,15 +95,16 @@ struct DistanceBase : public NormalizedMetricBase { static ResType distance(const Sentence1& s1, const Sentence2& s2, Args... args, ResType score_cutoff, ResType score_hint) { - return T::_distance(Range(s1), Range(s2), std::forward(args)..., score_cutoff, score_hint); + return T::_distance(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, + score_hint); } template >> + typename = rapidfuzz::rf_enable_if_t::value>> static ResType similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { - return _similarity(Range(first1, last1), Range(first2, last2), std::forward(args)..., + return _similarity(make_range(first1, last1), make_range(first2, last2), std::forward(args)..., score_cutoff, score_hint); } @@ -110,7 +112,8 @@ struct DistanceBase : public NormalizedMetricBase { static ResType similarity(const Sentence1& s1, const Sentence2& s2, Args... args, ResType score_cutoff, ResType score_hint) { - return _similarity(Range(s1), Range(s2), std::forward(args)..., score_cutoff, score_hint); + return _similarity(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, + score_hint); } protected: @@ -137,11 +140,11 @@ struct DistanceBase : public NormalizedMetricBase { template struct SimilarityBase : public NormalizedMetricBase { template >> + typename = rapidfuzz::rf_enable_if_t::value>> static ResType distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... 
args, ResType score_cutoff, ResType score_hint) { - return _distance(Range(first1, last1), Range(first2, last2), std::forward(args)..., + return _distance(make_range(first1, last1), make_range(first2, last2), std::forward(args)..., score_cutoff, score_hint); } @@ -149,23 +152,25 @@ struct SimilarityBase : public NormalizedMetricBase { static ResType distance(const Sentence1& s1, const Sentence2& s2, Args... args, ResType score_cutoff, ResType score_hint) { - return _distance(Range(s1), Range(s2), std::forward(args)..., score_cutoff, score_hint); + return _distance(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, + score_hint); } template >> + typename = rapidfuzz::rf_enable_if_t::value>> static ResType similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { - return T::_similarity(Range(first1, last1), Range(first2, last2), std::forward(args)..., - score_cutoff, score_hint); + return T::_similarity(make_range(first1, last1), make_range(first2, last2), + std::forward(args)..., score_cutoff, score_hint); } template static ResType similarity(const Sentence1& s1, const Sentence2& s2, Args... args, ResType score_cutoff, ResType score_hint) { - return T::_similarity(Range(s1), Range(s2), std::forward(args)..., score_cutoff, score_hint); + return T::_similarity(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, + score_hint); } protected: @@ -180,11 +185,21 @@ struct SimilarityBase : public NormalizedMetricBase { (maximum >= score_hint) ? maximum - score_hint : static_cast(WorstSimilarity); ResType sim = T::_similarity(s1, s2, std::forward(args)..., cutoff_similarity, hint_similarity); ResType dist = maximum - sim; + return _apply_distance_score_cutoff(dist, score_cutoff); + } - if constexpr (std::is_floating_point_v) - return (dist <= score_cutoff) ? dist : 1.0; - else - return (dist <= score_cutoff) ? 
dist : score_cutoff + 1; + template + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) + { + return (score <= score_cutoff) ? score : 1.0; + } + + template + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) + { + return (score <= score_cutoff) ? score : score_cutoff + 1; } SimilarityBase() @@ -198,27 +213,27 @@ struct CachedNormalizedMetricBase { double normalized_distance(InputIt2 first2, InputIt2 last2, double score_cutoff = 1.0, double score_hint = 1.0) const { - return _normalized_distance(Range(first2, last2), score_cutoff, score_hint); + return _normalized_distance(make_range(first2, last2), score_cutoff, score_hint); } template double normalized_distance(const Sentence2& s2, double score_cutoff = 1.0, double score_hint = 1.0) const { - return _normalized_distance(Range(s2), score_cutoff, score_hint); + return _normalized_distance(make_range(s2), score_cutoff, score_hint); } template double normalized_similarity(InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0, double score_hint = 0.0) const { - return _normalized_similarity(Range(first2, last2), score_cutoff, score_hint); + return _normalized_similarity(make_range(first2, last2), score_cutoff, score_hint); } template double normalized_similarity(const Sentence2& s2, double score_cutoff = 0.0, double score_hint = 0.0) const { - return _normalized_similarity(Range(s2), score_cutoff, score_hint); + return _normalized_similarity(make_range(s2), score_cutoff, score_hint); } protected: @@ -259,7 +274,7 @@ struct CachedDistanceBase : public CachedNormalizedMetricBase { ResType score_hint = static_cast(WorstDistance)) const { const T& derived = static_cast(*this); - return derived._distance(Range(first2, last2), score_cutoff, score_hint); + return derived._distance(make_range(first2, last2), score_cutoff, score_hint); } template @@ -267,7 +282,7 @@ struct CachedDistanceBase : public 
CachedNormalizedMetricBase { ResType score_hint = static_cast(WorstDistance)) const { const T& derived = static_cast(*this); - return derived._distance(Range(s2), score_cutoff, score_hint); + return derived._distance(make_range(s2), score_cutoff, score_hint); } template @@ -275,14 +290,14 @@ struct CachedDistanceBase : public CachedNormalizedMetricBase { ResType score_cutoff = static_cast(WorstSimilarity), ResType score_hint = static_cast(WorstSimilarity)) const { - return _similarity(Range(first2, last2), score_cutoff, score_hint); + return _similarity(make_range(first2, last2), score_cutoff, score_hint); } template ResType similarity(const Sentence2& s2, ResType score_cutoff = static_cast(WorstSimilarity), ResType score_hint = static_cast(WorstSimilarity)) const { - return _similarity(Range(s2), score_cutoff, score_hint); + return _similarity(make_range(s2), score_cutoff, score_hint); } protected: @@ -313,14 +328,14 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { ResType score_cutoff = static_cast(WorstDistance), ResType score_hint = static_cast(WorstDistance)) const { - return _distance(Range(first2, last2), score_cutoff, score_hint); + return _distance(make_range(first2, last2), score_cutoff, score_hint); } template ResType distance(const Sentence2& s2, ResType score_cutoff = static_cast(WorstDistance), ResType score_hint = static_cast(WorstDistance)) const { - return _distance(Range(s2), score_cutoff, score_hint); + return _distance(make_range(s2), score_cutoff, score_hint); } template @@ -329,7 +344,7 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { ResType score_hint = static_cast(WorstSimilarity)) const { const T& derived = static_cast(*this); - return derived._similarity(Range(first2, last2), score_cutoff, score_hint); + return derived._similarity(make_range(first2, last2), score_cutoff, score_hint); } template @@ -337,7 +352,7 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { ResType score_hint = 
static_cast(WorstSimilarity)) const { const T& derived = static_cast(*this); - return derived._similarity(Range(s2), score_cutoff, score_hint); + return derived._similarity(make_range(s2), score_cutoff, score_hint); } protected: @@ -350,11 +365,21 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { ResType hint_similarity = (maximum > score_hint) ? maximum - score_hint : 0; ResType sim = derived._similarity(s2, cutoff_similarity, hint_similarity); ResType dist = maximum - sim; + return _apply_distance_score_cutoff(dist, score_cutoff); + } - if constexpr (std::is_floating_point_v) - return (dist <= score_cutoff) ? dist : 1.0; - else - return (dist <= score_cutoff) ? dist : score_cutoff + 1; + template + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) + { + return (score <= score_cutoff) ? score : 1.0; + } + + template + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) + { + return (score <= score_cutoff) ? 
score : score_cutoff + 1; } CachedSimilarityBase() @@ -368,28 +393,28 @@ struct MultiNormalizedMetricBase { void normalized_distance(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, double score_cutoff = 1.0) const { - _normalized_distance(scores, score_count, Range(first2, last2), score_cutoff); + _normalized_distance(scores, score_count, make_range(first2, last2), score_cutoff); } template void normalized_distance(double* scores, size_t score_count, const Sentence2& s2, double score_cutoff = 1.0) const { - _normalized_distance(scores, score_count, Range(s2), score_cutoff); + _normalized_distance(scores, score_count, make_range(s2), score_cutoff); } template void normalized_similarity(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0) const { - _normalized_similarity(scores, score_count, Range(first2, last2), score_cutoff); + _normalized_similarity(scores, score_count, make_range(first2, last2), score_cutoff); } template void normalized_similarity(double* scores, size_t score_count, const Sentence2& s2, double score_cutoff = 0.0) const { - _normalized_similarity(scores, score_count, Range(s2), score_cutoff); + _normalized_similarity(scores, score_count, make_range(s2), score_cutoff); } protected: @@ -403,7 +428,8 @@ struct MultiNormalizedMetricBase { // reinterpretation only works when the types have the same size ResType* scores_orig = nullptr; - if constexpr (sizeof(double) == sizeof(ResType)) + + RAPIDFUZZ_IF_CONSTEXPR (sizeof(double) == sizeof(ResType)) scores_orig = reinterpret_cast(scores); else scores_orig = new ResType[derived.result_count()]; @@ -417,7 +443,7 @@ struct MultiNormalizedMetricBase { scores[i] = (norm_dist <= score_cutoff) ? 
norm_dist : 1.0; } - if constexpr (sizeof(double) != sizeof(ResType)) delete[] scores_orig; + RAPIDFUZZ_IF_CONSTEXPR (sizeof(double) != sizeof(ResType)) delete[] scores_orig; } template @@ -445,7 +471,7 @@ struct MultiDistanceBase : public MultiNormalizedMetricBase { ResType score_cutoff = static_cast(WorstDistance)) const { const T& derived = static_cast(*this); - derived._distance(scores, score_count, Range(first2, last2), score_cutoff); + derived._distance(scores, score_count, make_range(first2, last2), score_cutoff); } template @@ -453,21 +479,21 @@ struct MultiDistanceBase : public MultiNormalizedMetricBase { ResType score_cutoff = static_cast(WorstDistance)) const { const T& derived = static_cast(*this); - derived._distance(scores, score_count, Range(s2), score_cutoff); + derived._distance(scores, score_count, make_range(s2), score_cutoff); } template void similarity(ResType* scores, size_t score_count, InputIt2 first2, InputIt2 last2, ResType score_cutoff = static_cast(WorstSimilarity)) const { - _similarity(scores, score_count, Range(first2, last2), score_cutoff); + _similarity(scores, score_count, make_range(first2, last2), score_cutoff); } template void similarity(ResType* scores, size_t score_count, const Sentence2& s2, ResType score_cutoff = static_cast(WorstSimilarity)) const { - _similarity(scores, score_count, Range(s2), score_cutoff); + _similarity(scores, score_count, make_range(s2), score_cutoff); } protected: @@ -496,14 +522,14 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { void distance(ResType* scores, size_t score_count, InputIt2 first2, InputIt2 last2, ResType score_cutoff = static_cast(WorstDistance)) const { - _distance(scores, score_count, Range(first2, last2), score_cutoff); + _distance(scores, score_count, make_range(first2, last2), score_cutoff); } template void distance(ResType* scores, size_t score_count, const Sentence2& s2, ResType score_cutoff = static_cast(WorstDistance)) const { - _distance(scores, 
score_count, Range(s2), score_cutoff); + _distance(scores, score_count, make_range(s2), score_cutoff); } template @@ -511,7 +537,7 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { ResType score_cutoff = static_cast(WorstSimilarity)) const { const T& derived = static_cast(*this); - derived._similarity(scores, score_count, Range(first2, last2), score_cutoff); + derived._similarity(scores, score_count, make_range(first2, last2), score_cutoff); } template @@ -519,7 +545,7 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { ResType score_cutoff = static_cast(WorstSimilarity)) const { const T& derived = static_cast(*this); - derived._similarity(scores, score_count, Range(s2), score_cutoff); + derived._similarity(scores, score_count, make_range(s2), score_cutoff); } protected: @@ -532,17 +558,28 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { for (size_t i = 0; i < derived.get_input_count(); ++i) { ResType maximum = derived.maximum(i, s2); ResType dist = maximum - scores[i]; - - if constexpr (std::is_floating_point_v) - scores[i] = (dist <= score_cutoff) ? dist : 1.0; - else - scores[i] = (dist <= score_cutoff) ? dist : score_cutoff + 1; + scores[i] = _apply_distance_score_cutoff(dist, score_cutoff); } } + template + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) + { + return (score <= score_cutoff) ? score : 1.0; + } + + template + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) + { + return (score <= score_cutoff) ? 
score : score_cutoff + 1; + } + MultiSimilarityBase() {} friend T; }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/details/intrinsics.hpp b/rapidfuzz/details/intrinsics.hpp index d5bd0a14..7952759e 100644 --- a/rapidfuzz/details/intrinsics.hpp +++ b/rapidfuzz/details/intrinsics.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -14,7 +15,8 @@ # include #endif -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template T bit_mask_lsb(size_t n) @@ -50,7 +52,7 @@ constexpr uint64_t shl64(uint64_t a, U shift) return (shift < 64) ? a << shift : 0; } -constexpr uint64_t addc64(uint64_t a, uint64_t b, uint64_t carryin, uint64_t* carryout) +RAPIDFUZZ_CONSTEXPR_CXX14 uint64_t addc64(uint64_t a, uint64_t b, uint64_t carryin, uint64_t* carryout) { /* todo should use _addcarry_u64 when available */ a += carryin; @@ -61,7 +63,7 @@ constexpr uint64_t addc64(uint64_t a, uint64_t b, uint64_t carryin, uint64_t* ca } template -constexpr T ceil_div(T a, U divisor) +RAPIDFUZZ_CONSTEXPR_CXX14 T ceil_div(T a, U divisor) { T _div = static_cast(divisor); return a / _div + static_cast(a % _div != 0); @@ -97,7 +99,7 @@ static inline size_t popcount(uint8_t x) } template -constexpr T rotl(T x, unsigned int n) +RAPIDFUZZ_CONSTEXPR_CXX14 T rotl(T x, unsigned int n) { unsigned int num_bits = std::numeric_limits::digits; assert(n < num_bits); @@ -197,16 +199,31 @@ static inline unsigned int countr_zero(uint8_t x) return countr_zero(static_cast(x)); } -template -constexpr void unroll_impl(std::integer_sequence, F&& f) -{ - (f(std::integral_constant{}), ...); -} +template +struct UnrollImpl; + +template +struct UnrollImpl { + template + static void call(F&& f) + { + f(Pos); + UnrollImpl::call(std::forward(f)); + } +}; + +template +struct UnrollImpl { + template + static void call(F&&) + {} +}; -template -constexpr void unroll(F&& f) +template +RAPIDFUZZ_CONSTEXPR_CXX14 void unroll(F&& f) 
{ - unroll_impl(std::make_integer_sequence{}, std::forward(f)); + UnrollImpl::call(f); } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/details/type_traits.hpp b/rapidfuzz/details/type_traits.hpp index 06b6b1e2..2f05d2b4 100644 --- a/rapidfuzz/details/type_traits.hpp +++ b/rapidfuzz/details/type_traits.hpp @@ -49,4 +49,7 @@ struct is_explicitly_convertible { static bool const value = test(0); }; +template +using rf_enable_if_t = typename std::enable_if::type; + } // namespace rapidfuzz diff --git a/rapidfuzz/details/types.hpp b/rapidfuzz/details/types.hpp index ac3c4559..b8ec393c 100644 --- a/rapidfuzz/details/types.hpp +++ b/rapidfuzz/details/types.hpp @@ -4,6 +4,7 @@ #pragma once #include +#include #include #include #include @@ -336,13 +337,10 @@ class Editops : private std::vector { inline bool operator==(const Editops& lhs, const Editops& rhs) { - if (lhs.get_src_len() != rhs.get_src_len() || lhs.get_dest_len() != rhs.get_dest_len()) { - return false; - } + if (lhs.get_src_len() != rhs.get_src_len() || lhs.get_dest_len() != rhs.get_dest_len()) return false; + + if (lhs.size() != rhs.size()) return false; - if (lhs.size() != rhs.size()) { - return false; - } return std::equal(lhs.begin(), lhs.end(), rhs.begin()); } diff --git a/rapidfuzz/distance/DamerauLevenshtein.hpp b/rapidfuzz/distance/DamerauLevenshtein.hpp index b1209ed7..98f3f2f1 100644 --- a/rapidfuzz/distance/DamerauLevenshtein.hpp +++ b/rapidfuzz/distance/DamerauLevenshtein.hpp @@ -133,8 +133,7 @@ struct CachedDamerauLevenshtein : public detail::CachedDistanceBase - size_t _distance(const detail::Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _distance(const detail::Range& s2, size_t score_cutoff, size_t) const { return rapidfuzz::experimental::damerau_levenshtein_distance(s1, s2, score_cutoff); } @@ -142,11 +141,13 @@ struct CachedDamerauLevenshtein : public detail::CachedDistanceBase s1; }; +#ifdef 
RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedDamerauLevenshtein(const Sentence1& s1_) -> CachedDamerauLevenshtein>; template CachedDamerauLevenshtein(InputIt1 first1, InputIt1 last1) -> CachedDamerauLevenshtein>; +#endif } // namespace experimental } // namespace rapidfuzz diff --git a/rapidfuzz/distance/DamerauLevenshtein_impl.hpp b/rapidfuzz/distance/DamerauLevenshtein_impl.hpp index 5a122872..5e34556d 100644 --- a/rapidfuzz/distance/DamerauLevenshtein_impl.hpp +++ b/rapidfuzz/distance/DamerauLevenshtein_impl.hpp @@ -11,7 +11,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct RowId { @@ -130,11 +131,11 @@ class DamerauLevenshtein } template - static size_t _distance(const Range& s1, const Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) + static size_t _distance(const Range& s1, const Range& s2, size_t score_cutoff, size_t) { return damerau_levenshtein_distance(s1, s2, score_cutoff); } }; -} // namespace rapidfuzz::detail \ No newline at end of file +} // namespace detail +} // namespace rapidfuzz \ No newline at end of file diff --git a/rapidfuzz/distance/Hamming.hpp b/rapidfuzz/distance/Hamming.hpp index d5160722..3ce33b91 100644 --- a/rapidfuzz/distance/Hamming.hpp +++ b/rapidfuzz/distance/Hamming.hpp @@ -78,7 +78,7 @@ template Editops hamming_editops(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, bool pad_ = true, size_t score_hint = std::numeric_limits::max()) { - return detail::hamming_editops(detail::Range(first1, last1), detail::Range(first2, last2), pad_, + return detail::hamming_editops(detail::make_range(first1, last1), detail::make_range(first2, last2), pad_, score_hint); } @@ -86,7 +86,7 @@ template Editops hamming_editops(const Sentence1& s1, const Sentence2& s2, bool pad_ = true, size_t score_hint = std::numeric_limits::max()) { - return detail::hamming_editops(detail::Range(s1), detail::Range(s2), pad_, score_hint); + return 
detail::hamming_editops(detail::make_range(s1), detail::make_range(s2), pad_, score_hint); } /** @@ -151,8 +151,7 @@ struct CachedHamming : public detail::CachedDistanceBase, } template - size_t _distance(const detail::Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _distance(const detail::Range& s2, size_t score_cutoff, size_t score_hint) const { return detail::Hamming::distance(s1, s2, pad, score_cutoff, score_hint); } @@ -161,11 +160,13 @@ struct CachedHamming : public detail::CachedDistanceBase, bool pad; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedHamming(const Sentence1& s1_, bool pad_ = true) -> CachedHamming>; template CachedHamming(InputIt1 first1, InputIt1 last1, bool pad_ = true) -> CachedHamming>; +#endif /**@}*/ diff --git a/rapidfuzz/distance/Hamming_impl.hpp b/rapidfuzz/distance/Hamming_impl.hpp index 8389f902..34d286cd 100644 --- a/rapidfuzz/distance/Hamming_impl.hpp +++ b/rapidfuzz/distance/Hamming_impl.hpp @@ -6,7 +6,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { class Hamming : public DistanceBase::max(), bool> { friend DistanceBase::max(), bool>; @@ -20,7 +21,7 @@ class Hamming : public DistanceBase static size_t _distance(const Range& s1, const Range& s2, bool pad, - size_t score_cutoff, [[maybe_unused]] size_t score_hint) + size_t score_cutoff, size_t) { if (!pad && s1.size() != s2.size()) throw std::invalid_argument("Sequences are not the same length."); @@ -57,4 +58,5 @@ Editops hamming_editops(const Range& s1, const Range& s2, bo return ops; } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/distance/Indel.hpp b/rapidfuzz/distance/Indel.hpp index 9cfa902b..16410075 100644 --- a/rapidfuzz/distance/Indel.hpp +++ b/rapidfuzz/distance/Indel.hpp @@ -182,10 +182,12 @@ struct CachedIndel CachedLCSseq scorer; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedIndel(const 
Sentence1& s1_) -> CachedIndel>; template CachedIndel(InputIt1 first1, InputIt1 last1) -> CachedIndel>; +#endif } // namespace rapidfuzz diff --git a/rapidfuzz/distance/Indel_impl.hpp b/rapidfuzz/distance/Indel_impl.hpp index d0ab9d50..b3139fab 100644 --- a/rapidfuzz/distance/Indel_impl.hpp +++ b/rapidfuzz/distance/Indel_impl.hpp @@ -8,7 +8,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template size_t indel_distance(const BlockPatternMatchVector& block, const Range& s1, @@ -65,4 +66,5 @@ class Indel : public DistanceBase } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/distance/Jaro.hpp b/rapidfuzz/distance/Jaro.hpp index 764332cd..2948c784 100644 --- a/rapidfuzz/distance/Jaro.hpp +++ b/rapidfuzz/distance/Jaro.hpp @@ -70,12 +70,13 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, friend detail::MultiSimilarityBase, double, 0, 1>; friend detail::MultiNormalizedMetricBase, double>; - static_assert(MaxLen == 8 || MaxLen == 16 || MaxLen == 32 || MaxLen == 64); + static_assert(MaxLen == 8 || MaxLen == 16 || MaxLen == 32 || MaxLen == 64, "incorrect MaxLen used"); - using VecType = typename std::conditional_t< + using VecType = typename std::conditional< MaxLen == 8, uint8_t, - typename std::conditional_t>>; + typename std::conditional::type>::type>:: + type; constexpr static size_t get_vec_size() { @@ -95,7 +96,7 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, # endif } - constexpr static size_t find_block_count(size_t count) + static size_t find_block_count(size_t count) { size_t vec_size = get_vec_size(); size_t simd_vec_count = detail::ceil_div(count, vec_size); @@ -166,12 +167,12 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, if (score_count < result_count()) throw std::invalid_argument("scores has to have >= result_count() elements"); - detail::Range scores_(scores, scores + score_count); + auto scores_ 
= detail::make_range(scores, scores + score_count); detail::jaro_similarity_simd(scores_, PM, str_lens, str_lens_size, s2, score_cutoff); } template - double maximum([[maybe_unused]] size_t s1_idx, const detail::Range&) const + double maximum(size_t, const detail::Range&) const { return 1.0; } @@ -198,7 +199,7 @@ struct CachedJaro : public detail::CachedSimilarityBase, doub {} template - CachedJaro(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::Range(first1, last1)) + CachedJaro(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::make_range(first1, last1)) {} private: @@ -212,20 +213,21 @@ struct CachedJaro : public detail::CachedSimilarityBase, doub } template - double _similarity(const detail::Range& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double _similarity(const detail::Range& s2, double score_cutoff, double) const { - return detail::jaro_similarity(PM, detail::Range(s1), s2, score_cutoff); + return detail::jaro_similarity(PM, detail::make_range(s1), s2, score_cutoff); } std::vector s1; detail::BlockPatternMatchVector PM; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedJaro(const Sentence1& s1_) -> CachedJaro>; template CachedJaro(InputIt1 first1, InputIt1 last1) -> CachedJaro>; +#endif } // namespace rapidfuzz diff --git a/rapidfuzz/distance/JaroWinkler.hpp b/rapidfuzz/distance/JaroWinkler.hpp index d2306df3..1bd2f082 100644 --- a/rapidfuzz/distance/JaroWinkler.hpp +++ b/rapidfuzz/distance/JaroWinkler.hpp @@ -9,7 +9,7 @@ namespace rapidfuzz { template >> + typename = rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 1.0) { @@ -25,7 +25,7 @@ double jaro_winkler_distance(const Sentence1& s1, const Sentence2& s2, double pr } template >> + typename = rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_similarity(InputIt1 first1, InputIt1 last1, InputIt2 
first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 0.0) { @@ -41,7 +41,7 @@ double jaro_winkler_similarity(const Sentence1& s1, const Sentence2& s2, double } template >> + typename = rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_normalized_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 1.0) { @@ -57,7 +57,7 @@ double jaro_winkler_normalized_distance(const Sentence1& s1, const Sentence2& s2 } template >> + typename = rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_normalized_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 0.0) { @@ -145,7 +145,7 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase - double maximum([[maybe_unused]] size_t s1_idx, const detail::Range&) const + double maximum(size_t, const detail::Range&) const { return 1.0; } @@ -174,7 +174,7 @@ struct CachedJaroWinkler : public detail::CachedSimilarityBase CachedJaroWinkler(InputIt1 first1, InputIt1 last1, double _prefix_weight = 0.1) - : prefix_weight(_prefix_weight), s1(first1, last1), PM(detail::Range(first1, last1)) + : prefix_weight(_prefix_weight), s1(first1, last1), PM(detail::make_range(first1, last1)) {} private: @@ -188,10 +188,9 @@ struct CachedJaroWinkler : public detail::CachedSimilarityBase - double _similarity(const detail::Range& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double _similarity(const detail::Range& s2, double score_cutoff, double) const { - return detail::jaro_winkler_similarity(PM, detail::Range(s1), s2, prefix_weight, score_cutoff); + return detail::jaro_winkler_similarity(PM, detail::make_range(s1), s2, prefix_weight, score_cutoff); } double prefix_weight; @@ -199,6 +198,7 @@ struct CachedJaroWinkler : public detail::CachedSimilarityBase explicit CachedJaroWinkler(const Sentence1& s1_, double _prefix_weight = 0.1) -> CachedJaroWinkler>; 
@@ -206,5 +206,6 @@ explicit CachedJaroWinkler(const Sentence1& s1_, template CachedJaroWinkler(InputIt1 first1, InputIt1 last1, double _prefix_weight = 0.1) -> CachedJaroWinkler>; +#endif } // namespace rapidfuzz diff --git a/rapidfuzz/distance/JaroWinkler_impl.hpp b/rapidfuzz/distance/JaroWinkler_impl.hpp index c8eb6575..a40f8af8 100644 --- a/rapidfuzz/distance/JaroWinkler_impl.hpp +++ b/rapidfuzz/distance/JaroWinkler_impl.hpp @@ -3,7 +3,8 @@ #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template double jaro_winkler_similarity(const Range& P, const Range& T, double prefix_weight, @@ -81,10 +82,11 @@ class JaroWinkler : public SimilarityBase { template static double _similarity(const Range& s1, const Range& s2, double prefix_weight, - double score_cutoff, [[maybe_unused]] double score_hint) + double score_cutoff, double) { return jaro_winkler_similarity(s1, s2, prefix_weight, score_cutoff); } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/distance/Jaro_impl.hpp b/rapidfuzz/distance/Jaro_impl.hpp index 1c4d946a..81bdf2f7 100644 --- a/rapidfuzz/distance/Jaro_impl.hpp +++ b/rapidfuzz/distance/Jaro_impl.hpp @@ -9,7 +9,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { struct FlaggedCharsWord { uint64_t P_flag; @@ -92,7 +93,11 @@ static inline size_t count_common_chars(const FlaggedCharsMultiword& flagged) template static inline FlaggedCharsWord flag_similar_characters_word(const PM_Vec& PM, - [[maybe_unused]] const Range& P, +#ifdef NDEBUG + const Range&, +#else + const Range& P, +#endif const Range& T, size_t Bound) { assert(P.size() <= 64); @@ -160,7 +165,7 @@ static inline void flag_similar_characters_step(const BlockPatternMatchVector& P if (T_j >= 0 && T_j < 256) { for (; word + 3 < last_word - 1; word += 4) { uint64_t PM_j[4]; - unroll([&](auto i) { + unroll([&](size_t i) { PM_j[i] = PM.get(word + i, static_cast(T_j)) & 
(~flagged.P_flag[word + i]); }); @@ -459,6 +464,11 @@ struct JaroSimilaritySimdBounds { template static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_lengths, Range& s2) +# ifdef RAPIDFUZZ_AVX2 + -> JaroSimilaritySimdBounds> +# else + -> JaroSimilaritySimdBounds> +# endif { # ifdef RAPIDFUZZ_AVX2 using namespace simd_avx2; @@ -466,7 +476,9 @@ static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_leng using namespace simd_sse2; # endif - [[maybe_unused]] static constexpr size_t alignment = native_simd::alignment; +# ifndef RAPIDFUZZ_AVX2 + static constexpr size_t alignment = native_simd::alignment; +# endif static constexpr size_t vec_width = native_simd::size; assert(s2.size() <= sizeof(VecType) * 8); @@ -526,6 +538,11 @@ static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_leng template static inline auto jaro_similarity_prepare_bound_long_s2(const VecType* s1_lengths, Range& s2) +# ifdef RAPIDFUZZ_AVX2 + -> JaroSimilaritySimdBounds> +# else + -> JaroSimilaritySimdBounds> +# endif { # ifdef RAPIDFUZZ_AVX2 using namespace simd_avx2; @@ -615,7 +632,7 @@ jaro_similarity_simd_long_s2(Range scores, const detail::BlockPatternMa size_t j = 0; for (; j < std::min(bounds.maxBound, s2_cur.size()); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -629,7 +646,7 @@ jaro_similarity_simd_long_s2(Range scores, const detail::BlockPatternMa for (; j < s2_cur.size(); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -732,7 +749,7 @@ 
jaro_similarity_simd_short_s2(Range scores, const detail::BlockPatternM size_t j = 0; for (; j < std::min(bounds.maxBound, s2_cur.size()); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -745,7 +762,7 @@ jaro_similarity_simd_short_s2(Range scores, const detail::BlockPatternM for (; j < s2_cur.size(); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -836,10 +853,11 @@ class Jaro : public SimilarityBase { template static double _similarity(const Range& s1, const Range& s2, double score_cutoff, - [[maybe_unused]] double score_hint) + double) { return jaro_similarity(s1, s2, score_cutoff); } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/distance/LCSseq.hpp b/rapidfuzz/distance/LCSseq.hpp index 9082dc37..8891cf2c 100644 --- a/rapidfuzz/distance/LCSseq.hpp +++ b/rapidfuzz/distance/LCSseq.hpp @@ -65,13 +65,13 @@ double lcs_seq_normalized_similarity(const Sentence1& s1, const Sentence2& s2, d template Editops lcs_seq_editops(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2) { - return detail::lcs_seq_editops(detail::Range(first1, last1), detail::Range(first2, last2)); + return detail::lcs_seq_editops(detail::make_range(first1, last1), detail::make_range(first2, last2)); } template Editops lcs_seq_editops(const Sentence1& s1, const Sentence2& s2) { - return detail::lcs_seq_editops(detail::Range(s1), detail::Range(s2)); + return detail::lcs_seq_editops(detail::make_range(s1), detail::make_range(s2)); } #ifdef RAPIDFUZZ_SIMD @@ -83,26 
+83,26 @@ struct MultiLCSseq : public detail::MultiSimilarityBase, siz friend detail::MultiSimilarityBase, size_t, 0, std::numeric_limits::max()>; friend detail::MultiNormalizedMetricBase, size_t>; - constexpr static size_t get_vec_size() + RAPIDFUZZ_CONSTEXPR_CXX14 static size_t get_vec_size() { # ifdef RAPIDFUZZ_AVX2 using namespace detail::simd_avx2; # else using namespace detail::simd_sse2; # endif - if constexpr (MaxLen <= 8) + RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 8) return native_simd::size; - else if constexpr (MaxLen <= 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 16) return native_simd::size; - else if constexpr (MaxLen <= 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 32) return native_simd::size; - else if constexpr (MaxLen <= 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 64) return native_simd::size; - static_assert(MaxLen <= 64); + static_assert(MaxLen <= 64, "expected MaxLen <= 64"); } - constexpr static size_t find_block_count(size_t count) + static size_t find_block_count(size_t count) { size_t vec_size = get_vec_size(); size_t simd_vec_count = detail::ceil_div(count, vec_size); @@ -164,14 +164,14 @@ struct MultiLCSseq : public detail::MultiSimilarityBase, siz if (score_count < result_count()) throw std::invalid_argument("scores has to have >= result_count() elements"); - detail::Range scores_(scores, scores + score_count); - if constexpr (MaxLen == 8) + auto scores_ = detail::make_range(scores, scores + score_count); + RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 8) detail::lcs_simd(scores_, PM, s2, score_cutoff); - else if constexpr (MaxLen == 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 16) detail::lcs_simd(scores_, PM, s2, score_cutoff); - else if constexpr (MaxLen == 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 32) detail::lcs_simd(scores_, PM, s2, score_cutoff); - else if constexpr (MaxLen == 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 64) detail::lcs_simd(scores_, PM, s2, score_cutoff); } @@ -202,7 +202,7 @@ struct CachedLCSseq {} template - 
CachedLCSseq(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::Range(first1, last1)) + CachedLCSseq(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::make_range(first1, last1)) {} private: @@ -216,20 +216,21 @@ struct CachedLCSseq } template - size_t _similarity(const detail::Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _similarity(const detail::Range& s2, size_t score_cutoff, size_t) const { - return detail::lcs_seq_similarity(PM, detail::Range(s1), s2, score_cutoff); + return detail::lcs_seq_similarity(PM, detail::make_range(s1), s2, score_cutoff); } std::vector s1; detail::BlockPatternMatchVector PM; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedLCSseq(const Sentence1& s1_) -> CachedLCSseq>; template CachedLCSseq(InputIt1 first1, InputIt1 last1) -> CachedLCSseq>; +#endif } // namespace rapidfuzz diff --git a/rapidfuzz/distance/LCSseq_impl.hpp b/rapidfuzz/distance/LCSseq_impl.hpp index 20d10b1a..87242db6 100644 --- a/rapidfuzz/distance/LCSseq_impl.hpp +++ b/rapidfuzz/distance/LCSseq_impl.hpp @@ -13,7 +13,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct LCSseqResult; @@ -30,6 +31,20 @@ struct LCSseqResult { size_t sim; }; +template +LCSseqResult& getMatrixRef(LCSseqResult& res) +{ +#if RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE + return res; +#else + // this is a hack since the compiler doesn't know early enough that + // this is never called when the types differ. + // On C++17 this properly uses if constexpr + assert(RecordMatrix); + return reinterpret_cast&>(res); +#endif +} + /* * An encoded mbleven model table. 
* @@ -143,12 +158,12 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const size_t cur_vec = 0; for (; cur_vec + interleaveCount * vecs <= block.size(); cur_vec += interleaveCount * vecs) { std::array, interleaveCount> S; - unroll([&](auto j) { S[j] = static_cast(-1); }); + unroll([&](size_t j) { S[j] = static_cast(-1); }); for (const auto& ch : s2) { - unroll([&](auto j) { + unroll([&](size_t j) { alignas(32) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + j * vecs + i, ch); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + j * vecs + i, ch); }); native_simd Matches(stored.data()); native_simd u = S[j] & Matches; @@ -156,9 +171,9 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const }); } - unroll([&](auto j) { + unroll([&](size_t j) { auto counts = popcount(~S[j]); - unroll([&](auto i) { + unroll([&](size_t i) { *score_iter = (counts[i] >= score_cutoff) ? static_cast(counts[i]) : 0; score_iter++; }); @@ -170,7 +185,7 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const for (const auto& ch : s2) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, ch); }); native_simd Matches(stored.data()); native_simd u = S & Matches; @@ -178,7 +193,7 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const } auto counts = popcount(~S); - unroll([&](auto i) { + unroll([&](size_t i) { *score_iter = (counts[i] >= score_cutoff) ? 
static_cast(counts[i]) : 0; score_iter++; }); @@ -195,7 +210,10 @@ auto lcs_unroll(const PMV& block, const Range&, const Range& unroll([&](size_t i) { S[i] = ~UINT64_C(0); }); LCSseqResult res; - if constexpr (RecordMatrix) res.S = ShiftedBitMatrix(s2.size(), N, ~UINT64_C(0)); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.S = ShiftedBitMatrix(s2.size(), N, ~UINT64_C(0)); + } auto iter_s2 = s2.begin(); for (size_t i = 0; i < s2.size(); ++i) { @@ -210,7 +228,10 @@ auto lcs_unroll(const PMV& block, const Range&, const Range& uint64_t x = addc64(S[word], u, carry, &carry); S[word] = x | (S[word] - u); - if constexpr (RecordMatrix) res.S[i][word] = S[word]; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.S[i][word] = S[word]; + } }); } @@ -221,7 +242,10 @@ auto lcs_unroll(const PMV& block, const Range&, const Range& uint64_t x = addc64(S[word], u, carry, &carry); S[word] = x | (S[word] - u); - if constexpr (RecordMatrix) res.S[i][word] = S[word]; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.S[i][word] = S[word]; + } }); iter_s2++; @@ -256,10 +280,11 @@ auto lcs_blockwise(const PMV& PM, const Range& s1, const Range res; - if constexpr (RecordMatrix) { + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); size_t full_band = band_width_left + 1 + band_width_right; size_t full_band_words = std::min(words, full_band / word_size + 2); - res.S = ShiftedBitMatrix(s2.size(), full_band_words, ~UINT64_C(0)); + res_.S = ShiftedBitMatrix(s2.size(), full_band_words, ~UINT64_C(0)); } /* first_block is the index of the first block in Ukkonen band. 
*/ @@ -270,7 +295,10 @@ auto lcs_blockwise(const PMV& PM, const Range& s1, const Range(first_block * word_size)); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.S.set_offset(row, static_cast(first_block * word_size)); + } for (size_t word = first_block; word < last_block; ++word) { const uint64_t Matches = PM.get(word, *iter_s2); @@ -281,7 +309,10 @@ auto lcs_blockwise(const PMV& PM, const Range& s1, const Range band_width_right) first_block = (row - band_width_right) / word_size; @@ -353,8 +384,7 @@ size_t lcs_seq_similarity(const BlockPatternMatchVector& block, Range size_t max_misses = len1 + len2 - 2 * score_cutoff; /* no edits are allowed */ - if (max_misses == 0 || (max_misses == 1 && len1 == len2)) - return std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()) ? len1 : 0; + if (max_misses == 0 || (max_misses == 1 && len1 == len2)) return s1 == s2 ? len1 : 0; if (max_misses < abs_diff(len1, len2)) return 0; @@ -386,8 +416,7 @@ size_t lcs_seq_similarity(Range s1, Range s2, size_t score_c size_t max_misses = len1 + len2 - 2 * score_cutoff; /* no edits are allowed */ - if (max_misses == 0 || (max_misses == 1 && len1 == len2)) - return std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()) ? len1 : 0; + if (max_misses == 0 || (max_misses == 1 && len1 == len2)) return s1 == s2 ? 
len1 : 0; if (max_misses < abs_diff(len1, len2)) return 0; @@ -421,7 +450,9 @@ Editops recover_alignment(const Range& s1, const Range& s2, if (dist == 0) return editops; - [[maybe_unused]] size_t band_width_right = s2.size() - matrix.sim; +#ifndef NDEBUG + size_t band_width_right = s2.size() - matrix.sim; +#endif auto col = len1; auto row = len2; @@ -520,10 +551,11 @@ class LCSseq : public SimilarityBase static size_t _similarity(const Range& s1, const Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) + size_t) { return lcs_seq_similarity(s1, s2, score_cutoff); } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/distance/Levenshtein.hpp b/rapidfuzz/distance/Levenshtein.hpp index 3d61a212..45245fea 100644 --- a/rapidfuzz/distance/Levenshtein.hpp +++ b/rapidfuzz/distance/Levenshtein.hpp @@ -283,7 +283,7 @@ template Editops levenshtein_editops(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, size_t score_hint = std::numeric_limits::max()) { - return detail::levenshtein_editops(detail::Range(first1, last1), detail::Range(first2, last2), + return detail::levenshtein_editops(detail::make_range(first1, last1), detail::make_range(first2, last2), score_hint); } @@ -291,7 +291,7 @@ template Editops levenshtein_editops(const Sentence1& s1, const Sentence2& s2, size_t score_hint = std::numeric_limits::max()) { - return detail::levenshtein_editops(detail::Range(s1), detail::Range(s2), score_hint); + return detail::levenshtein_editops(detail::make_range(s1), detail::make_range(s2), score_hint); } #ifdef RAPIDFUZZ_SIMD @@ -304,26 +304,26 @@ struct MultiLevenshtein : public detail::MultiDistanceBase::max()>; friend detail::MultiNormalizedMetricBase, size_t>; - constexpr static size_t get_vec_size() + RAPIDFUZZ_CONSTEXPR_CXX14 static size_t get_vec_size() { # ifdef RAPIDFUZZ_AVX2 using namespace detail::simd_avx2; # else using namespace detail::simd_sse2; # endif - if constexpr (MaxLen 
<= 8) + RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 8) return native_simd::size; - else if constexpr (MaxLen <= 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 16) return native_simd::size; - else if constexpr (MaxLen <= 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 32) return native_simd::size; - else if constexpr (MaxLen <= 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 64) return native_simd::size; - static_assert(MaxLen <= 64); + static_assert(MaxLen <= 64, "expected MaxLen <= 64"); } - constexpr static size_t find_block_count(size_t count) + static size_t find_block_count(size_t count) { size_t vec_size = get_vec_size(); size_t simd_vec_count = detail::ceil_div(count, vec_size); @@ -387,14 +387,14 @@ struct MultiLevenshtein : public detail::MultiDistanceBase= result_count() elements"); - detail::Range scores_(scores, scores + score_count); - if constexpr (MaxLen == 8) + auto scores_ = detail::make_range(scores, scores + score_count); + RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 8) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 16) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 32) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 64) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); } @@ -428,7 +428,7 @@ struct CachedLevenshtein : public detail::CachedDistanceBase CachedLevenshtein(InputIt1 first1, InputIt1 last1, LevenshteinWeightTable aWeights = {1, 1, 1}) - : s1(first1, last1), PM(detail::Range(first1, last1)), weights(aWeights) + : s1(first1, last1), PM(detail::make_range(first1, last1)), weights(aWeights) {} private: @@ -454,7 +454,7 @@ struct CachedLevenshtein : public detail::CachedDistanceBase= weights.insert_cost + weights.delete_cost) 
{ // max can make use of the common divisor of the three weights size_t new_max = detail::ceil_div(score_cutoff, weights.insert_cost); - size_t dist = detail::indel_distance(PM, detail::Range(s1), s2, new_max); + size_t dist = detail::indel_distance(PM, detail::make_range(s1), s2, new_max); dist *= weights.insert_cost; return (dist <= score_cutoff) ? dist : score_cutoff + 1; } } - return detail::generalized_levenshtein_distance(detail::Range(s1), s2, weights, score_cutoff); + return detail::generalized_levenshtein_distance(detail::make_range(s1), s2, weights, score_cutoff); } std::vector s1; @@ -481,6 +481,7 @@ struct CachedLevenshtein : public detail::CachedDistanceBase explicit CachedLevenshtein(const Sentence1& s1_, LevenshteinWeightTable aWeights = { 1, 1, 1}) -> CachedLevenshtein>; @@ -488,5 +489,6 @@ explicit CachedLevenshtein(const Sentence1& s1_, LevenshteinWeightTable aWeights template CachedLevenshtein(InputIt1 first1, InputIt1 last1, LevenshteinWeightTable aWeights = {1, 1, 1}) -> CachedLevenshtein>; +#endif } // namespace rapidfuzz diff --git a/rapidfuzz/distance/Levenshtein_impl.hpp b/rapidfuzz/distance/Levenshtein_impl.hpp index 38758413..991f10c9 100644 --- a/rapidfuzz/distance/Levenshtein_impl.hpp +++ b/rapidfuzz/distance/Levenshtein_impl.hpp @@ -14,7 +14,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { struct LevenshteinRow { uint64_t VP; @@ -53,6 +54,34 @@ struct LevenshteinResult { size_t dist; }; +template +LevenshteinResult& getMatrixRef(LevenshteinResult& res) +{ +#if RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE + return res; +#else + // this is a hack since the compiler doesn't know early enough that + // this is never called when the types differ. 
+ // On C++17 this properly uses if constexpr + assert(RecordMatrix); + return reinterpret_cast&>(res); +#endif +} + +template +LevenshteinResult& getBitRowRef(LevenshteinResult& res) +{ +#if RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE + return res; +#else + // this is a hack since the compiler doesn't know early enough that + // this is never called when the types differ. + // On C++17 this properly uses if constexpr + assert(RecordBitRow); + return reinterpret_cast&>(res); +#endif +} + template size_t generalized_levenshtein_wagner_fischer(const Range& s1, const Range& s2, LevenshteinWeightTable weights, size_t max) @@ -240,9 +269,10 @@ auto levenshtein_hyrroe2003(const PM_Vec& PM, const Range& s1, const R LevenshteinResult res; res.dist = s1.size(); - if constexpr (RecordMatrix) { - res.VP = ShiftedBitMatrix(s2.size(), 1, ~UINT64_C(0)); - res.VN = ShiftedBitMatrix(s2.size(), 1, 0); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP = ShiftedBitMatrix(s2.size(), 1, ~UINT64_C(0)); + res_.VN = ShiftedBitMatrix(s2.size(), 1, 0); } /* mask used when computing D[m,j] in the paper 10^(m-1) */ @@ -271,19 +301,21 @@ auto levenshtein_hyrroe2003(const PM_Vec& PM, const Range& s1, const R VP = HN | ~(D0 | HP); VN = HP & D0; - if constexpr (RecordMatrix) { - res.VP[i][0] = VP; - res.VN[i][0] = VN; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP[i][0] = VP; + res_.VN[i][0] = VN; } } if (res.dist > max) res.dist = max + 1; - if constexpr (RecordBitRow) { - res.first_block = 0; - res.last_block = 0; - res.prev_score = s2.size(); - res.vecs.emplace_back(VP, VN); + RAPIDFUZZ_IF_CONSTEXPR (RecordBitRow) { + auto& res_ = getBitRowRef(res); + res_.first_block = 0; + res_.last_block = 0; + res_.prev_score = s2.size(); + res_.vecs.emplace_back(VP, VN); } return res; @@ -315,12 +347,12 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte native_simd VN(VecType(0)); alignas(alignment) std::array 
currDist_; - unroll( - [&](auto i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); + unroll( + [&](size_t i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); native_simd currDist(reinterpret_cast(currDist_.data())); /* mask used when computing D[m,j] in the paper 10^(m-1) */ alignas(alignment) std::array mask_; - unroll([&](auto i) { + unroll([&](size_t i) { if (s1_lengths[result_index + i] == 0) mask_[i] = 0; else @@ -331,7 +363,7 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte for (const auto& ch : s2) { /* Step 1: Computing D0 */ alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, ch); }); native_simd X(stored.data()); auto D0 = (((X & VP) + VP) ^ VP) | X | VN; @@ -355,7 +387,7 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte alignas(alignment) std::array distances; currDist.store(distances.data()); - unroll([&](auto i) { + unroll([&](size_t i) { size_t score = 0; /* strings of length 0 are not handled correctly */ if (s1_lengths[result_index] == 0) { @@ -363,7 +395,9 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte } /* calculate score under consideration of wraparounds in parallel counter */ else { - if constexpr (std::numeric_limits::max() < std::numeric_limits::max()) { + RAPIDFUZZ_IF_CONSTEXPR (std::numeric_limits::max() < + std::numeric_limits::max()) + { size_t min_dist = abs_diff(s1_lengths[result_index], s2.size()); size_t wraparound_score = static_cast(std::numeric_limits::max()) + 1; @@ -484,14 +518,15 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range res; res.dist = max; - if constexpr (RecordMatrix) { - res.VP = ShiftedBitMatrix(s2.size(), 1, ~UINT64_C(0)); - res.VN = ShiftedBitMatrix(s2.size(), 1, 0); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP = 
ShiftedBitMatrix(s2.size(), 1, ~UINT64_C(0)); + res_.VN = ShiftedBitMatrix(s2.size(), 1, 0); ptrdiff_t start_offset = static_cast(max) + 2 - 64; for (size_t i = 0; i < s2.size(); ++i) { - res.VP.set_offset(i, start_offset + static_cast(i)); - res.VN.set_offset(i, start_offset + static_cast(i)); + res_.VP.set_offset(i, start_offset + static_cast(i)); + res_.VN.set_offset(i, start_offset + static_cast(i)); } } @@ -545,9 +580,10 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range> 1) | HP); VN = (D0 >> 1) & HP; - if constexpr (RecordMatrix) { - res.VP[i][0] = VP; - res.VN[i][0] = VN; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP[i][0] = VP; + res_.VN[i][0] = VN; } } @@ -587,9 +623,10 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range> 1) | HP); VN = (D0 >> 1) & HP; - if constexpr (RecordMatrix) { - res.VP[i][0] = VP; - res.VN[i][0] = VN; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP[i][0] = VP; + res_.VN[i][0] = VN; } } @@ -624,17 +661,19 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range scores[words - 1] = s1.size(); - if constexpr (RecordMatrix) { + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); size_t full_band = std::min(s1.size(), 2 * max + 1); size_t full_band_words = std::min(words, full_band / word_size + 2); - res.VP = ShiftedBitMatrix(s2.size(), full_band_words, ~UINT64_C(0)); - res.VN = ShiftedBitMatrix(s2.size(), full_band_words, 0); + res_.VP = ShiftedBitMatrix(s2.size(), full_band_words, ~UINT64_C(0)); + res_.VN = ShiftedBitMatrix(s2.size(), full_band_words, 0); } - if constexpr (RecordBitRow) { - res.first_block = 0; - res.last_block = 0; - res.prev_score = 0; + RAPIDFUZZ_IF_CONSTEXPR (RecordBitRow) { + auto& res_ = getBitRowRef(res); + res_.first_block = 0; + res_.last_block = 0; + res_.prev_score = 0; } max = std::min(max, std::max(s1.size(), s2.size())); @@ -651,9 +690,10 
@@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range uint64_t HP_carry = 1; uint64_t HN_carry = 0; - if constexpr (RecordMatrix) { - res.VP.set_offset(row, static_cast(first_block * word_size)); - res.VN.set_offset(row, static_cast(first_block * word_size)); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP.set_offset(row, static_cast(first_block * word_size)); + res_.VN.set_offset(row, static_cast(first_block * word_size)); } auto advance_block = [&](size_t word) { @@ -687,9 +727,10 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range vecs[word].VP = HN | ~(D0 | HP); vecs[word].VN = HP & D0; - if constexpr (RecordMatrix) { - res.VP[row][word - first_block] = vecs[word].VP; - res.VN[row][word - first_block] = vecs[word].VN; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP[row][word - first_block] = vecs[word].VP; + res_.VN[row][word - first_block] = vecs[word].VN; } return static_cast(HP_carry) - static_cast(HN_carry); @@ -775,26 +816,27 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range return res; } - if constexpr (RecordBitRow) { + RAPIDFUZZ_IF_CONSTEXPR (RecordBitRow) { if (row == stop_row) { + auto& res_ = getBitRowRef(res); if (first_block == 0) - res.prev_score = stop_row + 1; + res_.prev_score = stop_row + 1; else { /* count backwards to find score at last position in previous block */ size_t relevant_bits = std::min((first_block + 1) * 64, s1.size()) % 64; uint64_t mask = ~UINT64_C(0); if (relevant_bits) mask >>= 64 - relevant_bits; - res.prev_score = scores[first_block] + popcount(vecs[first_block].VN & mask) - - popcount(vecs[first_block].VP & mask); + res_.prev_score = scores[first_block] + popcount(vecs[first_block].VN & mask) - + popcount(vecs[first_block].VP & mask); } - res.first_block = first_block; - res.last_block = last_block; - res.vecs = std::move(vecs); + res_.first_block = 
first_block; + res_.last_block = last_block; + res_.vecs = std::move(vecs); /* unknown so make sure it is <= max */ - res.dist = 0; + res_.dist = 0; return res; } } @@ -816,7 +858,7 @@ size_t uniform_levenshtein_distance(const BlockPatternMatchVector& block, Range< if (score_hint < 31) score_hint = 31; // when no differences are allowed a direct comparision is sufficient - if (score_cutoff == 0) return !std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()); + if (score_cutoff == 0) return s1 != s2; if (score_cutoff < abs_diff(s1.size(), s2.size())) return score_cutoff + 1; @@ -874,7 +916,7 @@ size_t uniform_levenshtein_distance(Range s1, Range s2, size if (score_hint < 31) score_hint = 31; // when no differences are allowed a direct comparision is sufficient - if (score_cutoff == 0) return !std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()); + if (score_cutoff == 0) return s1 != s2; // at least length difference insertions/deletions required if (score_cutoff < (s1.size() - s2.size())) return score_cutoff + 1; @@ -1131,9 +1173,6 @@ HirschbergPos find_hirschberg_pos(const Range& s1, const Range= 0); - assert(hpos.right_score >= 0); - if (hpos.left_score + hpos.right_score > max) return find_hirschberg_pos(s1, s2, max * 2); else { @@ -1217,4 +1256,5 @@ Editops levenshtein_editops(const Range& s1, const Range& s2 return editops; } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/distance/OSA.hpp b/rapidfuzz/distance/OSA.hpp index 0e56eadd..ed9d16d0 100644 --- a/rapidfuzz/distance/OSA.hpp +++ b/rapidfuzz/distance/OSA.hpp @@ -119,26 +119,26 @@ struct MultiOSA friend detail::MultiDistanceBase, size_t, 0, std::numeric_limits::max()>; friend detail::MultiNormalizedMetricBase, size_t>; - constexpr static size_t get_vec_size() + RAPIDFUZZ_CONSTEXPR_CXX14 static size_t get_vec_size() { # ifdef RAPIDFUZZ_AVX2 using namespace detail::simd_avx2; # else using namespace detail::simd_sse2; # endif - if constexpr (MaxLen 
<= 8) + RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 8) return native_simd::size; - else if constexpr (MaxLen <= 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 16) return native_simd::size; - else if constexpr (MaxLen <= 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 32) return native_simd::size; - else if constexpr (MaxLen <= 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 64) return native_simd::size; - static_assert(MaxLen <= 64); + static_assert(MaxLen <= 64, "expected MaxLen <= 64"); } - constexpr static size_t find_block_count(size_t count) + static size_t find_block_count(size_t count) { size_t vec_size = get_vec_size(); size_t simd_vec_count = detail::ceil_div(count, vec_size); @@ -199,14 +199,14 @@ struct MultiOSA if (score_count < result_count()) throw std::invalid_argument("scores has to have >= result_count() elements"); - detail::Range scores_(scores, scores + score_count); - if constexpr (MaxLen == 8) + auto scores_ = detail::make_range(scores, scores + score_count); + RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 8) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 16) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 32) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 64) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); } @@ -237,7 +237,7 @@ struct CachedOSA {} template - CachedOSA(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::Range(first1, last1)) + CachedOSA(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::make_range(first1, last1)) {} private: @@ -251,8 +251,7 @@ struct CachedOSA } template - size_t _distance(const detail::Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _distance(const detail::Range& 
s2, size_t score_cutoff, size_t) const { size_t res; if (s1.empty()) @@ -260,9 +259,9 @@ struct CachedOSA else if (s2.empty()) res = s1.size(); else if (s1.size() < 64) - res = detail::osa_hyrroe2003(PM, detail::Range(s1), s2, score_cutoff); + res = detail::osa_hyrroe2003(PM, detail::make_range(s1), s2, score_cutoff); else - res = detail::osa_hyrroe2003_block(PM, detail::Range(s1), s2, score_cutoff); + res = detail::osa_hyrroe2003_block(PM, detail::make_range(s1), s2, score_cutoff); return (res <= score_cutoff) ? res : score_cutoff + 1; } @@ -271,11 +270,13 @@ struct CachedOSA detail::BlockPatternMatchVector PM; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template CachedOSA(const Sentence1& s1_) -> CachedOSA>; template CachedOSA(InputIt1 first1, InputIt1 last1) -> CachedOSA>; +#endif /**@}*/ } // namespace rapidfuzz diff --git a/rapidfuzz/distance/OSA_impl.hpp b/rapidfuzz/distance/OSA_impl.hpp index a4707dd0..8fa73289 100644 --- a/rapidfuzz/distance/OSA_impl.hpp +++ b/rapidfuzz/distance/OSA_impl.hpp @@ -10,7 +10,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { /** * @brief Bitparallel implementation of the OSA distance. 
@@ -100,12 +101,12 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV native_simd PM_j_old(VecType(0)); alignas(alignment) std::array currDist_; - unroll( - [&](auto i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); + unroll( + [&](size_t i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); native_simd currDist(reinterpret_cast(currDist_.data())); /* mask used when computing D[m,j] in the paper 10^(m-1) */ alignas(alignment) std::array mask_; - unroll([&](auto i) { + unroll([&](size_t i) { if (s1_lengths[result_index + i] == 0) mask_[i] = 0; else @@ -116,7 +117,7 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV for (const auto& ch : s2) { /* Step 1: Computing D0 */ alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, ch); }); native_simd PM_j(stored.data()); auto TR = (andnot(PM_j, D0) << 1) & PM_j_old; @@ -143,7 +144,7 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV alignas(alignment) std::array distances; currDist.store(distances.data()); - unroll([&](auto i) { + unroll([&](size_t i) { size_t score = 0; /* strings of length 0 are not handled correctly */ if (s1_lengths[result_index] == 0) { @@ -151,7 +152,9 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV } /* calculate score under consideration of wraparounds in parallel counter */ else { - if constexpr (std::numeric_limits::max() < std::numeric_limits::max()) { + RAPIDFUZZ_IF_CONSTEXPR (std::numeric_limits::max() < + std::numeric_limits::max()) + { size_t min_dist = abs_diff(s1_lengths[result_index], s2.size()); size_t wraparound_score = static_cast(std::numeric_limits::max()) + 1; @@ -270,4 +273,5 @@ class OSA : public DistanceBase::ma } }; -} // namespace rapidfuzz::detail \ No newline at end of file +} // namespace detail +} // namespace rapidfuzz \ No newline at end 
of file diff --git a/rapidfuzz/distance/Postfix.hpp b/rapidfuzz/distance/Postfix.hpp index 0da830f7..57320a5f 100644 --- a/rapidfuzz/distance/Postfix.hpp +++ b/rapidfuzz/distance/Postfix.hpp @@ -85,8 +85,7 @@ struct CachedPostfix : public detail::CachedSimilarityBase } template - size_t _similarity(detail::Range s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _similarity(detail::Range s2, size_t score_cutoff, size_t score_hint) const { return detail::Postfix::similarity(s1, s2, score_cutoff, score_hint); } @@ -94,12 +93,13 @@ struct CachedPostfix : public detail::CachedSimilarityBase std::vector s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPostfix(const Sentence1& s1_) -> CachedPostfix>; template CachedPostfix(InputIt1 first1, InputIt1 last1) -> CachedPostfix>; - +#endif /**@}*/ } // namespace rapidfuzz diff --git a/rapidfuzz/distance/Postfix_impl.hpp b/rapidfuzz/distance/Postfix_impl.hpp index 0be3abf6..87eb6ad2 100644 --- a/rapidfuzz/distance/Postfix_impl.hpp +++ b/rapidfuzz/distance/Postfix_impl.hpp @@ -6,7 +6,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { class Postfix : public SimilarityBase::max()> { friend SimilarityBase::max()>; @@ -19,12 +20,12 @@ class Postfix : public SimilarityBase - static size_t _similarity(Range s1, Range s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) + static size_t _similarity(Range s1, Range s2, size_t score_cutoff, size_t) { size_t dist = remove_common_suffix(s1, s2); return (dist >= score_cutoff) ? 
dist : 0; } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/distance/Prefix.hpp b/rapidfuzz/distance/Prefix.hpp index 64173dc7..d3123950 100644 --- a/rapidfuzz/distance/Prefix.hpp +++ b/rapidfuzz/distance/Prefix.hpp @@ -84,8 +84,7 @@ struct CachedPrefix : public detail::CachedSimilarityBase, } template - size_t _similarity(detail::Range s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _similarity(detail::Range s2, size_t score_cutoff, size_t) const { return detail::Prefix::similarity(s1, s2, score_cutoff, score_cutoff); } @@ -93,11 +92,13 @@ struct CachedPrefix : public detail::CachedSimilarityBase, std::vector s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPrefix(const Sentence1& s1_) -> CachedPrefix>; template CachedPrefix(InputIt1 first1, InputIt1 last1) -> CachedPrefix>; +#endif /**@}*/ diff --git a/rapidfuzz/distance/Prefix_impl.hpp b/rapidfuzz/distance/Prefix_impl.hpp index 41ab1f69..fac93c49 100644 --- a/rapidfuzz/distance/Prefix_impl.hpp +++ b/rapidfuzz/distance/Prefix_impl.hpp @@ -6,7 +6,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { class Prefix : public SimilarityBase::max()> { friend SimilarityBase::max()>; @@ -19,12 +20,12 @@ class Prefix : public SimilarityBase - static size_t _similarity(Range s1, Range s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) + static size_t _similarity(Range s1, Range s2, size_t score_cutoff, size_t) { size_t dist = remove_common_prefix(s1, s2); return (dist >= score_cutoff) ? 
dist : 0; } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/fuzz.hpp b/rapidfuzz/fuzz.hpp index d303722c..de49be9f 100644 --- a/rapidfuzz/fuzz.hpp +++ b/rapidfuzz/fuzz.hpp @@ -8,7 +8,8 @@ #include #include -namespace rapidfuzz::fuzz { +namespace rapidfuzz { +namespace fuzz { /** * @defgroup Fuzz Fuzz @@ -75,7 +76,7 @@ struct MultiRatio { void similarity(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0) const { - similarity(scores, score_count, detail::Range(first2, last2), score_cutoff); + similarity(scores, score_count, detail::make_range(first2, last2), score_cutoff); } template @@ -116,11 +117,13 @@ struct CachedRatio { CachedIndel cached_indel; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template CachedRatio(const Sentence1& s1) -> CachedRatio>; template CachedRatio(InputIt1 first1, InputIt1 last1) -> CachedRatio>; +#endif template ScoreAlignment partial_ratio_alignment(InputIt1 first1, InputIt1 last1, InputIt2 first2, @@ -189,11 +192,13 @@ struct CachedPartialRatio { CachedRatio cached_ratio; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPartialRatio(const Sentence1& s1) -> CachedPartialRatio>; template CachedPartialRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialRatio>; +#endif /** * @brief Sorts the words in the strings and calculates the fuzz::ratio between @@ -298,11 +303,13 @@ struct CachedTokenSortRatio { CachedRatio cached_ratio; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedTokenSortRatio(const Sentence1& s1) -> CachedTokenSortRatio>; template CachedTokenSortRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenSortRatio>; +#endif /** * @brief Sorts the words in the strings and calculates the fuzz::partial_ratio @@ -356,6 +363,7 @@ struct CachedPartialTokenSortRatio { CachedPartialRatio cached_partial_ratio; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPartialTokenSortRatio(const Sentence1& s1) -> 
CachedPartialTokenSortRatio>; @@ -363,6 +371,7 @@ explicit CachedPartialTokenSortRatio(const Sentence1& s1) template CachedPartialTokenSortRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialTokenSortRatio>; +#endif /** * @brief Compares the words in the strings based on unique and common words @@ -424,11 +433,13 @@ struct CachedTokenSetRatio { detail::SplittedSentenceView::iterator> tokens_s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedTokenSetRatio(const Sentence1& s1) -> CachedTokenSetRatio>; template CachedTokenSetRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenSetRatio>; +#endif /** * @brief Compares the words in the strings based on unique and common words @@ -481,12 +492,14 @@ struct CachedPartialTokenSetRatio { detail::SplittedSentenceView::iterator> tokens_s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPartialTokenSetRatio(const Sentence1& s1) -> CachedPartialTokenSetRatio>; template CachedPartialTokenSetRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialTokenSetRatio>; +#endif /** * @brief Helper method that returns the maximum of fuzz::token_set_ratio and @@ -543,11 +556,13 @@ struct CachedTokenRatio { CachedRatio cached_ratio_s1_sorted; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedTokenRatio(const Sentence1& s1) -> CachedTokenRatio>; template CachedTokenRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenRatio>; +#endif /** * @brief Helper method that returns the maximum of @@ -604,11 +619,13 @@ struct CachedPartialTokenRatio { std::vector s1_sorted; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPartialTokenRatio(const Sentence1& s1) -> CachedPartialTokenRatio>; template CachedPartialTokenRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialTokenRatio>; +#endif /** * @brief Calculates a weighted ratio based on the other ratio algorithms @@ -664,11 +681,13 @@ struct CachedWRatio { rapidfuzz::detail::BlockPatternMatchVector blockmap_s1_sorted; }; +#ifdef 
RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedWRatio(const Sentence1& s1) -> CachedWRatio>; template CachedWRatio(InputIt1 first1, InputIt1 last1) -> CachedWRatio>; +#endif /** * @brief Calculates a quick ratio between two strings using fuzz.ratio @@ -727,13 +746,13 @@ struct MultiQRatio { void similarity(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0) const { - similarity(scores, score_count, detail::Range(first2, last2), score_cutoff); + similarity(scores, score_count, detail::make_range(first2, last2), score_cutoff); } template void similarity(double* scores, size_t score_count, const Sentence2& s2, double score_cutoff = 0) const { - rapidfuzz::detail::Range s2_(s2); + auto s2_ = detail::make_range(s2); if (s2_.empty()) { for (size_t i = 0; i < str_lens.size(); ++i) scores[i] = 0; @@ -776,14 +795,17 @@ struct CachedQRatio { CachedRatio cached_ratio; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedQRatio(const Sentence1& s1) -> CachedQRatio>; template CachedQRatio(InputIt1 first1, InputIt1 last1) -> CachedQRatio>; +#endif /**@}*/ -} // namespace rapidfuzz::fuzz +} // namespace fuzz +} // namespace rapidfuzz #include diff --git a/rapidfuzz/fuzz_impl.hpp b/rapidfuzz/fuzz_impl.hpp index 1d2eb463..84be44bf 100644 --- a/rapidfuzz/fuzz_impl.hpp +++ b/rapidfuzz/fuzz_impl.hpp @@ -2,6 +2,7 @@ /* Copyright © 2021-present Max Bachmann */ /* Copyright © 2011 Adam Cohen */ +#include "rapidfuzz/details/Range.hpp" #include #include @@ -11,7 +12,8 @@ #include #include -namespace rapidfuzz::fuzz { +namespace rapidfuzz { +namespace fuzz { /********************************************** * ratio @@ -20,7 +22,7 @@ namespace rapidfuzz::fuzz { template double ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double score_cutoff) { - return ratio(detail::Range(first1, last1), detail::Range(first2, last2), score_cutoff); + return ratio(detail::make_range(first1, last1), detail::make_range(first2, 
last2), score_cutoff); } template @@ -34,7 +36,7 @@ template double CachedRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, double score_hint) const { - return similarity(detail::Range(first2, last2), score_cutoff, score_hint); + return similarity(detail::make_range(first2, last2), score_cutoff, score_hint); } template @@ -50,7 +52,7 @@ double CachedRatio::similarity(const Sentence2& s2, double score_cutoff, namespace fuzz_detail { -static constexpr double norm_distance(size_t dist, size_t lensum, double score_cutoff = 0) +static RAPIDFUZZ_CONSTEXPR_CXX14 double norm_distance(size_t dist, size_t lensum, double score_cutoff = 0) { double score = (lensum > 0) ? (100.0 - 100.0 * static_cast(dist) / static_cast(lensum)) : 100.0; @@ -90,8 +92,10 @@ partial_ratio_impl(const detail::Range& s1, const detail::Range(window.first); auto subseq2_first = s2.begin() + static_cast(window.second); - detail::Range subseq1(subseq1_first, subseq1_first + static_cast(len1)); - detail::Range subseq2(subseq2_first, subseq2_first + static_cast(len1)); + auto subseq1 = + detail::make_range(subseq1_first, subseq1_first + static_cast(len1)); + auto subseq2 = + detail::make_range(subseq2_first, subseq2_first + static_cast(len1)); if (scores[window.first] == std::numeric_limits::max()) { scores[window.first] = cached_ratio.cached_indel.distance(subseq1); @@ -145,7 +149,7 @@ partial_ratio_impl(const detail::Range& s1, const detail::Range(i)); + auto subseq = rapidfuzz::detail::make_range(s2.begin(), s2.begin() + static_cast(i)); if (!s1_char_set.find(subseq.back())) continue; double ls_ratio = cached_ratio.similarity(subseq, score_cutoff); @@ -158,7 +162,7 @@ partial_ratio_impl(const detail::Range& s1, const detail::Range(i), s2.end()); + auto subseq = rapidfuzz::detail::make_range(s2.begin() + static_cast(i), s2.end()); if (!s1_char_set.find(subseq.front())) continue; double ls_ratio = cached_ratio.similarity(subseq, score_cutoff); @@ -207,8 +211,8 @@ ScoreAlignment 
partial_ratio_alignment(InputIt1 first1, InputIt1 last1, if (!len1 || !len2) return ScoreAlignment(static_cast(len1 == len2) * 100.0, 0, len1, 0, len1); - auto s1 = detail::Range(first1, last1); - auto s2 = detail::Range(first2, last2); + auto s1 = detail::make_range(first1, last1); + auto s2 = detail::make_range(first2, last2); auto alignment = fuzz_detail::partial_ratio_impl(s1, s2, score_cutoff); if (alignment.score != 100 && s1.size() == s2.size()) { @@ -255,7 +259,7 @@ CachedPartialRatio::CachedPartialRatio(InputIt1 first1, InputIt1 last1) template template double CachedPartialRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { size_t len1 = s1.size(); size_t len2 = static_cast(std::distance(first2, last2)); @@ -267,8 +271,8 @@ double CachedPartialRatio::similarity(InputIt2 first2, InputIt2 last2, d if (!len1 || !len2) return static_cast(len1 == len2) * 100.0; - auto s1_ = detail::Range(s1); - auto s2 = detail::Range(first2, last2); + auto s1_ = detail::make_range(s1); + auto s2 = detail::make_range(first2, last2); double score = fuzz_detail::partial_ratio_impl(s1_, s2, cached_ratio, s1_char_set, score_cutoff).score; if (score != 100 && s1_.size() == s2.size()) { @@ -282,8 +286,7 @@ double CachedPartialRatio::similarity(InputIt2 first2, InputIt2 last2, d template template -double CachedPartialRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedPartialRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -310,7 +313,7 @@ double token_sort_ratio(const Sentence1& s1, const Sentence2& s2, double score_c template template double CachedTokenSortRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { if (score_cutoff > 100) return 0; @@ -319,8 
+322,7 @@ double CachedTokenSortRatio::similarity(InputIt2 first2, InputIt2 last2, template template -double CachedTokenSortRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedTokenSortRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -349,7 +351,7 @@ double partial_token_sort_ratio(const Sentence1& s1, const Sentence2& s2, double template template double CachedPartialTokenSortRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { if (score_cutoff > 100) return 0; @@ -358,8 +360,7 @@ double CachedPartialTokenSortRatio::similarity(InputIt2 first2, InputIt2 template template -double CachedPartialTokenSortRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedPartialTokenSortRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -438,7 +439,7 @@ double token_set_ratio(const Sentence1& s1, const Sentence2& s2, double score_cu template template double CachedTokenSetRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { if (score_cutoff > 100) return 0; @@ -447,8 +448,7 @@ double CachedTokenSetRatio::similarity(InputIt2 first2, InputIt2 last2, template template -double CachedTokenSetRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedTokenSetRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -497,7 +497,7 @@ double partial_token_set_ratio(const Sentence1& s1, const Sentence2& s2, double template template double 
CachedPartialTokenSetRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { if (score_cutoff > 100) return 0; @@ -506,8 +506,7 @@ double CachedPartialTokenSetRatio::similarity(InputIt2 first2, InputIt2 template template -double CachedPartialTokenSetRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedPartialTokenSetRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -649,8 +648,9 @@ double token_ratio(const std::vector& s1_sorted, double result = 0; auto s2_sorted = tokens_b.join(); if (s1_sorted.size() < 65) { - double norm_sim = detail::indel_normalized_similarity(blockmap_s1_sorted, detail::Range(s1_sorted), - detail::Range(s2_sorted), score_cutoff / 100); + double norm_sim = + detail::indel_normalized_similarity(blockmap_s1_sorted, detail::make_range(s1_sorted), + detail::make_range(s2_sorted), score_cutoff / 100); result = norm_sim * 100; } else { @@ -685,15 +685,14 @@ double token_ratio(const std::vector& s1_sorted, template template double CachedTokenRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { return fuzz_detail::token_ratio(s1_tokens, cached_ratio_s1_sorted, first2, last2, score_cutoff); } template template -double CachedTokenRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedTokenRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -771,15 +770,14 @@ double partial_token_ratio(const std::vector& s1_sorted, template template double CachedPartialTokenRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) 
const + double) const { return fuzz_detail::partial_token_ratio(s1_sorted, tokens_s1, first2, last2, score_cutoff); } template template -double CachedPartialTokenRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedPartialTokenRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -837,13 +835,12 @@ CachedWRatio::CachedWRatio(InputIt1 first1, InputIt1 last1) cached_partial_ratio(first1, last1), tokens_s1(detail::sorted_split(std::begin(s1), std::end(s1))), s1_sorted(tokens_s1.join()), - blockmap_s1_sorted(detail::Range(s1_sorted)) + blockmap_s1_sorted(detail::make_range(s1_sorted)) {} template template -double CachedWRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedWRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, double) const { if (score_cutoff > 100) return 0; @@ -882,8 +879,7 @@ double CachedWRatio::similarity(InputIt2 first2, InputIt2 last2, double template template -double CachedWRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedWRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -914,8 +910,7 @@ double QRatio(const Sentence1& s1, const Sentence2& s2, double score_cutoff) template template -double CachedQRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedQRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, double) const { auto len2 = std::distance(first2, last2); @@ -928,10 +923,10 @@ double CachedQRatio::similarity(InputIt2 first2, InputIt2 last2, double template template -double CachedQRatio::similarity(const Sentence2& s2, 
double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedQRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } -} // namespace rapidfuzz::fuzz +} // namespace fuzz +} // namespace rapidfuzz diff --git a/rapidfuzz_reference/JaroWinkler.hpp b/rapidfuzz_reference/JaroWinkler.hpp index 3b717d8e..b2447ebb 100644 --- a/rapidfuzz_reference/JaroWinkler.hpp +++ b/rapidfuzz_reference/JaroWinkler.hpp @@ -7,7 +7,7 @@ namespace rapidfuzz_reference { template >> + typename = typename std::enable_if::value>::type> double jaro_winkler_similarity(InputIt1 P_first, InputIt1 P_last, InputIt2 T_first, InputIt2 T_last, double prefix_weight = 0.1, double score_cutoff = 0.0) { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 784503d9..c91c3d71 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,4 +1,4 @@ -#find_package(Catch2 2 QUIET) +find_package(Catch2 2 QUIET) if (Catch2_FOUND) message("Using system supplied version of Catch2") else() diff --git a/test/common.hpp b/test/common.hpp index 427f0906..3e4d728c 100644 --- a/test/common.hpp +++ b/test/common.hpp @@ -49,7 +49,7 @@ class BidirectionalIterWrapper { return *this; } - const auto& operator*() const + const value_type& operator*() const { return *iter; } @@ -58,7 +58,13 @@ class BidirectionalIterWrapper { T iter; }; -template >> +template +constexpr auto make_bidir(Iter iter) -> BidirectionalIterWrapper +{ + return BidirectionalIterWrapper(iter); +} + +template ::value>> std::basic_string str_multiply(std::basic_string a, size_t b) { std::basic_string output; diff --git a/test/distance/tests-DamerauLevenshtein.cpp b/test/distance/tests-DamerauLevenshtein.cpp index 7a3b9648..98e61036 100644 --- a/test/distance/tests-DamerauLevenshtein.cpp +++ b/test/distance/tests-DamerauLevenshtein.cpp @@ -17,9 +17,8 @@ size_t damerau_levenshtein_distance(const Sentence1& s1, const Sentence2& s2, size_t 
res2 = rapidfuzz::experimental::damerau_levenshtein_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), max); size_t res3 = rapidfuzz::experimental::damerau_levenshtein_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::experimental::CachedDamerauLevenshtein scorer(s1); + make_bidir(s1.begin()), make_bidir(s1.end()), make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::experimental::CachedDamerauLevenshtein> scorer(s1); size_t res4 = scorer.distance(s2, max); size_t res5 = scorer.distance(s2.begin(), s2.end(), max); REQUIRE(res1 == res2); @@ -36,9 +35,8 @@ size_t damerau_levenshtein_similarity(const Sentence1& s1, const Sentence2& s2, size_t res2 = rapidfuzz::experimental::damerau_levenshtein_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), max); size_t res3 = rapidfuzz::experimental::damerau_levenshtein_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::experimental::CachedDamerauLevenshtein scorer(s1); + make_bidir(s1.begin()), make_bidir(s1.end()), make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::experimental::CachedDamerauLevenshtein> scorer(s1); size_t res4 = scorer.similarity(s2, max); size_t res5 = scorer.similarity(s2.begin(), s2.end(), max); REQUIRE(res1 == res2); @@ -56,9 +54,9 @@ double damerau_levenshtein_normalized_distance(const Sentence1& s1, const Senten double res2 = rapidfuzz::experimental::damerau_levenshtein_normalized_distance( s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); double res3 = rapidfuzz::experimental::damerau_levenshtein_normalized_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); - 
rapidfuzz::experimental::CachedDamerauLevenshtein scorer(s1); + make_bidir(s1.begin()), make_bidir(s1.end()), make_bidir(s2.begin()), make_bidir(s2.end()), + score_cutoff); + rapidfuzz::experimental::CachedDamerauLevenshtein> scorer(s1); double res4 = scorer.normalized_distance(s2, score_cutoff); double res5 = scorer.normalized_distance(s2.begin(), s2.end(), score_cutoff); REQUIRE_THAT(res1, WithinAbs(res2, 0.0001)); @@ -76,9 +74,9 @@ double damerau_levenshtein_normalized_similarity(const Sentence1& s1, const Sent double res2 = rapidfuzz::experimental::damerau_levenshtein_normalized_similarity( s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); double res3 = rapidfuzz::experimental::damerau_levenshtein_normalized_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); - rapidfuzz::experimental::CachedDamerauLevenshtein scorer(s1); + make_bidir(s1.begin()), make_bidir(s1.end()), make_bidir(s2.begin()), make_bidir(s2.end()), + score_cutoff); + rapidfuzz::experimental::CachedDamerauLevenshtein> scorer(s1); double res4 = scorer.normalized_similarity(s2, score_cutoff); double res5 = scorer.normalized_similarity(s2.begin(), s2.end(), score_cutoff); REQUIRE_THAT(res1, WithinAbs(res2, 0.0001)); diff --git a/test/distance/tests-Hamming.cpp b/test/distance/tests-Hamming.cpp index 45feeb4e..cb3b7195 100644 --- a/test/distance/tests-Hamming.cpp +++ b/test/distance/tests-Hamming.cpp @@ -4,6 +4,7 @@ #include #include "../common.hpp" +#include "rapidfuzz/details/type_traits.hpp" using Catch::Matchers::WithinAbs; @@ -13,10 +14,9 @@ size_t hamming_distance(const Sentence1& s1, const Sentence2& s2, { size_t res1 = rapidfuzz::hamming_distance(s1, s2, max); size_t res2 = rapidfuzz::hamming_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), max); - size_t res3 = rapidfuzz::hamming_distance( - BidirectionalIterWrapper(s1.begin()), 
BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::CachedHamming scorer(s1); + size_t res3 = rapidfuzz::hamming_distance(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::CachedHamming> scorer(s1); size_t res4 = scorer.distance(s2, max); size_t res5 = scorer.distance(s2.begin(), s2.end(), max); REQUIRE(res1 == res2); @@ -31,10 +31,9 @@ size_t hamming_similarity(const Sentence1& s1, const Sentence2& s2, size_t max = { size_t res1 = rapidfuzz::hamming_similarity(s1, s2, max); size_t res2 = rapidfuzz::hamming_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), max); - size_t res3 = rapidfuzz::hamming_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::CachedHamming scorer(s1); + size_t res3 = rapidfuzz::hamming_similarity(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::CachedHamming> scorer(s1); size_t res4 = scorer.similarity(s2, max); size_t res5 = scorer.similarity(s2.begin(), s2.end(), max); REQUIRE(res1 == res2); @@ -50,10 +49,10 @@ double hamming_normalized_distance(const Sentence1& s1, const Sentence2& s2, dou double res1 = rapidfuzz::hamming_normalized_distance(s1, s2, score_cutoff); double res2 = rapidfuzz::hamming_normalized_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); - double res3 = rapidfuzz::hamming_normalized_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); - rapidfuzz::CachedHamming scorer(s1); + double res3 = + rapidfuzz::hamming_normalized_distance(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), score_cutoff); + 
rapidfuzz::CachedHamming> scorer(s1); double res4 = scorer.normalized_distance(s2, score_cutoff); double res5 = scorer.normalized_distance(s2.begin(), s2.end(), score_cutoff); REQUIRE_THAT(res1, WithinAbs(res2, 0.0001)); @@ -69,10 +68,10 @@ double hamming_normalized_similarity(const Sentence1& s1, const Sentence2& s2, d double res1 = rapidfuzz::hamming_normalized_similarity(s1, s2, score_cutoff); double res2 = rapidfuzz::hamming_normalized_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); - double res3 = rapidfuzz::hamming_normalized_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); - rapidfuzz::CachedHamming scorer(s1); + double res3 = + rapidfuzz::hamming_normalized_similarity(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), score_cutoff); + rapidfuzz::CachedHamming> scorer(s1); double res4 = scorer.normalized_similarity(s2, score_cutoff); double res5 = scorer.normalized_similarity(s2.begin(), s2.end(), score_cutoff); REQUIRE_THAT(res1, WithinAbs(res2, 0.0001)); diff --git a/test/distance/tests-Indel.cpp b/test/distance/tests-Indel.cpp index 4d787a4e..d4204657 100644 --- a/test/distance/tests-Indel.cpp +++ b/test/distance/tests-Indel.cpp @@ -14,10 +14,9 @@ size_t indel_distance(const Sentence1& s1, const Sentence2& s2, { size_t res1 = rapidfuzz::indel_distance(s1, s2, max); size_t res2 = rapidfuzz::indel_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), max); - size_t res3 = rapidfuzz::indel_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::CachedIndel scorer(s1); + size_t res3 = rapidfuzz::indel_distance(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::CachedIndel> scorer(s1); 
size_t res4 = scorer.distance(s2, max); size_t res5 = scorer.distance(s2.begin(), s2.end(), max); #ifdef RAPIDFUZZ_SIMD @@ -62,10 +61,9 @@ size_t indel_similarity(const Sentence1& s1, const Sentence2& s2, size_t max = 0 { size_t res1 = rapidfuzz::indel_similarity(s1, s2, max); size_t res2 = rapidfuzz::indel_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), max); - size_t res3 = rapidfuzz::indel_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::CachedIndel scorer(s1); + size_t res3 = rapidfuzz::indel_similarity(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::CachedIndel> scorer(s1); size_t res4 = scorer.similarity(s2, max); size_t res5 = scorer.similarity(s2.begin(), s2.end(), max); #ifdef RAPIDFUZZ_SIMD @@ -109,10 +107,10 @@ double indel_normalized_distance(const Sentence1& s1, const Sentence2& s2, doubl double res1 = rapidfuzz::indel_normalized_distance(s1, s2, score_cutoff); double res2 = rapidfuzz::indel_normalized_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); - double res3 = rapidfuzz::indel_normalized_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); - rapidfuzz::CachedIndel scorer(s1); + double res3 = + rapidfuzz::indel_normalized_distance(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), score_cutoff); + rapidfuzz::CachedIndel> scorer(s1); double res4 = scorer.normalized_distance(s2, score_cutoff); double res5 = scorer.normalized_distance(s2.begin(), s2.end(), score_cutoff); #ifdef RAPIDFUZZ_SIMD @@ -156,10 +154,10 @@ double indel_normalized_similarity(const Sentence1& s1, const Sentence2& s2, dou double res1 = rapidfuzz::indel_normalized_similarity(s1, s2, 
score_cutoff); double res2 = rapidfuzz::indel_normalized_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); - double res3 = rapidfuzz::indel_normalized_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); - rapidfuzz::CachedIndel scorer(s1); + double res3 = + rapidfuzz::indel_normalized_similarity(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), score_cutoff); + rapidfuzz::CachedIndel> scorer(s1); double res4 = scorer.normalized_similarity(s2, score_cutoff); double res5 = scorer.normalized_similarity(s2.begin(), s2.end(), score_cutoff); #ifdef RAPIDFUZZ_SIMD diff --git a/test/distance/tests-Jaro.cpp b/test/distance/tests-Jaro.cpp index fb3d0823..1bfb7341 100644 --- a/test/distance/tests-Jaro.cpp +++ b/test/distance/tests-Jaro.cpp @@ -16,10 +16,10 @@ double jaro_similarity(const Sentence1& s1, const Sentence2& s2, double score_cu rapidfuzz::jaro_normalized_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); #if 0 // todo double res5 = rapidfuzz::jaro_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); + make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), score_cutoff); #endif - rapidfuzz::CachedJaro scorer(s1); + rapidfuzz::CachedJaro> scorer(s1); double res6 = scorer.similarity(s2, score_cutoff); double res7 = scorer.similarity(s2.begin(), s2.end(), score_cutoff); double res8 = scorer.normalized_similarity(s2, score_cutoff); @@ -86,10 +86,10 @@ double jaro_distance(const Sentence1& s1, const Sentence2& s2, double score_cuto rapidfuzz::jaro_normalized_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); #if 0 // todo double res5 = rapidfuzz::jaro_distance( - 
BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); + make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), score_cutoff); #endif - rapidfuzz::CachedJaro scorer(s1); + rapidfuzz::CachedJaro> scorer(s1); double res6 = scorer.distance(s2, score_cutoff); double res7 = scorer.distance(s2.begin(), s2.end(), score_cutoff); double res8 = scorer.normalized_distance(s2, score_cutoff); diff --git a/test/distance/tests-JaroWinkler.cpp b/test/distance/tests-JaroWinkler.cpp index bccf3915..f7f5fcdd 100644 --- a/test/distance/tests-JaroWinkler.cpp +++ b/test/distance/tests-JaroWinkler.cpp @@ -16,7 +16,7 @@ double jaro_winkler_similarity(const Sentence1& s1, const Sentence2& s2, double double res3 = rapidfuzz::jaro_winkler_normalized_similarity(s1, s2, prefix_weight, score_cutoff); double res4 = rapidfuzz::jaro_winkler_normalized_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), prefix_weight, score_cutoff); - rapidfuzz::CachedJaroWinkler scorer(s1, prefix_weight); + rapidfuzz::CachedJaroWinkler> scorer(s1, prefix_weight); double res5 = scorer.similarity(s2, score_cutoff); double res6 = scorer.similarity(s2.begin(), s2.end(), score_cutoff); double res7 = scorer.normalized_similarity(s2, score_cutoff); @@ -72,7 +72,7 @@ double jaro_winkler_distance(const Sentence1& s1, const Sentence2& s2, double pr double res3 = rapidfuzz::jaro_winkler_normalized_distance(s1, s2, prefix_weight, score_cutoff); double res4 = rapidfuzz::jaro_winkler_normalized_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), prefix_weight, score_cutoff); - rapidfuzz::CachedJaroWinkler scorer(s1, prefix_weight); + rapidfuzz::CachedJaroWinkler> scorer(s1, prefix_weight); double res5 = scorer.distance(s2, score_cutoff); double res6 = scorer.distance(s2.begin(), s2.end(), score_cutoff); double res7 = scorer.normalized_distance(s2, score_cutoff); diff --git 
a/test/distance/tests-LCSseq.cpp b/test/distance/tests-LCSseq.cpp index f8cdeab1..9510bd4b 100644 --- a/test/distance/tests-LCSseq.cpp +++ b/test/distance/tests-LCSseq.cpp @@ -14,10 +14,9 @@ size_t lcs_seq_distance(const Sentence1& s1, const Sentence2& s2, { size_t res1 = rapidfuzz::lcs_seq_distance(s1, s2, max); size_t res2 = rapidfuzz::lcs_seq_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), max); - size_t res3 = rapidfuzz::lcs_seq_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::CachedLCSseq scorer(s1); + size_t res3 = rapidfuzz::lcs_seq_distance(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::CachedLCSseq> scorer(s1); size_t res4 = scorer.distance(s2, max); size_t res5 = scorer.distance(s2.begin(), s2.end(), max); #ifdef RAPIDFUZZ_SIMD @@ -62,10 +61,9 @@ size_t lcs_seq_similarity(const Sentence1& s1, const Sentence2& s2, size_t max = { size_t res1 = rapidfuzz::lcs_seq_similarity(s1, s2, max); size_t res2 = rapidfuzz::lcs_seq_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), max); - size_t res3 = rapidfuzz::lcs_seq_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::CachedLCSseq scorer(s1); + size_t res3 = rapidfuzz::lcs_seq_similarity(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::CachedLCSseq> scorer(s1); size_t res4 = scorer.similarity(s2, max); size_t res5 = scorer.similarity(s2.begin(), s2.end(), max); #ifdef RAPIDFUZZ_SIMD @@ -109,10 +107,10 @@ double lcs_seq_normalized_distance(const Sentence1& s1, const Sentence2& s2, dou double res1 = rapidfuzz::lcs_seq_normalized_distance(s1, s2, score_cutoff); double res2 = 
rapidfuzz::lcs_seq_normalized_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); - double res3 = rapidfuzz::lcs_seq_normalized_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); - rapidfuzz::CachedLCSseq scorer(s1); + double res3 = + rapidfuzz::lcs_seq_normalized_distance(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), score_cutoff); + rapidfuzz::CachedLCSseq> scorer(s1); double res4 = scorer.normalized_distance(s2, score_cutoff); double res5 = scorer.normalized_distance(s2.begin(), s2.end(), score_cutoff); REQUIRE_THAT(res1, WithinAbs(res2, 0.0001)); @@ -128,10 +126,10 @@ double lcs_seq_normalized_similarity(const Sentence1& s1, const Sentence2& s2, d double res1 = rapidfuzz::lcs_seq_normalized_similarity(s1, s2, score_cutoff); double res2 = rapidfuzz::lcs_seq_normalized_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); - double res3 = rapidfuzz::lcs_seq_normalized_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); - rapidfuzz::CachedLCSseq scorer(s1); + double res3 = + rapidfuzz::lcs_seq_normalized_similarity(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), score_cutoff); + rapidfuzz::CachedLCSseq> scorer(s1); double res4 = scorer.normalized_similarity(s2, score_cutoff); double res5 = scorer.normalized_similarity(s2.begin(), s2.end(), score_cutoff); REQUIRE_THAT(res1, WithinAbs(res2, 0.0001)); diff --git a/test/distance/tests-Levenshtein.cpp b/test/distance/tests-Levenshtein.cpp index 69841ce3..d5037787 100644 --- a/test/distance/tests-Levenshtein.cpp +++ b/test/distance/tests-Levenshtein.cpp @@ -19,10 +19,9 @@ size_t levenshtein_distance(const Sentence1& s1, const Sentence2& s2, { size_t 
res1 = rapidfuzz::levenshtein_distance(s1, s2, weights, max); size_t res2 = rapidfuzz::levenshtein_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), weights, max); - size_t res3 = rapidfuzz::levenshtein_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), weights, max); - rapidfuzz::CachedLevenshtein scorer(s1, weights); + size_t res3 = rapidfuzz::levenshtein_distance(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), weights, max); + rapidfuzz::CachedLevenshtein> scorer(s1, weights); size_t res4 = scorer.distance(s2, max); size_t res5 = scorer.distance(s2.begin(), s2.end(), max); #ifdef RAPIDFUZZ_SIMD @@ -77,10 +76,10 @@ double levenshtein_normalized_similarity(const Sentence1& s1, const Sentence2& s double res1 = rapidfuzz::levenshtein_normalized_similarity(s1, s2, weights, score_cutoff); double res2 = rapidfuzz::levenshtein_normalized_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), weights, score_cutoff); - double res3 = rapidfuzz::levenshtein_normalized_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), weights, score_cutoff); - rapidfuzz::CachedLevenshtein scorer(s1, weights); + double res3 = rapidfuzz::levenshtein_normalized_similarity(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), + weights, score_cutoff); + rapidfuzz::CachedLevenshtein> scorer(s1, weights); double res4 = scorer.normalized_similarity(s2, score_cutoff); double res5 = scorer.normalized_similarity(s2.begin(), s2.end(), score_cutoff); REQUIRE_THAT(res1, WithinAbs(res2, 0.0001)); @@ -244,8 +243,8 @@ TEST_CASE("Levenshtein_find_hirschberg_pos") std::string s1 = str_multiply(std::string("abb"), 2); std::string s2 = str_multiply(std::string("ccccca"), 2); - auto hpos = 
rapidfuzz::detail::find_hirschberg_pos(rapidfuzz::detail::Range(s1), - rapidfuzz::detail::Range(s2)); + auto hpos = rapidfuzz::detail::find_hirschberg_pos(rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2)); REQUIRE(hpos.left_score == 5); REQUIRE(hpos.right_score == 6); REQUIRE(hpos.s2_mid == 6); @@ -256,8 +255,8 @@ TEST_CASE("Levenshtein_find_hirschberg_pos") std::string s1 = str_multiply(std::string("abb"), 8 * 64); std::string s2 = str_multiply(std::string("ccccca"), 8 * 64); - auto hpos = rapidfuzz::detail::find_hirschberg_pos(rapidfuzz::detail::Range(s1), - rapidfuzz::detail::Range(s2)); + auto hpos = rapidfuzz::detail::find_hirschberg_pos(rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2)); REQUIRE(hpos.left_score == 1280); REQUIRE(hpos.right_score == 1281); REQUIRE(hpos.s2_mid == 1536); @@ -268,8 +267,8 @@ TEST_CASE("Levenshtein_find_hirschberg_pos") std::string s1 = "aaaa"; std::string s2 = "bbbbbbaaaa"; - auto hpos = rapidfuzz::detail::find_hirschberg_pos(rapidfuzz::detail::Range(s1), - rapidfuzz::detail::Range(s2)); + auto hpos = rapidfuzz::detail::find_hirschberg_pos(rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2)); REQUIRE(hpos.left_score == 5); REQUIRE(hpos.right_score == 1); REQUIRE(hpos.s2_mid == 5); @@ -350,12 +349,12 @@ TEST_CASE("Levenshtein small band") "LOTJKTie3OINeOTeJKWeOSeCGOdccNKLYemunmeJKWk"; rapidfuzz::Editops ops1; - rapidfuzz::detail::levenshtein_align(ops1, rapidfuzz::detail::Range(s1), - rapidfuzz::detail::Range(s2)); + rapidfuzz::detail::levenshtein_align(ops1, rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2)); REQUIRE(s2 == rapidfuzz::editops_apply_str(ops1, s1, s2)); rapidfuzz::Editops ops2; - rapidfuzz::detail::levenshtein_align(ops2, rapidfuzz::detail::Range(s1), rapidfuzz::detail::Range(s2), - ops1.size()); + rapidfuzz::detail::levenshtein_align(ops2, rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2), ops1.size()); 
REQUIRE(ops1 == ops2); } @@ -398,12 +397,12 @@ TEST_CASE("Levenshtein small band") "HXUJGDGOhccZ"; rapidfuzz::Editops ops1; - rapidfuzz::detail::levenshtein_align(ops1, rapidfuzz::detail::Range(s1), - rapidfuzz::detail::Range(s2)); + rapidfuzz::detail::levenshtein_align(ops1, rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2)); REQUIRE(s2 == rapidfuzz::editops_apply_str(ops1, s1, s2)); rapidfuzz::Editops ops2; - rapidfuzz::detail::levenshtein_align(ops2, rapidfuzz::detail::Range(s1), rapidfuzz::detail::Range(s2), - ops1.size()); + rapidfuzz::detail::levenshtein_align(ops2, rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2), ops1.size()); REQUIRE(ops1 == ops2); } } @@ -444,12 +443,12 @@ TEST_CASE("Levenshtein large band (ocr example)") std::vector s2 = get_subsequence(ocr_example2, 51, 6516); rapidfuzz::Editops ops1; - rapidfuzz::detail::levenshtein_align(ops1, rapidfuzz::detail::Range(s1), - rapidfuzz::detail::Range(s2)); + rapidfuzz::detail::levenshtein_align(ops1, rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2)); REQUIRE(s2 == rapidfuzz::editops_apply_vec(ops1, s1, s2)); rapidfuzz::Editops ops2; - rapidfuzz::detail::levenshtein_align(ops2, rapidfuzz::detail::Range(s1), rapidfuzz::detail::Range(s2), - ops1.size()); + rapidfuzz::detail::levenshtein_align(ops2, rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2), ops1.size()); REQUIRE(ops1 == ops2); } diff --git a/test/distance/tests-OSA.cpp b/test/distance/tests-OSA.cpp index a5f7147b..22c48a3c 100644 --- a/test/distance/tests-OSA.cpp +++ b/test/distance/tests-OSA.cpp @@ -10,10 +10,9 @@ size_t osa_distance(const Sentence1& s1, const Sentence2& s2, size_t max = std:: { size_t res1 = rapidfuzz::osa_distance(s1, s2, max); size_t res2 = rapidfuzz::osa_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), max); - size_t res3 = rapidfuzz::osa_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - 
BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::CachedOSA scorer(s1); + size_t res3 = rapidfuzz::osa_distance(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::CachedOSA> scorer(s1); size_t res4 = scorer.distance(s2, max); size_t res5 = scorer.distance(s2.begin(), s2.end(), max); #ifdef RAPIDFUZZ_SIMD diff --git a/test/tests-common.cpp b/test/tests-common.cpp index 0e66fe94..9882bc98 100644 --- a/test/tests-common.cpp +++ b/test/tests-common.cpp @@ -1,3 +1,4 @@ +#include "rapidfuzz/details/Range.hpp" #include #include @@ -8,28 +9,28 @@ TEST_CASE("remove affix") std::string s2 = "aaabbbbaaaaa"; { - rapidfuzz::detail::Range s1_(s1); - rapidfuzz::detail::Range s2_(s2); + auto s1_ = rapidfuzz::detail::make_range(s1); + auto s2_ = rapidfuzz::detail::make_range(s2); REQUIRE(rapidfuzz::detail::remove_common_prefix(s1_, s2_) == 2); - REQUIRE(s1_ == rapidfuzz::detail::Range("bbbbaaaa")); - REQUIRE(s2_ == rapidfuzz::detail::Range("abbbbaaaaa")); + REQUIRE(s1_ == rapidfuzz::detail::make_range("bbbbaaaa")); + REQUIRE(s2_ == rapidfuzz::detail::make_range("abbbbaaaaa")); } { - rapidfuzz::detail::Range s1_(s1); - rapidfuzz::detail::Range s2_(s2); + auto s1_ = rapidfuzz::detail::make_range(s1); + auto s2_ = rapidfuzz::detail::make_range(s2); REQUIRE(rapidfuzz::detail::remove_common_suffix(s1_, s2_) == 4); - REQUIRE(s1_ == rapidfuzz::detail::Range("aabbbb")); - REQUIRE(s2_ == rapidfuzz::detail::Range("aaabbbba")); + REQUIRE(s1_ == rapidfuzz::detail::make_range("aabbbb")); + REQUIRE(s2_ == rapidfuzz::detail::make_range("aaabbbba")); } { - rapidfuzz::detail::Range s1_(s1); - rapidfuzz::detail::Range s2_(s2); + auto s1_ = rapidfuzz::detail::make_range(s1); + auto s2_ = rapidfuzz::detail::make_range(s2); auto affix = rapidfuzz::detail::remove_common_affix(s1_, s2_); REQUIRE(affix.prefix_len == 2); REQUIRE(affix.suffix_len == 4); - REQUIRE(s1_ == rapidfuzz::detail::Range("bbbb")); 
- REQUIRE(s2_ == rapidfuzz::detail::Range("abbbba")); + REQUIRE(s1_ == rapidfuzz::detail::make_range("bbbb")); + REQUIRE(s2_ == rapidfuzz::detail::make_range("abbbba")); } }