From e91aa8ef8007f07a08eacb7ab82a13e9317fccfe Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Tue, 24 Dec 2024 23:07:07 +0100 Subject: [PATCH 01/18] hide if constexpr usage behind macro This improves the support for older C++ versions --- .clang-format | 2 + extras/rapidfuzz_amalgamated.hpp | 544 ++++++++++++++---------- fuzzing/CMakeLists.txt | 2 +- rapidfuzz/details/Range.hpp | 16 +- rapidfuzz/details/common.hpp | 10 +- rapidfuzz/details/config.hpp | 13 + rapidfuzz/details/distance.hpp | 61 ++- rapidfuzz/details/types.hpp | 1 + rapidfuzz/distance/LCSseq.hpp | 16 +- rapidfuzz/distance/LCSseq_impl.hpp | 44 +- rapidfuzz/distance/Levenshtein.hpp | 16 +- rapidfuzz/distance/Levenshtein_impl.hpp | 129 ++++-- rapidfuzz/distance/OSA.hpp | 16 +- rapidfuzz/distance/OSA_impl.hpp | 4 +- test/CMakeLists.txt | 2 +- 15 files changed, 541 insertions(+), 335 deletions(-) create mode 100644 rapidfuzz/details/config.hpp diff --git a/.clang-format b/.clang-format index ee0513ae..101b23e3 100644 --- a/.clang-format +++ b/.clang-format @@ -25,4 +25,6 @@ BraceWrapping: AllowAllConstructorInitializersOnNextLine: true ConstructorInitializerAllOnOneLineOrOnePerLine: true AllowShortCaseLabelsOnASingleLine: true +IfMacros: + - RAPIDFUZZ_IF_CONSTEXPR IndentPPDirectives: AfterHash diff --git a/extras/rapidfuzz_amalgamated.hpp b/extras/rapidfuzz_amalgamated.hpp index 353088da..9e690a74 100644 --- a/extras/rapidfuzz_amalgamated.hpp +++ b/extras/rapidfuzz_amalgamated.hpp @@ -1,24 +1,24 @@ // Licensed under the MIT License . // SPDX-License-Identifier: MIT // RapidFuzz v1.0.2 -// Generated: 2024-12-14 13:57:57.746331 +// Generated: 2024-12-25 01:15:57.554308 // ---------------------------------------------------------- // This file is an amalgamation of multiple different files. // You probably shouldn't edit it directly. // ---------------------------------------------------------- #ifndef RAPIDFUZZ_AMALGAMATED_HPP_INCLUDED -#define RAPIDFUZZ_AMALGAMATED_HPP_INCLUDED +# define RAPIDFUZZ_AMALGAMATED_HPP_INCLUDED -#include +# include -#include -#include -#include -#include +# include +# include +# include +# include -#include -#include -#include +# include +# include +# include namespace rapidfuzz::detail { @@ -215,11 +215,11 @@ struct HybridGrowingHashmap { } // namespace rapidfuzz::detail -#include -#include -#include -#include -#include +# include +# include +# include +# include +# include namespace rapidfuzz::detail { @@ -411,27 +411,27 @@ struct ShiftedBitMatrix { } // namespace rapidfuzz::detail -#include -#include -#include -#include -#include -#include -#include -#include -#include +# include +# include +# include +# include +# include +# include +# include +# include +# include namespace rapidfuzz::detail { static inline void assume(bool b) { -#if defined(__clang__) +# if defined(__clang__) __builtin_assume(b); -#elif defined(__GNUC__) || defined(__GNUG__) +# elif defined(__GNUC__) || defined(__GNUG__) if (!b) __builtin_unreachable(); -#elif defined(_MSC_VER) +# elif defined(_MSC_VER) __assume(b); -#endif +# endif } template @@ -536,24 +536,12 @@ class Range { constexpr void remove_prefix(size_t n) { - if constexpr (std::is_base_of_v::iterator_category>) - _first += static_cast(n); - else - for (size_t i = 0; i < n; ++i) - _first++; - + std::advance(_first, static_cast(n)); _size -= n; } constexpr void remove_suffix(size_t n) { - if constexpr (std::is_base_of_v::iterator_category>) - _last -= static_cast(n); - else - for (size_t i = 0; i < n; ++i) - _last--; - + std::advance(_last, -static_cast(n)); _size -= n; } @@ -637,14 +625,24 @@ using RangeVec = std::vector>; } // namespace rapidfuzz::detail -#include +# include -#include +# include -#include -#include -#include -#include +# include + +# if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) +# define RAPIDFUZZ_DEDUCTION_GUIDES +# define RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE 1 +# define RAPIDFUZZ_IF_CONSTEXPR if constexpr +# else +# define RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE 0 +# define RAPIDFUZZ_IF_CONSTEXPR if +# endif + +# include +# include +# include namespace rapidfuzz { @@ -1233,8 +1231,8 @@ inline bool operator==(const ScoreAlignment& a, const ScoreAlignment& b) } // namespace rapidfuzz -#include -#include +# include +# include namespace rapidfuzz { @@ -1362,16 +1360,16 @@ auto SplittedSentenceView::join() const -> std::vector } // namespace rapidfuzz::detail -#include -#include -#include -#include -#include -#include +# include +# include +# include +# include +# include +# include -#if defined(_MSC_VER) && !defined(__clang__) -# include -#endif +# if defined(_MSC_VER) && !defined(__clang__) +# include +# endif namespace rapidfuzz::detail { @@ -1462,15 +1460,15 @@ constexpr T rotl(T x, unsigned int n) assert(n < num_bits); unsigned int count_mask = num_bits - 1; -#if _MSC_VER && !defined(__clang__) -# pragma warning(push) +# if _MSC_VER && !defined(__clang__) +# pragma warning(push) /* unary minus operator applied to unsigned type, result still unsigned */ -# pragma warning(disable : 4146) -#endif +# pragma warning(disable : 4146) +# endif return (x << n) | (x >> (-n & count_mask)); -#if _MSC_VER && !defined(__clang__) -# pragma warning(pop) -#endif +# if _MSC_VER && !defined(__clang__) +# pragma warning(pop) +# endif } /** @@ -1479,15 +1477,15 @@ constexpr T rotl(T x, unsigned int n) template constexpr T blsi(T a) { -#if _MSC_VER && !defined(__clang__) -# pragma warning(push) +# if _MSC_VER && !defined(__clang__) +# pragma warning(push) /* unary minus operator applied to unsigned type, result still unsigned */ -# pragma warning(disable : 4146) -#endif +# pragma warning(disable : 4146) +# endif return a & -a; -#if _MSC_VER && !defined(__clang__) -# pragma warning(pop) -#endif +# if _MSC_VER && !defined(__clang__) +# pragma warning(pop) +# endif } /** @@ -1509,7 +1507,7 @@ constexpr T blsmsk(T a) return a ^ (a - 1); } -#if defined(_MSC_VER) && !defined(__clang__) +# if defined(_MSC_VER) && !defined(__clang__) static inline unsigned int countr_zero(uint32_t x) { unsigned long trailing_zero = 0; @@ -1517,14 +1515,14 @@ static inline unsigned int countr_zero(uint32_t x) return trailing_zero; } -# if defined(_M_ARM) || defined(_M_X64) +# if defined(_M_ARM) || defined(_M_X64) static inline unsigned int countr_zero(uint64_t x) { unsigned long trailing_zero = 0; _BitScanForward64(&trailing_zero, x); return trailing_zero; } -# else +# else static inline unsigned int countr_zero(uint64_t x) { uint32_t msh = (uint32_t)(x >> 32); @@ -1532,9 +1530,9 @@ static inline unsigned int countr_zero(uint64_t x) if (lsh != 0) return countr_zero(lsh); return 32 + countr_zero(msh); } -# endif +# endif -#else /* gcc / clang */ +# else /* gcc / clang */ static inline unsigned int countr_zero(uint32_t x) { return static_cast(__builtin_ctz(x)); @@ -1544,7 +1542,7 @@ static inline unsigned int countr_zero(uint64_t x) { return static_cast(__builtin_ctzll(x)); } -#endif +# endif static inline unsigned int countr_zero(uint16_t x) { @@ -1570,9 +1568,9 @@ constexpr void unroll(F&& f) } // namespace rapidfuzz::detail -#if defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES) -# include -#endif +# if defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES) +# include +# endif namespace rapidfuzz::detail { @@ -1597,10 +1595,8 @@ static inline size_t abs_diff(size_t a, size_t b) template TO opt_static_cast(const FROM& value) { - if constexpr (std::is_same_v) - return value; - else - return static_cast(value); + /* calling the cast through this template function somehow avoids useless cast warnings */ + return static_cast(value); } /** @@ -1632,36 +1628,36 @@ SplittedSentenceView sorted_split(InputIt first, InputIt last); static inline void* rf_aligned_alloc(size_t alignment, size_t size) { -#if defined(_WIN32) +# if defined(_WIN32) return _aligned_malloc(size, alignment); -#elif defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES) +# elif defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES) return _mm_malloc(size, alignment); -#elif defined(__ANDROID__) && __ANDROID_API__ > 16 +# elif defined(__ANDROID__) && __ANDROID_API__ > 16 void* ptr = nullptr; return posix_memalign(&ptr, alignment, size) ? nullptr : ptr; -#else +# else return aligned_alloc(alignment, size); -#endif +# endif } static inline void rf_aligned_free(void* ptr) { -#if defined(_WIN32) +# if defined(_WIN32) _aligned_free(ptr); -#elif defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES) +# elif defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES) _mm_free(ptr); -#else +# else free(ptr); -#endif +# endif } /**@}*/ } // namespace rapidfuzz::detail -#include -#include -#include +# include +# include +# include namespace rapidfuzz::detail { @@ -1829,20 +1825,20 @@ SplittedSentenceView sorted_split(InputIt first, InputIt last) } // namespace rapidfuzz::detail -#include +# include /* RAPIDFUZZ_LTO_HACK is used to differentiate functions between different * translation units to avoid warnings when using lto */ -#ifndef RAPIDFUZZ_EXCLUDE_SIMD -# if __AVX2__ -# define RAPIDFUZZ_SIMD -# define RAPIDFUZZ_AVX2 -# define RAPIDFUZZ_LTO_HACK 0 +# ifndef RAPIDFUZZ_EXCLUDE_SIMD +# if __AVX2__ +# define RAPIDFUZZ_SIMD +# define RAPIDFUZZ_AVX2 +# define RAPIDFUZZ_LTO_HACK 0 -# include -# include -# include -# include +# include +# include +# include +# include namespace rapidfuzz { namespace detail { @@ -2482,15 +2478,15 @@ static inline native_simd sllv(const native_simd& a, } // namespace detail } // namespace rapidfuzz -# elif (defined(_M_AMD64) || defined(_M_X64)) || defined(__SSE2__) -# define RAPIDFUZZ_SIMD -# define RAPIDFUZZ_SSE2 -# define RAPIDFUZZ_LTO_HACK 1 +# elif (defined(_M_AMD64) || defined(_M_X64)) || defined(__SSE2__) +# define RAPIDFUZZ_SIMD +# define RAPIDFUZZ_SSE2 +# define RAPIDFUZZ_LTO_HACK 1 -# include -# include -# include -# include +# include +# include +# include +# include namespace rapidfuzz { namespace detail { @@ -3003,9 +2999,9 @@ static inline native_simd operator>=(const native_simd& a, const native_simd& b) noexcept { /* sse4.1 */ -# if 0 +# if 0 return _mm_cmpeq_epi16(_mm_max_epu16(a, b), a); // a == max(a,b) -# endif +# endif __m128i s = _mm_subs_epu16(b, a); // b-a, saturated return _mm_cmpeq_epi16(s, _mm_setzero_si128()); // s == 0 @@ -3020,9 +3016,9 @@ static inline native_simd operator>=(const native_simd& a, const native_simd& b) noexcept { /* sse4.1 */ -# if 0 +# if 0 return (Vec4ib)_mm_cmpeq_epi32(_mm_max_epu32(a, b), a); // a == max(a,b) -# endif +# endif return ~(b > a); } @@ -3085,9 +3081,9 @@ static inline native_simd operator<(const native_simd& a, const native_sim } // namespace detail } // namespace rapidfuzz +# endif # endif -#endif -#include +# include namespace rapidfuzz::detail { @@ -3260,11 +3256,21 @@ struct SimilarityBase : public NormalizedMetricBase { (maximum >= score_hint) ? maximum - score_hint : static_cast(WorstSimilarity); ResType sim = T::_similarity(s1, s2, std::forward(args)..., cutoff_similarity, hint_similarity); ResType dist = maximum - sim; + return _apply_distance_score_cutoff(dist, score_cutoff); + } - if constexpr (std::is_floating_point_v) - return (dist <= score_cutoff) ? dist : 1.0; - else - return (dist <= score_cutoff) ? dist : score_cutoff + 1; + template + static std::enable_if_t, U> _apply_distance_score_cutoff(U score, + U score_cutoff) + { + return (score <= score_cutoff) ? score : 1.0; + } + + template + static std::enable_if_t, U> _apply_distance_score_cutoff(U score, + U score_cutoff) + { + return (score <= score_cutoff) ? score : score_cutoff + 1; } SimilarityBase() @@ -3430,11 +3436,21 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { ResType hint_similarity = (maximum > score_hint) ? maximum - score_hint : 0; ResType sim = derived._similarity(s2, cutoff_similarity, hint_similarity); ResType dist = maximum - sim; + return _apply_distance_score_cutoff(dist, score_cutoff); + } - if constexpr (std::is_floating_point_v) - return (dist <= score_cutoff) ? dist : 1.0; - else - return (dist <= score_cutoff) ? dist : score_cutoff + 1; + template + static std::enable_if_t, U> _apply_distance_score_cutoff(U score, + U score_cutoff) + { + return (score <= score_cutoff) ? score : 1.0; + } + + template + static std::enable_if_t, U> _apply_distance_score_cutoff(U score, + U score_cutoff) + { + return (score <= score_cutoff) ? score : score_cutoff + 1; } CachedSimilarityBase() @@ -3483,7 +3499,8 @@ struct MultiNormalizedMetricBase { // reinterpretation only works when the types have the same size ResType* scores_orig = nullptr; - if constexpr (sizeof(double) == sizeof(ResType)) + + RAPIDFUZZ_IF_CONSTEXPR (sizeof(double) == sizeof(ResType)) scores_orig = reinterpret_cast(scores); else scores_orig = new ResType[derived.result_count()]; @@ -3497,7 +3514,7 @@ struct MultiNormalizedMetricBase { scores[i] = (norm_dist <= score_cutoff) ? norm_dist : 1.0; } - if constexpr (sizeof(double) != sizeof(ResType)) delete[] scores_orig; + RAPIDFUZZ_IF_CONSTEXPR (sizeof(double) != sizeof(ResType)) delete[] scores_orig; } template @@ -3612,14 +3629,24 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { for (size_t i = 0; i < derived.get_input_count(); ++i) { ResType maximum = derived.maximum(i, s2); ResType dist = maximum - scores[i]; - - if constexpr (std::is_floating_point_v) - scores[i] = (dist <= score_cutoff) ? dist : 1.0; - else - scores[i] = (dist <= score_cutoff) ? dist : score_cutoff + 1; + scores[i] = _apply_distance_score_cutoff(dist, score_cutoff); } } + template + static std::enable_if_t, U> _apply_distance_score_cutoff(U score, + U score_cutoff) + { + return (score <= score_cutoff) ? score : 1.0; + } + + template + static std::enable_if_t, U> _apply_distance_score_cutoff(U score, + U score_cutoff) + { + return (score <= score_cutoff) ? score : score_cutoff + 1; + } + MultiSimilarityBase() {} friend T; @@ -3902,9 +3929,9 @@ CachedDamerauLevenshtein(InputIt1 first1, InputIt1 last1) -> CachedDamerauLevens } // namespace experimental } // namespace rapidfuzz -#include +# include -#include +# include namespace rapidfuzz::detail { @@ -4124,11 +4151,11 @@ CachedHamming(InputIt1 first1, InputIt1 last1, bool pad_ = true) -> CachedHammin } // namespace rapidfuzz -#include +# include -#include -#include -#include +# include +# include +# include namespace rapidfuzz::detail { @@ -4340,10 +4367,10 @@ struct BlockPatternMatchVector { } // namespace rapidfuzz::detail -#include +# include -#include -#include +# include +# include namespace rapidfuzz::detail { @@ -4362,6 +4389,20 @@ struct LCSseqResult { size_t sim; }; +template +LCSseqResult& getMatrixRef(LCSseqResult& res) +{ +# RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE + return res; +#else +// this is a hack since the compiler doesn't know early enough that +// this is never called when the types differ. +// On C++17 this properly uses if constexpr +assert(RecordMatrix); +return reinterpret_cast&>(res); +#endif +} + /* * An encoded mbleven model table. * @@ -4527,7 +4568,10 @@ auto lcs_unroll(const PMV& block, const Range&, const Range& unroll([&](size_t i) { S[i] = ~UINT64_C(0); }); LCSseqResult res; - if constexpr (RecordMatrix) res.S = ShiftedBitMatrix(s2.size(), N, ~UINT64_C(0)); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.S = ShiftedBitMatrix(s2.size(), N, ~UINT64_C(0)); + } auto iter_s2 = s2.begin(); for (size_t i = 0; i < s2.size(); ++i) { @@ -4542,7 +4586,10 @@ auto lcs_unroll(const PMV& block, const Range&, const Range& uint64_t x = addc64(S[word], u, carry, &carry); S[word] = x | (S[word] - u); - if constexpr (RecordMatrix) res.S[i][word] = S[word]; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.S[i][word] = S[word]; + } }); } @@ -4553,7 +4600,10 @@ auto lcs_unroll(const PMV& block, const Range&, const Range& uint64_t x = addc64(S[word], u, carry, &carry); S[word] = x | (S[word] - u); - if constexpr (RecordMatrix) res.S[i][word] = S[word]; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.S[i][word] = S[word]; + } }); iter_s2++; @@ -4588,10 +4638,11 @@ auto lcs_blockwise(const PMV& PM, const Range& s1, const Range res; - if constexpr (RecordMatrix) { + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); size_t full_band = band_width_left + 1 + band_width_right; size_t full_band_words = std::min(words, full_band / word_size + 2); - res.S = ShiftedBitMatrix(s2.size(), full_band_words, ~UINT64_C(0)); + res_.S = ShiftedBitMatrix(s2.size(), full_band_words, ~UINT64_C(0)); } /* first_block is the index of the first block in Ukkonen band. */ @@ -4602,7 +4653,10 @@ auto lcs_blockwise(const PMV& PM, const Range& s1, const Range(first_block * word_size)); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.S.set_offset(row, static_cast(first_block * word_size)); + } for (size_t word = first_block; word < last_block; ++word) { const uint64_t Matches = PM.get(word, *iter_s2); @@ -4613,7 +4667,10 @@ auto lcs_blockwise(const PMV& PM, const Range& s1, const Range band_width_right) first_block = (row - band_width_right) / word_size; @@ -4946,13 +5003,13 @@ struct MultiLCSseq : public detail::MultiSimilarityBase, siz # else using namespace detail::simd_sse2; # endif - if constexpr (MaxLen <= 8) + RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 8) return native_simd::size; - else if constexpr (MaxLen <= 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 16) return native_simd::size; - else if constexpr (MaxLen <= 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 32) return native_simd::size; - else if constexpr (MaxLen <= 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 64) return native_simd::size; static_assert(MaxLen <= 64); @@ -5021,13 +5078,13 @@ struct MultiLCSseq : public detail::MultiSimilarityBase, siz throw std::invalid_argument("scores has to have >= result_count() elements"); detail::Range scores_(scores, scores + score_count); - if constexpr (MaxLen == 8) + RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 8) detail::lcs_simd(scores_, PM, s2, score_cutoff); - else if constexpr (MaxLen == 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 16) detail::lcs_simd(scores_, PM, s2, score_cutoff); - else if constexpr (MaxLen == 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 32) detail::lcs_simd(scores_, PM, s2, score_cutoff); - else if constexpr (MaxLen == 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 64) detail::lcs_simd(scores_, PM, s2, score_cutoff); } @@ -6731,6 +6788,34 @@ struct LevenshteinResult { size_t dist; }; +template +LevenshteinResult& getMatrixRef(LevenshteinResult& res) +{ +#if RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE + return res; +#else + // this is a hack since the compiler doesn't know early enough that + // this is never called when the types differ. + // On C++17 this properly uses if constexpr + assert(RecordMatrix); + return reinterpret_cast&>(res); +#endif +} + +template +LevenshteinResult& getBitRowRef(LevenshteinResult& res) +{ +#if RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE + return res; +#else + // this is a hack since the compiler doesn't know early enough that + // this is never called when the types differ. + // On C++17 this properly uses if constexpr + assert(RecordBitRow); + return reinterpret_cast&>(res); +#endif +} + template size_t generalized_levenshtein_wagner_fischer(const Range& s1, const Range& s2, LevenshteinWeightTable weights, size_t max) @@ -6918,9 +7003,10 @@ auto levenshtein_hyrroe2003(const PM_Vec& PM, const Range& s1, const R LevenshteinResult res; res.dist = s1.size(); - if constexpr (RecordMatrix) { - res.VP = ShiftedBitMatrix(s2.size(), 1, ~UINT64_C(0)); - res.VN = ShiftedBitMatrix(s2.size(), 1, 0); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP = ShiftedBitMatrix(s2.size(), 1, ~UINT64_C(0)); + res_.VN = ShiftedBitMatrix(s2.size(), 1, 0); } /* mask used when computing D[m,j] in the paper 10^(m-1) */ @@ -6949,19 +7035,21 @@ auto levenshtein_hyrroe2003(const PM_Vec& PM, const Range& s1, const R VP = HN | ~(D0 | HP); VN = HP & D0; - if constexpr (RecordMatrix) { - res.VP[i][0] = VP; - res.VN[i][0] = VN; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP[i][0] = VP; + res_.VN[i][0] = VN; } } if (res.dist > max) res.dist = max + 1; - if constexpr (RecordBitRow) { - res.first_block = 0; - res.last_block = 0; - res.prev_score = s2.size(); - res.vecs.emplace_back(VP, VN); + RAPIDFUZZ_IF_CONSTEXPR (RecordBitRow) { + auto& res_ = getBitRowRef(res); + res_.first_block = 0; + res_.last_block = 0; + res_.prev_score = s2.size(); + res_.vecs.emplace_back(VP, VN); } return res; @@ -7041,7 +7129,9 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte } /* calculate score under consideration of wraparounds in parallel counter */ else { - if constexpr (std::numeric_limits::max() < std::numeric_limits::max()) { + RAPIDFUZZ_IF_CONSTEXPR (std::numeric_limits::max() < + std::numeric_limits::max()) + { size_t min_dist = abs_diff(s1_lengths[result_index], s2.size()); size_t wraparound_score = static_cast(std::numeric_limits::max()) + 1; @@ -7162,14 +7252,15 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range res; res.dist = max; - if constexpr (RecordMatrix) { - res.VP = ShiftedBitMatrix(s2.size(), 1, ~UINT64_C(0)); - res.VN = ShiftedBitMatrix(s2.size(), 1, 0); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP = ShiftedBitMatrix(s2.size(), 1, ~UINT64_C(0)); + res_.VN = ShiftedBitMatrix(s2.size(), 1, 0); ptrdiff_t start_offset = static_cast(max) + 2 - 64; for (size_t i = 0; i < s2.size(); ++i) { - res.VP.set_offset(i, start_offset + static_cast(i)); - res.VN.set_offset(i, start_offset + static_cast(i)); + res_.VP.set_offset(i, start_offset + static_cast(i)); + res_.VN.set_offset(i, start_offset + static_cast(i)); } } @@ -7223,9 +7314,10 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range> 1) | HP); VN = (D0 >> 1) & HP; - if constexpr (RecordMatrix) { - res.VP[i][0] = VP; - res.VN[i][0] = VN; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP[i][0] = VP; + res_.VN[i][0] = VN; } } @@ -7265,9 +7357,10 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range> 1) | HP); VN = (D0 >> 1) & HP; - if constexpr (RecordMatrix) { - res.VP[i][0] = VP; - res.VN[i][0] = VN; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP[i][0] = VP; + res_.VN[i][0] = VN; } } @@ -7302,17 +7395,19 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range scores[words - 1] = s1.size(); - if constexpr (RecordMatrix) { + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); size_t full_band = std::min(s1.size(), 2 * max + 1); size_t full_band_words = std::min(words, full_band / word_size + 2); - res.VP = ShiftedBitMatrix(s2.size(), full_band_words, ~UINT64_C(0)); - res.VN = ShiftedBitMatrix(s2.size(), full_band_words, 0); + res_.VP = ShiftedBitMatrix(s2.size(), full_band_words, ~UINT64_C(0)); + res_.VN = ShiftedBitMatrix(s2.size(), full_band_words, 0); } - if constexpr (RecordBitRow) { - res.first_block = 0; - res.last_block = 0; - res.prev_score = 0; + RAPIDFUZZ_IF_CONSTEXPR (RecordBitRow) { + auto& res_ = getBitRowRef(res); + res_.first_block = 0; + res_.last_block = 0; + res_.prev_score = 0; } max = std::min(max, std::max(s1.size(), s2.size())); @@ -7329,9 +7424,10 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range uint64_t HP_carry = 1; uint64_t HN_carry = 0; - if constexpr (RecordMatrix) { - res.VP.set_offset(row, static_cast(first_block * word_size)); - res.VN.set_offset(row, static_cast(first_block * word_size)); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP.set_offset(row, static_cast(first_block * word_size)); + res_.VN.set_offset(row, static_cast(first_block * word_size)); } auto advance_block = [&](size_t word) { @@ -7365,9 +7461,10 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range vecs[word].VP = HN | ~(D0 | HP); vecs[word].VN = HP & D0; - if constexpr (RecordMatrix) { - res.VP[row][word - first_block] = vecs[word].VP; - res.VN[row][word - first_block] = vecs[word].VN; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP[row][word - first_block] = vecs[word].VP; + res_.VN[row][word - first_block] = vecs[word].VN; } return static_cast(HP_carry) - static_cast(HN_carry); @@ -7453,26 +7550,27 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range return res; } - if constexpr (RecordBitRow) { + RAPIDFUZZ_IF_CONSTEXPR (RecordBitRow) { if (row == stop_row) { + auto& res_ = getBitRowRef(res); if (first_block == 0) - res.prev_score = stop_row + 1; + res_.prev_score = stop_row + 1; else { /* count backwards to find score at last position in previous block */ size_t relevant_bits = std::min((first_block + 1) * 64, s1.size()) % 64; uint64_t mask = ~UINT64_C(0); if (relevant_bits) mask >>= 64 - relevant_bits; - res.prev_score = scores[first_block] + popcount(vecs[first_block].VN & mask) - - popcount(vecs[first_block].VP & mask); + res_.prev_score = scores[first_block] + popcount(vecs[first_block].VN & mask) - + popcount(vecs[first_block].VP & mask); } - res.first_block = first_block; - res.last_block = last_block; - res.vecs = std::move(vecs); + res_.first_block = first_block; + res_.last_block = last_block; + res_.vecs = std::move(vecs); /* unknown so make sure it is <= max */ - res.dist = 0; + res_.dist = 0; return res; } } @@ -8202,13 +8300,13 @@ struct MultiLevenshtein : public detail::MultiDistanceBase::size; - else if constexpr (MaxLen <= 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 16) return native_simd::size; - else if constexpr (MaxLen <= 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 32) return native_simd::size; - else if constexpr (MaxLen <= 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 64) return native_simd::size; static_assert(MaxLen <= 64); @@ -8279,13 +8377,13 @@ struct MultiLevenshtein : public detail::MultiDistanceBase= result_count() elements"); detail::Range scores_(scores, scores + score_count); - if constexpr (MaxLen == 8) + RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 8) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 16) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 32) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 64) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); } @@ -8527,7 +8625,9 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV } /* calculate score under consideration of wraparounds in parallel counter */ else { - if constexpr (std::numeric_limits::max() < std::numeric_limits::max()) { + RAPIDFUZZ_IF_CONSTEXPR (std::numeric_limits::max() < + std::numeric_limits::max()) + { size_t min_dist = abs_diff(s1_lengths[result_index], s2.size()); size_t wraparound_score = static_cast(std::numeric_limits::max()) + 1; @@ -8767,13 +8867,13 @@ struct MultiOSA # else using namespace detail::simd_sse2; # endif - if constexpr (MaxLen <= 8) + RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 8) return native_simd::size; - else if constexpr (MaxLen <= 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 16) return native_simd::size; - else if constexpr (MaxLen <= 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 32) return native_simd::size; - else if constexpr (MaxLen <= 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 64) return native_simd::size; static_assert(MaxLen <= 64); @@ -8841,13 +8941,13 @@ struct MultiOSA throw std::invalid_argument("scores has to have >= result_count() elements"); detail::Range scores_(scores, scores + score_count); - if constexpr (MaxLen == 8) + RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 8) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 16) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 32) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 64) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); } diff --git a/fuzzing/CMakeLists.txt b/fuzzing/CMakeLists.txt index b3b5efcd..4d7a3d38 100644 --- a/fuzzing/CMakeLists.txt +++ b/fuzzing/CMakeLists.txt @@ -1,6 +1,6 @@ function(create_fuzzer fuzzer) add_executable(fuzz_${fuzzer} fuzz_${fuzzer}.cpp) - target_compile_features(fuzz_${fuzzer} PUBLIC cxx_std_17) + target_compile_features(fuzz_${fuzzer} PUBLIC cxx_std_14) target_link_libraries(fuzz_${fuzzer} PRIVATE rapidfuzz::rapidfuzz) target_compile_options(fuzz_${fuzzer} PRIVATE -g -O1 -fsanitize=fuzzer,address -march=native) diff --git a/rapidfuzz/details/Range.hpp b/rapidfuzz/details/Range.hpp index d8ac443e..b9f744dc 100644 --- a/rapidfuzz/details/Range.hpp +++ b/rapidfuzz/details/Range.hpp @@ -128,24 +128,12 @@ class Range { constexpr void remove_prefix(size_t n) { - if constexpr (std::is_base_of_v::iterator_category>) - _first += static_cast(n); - else - for (size_t i = 0; i < n; ++i) - _first++; - + std::advance(_first, static_cast(n)); _size -= n; } constexpr void remove_suffix(size_t n) { - if constexpr (std::is_base_of_v::iterator_category>) - _last -= static_cast(n); - else - for (size_t i = 0; i < n; ++i) - _last--; - + std::advance(_last, -static_cast(n)); _size -= n; } diff --git a/rapidfuzz/details/common.hpp b/rapidfuzz/details/common.hpp index f157a5ae..d938958b 100644 --- a/rapidfuzz/details/common.hpp +++ b/rapidfuzz/details/common.hpp @@ -33,13 +33,11 @@ static inline size_t abs_diff(size_t a, size_t b) return a > b ? a - b : b - a; } -template -TO opt_static_cast(const FROM &value) +template +TO opt_static_cast(const FROM& value) { - if constexpr (std::is_same_v) - return value; - else - return static_cast(value); + /* calling the cast through this template function somehow avoids useless cast warnings */ + return static_cast(value); } /** diff --git a/rapidfuzz/details/config.hpp b/rapidfuzz/details/config.hpp new file mode 100644 index 00000000..1616d750 --- /dev/null +++ b/rapidfuzz/details/config.hpp @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2020 Max Bachmann */ + +#pragma once + +#if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) +# define RAPIDFUZZ_DEDUCTION_GUIDES +# define RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE 1 +# define RAPIDFUZZ_IF_CONSTEXPR if constexpr +#else +# define RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE 0 +# define RAPIDFUZZ_IF_CONSTEXPR if +#endif diff --git a/rapidfuzz/details/distance.hpp b/rapidfuzz/details/distance.hpp index 263af7b2..837ad4fb 100644 --- a/rapidfuzz/details/distance.hpp +++ b/rapidfuzz/details/distance.hpp @@ -180,11 +180,21 @@ struct SimilarityBase : public NormalizedMetricBase { (maximum >= score_hint) ? maximum - score_hint : static_cast(WorstSimilarity); ResType sim = T::_similarity(s1, s2, std::forward(args)..., cutoff_similarity, hint_similarity); ResType dist = maximum - sim; + return _apply_distance_score_cutoff(dist, score_cutoff); + } - if constexpr (std::is_floating_point_v) - return (dist <= score_cutoff) ? dist : 1.0; - else - return (dist <= score_cutoff) ? dist : score_cutoff + 1; + template + static std::enable_if_t, U> _apply_distance_score_cutoff(U score, + U score_cutoff) + { + return (score <= score_cutoff) ? score : 1.0; + } + + template + static std::enable_if_t, U> _apply_distance_score_cutoff(U score, + U score_cutoff) + { + return (score <= score_cutoff) ? score : score_cutoff + 1; } SimilarityBase() @@ -350,11 +360,21 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { ResType hint_similarity = (maximum > score_hint) ? maximum - score_hint : 0; ResType sim = derived._similarity(s2, cutoff_similarity, hint_similarity); ResType dist = maximum - sim; + return _apply_distance_score_cutoff(dist, score_cutoff); + } - if constexpr (std::is_floating_point_v) - return (dist <= score_cutoff) ? dist : 1.0; - else - return (dist <= score_cutoff) ? dist : score_cutoff + 1; + template + static std::enable_if_t, U> _apply_distance_score_cutoff(U score, + U score_cutoff) + { + return (score <= score_cutoff) ? score : 1.0; + } + + template + static std::enable_if_t, U> _apply_distance_score_cutoff(U score, + U score_cutoff) + { + return (score <= score_cutoff) ? score : score_cutoff + 1; } CachedSimilarityBase() @@ -403,7 +423,8 @@ struct MultiNormalizedMetricBase { // reinterpretation only works when the types have the same size ResType* scores_orig = nullptr; - if constexpr (sizeof(double) == sizeof(ResType)) + + RAPIDFUZZ_IF_CONSTEXPR (sizeof(double) == sizeof(ResType)) scores_orig = reinterpret_cast(scores); else scores_orig = new ResType[derived.result_count()]; @@ -417,7 +438,7 @@ struct MultiNormalizedMetricBase { scores[i] = (norm_dist <= score_cutoff) ? norm_dist : 1.0; } - if constexpr (sizeof(double) != sizeof(ResType)) delete[] scores_orig; + RAPIDFUZZ_IF_CONSTEXPR (sizeof(double) != sizeof(ResType)) delete[] scores_orig; } template @@ -532,14 +553,24 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { for (size_t i = 0; i < derived.get_input_count(); ++i) { ResType maximum = derived.maximum(i, s2); ResType dist = maximum - scores[i]; - - if constexpr (std::is_floating_point_v) - scores[i] = (dist <= score_cutoff) ? dist : 1.0; - else - scores[i] = (dist <= score_cutoff) ? dist : score_cutoff + 1; + scores[i] = _apply_distance_score_cutoff(dist, score_cutoff); } } + template + static std::enable_if_t, U> _apply_distance_score_cutoff(U score, + U score_cutoff) + { + return (score <= score_cutoff) ? score : 1.0; + } + + template + static std::enable_if_t, U> _apply_distance_score_cutoff(U score, + U score_cutoff) + { + return (score <= score_cutoff) ? score : score_cutoff + 1; + } + MultiSimilarityBase() {} friend T; diff --git a/rapidfuzz/details/types.hpp b/rapidfuzz/details/types.hpp index ac3c4559..8fec47e9 100644 --- a/rapidfuzz/details/types.hpp +++ b/rapidfuzz/details/types.hpp @@ -4,6 +4,7 @@ #pragma once #include +#include #include #include #include diff --git a/rapidfuzz/distance/LCSseq.hpp b/rapidfuzz/distance/LCSseq.hpp index 9082dc37..5ec2e77f 100644 --- a/rapidfuzz/distance/LCSseq.hpp +++ b/rapidfuzz/distance/LCSseq.hpp @@ -90,13 +90,13 @@ struct MultiLCSseq : public detail::MultiSimilarityBase, siz # else using namespace detail::simd_sse2; # endif - if constexpr (MaxLen <= 8) + RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 8) return native_simd::size; - else if constexpr (MaxLen <= 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 16) return native_simd::size; - else if constexpr (MaxLen <= 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 32) return native_simd::size; - else if constexpr (MaxLen <= 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 64) return native_simd::size; static_assert(MaxLen <= 64); @@ -165,13 +165,13 @@ struct MultiLCSseq : public detail::MultiSimilarityBase, siz throw std::invalid_argument("scores has to have >= result_count() elements"); detail::Range scores_(scores, scores + score_count); - if constexpr (MaxLen == 8) + RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 8) detail::lcs_simd(scores_, PM, s2, score_cutoff); - else if constexpr (MaxLen == 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 16) detail::lcs_simd(scores_, PM, s2, score_cutoff); - else if constexpr (MaxLen == 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 32) detail::lcs_simd(scores_, PM, s2, score_cutoff); - else if constexpr (MaxLen == 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 64) detail::lcs_simd(scores_, PM, s2, score_cutoff); } diff --git a/rapidfuzz/distance/LCSseq_impl.hpp b/rapidfuzz/distance/LCSseq_impl.hpp index 20d10b1a..98f6fe48 100644 --- a/rapidfuzz/distance/LCSseq_impl.hpp +++ b/rapidfuzz/distance/LCSseq_impl.hpp @@ -30,6 +30,20 @@ struct LCSseqResult { size_t sim; }; +template +LCSseqResult& getMatrixRef(LCSseqResult& res) +{ +#if RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE + return res; +#else + // this is a hack since the compiler doesn't know early enough that + // this is never called when the types differ. + // On C++17 this properly uses if constexpr + assert(RecordMatrix); + return reinterpret_cast&>(res); +#endif +} + /* * An encoded mbleven model table. * @@ -195,7 +209,10 @@ auto lcs_unroll(const PMV& block, const Range&, const Range& unroll([&](size_t i) { S[i] = ~UINT64_C(0); }); LCSseqResult res; - if constexpr (RecordMatrix) res.S = ShiftedBitMatrix(s2.size(), N, ~UINT64_C(0)); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.S = ShiftedBitMatrix(s2.size(), N, ~UINT64_C(0)); + } auto iter_s2 = s2.begin(); for (size_t i = 0; i < s2.size(); ++i) { @@ -210,7 +227,10 @@ auto lcs_unroll(const PMV& block, const Range&, const Range& uint64_t x = addc64(S[word], u, carry, &carry); S[word] = x | (S[word] - u); - if constexpr (RecordMatrix) res.S[i][word] = S[word]; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.S[i][word] = S[word]; + } }); } @@ -221,7 +241,10 @@ auto lcs_unroll(const PMV& block, const Range&, const Range& uint64_t x = addc64(S[word], u, carry, &carry); S[word] = x | (S[word] - u); - if constexpr (RecordMatrix) res.S[i][word] = S[word]; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.S[i][word] = S[word]; + } }); iter_s2++; @@ -256,10 +279,11 @@ auto lcs_blockwise(const PMV& PM, const Range& s1, const Range res; - if constexpr (RecordMatrix) { + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); size_t full_band = band_width_left + 1 + band_width_right; size_t full_band_words = std::min(words, full_band / word_size + 2); - res.S = ShiftedBitMatrix(s2.size(), full_band_words, ~UINT64_C(0)); + res_.S = ShiftedBitMatrix(s2.size(), full_band_words, ~UINT64_C(0)); } /* first_block is the index of the first block in Ukkonen band. */ @@ -270,7 +294,10 @@ auto lcs_blockwise(const PMV& PM, const Range& s1, const Range(first_block * word_size)); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.S.set_offset(row, static_cast(first_block * word_size)); + } for (size_t word = first_block; word < last_block; ++word) { const uint64_t Matches = PM.get(word, *iter_s2); @@ -281,7 +308,10 @@ auto lcs_blockwise(const PMV& PM, const Range& s1, const Range band_width_right) first_block = (row - band_width_right) / word_size; diff --git a/rapidfuzz/distance/Levenshtein.hpp b/rapidfuzz/distance/Levenshtein.hpp index 3d61a212..9d40966b 100644 --- a/rapidfuzz/distance/Levenshtein.hpp +++ b/rapidfuzz/distance/Levenshtein.hpp @@ -311,13 +311,13 @@ struct MultiLevenshtein : public detail::MultiDistanceBase::size; - else if constexpr (MaxLen <= 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 16) return native_simd::size; - else if constexpr (MaxLen <= 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 32) return native_simd::size; - else if constexpr (MaxLen <= 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 64) return native_simd::size; static_assert(MaxLen <= 64); @@ -388,13 +388,13 @@ struct MultiLevenshtein : public detail::MultiDistanceBase= result_count() elements"); detail::Range scores_(scores, scores + score_count); - if constexpr (MaxLen == 8) + RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 8) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 16) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 32) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 64) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); } diff --git a/rapidfuzz/distance/Levenshtein_impl.hpp b/rapidfuzz/distance/Levenshtein_impl.hpp index 38758413..0e772d23 100644 --- a/rapidfuzz/distance/Levenshtein_impl.hpp +++ b/rapidfuzz/distance/Levenshtein_impl.hpp @@ -53,6 +53,34 @@ struct LevenshteinResult { size_t dist; }; +template +LevenshteinResult& getMatrixRef(LevenshteinResult& res) +{ +#if RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE + return res; +#else + // this is a hack since the compiler doesn't know early enough that + // this is never called when the types differ. + // On C++17 this properly uses if constexpr + assert(RecordMatrix); + return reinterpret_cast&>(res); +#endif +} + +template +LevenshteinResult& getBitRowRef(LevenshteinResult& res) +{ +#if RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE + return res; +#else + // this is a hack since the compiler doesn't know early enough that + // this is never called when the types differ. + // On C++17 this properly uses if constexpr + assert(RecordBitRow); + return reinterpret_cast&>(res); +#endif +} + template size_t generalized_levenshtein_wagner_fischer(const Range& s1, const Range& s2, LevenshteinWeightTable weights, size_t max) @@ -240,9 +268,10 @@ auto levenshtein_hyrroe2003(const PM_Vec& PM, const Range& s1, const R LevenshteinResult res; res.dist = s1.size(); - if constexpr (RecordMatrix) { - res.VP = ShiftedBitMatrix(s2.size(), 1, ~UINT64_C(0)); - res.VN = ShiftedBitMatrix(s2.size(), 1, 0); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP = ShiftedBitMatrix(s2.size(), 1, ~UINT64_C(0)); + res_.VN = ShiftedBitMatrix(s2.size(), 1, 0); } /* mask used when computing D[m,j] in the paper 10^(m-1) */ @@ -271,19 +300,21 @@ auto levenshtein_hyrroe2003(const PM_Vec& PM, const Range& s1, const R VP = HN | ~(D0 | HP); VN = HP & D0; - if constexpr (RecordMatrix) { - res.VP[i][0] = VP; - res.VN[i][0] = VN; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP[i][0] = VP; + res_.VN[i][0] = VN; } } if (res.dist > max) res.dist = max + 1; - if constexpr (RecordBitRow) { - res.first_block = 0; - res.last_block = 0; - res.prev_score = s2.size(); - res.vecs.emplace_back(VP, VN); + RAPIDFUZZ_IF_CONSTEXPR (RecordBitRow) { + auto& res_ = getBitRowRef(res); + res_.first_block = 0; + res_.last_block = 0; + res_.prev_score = s2.size(); + res_.vecs.emplace_back(VP, VN); } return res; @@ -363,7 +394,9 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte } /* calculate score under consideration of wraparounds in parallel counter */ else { - if constexpr (std::numeric_limits::max() < std::numeric_limits::max()) { + RAPIDFUZZ_IF_CONSTEXPR (std::numeric_limits::max() < + std::numeric_limits::max()) + { size_t min_dist = abs_diff(s1_lengths[result_index], s2.size()); size_t wraparound_score = static_cast(std::numeric_limits::max()) + 1; @@ -484,14 +517,15 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range res; res.dist = max; - if constexpr (RecordMatrix) { - res.VP = ShiftedBitMatrix(s2.size(), 1, ~UINT64_C(0)); - res.VN = ShiftedBitMatrix(s2.size(), 1, 0); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP = ShiftedBitMatrix(s2.size(), 1, ~UINT64_C(0)); + res_.VN = ShiftedBitMatrix(s2.size(), 1, 0); ptrdiff_t start_offset = static_cast(max) + 2 - 64; for (size_t i = 0; i < s2.size(); ++i) { - res.VP.set_offset(i, start_offset + static_cast(i)); - res.VN.set_offset(i, start_offset + static_cast(i)); + res_.VP.set_offset(i, start_offset + static_cast(i)); + res_.VN.set_offset(i, start_offset + static_cast(i)); } } @@ -545,9 +579,10 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range> 1) | HP); VN = (D0 >> 1) & HP; - if constexpr (RecordMatrix) { - res.VP[i][0] = VP; - res.VN[i][0] = VN; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP[i][0] = VP; + res_.VN[i][0] = VN; } } @@ -587,9 +622,10 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range> 1) | HP); VN = (D0 >> 1) & HP; - if constexpr (RecordMatrix) { - res.VP[i][0] = VP; - res.VN[i][0] = VN; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP[i][0] = VP; + res_.VN[i][0] = VN; } } @@ -624,17 +660,19 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range scores[words - 1] = s1.size(); - if constexpr (RecordMatrix) { + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); size_t full_band = std::min(s1.size(), 2 * max + 1); size_t full_band_words = std::min(words, full_band / word_size + 2); - res.VP = ShiftedBitMatrix(s2.size(), full_band_words, ~UINT64_C(0)); - res.VN = ShiftedBitMatrix(s2.size(), full_band_words, 0); + res_.VP = ShiftedBitMatrix(s2.size(), full_band_words, ~UINT64_C(0)); + res_.VN = ShiftedBitMatrix(s2.size(), full_band_words, 0); } - if constexpr (RecordBitRow) { - res.first_block = 0; - res.last_block = 0; - res.prev_score = 0; + RAPIDFUZZ_IF_CONSTEXPR (RecordBitRow) { + auto& res_ = getBitRowRef(res); + res_.first_block = 0; + res_.last_block = 0; + res_.prev_score = 0; } max = std::min(max, std::max(s1.size(), s2.size())); @@ -651,9 +689,10 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range uint64_t HP_carry = 1; uint64_t HN_carry = 0; - if constexpr (RecordMatrix) { - res.VP.set_offset(row, static_cast(first_block * word_size)); - res.VN.set_offset(row, static_cast(first_block * word_size)); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP.set_offset(row, static_cast(first_block * word_size)); + res_.VN.set_offset(row, static_cast(first_block * word_size)); } auto advance_block = [&](size_t word) { @@ -687,9 +726,10 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range vecs[word].VP = HN | ~(D0 | HP); vecs[word].VN = HP & D0; - if constexpr (RecordMatrix) { - res.VP[row][word - first_block] = vecs[word].VP; - res.VN[row][word - first_block] = vecs[word].VN; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP[row][word - first_block] = vecs[word].VP; + res_.VN[row][word - first_block] = vecs[word].VN; } return static_cast(HP_carry) - static_cast(HN_carry); @@ -775,26 +815,27 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range return res; } - if constexpr (RecordBitRow) { + RAPIDFUZZ_IF_CONSTEXPR (RecordBitRow) { if (row == stop_row) { + auto& res_ = getBitRowRef(res); if (first_block == 0) - res.prev_score = stop_row + 1; + res_.prev_score = stop_row + 1; else { /* count backwards to find score at last position in previous block */ size_t relevant_bits = std::min((first_block + 1) * 64, s1.size()) % 64; uint64_t mask = ~UINT64_C(0); if (relevant_bits) mask >>= 64 - relevant_bits; - res.prev_score = scores[first_block] + popcount(vecs[first_block].VN & mask) - - popcount(vecs[first_block].VP & mask); + res_.prev_score = scores[first_block] + popcount(vecs[first_block].VN & mask) - + popcount(vecs[first_block].VP & mask); } - res.first_block = first_block; - res.last_block = last_block; - res.vecs = std::move(vecs); + res_.first_block = first_block; + res_.last_block = last_block; + res_.vecs = std::move(vecs); /* unknown so make sure it is <= max */ - res.dist = 0; + res_.dist = 0; return res; } } diff --git a/rapidfuzz/distance/OSA.hpp b/rapidfuzz/distance/OSA.hpp index 0e56eadd..61ec0370 100644 --- a/rapidfuzz/distance/OSA.hpp +++ b/rapidfuzz/distance/OSA.hpp @@ -126,13 +126,13 @@ struct MultiOSA # else using namespace detail::simd_sse2; # endif - if constexpr (MaxLen <= 8) + RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 8) return native_simd::size; - else if constexpr (MaxLen <= 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 16) return native_simd::size; - else if constexpr (MaxLen <= 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 32) return native_simd::size; - else if constexpr (MaxLen <= 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 64) return native_simd::size; static_assert(MaxLen <= 64); @@ -200,13 +200,13 @@ struct MultiOSA throw std::invalid_argument("scores has to have >= result_count() elements"); detail::Range scores_(scores, scores + score_count); - if constexpr (MaxLen == 8) + RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 8) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 16) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 32) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 64) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); } diff --git a/rapidfuzz/distance/OSA_impl.hpp b/rapidfuzz/distance/OSA_impl.hpp index a4707dd0..635122db 100644 --- a/rapidfuzz/distance/OSA_impl.hpp +++ b/rapidfuzz/distance/OSA_impl.hpp @@ -151,7 +151,9 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV } /* calculate score under consideration of wraparounds in parallel counter */ else { - if constexpr (std::numeric_limits::max() < std::numeric_limits::max()) { + RAPIDFUZZ_IF_CONSTEXPR (std::numeric_limits::max() < + std::numeric_limits::max()) + { size_t min_dist = abs_diff(s1_lengths[result_index], s2.size()); size_t wraparound_score = static_cast(std::numeric_limits::max()) + 1; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 784503d9..c91c3d71 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,4 +1,4 @@ -#find_package(Catch2 2 QUIET) +find_package(Catch2 2 QUIET) if (Catch2_FOUND) message("Using system supplied version of Catch2") else() From 93603d55427d1686800921173a674bda937afa67 Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Wed, 25 Dec 2024 01:29:47 +0100 Subject: [PATCH 02/18] guard template deduction guides --- CMakeLists.txt | 2 +- extras/rapidfuzz_amalgamated.hpp | 295 ++++++++++++---------- rapidfuzz/distance/DamerauLevenshtein.hpp | 2 + rapidfuzz/distance/Hamming.hpp | 2 + rapidfuzz/distance/Indel.hpp | 2 + rapidfuzz/distance/Jaro.hpp | 2 + rapidfuzz/distance/JaroWinkler.hpp | 2 + rapidfuzz/distance/LCSseq.hpp | 2 + rapidfuzz/distance/Levenshtein.hpp | 2 + rapidfuzz/distance/OSA.hpp | 2 + rapidfuzz/distance/Postfix.hpp | 3 +- rapidfuzz/distance/Prefix.hpp | 2 + rapidfuzz/fuzz.hpp | 20 ++ 13 files changed, 208 insertions(+), 130 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5c1cead8..1a9f422c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,7 +50,7 @@ add_library(rapidfuzz INTERFACE) # provide a namespaced alias for clients to 'link' against if RapidFuzz is included as a sub-project add_library(rapidfuzz::rapidfuzz ALIAS rapidfuzz) -target_compile_features(rapidfuzz INTERFACE cxx_std_17) +target_compile_features(rapidfuzz INTERFACE cxx_std_14) target_include_directories(rapidfuzz INTERFACE diff --git a/extras/rapidfuzz_amalgamated.hpp b/extras/rapidfuzz_amalgamated.hpp index 9e690a74..69f7ff85 100644 --- a/extras/rapidfuzz_amalgamated.hpp +++ b/extras/rapidfuzz_amalgamated.hpp @@ -1,24 +1,24 @@ // Licensed under the MIT License . // SPDX-License-Identifier: MIT // RapidFuzz v1.0.2 -// Generated: 2024-12-25 01:15:57.554308 +// Generated: 2024-12-25 01:28:38.014689 // ---------------------------------------------------------- // This file is an amalgamation of multiple different files. // You probably shouldn't edit it directly. // ---------------------------------------------------------- #ifndef RAPIDFUZZ_AMALGAMATED_HPP_INCLUDED -# define RAPIDFUZZ_AMALGAMATED_HPP_INCLUDED +#define RAPIDFUZZ_AMALGAMATED_HPP_INCLUDED -# include +#include -# include -# include -# include -# include +#include +#include +#include +#include -# include -# include -# include +#include +#include +#include namespace rapidfuzz::detail { @@ -215,11 +215,11 @@ struct HybridGrowingHashmap { } // namespace rapidfuzz::detail -# include -# include -# include -# include -# include +#include +#include +#include +#include +#include namespace rapidfuzz::detail { @@ -411,27 +411,27 @@ struct ShiftedBitMatrix { } // namespace rapidfuzz::detail -# include -# include -# include -# include -# include -# include -# include -# include -# include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace rapidfuzz::detail { static inline void assume(bool b) { -# if defined(__clang__) +#if defined(__clang__) __builtin_assume(b); -# elif defined(__GNUC__) || defined(__GNUG__) +#elif defined(__GNUC__) || defined(__GNUG__) if (!b) __builtin_unreachable(); -# elif defined(_MSC_VER) +#elif defined(_MSC_VER) __assume(b); -# endif +#endif } template @@ -625,24 +625,24 @@ using RangeVec = std::vector>; } // namespace rapidfuzz::detail -# include +#include -# include +#include -# include +#include -# if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) -# define RAPIDFUZZ_DEDUCTION_GUIDES -# define RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE 1 -# define RAPIDFUZZ_IF_CONSTEXPR if constexpr -# else -# define RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE 0 -# define RAPIDFUZZ_IF_CONSTEXPR if -# endif +#if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) +# define RAPIDFUZZ_DEDUCTION_GUIDES +# define RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE 1 +# define RAPIDFUZZ_IF_CONSTEXPR if constexpr +#else +# define RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE 0 +# define RAPIDFUZZ_IF_CONSTEXPR if +#endif -# include -# include -# include +#include +#include +#include namespace rapidfuzz { @@ -1231,8 +1231,8 @@ inline bool operator==(const ScoreAlignment& a, const ScoreAlignment& b) } // namespace rapidfuzz -# include -# include +#include +#include namespace rapidfuzz { @@ -1360,16 +1360,16 @@ auto SplittedSentenceView::join() const -> std::vector } // namespace rapidfuzz::detail -# include -# include -# include -# include -# include -# include +#include +#include +#include +#include +#include +#include -# if defined(_MSC_VER) && !defined(__clang__) -# include -# endif +#if defined(_MSC_VER) && !defined(__clang__) +# include +#endif namespace rapidfuzz::detail { @@ -1460,15 +1460,15 @@ constexpr T rotl(T x, unsigned int n) assert(n < num_bits); unsigned int count_mask = num_bits - 1; -# if _MSC_VER && !defined(__clang__) -# pragma warning(push) +#if _MSC_VER && !defined(__clang__) +# pragma warning(push) /* unary minus operator applied to unsigned type, result still unsigned */ -# pragma warning(disable : 4146) -# endif +# pragma warning(disable : 4146) +#endif return (x << n) | (x >> (-n & count_mask)); -# if _MSC_VER && !defined(__clang__) -# pragma warning(pop) -# endif +#if _MSC_VER && !defined(__clang__) +# pragma warning(pop) +#endif } /** @@ -1477,15 +1477,15 @@ constexpr T rotl(T x, unsigned int n) template constexpr T blsi(T a) { -# if _MSC_VER && !defined(__clang__) -# pragma warning(push) +#if _MSC_VER && !defined(__clang__) +# pragma warning(push) /* unary minus operator applied to unsigned type, result still unsigned */ -# pragma warning(disable : 4146) -# endif +# pragma warning(disable : 4146) +#endif return a & -a; -# if _MSC_VER && !defined(__clang__) -# pragma warning(pop) -# endif +#if _MSC_VER && !defined(__clang__) +# pragma warning(pop) +#endif } /** @@ -1507,7 +1507,7 @@ constexpr T blsmsk(T a) return a ^ (a - 1); } -# if defined(_MSC_VER) && !defined(__clang__) +#if defined(_MSC_VER) && !defined(__clang__) static inline unsigned int countr_zero(uint32_t x) { unsigned long trailing_zero = 0; @@ -1515,14 +1515,14 @@ static inline unsigned int countr_zero(uint32_t x) return trailing_zero; } -# if defined(_M_ARM) || defined(_M_X64) +# if defined(_M_ARM) || defined(_M_X64) static inline unsigned int countr_zero(uint64_t x) { unsigned long trailing_zero = 0; _BitScanForward64(&trailing_zero, x); return trailing_zero; } -# else +# else static inline unsigned int countr_zero(uint64_t x) { uint32_t msh = (uint32_t)(x >> 32); @@ -1530,9 +1530,9 @@ static inline unsigned int countr_zero(uint64_t x) if (lsh != 0) return countr_zero(lsh); return 32 + countr_zero(msh); } -# endif +# endif -# else /* gcc / clang */ +#else /* gcc / clang */ static inline unsigned int countr_zero(uint32_t x) { return static_cast(__builtin_ctz(x)); @@ -1542,7 +1542,7 @@ static inline unsigned int countr_zero(uint64_t x) { return static_cast(__builtin_ctzll(x)); } -# endif +#endif static inline unsigned int countr_zero(uint16_t x) { @@ -1568,9 +1568,9 @@ constexpr void unroll(F&& f) } // namespace rapidfuzz::detail -# if defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES) -# include -# endif +#if defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES) +# include +#endif namespace rapidfuzz::detail { @@ -1628,36 +1628,36 @@ SplittedSentenceView sorted_split(InputIt first, InputIt last); static inline void* rf_aligned_alloc(size_t alignment, size_t size) { -# if defined(_WIN32) +#if defined(_WIN32) return _aligned_malloc(size, alignment); -# elif defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES) +#elif defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES) return _mm_malloc(size, alignment); -# elif defined(__ANDROID__) && __ANDROID_API__ > 16 +#elif defined(__ANDROID__) && __ANDROID_API__ > 16 void* ptr = nullptr; return posix_memalign(&ptr, alignment, size) ? nullptr : ptr; -# else +#else return aligned_alloc(alignment, size); -# endif +#endif } static inline void rf_aligned_free(void* ptr) { -# if defined(_WIN32) +#if defined(_WIN32) _aligned_free(ptr); -# elif defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES) +#elif defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES) _mm_free(ptr); -# else +#else free(ptr); -# endif +#endif } /**@}*/ } // namespace rapidfuzz::detail -# include -# include -# include +#include +#include +#include namespace rapidfuzz::detail { @@ -1825,20 +1825,20 @@ SplittedSentenceView sorted_split(InputIt first, InputIt last) } // namespace rapidfuzz::detail -# include +#include /* RAPIDFUZZ_LTO_HACK is used to differentiate functions between different * translation units to avoid warnings when using lto */ -# ifndef RAPIDFUZZ_EXCLUDE_SIMD -# if __AVX2__ -# define RAPIDFUZZ_SIMD -# define RAPIDFUZZ_AVX2 -# define RAPIDFUZZ_LTO_HACK 0 +#ifndef RAPIDFUZZ_EXCLUDE_SIMD +# if __AVX2__ +# define RAPIDFUZZ_SIMD +# define RAPIDFUZZ_AVX2 +# define RAPIDFUZZ_LTO_HACK 0 -# include -# include -# include -# include +# include +# include +# include +# include namespace rapidfuzz { namespace detail { @@ -2478,15 +2478,15 @@ static inline native_simd sllv(const native_simd& a, } // namespace detail } // namespace rapidfuzz -# elif (defined(_M_AMD64) || defined(_M_X64)) || defined(__SSE2__) -# define RAPIDFUZZ_SIMD -# define RAPIDFUZZ_SSE2 -# define RAPIDFUZZ_LTO_HACK 1 +# elif (defined(_M_AMD64) || defined(_M_X64)) || defined(__SSE2__) +# define RAPIDFUZZ_SIMD +# define RAPIDFUZZ_SSE2 +# define RAPIDFUZZ_LTO_HACK 1 -# include -# include -# include -# include +# include +# include +# include +# include namespace rapidfuzz { namespace detail { @@ -2999,9 +2999,9 @@ static inline native_simd operator>=(const native_simd& a, const native_simd& b) noexcept { /* sse4.1 */ -# if 0 +# if 0 return _mm_cmpeq_epi16(_mm_max_epu16(a, b), a); // a == max(a,b) -# endif +# endif __m128i s = _mm_subs_epu16(b, a); // b-a, saturated return _mm_cmpeq_epi16(s, _mm_setzero_si128()); // s == 0 @@ -3016,9 +3016,9 @@ static inline native_simd operator>=(const native_simd& a, const native_simd& b) noexcept { /* sse4.1 */ -# if 0 +# if 0 return (Vec4ib)_mm_cmpeq_epi32(_mm_max_epu32(a, b), a); // a == max(a,b) -# endif +# endif return ~(b > a); } @@ -3081,9 +3081,9 @@ static inline native_simd operator<(const native_simd& a, const native_sim } // namespace detail } // namespace rapidfuzz -# endif # endif -# include +#endif +#include namespace rapidfuzz::detail { @@ -3920,18 +3920,20 @@ struct CachedDamerauLevenshtein : public detail::CachedDistanceBase s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedDamerauLevenshtein(const Sentence1& s1_) -> CachedDamerauLevenshtein>; template CachedDamerauLevenshtein(InputIt1 first1, InputIt1 last1) -> CachedDamerauLevenshtein>; +#endif } // namespace experimental } // namespace rapidfuzz -# include +#include -# include +#include namespace rapidfuzz::detail { @@ -4141,21 +4143,23 @@ struct CachedHamming : public detail::CachedDistanceBase, bool pad; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedHamming(const Sentence1& s1_, bool pad_ = true) -> CachedHamming>; template CachedHamming(InputIt1 first1, InputIt1 last1, bool pad_ = true) -> CachedHamming>; +#endif /**@}*/ } // namespace rapidfuzz -# include +#include -# include -# include -# include +#include +#include +#include namespace rapidfuzz::detail { @@ -4367,10 +4371,10 @@ struct BlockPatternMatchVector { } // namespace rapidfuzz::detail -# include +#include -# include -# include +#include +#include namespace rapidfuzz::detail { @@ -4392,14 +4396,14 @@ struct LCSseqResult { template LCSseqResult& getMatrixRef(LCSseqResult& res) { -# RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE +#if RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE return res; #else -// this is a hack since the compiler doesn't know early enough that -// this is never called when the types differ. -// On C++17 this properly uses if constexpr -assert(RecordMatrix); -return reinterpret_cast&>(res); + // this is a hack since the compiler doesn't know early enough that + // this is never called when the types differ. + // On C++17 this properly uses if constexpr + assert(RecordMatrix); + return reinterpret_cast&>(res); #endif } @@ -5139,11 +5143,13 @@ struct CachedLCSseq detail::BlockPatternMatchVector PM; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedLCSseq(const Sentence1& s1_) -> CachedLCSseq>; template CachedLCSseq(InputIt1 first1, InputIt1 last1) -> CachedLCSseq>; +#endif } // namespace rapidfuzz @@ -5381,11 +5387,13 @@ struct CachedIndel CachedLCSseq scorer; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedIndel(const Sentence1& s1_) -> CachedIndel>; template CachedIndel(InputIt1 first1, InputIt1 last1) -> CachedIndel>; +#endif } // namespace rapidfuzz @@ -6445,11 +6453,13 @@ struct CachedJaro : public detail::CachedSimilarityBase, doub detail::BlockPatternMatchVector PM; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedJaro(const Sentence1& s1_) -> CachedJaro>; template CachedJaro(InputIt1 first1, InputIt1 last1) -> CachedJaro>; +#endif } // namespace rapidfuzz @@ -6732,6 +6742,7 @@ struct CachedJaroWinkler : public detail::CachedSimilarityBase explicit CachedJaroWinkler(const Sentence1& s1_, double _prefix_weight = 0.1) -> CachedJaroWinkler>; @@ -6739,6 +6750,7 @@ explicit CachedJaroWinkler(const Sentence1& s1_, template CachedJaroWinkler(InputIt1 first1, InputIt1 last1, double _prefix_weight = 0.1) -> CachedJaroWinkler>; +#endif } // namespace rapidfuzz @@ -8470,6 +8482,7 @@ struct CachedLevenshtein : public detail::CachedDistanceBase explicit CachedLevenshtein(const Sentence1& s1_, LevenshteinWeightTable aWeights = { 1, 1, 1}) -> CachedLevenshtein>; @@ -8477,6 +8490,7 @@ explicit CachedLevenshtein(const Sentence1& s1_, LevenshteinWeightTable aWeights template CachedLevenshtein(InputIt1 first1, InputIt1 last1, LevenshteinWeightTable aWeights = {1, 1, 1}) -> CachedLevenshtein>; +#endif } // namespace rapidfuzz @@ -9012,11 +9026,13 @@ struct CachedOSA detail::BlockPatternMatchVector PM; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template CachedOSA(const Sentence1& s1_) -> CachedOSA>; template CachedOSA(InputIt1 first1, InputIt1 last1) -> CachedOSA>; +#endif /**@}*/ } // namespace rapidfuzz @@ -9133,12 +9149,13 @@ struct CachedPostfix : public detail::CachedSimilarityBase std::vector s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPostfix(const Sentence1& s1_) -> CachedPostfix>; template CachedPostfix(InputIt1 first1, InputIt1 last1) -> CachedPostfix>; - +#endif /**@}*/ } // namespace rapidfuzz @@ -9254,11 +9271,13 @@ struct CachedPrefix : public detail::CachedSimilarityBase, std::vector s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPrefix(const Sentence1& s1_) -> CachedPrefix>; template CachedPrefix(InputIt1 first1, InputIt1 last1) -> CachedPrefix>; +#endif /**@}*/ @@ -9590,11 +9609,13 @@ struct CachedRatio { CachedIndel cached_indel; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template CachedRatio(const Sentence1& s1) -> CachedRatio>; template CachedRatio(InputIt1 first1, InputIt1 last1) -> CachedRatio>; +#endif template ScoreAlignment partial_ratio_alignment(InputIt1 first1, InputIt1 last1, InputIt2 first2, @@ -9663,11 +9684,13 @@ struct CachedPartialRatio { CachedRatio cached_ratio; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPartialRatio(const Sentence1& s1) -> CachedPartialRatio>; template CachedPartialRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialRatio>; +#endif /** * @brief Sorts the words in the strings and calculates the fuzz::ratio between @@ -9772,11 +9795,13 @@ struct CachedTokenSortRatio { CachedRatio cached_ratio; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedTokenSortRatio(const Sentence1& s1) -> CachedTokenSortRatio>; template CachedTokenSortRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenSortRatio>; +#endif /** * @brief Sorts the words in the strings and calculates the fuzz::partial_ratio @@ -9830,6 +9855,7 @@ struct CachedPartialTokenSortRatio { CachedPartialRatio cached_partial_ratio; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPartialTokenSortRatio(const Sentence1& s1) -> CachedPartialTokenSortRatio>; @@ -9837,6 +9863,7 @@ explicit CachedPartialTokenSortRatio(const Sentence1& s1) template CachedPartialTokenSortRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialTokenSortRatio>; +#endif /** * @brief Compares the words in the strings based on unique and common words @@ -9898,11 +9925,13 @@ struct CachedTokenSetRatio { detail::SplittedSentenceView::iterator> tokens_s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedTokenSetRatio(const Sentence1& s1) -> CachedTokenSetRatio>; template CachedTokenSetRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenSetRatio>; +#endif /** * @brief Compares the words in the strings based on unique and common words @@ -9955,12 +9984,14 @@ struct CachedPartialTokenSetRatio { detail::SplittedSentenceView::iterator> tokens_s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPartialTokenSetRatio(const Sentence1& s1) -> CachedPartialTokenSetRatio>; template CachedPartialTokenSetRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialTokenSetRatio>; +#endif /** * @brief Helper method that returns the maximum of fuzz::token_set_ratio and @@ -10017,11 +10048,13 @@ struct CachedTokenRatio { CachedRatio cached_ratio_s1_sorted; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedTokenRatio(const Sentence1& s1) -> CachedTokenRatio>; template CachedTokenRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenRatio>; +#endif /** * @brief Helper method that returns the maximum of @@ -10078,11 +10111,13 @@ struct CachedPartialTokenRatio { std::vector s1_sorted; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPartialTokenRatio(const Sentence1& s1) -> CachedPartialTokenRatio>; template CachedPartialTokenRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialTokenRatio>; +#endif /** * @brief Calculates a weighted ratio based on the other ratio algorithms @@ -10138,11 +10173,13 @@ struct CachedWRatio { rapidfuzz::detail::BlockPatternMatchVector blockmap_s1_sorted; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedWRatio(const Sentence1& s1) -> CachedWRatio>; template CachedWRatio(InputIt1 first1, InputIt1 last1) -> CachedWRatio>; +#endif /** * @brief Calculates a quick ratio between two strings using fuzz.ratio @@ -10250,11 +10287,13 @@ struct CachedQRatio { CachedRatio cached_ratio; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedQRatio(const Sentence1& s1) -> CachedQRatio>; template CachedQRatio(InputIt1 first1, InputIt1 last1) -> CachedQRatio>; +#endif /**@}*/ diff --git a/rapidfuzz/distance/DamerauLevenshtein.hpp b/rapidfuzz/distance/DamerauLevenshtein.hpp index b1209ed7..ce516491 100644 --- a/rapidfuzz/distance/DamerauLevenshtein.hpp +++ b/rapidfuzz/distance/DamerauLevenshtein.hpp @@ -142,11 +142,13 @@ struct CachedDamerauLevenshtein : public detail::CachedDistanceBase s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedDamerauLevenshtein(const Sentence1& s1_) -> CachedDamerauLevenshtein>; template CachedDamerauLevenshtein(InputIt1 first1, InputIt1 last1) -> CachedDamerauLevenshtein>; +#endif } // namespace experimental } // namespace rapidfuzz diff --git a/rapidfuzz/distance/Hamming.hpp b/rapidfuzz/distance/Hamming.hpp index d5160722..b8cb033c 100644 --- a/rapidfuzz/distance/Hamming.hpp +++ b/rapidfuzz/distance/Hamming.hpp @@ -161,11 +161,13 @@ struct CachedHamming : public detail::CachedDistanceBase, bool pad; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedHamming(const Sentence1& s1_, bool pad_ = true) -> CachedHamming>; template CachedHamming(InputIt1 first1, InputIt1 last1, bool pad_ = true) -> CachedHamming>; +#endif /**@}*/ diff --git a/rapidfuzz/distance/Indel.hpp b/rapidfuzz/distance/Indel.hpp index 9cfa902b..16410075 100644 --- a/rapidfuzz/distance/Indel.hpp +++ b/rapidfuzz/distance/Indel.hpp @@ -182,10 +182,12 @@ struct CachedIndel CachedLCSseq scorer; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedIndel(const Sentence1& s1_) -> CachedIndel>; template CachedIndel(InputIt1 first1, InputIt1 last1) -> CachedIndel>; +#endif } // namespace rapidfuzz diff --git a/rapidfuzz/distance/Jaro.hpp b/rapidfuzz/distance/Jaro.hpp index 764332cd..d4cd6ce2 100644 --- a/rapidfuzz/distance/Jaro.hpp +++ b/rapidfuzz/distance/Jaro.hpp @@ -222,10 +222,12 @@ struct CachedJaro : public detail::CachedSimilarityBase, doub detail::BlockPatternMatchVector PM; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedJaro(const Sentence1& s1_) -> CachedJaro>; template CachedJaro(InputIt1 first1, InputIt1 last1) -> CachedJaro>; +#endif } // namespace rapidfuzz diff --git a/rapidfuzz/distance/JaroWinkler.hpp b/rapidfuzz/distance/JaroWinkler.hpp index d2306df3..f1e3c545 100644 --- a/rapidfuzz/distance/JaroWinkler.hpp +++ b/rapidfuzz/distance/JaroWinkler.hpp @@ -199,6 +199,7 @@ struct CachedJaroWinkler : public detail::CachedSimilarityBase explicit CachedJaroWinkler(const Sentence1& s1_, double _prefix_weight = 0.1) -> CachedJaroWinkler>; @@ -206,5 +207,6 @@ explicit CachedJaroWinkler(const Sentence1& s1_, template CachedJaroWinkler(InputIt1 first1, InputIt1 last1, double _prefix_weight = 0.1) -> CachedJaroWinkler>; +#endif } // namespace rapidfuzz diff --git a/rapidfuzz/distance/LCSseq.hpp b/rapidfuzz/distance/LCSseq.hpp index 5ec2e77f..be82e192 100644 --- a/rapidfuzz/distance/LCSseq.hpp +++ b/rapidfuzz/distance/LCSseq.hpp @@ -226,10 +226,12 @@ struct CachedLCSseq detail::BlockPatternMatchVector PM; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedLCSseq(const Sentence1& s1_) -> CachedLCSseq>; template CachedLCSseq(InputIt1 first1, InputIt1 last1) -> CachedLCSseq>; +#endif } // namespace rapidfuzz diff --git a/rapidfuzz/distance/Levenshtein.hpp b/rapidfuzz/distance/Levenshtein.hpp index 9d40966b..d01a6fdf 100644 --- a/rapidfuzz/distance/Levenshtein.hpp +++ b/rapidfuzz/distance/Levenshtein.hpp @@ -481,6 +481,7 @@ struct CachedLevenshtein : public detail::CachedDistanceBase explicit CachedLevenshtein(const Sentence1& s1_, LevenshteinWeightTable aWeights = { 1, 1, 1}) -> CachedLevenshtein>; @@ -488,5 +489,6 @@ explicit CachedLevenshtein(const Sentence1& s1_, LevenshteinWeightTable aWeights template CachedLevenshtein(InputIt1 first1, InputIt1 last1, LevenshteinWeightTable aWeights = {1, 1, 1}) -> CachedLevenshtein>; +#endif } // namespace rapidfuzz diff --git a/rapidfuzz/distance/OSA.hpp b/rapidfuzz/distance/OSA.hpp index 61ec0370..c8395076 100644 --- a/rapidfuzz/distance/OSA.hpp +++ b/rapidfuzz/distance/OSA.hpp @@ -271,11 +271,13 @@ struct CachedOSA detail::BlockPatternMatchVector PM; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template CachedOSA(const Sentence1& s1_) -> CachedOSA>; template CachedOSA(InputIt1 first1, InputIt1 last1) -> CachedOSA>; +#endif /**@}*/ } // namespace rapidfuzz diff --git a/rapidfuzz/distance/Postfix.hpp b/rapidfuzz/distance/Postfix.hpp index 0da830f7..e04c6742 100644 --- a/rapidfuzz/distance/Postfix.hpp +++ b/rapidfuzz/distance/Postfix.hpp @@ -94,12 +94,13 @@ struct CachedPostfix : public detail::CachedSimilarityBase std::vector s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPostfix(const Sentence1& s1_) -> CachedPostfix>; template CachedPostfix(InputIt1 first1, InputIt1 last1) -> CachedPostfix>; - +#endif /**@}*/ } // namespace rapidfuzz diff --git a/rapidfuzz/distance/Prefix.hpp b/rapidfuzz/distance/Prefix.hpp index 64173dc7..19c10017 100644 --- a/rapidfuzz/distance/Prefix.hpp +++ b/rapidfuzz/distance/Prefix.hpp @@ -93,11 +93,13 @@ struct CachedPrefix : public detail::CachedSimilarityBase, std::vector s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPrefix(const Sentence1& s1_) -> CachedPrefix>; template CachedPrefix(InputIt1 first1, InputIt1 last1) -> CachedPrefix>; +#endif /**@}*/ diff --git a/rapidfuzz/fuzz.hpp b/rapidfuzz/fuzz.hpp index d303722c..2991f9f8 100644 --- a/rapidfuzz/fuzz.hpp +++ b/rapidfuzz/fuzz.hpp @@ -116,11 +116,13 @@ struct CachedRatio { CachedIndel cached_indel; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template CachedRatio(const Sentence1& s1) -> CachedRatio>; template CachedRatio(InputIt1 first1, InputIt1 last1) -> CachedRatio>; +#endif template ScoreAlignment partial_ratio_alignment(InputIt1 first1, InputIt1 last1, InputIt2 first2, @@ -189,11 +191,13 @@ struct CachedPartialRatio { CachedRatio cached_ratio; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPartialRatio(const Sentence1& s1) -> CachedPartialRatio>; template CachedPartialRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialRatio>; +#endif /** * @brief Sorts the words in the strings and calculates the fuzz::ratio between @@ -298,11 +302,13 @@ struct CachedTokenSortRatio { CachedRatio cached_ratio; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedTokenSortRatio(const Sentence1& s1) -> CachedTokenSortRatio>; template CachedTokenSortRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenSortRatio>; +#endif /** * @brief Sorts the words in the strings and calculates the fuzz::partial_ratio @@ -356,6 +362,7 @@ struct CachedPartialTokenSortRatio { CachedPartialRatio cached_partial_ratio; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPartialTokenSortRatio(const Sentence1& s1) -> CachedPartialTokenSortRatio>; @@ -363,6 +370,7 @@ explicit CachedPartialTokenSortRatio(const Sentence1& s1) template CachedPartialTokenSortRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialTokenSortRatio>; +#endif /** * @brief Compares the words in the strings based on unique and common words @@ -424,11 +432,13 @@ struct CachedTokenSetRatio { detail::SplittedSentenceView::iterator> tokens_s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedTokenSetRatio(const Sentence1& s1) -> CachedTokenSetRatio>; template CachedTokenSetRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenSetRatio>; +#endif /** * @brief Compares the words in the strings based on unique and common words @@ -481,12 +491,14 @@ struct CachedPartialTokenSetRatio { detail::SplittedSentenceView::iterator> tokens_s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPartialTokenSetRatio(const Sentence1& s1) -> CachedPartialTokenSetRatio>; template CachedPartialTokenSetRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialTokenSetRatio>; +#endif /** * @brief Helper method that returns the maximum of fuzz::token_set_ratio and @@ -543,11 +555,13 @@ struct CachedTokenRatio { CachedRatio cached_ratio_s1_sorted; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedTokenRatio(const Sentence1& s1) -> CachedTokenRatio>; template CachedTokenRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenRatio>; +#endif /** * @brief Helper method that returns the maximum of @@ -604,11 +618,13 @@ struct CachedPartialTokenRatio { std::vector s1_sorted; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPartialTokenRatio(const Sentence1& s1) -> CachedPartialTokenRatio>; template CachedPartialTokenRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialTokenRatio>; +#endif /** * @brief Calculates a weighted ratio based on the other ratio algorithms @@ -664,11 +680,13 @@ struct CachedWRatio { rapidfuzz::detail::BlockPatternMatchVector blockmap_s1_sorted; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedWRatio(const Sentence1& s1) -> CachedWRatio>; template CachedWRatio(InputIt1 first1, InputIt1 last1) -> CachedWRatio>; +#endif /** * @brief Calculates a quick ratio between two strings using fuzz.ratio @@ -776,11 +794,13 @@ struct CachedQRatio { CachedRatio cached_ratio; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedQRatio(const Sentence1& s1) -> CachedQRatio>; template CachedQRatio(InputIt1 first1, InputIt1 last1) -> CachedQRatio>; +#endif /**@}*/ From c95fe98c355c9f76b251237bf32883a8e03e347d Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Wed, 25 Dec 2024 01:38:04 +0100 Subject: [PATCH 03/18] improve support for C++11 --- extras/rapidfuzz_amalgamated.hpp | 188 +++++++++++------- rapidfuzz/details/CharSet.hpp | 6 +- rapidfuzz/details/GrowingHashmap.hpp | 6 +- rapidfuzz/details/Matrix.hpp | 6 +- rapidfuzz/details/PatternMatchVector.hpp | 6 +- rapidfuzz/details/Range.hpp | 14 +- rapidfuzz/details/SplittedSentenceView.hpp | 8 +- rapidfuzz/details/common.hpp | 6 +- rapidfuzz/details/common_impl.hpp | 6 +- rapidfuzz/details/distance.hpp | 42 ++-- rapidfuzz/details/intrinsics.hpp | 6 +- .../distance/DamerauLevenshtein_impl.hpp | 6 +- rapidfuzz/distance/Hamming_impl.hpp | 6 +- rapidfuzz/distance/Indel_impl.hpp | 6 +- rapidfuzz/distance/JaroWinkler.hpp | 8 +- rapidfuzz/distance/JaroWinkler_impl.hpp | 6 +- rapidfuzz/distance/Jaro_impl.hpp | 6 +- rapidfuzz/distance/LCSseq_impl.hpp | 6 +- rapidfuzz/distance/Levenshtein_impl.hpp | 6 +- rapidfuzz/distance/OSA_impl.hpp | 6 +- rapidfuzz/distance/Postfix_impl.hpp | 6 +- rapidfuzz/distance/Prefix_impl.hpp | 6 +- rapidfuzz/fuzz.hpp | 6 +- rapidfuzz/fuzz_impl.hpp | 6 +- rapidfuzz_reference/JaroWinkler.hpp | 2 +- test/common.hpp | 2 +- 26 files changed, 233 insertions(+), 145 deletions(-) diff --git a/extras/rapidfuzz_amalgamated.hpp b/extras/rapidfuzz_amalgamated.hpp index 69f7ff85..52466c0a 100644 --- a/extras/rapidfuzz_amalgamated.hpp +++ b/extras/rapidfuzz_amalgamated.hpp @@ -1,7 +1,7 @@ // Licensed under the MIT License . // SPDX-License-Identifier: MIT // RapidFuzz v1.0.2 -// Generated: 2024-12-25 01:28:38.014689 +// Generated: 2024-12-25 01:37:33.201987 // ---------------------------------------------------------- // This file is an amalgamation of multiple different files. // You probably shouldn't edit it directly. @@ -20,7 +20,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { /* hashmap for integers which can only grow, but can't remove elements */ template @@ -213,7 +214,8 @@ struct HybridGrowingHashmap { std::array m_extendedAscii; }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include #include @@ -221,7 +223,8 @@ struct HybridGrowingHashmap { #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct BitMatrixView { @@ -409,7 +412,8 @@ struct ShiftedBitMatrix { std::vector m_offsets; }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include #include @@ -421,7 +425,8 @@ struct ShiftedBitMatrix { #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { static inline void assume(bool b) { @@ -525,10 +530,10 @@ class Range { return !empty(); } - template < - typename... Dummy, typename IterCopy = Iter, - typename = std::enable_if_t::iterator_category>>> + template ::iterator_category>::value>> constexpr decltype(auto) operator[](size_t n) const { return _first[static_cast(n)]; @@ -623,7 +628,8 @@ inline bool operator>=(const Range& a, const Range& b) template using RangeVec = std::vector>; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include @@ -1278,7 +1284,8 @@ struct is_explicitly_convertible { } // namespace rapidfuzz -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template class SplittedSentenceView { @@ -1286,7 +1293,7 @@ class SplittedSentenceView { using CharT = iter_value_t; SplittedSentenceView(RangeVec sentence) noexcept( - std::is_nothrow_move_constructible_v>) + std::is_nothrow_move_constructible>::value) : m_sentence(std::move(sentence)) {} @@ -1358,7 +1365,8 @@ auto SplittedSentenceView::join() const -> std::vector return joined; } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include #include @@ -1371,7 +1379,8 @@ auto SplittedSentenceView::join() const -> std::vector # include #endif -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template T bit_mask_lsb(size_t n) @@ -1566,13 +1575,15 @@ constexpr void unroll(F&& f) unroll_impl(std::make_integer_sequence{}, std::forward(f)); } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #if defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES) # include #endif -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct DecomposedSet { @@ -1653,13 +1664,15 @@ static inline void rf_aligned_free(void* ptr) /**@}*/ -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template DecomposedSet set_decomposition(SplittedSentenceView a, @@ -1823,7 +1836,8 @@ SplittedSentenceView sorted_split(InputIt first, InputIt last) return SplittedSentenceView(splitted); } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include @@ -3085,12 +3099,13 @@ static inline native_simd operator<(const native_simd& a, const native_sim #endif #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct NormalizedMetricBase { template >> + typename = std::enable_if_t::value>> static double normalized_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, double score_cutoff, double score_hint) { @@ -3107,7 +3122,7 @@ struct NormalizedMetricBase { } template >> + typename = std::enable_if_t::value>> static double normalized_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, double score_cutoff, double score_hint) { @@ -3158,7 +3173,7 @@ struct NormalizedMetricBase { template struct DistanceBase : public NormalizedMetricBase { template >> + typename = std::enable_if_t::value>> static ResType distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { @@ -3174,7 +3189,7 @@ struct DistanceBase : public NormalizedMetricBase { } template >> + typename = std::enable_if_t::value>> static ResType similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { @@ -3213,7 +3228,7 @@ struct DistanceBase : public NormalizedMetricBase { template struct SimilarityBase : public NormalizedMetricBase { template >> + typename = std::enable_if_t::value>> static ResType distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { @@ -3229,7 +3244,7 @@ struct SimilarityBase : public NormalizedMetricBase { } template >> + typename = std::enable_if_t::value>> static ResType similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { @@ -3260,15 +3275,15 @@ struct SimilarityBase : public NormalizedMetricBase { } template - static std::enable_if_t, U> _apply_distance_score_cutoff(U score, - U score_cutoff) + static std::enable_if_t::value, U> _apply_distance_score_cutoff(U score, + U score_cutoff) { return (score <= score_cutoff) ? score : 1.0; } template - static std::enable_if_t, U> _apply_distance_score_cutoff(U score, - U score_cutoff) + static std::enable_if_t::value, U> _apply_distance_score_cutoff(U score, + U score_cutoff) { return (score <= score_cutoff) ? score : score_cutoff + 1; } @@ -3440,15 +3455,15 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { } template - static std::enable_if_t, U> _apply_distance_score_cutoff(U score, - U score_cutoff) + static std::enable_if_t::value, U> _apply_distance_score_cutoff(U score, + U score_cutoff) { return (score <= score_cutoff) ? score : 1.0; } template - static std::enable_if_t, U> _apply_distance_score_cutoff(U score, - U score_cutoff) + static std::enable_if_t::value, U> _apply_distance_score_cutoff(U score, + U score_cutoff) { return (score <= score_cutoff) ? score : score_cutoff + 1; } @@ -3634,15 +3649,15 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { } template - static std::enable_if_t, U> _apply_distance_score_cutoff(U score, - U score_cutoff) + static std::enable_if_t::value, U> _apply_distance_score_cutoff(U score, + U score_cutoff) { return (score <= score_cutoff) ? score : 1.0; } template - static std::enable_if_t, U> _apply_distance_score_cutoff(U score, - U score_cutoff) + static std::enable_if_t::value, U> _apply_distance_score_cutoff(U score, + U score_cutoff) { return (score <= score_cutoff) ? score : score_cutoff + 1; } @@ -3652,9 +3667,11 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { friend T; }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct RowId { @@ -3780,7 +3797,8 @@ class DamerauLevenshtein } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz namespace rapidfuzz { /* the API will require a change when adding custom weights */ @@ -3935,7 +3953,8 @@ CachedDamerauLevenshtein(InputIt1 first1, InputIt1 last1) -> CachedDamerauLevens #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { class Hamming : public DistanceBase::max(), bool> { friend DistanceBase::max(), bool>; @@ -3986,7 +4005,8 @@ Editops hamming_editops(const Range& s1, const Range& s2, bo return ops; } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz namespace rapidfuzz { @@ -4161,7 +4181,8 @@ CachedHamming(InputIt1 first1, InputIt1 last1, bool pad_ = true) -> CachedHammin #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { struct BitvectorHashmap { BitvectorHashmap() : m_map() @@ -4369,14 +4390,16 @@ struct BlockPatternMatchVector { BitMatrix m_extendedAscii; }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct LCSseqResult; @@ -4919,7 +4942,8 @@ class LCSseq : public SimilarityBase #include @@ -5153,7 +5177,8 @@ CachedLCSseq(InputIt1 first1, InputIt1 last1) -> CachedLCSseq size_t indel_distance(const BlockPatternMatchVector& block, const Range& s1, @@ -5210,7 +5235,8 @@ class Indel : public DistanceBase } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz namespace rapidfuzz { @@ -5401,7 +5427,8 @@ CachedIndel(InputIt1 first1, InputIt1 last1) -> CachedIndel #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { struct FlaggedCharsWord { uint64_t P_flag; @@ -6234,7 +6261,8 @@ class Jaro : public SimilarityBase { } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include @@ -6463,7 +6491,8 @@ CachedJaro(InputIt1 first1, InputIt1 last1) -> CachedJaro } // namespace rapidfuzz -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template double jaro_winkler_similarity(const Range& P, const Range& T, double prefix_weight, @@ -6547,12 +6576,13 @@ class JaroWinkler : public SimilarityBase { } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz namespace rapidfuzz { template >> + typename = std::enable_if_t::value>> double jaro_winkler_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 1.0) { @@ -6568,7 +6598,7 @@ double jaro_winkler_distance(const Sentence1& s1, const Sentence2& s2, double pr } template >> + typename = std::enable_if_t::value>> double jaro_winkler_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 0.0) { @@ -6584,7 +6614,7 @@ double jaro_winkler_similarity(const Sentence1& s1, const Sentence2& s2, double } template >> + typename = std::enable_if_t::value>> double jaro_winkler_normalized_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 1.0) { @@ -6600,7 +6630,7 @@ double jaro_winkler_normalized_distance(const Sentence1& s1, const Sentence2& s2 } template >> + typename = std::enable_if_t::value>> double jaro_winkler_normalized_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 0.0) { @@ -6761,7 +6791,8 @@ CachedJaroWinkler(InputIt1 first1, InputIt1 last1, #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { struct LevenshteinRow { uint64_t VP; @@ -8005,7 +8036,8 @@ Editops levenshtein_editops(const Range& s1, const Range& s2 return editops; } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz namespace rapidfuzz { @@ -8498,7 +8530,8 @@ CachedLevenshtein(InputIt1 first1, InputIt1 last1, #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { /** * @brief Bitparallel implementation of the OSA distance. @@ -8760,7 +8793,8 @@ class OSA : public DistanceBase::ma } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz namespace rapidfuzz { @@ -9039,7 +9073,8 @@ CachedOSA(InputIt1 first1, InputIt1 last1) -> CachedOSA>; #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { class Postfix : public SimilarityBase::max()> { friend SimilarityBase::max()>; @@ -9060,7 +9095,8 @@ class Postfix : public SimilarityBase CachedPostfix -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { class Prefix : public SimilarityBase::max()> { friend SimilarityBase::max()>; @@ -9183,7 +9220,8 @@ class Prefix : public SimilarityBase opcodes_apply_vec(const Opcodes& ops, const Sentence1& s1, co #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { /* * taken from https://stackoverflow.com/a/17251989/11335032 @@ -9499,9 +9538,11 @@ struct CharSet { } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz -namespace rapidfuzz::fuzz { +namespace rapidfuzz { +namespace fuzz { /** * @defgroup Fuzz Fuzz @@ -10297,7 +10338,8 @@ CachedQRatio(InputIt1 first1, InputIt1 last1) -> CachedQRatio @@ -10307,7 +10349,8 @@ CachedQRatio(InputIt1 first1, InputIt1 last1) -> CachedQRatio #include -namespace rapidfuzz::fuzz { +namespace rapidfuzz { +namespace fuzz { /********************************************** * ratio @@ -11230,6 +11273,7 @@ double CachedQRatio::similarity(const Sentence2& s2, double score_cutoff return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } -} // namespace rapidfuzz::fuzz +} // namespace fuzz +} // namespace rapidfuzz #endif // RAPIDFUZZ_AMALGAMATED_HPP_INCLUDED diff --git a/rapidfuzz/details/CharSet.hpp b/rapidfuzz/details/CharSet.hpp index a00e3ee1..c6792ab8 100644 --- a/rapidfuzz/details/CharSet.hpp +++ b/rapidfuzz/details/CharSet.hpp @@ -9,7 +9,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { /* * taken from https://stackoverflow.com/a/17251989/11335032 @@ -71,4 +72,5 @@ struct CharSet { } }; -} // namespace rapidfuzz::detail \ No newline at end of file +} // namespace detail +} // namespace rapidfuzz \ No newline at end of file diff --git a/rapidfuzz/details/GrowingHashmap.hpp b/rapidfuzz/details/GrowingHashmap.hpp index ba0edebc..db63ea0b 100644 --- a/rapidfuzz/details/GrowingHashmap.hpp +++ b/rapidfuzz/details/GrowingHashmap.hpp @@ -7,7 +7,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { /* hashmap for integers which can only grow, but can't remove elements */ template @@ -200,4 +201,5 @@ struct HybridGrowingHashmap { std::array m_extendedAscii; }; -} // namespace rapidfuzz::detail \ No newline at end of file +} // namespace detail +} // namespace rapidfuzz \ No newline at end of file diff --git a/rapidfuzz/details/Matrix.hpp b/rapidfuzz/details/Matrix.hpp index 7525f193..6b97718c 100644 --- a/rapidfuzz/details/Matrix.hpp +++ b/rapidfuzz/details/Matrix.hpp @@ -8,7 +8,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct BitMatrixView { @@ -196,4 +197,5 @@ struct ShiftedBitMatrix { std::vector m_offsets; }; -} // namespace rapidfuzz::detail \ No newline at end of file +} // namespace detail +} // namespace rapidfuzz \ No newline at end of file diff --git a/rapidfuzz/details/PatternMatchVector.hpp b/rapidfuzz/details/PatternMatchVector.hpp index 9c56a656..f60f2fa4 100644 --- a/rapidfuzz/details/PatternMatchVector.hpp +++ b/rapidfuzz/details/PatternMatchVector.hpp @@ -11,7 +11,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { struct BitvectorHashmap { BitvectorHashmap() : m_map() @@ -219,4 +220,5 @@ struct BlockPatternMatchVector { BitMatrix m_extendedAscii; }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/details/Range.hpp b/rapidfuzz/details/Range.hpp index b9f744dc..389719cf 100644 --- a/rapidfuzz/details/Range.hpp +++ b/rapidfuzz/details/Range.hpp @@ -13,7 +13,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { static inline void assume(bool b) { @@ -117,10 +118,10 @@ class Range { return !empty(); } - template < - typename... Dummy, typename IterCopy = Iter, - typename = std::enable_if_t::iterator_category>>> + template ::iterator_category>::value>> constexpr decltype(auto) operator[](size_t n) const { return _first[static_cast(n)]; @@ -215,4 +216,5 @@ inline bool operator>=(const Range& a, const Range& b) template using RangeVec = std::vector>; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/details/SplittedSentenceView.hpp b/rapidfuzz/details/SplittedSentenceView.hpp index a6b06955..52653a37 100644 --- a/rapidfuzz/details/SplittedSentenceView.hpp +++ b/rapidfuzz/details/SplittedSentenceView.hpp @@ -3,7 +3,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template class SplittedSentenceView { @@ -11,7 +12,7 @@ class SplittedSentenceView { using CharT = iter_value_t; SplittedSentenceView(RangeVec sentence) noexcept( - std::is_nothrow_move_constructible_v>) + std::is_nothrow_move_constructible>::value) : m_sentence(std::move(sentence)) {} @@ -83,4 +84,5 @@ auto SplittedSentenceView::join() const -> std::vector return joined; } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/details/common.hpp b/rapidfuzz/details/common.hpp index d938958b..61733d8e 100644 --- a/rapidfuzz/details/common.hpp +++ b/rapidfuzz/details/common.hpp @@ -13,7 +13,8 @@ # include #endif -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct DecomposedSet { @@ -94,6 +95,7 @@ static inline void rf_aligned_free(void* ptr) /**@}*/ -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include diff --git a/rapidfuzz/details/common_impl.hpp b/rapidfuzz/details/common_impl.hpp index 2d803442..f2edf077 100644 --- a/rapidfuzz/details/common_impl.hpp +++ b/rapidfuzz/details/common_impl.hpp @@ -5,7 +5,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template DecomposedSet set_decomposition(SplittedSentenceView a, @@ -169,4 +170,5 @@ SplittedSentenceView sorted_split(InputIt first, InputIt last) return SplittedSentenceView(splitted); } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/details/distance.hpp b/rapidfuzz/details/distance.hpp index 837ad4fb..9e230479 100644 --- a/rapidfuzz/details/distance.hpp +++ b/rapidfuzz/details/distance.hpp @@ -9,12 +9,13 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct NormalizedMetricBase { template >> + typename = std::enable_if_t::value>> static double normalized_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, double score_cutoff, double score_hint) { @@ -31,7 +32,7 @@ struct NormalizedMetricBase { } template >> + typename = std::enable_if_t::value>> static double normalized_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, double score_cutoff, double score_hint) { @@ -82,7 +83,7 @@ struct NormalizedMetricBase { template struct DistanceBase : public NormalizedMetricBase { template >> + typename = std::enable_if_t::value>> static ResType distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { @@ -98,7 +99,7 @@ struct DistanceBase : public NormalizedMetricBase { } template >> + typename = std::enable_if_t::value>> static ResType similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { @@ -137,7 +138,7 @@ struct DistanceBase : public NormalizedMetricBase { template struct SimilarityBase : public NormalizedMetricBase { template >> + typename = std::enable_if_t::value>> static ResType distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { @@ -153,7 +154,7 @@ struct SimilarityBase : public NormalizedMetricBase { } template >> + typename = std::enable_if_t::value>> static ResType similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { @@ -184,15 +185,15 @@ struct SimilarityBase : public NormalizedMetricBase { } template - static std::enable_if_t, U> _apply_distance_score_cutoff(U score, - U score_cutoff) + static std::enable_if_t::value, U> _apply_distance_score_cutoff(U score, + U score_cutoff) { return (score <= score_cutoff) ? score : 1.0; } template - static std::enable_if_t, U> _apply_distance_score_cutoff(U score, - U score_cutoff) + static std::enable_if_t::value, U> _apply_distance_score_cutoff(U score, + U score_cutoff) { return (score <= score_cutoff) ? score : score_cutoff + 1; } @@ -364,15 +365,15 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { } template - static std::enable_if_t, U> _apply_distance_score_cutoff(U score, - U score_cutoff) + static std::enable_if_t::value, U> _apply_distance_score_cutoff(U score, + U score_cutoff) { return (score <= score_cutoff) ? score : 1.0; } template - static std::enable_if_t, U> _apply_distance_score_cutoff(U score, - U score_cutoff) + static std::enable_if_t::value, U> _apply_distance_score_cutoff(U score, + U score_cutoff) { return (score <= score_cutoff) ? score : score_cutoff + 1; } @@ -558,15 +559,15 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { } template - static std::enable_if_t, U> _apply_distance_score_cutoff(U score, - U score_cutoff) + static std::enable_if_t::value, U> _apply_distance_score_cutoff(U score, + U score_cutoff) { return (score <= score_cutoff) ? score : 1.0; } template - static std::enable_if_t, U> _apply_distance_score_cutoff(U score, - U score_cutoff) + static std::enable_if_t::value, U> _apply_distance_score_cutoff(U score, + U score_cutoff) { return (score <= score_cutoff) ? score : score_cutoff + 1; } @@ -576,4 +577,5 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { friend T; }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/details/intrinsics.hpp b/rapidfuzz/details/intrinsics.hpp index d5bd0a14..d2d90aea 100644 --- a/rapidfuzz/details/intrinsics.hpp +++ b/rapidfuzz/details/intrinsics.hpp @@ -14,7 +14,8 @@ # include #endif -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template T bit_mask_lsb(size_t n) @@ -209,4 +210,5 @@ constexpr void unroll(F&& f) unroll_impl(std::make_integer_sequence{}, std::forward(f)); } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/distance/DamerauLevenshtein_impl.hpp b/rapidfuzz/distance/DamerauLevenshtein_impl.hpp index 5a122872..b95d2d49 100644 --- a/rapidfuzz/distance/DamerauLevenshtein_impl.hpp +++ b/rapidfuzz/distance/DamerauLevenshtein_impl.hpp @@ -11,7 +11,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct RowId { @@ -137,4 +138,5 @@ class DamerauLevenshtein } }; -} // namespace rapidfuzz::detail \ No newline at end of file +} // namespace detail +} // namespace rapidfuzz \ No newline at end of file diff --git a/rapidfuzz/distance/Hamming_impl.hpp b/rapidfuzz/distance/Hamming_impl.hpp index 8389f902..fa6350cd 100644 --- a/rapidfuzz/distance/Hamming_impl.hpp +++ b/rapidfuzz/distance/Hamming_impl.hpp @@ -6,7 +6,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { class Hamming : public DistanceBase::max(), bool> { friend DistanceBase::max(), bool>; @@ -57,4 +58,5 @@ Editops hamming_editops(const Range& s1, const Range& s2, bo return ops; } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/distance/Indel_impl.hpp b/rapidfuzz/distance/Indel_impl.hpp index d0ab9d50..b3139fab 100644 --- a/rapidfuzz/distance/Indel_impl.hpp +++ b/rapidfuzz/distance/Indel_impl.hpp @@ -8,7 +8,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template size_t indel_distance(const BlockPatternMatchVector& block, const Range& s1, @@ -65,4 +66,5 @@ class Indel : public DistanceBase } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/distance/JaroWinkler.hpp b/rapidfuzz/distance/JaroWinkler.hpp index f1e3c545..0291c001 100644 --- a/rapidfuzz/distance/JaroWinkler.hpp +++ b/rapidfuzz/distance/JaroWinkler.hpp @@ -9,7 +9,7 @@ namespace rapidfuzz { template >> + typename = std::enable_if_t::value>> double jaro_winkler_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 1.0) { @@ -25,7 +25,7 @@ double jaro_winkler_distance(const Sentence1& s1, const Sentence2& s2, double pr } template >> + typename = std::enable_if_t::value>> double jaro_winkler_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 0.0) { @@ -41,7 +41,7 @@ double jaro_winkler_similarity(const Sentence1& s1, const Sentence2& s2, double } template >> + typename = std::enable_if_t::value>> double jaro_winkler_normalized_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 1.0) { @@ -57,7 +57,7 @@ double jaro_winkler_normalized_distance(const Sentence1& s1, const Sentence2& s2 } template >> + typename = std::enable_if_t::value>> double jaro_winkler_normalized_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 0.0) { diff --git a/rapidfuzz/distance/JaroWinkler_impl.hpp b/rapidfuzz/distance/JaroWinkler_impl.hpp index c8eb6575..48352085 100644 --- a/rapidfuzz/distance/JaroWinkler_impl.hpp +++ b/rapidfuzz/distance/JaroWinkler_impl.hpp @@ -3,7 +3,8 @@ #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template double jaro_winkler_similarity(const Range& P, const Range& T, double prefix_weight, @@ -87,4 +88,5 @@ class JaroWinkler : public SimilarityBase { } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/distance/Jaro_impl.hpp b/rapidfuzz/distance/Jaro_impl.hpp index 1c4d946a..ef27f661 100644 --- a/rapidfuzz/distance/Jaro_impl.hpp +++ b/rapidfuzz/distance/Jaro_impl.hpp @@ -9,7 +9,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { struct FlaggedCharsWord { uint64_t P_flag; @@ -842,4 +843,5 @@ class Jaro : public SimilarityBase { } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/distance/LCSseq_impl.hpp b/rapidfuzz/distance/LCSseq_impl.hpp index 98f6fe48..054bba78 100644 --- a/rapidfuzz/distance/LCSseq_impl.hpp +++ b/rapidfuzz/distance/LCSseq_impl.hpp @@ -13,7 +13,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct LCSseqResult; @@ -556,4 +557,5 @@ class LCSseq : public SimilarityBase #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { struct LevenshteinRow { uint64_t VP; @@ -1258,4 +1259,5 @@ Editops levenshtein_editops(const Range& s1, const Range& s2 return editops; } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/distance/OSA_impl.hpp b/rapidfuzz/distance/OSA_impl.hpp index 635122db..cd0839e6 100644 --- a/rapidfuzz/distance/OSA_impl.hpp +++ b/rapidfuzz/distance/OSA_impl.hpp @@ -10,7 +10,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { /** * @brief Bitparallel implementation of the OSA distance. @@ -272,4 +273,5 @@ class OSA : public DistanceBase::ma } }; -} // namespace rapidfuzz::detail \ No newline at end of file +} // namespace detail +} // namespace rapidfuzz \ No newline at end of file diff --git a/rapidfuzz/distance/Postfix_impl.hpp b/rapidfuzz/distance/Postfix_impl.hpp index 0be3abf6..6dbde14d 100644 --- a/rapidfuzz/distance/Postfix_impl.hpp +++ b/rapidfuzz/distance/Postfix_impl.hpp @@ -6,7 +6,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { class Postfix : public SimilarityBase::max()> { friend SimilarityBase::max()>; @@ -27,4 +28,5 @@ class Postfix : public SimilarityBase #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { class Prefix : public SimilarityBase::max()> { friend SimilarityBase::max()>; @@ -27,4 +28,5 @@ class Prefix : public SimilarityBase #include -namespace rapidfuzz::fuzz { +namespace rapidfuzz { +namespace fuzz { /** * @defgroup Fuzz Fuzz @@ -804,6 +805,7 @@ CachedQRatio(InputIt1 first1, InputIt1 last1) -> CachedQRatio diff --git a/rapidfuzz/fuzz_impl.hpp b/rapidfuzz/fuzz_impl.hpp index 1d2eb463..4ec30005 100644 --- a/rapidfuzz/fuzz_impl.hpp +++ b/rapidfuzz/fuzz_impl.hpp @@ -11,7 +11,8 @@ #include #include -namespace rapidfuzz::fuzz { +namespace rapidfuzz { +namespace fuzz { /********************************************** * ratio @@ -934,4 +935,5 @@ double CachedQRatio::similarity(const Sentence2& s2, double score_cutoff return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } -} // namespace rapidfuzz::fuzz +} // namespace fuzz +} // namespace rapidfuzz diff --git a/rapidfuzz_reference/JaroWinkler.hpp b/rapidfuzz_reference/JaroWinkler.hpp index 3b717d8e..c83a18d1 100644 --- a/rapidfuzz_reference/JaroWinkler.hpp +++ b/rapidfuzz_reference/JaroWinkler.hpp @@ -7,7 +7,7 @@ namespace rapidfuzz_reference { template >> + typename = std::enable_if_t::value>> double jaro_winkler_similarity(InputIt1 P_first, InputIt1 P_last, InputIt2 T_first, InputIt2 T_last, double prefix_weight = 0.1, double score_cutoff = 0.0) { diff --git a/test/common.hpp b/test/common.hpp index 427f0906..071f939a 100644 --- a/test/common.hpp +++ b/test/common.hpp @@ -58,7 +58,7 @@ class BidirectionalIterWrapper { T iter; }; -template >> +template ::value>> std::basic_string str_multiply(std::basic_string a, size_t b) { std::basic_string output; From fcff9f26907e5f8a8e4b41e4987b0170f9a6c23a Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Wed, 25 Dec 2024 01:43:02 +0100 Subject: [PATCH 04/18] port loop unrolling to C++14 --- extras/rapidfuzz_amalgamated.hpp | 30 ++++++++++++++++++++++-------- rapidfuzz/details/intrinsics.hpp | 28 +++++++++++++++++++++------- 2 files changed, 43 insertions(+), 15 deletions(-) diff --git a/extras/rapidfuzz_amalgamated.hpp b/extras/rapidfuzz_amalgamated.hpp index 52466c0a..93d7bf0d 100644 --- a/extras/rapidfuzz_amalgamated.hpp +++ b/extras/rapidfuzz_amalgamated.hpp @@ -1,7 +1,7 @@ // Licensed under the MIT License . // SPDX-License-Identifier: MIT // RapidFuzz v1.0.2 -// Generated: 2024-12-25 01:37:33.201987 +// Generated: 2024-12-25 01:42:39.581315 // ---------------------------------------------------------- // This file is an amalgamation of multiple different files. // You probably shouldn't edit it directly. @@ -1563,16 +1563,30 @@ static inline unsigned int countr_zero(uint8_t x) return countr_zero(static_cast(x)); } -template -constexpr void unroll_impl(std::integer_sequence, F&& f) -{ - (f(std::integral_constant{}), ...); -} +template +struct UnrollImpl; + +template +struct UnrollImpl { + template + static void call(F&& f) + { + f(Pos); + UnrollImpl::call(std::forward(f)); + } +}; + +template +struct UnrollImpl { + template + static void call(F&&) + {} +}; -template +template constexpr void unroll(F&& f) { - unroll_impl(std::make_integer_sequence{}, std::forward(f)); + UnrollImpl::call(f); } } // namespace detail diff --git a/rapidfuzz/details/intrinsics.hpp b/rapidfuzz/details/intrinsics.hpp index d2d90aea..ed1c25d4 100644 --- a/rapidfuzz/details/intrinsics.hpp +++ b/rapidfuzz/details/intrinsics.hpp @@ -198,16 +198,30 @@ static inline unsigned int countr_zero(uint8_t x) return countr_zero(static_cast(x)); } -template -constexpr void unroll_impl(std::integer_sequence, F&& f) -{ - (f(std::integral_constant{}), ...); -} +template +struct UnrollImpl; + +template +struct UnrollImpl { + template + static void call(F&& f) + { + f(Pos); + UnrollImpl::call(std::forward(f)); + } +}; + +template +struct UnrollImpl { + template + static void call(F&&) + {} +}; -template +template constexpr void unroll(F&& f) { - unroll_impl(std::make_integer_sequence{}, std::forward(f)); + UnrollImpl::call(f); } } // namespace detail From 6dc8807a43208a249f16522c3368e523d04d9c53 Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Wed, 25 Dec 2024 01:52:53 +0100 Subject: [PATCH 05/18] replace unsupported std::equals overload --- CMakeLists.txt | 2 +- fuzzing/CMakeLists.txt | 2 +- rapidfuzz/details/Range.hpp | 4 +++- rapidfuzz/details/types.hpp | 9 +++------ rapidfuzz/distance/LCSseq_impl.hpp | 6 ++---- rapidfuzz/distance/Levenshtein_impl.hpp | 4 ++-- 6 files changed, 12 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a9f422c..c6caa79e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,7 +50,7 @@ add_library(rapidfuzz INTERFACE) # provide a namespaced alias for clients to 'link' against if RapidFuzz is included as a sub-project add_library(rapidfuzz::rapidfuzz ALIAS rapidfuzz) -target_compile_features(rapidfuzz INTERFACE cxx_std_14) +target_compile_features(rapidfuzz INTERFACE cxx_std_11) target_include_directories(rapidfuzz INTERFACE diff --git a/fuzzing/CMakeLists.txt b/fuzzing/CMakeLists.txt index 4d7a3d38..2a71733d 100644 --- a/fuzzing/CMakeLists.txt +++ b/fuzzing/CMakeLists.txt @@ -1,6 +1,6 @@ function(create_fuzzer fuzzer) add_executable(fuzz_${fuzzer} fuzz_${fuzzer}.cpp) - target_compile_features(fuzz_${fuzzer} PUBLIC cxx_std_14) + target_compile_features(fuzz_${fuzzer} PUBLIC cxx_std_11) target_link_libraries(fuzz_${fuzzer} PRIVATE rapidfuzz::rapidfuzz) target_compile_options(fuzz_${fuzzer} PRIVATE -g -O1 -fsanitize=fuzzer,address -march=native) diff --git a/rapidfuzz/details/Range.hpp b/rapidfuzz/details/Range.hpp index 389719cf..fc7324ca 100644 --- a/rapidfuzz/details/Range.hpp +++ b/rapidfuzz/details/Range.hpp @@ -180,7 +180,9 @@ Range(T& x) -> Range; template inline bool operator==(const Range& a, const Range& b) { - return std::equal(a.begin(), a.end(), b.begin(), b.end()); + if (a.size() != b.size()) return false; + + return std::equal(a.begin(), a.end(), b.begin()); } template diff --git a/rapidfuzz/details/types.hpp b/rapidfuzz/details/types.hpp index 8fec47e9..b8ec393c 100644 --- a/rapidfuzz/details/types.hpp +++ b/rapidfuzz/details/types.hpp @@ -337,13 +337,10 @@ class Editops : private std::vector { inline bool operator==(const Editops& lhs, const Editops& rhs) { - if (lhs.get_src_len() != rhs.get_src_len() || lhs.get_dest_len() != rhs.get_dest_len()) { - return false; - } + if (lhs.get_src_len() != rhs.get_src_len() || lhs.get_dest_len() != rhs.get_dest_len()) return false; + + if (lhs.size() != rhs.size()) return false; - if (lhs.size() != rhs.size()) { - return false; - } return std::equal(lhs.begin(), lhs.end(), rhs.begin()); } diff --git a/rapidfuzz/distance/LCSseq_impl.hpp b/rapidfuzz/distance/LCSseq_impl.hpp index 054bba78..b2314594 100644 --- a/rapidfuzz/distance/LCSseq_impl.hpp +++ b/rapidfuzz/distance/LCSseq_impl.hpp @@ -384,8 +384,7 @@ size_t lcs_seq_similarity(const BlockPatternMatchVector& block, Range size_t max_misses = len1 + len2 - 2 * score_cutoff; /* no edits are allowed */ - if (max_misses == 0 || (max_misses == 1 && len1 == len2)) - return std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()) ? len1 : 0; + if (max_misses == 0 || (max_misses == 1 && len1 == len2)) return s1 == s2 ? len1 : 0; if (max_misses < abs_diff(len1, len2)) return 0; @@ -417,8 +416,7 @@ size_t lcs_seq_similarity(Range s1, Range s2, size_t score_c size_t max_misses = len1 + len2 - 2 * score_cutoff; /* no edits are allowed */ - if (max_misses == 0 || (max_misses == 1 && len1 == len2)) - return std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()) ? len1 : 0; + if (max_misses == 0 || (max_misses == 1 && len1 == len2)) return s1 == s2 ? len1 : 0; if (max_misses < abs_diff(len1, len2)) return 0; diff --git a/rapidfuzz/distance/Levenshtein_impl.hpp b/rapidfuzz/distance/Levenshtein_impl.hpp index 391c95b5..0c89a1e4 100644 --- a/rapidfuzz/distance/Levenshtein_impl.hpp +++ b/rapidfuzz/distance/Levenshtein_impl.hpp @@ -858,7 +858,7 @@ size_t uniform_levenshtein_distance(const BlockPatternMatchVector& block, Range< if (score_hint < 31) score_hint = 31; // when no differences are allowed a direct comparision is sufficient - if (score_cutoff == 0) return !std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()); + if (score_cutoff == 0) return s1 != s2; if (score_cutoff < abs_diff(s1.size(), s2.size())) return score_cutoff + 1; @@ -916,7 +916,7 @@ size_t uniform_levenshtein_distance(Range s1, Range s2, size if (score_hint < 31) score_hint = 31; // when no differences are allowed a direct comparision is sufficient - if (score_cutoff == 0) return !std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()); + if (score_cutoff == 0) return s1 != s2; // at least length difference insertions/deletions required if (score_cutoff < (s1.size() - s2.size())) return score_cutoff + 1; From a673a94b94dcf2638406b1c41a6e9799e367545d Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Wed, 25 Dec 2024 02:01:42 +0100 Subject: [PATCH 06/18] fix types --- extras/rapidfuzz_amalgamated.hpp | 67 ++++++++++++------------- rapidfuzz/details/intrinsics.hpp | 2 +- rapidfuzz/distance/Jaro_impl.hpp | 10 ++-- rapidfuzz/distance/LCSseq_impl.hpp | 14 +++--- rapidfuzz/distance/Levenshtein_impl.hpp | 8 +-- rapidfuzz/distance/OSA_impl.hpp | 8 +-- 6 files changed, 53 insertions(+), 56 deletions(-) diff --git a/extras/rapidfuzz_amalgamated.hpp b/extras/rapidfuzz_amalgamated.hpp index 93d7bf0d..169144e6 100644 --- a/extras/rapidfuzz_amalgamated.hpp +++ b/extras/rapidfuzz_amalgamated.hpp @@ -1,7 +1,7 @@ // Licensed under the MIT License . // SPDX-License-Identifier: MIT // RapidFuzz v1.0.2 -// Generated: 2024-12-25 01:42:39.581315 +// Generated: 2024-12-25 02:01:10.995282 // ---------------------------------------------------------- // This file is an amalgamation of multiple different files. // You probably shouldn't edit it directly. @@ -592,7 +592,9 @@ Range(T& x) -> Range; template inline bool operator==(const Range& a, const Range& b) { - return std::equal(a.begin(), a.end(), b.begin(), b.end()); + if (a.size() != b.size()) return false; + + return std::equal(a.begin(), a.end(), b.begin()); } template @@ -978,13 +980,10 @@ class Editops : private std::vector { inline bool operator==(const Editops& lhs, const Editops& rhs) { - if (lhs.get_src_len() != rhs.get_src_len() || lhs.get_dest_len() != rhs.get_dest_len()) { - return false; - } + if (lhs.get_src_len() != rhs.get_src_len() || lhs.get_dest_len() != rhs.get_dest_len()) return false; + + if (lhs.size() != rhs.size()) return false; - if (lhs.size() != rhs.size()) { - return false; - } return std::equal(lhs.begin(), lhs.end(), rhs.begin()); } @@ -1583,7 +1582,7 @@ struct UnrollImpl { {} }; -template +template constexpr void unroll(F&& f) { UnrollImpl::call(f); @@ -4557,12 +4556,12 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const size_t cur_vec = 0; for (; cur_vec + interleaveCount * vecs <= block.size(); cur_vec += interleaveCount * vecs) { std::array, interleaveCount> S; - unroll([&](auto j) { S[j] = static_cast(-1); }); + unroll([&](auto j) { S[j] = static_cast(-1); }); for (const auto& ch : s2) { - unroll([&](auto j) { + unroll([&](auto j) { alignas(32) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + j * vecs + i, ch); }); + unroll([&](auto i) { stored[i] = block.get(cur_vec + j * vecs + i, ch); }); native_simd Matches(stored.data()); native_simd u = S[j] & Matches; @@ -4570,9 +4569,9 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const }); } - unroll([&](auto j) { + unroll([&](auto j) { auto counts = popcount(~S[j]); - unroll([&](auto i) { + unroll([&](auto i) { *score_iter = (counts[i] >= score_cutoff) ? static_cast(counts[i]) : 0; score_iter++; }); @@ -4584,7 +4583,7 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const for (const auto& ch : s2) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); + unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); native_simd Matches(stored.data()); native_simd u = S & Matches; @@ -4592,7 +4591,7 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const } auto counts = popcount(~S); - unroll([&](auto i) { + unroll([&](auto i) { *score_iter = (counts[i] >= score_cutoff) ? static_cast(counts[i]) : 0; score_iter++; }); @@ -4783,8 +4782,7 @@ size_t lcs_seq_similarity(const BlockPatternMatchVector& block, Range size_t max_misses = len1 + len2 - 2 * score_cutoff; /* no edits are allowed */ - if (max_misses == 0 || (max_misses == 1 && len1 == len2)) - return std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()) ? len1 : 0; + if (max_misses == 0 || (max_misses == 1 && len1 == len2)) return s1 == s2 ? len1 : 0; if (max_misses < abs_diff(len1, len2)) return 0; @@ -4816,8 +4814,7 @@ size_t lcs_seq_similarity(Range s1, Range s2, size_t score_c size_t max_misses = len1 + len2 - 2 * score_cutoff; /* no edits are allowed */ - if (max_misses == 0 || (max_misses == 1 && len1 == len2)) - return std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()) ? len1 : 0; + if (max_misses == 0 || (max_misses == 1 && len1 == len2)) return s1 == s2 ? len1 : 0; if (max_misses < abs_diff(len1, len2)) return 0; @@ -5593,7 +5590,7 @@ static inline void flag_similar_characters_step(const BlockPatternMatchVector& P if (T_j >= 0 && T_j < 256) { for (; word + 3 < last_word - 1; word += 4) { uint64_t PM_j[4]; - unroll([&](auto i) { + unroll([&](auto i) { PM_j[i] = PM.get(word + i, static_cast(T_j)) & (~flagged.P_flag[word + i]); }); @@ -6048,7 +6045,7 @@ jaro_similarity_simd_long_s2(Range scores, const detail::BlockPatternMa size_t j = 0; for (; j < std::min(bounds.maxBound, s2_cur.size()); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -6062,7 +6059,7 @@ jaro_similarity_simd_long_s2(Range scores, const detail::BlockPatternMa for (; j < s2_cur.size(); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -6165,7 +6162,7 @@ jaro_similarity_simd_short_s2(Range scores, const detail::BlockPatternM size_t j = 0; for (; j < std::min(bounds.maxBound, s2_cur.size()); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -6178,7 +6175,7 @@ jaro_similarity_simd_short_s2(Range scores, const detail::BlockPatternM for (; j < s2_cur.size(); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -7138,12 +7135,12 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte native_simd VN(VecType(0)); alignas(alignment) std::array currDist_; - unroll( + unroll( [&](auto i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); native_simd currDist(reinterpret_cast(currDist_.data())); /* mask used when computing D[m,j] in the paper 10^(m-1) */ alignas(alignment) std::array mask_; - unroll([&](auto i) { + unroll([&](auto i) { if (s1_lengths[result_index + i] == 0) mask_[i] = 0; else @@ -7154,7 +7151,7 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte for (const auto& ch : s2) { /* Step 1: Computing D0 */ alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); + unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); native_simd X(stored.data()); auto D0 = (((X & VP) + VP) ^ VP) | X | VN; @@ -7178,7 +7175,7 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte alignas(alignment) std::array distances; currDist.store(distances.data()); - unroll([&](auto i) { + unroll([&](auto i) { size_t score = 0; /* strings of length 0 are not handled correctly */ if (s1_lengths[result_index] == 0) { @@ -7649,7 +7646,7 @@ size_t uniform_levenshtein_distance(const BlockPatternMatchVector& block, Range< if (score_hint < 31) score_hint = 31; // when no differences are allowed a direct comparision is sufficient - if (score_cutoff == 0) return !std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()); + if (score_cutoff == 0) return s1 != s2; if (score_cutoff < abs_diff(s1.size(), s2.size())) return score_cutoff + 1; @@ -7707,7 +7704,7 @@ size_t uniform_levenshtein_distance(Range s1, Range s2, size if (score_hint < 31) score_hint = 31; // when no differences are allowed a direct comparision is sufficient - if (score_cutoff == 0) return !std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()); + if (score_cutoff == 0) return s1 != s2; // at least length difference insertions/deletions required if (score_cutoff < (s1.size() - s2.size())) return score_cutoff + 1; @@ -8635,12 +8632,12 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV native_simd PM_j_old(VecType(0)); alignas(alignment) std::array currDist_; - unroll( + unroll( [&](auto i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); native_simd currDist(reinterpret_cast(currDist_.data())); /* mask used when computing D[m,j] in the paper 10^(m-1) */ alignas(alignment) std::array mask_; - unroll([&](auto i) { + unroll([&](auto i) { if (s1_lengths[result_index + i] == 0) mask_[i] = 0; else @@ -8651,7 +8648,7 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV for (const auto& ch : s2) { /* Step 1: Computing D0 */ alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); + unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); native_simd PM_j(stored.data()); auto TR = (andnot(PM_j, D0) << 1) & PM_j_old; @@ -8678,7 +8675,7 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV alignas(alignment) std::array distances; currDist.store(distances.data()); - unroll([&](auto i) { + unroll([&](auto i) { size_t score = 0; /* strings of length 0 are not handled correctly */ if (s1_lengths[result_index] == 0) { diff --git a/rapidfuzz/details/intrinsics.hpp b/rapidfuzz/details/intrinsics.hpp index ed1c25d4..c69d4fe1 100644 --- a/rapidfuzz/details/intrinsics.hpp +++ b/rapidfuzz/details/intrinsics.hpp @@ -218,7 +218,7 @@ struct UnrollImpl { {} }; -template +template constexpr void unroll(F&& f) { UnrollImpl::call(f); diff --git a/rapidfuzz/distance/Jaro_impl.hpp b/rapidfuzz/distance/Jaro_impl.hpp index ef27f661..576763e1 100644 --- a/rapidfuzz/distance/Jaro_impl.hpp +++ b/rapidfuzz/distance/Jaro_impl.hpp @@ -161,7 +161,7 @@ static inline void flag_similar_characters_step(const BlockPatternMatchVector& P if (T_j >= 0 && T_j < 256) { for (; word + 3 < last_word - 1; word += 4) { uint64_t PM_j[4]; - unroll([&](auto i) { + unroll([&](auto i) { PM_j[i] = PM.get(word + i, static_cast(T_j)) & (~flagged.P_flag[word + i]); }); @@ -616,7 +616,7 @@ jaro_similarity_simd_long_s2(Range scores, const detail::BlockPatternMa size_t j = 0; for (; j < std::min(bounds.maxBound, s2_cur.size()); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -630,7 +630,7 @@ jaro_similarity_simd_long_s2(Range scores, const detail::BlockPatternMa for (; j < s2_cur.size(); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -733,7 +733,7 @@ jaro_similarity_simd_short_s2(Range scores, const detail::BlockPatternM size_t j = 0; for (; j < std::min(bounds.maxBound, s2_cur.size()); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -746,7 +746,7 @@ jaro_similarity_simd_short_s2(Range scores, const detail::BlockPatternM for (; j < s2_cur.size(); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); diff --git a/rapidfuzz/distance/LCSseq_impl.hpp b/rapidfuzz/distance/LCSseq_impl.hpp index b2314594..addf7a9c 100644 --- a/rapidfuzz/distance/LCSseq_impl.hpp +++ b/rapidfuzz/distance/LCSseq_impl.hpp @@ -158,12 +158,12 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const size_t cur_vec = 0; for (; cur_vec + interleaveCount * vecs <= block.size(); cur_vec += interleaveCount * vecs) { std::array, interleaveCount> S; - unroll([&](auto j) { S[j] = static_cast(-1); }); + unroll([&](auto j) { S[j] = static_cast(-1); }); for (const auto& ch : s2) { - unroll([&](auto j) { + unroll([&](auto j) { alignas(32) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + j * vecs + i, ch); }); + unroll([&](auto i) { stored[i] = block.get(cur_vec + j * vecs + i, ch); }); native_simd Matches(stored.data()); native_simd u = S[j] & Matches; @@ -171,9 +171,9 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const }); } - unroll([&](auto j) { + unroll([&](auto j) { auto counts = popcount(~S[j]); - unroll([&](auto i) { + unroll([&](auto i) { *score_iter = (counts[i] >= score_cutoff) ? static_cast(counts[i]) : 0; score_iter++; }); @@ -185,7 +185,7 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const for (const auto& ch : s2) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); + unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); native_simd Matches(stored.data()); native_simd u = S & Matches; @@ -193,7 +193,7 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const } auto counts = popcount(~S); - unroll([&](auto i) { + unroll([&](auto i) { *score_iter = (counts[i] >= score_cutoff) ? static_cast(counts[i]) : 0; score_iter++; }); diff --git a/rapidfuzz/distance/Levenshtein_impl.hpp b/rapidfuzz/distance/Levenshtein_impl.hpp index 0c89a1e4..b2e9a966 100644 --- a/rapidfuzz/distance/Levenshtein_impl.hpp +++ b/rapidfuzz/distance/Levenshtein_impl.hpp @@ -347,12 +347,12 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte native_simd VN(VecType(0)); alignas(alignment) std::array currDist_; - unroll( + unroll( [&](auto i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); native_simd currDist(reinterpret_cast(currDist_.data())); /* mask used when computing D[m,j] in the paper 10^(m-1) */ alignas(alignment) std::array mask_; - unroll([&](auto i) { + unroll([&](auto i) { if (s1_lengths[result_index + i] == 0) mask_[i] = 0; else @@ -363,7 +363,7 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte for (const auto& ch : s2) { /* Step 1: Computing D0 */ alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); + unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); native_simd X(stored.data()); auto D0 = (((X & VP) + VP) ^ VP) | X | VN; @@ -387,7 +387,7 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte alignas(alignment) std::array distances; currDist.store(distances.data()); - unroll([&](auto i) { + unroll([&](auto i) { size_t score = 0; /* strings of length 0 are not handled correctly */ if (s1_lengths[result_index] == 0) { diff --git a/rapidfuzz/distance/OSA_impl.hpp b/rapidfuzz/distance/OSA_impl.hpp index cd0839e6..7be20a96 100644 --- a/rapidfuzz/distance/OSA_impl.hpp +++ b/rapidfuzz/distance/OSA_impl.hpp @@ -101,12 +101,12 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV native_simd PM_j_old(VecType(0)); alignas(alignment) std::array currDist_; - unroll( + unroll( [&](auto i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); native_simd currDist(reinterpret_cast(currDist_.data())); /* mask used when computing D[m,j] in the paper 10^(m-1) */ alignas(alignment) std::array mask_; - unroll([&](auto i) { + unroll([&](auto i) { if (s1_lengths[result_index + i] == 0) mask_[i] = 0; else @@ -117,7 +117,7 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV for (const auto& ch : s2) { /* Step 1: Computing D0 */ alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); + unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); native_simd PM_j(stored.data()); auto TR = (andnot(PM_j, D0) << 1) & PM_j_old; @@ -144,7 +144,7 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV alignas(alignment) std::array distances; currDist.store(distances.data()); - unroll([&](auto i) { + unroll([&](auto i) { size_t score = 0; /* strings of length 0 are not handled correctly */ if (s1_lengths[result_index] == 0) { From 4e928a18bd7751674589ef8320acf415b2c3a1c3 Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Wed, 25 Dec 2024 02:20:33 +0100 Subject: [PATCH 07/18] fix some more issues --- extras/rapidfuzz_amalgamated.hpp | 528 ++++++++++++------------ rapidfuzz/details/Matrix.hpp | 4 +- rapidfuzz/details/Range.hpp | 4 +- rapidfuzz/details/common_impl.hpp | 4 +- rapidfuzz/details/distance.hpp | 24 +- rapidfuzz/details/type_traits.hpp | 3 + rapidfuzz/distance/Jaro.hpp | 6 +- rapidfuzz/distance/JaroWinkler.hpp | 8 +- rapidfuzz/distance/Jaro_impl.hpp | 10 +- rapidfuzz/distance/LCSseq_impl.hpp | 14 +- rapidfuzz/distance/Levenshtein_impl.hpp | 8 +- rapidfuzz/distance/OSA_impl.hpp | 8 +- rapidfuzz_reference/JaroWinkler.hpp | 2 +- test/common.hpp | 4 +- 14 files changed, 318 insertions(+), 309 deletions(-) diff --git a/extras/rapidfuzz_amalgamated.hpp b/extras/rapidfuzz_amalgamated.hpp index 169144e6..2f788060 100644 --- a/extras/rapidfuzz_amalgamated.hpp +++ b/extras/rapidfuzz_amalgamated.hpp @@ -1,7 +1,7 @@ // Licensed under the MIT License . // SPDX-License-Identifier: MIT // RapidFuzz v1.0.2 -// Generated: 2024-12-25 02:01:10.995282 +// Generated: 2024-12-25 02:20:23.323268 // ---------------------------------------------------------- // This file is an amalgamation of multiple different files. // You probably shouldn't edit it directly. @@ -231,8 +231,8 @@ struct BitMatrixView { using value_type = T; using size_type = size_t; - using pointer = std::conditional_t; - using reference = std::conditional_t; + using pointer = typename std::conditional::type; + using reference = typename std::conditional::type; BitMatrixView(pointer vector, size_type cols) noexcept : m_vector(vector), m_cols(cols) {} @@ -425,218 +425,6 @@ struct ShiftedBitMatrix { #include #include -namespace rapidfuzz { -namespace detail { - -static inline void assume(bool b) -{ -#if defined(__clang__) - __builtin_assume(b); -#elif defined(__GNUC__) || defined(__GNUG__) - if (!b) __builtin_unreachable(); -#elif defined(_MSC_VER) - __assume(b); -#endif -} - -template -CharT* to_begin(CharT* s) -{ - return s; -} - -template -auto to_begin(T& x) -{ - using std::begin; - return begin(x); -} - -template -CharT* to_end(CharT* s) -{ - assume(s != nullptr); - while (*s != 0) - ++s; - - return s; -} - -template -auto to_end(T& x) -{ - using std::end; - return end(x); -} - -template -class Range { - Iter _first; - Iter _last; - // todo we might not want to cache the size for iterators - // that can can retrieve the size in O(1) time - size_t _size; - -public: - using value_type = typename std::iterator_traits::value_type; - using iterator = Iter; - using reverse_iterator = std::reverse_iterator; - - constexpr Range(Iter first, Iter last) : _first(first), _last(last) - { - assert(std::distance(_first, _last) >= 0); - _size = static_cast(std::distance(_first, _last)); - } - - constexpr Range(Iter first, Iter last, size_t size) : _first(first), _last(last), _size(size) - {} - - template - constexpr Range(T& x) : _first(to_begin(x)), _last(to_end(x)) - { - assert(std::distance(_first, _last) >= 0); - _size = static_cast(std::distance(_first, _last)); - } - - constexpr iterator begin() const noexcept - { - return _first; - } - constexpr iterator end() const noexcept - { - return _last; - } - - constexpr reverse_iterator rbegin() const noexcept - { - return reverse_iterator(end()); - } - constexpr reverse_iterator rend() const noexcept - { - return reverse_iterator(begin()); - } - - constexpr size_t size() const - { - return _size; - } - - constexpr bool empty() const - { - return size() == 0; - } - explicit constexpr operator bool() const - { - return !empty(); - } - - template ::iterator_category>::value>> - constexpr decltype(auto) operator[](size_t n) const - { - return _first[static_cast(n)]; - } - - constexpr void remove_prefix(size_t n) - { - std::advance(_first, static_cast(n)); - _size -= n; - } - constexpr void remove_suffix(size_t n) - { - std::advance(_last, -static_cast(n)); - _size -= n; - } - - constexpr Range subseq(size_t pos = 0, size_t count = std::numeric_limits::max()) - { - if (pos > size()) throw std::out_of_range("Index out of range in Range::substr"); - - Range res = *this; - res.remove_prefix(pos); - if (count < res.size()) res.remove_suffix(res.size() - count); - - return res; - } - - constexpr decltype(auto) front() const - { - return *(_first); - } - - constexpr decltype(auto) back() const - { - return *(_last - 1); - } - - constexpr Range reversed() const - { - return {rbegin(), rend(), _size}; - } - - friend std::ostream& operator<<(std::ostream& os, const Range& seq) - { - os << "["; - for (auto x : seq) - os << static_cast(x) << ", "; - os << "]"; - return os; - } -}; - -template -Range(T& x) -> Range; - -template -inline bool operator==(const Range& a, const Range& b) -{ - if (a.size() != b.size()) return false; - - return std::equal(a.begin(), a.end(), b.begin()); -} - -template -inline bool operator!=(const Range& a, const Range& b) -{ - return !(a == b); -} - -template -inline bool operator<(const Range& a, const Range& b) -{ - return (std::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end())); -} - -template -inline bool operator>(const Range& a, const Range& b) -{ - return b < a; -} - -template -inline bool operator<=(const Range& a, const Range& b) -{ - return !(b < a); -} - -template -inline bool operator>=(const Range& a, const Range& b) -{ - return !(a < b); -} - -template -using RangeVec = std::vector>; - -} // namespace detail -} // namespace rapidfuzz - -#include - -#include - #include #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) @@ -1281,11 +1069,226 @@ struct is_explicitly_convertible { static bool const value = test(0); }; +template +using rf_enable_if_t = typename std::enable_if::type; + } // namespace rapidfuzz namespace rapidfuzz { namespace detail { +static inline void assume(bool b) +{ +#if defined(__clang__) + __builtin_assume(b); +#elif defined(__GNUC__) || defined(__GNUG__) + if (!b) __builtin_unreachable(); +#elif defined(_MSC_VER) + __assume(b); +#endif +} + +template +CharT* to_begin(CharT* s) +{ + return s; +} + +template +auto to_begin(T& x) +{ + using std::begin; + return begin(x); +} + +template +CharT* to_end(CharT* s) +{ + assume(s != nullptr); + while (*s != 0) + ++s; + + return s; +} + +template +auto to_end(T& x) +{ + using std::end; + return end(x); +} + +template +class Range { + Iter _first; + Iter _last; + // todo we might not want to cache the size for iterators + // that can can retrieve the size in O(1) time + size_t _size; + +public: + using value_type = typename std::iterator_traits::value_type; + using iterator = Iter; + using reverse_iterator = std::reverse_iterator; + + constexpr Range(Iter first, Iter last) : _first(first), _last(last) + { + assert(std::distance(_first, _last) >= 0); + _size = static_cast(std::distance(_first, _last)); + } + + constexpr Range(Iter first, Iter last, size_t size) : _first(first), _last(last), _size(size) + {} + + template + constexpr Range(T& x) : _first(to_begin(x)), _last(to_end(x)) + { + assert(std::distance(_first, _last) >= 0); + _size = static_cast(std::distance(_first, _last)); + } + + constexpr iterator begin() const noexcept + { + return _first; + } + constexpr iterator end() const noexcept + { + return _last; + } + + constexpr reverse_iterator rbegin() const noexcept + { + return reverse_iterator(end()); + } + constexpr reverse_iterator rend() const noexcept + { + return reverse_iterator(begin()); + } + + constexpr size_t size() const + { + return _size; + } + + constexpr bool empty() const + { + return size() == 0; + } + explicit constexpr operator bool() const + { + return !empty(); + } + + template ::iterator_category>::value>> + constexpr decltype(auto) operator[](size_t n) const + { + return _first[static_cast(n)]; + } + + constexpr void remove_prefix(size_t n) + { + std::advance(_first, static_cast(n)); + _size -= n; + } + constexpr void remove_suffix(size_t n) + { + std::advance(_last, -static_cast(n)); + _size -= n; + } + + constexpr Range subseq(size_t pos = 0, size_t count = std::numeric_limits::max()) + { + if (pos > size()) throw std::out_of_range("Index out of range in Range::substr"); + + Range res = *this; + res.remove_prefix(pos); + if (count < res.size()) res.remove_suffix(res.size() - count); + + return res; + } + + constexpr decltype(auto) front() const + { + return *(_first); + } + + constexpr decltype(auto) back() const + { + return *(_last - 1); + } + + constexpr Range reversed() const + { + return {rbegin(), rend(), _size}; + } + + friend std::ostream& operator<<(std::ostream& os, const Range& seq) + { + os << "["; + for (auto x : seq) + os << static_cast(x) << ", "; + os << "]"; + return os; + } +}; + +template +Range(T& x) -> Range; + +template +inline bool operator==(const Range& a, const Range& b) +{ + if (a.size() != b.size()) return false; + + return std::equal(a.begin(), a.end(), b.begin()); +} + +template +inline bool operator!=(const Range& a, const Range& b) +{ + return !(a == b); +} + +template +inline bool operator<(const Range& a, const Range& b) +{ + return (std::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end())); +} + +template +inline bool operator>(const Range& a, const Range& b) +{ + return b < a; +} + +template +inline bool operator<=(const Range& a, const Range& b) +{ + return !(b < a); +} + +template +inline bool operator>=(const Range& a, const Range& b) +{ + return !(a < b); +} + +template +using RangeVec = std::vector>; + +} // namespace detail +} // namespace rapidfuzz + +#include + +#include + +namespace rapidfuzz { +namespace detail { + template class SplittedSentenceView { public: @@ -1733,9 +1736,9 @@ size_t remove_common_prefix(Range& s1, Range& s2) template size_t remove_common_suffix(Range& s1, Range& s2) { - auto rfirst1 = std::rbegin(s1); + auto rfirst1 = s1.rbegin(); size_t suffix = static_cast( - std::distance(rfirst1, std::mismatch(rfirst1, std::rend(s1), std::rbegin(s2), std::rend(s2)).first)); + std::distance(rfirst1, std::mismatch(rfirst1, s1.rend(), s2.rbegin(), s2.rend()).first)); s1.remove_suffix(suffix); s2.remove_suffix(suffix); return suffix; @@ -3118,7 +3121,7 @@ namespace detail { template struct NormalizedMetricBase { template ::value>> + typename = rapidfuzz::rf_enable_if_t::value>> static double normalized_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, double score_cutoff, double score_hint) { @@ -3135,7 +3138,7 @@ struct NormalizedMetricBase { } template ::value>> + typename = rapidfuzz::rf_enable_if_t::value>> static double normalized_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, double score_cutoff, double score_hint) { @@ -3186,7 +3189,7 @@ struct NormalizedMetricBase { template struct DistanceBase : public NormalizedMetricBase { template ::value>> + typename = rapidfuzz::rf_enable_if_t::value>> static ResType distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { @@ -3202,7 +3205,7 @@ struct DistanceBase : public NormalizedMetricBase { } template ::value>> + typename = rapidfuzz::rf_enable_if_t::value>> static ResType similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { @@ -3241,7 +3244,7 @@ struct DistanceBase : public NormalizedMetricBase { template struct SimilarityBase : public NormalizedMetricBase { template ::value>> + typename = rapidfuzz::rf_enable_if_t::value>> static ResType distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { @@ -3257,7 +3260,7 @@ struct SimilarityBase : public NormalizedMetricBase { } template ::value>> + typename = rapidfuzz::rf_enable_if_t::value>> static ResType similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { @@ -3288,15 +3291,15 @@ struct SimilarityBase : public NormalizedMetricBase { } template - static std::enable_if_t::value, U> _apply_distance_score_cutoff(U score, - U score_cutoff) + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) { return (score <= score_cutoff) ? score : 1.0; } template - static std::enable_if_t::value, U> _apply_distance_score_cutoff(U score, - U score_cutoff) + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) { return (score <= score_cutoff) ? score : score_cutoff + 1; } @@ -3468,15 +3471,15 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { } template - static std::enable_if_t::value, U> _apply_distance_score_cutoff(U score, - U score_cutoff) + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) { return (score <= score_cutoff) ? score : 1.0; } template - static std::enable_if_t::value, U> _apply_distance_score_cutoff(U score, - U score_cutoff) + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) { return (score <= score_cutoff) ? score : score_cutoff + 1; } @@ -3662,15 +3665,15 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { } template - static std::enable_if_t::value, U> _apply_distance_score_cutoff(U score, - U score_cutoff) + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) { return (score <= score_cutoff) ? score : 1.0; } template - static std::enable_if_t::value, U> _apply_distance_score_cutoff(U score, - U score_cutoff) + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) { return (score <= score_cutoff) ? score : score_cutoff + 1; } @@ -4556,12 +4559,12 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const size_t cur_vec = 0; for (; cur_vec + interleaveCount * vecs <= block.size(); cur_vec += interleaveCount * vecs) { std::array, interleaveCount> S; - unroll([&](auto j) { S[j] = static_cast(-1); }); + unroll([&](size_t j) { S[j] = static_cast(-1); }); for (const auto& ch : s2) { - unroll([&](auto j) { + unroll([&](size_t j) { alignas(32) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + j * vecs + i, ch); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + j * vecs + i, ch); }); native_simd Matches(stored.data()); native_simd u = S[j] & Matches; @@ -4569,9 +4572,9 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const }); } - unroll([&](auto j) { + unroll([&](size_t j) { auto counts = popcount(~S[j]); - unroll([&](auto i) { + unroll([&](size_t i) { *score_iter = (counts[i] >= score_cutoff) ? static_cast(counts[i]) : 0; score_iter++; }); @@ -4583,7 +4586,7 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const for (const auto& ch : s2) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, ch); }); native_simd Matches(stored.data()); native_simd u = S & Matches; @@ -4591,7 +4594,7 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const } auto counts = popcount(~S); - unroll([&](auto i) { + unroll([&](size_t i) { *score_iter = (counts[i] >= score_cutoff) ? static_cast(counts[i]) : 0; score_iter++; }); @@ -5590,7 +5593,7 @@ static inline void flag_similar_characters_step(const BlockPatternMatchVector& P if (T_j >= 0 && T_j < 256) { for (; word + 3 < last_word - 1; word += 4) { uint64_t PM_j[4]; - unroll([&](auto i) { + unroll([&](size_t i) { PM_j[i] = PM.get(word + i, static_cast(T_j)) & (~flagged.P_flag[word + i]); }); @@ -6045,7 +6048,7 @@ jaro_similarity_simd_long_s2(Range scores, const detail::BlockPatternMa size_t j = 0; for (; j < std::min(bounds.maxBound, s2_cur.size()); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -6059,7 +6062,7 @@ jaro_similarity_simd_long_s2(Range scores, const detail::BlockPatternMa for (; j < s2_cur.size(); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -6162,7 +6165,7 @@ jaro_similarity_simd_short_s2(Range scores, const detail::BlockPatternM size_t j = 0; for (; j < std::min(bounds.maxBound, s2_cur.size()); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -6175,7 +6178,7 @@ jaro_similarity_simd_short_s2(Range scores, const detail::BlockPatternM for (; j < s2_cur.size(); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -6342,10 +6345,11 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, static_assert(MaxLen == 8 || MaxLen == 16 || MaxLen == 32 || MaxLen == 64); - using VecType = typename std::conditional_t< + using VecType = typename std::conditional< MaxLen == 8, uint8_t, - typename std::conditional_t>>; + typename std::conditional::type>::type>:: + type; constexpr static size_t get_vec_size() { @@ -6593,7 +6597,7 @@ class JaroWinkler : public SimilarityBase { namespace rapidfuzz { template ::value>> + typename = rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 1.0) { @@ -6609,7 +6613,7 @@ double jaro_winkler_distance(const Sentence1& s1, const Sentence2& s2, double pr } template ::value>> + typename = rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 0.0) { @@ -6625,7 +6629,7 @@ double jaro_winkler_similarity(const Sentence1& s1, const Sentence2& s2, double } template ::value>> + typename = rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_normalized_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 1.0) { @@ -6641,7 +6645,7 @@ double jaro_winkler_normalized_distance(const Sentence1& s1, const Sentence2& s2 } template ::value>> + typename = rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_normalized_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 0.0) { @@ -7136,11 +7140,11 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte alignas(alignment) std::array currDist_; unroll( - [&](auto i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); + [&](size_t i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); native_simd currDist(reinterpret_cast(currDist_.data())); /* mask used when computing D[m,j] in the paper 10^(m-1) */ alignas(alignment) std::array mask_; - unroll([&](auto i) { + unroll([&](size_t i) { if (s1_lengths[result_index + i] == 0) mask_[i] = 0; else @@ -7151,7 +7155,7 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte for (const auto& ch : s2) { /* Step 1: Computing D0 */ alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, ch); }); native_simd X(stored.data()); auto D0 = (((X & VP) + VP) ^ VP) | X | VN; @@ -7175,7 +7179,7 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte alignas(alignment) std::array distances; currDist.store(distances.data()); - unroll([&](auto i) { + unroll([&](size_t i) { size_t score = 0; /* strings of length 0 are not handled correctly */ if (s1_lengths[result_index] == 0) { @@ -8633,11 +8637,11 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV alignas(alignment) std::array currDist_; unroll( - [&](auto i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); + [&](size_t i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); native_simd currDist(reinterpret_cast(currDist_.data())); /* mask used when computing D[m,j] in the paper 10^(m-1) */ alignas(alignment) std::array mask_; - unroll([&](auto i) { + unroll([&](size_t i) { if (s1_lengths[result_index + i] == 0) mask_[i] = 0; else @@ -8648,7 +8652,7 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV for (const auto& ch : s2) { /* Step 1: Computing D0 */ alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, ch); }); native_simd PM_j(stored.data()); auto TR = (andnot(PM_j, D0) << 1) & PM_j_old; @@ -8675,7 +8679,7 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV alignas(alignment) std::array distances; currDist.store(distances.data()); - unroll([&](auto i) { + unroll([&](size_t i) { size_t score = 0; /* strings of length 0 are not handled correctly */ if (s1_lengths[result_index] == 0) { diff --git a/rapidfuzz/details/Matrix.hpp b/rapidfuzz/details/Matrix.hpp index 6b97718c..3c6b009e 100644 --- a/rapidfuzz/details/Matrix.hpp +++ b/rapidfuzz/details/Matrix.hpp @@ -16,8 +16,8 @@ struct BitMatrixView { using value_type = T; using size_type = size_t; - using pointer = std::conditional_t; - using reference = std::conditional_t; + using pointer = typename std::conditional::type; + using reference = typename std::conditional::type; BitMatrixView(pointer vector, size_type cols) noexcept : m_vector(vector), m_cols(cols) {} diff --git a/rapidfuzz/details/Range.hpp b/rapidfuzz/details/Range.hpp index fc7324ca..a2149347 100644 --- a/rapidfuzz/details/Range.hpp +++ b/rapidfuzz/details/Range.hpp @@ -13,6 +13,8 @@ #include #include +#include + namespace rapidfuzz { namespace detail { @@ -119,7 +121,7 @@ class Range { } template ::iterator_category>::value>> constexpr decltype(auto) operator[](size_t n) const diff --git a/rapidfuzz/details/common_impl.hpp b/rapidfuzz/details/common_impl.hpp index f2edf077..1478b7c5 100644 --- a/rapidfuzz/details/common_impl.hpp +++ b/rapidfuzz/details/common_impl.hpp @@ -54,9 +54,9 @@ size_t remove_common_prefix(Range& s1, Range& s2) template size_t remove_common_suffix(Range& s1, Range& s2) { - auto rfirst1 = std::rbegin(s1); + auto rfirst1 = s1.rbegin(); size_t suffix = static_cast( - std::distance(rfirst1, std::mismatch(rfirst1, std::rend(s1), std::rbegin(s2), std::rend(s2)).first)); + std::distance(rfirst1, std::mismatch(rfirst1, s1.rend(), s2.rbegin(), s2.rend()).first)); s1.remove_suffix(suffix); s2.remove_suffix(suffix); return suffix; diff --git a/rapidfuzz/details/distance.hpp b/rapidfuzz/details/distance.hpp index 9e230479..7ddec0f9 100644 --- a/rapidfuzz/details/distance.hpp +++ b/rapidfuzz/details/distance.hpp @@ -15,7 +15,7 @@ namespace detail { template struct NormalizedMetricBase { template ::value>> + typename = rapidfuzz::rf_enable_if_t::value>> static double normalized_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, double score_cutoff, double score_hint) { @@ -32,7 +32,7 @@ struct NormalizedMetricBase { } template ::value>> + typename = rapidfuzz::rf_enable_if_t::value>> static double normalized_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, double score_cutoff, double score_hint) { @@ -83,7 +83,7 @@ struct NormalizedMetricBase { template struct DistanceBase : public NormalizedMetricBase { template ::value>> + typename = rapidfuzz::rf_enable_if_t::value>> static ResType distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { @@ -99,7 +99,7 @@ struct DistanceBase : public NormalizedMetricBase { } template ::value>> + typename = rapidfuzz::rf_enable_if_t::value>> static ResType similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { @@ -138,7 +138,7 @@ struct DistanceBase : public NormalizedMetricBase { template struct SimilarityBase : public NormalizedMetricBase { template ::value>> + typename = rapidfuzz::rf_enable_if_t::value>> static ResType distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { @@ -154,7 +154,7 @@ struct SimilarityBase : public NormalizedMetricBase { } template ::value>> + typename = rapidfuzz::rf_enable_if_t::value>> static ResType similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { @@ -185,14 +185,14 @@ struct SimilarityBase : public NormalizedMetricBase { } template - static std::enable_if_t::value, U> _apply_distance_score_cutoff(U score, + static rapidfuzz::rf_enable_if_t::value, U> _apply_distance_score_cutoff(U score, U score_cutoff) { return (score <= score_cutoff) ? score : 1.0; } template - static std::enable_if_t::value, U> _apply_distance_score_cutoff(U score, + static rapidfuzz::rf_enable_if_t::value, U> _apply_distance_score_cutoff(U score, U score_cutoff) { return (score <= score_cutoff) ? score : score_cutoff + 1; @@ -365,14 +365,14 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { } template - static std::enable_if_t::value, U> _apply_distance_score_cutoff(U score, + static rapidfuzz::rf_enable_if_t::value, U> _apply_distance_score_cutoff(U score, U score_cutoff) { return (score <= score_cutoff) ? score : 1.0; } template - static std::enable_if_t::value, U> _apply_distance_score_cutoff(U score, + static rapidfuzz::rf_enable_if_t::value, U> _apply_distance_score_cutoff(U score, U score_cutoff) { return (score <= score_cutoff) ? score : score_cutoff + 1; @@ -559,14 +559,14 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { } template - static std::enable_if_t::value, U> _apply_distance_score_cutoff(U score, + static rapidfuzz::rf_enable_if_t::value, U> _apply_distance_score_cutoff(U score, U score_cutoff) { return (score <= score_cutoff) ? score : 1.0; } template - static std::enable_if_t::value, U> _apply_distance_score_cutoff(U score, + static rapidfuzz::rf_enable_if_t::value, U> _apply_distance_score_cutoff(U score, U score_cutoff) { return (score <= score_cutoff) ? score : score_cutoff + 1; diff --git a/rapidfuzz/details/type_traits.hpp b/rapidfuzz/details/type_traits.hpp index 06b6b1e2..65fdb89e 100644 --- a/rapidfuzz/details/type_traits.hpp +++ b/rapidfuzz/details/type_traits.hpp @@ -49,4 +49,7 @@ struct is_explicitly_convertible { static bool const value = test(0); }; +template +using rf_enable_if_t = typename std::enable_if::type; + } // namespace rapidfuzz diff --git a/rapidfuzz/distance/Jaro.hpp b/rapidfuzz/distance/Jaro.hpp index d4cd6ce2..3b0a96ce 100644 --- a/rapidfuzz/distance/Jaro.hpp +++ b/rapidfuzz/distance/Jaro.hpp @@ -72,10 +72,10 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, static_assert(MaxLen == 8 || MaxLen == 16 || MaxLen == 32 || MaxLen == 64); - using VecType = typename std::conditional_t< + using VecType = typename std::conditional< MaxLen == 8, uint8_t, - typename std::conditional_t>>; + typename std::conditional::type>::type>::type; constexpr static size_t get_vec_size() { diff --git a/rapidfuzz/distance/JaroWinkler.hpp b/rapidfuzz/distance/JaroWinkler.hpp index 0291c001..d862f60d 100644 --- a/rapidfuzz/distance/JaroWinkler.hpp +++ b/rapidfuzz/distance/JaroWinkler.hpp @@ -9,7 +9,7 @@ namespace rapidfuzz { template ::value>> + typename = rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 1.0) { @@ -25,7 +25,7 @@ double jaro_winkler_distance(const Sentence1& s1, const Sentence2& s2, double pr } template ::value>> + typename = rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 0.0) { @@ -41,7 +41,7 @@ double jaro_winkler_similarity(const Sentence1& s1, const Sentence2& s2, double } template ::value>> + typename = rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_normalized_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 1.0) { @@ -57,7 +57,7 @@ double jaro_winkler_normalized_distance(const Sentence1& s1, const Sentence2& s2 } template ::value>> + typename = rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_normalized_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 0.0) { diff --git a/rapidfuzz/distance/Jaro_impl.hpp b/rapidfuzz/distance/Jaro_impl.hpp index 576763e1..1fcc2888 100644 --- a/rapidfuzz/distance/Jaro_impl.hpp +++ b/rapidfuzz/distance/Jaro_impl.hpp @@ -161,7 +161,7 @@ static inline void flag_similar_characters_step(const BlockPatternMatchVector& P if (T_j >= 0 && T_j < 256) { for (; word + 3 < last_word - 1; word += 4) { uint64_t PM_j[4]; - unroll([&](auto i) { + unroll([&](size_t i) { PM_j[i] = PM.get(word + i, static_cast(T_j)) & (~flagged.P_flag[word + i]); }); @@ -616,7 +616,7 @@ jaro_similarity_simd_long_s2(Range scores, const detail::BlockPatternMa size_t j = 0; for (; j < std::min(bounds.maxBound, s2_cur.size()); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -630,7 +630,7 @@ jaro_similarity_simd_long_s2(Range scores, const detail::BlockPatternMa for (; j < s2_cur.size(); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -733,7 +733,7 @@ jaro_similarity_simd_short_s2(Range scores, const detail::BlockPatternM size_t j = 0; for (; j < std::min(bounds.maxBound, s2_cur.size()); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -746,7 +746,7 @@ jaro_similarity_simd_short_s2(Range scores, const detail::BlockPatternM for (; j < s2_cur.size(); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); diff --git a/rapidfuzz/distance/LCSseq_impl.hpp b/rapidfuzz/distance/LCSseq_impl.hpp index addf7a9c..65bf52e1 100644 --- a/rapidfuzz/distance/LCSseq_impl.hpp +++ b/rapidfuzz/distance/LCSseq_impl.hpp @@ -158,12 +158,12 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const size_t cur_vec = 0; for (; cur_vec + interleaveCount * vecs <= block.size(); cur_vec += interleaveCount * vecs) { std::array, interleaveCount> S; - unroll([&](auto j) { S[j] = static_cast(-1); }); + unroll([&](size_t j) { S[j] = static_cast(-1); }); for (const auto& ch : s2) { - unroll([&](auto j) { + unroll([&](size_t j) { alignas(32) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + j * vecs + i, ch); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + j * vecs + i, ch); }); native_simd Matches(stored.data()); native_simd u = S[j] & Matches; @@ -171,9 +171,9 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const }); } - unroll([&](auto j) { + unroll([&](size_t j) { auto counts = popcount(~S[j]); - unroll([&](auto i) { + unroll([&](size_t i) { *score_iter = (counts[i] >= score_cutoff) ? static_cast(counts[i]) : 0; score_iter++; }); @@ -185,7 +185,7 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const for (const auto& ch : s2) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, ch); }); native_simd Matches(stored.data()); native_simd u = S & Matches; @@ -193,7 +193,7 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const } auto counts = popcount(~S); - unroll([&](auto i) { + unroll([&](size_t i) { *score_iter = (counts[i] >= score_cutoff) ? static_cast(counts[i]) : 0; score_iter++; }); diff --git a/rapidfuzz/distance/Levenshtein_impl.hpp b/rapidfuzz/distance/Levenshtein_impl.hpp index b2e9a966..9f707352 100644 --- a/rapidfuzz/distance/Levenshtein_impl.hpp +++ b/rapidfuzz/distance/Levenshtein_impl.hpp @@ -348,11 +348,11 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte alignas(alignment) std::array currDist_; unroll( - [&](auto i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); + [&](size_t i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); native_simd currDist(reinterpret_cast(currDist_.data())); /* mask used when computing D[m,j] in the paper 10^(m-1) */ alignas(alignment) std::array mask_; - unroll([&](auto i) { + unroll([&](size_t i) { if (s1_lengths[result_index + i] == 0) mask_[i] = 0; else @@ -363,7 +363,7 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte for (const auto& ch : s2) { /* Step 1: Computing D0 */ alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, ch); }); native_simd X(stored.data()); auto D0 = (((X & VP) + VP) ^ VP) | X | VN; @@ -387,7 +387,7 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte alignas(alignment) std::array distances; currDist.store(distances.data()); - unroll([&](auto i) { + unroll([&](size_t i) { size_t score = 0; /* strings of length 0 are not handled correctly */ if (s1_lengths[result_index] == 0) { diff --git a/rapidfuzz/distance/OSA_impl.hpp b/rapidfuzz/distance/OSA_impl.hpp index 7be20a96..8fa73289 100644 --- a/rapidfuzz/distance/OSA_impl.hpp +++ b/rapidfuzz/distance/OSA_impl.hpp @@ -102,11 +102,11 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV alignas(alignment) std::array currDist_; unroll( - [&](auto i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); + [&](size_t i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); native_simd currDist(reinterpret_cast(currDist_.data())); /* mask used when computing D[m,j] in the paper 10^(m-1) */ alignas(alignment) std::array mask_; - unroll([&](auto i) { + unroll([&](size_t i) { if (s1_lengths[result_index + i] == 0) mask_[i] = 0; else @@ -117,7 +117,7 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV for (const auto& ch : s2) { /* Step 1: Computing D0 */ alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, ch); }); native_simd PM_j(stored.data()); auto TR = (andnot(PM_j, D0) << 1) & PM_j_old; @@ -144,7 +144,7 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV alignas(alignment) std::array distances; currDist.store(distances.data()); - unroll([&](auto i) { + unroll([&](size_t i) { size_t score = 0; /* strings of length 0 are not handled correctly */ if (s1_lengths[result_index] == 0) { diff --git a/rapidfuzz_reference/JaroWinkler.hpp b/rapidfuzz_reference/JaroWinkler.hpp index c83a18d1..94f04f9d 100644 --- a/rapidfuzz_reference/JaroWinkler.hpp +++ b/rapidfuzz_reference/JaroWinkler.hpp @@ -7,7 +7,7 @@ namespace rapidfuzz_reference { template ::value>> + typename = rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_similarity(InputIt1 P_first, InputIt1 P_last, InputIt2 T_first, InputIt2 T_last, double prefix_weight = 0.1, double score_cutoff = 0.0) { diff --git a/test/common.hpp b/test/common.hpp index 071f939a..d3ec903b 100644 --- a/test/common.hpp +++ b/test/common.hpp @@ -49,7 +49,7 @@ class BidirectionalIterWrapper { return *this; } - const auto& operator*() const + const value_type& operator*() const { return *iter; } @@ -58,7 +58,7 @@ class BidirectionalIterWrapper { T iter; }; -template ::value>> +template ::value>> std::basic_string str_multiply(std::basic_string a, size_t b) { std::basic_string output; From 369e4d1d225a624e9841a6250c86db04dd2a5826 Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Wed, 25 Dec 2024 02:57:50 +0100 Subject: [PATCH 08/18] finish C++14 support --- CMakeLists.txt | 2 +- extras/rapidfuzz_amalgamated.hpp | 192 +++++++++++---------- fuzzing/CMakeLists.txt | 2 +- fuzzing/fuzz_levenshtein_editops.cpp | 4 +- rapidfuzz/details/Range.hpp | 19 +- rapidfuzz/details/distance.hpp | 108 ++++++------ rapidfuzz/details/type_traits.hpp | 2 +- rapidfuzz/distance/Hamming.hpp | 4 +- rapidfuzz/distance/Jaro.hpp | 11 +- rapidfuzz/distance/JaroWinkler.hpp | 4 +- rapidfuzz/distance/LCSseq.hpp | 12 +- rapidfuzz/distance/Levenshtein.hpp | 16 +- rapidfuzz/distance/OSA.hpp | 10 +- rapidfuzz/fuzz.hpp | 6 +- rapidfuzz/fuzz_impl.hpp | 30 ++-- rapidfuzz_reference/JaroWinkler.hpp | 2 +- test/common.hpp | 6 + test/distance/tests-DamerauLevenshtein.cpp | 22 ++- test/distance/tests-Hamming.cpp | 31 ++-- test/distance/tests-Indel.cpp | 30 ++-- test/distance/tests-Jaro.cpp | 12 +- test/distance/tests-JaroWinkler.cpp | 4 +- test/distance/tests-LCSseq.cpp | 30 ++-- test/distance/tests-Levenshtein.cpp | 51 +++--- test/distance/tests-OSA.cpp | 7 +- test/tests-common.cpp | 25 +-- 26 files changed, 335 insertions(+), 307 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c6caa79e..1a9f422c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,7 +50,7 @@ add_library(rapidfuzz INTERFACE) # provide a namespaced alias for clients to 'link' against if RapidFuzz is included as a sub-project add_library(rapidfuzz::rapidfuzz ALIAS rapidfuzz) -target_compile_features(rapidfuzz INTERFACE cxx_std_11) +target_compile_features(rapidfuzz INTERFACE cxx_std_14) target_include_directories(rapidfuzz INTERFACE diff --git a/extras/rapidfuzz_amalgamated.hpp b/extras/rapidfuzz_amalgamated.hpp index 2f788060..740d9dbc 100644 --- a/extras/rapidfuzz_amalgamated.hpp +++ b/extras/rapidfuzz_amalgamated.hpp @@ -1,7 +1,7 @@ // Licensed under the MIT License . // SPDX-License-Identifier: MIT // RapidFuzz v1.0.2 -// Generated: 2024-12-25 02:20:23.323268 +// Generated: 2024-12-25 02:45:23.298368 // ---------------------------------------------------------- // This file is an amalgamation of multiple different files. // You probably shouldn't edit it directly. @@ -1141,11 +1141,8 @@ class Range { {} template - constexpr Range(T& x) : _first(to_begin(x)), _last(to_end(x)) - { - assert(std::distance(_first, _last) >= 0); - _size = static_cast(std::distance(_first, _last)); - } + constexpr Range(T& x) : Range(to_begin(x), to_end(x)) + {} constexpr iterator begin() const noexcept { @@ -1235,8 +1232,18 @@ class Range { } }; +template +constexpr auto make_range(Iter first, Iter last) -> Range +{ + return Range(first, last); +} + template -Range(T& x) -> Range; +constexpr auto make_range(T& x) -> Range +{ + auto first = to_begin(x); + return Range(first, to_end(x)); +} template inline bool operator==(const Range& a, const Range& b) @@ -3125,15 +3132,15 @@ struct NormalizedMetricBase { static double normalized_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, double score_cutoff, double score_hint) { - return _normalized_distance(Range(first1, last1), Range(first2, last2), std::forward(args)..., - score_cutoff, score_hint); + return _normalized_distance(make_range(first1, last1), make_range(first2, last2), + std::forward(args)..., score_cutoff, score_hint); } template static double normalized_distance(const Sentence1& s1, const Sentence2& s2, Args... args, double score_cutoff, double score_hint) { - return _normalized_distance(Range(s1), Range(s2), std::forward(args)..., score_cutoff, + return _normalized_distance(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, score_hint); } @@ -3142,16 +3149,16 @@ struct NormalizedMetricBase { static double normalized_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, double score_cutoff, double score_hint) { - return _normalized_similarity(Range(first1, last1), Range(first2, last2), std::forward(args)..., - score_cutoff, score_hint); + return _normalized_similarity(make_range(first1, last1), make_range(first2, last2), + std::forward(args)..., score_cutoff, score_hint); } template static double normalized_similarity(const Sentence1& s1, const Sentence2& s2, Args... args, double score_cutoff, double score_hint) { - return _normalized_similarity(Range(s1), Range(s2), std::forward(args)..., score_cutoff, - score_hint); + return _normalized_similarity(make_range(s1), make_range(s2), std::forward(args)..., + score_cutoff, score_hint); } protected: @@ -3193,7 +3200,7 @@ struct DistanceBase : public NormalizedMetricBase { static ResType distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { - return T::_distance(Range(first1, last1), Range(first2, last2), std::forward(args)..., + return T::_distance(make_range(first1, last1), make_range(first2, last2), std::forward(args)..., score_cutoff, score_hint); } @@ -3201,7 +3208,8 @@ struct DistanceBase : public NormalizedMetricBase { static ResType distance(const Sentence1& s1, const Sentence2& s2, Args... args, ResType score_cutoff, ResType score_hint) { - return T::_distance(Range(s1), Range(s2), std::forward(args)..., score_cutoff, score_hint); + return T::_distance(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, + score_hint); } template { static ResType similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { - return _similarity(Range(first1, last1), Range(first2, last2), std::forward(args)..., + return _similarity(make_range(first1, last1), make_range(first2, last2), std::forward(args)..., score_cutoff, score_hint); } @@ -3217,7 +3225,8 @@ struct DistanceBase : public NormalizedMetricBase { static ResType similarity(const Sentence1& s1, const Sentence2& s2, Args... args, ResType score_cutoff, ResType score_hint) { - return _similarity(Range(s1), Range(s2), std::forward(args)..., score_cutoff, score_hint); + return _similarity(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, + score_hint); } protected: @@ -3248,7 +3257,7 @@ struct SimilarityBase : public NormalizedMetricBase { static ResType distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { - return _distance(Range(first1, last1), Range(first2, last2), std::forward(args)..., + return _distance(make_range(first1, last1), make_range(first2, last2), std::forward(args)..., score_cutoff, score_hint); } @@ -3256,7 +3265,8 @@ struct SimilarityBase : public NormalizedMetricBase { static ResType distance(const Sentence1& s1, const Sentence2& s2, Args... args, ResType score_cutoff, ResType score_hint) { - return _distance(Range(s1), Range(s2), std::forward(args)..., score_cutoff, score_hint); + return _distance(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, + score_hint); } template { static ResType similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { - return T::_similarity(Range(first1, last1), Range(first2, last2), std::forward(args)..., - score_cutoff, score_hint); + return T::_similarity(make_range(first1, last1), make_range(first2, last2), + std::forward(args)..., score_cutoff, score_hint); } template static ResType similarity(const Sentence1& s1, const Sentence2& s2, Args... args, ResType score_cutoff, ResType score_hint) { - return T::_similarity(Range(s1), Range(s2), std::forward(args)..., score_cutoff, score_hint); + return T::_similarity(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, + score_hint); } protected: @@ -3315,27 +3326,27 @@ struct CachedNormalizedMetricBase { double normalized_distance(InputIt2 first2, InputIt2 last2, double score_cutoff = 1.0, double score_hint = 1.0) const { - return _normalized_distance(Range(first2, last2), score_cutoff, score_hint); + return _normalized_distance(make_range(first2, last2), score_cutoff, score_hint); } template double normalized_distance(const Sentence2& s2, double score_cutoff = 1.0, double score_hint = 1.0) const { - return _normalized_distance(Range(s2), score_cutoff, score_hint); + return _normalized_distance(make_range(s2), score_cutoff, score_hint); } template double normalized_similarity(InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0, double score_hint = 0.0) const { - return _normalized_similarity(Range(first2, last2), score_cutoff, score_hint); + return _normalized_similarity(make_range(first2, last2), score_cutoff, score_hint); } template double normalized_similarity(const Sentence2& s2, double score_cutoff = 0.0, double score_hint = 0.0) const { - return _normalized_similarity(Range(s2), score_cutoff, score_hint); + return _normalized_similarity(make_range(s2), score_cutoff, score_hint); } protected: @@ -3376,7 +3387,7 @@ struct CachedDistanceBase : public CachedNormalizedMetricBase { ResType score_hint = static_cast(WorstDistance)) const { const T& derived = static_cast(*this); - return derived._distance(Range(first2, last2), score_cutoff, score_hint); + return derived._distance(make_range(first2, last2), score_cutoff, score_hint); } template @@ -3384,7 +3395,7 @@ struct CachedDistanceBase : public CachedNormalizedMetricBase { ResType score_hint = static_cast(WorstDistance)) const { const T& derived = static_cast(*this); - return derived._distance(Range(s2), score_cutoff, score_hint); + return derived._distance(make_range(s2), score_cutoff, score_hint); } template @@ -3392,14 +3403,14 @@ struct CachedDistanceBase : public CachedNormalizedMetricBase { ResType score_cutoff = static_cast(WorstSimilarity), ResType score_hint = static_cast(WorstSimilarity)) const { - return _similarity(Range(first2, last2), score_cutoff, score_hint); + return _similarity(make_range(first2, last2), score_cutoff, score_hint); } template ResType similarity(const Sentence2& s2, ResType score_cutoff = static_cast(WorstSimilarity), ResType score_hint = static_cast(WorstSimilarity)) const { - return _similarity(Range(s2), score_cutoff, score_hint); + return _similarity(make_range(s2), score_cutoff, score_hint); } protected: @@ -3430,14 +3441,14 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { ResType score_cutoff = static_cast(WorstDistance), ResType score_hint = static_cast(WorstDistance)) const { - return _distance(Range(first2, last2), score_cutoff, score_hint); + return _distance(make_range(first2, last2), score_cutoff, score_hint); } template ResType distance(const Sentence2& s2, ResType score_cutoff = static_cast(WorstDistance), ResType score_hint = static_cast(WorstDistance)) const { - return _distance(Range(s2), score_cutoff, score_hint); + return _distance(make_range(s2), score_cutoff, score_hint); } template @@ -3446,7 +3457,7 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { ResType score_hint = static_cast(WorstSimilarity)) const { const T& derived = static_cast(*this); - return derived._similarity(Range(first2, last2), score_cutoff, score_hint); + return derived._similarity(make_range(first2, last2), score_cutoff, score_hint); } template @@ -3454,7 +3465,7 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { ResType score_hint = static_cast(WorstSimilarity)) const { const T& derived = static_cast(*this); - return derived._similarity(Range(s2), score_cutoff, score_hint); + return derived._similarity(make_range(s2), score_cutoff, score_hint); } protected: @@ -3495,28 +3506,28 @@ struct MultiNormalizedMetricBase { void normalized_distance(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, double score_cutoff = 1.0) const { - _normalized_distance(scores, score_count, Range(first2, last2), score_cutoff); + _normalized_distance(scores, score_count, make_range(first2, last2), score_cutoff); } template void normalized_distance(double* scores, size_t score_count, const Sentence2& s2, double score_cutoff = 1.0) const { - _normalized_distance(scores, score_count, Range(s2), score_cutoff); + _normalized_distance(scores, score_count, make_range(s2), score_cutoff); } template void normalized_similarity(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0) const { - _normalized_similarity(scores, score_count, Range(first2, last2), score_cutoff); + _normalized_similarity(scores, score_count, make_range(first2, last2), score_cutoff); } template void normalized_similarity(double* scores, size_t score_count, const Sentence2& s2, double score_cutoff = 0.0) const { - _normalized_similarity(scores, score_count, Range(s2), score_cutoff); + _normalized_similarity(scores, score_count, make_range(s2), score_cutoff); } protected: @@ -3573,7 +3584,7 @@ struct MultiDistanceBase : public MultiNormalizedMetricBase { ResType score_cutoff = static_cast(WorstDistance)) const { const T& derived = static_cast(*this); - derived._distance(scores, score_count, Range(first2, last2), score_cutoff); + derived._distance(scores, score_count, make_range(first2, last2), score_cutoff); } template @@ -3581,21 +3592,21 @@ struct MultiDistanceBase : public MultiNormalizedMetricBase { ResType score_cutoff = static_cast(WorstDistance)) const { const T& derived = static_cast(*this); - derived._distance(scores, score_count, Range(s2), score_cutoff); + derived._distance(scores, score_count, make_range(s2), score_cutoff); } template void similarity(ResType* scores, size_t score_count, InputIt2 first2, InputIt2 last2, ResType score_cutoff = static_cast(WorstSimilarity)) const { - _similarity(scores, score_count, Range(first2, last2), score_cutoff); + _similarity(scores, score_count, make_range(first2, last2), score_cutoff); } template void similarity(ResType* scores, size_t score_count, const Sentence2& s2, ResType score_cutoff = static_cast(WorstSimilarity)) const { - _similarity(scores, score_count, Range(s2), score_cutoff); + _similarity(scores, score_count, make_range(s2), score_cutoff); } protected: @@ -3624,14 +3635,14 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { void distance(ResType* scores, size_t score_count, InputIt2 first2, InputIt2 last2, ResType score_cutoff = static_cast(WorstDistance)) const { - _distance(scores, score_count, Range(first2, last2), score_cutoff); + _distance(scores, score_count, make_range(first2, last2), score_cutoff); } template void distance(ResType* scores, size_t score_count, const Sentence2& s2, ResType score_cutoff = static_cast(WorstDistance)) const { - _distance(scores, score_count, Range(s2), score_cutoff); + _distance(scores, score_count, make_range(s2), score_cutoff); } template @@ -3639,7 +3650,7 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { ResType score_cutoff = static_cast(WorstSimilarity)) const { const T& derived = static_cast(*this); - derived._similarity(scores, score_count, Range(first2, last2), score_cutoff); + derived._similarity(scores, score_count, make_range(first2, last2), score_cutoff); } template @@ -3647,7 +3658,7 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { ResType score_cutoff = static_cast(WorstSimilarity)) const { const T& derived = static_cast(*this); - derived._similarity(scores, score_count, Range(s2), score_cutoff); + derived._similarity(scores, score_count, make_range(s2), score_cutoff); } protected: @@ -4096,7 +4107,7 @@ template Editops hamming_editops(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, bool pad_ = true, size_t score_hint = std::numeric_limits::max()) { - return detail::hamming_editops(detail::Range(first1, last1), detail::Range(first2, last2), pad_, + return detail::hamming_editops(detail::make_range(first1, last1), detail::make_range(first2, last2), pad_, score_hint); } @@ -4104,7 +4115,7 @@ template Editops hamming_editops(const Sentence1& s1, const Sentence2& s2, bool pad_ = true, size_t score_hint = std::numeric_limits::max()) { - return detail::hamming_editops(detail::Range(s1), detail::Range(s2), pad_, score_hint); + return detail::hamming_editops(detail::make_range(s1), detail::make_range(s2), pad_, score_hint); } /** @@ -5020,13 +5031,13 @@ double lcs_seq_normalized_similarity(const Sentence1& s1, const Sentence2& s2, d template Editops lcs_seq_editops(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2) { - return detail::lcs_seq_editops(detail::Range(first1, last1), detail::Range(first2, last2)); + return detail::lcs_seq_editops(detail::make_range(first1, last1), detail::make_range(first2, last2)); } template Editops lcs_seq_editops(const Sentence1& s1, const Sentence2& s2) { - return detail::lcs_seq_editops(detail::Range(s1), detail::Range(s2)); + return detail::lcs_seq_editops(detail::make_range(s1), detail::make_range(s2)); } #ifdef RAPIDFUZZ_SIMD @@ -5054,7 +5065,7 @@ struct MultiLCSseq : public detail::MultiSimilarityBase, siz else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 64) return native_simd::size; - static_assert(MaxLen <= 64); + static_assert(MaxLen <= 64, "expected MaxLen <= 64"); } constexpr static size_t find_block_count(size_t count) @@ -5119,7 +5130,7 @@ struct MultiLCSseq : public detail::MultiSimilarityBase, siz if (score_count < result_count()) throw std::invalid_argument("scores has to have >= result_count() elements"); - detail::Range scores_(scores, scores + score_count); + auto scores_ = detail::make_range(scores, scores + score_count); RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 8) detail::lcs_simd(scores_, PM, s2, score_cutoff); else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 16) @@ -5157,7 +5168,7 @@ struct CachedLCSseq {} template - CachedLCSseq(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::Range(first1, last1)) + CachedLCSseq(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::make_range(first1, last1)) {} private: @@ -5174,7 +5185,7 @@ struct CachedLCSseq size_t _similarity(const detail::Range& s2, size_t score_cutoff, [[maybe_unused]] size_t score_hint) const { - return detail::lcs_seq_similarity(PM, detail::Range(s1), s2, score_cutoff); + return detail::lcs_seq_similarity(PM, detail::make_range(s1), s2, score_cutoff); } std::vector s1; @@ -6440,7 +6451,7 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, if (score_count < result_count()) throw std::invalid_argument("scores has to have >= result_count() elements"); - detail::Range scores_(scores, scores + score_count); + auto scores_ = detail::make_range(scores, scores + score_count); detail::jaro_similarity_simd(scores_, PM, str_lens, str_lens_size, s2, score_cutoff); } @@ -6472,7 +6483,7 @@ struct CachedJaro : public detail::CachedSimilarityBase, doub {} template - CachedJaro(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::Range(first1, last1)) + CachedJaro(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::make_range(first1, last1)) {} private: @@ -6489,7 +6500,7 @@ struct CachedJaro : public detail::CachedSimilarityBase, doub double _similarity(const detail::Range& s2, double score_cutoff, [[maybe_unused]] double score_hint) const { - return detail::jaro_similarity(PM, detail::Range(s1), s2, score_cutoff); + return detail::jaro_similarity(PM, detail::make_range(s1), s2, score_cutoff); } std::vector s1; @@ -6762,7 +6773,7 @@ struct CachedJaroWinkler : public detail::CachedSimilarityBase CachedJaroWinkler(InputIt1 first1, InputIt1 last1, double _prefix_weight = 0.1) - : prefix_weight(_prefix_weight), s1(first1, last1), PM(detail::Range(first1, last1)) + : prefix_weight(_prefix_weight), s1(first1, last1), PM(detail::make_range(first1, last1)) {} private: @@ -6779,7 +6790,7 @@ struct CachedJaroWinkler : public detail::CachedSimilarityBase& s2, double score_cutoff, [[maybe_unused]] double score_hint) const { - return detail::jaro_winkler_similarity(PM, detail::Range(s1), s2, prefix_weight, score_cutoff); + return detail::jaro_winkler_similarity(PM, detail::make_range(s1), s2, prefix_weight, score_cutoff); } double prefix_weight; @@ -8331,7 +8342,7 @@ template Editops levenshtein_editops(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, size_t score_hint = std::numeric_limits::max()) { - return detail::levenshtein_editops(detail::Range(first1, last1), detail::Range(first2, last2), + return detail::levenshtein_editops(detail::make_range(first1, last1), detail::make_range(first2, last2), score_hint); } @@ -8339,7 +8350,7 @@ template Editops levenshtein_editops(const Sentence1& s1, const Sentence2& s2, size_t score_hint = std::numeric_limits::max()) { - return detail::levenshtein_editops(detail::Range(s1), detail::Range(s2), score_hint); + return detail::levenshtein_editops(detail::make_range(s1), detail::make_range(s2), score_hint); } #ifdef RAPIDFUZZ_SIMD @@ -8368,7 +8379,7 @@ struct MultiLevenshtein : public detail::MultiDistanceBase::size; - static_assert(MaxLen <= 64); + static_assert(MaxLen <= 64, "expected MaxLen <= 64"); } constexpr static size_t find_block_count(size_t count) @@ -8435,7 +8446,7 @@ struct MultiLevenshtein : public detail::MultiDistanceBase= result_count() elements"); - detail::Range scores_(scores, scores + score_count); + auto scores_ = detail::make_range(scores, scores + score_count); RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 8) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 16) @@ -8476,7 +8487,7 @@ struct CachedLevenshtein : public detail::CachedDistanceBase CachedLevenshtein(InputIt1 first1, InputIt1 last1, LevenshteinWeightTable aWeights = {1, 1, 1}) - : s1(first1, last1), PM(detail::Range(first1, last1)), weights(aWeights) + : s1(first1, last1), PM(detail::make_range(first1, last1)), weights(aWeights) {} private: @@ -8502,7 +8513,7 @@ struct CachedLevenshtein : public detail::CachedDistanceBase= weights.insert_cost + weights.delete_cost) { // max can make use of the common divisor of the three weights size_t new_max = detail::ceil_div(score_cutoff, weights.insert_cost); - size_t dist = detail::indel_distance(PM, detail::Range(s1), s2, new_max); + size_t dist = detail::indel_distance(PM, detail::make_range(s1), s2, new_max); dist *= weights.insert_cost; return (dist <= score_cutoff) ? dist : score_cutoff + 1; } } - return detail::generalized_levenshtein_distance(detail::Range(s1), s2, weights, score_cutoff); + return detail::generalized_levenshtein_distance(detail::make_range(s1), s2, weights, score_cutoff); } std::vector s1; @@ -8939,7 +8950,7 @@ struct MultiOSA else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 64) return native_simd::size; - static_assert(MaxLen <= 64); + static_assert(MaxLen <= 64, "expected MaxLen <= 64"); } constexpr static size_t find_block_count(size_t count) @@ -9003,7 +9014,7 @@ struct MultiOSA if (score_count < result_count()) throw std::invalid_argument("scores has to have >= result_count() elements"); - detail::Range scores_(scores, scores + score_count); + auto scores_ = detail::make_range(scores, scores + score_count); RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 8) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 16) @@ -9041,7 +9052,7 @@ struct CachedOSA {} template - CachedOSA(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::Range(first1, last1)) + CachedOSA(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::make_range(first1, last1)) {} private: @@ -9064,9 +9075,9 @@ struct CachedOSA else if (s2.empty()) res = s1.size(); else if (s1.size() < 64) - res = detail::osa_hyrroe2003(PM, detail::Range(s1), s2, score_cutoff); + res = detail::osa_hyrroe2003(PM, detail::make_range(s1), s2, score_cutoff); else - res = detail::osa_hyrroe2003_block(PM, detail::Range(s1), s2, score_cutoff); + res = detail::osa_hyrroe2003_block(PM, detail::make_range(s1), s2, score_cutoff); return (res <= score_cutoff) ? res : score_cutoff + 1; } @@ -9624,7 +9635,7 @@ struct MultiRatio { void similarity(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0) const { - similarity(scores, score_count, detail::Range(first2, last2), score_cutoff); + similarity(scores, score_count, detail::make_range(first2, last2), score_cutoff); } template @@ -10294,13 +10305,13 @@ struct MultiQRatio { void similarity(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0) const { - similarity(scores, score_count, detail::Range(first2, last2), score_cutoff); + similarity(scores, score_count, detail::make_range(first2, last2), score_cutoff); } template void similarity(double* scores, size_t score_count, const Sentence2& s2, double score_cutoff = 0) const { - rapidfuzz::detail::Range s2_(s2); + auto s2_ = detail::make_range(s2); if (s2_.empty()) { for (size_t i = 0; i < str_lens.size(); ++i) scores[i] = 0; @@ -10374,7 +10385,7 @@ namespace fuzz { template double ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double score_cutoff) { - return ratio(detail::Range(first1, last1), detail::Range(first2, last2), score_cutoff); + return ratio(detail::make_range(first1, last1), detail::make_range(first2, last2), score_cutoff); } template @@ -10388,7 +10399,7 @@ template double CachedRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, double score_hint) const { - return similarity(detail::Range(first2, last2), score_cutoff, score_hint); + return similarity(detail::make_range(first2, last2), score_cutoff, score_hint); } template @@ -10444,8 +10455,10 @@ partial_ratio_impl(const detail::Range& s1, const detail::Range(window.first); auto subseq2_first = s2.begin() + static_cast(window.second); - detail::Range subseq1(subseq1_first, subseq1_first + static_cast(len1)); - detail::Range subseq2(subseq2_first, subseq2_first + static_cast(len1)); + auto subseq1 = + detail::make_range(subseq1_first, subseq1_first + static_cast(len1)); + auto subseq2 = + detail::make_range(subseq2_first, subseq2_first + static_cast(len1)); if (scores[window.first] == std::numeric_limits::max()) { scores[window.first] = cached_ratio.cached_indel.distance(subseq1); @@ -10499,7 +10512,7 @@ partial_ratio_impl(const detail::Range& s1, const detail::Range(i)); + auto subseq = rapidfuzz::detail::make_range(s2.begin(), s2.begin() + static_cast(i)); if (!s1_char_set.find(subseq.back())) continue; double ls_ratio = cached_ratio.similarity(subseq, score_cutoff); @@ -10512,7 +10525,7 @@ partial_ratio_impl(const detail::Range& s1, const detail::Range(i), s2.end()); + auto subseq = rapidfuzz::detail::make_range(s2.begin() + static_cast(i), s2.end()); if (!s1_char_set.find(subseq.front())) continue; double ls_ratio = cached_ratio.similarity(subseq, score_cutoff); @@ -10561,8 +10574,8 @@ ScoreAlignment partial_ratio_alignment(InputIt1 first1, InputIt1 last1, if (!len1 || !len2) return ScoreAlignment(static_cast(len1 == len2) * 100.0, 0, len1, 0, len1); - auto s1 = detail::Range(first1, last1); - auto s2 = detail::Range(first2, last2); + auto s1 = detail::make_range(first1, last1); + auto s2 = detail::make_range(first2, last2); auto alignment = fuzz_detail::partial_ratio_impl(s1, s2, score_cutoff); if (alignment.score != 100 && s1.size() == s2.size()) { @@ -10621,8 +10634,8 @@ double CachedPartialRatio::similarity(InputIt2 first2, InputIt2 last2, d if (!len1 || !len2) return static_cast(len1 == len2) * 100.0; - auto s1_ = detail::Range(s1); - auto s2 = detail::Range(first2, last2); + auto s1_ = detail::make_range(s1); + auto s2 = detail::make_range(first2, last2); double score = fuzz_detail::partial_ratio_impl(s1_, s2, cached_ratio, s1_char_set, score_cutoff).score; if (score != 100 && s1_.size() == s2.size()) { @@ -11003,8 +11016,9 @@ double token_ratio(const std::vector& s1_sorted, double result = 0; auto s2_sorted = tokens_b.join(); if (s1_sorted.size() < 65) { - double norm_sim = detail::indel_normalized_similarity(blockmap_s1_sorted, detail::Range(s1_sorted), - detail::Range(s2_sorted), score_cutoff / 100); + double norm_sim = + detail::indel_normalized_similarity(blockmap_s1_sorted, detail::make_range(s1_sorted), + detail::make_range(s2_sorted), score_cutoff / 100); result = norm_sim * 100; } else { @@ -11191,7 +11205,7 @@ CachedWRatio::CachedWRatio(InputIt1 first1, InputIt1 last1) cached_partial_ratio(first1, last1), tokens_s1(detail::sorted_split(std::begin(s1), std::end(s1))), s1_sorted(tokens_s1.join()), - blockmap_s1_sorted(detail::Range(s1_sorted)) + blockmap_s1_sorted(detail::make_range(s1_sorted)) {} template diff --git a/fuzzing/CMakeLists.txt b/fuzzing/CMakeLists.txt index 2a71733d..4d7a3d38 100644 --- a/fuzzing/CMakeLists.txt +++ b/fuzzing/CMakeLists.txt @@ -1,6 +1,6 @@ function(create_fuzzer fuzzer) add_executable(fuzz_${fuzzer} fuzz_${fuzzer}.cpp) - target_compile_features(fuzz_${fuzzer} PUBLIC cxx_std_11) + target_compile_features(fuzz_${fuzzer} PUBLIC cxx_std_14) target_link_libraries(fuzz_${fuzzer} PRIVATE rapidfuzz::rapidfuzz) target_compile_options(fuzz_${fuzzer} PRIVATE -g -O1 -fsanitize=fuzzer,address -march=native) diff --git a/fuzzing/fuzz_levenshtein_editops.cpp b/fuzzing/fuzz_levenshtein_editops.cpp index fe09cb5e..7a540eae 100644 --- a/fuzzing/fuzz_levenshtein_editops.cpp +++ b/fuzzing/fuzz_levenshtein_editops.cpp @@ -30,8 +30,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) validate_editops(s1, s2, score, score); if (s1.size() > 1 && s2.size() > 1) { - auto hpos = rapidfuzz::detail::find_hirschberg_pos(rapidfuzz::detail::Range(s1), - rapidfuzz::detail::Range(s2)); + auto hpos = rapidfuzz::detail::find_hirschberg_pos(rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2)); if (hpos.left_score + hpos.right_score != score) throw std::logic_error("find_hirschberg_pos failed"); } diff --git a/rapidfuzz/details/Range.hpp b/rapidfuzz/details/Range.hpp index a2149347..31b09f25 100644 --- a/rapidfuzz/details/Range.hpp +++ b/rapidfuzz/details/Range.hpp @@ -82,11 +82,8 @@ class Range { {} template - constexpr Range(T& x) : _first(to_begin(x)), _last(to_end(x)) - { - assert(std::distance(_first, _last) >= 0); - _size = static_cast(std::distance(_first, _last)); - } + constexpr Range(T& x) : Range(to_begin(x), to_end(x)) + {} constexpr iterator begin() const noexcept { @@ -176,8 +173,18 @@ class Range { } }; +template +constexpr auto make_range(Iter first, Iter last) -> Range +{ + return Range(first, last); +} + template -Range(T& x) -> Range; +constexpr auto make_range(T& x) -> Range +{ + auto first = to_begin(x); + return Range(first, to_end(x)); +} template inline bool operator==(const Range& a, const Range& b) diff --git a/rapidfuzz/details/distance.hpp b/rapidfuzz/details/distance.hpp index 7ddec0f9..fb87057d 100644 --- a/rapidfuzz/details/distance.hpp +++ b/rapidfuzz/details/distance.hpp @@ -19,15 +19,15 @@ struct NormalizedMetricBase { static double normalized_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, double score_cutoff, double score_hint) { - return _normalized_distance(Range(first1, last1), Range(first2, last2), std::forward(args)..., - score_cutoff, score_hint); + return _normalized_distance(make_range(first1, last1), make_range(first2, last2), + std::forward(args)..., score_cutoff, score_hint); } template static double normalized_distance(const Sentence1& s1, const Sentence2& s2, Args... args, double score_cutoff, double score_hint) { - return _normalized_distance(Range(s1), Range(s2), std::forward(args)..., score_cutoff, + return _normalized_distance(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, score_hint); } @@ -36,16 +36,16 @@ struct NormalizedMetricBase { static double normalized_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, double score_cutoff, double score_hint) { - return _normalized_similarity(Range(first1, last1), Range(first2, last2), std::forward(args)..., - score_cutoff, score_hint); + return _normalized_similarity(make_range(first1, last1), make_range(first2, last2), + std::forward(args)..., score_cutoff, score_hint); } template static double normalized_similarity(const Sentence1& s1, const Sentence2& s2, Args... args, double score_cutoff, double score_hint) { - return _normalized_similarity(Range(s1), Range(s2), std::forward(args)..., score_cutoff, - score_hint); + return _normalized_similarity(make_range(s1), make_range(s2), std::forward(args)..., + score_cutoff, score_hint); } protected: @@ -87,7 +87,7 @@ struct DistanceBase : public NormalizedMetricBase { static ResType distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { - return T::_distance(Range(first1, last1), Range(first2, last2), std::forward(args)..., + return T::_distance(make_range(first1, last1), make_range(first2, last2), std::forward(args)..., score_cutoff, score_hint); } @@ -95,7 +95,8 @@ struct DistanceBase : public NormalizedMetricBase { static ResType distance(const Sentence1& s1, const Sentence2& s2, Args... args, ResType score_cutoff, ResType score_hint) { - return T::_distance(Range(s1), Range(s2), std::forward(args)..., score_cutoff, score_hint); + return T::_distance(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, + score_hint); } template { static ResType similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { - return _similarity(Range(first1, last1), Range(first2, last2), std::forward(args)..., + return _similarity(make_range(first1, last1), make_range(first2, last2), std::forward(args)..., score_cutoff, score_hint); } @@ -111,7 +112,8 @@ struct DistanceBase : public NormalizedMetricBase { static ResType similarity(const Sentence1& s1, const Sentence2& s2, Args... args, ResType score_cutoff, ResType score_hint) { - return _similarity(Range(s1), Range(s2), std::forward(args)..., score_cutoff, score_hint); + return _similarity(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, + score_hint); } protected: @@ -142,7 +144,7 @@ struct SimilarityBase : public NormalizedMetricBase { static ResType distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { - return _distance(Range(first1, last1), Range(first2, last2), std::forward(args)..., + return _distance(make_range(first1, last1), make_range(first2, last2), std::forward(args)..., score_cutoff, score_hint); } @@ -150,7 +152,8 @@ struct SimilarityBase : public NormalizedMetricBase { static ResType distance(const Sentence1& s1, const Sentence2& s2, Args... args, ResType score_cutoff, ResType score_hint) { - return _distance(Range(s1), Range(s2), std::forward(args)..., score_cutoff, score_hint); + return _distance(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, + score_hint); } template { static ResType similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { - return T::_similarity(Range(first1, last1), Range(first2, last2), std::forward(args)..., - score_cutoff, score_hint); + return T::_similarity(make_range(first1, last1), make_range(first2, last2), + std::forward(args)..., score_cutoff, score_hint); } template static ResType similarity(const Sentence1& s1, const Sentence2& s2, Args... args, ResType score_cutoff, ResType score_hint) { - return T::_similarity(Range(s1), Range(s2), std::forward(args)..., score_cutoff, score_hint); + return T::_similarity(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, + score_hint); } protected: @@ -185,15 +189,15 @@ struct SimilarityBase : public NormalizedMetricBase { } template - static rapidfuzz::rf_enable_if_t::value, U> _apply_distance_score_cutoff(U score, - U score_cutoff) + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) { return (score <= score_cutoff) ? score : 1.0; } template - static rapidfuzz::rf_enable_if_t::value, U> _apply_distance_score_cutoff(U score, - U score_cutoff) + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) { return (score <= score_cutoff) ? score : score_cutoff + 1; } @@ -209,27 +213,27 @@ struct CachedNormalizedMetricBase { double normalized_distance(InputIt2 first2, InputIt2 last2, double score_cutoff = 1.0, double score_hint = 1.0) const { - return _normalized_distance(Range(first2, last2), score_cutoff, score_hint); + return _normalized_distance(make_range(first2, last2), score_cutoff, score_hint); } template double normalized_distance(const Sentence2& s2, double score_cutoff = 1.0, double score_hint = 1.0) const { - return _normalized_distance(Range(s2), score_cutoff, score_hint); + return _normalized_distance(make_range(s2), score_cutoff, score_hint); } template double normalized_similarity(InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0, double score_hint = 0.0) const { - return _normalized_similarity(Range(first2, last2), score_cutoff, score_hint); + return _normalized_similarity(make_range(first2, last2), score_cutoff, score_hint); } template double normalized_similarity(const Sentence2& s2, double score_cutoff = 0.0, double score_hint = 0.0) const { - return _normalized_similarity(Range(s2), score_cutoff, score_hint); + return _normalized_similarity(make_range(s2), score_cutoff, score_hint); } protected: @@ -270,7 +274,7 @@ struct CachedDistanceBase : public CachedNormalizedMetricBase { ResType score_hint = static_cast(WorstDistance)) const { const T& derived = static_cast(*this); - return derived._distance(Range(first2, last2), score_cutoff, score_hint); + return derived._distance(make_range(first2, last2), score_cutoff, score_hint); } template @@ -278,7 +282,7 @@ struct CachedDistanceBase : public CachedNormalizedMetricBase { ResType score_hint = static_cast(WorstDistance)) const { const T& derived = static_cast(*this); - return derived._distance(Range(s2), score_cutoff, score_hint); + return derived._distance(make_range(s2), score_cutoff, score_hint); } template @@ -286,14 +290,14 @@ struct CachedDistanceBase : public CachedNormalizedMetricBase { ResType score_cutoff = static_cast(WorstSimilarity), ResType score_hint = static_cast(WorstSimilarity)) const { - return _similarity(Range(first2, last2), score_cutoff, score_hint); + return _similarity(make_range(first2, last2), score_cutoff, score_hint); } template ResType similarity(const Sentence2& s2, ResType score_cutoff = static_cast(WorstSimilarity), ResType score_hint = static_cast(WorstSimilarity)) const { - return _similarity(Range(s2), score_cutoff, score_hint); + return _similarity(make_range(s2), score_cutoff, score_hint); } protected: @@ -324,14 +328,14 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { ResType score_cutoff = static_cast(WorstDistance), ResType score_hint = static_cast(WorstDistance)) const { - return _distance(Range(first2, last2), score_cutoff, score_hint); + return _distance(make_range(first2, last2), score_cutoff, score_hint); } template ResType distance(const Sentence2& s2, ResType score_cutoff = static_cast(WorstDistance), ResType score_hint = static_cast(WorstDistance)) const { - return _distance(Range(s2), score_cutoff, score_hint); + return _distance(make_range(s2), score_cutoff, score_hint); } template @@ -340,7 +344,7 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { ResType score_hint = static_cast(WorstSimilarity)) const { const T& derived = static_cast(*this); - return derived._similarity(Range(first2, last2), score_cutoff, score_hint); + return derived._similarity(make_range(first2, last2), score_cutoff, score_hint); } template @@ -348,7 +352,7 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { ResType score_hint = static_cast(WorstSimilarity)) const { const T& derived = static_cast(*this); - return derived._similarity(Range(s2), score_cutoff, score_hint); + return derived._similarity(make_range(s2), score_cutoff, score_hint); } protected: @@ -365,15 +369,15 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { } template - static rapidfuzz::rf_enable_if_t::value, U> _apply_distance_score_cutoff(U score, - U score_cutoff) + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) { return (score <= score_cutoff) ? score : 1.0; } template - static rapidfuzz::rf_enable_if_t::value, U> _apply_distance_score_cutoff(U score, - U score_cutoff) + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) { return (score <= score_cutoff) ? score : score_cutoff + 1; } @@ -389,28 +393,28 @@ struct MultiNormalizedMetricBase { void normalized_distance(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, double score_cutoff = 1.0) const { - _normalized_distance(scores, score_count, Range(first2, last2), score_cutoff); + _normalized_distance(scores, score_count, make_range(first2, last2), score_cutoff); } template void normalized_distance(double* scores, size_t score_count, const Sentence2& s2, double score_cutoff = 1.0) const { - _normalized_distance(scores, score_count, Range(s2), score_cutoff); + _normalized_distance(scores, score_count, make_range(s2), score_cutoff); } template void normalized_similarity(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0) const { - _normalized_similarity(scores, score_count, Range(first2, last2), score_cutoff); + _normalized_similarity(scores, score_count, make_range(first2, last2), score_cutoff); } template void normalized_similarity(double* scores, size_t score_count, const Sentence2& s2, double score_cutoff = 0.0) const { - _normalized_similarity(scores, score_count, Range(s2), score_cutoff); + _normalized_similarity(scores, score_count, make_range(s2), score_cutoff); } protected: @@ -467,7 +471,7 @@ struct MultiDistanceBase : public MultiNormalizedMetricBase { ResType score_cutoff = static_cast(WorstDistance)) const { const T& derived = static_cast(*this); - derived._distance(scores, score_count, Range(first2, last2), score_cutoff); + derived._distance(scores, score_count, make_range(first2, last2), score_cutoff); } template @@ -475,21 +479,21 @@ struct MultiDistanceBase : public MultiNormalizedMetricBase { ResType score_cutoff = static_cast(WorstDistance)) const { const T& derived = static_cast(*this); - derived._distance(scores, score_count, Range(s2), score_cutoff); + derived._distance(scores, score_count, make_range(s2), score_cutoff); } template void similarity(ResType* scores, size_t score_count, InputIt2 first2, InputIt2 last2, ResType score_cutoff = static_cast(WorstSimilarity)) const { - _similarity(scores, score_count, Range(first2, last2), score_cutoff); + _similarity(scores, score_count, make_range(first2, last2), score_cutoff); } template void similarity(ResType* scores, size_t score_count, const Sentence2& s2, ResType score_cutoff = static_cast(WorstSimilarity)) const { - _similarity(scores, score_count, Range(s2), score_cutoff); + _similarity(scores, score_count, make_range(s2), score_cutoff); } protected: @@ -518,14 +522,14 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { void distance(ResType* scores, size_t score_count, InputIt2 first2, InputIt2 last2, ResType score_cutoff = static_cast(WorstDistance)) const { - _distance(scores, score_count, Range(first2, last2), score_cutoff); + _distance(scores, score_count, make_range(first2, last2), score_cutoff); } template void distance(ResType* scores, size_t score_count, const Sentence2& s2, ResType score_cutoff = static_cast(WorstDistance)) const { - _distance(scores, score_count, Range(s2), score_cutoff); + _distance(scores, score_count, make_range(s2), score_cutoff); } template @@ -533,7 +537,7 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { ResType score_cutoff = static_cast(WorstSimilarity)) const { const T& derived = static_cast(*this); - derived._similarity(scores, score_count, Range(first2, last2), score_cutoff); + derived._similarity(scores, score_count, make_range(first2, last2), score_cutoff); } template @@ -541,7 +545,7 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { ResType score_cutoff = static_cast(WorstSimilarity)) const { const T& derived = static_cast(*this); - derived._similarity(scores, score_count, Range(s2), score_cutoff); + derived._similarity(scores, score_count, make_range(s2), score_cutoff); } protected: @@ -559,15 +563,15 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { } template - static rapidfuzz::rf_enable_if_t::value, U> _apply_distance_score_cutoff(U score, - U score_cutoff) + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) { return (score <= score_cutoff) ? score : 1.0; } template - static rapidfuzz::rf_enable_if_t::value, U> _apply_distance_score_cutoff(U score, - U score_cutoff) + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) { return (score <= score_cutoff) ? score : score_cutoff + 1; } diff --git a/rapidfuzz/details/type_traits.hpp b/rapidfuzz/details/type_traits.hpp index 65fdb89e..2f05d2b4 100644 --- a/rapidfuzz/details/type_traits.hpp +++ b/rapidfuzz/details/type_traits.hpp @@ -49,7 +49,7 @@ struct is_explicitly_convertible { static bool const value = test(0); }; -template +template using rf_enable_if_t = typename std::enable_if::type; } // namespace rapidfuzz diff --git a/rapidfuzz/distance/Hamming.hpp b/rapidfuzz/distance/Hamming.hpp index b8cb033c..669e7e70 100644 --- a/rapidfuzz/distance/Hamming.hpp +++ b/rapidfuzz/distance/Hamming.hpp @@ -78,7 +78,7 @@ template Editops hamming_editops(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, bool pad_ = true, size_t score_hint = std::numeric_limits::max()) { - return detail::hamming_editops(detail::Range(first1, last1), detail::Range(first2, last2), pad_, + return detail::hamming_editops(detail::make_range(first1, last1), detail::make_range(first2, last2), pad_, score_hint); } @@ -86,7 +86,7 @@ template Editops hamming_editops(const Sentence1& s1, const Sentence2& s2, bool pad_ = true, size_t score_hint = std::numeric_limits::max()) { - return detail::hamming_editops(detail::Range(s1), detail::Range(s2), pad_, score_hint); + return detail::hamming_editops(detail::make_range(s1), detail::make_range(s2), pad_, score_hint); } /** diff --git a/rapidfuzz/distance/Jaro.hpp b/rapidfuzz/distance/Jaro.hpp index 3b0a96ce..954e75b4 100644 --- a/rapidfuzz/distance/Jaro.hpp +++ b/rapidfuzz/distance/Jaro.hpp @@ -70,12 +70,13 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, friend detail::MultiSimilarityBase, double, 0, 1>; friend detail::MultiNormalizedMetricBase, double>; - static_assert(MaxLen == 8 || MaxLen == 16 || MaxLen == 32 || MaxLen == 64); + static_assert(MaxLen == 8 || MaxLen == 16 || MaxLen == 32 || MaxLen == 64, "incorrect MaxLen used"); using VecType = typename std::conditional< MaxLen == 8, uint8_t, typename std::conditional::type>::type>::type; + typename std::conditional::type>::type>:: + type; constexpr static size_t get_vec_size() { @@ -166,7 +167,7 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, if (score_count < result_count()) throw std::invalid_argument("scores has to have >= result_count() elements"); - detail::Range scores_(scores, scores + score_count); + auto scores_ = detail::make_range(scores, scores + score_count); detail::jaro_similarity_simd(scores_, PM, str_lens, str_lens_size, s2, score_cutoff); } @@ -198,7 +199,7 @@ struct CachedJaro : public detail::CachedSimilarityBase, doub {} template - CachedJaro(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::Range(first1, last1)) + CachedJaro(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::make_range(first1, last1)) {} private: @@ -215,7 +216,7 @@ struct CachedJaro : public detail::CachedSimilarityBase, doub double _similarity(const detail::Range& s2, double score_cutoff, [[maybe_unused]] double score_hint) const { - return detail::jaro_similarity(PM, detail::Range(s1), s2, score_cutoff); + return detail::jaro_similarity(PM, detail::make_range(s1), s2, score_cutoff); } std::vector s1; diff --git a/rapidfuzz/distance/JaroWinkler.hpp b/rapidfuzz/distance/JaroWinkler.hpp index d862f60d..4f44e75c 100644 --- a/rapidfuzz/distance/JaroWinkler.hpp +++ b/rapidfuzz/distance/JaroWinkler.hpp @@ -174,7 +174,7 @@ struct CachedJaroWinkler : public detail::CachedSimilarityBase CachedJaroWinkler(InputIt1 first1, InputIt1 last1, double _prefix_weight = 0.1) - : prefix_weight(_prefix_weight), s1(first1, last1), PM(detail::Range(first1, last1)) + : prefix_weight(_prefix_weight), s1(first1, last1), PM(detail::make_range(first1, last1)) {} private: @@ -191,7 +191,7 @@ struct CachedJaroWinkler : public detail::CachedSimilarityBase& s2, double score_cutoff, [[maybe_unused]] double score_hint) const { - return detail::jaro_winkler_similarity(PM, detail::Range(s1), s2, prefix_weight, score_cutoff); + return detail::jaro_winkler_similarity(PM, detail::make_range(s1), s2, prefix_weight, score_cutoff); } double prefix_weight; diff --git a/rapidfuzz/distance/LCSseq.hpp b/rapidfuzz/distance/LCSseq.hpp index be82e192..601528f6 100644 --- a/rapidfuzz/distance/LCSseq.hpp +++ b/rapidfuzz/distance/LCSseq.hpp @@ -65,13 +65,13 @@ double lcs_seq_normalized_similarity(const Sentence1& s1, const Sentence2& s2, d template Editops lcs_seq_editops(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2) { - return detail::lcs_seq_editops(detail::Range(first1, last1), detail::Range(first2, last2)); + return detail::lcs_seq_editops(detail::make_range(first1, last1), detail::make_range(first2, last2)); } template Editops lcs_seq_editops(const Sentence1& s1, const Sentence2& s2) { - return detail::lcs_seq_editops(detail::Range(s1), detail::Range(s2)); + return detail::lcs_seq_editops(detail::make_range(s1), detail::make_range(s2)); } #ifdef RAPIDFUZZ_SIMD @@ -99,7 +99,7 @@ struct MultiLCSseq : public detail::MultiSimilarityBase, siz else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 64) return native_simd::size; - static_assert(MaxLen <= 64); + static_assert(MaxLen <= 64, "expected MaxLen <= 64"); } constexpr static size_t find_block_count(size_t count) @@ -164,7 +164,7 @@ struct MultiLCSseq : public detail::MultiSimilarityBase, siz if (score_count < result_count()) throw std::invalid_argument("scores has to have >= result_count() elements"); - detail::Range scores_(scores, scores + score_count); + auto scores_ = detail::make_range(scores, scores + score_count); RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 8) detail::lcs_simd(scores_, PM, s2, score_cutoff); else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 16) @@ -202,7 +202,7 @@ struct CachedLCSseq {} template - CachedLCSseq(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::Range(first1, last1)) + CachedLCSseq(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::make_range(first1, last1)) {} private: @@ -219,7 +219,7 @@ struct CachedLCSseq size_t _similarity(const detail::Range& s2, size_t score_cutoff, [[maybe_unused]] size_t score_hint) const { - return detail::lcs_seq_similarity(PM, detail::Range(s1), s2, score_cutoff); + return detail::lcs_seq_similarity(PM, detail::make_range(s1), s2, score_cutoff); } std::vector s1; diff --git a/rapidfuzz/distance/Levenshtein.hpp b/rapidfuzz/distance/Levenshtein.hpp index d01a6fdf..62504a1e 100644 --- a/rapidfuzz/distance/Levenshtein.hpp +++ b/rapidfuzz/distance/Levenshtein.hpp @@ -283,7 +283,7 @@ template Editops levenshtein_editops(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, size_t score_hint = std::numeric_limits::max()) { - return detail::levenshtein_editops(detail::Range(first1, last1), detail::Range(first2, last2), + return detail::levenshtein_editops(detail::make_range(first1, last1), detail::make_range(first2, last2), score_hint); } @@ -291,7 +291,7 @@ template Editops levenshtein_editops(const Sentence1& s1, const Sentence2& s2, size_t score_hint = std::numeric_limits::max()) { - return detail::levenshtein_editops(detail::Range(s1), detail::Range(s2), score_hint); + return detail::levenshtein_editops(detail::make_range(s1), detail::make_range(s2), score_hint); } #ifdef RAPIDFUZZ_SIMD @@ -320,7 +320,7 @@ struct MultiLevenshtein : public detail::MultiDistanceBase::size; - static_assert(MaxLen <= 64); + static_assert(MaxLen <= 64, "expected MaxLen <= 64"); } constexpr static size_t find_block_count(size_t count) @@ -387,7 +387,7 @@ struct MultiLevenshtein : public detail::MultiDistanceBase= result_count() elements"); - detail::Range scores_(scores, scores + score_count); + auto scores_ = detail::make_range(scores, scores + score_count); RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 8) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 16) @@ -428,7 +428,7 @@ struct CachedLevenshtein : public detail::CachedDistanceBase CachedLevenshtein(InputIt1 first1, InputIt1 last1, LevenshteinWeightTable aWeights = {1, 1, 1}) - : s1(first1, last1), PM(detail::Range(first1, last1)), weights(aWeights) + : s1(first1, last1), PM(detail::make_range(first1, last1)), weights(aWeights) {} private: @@ -454,7 +454,7 @@ struct CachedLevenshtein : public detail::CachedDistanceBase= weights.insert_cost + weights.delete_cost) { // max can make use of the common divisor of the three weights size_t new_max = detail::ceil_div(score_cutoff, weights.insert_cost); - size_t dist = detail::indel_distance(PM, detail::Range(s1), s2, new_max); + size_t dist = detail::indel_distance(PM, detail::make_range(s1), s2, new_max); dist *= weights.insert_cost; return (dist <= score_cutoff) ? dist : score_cutoff + 1; } } - return detail::generalized_levenshtein_distance(detail::Range(s1), s2, weights, score_cutoff); + return detail::generalized_levenshtein_distance(detail::make_range(s1), s2, weights, score_cutoff); } std::vector s1; diff --git a/rapidfuzz/distance/OSA.hpp b/rapidfuzz/distance/OSA.hpp index c8395076..e3df742f 100644 --- a/rapidfuzz/distance/OSA.hpp +++ b/rapidfuzz/distance/OSA.hpp @@ -135,7 +135,7 @@ struct MultiOSA else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 64) return native_simd::size; - static_assert(MaxLen <= 64); + static_assert(MaxLen <= 64, "expected MaxLen <= 64"); } constexpr static size_t find_block_count(size_t count) @@ -199,7 +199,7 @@ struct MultiOSA if (score_count < result_count()) throw std::invalid_argument("scores has to have >= result_count() elements"); - detail::Range scores_(scores, scores + score_count); + auto scores_ = detail::make_range(scores, scores + score_count); RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 8) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 16) @@ -237,7 +237,7 @@ struct CachedOSA {} template - CachedOSA(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::Range(first1, last1)) + CachedOSA(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::make_range(first1, last1)) {} private: @@ -260,9 +260,9 @@ struct CachedOSA else if (s2.empty()) res = s1.size(); else if (s1.size() < 64) - res = detail::osa_hyrroe2003(PM, detail::Range(s1), s2, score_cutoff); + res = detail::osa_hyrroe2003(PM, detail::make_range(s1), s2, score_cutoff); else - res = detail::osa_hyrroe2003_block(PM, detail::Range(s1), s2, score_cutoff); + res = detail::osa_hyrroe2003_block(PM, detail::make_range(s1), s2, score_cutoff); return (res <= score_cutoff) ? res : score_cutoff + 1; } diff --git a/rapidfuzz/fuzz.hpp b/rapidfuzz/fuzz.hpp index ad0db090..de49be9f 100644 --- a/rapidfuzz/fuzz.hpp +++ b/rapidfuzz/fuzz.hpp @@ -76,7 +76,7 @@ struct MultiRatio { void similarity(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0) const { - similarity(scores, score_count, detail::Range(first2, last2), score_cutoff); + similarity(scores, score_count, detail::make_range(first2, last2), score_cutoff); } template @@ -746,13 +746,13 @@ struct MultiQRatio { void similarity(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0) const { - similarity(scores, score_count, detail::Range(first2, last2), score_cutoff); + similarity(scores, score_count, detail::make_range(first2, last2), score_cutoff); } template void similarity(double* scores, size_t score_count, const Sentence2& s2, double score_cutoff = 0) const { - rapidfuzz::detail::Range s2_(s2); + auto s2_ = detail::make_range(s2); if (s2_.empty()) { for (size_t i = 0; i < str_lens.size(); ++i) scores[i] = 0; diff --git a/rapidfuzz/fuzz_impl.hpp b/rapidfuzz/fuzz_impl.hpp index 4ec30005..7fdf4b62 100644 --- a/rapidfuzz/fuzz_impl.hpp +++ b/rapidfuzz/fuzz_impl.hpp @@ -2,6 +2,7 @@ /* Copyright © 2021-present Max Bachmann */ /* Copyright © 2011 Adam Cohen */ +#include "rapidfuzz/details/Range.hpp" #include #include @@ -21,7 +22,7 @@ namespace fuzz { template double ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double score_cutoff) { - return ratio(detail::Range(first1, last1), detail::Range(first2, last2), score_cutoff); + return ratio(detail::make_range(first1, last1), detail::make_range(first2, last2), score_cutoff); } template @@ -35,7 +36,7 @@ template double CachedRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, double score_hint) const { - return similarity(detail::Range(first2, last2), score_cutoff, score_hint); + return similarity(detail::make_range(first2, last2), score_cutoff, score_hint); } template @@ -91,8 +92,10 @@ partial_ratio_impl(const detail::Range& s1, const detail::Range(window.first); auto subseq2_first = s2.begin() + static_cast(window.second); - detail::Range subseq1(subseq1_first, subseq1_first + static_cast(len1)); - detail::Range subseq2(subseq2_first, subseq2_first + static_cast(len1)); + auto subseq1 = + detail::make_range(subseq1_first, subseq1_first + static_cast(len1)); + auto subseq2 = + detail::make_range(subseq2_first, subseq2_first + static_cast(len1)); if (scores[window.first] == std::numeric_limits::max()) { scores[window.first] = cached_ratio.cached_indel.distance(subseq1); @@ -146,7 +149,7 @@ partial_ratio_impl(const detail::Range& s1, const detail::Range(i)); + auto subseq = rapidfuzz::detail::make_range(s2.begin(), s2.begin() + static_cast(i)); if (!s1_char_set.find(subseq.back())) continue; double ls_ratio = cached_ratio.similarity(subseq, score_cutoff); @@ -159,7 +162,7 @@ partial_ratio_impl(const detail::Range& s1, const detail::Range(i), s2.end()); + auto subseq = rapidfuzz::detail::make_range(s2.begin() + static_cast(i), s2.end()); if (!s1_char_set.find(subseq.front())) continue; double ls_ratio = cached_ratio.similarity(subseq, score_cutoff); @@ -208,8 +211,8 @@ ScoreAlignment partial_ratio_alignment(InputIt1 first1, InputIt1 last1, if (!len1 || !len2) return ScoreAlignment(static_cast(len1 == len2) * 100.0, 0, len1, 0, len1); - auto s1 = detail::Range(first1, last1); - auto s2 = detail::Range(first2, last2); + auto s1 = detail::make_range(first1, last1); + auto s2 = detail::make_range(first2, last2); auto alignment = fuzz_detail::partial_ratio_impl(s1, s2, score_cutoff); if (alignment.score != 100 && s1.size() == s2.size()) { @@ -268,8 +271,8 @@ double CachedPartialRatio::similarity(InputIt2 first2, InputIt2 last2, d if (!len1 || !len2) return static_cast(len1 == len2) * 100.0; - auto s1_ = detail::Range(s1); - auto s2 = detail::Range(first2, last2); + auto s1_ = detail::make_range(s1); + auto s2 = detail::make_range(first2, last2); double score = fuzz_detail::partial_ratio_impl(s1_, s2, cached_ratio, s1_char_set, score_cutoff).score; if (score != 100 && s1_.size() == s2.size()) { @@ -650,8 +653,9 @@ double token_ratio(const std::vector& s1_sorted, double result = 0; auto s2_sorted = tokens_b.join(); if (s1_sorted.size() < 65) { - double norm_sim = detail::indel_normalized_similarity(blockmap_s1_sorted, detail::Range(s1_sorted), - detail::Range(s2_sorted), score_cutoff / 100); + double norm_sim = + detail::indel_normalized_similarity(blockmap_s1_sorted, detail::make_range(s1_sorted), + detail::make_range(s2_sorted), score_cutoff / 100); result = norm_sim * 100; } else { @@ -838,7 +842,7 @@ CachedWRatio::CachedWRatio(InputIt1 first1, InputIt1 last1) cached_partial_ratio(first1, last1), tokens_s1(detail::sorted_split(std::begin(s1), std::end(s1))), s1_sorted(tokens_s1.join()), - blockmap_s1_sorted(detail::Range(s1_sorted)) + blockmap_s1_sorted(detail::make_range(s1_sorted)) {} template diff --git a/rapidfuzz_reference/JaroWinkler.hpp b/rapidfuzz_reference/JaroWinkler.hpp index 94f04f9d..b2447ebb 100644 --- a/rapidfuzz_reference/JaroWinkler.hpp +++ b/rapidfuzz_reference/JaroWinkler.hpp @@ -7,7 +7,7 @@ namespace rapidfuzz_reference { template ::value>> + typename = typename std::enable_if::value>::type> double jaro_winkler_similarity(InputIt1 P_first, InputIt1 P_last, InputIt2 T_first, InputIt2 T_last, double prefix_weight = 0.1, double score_cutoff = 0.0) { diff --git a/test/common.hpp b/test/common.hpp index d3ec903b..3e4d728c 100644 --- a/test/common.hpp +++ b/test/common.hpp @@ -58,6 +58,12 @@ class BidirectionalIterWrapper { T iter; }; +template +constexpr auto make_bidir(Iter iter) -> BidirectionalIterWrapper +{ + return BidirectionalIterWrapper(iter); +} + template ::value>> std::basic_string str_multiply(std::basic_string a, size_t b) { diff --git a/test/distance/tests-DamerauLevenshtein.cpp b/test/distance/tests-DamerauLevenshtein.cpp index 7a3b9648..98e61036 100644 --- a/test/distance/tests-DamerauLevenshtein.cpp +++ b/test/distance/tests-DamerauLevenshtein.cpp @@ -17,9 +17,8 @@ size_t damerau_levenshtein_distance(const Sentence1& s1, const Sentence2& s2, size_t res2 = rapidfuzz::experimental::damerau_levenshtein_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), max); size_t res3 = rapidfuzz::experimental::damerau_levenshtein_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::experimental::CachedDamerauLevenshtein scorer(s1); + make_bidir(s1.begin()), make_bidir(s1.end()), make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::experimental::CachedDamerauLevenshtein> scorer(s1); size_t res4 = scorer.distance(s2, max); size_t res5 = scorer.distance(s2.begin(), s2.end(), max); REQUIRE(res1 == res2); @@ -36,9 +35,8 @@ size_t damerau_levenshtein_similarity(const Sentence1& s1, const Sentence2& s2, size_t res2 = rapidfuzz::experimental::damerau_levenshtein_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), max); size_t res3 = rapidfuzz::experimental::damerau_levenshtein_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::experimental::CachedDamerauLevenshtein scorer(s1); + make_bidir(s1.begin()), make_bidir(s1.end()), make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::experimental::CachedDamerauLevenshtein> scorer(s1); size_t res4 = scorer.similarity(s2, max); size_t res5 = scorer.similarity(s2.begin(), s2.end(), max); REQUIRE(res1 == res2); @@ -56,9 +54,9 @@ double damerau_levenshtein_normalized_distance(const Sentence1& s1, const Senten double res2 = rapidfuzz::experimental::damerau_levenshtein_normalized_distance( s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); double res3 = rapidfuzz::experimental::damerau_levenshtein_normalized_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); - rapidfuzz::experimental::CachedDamerauLevenshtein scorer(s1); + make_bidir(s1.begin()), make_bidir(s1.end()), make_bidir(s2.begin()), make_bidir(s2.end()), + score_cutoff); + rapidfuzz::experimental::CachedDamerauLevenshtein> scorer(s1); double res4 = scorer.normalized_distance(s2, score_cutoff); double res5 = scorer.normalized_distance(s2.begin(), s2.end(), score_cutoff); REQUIRE_THAT(res1, WithinAbs(res2, 0.0001)); @@ -76,9 +74,9 @@ double damerau_levenshtein_normalized_similarity(const Sentence1& s1, const Sent double res2 = rapidfuzz::experimental::damerau_levenshtein_normalized_similarity( s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); double res3 = rapidfuzz::experimental::damerau_levenshtein_normalized_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); - rapidfuzz::experimental::CachedDamerauLevenshtein scorer(s1); + make_bidir(s1.begin()), make_bidir(s1.end()), make_bidir(s2.begin()), make_bidir(s2.end()), + score_cutoff); + rapidfuzz::experimental::CachedDamerauLevenshtein> scorer(s1); double res4 = scorer.normalized_similarity(s2, score_cutoff); double res5 = scorer.normalized_similarity(s2.begin(), s2.end(), score_cutoff); REQUIRE_THAT(res1, WithinAbs(res2, 0.0001)); diff --git a/test/distance/tests-Hamming.cpp b/test/distance/tests-Hamming.cpp index 45feeb4e..cb3b7195 100644 --- a/test/distance/tests-Hamming.cpp +++ b/test/distance/tests-Hamming.cpp @@ -4,6 +4,7 @@ #include #include "../common.hpp" +#include "rapidfuzz/details/type_traits.hpp" using Catch::Matchers::WithinAbs; @@ -13,10 +14,9 @@ size_t hamming_distance(const Sentence1& s1, const Sentence2& s2, { size_t res1 = rapidfuzz::hamming_distance(s1, s2, max); size_t res2 = rapidfuzz::hamming_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), max); - size_t res3 = rapidfuzz::hamming_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::CachedHamming scorer(s1); + size_t res3 = rapidfuzz::hamming_distance(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::CachedHamming> scorer(s1); size_t res4 = scorer.distance(s2, max); size_t res5 = scorer.distance(s2.begin(), s2.end(), max); REQUIRE(res1 == res2); @@ -31,10 +31,9 @@ size_t hamming_similarity(const Sentence1& s1, const Sentence2& s2, size_t max = { size_t res1 = rapidfuzz::hamming_similarity(s1, s2, max); size_t res2 = rapidfuzz::hamming_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), max); - size_t res3 = rapidfuzz::hamming_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::CachedHamming scorer(s1); + size_t res3 = rapidfuzz::hamming_similarity(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::CachedHamming> scorer(s1); size_t res4 = scorer.similarity(s2, max); size_t res5 = scorer.similarity(s2.begin(), s2.end(), max); REQUIRE(res1 == res2); @@ -50,10 +49,10 @@ double hamming_normalized_distance(const Sentence1& s1, const Sentence2& s2, dou double res1 = rapidfuzz::hamming_normalized_distance(s1, s2, score_cutoff); double res2 = rapidfuzz::hamming_normalized_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); - double res3 = rapidfuzz::hamming_normalized_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); - rapidfuzz::CachedHamming scorer(s1); + double res3 = + rapidfuzz::hamming_normalized_distance(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), score_cutoff); + rapidfuzz::CachedHamming> scorer(s1); double res4 = scorer.normalized_distance(s2, score_cutoff); double res5 = scorer.normalized_distance(s2.begin(), s2.end(), score_cutoff); REQUIRE_THAT(res1, WithinAbs(res2, 0.0001)); @@ -69,10 +68,10 @@ double hamming_normalized_similarity(const Sentence1& s1, const Sentence2& s2, d double res1 = rapidfuzz::hamming_normalized_similarity(s1, s2, score_cutoff); double res2 = rapidfuzz::hamming_normalized_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); - double res3 = rapidfuzz::hamming_normalized_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); - rapidfuzz::CachedHamming scorer(s1); + double res3 = + rapidfuzz::hamming_normalized_similarity(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), score_cutoff); + rapidfuzz::CachedHamming> scorer(s1); double res4 = scorer.normalized_similarity(s2, score_cutoff); double res5 = scorer.normalized_similarity(s2.begin(), s2.end(), score_cutoff); REQUIRE_THAT(res1, WithinAbs(res2, 0.0001)); diff --git a/test/distance/tests-Indel.cpp b/test/distance/tests-Indel.cpp index 4d787a4e..d4204657 100644 --- a/test/distance/tests-Indel.cpp +++ b/test/distance/tests-Indel.cpp @@ -14,10 +14,9 @@ size_t indel_distance(const Sentence1& s1, const Sentence2& s2, { size_t res1 = rapidfuzz::indel_distance(s1, s2, max); size_t res2 = rapidfuzz::indel_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), max); - size_t res3 = rapidfuzz::indel_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::CachedIndel scorer(s1); + size_t res3 = rapidfuzz::indel_distance(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::CachedIndel> scorer(s1); size_t res4 = scorer.distance(s2, max); size_t res5 = scorer.distance(s2.begin(), s2.end(), max); #ifdef RAPIDFUZZ_SIMD @@ -62,10 +61,9 @@ size_t indel_similarity(const Sentence1& s1, const Sentence2& s2, size_t max = 0 { size_t res1 = rapidfuzz::indel_similarity(s1, s2, max); size_t res2 = rapidfuzz::indel_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), max); - size_t res3 = rapidfuzz::indel_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::CachedIndel scorer(s1); + size_t res3 = rapidfuzz::indel_similarity(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::CachedIndel> scorer(s1); size_t res4 = scorer.similarity(s2, max); size_t res5 = scorer.similarity(s2.begin(), s2.end(), max); #ifdef RAPIDFUZZ_SIMD @@ -109,10 +107,10 @@ double indel_normalized_distance(const Sentence1& s1, const Sentence2& s2, doubl double res1 = rapidfuzz::indel_normalized_distance(s1, s2, score_cutoff); double res2 = rapidfuzz::indel_normalized_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); - double res3 = rapidfuzz::indel_normalized_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); - rapidfuzz::CachedIndel scorer(s1); + double res3 = + rapidfuzz::indel_normalized_distance(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), score_cutoff); + rapidfuzz::CachedIndel> scorer(s1); double res4 = scorer.normalized_distance(s2, score_cutoff); double res5 = scorer.normalized_distance(s2.begin(), s2.end(), score_cutoff); #ifdef RAPIDFUZZ_SIMD @@ -156,10 +154,10 @@ double indel_normalized_similarity(const Sentence1& s1, const Sentence2& s2, dou double res1 = rapidfuzz::indel_normalized_similarity(s1, s2, score_cutoff); double res2 = rapidfuzz::indel_normalized_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); - double res3 = rapidfuzz::indel_normalized_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); - rapidfuzz::CachedIndel scorer(s1); + double res3 = + rapidfuzz::indel_normalized_similarity(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), score_cutoff); + rapidfuzz::CachedIndel> scorer(s1); double res4 = scorer.normalized_similarity(s2, score_cutoff); double res5 = scorer.normalized_similarity(s2.begin(), s2.end(), score_cutoff); #ifdef RAPIDFUZZ_SIMD diff --git a/test/distance/tests-Jaro.cpp b/test/distance/tests-Jaro.cpp index fb3d0823..1bfb7341 100644 --- a/test/distance/tests-Jaro.cpp +++ b/test/distance/tests-Jaro.cpp @@ -16,10 +16,10 @@ double jaro_similarity(const Sentence1& s1, const Sentence2& s2, double score_cu rapidfuzz::jaro_normalized_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); #if 0 // todo double res5 = rapidfuzz::jaro_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); + make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), score_cutoff); #endif - rapidfuzz::CachedJaro scorer(s1); + rapidfuzz::CachedJaro> scorer(s1); double res6 = scorer.similarity(s2, score_cutoff); double res7 = scorer.similarity(s2.begin(), s2.end(), score_cutoff); double res8 = scorer.normalized_similarity(s2, score_cutoff); @@ -86,10 +86,10 @@ double jaro_distance(const Sentence1& s1, const Sentence2& s2, double score_cuto rapidfuzz::jaro_normalized_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); #if 0 // todo double res5 = rapidfuzz::jaro_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); + make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), score_cutoff); #endif - rapidfuzz::CachedJaro scorer(s1); + rapidfuzz::CachedJaro> scorer(s1); double res6 = scorer.distance(s2, score_cutoff); double res7 = scorer.distance(s2.begin(), s2.end(), score_cutoff); double res8 = scorer.normalized_distance(s2, score_cutoff); diff --git a/test/distance/tests-JaroWinkler.cpp b/test/distance/tests-JaroWinkler.cpp index bccf3915..f7f5fcdd 100644 --- a/test/distance/tests-JaroWinkler.cpp +++ b/test/distance/tests-JaroWinkler.cpp @@ -16,7 +16,7 @@ double jaro_winkler_similarity(const Sentence1& s1, const Sentence2& s2, double double res3 = rapidfuzz::jaro_winkler_normalized_similarity(s1, s2, prefix_weight, score_cutoff); double res4 = rapidfuzz::jaro_winkler_normalized_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), prefix_weight, score_cutoff); - rapidfuzz::CachedJaroWinkler scorer(s1, prefix_weight); + rapidfuzz::CachedJaroWinkler> scorer(s1, prefix_weight); double res5 = scorer.similarity(s2, score_cutoff); double res6 = scorer.similarity(s2.begin(), s2.end(), score_cutoff); double res7 = scorer.normalized_similarity(s2, score_cutoff); @@ -72,7 +72,7 @@ double jaro_winkler_distance(const Sentence1& s1, const Sentence2& s2, double pr double res3 = rapidfuzz::jaro_winkler_normalized_distance(s1, s2, prefix_weight, score_cutoff); double res4 = rapidfuzz::jaro_winkler_normalized_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), prefix_weight, score_cutoff); - rapidfuzz::CachedJaroWinkler scorer(s1, prefix_weight); + rapidfuzz::CachedJaroWinkler> scorer(s1, prefix_weight); double res5 = scorer.distance(s2, score_cutoff); double res6 = scorer.distance(s2.begin(), s2.end(), score_cutoff); double res7 = scorer.normalized_distance(s2, score_cutoff); diff --git a/test/distance/tests-LCSseq.cpp b/test/distance/tests-LCSseq.cpp index f8cdeab1..9510bd4b 100644 --- a/test/distance/tests-LCSseq.cpp +++ b/test/distance/tests-LCSseq.cpp @@ -14,10 +14,9 @@ size_t lcs_seq_distance(const Sentence1& s1, const Sentence2& s2, { size_t res1 = rapidfuzz::lcs_seq_distance(s1, s2, max); size_t res2 = rapidfuzz::lcs_seq_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), max); - size_t res3 = rapidfuzz::lcs_seq_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::CachedLCSseq scorer(s1); + size_t res3 = rapidfuzz::lcs_seq_distance(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::CachedLCSseq> scorer(s1); size_t res4 = scorer.distance(s2, max); size_t res5 = scorer.distance(s2.begin(), s2.end(), max); #ifdef RAPIDFUZZ_SIMD @@ -62,10 +61,9 @@ size_t lcs_seq_similarity(const Sentence1& s1, const Sentence2& s2, size_t max = { size_t res1 = rapidfuzz::lcs_seq_similarity(s1, s2, max); size_t res2 = rapidfuzz::lcs_seq_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), max); - size_t res3 = rapidfuzz::lcs_seq_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::CachedLCSseq scorer(s1); + size_t res3 = rapidfuzz::lcs_seq_similarity(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::CachedLCSseq> scorer(s1); size_t res4 = scorer.similarity(s2, max); size_t res5 = scorer.similarity(s2.begin(), s2.end(), max); #ifdef RAPIDFUZZ_SIMD @@ -109,10 +107,10 @@ double lcs_seq_normalized_distance(const Sentence1& s1, const Sentence2& s2, dou double res1 = rapidfuzz::lcs_seq_normalized_distance(s1, s2, score_cutoff); double res2 = rapidfuzz::lcs_seq_normalized_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); - double res3 = rapidfuzz::lcs_seq_normalized_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); - rapidfuzz::CachedLCSseq scorer(s1); + double res3 = + rapidfuzz::lcs_seq_normalized_distance(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), score_cutoff); + rapidfuzz::CachedLCSseq> scorer(s1); double res4 = scorer.normalized_distance(s2, score_cutoff); double res5 = scorer.normalized_distance(s2.begin(), s2.end(), score_cutoff); REQUIRE_THAT(res1, WithinAbs(res2, 0.0001)); @@ -128,10 +126,10 @@ double lcs_seq_normalized_similarity(const Sentence1& s1, const Sentence2& s2, d double res1 = rapidfuzz::lcs_seq_normalized_similarity(s1, s2, score_cutoff); double res2 = rapidfuzz::lcs_seq_normalized_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); - double res3 = rapidfuzz::lcs_seq_normalized_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); - rapidfuzz::CachedLCSseq scorer(s1); + double res3 = + rapidfuzz::lcs_seq_normalized_similarity(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), score_cutoff); + rapidfuzz::CachedLCSseq> scorer(s1); double res4 = scorer.normalized_similarity(s2, score_cutoff); double res5 = scorer.normalized_similarity(s2.begin(), s2.end(), score_cutoff); REQUIRE_THAT(res1, WithinAbs(res2, 0.0001)); diff --git a/test/distance/tests-Levenshtein.cpp b/test/distance/tests-Levenshtein.cpp index 69841ce3..d5037787 100644 --- a/test/distance/tests-Levenshtein.cpp +++ b/test/distance/tests-Levenshtein.cpp @@ -19,10 +19,9 @@ size_t levenshtein_distance(const Sentence1& s1, const Sentence2& s2, { size_t res1 = rapidfuzz::levenshtein_distance(s1, s2, weights, max); size_t res2 = rapidfuzz::levenshtein_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), weights, max); - size_t res3 = rapidfuzz::levenshtein_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), weights, max); - rapidfuzz::CachedLevenshtein scorer(s1, weights); + size_t res3 = rapidfuzz::levenshtein_distance(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), weights, max); + rapidfuzz::CachedLevenshtein> scorer(s1, weights); size_t res4 = scorer.distance(s2, max); size_t res5 = scorer.distance(s2.begin(), s2.end(), max); #ifdef RAPIDFUZZ_SIMD @@ -77,10 +76,10 @@ double levenshtein_normalized_similarity(const Sentence1& s1, const Sentence2& s double res1 = rapidfuzz::levenshtein_normalized_similarity(s1, s2, weights, score_cutoff); double res2 = rapidfuzz::levenshtein_normalized_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), weights, score_cutoff); - double res3 = rapidfuzz::levenshtein_normalized_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), weights, score_cutoff); - rapidfuzz::CachedLevenshtein scorer(s1, weights); + double res3 = rapidfuzz::levenshtein_normalized_similarity(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), + weights, score_cutoff); + rapidfuzz::CachedLevenshtein> scorer(s1, weights); double res4 = scorer.normalized_similarity(s2, score_cutoff); double res5 = scorer.normalized_similarity(s2.begin(), s2.end(), score_cutoff); REQUIRE_THAT(res1, WithinAbs(res2, 0.0001)); @@ -244,8 +243,8 @@ TEST_CASE("Levenshtein_find_hirschberg_pos") std::string s1 = str_multiply(std::string("abb"), 2); std::string s2 = str_multiply(std::string("ccccca"), 2); - auto hpos = rapidfuzz::detail::find_hirschberg_pos(rapidfuzz::detail::Range(s1), - rapidfuzz::detail::Range(s2)); + auto hpos = rapidfuzz::detail::find_hirschberg_pos(rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2)); REQUIRE(hpos.left_score == 5); REQUIRE(hpos.right_score == 6); REQUIRE(hpos.s2_mid == 6); @@ -256,8 +255,8 @@ TEST_CASE("Levenshtein_find_hirschberg_pos") std::string s1 = str_multiply(std::string("abb"), 8 * 64); std::string s2 = str_multiply(std::string("ccccca"), 8 * 64); - auto hpos = rapidfuzz::detail::find_hirschberg_pos(rapidfuzz::detail::Range(s1), - rapidfuzz::detail::Range(s2)); + auto hpos = rapidfuzz::detail::find_hirschberg_pos(rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2)); REQUIRE(hpos.left_score == 1280); REQUIRE(hpos.right_score == 1281); REQUIRE(hpos.s2_mid == 1536); @@ -268,8 +267,8 @@ TEST_CASE("Levenshtein_find_hirschberg_pos") std::string s1 = "aaaa"; std::string s2 = "bbbbbbaaaa"; - auto hpos = rapidfuzz::detail::find_hirschberg_pos(rapidfuzz::detail::Range(s1), - rapidfuzz::detail::Range(s2)); + auto hpos = rapidfuzz::detail::find_hirschberg_pos(rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2)); REQUIRE(hpos.left_score == 5); REQUIRE(hpos.right_score == 1); REQUIRE(hpos.s2_mid == 5); @@ -350,12 +349,12 @@ TEST_CASE("Levenshtein small band") "LOTJKTie3OINeOTeJKWeOSeCGOdccNKLYemunmeJKWk"; rapidfuzz::Editops ops1; - rapidfuzz::detail::levenshtein_align(ops1, rapidfuzz::detail::Range(s1), - rapidfuzz::detail::Range(s2)); + rapidfuzz::detail::levenshtein_align(ops1, rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2)); REQUIRE(s2 == rapidfuzz::editops_apply_str(ops1, s1, s2)); rapidfuzz::Editops ops2; - rapidfuzz::detail::levenshtein_align(ops2, rapidfuzz::detail::Range(s1), rapidfuzz::detail::Range(s2), - ops1.size()); + rapidfuzz::detail::levenshtein_align(ops2, rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2), ops1.size()); REQUIRE(ops1 == ops2); } @@ -398,12 +397,12 @@ TEST_CASE("Levenshtein small band") "HXUJGDGOhccZ"; rapidfuzz::Editops ops1; - rapidfuzz::detail::levenshtein_align(ops1, rapidfuzz::detail::Range(s1), - rapidfuzz::detail::Range(s2)); + rapidfuzz::detail::levenshtein_align(ops1, rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2)); REQUIRE(s2 == rapidfuzz::editops_apply_str(ops1, s1, s2)); rapidfuzz::Editops ops2; - rapidfuzz::detail::levenshtein_align(ops2, rapidfuzz::detail::Range(s1), rapidfuzz::detail::Range(s2), - ops1.size()); + rapidfuzz::detail::levenshtein_align(ops2, rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2), ops1.size()); REQUIRE(ops1 == ops2); } } @@ -444,12 +443,12 @@ TEST_CASE("Levenshtein large band (ocr example)") std::vector s2 = get_subsequence(ocr_example2, 51, 6516); rapidfuzz::Editops ops1; - rapidfuzz::detail::levenshtein_align(ops1, rapidfuzz::detail::Range(s1), - rapidfuzz::detail::Range(s2)); + rapidfuzz::detail::levenshtein_align(ops1, rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2)); REQUIRE(s2 == rapidfuzz::editops_apply_vec(ops1, s1, s2)); rapidfuzz::Editops ops2; - rapidfuzz::detail::levenshtein_align(ops2, rapidfuzz::detail::Range(s1), rapidfuzz::detail::Range(s2), - ops1.size()); + rapidfuzz::detail::levenshtein_align(ops2, rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2), ops1.size()); REQUIRE(ops1 == ops2); } diff --git a/test/distance/tests-OSA.cpp b/test/distance/tests-OSA.cpp index a5f7147b..22c48a3c 100644 --- a/test/distance/tests-OSA.cpp +++ b/test/distance/tests-OSA.cpp @@ -10,10 +10,9 @@ size_t osa_distance(const Sentence1& s1, const Sentence2& s2, size_t max = std:: { size_t res1 = rapidfuzz::osa_distance(s1, s2, max); size_t res2 = rapidfuzz::osa_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), max); - size_t res3 = rapidfuzz::osa_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::CachedOSA scorer(s1); + size_t res3 = rapidfuzz::osa_distance(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::CachedOSA> scorer(s1); size_t res4 = scorer.distance(s2, max); size_t res5 = scorer.distance(s2.begin(), s2.end(), max); #ifdef RAPIDFUZZ_SIMD diff --git a/test/tests-common.cpp b/test/tests-common.cpp index 0e66fe94..9882bc98 100644 --- a/test/tests-common.cpp +++ b/test/tests-common.cpp @@ -1,3 +1,4 @@ +#include "rapidfuzz/details/Range.hpp" #include #include @@ -8,28 +9,28 @@ TEST_CASE("remove affix") std::string s2 = "aaabbbbaaaaa"; { - rapidfuzz::detail::Range s1_(s1); - rapidfuzz::detail::Range s2_(s2); + auto s1_ = rapidfuzz::detail::make_range(s1); + auto s2_ = rapidfuzz::detail::make_range(s2); REQUIRE(rapidfuzz::detail::remove_common_prefix(s1_, s2_) == 2); - REQUIRE(s1_ == rapidfuzz::detail::Range("bbbbaaaa")); - REQUIRE(s2_ == rapidfuzz::detail::Range("abbbbaaaaa")); + REQUIRE(s1_ == rapidfuzz::detail::make_range("bbbbaaaa")); + REQUIRE(s2_ == rapidfuzz::detail::make_range("abbbbaaaaa")); } { - rapidfuzz::detail::Range s1_(s1); - rapidfuzz::detail::Range s2_(s2); + auto s1_ = rapidfuzz::detail::make_range(s1); + auto s2_ = rapidfuzz::detail::make_range(s2); REQUIRE(rapidfuzz::detail::remove_common_suffix(s1_, s2_) == 4); - REQUIRE(s1_ == rapidfuzz::detail::Range("aabbbb")); - REQUIRE(s2_ == rapidfuzz::detail::Range("aaabbbba")); + REQUIRE(s1_ == rapidfuzz::detail::make_range("aabbbb")); + REQUIRE(s2_ == rapidfuzz::detail::make_range("aaabbbba")); } { - rapidfuzz::detail::Range s1_(s1); - rapidfuzz::detail::Range s2_(s2); + auto s1_ = rapidfuzz::detail::make_range(s1); + auto s2_ = rapidfuzz::detail::make_range(s2); auto affix = rapidfuzz::detail::remove_common_affix(s1_, s2_); REQUIRE(affix.prefix_len == 2); REQUIRE(affix.suffix_len == 4); - REQUIRE(s1_ == rapidfuzz::detail::Range("bbbb")); - REQUIRE(s2_ == rapidfuzz::detail::Range("abbbba")); + REQUIRE(s1_ == rapidfuzz::detail::make_range("bbbb")); + REQUIRE(s2_ == rapidfuzz::detail::make_range("abbbba")); } } From 84116827b26ba61a7c2b3d76fb05cd5f53ef1fd0 Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Wed, 25 Dec 2024 03:51:00 +0100 Subject: [PATCH 09/18] add C++11 support --- CMakeLists.txt | 2 +- extras/rapidfuzz_amalgamated.hpp | 108 ++++++++++++++++++++---------- fuzzing/CMakeLists.txt | 2 +- rapidfuzz/details/Matrix.hpp | 4 +- rapidfuzz/details/Range.hpp | 63 ++++++++++------- rapidfuzz/details/common_impl.hpp | 13 +++- rapidfuzz/details/config.hpp | 6 ++ rapidfuzz/details/intrinsics.hpp | 7 +- rapidfuzz/distance/Jaro_impl.hpp | 10 +++ rapidfuzz/fuzz_impl.hpp | 2 +- 10 files changed, 145 insertions(+), 72 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a9f422c..c6caa79e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,7 +50,7 @@ add_library(rapidfuzz INTERFACE) # provide a namespaced alias for clients to 'link' against if RapidFuzz is included as a sub-project add_library(rapidfuzz::rapidfuzz ALIAS rapidfuzz) -target_compile_features(rapidfuzz INTERFACE cxx_std_14) +target_compile_features(rapidfuzz INTERFACE cxx_std_11) target_include_directories(rapidfuzz INTERFACE diff --git a/extras/rapidfuzz_amalgamated.hpp b/extras/rapidfuzz_amalgamated.hpp index 740d9dbc..09434c35 100644 --- a/extras/rapidfuzz_amalgamated.hpp +++ b/extras/rapidfuzz_amalgamated.hpp @@ -1,7 +1,7 @@ // Licensed under the MIT License . // SPDX-License-Identifier: MIT // RapidFuzz v1.0.2 -// Generated: 2024-12-25 02:45:23.298368 +// Generated: 2024-12-25 03:52:00.425895 // ---------------------------------------------------------- // This file is an amalgamation of multiple different files. // You probably shouldn't edit it directly. @@ -392,12 +392,12 @@ struct ShiftedBitMatrix { return bool(m_matrix[row][col_word] & col_mask); } - auto operator[](size_t row) noexcept + BitMatrixView operator[](size_t row) noexcept { return m_matrix[row]; } - auto operator[](size_t row) const noexcept + BitMatrixView operator[](size_t row) const noexcept { return m_matrix[row]; } @@ -436,6 +436,12 @@ struct ShiftedBitMatrix { # define RAPIDFUZZ_IF_CONSTEXPR if #endif +#if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201402L) || __cplusplus >= 201402L) +# define RAPIDFUZZ_CONSTEXPR_CXX14 constexpr +#else +# define RAPIDFUZZ_CONSTEXPR_CXX14 +#endif + #include #include #include @@ -1088,6 +1094,9 @@ static inline void assume(bool b) #endif } +namespace to_begin_detail { +using std::begin; + template CharT* to_begin(CharT* s) { @@ -1095,11 +1104,17 @@ CharT* to_begin(CharT* s) } template -auto to_begin(T& x) +auto to_begin(T& x) -> decltype(begin(x)) { - using std::begin; + return begin(x); } +} // namespace to_begin_detail + +using to_begin_detail::to_begin; + +namespace to_end_detail { +using std::end; template CharT* to_end(CharT* s) @@ -1112,11 +1127,13 @@ CharT* to_end(CharT* s) } template -auto to_end(T& x) +auto to_end(T& x) -> decltype(end(x)) { - using std::end; return end(x); } +} // namespace to_end_detail + +using to_end_detail::to_end; template class Range { @@ -1131,47 +1148,47 @@ class Range { using iterator = Iter; using reverse_iterator = std::reverse_iterator; - constexpr Range(Iter first, Iter last) : _first(first), _last(last) + Range(Iter first, Iter last) : _first(first), _last(last) { assert(std::distance(_first, _last) >= 0); _size = static_cast(std::distance(_first, _last)); } - constexpr Range(Iter first, Iter last, size_t size) : _first(first), _last(last), _size(size) + Range(Iter first, Iter last, size_t size) : _first(first), _last(last), _size(size) {} template - constexpr Range(T& x) : Range(to_begin(x), to_end(x)) + Range(T& x) : Range(to_begin(x), to_end(x)) {} - constexpr iterator begin() const noexcept + iterator begin() const noexcept { return _first; } - constexpr iterator end() const noexcept + iterator end() const noexcept { return _last; } - constexpr reverse_iterator rbegin() const noexcept + reverse_iterator rbegin() const noexcept { return reverse_iterator(end()); } - constexpr reverse_iterator rend() const noexcept + reverse_iterator rend() const noexcept { return reverse_iterator(begin()); } - constexpr size_t size() const + size_t size() const { return _size; } - constexpr bool empty() const + bool empty() const { return size() == 0; } - explicit constexpr operator bool() const + explicit operator bool() const { return !empty(); } @@ -1180,23 +1197,24 @@ class Range { typename = rapidfuzz::rf_enable_if_t< std::is_base_of::iterator_category>::value>> - constexpr decltype(auto) operator[](size_t n) const + auto operator[](size_t n) const -> decltype(*_first) { return _first[static_cast(n)]; } - constexpr void remove_prefix(size_t n) + void remove_prefix(size_t n) { std::advance(_first, static_cast(n)); _size -= n; } - constexpr void remove_suffix(size_t n) + + void remove_suffix(size_t n) { std::advance(_last, -static_cast(n)); _size -= n; } - constexpr Range subseq(size_t pos = 0, size_t count = std::numeric_limits::max()) + Range subseq(size_t pos = 0, size_t count = std::numeric_limits::max()) { if (pos > size()) throw std::out_of_range("Index out of range in Range::substr"); @@ -1207,17 +1225,17 @@ class Range { return res; } - constexpr decltype(auto) front() const + const value_type& front() const { - return *(_first); + return *_first; } - constexpr decltype(auto) back() const + const value_type& back() const { return *(_last - 1); } - constexpr Range reversed() const + Range reversed() const { return {rbegin(), rend(), _size}; } @@ -1233,16 +1251,15 @@ class Range { }; template -constexpr auto make_range(Iter first, Iter last) -> Range +auto make_range(Iter first, Iter last) -> Range { return Range(first, last); } template -constexpr auto make_range(T& x) -> Range +auto make_range(T& x) -> Range { - auto first = to_begin(x); - return Range(first, to_end(x)); + return {to_begin(x), to_end(x)}; } template @@ -1425,7 +1442,7 @@ constexpr uint64_t shl64(uint64_t a, U shift) return (shift < 64) ? a << shift : 0; } -constexpr uint64_t addc64(uint64_t a, uint64_t b, uint64_t carryin, uint64_t* carryout) +RAPIDFUZZ_CONSTEXPR_CXX14 uint64_t addc64(uint64_t a, uint64_t b, uint64_t carryin, uint64_t* carryout) { /* todo should use _addcarry_u64 when available */ a += carryin; @@ -1436,7 +1453,7 @@ constexpr uint64_t addc64(uint64_t a, uint64_t b, uint64_t carryin, uint64_t* ca } template -constexpr T ceil_div(T a, U divisor) +RAPIDFUZZ_CONSTEXPR_CXX14 T ceil_div(T a, U divisor) { T _div = static_cast(divisor); return a / _div + static_cast(a % _div != 0); @@ -1472,7 +1489,7 @@ static inline size_t popcount(uint8_t x) } template -constexpr T rotl(T x, unsigned int n) +RAPIDFUZZ_CONSTEXPR_CXX14 T rotl(T x, unsigned int n) { unsigned int num_bits = std::numeric_limits::digits; assert(n < num_bits); @@ -1723,6 +1740,15 @@ DecomposedSet set_decomposition(SplittedSentenceVi return {difference_ab, difference_ba, intersection}; } +template +std::pair mismatch(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2) +{ + while (first1 != last1 && first2 != last2 && *first1 == *first2) + ++first1, ++first2; + + return std::make_pair(first1, first2); +} + /** * Removes common prefix of two string views */ @@ -1731,7 +1757,7 @@ size_t remove_common_prefix(Range& s1, Range& s2) { auto first1 = std::begin(s1); size_t prefix = static_cast( - std::distance(first1, std::mismatch(first1, std::end(s1), std::begin(s2), std::end(s2)).first)); + std::distance(first1, mismatch(first1, std::end(s1), std::begin(s2), std::end(s2)).first)); s1.remove_prefix(prefix); s2.remove_prefix(prefix); return prefix; @@ -1745,7 +1771,7 @@ size_t remove_common_suffix(Range& s1, Range& s2) { auto rfirst1 = s1.rbegin(); size_t suffix = static_cast( - std::distance(rfirst1, std::mismatch(rfirst1, s1.rend(), s2.rbegin(), s2.rend()).first)); + std::distance(rfirst1, mismatch(rfirst1, s1.rend(), s2.rbegin(), s2.rend()).first)); s1.remove_suffix(suffix); s2.remove_suffix(suffix); return suffix; @@ -5903,6 +5929,11 @@ struct JaroSimilaritySimdBounds { template static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_lengths, Range& s2) +# ifdef RAPIDFUZZ_AVX2 + -> JaroSimilaritySimdBounds> +# else + -> JaroSimilaritySimdBounds> +# endif { # ifdef RAPIDFUZZ_AVX2 using namespace simd_avx2; @@ -5970,6 +6001,11 @@ static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_leng template static inline auto jaro_similarity_prepare_bound_long_s2(const VecType* s1_lengths, Range& s2) +# ifdef RAPIDFUZZ_AVX2 + -> JaroSimilaritySimdBounds> +# else + -> JaroSimilaritySimdBounds> +# endif { # ifdef RAPIDFUZZ_AVX2 using namespace simd_avx2; @@ -6354,7 +6390,7 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, friend detail::MultiSimilarityBase, double, 0, 1>; friend detail::MultiNormalizedMetricBase, double>; - static_assert(MaxLen == 8 || MaxLen == 16 || MaxLen == 32 || MaxLen == 64); + static_assert(MaxLen == 8 || MaxLen == 16 || MaxLen == 32 || MaxLen == 64, "incorrect MaxLen used"); using VecType = typename std::conditional< MaxLen == 8, uint8_t, @@ -10415,7 +10451,7 @@ double CachedRatio::similarity(const Sentence2& s2, double score_cutoff, namespace fuzz_detail { -static constexpr double norm_distance(size_t dist, size_t lensum, double score_cutoff = 0) +static RAPIDFUZZ_CONSTEXPR_CXX14 double norm_distance(size_t dist, size_t lensum, double score_cutoff = 0) { double score = (lensum > 0) ? (100.0 - 100.0 * static_cast(dist) / static_cast(lensum)) : 100.0; diff --git a/fuzzing/CMakeLists.txt b/fuzzing/CMakeLists.txt index 4d7a3d38..2a71733d 100644 --- a/fuzzing/CMakeLists.txt +++ b/fuzzing/CMakeLists.txt @@ -1,6 +1,6 @@ function(create_fuzzer fuzzer) add_executable(fuzz_${fuzzer} fuzz_${fuzzer}.cpp) - target_compile_features(fuzz_${fuzzer} PUBLIC cxx_std_14) + target_compile_features(fuzz_${fuzzer} PUBLIC cxx_std_11) target_link_libraries(fuzz_${fuzzer} PRIVATE rapidfuzz::rapidfuzz) target_compile_options(fuzz_${fuzzer} PRIVATE -g -O1 -fsanitize=fuzzer,address -march=native) diff --git a/rapidfuzz/details/Matrix.hpp b/rapidfuzz/details/Matrix.hpp index 3c6b009e..76c0c868 100644 --- a/rapidfuzz/details/Matrix.hpp +++ b/rapidfuzz/details/Matrix.hpp @@ -177,12 +177,12 @@ struct ShiftedBitMatrix { return bool(m_matrix[row][col_word] & col_mask); } - auto operator[](size_t row) noexcept + BitMatrixView operator[](size_t row) noexcept { return m_matrix[row]; } - auto operator[](size_t row) const noexcept + BitMatrixView operator[](size_t row) const noexcept { return m_matrix[row]; } diff --git a/rapidfuzz/details/Range.hpp b/rapidfuzz/details/Range.hpp index 31b09f25..f0c3b10c 100644 --- a/rapidfuzz/details/Range.hpp +++ b/rapidfuzz/details/Range.hpp @@ -29,6 +29,9 @@ static inline void assume(bool b) #endif } +namespace to_begin_detail { +using std::begin; + template CharT* to_begin(CharT* s) { @@ -36,11 +39,17 @@ CharT* to_begin(CharT* s) } template -auto to_begin(T& x) +auto to_begin(T& x) -> decltype(begin(x)) { - using std::begin; + return begin(x); } +} // namespace to_begin_detail + +using to_begin_detail::to_begin; + +namespace to_end_detail { +using std::end; template CharT* to_end(CharT* s) @@ -53,11 +62,13 @@ CharT* to_end(CharT* s) } template -auto to_end(T& x) +auto to_end(T& x) -> decltype(end(x)) { - using std::end; return end(x); } +} // namespace to_end_detail + +using to_end_detail::to_end; template class Range { @@ -72,47 +83,47 @@ class Range { using iterator = Iter; using reverse_iterator = std::reverse_iterator; - constexpr Range(Iter first, Iter last) : _first(first), _last(last) + Range(Iter first, Iter last) : _first(first), _last(last) { assert(std::distance(_first, _last) >= 0); _size = static_cast(std::distance(_first, _last)); } - constexpr Range(Iter first, Iter last, size_t size) : _first(first), _last(last), _size(size) + Range(Iter first, Iter last, size_t size) : _first(first), _last(last), _size(size) {} template - constexpr Range(T& x) : Range(to_begin(x), to_end(x)) + Range(T& x) : Range(to_begin(x), to_end(x)) {} - constexpr iterator begin() const noexcept + iterator begin() const noexcept { return _first; } - constexpr iterator end() const noexcept + iterator end() const noexcept { return _last; } - constexpr reverse_iterator rbegin() const noexcept + reverse_iterator rbegin() const noexcept { return reverse_iterator(end()); } - constexpr reverse_iterator rend() const noexcept + reverse_iterator rend() const noexcept { return reverse_iterator(begin()); } - constexpr size_t size() const + size_t size() const { return _size; } - constexpr bool empty() const + bool empty() const { return size() == 0; } - explicit constexpr operator bool() const + explicit operator bool() const { return !empty(); } @@ -121,23 +132,24 @@ class Range { typename = rapidfuzz::rf_enable_if_t< std::is_base_of::iterator_category>::value>> - constexpr decltype(auto) operator[](size_t n) const + auto operator[](size_t n) const -> decltype(*_first) { return _first[static_cast(n)]; } - constexpr void remove_prefix(size_t n) + void remove_prefix(size_t n) { std::advance(_first, static_cast(n)); _size -= n; } - constexpr void remove_suffix(size_t n) + + void remove_suffix(size_t n) { std::advance(_last, -static_cast(n)); _size -= n; } - constexpr Range subseq(size_t pos = 0, size_t count = std::numeric_limits::max()) + Range subseq(size_t pos = 0, size_t count = std::numeric_limits::max()) { if (pos > size()) throw std::out_of_range("Index out of range in Range::substr"); @@ -148,17 +160,17 @@ class Range { return res; } - constexpr decltype(auto) front() const + const value_type& front() const { - return *(_first); + return *_first; } - constexpr decltype(auto) back() const + const value_type& back() const { return *(_last - 1); } - constexpr Range reversed() const + Range reversed() const { return {rbegin(), rend(), _size}; } @@ -174,16 +186,15 @@ class Range { }; template -constexpr auto make_range(Iter first, Iter last) -> Range +auto make_range(Iter first, Iter last) -> Range { return Range(first, last); } template -constexpr auto make_range(T& x) -> Range +auto make_range(T& x) -> Range { - auto first = to_begin(x); - return Range(first, to_end(x)); + return {to_begin(x), to_end(x)}; } template diff --git a/rapidfuzz/details/common_impl.hpp b/rapidfuzz/details/common_impl.hpp index 1478b7c5..d94fd2d5 100644 --- a/rapidfuzz/details/common_impl.hpp +++ b/rapidfuzz/details/common_impl.hpp @@ -34,6 +34,15 @@ DecomposedSet set_decomposition(SplittedSentenceVi return {difference_ab, difference_ba, intersection}; } +template +std::pair mismatch(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2) +{ + while (first1 != last1 && first2 != last2 && *first1 == *first2) + ++first1, ++first2; + + return std::make_pair(first1, first2); +} + /** * Removes common prefix of two string views */ @@ -42,7 +51,7 @@ size_t remove_common_prefix(Range& s1, Range& s2) { auto first1 = std::begin(s1); size_t prefix = static_cast( - std::distance(first1, std::mismatch(first1, std::end(s1), std::begin(s2), std::end(s2)).first)); + std::distance(first1, mismatch(first1, std::end(s1), std::begin(s2), std::end(s2)).first)); s1.remove_prefix(prefix); s2.remove_prefix(prefix); return prefix; @@ -56,7 +65,7 @@ size_t remove_common_suffix(Range& s1, Range& s2) { auto rfirst1 = s1.rbegin(); size_t suffix = static_cast( - std::distance(rfirst1, std::mismatch(rfirst1, s1.rend(), s2.rbegin(), s2.rend()).first)); + std::distance(rfirst1, mismatch(rfirst1, s1.rend(), s2.rbegin(), s2.rend()).first)); s1.remove_suffix(suffix); s2.remove_suffix(suffix); return suffix; diff --git a/rapidfuzz/details/config.hpp b/rapidfuzz/details/config.hpp index 1616d750..8196b3e1 100644 --- a/rapidfuzz/details/config.hpp +++ b/rapidfuzz/details/config.hpp @@ -11,3 +11,9 @@ # define RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE 0 # define RAPIDFUZZ_IF_CONSTEXPR if #endif + +#if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201402L) || __cplusplus >= 201402L) +# define RAPIDFUZZ_CONSTEXPR_CXX14 constexpr +#else +# define RAPIDFUZZ_CONSTEXPR_CXX14 +#endif diff --git a/rapidfuzz/details/intrinsics.hpp b/rapidfuzz/details/intrinsics.hpp index c69d4fe1..5089a74f 100644 --- a/rapidfuzz/details/intrinsics.hpp +++ b/rapidfuzz/details/intrinsics.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -51,7 +52,7 @@ constexpr uint64_t shl64(uint64_t a, U shift) return (shift < 64) ? a << shift : 0; } -constexpr uint64_t addc64(uint64_t a, uint64_t b, uint64_t carryin, uint64_t* carryout) +RAPIDFUZZ_CONSTEXPR_CXX14 uint64_t addc64(uint64_t a, uint64_t b, uint64_t carryin, uint64_t* carryout) { /* todo should use _addcarry_u64 when available */ a += carryin; @@ -62,7 +63,7 @@ constexpr uint64_t addc64(uint64_t a, uint64_t b, uint64_t carryin, uint64_t* ca } template -constexpr T ceil_div(T a, U divisor) +RAPIDFUZZ_CONSTEXPR_CXX14 T ceil_div(T a, U divisor) { T _div = static_cast(divisor); return a / _div + static_cast(a % _div != 0); @@ -98,7 +99,7 @@ static inline size_t popcount(uint8_t x) } template -constexpr T rotl(T x, unsigned int n) +RAPIDFUZZ_CONSTEXPR_CXX14 T rotl(T x, unsigned int n) { unsigned int num_bits = std::numeric_limits::digits; assert(n < num_bits); diff --git a/rapidfuzz/distance/Jaro_impl.hpp b/rapidfuzz/distance/Jaro_impl.hpp index 1fcc2888..4d6d8b80 100644 --- a/rapidfuzz/distance/Jaro_impl.hpp +++ b/rapidfuzz/distance/Jaro_impl.hpp @@ -460,6 +460,11 @@ struct JaroSimilaritySimdBounds { template static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_lengths, Range& s2) +# ifdef RAPIDFUZZ_AVX2 + -> JaroSimilaritySimdBounds> +# else + -> JaroSimilaritySimdBounds> +# endif { # ifdef RAPIDFUZZ_AVX2 using namespace simd_avx2; @@ -527,6 +532,11 @@ static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_leng template static inline auto jaro_similarity_prepare_bound_long_s2(const VecType* s1_lengths, Range& s2) +# ifdef RAPIDFUZZ_AVX2 + -> JaroSimilaritySimdBounds> +# else + -> JaroSimilaritySimdBounds> +# endif { # ifdef RAPIDFUZZ_AVX2 using namespace simd_avx2; diff --git a/rapidfuzz/fuzz_impl.hpp b/rapidfuzz/fuzz_impl.hpp index 7fdf4b62..c39c19c9 100644 --- a/rapidfuzz/fuzz_impl.hpp +++ b/rapidfuzz/fuzz_impl.hpp @@ -52,7 +52,7 @@ double CachedRatio::similarity(const Sentence2& s2, double score_cutoff, namespace fuzz_detail { -static constexpr double norm_distance(size_t dist, size_t lensum, double score_cutoff = 0) +static RAPIDFUZZ_CONSTEXPR_CXX14 double norm_distance(size_t dist, size_t lensum, double score_cutoff = 0) { double score = (lensum > 0) ? (100.0 - 100.0 * static_cast(dist) / static_cast(lensum)) : 100.0; From 2db16b3d50c680645fe68819129de04bae3f227f Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Wed, 25 Dec 2024 03:55:08 +0100 Subject: [PATCH 10/18] fix ambiguous call --- extras/rapidfuzz_amalgamated.hpp | 8 ++++---- rapidfuzz/details/common_impl.hpp | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/extras/rapidfuzz_amalgamated.hpp b/extras/rapidfuzz_amalgamated.hpp index 09434c35..cc521692 100644 --- a/extras/rapidfuzz_amalgamated.hpp +++ b/extras/rapidfuzz_amalgamated.hpp @@ -1,7 +1,7 @@ // Licensed under the MIT License . // SPDX-License-Identifier: MIT // RapidFuzz v1.0.2 -// Generated: 2024-12-25 03:52:00.425895 +// Generated: 2024-12-25 03:55:48.239375 // ---------------------------------------------------------- // This file is an amalgamation of multiple different files. // You probably shouldn't edit it directly. @@ -1741,7 +1741,7 @@ DecomposedSet set_decomposition(SplittedSentenceVi } template -std::pair mismatch(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2) +std::pair rf_mismatch(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2) { while (first1 != last1 && first2 != last2 && *first1 == *first2) ++first1, ++first2; @@ -1757,7 +1757,7 @@ size_t remove_common_prefix(Range& s1, Range& s2) { auto first1 = std::begin(s1); size_t prefix = static_cast( - std::distance(first1, mismatch(first1, std::end(s1), std::begin(s2), std::end(s2)).first)); + std::distance(first1, rf_mismatch(first1, std::end(s1), std::begin(s2), std::end(s2)).first)); s1.remove_prefix(prefix); s2.remove_prefix(prefix); return prefix; @@ -1771,7 +1771,7 @@ size_t remove_common_suffix(Range& s1, Range& s2) { auto rfirst1 = s1.rbegin(); size_t suffix = static_cast( - std::distance(rfirst1, mismatch(rfirst1, s1.rend(), s2.rbegin(), s2.rend()).first)); + std::distance(rfirst1, rf_mismatch(rfirst1, s1.rend(), s2.rbegin(), s2.rend()).first)); s1.remove_suffix(suffix); s2.remove_suffix(suffix); return suffix; diff --git a/rapidfuzz/details/common_impl.hpp b/rapidfuzz/details/common_impl.hpp index d94fd2d5..7821c6d3 100644 --- a/rapidfuzz/details/common_impl.hpp +++ b/rapidfuzz/details/common_impl.hpp @@ -35,7 +35,7 @@ DecomposedSet set_decomposition(SplittedSentenceVi } template -std::pair mismatch(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2) +std::pair rf_mismatch(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2) { while (first1 != last1 && first2 != last2 && *first1 == *first2) ++first1, ++first2; @@ -51,7 +51,7 @@ size_t remove_common_prefix(Range& s1, Range& s2) { auto first1 = std::begin(s1); size_t prefix = static_cast( - std::distance(first1, mismatch(first1, std::end(s1), std::begin(s2), std::end(s2)).first)); + std::distance(first1, rf_mismatch(first1, std::end(s1), std::begin(s2), std::end(s2)).first)); s1.remove_prefix(prefix); s2.remove_prefix(prefix); return prefix; @@ -65,7 +65,7 @@ size_t remove_common_suffix(Range& s1, Range& s2) { auto rfirst1 = s1.rbegin(); size_t suffix = static_cast( - std::distance(rfirst1, mismatch(rfirst1, s1.rend(), s2.rbegin(), s2.rend()).first)); + std::distance(rfirst1, rf_mismatch(rfirst1, s1.rend(), s2.rbegin(), s2.rend()).first)); s1.remove_suffix(suffix); s2.remove_suffix(suffix); return suffix; From e08f222f958c4054052e49fa1c860f0b9f696143 Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Wed, 25 Dec 2024 11:04:13 +0100 Subject: [PATCH 11/18] test more compiler variants --- .github/workflows/linux-simple.yml | 81 ++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 .github/workflows/linux-simple.yml diff --git a/.github/workflows/linux-simple.yml b/.github/workflows/linux-simple.yml new file mode 100644 index 00000000..56f2d67b --- /dev/null +++ b/.github/workflows/linux-simple.yml @@ -0,0 +1,81 @@ +name: Linux builds (basic) + +on: [push, pull_request] + +jobs: + build: + name: ${{matrix.compiler.cxx}}, C++${{matrix.std}}, ${{matrix.build_type}} + runs-on: ubuntu-20.04 + strategy: + matrix: + compiler: + - {cxx: g++-5, other_pkgs: g++-5} + - {cxx: g++-6, other_pkgs: g++-6} + - {cxx: g++-7, other_pkgs: g++-7} + - {cxx: g++-8, other_pkgs: g++-8} + - {cxx: g++-9, other_pkgs: g++-9} + - {cxx: g++-10, other_pkgs: g++-10} + - {cxx: clang++-6.0, other_pkgs: clang-6.0} + - {cxx: clang++-7, other_pkgs: clang-7} + - {cxx: clang++-8, other_pkgs: clang-8} + - {cxx: clang++-9, other_pkgs: clang-9} + - {cxx: clang++-10, other_pkgs: clang-10} + build_type: [Debug, Release] + std: [11, 14] + #std: [11, 14, 17, 20] + #exclude: + # - compiler.cxx: "g++-{5,6,7,8,9,10}" + # std: 17 + # - compiler.cxx: "g++-{5,6,7,8,9,10}" + # std: 20 + # - compiler.cxx: "clang++-{6.0,7,8,9,10}" + # std: 17 + # - compiler.cxx: "clang++-{6.0,7,8,9,10}" + # std: 20 + + steps: + - uses: actions/checkout@v4 + + - name: Add repositories for older GCC + run: | + sudo apt-add-repository 'deb http://azure.archive.ubuntu.com/ubuntu/ bionic main' + sudo apt-add-repository 'deb http://azure.archive.ubuntu.com/ubuntu/ bionic universe' + if: ${{ matrix.compiler.cxx == 'g++-5' || matrix.compiler.cxx == 'g++-6' }} + + - name: Prepare environment + run: | + sudo apt-get update + sudo apt-get install -y ninja-build ${{matrix.compiler.other_pkgs}} + + - name: Configure CMake + env: + CXX: ${{matrix.compiler.cxx}} + run: | + cmake -B build \ + -DCMAKE_BUILD_TYPE=${{matrix.built_type}} \ + -DCMAKE_CXX_STANDARD=${{matrix.std}} \ + -DCMAKE_CXX_STANDARD_REQUIRED=ON \ + -DCMAKE_CXX_EXTENSIONS=OFF \ + -DRAPIDFUZZ_BUILD_TESTING=1 \ + -DRAPIDFUZZ_ENABLE_LINTERS=1 \ + -DRAPIDFUZZ_BUILD_FUZZERS=1 \ + -G Ninja + + - name: Build + working-directory: build + run: ninja + + - name: Test + working-directory: build + run: ctest -C ${{matrix.BUILD_TYPE}} --rerun-failed --output-on-failure -j `nproc` + + - name: Fuzz Test + working-directory: build + run: | + fuzzing/fuzz_lcs_similarity -max_total_time=30 + fuzzing/fuzz_levenshtein_distance -max_total_time=30 + fuzzing/fuzz_levenshtein_editops -max_total_time=30 + fuzzing/fuzz_indel_distance -max_total_time=30 + fuzzing/fuzz_indel_editops -max_total_time=30 + fuzzing/fuzz_osa_distance -max_total_time=30 + fuzzing/fuzz_damerau_levenshtein_distance -max_total_time=30 From 2647a660988f1c81045e59a8e879a75efefecd8c Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Wed, 25 Dec 2024 11:20:29 +0100 Subject: [PATCH 12/18] remove usage of [[maybe_unused]] --- extras/rapidfuzz_amalgamated.hpp | 105 +++++++----------- rapidfuzz/distance/DamerauLevenshtein.hpp | 3 +- .../distance/DamerauLevenshtein_impl.hpp | 3 +- rapidfuzz/distance/Hamming.hpp | 3 +- rapidfuzz/distance/Hamming_impl.hpp | 2 +- rapidfuzz/distance/Jaro.hpp | 5 +- rapidfuzz/distance/JaroWinkler.hpp | 5 +- rapidfuzz/distance/JaroWinkler_impl.hpp | 2 +- rapidfuzz/distance/Jaro_impl.hpp | 9 +- rapidfuzz/distance/LCSseq.hpp | 3 +- rapidfuzz/distance/LCSseq_impl.hpp | 6 +- rapidfuzz/distance/OSA.hpp | 3 +- rapidfuzz/distance/Postfix.hpp | 3 +- rapidfuzz/distance/Postfix_impl.hpp | 3 +- rapidfuzz/distance/Prefix.hpp | 3 +- rapidfuzz/distance/Prefix_impl.hpp | 3 +- rapidfuzz/fuzz_impl.hpp | 47 +++----- 17 files changed, 85 insertions(+), 123 deletions(-) diff --git a/extras/rapidfuzz_amalgamated.hpp b/extras/rapidfuzz_amalgamated.hpp index cc521692..21a77ec7 100644 --- a/extras/rapidfuzz_amalgamated.hpp +++ b/extras/rapidfuzz_amalgamated.hpp @@ -1,7 +1,7 @@ // Licensed under the MIT License . // SPDX-License-Identifier: MIT // RapidFuzz v1.0.2 -// Generated: 2024-12-25 03:55:48.239375 +// Generated: 2024-12-25 11:20:05.194568 // ---------------------------------------------------------- // This file is an amalgamation of multiple different files. // You probably shouldn't edit it directly. @@ -3843,8 +3843,7 @@ class DamerauLevenshtein } template - static size_t _distance(const Range& s1, const Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) + static size_t _distance(const Range& s1, const Range& s2, size_t score_cutoff, size_t) { return damerau_levenshtein_distance(s1, s2, score_cutoff); } @@ -3982,8 +3981,7 @@ struct CachedDamerauLevenshtein : public detail::CachedDistanceBase - size_t _distance(const detail::Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _distance(const detail::Range& s2, size_t score_cutoff, size_t) const { return rapidfuzz::experimental::damerau_levenshtein_distance(s1, s2, score_cutoff); } @@ -4021,7 +4019,7 @@ class Hamming : public DistanceBase static size_t _distance(const Range& s1, const Range& s2, bool pad, - size_t score_cutoff, [[maybe_unused]] size_t score_hint) + size_t score_cutoff, size_t) { if (!pad && s1.size() != s2.size()) throw std::invalid_argument("Sequences are not the same length."); @@ -4206,8 +4204,7 @@ struct CachedHamming : public detail::CachedDistanceBase, } template - size_t _distance(const detail::Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _distance(const detail::Range& s2, size_t score_cutoff, size_t score_hint) const { return detail::Hamming::distance(s1, s2, pad, score_cutoff, score_hint); } @@ -4888,7 +4885,9 @@ Editops recover_alignment(const Range& s1, const Range& s2, if (dist == 0) return editops; - [[maybe_unused]] size_t band_width_right = s2.size() - matrix.sim; +#ifndef NDEBUG + size_t band_width_right = s2.size() - matrix.sim; +#endif auto col = len1; auto row = len2; @@ -4987,7 +4986,7 @@ class LCSseq : public SimilarityBase static size_t _similarity(const Range& s1, const Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) + size_t) { return lcs_seq_similarity(s1, s2, score_cutoff); } @@ -5208,8 +5207,7 @@ struct CachedLCSseq } template - size_t _similarity(const detail::Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _similarity(const detail::Range& s2, size_t score_cutoff, size_t) const { return detail::lcs_seq_similarity(PM, detail::make_range(s1), s2, score_cutoff); } @@ -5561,8 +5559,7 @@ static inline size_t count_common_chars(const FlaggedCharsMultiword& flagged) } template -static inline FlaggedCharsWord flag_similar_characters_word(const PM_Vec& PM, - [[maybe_unused]] const Range& P, +static inline FlaggedCharsWord flag_similar_characters_word(const PM_Vec& PM, const Range&, const Range& T, size_t Bound) { assert(P.size() <= 64); @@ -5941,7 +5938,9 @@ static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_leng using namespace simd_sse2; # endif - [[maybe_unused]] static constexpr size_t alignment = native_simd::alignment; +# ifndef RAPIDFUZZ_AVX2 + static constexpr size_t alignment = native_simd::alignment; +# endif static constexpr size_t vec_width = native_simd::size; assert(s2.size() <= sizeof(VecType) * 8); @@ -6316,7 +6315,7 @@ class Jaro : public SimilarityBase { template static double _similarity(const Range& s1, const Range& s2, double score_cutoff, - [[maybe_unused]] double score_hint) + double) { return jaro_similarity(s1, s2, score_cutoff); } @@ -6492,7 +6491,7 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, } template - double maximum([[maybe_unused]] size_t s1_idx, const detail::Range&) const + double maximum(size_t, const detail::Range&) const { return 1.0; } @@ -6533,8 +6532,7 @@ struct CachedJaro : public detail::CachedSimilarityBase, doub } template - double _similarity(const detail::Range& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double _similarity(const detail::Range& s2, double score_cutoff, double) const { return detail::jaro_similarity(PM, detail::make_range(s1), s2, score_cutoff); } @@ -6632,7 +6630,7 @@ class JaroWinkler : public SimilarityBase { template static double _similarity(const Range& s1, const Range& s2, double prefix_weight, - double score_cutoff, [[maybe_unused]] double score_hint) + double score_cutoff, double) { return jaro_winkler_similarity(s1, s2, prefix_weight, score_cutoff); } @@ -6780,7 +6778,7 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase - double maximum([[maybe_unused]] size_t s1_idx, const detail::Range&) const + double maximum(size_t, const detail::Range&) const { return 1.0; } @@ -6823,8 +6821,7 @@ struct CachedJaroWinkler : public detail::CachedSimilarityBase - double _similarity(const detail::Range& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double _similarity(const detail::Range& s2, double score_cutoff, double) const { return detail::jaro_winkler_similarity(PM, detail::make_range(s1), s2, prefix_weight, score_cutoff); } @@ -9102,8 +9099,7 @@ struct CachedOSA } template - size_t _distance(const detail::Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _distance(const detail::Range& s2, size_t score_cutoff, size_t) const { size_t res; if (s1.empty()) @@ -9149,8 +9145,7 @@ class Postfix : public SimilarityBase - static size_t _similarity(Range s1, Range s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) + static size_t _similarity(Range s1, Range s2, size_t score_cutoff, size_t) { size_t dist = remove_common_suffix(s1, s2); return (dist >= score_cutoff) ? dist : 0; @@ -9238,8 +9233,7 @@ struct CachedPostfix : public detail::CachedSimilarityBase } template - size_t _similarity(detail::Range s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _similarity(detail::Range s2, size_t score_cutoff, size_t score_hint) const { return detail::Postfix::similarity(s1, s2, score_cutoff, score_hint); } @@ -9274,8 +9268,7 @@ class Prefix : public SimilarityBase - static size_t _similarity(Range s1, Range s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) + static size_t _similarity(Range s1, Range s2, size_t score_cutoff, size_t) { size_t dist = remove_common_prefix(s1, s2); return (dist >= score_cutoff) ? dist : 0; @@ -9362,8 +9355,7 @@ struct CachedPrefix : public detail::CachedSimilarityBase, } template - size_t _similarity(detail::Range s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _similarity(detail::Range s2, size_t score_cutoff, size_t) const { return detail::Prefix::similarity(s1, s2, score_cutoff, score_cutoff); } @@ -10658,7 +10650,7 @@ CachedPartialRatio::CachedPartialRatio(InputIt1 first1, InputIt1 last1) template template double CachedPartialRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { size_t len1 = s1.size(); size_t len2 = static_cast(std::distance(first2, last2)); @@ -10685,8 +10677,7 @@ double CachedPartialRatio::similarity(InputIt2 first2, InputIt2 last2, d template template -double CachedPartialRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedPartialRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -10713,7 +10704,7 @@ double token_sort_ratio(const Sentence1& s1, const Sentence2& s2, double score_c template template double CachedTokenSortRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { if (score_cutoff > 100) return 0; @@ -10722,8 +10713,7 @@ double CachedTokenSortRatio::similarity(InputIt2 first2, InputIt2 last2, template template -double CachedTokenSortRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedTokenSortRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -10752,7 +10742,7 @@ double partial_token_sort_ratio(const Sentence1& s1, const Sentence2& s2, double template template double CachedPartialTokenSortRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { if (score_cutoff > 100) return 0; @@ -10761,8 +10751,7 @@ double CachedPartialTokenSortRatio::similarity(InputIt2 first2, InputIt2 template template -double CachedPartialTokenSortRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedPartialTokenSortRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -10841,7 +10830,7 @@ double token_set_ratio(const Sentence1& s1, const Sentence2& s2, double score_cu template template double CachedTokenSetRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { if (score_cutoff > 100) return 0; @@ -10850,8 +10839,7 @@ double CachedTokenSetRatio::similarity(InputIt2 first2, InputIt2 last2, template template -double CachedTokenSetRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedTokenSetRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -10900,7 +10888,7 @@ double partial_token_set_ratio(const Sentence1& s1, const Sentence2& s2, double template template double CachedPartialTokenSetRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { if (score_cutoff > 100) return 0; @@ -10909,8 +10897,7 @@ double CachedPartialTokenSetRatio::similarity(InputIt2 first2, InputIt2 template template -double CachedPartialTokenSetRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedPartialTokenSetRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -11089,15 +11076,14 @@ double token_ratio(const std::vector& s1_sorted, template template double CachedTokenRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { return fuzz_detail::token_ratio(s1_tokens, cached_ratio_s1_sorted, first2, last2, score_cutoff); } template template -double CachedTokenRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedTokenRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -11175,15 +11161,14 @@ double partial_token_ratio(const std::vector& s1_sorted, template template double CachedPartialTokenRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { return fuzz_detail::partial_token_ratio(s1_sorted, tokens_s1, first2, last2, score_cutoff); } template template -double CachedPartialTokenRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedPartialTokenRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -11246,8 +11231,7 @@ CachedWRatio::CachedWRatio(InputIt1 first1, InputIt1 last1) template template -double CachedWRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedWRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, double) const { if (score_cutoff > 100) return 0; @@ -11286,8 +11270,7 @@ double CachedWRatio::similarity(InputIt2 first2, InputIt2 last2, double template template -double CachedWRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedWRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -11318,8 +11301,7 @@ double QRatio(const Sentence1& s1, const Sentence2& s2, double score_cutoff) template template -double CachedQRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedQRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, double) const { auto len2 = std::distance(first2, last2); @@ -11332,8 +11314,7 @@ double CachedQRatio::similarity(InputIt2 first2, InputIt2 last2, double template template -double CachedQRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedQRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } diff --git a/rapidfuzz/distance/DamerauLevenshtein.hpp b/rapidfuzz/distance/DamerauLevenshtein.hpp index ce516491..98f3f2f1 100644 --- a/rapidfuzz/distance/DamerauLevenshtein.hpp +++ b/rapidfuzz/distance/DamerauLevenshtein.hpp @@ -133,8 +133,7 @@ struct CachedDamerauLevenshtein : public detail::CachedDistanceBase - size_t _distance(const detail::Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _distance(const detail::Range& s2, size_t score_cutoff, size_t) const { return rapidfuzz::experimental::damerau_levenshtein_distance(s1, s2, score_cutoff); } diff --git a/rapidfuzz/distance/DamerauLevenshtein_impl.hpp b/rapidfuzz/distance/DamerauLevenshtein_impl.hpp index b95d2d49..5e34556d 100644 --- a/rapidfuzz/distance/DamerauLevenshtein_impl.hpp +++ b/rapidfuzz/distance/DamerauLevenshtein_impl.hpp @@ -131,8 +131,7 @@ class DamerauLevenshtein } template - static size_t _distance(const Range& s1, const Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) + static size_t _distance(const Range& s1, const Range& s2, size_t score_cutoff, size_t) { return damerau_levenshtein_distance(s1, s2, score_cutoff); } diff --git a/rapidfuzz/distance/Hamming.hpp b/rapidfuzz/distance/Hamming.hpp index 669e7e70..3ce33b91 100644 --- a/rapidfuzz/distance/Hamming.hpp +++ b/rapidfuzz/distance/Hamming.hpp @@ -151,8 +151,7 @@ struct CachedHamming : public detail::CachedDistanceBase, } template - size_t _distance(const detail::Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _distance(const detail::Range& s2, size_t score_cutoff, size_t score_hint) const { return detail::Hamming::distance(s1, s2, pad, score_cutoff, score_hint); } diff --git a/rapidfuzz/distance/Hamming_impl.hpp b/rapidfuzz/distance/Hamming_impl.hpp index fa6350cd..34d286cd 100644 --- a/rapidfuzz/distance/Hamming_impl.hpp +++ b/rapidfuzz/distance/Hamming_impl.hpp @@ -21,7 +21,7 @@ class Hamming : public DistanceBase static size_t _distance(const Range& s1, const Range& s2, bool pad, - size_t score_cutoff, [[maybe_unused]] size_t score_hint) + size_t score_cutoff, size_t) { if (!pad && s1.size() != s2.size()) throw std::invalid_argument("Sequences are not the same length."); diff --git a/rapidfuzz/distance/Jaro.hpp b/rapidfuzz/distance/Jaro.hpp index 954e75b4..7b040e3f 100644 --- a/rapidfuzz/distance/Jaro.hpp +++ b/rapidfuzz/distance/Jaro.hpp @@ -172,7 +172,7 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, } template - double maximum([[maybe_unused]] size_t s1_idx, const detail::Range&) const + double maximum(size_t, const detail::Range&) const { return 1.0; } @@ -213,8 +213,7 @@ struct CachedJaro : public detail::CachedSimilarityBase, doub } template - double _similarity(const detail::Range& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double _similarity(const detail::Range& s2, double score_cutoff, double) const { return detail::jaro_similarity(PM, detail::make_range(s1), s2, score_cutoff); } diff --git a/rapidfuzz/distance/JaroWinkler.hpp b/rapidfuzz/distance/JaroWinkler.hpp index 4f44e75c..1bd2f082 100644 --- a/rapidfuzz/distance/JaroWinkler.hpp +++ b/rapidfuzz/distance/JaroWinkler.hpp @@ -145,7 +145,7 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase - double maximum([[maybe_unused]] size_t s1_idx, const detail::Range&) const + double maximum(size_t, const detail::Range&) const { return 1.0; } @@ -188,8 +188,7 @@ struct CachedJaroWinkler : public detail::CachedSimilarityBase - double _similarity(const detail::Range& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double _similarity(const detail::Range& s2, double score_cutoff, double) const { return detail::jaro_winkler_similarity(PM, detail::make_range(s1), s2, prefix_weight, score_cutoff); } diff --git a/rapidfuzz/distance/JaroWinkler_impl.hpp b/rapidfuzz/distance/JaroWinkler_impl.hpp index 48352085..a40f8af8 100644 --- a/rapidfuzz/distance/JaroWinkler_impl.hpp +++ b/rapidfuzz/distance/JaroWinkler_impl.hpp @@ -82,7 +82,7 @@ class JaroWinkler : public SimilarityBase { template static double _similarity(const Range& s1, const Range& s2, double prefix_weight, - double score_cutoff, [[maybe_unused]] double score_hint) + double score_cutoff, double) { return jaro_winkler_similarity(s1, s2, prefix_weight, score_cutoff); } diff --git a/rapidfuzz/distance/Jaro_impl.hpp b/rapidfuzz/distance/Jaro_impl.hpp index 4d6d8b80..70abc87b 100644 --- a/rapidfuzz/distance/Jaro_impl.hpp +++ b/rapidfuzz/distance/Jaro_impl.hpp @@ -92,8 +92,7 @@ static inline size_t count_common_chars(const FlaggedCharsMultiword& flagged) } template -static inline FlaggedCharsWord flag_similar_characters_word(const PM_Vec& PM, - [[maybe_unused]] const Range& P, +static inline FlaggedCharsWord flag_similar_characters_word(const PM_Vec& PM, const Range&, const Range& T, size_t Bound) { assert(P.size() <= 64); @@ -472,7 +471,9 @@ static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_leng using namespace simd_sse2; # endif - [[maybe_unused]] static constexpr size_t alignment = native_simd::alignment; +# ifndef RAPIDFUZZ_AVX2 + static constexpr size_t alignment = native_simd::alignment; +# endif static constexpr size_t vec_width = native_simd::size; assert(s2.size() <= sizeof(VecType) * 8); @@ -847,7 +848,7 @@ class Jaro : public SimilarityBase { template static double _similarity(const Range& s1, const Range& s2, double score_cutoff, - [[maybe_unused]] double score_hint) + double) { return jaro_similarity(s1, s2, score_cutoff); } diff --git a/rapidfuzz/distance/LCSseq.hpp b/rapidfuzz/distance/LCSseq.hpp index 601528f6..11150ff2 100644 --- a/rapidfuzz/distance/LCSseq.hpp +++ b/rapidfuzz/distance/LCSseq.hpp @@ -216,8 +216,7 @@ struct CachedLCSseq } template - size_t _similarity(const detail::Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _similarity(const detail::Range& s2, size_t score_cutoff, size_t) const { return detail::lcs_seq_similarity(PM, detail::make_range(s1), s2, score_cutoff); } diff --git a/rapidfuzz/distance/LCSseq_impl.hpp b/rapidfuzz/distance/LCSseq_impl.hpp index 65bf52e1..87242db6 100644 --- a/rapidfuzz/distance/LCSseq_impl.hpp +++ b/rapidfuzz/distance/LCSseq_impl.hpp @@ -450,7 +450,9 @@ Editops recover_alignment(const Range& s1, const Range& s2, if (dist == 0) return editops; - [[maybe_unused]] size_t band_width_right = s2.size() - matrix.sim; +#ifndef NDEBUG + size_t band_width_right = s2.size() - matrix.sim; +#endif auto col = len1; auto row = len2; @@ -549,7 +551,7 @@ class LCSseq : public SimilarityBase static size_t _similarity(const Range& s1, const Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) + size_t) { return lcs_seq_similarity(s1, s2, score_cutoff); } diff --git a/rapidfuzz/distance/OSA.hpp b/rapidfuzz/distance/OSA.hpp index e3df742f..b5ee5bc6 100644 --- a/rapidfuzz/distance/OSA.hpp +++ b/rapidfuzz/distance/OSA.hpp @@ -251,8 +251,7 @@ struct CachedOSA } template - size_t _distance(const detail::Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _distance(const detail::Range& s2, size_t score_cutoff, size_t) const { size_t res; if (s1.empty()) diff --git a/rapidfuzz/distance/Postfix.hpp b/rapidfuzz/distance/Postfix.hpp index e04c6742..57320a5f 100644 --- a/rapidfuzz/distance/Postfix.hpp +++ b/rapidfuzz/distance/Postfix.hpp @@ -85,8 +85,7 @@ struct CachedPostfix : public detail::CachedSimilarityBase } template - size_t _similarity(detail::Range s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _similarity(detail::Range s2, size_t score_cutoff, size_t score_hint) const { return detail::Postfix::similarity(s1, s2, score_cutoff, score_hint); } diff --git a/rapidfuzz/distance/Postfix_impl.hpp b/rapidfuzz/distance/Postfix_impl.hpp index 6dbde14d..87eb6ad2 100644 --- a/rapidfuzz/distance/Postfix_impl.hpp +++ b/rapidfuzz/distance/Postfix_impl.hpp @@ -20,8 +20,7 @@ class Postfix : public SimilarityBase - static size_t _similarity(Range s1, Range s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) + static size_t _similarity(Range s1, Range s2, size_t score_cutoff, size_t) { size_t dist = remove_common_suffix(s1, s2); return (dist >= score_cutoff) ? dist : 0; diff --git a/rapidfuzz/distance/Prefix.hpp b/rapidfuzz/distance/Prefix.hpp index 19c10017..d3123950 100644 --- a/rapidfuzz/distance/Prefix.hpp +++ b/rapidfuzz/distance/Prefix.hpp @@ -84,8 +84,7 @@ struct CachedPrefix : public detail::CachedSimilarityBase, } template - size_t _similarity(detail::Range s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _similarity(detail::Range s2, size_t score_cutoff, size_t) const { return detail::Prefix::similarity(s1, s2, score_cutoff, score_cutoff); } diff --git a/rapidfuzz/distance/Prefix_impl.hpp b/rapidfuzz/distance/Prefix_impl.hpp index 161baa7f..fac93c49 100644 --- a/rapidfuzz/distance/Prefix_impl.hpp +++ b/rapidfuzz/distance/Prefix_impl.hpp @@ -20,8 +20,7 @@ class Prefix : public SimilarityBase - static size_t _similarity(Range s1, Range s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) + static size_t _similarity(Range s1, Range s2, size_t score_cutoff, size_t) { size_t dist = remove_common_prefix(s1, s2); return (dist >= score_cutoff) ? dist : 0; diff --git a/rapidfuzz/fuzz_impl.hpp b/rapidfuzz/fuzz_impl.hpp index c39c19c9..84be44bf 100644 --- a/rapidfuzz/fuzz_impl.hpp +++ b/rapidfuzz/fuzz_impl.hpp @@ -259,7 +259,7 @@ CachedPartialRatio::CachedPartialRatio(InputIt1 first1, InputIt1 last1) template template double CachedPartialRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { size_t len1 = s1.size(); size_t len2 = static_cast(std::distance(first2, last2)); @@ -286,8 +286,7 @@ double CachedPartialRatio::similarity(InputIt2 first2, InputIt2 last2, d template template -double CachedPartialRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedPartialRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -314,7 +313,7 @@ double token_sort_ratio(const Sentence1& s1, const Sentence2& s2, double score_c template template double CachedTokenSortRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { if (score_cutoff > 100) return 0; @@ -323,8 +322,7 @@ double CachedTokenSortRatio::similarity(InputIt2 first2, InputIt2 last2, template template -double CachedTokenSortRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedTokenSortRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -353,7 +351,7 @@ double partial_token_sort_ratio(const Sentence1& s1, const Sentence2& s2, double template template double CachedPartialTokenSortRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { if (score_cutoff > 100) return 0; @@ -362,8 +360,7 @@ double CachedPartialTokenSortRatio::similarity(InputIt2 first2, InputIt2 template template -double CachedPartialTokenSortRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedPartialTokenSortRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -442,7 +439,7 @@ double token_set_ratio(const Sentence1& s1, const Sentence2& s2, double score_cu template template double CachedTokenSetRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { if (score_cutoff > 100) return 0; @@ -451,8 +448,7 @@ double CachedTokenSetRatio::similarity(InputIt2 first2, InputIt2 last2, template template -double CachedTokenSetRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedTokenSetRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -501,7 +497,7 @@ double partial_token_set_ratio(const Sentence1& s1, const Sentence2& s2, double template template double CachedPartialTokenSetRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { if (score_cutoff > 100) return 0; @@ -510,8 +506,7 @@ double CachedPartialTokenSetRatio::similarity(InputIt2 first2, InputIt2 template template -double CachedPartialTokenSetRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedPartialTokenSetRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -690,15 +685,14 @@ double token_ratio(const std::vector& s1_sorted, template template double CachedTokenRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { return fuzz_detail::token_ratio(s1_tokens, cached_ratio_s1_sorted, first2, last2, score_cutoff); } template template -double CachedTokenRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedTokenRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -776,15 +770,14 @@ double partial_token_ratio(const std::vector& s1_sorted, template template double CachedPartialTokenRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { return fuzz_detail::partial_token_ratio(s1_sorted, tokens_s1, first2, last2, score_cutoff); } template template -double CachedPartialTokenRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedPartialTokenRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -847,8 +840,7 @@ CachedWRatio::CachedWRatio(InputIt1 first1, InputIt1 last1) template template -double CachedWRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedWRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, double) const { if (score_cutoff > 100) return 0; @@ -887,8 +879,7 @@ double CachedWRatio::similarity(InputIt2 first2, InputIt2 last2, double template template -double CachedWRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedWRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -919,8 +910,7 @@ double QRatio(const Sentence1& s1, const Sentence2& s2, double score_cutoff) template template -double CachedQRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedQRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, double) const { auto len2 = std::distance(first2, last2); @@ -933,8 +923,7 @@ double CachedQRatio::similarity(InputIt2 first2, InputIt2 last2, double template template -double CachedQRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedQRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } From ae22db446c7f99d36b7e4a832f6ea18cfa8c759b Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Wed, 25 Dec 2024 11:29:29 +0100 Subject: [PATCH 13/18] C++11 didn't support constexpr void functions --- extras/rapidfuzz_amalgamated.hpp | 4 ++-- rapidfuzz/details/intrinsics.hpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/extras/rapidfuzz_amalgamated.hpp b/extras/rapidfuzz_amalgamated.hpp index 21a77ec7..ec2883ca 100644 --- a/extras/rapidfuzz_amalgamated.hpp +++ b/extras/rapidfuzz_amalgamated.hpp @@ -1,7 +1,7 @@ // Licensed under the MIT License . // SPDX-License-Identifier: MIT // RapidFuzz v1.0.2 -// Generated: 2024-12-25 11:20:05.194568 +// Generated: 2024-12-25 11:29:11.666527 // ---------------------------------------------------------- // This file is an amalgamation of multiple different files. // You probably shouldn't edit it directly. @@ -1610,7 +1610,7 @@ struct UnrollImpl { }; template -constexpr void unroll(F&& f) +RAPIDFUZZ_CONSTEXPR_CXX14 void unroll(F&& f) { UnrollImpl::call(f); } diff --git a/rapidfuzz/details/intrinsics.hpp b/rapidfuzz/details/intrinsics.hpp index 5089a74f..7952759e 100644 --- a/rapidfuzz/details/intrinsics.hpp +++ b/rapidfuzz/details/intrinsics.hpp @@ -220,7 +220,7 @@ struct UnrollImpl { }; template -constexpr void unroll(F&& f) +RAPIDFUZZ_CONSTEXPR_CXX14 void unroll(F&& f) { UnrollImpl::call(f); } From c3015c89fd8c842451935d7bccafbeb121e5e056 Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Wed, 25 Dec 2024 11:35:48 +0100 Subject: [PATCH 14/18] remove constexpr on multiline functions for C++11 --- .github/workflows/linux-simple.yml | 1 + extras/rapidfuzz_amalgamated.hpp | 23 ++++++++++++++--------- rapidfuzz/distance/Jaro.hpp | 2 +- rapidfuzz/distance/Jaro_impl.hpp | 7 ++++++- rapidfuzz/distance/LCSseq.hpp | 4 ++-- rapidfuzz/distance/Levenshtein.hpp | 4 ++-- rapidfuzz/distance/OSA.hpp | 4 ++-- 7 files changed, 28 insertions(+), 17 deletions(-) diff --git a/.github/workflows/linux-simple.yml b/.github/workflows/linux-simple.yml index 56f2d67b..d655fb72 100644 --- a/.github/workflows/linux-simple.yml +++ b/.github/workflows/linux-simple.yml @@ -7,6 +7,7 @@ jobs: name: ${{matrix.compiler.cxx}}, C++${{matrix.std}}, ${{matrix.build_type}} runs-on: ubuntu-20.04 strategy: + fail-fast: false matrix: compiler: - {cxx: g++-5, other_pkgs: g++-5} diff --git a/extras/rapidfuzz_amalgamated.hpp b/extras/rapidfuzz_amalgamated.hpp index ec2883ca..3932a3ef 100644 --- a/extras/rapidfuzz_amalgamated.hpp +++ b/extras/rapidfuzz_amalgamated.hpp @@ -1,7 +1,7 @@ // Licensed under the MIT License . // SPDX-License-Identifier: MIT // RapidFuzz v1.0.2 -// Generated: 2024-12-25 11:29:11.666527 +// Generated: 2024-12-25 11:44:52.213162 // ---------------------------------------------------------- // This file is an amalgamation of multiple different files. // You probably shouldn't edit it directly. @@ -5074,7 +5074,7 @@ struct MultiLCSseq : public detail::MultiSimilarityBase, siz friend detail::MultiSimilarityBase, size_t, 0, std::numeric_limits::max()>; friend detail::MultiNormalizedMetricBase, size_t>; - constexpr static size_t get_vec_size() + RAPIDFUZZ_CONSTEXPR_CXX14 static size_t get_vec_size() { # ifdef RAPIDFUZZ_AVX2 using namespace detail::simd_avx2; @@ -5093,7 +5093,7 @@ struct MultiLCSseq : public detail::MultiSimilarityBase, siz static_assert(MaxLen <= 64, "expected MaxLen <= 64"); } - constexpr static size_t find_block_count(size_t count) + static size_t find_block_count(size_t count) { size_t vec_size = get_vec_size(); size_t simd_vec_count = detail::ceil_div(count, vec_size); @@ -5559,7 +5559,12 @@ static inline size_t count_common_chars(const FlaggedCharsMultiword& flagged) } template -static inline FlaggedCharsWord flag_similar_characters_word(const PM_Vec& PM, const Range&, +static inline FlaggedCharsWord flag_similar_characters_word(const PM_Vec& PM, +#ifdef NDEBUG + const Range&, +#else + const Range& P, +#endif const Range& T, size_t Bound) { assert(P.size() <= 64); @@ -6415,7 +6420,7 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, # endif } - constexpr static size_t find_block_count(size_t count) + static size_t find_block_count(size_t count) { size_t vec_size = get_vec_size(); size_t simd_vec_count = detail::ceil_div(count, vec_size); @@ -8396,7 +8401,7 @@ struct MultiLevenshtein : public detail::MultiDistanceBase::max()>; friend detail::MultiNormalizedMetricBase, size_t>; - constexpr static size_t get_vec_size() + RAPIDFUZZ_CONSTEXPR_CXX14 static size_t get_vec_size() { # ifdef RAPIDFUZZ_AVX2 using namespace detail::simd_avx2; @@ -8415,7 +8420,7 @@ struct MultiLevenshtein : public detail::MultiDistanceBase, size_t, 0, std::numeric_limits::max()>; friend detail::MultiNormalizedMetricBase, size_t>; - constexpr static size_t get_vec_size() + RAPIDFUZZ_CONSTEXPR_CXX14 static size_t get_vec_size() { # ifdef RAPIDFUZZ_AVX2 using namespace detail::simd_avx2; @@ -8986,7 +8991,7 @@ struct MultiOSA static_assert(MaxLen <= 64, "expected MaxLen <= 64"); } - constexpr static size_t find_block_count(size_t count) + static size_t find_block_count(size_t count) { size_t vec_size = get_vec_size(); size_t simd_vec_count = detail::ceil_div(count, vec_size); diff --git a/rapidfuzz/distance/Jaro.hpp b/rapidfuzz/distance/Jaro.hpp index 7b040e3f..2948c784 100644 --- a/rapidfuzz/distance/Jaro.hpp +++ b/rapidfuzz/distance/Jaro.hpp @@ -96,7 +96,7 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, # endif } - constexpr static size_t find_block_count(size_t count) + static size_t find_block_count(size_t count) { size_t vec_size = get_vec_size(); size_t simd_vec_count = detail::ceil_div(count, vec_size); diff --git a/rapidfuzz/distance/Jaro_impl.hpp b/rapidfuzz/distance/Jaro_impl.hpp index 70abc87b..81bdf2f7 100644 --- a/rapidfuzz/distance/Jaro_impl.hpp +++ b/rapidfuzz/distance/Jaro_impl.hpp @@ -92,7 +92,12 @@ static inline size_t count_common_chars(const FlaggedCharsMultiword& flagged) } template -static inline FlaggedCharsWord flag_similar_characters_word(const PM_Vec& PM, const Range&, +static inline FlaggedCharsWord flag_similar_characters_word(const PM_Vec& PM, +#ifdef NDEBUG + const Range&, +#else + const Range& P, +#endif const Range& T, size_t Bound) { assert(P.size() <= 64); diff --git a/rapidfuzz/distance/LCSseq.hpp b/rapidfuzz/distance/LCSseq.hpp index 11150ff2..8891cf2c 100644 --- a/rapidfuzz/distance/LCSseq.hpp +++ b/rapidfuzz/distance/LCSseq.hpp @@ -83,7 +83,7 @@ struct MultiLCSseq : public detail::MultiSimilarityBase, siz friend detail::MultiSimilarityBase, size_t, 0, std::numeric_limits::max()>; friend detail::MultiNormalizedMetricBase, size_t>; - constexpr static size_t get_vec_size() + RAPIDFUZZ_CONSTEXPR_CXX14 static size_t get_vec_size() { # ifdef RAPIDFUZZ_AVX2 using namespace detail::simd_avx2; @@ -102,7 +102,7 @@ struct MultiLCSseq : public detail::MultiSimilarityBase, siz static_assert(MaxLen <= 64, "expected MaxLen <= 64"); } - constexpr static size_t find_block_count(size_t count) + static size_t find_block_count(size_t count) { size_t vec_size = get_vec_size(); size_t simd_vec_count = detail::ceil_div(count, vec_size); diff --git a/rapidfuzz/distance/Levenshtein.hpp b/rapidfuzz/distance/Levenshtein.hpp index 62504a1e..45245fea 100644 --- a/rapidfuzz/distance/Levenshtein.hpp +++ b/rapidfuzz/distance/Levenshtein.hpp @@ -304,7 +304,7 @@ struct MultiLevenshtein : public detail::MultiDistanceBase::max()>; friend detail::MultiNormalizedMetricBase, size_t>; - constexpr static size_t get_vec_size() + RAPIDFUZZ_CONSTEXPR_CXX14 static size_t get_vec_size() { # ifdef RAPIDFUZZ_AVX2 using namespace detail::simd_avx2; @@ -323,7 +323,7 @@ struct MultiLevenshtein : public detail::MultiDistanceBase, size_t, 0, std::numeric_limits::max()>; friend detail::MultiNormalizedMetricBase, size_t>; - constexpr static size_t get_vec_size() + RAPIDFUZZ_CONSTEXPR_CXX14 static size_t get_vec_size() { # ifdef RAPIDFUZZ_AVX2 using namespace detail::simd_avx2; @@ -138,7 +138,7 @@ struct MultiOSA static_assert(MaxLen <= 64, "expected MaxLen <= 64"); } - constexpr static size_t find_block_count(size_t count) + static size_t find_block_count(size_t count) { size_t vec_size = get_vec_size(); size_t simd_vec_count = detail::ceil_div(count, vec_size); From 5a042abe63cd2248740c466b75223d6b7975a927 Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Fri, 17 Jan 2025 23:21:45 +0100 Subject: [PATCH 15/18] add more tests --- .github/workflows/linux-simple.yml | 98 ++++++++++++++++++------------ rapidfuzz/details/simd.hpp | 2 +- 2 files changed, 60 insertions(+), 40 deletions(-) diff --git a/.github/workflows/linux-simple.yml b/.github/workflows/linux-simple.yml index d655fb72..a2838e48 100644 --- a/.github/workflows/linux-simple.yml +++ b/.github/workflows/linux-simple.yml @@ -4,35 +4,66 @@ on: [push, pull_request] jobs: build: - name: ${{matrix.compiler.cxx}}, C++${{matrix.std}}, ${{matrix.build_type}} + name: ${{matrix.cxx}}, C++${{matrix.std}}, ${{matrix.build_type}} runs-on: ubuntu-20.04 strategy: fail-fast: false matrix: compiler: - - {cxx: g++-5, other_pkgs: g++-5} - - {cxx: g++-6, other_pkgs: g++-6} - - {cxx: g++-7, other_pkgs: g++-7} - - {cxx: g++-8, other_pkgs: g++-8} - - {cxx: g++-9, other_pkgs: g++-9} - - {cxx: g++-10, other_pkgs: g++-10} - - {cxx: clang++-6.0, other_pkgs: clang-6.0} - - {cxx: clang++-7, other_pkgs: clang-7} - - {cxx: clang++-8, other_pkgs: clang-8} - - {cxx: clang++-9, other_pkgs: clang-9} - - {cxx: clang++-10, other_pkgs: clang-10} + - g++-5 + - g++-6 + - g++-7 + - g++-8 + - g++-9 + - g++-10 + - clang++-6.0 + - clang++-7 + - clang++-8 + - clang++-9 + - clang++-10 build_type: [Debug, Release] - std: [11, 14] - #std: [11, 14, 17, 20] - #exclude: - # - compiler.cxx: "g++-{5,6,7,8,9,10}" - # std: 17 - # - compiler.cxx: "g++-{5,6,7,8,9,10}" - # std: 20 - # - compiler.cxx: "clang++-{6.0,7,8,9,10}" - # std: 17 - # - compiler.cxx: "clang++-{6.0,7,8,9,10}" - # std: 20 + std: [11] + include: + - cxx: g++-5 + other_pkgs: g++-5 + - cxx: g++-6 + other_pkgs: g++-6 + - cxx: g++-7 + other_pkgs: g++-7 + - cxx: g++-8 + other_pkgs: g++-8 + - cxx: g++-9 + other_pkgs: g++-9 + - cxx: g++-10 + other_pkgs: g++-10 + - cxx: clang++-6.0 + other_pkgs: clang-6.0 + - cxx: clang++-7 + other_pkgs: clang-7 + - cxx: clang++-8 + other_pkgs: clang-8 + - cxx: clang++-9 + other_pkgs: clang-9 + - cxx: clang++-10 + other_pkgs: clang-10 + - cxx: clang++-10 + other_pkgs: clang-10 + std: 14 + - cxx: clang++-10 + other_pkgs: clang-10 + std: 17 + - cxx: clang++-10 + other_pkgs: clang-10 + std: 20 + - cxx: g++-10 + other_pkgs: g++-10 + std: 14 + - cxx: g++-10 + other_pkgs: g++-10 + std: 17 + - cxx: g++-10 + other_pkgs: g++-10 + std: 20 steps: - uses: actions/checkout@v4 @@ -41,25 +72,24 @@ jobs: run: | sudo apt-add-repository 'deb http://azure.archive.ubuntu.com/ubuntu/ bionic main' sudo apt-add-repository 'deb http://azure.archive.ubuntu.com/ubuntu/ bionic universe' - if: ${{ matrix.compiler.cxx == 'g++-5' || matrix.compiler.cxx == 'g++-6' }} + if: ${{ matrix.cxx == 'g++-5' || matrix.cxx == 'g++-6' }} - name: Prepare environment run: | sudo apt-get update - sudo apt-get install -y ninja-build ${{matrix.compiler.other_pkgs}} + sudo apt-get install -y ninja-build ${{matrix.other_pkgs}} - name: Configure CMake env: - CXX: ${{matrix.compiler.cxx}} + CXX: ${{matrix.cxx}} run: | cmake -B build \ - -DCMAKE_BUILD_TYPE=${{matrix.built_type}} \ + -DCMAKE_BUILD_TYPE=${{matrix.build_type}} \ -DCMAKE_CXX_STANDARD=${{matrix.std}} \ -DCMAKE_CXX_STANDARD_REQUIRED=ON \ -DCMAKE_CXX_EXTENSIONS=OFF \ -DRAPIDFUZZ_BUILD_TESTING=1 \ -DRAPIDFUZZ_ENABLE_LINTERS=1 \ - -DRAPIDFUZZ_BUILD_FUZZERS=1 \ -G Ninja - name: Build @@ -68,15 +98,5 @@ jobs: - name: Test working-directory: build - run: ctest -C ${{matrix.BUILD_TYPE}} --rerun-failed --output-on-failure -j `nproc` + run: ctest -C ${{matrix.build_type}} --rerun-failed --output-on-failure -j `nproc` - - name: Fuzz Test - working-directory: build - run: | - fuzzing/fuzz_lcs_similarity -max_total_time=30 - fuzzing/fuzz_levenshtein_distance -max_total_time=30 - fuzzing/fuzz_levenshtein_editops -max_total_time=30 - fuzzing/fuzz_indel_distance -max_total_time=30 - fuzzing/fuzz_indel_editops -max_total_time=30 - fuzzing/fuzz_osa_distance -max_total_time=30 - fuzzing/fuzz_damerau_levenshtein_distance -max_total_time=30 diff --git a/rapidfuzz/details/simd.hpp b/rapidfuzz/details/simd.hpp index 358ebec6..037a56f1 100644 --- a/rapidfuzz/details/simd.hpp +++ b/rapidfuzz/details/simd.hpp @@ -6,7 +6,7 @@ /* RAPIDFUZZ_LTO_HACK is used to differentiate functions between different * translation units to avoid warnings when using lto */ #ifndef RAPIDFUZZ_EXCLUDE_SIMD -# if __AVX2__ +# if 0//__AVX2__ # define RAPIDFUZZ_SIMD # define RAPIDFUZZ_AVX2 # define RAPIDFUZZ_LTO_HACK 0 From 0b66c20cecdc20da722eb2ac7780b680a9deef03 Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Sat, 18 Jan 2025 22:13:07 +0100 Subject: [PATCH 16/18] add back avx2 --- CHANGELOG.md | 4 ++++ CMakeLists.txt | 2 +- rapidfuzz/details/simd.hpp | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b92b6632..b5a76179 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ ## Changelog +## [3.3.0] - 2025-01-18 +### Changed +- add C++11 and C++14 support + ## [3.2.0] - 2024-12-17 ### Performance - improve calculation of min score inside partial_ratio so it can skip more alignments diff --git a/CMakeLists.txt b/CMakeLists.txt index c6caa79e..71f0ffca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,7 +32,7 @@ if (CMAKE_BINARY_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) message(FATAL_ERROR "Building in-source is not supported! Create a build dir and remove ${CMAKE_SOURCE_DIR}/CMakeCache.txt") endif() -project(rapidfuzz LANGUAGES CXX VERSION 3.2.0) +project(rapidfuzz LANGUAGES CXX VERSION 3.3.0) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake") include(GNUInstallDirs) diff --git a/rapidfuzz/details/simd.hpp b/rapidfuzz/details/simd.hpp index 037a56f1..358ebec6 100644 --- a/rapidfuzz/details/simd.hpp +++ b/rapidfuzz/details/simd.hpp @@ -6,7 +6,7 @@ /* RAPIDFUZZ_LTO_HACK is used to differentiate functions between different * translation units to avoid warnings when using lto */ #ifndef RAPIDFUZZ_EXCLUDE_SIMD -# if 0//__AVX2__ +# if __AVX2__ # define RAPIDFUZZ_SIMD # define RAPIDFUZZ_AVX2 # define RAPIDFUZZ_LTO_HACK 0 From 1ff31877ca48edf961e176669163607204221a84 Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Sat, 18 Jan 2025 22:18:07 +0100 Subject: [PATCH 17/18] add build type --- .github/workflows/linux-simple.yml | 32 ++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/.github/workflows/linux-simple.yml b/.github/workflows/linux-simple.yml index a2838e48..1b95a775 100644 --- a/.github/workflows/linux-simple.yml +++ b/.github/workflows/linux-simple.yml @@ -46,24 +46,56 @@ jobs: other_pkgs: clang-9 - cxx: clang++-10 other_pkgs: clang-10 + + - cxx: clang++-10 + other_pkgs: clang-10 + std: 14 + build_type: Debug + - cxx: clang++-10 + other_pkgs: clang-10 + std: 17 + build_type: Debug + - cxx: clang++-10 + other_pkgs: clang-10 + std: 20 + build_type: Debug + - cxx: g++-10 + other_pkgs: g++-10 + std: 14 + build_type: Debug + - cxx: g++-10 + other_pkgs: g++-10 + std: 17 + build_type: Debug + - cxx: g++-10 + other_pkgs: g++-10 + std: 20 + build_type: Debug + - cxx: clang++-10 other_pkgs: clang-10 std: 14 + build_type: Release - cxx: clang++-10 other_pkgs: clang-10 std: 17 + build_type: Release - cxx: clang++-10 other_pkgs: clang-10 std: 20 + build_type: Release - cxx: g++-10 other_pkgs: g++-10 std: 14 + build_type: Release - cxx: g++-10 other_pkgs: g++-10 std: 17 + build_type: Release - cxx: g++-10 other_pkgs: g++-10 std: 20 + build_type: Release steps: - uses: actions/checkout@v4 From c2429e4e77997a3cabdae4939d8805f498c3c10e Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Sat, 18 Jan 2025 22:48:27 +0100 Subject: [PATCH 18/18] remove useless assert --- rapidfuzz/distance/Levenshtein_impl.hpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/rapidfuzz/distance/Levenshtein_impl.hpp b/rapidfuzz/distance/Levenshtein_impl.hpp index 9f707352..991f10c9 100644 --- a/rapidfuzz/distance/Levenshtein_impl.hpp +++ b/rapidfuzz/distance/Levenshtein_impl.hpp @@ -1173,9 +1173,6 @@ HirschbergPos find_hirschberg_pos(const Range& s1, const Range= 0); - assert(hpos.right_score >= 0); - if (hpos.left_score + hpos.right_score > max) return find_hirschberg_pos(s1, s2, max * 2); else {