🌐 AI搜索 & 代理 主页
Skip to content

Commit 806555e

Browse files
committed
ltree: fix case-insensitive matching.
Previously, ltree_prefix_eq_ci() used lowercasing with the default collation, while ltree_crc32_sz() used tolower() directly. These were equivalent only if the default collation provider was libc and the encoding was single-byte.

Change both to use casefolding with the default collation.

Backpatch through 18, where the casefolding APIs were introduced. The bug exists in earlier versions, but would require some adaptation.

A REINDEX is required for ltree indexes where the database default collation is not libc.

Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Reviewed-by: Peter Eisentraut <peter@eisentraut.org>
Backpatch-through: 18
Discussion: https://postgr.es/m/450ceb6260cad30d7afdf155d991a9caafee7c0d.camel@j-davis.com
Discussion: https://postgr.es/m/01fc00fd66f641b9693d4f9f1af0ccf44cbdfbdf.camel@j-davis.com
1 parent f79e239 commit 806555e

File tree

3 files changed

+89
-11
lines changed

3 files changed

+89
-11
lines changed

contrib/ltree/crc32.c

Lines changed: 39 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,31 +10,62 @@
1010
#include "postgres.h"
1111
#include "ltree.h"
1212

13+
#include "crc32.h"
14+
#include "utils/pg_crc.h"
1315
#ifdef LOWER_NODE
14-
#include <ctype.h>
15-
#define TOLOWER(x) tolower((unsigned char) (x))
16-
#else
17-
#define TOLOWER(x) (x)
16+
#include "catalog/pg_collation.h"
17+
#include "utils/pg_locale.h"
1818
#endif
1919

20-
#include "crc32.h"
21-
#include "utils/pg_crc.h"
20+
#ifdef LOWER_NODE
2221

2322
unsigned int
2423
ltree_crc32_sz(const char *buf, int size)
2524
{
2625
pg_crc32 crc;
2726
const char *p = buf;
27+
static pg_locale_t locale = NULL;
28+
29+
if (!locale)
30+
locale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID);
2831

2932
INIT_TRADITIONAL_CRC32(crc);
3033
while (size > 0)
3134
{
32-
char c = (char) TOLOWER(*p);
35+
char foldstr[UNICODE_CASEMAP_BUFSZ];
36+
int srclen = pg_mblen(p);
37+
size_t foldlen;
38+
39+
/* fold one codepoint at a time */
40+
foldlen = pg_strfold(foldstr, UNICODE_CASEMAP_BUFSZ, p, srclen,
41+
locale);
42+
43+
COMP_TRADITIONAL_CRC32(crc, foldstr, foldlen);
44+
45+
size -= srclen;
46+
p += srclen;
47+
}
48+
FIN_TRADITIONAL_CRC32(crc);
49+
return (unsigned int) crc;
50+
}
51+
52+
#else
3353

34-
COMP_TRADITIONAL_CRC32(crc, &c, 1);
54+
unsigned int
55+
ltree_crc32_sz(const char *buf, int size)
56+
{
57+
pg_crc32 crc;
58+
const char *p = buf;
59+
60+
INIT_TRADITIONAL_CRC32(crc);
61+
while (size > 0)
62+
{
63+
COMP_TRADITIONAL_CRC32(crc, p, 1);
3564
size--;
3665
p++;
3766
}
3867
FIN_TRADITIONAL_CRC32(crc);
3968
return (unsigned int) crc;
4069
}
70+
71+
#endif /* !LOWER_NODE */

contrib/ltree/lquery_op.c

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,11 +93,44 @@ ltree_prefix_eq(const char *a, size_t a_sz, const char *b, size_t b_sz)
9393
bool
9494
ltree_prefix_eq_ci(const char *a, size_t a_sz, const char *b, size_t b_sz)
9595
{
96-
char *al = str_tolower(a, a_sz, DEFAULT_COLLATION_OID);
97-
char *bl = str_tolower(b, b_sz, DEFAULT_COLLATION_OID);
96+
static pg_locale_t locale = NULL;
97+
size_t al_sz = a_sz + 1;
98+
size_t al_len;
99+
char *al = palloc(al_sz);
100+
size_t bl_sz = b_sz + 1;
101+
size_t bl_len;
102+
char *bl = palloc(bl_sz);
98103
bool res;
99104

100-
res = (strncmp(al, bl, a_sz) == 0);
105+
if (!locale)
106+
locale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID);
107+
108+
/* casefold both a and b */
109+
110+
al_len = pg_strfold(al, al_sz, a, a_sz, locale);
111+
if (al_len + 1 > al_sz)
112+
{
113+
/* grow buffer if needed and retry */
114+
al_sz = al_len + 1;
115+
al = repalloc(al, al_sz);
116+
al_len = pg_strfold(al, al_sz, a, a_sz, locale);
117+
Assert(al_len + 1 <= al_sz);
118+
}
119+
120+
bl_len = pg_strfold(bl, bl_sz, b, b_sz, locale);
121+
if (bl_len + 1 > bl_sz)
122+
{
123+
/* grow buffer if needed and retry */
124+
bl_sz = bl_len + 1;
125+
bl = repalloc(bl, bl_sz);
126+
bl_len = pg_strfold(bl, bl_sz, b, b_sz, locale);
127+
Assert(bl_len + 1 <= bl_sz);
128+
}
129+
130+
if (al_len > bl_len)
131+
res = false;
132+
else
133+
res = (strncmp(al, bl, al_len) == 0);
101134

102135
pfree(al);
103136
pfree(bl);

src/include/utils/pg_locale.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,20 @@
2424
/* use for libc locale names */
2525
#define LOCALE_NAME_BUFLEN 128
2626

27+
/*
28+
* Maximum number of bytes needed to map a single codepoint. Useful for
29+
* mapping and processing a single input codepoint at a time with a
30+
* statically-allocated buffer.
31+
*
32+
* With full case mapping, an input codepoint may be mapped to as many as
33+
* three output codepoints. See Unicode 16.0.0, section 5.18.2, "Change in
34+
* Length":
35+
*
36+
* https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-5/#G29675
37+
*/
38+
#define UNICODE_CASEMAP_LEN 3
39+
#define UNICODE_CASEMAP_BUFSZ (UNICODE_CASEMAP_LEN * MAX_MULTIBYTE_CHAR_LEN)
40+
2741
/* GUC settings */
2842
extern PGDLLIMPORT char *locale_messages;
2943
extern PGDLLIMPORT char *locale_monetary;

0 commit comments

Comments
 (0)