@@ -99,8 +99,6 @@ static Selectivity like_selectivity(const char *patt, int pattlen,
9999static Selectivity regex_selectivity (const char * patt , int pattlen ,
100100 bool case_insensitive ,
101101 int fixed_prefix_len );
102- static int pattern_char_isalpha (char c , bool is_multibyte ,
103- pg_locale_t locale );
104102static Const * make_greater_string (const Const * str_const , FmgrInfo * ltproc ,
105103 Oid collation );
106104static Datum string_to_datum (const char * str , Oid datatype );
@@ -986,43 +984,19 @@ icnlikejoinsel(PG_FUNCTION_ARGS)
986984 */
987985
988986static Pattern_Prefix_Status
989- like_fixed_prefix (Const * patt_const , bool case_insensitive , Oid collation ,
990- Const * * prefix_const , Selectivity * rest_selec )
987+ like_fixed_prefix (Const * patt_const , Const * * prefix_const ,
988+ Selectivity * rest_selec )
991989{
992990 char * match ;
993991 char * patt ;
994992 int pattlen ;
995993 Oid typeid = patt_const -> consttype ;
996994 int pos ,
997995 match_pos ;
998- bool is_multibyte = (pg_database_encoding_max_length () > 1 );
999- pg_locale_t locale = 0 ;
1000996
1001997 /* the right-hand const is type text or bytea */
1002998 Assert (typeid == BYTEAOID || typeid == TEXTOID );
1003999
1004- if (case_insensitive )
1005- {
1006- if (typeid == BYTEAOID )
1007- ereport (ERROR ,
1008- (errcode (ERRCODE_FEATURE_NOT_SUPPORTED ),
1009- errmsg ("case insensitive matching not supported on type bytea" )));
1010-
1011- if (!OidIsValid (collation ))
1012- {
1013- /*
1014- * This typically means that the parser could not resolve a
1015- * conflict of implicit collations, so report it that way.
1016- */
1017- ereport (ERROR ,
1018- (errcode (ERRCODE_INDETERMINATE_COLLATION ),
1019- errmsg ("could not determine which collation to use for ILIKE" ),
1020- errhint ("Use the COLLATE clause to set the collation explicitly." )));
1021- }
1022-
1023- locale = pg_newlocale_from_collation (collation );
1024- }
1025-
10261000 if (typeid != BYTEAOID )
10271001 {
10281002 patt = TextDatumGetCString (patt_const -> constvalue );
@@ -1055,11 +1029,6 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
10551029 break ;
10561030 }
10571031
1058- /* Stop if case-varying character (it's sort of a wildcard) */
1059- if (case_insensitive &&
1060- pattern_char_isalpha (patt [pos ], is_multibyte , locale ))
1061- break ;
1062-
10631032 match [match_pos ++ ] = patt [pos ];
10641033 }
10651034
@@ -1071,8 +1040,7 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
10711040 * prefix_const = string_to_bytea_const (match , match_pos );
10721041
10731042 if (rest_selec != NULL )
1074- * rest_selec = like_selectivity (& patt [pos ], pattlen - pos ,
1075- case_insensitive );
1043+ * rest_selec = like_selectivity (& patt [pos ], pattlen - pos , false);
10761044
10771045 pfree (patt );
10781046 pfree (match );
@@ -1087,6 +1055,112 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
10871055 return Pattern_Prefix_None ;
10881056}
10891057
1058+ /*
1059+ * Case-insensitive variant of like_fixed_prefix(). Multibyte and
1060+ * locale-aware for detecting cased characters.
1061+ */
1062+ static Pattern_Prefix_Status
1063+ like_fixed_prefix_ci (Const * patt_const , Oid collation , Const * * prefix_const ,
1064+ Selectivity * rest_selec )
1065+ {
1066+ text * val = DatumGetTextPP (patt_const -> constvalue );
1067+ Oid typeid = patt_const -> consttype ;
1068+ int nbytes = VARSIZE_ANY_EXHDR (val );
1069+ int wpos ;
1070+ pg_wchar * wpatt ;
1071+ int wpattlen ;
1072+ pg_wchar * wmatch ;
1073+ int wmatch_pos = 0 ;
1074+ char * match ;
1075+ int match_mblen ;
1076+ pg_locale_t locale = 0 ;
1077+
1078+ /* the right-hand const is type text or bytea */
1079+ Assert (typeid == BYTEAOID || typeid == TEXTOID );
1080+
1081+ if (typeid == BYTEAOID )
1082+ ereport (ERROR ,
1083+ (errcode (ERRCODE_FEATURE_NOT_SUPPORTED ),
1084+ errmsg ("case insensitive matching not supported on type bytea" )));
1085+
1086+ if (!OidIsValid (collation ))
1087+ {
1088+ /*
1089+ * This typically means that the parser could not resolve a conflict
1090+ * of implicit collations, so report it that way.
1091+ */
1092+ ereport (ERROR ,
1093+ (errcode (ERRCODE_INDETERMINATE_COLLATION ),
1094+ errmsg ("could not determine which collation to use for ILIKE" ),
1095+ errhint ("Use the COLLATE clause to set the collation explicitly." )));
1096+ }
1097+
1098+ locale = pg_newlocale_from_collation (collation );
1099+
1100+ wpatt = palloc ((nbytes + 1 ) * sizeof (pg_wchar ));
1101+ wpattlen = pg_mb2wchar_with_len (VARDATA_ANY (val ), wpatt , nbytes );
1102+
1103+ wmatch = palloc ((nbytes + 1 ) * sizeof (pg_wchar ));
1104+ for (wpos = 0 ; wpos < wpattlen ; wpos ++ )
1105+ {
1106+ /* % and _ are wildcard characters in LIKE */
1107+ if (wpatt [wpos ] == '%' ||
1108+ wpatt [wpos ] == '_' )
1109+ break ;
1110+
1111+ /* Backslash escapes the next character */
1112+ if (wpatt [wpos ] == '\\' )
1113+ {
1114+ wpos ++ ;
1115+ if (wpos >= wpattlen )
1116+ break ;
1117+ }
1118+
1119+ /*
1120+ * For ILIKE, stop if it's a case-varying character (it's sort of a
1121+ * wildcard).
1122+ */
1123+ if (pg_iswcased (wpatt [wpos ], locale ))
1124+ break ;
1125+
1126+ wmatch [wmatch_pos ++ ] = wpatt [wpos ];
1127+ }
1128+
1129+ wmatch [wmatch_pos ] = '\0' ;
1130+
1131+ match = palloc (pg_database_encoding_max_length () * wmatch_pos + 1 );
1132+ match_mblen = pg_wchar2mb_with_len (wmatch , match , wmatch_pos );
1133+ match [match_mblen ] = '\0' ;
1134+ pfree (wmatch );
1135+
1136+ * prefix_const = string_to_const (match , TEXTOID );
1137+ pfree (match );
1138+
1139+ if (rest_selec != NULL )
1140+ {
1141+ int wrestlen = wpattlen - wmatch_pos ;
1142+ char * rest ;
1143+ int rest_mblen ;
1144+
1145+ rest = palloc (pg_database_encoding_max_length () * wrestlen + 1 );
1146+ rest_mblen = pg_wchar2mb_with_len (& wpatt [wmatch_pos ], rest , wrestlen );
1147+
1148+ * rest_selec = like_selectivity (rest , rest_mblen , true);
1149+ pfree (rest );
1150+ }
1151+
1152+ pfree (wpatt );
1153+
1154+ /* in LIKE, an empty pattern is an exact match! */
1155+ if (wpos == wpattlen )
1156+ return Pattern_Prefix_Exact ; /* reached end of pattern, so exact */
1157+
1158+ if (wmatch_pos > 0 )
1159+ return Pattern_Prefix_Partial ;
1160+
1161+ return Pattern_Prefix_None ;
1162+ }
1163+
10901164static Pattern_Prefix_Status
10911165regex_fixed_prefix (Const * patt_const , bool case_insensitive , Oid collation ,
10921166 Const * * prefix_const , Selectivity * rest_selec )
@@ -1164,12 +1238,11 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
11641238 switch (ptype )
11651239 {
11661240 case Pattern_Type_Like :
1167- result = like_fixed_prefix (patt , false, collation ,
1168- prefix , rest_selec );
1241+ result = like_fixed_prefix (patt , prefix , rest_selec );
11691242 break ;
11701243 case Pattern_Type_Like_IC :
1171- result = like_fixed_prefix (patt , true, collation ,
1172- prefix , rest_selec );
1244+ result = like_fixed_prefix_ci (patt , collation , prefix ,
1245+ rest_selec );
11731246 break ;
11741247 case Pattern_Type_Regex :
11751248 result = regex_fixed_prefix (patt , false, collation ,
@@ -1481,24 +1554,6 @@ regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
14811554 return sel ;
14821555}
14831556
1484- /*
1485- * Check whether char is a letter (and, hence, subject to case-folding)
1486- *
1487- * In multibyte character sets or with ICU, we can't use isalpha, and it does
1488- * not seem worth trying to convert to wchar_t to use iswalpha or u_isalpha.
1489- * Instead, just assume any non-ASCII char is potentially case-varying, and
1490- * hard-wire knowledge of which ASCII chars are letters.
1491- */
1492- static int
1493- pattern_char_isalpha (char c , bool is_multibyte ,
1494- pg_locale_t locale )
1495- {
1496- if (locale -> ctype_is_c )
1497- return (c >= 'A' && c <= 'Z' ) || (c >= 'a' && c <= 'z' );
1498- else
1499- return char_is_cased (c , locale );
1500- }
1501-
15021557
15031558/*
15041559 * For bytea, the increment function need only increment the current byte
0 commit comments