88 *
99 *
1010 * IDENTIFICATION
11- * $PostgreSQL: pgsql/src/port/chklocale.c,v 1.3 2007/09/29 00:01:43 tgl Exp $
11+ * $PostgreSQL: pgsql/src/port/chklocale.c,v 1.4 2007/10/03 17:16:39 tgl Exp $
1212 *
1313 *-------------------------------------------------------------------------
1414 */
2727#include "mb/pg_wchar.h"
2828
2929
30- #if defined(HAVE_LANGINFO_H ) && defined(CODESET )
31-
3230/*
3331 * This table needs to recognize all the CODESET spellings for supported
3432 * backend encodings, as well as frontend-only encodings where possible
3533 * (the latter case is currently only needed for initdb to recognize
36- * error situations).
34+ * error situations). On Windows, we rely on entries for codepage
35+ * numbers (CPnnn).
3736 *
3837 * Note that we search the table with pg_strcasecmp(), so variant
3938 * capitalizations don't need their own entries.
@@ -49,23 +48,27 @@ static const struct encoding_match encoding_match_list[] = {
4948 {PG_EUC_JP , "eucJP" },
5049 {PG_EUC_JP , "IBM-eucJP" },
5150 {PG_EUC_JP , "sdeckanji" },
51+ {PG_EUC_JP , "CP20932" },
5252
5353 {PG_EUC_CN , "EUC-CN" },
5454 {PG_EUC_CN , "eucCN" },
5555 {PG_EUC_CN , "IBM-eucCN" },
5656 {PG_EUC_CN , "GB2312" },
5757 {PG_EUC_CN , "dechanzi" },
58+ {PG_EUC_CN , "CP20936" },
5859
5960 {PG_EUC_KR , "EUC-KR" },
6061 {PG_EUC_KR , "eucKR" },
6162 {PG_EUC_KR , "IBM-eucKR" },
6263 {PG_EUC_KR , "deckorean" },
6364 {PG_EUC_KR , "5601" },
65+ {PG_EUC_KR , "CP51949" }, /* or 20949 ? */
6466
6567 {PG_EUC_TW , "EUC-TW" },
6668 {PG_EUC_TW , "eucTW" },
6769 {PG_EUC_TW , "IBM-eucTW" },
6870 {PG_EUC_TW , "cns11643" },
71+ /* No codepage for EUC-TW ? */
6972
7073 {PG_UTF8 , "UTF-8" },
7174 {PG_UTF8 , "utf8" },
@@ -111,6 +114,7 @@ static const struct encoding_match encoding_match_list[] = {
111114 {PG_LATIN10 , "iso885916" },
112115
113116 {PG_KOI8R , "KOI8-R" },
117+ {PG_KOI8R , "CP20866" },
114118
115119 {PG_WIN1252 , "CP1252" },
116120 {PG_WIN1253 , "CP1253" },
@@ -143,23 +147,56 @@ static const struct encoding_match encoding_match_list[] = {
143147
144148 {PG_SJIS , "SJIS" },
145149 {PG_SJIS , "PCK" },
150+ {PG_SJIS , "CP932" },
146151
147152 {PG_BIG5 , "BIG5" },
148153 {PG_BIG5 , "BIG5HKSCS" },
154+ {PG_BIG5 , "CP950" },
149155
150156 {PG_GBK , "GBK" },
157+ {PG_GBK , "CP936" },
151158
152159 {PG_UHC , "UHC" },
153160
154161 {PG_JOHAB , "JOHAB" },
162+ {PG_JOHAB , "CP1361" },
155163
156164 {PG_GB18030 , "GB18030" },
165+ {PG_GB18030 , "CP54936" },
157166
158167 {PG_SHIFT_JIS_2004 , "SJIS_2004" },
159168
160169 {PG_SQL_ASCII , NULL } /* end marker */
161170};
162171
172+ #ifdef WIN32
/*
 * On Windows, use CP<codepage number> instead of the nl_langinfo() result.
 *
 * ctype is a locale name as returned by setlocale().  Locale format on
 * Win32 is <Language>_<Country>.<CodePage>, for example English_USA.1252,
 * for which this function produces "CP1252".
 *
 * Returns a freshly malloc'd string that the caller owns, or NULL if the
 * locale name contains no '.' or if memory could not be allocated.
 */
static char *
win32_langinfo(const char *ctype)
{
	const char *codepage;
	size_t		ln;
	char	   *r;

	/* The codepage is whatever follows the last '.' in the locale name. */
	codepage = strrchr(ctype, '.');
	if (!codepage)
		return NULL;
	codepage++;

	ln = strlen(codepage);

	/* Room for the "CP" prefix, the codepage text, and the trailing NUL. */
	r = malloc(ln + 3);
	if (!r)
		return NULL;			/* out of memory; callers treat NULL as unknown */

	memcpy(r, "CP", 2);
	memcpy(r + 2, codepage, ln + 1);	/* copies the terminating NUL too */

	return r;
}
197+ #endif /* WIN32 */
198+
199+ #if (defined(HAVE_LANGINFO_H ) && defined(CODESET )) || defined(WIN32 )
163200
164201/*
165202 * Given a setting for LC_CTYPE, return the Postgres ID of the associated
@@ -181,6 +218,7 @@ pg_get_encoding_from_locale(const char *ctype)
181218 if (ctype )
182219 {
183220 char * save ;
221+ char * name ;
184222
185223 save = setlocale (LC_CTYPE , NULL );
186224 if (!save )
@@ -190,15 +228,20 @@ pg_get_encoding_from_locale(const char *ctype)
190228 if (!save )
191229 return PG_SQL_ASCII ; /* out of memory; unlikely */
192230
193- if (!setlocale (LC_CTYPE , ctype ))
231+ name = setlocale (LC_CTYPE , ctype );
232+ if (!name )
194233 {
195234 free (save );
196235 return PG_SQL_ASCII ; /* bogus ctype passed in? */
197236 }
198237
238+ #ifndef WIN32
199239 sys = nl_langinfo (CODESET );
200240 if (sys )
201241 sys = strdup (sys );
242+ #else
243+ sys = win32_langinfo (name );
244+ #endif
202245
203246 setlocale (LC_CTYPE , save );
204247 free (save );
@@ -209,9 +252,13 @@ pg_get_encoding_from_locale(const char *ctype)
209252 ctype = setlocale (LC_CTYPE , NULL );
210253 if (!ctype )
211254 return PG_SQL_ASCII ; /* setlocale() broken? */
255+ #ifndef WIN32
212256 sys = nl_langinfo (CODESET );
213257 if (sys )
214258 sys = strdup (sys );
259+ #else
260+ sys = win32_langinfo (ctype );
261+ #endif
215262 }
216263
217264 if (!sys )
@@ -268,7 +315,7 @@ pg_get_encoding_from_locale(const char *ctype)
268315 return PG_SQL_ASCII ;
269316}
270317
271- #else /* ! (HAVE_LANGINFO_H && CODESET) */
318+ #else /* (HAVE_LANGINFO_H && CODESET) || WIN32 */
272319
273320/*
274321 * stub if no platform support
@@ -279,4 +326,4 @@ pg_get_encoding_from_locale(const char *ctype)
279326 return PG_SQL_ASCII ;
280327}
281328
282- #endif /* HAVE_LANGINFO_H && CODESET */
329+ #endif /* ( HAVE_LANGINFO_H && CODESET) || WIN32 */
0 commit comments