99 *
1010 *
1111 * IDENTIFICATION
12- * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.97 2002/06/22 02:04:45 thomas Exp $
12+ * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.98 2002/08/04 06:36:18 thomas Exp $
1313 *
1414 *-------------------------------------------------------------------------
1515 */
@@ -60,7 +60,7 @@ static char *litbufdup(void);
6060 * When we parse a token that requires multiple lexer rules to process,
6161 * we set token_start to point at the true start of the token, for use
6262 * by yyerror(). yytext will point at just the text consumed by the last
63- * rule, so it's not very helpful (eg , it might contain just the last
63+ * rule, so it's not very helpful (e.g., it might contain just the last
6464 * quote mark of a quoted identifier). But to avoid cluttering every rule
6565 * with setting token_start, we allow token_start = NULL to denote that
6666 * it's okay to use yytext.
@@ -93,10 +93,10 @@ unsigned char unescape_single_char(unsigned char c);
9393 * and to eliminate parsing troubles for numeric strings.
9494 * Exclusive states:
9595 * <xb> bit string literal
96- * <xc> extended C-style comments - thomas 1997-07-12
97- * <xd> delimited identifiers (double-quoted identifiers) - thomas 1997-10-27
98- * <xh> hexadecimal numeric string - thomas 1997-11-16
99- * <xq> quoted strings - thomas 1997-07-30
96+ * <xc> extended C-style comments
97+ * <xd> delimited identifiers (double-quoted identifiers)
98+ * <xh> hexadecimal numeric string
99+ * <xq> quoted strings
100100 */
101101
102102%x xb
@@ -106,6 +106,13 @@ unsigned char unescape_single_char(unsigned char c);
106106%x xq
107107
108108/* Bit string
109+ * It is tempting to scan the string for only those characters
110+ * which are allowed. However, this leads to silently swallowed
111+ * characters if illegal characters are included in the string.
112+ * For example, if xbinside is [01] then B'ABCD' is interpreted
113+ * as a zero-length string, and the ABCD' is lost!
114+ * Better to pass the string forward and let the input routines
115+ * validate the contents.
109116 */
110117xbstart [bB ]{quote }
111118xbstop {quote }
@@ -116,7 +123,7 @@ xbcat {quote}{whitespace_with_newline}{quote}
116123 */
117124xhstart [xX ]{quote }
118125xhstop {quote }
119- xhinside [^ ' ]+
126+ xhinside [^ ' ]*
120127xhcat {quote }{whitespace_with_newline }{quote }
121128
122129/* National character
@@ -244,7 +251,7 @@ other .
244251 * style of two adjacent single quotes "''" and in the Postgres/Java style
245252 * of escaped-quote "\'".
246253 * Other embedded escaped characters are matched explicitly and the leading
247- * backslash is dropped from the string. - thomas 1997-09-24
254+ * backslash is dropped from the string.
248255 * Note that xcstart must appear before operator, as explained above!
249256 * Also whitespace (comment) must appear before operator.
250257 */
@@ -291,8 +298,10 @@ other .
291298
292299{xbstart } {
293300 /* Binary bit type.
294- * Should be passing the type forward into the parser
295- * rather than trying to embed it into the string.
301+ * At some point we should simply pass the string
302+ * forward to the parser and label it there.
303+ * In the meantime, place a leading "b" on the string
304+ * to mark it for the input routine as a binary string.
296305 */
297306 token_start = yytext;
298307 BEGIN (xb);
@@ -301,10 +310,8 @@ other .
301310 }
302311<xb >{xbstop } {
303312 BEGIN (INITIAL);
304- if (literalbuf[strspn (literalbuf + 1 , " 01" ) + 1 ] != ' \0 ' )
305- yyerror (" invalid bit string input" );
306313 yylval.str = litbufdup ();
307- return BITCONST ;
314+ return BCONST ;
308315 }
309316<xh >{xhinside } |
310317<xb >{xbinside } {
@@ -314,44 +321,43 @@ other .
314321<xb >{xbcat } {
315322 /* ignore */
316323 }
317- <xb ><<EOF>> { yyerror (" unterminated bit string literal" ); }
318-
324+ <xb ><<EOF>> {
325+ yyerror (" unterminated bit string literal" );
326+ }
319327{xhstart } {
320328 /* Hexadecimal bit type.
321- * Should be passing the type forward into the parser
322- * rather than trying to embed it into the string.
329+ * At some point we should simply pass the string
330+ * forward to the parser and label it there.
331+ * In the meantime, place a leading "x" on the string
332+ * to mark it for the input routine as a hex string.
323333 */
324334 token_start = yytext;
325335 BEGIN (xh);
326336 startlit ();
337+ addlitchar (' x' );
327338 }
328339<xh >{xhstop } {
329- long val;
330- char * endptr;
331-
332340 BEGIN (INITIAL);
333- errno = 0 ;
334- val = strtol (literalbuf, &endptr, 16 );
335- if (*endptr != ' \0 ' || errno == ERANGE
336- #ifdef HAVE_LONG_INT_64
337- /* if long > 32 bits, check for overflow of int4 */
338- || val != (long ) ((int32) val)
339- #endif
340- )
341- yyerror (" bad hexadecimal integer input" );
342- yylval.ival = val;
343- return ICONST;
341+ yylval.str = litbufdup ();
342+ return XCONST;
344343 }
345- <xh ><<EOF>> { yyerror (" unterminated hexadecimal integer " ); }
344+ <xh ><<EOF>> { yyerror (" unterminated hexadecimal string literal " ); }
346345
347346{xnstart } {
348347 /* National character.
349- * Need to remember type info to flow it forward into the parser.
350- * Not yet implemented. - thomas 2002-06-17
348+ * We will pass this along as a normal character string,
349+ * but preceded by an internally-generated "NCHAR".
351350 */
351+ const ScanKeyword *keyword;
352+
353+ /* This had better be a keyword! */
354+ keyword = ScanKeywordLookup (" nchar" );
355+ Assert (keyword != NULL );
356+ yylval.keyword = keyword->name ;
352357 token_start = yytext;
353358 BEGIN (xq);
354359 startlit ();
360+ return keyword->value ;
355361 }
356362
357363
0 commit comments