@@ -43,8 +43,6 @@ typedef struct /* state of JSON lexer */
4343 char * token_start ; /* start of current token within input */
4444 char * token_terminator ; /* end of previous or current token */
4545 JsonValueType token_type ; /* type of current token, once it's known */
46- int line_number ; /* current line number (counting from 1) */
47- char * line_start ; /* start of current line within input (BROKEN!!) */
4846} JsonLexContext ;
4947
5048typedef enum /* states of JSON parser */
@@ -78,6 +76,7 @@ static void json_lex_string(JsonLexContext *lex);
7876static void json_lex_number (JsonLexContext * lex , char * s );
7977static void report_parse_error (JsonParseStack * stack , JsonLexContext * lex );
8078static void report_invalid_token (JsonLexContext * lex );
79+ static int report_json_context (JsonLexContext * lex );
8180static char * extract_mb_char (char * s );
8281static void composite_to_json (Datum composite , StringInfo result ,
8382 bool use_line_feeds );
@@ -185,8 +184,6 @@ json_validate_cstring(char *input)
185184 /* Set up lexing context. */
186185 lex .input = input ;
187186 lex .token_terminator = lex .input ;
188- lex .line_number = 1 ;
189- lex .line_start = input ;
190187
191188 /* Set up parse stack. */
192189 stacksize = 32 ;
@@ -335,11 +332,7 @@ json_lex(JsonLexContext *lex)
335332 /* Skip leading whitespace. */
336333 s = lex -> token_terminator ;
337334 while (* s == ' ' || * s == '\t' || * s == '\n' || * s == '\r' )
338- {
339- if (* s == '\n' )
340- lex -> line_number ++ ;
341335 s ++ ;
342- }
343336 lex -> token_start = s ;
344337
345338 /* Determine token type. */
@@ -350,7 +343,7 @@ json_lex(JsonLexContext *lex)
350343 {
351344 /* End of string. */
352345 lex -> token_start = NULL ;
353- lex -> token_terminator = NULL ;
346+ lex -> token_terminator = s ;
354347 }
355348 else
356349 {
@@ -397,7 +390,8 @@ json_lex(JsonLexContext *lex)
397390 /*
398391 * We got some sort of unexpected punctuation or an otherwise
399392 * unexpected character, so just complain about that one
400- * character.
393+ * character. (It can't be multibyte because the above loop
394+ * will advance over any multibyte characters.)
401395 */
402396 lex -> token_terminator = s + 1 ;
403397 report_invalid_token (lex );
@@ -443,11 +437,14 @@ json_lex_string(JsonLexContext *lex)
443437 lex -> token_terminator = s ;
444438 report_invalid_token (lex );
445439 }
440+ /* Since *s isn't printable, exclude it from the context string */
441+ lex -> token_terminator = s ;
446442 ereport (ERROR ,
447443 (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
448444 errmsg ("invalid input syntax for type json" ),
449- errdetail ("line %d: Character with value \"0x%02x\" must be escaped." ,
450- lex -> line_number , (unsigned char ) * s )));
445+ errdetail ("Character with value 0x%02x must be escaped." ,
446+ (unsigned char ) * s ),
447+ report_json_context (lex )));
451448 }
452449 else if (* s == '\\' )
453450 {
@@ -465,38 +462,39 @@ json_lex_string(JsonLexContext *lex)
465462
466463 for (i = 1 ; i <= 4 ; i ++ )
467464 {
468- if (s [i ] == '\0' )
465+ s ++ ;
466+ if (* s == '\0' )
469467 {
470- lex -> token_terminator = s + i ;
468+ lex -> token_terminator = s ;
471469 report_invalid_token (lex );
472470 }
473- else if (s [ i ] >= '0' && s [ i ] <= '9' )
474- ch = (ch * 16 ) + (s [ i ] - '0' );
475- else if (s [ i ] >= 'a' && s [ i ] <= 'f' )
476- ch = (ch * 16 ) + (s [ i ] - 'a' ) + 10 ;
477- else if (s [ i ] >= 'A' && s [ i ] <= 'F' )
478- ch = (ch * 16 ) + (s [ i ] - 'A' ) + 10 ;
471+ else if (* s >= '0' && * s <= '9' )
472+ ch = (ch * 16 ) + (* s - '0' );
473+ else if (* s >= 'a' && * s <= 'f' )
474+ ch = (ch * 16 ) + (* s - 'a' ) + 10 ;
475+ else if (* s >= 'A' && * s <= 'F' )
476+ ch = (ch * 16 ) + (* s - 'A' ) + 10 ;
479477 else
480478 {
479+ lex -> token_terminator = s + pg_mblen (s );
481480 ereport (ERROR ,
482481 (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
483482 errmsg ("invalid input syntax for type json" ),
484- errdetail ("line %d: \"\\u\" must be followed by four hexadecimal digits." ,
485- lex -> line_number )));
483+ errdetail ("\"\\u\" must be followed by four hexadecimal digits." ) ,
484+ report_json_context ( lex )));
486485 }
487486 }
488-
489- /* Account for the four additional bytes we just parsed. */
490- s += 4 ;
491487 }
492488 else if (strchr ("\"\\/bfnrt" , * s ) == NULL )
493489 {
494490 /* Not a valid string escape, so error out. */
491+ lex -> token_terminator = s + pg_mblen (s );
495492 ereport (ERROR ,
496493 (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
497494 errmsg ("invalid input syntax for type json" ),
498- errdetail ("line %d: Invalid escape \"\\%s\"." ,
499- lex -> line_number , extract_mb_char (s ))));
495+ errdetail ("Escape sequence \"\\%s\" is invalid." ,
496+ extract_mb_char (s )),
497+ report_json_context (lex )));
500498 }
501499 }
502500 }
@@ -599,75 +597,116 @@ json_lex_number(JsonLexContext *lex, char *s)
599597
600598/*
601599 * Report a parse error.
600+ *
601+ * lex->token_start and lex->token_terminator must identify the current token.
602602 */
603603static void
604604report_parse_error (JsonParseStack * stack , JsonLexContext * lex )
605605{
606- char * detail = NULL ;
607- char * token = NULL ;
606+ char * token ;
608607 int toklen ;
609608
610609 /* Handle case where the input ended prematurely. */
611610 if (lex -> token_start == NULL )
612611 ereport (ERROR ,
613612 (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
614- errmsg ("invalid input syntax for type json: \"%s\"" ,
615- lex -> input ),
616- errdetail ( "The input string ended unexpectedly." )));
613+ errmsg ("invalid input syntax for type json" ) ,
614+ errdetail ( "The input string ended unexpectedly." ),
615+ report_json_context ( lex )));
617616
618- /* Separate out the offending token. */
617+ /* Separate out the current token. */
619618 toklen = lex -> token_terminator - lex -> token_start ;
620619 token = palloc (toklen + 1 );
621620 memcpy (token , lex -> token_start , toklen );
622621 token [toklen ] = '\0' ;
623622
624- /* Select correct detail message. */
623+ /* Complain, with the appropriate detail message. */
625624 if (stack == NULL )
626- detail = "line %d: Expected end of input, but found \"%s\"." ;
625+ ereport (ERROR ,
626+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
627+ errmsg ("invalid input syntax for type json" ),
628+ errdetail ("Expected end of input, but found \"%s\"." ,
629+ token ),
630+ report_json_context (lex )));
627631 else
628632 {
629633 switch (stack -> state )
630634 {
631635 case JSON_PARSE_VALUE :
632- detail = "line %d: Expected string, number, object, array, true, false, or null, but found \"%s\"." ;
636+ ereport (ERROR ,
637+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
638+ errmsg ("invalid input syntax for type json" ),
639+ errdetail ("Expected JSON value, but found \"%s\"." ,
640+ token ),
641+ report_json_context (lex )));
633642 break ;
634643 case JSON_PARSE_ARRAY_START :
635- detail = "line %d: Expected array element or \"]\", but found \"%s\"." ;
644+ ereport (ERROR ,
645+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
646+ errmsg ("invalid input syntax for type json" ),
647+ errdetail ("Expected array element or \"]\", but found \"%s\"." ,
648+ token ),
649+ report_json_context (lex )));
636650 break ;
637651 case JSON_PARSE_ARRAY_NEXT :
638- detail = "line %d: Expected \",\" or \"]\", but found \"%s\"." ;
652+ ereport (ERROR ,
653+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
654+ errmsg ("invalid input syntax for type json" ),
655+ errdetail ("Expected \",\" or \"]\", but found \"%s\"." ,
656+ token ),
657+ report_json_context (lex )));
639658 break ;
640659 case JSON_PARSE_OBJECT_START :
641- detail = "line %d: Expected string or \"}\", but found \"%s\"." ;
660+ ereport (ERROR ,
661+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
662+ errmsg ("invalid input syntax for type json" ),
663+ errdetail ("Expected string or \"}\", but found \"%s\"." ,
664+ token ),
665+ report_json_context (lex )));
642666 break ;
643667 case JSON_PARSE_OBJECT_LABEL :
644- detail = "line %d: Expected \":\", but found \"%s\"." ;
668+ ereport (ERROR ,
669+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
670+ errmsg ("invalid input syntax for type json" ),
671+ errdetail ("Expected \":\", but found \"%s\"." ,
672+ token ),
673+ report_json_context (lex )));
645674 break ;
646675 case JSON_PARSE_OBJECT_NEXT :
647- detail = "line %d: Expected \",\" or \"}\", but found \"%s\"." ;
676+ ereport (ERROR ,
677+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
678+ errmsg ("invalid input syntax for type json" ),
679+ errdetail ("Expected \",\" or \"}\", but found \"%s\"." ,
680+ token ),
681+ report_json_context (lex )));
648682 break ;
649683 case JSON_PARSE_OBJECT_COMMA :
650- detail = "line %d: Expected string, but found \"%s\"." ;
684+ ereport (ERROR ,
685+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
686+ errmsg ("invalid input syntax for type json" ),
687+ errdetail ("Expected string, but found \"%s\"." ,
688+ token ),
689+ report_json_context (lex )));
651690 break ;
691+ default :
692+ elog (ERROR , "unexpected json parse state: %d" ,
693+ (int ) stack -> state );
652694 }
653695 }
654-
655- ereport (ERROR ,
656- (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
657- errmsg ("invalid input syntax for type json: \"%s\"" ,
658- lex -> input ),
659- detail ? errdetail (detail , lex -> line_number , token ) : 0 ));
660696}
661697
662698/*
663699 * Report an invalid input token.
700+ *
701+ * lex->token_start and lex->token_terminator must identify the token.
664702 */
665703static void
666704report_invalid_token (JsonLexContext * lex )
667705{
668706 char * token ;
669707 int toklen ;
670708
709+ /* Separate out the offending token. */
671710 toklen = lex -> token_terminator - lex -> token_start ;
672711 token = palloc (toklen + 1 );
673712 memcpy (token , lex -> token_start , toklen );
@@ -676,8 +715,80 @@ report_invalid_token(JsonLexContext *lex)
676715 ereport (ERROR ,
677716 (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
678717 errmsg ("invalid input syntax for type json" ),
679- errdetail ("line %d: Token \"%s\" is invalid." ,
680- lex -> line_number , token )));
718+ errdetail ("Token \"%s\" is invalid." , token ),
719+ report_json_context (lex )));
720+ }
721+
722+ /*
723+ * Report a CONTEXT line for bogus JSON input: the line number plus an excerpt of the input ending at the error position.
724+ *
725+ * lex->token_terminator must be set to identify the spot where we detected
726+ * the error. Note that lex->token_start might be NULL, in case we recognized
727+ * the error at EOF.
728+ *
729+ * The return value isn't meaningful, but we make it non-void so that this
730+ * can be invoked inside ereport().
731+ */
732+ static int
733+ report_json_context (JsonLexContext * lex )
734+ {
735+ const char * context_start ;
736+ const char * context_end ;
737+ const char * line_start ;
738+ int line_number ;
739+ char * ctxt ;
740+ int ctxtlen ;
741+ const char * prefix ;
742+ const char * suffix ;
743+
744+ /* Choose boundaries for the excerpt we will display; it always ends at token_terminator */
745+ context_start = lex -> input ;
746+ context_end = lex -> token_terminator ;
747+ line_start = context_start ;
748+ line_number = 1 ;
749+ for (;;)
750+ {
751+ /* Always advance over newlines, tracking line start and line count (context_end test is just paranoia) */
752+ if (* context_start == '\n' && context_start < context_end )
753+ {
754+ context_start ++ ;
755+ line_start = context_start ;
756+ line_number ++ ;
757+ continue ;
758+ }
759+ /* Otherwise, done as soon as fewer than 50 bytes remain before context_end */
760+ if (context_end - context_start < 50 )
761+ break ;
762+ /* Advance one character, using pg_mblen when the byte has its high bit set */
763+ if (IS_HIGHBIT_SET (* context_start ))
764+ context_start += pg_mblen (context_start );
765+ else
766+ context_start ++ ;
767+ }
768+
769+ /*
770+ * We add "..." to indicate that the excerpt doesn't start at the
771+ * beginning of the line ... but if we're within 3 characters of the
772+ * beginning of the line, we might as well just show the whole line.
773+ */
774+ if (context_start - line_start <= 3 )
775+ context_start = line_start ;
776+
777+ /* Get a null-terminated palloc'd copy of the data to present */
778+ ctxtlen = context_end - context_start ;
779+ ctxt = palloc (ctxtlen + 1 );
780+ memcpy (ctxt , context_start , ctxtlen );
781+ ctxt [ctxtlen ] = '\0' ;
782+
783+ /*
784+ * Show the context, prefixing "..." if not starting at start of line, and
785+ * suffixing "..." if not ending at end of line.
786+ */
787+ prefix = (context_start > line_start ) ? "..." : "" ;
788+ suffix = (* context_end != '\0' && * context_end != '\n' && * context_end != '\r' ) ? "..." : "" ;
789+
790+ return errcontext ("JSON data, line %d: %s%s%s" ,
791+ line_number , prefix , ctxt , suffix );
792+ }
682793
683794/*
0 commit comments