88 *
99 *
1010 * IDENTIFICATION
11- * $PostgreSQL: pgsql/src/interfaces/libpq/fe-protocol3.c,v 1.27 2006/08/18 19:52:39 tgl Exp $
11+ * $PostgreSQL: pgsql/src/interfaces/libpq/fe-protocol3.c,v 1.28 2006/10/01 22:25:48 tgl Exp $
1212 *
1313 *-------------------------------------------------------------------------
1414 */
@@ -883,20 +883,25 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
883883#define MIN_RIGHT_CUT 10 /* try to keep this far away from EOL */
884884
885885 char * wquery ;
886- int clen ,
887- slen ,
886+ int slen ,
887+ cno ,
888888 i ,
889- w ,
890889 * qidx ,
891890 * scridx ,
892891 qoffset ,
893892 scroffset ,
894893 ibeg ,
895894 iend ,
896895 loc_line ;
897- bool beg_trunc ,
896+ bool mb_encoding ,
897+ beg_trunc ,
898898 end_trunc ;
899899
900+ /* Convert loc from 1-based to 0-based; no-op if out of range */
901+ loc -- ;
902+ if (loc < 0 )
903+ return ;
904+
900905 /* Need a writable copy of the query */
901906 wquery = strdup (query );
902907 if (wquery == NULL )
@@ -905,13 +910,13 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
905910 /*
906911 * Each character might occupy multiple physical bytes in the string, and
907912 * in some Far Eastern character sets it might take more than one screen
908- * column as well. We compute the starting byte offset and starting
913+ * column as well. We compute the starting byte offset and starting
909914 * screen column of each logical character, and store these in qidx[] and
910915 * scridx[] respectively.
911916 */
912917
913918 /* we need a safe allocation size... */
914- slen = strlen (query ) + 1 ;
919+ slen = strlen (wquery ) + 1 ;
915920
916921 qidx = (int * ) malloc (slen * sizeof (int ));
917922 if (qidx == NULL )
@@ -927,79 +932,93 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
927932 return ;
928933 }
929934
935+ /* We can optimize a bit if it's a single-byte encoding */
936+ mb_encoding = (pg_encoding_max_length (encoding ) != 1 );
937+
938+ /*
939+ * Within the scanning loop, cno is the current character's logical number,
940+ * qoffset is its offset in wquery, and scroffset is its starting logical
941+ * screen column (all indexed from 0). "loc" is the logical character
942+ * number of the error location. We scan to determine loc_line (the
943+ * 1-based line number containing loc) and ibeg/iend (first character
944+ * number and last+1 character number of the line containing loc).
945+ * Note that qidx[] and scridx[] are filled only as far as iend.
946+ */
930947 qoffset = 0 ;
931948 scroffset = 0 ;
932- for (i = 0 ; query [qoffset ] != '\0' ; i ++ )
933- {
934- qidx [i ] = qoffset ;
935- scridx [i ] = scroffset ;
936- w = pg_encoding_dsplen (encoding , & query [qoffset ]);
937- /* treat control chars as width 1; see tab hack below */
938- if (w <= 0 )
939- w = 1 ;
940- scroffset += w ;
941- qoffset += pg_encoding_mblen (encoding , & query [qoffset ]);
942- }
943- qidx [i ] = qoffset ;
944- scridx [i ] = scroffset ;
945- clen = i ;
949+ loc_line = 1 ;
950+ ibeg = 0 ;
951+ iend = -1 ; /* -1 means not set yet */
946952
947- /* convert loc to zero-based offset in qidx/scridx arrays */
948- loc -- ;
949-
950- /* do we have something to show? */
951- if (loc >= 0 && loc <= clen )
953+ for (cno = 0 ; wquery [qoffset ] != '\0' ; cno ++ )
952954 {
953- /* input line number of our syntax error. */
954- loc_line = 1 ;
955- /* first included char of extract. */
956- ibeg = 0 ;
957- /* last-plus-1 included char of extract. */
958- iend = clen ;
955+ char ch = wquery [qoffset ];
956+
957+ qidx [cno ] = qoffset ;
958+ scridx [cno ] = scroffset ;
959959
960960 /*
961961 * Replace tabs with spaces in the writable copy. (Later we might
962962 * want to think about coping with their variable screen width, but
963963 * not today.)
964- *
965- * Extract line number and begin and end indexes of line containing
966- * error location. There will not be any newlines or carriage returns
967- * in the selected extract.
968964 */
969- for (i = 0 ; i < clen ; i ++ )
965+ if (ch == '\t' )
966+ wquery [qoffset ] = ' ' ;
967+
968+ /*
969+ * If end-of-line, count lines and mark positions. Each \r or \n counts
970+ * as a line except when \r \n appear together.
971+ */
972+ else if (ch == '\r' || ch == '\n' )
970973 {
971- /* character length must be 1 or it's not ASCII */
972- if ((qidx [i + 1 ] - qidx [i ]) == 1 )
974+ if (cno < loc )
973975 {
974- if (wquery [qidx [i ]] == '\t' )
975- wquery [qidx [i ]] = ' ' ;
976- else if (wquery [qidx [i ]] == '\r' || wquery [qidx [i ]] == '\n' )
977- {
978- if (i < loc )
979- {
980- /*
981- * count lines before loc. Each \r or \n counts
982- * as a line except when \r \n appear together.
983- */
984- if (wquery [qidx [i ]] == '\r' ||
985- i == 0 ||
986- (qidx [i ] - qidx [i - 1 ]) != 1 ||
987- wquery [qidx [i - 1 ]] != '\r' )
988- loc_line ++ ;
989- /* extract beginning = last line start before loc. */
990- ibeg = i + 1 ;
991- }
992- else
993- {
994- /* set extract end. */
995- iend = i ;
996- /* done scanning. */
997- break ;
998- }
999- }
976+ if (ch == '\r' ||
977+ cno == 0 ||
978+ wquery [qidx [cno - 1 ]] != '\r' )
979+ loc_line ++ ;
980+ /* extract beginning = last line start before loc. */
981+ ibeg = cno + 1 ;
982+ }
983+ else
984+ {
985+ /* set extract end. */
986+ iend = cno ;
987+ /* done scanning. */
988+ break ;
1000989 }
1001990 }
1002991
992+ /* Advance */
993+ if (mb_encoding )
994+ {
995+ int w ;
996+
997+ w = pg_encoding_dsplen (encoding , & wquery [qoffset ]);
998+ /* treat any non-tab control chars as width 1 */
999+ if (w <= 0 )
1000+ w = 1 ;
1001+ scroffset += w ;
1002+ qoffset += pg_encoding_mblen (encoding , & wquery [qoffset ]);
1003+ }
1004+ else
1005+ {
1006+ /* We assume wide chars only exist in multibyte encodings */
1007+ scroffset ++ ;
1008+ qoffset ++ ;
1009+ }
1010+ }
1011+ /* Fix up if we didn't find an end-of-line after loc */
1012+ if (iend < 0 )
1013+ {
1014+ iend = cno ; /* query length in chars, +1 */
1015+ qidx [iend ] = qoffset ;
1016+ scridx [iend ] = scroffset ;
1017+ }
1018+
1019+ /* Print only if loc is within computed query length */
1020+ if (loc <= cno )
1021+ {
10031022 /* If the line extracted is too long, we truncate it. */
10041023 beg_trunc = false;
10051024 end_trunc = false;
@@ -1050,7 +1069,8 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
10501069 scroffset = 0 ;
10511070 for (; i < msg -> len ; i += pg_encoding_mblen (encoding , & msg -> data [i ]))
10521071 {
1053- w = pg_encoding_dsplen (encoding , & msg -> data [i ]);
1072+ int w = pg_encoding_dsplen (encoding , & msg -> data [i ]);
1073+
10541074 if (w <= 0 )
10551075 w = 1 ;
10561076 scroffset += w ;
0 commit comments