From 9a0aec423d158a9e3d8e5cb6df0d5ce032be3524 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Sun, 4 Dec 2022 23:01:00 +0100
Subject: [PATCH 28/28] error: Make sure that error messages are valid UTF-8

This has caused issues with the Python bindings for a long time.

Should fix #64.

Reference: https://github.com/GNOME/libxml2/commit/76c6da420923f2721a2e16adfcef8707a2454a1b
Conflict: result/,runtest.c,test/
---
 error.c | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/error.c b/error.c
index 9ff1c2b..fe9a7e2 100644
--- a/error.c
+++ b/error.c
@@ -163,7 +163,7 @@ xmlParserPrintFileInfo(xmlParserInputPtr input) {
 }
 
 /**
- * xmlParserPrintFileContext:
+ * xmlParserPrintFileContextInternal:
  * @input: an xmlParserInputPtr input
  *
  * Displays current context within the input content for error tracking
@@ -172,7 +172,7 @@ xmlParserPrintFileInfo(xmlParserInputPtr input) {
 static void
 xmlParserPrintFileContextInternal(xmlParserInputPtr input ,
         xmlGenericErrorFunc channel, void *data ) {
-    const xmlChar *cur, *base;
+    const xmlChar *cur, *base, *start;
     unsigned int n, col; /* GCC warns if signed, because compared with sizeof() */
     xmlChar content[81]; /* space for 80 chars + line terminator */
     xmlChar *ctnt;
@@ -191,19 +191,30 @@ xmlParserPrintFileContextInternal(xmlParserInputPtr input ,
     while ((n++ < (sizeof(content)-1)) && (cur > base) &&
            (*(cur) != '\n') && (*(cur) != '\r'))
         cur--;
-    if ((*(cur) == '\n') || (*(cur) == '\r')) cur++;
+    if ((*(cur) == '\n') || (*(cur) == '\r')) {
+        cur++;
+    } else {
+        /* skip over continuation bytes */
+        while ((cur < input->cur) && ((*cur & 0xC0) == 0x80))
+            cur++;
+    }
     /* calculate the error position in terms of the current position */
     col = input->cur - cur;
     /* search forward for end-of-line (to max buff size) */
     n = 0;
-    ctnt = content;
+    start = cur;
     /* copy selected text to our buffer */
-    while ((*cur != 0) && (*(cur) != '\n') &&
-           (*(cur) != '\r') && (n < sizeof(content)-1)) {
-        *ctnt++ = *cur++;
-        n++;
+    while ((*cur != 0) && (*(cur) != '\n') && (*(cur) != '\r')) {
+        int len = input->end - cur;
+        int c = xmlGetUTF8Char(cur, &len);
+
+        if ((c < 0) || (n + len > sizeof(content)-1))
+            break;
+        cur += len;
+        n += len;
     }
-    *ctnt = 0;
+    memcpy(content, start, n);
+    content[n] = 0;
     /* print out the selected text */
     channel(data ,"%s\n", content);
     /* create blank line with problem pointer */
-- 
2.27.0
