From a5c4a6efe77f6dd6e0a092db9357b21602eedd31 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Fri, 28 Mar 2025 16:31:14 +0100
Subject: [PATCH] parser: Fix XML_PARSE_NOBLANKS dropping non-whitespace text

Regressed with 1f5b5371.

Fixes #884.
---
 parser.c     | 13 +++++++------
 testparser.c | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/parser.c b/parser.c
index aacaf1f8c..d8d590ffd 100644
--- a/parser.c
+++ b/parser.c
@@ -4778,7 +4778,8 @@ static const unsigned char test_char_data[256] = {
 };
 
 static void
-xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size) {
+xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size,
+              int isBlank) {
     int checkBlanks;
 
     if ((ctxt->sax == NULL) || (ctxt->disableSAX))
@@ -4793,7 +4794,7 @@ xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size) {
      * essentially unusable.
      */
     if ((checkBlanks) &&
-        (areBlanks(ctxt, buf, size, 1))) {
+        (areBlanks(ctxt, buf, size, isBlank))) {
         if ((ctxt->sax->ignorableWhitespace != NULL) &&
             (ctxt->keepBlanks))
             ctxt->sax->ignorableWhitespace(ctxt->userData, buf, size);
@@ -4855,7 +4856,7 @@ get_more_space:
                     const xmlChar *tmp = ctxt->input->cur;
                     ctxt->input->cur = in;
 
-                    xmlCharacters(ctxt, tmp, nbchar);
+                    xmlCharacters(ctxt, tmp, nbchar, 1);
                 }
                 return;
             }
@@ -4891,7 +4892,7 @@ get_more:
                 const xmlChar *tmp = ctxt->input->cur;
                 ctxt->input->cur = in;
 
-                xmlCharacters(ctxt, tmp, nbchar);
+                xmlCharacters(ctxt, tmp, nbchar, 0);
 
                 line = ctxt->input->line;
                 col = ctxt->input->col;
@@ -4958,7 +4959,7 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
         if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
             buf[nbchar] = 0;
 
-            xmlCharacters(ctxt, buf, nbchar);
+            xmlCharacters(ctxt, buf, nbchar, 0);
             nbchar = 0;
             SHRINK;
         }
@@ -4967,7 +4968,7 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
     if (nbchar != 0) {
         buf[nbchar] = 0;
 
-        xmlCharacters(ctxt, buf, nbchar);
+        xmlCharacters(ctxt, buf, nbchar, 0);
     }
     /*
      * cur == 0 can mean
diff --git a/testparser.c b/testparser.c
index 5cca7b679..85fee9b4c 100644
--- a/testparser.c
+++ b/testparser.c
@@ -255,6 +255,39 @@ testCtxtParseContent(void) {
 
     return err;
 }
+
+static int
+testNoBlanks(void) {
+    const xmlChar xml[] =
+        "<refentry>\n"
+        " <refsect1>\n"
+        " <para>\n"
+        " Run <command>tester --help</command> for more options.\n"
+        " </para>\n"
+        " </refsect1>\n"
+        "</refentry>\n";
+    const xmlChar expect[] =
+        "<?xml version=\"1.0\"?>\n"
+        "<refentry><refsect1><para>\n"
+        " Run <command>tester --help</command> for more options.\n"
+        " </para></refsect1></refentry>\n";
+    xmlDocPtr doc;
+    xmlChar *out;
+    int size;
+    int err = 0;
+
+    doc = xmlReadDoc(xml, NULL, NULL, XML_PARSE_NOBLANKS);
+    xmlDocDumpMemory(doc, &out, &size);
+    xmlFreeDoc(doc);
+
+    if (!xmlStrEqual(out, expect)) {
+        fprintf(stderr, "parsing with XML_PARSE_NOBLANKS failed\n");
+        err = 1;
+    }
+    xmlFree(out);
+
+    return err;
+}
 #endif /* LIBXML_OUTPUT_ENABLED */
 
 #ifdef LIBXML_SAX1_ENABLED
@@ -1123,6 +1156,7 @@ main(void) {
 #endif
 #ifdef LIBXML_OUTPUT_ENABLED
     err |= testCtxtParseContent();
+    err |= testNoBlanks();
 #endif
 #ifdef LIBXML_SAX1_ENABLED
     err |= testBalancedChunk();
-- 
GitLab
