From 38f04779f7afd758db6210123ec0b64c489595c5 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Mon, 22 Aug 2022 13:33:35 +0200
Subject: [PATCH] Fix HTML parser with threads and --without-legacy

If the legacy functions are disabled, the default "V1" HTML SAX handler
isn't initialized in threads other than the main thread.
htmlInitParserCtxt would later use the empty V1 SAX handler, resulting
in NULL documents.

Change htmlInitParserCtxt to initialize the HTML SAX handler by calling
xmlSAX2InitHtmlDefaultSAXHandler. This removes the ability to change the
default handler but is more in line with the XML parser which
initializes the SAX handler by calling xmlSAXVersion, ignoring the V1
default handler.

Fixes #399.
Reference:https://github.com/GNOME/libxml2/commit/38f04779f7afd758db6210123ec0b64c489595c5
Conflict:NA
---
 HTMLparser.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/HTMLparser.c b/HTMLparser.c
index e95d86b..98d73f3 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -5039,8 +5039,7 @@ htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
         htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
         return(-1);
     }
-    else
-        memset(sax, 0, sizeof(htmlSAXHandler));
+    memset(sax, 0, sizeof(htmlSAXHandler));
 
     /* Allocate the Input stack */
     ctxt->inputTab = (htmlParserInputPtr *)
@@ -5099,11 +5098,9 @@ htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
     ctxt->nodeInfoNr = 0;
     ctxt->nodeInfoMax = 0;
 
-    if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;
-    else {
-        ctxt->sax = sax;
-        memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
-    }
+    ctxt->sax = sax;
+    xmlSAX2InitHtmlDefaultSAXHandler(sax);
+
     ctxt->userData = ctxt;
     ctxt->myDoc = NULL;
     ctxt->wellFormed = 1;
-- 
2.27.0
