From a06eaa6119ca5b296b8105dc8c9a34ed5fc1f338 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Thu, 9 Mar 2023 06:58:24 +0100
Subject: [PATCH] regexp: Fix determinism checks

Swap arguments in initial call to xmlFARecurseDeterminism.

Fix the check whether we revisit the initial state in
xmlFARecurseDeterminism.

If there are transitions with equal atoms and targets but different
counters, treat the regex as deterministic but mark the transitions as
non-deterministic internally.

Don't overwrite zero return value of xmlFAComputesDeterminism
with non-zero value from xmlFARecurseDeterminism.

Most of these errors lead to non-deterministic regexes not being
detected which typically isn't an issue. The improved code may break
users who relied on buggy behavior or cause other bugs to become
visible.

Fixes #469.

Reference:https://github.com/GNOME/libxml2/commit/a06eaa6119ca5b296b8105dc8c9a34ed5fc1f338
Conflict:NA

---
 xmlregexp.c | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/xmlregexp.c b/xmlregexp.c
index df0626c..c89f0c7 100644
--- a/xmlregexp.c
+++ b/xmlregexp.c
@@ -2665,7 +2665,7 @@ not_determinist:
  */
 static int
 xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
-                         int to, xmlRegAtomPtr atom) {
+                         int fromnr, int tonr, xmlRegAtomPtr atom) {
     int ret = 1;
     int res;
     int transnr, nbTrans;
@@ -2690,21 +2690,23 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
         /*
          * check transitions conflicting with the one looked at
          */
+        if ((t1->to < 0) || (t1->to == fromnr))
+            continue;
         if (t1->atom == NULL) {
-            if (t1->to < 0)
-                continue;
             state->markd = XML_REGEXP_MARK_VISITED;
             res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
-                                           to, atom);
+                                           fromnr, tonr, atom);
             if (res == 0) {
                 ret = 0;
                 /* t1->nd = 1; */
             }
             continue;
         }
-        if (t1->to != to)
-            continue;
         if (xmlFACompareAtoms(t1->atom, atom, deep)) {
+            /* Treat equal transitions as deterministic. */
+            if ((t1->to != tonr) ||
+                (!xmlFAEqualAtoms(t1->atom, atom, deep)))
+                ret = 0;
             ret = 0;
             /* mark the transition as non-deterministic */
             t1->nd = 1;
@@ -2837,29 +2839,39 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
                      * find transitions which indicate a conflict
                      */
                     if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) {
-                        ret = 0;
+                        /*
+                         * Treat equal counter transitions that couldn't be
+                         * eliminated as deterministic.
+                         */
+                        if ((t1->to != t2->to) ||
+                            (t1->counter == t2->counter) ||
+                            (!xmlFAEqualAtoms(t1->atom, t2->atom, deep)))
+                            ret = 0;
                         /* mark the transitions as non-deterministic ones */
                         t1->nd = 1;
                         t2->nd = 1;
                         last = t1;
                     }
                 } else {
+                    int res;
+
                     /*
                      * do the closure in case of remaining specific
                      * epsilon transitions like choices or all
                      */
-                    ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
-                                                  t2->to, t2->atom);
-                    xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]);
+                    res = xmlFARecurseDeterminism(ctxt, ctxt->states[t2->to],
+                                                  statenr, t1->to, t1->atom);
+                    xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t2->to]);
                     /* don't shortcut the computation so all non deterministic
                        transition get marked down
                     if (ret == 0)
                         return(0);
                      */
-                    if (ret == 0) {
+                    if (res == 0) {
                         t1->nd = 1;
                         /* t2->nd = 1; */
                         last = t1;
+                        ret = 0;
                     }
                 }
             }
-- 
2.27.0
