• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1From a06eaa6119ca5b296b8105dc8c9a34ed5fc1f338 Mon Sep 17 00:00:00 2001
2From: Nick Wellnhofer <wellnhofer@aevum.de>
3Date: Thu, 9 Mar 2023 06:58:24 +0100
4Subject: [PATCH] regexp: Fix determinism checks
5
6Swap arguments in initial call to xmlFARecurseDeterminism.
7
8Fix the check whether we revisit the initial state in
9xmlFARecurseDeterminism.
10
11If there are transitions with equal atoms and targets but different
12counters, treat the regex as deterministic but mark the transitions as
13non-deterministic internally.
14
15Don't overwrite zero return value of xmlFAComputesDeterminism
16with non-zero value from xmlFARecurseDeterminism.
17
18Most of these errors lead to non-deterministic regexes not being
19detected, which typically isn't an issue. The improved code may break
20users who relied on buggy behavior or cause other bugs to become
21visible.
22
23Fixes #469.
24
25Reference:https://github.com/GNOME/libxml2/commit/a06eaa6119ca5b296b8105dc8c9a34ed5fc1f338
26Conflict:NA
27
28---
29 xmlregexp.c | 34 +++++++++++++++++++++++-----------
30 1 file changed, 23 insertions(+), 11 deletions(-)
31
32diff --git a/xmlregexp.c b/xmlregexp.c
33index df0626c..c89f0c7 100644
34--- a/xmlregexp.c
35+++ b/xmlregexp.c
36@@ -2665,7 +2665,7 @@ not_determinist:
37  */
38 static int
39 xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
40-	                 int to, xmlRegAtomPtr atom) {
41+	                int fromnr, int tonr, xmlRegAtomPtr atom) {
42     int ret = 1;
43     int res;
44     int transnr, nbTrans;
45@@ -2690,21 +2690,23 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
46 	/*
47 	 * check transitions conflicting with the one looked at
48 	 */
49+        if ((t1->to < 0) || (t1->to == fromnr))
50+            continue;
51 	if (t1->atom == NULL) {
52-	    if (t1->to < 0)
53-		continue;
54 	    state->markd = XML_REGEXP_MARK_VISITED;
55 	    res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
56-		                           to, atom);
57+		                          fromnr, tonr, atom);
58 	    if (res == 0) {
59 	        ret = 0;
60 		/* t1->nd = 1; */
61 	    }
62 	    continue;
63 	}
64-	if (t1->to != to)
65-	    continue;
66 	if (xmlFACompareAtoms(t1->atom, atom, deep)) {
67+            /* Treat equal transitions as deterministic. */
68+            if ((t1->to != tonr) ||
69+                (!xmlFAEqualAtoms(t1->atom, atom, deep)))
70+                ret = 0;
71 	    ret = 0;
72 	    /* mark the transition as non-deterministic */
73 	    t1->nd = 1;
74@@ -2837,29 +2839,39 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
75                      * find transitions which indicate a conflict
76                      */
77 		    if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) {
78-			ret = 0;
79+                        /*
80+                         * Treat equal counter transitions that couldn't be
81+                         * eliminated as deterministic.
82+                         */
83+                        if ((t1->to != t2->to) ||
84+                            (t1->counter == t2->counter) ||
85+                            (!xmlFAEqualAtoms(t1->atom, t2->atom, deep)))
86+                            ret = 0;
87 			/* mark the transitions as non-deterministic ones */
88 			t1->nd = 1;
89 			t2->nd = 1;
90 			last = t1;
91 		    }
92 		} else {
93+                    int res;
94+
95 		    /*
96 		     * do the closure in case of remaining specific
97 		     * epsilon transitions like choices or all
98 		     */
99-		    ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
100-						   t2->to, t2->atom);
101-                    xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]);
102+		    res = xmlFARecurseDeterminism(ctxt, ctxt->states[t2->to],
103+						  statenr, t1->to, t1->atom);
104+                    xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t2->to]);
105 		    /* don't shortcut the computation so all non deterministic
106 		       transition get marked down
107 		    if (ret == 0)
108 			return(0);
109 		     */
110-		    if (ret == 0) {
111+		    if (res == 0) {
112 			t1->nd = 1;
113 			/* t2->nd = 1; */
114 			last = t1;
115+                        ret = 0;
116 		    }
117 		}
118 	    }
119--
1202.27.0
121
122