• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1From 2ba6c76fca21397959145e18c5ef376201209020 Mon Sep 17 00:00:00 2001
2From: Sebastian Pipping <sebastian@pipping.org>
3Date: Sun, 27 Feb 2022 16:58:08 +0100
4Subject: [PATCH] lib: Relax fix to CVE-2022-25236 with regard to RFC
5 3986 URI characters
6
7---
8 lib/xmlparse.c | 139 ++++++++++++++++++++++++++++++++++++++++++++++++---
9 1 file changed, 131 insertions(+), 8 deletions(-)
10
11diff --git a/lib/xmlparse.c b/lib/xmlparse.c
12index 59da19c..6fe2cf1 100644
13--- a/lib/xmlparse.c
14+++ b/lib/xmlparse.c
15@@ -3705,6 +3705,117 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
16   return XML_ERROR_NONE;
17 }
18
19+static XML_Bool
20+is_rfc3986_uri_char(XML_Char candidate) {
21+  // For the RFC 3986 ABNF grammar see
22+  // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
23+
24+  switch (candidate) {
25+  // From rule "ALPHA" (uppercase half)
26+  case 'A':
27+  case 'B':
28+  case 'C':
29+  case 'D':
30+  case 'E':
31+  case 'F':
32+  case 'G':
33+  case 'H':
34+  case 'I':
35+  case 'J':
36+  case 'K':
37+  case 'L':
38+  case 'M':
39+  case 'N':
40+  case 'O':
41+  case 'P':
42+  case 'Q':
43+  case 'R':
44+  case 'S':
45+  case 'T':
46+  case 'U':
47+  case 'V':
48+  case 'W':
49+  case 'X':
50+  case 'Y':
51+  case 'Z':
52+
53+  // From rule "ALPHA" (lowercase half)
54+  case 'a':
55+  case 'b':
56+  case 'c':
57+  case 'd':
58+  case 'e':
59+  case 'f':
60+  case 'g':
61+  case 'h':
62+  case 'i':
63+  case 'j':
64+  case 'k':
65+  case 'l':
66+  case 'm':
67+  case 'n':
68+  case 'o':
69+  case 'p':
70+  case 'q':
71+  case 'r':
72+  case 's':
73+  case 't':
74+  case 'u':
75+  case 'v':
76+  case 'w':
77+  case 'x':
78+  case 'y':
79+  case 'z':
80+
81+  // From rule "DIGIT"
82+  case '0':
83+  case '1':
84+  case '2':
85+  case '3':
86+  case '4':
87+  case '5':
88+  case '6':
89+  case '7':
90+  case '8':
91+  case '9':
92+
93+  // From rule "pct-encoded"
94+  case '%':
95+
96+  // From rule "unreserved"
97+  case '-':
98+  case '.':
99+  case '_':
100+  case '~':
101+
102+  // From rule "gen-delims"
103+  case ':':
104+  case '/':
105+  case '?':
106+  case '#':
107+  case '[':
108+  case ']':
109+  case '@':
110+
111+  // From rule "sub-delims"
112+  case '!':
113+  case '$':
114+  case '&':
115+  case '\'':
116+  case '(':
117+  case ')':
118+  case '*':
119+  case '+':
120+  case ',':
121+  case ';':
122+  case '=':
123+    return XML_TRUE;
124+
125+  default:
126+    return XML_FALSE;
127+  }
128+}
129+
130 /* addBinding() overwrites the value of prefix->binding without checking.
131    Therefore one must keep track of the old value outside of addBinding().
132 */
133@@ -3763,14 +3874,26 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
134         && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
135       isXMLNS = XML_FALSE;
136
137-    // NOTE: While Expat does not validate namespace URIs against RFC 3986,
138-    //       we have to at least make sure that the XML processor on top of
139-    //       Expat (that is splitting tag names by namespace separator into
140-    //       2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
141-    //       by an attacker putting additional namespace separator characters
142-    //       into namespace declarations.  That would be ambiguous and not to
143-    //       be expected.
144-    if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) {
145+    // NOTE: While Expat does not validate namespace URIs against RFC 3986
146+    //       today (and is not REQUIRED to do so with regard to the XML 1.0
147+    //       namespaces specification) we have to at least make sure that
148+    //       the application on top of Expat (that is likely splitting expanded
149+    //       element names ("qualified names") of form
150+    //       "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
151+    //       in its element handler code) cannot be confused by an attacker
152+    //       putting additional namespace separator characters into namespace
153+    //       declarations.  That would be ambiguous and not to be expected.
154+    //
155+    //       While the HTML API docs of function XML_ParserCreateNS have been
156+    //       advising against use of a namespace separator character that can
157+    //       appear in a URI for >20 years now, some widespread applications
158+    //       are using URI characters (':' (colon) in particular) for a
159+    //       namespace separator, in practice.  To keep these applications
160+    //       functional, we only reject namespace URIs containing the
161+    //       application-chosen namespace separator if the chosen separator
162+    //       is a non-URI character with regard to RFC 3986.
163+    if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
164+        && ! is_rfc3986_uri_char(uri[len])) {
165       return XML_ERROR_SYNTAX;
166     }
167   }
168--
1691.8.3.1
170
171