From 2ba6c76fca21397959145e18c5ef376201209020 Mon Sep 17 00:00:00 2001
From: Sebastian Pipping <sebastian@pipping.org>
Date: Sun, 27 Feb 2022 16:58:08 +0100
Subject: [PATCH] lib: Relax fix to CVE-2022-25236 with regard to RFC
 3986 URI characters

---
 lib/xmlparse.c | 139 ++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 131 insertions(+), 8 deletions(-)

diff --git a/lib/xmlparse.c b/lib/xmlparse.c
index 59da19c..6fe2cf1 100644
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -3705,6 +3705,117 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
   return XML_ERROR_NONE;
 }
 
+static XML_Bool
+is_rfc3986_uri_char(XML_Char candidate) {
+  // For the RFC 3986 ABNF grammar see
+  // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
+
+  switch (candidate) {
+  // From rule "ALPHA" (uppercase half)
+  case 'A':
+  case 'B':
+  case 'C':
+  case 'D':
+  case 'E':
+  case 'F':
+  case 'G':
+  case 'H':
+  case 'I':
+  case 'J':
+  case 'K':
+  case 'L':
+  case 'M':
+  case 'N':
+  case 'O':
+  case 'P':
+  case 'Q':
+  case 'R':
+  case 'S':
+  case 'T':
+  case 'U':
+  case 'V':
+  case 'W':
+  case 'X':
+  case 'Y':
+  case 'Z':
+
+  // From rule "ALPHA" (lowercase half)
+  case 'a':
+  case 'b':
+  case 'c':
+  case 'd':
+  case 'e':
+  case 'f':
+  case 'g':
+  case 'h':
+  case 'i':
+  case 'j':
+  case 'k':
+  case 'l':
+  case 'm':
+  case 'n':
+  case 'o':
+  case 'p':
+  case 'q':
+  case 'r':
+  case 's':
+  case 't':
+  case 'u':
+  case 'v':
+  case 'w':
+  case 'x':
+  case 'y':
+  case 'z':
+
+  // From rule "DIGIT"
+  case '0':
+  case '1':
+  case '2':
+  case '3':
+  case '4':
+  case '5':
+  case '6':
+  case '7':
+  case '8':
+  case '9':
+
+  // From rule "pct-encoded"
+  case '%':
+
+  // From rule "unreserved"
+  case '-':
+  case '.':
+  case '_':
+  case '~':
+
+  // From rule "gen-delims"
+  case ':':
+  case '/':
+  case '?':
+  case '#':
+  case '[':
+  case ']':
+  case '@':
+
+  // From rule "sub-delims"
+  case '!':
+  case '$':
+  case '&':
+  case '\'':
+  case '(':
+  case ')':
+  case '*':
+  case '+':
+  case ',':
+  case ';':
+  case '=':
+    return XML_TRUE;
+
+  default:
+    return XML_FALSE;
+  }
+}
+
 /* addBinding() overwrites the value of prefix->binding without checking.
    Therefore one must keep track of the old value outside of addBinding().
 */
@@ -3763,14 +3874,26 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
         && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
       isXMLNS = XML_FALSE;
 
-    // NOTE: While Expat does not validate namespace URIs against RFC 3986,
-    //       we have to at least make sure that the XML processor on top of
-    //       Expat (that is splitting tag names by namespace separator into
-    //       2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
-    //       by an attacker putting additional namespace separator characters
-    //       into namespace declarations.  That would be ambiguous and not to
-    //       be expected.
-    if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) {
+    // NOTE: While Expat does not validate namespace URIs against RFC 3986
+    //       today (and is not REQUIRED to do so with regard to the XML 1.0
+    //       namespaces specification) we have to at least make sure that
+    //       the application on top of Expat (that is likely splitting expanded
+    //       element names ("qualified names") of form
+    //       "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
+    //       in its element handler code) cannot be confused by an attacker
+    //       putting additional namespace separator characters into namespace
+    //       declarations.  That would be ambiguous and not to be expected.
+    //
+    //       While the HTML API docs of function XML_ParserCreateNS have been
+    //       advising against use of a namespace separator character that can
+    //       appear in a URI for >20 years now, some widespread applications
+    //       are using URI characters (':' (colon) in particular) for a
+    //       namespace separator, in practice.  To keep these applications
+    //       functional, we only reject namespace URIs containing the
+    //       application-chosen namespace separator if the chosen separator
+    //       is a non-URI character with regard to RFC 3986.
+    if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
+        && ! is_rfc3986_uri_char(uri[len])) {
       return XML_ERROR_SYNTAX;
     }
   }
-- 
1.8.3.1
