1From 9b4ce651b26557f16103c3a366c91934ecd439ab Mon Sep 17 00:00:00 2001 2From: Samanta Navarro <ferivoz@riseup.net> 3Date: Tue, 15 Feb 2022 11:54:29 +0000 4Subject: [PATCH] Prevent stack exhaustion in build_model 5 6It is possible to trigger stack exhaustion in build_model function if 7depth of nested children in DTD element is large enough. This happens 8because build_node is a recursively called function within build_model. 9 10The code has been adjusted to run iteratively. It uses the already 11allocated heap space as temporary stack (growing from top to bottom). 12 13Output is identical to recursive version. No new fields in data 14structures were added, i.e. it keeps full API and ABI compatibility. 15Instead the numchildren variable is used to temporarily keep the 16index of items (uint vs int). 17 18Documentation and readability improvements kindly added by Sebastian. 19 20Proof of Concept: 21 221. Compile poc binary which parses XML file line by line 23 24``` 25cat > poc.c << EOF 26 #include <err.h> 27 #include <expat.h> 28 #include <stdio.h> 29 30 XML_Parser parser; 31 32 static void XMLCALL 33 dummy_element_decl_handler(void *userData, const XML_Char *name, 34 XML_Content *model) { 35 XML_FreeContentModel(parser, model); 36 } 37 38 int main(int argc, char *argv[]) { 39 FILE *fp; 40 char *p = NULL; 41 size_t s = 0; 42 ssize_t l; 43 if (argc != 2) 44 errx(1, "usage: poc poc.xml"); 45 if ((parser = XML_ParserCreate(NULL)) == NULL) 46 errx(1, "XML_ParserCreate"); 47 XML_SetElementDeclHandler(parser, dummy_element_decl_handler); 48 if ((fp = fopen(argv[1], "r")) == NULL) 49 err(1, "fopen"); 50 while ((l = getline(&p, &s, fp)) > 0) 51 if (XML_Parse(parser, p, (int)l, XML_FALSE) != XML_STATUS_OK) 52 errx(1, "XML_Parse"); 53 XML_ParserFree(parser); 54 free(p); 55 fclose(fp); 56 return 0; 57 } 58EOF 59cc -std=c11 -D_POSIX_C_SOURCE=200809L -lexpat -o poc poc.c 60``` 61 622. Create XML file with a lot of nested groups in DTD element 63 64``` 65cat > poc.xml.zst.b64 << EOF 66KLUv/aQkACAAPAEA+DwhRE9DVFlQRSB1d3UgWwo8IUVMRU1FTlQgdXd1CigBAHv/58AJAgAQKAIA 67ECgCABAoAgAQKAIAECgCABAoAgAQKHwAAChvd28KKQIA2/8gV24XBAIAECkCABApAgAQKQIAECkC 68ABApAgAQKQIAEClVAAAgPl0+CgEA4A4I2VwwnQ== 69EOF 70base64 -d poc.xml.zst.b64 | zstd -d > poc.xml 71``` 72 733. Run Proof of Concept 74 75``` 76./poc poc.xml 77``` 78 79Co-authored-by: Sebastian Pipping <sebastian@pipping.org> 80--- 81 lib/xmlparse.c | 116 +++++++++++++++++++++++++++++-------------- 82 1 file changed, 79 insertions(+), 37 deletions(-) 83 84diff --git a/lib/xmlparse.c b/lib/xmlparse.c 85index 4b43e613..594cf12c 100644 86--- a/lib/xmlparse.c 87+++ b/lib/xmlparse.c 88@@ -7317,44 +7317,15 @@ nextScaffoldPart(XML_Parser parser) { 89 return next; 90 } 91 92-static void 93-build_node(XML_Parser parser, int src_node, XML_Content *dest, 94- XML_Content **contpos, XML_Char **strpos) { 95- DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 96- dest->type = dtd->scaffold[src_node].type; 97- dest->quant = dtd->scaffold[src_node].quant; 98- if (dest->type == XML_CTYPE_NAME) { 99- const XML_Char *src; 100- dest->name = *strpos; 101- src = dtd->scaffold[src_node].name; 102- for (;;) { 103- *(*strpos)++ = *src; 104- if (! *src) 105- break; 106- src++; 107- } 108- dest->numchildren = 0; 109- dest->children = NULL; 110- } else { 111- unsigned int i; 112- int cn; 113- dest->numchildren = dtd->scaffold[src_node].childcnt; 114- dest->children = *contpos; 115- *contpos += dest->numchildren; 116- for (i = 0, cn = dtd->scaffold[src_node].firstchild; i < dest->numchildren; 117- i++, cn = dtd->scaffold[cn].nextsib) { 118- build_node(parser, cn, &(dest->children[i]), contpos, strpos); 119- } 120- dest->name = NULL; 121- } 122-} 123- 124 static XML_Content * 125 build_model(XML_Parser parser) { 126+ /* Function build_model transforms the existing parser->m_dtd->scaffold 127+ * array of CONTENT_SCAFFOLD tree nodes into a new array of 128+ * XML_Content tree nodes followed by a gapless list of zero-terminated 129+ * strings. */ 130 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 131 XML_Content *ret; 132- XML_Content *cpos; 133- XML_Char *str; 134+ XML_Char *str; /* the current string writing location */ 135 136 /* Detect and prevent integer overflow. 137 * The preprocessor guard addresses the "always false" warning 138@@ -7380,10 +7351,81 @@ build_model(XML_Parser parser) { 139 if (! ret) 140 return NULL; 141 142- str = (XML_Char *)(&ret[dtd->scaffCount]); 143- cpos = &ret[1]; 144+ /* What follows is an iterative implementation (of what was previously done 145+ * recursively in a dedicated function called "build_node". The old recursive 146+ * build_node could be forced into stack exhaustion from input as small as a 147+ * few megabyte, and so that was a security issue. Hence, a function call 148+ * stack is avoided now by resolving recursion.) 149+ * 150+ * The iterative approach works as follows: 151+ * 152+ * - We use space in the target array for building a temporary stack structure 153+ * while that space is still unused. 154+ * The stack grows from the array's end downwards and the "actual data" 155+ * grows from the start upwards, sequentially. 156+ * (Because stack grows downwards, pushing onto the stack is a decrement 157+ * while popping off the stack is an increment.) 158+ * 159+ * - A stack element appears as a regular XML_Content node on the outside, 160+ * but only uses a single field -- numchildren -- to store the source 161+ * tree node array index. These are the breadcrumbs leading the way back 162+ * during pre-order (node first) depth-first traversal. 163+ * 164+ * - The reason we know the stack will never grow into (or overlap with) 165+ * the area with data of value at the start of the array is because 166+ * the overall number of elements to process matches the size of the array, 167+ * and the sum of fully processed nodes and yet-to-be processed nodes 168+ * on the stack, cannot be more than the total number of nodes. 169+ * It is possible for the top of the stack and the about-to-write node 170+ * to meet, but that is safe because we get the source index out 171+ * before doing any writes on that node. 172+ */ 173+ XML_Content *dest = ret; /* tree node writing location, moves upwards */ 174+ XML_Content *const destLimit = &ret[dtd->scaffCount]; 175+ XML_Content *const stackBottom = &ret[dtd->scaffCount]; 176+ XML_Content *stackTop = stackBottom; /* i.e. stack is initially empty */ 177+ str = (XML_Char *)&ret[dtd->scaffCount]; 178+ 179+ /* Push source tree root node index onto the stack */ 180+ (--stackTop)->numchildren = 0; 181+ 182+ for (; dest < destLimit; dest++) { 183+ /* Pop source tree node index off the stack */ 184+ const int src_node = (int)(stackTop++)->numchildren; 185+ 186+ /* Convert item */ 187+ dest->type = dtd->scaffold[src_node].type; 188+ dest->quant = dtd->scaffold[src_node].quant; 189+ if (dest->type == XML_CTYPE_NAME) { 190+ const XML_Char *src; 191+ dest->name = str; 192+ src = dtd->scaffold[src_node].name; 193+ for (;;) { 194+ *str++ = *src; 195+ if (! *src) 196+ break; 197+ src++; 198+ } 199+ dest->numchildren = 0; 200+ dest->children = NULL; 201+ } else { 202+ unsigned int i; 203+ int cn; 204+ dest->name = NULL; 205+ dest->numchildren = dtd->scaffold[src_node].childcnt; 206+ dest->children = &dest[1]; 207+ 208+ /* Push children to the stack 209+ * in a way where the first child ends up at the top of the 210+ * (downwards growing) stack, in order to be processed first. */ 211+ stackTop -= dest->numchildren; 212+ for (i = 0, cn = dtd->scaffold[src_node].firstchild; 213+ i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) { 214+ (stackTop + i)->numchildren = (unsigned int)cn; 215+ } 216+ } 217+ } 218 219- build_node(parser, 0, ret, &cpos, &str); 220 return ret; 221 } 222 223