• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Tests in the "basic" test case for the Expat test suite
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10    Copyright (c) 2003      Greg Stein <gstein@users.sourceforge.net>
11    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12    Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13    Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org>
14    Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15    Copyright (c) 2017      Joe Orton <jorton@redhat.com>
16    Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
17    Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
18    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
19    Copyright (c) 2020      Tim Gates <tim.gates@iress.com>
20    Copyright (c) 2021      Donghee Na <donghee.na@python.org>
21    Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
22    Copyright (c) 2024-2025 Berkay Eren Ürün <berkay.ueruen@siemens.com>
23    Licensed under the MIT license:
24 
25    Permission is  hereby granted,  free of charge,  to any  person obtaining
26    a  copy  of  this  software   and  associated  documentation  files  (the
27    "Software"),  to  deal in  the  Software  without restriction,  including
28    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
29    distribute, sublicense, and/or sell copies of the Software, and to permit
30    persons  to whom  the Software  is  furnished to  do so,  subject to  the
31    following conditions:
32 
33    The above copyright  notice and this permission notice  shall be included
34    in all copies or substantial portions of the Software.
35 
36    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
37    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
38    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
39    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
40    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
41    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
42    USE OR OTHER DEALINGS IN THE SOFTWARE.
43 */
44 
45 #if defined(NDEBUG)
46 #  undef NDEBUG /* because test suite relies on assert(...) at the moment */
47 #endif
48 
49 #include <assert.h>
50 
51 #include <stdio.h>
52 #include <string.h>
53 #include <time.h>
54 
55 #if ! defined(__cplusplus)
56 #  include <stdbool.h>
57 #endif
58 
59 #include "expat_config.h"
60 
61 #include "expat.h"
62 #include "internal.h"
63 #include "minicheck.h"
64 #include "structdata.h"
65 #include "common.h"
66 #include "dummy.h"
67 #include "handlers.h"
68 #include "siphash.h"
69 #include "basic_tests.h"
70 
71 static void
basic_setup(void)72 basic_setup(void) {
73   g_parser = XML_ParserCreate(NULL);
74   if (g_parser == NULL)
75     fail("Parser not created.");
76 }
77 
78 /*
79  * Character & encoding tests.
80  */
81 
START_TEST(test_nul_byte)82 START_TEST(test_nul_byte) {
83   char text[] = "<doc>\0</doc>";
84 
85   /* test that a NUL byte (in US-ASCII data) is an error */
86   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
87       == XML_STATUS_OK)
88     fail("Parser did not report error on NUL-byte.");
89   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
90     xml_failure(g_parser);
91 }
92 END_TEST
93 
START_TEST(test_u0000_char)94 START_TEST(test_u0000_char) {
95   /* test that a NUL byte (in US-ASCII data) is an error */
96   expect_failure("<doc>&#0;</doc>", XML_ERROR_BAD_CHAR_REF,
97                  "Parser did not report error on NUL-byte.");
98 }
99 END_TEST
100 
START_TEST(test_siphash_self)101 START_TEST(test_siphash_self) {
102   if (! sip24_valid())
103     fail("SipHash self-test failed");
104 }
105 END_TEST
106 
START_TEST(test_siphash_spec)107 START_TEST(test_siphash_spec) {
108   /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
109   const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
110                          "\x0a\x0b\x0c\x0d\x0e";
111   const size_t len = sizeof(message) - 1;
112   const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U);
113   struct siphash state;
114   struct sipkey key;
115 
116   sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
117                   "\x0a\x0b\x0c\x0d\x0e\x0f");
118   sip24_init(&state, &key);
119 
120   /* Cover spread across calls */
121   sip24_update(&state, message, 4);
122   sip24_update(&state, message + 4, len - 4);
123 
124   /* Cover null length */
125   sip24_update(&state, message, 0);
126 
127   if (sip24_final(&state) != expected)
128     fail("sip24_final failed spec test\n");
129 
130   /* Cover wrapper */
131   if (siphash24(message, len, &key) != expected)
132     fail("siphash24 failed spec test\n");
133 }
134 END_TEST
135 
START_TEST(test_bom_utf8)136 START_TEST(test_bom_utf8) {
137   /* This test is really just making sure we don't core on a UTF-8 BOM. */
138   const char *text = "\357\273\277<e/>";
139 
140   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
141       == XML_STATUS_ERROR)
142     xml_failure(g_parser);
143 }
144 END_TEST
145 
START_TEST(test_bom_utf16_be)146 START_TEST(test_bom_utf16_be) {
147   char text[] = "\376\377\0<\0e\0/\0>";
148 
149   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
150       == XML_STATUS_ERROR)
151     xml_failure(g_parser);
152 }
153 END_TEST
154 
START_TEST(test_bom_utf16_le)155 START_TEST(test_bom_utf16_le) {
156   char text[] = "\377\376<\0e\0/\0>\0";
157 
158   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
159       == XML_STATUS_ERROR)
160     xml_failure(g_parser);
161 }
162 END_TEST
163 
START_TEST(test_nobom_utf16_le)164 START_TEST(test_nobom_utf16_le) {
165   char text[] = " \0<\0e\0/\0>\0";
166 
167   if (g_chunkSize == 1) {
168     // TODO: with just the first byte, we can't tell the difference between
169     // UTF-16-LE and UTF-8. Avoid the failure for now.
170     return;
171   }
172 
173   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
174       == XML_STATUS_ERROR)
175     xml_failure(g_parser);
176 }
177 END_TEST
178 
START_TEST(test_hash_collision)179 START_TEST(test_hash_collision) {
180   /* For full coverage of the lookup routine, we need to ensure a
181    * hash collision even though we can only tell that we have one
182    * through breakpoint debugging or coverage statistics.  The
183    * following will cause a hash collision on machines with a 64-bit
184    * long type; others will have to experiment.  The full coverage
185    * tests invoked from qa.sh usually provide a hash collision, but
186    * not always.  This is an attempt to provide insurance.
187    */
188 #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U)
189   const char *text
190       = "<doc>\n"
191         "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n"
192         "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n"
193         "<b5></b5><b6></b6><b7></b7><b8></b8>\n"
194         "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n"
195         "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n"
196         "<d8>This triggers the table growth and collides with b2</d8>\n"
197         "</doc>\n";
198 
199   XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT);
200   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
201       == XML_STATUS_ERROR)
202     xml_failure(g_parser);
203 }
204 END_TEST
205 #undef COLLIDING_HASH_SALT
206 
207 /* Regression test for SF bug #491986. */
START_TEST(test_danish_latin1)208 START_TEST(test_danish_latin1) {
209   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
210                      "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
211 #ifdef XML_UNICODE
212   const XML_Char *expected
213       = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
214 #else
215   const XML_Char *expected
216       = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
217 #endif
218   run_character_check(text, expected);
219 }
220 END_TEST
221 
222 /* Regression test for SF bug #514281. */
START_TEST(test_french_charref_hexidecimal)223 START_TEST(test_french_charref_hexidecimal) {
224   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
225                      "<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
226 #ifdef XML_UNICODE
227   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
228 #else
229   const XML_Char *expected
230       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
231 #endif
232   run_character_check(text, expected);
233 }
234 END_TEST
235 
START_TEST(test_french_charref_decimal)236 START_TEST(test_french_charref_decimal) {
237   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
238                      "<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
239 #ifdef XML_UNICODE
240   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
241 #else
242   const XML_Char *expected
243       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
244 #endif
245   run_character_check(text, expected);
246 }
247 END_TEST
248 
START_TEST(test_french_latin1)249 START_TEST(test_french_latin1) {
250   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
251                      "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
252 #ifdef XML_UNICODE
253   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
254 #else
255   const XML_Char *expected
256       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
257 #endif
258   run_character_check(text, expected);
259 }
260 END_TEST
261 
START_TEST(test_french_utf8)262 START_TEST(test_french_utf8) {
263   const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
264                      "<doc>\xC3\xA9</doc>";
265 #ifdef XML_UNICODE
266   const XML_Char *expected = XCS("\x00e9");
267 #else
268   const XML_Char *expected = XCS("\xC3\xA9");
269 #endif
270   run_character_check(text, expected);
271 }
272 END_TEST
273 
274 /* Regression test for SF bug #600479.
275    XXX There should be a test that exercises all legal XML Unicode
276    characters as PCDATA and attribute value content, and XML Name
277    characters as part of element and attribute names.
278 */
START_TEST(test_utf8_false_rejection)279 START_TEST(test_utf8_false_rejection) {
280   const char *text = "<doc>\xEF\xBA\xBF</doc>";
281 #ifdef XML_UNICODE
282   const XML_Char *expected = XCS("\xfebf");
283 #else
284   const XML_Char *expected = XCS("\xEF\xBA\xBF");
285 #endif
286   run_character_check(text, expected);
287 }
288 END_TEST
289 
290 /* Regression test for SF bug #477667.
291    This test assures that any 8-bit character followed by a 7-bit
292    character will not be mistakenly interpreted as a valid UTF-8
293    sequence.
294 */
START_TEST(test_illegal_utf8)295 START_TEST(test_illegal_utf8) {
296   char text[100];
297   int i;
298 
299   for (i = 128; i <= 255; ++i) {
300     snprintf(text, sizeof(text), "<e>%ccd</e>", i);
301     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
302         == XML_STATUS_OK) {
303       snprintf(text, sizeof(text),
304                "expected token error for '%c' (ordinal %d) in UTF-8 text", i,
305                i);
306       fail(text);
307     } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
308       xml_failure(g_parser);
309     /* Reset the parser since we use the same parser repeatedly. */
310     XML_ParserReset(g_parser, NULL);
311   }
312 }
313 END_TEST
314 
315 /* Examples, not masks: */
316 #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
317 #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
318 #define UTF8_LEAD_3 "\xef" /* 0b11101111 */
319 #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
320 #define UTF8_FOLLOW "\xbf" /* 0b10111111 */
321 
START_TEST(test_utf8_auto_align)322 START_TEST(test_utf8_auto_align) {
323   struct TestCase {
324     ptrdiff_t expectedMovementInChars;
325     const char *input;
326   };
327 
328   struct TestCase cases[] = {
329       {00, ""},
330 
331       {00, UTF8_LEAD_1},
332 
333       {-1, UTF8_LEAD_2},
334       {00, UTF8_LEAD_2 UTF8_FOLLOW},
335 
336       {-1, UTF8_LEAD_3},
337       {-2, UTF8_LEAD_3 UTF8_FOLLOW},
338       {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
339 
340       {-1, UTF8_LEAD_4},
341       {-2, UTF8_LEAD_4 UTF8_FOLLOW},
342       {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
343       {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
344   };
345 
346   size_t i = 0;
347   bool success = true;
348   for (; i < sizeof(cases) / sizeof(*cases); i++) {
349     const char *fromLim = cases[i].input + strlen(cases[i].input);
350     const char *const fromLimInitially = fromLim;
351     ptrdiff_t actualMovementInChars;
352 
353     _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim);
354 
355     actualMovementInChars = (fromLim - fromLimInitially);
356     if (actualMovementInChars != cases[i].expectedMovementInChars) {
357       size_t j = 0;
358       success = false;
359       printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
360              ", actually moved by %2d chars: \"",
361              (unsigned)(i + 1), (int)cases[i].expectedMovementInChars,
362              (int)actualMovementInChars);
363       for (; j < strlen(cases[i].input); j++) {
364         printf("\\x%02x", (unsigned char)cases[i].input[j]);
365       }
366       printf("\"\n");
367     }
368   }
369 
370   if (! success) {
371     fail("UTF-8 auto-alignment is not bullet-proof\n");
372   }
373 }
374 END_TEST
375 
START_TEST(test_utf16)376 START_TEST(test_utf16) {
377   /* <?xml version="1.0" encoding="UTF-16"?>
378    *  <doc a='123'>some {A} text</doc>
379    *
380    * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
381    */
382   char text[]
383       = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
384         "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
385         "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
386         "\000'\000?\000>\000\n"
387         "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
388         "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
389         "<\000/\000d\000o\000c\000>";
390 #ifdef XML_UNICODE
391   const XML_Char *expected = XCS("some \xff21 text");
392 #else
393   const XML_Char *expected = XCS("some \357\274\241 text");
394 #endif
395   CharData storage;
396 
397   CharData_Init(&storage);
398   XML_SetUserData(g_parser, &storage);
399   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
400   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
401       == XML_STATUS_ERROR)
402     xml_failure(g_parser);
403   CharData_CheckXMLChars(&storage, expected);
404 }
405 END_TEST
406 
START_TEST(test_utf16_le_epilog_newline)407 START_TEST(test_utf16_le_epilog_newline) {
408   unsigned int first_chunk_bytes = 17;
409   char text[] = "\xFF\xFE"                  /* BOM */
410                 "<\000e\000/\000>\000"      /* document element */
411                 "\r\000\n\000\r\000\n\000"; /* epilog */
412 
413   if (first_chunk_bytes >= sizeof(text) - 1)
414     fail("bad value of first_chunk_bytes");
415   if (_XML_Parse_SINGLE_BYTES(g_parser, text, first_chunk_bytes, XML_FALSE)
416       == XML_STATUS_ERROR)
417     xml_failure(g_parser);
418   else {
419     enum XML_Status rc;
420     rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes,
421                                  sizeof(text) - first_chunk_bytes - 1,
422                                  XML_TRUE);
423     if (rc == XML_STATUS_ERROR)
424       xml_failure(g_parser);
425   }
426 }
427 END_TEST
428 
429 /* Test that an outright lie in the encoding is faulted */
START_TEST(test_not_utf16)430 START_TEST(test_not_utf16) {
431   const char *text = "<?xml version='1.0' encoding='utf-16'?>"
432                      "<doc>Hi</doc>";
433 
434   /* Use a handler to provoke the appropriate code paths */
435   XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler);
436   expect_failure(text, XML_ERROR_INCORRECT_ENCODING,
437                  "UTF-16 declared in UTF-8 not faulted");
438 }
439 END_TEST
440 
441 /* Test that an unknown encoding is rejected */
START_TEST(test_bad_encoding)442 START_TEST(test_bad_encoding) {
443   const char *text = "<doc>Hi</doc>";
444 
445   if (! XML_SetEncoding(g_parser, XCS("unknown-encoding")))
446     fail("XML_SetEncoding failed");
447   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
448                  "Unknown encoding not faulted");
449 }
450 END_TEST
451 
452 /* Regression test for SF bug #481609, #774028. */
START_TEST(test_latin1_umlauts)453 START_TEST(test_latin1_umlauts) {
454   const char *text
455       = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
456         "<e a='\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; >'\n"
457         "  >\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; ></e>";
458 #ifdef XML_UNICODE
459   /* Expected results in UTF-16 */
460   const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ")
461       XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >");
462 #else
463   /* Expected results in UTF-8 */
464   const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
465       XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
466 #endif
467 
468   run_character_check(text, expected);
469   XML_ParserReset(g_parser, NULL);
470   run_attribute_check(text, expected);
471   /* Repeat with a default handler */
472   XML_ParserReset(g_parser, NULL);
473   XML_SetDefaultHandler(g_parser, dummy_default_handler);
474   run_character_check(text, expected);
475   XML_ParserReset(g_parser, NULL);
476   XML_SetDefaultHandler(g_parser, dummy_default_handler);
477   run_attribute_check(text, expected);
478 }
479 END_TEST
480 
481 /* Test that an element name with a 4-byte UTF-8 character is rejected */
START_TEST(test_long_utf8_character)482 START_TEST(test_long_utf8_character) {
483   const char *text
484       = "<?xml version='1.0' encoding='utf-8'?>\n"
485         /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
486         "<do\xf0\x90\x80\x80/>";
487   expect_failure(text, XML_ERROR_INVALID_TOKEN,
488                  "4-byte UTF-8 character in element name not faulted");
489 }
490 END_TEST
491 
492 /* Test that a long latin-1 attribute (too long to convert in one go)
493  * is correctly converted
494  */
START_TEST(test_long_latin1_attribute)495 START_TEST(test_long_latin1_attribute) {
496   const char *text
497       = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
498         "<doc att='"
499         /* 64 characters per line */
500         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
501         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
502         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
503         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
504         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
505         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
506         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
507         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
508         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
509         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
510         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
511         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
512         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
513         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
514         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
515         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
516         /* Last character splits across a buffer boundary */
517         "\xe4'>\n</doc>";
518 
519   const XML_Char *expected =
520       /* 64 characters per line */
521       /* clang-format off */
522         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
523         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
524         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
525         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
526         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
527         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
528         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
529         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
530         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
531         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
532         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
533         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
534         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
535         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
536         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
537         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
538   /* clang-format on */
539 #ifdef XML_UNICODE
540                                                   XCS("\x00e4");
541 #else
542                                                   XCS("\xc3\xa4");
543 #endif
544 
545   run_attribute_check(text, expected);
546 }
547 END_TEST
548 
549 /* Test that a long ASCII attribute (too long to convert in one go)
550  * is correctly converted
551  */
START_TEST(test_long_ascii_attribute)552 START_TEST(test_long_ascii_attribute) {
553   const char *text
554       = "<?xml version='1.0' encoding='us-ascii'?>\n"
555         "<doc att='"
556         /* 64 characters per line */
557         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
558         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
559         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
560         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
561         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
562         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
563         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
564         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
565         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
566         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
567         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
568         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
569         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
570         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
571         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
572         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
573         "01234'>\n</doc>";
574   const XML_Char *expected =
575       /* 64 characters per line */
576       /* clang-format off */
577         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
578         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
579         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
580         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
581         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
582         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
583         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
584         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
585         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
586         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
587         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
588         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
589         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
590         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
591         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
592         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
593         XCS("01234");
594   /* clang-format on */
595 
596   run_attribute_check(text, expected);
597 }
598 END_TEST
599 
600 /* Regression test #1 for SF bug #653180. */
START_TEST(test_line_number_after_parse)601 START_TEST(test_line_number_after_parse) {
602   const char *text = "<tag>\n"
603                      "\n"
604                      "\n</tag>";
605   XML_Size lineno;
606 
607   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
608       == XML_STATUS_ERROR)
609     xml_failure(g_parser);
610   lineno = XML_GetCurrentLineNumber(g_parser);
611   if (lineno != 4) {
612     char buffer[100];
613     snprintf(buffer, sizeof(buffer),
614              "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
615     fail(buffer);
616   }
617 }
618 END_TEST
619 
620 /* Regression test #2 for SF bug #653180. */
START_TEST(test_column_number_after_parse)621 START_TEST(test_column_number_after_parse) {
622   const char *text = "<tag></tag>";
623   XML_Size colno;
624 
625   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
626       == XML_STATUS_ERROR)
627     xml_failure(g_parser);
628   colno = XML_GetCurrentColumnNumber(g_parser);
629   if (colno != 11) {
630     char buffer[100];
631     snprintf(buffer, sizeof(buffer),
632              "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
633     fail(buffer);
634   }
635 }
636 END_TEST
637 
638 /* Regression test #3 for SF bug #653180. */
START_TEST(test_line_and_column_numbers_inside_handlers)639 START_TEST(test_line_and_column_numbers_inside_handlers) {
640   const char *text = "<a>\n"      /* Unix end-of-line */
641                      "  <b>\r\n"  /* Windows end-of-line */
642                      "    <c/>\r" /* Mac OS end-of-line */
643                      "  </b>\n"
644                      "  <d>\n"
645                      "    <f/>\n"
646                      "  </d>\n"
647                      "</a>";
648   const StructDataEntry expected[]
649       = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG},
650          {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG},
651          {XCS("b"), 2, 4, STRUCT_END_TAG},   {XCS("d"), 2, 5, STRUCT_START_TAG},
652          {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG},
653          {XCS("d"), 2, 7, STRUCT_END_TAG},   {XCS("a"), 0, 8, STRUCT_END_TAG}};
654   const int expected_count = sizeof(expected) / sizeof(StructDataEntry);
655   StructData storage;
656 
657   StructData_Init(&storage);
658   XML_SetUserData(g_parser, &storage);
659   XML_SetStartElementHandler(g_parser, start_element_event_handler2);
660   XML_SetEndElementHandler(g_parser, end_element_event_handler2);
661   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
662       == XML_STATUS_ERROR)
663     xml_failure(g_parser);
664 
665   StructData_CheckItems(&storage, expected, expected_count);
666   StructData_Dispose(&storage);
667 }
668 END_TEST
669 
670 /* Regression test #4 for SF bug #653180. */
START_TEST(test_line_number_after_error)671 START_TEST(test_line_number_after_error) {
672   const char *text = "<a>\n"
673                      "  <b>\n"
674                      "  </a>"; /* missing </b> */
675   XML_Size lineno;
676   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
677       != XML_STATUS_ERROR)
678     fail("Expected a parse error");
679 
680   lineno = XML_GetCurrentLineNumber(g_parser);
681   if (lineno != 3) {
682     char buffer[100];
683     snprintf(buffer, sizeof(buffer),
684              "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
685     fail(buffer);
686   }
687 }
688 END_TEST
689 
690 /* Regression test #5 for SF bug #653180. */
START_TEST(test_column_number_after_error)691 START_TEST(test_column_number_after_error) {
692   const char *text = "<a>\n"
693                      "  <b>\n"
694                      "  </a>"; /* missing </b> */
695   XML_Size colno;
696   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
697       != XML_STATUS_ERROR)
698     fail("Expected a parse error");
699 
700   colno = XML_GetCurrentColumnNumber(g_parser);
701   if (colno != 4) {
702     char buffer[100];
703     snprintf(buffer, sizeof(buffer),
704              "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
705     fail(buffer);
706   }
707 }
708 END_TEST
709 
710 /* Regression test for SF bug #478332. */
START_TEST(test_really_long_lines)711 START_TEST(test_really_long_lines) {
712   /* This parses an input line longer than INIT_DATA_BUF_SIZE
713      characters long (defined to be 1024 in xmlparse.c).  We take a
714      really cheesy approach to building the input buffer, because
715      this avoids writing bugs in buffer-filling code.
716   */
717   const char *text
718       = "<e>"
719         /* 64 chars */
720         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
721         /* until we have at least 1024 characters on the line: */
722         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
723         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
724         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
725         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
726         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
727         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
728         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
729         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
730         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
731         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
732         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
733         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
734         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
735         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
736         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
737         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
738         "</e>";
739   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
740       == XML_STATUS_ERROR)
741     xml_failure(g_parser);
742 }
743 END_TEST
744 
745 /* Test cdata processing across a buffer boundary */
START_TEST(test_really_long_encoded_lines)746 START_TEST(test_really_long_encoded_lines) {
747   /* As above, except that we want to provoke an output buffer
748    * overflow with a non-trivial encoding.  For this we need to pass
749    * the whole cdata in one go, not byte-by-byte.
750    */
751   void *buffer;
752   const char *text
753       = "<?xml version='1.0' encoding='iso-8859-1'?>"
754         "<e>"
755         /* 64 chars */
756         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
757         /* until we have at least 1024 characters on the line: */
758         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
759         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
760         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
761         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
762         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
763         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
764         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
765         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
766         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
767         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
768         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
769         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
770         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
771         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
772         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
773         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
774         "</e>";
775   int parse_len = (int)strlen(text);
776 
777   /* Need a cdata handler to provoke the code path we want to test */
778   XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler);
779   buffer = XML_GetBuffer(g_parser, parse_len);
780   if (buffer == NULL)
781     fail("Could not allocate parse buffer");
782   assert(buffer != NULL);
783   memcpy(buffer, text, parse_len);
784   if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR)
785     xml_failure(g_parser);
786 }
787 END_TEST
788 
789 /*
790  * Element event tests.
791  */
792 
START_TEST(test_end_element_events)793 START_TEST(test_end_element_events) {
794   const char *text = "<a><b><c/></b><d><f/></d></a>";
795   const XML_Char *expected = XCS("/c/b/f/d/a");
796   CharData storage;
797 
798   CharData_Init(&storage);
799   XML_SetUserData(g_parser, &storage);
800   XML_SetEndElementHandler(g_parser, end_element_event_handler);
801   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
802       == XML_STATUS_ERROR)
803     xml_failure(g_parser);
804   CharData_CheckXMLChars(&storage, expected);
805 }
806 END_TEST
807 
808 /*
809  * Attribute tests.
810  */
811 
812 /* Helper used by the following tests; this checks any "attr" and "refs"
813    attributes to make sure whitespace has been normalized.
814 
815    Return true if whitespace has been normalized in a string, using
816    the rules for attribute value normalization.  The 'is_cdata' flag
817    is needed since CDATA attributes don't need to have multiple
818    whitespace characters collapsed to a single space, while other
819    attribute data types do.  (Section 3.3.3 of the recommendation.)
820 */
821 static int
is_whitespace_normalized(const XML_Char * s,int is_cdata)822 is_whitespace_normalized(const XML_Char *s, int is_cdata) {
823   int blanks = 0;
824   int at_start = 1;
825   while (*s) {
826     if (*s == XCS(' '))
827       ++blanks;
828     else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r'))
829       return 0;
830     else {
831       if (at_start) {
832         at_start = 0;
833         if (blanks && ! is_cdata)
834           /* illegal leading blanks */
835           return 0;
836       } else if (blanks > 1 && ! is_cdata)
837         return 0;
838       blanks = 0;
839     }
840     ++s;
841   }
842   if (blanks && ! is_cdata)
843     return 0;
844   return 1;
845 }
846 
847 /* Check the attribute whitespace checker: */
START_TEST(test_helper_is_whitespace_normalized)848 START_TEST(test_helper_is_whitespace_normalized) {
849   assert(is_whitespace_normalized(XCS("abc"), 0));
850   assert(is_whitespace_normalized(XCS("abc"), 1));
851   assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
852   assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
853   assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0));
854   assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
855   assert(! is_whitespace_normalized(XCS("abc  def ghi"), 0));
856   assert(is_whitespace_normalized(XCS("abc  def ghi"), 1));
857   assert(! is_whitespace_normalized(XCS("abc def ghi "), 0));
858   assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
859   assert(! is_whitespace_normalized(XCS(" "), 0));
860   assert(is_whitespace_normalized(XCS(" "), 1));
861   assert(! is_whitespace_normalized(XCS("\t"), 0));
862   assert(! is_whitespace_normalized(XCS("\t"), 1));
863   assert(! is_whitespace_normalized(XCS("\n"), 0));
864   assert(! is_whitespace_normalized(XCS("\n"), 1));
865   assert(! is_whitespace_normalized(XCS("\r"), 0));
866   assert(! is_whitespace_normalized(XCS("\r"), 1));
867   assert(! is_whitespace_normalized(XCS("abc\t def"), 1));
868 }
869 END_TEST
870 
871 static void XMLCALL
check_attr_contains_normalized_whitespace(void * userData,const XML_Char * name,const XML_Char ** atts)872 check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name,
873                                           const XML_Char **atts) {
874   int i;
875   UNUSED_P(userData);
876   UNUSED_P(name);
877   for (i = 0; atts[i] != NULL; i += 2) {
878     const XML_Char *attrname = atts[i];
879     const XML_Char *value = atts[i + 1];
880     if (xcstrcmp(XCS("attr"), attrname) == 0
881         || xcstrcmp(XCS("ents"), attrname) == 0
882         || xcstrcmp(XCS("refs"), attrname) == 0) {
883       if (! is_whitespace_normalized(value, 0)) {
884         char buffer[256];
885         snprintf(buffer, sizeof(buffer),
886                  "attribute value not normalized: %" XML_FMT_STR
887                  "='%" XML_FMT_STR "'",
888                  attrname, value);
889         fail(buffer);
890       }
891     }
892   }
893 }
894 
START_TEST(test_attr_whitespace_normalization)895 START_TEST(test_attr_whitespace_normalization) {
896   const char *text
897       = "<!DOCTYPE doc [\n"
898         "  <!ATTLIST doc\n"
899         "            attr NMTOKENS #REQUIRED\n"
900         "            ents ENTITIES #REQUIRED\n"
901         "            refs IDREFS   #REQUIRED>\n"
902         "]>\n"
903         "<doc attr='    a  b c\t\td\te\t' refs=' id-1   \t  id-2\t\t'  \n"
904         "     ents=' ent-1   \t\r\n"
905         "            ent-2  ' >\n"
906         "  <e id='id-1'/>\n"
907         "  <e id='id-2'/>\n"
908         "</doc>";
909 
910   XML_SetStartElementHandler(g_parser,
911                              check_attr_contains_normalized_whitespace);
912   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
913       == XML_STATUS_ERROR)
914     xml_failure(g_parser);
915 }
916 END_TEST
917 
918 /*
919  * XML declaration tests.
920  */
921 
START_TEST(test_xmldecl_misplaced)922 START_TEST(test_xmldecl_misplaced) {
923   expect_failure("\n"
924                  "<?xml version='1.0'?>\n"
925                  "<a/>",
926                  XML_ERROR_MISPLACED_XML_PI,
927                  "failed to report misplaced XML declaration");
928 }
929 END_TEST
930 
START_TEST(test_xmldecl_invalid)931 START_TEST(test_xmldecl_invalid) {
932   expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL,
933                  "Failed to report invalid XML declaration");
934 }
935 END_TEST
936 
START_TEST(test_xmldecl_missing_attr)937 START_TEST(test_xmldecl_missing_attr) {
938   expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL,
939                  "Failed to report missing XML declaration attribute");
940 }
941 END_TEST
942 
START_TEST(test_xmldecl_missing_value)943 START_TEST(test_xmldecl_missing_value) {
944   expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n"
945                  "<doc/>",
946                  XML_ERROR_XML_DECL,
947                  "Failed to report missing attribute value");
948 }
949 END_TEST
950 
951 /* Regression test for SF bug #584832. */
START_TEST(test_unknown_encoding_internal_entity)952 START_TEST(test_unknown_encoding_internal_entity) {
953   const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
954                      "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
955                      "<test a='&foo;'/>";
956 
957   XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL);
958   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
959       == XML_STATUS_ERROR)
960     xml_failure(g_parser);
961 }
962 END_TEST
963 
964 /* Test unrecognised encoding handler */
START_TEST(test_unrecognised_encoding_internal_entity)965 START_TEST(test_unrecognised_encoding_internal_entity) {
966   const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
967                      "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
968                      "<test a='&foo;'/>";
969 
970   XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL);
971   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
972       != XML_STATUS_ERROR)
973     fail("Unrecognised encoding not rejected");
974 }
975 END_TEST
976 
977 /* Regression test for SF bug #620106. */
START_TEST(test_ext_entity_set_encoding)978 START_TEST(test_ext_entity_set_encoding) {
979   const char *text = "<!DOCTYPE doc [\n"
980                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
981                      "]>\n"
982                      "<doc>&en;</doc>";
983   ExtTest test_data
984       = {/* This text says it's an unsupported encoding, but it's really
985             UTF-8, which we tell Expat using XML_SetEncoding().
986          */
987          "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL};
988 #ifdef XML_UNICODE
989   const XML_Char *expected = XCS("\x00e9");
990 #else
991   const XML_Char *expected = XCS("\xc3\xa9");
992 #endif
993 
994   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
995   run_ext_character_check(text, &test_data, expected);
996 }
997 END_TEST
998 
999 /* Test external entities with no handler */
START_TEST(test_ext_entity_no_handler)1000 START_TEST(test_ext_entity_no_handler) {
1001   const char *text = "<!DOCTYPE doc [\n"
1002                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1003                      "]>\n"
1004                      "<doc>&en;</doc>";
1005 
1006   XML_SetDefaultHandler(g_parser, dummy_default_handler);
1007   run_character_check(text, XCS(""));
1008 }
1009 END_TEST
1010 
1011 /* Test UTF-8 BOM is accepted */
START_TEST(test_ext_entity_set_bom)1012 START_TEST(test_ext_entity_set_bom) {
1013   const char *text = "<!DOCTYPE doc [\n"
1014                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1015                      "]>\n"
1016                      "<doc>&en;</doc>";
1017   ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */
1018                        "<?xml encoding='iso-8859-3'?>"
1019                        "\xC3\xA9",
1020                        XCS("utf-8"), NULL};
1021 #ifdef XML_UNICODE
1022   const XML_Char *expected = XCS("\x00e9");
1023 #else
1024   const XML_Char *expected = XCS("\xc3\xa9");
1025 #endif
1026 
1027   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1028   run_ext_character_check(text, &test_data, expected);
1029 }
1030 END_TEST
1031 
1032 /* Test that bad encodings are faulted */
START_TEST(test_ext_entity_bad_encoding)1033 START_TEST(test_ext_entity_bad_encoding) {
1034   const char *text = "<!DOCTYPE doc [\n"
1035                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1036                      "]>\n"
1037                      "<doc>&en;</doc>";
1038   ExtFaults fault
1039       = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted",
1040          XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING};
1041 
1042   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1043   XML_SetUserData(g_parser, &fault);
1044   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1045                  "Bad encoding should not have been accepted");
1046 }
1047 END_TEST
1048 
1049 /* Try handing an invalid encoding to an external entity parser */
START_TEST(test_ext_entity_bad_encoding_2)1050 START_TEST(test_ext_entity_bad_encoding_2) {
1051   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1052                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1053                      "<doc>&entity;</doc>";
1054   ExtFaults fault
1055       = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted",
1056          XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING};
1057 
1058   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1059   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1060   XML_SetUserData(g_parser, &fault);
1061   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1062                  "Bad encoding not faulted in external entity handler");
1063 }
1064 END_TEST
1065 
1066 /* Test that no error is reported for unknown entities if we don't
1067    read an external subset.  This was fixed in Expat 1.95.5.
1068 */
START_TEST(test_wfc_undeclared_entity_unread_external_subset)1069 START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
1070   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
1071                      "<doc>&entity;</doc>";
1072 
1073   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1074       == XML_STATUS_ERROR)
1075     xml_failure(g_parser);
1076 }
1077 END_TEST
1078 
1079 /* Test that an error is reported for unknown entities if we don't
1080    have an external subset.
1081 */
START_TEST(test_wfc_undeclared_entity_no_external_subset)1082 START_TEST(test_wfc_undeclared_entity_no_external_subset) {
1083   expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY,
1084                  "Parser did not report undefined entity w/out a DTD.");
1085 }
1086 END_TEST
1087 
1088 /* Test that an error is reported for unknown entities if we don't
1089    read an external subset, but have been declared standalone.
1090 */
START_TEST(test_wfc_undeclared_entity_standalone)1091 START_TEST(test_wfc_undeclared_entity_standalone) {
1092   const char *text
1093       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1094         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1095         "<doc>&entity;</doc>";
1096 
1097   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1098                  "Parser did not report undefined entity (standalone).");
1099 }
1100 END_TEST
1101 
1102 /* Test that an error is reported for unknown entities if we have read
1103    an external subset, and standalone is true.
1104 */
START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone)1105 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
1106   const char *text
1107       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1108         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1109         "<doc>&entity;</doc>";
1110   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1111 
1112   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1113   XML_SetUserData(g_parser, &test_data);
1114   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1115   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1116                  "Parser did not report undefined entity (external DTD).");
1117 }
1118 END_TEST
1119 
1120 /* Test that external entity handling is not done if the parsing flag
1121  * is set to UNLESS_STANDALONE
1122  */
START_TEST(test_entity_with_external_subset_unless_standalone)1123 START_TEST(test_entity_with_external_subset_unless_standalone) {
1124   const char *text
1125       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1126         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1127         "<doc>&entity;</doc>";
1128   ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL};
1129 
1130   XML_SetParamEntityParsing(g_parser,
1131                             XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1132   XML_SetUserData(g_parser, &test_data);
1133   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1134   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1135                  "Parser did not report undefined entity");
1136 }
1137 END_TEST
1138 
1139 /* Test that no error is reported for unknown entities if we have read
1140    an external subset, and standalone is false.
1141 */
START_TEST(test_wfc_undeclared_entity_with_external_subset)1142 START_TEST(test_wfc_undeclared_entity_with_external_subset) {
1143   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1144                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1145                      "<doc>&entity;</doc>";
1146   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1147 
1148   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1149   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1150   run_ext_character_check(text, &test_data, XCS(""));
1151 }
1152 END_TEST
1153 
1154 /* Test that an error is reported if our NotStandalone handler fails */
START_TEST(test_not_standalone_handler_reject)1155 START_TEST(test_not_standalone_handler_reject) {
1156   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1157                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1158                      "<doc>&entity;</doc>";
1159   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1160 
1161   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1162   XML_SetUserData(g_parser, &test_data);
1163   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1164   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1165   expect_failure(text, XML_ERROR_NOT_STANDALONE,
1166                  "NotStandalone handler failed to reject");
1167 
1168   /* Try again but without external entity handling */
1169   XML_ParserReset(g_parser, NULL);
1170   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1171   expect_failure(text, XML_ERROR_NOT_STANDALONE,
1172                  "NotStandalone handler failed to reject");
1173 }
1174 END_TEST
1175 
1176 /* Test that no error is reported if our NotStandalone handler succeeds */
START_TEST(test_not_standalone_handler_accept)1177 START_TEST(test_not_standalone_handler_accept) {
1178   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1179                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1180                      "<doc>&entity;</doc>";
1181   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1182 
1183   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1184   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1185   XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1186   run_ext_character_check(text, &test_data, XCS(""));
1187 
1188   /* Repeat without the external entity handler */
1189   XML_ParserReset(g_parser, NULL);
1190   XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1191   run_character_check(text, XCS(""));
1192 }
1193 END_TEST
1194 
START_TEST(test_wfc_no_recursive_entity_refs)1195 START_TEST(test_wfc_no_recursive_entity_refs) {
1196   const char *text = "<!DOCTYPE doc [\n"
1197                      "  <!ENTITY entity '&#38;entity;'>\n"
1198                      "]>\n"
1199                      "<doc>&entity;</doc>";
1200 
1201   expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF,
1202                  "Parser did not report recursive entity reference.");
1203 }
1204 END_TEST
1205 
START_TEST(test_recursive_external_parameter_entity_2)1206 START_TEST(test_recursive_external_parameter_entity_2) {
1207   struct TestCase {
1208     const char *doc;
1209     enum XML_Status expectedStatus;
1210   };
1211 
1212   struct TestCase cases[] = {
1213       {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR},
1214       {"<!ENTITY % p1 '%p1;'>"
1215        "<!ENTITY % p1 'first declaration wins'>",
1216        XML_STATUS_ERROR},
1217       {"<!ENTITY % p1 'first declaration wins'>"
1218        "<!ENTITY % p1 '%p1;'>",
1219        XML_STATUS_OK},
1220       {"<!ENTITY % p1 '&#37;p1;'>", XML_STATUS_OK},
1221   };
1222 
1223   for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1224     const char *const doc = cases[i].doc;
1225     const enum XML_Status expectedStatus = cases[i].expectedStatus;
1226     set_subtest("%s", doc);
1227 
1228     XML_Parser parser = XML_ParserCreate(NULL);
1229     assert_true(parser != NULL);
1230 
1231     XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
1232     assert_true(ext_parser != NULL);
1233 
1234     const enum XML_Status actualStatus
1235         = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE);
1236 
1237     assert_true(actualStatus == expectedStatus);
1238     if (actualStatus != XML_STATUS_OK) {
1239       assert_true(XML_GetErrorCode(ext_parser)
1240                   == XML_ERROR_RECURSIVE_ENTITY_REF);
1241     }
1242 
1243     XML_ParserFree(ext_parser);
1244     XML_ParserFree(parser);
1245   }
1246 }
1247 END_TEST
1248 
1249 /* Test incomplete external entities are faulted */
START_TEST(test_ext_entity_invalid_parse)1250 START_TEST(test_ext_entity_invalid_parse) {
1251   const char *text = "<!DOCTYPE doc [\n"
1252                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1253                      "]>\n"
1254                      "<doc>&en;</doc>";
1255   const ExtFaults faults[]
1256       = {{"<", "Incomplete element declaration not faulted", NULL,
1257           XML_ERROR_UNCLOSED_TOKEN},
1258          {"<\xe2\x82", /* First two bytes of a three-byte char */
1259           "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
1260          {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL,
1261           XML_ERROR_PARTIAL_CHAR},
1262          {NULL, NULL, NULL, XML_ERROR_NONE}};
1263   const ExtFaults *fault = faults;
1264 
1265   for (; fault->parse_text != NULL; fault++) {
1266     set_subtest("\"%s\"", fault->parse_text);
1267     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1268     XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1269     XML_SetUserData(g_parser, (void *)fault);
1270     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1271                    "Parser did not report external entity error");
1272     XML_ParserReset(g_parser, NULL);
1273   }
1274 }
1275 END_TEST
1276 
1277 /* Regression test for SF bug #483514. */
START_TEST(test_dtd_default_handling)1278 START_TEST(test_dtd_default_handling) {
1279   const char *text = "<!DOCTYPE doc [\n"
1280                      "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
1281                      "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
1282                      "<!ELEMENT doc EMPTY>\n"
1283                      "<!ATTLIST doc a CDATA #IMPLIED>\n"
1284                      "<?pi in dtd?>\n"
1285                      "<!--comment in dtd-->\n"
1286                      "]><doc/>";
1287 
1288   XML_SetDefaultHandler(g_parser, accumulate_characters);
1289   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
1290   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
1291   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
1292   XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
1293   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
1294   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
1295   XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler);
1296   XML_SetCommentHandler(g_parser, dummy_comment_handler);
1297   XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1298   XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1299   run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>"));
1300 }
1301 END_TEST
1302 
1303 /* Test handling of attribute declarations */
START_TEST(test_dtd_attr_handling)1304 START_TEST(test_dtd_attr_handling) {
1305   const char *prolog = "<!DOCTYPE doc [\n"
1306                        "<!ELEMENT doc EMPTY>\n";
1307   AttTest attr_data[]
1308       = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
1309           "]>"
1310           "<doc a='two'/>",
1311           XCS("doc"), XCS("a"),
1312           XCS("(one|two|three)"), /* Extraneous spaces will be removed */
1313           NULL, XML_TRUE},
1314          {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
1315           "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
1316           "]>"
1317           "<doc/>",
1318           XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE},
1319          {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
1320           "]>"
1321           "<doc/>",
1322           XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE},
1323          {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
1324           "]>"
1325           "<doc/>",
1326           XCS("doc"), XCS("a"), XCS("CDATA"),
1327 #ifdef XML_UNICODE
1328           XCS("\x06f2"),
1329 #else
1330           XCS("\xdb\xb2"),
1331 #endif
1332           XML_FALSE},
1333          {NULL, NULL, NULL, NULL, NULL, XML_FALSE}};
1334   AttTest *test;
1335 
1336   for (test = attr_data; test->definition != NULL; test++) {
1337     set_subtest("%s", test->definition);
1338     XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler);
1339     XML_SetUserData(g_parser, test);
1340     if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
1341                                 XML_FALSE)
1342         == XML_STATUS_ERROR)
1343       xml_failure(g_parser);
1344     if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition,
1345                                 (int)strlen(test->definition), XML_TRUE)
1346         == XML_STATUS_ERROR)
1347       xml_failure(g_parser);
1348     XML_ParserReset(g_parser, NULL);
1349   }
1350 }
1351 END_TEST
1352 
1353 /* See related SF bug #673791.
1354    When namespace processing is enabled, setting the namespace URI for
1355    a prefix is not allowed; this test ensures that it *is* allowed
1356    when namespace processing is not enabled.
1357    (See Namespaces in XML, section 2.)
1358 */
START_TEST(test_empty_ns_without_namespaces)1359 START_TEST(test_empty_ns_without_namespaces) {
1360   const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
1361                      "  <e xmlns:prefix=''/>\n"
1362                      "</doc>";
1363 
1364   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1365       == XML_STATUS_ERROR)
1366     xml_failure(g_parser);
1367 }
1368 END_TEST
1369 
1370 /* Regression test for SF bug #824420.
1371    Checks that an xmlns:prefix attribute set in an attribute's default
1372    value isn't misinterpreted.
1373 */
START_TEST(test_ns_in_attribute_default_without_namespaces)1374 START_TEST(test_ns_in_attribute_default_without_namespaces) {
1375   const char *text = "<!DOCTYPE e:element [\n"
1376                      "  <!ATTLIST e:element\n"
1377                      "    xmlns:e CDATA 'http://example.org/'>\n"
1378                      "      ]>\n"
1379                      "<e:element/>";
1380 
1381   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1382       == XML_STATUS_ERROR)
1383     xml_failure(g_parser);
1384 }
1385 END_TEST
1386 
1387 /* Regression test for SF bug #1515266: missing check of stopped
1388    parser in doContext() 'for' loop. */
START_TEST(test_stop_parser_between_char_data_calls)1389 START_TEST(test_stop_parser_between_char_data_calls) {
1390   /* The sample data must be big enough that there are two calls to
1391      the character data handler from within the inner "for" loop of
1392      the XML_TOK_DATA_CHARS case in doContent(), and the character
1393      handler must stop the parser and clear the character data
1394      handler.
1395   */
1396   const char *text = long_character_data_text;
1397 
1398   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1399   g_resumable = XML_FALSE;
1400   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1401       != XML_STATUS_ERROR)
1402     xml_failure(g_parser);
1403   if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
1404     xml_failure(g_parser);
1405 }
1406 END_TEST
1407 
1408 /* Regression test for SF bug #1515266: missing check of stopped
1409    parser in doContext() 'for' loop. */
START_TEST(test_suspend_parser_between_char_data_calls)1410 START_TEST(test_suspend_parser_between_char_data_calls) {
1411   /* The sample data must be big enough that there are two calls to
1412      the character data handler from within the inner "for" loop of
1413      the XML_TOK_DATA_CHARS case in doContent(), and the character
1414      handler must stop the parser and clear the character data
1415      handler.
1416   */
1417   const char *text = long_character_data_text;
1418 
1419   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1420   g_resumable = XML_TRUE;
1421   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1422       != XML_STATUS_SUSPENDED)
1423     xml_failure(g_parser);
1424   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1425     xml_failure(g_parser);
1426   /* Try parsing directly */
1427   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1428       != XML_STATUS_ERROR)
1429     fail("Attempt to continue parse while suspended not faulted");
1430   if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
1431     fail("Suspended parse not faulted with correct error");
1432 }
1433 END_TEST
1434 
1435 /* Test repeated calls to XML_StopParser are handled correctly */
START_TEST(test_repeated_stop_parser_between_char_data_calls)1436 START_TEST(test_repeated_stop_parser_between_char_data_calls) {
1437   const char *text = long_character_data_text;
1438 
1439   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1440   g_resumable = XML_FALSE;
1441   g_abortable = XML_FALSE;
1442   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1443       != XML_STATUS_ERROR)
1444     fail("Failed to double-stop parser");
1445 
1446   XML_ParserReset(g_parser, NULL);
1447   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1448   g_resumable = XML_TRUE;
1449   g_abortable = XML_FALSE;
1450   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1451       != XML_STATUS_SUSPENDED)
1452     fail("Failed to double-suspend parser");
1453 
1454   XML_ParserReset(g_parser, NULL);
1455   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1456   g_resumable = XML_TRUE;
1457   g_abortable = XML_TRUE;
1458   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1459       != XML_STATUS_ERROR)
1460     fail("Failed to suspend-abort parser");
1461 }
1462 END_TEST
1463 
START_TEST(test_good_cdata_ascii)1464 START_TEST(test_good_cdata_ascii) {
1465   const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
1466   const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>");
1467 
1468   CharData storage;
1469   CharData_Init(&storage);
1470   XML_SetUserData(g_parser, &storage);
1471   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1472   /* Add start and end handlers for coverage */
1473   XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1474   XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1475 
1476   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1477       == XML_STATUS_ERROR)
1478     xml_failure(g_parser);
1479   CharData_CheckXMLChars(&storage, expected);
1480 
1481   /* Try again, this time with a default handler */
1482   XML_ParserReset(g_parser, NULL);
1483   CharData_Init(&storage);
1484   XML_SetUserData(g_parser, &storage);
1485   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1486   XML_SetDefaultHandler(g_parser, dummy_default_handler);
1487 
1488   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1489       == XML_STATUS_ERROR)
1490     xml_failure(g_parser);
1491   CharData_CheckXMLChars(&storage, expected);
1492 }
1493 END_TEST
1494 
START_TEST(test_good_cdata_utf16)1495 START_TEST(test_good_cdata_utf16) {
1496   /* Test data is:
1497    *   <?xml version='1.0' encoding='utf-16'?>
1498    *   <a><![CDATA[hello]]></a>
1499    */
1500   const char text[]
1501       = "\0<\0?\0x\0m\0l\0"
1502         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1503         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1504         "1\0"
1505         "6\0'"
1506         "\0?\0>\0\n"
1507         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
1508   const XML_Char *expected = XCS("hello");
1509 
1510   CharData storage;
1511   CharData_Init(&storage);
1512   XML_SetUserData(g_parser, &storage);
1513   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1514 
1515   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1516       == XML_STATUS_ERROR)
1517     xml_failure(g_parser);
1518   CharData_CheckXMLChars(&storage, expected);
1519 }
1520 END_TEST
1521 
START_TEST(test_good_cdata_utf16_le)1522 START_TEST(test_good_cdata_utf16_le) {
1523   /* Test data is:
1524    *   <?xml version='1.0' encoding='utf-16'?>
1525    *   <a><![CDATA[hello]]></a>
1526    */
1527   const char text[]
1528       = "<\0?\0x\0m\0l\0"
1529         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1530         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1531         "1\0"
1532         "6\0'"
1533         "\0?\0>\0\n"
1534         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
1535   const XML_Char *expected = XCS("hello");
1536 
1537   CharData storage;
1538   CharData_Init(&storage);
1539   XML_SetUserData(g_parser, &storage);
1540   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1541 
1542   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1543       == XML_STATUS_ERROR)
1544     xml_failure(g_parser);
1545   CharData_CheckXMLChars(&storage, expected);
1546 }
1547 END_TEST
1548 
1549 /* Test UTF16 conversion of a long cdata string */
1550 
1551 /* 16 characters: handy macro to reduce visual clutter */
1552 #define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"
1553 
START_TEST(test_long_cdata_utf16)1554 START_TEST(test_long_cdata_utf16) {
1555   /* Test data is:
1556    * <?xlm version='1.0' encoding='utf-16'?>
1557    * <a><![CDATA[
1558    * ABCDEFGHIJKLMNOP
1559    * ]]></a>
1560    */
1561   const char text[]
1562       = "\0<\0?\0x\0m\0l\0 "
1563         "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
1564         "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
1565         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1566       /* 64 characters per line */
1567       /* clang-format off */
1568         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1569         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1570         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1571         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1572         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1573         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1574         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1575         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1576         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1577         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1578         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1579         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1580         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1581         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1582         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1583         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1584         A_TO_P_IN_UTF16
1585         /* clang-format on */
1586         "\0]\0]\0>\0<\0/\0a\0>";
1587   const XML_Char *expected =
1588       /* clang-format off */
1589         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1590         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1591         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1592         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1593         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1594         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1595         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1596         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1597         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1598         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1599         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1600         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1601         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1602         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1603         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1604         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1605         XCS("ABCDEFGHIJKLMNOP");
1606   /* clang-format on */
1607   CharData storage;
1608   void *buffer;
1609 
1610   CharData_Init(&storage);
1611   XML_SetUserData(g_parser, &storage);
1612   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1613   buffer = XML_GetBuffer(g_parser, sizeof(text) - 1);
1614   if (buffer == NULL)
1615     fail("Could not allocate parse buffer");
1616   assert(buffer != NULL);
1617   memcpy(buffer, text, sizeof(text) - 1);
1618   if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
1619     xml_failure(g_parser);
1620   CharData_CheckXMLChars(&storage, expected);
1621 }
1622 END_TEST
1623 
1624 /* Test handling of multiple unit UTF-16 characters */
START_TEST(test_multichar_cdata_utf16)1625 START_TEST(test_multichar_cdata_utf16) {
1626   /* Test data is:
1627    *   <?xml version='1.0' encoding='utf-16'?>
1628    *   <a><![CDATA[{MINIM}{CROTCHET}]]></a>
1629    *
1630    * where {MINIM} is U+1d15e (a minim or half-note)
1631    *   UTF-16: 0xd834 0xdd5e
1632    *   UTF-8:  0xf0 0x9d 0x85 0x9e
1633    * and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
1634    *   UTF-16: 0xd834 0xdd5f
1635    *   UTF-8:  0xf0 0x9d 0x85 0x9f
1636    */
1637   const char text[] = "\0<\0?\0x\0m\0l\0"
1638                       " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1639                       " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1640                       "1\0"
1641                       "6\0'"
1642                       "\0?\0>\0\n"
1643                       "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1644                       "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
1645                       "\0]\0]\0>\0<\0/\0a\0>";
1646 #ifdef XML_UNICODE
1647   const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f");
1648 #else
1649   const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
1650 #endif
1651   CharData storage;
1652 
1653   CharData_Init(&storage);
1654   XML_SetUserData(g_parser, &storage);
1655   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1656 
1657   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1658       == XML_STATUS_ERROR)
1659     xml_failure(g_parser);
1660   CharData_CheckXMLChars(&storage, expected);
1661 }
1662 END_TEST
1663 
1664 /* Test that an element name with a UTF-16 surrogate pair is rejected */
START_TEST(test_utf16_bad_surrogate_pair)1665 START_TEST(test_utf16_bad_surrogate_pair) {
1666   /* Test data is:
1667    *   <?xml version='1.0' encoding='utf-16'?>
1668    *   <a><![CDATA[{BADLINB}]]></a>
1669    *
1670    * where {BADLINB} is U+10000 (the first Linear B character)
1671    * with the UTF-16 surrogate pair in the wrong order, i.e.
1672    *   0xdc00 0xd800
1673    */
1674   const char text[] = "\0<\0?\0x\0m\0l\0"
1675                       " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1676                       " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1677                       "1\0"
1678                       "6\0'"
1679                       "\0?\0>\0\n"
1680                       "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1681                       "\xdc\x00\xd8\x00"
1682                       "\0]\0]\0>\0<\0/\0a\0>";
1683 
1684   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1685       != XML_STATUS_ERROR)
1686     fail("Reversed UTF-16 surrogate pair not faulted");
1687   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
1688     xml_failure(g_parser);
1689 }
1690 END_TEST
1691 
START_TEST(test_bad_cdata)1692 START_TEST(test_bad_cdata) {
1693   struct CaseData {
1694     const char *text;
1695     enum XML_Error expectedError;
1696   };
1697 
1698   struct CaseData cases[]
1699       = {{"<a><", XML_ERROR_UNCLOSED_TOKEN},
1700          {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
1701          {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
1702          {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
1703          {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
1704          {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
1705          {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
1706          {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
1707 
1708          {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1709          {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1710          {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1711 
1712          {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
1713          {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN},  /* ?! */
1714          {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1715          {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
1716          {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
1717          {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
1718          {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
1719 
1720          {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1721          {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1722          {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}};
1723 
1724   size_t i = 0;
1725   for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1726     set_subtest("%s", cases[i].text);
1727     const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
1728         g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
1729     const enum XML_Error actualError = XML_GetErrorCode(g_parser);
1730 
1731     assert(actualStatus == XML_STATUS_ERROR);
1732 
1733     if (actualError != cases[i].expectedError) {
1734       char message[100];
1735       snprintf(message, sizeof(message),
1736                "Expected error %d but got error %d for case %u: \"%s\"\n",
1737                cases[i].expectedError, actualError, (unsigned int)i + 1,
1738                cases[i].text);
1739       fail(message);
1740     }
1741 
1742     XML_ParserReset(g_parser, NULL);
1743   }
1744 }
1745 END_TEST
1746 
1747 /* Test failures in UTF-16 CDATA */
START_TEST(test_bad_cdata_utf16)1748 START_TEST(test_bad_cdata_utf16) {
1749   struct CaseData {
1750     size_t text_bytes;
1751     const char *text;
1752     enum XML_Error expected_error;
1753   };
1754 
1755   const char prolog[] = "\0<\0?\0x\0m\0l\0"
1756                         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1757                         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1758                         "1\0"
1759                         "6\0'"
1760                         "\0?\0>\0\n"
1761                         "\0<\0a\0>";
1762   struct CaseData cases[] = {
1763       {1, "\0", XML_ERROR_UNCLOSED_TOKEN},
1764       {2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
1765       {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
1766       {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
1767       {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
1768       {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
1769       {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
1770       {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
1771       {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
1772       {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
1773       {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
1774       {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
1775       {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1776       {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
1777       {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
1778       {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
1779       {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1780       {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1781       {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION},
1782       {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION},
1783       /* Now add a four-byte UTF-16 character */
1784       {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
1785        XML_ERROR_UNCLOSED_CDATA_SECTION},
1786       {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR},
1787       {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
1788        XML_ERROR_PARTIAL_CHAR},
1789       {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
1790        XML_ERROR_UNCLOSED_CDATA_SECTION}};
1791   size_t i;
1792 
1793   for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1794     set_subtest("case %lu", (long unsigned)(i + 1));
1795     enum XML_Status actual_status;
1796     enum XML_Error actual_error;
1797 
1798     if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1,
1799                                 XML_FALSE)
1800         == XML_STATUS_ERROR)
1801       xml_failure(g_parser);
1802     actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text,
1803                                             (int)cases[i].text_bytes, XML_TRUE);
1804     assert(actual_status == XML_STATUS_ERROR);
1805     actual_error = XML_GetErrorCode(g_parser);
1806     if (actual_error != cases[i].expected_error) {
1807       char message[1024];
1808 
1809       snprintf(message, sizeof(message),
1810                "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR
1811                ") for case %lu\n",
1812                cases[i].expected_error,
1813                XML_ErrorString(cases[i].expected_error), actual_error,
1814                XML_ErrorString(actual_error), (long unsigned)(i + 1));
1815       fail(message);
1816     }
1817     XML_ParserReset(g_parser, NULL);
1818   }
1819 }
1820 END_TEST
1821 
1822 /* Test stopping the parser in cdata handler */
START_TEST(test_stop_parser_between_cdata_calls)1823 START_TEST(test_stop_parser_between_cdata_calls) {
1824   const char *text = long_cdata_text;
1825 
1826   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1827   g_resumable = XML_FALSE;
1828   expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler");
1829 }
1830 END_TEST
1831 
1832 /* Test suspending the parser in cdata handler */
START_TEST(test_suspend_parser_between_cdata_calls)1833 START_TEST(test_suspend_parser_between_cdata_calls) {
1834   const char *text = long_cdata_text;
1835   enum XML_Status result;
1836 
1837   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1838   g_resumable = XML_TRUE;
1839   result = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
1840   if (result != XML_STATUS_SUSPENDED) {
1841     if (result == XML_STATUS_ERROR)
1842       xml_failure(g_parser);
1843     fail("Parse not suspended in CDATA handler");
1844   }
1845   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1846     xml_failure(g_parser);
1847 }
1848 END_TEST
1849 
1850 /* Test memory allocation functions */
START_TEST(test_memory_allocation)1851 START_TEST(test_memory_allocation) {
1852   char *buffer = (char *)XML_MemMalloc(g_parser, 256);
1853   char *p;
1854 
1855   if (buffer == NULL) {
1856     fail("Allocation failed");
1857   } else {
1858     /* Try writing to memory; some OSes try to cheat! */
1859     buffer[0] = 'T';
1860     buffer[1] = 'E';
1861     buffer[2] = 'S';
1862     buffer[3] = 'T';
1863     buffer[4] = '\0';
1864     if (strcmp(buffer, "TEST") != 0) {
1865       fail("Memory not writable");
1866     } else {
1867       p = (char *)XML_MemRealloc(g_parser, buffer, 512);
1868       if (p == NULL) {
1869         fail("Reallocation failed");
1870       } else {
1871         /* Write again, just to be sure */
1872         buffer = p;
1873         buffer[0] = 'V';
1874         if (strcmp(buffer, "VEST") != 0) {
1875           fail("Reallocated memory not writable");
1876         }
1877       }
1878     }
1879     XML_MemFree(g_parser, buffer);
1880   }
1881 }
1882 END_TEST
1883 
1884 /* Test XML_DefaultCurrent() passes handling on correctly */
START_TEST(test_default_current)1885 START_TEST(test_default_current) {
1886   const char *text = "<doc>hell]</doc>";
1887   const char *entity_text = "<!DOCTYPE doc [\n"
1888                             "<!ENTITY entity '&#37;'>\n"
1889                             "]>\n"
1890                             "<doc>&entity;</doc>";
1891 
1892   set_subtest("with defaulting");
1893   {
1894     struct handler_record_list storage;
1895     storage.count = 0;
1896     XML_SetDefaultHandler(g_parser, record_default_handler);
1897     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1898     XML_SetUserData(g_parser, &storage);
1899     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1900         == XML_STATUS_ERROR)
1901       xml_failure(g_parser);
1902     int i = 0;
1903     assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1904     // we should have gotten one or more cdata callbacks, totaling 5 chars
1905     int cdata_len_remaining = 5;
1906     while (cdata_len_remaining > 0) {
1907       const struct handler_record_entry *c_entry
1908           = handler_record_get(&storage, i++);
1909       assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
1910       assert_true(c_entry->arg > 0);
1911       assert_true(c_entry->arg <= cdata_len_remaining);
1912       cdata_len_remaining -= c_entry->arg;
1913       // default handler must follow, with the exact same len argument.
1914       assert_record_handler_called(&storage, i++, "record_default_handler",
1915                                    c_entry->arg);
1916     }
1917     assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1918     assert_true(storage.count == i);
1919   }
1920 
1921   /* Again, without the defaulting */
1922   set_subtest("no defaulting");
1923   {
1924     struct handler_record_list storage;
1925     storage.count = 0;
1926     XML_ParserReset(g_parser, NULL);
1927     XML_SetDefaultHandler(g_parser, record_default_handler);
1928     XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
1929     XML_SetUserData(g_parser, &storage);
1930     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1931         == XML_STATUS_ERROR)
1932       xml_failure(g_parser);
1933     int i = 0;
1934     assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1935     // we should have gotten one or more cdata callbacks, totaling 5 chars
1936     int cdata_len_remaining = 5;
1937     while (cdata_len_remaining > 0) {
1938       const struct handler_record_entry *c_entry
1939           = handler_record_get(&storage, i++);
1940       assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
1941       assert_true(c_entry->arg > 0);
1942       assert_true(c_entry->arg <= cdata_len_remaining);
1943       cdata_len_remaining -= c_entry->arg;
1944     }
1945     assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1946     assert_true(storage.count == i);
1947   }
1948 
1949   /* Now with an internal entity to complicate matters */
1950   set_subtest("with internal entity");
1951   {
1952     struct handler_record_list storage;
1953     storage.count = 0;
1954     XML_ParserReset(g_parser, NULL);
1955     XML_SetDefaultHandler(g_parser, record_default_handler);
1956     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1957     XML_SetUserData(g_parser, &storage);
1958     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1959                                 XML_TRUE)
1960         == XML_STATUS_ERROR)
1961       xml_failure(g_parser);
1962     /* The default handler suppresses the entity */
1963     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
1964     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
1965     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
1966     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
1967     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
1968     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
1969     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
1970     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
1971     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
1972     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
1973     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
1974     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
1975     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
1976     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
1977     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
1978     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
1979     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
1980     assert_record_handler_called(&storage, 17, "record_default_handler", 8);
1981     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
1982     assert_true(storage.count == 19);
1983   }
1984 
1985   /* Again, with a skip handler */
1986   set_subtest("with skip handler");
1987   {
1988     struct handler_record_list storage;
1989     storage.count = 0;
1990     XML_ParserReset(g_parser, NULL);
1991     XML_SetDefaultHandler(g_parser, record_default_handler);
1992     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1993     XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
1994     XML_SetUserData(g_parser, &storage);
1995     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1996                                 XML_TRUE)
1997         == XML_STATUS_ERROR)
1998       xml_failure(g_parser);
1999     /* The default handler suppresses the entity */
2000     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2001     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2002     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2003     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2004     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2005     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2006     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2007     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2008     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2009     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2010     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2011     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2012     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2013     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2014     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2015     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2016     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2017     assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
2018     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2019     assert_true(storage.count == 19);
2020   }
2021 
2022   /* This time, allow the entity through */
2023   set_subtest("allow entity");
2024   {
2025     struct handler_record_list storage;
2026     storage.count = 0;
2027     XML_ParserReset(g_parser, NULL);
2028     XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2029     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2030     XML_SetUserData(g_parser, &storage);
2031     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2032                                 XML_TRUE)
2033         == XML_STATUS_ERROR)
2034       xml_failure(g_parser);
2035     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2036     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2037     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2038     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2039     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2040     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2041     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2042     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2043     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2044     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2045     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2046     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2047     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2048     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2049     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2050     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2051     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2052     assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
2053     assert_record_handler_called(&storage, 18, "record_default_handler", 1);
2054     assert_record_handler_called(&storage, 19, "record_default_handler", 6);
2055     assert_true(storage.count == 20);
2056   }
2057 
2058   /* Finally, without passing the cdata to the default handler */
2059   set_subtest("not passing cdata");
2060   {
2061     struct handler_record_list storage;
2062     storage.count = 0;
2063     XML_ParserReset(g_parser, NULL);
2064     XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2065     XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2066     XML_SetUserData(g_parser, &storage);
2067     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2068                                 XML_TRUE)
2069         == XML_STATUS_ERROR)
2070       xml_failure(g_parser);
2071     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2072     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2073     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2074     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2075     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2076     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2077     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2078     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2079     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2080     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2081     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2082     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2083     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2084     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2085     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2086     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2087     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2088     assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
2089                                  1);
2090     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2091     assert_true(storage.count == 19);
2092   }
2093 }
2094 END_TEST
2095 
2096 /* Test DTD element parsing code paths */
START_TEST(test_dtd_elements)2097 START_TEST(test_dtd_elements) {
2098   const char *text = "<!DOCTYPE doc [\n"
2099                      "<!ELEMENT doc (chapter)>\n"
2100                      "<!ELEMENT chapter (#PCDATA)>\n"
2101                      "]>\n"
2102                      "<doc><chapter>Wombats are go</chapter></doc>";
2103 
2104   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
2105   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2106       == XML_STATUS_ERROR)
2107     xml_failure(g_parser);
2108 }
2109 END_TEST
2110 
2111 static void XMLCALL
element_decl_check_model(void * userData,const XML_Char * name,XML_Content * model)2112 element_decl_check_model(void *userData, const XML_Char *name,
2113                          XML_Content *model) {
2114   UNUSED_P(userData);
2115   uint32_t errorFlags = 0;
2116 
2117   /* Expected model array structure is this:
2118    * [0] (type 6, quant 0)
2119    *   [1] (type 5, quant 0)
2120    *     [3] (type 4, quant 0, name "bar")
2121    *     [4] (type 4, quant 0, name "foo")
2122    *     [5] (type 4, quant 3, name "xyz")
2123    *   [2] (type 4, quant 2, name "zebra")
2124    */
2125   errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
2126   errorFlags |= ((model != NULL) ? 0 : (1u << 1));
2127 
2128   if (model != NULL) {
2129     errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
2130     errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
2131     errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
2132     errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
2133     errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
2134 
2135     errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
2136     errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
2137     errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
2138     errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
2139     errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
2140 
2141     errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
2142     errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
2143     errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
2144     errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
2145     errorFlags
2146         |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
2147 
2148     errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
2149     errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
2150     errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
2151     errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
2152     errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
2153 
2154     errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
2155     errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
2156     errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
2157     errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
2158     errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
2159 
2160     errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
2161     errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
2162     errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
2163     errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
2164     errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
2165   }
2166 
2167   XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
2168   XML_FreeContentModel(g_parser, model);
2169 }
2170 
START_TEST(test_dtd_elements_nesting)2171 START_TEST(test_dtd_elements_nesting) {
2172   // Payload inspired by a test in Perl's XML::Parser
2173   const char *text = "<!DOCTYPE foo [\n"
2174                      "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
2175                      "]>\n"
2176                      "<foo/>";
2177 
2178   XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
2179 
2180   XML_SetElementDeclHandler(g_parser, element_decl_check_model);
2181   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2182       == XML_STATUS_ERROR)
2183     xml_failure(g_parser);
2184 
2185   if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
2186     fail("Element declaration model regression detected");
2187 }
2188 END_TEST
2189 
2190 /* Test foreign DTD handling */
START_TEST(test_set_foreign_dtd)2191 START_TEST(test_set_foreign_dtd) {
2192   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
2193   const char *text2 = "<doc>&entity;</doc>";
2194   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2195 
2196   /* Check hash salt is passed through too */
2197   XML_SetHashSalt(g_parser, 0x12345678);
2198   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2199   XML_SetUserData(g_parser, &test_data);
2200   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2201   /* Add a default handler to exercise more code paths */
2202   XML_SetDefaultHandler(g_parser, dummy_default_handler);
2203   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2204     fail("Could not set foreign DTD");
2205   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2206       == XML_STATUS_ERROR)
2207     xml_failure(g_parser);
2208 
2209   /* Ensure that trying to set the DTD after parsing has started
2210    * is faulted, even if it's the same setting.
2211    */
2212   if (XML_UseForeignDTD(g_parser, XML_TRUE)
2213       != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2214     fail("Failed to reject late foreign DTD setting");
2215   /* Ditto for the hash salt */
2216   if (XML_SetHashSalt(g_parser, 0x23456789))
2217     fail("Failed to reject late hash salt change");
2218 
2219   /* Now finish the parse */
2220   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2221       == XML_STATUS_ERROR)
2222     xml_failure(g_parser);
2223 }
2224 END_TEST
2225 
2226 /* Test foreign DTD handling with a failing NotStandalone handler */
START_TEST(test_foreign_dtd_not_standalone)2227 START_TEST(test_foreign_dtd_not_standalone) {
2228   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2229                      "<doc>&entity;</doc>";
2230   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2231 
2232   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2233   XML_SetUserData(g_parser, &test_data);
2234   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2235   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
2236   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2237     fail("Could not set foreign DTD");
2238   expect_failure(text, XML_ERROR_NOT_STANDALONE,
2239                  "NotStandalonehandler failed to reject");
2240 }
2241 END_TEST
2242 
2243 /* Test invalid character in a foreign DTD is faulted */
START_TEST(test_invalid_foreign_dtd)2244 START_TEST(test_invalid_foreign_dtd) {
2245   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2246                      "<doc>&entity;</doc>";
2247   ExtFaults test_data
2248       = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN};
2249 
2250   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2251   XML_SetUserData(g_parser, &test_data);
2252   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
2253   XML_UseForeignDTD(g_parser, XML_TRUE);
2254   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2255                  "Bad DTD should not have been accepted");
2256 }
2257 END_TEST
2258 
2259 /* Test foreign DTD use with a doctype */
START_TEST(test_foreign_dtd_with_doctype)2260 START_TEST(test_foreign_dtd_with_doctype) {
2261   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
2262                       "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
2263   const char *text2 = "<doc>&entity;</doc>";
2264   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2265 
2266   /* Check hash salt is passed through too */
2267   XML_SetHashSalt(g_parser, 0x12345678);
2268   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2269   XML_SetUserData(g_parser, &test_data);
2270   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2271   /* Add a default handler to exercise more code paths */
2272   XML_SetDefaultHandler(g_parser, dummy_default_handler);
2273   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2274     fail("Could not set foreign DTD");
2275   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2276       == XML_STATUS_ERROR)
2277     xml_failure(g_parser);
2278 
2279   /* Ensure that trying to set the DTD after parsing has started
2280    * is faulted, even if it's the same setting.
2281    */
2282   if (XML_UseForeignDTD(g_parser, XML_TRUE)
2283       != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2284     fail("Failed to reject late foreign DTD setting");
2285   /* Ditto for the hash salt */
2286   if (XML_SetHashSalt(g_parser, 0x23456789))
2287     fail("Failed to reject late hash salt change");
2288 
2289   /* Now finish the parse */
2290   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2291       == XML_STATUS_ERROR)
2292     xml_failure(g_parser);
2293 }
2294 END_TEST
2295 
2296 /* Test XML_UseForeignDTD with no external subset present */
START_TEST(test_foreign_dtd_without_external_subset)2297 START_TEST(test_foreign_dtd_without_external_subset) {
2298   const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n"
2299                      "<doc>&foo;</doc>";
2300 
2301   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2302   XML_SetUserData(g_parser, NULL);
2303   XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2304   XML_UseForeignDTD(g_parser, XML_TRUE);
2305   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2306       == XML_STATUS_ERROR)
2307     xml_failure(g_parser);
2308 }
2309 END_TEST
2310 
START_TEST(test_empty_foreign_dtd)2311 START_TEST(test_empty_foreign_dtd) {
2312   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2313                      "<doc>&entity;</doc>";
2314 
2315   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2316   XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2317   XML_UseForeignDTD(g_parser, XML_TRUE);
2318   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
2319                  "Undefined entity not faulted");
2320 }
2321 END_TEST
2322 
2323 /* Test XML Base is set and unset appropriately */
START_TEST(test_set_base)2324 START_TEST(test_set_base) {
2325   const XML_Char *old_base;
2326   const XML_Char *new_base = XCS("/local/file/name.xml");
2327 
2328   old_base = XML_GetBase(g_parser);
2329   if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK)
2330     fail("Unable to set base");
2331   if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0)
2332     fail("Base setting not correct");
2333   if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK)
2334     fail("Unable to NULL base");
2335   if (XML_GetBase(g_parser) != NULL)
2336     fail("Base setting not nulled");
2337   XML_SetBase(g_parser, old_base);
2338 }
2339 END_TEST
2340 
2341 /* Test attribute counts, indexing, etc */
START_TEST(test_attributes)2342 START_TEST(test_attributes) {
2343   const char *text = "<!DOCTYPE doc [\n"
2344                      "<!ELEMENT doc (tag)>\n"
2345                      "<!ATTLIST doc id ID #REQUIRED>\n"
2346                      "]>"
2347                      "<doc a='1' id='one' b='2'>"
2348                      "<tag c='3'/>"
2349                      "</doc>";
2350   AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
2351                          {XCS("b"), XCS("2")},
2352                          {XCS("id"), XCS("one")},
2353                          {NULL, NULL}};
2354   AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
2355   ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL},
2356                         {XCS("tag"), 1, NULL, NULL},
2357                         {NULL, 0, NULL, NULL}};
2358   info[0].attributes = doc_info;
2359   info[1].attributes = tag_info;
2360 
2361   XML_SetStartElementHandler(g_parser, counting_start_element_handler);
2362   XML_SetUserData(g_parser, info);
2363   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2364       == XML_STATUS_ERROR)
2365     xml_failure(g_parser);
2366 }
2367 END_TEST
2368 
2369 /* Test reset works correctly in the middle of processing an internal
2370  * entity.  Exercises some obscure code in XML_ParserReset().
2371  */
START_TEST(test_reset_in_entity)2372 START_TEST(test_reset_in_entity) {
2373   const char *text = "<!DOCTYPE doc [\n"
2374                      "<!ENTITY wombat 'wom'>\n"
2375                      "<!ENTITY entity 'hi &wom; there'>\n"
2376                      "]>\n"
2377                      "<doc>&entity;</doc>";
2378   XML_ParsingStatus status;
2379 
2380   g_resumable = XML_TRUE;
2381   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2382   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2383       == XML_STATUS_ERROR)
2384     xml_failure(g_parser);
2385   XML_GetParsingStatus(g_parser, &status);
2386   if (status.parsing != XML_SUSPENDED)
2387     fail("Parsing status not SUSPENDED");
2388   XML_ParserReset(g_parser, NULL);
2389   XML_GetParsingStatus(g_parser, &status);
2390   if (status.parsing != XML_INITIALIZED)
2391     fail("Parsing status doesn't reset to INITIALIZED");
2392 }
2393 END_TEST
2394 
2395 /* Test that resume correctly passes through parse errors */
START_TEST(test_resume_invalid_parse)2396 START_TEST(test_resume_invalid_parse) {
2397   const char *text = "<doc>Hello</doc"; /* Missing closing wedge */
2398 
2399   g_resumable = XML_TRUE;
2400   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2401   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2402       == XML_STATUS_ERROR)
2403     xml_failure(g_parser);
2404   if (XML_ResumeParser(g_parser) == XML_STATUS_OK)
2405     fail("Resumed invalid parse not faulted");
2406   if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN)
2407     fail("Invalid parse not correctly faulted");
2408 }
2409 END_TEST
2410 
2411 /* Test that re-suspended parses are correctly passed through */
START_TEST(test_resume_resuspended)2412 START_TEST(test_resume_resuspended) {
2413   const char *text = "<doc>Hello<meep/>world</doc>";
2414 
2415   g_resumable = XML_TRUE;
2416   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2417   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2418       == XML_STATUS_ERROR)
2419     xml_failure(g_parser);
2420   g_resumable = XML_TRUE;
2421   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2422   if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
2423     fail("Resumption not suspended");
2424   /* This one should succeed and finish up */
2425   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
2426     xml_failure(g_parser);
2427 }
2428 END_TEST
2429 
2430 /* Test that CDATA shows up correctly through a default handler */
START_TEST(test_cdata_default)2431 START_TEST(test_cdata_default) {
2432   const char *text = "<doc><![CDATA[Hello\nworld]]></doc>";
2433   const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>");
2434   CharData storage;
2435 
2436   CharData_Init(&storage);
2437   XML_SetUserData(g_parser, &storage);
2438   XML_SetDefaultHandler(g_parser, accumulate_characters);
2439 
2440   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2441       == XML_STATUS_ERROR)
2442     xml_failure(g_parser);
2443   CharData_CheckXMLChars(&storage, expected);
2444 }
2445 END_TEST
2446 
2447 /* Test resetting a subordinate parser does exactly nothing */
START_TEST(test_subordinate_reset)2448 START_TEST(test_subordinate_reset) {
2449   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2450                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2451                      "<doc>&entity;</doc>";
2452 
2453   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2454   XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter);
2455   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2456       == XML_STATUS_ERROR)
2457     xml_failure(g_parser);
2458 }
2459 END_TEST
2460 
2461 /* Test suspending a subordinate parser */
START_TEST(test_subordinate_suspend)2462 START_TEST(test_subordinate_suspend) {
2463   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2464                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2465                      "<doc>&entity;</doc>";
2466 
2467   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2468   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender);
2469   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2470       == XML_STATUS_ERROR)
2471     xml_failure(g_parser);
2472 }
2473 END_TEST
2474 
2475 /* Test suspending a subordinate parser from an XML declaration */
2476 /* Increases code coverage of the tests */
2477 
START_TEST(test_subordinate_xdecl_suspend)2478 START_TEST(test_subordinate_xdecl_suspend) {
2479   const char *text
2480       = "<!DOCTYPE doc [\n"
2481         "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2482         "]>\n"
2483         "<doc>&entity;</doc>";
2484 
2485   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2486   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2487   g_resumable = XML_TRUE;
2488   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2489       == XML_STATUS_ERROR)
2490     xml_failure(g_parser);
2491 }
2492 END_TEST
2493 
START_TEST(test_subordinate_xdecl_abort)2494 START_TEST(test_subordinate_xdecl_abort) {
2495   const char *text
2496       = "<!DOCTYPE doc [\n"
2497         "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2498         "]>\n"
2499         "<doc>&entity;</doc>";
2500 
2501   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2502   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2503   g_resumable = XML_FALSE;
2504   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2505       == XML_STATUS_ERROR)
2506     xml_failure(g_parser);
2507 }
2508 END_TEST
2509 
2510 /* Test external entity fault handling with suspension */
START_TEST(test_ext_entity_invalid_suspended_parse)2511 START_TEST(test_ext_entity_invalid_suspended_parse) {
2512   const char *text = "<!DOCTYPE doc [\n"
2513                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2514                      "]>\n"
2515                      "<doc>&en;</doc>";
2516   ExtFaults faults[]
2517       = {{"<?xml version='1.0' encoding='us-ascii'?><",
2518           "Incomplete element declaration not faulted", NULL,
2519           XML_ERROR_UNCLOSED_TOKEN},
2520          {/* First two bytes of a three-byte char */
2521           "<?xml version='1.0' encoding='utf-8'?>\xe2\x82",
2522           "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
2523          {NULL, NULL, NULL, XML_ERROR_NONE}};
2524   ExtFaults *fault;
2525 
2526   for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
2527     set_subtest("%s", fault->parse_text);
2528     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2529     XML_SetExternalEntityRefHandler(g_parser,
2530                                     external_entity_suspending_faulter);
2531     XML_SetUserData(g_parser, fault);
2532     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2533                    "Parser did not report external entity error");
2534     XML_ParserReset(g_parser, NULL);
2535   }
2536 }
2537 END_TEST
2538 
2539 /* Test setting an explicit encoding */
START_TEST(test_explicit_encoding)2540 START_TEST(test_explicit_encoding) {
2541   const char *text1 = "<doc>Hello ";
2542   const char *text2 = " World</doc>";
2543 
2544   /* Just check that we can set the encoding to NULL before starting */
2545   if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2546     fail("Failed to initialise encoding to NULL");
2547   /* Say we are UTF-8 */
2548   if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK)
2549     fail("Failed to set explicit encoding");
2550   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2551       == XML_STATUS_ERROR)
2552     xml_failure(g_parser);
2553   /* Try to switch encodings mid-parse */
2554   if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR)
2555     fail("Allowed encoding change");
2556   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2557       == XML_STATUS_ERROR)
2558     xml_failure(g_parser);
2559   /* Try now the parse is over */
2560   if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2561     fail("Failed to unset encoding");
2562 }
2563 END_TEST
2564 
2565 /* Test handling of trailing CR (rather than newline) */
START_TEST(test_trailing_cr)2566 START_TEST(test_trailing_cr) {
2567   const char *text = "<doc>\r";
2568   int found_cr;
2569 
2570   /* Try with a character handler, for code coverage */
2571   XML_SetCharacterDataHandler(g_parser, cr_cdata_handler);
2572   XML_SetUserData(g_parser, &found_cr);
2573   found_cr = 0;
2574   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2575       == XML_STATUS_OK)
2576     fail("Failed to fault unclosed doc");
2577   if (found_cr == 0)
2578     fail("Did not catch the carriage return");
2579   XML_ParserReset(g_parser, NULL);
2580 
2581   /* Now with a default handler instead */
2582   XML_SetDefaultHandler(g_parser, cr_cdata_handler);
2583   XML_SetUserData(g_parser, &found_cr);
2584   found_cr = 0;
2585   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2586       == XML_STATUS_OK)
2587     fail("Failed to fault unclosed doc");
2588   if (found_cr == 0)
2589     fail("Did not catch default carriage return");
2590 }
2591 END_TEST
2592 
2593 /* Test trailing CR in an external entity parse */
START_TEST(test_ext_entity_trailing_cr)2594 START_TEST(test_ext_entity_trailing_cr) {
2595   const char *text = "<!DOCTYPE doc [\n"
2596                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2597                      "]>\n"
2598                      "<doc>&en;</doc>";
2599   int found_cr;
2600 
2601   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2602   XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher);
2603   XML_SetUserData(g_parser, &found_cr);
2604   found_cr = 0;
2605   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2606       != XML_STATUS_OK)
2607     xml_failure(g_parser);
2608   if (found_cr == 0)
2609     fail("No carriage return found");
2610   XML_ParserReset(g_parser, NULL);
2611 
2612   /* Try again with a different trailing CR */
2613   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2614   XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher);
2615   XML_SetUserData(g_parser, &found_cr);
2616   found_cr = 0;
2617   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2618       != XML_STATUS_OK)
2619     xml_failure(g_parser);
2620   if (found_cr == 0)
2621     fail("No carriage return found");
2622 }
2623 END_TEST
2624 
2625 /* Test handling of trailing square bracket */
START_TEST(test_trailing_rsqb)2626 START_TEST(test_trailing_rsqb) {
2627   const char *text8 = "<doc>]";
2628   const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000";
2629   int found_rsqb;
2630   int text8_len = (int)strlen(text8);
2631 
2632   XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2633   XML_SetUserData(g_parser, &found_rsqb);
2634   found_rsqb = 0;
2635   if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE)
2636       == XML_STATUS_OK)
2637     fail("Failed to fault unclosed doc");
2638   if (found_rsqb == 0)
2639     fail("Did not catch the right square bracket");
2640 
2641   /* Try again with a different encoding */
2642   XML_ParserReset(g_parser, NULL);
2643   XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2644   XML_SetUserData(g_parser, &found_rsqb);
2645   found_rsqb = 0;
2646   if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2647                               XML_TRUE)
2648       == XML_STATUS_OK)
2649     fail("Failed to fault unclosed doc");
2650   if (found_rsqb == 0)
2651     fail("Did not catch the right square bracket");
2652 
2653   /* And finally with a default handler */
2654   XML_ParserReset(g_parser, NULL);
2655   XML_SetDefaultHandler(g_parser, rsqb_handler);
2656   XML_SetUserData(g_parser, &found_rsqb);
2657   found_rsqb = 0;
2658   if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2659                               XML_TRUE)
2660       == XML_STATUS_OK)
2661     fail("Failed to fault unclosed doc");
2662   if (found_rsqb == 0)
2663     fail("Did not catch the right square bracket");
2664 }
2665 END_TEST
2666 
2667 /* Test trailing right square bracket in an external entity parse */
START_TEST(test_ext_entity_trailing_rsqb)2668 START_TEST(test_ext_entity_trailing_rsqb) {
2669   const char *text = "<!DOCTYPE doc [\n"
2670                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2671                      "]>\n"
2672                      "<doc>&en;</doc>";
2673   int found_rsqb;
2674 
2675   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2676   XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher);
2677   XML_SetUserData(g_parser, &found_rsqb);
2678   found_rsqb = 0;
2679   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2680       != XML_STATUS_OK)
2681     xml_failure(g_parser);
2682   if (found_rsqb == 0)
2683     fail("No right square bracket found");
2684 }
2685 END_TEST
2686 
2687 /* Test CDATA handling in an external entity */
START_TEST(test_ext_entity_good_cdata)2688 START_TEST(test_ext_entity_good_cdata) {
2689   const char *text = "<!DOCTYPE doc [\n"
2690                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2691                      "]>\n"
2692                      "<doc>&en;</doc>";
2693 
2694   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2695   XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii);
2696   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2697       != XML_STATUS_OK)
2698     xml_failure(g_parser);
2699 }
2700 END_TEST
2701 
2702 /* Test user parameter settings */
START_TEST(test_user_parameters)2703 START_TEST(test_user_parameters) {
2704   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2705                      "<!-- Primary parse -->\n"
2706                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2707                      "<doc>&entity;";
2708   const char *epilog = "<!-- Back to primary parser -->\n"
2709                        "</doc>";
2710 
2711   g_comment_count = 0;
2712   g_skip_count = 0;
2713   g_xdecl_count = 0;
2714   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2715   XML_SetXmlDeclHandler(g_parser, xml_decl_handler);
2716   XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker);
2717   XML_SetCommentHandler(g_parser, data_check_comment_handler);
2718   XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler);
2719   XML_UseParserAsHandlerArg(g_parser);
2720   XML_SetUserData(g_parser, (void *)1);
2721   g_handler_data = g_parser;
2722   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2723       == XML_STATUS_ERROR)
2724     xml_failure(g_parser);
2725   /* Ensure we can't change policy mid-parse */
2726   if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER))
2727     fail("Changed param entity parsing policy while parsing");
2728   if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
2729       == XML_STATUS_ERROR)
2730     xml_failure(g_parser);
2731   if (g_comment_count != 3)
2732     fail("Comment handler not invoked enough times");
2733   if (g_skip_count != 1)
2734     fail("Skip handler not invoked enough times");
2735   if (g_xdecl_count != 1)
2736     fail("XML declaration handler not invoked");
2737 }
2738 END_TEST
2739 
2740 /* Test that an explicit external entity handler argument replaces
2741  * the parser as the first argument.
2742  *
2743  * We do not call the first parameter to the external entity handler
2744  * 'parser' for once, since the first time the handler is called it
2745  * will actually be a text string.  We need to be able to access the
2746  * global 'parser' variable to create our external entity parser from,
2747  * since there are code paths we need to ensure get executed.
2748  */
START_TEST(test_ext_entity_ref_parameter)2749 START_TEST(test_ext_entity_ref_parameter) {
2750   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2751                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2752                      "<doc>&entity;</doc>";
2753 
2754   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2755   XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2756   /* Set a handler arg that is not NULL and not parser (which is
2757    * what NULL would cause to be passed.
2758    */
2759   XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text);
2760   g_handler_data = text;
2761   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2762       == XML_STATUS_ERROR)
2763     xml_failure(g_parser);
2764 
2765   /* Now try again with unset args */
2766   XML_ParserReset(g_parser, NULL);
2767   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2768   XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2769   XML_SetExternalEntityRefHandlerArg(g_parser, NULL);
2770   g_handler_data = g_parser;
2771   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2772       == XML_STATUS_ERROR)
2773     xml_failure(g_parser);
2774 }
2775 END_TEST
2776 
2777 /* Test the parsing of an empty string */
START_TEST(test_empty_parse)2778 START_TEST(test_empty_parse) {
2779   const char *text = "<doc></doc>";
2780   const char *partial = "<doc>";
2781 
2782   if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR)
2783     fail("Parsing empty string faulted");
2784   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2785     fail("Parsing final empty string not faulted");
2786   if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS)
2787     fail("Parsing final empty string faulted for wrong reason");
2788 
2789   /* Now try with valid text before the empty end */
2790   XML_ParserReset(g_parser, NULL);
2791   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2792       == XML_STATUS_ERROR)
2793     xml_failure(g_parser);
2794   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR)
2795     fail("Parsing final empty string faulted");
2796 
2797   /* Now try with invalid text before the empty end */
2798   XML_ParserReset(g_parser, NULL);
2799   if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial),
2800                               XML_FALSE)
2801       == XML_STATUS_ERROR)
2802     xml_failure(g_parser);
2803   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2804     fail("Parsing final incomplete empty string not faulted");
2805 }
2806 END_TEST
2807 
2808 /* Test XML_Parse for len < 0 */
START_TEST(test_negative_len_parse)2809 START_TEST(test_negative_len_parse) {
2810   const char *const doc = "<root/>";
2811   for (int isFinal = 0; isFinal < 2; isFinal++) {
2812     set_subtest("isFinal=%d", isFinal);
2813 
2814     XML_Parser parser = XML_ParserCreate(NULL);
2815 
2816     if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
2817       fail("There was not supposed to be any initial parse error.");
2818 
2819     const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal);
2820 
2821     if (status != XML_STATUS_ERROR)
2822       fail("Negative len was expected to fail the parse but did not.");
2823 
2824     if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
2825       fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
2826 
2827     XML_ParserFree(parser);
2828   }
2829 }
2830 END_TEST
2831 
2832 /* Test XML_ParseBuffer for len < 0 */
START_TEST(test_negative_len_parse_buffer)2833 START_TEST(test_negative_len_parse_buffer) {
2834   const char *const doc = "<root/>";
2835   for (int isFinal = 0; isFinal < 2; isFinal++) {
2836     set_subtest("isFinal=%d", isFinal);
2837 
2838     XML_Parser parser = XML_ParserCreate(NULL);
2839 
2840     if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
2841       fail("There was not supposed to be any initial parse error.");
2842 
2843     void *const buffer = XML_GetBuffer(parser, (int)strlen(doc));
2844 
2845     if (buffer == NULL)
2846       fail("XML_GetBuffer failed.");
2847 
2848     memcpy(buffer, doc, strlen(doc));
2849 
2850     const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal);
2851 
2852     if (status != XML_STATUS_ERROR)
2853       fail("Negative len was expected to fail the parse but did not.");
2854 
2855     if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
2856       fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
2857 
2858     XML_ParserFree(parser);
2859   }
2860 }
2861 END_TEST
2862 
2863 /* Test odd corners of the XML_GetBuffer interface */
2864 static enum XML_Status
get_feature(enum XML_FeatureEnum feature_id,long * presult)2865 get_feature(enum XML_FeatureEnum feature_id, long *presult) {
2866   const XML_Feature *feature = XML_GetFeatureList();
2867 
2868   if (feature == NULL)
2869     return XML_STATUS_ERROR;
2870   for (; feature->feature != XML_FEATURE_END; feature++) {
2871     if (feature->feature == feature_id) {
2872       *presult = feature->value;
2873       return XML_STATUS_OK;
2874     }
2875   }
2876   return XML_STATUS_ERROR;
2877 }
2878 
2879 /* Test odd corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_1)2880 START_TEST(test_get_buffer_1) {
2881   const char *text = get_buffer_test_text;
2882   void *buffer;
2883   long context_bytes;
2884 
2885   /* Attempt to allocate a negative length buffer */
2886   if (XML_GetBuffer(g_parser, -12) != NULL)
2887     fail("Negative length buffer not failed");
2888 
2889   /* Now get a small buffer and extend it past valid length */
2890   buffer = XML_GetBuffer(g_parser, 1536);
2891   if (buffer == NULL)
2892     fail("1.5K buffer failed");
2893   assert(buffer != NULL);
2894   memcpy(buffer, text, strlen(text));
2895   if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2896       == XML_STATUS_ERROR)
2897     xml_failure(g_parser);
2898   if (XML_GetBuffer(g_parser, INT_MAX) != NULL)
2899     fail("INT_MAX buffer not failed");
2900 
2901   /* Now try extending it a more reasonable but still too large
2902    * amount.  The allocator in XML_GetBuffer() doubles the buffer
2903    * size until it exceeds the requested amount or INT_MAX.  If it
2904    * exceeds INT_MAX, it rejects the request, so we want a request
2905    * between INT_MAX and INT_MAX/2.  A gap of 1K seems comfortable,
2906    * with an extra byte just to ensure that the request is off any
2907    * boundary.  The request will be inflated internally by
2908    * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
2909    * request.
2910    */
2911   if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK)
2912     context_bytes = 0;
2913   if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL)
2914     fail("INT_MAX- buffer not failed");
2915 
2916   /* Now try extending it a carefully crafted amount */
2917   if (XML_GetBuffer(g_parser, 1000) == NULL)
2918     fail("1000 buffer failed");
2919 }
2920 END_TEST
2921 
2922 /* Test more corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_2)2923 START_TEST(test_get_buffer_2) {
2924   const char *text = get_buffer_test_text;
2925   void *buffer;
2926 
2927   /* Now get a decent buffer */
2928   buffer = XML_GetBuffer(g_parser, 1536);
2929   if (buffer == NULL)
2930     fail("1.5K buffer failed");
2931   assert(buffer != NULL);
2932   memcpy(buffer, text, strlen(text));
2933   if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2934       == XML_STATUS_ERROR)
2935     xml_failure(g_parser);
2936 
2937   /* Extend it, to catch a different code path */
2938   if (XML_GetBuffer(g_parser, 1024) == NULL)
2939     fail("1024 buffer failed");
2940 }
2941 END_TEST
2942 
/* Test for signed integer overflow CVE-2022-23852 */
#if XML_CONTEXT_BYTES > 0
START_TEST(test_get_buffer_3_overflow) {
  XML_Parser parser = XML_ParserCreate(NULL);
  assert(parser != NULL);

  const char *const seed = "\n";
  const int seed_len = (int)strlen(seed);

  // Feeding the seed leaves variable "keep" in XML_GetBuffer with the
  // value seed_len on the next call.
  const enum XML_Status status
      = _XML_Parse_SINGLE_BYTES(parser, seed, seed_len, XML_FALSE /* isFinal */);
  if (status == XML_STATUS_ERROR)
    xml_failure(parser);

  // Request a size that only fits in an int as long as the kept bytes
  // are not added on top; the request must be refused.
  assert(seed_len > 0);
  if (XML_GetBuffer(parser, INT_MAX - seed_len + 1) != NULL)
    fail("enlarging buffer not failed");

  XML_ParserFree(parser);
}
END_TEST
#endif // XML_CONTEXT_BYTES > 0
2967 
START_TEST(test_buffer_can_grow_to_max)2968 START_TEST(test_buffer_can_grow_to_max) {
2969   const char *const prefixes[] = {
2970       "",
2971       "<",
2972       "<x a='",
2973       "<doc><x a='",
2974       "<document><x a='",
2975       "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
2976       "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
2977       "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
2978       "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
2979       "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
2980   const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
2981   int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
2982 #if defined(__MINGW32__) && ! defined(__MINGW64__)
2983   // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
2984   // Can we make a big allocation?
2985   void *big = malloc(maxbuf);
2986   if (! big) {
2987     // The big allocation failed. Let's be a little lenient.
2988     maxbuf = maxbuf / 2;
2989   }
2990   free(big);
2991 #endif
2992 
2993   for (int i = 0; i < num_prefixes; ++i) {
2994     set_subtest("\"%s\"", prefixes[i]);
2995     XML_Parser parser = XML_ParserCreate(NULL);
2996     const int prefix_len = (int)strlen(prefixes[i]);
2997     const enum XML_Status s
2998         = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
2999     if (s != XML_STATUS_OK)
3000       xml_failure(parser);
3001 
3002     // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
3003     // subtracting the whole prefix is easiest, and close enough.
3004     assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
3005     // The limit should be consistent; no prefix should allow us to
3006     // reach above the max buffer size.
3007     assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
3008     XML_ParserFree(parser);
3009   }
3010 }
3011 END_TEST
3012 
START_TEST(test_getbuffer_allocates_on_zero_len)3013 START_TEST(test_getbuffer_allocates_on_zero_len) {
3014   for (int first_len = 1; first_len >= 0; first_len--) {
3015     set_subtest("with len=%d first", first_len);
3016     XML_Parser parser = XML_ParserCreate(NULL);
3017     assert_true(parser != NULL);
3018     assert_true(XML_GetBuffer(parser, first_len) != NULL);
3019     assert_true(XML_GetBuffer(parser, 0) != NULL);
3020     if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK)
3021       xml_failure(parser);
3022     XML_ParserFree(parser);
3023   }
3024 }
3025 END_TEST
3026 
3027 /* Test position information macros */
START_TEST(test_byte_info_at_end)3028 START_TEST(test_byte_info_at_end) {
3029   const char *text = "<doc></doc>";
3030 
3031   if (XML_GetCurrentByteIndex(g_parser) != -1
3032       || XML_GetCurrentByteCount(g_parser) != 0)
3033     fail("Byte index/count incorrect at start of parse");
3034   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3035       == XML_STATUS_ERROR)
3036     xml_failure(g_parser);
3037   /* At end, the count will be zero and the index the end of string */
3038   if (XML_GetCurrentByteCount(g_parser) != 0)
3039     fail("Terminal byte count incorrect");
3040   if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text))
3041     fail("Terminal byte index incorrect");
3042 }
3043 END_TEST
3044 
3045 /* Test position information from errors */
3046 #define PRE_ERROR_STR "<doc></"
3047 #define POST_ERROR_STR "wombat></doc>"
START_TEST(test_byte_info_at_error)3048 START_TEST(test_byte_info_at_error) {
3049   const char *text = PRE_ERROR_STR POST_ERROR_STR;
3050 
3051   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3052       == XML_STATUS_OK)
3053     fail("Syntax error not faulted");
3054   if (XML_GetCurrentByteCount(g_parser) != 0)
3055     fail("Error byte count incorrect");
3056   if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR))
3057     fail("Error byte index incorrect");
3058 }
3059 END_TEST
3060 #undef PRE_ERROR_STR
3061 #undef POST_ERROR_STR
3062 
3063 /* Test position information in handler */
3064 #define START_ELEMENT "<e>"
3065 #define CDATA_TEXT "Hello"
3066 #define END_ELEMENT "</e>"
START_TEST(test_byte_info_at_cdata)3067 START_TEST(test_byte_info_at_cdata) {
3068   const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT;
3069   int offset, size;
3070   ByteTestData data;
3071 
3072   /* Check initial context is empty */
3073   if (XML_GetInputContext(g_parser, &offset, &size) != NULL)
3074     fail("Unexpected context at start of parse");
3075 
3076   data.start_element_len = (int)strlen(START_ELEMENT);
3077   data.cdata_len = (int)strlen(CDATA_TEXT);
3078   data.total_string_len = (int)strlen(text);
3079   XML_SetCharacterDataHandler(g_parser, byte_character_handler);
3080   XML_SetUserData(g_parser, &data);
3081   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
3082     xml_failure(g_parser);
3083 }
3084 END_TEST
3085 #undef START_ELEMENT
3086 #undef CDATA_TEXT
3087 #undef END_ELEMENT
3088 
3089 /* Test predefined entities are correctly recognised */
START_TEST(test_predefined_entities)3090 START_TEST(test_predefined_entities) {
3091   const char *text = "<doc>&lt;&gt;&amp;&quot;&apos;</doc>";
3092   const XML_Char *expected = XCS("<doc>&lt;&gt;&amp;&quot;&apos;</doc>");
3093   const XML_Char *result = XCS("<>&\"'");
3094   CharData storage;
3095 
3096   XML_SetDefaultHandler(g_parser, accumulate_characters);
3097   /* run_character_check uses XML_SetCharacterDataHandler(), which
3098    * unfortunately heads off a code path that we need to exercise.
3099    */
3100   CharData_Init(&storage);
3101   XML_SetUserData(g_parser, &storage);
3102   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3103       == XML_STATUS_ERROR)
3104     xml_failure(g_parser);
3105   /* The default handler doesn't translate the entities */
3106   CharData_CheckXMLChars(&storage, expected);
3107 
3108   /* Now try again and check the translation */
3109   XML_ParserReset(g_parser, NULL);
3110   run_character_check(text, result);
3111 }
3112 END_TEST
3113 
3114 /* Regression test that an invalid tag in an external parameter
3115  * reference in an external DTD is correctly faulted.
3116  *
3117  * Only a few specific tags are legal in DTDs ignoring comments and
3118  * processing instructions, all of which begin with an exclamation
3119  * mark.  "<el/>" is not one of them, so the parser should raise an
3120  * error on encountering it.
3121  */
START_TEST(test_invalid_tag_in_dtd)3122 START_TEST(test_invalid_tag_in_dtd) {
3123   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3124                      "<doc></doc>\n";
3125 
3126   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3127   XML_SetExternalEntityRefHandler(g_parser, external_entity_param);
3128   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3129                  "Invalid tag IN DTD external param not rejected");
3130 }
3131 END_TEST
3132 
3133 /* Test entities not quite the predefined ones are not mis-recognised */
START_TEST(test_not_predefined_entities)3134 START_TEST(test_not_predefined_entities) {
3135   const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>",
3136                         "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL};
3137   int i = 0;
3138 
3139   while (text[i] != NULL) {
3140     expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY,
3141                    "Undefined entity not rejected");
3142     XML_ParserReset(g_parser, NULL);
3143     i++;
3144   }
3145 }
3146 END_TEST
3147 
3148 /* Test conditional inclusion (IGNORE) */
START_TEST(test_ignore_section)3149 START_TEST(test_ignore_section) {
3150   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3151                      "<doc><e>&entity;</e></doc>";
3152   const XML_Char *expected
3153       = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;");
3154   CharData storage;
3155 
3156   CharData_Init(&storage);
3157   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3158   XML_SetUserData(g_parser, &storage);
3159   XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore);
3160   XML_SetDefaultHandler(g_parser, accumulate_characters);
3161   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3162   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3163   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3164   XML_SetStartElementHandler(g_parser, dummy_start_element);
3165   XML_SetEndElementHandler(g_parser, dummy_end_element);
3166   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3167       == XML_STATUS_ERROR)
3168     xml_failure(g_parser);
3169   CharData_CheckXMLChars(&storage, expected);
3170 }
3171 END_TEST
3172 
START_TEST(test_ignore_section_utf16)3173 START_TEST(test_ignore_section_utf16) {
3174   const char text[] =
3175       /* <!DOCTYPE d SYSTEM 's'> */
3176       "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3177       "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
3178       /* <d><e>&en;</e></d> */
3179       "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
3180   const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3181   CharData storage;
3182 
3183   CharData_Init(&storage);
3184   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3185   XML_SetUserData(g_parser, &storage);
3186   XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16);
3187   XML_SetDefaultHandler(g_parser, accumulate_characters);
3188   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3189   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3190   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3191   XML_SetStartElementHandler(g_parser, dummy_start_element);
3192   XML_SetEndElementHandler(g_parser, dummy_end_element);
3193   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3194       == XML_STATUS_ERROR)
3195     xml_failure(g_parser);
3196   CharData_CheckXMLChars(&storage, expected);
3197 }
3198 END_TEST
3199 
START_TEST(test_ignore_section_utf16_be)3200 START_TEST(test_ignore_section_utf16_be) {
3201   const char text[] =
3202       /* <!DOCTYPE d SYSTEM 's'> */
3203       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3204       "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n"
3205       /* <d><e>&en;</e></d> */
3206       "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>";
3207   const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3208   CharData storage;
3209 
3210   CharData_Init(&storage);
3211   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3212   XML_SetUserData(g_parser, &storage);
3213   XML_SetExternalEntityRefHandler(g_parser,
3214                                   external_entity_load_ignore_utf16_be);
3215   XML_SetDefaultHandler(g_parser, accumulate_characters);
3216   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3217   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3218   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3219   XML_SetStartElementHandler(g_parser, dummy_start_element);
3220   XML_SetEndElementHandler(g_parser, dummy_end_element);
3221   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3222       == XML_STATUS_ERROR)
3223     xml_failure(g_parser);
3224   CharData_CheckXMLChars(&storage, expected);
3225 }
3226 END_TEST
3227 
3228 /* Test mis-formatted conditional exclusion */
START_TEST(test_bad_ignore_section)3229 START_TEST(test_bad_ignore_section) {
3230   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3231                      "<doc><e>&entity;</e></doc>";
3232   ExtFaults faults[]
3233       = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL,
3234           XML_ERROR_SYNTAX},
3235          {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL,
3236           XML_ERROR_INVALID_TOKEN},
3237          {/* FIrst two bytes of a three-byte char */
3238           "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL,
3239           XML_ERROR_PARTIAL_CHAR},
3240          {NULL, NULL, NULL, XML_ERROR_NONE}};
3241   ExtFaults *fault;
3242 
3243   for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
3244     set_subtest("%s", fault->parse_text);
3245     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3246     XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3247     XML_SetUserData(g_parser, fault);
3248     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3249                    "Incomplete IGNORE section not failed");
3250     XML_ParserReset(g_parser, NULL);
3251   }
3252 }
3253 END_TEST
3254 
3255 struct bom_testdata {
3256   const char *external;
3257   int split;
3258   XML_Bool nested_callback_happened;
3259 };
3260 
3261 static int XMLCALL
external_bom_checker(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)3262 external_bom_checker(XML_Parser parser, const XML_Char *context,
3263                      const XML_Char *base, const XML_Char *systemId,
3264                      const XML_Char *publicId) {
3265   const char *text;
3266   UNUSED_P(base);
3267   UNUSED_P(systemId);
3268   UNUSED_P(publicId);
3269 
3270   XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
3271   if (ext_parser == NULL)
3272     fail("Could not create external entity parser");
3273 
3274   if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
3275     struct bom_testdata *const testdata
3276         = (struct bom_testdata *)XML_GetUserData(parser);
3277     const char *const external = testdata->external;
3278     const int split = testdata->split;
3279     testdata->nested_callback_happened = XML_TRUE;
3280 
3281     if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE)
3282         != XML_STATUS_OK) {
3283       xml_failure(ext_parser);
3284     }
3285     text = external + split; // the parse below will continue where we left off.
3286   } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
3287     text = "<!ELEMENT doc EMPTY>\n"
3288            "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
3289            "<!ENTITY % e2 '%e1;'>\n";
3290   } else {
3291     fail("unknown systemId");
3292   }
3293 
3294   if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE)
3295       != XML_STATUS_OK)
3296     xml_failure(ext_parser);
3297 
3298   XML_ParserFree(ext_parser);
3299   return XML_STATUS_OK;
3300 }
3301 
3302 /* regression test: BOM should be consumed when followed by a partial token. */
START_TEST(test_external_bom_consumed)3303 START_TEST(test_external_bom_consumed) {
3304   const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3305                            "<doc></doc>\n";
3306   const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
3307   const int len = (int)strlen(external);
3308   for (int split = 0; split <= len; ++split) {
3309     set_subtest("split at byte %d", split);
3310 
3311     struct bom_testdata testdata;
3312     testdata.external = external;
3313     testdata.split = split;
3314     testdata.nested_callback_happened = XML_FALSE;
3315 
3316     XML_Parser parser = XML_ParserCreate(NULL);
3317     if (parser == NULL) {
3318       fail("Couldn't create parser");
3319     }
3320     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3321     XML_SetExternalEntityRefHandler(parser, external_bom_checker);
3322     XML_SetUserData(parser, &testdata);
3323     if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
3324         == XML_STATUS_ERROR)
3325       xml_failure(parser);
3326     if (! testdata.nested_callback_happened) {
3327       fail("ref handler not called");
3328     }
3329     XML_ParserFree(parser);
3330   }
3331 }
3332 END_TEST
3333 
3334 /* Test recursive parsing */
START_TEST(test_external_entity_values)3335 START_TEST(test_external_entity_values) {
3336   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3337                      "<doc></doc>\n";
3338   ExtFaults data_004_2[] = {
3339       {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE},
3340       {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL,
3341        XML_ERROR_INVALID_TOKEN},
3342       {"'wombat", "Unterminated string not faulted", NULL,
3343        XML_ERROR_UNCLOSED_TOKEN},
3344       {"\xe2\x82", "Partial UTF-8 character not faulted", NULL,
3345        XML_ERROR_PARTIAL_CHAR},
3346       {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE},
3347       {"<?xml?>", "Malformed XML declaration not faulted", NULL,
3348        XML_ERROR_XML_DECL},
3349       {/* UTF-8 BOM */
3350        "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL,
3351        XML_ERROR_NONE},
3352       {"<?xml version='1.0' encoding='utf-8'?>\n$",
3353        "Invalid token after text declaration not faulted", NULL,
3354        XML_ERROR_INVALID_TOKEN},
3355       {"<?xml version='1.0' encoding='utf-8'?>\n'wombat",
3356        "Unterminated string after text decl not faulted", NULL,
3357        XML_ERROR_UNCLOSED_TOKEN},
3358       {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82",
3359        "Partial UTF-8 character after text decl not faulted", NULL,
3360        XML_ERROR_PARTIAL_CHAR},
3361       {"%e1;", "Recursive parameter entity not faulted", NULL,
3362        XML_ERROR_RECURSIVE_ENTITY_REF},
3363       {NULL, NULL, NULL, XML_ERROR_NONE}};
3364   int i;
3365 
3366   for (i = 0; data_004_2[i].parse_text != NULL; i++) {
3367     set_subtest("%s", data_004_2[i].parse_text);
3368     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3369     XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer);
3370     XML_SetUserData(g_parser, &data_004_2[i]);
3371     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3372         == XML_STATUS_ERROR)
3373       xml_failure(g_parser);
3374     XML_ParserReset(g_parser, NULL);
3375   }
3376 }
3377 END_TEST
3378 
3379 /* Test the recursive parse interacts with a not standalone handler */
START_TEST(test_ext_entity_not_standalone)3380 START_TEST(test_ext_entity_not_standalone) {
3381   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3382                      "<doc></doc>";
3383 
3384   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3385   XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone);
3386   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3387                  "Standalone rejection not caught");
3388 }
3389 END_TEST
3390 
START_TEST(test_ext_entity_value_abort)3391 START_TEST(test_ext_entity_value_abort) {
3392   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3393                      "<doc></doc>\n";
3394 
3395   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3396   XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter);
3397   g_resumable = XML_FALSE;
3398   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3399       == XML_STATUS_ERROR)
3400     xml_failure(g_parser);
3401 }
3402 END_TEST
3403 
START_TEST(test_bad_public_doctype)3404 START_TEST(test_bad_public_doctype) {
3405   const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
3406                      "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n"
3407                      "<doc></doc>";
3408 
3409   /* Setting a handler provokes a particular code path */
3410   XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler,
3411                             dummy_end_doctype_handler);
3412   expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed");
3413 }
3414 END_TEST
3415 
3416 /* Test based on ibm/valid/P32/ibm32v04.xml */
START_TEST(test_attribute_enum_value)3417 START_TEST(test_attribute_enum_value) {
3418   const char *text = "<?xml version='1.0' standalone='no'?>\n"
3419                      "<!DOCTYPE animal SYSTEM 'test.dtd'>\n"
3420                      "<animal>This is a \n    <a/>  \n\nyellow tiger</animal>";
3421   ExtTest dtd_data
3422       = {"<!ELEMENT animal (#PCDATA|a)*>\n"
3423          "<!ELEMENT a EMPTY>\n"
3424          "<!ATTLIST animal xml:space (default|preserve) 'preserve'>",
3425          NULL, NULL};
3426   const XML_Char *expected = XCS("This is a \n      \n\nyellow tiger");
3427 
3428   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3429   XML_SetUserData(g_parser, &dtd_data);
3430   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3431   /* An attribute list handler provokes a different code path */
3432   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
3433   run_ext_character_check(text, &dtd_data, expected);
3434 }
3435 END_TEST
3436 
3437 /* Slightly bizarrely, the library seems to silently ignore entity
3438  * definitions for predefined entities, even when they are wrong.  The
3439  * language of the XML 1.0 spec is somewhat unhelpful as to what ought
3440  * to happen, so this is currently treated as acceptable.
3441  */
START_TEST(test_predefined_entity_redefinition)3442 START_TEST(test_predefined_entity_redefinition) {
3443   const char *text = "<!DOCTYPE doc [\n"
3444                      "<!ENTITY apos 'foo'>\n"
3445                      "]>\n"
3446                      "<doc>&apos;</doc>";
3447   run_character_check(text, XCS("'"));
3448 }
3449 END_TEST
3450 
3451 /* Test that the parser stops processing the DTD after an unresolved
3452  * parameter entity is encountered.
3453  */
START_TEST(test_dtd_stop_processing)3454 START_TEST(test_dtd_stop_processing) {
3455   const char *text = "<!DOCTYPE doc [\n"
3456                      "%foo;\n"
3457                      "<!ENTITY bar 'bas'>\n"
3458                      "]><doc/>";
3459 
3460   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
3461   init_dummy_handlers();
3462   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3463       == XML_STATUS_ERROR)
3464     xml_failure(g_parser);
3465   if (get_dummy_handler_flags() != 0)
3466     fail("DTD processing still going after undefined PE");
3467 }
3468 END_TEST
3469 
3470 /* Test public notations with no system ID */
START_TEST(test_public_notation_no_sysid)3471 START_TEST(test_public_notation_no_sysid) {
3472   const char *text = "<!DOCTYPE doc [\n"
3473                      "<!NOTATION note PUBLIC 'foo'>\n"
3474                      "<!ELEMENT doc EMPTY>\n"
3475                      "]>\n<doc/>";
3476 
3477   init_dummy_handlers();
3478   XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
3479   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3480       == XML_STATUS_ERROR)
3481     xml_failure(g_parser);
3482   if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG)
3483     fail("Notation declaration handler not called");
3484 }
3485 END_TEST
3486 
START_TEST(test_nested_groups)3487 START_TEST(test_nested_groups) {
3488   const char *text
3489       = "<!DOCTYPE doc [\n"
3490         "<!ELEMENT doc "
3491         /* Sixteen elements per line */
3492         "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,"
3493         "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?"
3494         "))))))))))))))))))))))))))))))))>\n"
3495         "<!ELEMENT e EMPTY>"
3496         "]>\n"
3497         "<doc><e/></doc>";
3498   CharData storage;
3499 
3500   CharData_Init(&storage);
3501   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3502   XML_SetStartElementHandler(g_parser, record_element_start_handler);
3503   XML_SetUserData(g_parser, &storage);
3504   init_dummy_handlers();
3505   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3506       == XML_STATUS_ERROR)
3507     xml_failure(g_parser);
3508   CharData_CheckXMLChars(&storage, XCS("doce"));
3509   if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3510     fail("Element handler not fired");
3511 }
3512 END_TEST
3513 
START_TEST(test_group_choice)3514 START_TEST(test_group_choice) {
3515   const char *text = "<!DOCTYPE doc [\n"
3516                      "<!ELEMENT doc (a|b|c)+>\n"
3517                      "<!ELEMENT a EMPTY>\n"
3518                      "<!ELEMENT b (#PCDATA)>\n"
3519                      "<!ELEMENT c ANY>\n"
3520                      "]>\n"
3521                      "<doc>\n"
3522                      "<a/>\n"
3523                      "<b attr='foo'>This is a foo</b>\n"
3524                      "<c></c>\n"
3525                      "</doc>\n";
3526 
3527   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3528   init_dummy_handlers();
3529   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3530       == XML_STATUS_ERROR)
3531     xml_failure(g_parser);
3532   if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3533     fail("Element handler flag not raised");
3534 }
3535 END_TEST
3536 
START_TEST(test_standalone_parameter_entity)3537 START_TEST(test_standalone_parameter_entity) {
3538   const char *text = "<?xml version='1.0' standalone='yes'?>\n"
3539                      "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n"
3540                      "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n"
3541                      "%entity;\n"
3542                      "]>\n"
3543                      "<doc></doc>";
3544   char dtd_data[] = "<!ENTITY % e1 'foo'>\n";
3545 
3546   XML_SetUserData(g_parser, dtd_data);
3547   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3548   XML_SetExternalEntityRefHandler(g_parser, external_entity_public);
3549   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3550       == XML_STATUS_ERROR)
3551     xml_failure(g_parser);
3552 }
3553 END_TEST
3554 
3555 /* Test skipping of parameter entity in an external DTD */
3556 /* Derived from ibm/invalid/P69/ibm69i01.xml */
START_TEST(test_skipped_parameter_entity)3557 START_TEST(test_skipped_parameter_entity) {
3558   const char *text = "<?xml version='1.0'?>\n"
3559                      "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3560                      "<!ELEMENT root (#PCDATA|a)* >\n"
3561                      "]>\n"
3562                      "<root></root>";
3563   ExtTest dtd_data = {"%pe2;", NULL, NULL};
3564 
3565   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3566   XML_SetUserData(g_parser, &dtd_data);
3567   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3568   XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler);
3569   init_dummy_handlers();
3570   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3571       == XML_STATUS_ERROR)
3572     xml_failure(g_parser);
3573   if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG)
3574     fail("Skip handler not executed");
3575 }
3576 END_TEST
3577 
3578 /* Test recursive parameter entity definition rejected in external DTD */
START_TEST(test_recursive_external_parameter_entity)3579 START_TEST(test_recursive_external_parameter_entity) {
3580   const char *text = "<?xml version='1.0'?>\n"
3581                      "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3582                      "<!ELEMENT root (#PCDATA|a)* >\n"
3583                      "]>\n"
3584                      "<root></root>";
3585   ExtFaults dtd_data = {"<!ENTITY % pe2 '&#37;pe2;'>\n%pe2;",
3586                         "Recursive external parameter entity not faulted", NULL,
3587                         XML_ERROR_RECURSIVE_ENTITY_REF};
3588 
3589   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3590   XML_SetUserData(g_parser, &dtd_data);
3591   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3592   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3593                  "Recursive external parameter not spotted");
3594 }
3595 END_TEST
3596 
3597 /* Test undefined parameter entity in external entity handler */
START_TEST(test_undefined_ext_entity_in_external_dtd)3598 START_TEST(test_undefined_ext_entity_in_external_dtd) {
3599   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3600                      "<doc></doc>\n";
3601 
3602   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3603   XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3604   XML_SetUserData(g_parser, NULL);
3605   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3606       == XML_STATUS_ERROR)
3607     xml_failure(g_parser);
3608 
3609   /* Now repeat without the external entity ref handler invoking
3610    * another copy of itself.
3611    */
3612   XML_ParserReset(g_parser, NULL);
3613   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3614   XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3615   XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */
3616   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3617       == XML_STATUS_ERROR)
3618     xml_failure(g_parser);
3619 }
3620 END_TEST
3621 
3622 /* Test suspending the parse on receiving an XML declaration works */
START_TEST(test_suspend_xdecl)3623 START_TEST(test_suspend_xdecl) {
3624   const char *text = long_character_data_text;
3625 
3626   XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
3627   XML_SetUserData(g_parser, g_parser);
3628   g_resumable = XML_TRUE;
3629   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3630       != XML_STATUS_SUSPENDED)
3631     xml_failure(g_parser);
3632   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
3633     xml_failure(g_parser);
3634   /* Attempt to start a new parse while suspended */
3635   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3636       != XML_STATUS_ERROR)
3637     fail("Attempt to parse while suspended not faulted");
3638   if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
3639     fail("Suspended parse not faulted with correct error");
3640 }
3641 END_TEST
3642 
3643 /* Test aborting the parse in an epilog works */
START_TEST(test_abort_epilog)3644 START_TEST(test_abort_epilog) {
3645   const char *text = "<doc></doc>\n\r\n";
3646   XML_Char trigger_char = XCS('\r');
3647 
3648   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3649   XML_SetUserData(g_parser, &trigger_char);
3650   g_resumable = XML_FALSE;
3651   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3652       != XML_STATUS_ERROR)
3653     fail("Abort not triggered");
3654   if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
3655     xml_failure(g_parser);
3656 }
3657 END_TEST
3658 
3659 /* Test a different code path for abort in the epilog */
START_TEST(test_abort_epilog_2)3660 START_TEST(test_abort_epilog_2) {
3661   const char *text = "<doc></doc>\n";
3662   XML_Char trigger_char = XCS('\n');
3663 
3664   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3665   XML_SetUserData(g_parser, &trigger_char);
3666   g_resumable = XML_FALSE;
3667   expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered");
3668 }
3669 END_TEST
3670 
3671 /* Test suspension from the epilog */
START_TEST(test_suspend_epilog)3672 START_TEST(test_suspend_epilog) {
3673   const char *text = "<doc></doc>\n";
3674   XML_Char trigger_char = XCS('\n');
3675 
3676   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3677   XML_SetUserData(g_parser, &trigger_char);
3678   g_resumable = XML_TRUE;
3679   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3680       != XML_STATUS_SUSPENDED)
3681     xml_failure(g_parser);
3682 }
3683 END_TEST
3684 
START_TEST(test_suspend_in_sole_empty_tag)3685 START_TEST(test_suspend_in_sole_empty_tag) {
3686   const char *text = "<doc/>";
3687   enum XML_Status rc;
3688 
3689   XML_SetEndElementHandler(g_parser, suspending_end_handler);
3690   XML_SetUserData(g_parser, g_parser);
3691   rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
3692   if (rc == XML_STATUS_ERROR)
3693     xml_failure(g_parser);
3694   else if (rc != XML_STATUS_SUSPENDED)
3695     fail("Suspend not triggered");
3696   rc = XML_ResumeParser(g_parser);
3697   if (rc == XML_STATUS_ERROR)
3698     xml_failure(g_parser);
3699   else if (rc != XML_STATUS_OK)
3700     fail("Resume failed");
3701 }
3702 END_TEST
3703 
START_TEST(test_unfinished_epilog)3704 START_TEST(test_unfinished_epilog) {
3705   const char *text = "<doc></doc><";
3706 
3707   expect_failure(text, XML_ERROR_UNCLOSED_TOKEN,
3708                  "Incomplete epilog entry not faulted");
3709 }
3710 END_TEST
3711 
START_TEST(test_partial_char_in_epilog)3712 START_TEST(test_partial_char_in_epilog) {
3713   const char *text = "<doc></doc>\xe2\x82";
3714 
3715   /* First check that no fault is raised if the parse is not finished */
3716   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3717       == XML_STATUS_ERROR)
3718     xml_failure(g_parser);
3719   /* Now check that it is faulted once we finish */
3720   if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR)
3721     fail("Partial character in epilog not faulted");
3722   if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR)
3723     xml_failure(g_parser);
3724 }
3725 END_TEST
3726 
3727 /* Test resuming a parse suspended in entity substitution */
START_TEST(test_suspend_resume_internal_entity)3728 START_TEST(test_suspend_resume_internal_entity) {
3729   const char *text
3730       = "<!DOCTYPE doc [\n"
3731         "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n"
3732         "]>\n"
3733         "<doc>&foo;</doc>\n";
3734   const XML_Char *expected1 = XCS("Hi");
3735   const XML_Char *expected2 = XCS("HiHo");
3736   CharData storage;
3737 
3738   CharData_Init(&storage);
3739   XML_SetStartElementHandler(g_parser, start_element_suspender);
3740   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3741   XML_SetUserData(g_parser, &storage);
3742   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
3743   // we won't know exactly how much input we actually managed to give Expat.
3744   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3745       != XML_STATUS_SUSPENDED)
3746     xml_failure(g_parser);
3747   CharData_CheckXMLChars(&storage, XCS(""));
3748   if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
3749     xml_failure(g_parser);
3750   CharData_CheckXMLChars(&storage, expected1);
3751   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3752     xml_failure(g_parser);
3753   CharData_CheckXMLChars(&storage, expected2);
3754 }
3755 END_TEST
3756 
START_TEST(test_suspend_resume_internal_entity_issue_629)3757 START_TEST(test_suspend_resume_internal_entity_issue_629) {
3758   const char *const text
3759       = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n"
3760         "<"
3761         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3762         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3763         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3764         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3765         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3766         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3767         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3768         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3769         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3770         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3771         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3772         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3773         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3774         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3775         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3776         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3777         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3778         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3779         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3780         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3781         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3782         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3783         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3784         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3785         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3786         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3787         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3788         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3789         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3790         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3791         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3792         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3793         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3794         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3795         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3796         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3797         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3798         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3799         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3800         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3801         "/>"
3802         "</b></a>";
3803   const size_t firstChunkSizeBytes = 54;
3804 
3805   XML_Parser parser = XML_ParserCreate(NULL);
3806   XML_SetUserData(parser, parser);
3807   XML_SetCommentHandler(parser, suspending_comment_handler);
3808 
3809   if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE)
3810       != XML_STATUS_SUSPENDED)
3811     xml_failure(parser);
3812   if (XML_ResumeParser(parser) != XML_STATUS_OK)
3813     xml_failure(parser);
3814   if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes,
3815                               (int)(strlen(text) - firstChunkSizeBytes),
3816                               XML_TRUE)
3817       != XML_STATUS_OK)
3818     xml_failure(parser);
3819   XML_ParserFree(parser);
3820 }
3821 END_TEST
3822 
3823 /* Test syntax error is caught at parse resumption */
START_TEST(test_resume_entity_with_syntax_error)3824 START_TEST(test_resume_entity_with_syntax_error) {
3825   const char *text = "<!DOCTYPE doc [\n"
3826                      "<!ENTITY foo '<suspend>Hi</wombat>'>\n"
3827                      "]>\n"
3828                      "<doc>&foo;</doc>\n";
3829 
3830   XML_SetStartElementHandler(g_parser, start_element_suspender);
3831   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3832       != XML_STATUS_SUSPENDED)
3833     xml_failure(g_parser);
3834   if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
3835     fail("Syntax error in entity not faulted");
3836   if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH)
3837     xml_failure(g_parser);
3838 }
3839 END_TEST
3840 
3841 /* Test suspending and resuming in a parameter entity substitution */
START_TEST(test_suspend_resume_parameter_entity)3842 START_TEST(test_suspend_resume_parameter_entity) {
3843   const char *text = "<!DOCTYPE doc [\n"
3844                      "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n"
3845                      "%foo;\n"
3846                      "]>\n"
3847                      "<doc>Hello, world</doc>";
3848   const XML_Char *expected = XCS("Hello, world");
3849   CharData storage;
3850 
3851   CharData_Init(&storage);
3852   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3853   XML_SetElementDeclHandler(g_parser, element_decl_suspender);
3854   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3855   XML_SetUserData(g_parser, &storage);
3856   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3857       != XML_STATUS_SUSPENDED)
3858     xml_failure(g_parser);
3859   CharData_CheckXMLChars(&storage, XCS(""));
3860   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3861     xml_failure(g_parser);
3862   CharData_CheckXMLChars(&storage, expected);
3863 }
3864 END_TEST
3865 
3866 /* Test attempting to use parser after an error is faulted */
START_TEST(test_restart_on_error)3867 START_TEST(test_restart_on_error) {
3868   const char *text = "<$doc><doc></doc>";
3869 
3870   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3871       != XML_STATUS_ERROR)
3872     fail("Invalid tag name not faulted");
3873   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3874     xml_failure(g_parser);
3875   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
3876     fail("Restarting invalid parse not faulted");
3877   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3878     xml_failure(g_parser);
3879 }
3880 END_TEST
3881 
3882 /* Test that angle brackets in an attribute default value are faulted */
START_TEST(test_reject_lt_in_attribute_value)3883 START_TEST(test_reject_lt_in_attribute_value) {
3884   const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n"
3885                      "<doc></doc>";
3886 
3887   expect_failure(text, XML_ERROR_INVALID_TOKEN,
3888                  "Bad attribute default not faulted");
3889 }
3890 END_TEST
3891 
START_TEST(test_reject_unfinished_param_in_att_value)3892 START_TEST(test_reject_unfinished_param_in_att_value) {
3893   const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n"
3894                      "<doc></doc>";
3895 
3896   expect_failure(text, XML_ERROR_INVALID_TOKEN,
3897                  "Bad attribute default not faulted");
3898 }
3899 END_TEST
3900 
START_TEST(test_trailing_cr_in_att_value)3901 START_TEST(test_trailing_cr_in_att_value) {
3902   const char *text = "<doc a='value\r'/>";
3903 
3904   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3905       == XML_STATUS_ERROR)
3906     xml_failure(g_parser);
3907 }
3908 END_TEST
3909 
3910 /* Try parsing a general entity within a parameter entity in a
3911  * standalone internal DTD.  Covers a corner case in the parser.
3912  */
START_TEST(test_standalone_internal_entity)3913 START_TEST(test_standalone_internal_entity) {
3914   const char *text = "<?xml version='1.0' standalone='yes' ?>\n"
3915                      "<!DOCTYPE doc [\n"
3916                      "  <!ELEMENT doc (#PCDATA)>\n"
3917                      "  <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"&ge;\">'>\n"
3918                      "  <!ENTITY ge 'AttDefaultValue'>\n"
3919                      "  %pe;\n"
3920                      "]>\n"
3921                      "<doc att2='any'/>";
3922 
3923   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3924   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3925       == XML_STATUS_ERROR)
3926     xml_failure(g_parser);
3927 }
3928 END_TEST
3929 
3930 /* Test that a reference to an unknown external entity is skipped */
START_TEST(test_skipped_external_entity)3931 START_TEST(test_skipped_external_entity) {
3932   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3933                      "<doc></doc>\n";
3934   ExtTest test_data = {"<!ELEMENT doc EMPTY>\n"
3935                        "<!ENTITY % e2 '%e1;'>\n",
3936                        NULL, NULL};
3937 
3938   XML_SetUserData(g_parser, &test_data);
3939   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3940   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3941   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3942       == XML_STATUS_ERROR)
3943     xml_failure(g_parser);
3944 }
3945 END_TEST
3946 
3947 /* Test a different form of unknown external entity */
START_TEST(test_skipped_null_loaded_ext_entity)3948 START_TEST(test_skipped_null_loaded_ext_entity) {
3949   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3950                      "<doc />";
3951   ExtHdlrData test_data
3952       = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3953          "<!ENTITY % pe2 '%pe1;'>\n"
3954          "%pe2;\n",
3955          external_entity_null_loader, NULL};
3956 
3957   XML_SetUserData(g_parser, &test_data);
3958   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3959   XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3960   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3961       == XML_STATUS_ERROR)
3962     xml_failure(g_parser);
3963 }
3964 END_TEST
3965 
START_TEST(test_skipped_unloaded_ext_entity)3966 START_TEST(test_skipped_unloaded_ext_entity) {
3967   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3968                      "<doc />";
3969   ExtHdlrData test_data
3970       = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3971          "<!ENTITY % pe2 '%pe1;'>\n"
3972          "%pe2;\n",
3973          NULL, NULL};
3974 
3975   XML_SetUserData(g_parser, &test_data);
3976   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3977   XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3978   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3979       == XML_STATUS_ERROR)
3980     xml_failure(g_parser);
3981 }
3982 END_TEST
3983 
3984 /* Test that a parameter entity value ending with a carriage return
3985  * has it translated internally into a newline.
3986  */
START_TEST(test_param_entity_with_trailing_cr)3987 START_TEST(test_param_entity_with_trailing_cr) {
3988 #define PARAM_ENTITY_NAME "pe"
3989 #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">"
3990   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3991                      "<doc/>";
3992   ExtTest test_data
3993       = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n"
3994          "%" PARAM_ENTITY_NAME ";\n",
3995          NULL, NULL};
3996 
3997   XML_SetUserData(g_parser, &test_data);
3998   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3999   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
4000   XML_SetEntityDeclHandler(g_parser, param_entity_match_handler);
4001   param_entity_match_init(XCS(PARAM_ENTITY_NAME),
4002                           XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n"));
4003   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4004       == XML_STATUS_ERROR)
4005     xml_failure(g_parser);
4006   int entity_match_flag = get_param_entity_match_flag();
4007   if (entity_match_flag == ENTITY_MATCH_FAIL)
4008     fail("Parameter entity CR->NEWLINE conversion failed");
4009   else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND)
4010     fail("Parameter entity not parsed");
4011 }
4012 #undef PARAM_ENTITY_NAME
4013 #undef PARAM_ENTITY_CORE_VALUE
4014 END_TEST
4015 
START_TEST(test_invalid_character_entity)4016 START_TEST(test_invalid_character_entity) {
4017   const char *text = "<!DOCTYPE doc [\n"
4018                      "  <!ENTITY entity '&#x110000;'>\n"
4019                      "]>\n"
4020                      "<doc>&entity;</doc>";
4021 
4022   expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4023                  "Out of range character reference not faulted");
4024 }
4025 END_TEST
4026 
START_TEST(test_invalid_character_entity_2)4027 START_TEST(test_invalid_character_entity_2) {
4028   const char *text = "<!DOCTYPE doc [\n"
4029                      "  <!ENTITY entity '&#xg0;'>\n"
4030                      "]>\n"
4031                      "<doc>&entity;</doc>";
4032 
4033   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4034                  "Out of range character reference not faulted");
4035 }
4036 END_TEST
4037 
START_TEST(test_invalid_character_entity_3)4038 START_TEST(test_invalid_character_entity_3) {
4039   const char text[] =
4040       /* <!DOCTYPE doc [\n */
4041       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4042       /* U+0E04 = KHO KHWAI
4043        * U+0E08 = CHO CHAN */
4044       /* <!ENTITY entity '&\u0e04\u0e08;'>\n */
4045       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 "
4046       "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n"
4047       /* ]>\n */
4048       "\0]\0>\0\n"
4049       /* <doc>&entity;</doc> */
4050       "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>";
4051 
4052   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4053       != XML_STATUS_ERROR)
4054     fail("Invalid start of entity name not faulted");
4055   if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY)
4056     xml_failure(g_parser);
4057 }
4058 END_TEST
4059 
START_TEST(test_invalid_character_entity_4)4060 START_TEST(test_invalid_character_entity_4) {
4061   const char *text = "<!DOCTYPE doc [\n"
4062                      "  <!ENTITY entity '&#1114112;'>\n" /* = &#x110000 */
4063                      "]>\n"
4064                      "<doc>&entity;</doc>";
4065 
4066   expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4067                  "Out of range character reference not faulted");
4068 }
4069 END_TEST
4070 
4071 /* Test that processing instructions are picked up by a default handler */
START_TEST(test_pi_handled_in_default)4072 START_TEST(test_pi_handled_in_default) {
4073   const char *text = "<?test processing instruction?>\n<doc/>";
4074   const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>");
4075   CharData storage;
4076 
4077   CharData_Init(&storage);
4078   XML_SetDefaultHandler(g_parser, accumulate_characters);
4079   XML_SetUserData(g_parser, &storage);
4080   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4081       == XML_STATUS_ERROR)
4082     xml_failure(g_parser);
4083   CharData_CheckXMLChars(&storage, expected);
4084 }
4085 END_TEST
4086 
4087 /* Test that comments are picked up by a default handler */
START_TEST(test_comment_handled_in_default)4088 START_TEST(test_comment_handled_in_default) {
4089   const char *text = "<!-- This is a comment -->\n<doc/>";
4090   const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>");
4091   CharData storage;
4092 
4093   CharData_Init(&storage);
4094   XML_SetDefaultHandler(g_parser, accumulate_characters);
4095   XML_SetUserData(g_parser, &storage);
4096   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4097       == XML_STATUS_ERROR)
4098     xml_failure(g_parser);
4099   CharData_CheckXMLChars(&storage, expected);
4100 }
4101 END_TEST
4102 
4103 /* Test PIs that look almost but not quite like XML declarations */
START_TEST(test_pi_yml)4104 START_TEST(test_pi_yml) {
4105   const char *text = "<?yml something like data?><doc/>";
4106   const XML_Char *expected = XCS("yml: something like data\n");
4107   CharData storage;
4108 
4109   CharData_Init(&storage);
4110   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4111   XML_SetUserData(g_parser, &storage);
4112   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4113       == XML_STATUS_ERROR)
4114     xml_failure(g_parser);
4115   CharData_CheckXMLChars(&storage, expected);
4116 }
4117 END_TEST
4118 
START_TEST(test_pi_xnl)4119 START_TEST(test_pi_xnl) {
4120   const char *text = "<?xnl nothing like data?><doc/>";
4121   const XML_Char *expected = XCS("xnl: nothing like data\n");
4122   CharData storage;
4123 
4124   CharData_Init(&storage);
4125   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4126   XML_SetUserData(g_parser, &storage);
4127   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4128       == XML_STATUS_ERROR)
4129     xml_failure(g_parser);
4130   CharData_CheckXMLChars(&storage, expected);
4131 }
4132 END_TEST
4133 
START_TEST(test_pi_xmm)4134 START_TEST(test_pi_xmm) {
4135   const char *text = "<?xmm everything like data?><doc/>";
4136   const XML_Char *expected = XCS("xmm: everything like data\n");
4137   CharData storage;
4138 
4139   CharData_Init(&storage);
4140   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4141   XML_SetUserData(g_parser, &storage);
4142   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4143       == XML_STATUS_ERROR)
4144     xml_failure(g_parser);
4145   CharData_CheckXMLChars(&storage, expected);
4146 }
4147 END_TEST
4148 
START_TEST(test_utf16_pi)4149 START_TEST(test_utf16_pi) {
4150   const char text[] =
4151       /* <?{KHO KHWAI}{CHO CHAN}?>
4152        * where {KHO KHWAI} = U+0E04
4153        * and   {CHO CHAN}  = U+0E08
4154        */
4155       "<\0?\0\x04\x0e\x08\x0e?\0>\0"
4156       /* <q/> */
4157       "<\0q\0/\0>\0";
4158 #ifdef XML_UNICODE
4159   const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4160 #else
4161   const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4162 #endif
4163   CharData storage;
4164 
4165   CharData_Init(&storage);
4166   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4167   XML_SetUserData(g_parser, &storage);
4168   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4169       == XML_STATUS_ERROR)
4170     xml_failure(g_parser);
4171   CharData_CheckXMLChars(&storage, expected);
4172 }
4173 END_TEST
4174 
START_TEST(test_utf16_be_pi)4175 START_TEST(test_utf16_be_pi) {
4176   const char text[] =
4177       /* <?{KHO KHWAI}{CHO CHAN}?>
4178        * where {KHO KHWAI} = U+0E04
4179        * and   {CHO CHAN}  = U+0E08
4180        */
4181       "\0<\0?\x0e\x04\x0e\x08\0?\0>"
4182       /* <q/> */
4183       "\0<\0q\0/\0>";
4184 #ifdef XML_UNICODE
4185   const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4186 #else
4187   const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4188 #endif
4189   CharData storage;
4190 
4191   CharData_Init(&storage);
4192   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4193   XML_SetUserData(g_parser, &storage);
4194   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4195       == XML_STATUS_ERROR)
4196     xml_failure(g_parser);
4197   CharData_CheckXMLChars(&storage, expected);
4198 }
4199 END_TEST
4200 
4201 /* Test that comments can be picked up and translated */
START_TEST(test_utf16_be_comment)4202 START_TEST(test_utf16_be_comment) {
4203   const char text[] =
4204       /* <!-- Comment A --> */
4205       "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
4206       /* <doc/> */
4207       "\0<\0d\0o\0c\0/\0>";
4208   const XML_Char *expected = XCS(" Comment A ");
4209   CharData storage;
4210 
4211   CharData_Init(&storage);
4212   XML_SetCommentHandler(g_parser, accumulate_comment);
4213   XML_SetUserData(g_parser, &storage);
4214   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4215       == XML_STATUS_ERROR)
4216     xml_failure(g_parser);
4217   CharData_CheckXMLChars(&storage, expected);
4218 }
4219 END_TEST
4220 
START_TEST(test_utf16_le_comment)4221 START_TEST(test_utf16_le_comment) {
4222   const char text[] =
4223       /* <!-- Comment B --> */
4224       "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
4225       /* <doc/> */
4226       "<\0d\0o\0c\0/\0>\0";
4227   const XML_Char *expected = XCS(" Comment B ");
4228   CharData storage;
4229 
4230   CharData_Init(&storage);
4231   XML_SetCommentHandler(g_parser, accumulate_comment);
4232   XML_SetUserData(g_parser, &storage);
4233   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4234       == XML_STATUS_ERROR)
4235     xml_failure(g_parser);
4236   CharData_CheckXMLChars(&storage, expected);
4237 }
4238 END_TEST
4239 
4240 /* Test that the unknown encoding handler with map entries that expect
4241  * conversion but no conversion function is faulted
4242  */
START_TEST(test_missing_encoding_conversion_fn)4243 START_TEST(test_missing_encoding_conversion_fn) {
4244   const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
4245                      "<doc>\x81</doc>";
4246 
4247   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4248   /* MiscEncodingHandler sets up an encoding with every top-bit-set
4249    * character introducing a two-byte sequence.  For this, it
4250    * requires a convert function.  The above function call doesn't
4251    * pass one through, so when BadEncodingHandler actually gets
4252    * called it should supply an invalid encoding.
4253    */
4254   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4255                  "Encoding with missing convert() not faulted");
4256 }
4257 END_TEST
4258 
START_TEST(test_failing_encoding_conversion_fn)4259 START_TEST(test_failing_encoding_conversion_fn) {
4260   const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
4261                      "<doc>\x81</doc>";
4262 
4263   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4264   /* BadEncodingHandler sets up an encoding with every top-bit-set
4265    * character introducing a two-byte sequence.  For this, it
4266    * requires a convert function.  The above function call passes
4267    * one that insists all possible sequences are invalid anyway.
4268    */
4269   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4270                  "Encoding with failing convert() not faulted");
4271 }
4272 END_TEST
4273 
4274 /* Test unknown encoding conversions */
START_TEST(test_unknown_encoding_success)4275 START_TEST(test_unknown_encoding_success) {
4276   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4277                      /* Equivalent to <eoc>Hello, world</eoc> */
4278                      "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";
4279 
4280   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4281   run_character_check(text, XCS("Hello, world"));
4282 }
4283 END_TEST
4284 
4285 /* Test bad name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name)4286 START_TEST(test_unknown_encoding_bad_name) {
4287   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4288                      "<\xff\x64oc>Hello, world</\xff\x64oc>";
4289 
4290   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4291   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4292                  "Bad name start in unknown encoding not faulted");
4293 }
4294 END_TEST
4295 
4296 /* Test bad mid-name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name_2)4297 START_TEST(test_unknown_encoding_bad_name_2) {
4298   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4299                      "<d\xffoc>Hello, world</d\xffoc>";
4300 
4301   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4302   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4303                  "Bad name in unknown encoding not faulted");
4304 }
4305 END_TEST
4306 
4307 /* Test element name that is long enough to fill the conversion buffer
4308  * in an unknown encoding, finishing with an encoded character.
4309  */
START_TEST(test_unknown_encoding_long_name_1)4310 START_TEST(test_unknown_encoding_long_name_1) {
4311   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4312                      "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
4313                      "Hi"
4314                      "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
4315   const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4316   CharData storage;
4317 
4318   CharData_Init(&storage);
4319   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4320   XML_SetStartElementHandler(g_parser, record_element_start_handler);
4321   XML_SetUserData(g_parser, &storage);
4322   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4323       == XML_STATUS_ERROR)
4324     xml_failure(g_parser);
4325   CharData_CheckXMLChars(&storage, expected);
4326 }
4327 END_TEST
4328 
/* Test element name that is long enough to fill the conversion buffer
 * in an unknown encoding, finishing with a simple character.
 */
START_TEST(test_unknown_encoding_long_name_2)4332 START_TEST(test_unknown_encoding_long_name_2) {
4333   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4334                      "<abcdefghabcdefghabcdefghijklmnop>"
4335                      "Hi"
4336                      "</abcdefghabcdefghabcdefghijklmnop>";
4337   const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4338   CharData storage;
4339 
4340   CharData_Init(&storage);
4341   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4342   XML_SetStartElementHandler(g_parser, record_element_start_handler);
4343   XML_SetUserData(g_parser, &storage);
4344   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4345       == XML_STATUS_ERROR)
4346     xml_failure(g_parser);
4347   CharData_CheckXMLChars(&storage, expected);
4348 }
4349 END_TEST
4350 
START_TEST(test_invalid_unknown_encoding)4351 START_TEST(test_invalid_unknown_encoding) {
4352   const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
4353                      "<doc>Hello world</doc>";
4354 
4355   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4356   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4357                  "Invalid unknown encoding not faulted");
4358 }
4359 END_TEST
4360 
START_TEST(test_unknown_ascii_encoding_ok)4361 START_TEST(test_unknown_ascii_encoding_ok) {
4362   const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4363                      "<doc>Hello, world</doc>";
4364 
4365   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4366   run_character_check(text, XCS("Hello, world"));
4367 }
4368 END_TEST
4369 
START_TEST(test_unknown_ascii_encoding_fail)4370 START_TEST(test_unknown_ascii_encoding_fail) {
4371   const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4372                      "<doc>Hello, \x80 world</doc>";
4373 
4374   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4375   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4376                  "Invalid character not faulted");
4377 }
4378 END_TEST
4379 
START_TEST(test_unknown_encoding_invalid_length)4380 START_TEST(test_unknown_encoding_invalid_length) {
4381   const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
4382                      "<doc>Hello, world</doc>";
4383 
4384   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4385   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4386                  "Invalid unknown encoding not faulted");
4387 }
4388 END_TEST
4389 
START_TEST(test_unknown_encoding_invalid_topbit)4390 START_TEST(test_unknown_encoding_invalid_topbit) {
4391   const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
4392                      "<doc>Hello, world</doc>";
4393 
4394   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4395   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4396                  "Invalid unknown encoding not faulted");
4397 }
4398 END_TEST
4399 
START_TEST(test_unknown_encoding_invalid_surrogate)4400 START_TEST(test_unknown_encoding_invalid_surrogate) {
4401   const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
4402                      "<doc>Hello, \x82 world</doc>";
4403 
4404   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4405   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4406                  "Invalid unknown encoding not faulted");
4407 }
4408 END_TEST
4409 
START_TEST(test_unknown_encoding_invalid_high)4410 START_TEST(test_unknown_encoding_invalid_high) {
4411   const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
4412                      "<doc>Hello, world</doc>";
4413 
4414   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4415   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4416                  "Invalid unknown encoding not faulted");
4417 }
4418 END_TEST
4419 
START_TEST(test_unknown_encoding_invalid_attr_value)4420 START_TEST(test_unknown_encoding_invalid_attr_value) {
4421   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4422                      "<doc attr='\xff\x30'/>";
4423 
4424   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4425   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4426                  "Invalid attribute valid not faulted");
4427 }
4428 END_TEST
4429 
4430 /* Test an external entity parser set to use latin-1 detects UTF-16
4431  * BOMs correctly.
4432  */
4433 /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */
START_TEST(test_ext_entity_latin1_utf16le_bom)4434 START_TEST(test_ext_entity_latin1_utf16le_bom) {
4435   const char *text = "<!DOCTYPE doc [\n"
4436                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4437                      "]>\n"
4438                      "<doc>&en;</doc>";
4439   ExtTest2 test_data
4440       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4441          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4442           *   0x4c = L and 0x20 is a space
4443           */
4444          "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4445 #ifdef XML_UNICODE
4446   const XML_Char *expected = XCS("\x00ff\x00feL ");
4447 #else
4448   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4449   const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4450 #endif
4451   CharData storage;
4452 
4453   CharData_Init(&storage);
4454   test_data.storage = &storage;
4455   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4456   XML_SetUserData(g_parser, &test_data);
4457   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4458   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4459       == XML_STATUS_ERROR)
4460     xml_failure(g_parser);
4461   CharData_CheckXMLChars(&storage, expected);
4462 }
4463 END_TEST
4464 
START_TEST(test_ext_entity_latin1_utf16be_bom)4465 START_TEST(test_ext_entity_latin1_utf16be_bom) {
4466   const char *text = "<!DOCTYPE doc [\n"
4467                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4468                      "]>\n"
4469                      "<doc>&en;</doc>";
4470   ExtTest2 test_data
4471       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4472          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4473           *   0x4c = L and 0x20 is a space
4474           */
4475          "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4476 #ifdef XML_UNICODE
4477   const XML_Char *expected = XCS("\x00fe\x00ff L");
4478 #else
4479   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4480   const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L");
4481 #endif
4482   CharData storage;
4483 
4484   CharData_Init(&storage);
4485   test_data.storage = &storage;
4486   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4487   XML_SetUserData(g_parser, &test_data);
4488   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4489   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4490       == XML_STATUS_ERROR)
4491     xml_failure(g_parser);
4492   CharData_CheckXMLChars(&storage, expected);
4493 }
4494 END_TEST
4495 
4496 /* Parsing the full buffer rather than a byte at a time makes a
4497  * difference to the encoding scanning code, so repeat the above tests
4498  * without breaking them down by byte.
4499  */
START_TEST(test_ext_entity_latin1_utf16le_bom2)4500 START_TEST(test_ext_entity_latin1_utf16le_bom2) {
4501   const char *text = "<!DOCTYPE doc [\n"
4502                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4503                      "]>\n"
4504                      "<doc>&en;</doc>";
4505   ExtTest2 test_data
4506       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4507          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4508           *   0x4c = L and 0x20 is a space
4509           */
4510          "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4511 #ifdef XML_UNICODE
4512   const XML_Char *expected = XCS("\x00ff\x00feL ");
4513 #else
4514   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4515   const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4516 #endif
4517   CharData storage;
4518 
4519   CharData_Init(&storage);
4520   test_data.storage = &storage;
4521   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4522   XML_SetUserData(g_parser, &test_data);
4523   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4524   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4525       == XML_STATUS_ERROR)
4526     xml_failure(g_parser);
4527   CharData_CheckXMLChars(&storage, expected);
4528 }
4529 END_TEST
4530 
START_TEST(test_ext_entity_latin1_utf16be_bom2)4531 START_TEST(test_ext_entity_latin1_utf16be_bom2) {
4532   const char *text = "<!DOCTYPE doc [\n"
4533                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4534                      "]>\n"
4535                      "<doc>&en;</doc>";
4536   ExtTest2 test_data
4537       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4538          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4539           *   0x4c = L and 0x20 is a space
4540           */
4541          "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4542 #ifdef XML_UNICODE
4543   const XML_Char *expected = XCS("\x00fe\x00ff L");
4544 #else
4545   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4546   const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
4547 #endif
4548   CharData storage;
4549 
4550   CharData_Init(&storage);
4551   test_data.storage = &storage;
4552   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4553   XML_SetUserData(g_parser, &test_data);
4554   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4555   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4556       == XML_STATUS_ERROR)
4557     xml_failure(g_parser);
4558   CharData_CheckXMLChars(&storage, expected);
4559 }
4560 END_TEST
4561 
4562 /* Test little-endian UTF-16 given an explicit big-endian encoding */
START_TEST(test_ext_entity_utf16_be)4563 START_TEST(test_ext_entity_utf16_be) {
4564   const char *text = "<!DOCTYPE doc [\n"
4565                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4566                      "]>\n"
4567                      "<doc>&en;</doc>";
4568   ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL};
4569 #ifdef XML_UNICODE
4570   const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4571 #else
4572   const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
4573                                  "\xe6\x94\x80"   /* U+6500 */
4574                                  "\xe2\xbc\x80"   /* U+2F00 */
4575                                  "\xe3\xb8\x80"); /* U+3E00 */
4576 #endif
4577   CharData storage;
4578 
4579   CharData_Init(&storage);
4580   test_data.storage = &storage;
4581   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4582   XML_SetUserData(g_parser, &test_data);
4583   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4584   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4585       == XML_STATUS_ERROR)
4586     xml_failure(g_parser);
4587   CharData_CheckXMLChars(&storage, expected);
4588 }
4589 END_TEST
4590 
4591 /* Test big-endian UTF-16 given an explicit little-endian encoding */
START_TEST(test_ext_entity_utf16_le)4592 START_TEST(test_ext_entity_utf16_le) {
4593   const char *text = "<!DOCTYPE doc [\n"
4594                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4595                      "]>\n"
4596                      "<doc>&en;</doc>";
4597   ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
4598 #ifdef XML_UNICODE
4599   const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4600 #else
4601   const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
4602                                  "\xe6\x94\x80"   /* U+6500 */
4603                                  "\xe2\xbc\x80"   /* U+2F00 */
4604                                  "\xe3\xb8\x80"); /* U+3E00 */
4605 #endif
4606   CharData storage;
4607 
4608   CharData_Init(&storage);
4609   test_data.storage = &storage;
4610   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4611   XML_SetUserData(g_parser, &test_data);
4612   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4613   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4614       == XML_STATUS_ERROR)
4615     xml_failure(g_parser);
4616   CharData_CheckXMLChars(&storage, expected);
4617 }
4618 END_TEST
4619 
4620 /* Test little-endian UTF-16 given no explicit encoding.
4621  * The existing default encoding (UTF-8) is assumed to hold without a
4622  * BOM to contradict it, so the entity value will in fact provoke an
4623  * error because 0x00 is not a valid XML character.  We parse the
4624  * whole buffer in one go rather than feeding it in byte by byte to
4625  * exercise different code paths in the initial scanning routines.
4626  */
START_TEST(test_ext_entity_utf16_unknown)4627 START_TEST(test_ext_entity_utf16_unknown) {
4628   const char *text = "<!DOCTYPE doc [\n"
4629                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4630                      "]>\n"
4631                      "<doc>&en;</doc>";
4632   ExtFaults2 test_data
4633       = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
4634          XML_ERROR_INVALID_TOKEN};
4635 
4636   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
4637   XML_SetUserData(g_parser, &test_data);
4638   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4639                  "Invalid character should not have been accepted");
4640 }
4641 END_TEST
4642 
4643 /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */
START_TEST(test_ext_entity_utf8_non_bom)4644 START_TEST(test_ext_entity_utf8_non_bom) {
4645   const char *text = "<!DOCTYPE doc [\n"
4646                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4647                      "]>\n"
4648                      "<doc>&en;</doc>";
4649   ExtTest2 test_data
4650       = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
4651          3, NULL, NULL};
4652 #ifdef XML_UNICODE
4653   const XML_Char *expected = XCS("\xfec0");
4654 #else
4655   const XML_Char *expected = XCS("\xef\xbb\x80");
4656 #endif
4657   CharData storage;
4658 
4659   CharData_Init(&storage);
4660   test_data.storage = &storage;
4661   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4662   XML_SetUserData(g_parser, &test_data);
4663   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4664   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4665       == XML_STATUS_ERROR)
4666     xml_failure(g_parser);
4667   CharData_CheckXMLChars(&storage, expected);
4668 }
4669 END_TEST
4670 
4671 /* Test that UTF-8 in a CDATA section is correctly passed through */
START_TEST(test_utf8_in_cdata_section)4672 START_TEST(test_utf8_in_cdata_section) {
4673   const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
4674 #ifdef XML_UNICODE
4675   const XML_Char *expected = XCS("one \x00e9 two");
4676 #else
4677   const XML_Char *expected = XCS("one \xc3\xa9 two");
4678 #endif
4679 
4680   run_character_check(text, expected);
4681 }
4682 END_TEST
4683 
/* Test that UTF-8 around a lone ']' in a CDATA section is handled */
START_TEST(test_utf8_in_cdata_section_2)4685 START_TEST(test_utf8_in_cdata_section_2) {
4686   const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
4687 #ifdef XML_UNICODE
4688   const XML_Char *expected = XCS("\x00e9]\x00e9two");
4689 #else
4690   const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
4691 #endif
4692 
4693   run_character_check(text, expected);
4694 }
4695 END_TEST
4696 
START_TEST(test_utf8_in_start_tags)4697 START_TEST(test_utf8_in_start_tags) {
4698   struct test_case {
4699     bool goodName;
4700     bool goodNameStart;
4701     const char *tagName;
4702   };
4703 
4704   // The idea with the tests below is this:
4705   // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
4706   // go to isNever and are hence not a concern.
4707   //
4708   // We start with a character that is a valid name character
4709   // (or even name-start character, see XML 1.0r4 spec) and then we flip
4710   // single bits at places where (1) the result leaves the UTF-8 encoding space
4711   // and (2) we stay in the same n-byte sequence family.
4712   //
4713   // The flipped bits are highlighted in angle brackets in comments,
4714   // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
4715   // the most significant bit to 1 to leave UTF-8 encoding space.
4716   struct test_case cases[] = {
4717       // 1-byte UTF-8: [0xxx xxxx]
4718       {true, true, "\x3A"},   // [0011 1010] = ASCII colon ':'
4719       {false, false, "\xBA"}, // [<1>011 1010]
4720       {true, false, "\x39"},  // [0011 1001] = ASCII nine '9'
4721       {false, false, "\xB9"}, // [<1>011 1001]
4722 
4723       // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
4724       {true, true, "\xDB\xA5"},   // [1101 1011] [1010 0101] =
4725                                   // Arabic small waw U+06E5
4726       {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
4727       {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
4728       {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
4729       {true, false, "\xCC\x81"},  // [1100 1100] [1000 0001] =
4730                                   // combining char U+0301
4731       {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
4732       {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
4733       {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
4734 
4735       // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
4736       {true, true, "\xE0\xA4\x85"},   // [1110 0000] [1010 0100] [1000 0101] =
4737                                       // Devanagari Letter A U+0905
4738       {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
4739       {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
4740       {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
4741       {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
4742       {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
4743       {true, false, "\xE0\xA4\x81"},  // [1110 0000] [1010 0100] [1000 0001] =
4744                                       // combining char U+0901
4745       {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
4746       {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
4747       {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
4748       {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
4749       {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
4750   };
4751   const bool atNameStart[] = {true, false};
4752 
4753   size_t i = 0;
4754   char doc[1024];
4755   size_t failCount = 0;
4756 
4757   // we need all the bytes to be parsed, but we don't want the errors that can
4758   // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
4759   if (g_reparseDeferralEnabledDefault) {
4760     return;
4761   }
4762 
4763   for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
4764     size_t j = 0;
4765     for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
4766       const bool expectedSuccess
4767           = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
4768       snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
4769                cases[i].tagName);
4770       XML_Parser parser = XML_ParserCreate(NULL);
4771 
4772       const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
4773           parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
4774 
4775       bool success = true;
4776       if ((status == XML_STATUS_OK) != expectedSuccess) {
4777         success = false;
4778       }
4779       if ((status == XML_STATUS_ERROR)
4780           && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
4781         success = false;
4782       }
4783 
4784       if (! success) {
4785         fprintf(
4786             stderr,
4787             "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
4788             (unsigned)i + 1u, atNameStart[j] ? "    " : "not ",
4789             (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
4790         failCount++;
4791       }
4792 
4793       XML_ParserFree(parser);
4794     }
4795   }
4796 
4797   if (failCount > 0) {
4798     fail("UTF-8 regression detected");
4799   }
4800 }
4801 END_TEST
4802 
4803 /* Test trailing spaces in elements are accepted */
START_TEST(test_trailing_spaces_in_elements)4804 START_TEST(test_trailing_spaces_in_elements) {
4805   const char *text = "<doc   >Hi</doc >";
4806   const XML_Char *expected = XCS("doc/doc");
4807   CharData storage;
4808 
4809   CharData_Init(&storage);
4810   XML_SetElementHandler(g_parser, record_element_start_handler,
4811                         record_element_end_handler);
4812   XML_SetUserData(g_parser, &storage);
4813   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4814       == XML_STATUS_ERROR)
4815     xml_failure(g_parser);
4816   CharData_CheckXMLChars(&storage, expected);
4817 }
4818 END_TEST
4819 
START_TEST(test_utf16_attribute)4820 START_TEST(test_utf16_attribute) {
4821   const char text[] =
4822       /* <d {KHO KHWAI}{CHO CHAN}='a'/>
4823        * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4824        * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4825        */
4826       "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
4827   const XML_Char *expected = XCS("a");
4828   CharData storage;
4829 
4830   CharData_Init(&storage);
4831   XML_SetStartElementHandler(g_parser, accumulate_attribute);
4832   XML_SetUserData(g_parser, &storage);
4833   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4834       == XML_STATUS_ERROR)
4835     xml_failure(g_parser);
4836   CharData_CheckXMLChars(&storage, expected);
4837 }
4838 END_TEST
4839 
START_TEST(test_utf16_second_attr)4840 START_TEST(test_utf16_second_attr) {
4841   /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
4842    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4843    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4844    */
4845   const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
4846                       "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
4847   const XML_Char *expected = XCS("1");
4848   CharData storage;
4849 
4850   CharData_Init(&storage);
4851   XML_SetStartElementHandler(g_parser, accumulate_attribute);
4852   XML_SetUserData(g_parser, &storage);
4853   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4854       == XML_STATUS_ERROR)
4855     xml_failure(g_parser);
4856   CharData_CheckXMLChars(&storage, expected);
4857 }
4858 END_TEST
4859 
START_TEST(test_attr_after_solidus)4860 START_TEST(test_attr_after_solidus) {
4861   const char *text = "<doc attr1='a' / attr2='b'>";
4862 
4863   expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
4864 }
4865 END_TEST
4866 
START_TEST(test_utf16_pe)4867 START_TEST(test_utf16_pe) {
4868   /* <!DOCTYPE doc [
4869    * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
4870    * %{KHO KHWAI}{CHO CHAN};
4871    * ]>
4872    * <doc></doc>
4873    *
4874    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4875    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4876    */
4877   const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4878                       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
4879                       "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
4880                       "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
4881                       "\0%\x0e\x04\x0e\x08\0;\0\n"
4882                       "\0]\0>\0\n"
4883                       "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
4884 #ifdef XML_UNICODE
4885   const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
4886 #else
4887   const XML_Char *expected
4888       = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
4889 #endif
4890   CharData storage;
4891 
4892   CharData_Init(&storage);
4893   XML_SetUserData(g_parser, &storage);
4894   XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl);
4895   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4896       == XML_STATUS_ERROR)
4897     xml_failure(g_parser);
4898   CharData_CheckXMLChars(&storage, expected);
4899 }
4900 END_TEST
4901 
4902 /* Test that duff attribute description keywords are rejected */
START_TEST(test_bad_attr_desc_keyword)4903 START_TEST(test_bad_attr_desc_keyword) {
4904   const char *text = "<!DOCTYPE doc [\n"
4905                      "  <!ATTLIST doc attr CDATA #!IMPLIED>\n"
4906                      "]>\n"
4907                      "<doc />";
4908 
4909   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4910                  "Bad keyword !IMPLIED not faulted");
4911 }
4912 END_TEST
4913 
4914 /* Test that an invalid attribute description keyword consisting of
4915  * UTF-16 characters with their top bytes non-zero are correctly
4916  * faulted
4917  */
START_TEST(test_bad_attr_desc_keyword_utf16)4918 START_TEST(test_bad_attr_desc_keyword_utf16) {
4919   /* <!DOCTYPE d [
4920    * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
4921    * ]><d/>
4922    *
4923    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4924    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4925    */
4926   const char text[]
4927       = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
4928         "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
4929         "\0#\x0e\x04\x0e\x08\0>\0\n"
4930         "\0]\0>\0<\0d\0/\0>";
4931 
4932   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4933       != XML_STATUS_ERROR)
4934     fail("Invalid UTF16 attribute keyword not faulted");
4935   if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4936     xml_failure(g_parser);
4937 }
4938 END_TEST
4939 
4940 /* Test that invalid syntax in a <!DOCTYPE> is rejected.  Do this
4941  * using prefix-encoding (see above) to trigger specific code paths
4942  */
START_TEST(test_bad_doctype)4943 START_TEST(test_bad_doctype) {
4944   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4945                      "<!DOCTYPE doc [ \x80\x44 ]><doc/>";
4946 
4947   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4948   expect_failure(text, XML_ERROR_SYNTAX,
4949                  "Invalid bytes in DOCTYPE not faulted");
4950 }
4951 END_TEST
4952 
START_TEST(test_bad_doctype_utf8)4953 START_TEST(test_bad_doctype_utf8) {
4954   const char *text = "<!DOCTYPE \xDB\x25"
4955                      "doc><doc/>"; // [1101 1011] [<0>010 0101]
4956   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4957                  "Invalid UTF-8 in DOCTYPE not faulted");
4958 }
4959 END_TEST
4960 
START_TEST(test_bad_doctype_utf16)4961 START_TEST(test_bad_doctype_utf16) {
4962   const char text[] =
4963       /* <!DOCTYPE doc [ \x06f2 ]><doc/>
4964        *
4965        * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
4966        * (name character) but not a valid letter (name start character)
4967        */
4968       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
4969       "\x06\xf2"
4970       "\0 \0]\0>\0<\0d\0o\0c\0/\0>";
4971 
4972   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4973       != XML_STATUS_ERROR)
4974     fail("Invalid bytes in DOCTYPE not faulted");
4975   if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4976     xml_failure(g_parser);
4977 }
4978 END_TEST
4979 
START_TEST(test_bad_doctype_plus)4980 START_TEST(test_bad_doctype_plus) {
4981   const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
4982                      "<1+>&foo;</1+>";
4983 
4984   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4985                  "'+' in document name not faulted");
4986 }
4987 END_TEST
4988 
START_TEST(test_bad_doctype_star)4989 START_TEST(test_bad_doctype_star) {
4990   const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
4991                      "<1*>&foo;</1*>";
4992 
4993   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4994                  "'*' in document name not faulted");
4995 }
4996 END_TEST
4997 
START_TEST(test_bad_doctype_query)4998 START_TEST(test_bad_doctype_query) {
4999   const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
5000                      "<1?>&foo;</1?>";
5001 
5002   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5003                  "'?' in document name not faulted");
5004 }
5005 END_TEST
5006 
START_TEST(test_unknown_encoding_bad_ignore)5007 START_TEST(test_unknown_encoding_bad_ignore) {
5008   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
5009                      "<!DOCTYPE doc SYSTEM 'foo'>"
5010                      "<doc><e>&entity;</e></doc>";
5011   ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
5012                      "Invalid character not faulted", XCS("prefix-conv"),
5013                      XML_ERROR_INVALID_TOKEN};
5014 
5015   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
5016   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5017   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
5018   XML_SetUserData(g_parser, &fault);
5019   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
5020                  "Bad IGNORE section with unknown encoding not failed");
5021 }
5022 END_TEST
5023 
START_TEST(test_entity_in_utf16_be_attr)5024 START_TEST(test_entity_in_utf16_be_attr) {
5025   const char text[] =
5026       /* <e a='&#228; &#x00E4;'></e> */
5027       "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
5028       "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
5029 #ifdef XML_UNICODE
5030   const XML_Char *expected = XCS("\x00e4 \x00e4");
5031 #else
5032   const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5033 #endif
5034   CharData storage;
5035 
5036   CharData_Init(&storage);
5037   XML_SetUserData(g_parser, &storage);
5038   XML_SetStartElementHandler(g_parser, accumulate_attribute);
5039   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5040       == XML_STATUS_ERROR)
5041     xml_failure(g_parser);
5042   CharData_CheckXMLChars(&storage, expected);
5043 }
5044 END_TEST
5045 
START_TEST(test_entity_in_utf16_le_attr)5046 START_TEST(test_entity_in_utf16_le_attr) {
5047   const char text[] =
5048       /* <e a='&#228; &#x00E4;'></e> */
5049       "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
5050       "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
5051 #ifdef XML_UNICODE
5052   const XML_Char *expected = XCS("\x00e4 \x00e4");
5053 #else
5054   const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5055 #endif
5056   CharData storage;
5057 
5058   CharData_Init(&storage);
5059   XML_SetUserData(g_parser, &storage);
5060   XML_SetStartElementHandler(g_parser, accumulate_attribute);
5061   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5062       == XML_STATUS_ERROR)
5063     xml_failure(g_parser);
5064   CharData_CheckXMLChars(&storage, expected);
5065 }
5066 END_TEST
5067 
START_TEST(test_entity_public_utf16_be)5068 START_TEST(test_entity_public_utf16_be) {
5069   const char text[] =
5070       /* <!DOCTYPE d [ */
5071       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5072       /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5073       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
5074       "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
5075       /* %e; */
5076       "\0%\0e\0;\0\n"
5077       /* ]> */
5078       "\0]\0>\0\n"
5079       /* <d>&j;</d> */
5080       "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
5081   ExtTest2 test_data
5082       = {/* <!ENTITY j 'baz'> */
5083          "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
5084   const XML_Char *expected = XCS("baz");
5085   CharData storage;
5086 
5087   CharData_Init(&storage);
5088   test_data.storage = &storage;
5089   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5090   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5091   XML_SetUserData(g_parser, &test_data);
5092   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5093   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5094       == XML_STATUS_ERROR)
5095     xml_failure(g_parser);
5096   CharData_CheckXMLChars(&storage, expected);
5097 }
5098 END_TEST
5099 
START_TEST(test_entity_public_utf16_le)5100 START_TEST(test_entity_public_utf16_le) {
5101   const char text[] =
5102       /* <!DOCTYPE d [ */
5103       "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
5104       /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5105       "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
5106       "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
5107       /* %e; */
5108       "%\0e\0;\0\n\0"
5109       /* ]> */
5110       "]\0>\0\n\0"
5111       /* <d>&j;</d> */
5112       "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
5113   ExtTest2 test_data
5114       = {/* <!ENTITY j 'baz'> */
5115          "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
5116   const XML_Char *expected = XCS("baz");
5117   CharData storage;
5118 
5119   CharData_Init(&storage);
5120   test_data.storage = &storage;
5121   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5122   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5123   XML_SetUserData(g_parser, &test_data);
5124   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5125   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5126       == XML_STATUS_ERROR)
5127     xml_failure(g_parser);
5128   CharData_CheckXMLChars(&storage, expected);
5129 }
5130 END_TEST
5131 
5132 /* Test that a doctype with neither an internal nor external subset is
5133  * faulted
5134  */
START_TEST(test_short_doctype)5135 START_TEST(test_short_doctype) {
5136   const char *text = "<!DOCTYPE doc></doc>";
5137   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5138                  "DOCTYPE without subset not rejected");
5139 }
5140 END_TEST
5141 
START_TEST(test_short_doctype_2)5142 START_TEST(test_short_doctype_2) {
5143   const char *text = "<!DOCTYPE doc PUBLIC></doc>";
5144   expect_failure(text, XML_ERROR_SYNTAX,
5145                  "DOCTYPE without Public ID not rejected");
5146 }
5147 END_TEST
5148 
START_TEST(test_short_doctype_3)5149 START_TEST(test_short_doctype_3) {
5150   const char *text = "<!DOCTYPE doc SYSTEM></doc>";
5151   expect_failure(text, XML_ERROR_SYNTAX,
5152                  "DOCTYPE without System ID not rejected");
5153 }
5154 END_TEST
5155 
START_TEST(test_long_doctype)5156 START_TEST(test_long_doctype) {
5157   const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
5158   expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
5159 }
5160 END_TEST
5161 
START_TEST(test_bad_entity)5162 START_TEST(test_bad_entity) {
5163   const char *text = "<!DOCTYPE doc [\n"
5164                      "  <!ENTITY foo PUBLIC>\n"
5165                      "]>\n"
5166                      "<doc/>";
5167   expect_failure(text, XML_ERROR_SYNTAX,
5168                  "ENTITY without Public ID is not rejected");
5169 }
5170 END_TEST
5171 
5172 /* Test unquoted value is faulted */
START_TEST(test_bad_entity_2)5173 START_TEST(test_bad_entity_2) {
5174   const char *text = "<!DOCTYPE doc [\n"
5175                      "  <!ENTITY % foo bar>\n"
5176                      "]>\n"
5177                      "<doc/>";
5178   expect_failure(text, XML_ERROR_SYNTAX,
5179                  "ENTITY without Public ID is not rejected");
5180 }
5181 END_TEST
5182 
START_TEST(test_bad_entity_3)5183 START_TEST(test_bad_entity_3) {
5184   const char *text = "<!DOCTYPE doc [\n"
5185                      "  <!ENTITY % foo PUBLIC>\n"
5186                      "]>\n"
5187                      "<doc/>";
5188   expect_failure(text, XML_ERROR_SYNTAX,
5189                  "Parameter ENTITY without Public ID is not rejected");
5190 }
5191 END_TEST
5192 
START_TEST(test_bad_entity_4)5193 START_TEST(test_bad_entity_4) {
5194   const char *text = "<!DOCTYPE doc [\n"
5195                      "  <!ENTITY % foo SYSTEM>\n"
5196                      "]>\n"
5197                      "<doc/>";
5198   expect_failure(text, XML_ERROR_SYNTAX,
5199                  "Parameter ENTITY without Public ID is not rejected");
5200 }
5201 END_TEST
5202 
START_TEST(test_bad_notation)5203 START_TEST(test_bad_notation) {
5204   const char *text = "<!DOCTYPE doc [\n"
5205                      "  <!NOTATION n SYSTEM>\n"
5206                      "]>\n"
5207                      "<doc/>";
5208   expect_failure(text, XML_ERROR_SYNTAX,
5209                  "Notation without System ID is not rejected");
5210 }
5211 END_TEST
5212 
5213 /* Test for issue #11, wrongly suppressed default handler */
START_TEST(test_default_doctype_handler)5214 START_TEST(test_default_doctype_handler) {
5215   const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n"
5216                      "  <!ENTITY foo 'bar'>\n"
5217                      "]>\n"
5218                      "<doc>&foo;</doc>";
5219   DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE},
5220                               {XCS("'test.dtd'"), 10, XML_FALSE},
5221                               {NULL, 0, XML_FALSE}};
5222   int i;
5223 
5224   XML_SetUserData(g_parser, &test_data);
5225   XML_SetDefaultHandler(g_parser, checking_default_handler);
5226   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
5227   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5228       == XML_STATUS_ERROR)
5229     xml_failure(g_parser);
5230   for (i = 0; test_data[i].expected != NULL; i++)
5231     if (! test_data[i].seen)
5232       fail("Default handler not run for public !DOCTYPE");
5233 }
5234 END_TEST
5235 
START_TEST(test_empty_element_abort)5236 START_TEST(test_empty_element_abort) {
5237   const char *text = "<abort/>";
5238 
5239   XML_SetStartElementHandler(g_parser, start_element_suspender);
5240   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5241       != XML_STATUS_ERROR)
5242     fail("Expected to error on abort");
5243 }
5244 END_TEST
5245 
5246 /* Regression test for GH issue #612: unfinished m_declAttributeType
5247  * allocation in ->m_tempPool can corrupt following allocation.
5248  */
START_TEST(test_pool_integrity_with_unfinished_attr)5249 START_TEST(test_pool_integrity_with_unfinished_attr) {
5250   const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
5251                      "<!DOCTYPE foo [\n"
5252                      "<!ELEMENT foo ANY>\n"
5253                      "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
5254                      "%entp;\n"
5255                      "]>\n"
5256                      "<a></a>\n";
5257   const XML_Char *expected = XCS("COMMENT");
5258   CharData storage;
5259 
5260   CharData_Init(&storage);
5261   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5262   XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist);
5263   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
5264   XML_SetCommentHandler(g_parser, accumulate_comment);
5265   XML_SetUserData(g_parser, &storage);
5266   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5267       == XML_STATUS_ERROR)
5268     xml_failure(g_parser);
5269   CharData_CheckXMLChars(&storage, expected);
5270 }
5271 END_TEST
5272 
5273 /* Test a possible early return location in internalEntityProcessor */
START_TEST(test_entity_ref_no_elements)5274 START_TEST(test_entity_ref_no_elements) {
5275   const char *const text = "<!DOCTYPE foo [\n"
5276                            "<!ENTITY e1 \"test\">\n"
5277                            "]> <foo>&e1;"; // intentionally missing newline
5278 
5279   XML_Parser parser = XML_ParserCreate(NULL);
5280   assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5281               == XML_STATUS_ERROR);
5282   assert_true(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS);
5283   XML_ParserFree(parser);
5284 }
5285 END_TEST
5286 
5287 /* Tests if chained entity references lead to unbounded recursion */
START_TEST(test_deep_nested_entity)5288 START_TEST(test_deep_nested_entity) {
5289   const size_t N_LINES = 60000;
5290   const size_t SIZE_PER_LINE = 50;
5291 
5292   char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
5293   if (text == NULL) {
5294     fail("malloc failed");
5295   }
5296 
5297   char *textPtr = text;
5298 
5299   // Create the XML
5300   textPtr += snprintf(textPtr, SIZE_PER_LINE,
5301                       "<!DOCTYPE foo [\n"
5302                       "	<!ENTITY s0 'deepText'>\n");
5303 
5304   for (size_t i = 1; i < N_LINES; ++i) {
5305     textPtr += snprintf(textPtr, SIZE_PER_LINE, "  <!ENTITY s%lu '&s%lu;'>\n",
5306                         (long unsigned)i, (long unsigned)(i - 1));
5307   }
5308 
5309   snprintf(textPtr, SIZE_PER_LINE, "]> <foo>&s%lu;</foo>\n",
5310            (long unsigned)(N_LINES - 1));
5311 
5312   const XML_Char *const expected = XCS("deepText");
5313 
5314   CharData storage;
5315   CharData_Init(&storage);
5316 
5317   XML_Parser parser = XML_ParserCreate(NULL);
5318 
5319   XML_SetCharacterDataHandler(parser, accumulate_characters);
5320   XML_SetUserData(parser, &storage);
5321 
5322   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5323       == XML_STATUS_ERROR)
5324     xml_failure(parser);
5325 
5326   CharData_CheckXMLChars(&storage, expected);
5327   XML_ParserFree(parser);
5328   free(text);
5329 }
5330 END_TEST
5331 
5332 /* Tests if chained entity references in attributes
5333 lead to unbounded recursion */
START_TEST(test_deep_nested_attribute_entity)5334 START_TEST(test_deep_nested_attribute_entity) {
5335   const size_t N_LINES = 60000;
5336   const size_t SIZE_PER_LINE = 100;
5337 
5338   char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
5339   if (text == NULL) {
5340     fail("malloc failed");
5341   }
5342 
5343   char *textPtr = text;
5344 
5345   // Create the XML
5346   textPtr += snprintf(textPtr, SIZE_PER_LINE,
5347                       "<!DOCTYPE foo [\n"
5348                       "	<!ENTITY s0 'deepText'>\n");
5349 
5350   for (size_t i = 1; i < N_LINES; ++i) {
5351     textPtr += snprintf(textPtr, SIZE_PER_LINE, "  <!ENTITY s%lu '&s%lu;'>\n",
5352                         (long unsigned)i, (long unsigned)(i - 1));
5353   }
5354 
5355   snprintf(textPtr, SIZE_PER_LINE, "]> <foo name='&s%lu;'>mainText</foo>\n",
5356            (long unsigned)(N_LINES - 1));
5357 
5358   AttrInfo doc_info[] = {{XCS("name"), XCS("deepText")}, {NULL, NULL}};
5359   ElementInfo info[] = {{XCS("foo"), 1, NULL, NULL}, {NULL, 0, NULL, NULL}};
5360   info[0].attributes = doc_info;
5361 
5362   XML_Parser parser = XML_ParserCreate(NULL);
5363   ParserAndElementInfo parserPlusElemenInfo = {parser, info};
5364 
5365   XML_SetStartElementHandler(parser, counting_start_element_handler);
5366   XML_SetUserData(parser, &parserPlusElemenInfo);
5367 
5368   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5369       == XML_STATUS_ERROR)
5370     xml_failure(parser);
5371 
5372   XML_ParserFree(parser);
5373   free(text);
5374 }
5375 END_TEST
5376 
START_TEST(test_deep_nested_entity_delayed_interpretation)5377 START_TEST(test_deep_nested_entity_delayed_interpretation) {
5378   const size_t N_LINES = 70000;
5379   const size_t SIZE_PER_LINE = 100;
5380 
5381   char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
5382   if (text == NULL) {
5383     fail("malloc failed");
5384   }
5385 
5386   char *textPtr = text;
5387 
5388   // Create the XML
5389   textPtr += snprintf(textPtr, SIZE_PER_LINE,
5390                       "<!DOCTYPE foo [\n"
5391                       "	<!ENTITY %% s0 'deepText'>\n");
5392 
5393   for (size_t i = 1; i < N_LINES; ++i) {
5394     textPtr += snprintf(textPtr, SIZE_PER_LINE,
5395                         "  <!ENTITY %% s%lu '&#37;s%lu;'>\n", (long unsigned)i,
5396                         (long unsigned)(i - 1));
5397   }
5398 
5399   snprintf(textPtr, SIZE_PER_LINE,
5400            "  <!ENTITY %% define_g \"<!ENTITY g '&#37;s%lu;'>\">\n"
5401            "  %%define_g;\n"
5402            "]>\n"
5403            "<foo/>\n",
5404            (long unsigned)(N_LINES - 1));
5405 
5406   XML_Parser parser = XML_ParserCreate(NULL);
5407 
5408   XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5409   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5410       == XML_STATUS_ERROR)
5411     xml_failure(parser);
5412 
5413   XML_ParserFree(parser);
5414   free(text);
5415 }
5416 END_TEST
5417 
START_TEST(test_nested_entity_suspend)5418 START_TEST(test_nested_entity_suspend) {
5419   const char *const text = "<!DOCTYPE a [\n"
5420                            "  <!ENTITY e1 '<!--e1-->'>\n"
5421                            "  <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
5422                            "  <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
5423                            "]>\n"
5424                            "<a><!--start-->&e3;<!--end--></a>";
5425   const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
5426       XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
5427   CharData storage;
5428   CharData_Init(&storage);
5429   XML_Parser parser = XML_ParserCreate(NULL);
5430   ParserPlusStorage parserPlusStorage = {parser, &storage};
5431 
5432   XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5433   XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
5434   XML_SetUserData(parser, &parserPlusStorage);
5435 
5436   enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5437   while (status == XML_STATUS_SUSPENDED) {
5438     status = XML_ResumeParser(parser);
5439   }
5440   if (status != XML_STATUS_OK)
5441     xml_failure(parser);
5442 
5443   CharData_CheckXMLChars(&storage, expected);
5444   XML_ParserFree(parser);
5445 }
5446 END_TEST
5447 
START_TEST(test_nested_entity_suspend_2)5448 START_TEST(test_nested_entity_suspend_2) {
5449   const char *const text = "<!DOCTYPE doc [\n"
5450                            "  <!ENTITY ge1 'head1Ztail1'>\n"
5451                            "  <!ENTITY ge2 'head2&ge1;tail2'>\n"
5452                            "  <!ENTITY ge3 'head3&ge2;tail3'>\n"
5453                            "]>\n"
5454                            "<doc>&ge3;</doc>";
5455   const XML_Char *const expected = XCS("head3") XCS("head2") XCS("head1")
5456       XCS("Z") XCS("tail1") XCS("tail2") XCS("tail3");
5457   CharData storage;
5458   CharData_Init(&storage);
5459   XML_Parser parser = XML_ParserCreate(NULL);
5460   ParserPlusStorage parserPlusStorage = {parser, &storage};
5461 
5462   XML_SetCharacterDataHandler(parser, accumulate_char_data_and_suspend);
5463   XML_SetUserData(parser, &parserPlusStorage);
5464 
5465   enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5466   while (status == XML_STATUS_SUSPENDED) {
5467     status = XML_ResumeParser(parser);
5468   }
5469   if (status != XML_STATUS_OK)
5470     xml_failure(parser);
5471 
5472   CharData_CheckXMLChars(&storage, expected);
5473   XML_ParserFree(parser);
5474 }
5475 END_TEST
5476 
5477 /* Regression test for quadratic parsing on large tokens */
START_TEST(test_big_tokens_scale_linearly)5478 START_TEST(test_big_tokens_scale_linearly) {
5479   const struct {
5480     const char *pre;
5481     const char *post;
5482   } text[] = {
5483       {"<a>", "</a>"},                      // assumed good, used as baseline
5484       {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
5485       {"<c attr='", "'></c>"},              // big attribute, used to be O(N²)
5486       {"<d><!-- ", " --></d>"},             // long comment, used to be O(N²)
5487       {"<e><", "/></e>"},                   // big elem name, used to be O(N²)
5488   };
5489   const int num_cases = sizeof(text) / sizeof(text[0]);
5490   char aaaaaa[4096];
5491   const int fillsize = (int)sizeof(aaaaaa);
5492   const int fillcount = 100;
5493   const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
5494   const unsigned max_factor = 4;
5495   const unsigned max_scanned = max_factor * approx_bytes;
5496 
5497   memset(aaaaaa, 'a', fillsize);
5498 
5499   if (! g_reparseDeferralEnabledDefault) {
5500     return; // heuristic is disabled; we would get O(n^2) and fail.
5501   }
5502 
5503   for (int i = 0; i < num_cases; ++i) {
5504     XML_Parser parser = XML_ParserCreate(NULL);
5505     assert_true(parser != NULL);
5506     enum XML_Status status;
5507     set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);
5508 
5509     // parse the start text
5510     g_bytesScanned = 0;
5511     status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
5512                                      (int)strlen(text[i].pre), XML_FALSE);
5513     if (status != XML_STATUS_OK) {
5514       xml_failure(parser);
5515     }
5516 
5517     // parse lots of 'a', failing the test early if it takes too long
5518     unsigned past_max_count = 0;
5519     for (int f = 0; f < fillcount; ++f) {
5520       status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
5521       if (status != XML_STATUS_OK) {
5522         xml_failure(parser);
5523       }
5524       if (g_bytesScanned > max_scanned) {
5525         // We're not done, and have already passed the limit -- the test will
5526         // definitely fail. This block allows us to save time by failing early.
5527         const unsigned pushed
5528             = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
5529         fprintf(
5530             stderr,
5531             "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
5532             f + 1, fillcount, pushed, g_bytesScanned,
5533             g_bytesScanned / (double)pushed, max_scanned, max_factor);
5534         past_max_count++;
5535         // We are failing, but allow a few log prints first. If we don't reach
5536         // a count of five, the test will fail after the loop instead.
5537         assert_true(past_max_count < 5);
5538       }
5539     }
5540 
5541     // parse the end text
5542     status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
5543                                      (int)strlen(text[i].post), XML_TRUE);
5544     if (status != XML_STATUS_OK) {
5545       xml_failure(parser);
5546     }
5547 
5548     assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
5549     if (g_bytesScanned > max_scanned) {
5550       fprintf(
5551           stderr,
5552           "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
5553           g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
5554           max_factor);
5555       fail("scanned too many bytes");
5556     }
5557 
5558     XML_ParserFree(parser);
5559   }
5560 }
5561 END_TEST
5562 
START_TEST(test_set_reparse_deferral)5563 START_TEST(test_set_reparse_deferral) {
5564   const char *const pre = "<d>";
5565   const char *const start = "<x attr='";
5566   const char *const end = "'></x>";
5567   char eeeeee[100];
5568   const int fillsize = (int)sizeof(eeeeee);
5569   memset(eeeeee, 'e', fillsize);
5570 
5571   for (int enabled = 0; enabled <= 1; enabled += 1) {
5572     set_subtest("deferral=%d", enabled);
5573 
5574     XML_Parser parser = XML_ParserCreate(NULL);
5575     assert_true(parser != NULL);
5576     assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5577     // pre-grow the buffer to avoid reparsing due to almost-fullness
5578     assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5579 
5580     CharData storage;
5581     CharData_Init(&storage);
5582     XML_SetUserData(parser, &storage);
5583     XML_SetStartElementHandler(parser, start_element_event_handler);
5584 
5585     enum XML_Status status;
5586     // parse the start text
5587     status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5588     if (status != XML_STATUS_OK) {
5589       xml_failure(parser);
5590     }
5591     CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5592 
5593     // ..and the start of the token
5594     status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5595     if (status != XML_STATUS_OK) {
5596       xml_failure(parser);
5597     }
5598     CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
5599 
5600     // try to parse lots of 'e', but the token isn't finished
5601     for (int c = 0; c < 100; ++c) {
5602       status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5603       if (status != XML_STATUS_OK) {
5604         xml_failure(parser);
5605       }
5606     }
5607     CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5608 
5609     // end the <x> token.
5610     status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5611     if (status != XML_STATUS_OK) {
5612       xml_failure(parser);
5613     }
5614 
5615     if (enabled) {
5616       // In general, we may need to push more data to trigger a reparse attempt,
5617       // but in this test, the data is constructed to always require it.
5618       CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
5619       // 2x the token length should suffice; the +1 covers the start and end.
5620       for (int c = 0; c < 101; ++c) {
5621         status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5622         if (status != XML_STATUS_OK) {
5623           xml_failure(parser);
5624         }
5625       }
5626     }
5627     CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
5628 
5629     XML_ParserFree(parser);
5630   }
5631 }
5632 END_TEST
5633 
/* User data for element_decl_counter. */
struct element_decl_data {
  XML_Parser parser; /* parser passed to XML_FreeContentModel for each model */
  int count;         /* incremented once per element declaration callback */
};
5638 
5639 static void
element_decl_counter(void * userData,const XML_Char * name,XML_Content * model)5640 element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
5641   UNUSED_P(name);
5642   struct element_decl_data *testdata = (struct element_decl_data *)userData;
5643   testdata->count += 1;
5644   XML_FreeContentModel(testdata->parser, model);
5645 }
5646 
5647 static int
external_inherited_parser(XML_Parser p,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)5648 external_inherited_parser(XML_Parser p, const XML_Char *context,
5649                           const XML_Char *base, const XML_Char *systemId,
5650                           const XML_Char *publicId) {
5651   UNUSED_P(base);
5652   UNUSED_P(systemId);
5653   UNUSED_P(publicId);
5654   const char *const pre = "<!ELEMENT document ANY>\n";
5655   const char *const start = "<!ELEMENT ";
5656   const char *const end = " ANY>\n";
5657   const char *const post = "<!ELEMENT xyz ANY>\n";
5658   const int enabled = *(int *)XML_GetUserData(p);
5659   char eeeeee[100];
5660   char spaces[100];
5661   const int fillsize = (int)sizeof(eeeeee);
5662   assert_true(fillsize == (int)sizeof(spaces));
5663   memset(eeeeee, 'e', fillsize);
5664   memset(spaces, ' ', fillsize);
5665 
5666   XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
5667   assert_true(parser != NULL);
5668   // pre-grow the buffer to avoid reparsing due to almost-fullness
5669   assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5670 
5671   struct element_decl_data testdata;
5672   testdata.parser = parser;
5673   testdata.count = 0;
5674   XML_SetUserData(parser, &testdata);
5675   XML_SetElementDeclHandler(parser, element_decl_counter);
5676 
5677   enum XML_Status status;
5678   // parse the initial text
5679   status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5680   if (status != XML_STATUS_OK) {
5681     xml_failure(parser);
5682   }
5683   assert_true(testdata.count == 1); // first element should be done
5684 
5685   // ..and the start of the big token
5686   status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5687   if (status != XML_STATUS_OK) {
5688     xml_failure(parser);
5689   }
5690   assert_true(testdata.count == 1); // still just the first one
5691 
5692   // try to parse lots of 'e', but the token isn't finished
5693   for (int c = 0; c < 100; ++c) {
5694     status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5695     if (status != XML_STATUS_OK) {
5696       xml_failure(parser);
5697     }
5698   }
5699   assert_true(testdata.count == 1); // *still* just the first one
5700 
5701   // end the big token.
5702   status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5703   if (status != XML_STATUS_OK) {
5704     xml_failure(parser);
5705   }
5706 
5707   if (enabled) {
5708     // In general, we may need to push more data to trigger a reparse attempt,
5709     // but in this test, the data is constructed to always require it.
5710     assert_true(testdata.count == 1); // or the test is incorrect
5711     // 2x the token length should suffice; the +1 covers the start and end.
5712     for (int c = 0; c < 101; ++c) {
5713       status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
5714       if (status != XML_STATUS_OK) {
5715         xml_failure(parser);
5716       }
5717     }
5718   }
5719   assert_true(testdata.count == 2); // the big token should be done
5720 
5721   // parse the final text
5722   status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
5723   if (status != XML_STATUS_OK) {
5724     xml_failure(parser);
5725   }
5726   assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
5727 
5728   XML_ParserFree(parser);
5729   return XML_STATUS_OK;
5730 }
5731 
START_TEST(test_reparse_deferral_is_inherited)5732 START_TEST(test_reparse_deferral_is_inherited) {
5733   const char *const text
5734       = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
5735   for (int enabled = 0; enabled <= 1; ++enabled) {
5736     set_subtest("deferral=%d", enabled);
5737 
5738     XML_Parser parser = XML_ParserCreate(NULL);
5739     assert_true(parser != NULL);
5740     XML_SetUserData(parser, (void *)&enabled);
5741     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5742     // this handler creates a sub-parser and checks that its deferral behavior
5743     // is what we expected, based on the value of `enabled` (in userdata).
5744     XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
5745     assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5746     if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
5747       xml_failure(parser);
5748 
5749     XML_ParserFree(parser);
5750   }
5751 }
5752 END_TEST
5753 
START_TEST(test_set_reparse_deferral_on_null_parser)5754 START_TEST(test_set_reparse_deferral_on_null_parser) {
5755   assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
5756   assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
5757   assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
5758   assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
5759   assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
5760               == XML_FALSE);
5761   assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
5762               == XML_FALSE);
5763 }
5764 END_TEST
5765 
START_TEST(test_set_reparse_deferral_on_the_fly)5766 START_TEST(test_set_reparse_deferral_on_the_fly) {
5767   const char *const pre = "<d><x attr='";
5768   const char *const end = "'></x>";
5769   char iiiiii[100];
5770   const int fillsize = (int)sizeof(iiiiii);
5771   memset(iiiiii, 'i', fillsize);
5772 
5773   XML_Parser parser = XML_ParserCreate(NULL);
5774   assert_true(parser != NULL);
5775   assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
5776 
5777   CharData storage;
5778   CharData_Init(&storage);
5779   XML_SetUserData(parser, &storage);
5780   XML_SetStartElementHandler(parser, start_element_event_handler);
5781 
5782   enum XML_Status status;
5783   // parse the start text
5784   status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5785   if (status != XML_STATUS_OK) {
5786     xml_failure(parser);
5787   }
5788   CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5789 
5790   // try to parse some 'i', but the token isn't finished
5791   status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
5792   if (status != XML_STATUS_OK) {
5793     xml_failure(parser);
5794   }
5795   CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5796 
5797   // end the <x> token.
5798   status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5799   if (status != XML_STATUS_OK) {
5800     xml_failure(parser);
5801   }
5802   CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
5803 
5804   // now change the heuristic setting and add *no* data
5805   assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
5806   // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
5807   status = XML_Parse(parser, "", 0, XML_FALSE);
5808   if (status != XML_STATUS_OK) {
5809     xml_failure(parser);
5810   }
5811   CharData_CheckXMLChars(&storage, XCS("dx"));
5812 
5813   XML_ParserFree(parser);
5814 }
5815 END_TEST
5816 
START_TEST(test_set_bad_reparse_option)5817 START_TEST(test_set_bad_reparse_option) {
5818   XML_Parser parser = XML_ParserCreate(NULL);
5819   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
5820   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
5821   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
5822   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
5823   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
5824   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
5825   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
5826   assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
5827   assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
5828   XML_ParserFree(parser);
5829 }
5830 END_TEST
5831 
/* Allocation statistics maintained by counting_realloc(). */
static size_t g_totalAlloc = 0;   // sum of every size requested so far
static size_t g_biggestAlloc = 0; // largest single size requested so far

/* realloc() wrapper that records the requested size in g_totalAlloc and
   g_biggestAlloc before forwarding the request to realloc(). The size is
   recorded regardless of whether realloc() succeeds. */
static void *
counting_realloc(void *ptr, size_t size) {
  g_biggestAlloc = (g_biggestAlloc < size) ? size : g_biggestAlloc;
  g_totalAlloc += size;
  return realloc(ptr, size);
}
5843 
/* malloc()-shaped front end for counting_realloc(): a fresh allocation is
   requested by passing a NULL block pointer, so the request goes through
   the same code path as reallocations. */
static void *
counting_malloc(size_t size) {
  void *const no_previous_block = NULL;
  return counting_realloc(no_previous_block, size);
}
5848 
START_TEST(test_bypass_heuristic_when_close_to_bufsize)5849 START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
5850   if (g_chunkSize != 0) {
5851     // this test does not use SINGLE_BYTES, because it depends on very precise
5852     // buffer fills.
5853     return;
5854   }
5855   if (! g_reparseDeferralEnabledDefault) {
5856     return; // this test is irrelevant when the deferral heuristic is disabled.
5857   }
5858 
5859   const int document_length = 65536;
5860   char *const document = (char *)malloc(document_length);
5861 
5862   const XML_Memory_Handling_Suite memfuncs = {
5863       counting_malloc,
5864       counting_realloc,
5865       free,
5866   };
5867 
5868   const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
5869   const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
5870   const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};
5871 
5872   for (const int *leading = leading_list; *leading >= 0; leading++) {
5873     for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
5874       for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
5875         set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
5876                     *fillsize);
5877         // start by checking that the test looks reasonably valid
5878         assert_true(*leading + *bigtoken <= document_length);
5879 
5880         // put 'x' everywhere; some will be overwritten by elements.
5881         memset(document, 'x', document_length);
5882         // maybe add an initial tag
5883         if (*leading) {
5884           assert_true(*leading >= 3); // or the test case is invalid
5885           memcpy(document, "<a>", 3);
5886         }
5887         // add the large token
5888         document[*leading + 0] = '<';
5889         document[*leading + 1] = 'b';
5890         memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
5891         document[*leading + *bigtoken - 1] = '>';
5892 
5893         // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
5894         const int expected_elem_total = 1 + (*leading ? 1 : 0);
5895 
5896         XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
5897         assert_true(parser != NULL);
5898 
5899         CharData storage;
5900         CharData_Init(&storage);
5901         XML_SetUserData(parser, &storage);
5902         XML_SetStartElementHandler(parser, start_element_event_handler);
5903 
5904         g_biggestAlloc = 0;
5905         g_totalAlloc = 0;
5906         int offset = 0;
5907         // fill data until the big token is covered (but not necessarily parsed)
5908         while (offset < *leading + *bigtoken) {
5909           assert_true(offset + *fillsize <= document_length);
5910           const enum XML_Status status
5911               = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5912           if (status != XML_STATUS_OK) {
5913             xml_failure(parser);
5914           }
5915           offset += *fillsize;
5916         }
5917         // Now, check that we've had a buffer allocation that could fit the
5918         // context bytes and our big token. In order to detect a special case,
5919         // we need to know how many bytes of our big token were included in the
5920         // first push that contained _any_ bytes of the big token:
5921         const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
5922         if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
5923           // Special case: we aren't saving any context, and the whole big token
5924           // was covered by a single fill, so Expat may have parsed directly
5925           // from our input pointer, without allocating an internal buffer.
5926         } else if (*leading < XML_CONTEXT_BYTES) {
5927           assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
5928         } else {
5929           assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
5930         }
5931         // fill data until the big token is actually parsed
5932         while (storage.count < expected_elem_total) {
5933           const size_t alloc_before = g_totalAlloc;
5934           assert_true(offset + *fillsize <= document_length);
5935           const enum XML_Status status
5936               = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5937           if (status != XML_STATUS_OK) {
5938             xml_failure(parser);
5939           }
5940           offset += *fillsize;
5941           // since all the bytes of the big token are already in the buffer,
5942           // the bufsize ceiling should make us finish its parsing without any
5943           // further buffer allocations. We assume that there will be no other
5944           // large allocations in this test.
5945           assert_true(g_totalAlloc - alloc_before < 4096);
5946         }
5947         // test-the-test: was our alloc even called?
5948         assert_true(g_totalAlloc > 0);
5949         // test-the-test: there shouldn't be any extra start elements
5950         assert_true(storage.count == expected_elem_total);
5951 
5952         XML_ParserFree(parser);
5953       }
5954     }
5955   }
5956   free(document);
5957 }
5958 END_TEST
5959 
START_TEST(test_varying_buffer_fills)5960 START_TEST(test_varying_buffer_fills) {
5961   const int KiB = 1024;
5962   const int MiB = 1024 * KiB;
5963   const int document_length = 16 * MiB;
5964   const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB
5965 
5966   if (g_chunkSize != 0) {
5967     return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
5968   }
5969 
5970   char *const document = (char *)malloc(document_length);
5971   assert_true(document != NULL);
5972   memset(document, 'x', document_length);
5973   document[0] = '<';
5974   document[1] = 't';
5975   memset(&document[2], ' ', big - 2); // a very spacy token
5976   document[big - 1] = '>';
5977 
5978   // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
5979   // When reparse deferral is enabled, the final (negated) value is the expected
5980   // maximum number of bytes scanned in parse attempts.
5981   const int testcases[][30] = {
5982       {8 * MiB, -8 * MiB},
5983       {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
5984       // zero-size fills shouldn't trigger the bypass
5985       {4 * MiB, 0, 4 * MiB, -12 * MiB},
5986       {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
5987       {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
5988       // try to hit the buffer ceiling only once (at the end)
5989       {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
5990       // try to hit the same buffer ceiling multiple times
5991       {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},
5992 
5993       // try to hit every ceiling, by always landing 1K shy of the buffer size
5994       {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
5995        128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},
5996 
5997       // try to avoid every ceiling, by always landing 1B past the buffer size
5998       // the normal 2x heuristic threshold still forces parse attempts.
5999       {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
6000        2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
6001        8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
6002        32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
6003        128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
6004        512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
6005        2 * MiB, 4 * MiB,     // will attempt 8MiB + 1 ==> total 10M + 682K + 7
6006        -(10 * MiB + 682 * KiB + 7)},
6007       // try to avoid every ceiling again, except on our last fill.
6008       {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
6009        2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
6010        8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
6011        32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
6012        128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
6013        512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
6014        2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
6015        -(10 * MiB + 682 * KiB + 6)},
6016 
6017       // try to hit ceilings on the way multiple times
6018       {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
6019        512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
6020        1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1,   // 4 MiB buffer
6021        2 * MiB + 1, 1 * MiB, 512 * KiB,                    // 8 MiB buffer
6022        // we'll make a parse attempt at every parse call
6023        -(45 * MiB + 12)},
6024   };
6025   const int testcount = sizeof(testcases) / sizeof(testcases[0]);
6026   for (int test_i = 0; test_i < testcount; test_i++) {
6027     const int *fillsize = testcases[test_i];
6028     set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1],
6029                 fillsize[2], fillsize[3]);
6030     XML_Parser parser = XML_ParserCreate(NULL);
6031     assert_true(parser != NULL);
6032 
6033     CharData storage;
6034     CharData_Init(&storage);
6035     XML_SetUserData(parser, &storage);
6036     XML_SetStartElementHandler(parser, start_element_event_handler);
6037 
6038     g_bytesScanned = 0;
6039     int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
6040     int offset = 0;
6041     while (*fillsize >= 0) {
6042       assert_true(offset + *fillsize <= document_length); // or test is invalid
6043       const enum XML_Status status
6044           = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
6045       if (status != XML_STATUS_OK) {
6046         xml_failure(parser);
6047       }
6048       offset += *fillsize;
6049       fillsize++;
6050       assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
6051       worstcase_bytes += offset; // we might've tried to parse all pending bytes
6052     }
6053     assert_true(storage.count == 1); // the big token should've been parsed
6054     assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
6055     if (g_reparseDeferralEnabledDefault) {
6056       // heuristic is enabled; some XML_Parse calls may have deferred reparsing
6057       const unsigned max_bytes_scanned = -*fillsize;
6058       if (g_bytesScanned > max_bytes_scanned) {
6059         fprintf(stderr,
6060                 "bytes scanned in parse attempts: actual=%u limit=%u \n",
6061                 g_bytesScanned, max_bytes_scanned);
6062         fail("too many bytes scanned in parse attempts");
6063       }
6064     }
6065     assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);
6066 
6067     XML_ParserFree(parser);
6068   }
6069   free(document);
6070 }
6071 END_TEST
6072 
/* Creates the "basic tests" test case, adds it to suite `s`, installs
   basic_setup/basic_teardown as its checked fixture and registers the tests
   listed below, in this order.
   NOTE(review): the tcase_add_test__ifdef_xml_dtd and
   tcase_add_test__if_xml_ge variants presumably register the test only when
   the corresponding build option is active -- confirm against their
   definitions, which are not visible here. */
void
make_basic_test_case(Suite *s) {
  TCase *tc_basic = tcase_create("basic tests");

  suite_add_tcase(s, tc_basic);
  tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);

  tcase_add_test(tc_basic, test_nul_byte);
  tcase_add_test(tc_basic, test_u0000_char);
  tcase_add_test(tc_basic, test_siphash_self);
  tcase_add_test(tc_basic, test_siphash_spec);
  tcase_add_test(tc_basic, test_bom_utf8);
  tcase_add_test(tc_basic, test_bom_utf16_be);
  tcase_add_test(tc_basic, test_bom_utf16_le);
  tcase_add_test(tc_basic, test_nobom_utf16_le);
  tcase_add_test(tc_basic, test_hash_collision);
  tcase_add_test(tc_basic, test_illegal_utf8);
  tcase_add_test(tc_basic, test_utf8_auto_align);
  tcase_add_test(tc_basic, test_utf16);
  tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
  tcase_add_test(tc_basic, test_not_utf16);
  tcase_add_test(tc_basic, test_bad_encoding);
  tcase_add_test(tc_basic, test_latin1_umlauts);
  tcase_add_test(tc_basic, test_long_utf8_character);
  tcase_add_test(tc_basic, test_long_latin1_attribute);
  tcase_add_test(tc_basic, test_long_ascii_attribute);
  /* Regression test for SF bug #491986. */
  tcase_add_test(tc_basic, test_danish_latin1);
  /* Regression test for SF bug #514281. */
  tcase_add_test(tc_basic, test_french_charref_hexidecimal);
  tcase_add_test(tc_basic, test_french_charref_decimal);
  tcase_add_test(tc_basic, test_french_latin1);
  tcase_add_test(tc_basic, test_french_utf8);
  tcase_add_test(tc_basic, test_utf8_false_rejection);
  tcase_add_test(tc_basic, test_line_number_after_parse);
  tcase_add_test(tc_basic, test_column_number_after_parse);
  tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
  tcase_add_test(tc_basic, test_line_number_after_error);
  tcase_add_test(tc_basic, test_column_number_after_error);
  tcase_add_test(tc_basic, test_really_long_lines);
  tcase_add_test(tc_basic, test_really_long_encoded_lines);
  tcase_add_test(tc_basic, test_end_element_events);
  tcase_add_test(tc_basic, test_helper_is_whitespace_normalized);
  tcase_add_test(tc_basic, test_attr_whitespace_normalization);
  tcase_add_test(tc_basic, test_xmldecl_misplaced);
  tcase_add_test(tc_basic, test_xmldecl_invalid);
  tcase_add_test(tc_basic, test_xmldecl_missing_attr);
  tcase_add_test(tc_basic, test_xmldecl_missing_value);
  tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity);
  tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2);
  tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset);
  tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
  tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
  tcase_add_test(tc_basic,
                 test_wfc_undeclared_entity_with_external_subset_standalone);
  tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone);
  tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
  tcase_add_test(tc_basic, test_not_standalone_handler_reject);
  tcase_add_test(tc_basic, test_not_standalone_handler_accept);
  tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
  tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
  tcase_add_test(tc_basic, test_dtd_attr_handling);
  tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
  tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
  tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
  tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
  tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls);
  tcase_add_test(tc_basic, test_good_cdata_ascii);
  tcase_add_test(tc_basic, test_good_cdata_utf16);
  tcase_add_test(tc_basic, test_good_cdata_utf16_le);
  tcase_add_test(tc_basic, test_long_cdata_utf16);
  tcase_add_test(tc_basic, test_multichar_cdata_utf16);
  tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair);
  tcase_add_test(tc_basic, test_bad_cdata);
  tcase_add_test(tc_basic, test_bad_cdata_utf16);
  tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls);
  tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls);
  tcase_add_test(tc_basic, test_memory_allocation);
  tcase_add_test__if_xml_ge(tc_basic, test_default_current);
  tcase_add_test(tc_basic, test_dtd_elements);
  tcase_add_test(tc_basic, test_dtd_elements_nesting);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_foreign_dtd_without_external_subset);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd);
  tcase_add_test(tc_basic, test_set_base);
  tcase_add_test(tc_basic, test_attributes);
  tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity);
  tcase_add_test(tc_basic, test_resume_invalid_parse);
  tcase_add_test(tc_basic, test_resume_resuspended);
  tcase_add_test(tc_basic, test_cdata_default);
  tcase_add_test(tc_basic, test_subordinate_reset);
  tcase_add_test(tc_basic, test_subordinate_suspend);
  tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend);
  tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_ext_entity_invalid_suspended_parse);
  tcase_add_test(tc_basic, test_explicit_encoding);
  tcase_add_test(tc_basic, test_trailing_cr);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr);
  tcase_add_test(tc_basic, test_trailing_rsqb);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter);
  tcase_add_test(tc_basic, test_empty_parse);
  tcase_add_test(tc_basic, test_negative_len_parse);
  tcase_add_test(tc_basic, test_negative_len_parse_buffer);
  tcase_add_test(tc_basic, test_get_buffer_1);
  tcase_add_test(tc_basic, test_get_buffer_2);
  /* This test is only registered in builds with XML_CONTEXT_BYTES > 0. */
#if XML_CONTEXT_BYTES > 0
  tcase_add_test(tc_basic, test_get_buffer_3_overflow);
#endif
  tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
  tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
  tcase_add_test(tc_basic, test_byte_info_at_end);
  tcase_add_test(tc_basic, test_byte_info_at_error);
  tcase_add_test(tc_basic, test_byte_info_at_cdata);
  tcase_add_test(tc_basic, test_predefined_entities);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd);
  tcase_add_test(tc_basic, test_not_predefined_entities);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
  tcase_add_test(tc_basic, test_bad_public_doctype);
  tcase_add_test(tc_basic, test_attribute_enum_value);
  tcase_add_test(tc_basic, test_predefined_entity_redefinition);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing);
  tcase_add_test(tc_basic, test_public_notation_no_sysid);
  tcase_add_test(tc_basic, test_nested_groups);
  tcase_add_test(tc_basic, test_group_choice);
  tcase_add_test(tc_basic, test_standalone_parameter_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_recursive_external_parameter_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_recursive_external_parameter_entity_2);
  tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
  tcase_add_test(tc_basic, test_suspend_xdecl);
  tcase_add_test(tc_basic, test_abort_epilog);
  tcase_add_test(tc_basic, test_abort_epilog_2);
  tcase_add_test(tc_basic, test_suspend_epilog);
  tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag);
  tcase_add_test(tc_basic, test_unfinished_epilog);
  tcase_add_test(tc_basic, test_partial_char_in_epilog);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_suspend_resume_internal_entity_issue_629);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity);
  tcase_add_test(tc_basic, test_restart_on_error);
  tcase_add_test(tc_basic, test_reject_lt_in_attribute_value);
  tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value);
  tcase_add_test(tc_basic, test_trailing_cr_in_att_value);
  tcase_add_test(tc_basic, test_standalone_internal_entity);
  tcase_add_test(tc_basic, test_skipped_external_entity);
  tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity);
  tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr);
  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity);
  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2);
  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3);
  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4);
  tcase_add_test(tc_basic, test_pi_handled_in_default);
  tcase_add_test(tc_basic, test_comment_handled_in_default);
  tcase_add_test(tc_basic, test_pi_yml);
  tcase_add_test(tc_basic, test_pi_xnl);
  tcase_add_test(tc_basic, test_pi_xmm);
  tcase_add_test(tc_basic, test_utf16_pi);
  tcase_add_test(tc_basic, test_utf16_be_pi);
  tcase_add_test(tc_basic, test_utf16_be_comment);
  tcase_add_test(tc_basic, test_utf16_le_comment);
  tcase_add_test(tc_basic, test_missing_encoding_conversion_fn);
  tcase_add_test(tc_basic, test_failing_encoding_conversion_fn);
  tcase_add_test(tc_basic, test_unknown_encoding_success);
  tcase_add_test(tc_basic, test_unknown_encoding_bad_name);
  tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2);
  tcase_add_test(tc_basic, test_unknown_encoding_long_name_1);
  tcase_add_test(tc_basic, test_unknown_encoding_long_name_2);
  tcase_add_test(tc_basic, test_invalid_unknown_encoding);
  tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok);
  tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_length);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_high);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom);
  tcase_add_test(tc_basic, test_utf8_in_cdata_section);
  tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
  tcase_add_test(tc_basic, test_utf8_in_start_tags);
  tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
  tcase_add_test(tc_basic, test_utf16_attribute);
  tcase_add_test(tc_basic, test_utf16_second_attr);
  tcase_add_test(tc_basic, test_attr_after_solidus);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe);
  tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
  tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
  tcase_add_test(tc_basic, test_bad_doctype);
  tcase_add_test(tc_basic, test_bad_doctype_utf8);
  tcase_add_test(tc_basic, test_bad_doctype_utf16);
  tcase_add_test(tc_basic, test_bad_doctype_plus);
  tcase_add_test(tc_basic, test_bad_doctype_star);
  tcase_add_test(tc_basic, test_bad_doctype_query);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore);
  tcase_add_test(tc_basic, test_entity_in_utf16_be_attr);
  tcase_add_test(tc_basic, test_entity_in_utf16_le_attr);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le);
  tcase_add_test(tc_basic, test_short_doctype);
  tcase_add_test(tc_basic, test_short_doctype_2);
  tcase_add_test(tc_basic, test_short_doctype_3);
  tcase_add_test(tc_basic, test_long_doctype);
  tcase_add_test(tc_basic, test_bad_entity);
  tcase_add_test(tc_basic, test_bad_entity_2);
  tcase_add_test(tc_basic, test_bad_entity_3);
  tcase_add_test(tc_basic, test_bad_entity_4);
  tcase_add_test(tc_basic, test_bad_notation);
  tcase_add_test(tc_basic, test_default_doctype_handler);
  tcase_add_test(tc_basic, test_empty_element_abort);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_pool_integrity_with_unfinished_attr);
  tcase_add_test__if_xml_ge(tc_basic, test_entity_ref_no_elements);
  tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_entity);
  tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_attribute_entity);
  tcase_add_test__if_xml_ge(tc_basic,
                            test_deep_nested_entity_delayed_interpretation);
  tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
  tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend_2);
  /* Tests around large tokens, reparse deferral and buffer fills. */
  tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
  tcase_add_test(tc_basic, test_set_reparse_deferral);
  tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
  tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
  tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
  tcase_add_test(tc_basic, test_set_bad_reparse_option);
  tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
  tcase_add_test(tc_basic, test_varying_buffer_fills);
}
6332