• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Tests in the "basic" test case for the Expat test suite
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10    Copyright (c) 2003      Greg Stein <gstein@users.sourceforge.net>
11    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12    Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13    Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
14    Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15    Copyright (c) 2017      Joe Orton <jorton@redhat.com>
16    Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
17    Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
18    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
19    Copyright (c) 2020      Tim Gates <tim.gates@iress.com>
20    Copyright (c) 2021      Donghee Na <donghee.na@python.org>
21    Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
22    Licensed under the MIT license:
23 
24    Permission is  hereby granted,  free of charge,  to any  person obtaining
25    a  copy  of  this  software   and  associated  documentation  files  (the
26    "Software"),  to  deal in  the  Software  without restriction,  including
27    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
28    distribute, sublicense, and/or sell copies of the Software, and to permit
29    persons  to whom  the Software  is  furnished to  do so,  subject to  the
30    following conditions:
31 
32    The above copyright  notice and this permission notice  shall be included
33    in all copies or substantial portions of the Software.
34 
35    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
36    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
37    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
38    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
39    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
40    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
41    USE OR OTHER DEALINGS IN THE SOFTWARE.
42 */
43 
44 #if defined(NDEBUG)
45 #  undef NDEBUG /* because test suite relies on assert(...) at the moment */
46 #endif
47 
48 #include <assert.h>
49 
50 #include <stdio.h>
51 #include <string.h>
52 #include <time.h>
53 
54 #if ! defined(__cplusplus)
55 #  include <stdbool.h>
56 #endif
57 
58 #include "expat_config.h"
59 
60 #include "expat.h"
61 #include "internal.h"
62 #include "minicheck.h"
63 #include "structdata.h"
64 #include "common.h"
65 #include "dummy.h"
66 #include "handlers.h"
67 #include "siphash.h"
68 #include "basic_tests.h"
69 
70 static void
basic_setup(void)71 basic_setup(void) {
72   g_parser = XML_ParserCreate(NULL);
73   if (g_parser == NULL)
74     fail("Parser not created.");
75 }
76 
77 /*
78  * Character & encoding tests.
79  */
80 
START_TEST(test_nul_byte)81 START_TEST(test_nul_byte) {
82   char text[] = "<doc>\0</doc>";
83 
84   /* test that a NUL byte (in US-ASCII data) is an error */
85   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
86       == XML_STATUS_OK)
87     fail("Parser did not report error on NUL-byte.");
88   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
89     xml_failure(g_parser);
90 }
91 END_TEST
92 
START_TEST(test_u0000_char)93 START_TEST(test_u0000_char) {
94   /* test that a NUL byte (in US-ASCII data) is an error */
95   expect_failure("<doc>&#0;</doc>", XML_ERROR_BAD_CHAR_REF,
96                  "Parser did not report error on NUL-byte.");
97 }
98 END_TEST
99 
START_TEST(test_siphash_self)100 START_TEST(test_siphash_self) {
101   if (! sip24_valid())
102     fail("SipHash self-test failed");
103 }
104 END_TEST
105 
START_TEST(test_siphash_spec)106 START_TEST(test_siphash_spec) {
107   /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
108   const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
109                          "\x0a\x0b\x0c\x0d\x0e";
110   const size_t len = sizeof(message) - 1;
111   const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U);
112   struct siphash state;
113   struct sipkey key;
114 
115   sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
116                   "\x0a\x0b\x0c\x0d\x0e\x0f");
117   sip24_init(&state, &key);
118 
119   /* Cover spread across calls */
120   sip24_update(&state, message, 4);
121   sip24_update(&state, message + 4, len - 4);
122 
123   /* Cover null length */
124   sip24_update(&state, message, 0);
125 
126   if (sip24_final(&state) != expected)
127     fail("sip24_final failed spec test\n");
128 
129   /* Cover wrapper */
130   if (siphash24(message, len, &key) != expected)
131     fail("siphash24 failed spec test\n");
132 }
133 END_TEST
134 
START_TEST(test_bom_utf8)135 START_TEST(test_bom_utf8) {
136   /* This test is really just making sure we don't core on a UTF-8 BOM. */
137   const char *text = "\357\273\277<e/>";
138 
139   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
140       == XML_STATUS_ERROR)
141     xml_failure(g_parser);
142 }
143 END_TEST
144 
START_TEST(test_bom_utf16_be)145 START_TEST(test_bom_utf16_be) {
146   char text[] = "\376\377\0<\0e\0/\0>";
147 
148   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
149       == XML_STATUS_ERROR)
150     xml_failure(g_parser);
151 }
152 END_TEST
153 
START_TEST(test_bom_utf16_le)154 START_TEST(test_bom_utf16_le) {
155   char text[] = "\377\376<\0e\0/\0>\0";
156 
157   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
158       == XML_STATUS_ERROR)
159     xml_failure(g_parser);
160 }
161 END_TEST
162 
START_TEST(test_nobom_utf16_le)163 START_TEST(test_nobom_utf16_le) {
164   char text[] = " \0<\0e\0/\0>\0";
165 
166   if (g_chunkSize == 1) {
167     // TODO: with just the first byte, we can't tell the difference between
168     // UTF-16-LE and UTF-8. Avoid the failure for now.
169     return;
170   }
171 
172   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
173       == XML_STATUS_ERROR)
174     xml_failure(g_parser);
175 }
176 END_TEST
177 
START_TEST(test_hash_collision)178 START_TEST(test_hash_collision) {
179   /* For full coverage of the lookup routine, we need to ensure a
180    * hash collision even though we can only tell that we have one
181    * through breakpoint debugging or coverage statistics.  The
182    * following will cause a hash collision on machines with a 64-bit
183    * long type; others will have to experiment.  The full coverage
184    * tests invoked from qa.sh usually provide a hash collision, but
185    * not always.  This is an attempt to provide insurance.
186    */
187 #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U)
188   const char *text
189       = "<doc>\n"
190         "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n"
191         "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n"
192         "<b5></b5><b6></b6><b7></b7><b8></b8>\n"
193         "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n"
194         "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n"
195         "<d8>This triggers the table growth and collides with b2</d8>\n"
196         "</doc>\n";
197 
198   XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT);
199   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
200       == XML_STATUS_ERROR)
201     xml_failure(g_parser);
202 }
203 END_TEST
204 #undef COLLIDING_HASH_SALT
205 
206 /* Regression test for SF bug #491986. */
START_TEST(test_danish_latin1)207 START_TEST(test_danish_latin1) {
208   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
209                      "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
210 #ifdef XML_UNICODE
211   const XML_Char *expected
212       = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
213 #else
214   const XML_Char *expected
215       = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
216 #endif
217   run_character_check(text, expected);
218 }
219 END_TEST
220 
221 /* Regression test for SF bug #514281. */
START_TEST(test_french_charref_hexidecimal)222 START_TEST(test_french_charref_hexidecimal) {
223   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
224                      "<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
225 #ifdef XML_UNICODE
226   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
227 #else
228   const XML_Char *expected
229       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
230 #endif
231   run_character_check(text, expected);
232 }
233 END_TEST
234 
START_TEST(test_french_charref_decimal)235 START_TEST(test_french_charref_decimal) {
236   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
237                      "<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
238 #ifdef XML_UNICODE
239   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
240 #else
241   const XML_Char *expected
242       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
243 #endif
244   run_character_check(text, expected);
245 }
246 END_TEST
247 
START_TEST(test_french_latin1)248 START_TEST(test_french_latin1) {
249   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
250                      "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
251 #ifdef XML_UNICODE
252   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
253 #else
254   const XML_Char *expected
255       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
256 #endif
257   run_character_check(text, expected);
258 }
259 END_TEST
260 
START_TEST(test_french_utf8)261 START_TEST(test_french_utf8) {
262   const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
263                      "<doc>\xC3\xA9</doc>";
264 #ifdef XML_UNICODE
265   const XML_Char *expected = XCS("\x00e9");
266 #else
267   const XML_Char *expected = XCS("\xC3\xA9");
268 #endif
269   run_character_check(text, expected);
270 }
271 END_TEST
272 
273 /* Regression test for SF bug #600479.
274    XXX There should be a test that exercises all legal XML Unicode
275    characters as PCDATA and attribute value content, and XML Name
276    characters as part of element and attribute names.
277 */
START_TEST(test_utf8_false_rejection)278 START_TEST(test_utf8_false_rejection) {
279   const char *text = "<doc>\xEF\xBA\xBF</doc>";
280 #ifdef XML_UNICODE
281   const XML_Char *expected = XCS("\xfebf");
282 #else
283   const XML_Char *expected = XCS("\xEF\xBA\xBF");
284 #endif
285   run_character_check(text, expected);
286 }
287 END_TEST
288 
289 /* Regression test for SF bug #477667.
290    This test assures that any 8-bit character followed by a 7-bit
291    character will not be mistakenly interpreted as a valid UTF-8
292    sequence.
293 */
START_TEST(test_illegal_utf8)294 START_TEST(test_illegal_utf8) {
295   char text[100];
296   int i;
297 
298   for (i = 128; i <= 255; ++i) {
299     snprintf(text, sizeof(text), "<e>%ccd</e>", i);
300     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
301         == XML_STATUS_OK) {
302       snprintf(text, sizeof(text),
303                "expected token error for '%c' (ordinal %d) in UTF-8 text", i,
304                i);
305       fail(text);
306     } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
307       xml_failure(g_parser);
308     /* Reset the parser since we use the same parser repeatedly. */
309     XML_ParserReset(g_parser, NULL);
310   }
311 }
312 END_TEST
313 
314 /* Examples, not masks: */
315 #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
316 #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
317 #define UTF8_LEAD_3 "\xef" /* 0b11101111 */
318 #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
319 #define UTF8_FOLLOW "\xbf" /* 0b10111111 */
320 
START_TEST(test_utf8_auto_align)321 START_TEST(test_utf8_auto_align) {
322   struct TestCase {
323     ptrdiff_t expectedMovementInChars;
324     const char *input;
325   };
326 
327   struct TestCase cases[] = {
328       {00, ""},
329 
330       {00, UTF8_LEAD_1},
331 
332       {-1, UTF8_LEAD_2},
333       {00, UTF8_LEAD_2 UTF8_FOLLOW},
334 
335       {-1, UTF8_LEAD_3},
336       {-2, UTF8_LEAD_3 UTF8_FOLLOW},
337       {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
338 
339       {-1, UTF8_LEAD_4},
340       {-2, UTF8_LEAD_4 UTF8_FOLLOW},
341       {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
342       {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
343   };
344 
345   size_t i = 0;
346   bool success = true;
347   for (; i < sizeof(cases) / sizeof(*cases); i++) {
348     const char *fromLim = cases[i].input + strlen(cases[i].input);
349     const char *const fromLimInitially = fromLim;
350     ptrdiff_t actualMovementInChars;
351 
352     _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim);
353 
354     actualMovementInChars = (fromLim - fromLimInitially);
355     if (actualMovementInChars != cases[i].expectedMovementInChars) {
356       size_t j = 0;
357       success = false;
358       printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
359              ", actually moved by %2d chars: \"",
360              (unsigned)(i + 1), (int)cases[i].expectedMovementInChars,
361              (int)actualMovementInChars);
362       for (; j < strlen(cases[i].input); j++) {
363         printf("\\x%02x", (unsigned char)cases[i].input[j]);
364       }
365       printf("\"\n");
366     }
367   }
368 
369   if (! success) {
370     fail("UTF-8 auto-alignment is not bullet-proof\n");
371   }
372 }
373 END_TEST
374 
START_TEST(test_utf16)375 START_TEST(test_utf16) {
376   /* <?xml version="1.0" encoding="UTF-16"?>
377    *  <doc a='123'>some {A} text</doc>
378    *
379    * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
380    */
381   char text[]
382       = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
383         "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
384         "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
385         "\000'\000?\000>\000\n"
386         "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
387         "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
388         "<\000/\000d\000o\000c\000>";
389 #ifdef XML_UNICODE
390   const XML_Char *expected = XCS("some \xff21 text");
391 #else
392   const XML_Char *expected = XCS("some \357\274\241 text");
393 #endif
394   CharData storage;
395 
396   CharData_Init(&storage);
397   XML_SetUserData(g_parser, &storage);
398   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
399   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
400       == XML_STATUS_ERROR)
401     xml_failure(g_parser);
402   CharData_CheckXMLChars(&storage, expected);
403 }
404 END_TEST
405 
START_TEST(test_utf16_le_epilog_newline)406 START_TEST(test_utf16_le_epilog_newline) {
407   unsigned int first_chunk_bytes = 17;
408   char text[] = "\xFF\xFE"                  /* BOM */
409                 "<\000e\000/\000>\000"      /* document element */
410                 "\r\000\n\000\r\000\n\000"; /* epilog */
411 
412   if (first_chunk_bytes >= sizeof(text) - 1)
413     fail("bad value of first_chunk_bytes");
414   if (_XML_Parse_SINGLE_BYTES(g_parser, text, first_chunk_bytes, XML_FALSE)
415       == XML_STATUS_ERROR)
416     xml_failure(g_parser);
417   else {
418     enum XML_Status rc;
419     rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes,
420                                  sizeof(text) - first_chunk_bytes - 1,
421                                  XML_TRUE);
422     if (rc == XML_STATUS_ERROR)
423       xml_failure(g_parser);
424   }
425 }
426 END_TEST
427 
428 /* Test that an outright lie in the encoding is faulted */
START_TEST(test_not_utf16)429 START_TEST(test_not_utf16) {
430   const char *text = "<?xml version='1.0' encoding='utf-16'?>"
431                      "<doc>Hi</doc>";
432 
433   /* Use a handler to provoke the appropriate code paths */
434   XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler);
435   expect_failure(text, XML_ERROR_INCORRECT_ENCODING,
436                  "UTF-16 declared in UTF-8 not faulted");
437 }
438 END_TEST
439 
440 /* Test that an unknown encoding is rejected */
START_TEST(test_bad_encoding)441 START_TEST(test_bad_encoding) {
442   const char *text = "<doc>Hi</doc>";
443 
444   if (! XML_SetEncoding(g_parser, XCS("unknown-encoding")))
445     fail("XML_SetEncoding failed");
446   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
447                  "Unknown encoding not faulted");
448 }
449 END_TEST
450 
451 /* Regression test for SF bug #481609, #774028. */
START_TEST(test_latin1_umlauts)452 START_TEST(test_latin1_umlauts) {
453   const char *text
454       = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
455         "<e a='\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; >'\n"
456         "  >\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; ></e>";
457 #ifdef XML_UNICODE
458   /* Expected results in UTF-16 */
459   const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ")
460       XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >");
461 #else
462   /* Expected results in UTF-8 */
463   const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
464       XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
465 #endif
466 
467   run_character_check(text, expected);
468   XML_ParserReset(g_parser, NULL);
469   run_attribute_check(text, expected);
470   /* Repeat with a default handler */
471   XML_ParserReset(g_parser, NULL);
472   XML_SetDefaultHandler(g_parser, dummy_default_handler);
473   run_character_check(text, expected);
474   XML_ParserReset(g_parser, NULL);
475   XML_SetDefaultHandler(g_parser, dummy_default_handler);
476   run_attribute_check(text, expected);
477 }
478 END_TEST
479 
480 /* Test that an element name with a 4-byte UTF-8 character is rejected */
START_TEST(test_long_utf8_character)481 START_TEST(test_long_utf8_character) {
482   const char *text
483       = "<?xml version='1.0' encoding='utf-8'?>\n"
484         /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
485         "<do\xf0\x90\x80\x80/>";
486   expect_failure(text, XML_ERROR_INVALID_TOKEN,
487                  "4-byte UTF-8 character in element name not faulted");
488 }
489 END_TEST
490 
491 /* Test that a long latin-1 attribute (too long to convert in one go)
492  * is correctly converted
493  */
START_TEST(test_long_latin1_attribute)494 START_TEST(test_long_latin1_attribute) {
495   const char *text
496       = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
497         "<doc att='"
498         /* 64 characters per line */
499         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
500         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
501         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
502         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
503         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
504         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
505         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
506         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
507         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
508         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
509         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
510         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
511         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
512         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
513         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
514         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
515         /* Last character splits across a buffer boundary */
516         "\xe4'>\n</doc>";
517 
518   const XML_Char *expected =
519       /* 64 characters per line */
520       /* clang-format off */
521         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
522         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
523         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
524         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
525         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
526         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
527         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
528         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
529         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
530         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
531         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
532         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
533         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
534         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
535         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
536         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
537   /* clang-format on */
538 #ifdef XML_UNICODE
539                                                   XCS("\x00e4");
540 #else
541                                                   XCS("\xc3\xa4");
542 #endif
543 
544   run_attribute_check(text, expected);
545 }
546 END_TEST
547 
548 /* Test that a long ASCII attribute (too long to convert in one go)
549  * is correctly converted
550  */
START_TEST(test_long_ascii_attribute)551 START_TEST(test_long_ascii_attribute) {
552   const char *text
553       = "<?xml version='1.0' encoding='us-ascii'?>\n"
554         "<doc att='"
555         /* 64 characters per line */
556         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
557         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
558         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
559         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
560         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
561         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
562         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
563         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
564         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
565         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
566         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
567         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
568         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
569         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
570         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
571         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
572         "01234'>\n</doc>";
573   const XML_Char *expected =
574       /* 64 characters per line */
575       /* clang-format off */
576         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
577         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
578         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
579         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
580         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
581         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
582         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
583         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
584         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
585         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
586         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
587         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
588         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
589         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
590         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
591         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
592         XCS("01234");
593   /* clang-format on */
594 
595   run_attribute_check(text, expected);
596 }
597 END_TEST
598 
599 /* Regression test #1 for SF bug #653180. */
START_TEST(test_line_number_after_parse)600 START_TEST(test_line_number_after_parse) {
601   const char *text = "<tag>\n"
602                      "\n"
603                      "\n</tag>";
604   XML_Size lineno;
605 
606   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
607       == XML_STATUS_ERROR)
608     xml_failure(g_parser);
609   lineno = XML_GetCurrentLineNumber(g_parser);
610   if (lineno != 4) {
611     char buffer[100];
612     snprintf(buffer, sizeof(buffer),
613              "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
614     fail(buffer);
615   }
616 }
617 END_TEST
618 
619 /* Regression test #2 for SF bug #653180. */
START_TEST(test_column_number_after_parse)620 START_TEST(test_column_number_after_parse) {
621   const char *text = "<tag></tag>";
622   XML_Size colno;
623 
624   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
625       == XML_STATUS_ERROR)
626     xml_failure(g_parser);
627   colno = XML_GetCurrentColumnNumber(g_parser);
628   if (colno != 11) {
629     char buffer[100];
630     snprintf(buffer, sizeof(buffer),
631              "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
632     fail(buffer);
633   }
634 }
635 END_TEST
636 
637 /* Regression test #3 for SF bug #653180. */
START_TEST(test_line_and_column_numbers_inside_handlers)638 START_TEST(test_line_and_column_numbers_inside_handlers) {
639   const char *text = "<a>\n"      /* Unix end-of-line */
640                      "  <b>\r\n"  /* Windows end-of-line */
641                      "    <c/>\r" /* Mac OS end-of-line */
642                      "  </b>\n"
643                      "  <d>\n"
644                      "    <f/>\n"
645                      "  </d>\n"
646                      "</a>";
647   const StructDataEntry expected[]
648       = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG},
649          {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG},
650          {XCS("b"), 2, 4, STRUCT_END_TAG},   {XCS("d"), 2, 5, STRUCT_START_TAG},
651          {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG},
652          {XCS("d"), 2, 7, STRUCT_END_TAG},   {XCS("a"), 0, 8, STRUCT_END_TAG}};
653   const int expected_count = sizeof(expected) / sizeof(StructDataEntry);
654   StructData storage;
655 
656   StructData_Init(&storage);
657   XML_SetUserData(g_parser, &storage);
658   XML_SetStartElementHandler(g_parser, start_element_event_handler2);
659   XML_SetEndElementHandler(g_parser, end_element_event_handler2);
660   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
661       == XML_STATUS_ERROR)
662     xml_failure(g_parser);
663 
664   StructData_CheckItems(&storage, expected, expected_count);
665   StructData_Dispose(&storage);
666 }
667 END_TEST
668 
669 /* Regression test #4 for SF bug #653180. */
START_TEST(test_line_number_after_error)670 START_TEST(test_line_number_after_error) {
671   const char *text = "<a>\n"
672                      "  <b>\n"
673                      "  </a>"; /* missing </b> */
674   XML_Size lineno;
675   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
676       != XML_STATUS_ERROR)
677     fail("Expected a parse error");
678 
679   lineno = XML_GetCurrentLineNumber(g_parser);
680   if (lineno != 3) {
681     char buffer[100];
682     snprintf(buffer, sizeof(buffer),
683              "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
684     fail(buffer);
685   }
686 }
687 END_TEST
688 
689 /* Regression test #5 for SF bug #653180. */
START_TEST(test_column_number_after_error)690 START_TEST(test_column_number_after_error) {
691   const char *text = "<a>\n"
692                      "  <b>\n"
693                      "  </a>"; /* missing </b> */
694   XML_Size colno;
695   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
696       != XML_STATUS_ERROR)
697     fail("Expected a parse error");
698 
699   colno = XML_GetCurrentColumnNumber(g_parser);
700   if (colno != 4) {
701     char buffer[100];
702     snprintf(buffer, sizeof(buffer),
703              "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
704     fail(buffer);
705   }
706 }
707 END_TEST
708 
709 /* Regression test for SF bug #478332. */
START_TEST(test_really_long_lines)710 START_TEST(test_really_long_lines) {
711   /* This parses an input line longer than INIT_DATA_BUF_SIZE
712      characters long (defined to be 1024 in xmlparse.c).  We take a
713      really cheesy approach to building the input buffer, because
714      this avoids writing bugs in buffer-filling code.
715   */
716   const char *text
717       = "<e>"
718         /* 64 chars */
719         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
720         /* until we have at least 1024 characters on the line: */
721         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
722         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
723         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
724         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
725         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
726         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
727         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
728         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
729         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
730         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
731         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
732         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
733         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
734         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
735         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
736         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
737         "</e>";
738   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
739       == XML_STATUS_ERROR)
740     xml_failure(g_parser);
741 }
742 END_TEST
743 
744 /* Test cdata processing across a buffer boundary */
START_TEST(test_really_long_encoded_lines)745 START_TEST(test_really_long_encoded_lines) {
746   /* As above, except that we want to provoke an output buffer
747    * overflow with a non-trivial encoding.  For this we need to pass
748    * the whole cdata in one go, not byte-by-byte.
749    */
750   void *buffer;
751   const char *text
752       = "<?xml version='1.0' encoding='iso-8859-1'?>"
753         "<e>"
754         /* 64 chars */
755         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
756         /* until we have at least 1024 characters on the line: */
757         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
758         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
759         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
760         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
761         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
762         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
763         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
764         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
765         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
766         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
767         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
768         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
769         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
770         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
771         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
772         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
773         "</e>";
774   int parse_len = (int)strlen(text);
775 
776   /* Need a cdata handler to provoke the code path we want to test */
777   XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler);
778   buffer = XML_GetBuffer(g_parser, parse_len);
779   if (buffer == NULL)
780     fail("Could not allocate parse buffer");
781   assert(buffer != NULL);
782   memcpy(buffer, text, parse_len);
783   if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR)
784     xml_failure(g_parser);
785 }
786 END_TEST
787 
788 /*
789  * Element event tests.
790  */
791 
START_TEST(test_end_element_events)792 START_TEST(test_end_element_events) {
793   const char *text = "<a><b><c/></b><d><f/></d></a>";
794   const XML_Char *expected = XCS("/c/b/f/d/a");
795   CharData storage;
796 
797   CharData_Init(&storage);
798   XML_SetUserData(g_parser, &storage);
799   XML_SetEndElementHandler(g_parser, end_element_event_handler);
800   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
801       == XML_STATUS_ERROR)
802     xml_failure(g_parser);
803   CharData_CheckXMLChars(&storage, expected);
804 }
805 END_TEST
806 
807 /*
808  * Attribute tests.
809  */
810 
811 /* Helper used by the following tests; this checks any "attr" and "refs"
812    attributes to make sure whitespace has been normalized.
813 
814    Return true if whitespace has been normalized in a string, using
815    the rules for attribute value normalization.  The 'is_cdata' flag
816    is needed since CDATA attributes don't need to have multiple
817    whitespace characters collapsed to a single space, while other
818    attribute data types do.  (Section 3.3.3 of the recommendation.)
819 */
820 static int
is_whitespace_normalized(const XML_Char * s,int is_cdata)821 is_whitespace_normalized(const XML_Char *s, int is_cdata) {
822   int blanks = 0;
823   int at_start = 1;
824   while (*s) {
825     if (*s == XCS(' '))
826       ++blanks;
827     else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r'))
828       return 0;
829     else {
830       if (at_start) {
831         at_start = 0;
832         if (blanks && ! is_cdata)
833           /* illegal leading blanks */
834           return 0;
835       } else if (blanks > 1 && ! is_cdata)
836         return 0;
837       blanks = 0;
838     }
839     ++s;
840   }
841   if (blanks && ! is_cdata)
842     return 0;
843   return 1;
844 }
845 
846 /* Check the attribute whitespace checker: */
START_TEST(test_helper_is_whitespace_normalized)847 START_TEST(test_helper_is_whitespace_normalized) {
848   assert(is_whitespace_normalized(XCS("abc"), 0));
849   assert(is_whitespace_normalized(XCS("abc"), 1));
850   assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
851   assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
852   assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0));
853   assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
854   assert(! is_whitespace_normalized(XCS("abc  def ghi"), 0));
855   assert(is_whitespace_normalized(XCS("abc  def ghi"), 1));
856   assert(! is_whitespace_normalized(XCS("abc def ghi "), 0));
857   assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
858   assert(! is_whitespace_normalized(XCS(" "), 0));
859   assert(is_whitespace_normalized(XCS(" "), 1));
860   assert(! is_whitespace_normalized(XCS("\t"), 0));
861   assert(! is_whitespace_normalized(XCS("\t"), 1));
862   assert(! is_whitespace_normalized(XCS("\n"), 0));
863   assert(! is_whitespace_normalized(XCS("\n"), 1));
864   assert(! is_whitespace_normalized(XCS("\r"), 0));
865   assert(! is_whitespace_normalized(XCS("\r"), 1));
866   assert(! is_whitespace_normalized(XCS("abc\t def"), 1));
867 }
868 END_TEST
869 
870 static void XMLCALL
check_attr_contains_normalized_whitespace(void * userData,const XML_Char * name,const XML_Char ** atts)871 check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name,
872                                           const XML_Char **atts) {
873   int i;
874   UNUSED_P(userData);
875   UNUSED_P(name);
876   for (i = 0; atts[i] != NULL; i += 2) {
877     const XML_Char *attrname = atts[i];
878     const XML_Char *value = atts[i + 1];
879     if (xcstrcmp(XCS("attr"), attrname) == 0
880         || xcstrcmp(XCS("ents"), attrname) == 0
881         || xcstrcmp(XCS("refs"), attrname) == 0) {
882       if (! is_whitespace_normalized(value, 0)) {
883         char buffer[256];
884         snprintf(buffer, sizeof(buffer),
885                  "attribute value not normalized: %" XML_FMT_STR
886                  "='%" XML_FMT_STR "'",
887                  attrname, value);
888         fail(buffer);
889       }
890     }
891   }
892 }
893 
START_TEST(test_attr_whitespace_normalization)894 START_TEST(test_attr_whitespace_normalization) {
895   const char *text
896       = "<!DOCTYPE doc [\n"
897         "  <!ATTLIST doc\n"
898         "            attr NMTOKENS #REQUIRED\n"
899         "            ents ENTITIES #REQUIRED\n"
900         "            refs IDREFS   #REQUIRED>\n"
901         "]>\n"
902         "<doc attr='    a  b c\t\td\te\t' refs=' id-1   \t  id-2\t\t'  \n"
903         "     ents=' ent-1   \t\r\n"
904         "            ent-2  ' >\n"
905         "  <e id='id-1'/>\n"
906         "  <e id='id-2'/>\n"
907         "</doc>";
908 
909   XML_SetStartElementHandler(g_parser,
910                              check_attr_contains_normalized_whitespace);
911   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
912       == XML_STATUS_ERROR)
913     xml_failure(g_parser);
914 }
915 END_TEST
916 
917 /*
918  * XML declaration tests.
919  */
920 
START_TEST(test_xmldecl_misplaced)921 START_TEST(test_xmldecl_misplaced) {
922   expect_failure("\n"
923                  "<?xml version='1.0'?>\n"
924                  "<a/>",
925                  XML_ERROR_MISPLACED_XML_PI,
926                  "failed to report misplaced XML declaration");
927 }
928 END_TEST
929 
START_TEST(test_xmldecl_invalid)930 START_TEST(test_xmldecl_invalid) {
931   expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL,
932                  "Failed to report invalid XML declaration");
933 }
934 END_TEST
935 
START_TEST(test_xmldecl_missing_attr)936 START_TEST(test_xmldecl_missing_attr) {
937   expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL,
938                  "Failed to report missing XML declaration attribute");
939 }
940 END_TEST
941 
START_TEST(test_xmldecl_missing_value)942 START_TEST(test_xmldecl_missing_value) {
943   expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n"
944                  "<doc/>",
945                  XML_ERROR_XML_DECL,
946                  "Failed to report missing attribute value");
947 }
948 END_TEST
949 
950 /* Regression test for SF bug #584832. */
START_TEST(test_unknown_encoding_internal_entity)951 START_TEST(test_unknown_encoding_internal_entity) {
952   const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
953                      "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
954                      "<test a='&foo;'/>";
955 
956   XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL);
957   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
958       == XML_STATUS_ERROR)
959     xml_failure(g_parser);
960 }
961 END_TEST
962 
963 /* Test unrecognised encoding handler */
START_TEST(test_unrecognised_encoding_internal_entity)964 START_TEST(test_unrecognised_encoding_internal_entity) {
965   const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
966                      "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
967                      "<test a='&foo;'/>";
968 
969   XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL);
970   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
971       != XML_STATUS_ERROR)
972     fail("Unrecognised encoding not rejected");
973 }
974 END_TEST
975 
976 /* Regression test for SF bug #620106. */
START_TEST(test_ext_entity_set_encoding)977 START_TEST(test_ext_entity_set_encoding) {
978   const char *text = "<!DOCTYPE doc [\n"
979                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
980                      "]>\n"
981                      "<doc>&en;</doc>";
982   ExtTest test_data
983       = {/* This text says it's an unsupported encoding, but it's really
984             UTF-8, which we tell Expat using XML_SetEncoding().
985          */
986          "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL};
987 #ifdef XML_UNICODE
988   const XML_Char *expected = XCS("\x00e9");
989 #else
990   const XML_Char *expected = XCS("\xc3\xa9");
991 #endif
992 
993   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
994   run_ext_character_check(text, &test_data, expected);
995 }
996 END_TEST
997 
998 /* Test external entities with no handler */
START_TEST(test_ext_entity_no_handler)999 START_TEST(test_ext_entity_no_handler) {
1000   const char *text = "<!DOCTYPE doc [\n"
1001                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1002                      "]>\n"
1003                      "<doc>&en;</doc>";
1004 
1005   XML_SetDefaultHandler(g_parser, dummy_default_handler);
1006   run_character_check(text, XCS(""));
1007 }
1008 END_TEST
1009 
1010 /* Test UTF-8 BOM is accepted */
START_TEST(test_ext_entity_set_bom)1011 START_TEST(test_ext_entity_set_bom) {
1012   const char *text = "<!DOCTYPE doc [\n"
1013                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1014                      "]>\n"
1015                      "<doc>&en;</doc>";
1016   ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */
1017                        "<?xml encoding='iso-8859-3'?>"
1018                        "\xC3\xA9",
1019                        XCS("utf-8"), NULL};
1020 #ifdef XML_UNICODE
1021   const XML_Char *expected = XCS("\x00e9");
1022 #else
1023   const XML_Char *expected = XCS("\xc3\xa9");
1024 #endif
1025 
1026   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1027   run_ext_character_check(text, &test_data, expected);
1028 }
1029 END_TEST
1030 
1031 /* Test that bad encodings are faulted */
START_TEST(test_ext_entity_bad_encoding)1032 START_TEST(test_ext_entity_bad_encoding) {
1033   const char *text = "<!DOCTYPE doc [\n"
1034                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1035                      "]>\n"
1036                      "<doc>&en;</doc>";
1037   ExtFaults fault
1038       = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted",
1039          XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING};
1040 
1041   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1042   XML_SetUserData(g_parser, &fault);
1043   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1044                  "Bad encoding should not have been accepted");
1045 }
1046 END_TEST
1047 
1048 /* Try handing an invalid encoding to an external entity parser */
START_TEST(test_ext_entity_bad_encoding_2)1049 START_TEST(test_ext_entity_bad_encoding_2) {
1050   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1051                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1052                      "<doc>&entity;</doc>";
1053   ExtFaults fault
1054       = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted",
1055          XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING};
1056 
1057   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1058   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1059   XML_SetUserData(g_parser, &fault);
1060   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1061                  "Bad encoding not faulted in external entity handler");
1062 }
1063 END_TEST
1064 
1065 /* Test that no error is reported for unknown entities if we don't
1066    read an external subset.  This was fixed in Expat 1.95.5.
1067 */
START_TEST(test_wfc_undeclared_entity_unread_external_subset)1068 START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
1069   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
1070                      "<doc>&entity;</doc>";
1071 
1072   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1073       == XML_STATUS_ERROR)
1074     xml_failure(g_parser);
1075 }
1076 END_TEST
1077 
1078 /* Test that an error is reported for unknown entities if we don't
1079    have an external subset.
1080 */
START_TEST(test_wfc_undeclared_entity_no_external_subset)1081 START_TEST(test_wfc_undeclared_entity_no_external_subset) {
1082   expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY,
1083                  "Parser did not report undefined entity w/out a DTD.");
1084 }
1085 END_TEST
1086 
1087 /* Test that an error is reported for unknown entities if we don't
1088    read an external subset, but have been declared standalone.
1089 */
START_TEST(test_wfc_undeclared_entity_standalone)1090 START_TEST(test_wfc_undeclared_entity_standalone) {
1091   const char *text
1092       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1093         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1094         "<doc>&entity;</doc>";
1095 
1096   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1097                  "Parser did not report undefined entity (standalone).");
1098 }
1099 END_TEST
1100 
1101 /* Test that an error is reported for unknown entities if we have read
1102    an external subset, and standalone is true.
1103 */
START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone)1104 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
1105   const char *text
1106       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1107         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1108         "<doc>&entity;</doc>";
1109   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1110 
1111   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1112   XML_SetUserData(g_parser, &test_data);
1113   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1114   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1115                  "Parser did not report undefined entity (external DTD).");
1116 }
1117 END_TEST
1118 
1119 /* Test that external entity handling is not done if the parsing flag
1120  * is set to UNLESS_STANDALONE
1121  */
START_TEST(test_entity_with_external_subset_unless_standalone)1122 START_TEST(test_entity_with_external_subset_unless_standalone) {
1123   const char *text
1124       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1125         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1126         "<doc>&entity;</doc>";
1127   ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL};
1128 
1129   XML_SetParamEntityParsing(g_parser,
1130                             XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1131   XML_SetUserData(g_parser, &test_data);
1132   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1133   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1134                  "Parser did not report undefined entity");
1135 }
1136 END_TEST
1137 
1138 /* Test that no error is reported for unknown entities if we have read
1139    an external subset, and standalone is false.
1140 */
START_TEST(test_wfc_undeclared_entity_with_external_subset)1141 START_TEST(test_wfc_undeclared_entity_with_external_subset) {
1142   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1143                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1144                      "<doc>&entity;</doc>";
1145   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1146 
1147   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1148   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1149   run_ext_character_check(text, &test_data, XCS(""));
1150 }
1151 END_TEST
1152 
1153 /* Test that an error is reported if our NotStandalone handler fails */
START_TEST(test_not_standalone_handler_reject)1154 START_TEST(test_not_standalone_handler_reject) {
1155   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1156                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1157                      "<doc>&entity;</doc>";
1158   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1159 
1160   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1161   XML_SetUserData(g_parser, &test_data);
1162   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1163   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1164   expect_failure(text, XML_ERROR_NOT_STANDALONE,
1165                  "NotStandalone handler failed to reject");
1166 
1167   /* Try again but without external entity handling */
1168   XML_ParserReset(g_parser, NULL);
1169   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1170   expect_failure(text, XML_ERROR_NOT_STANDALONE,
1171                  "NotStandalone handler failed to reject");
1172 }
1173 END_TEST
1174 
1175 /* Test that no error is reported if our NotStandalone handler succeeds */
START_TEST(test_not_standalone_handler_accept)1176 START_TEST(test_not_standalone_handler_accept) {
1177   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1178                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1179                      "<doc>&entity;</doc>";
1180   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1181 
1182   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1183   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1184   XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1185   run_ext_character_check(text, &test_data, XCS(""));
1186 
1187   /* Repeat without the external entity handler */
1188   XML_ParserReset(g_parser, NULL);
1189   XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1190   run_character_check(text, XCS(""));
1191 }
1192 END_TEST
1193 
START_TEST(test_wfc_no_recursive_entity_refs)1194 START_TEST(test_wfc_no_recursive_entity_refs) {
1195   const char *text = "<!DOCTYPE doc [\n"
1196                      "  <!ENTITY entity '&#38;entity;'>\n"
1197                      "]>\n"
1198                      "<doc>&entity;</doc>";
1199 
1200   expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF,
1201                  "Parser did not report recursive entity reference.");
1202 }
1203 END_TEST
1204 
START_TEST(test_recursive_external_parameter_entity_2)1205 START_TEST(test_recursive_external_parameter_entity_2) {
1206   struct TestCase {
1207     const char *doc;
1208     enum XML_Status expectedStatus;
1209   };
1210 
1211   struct TestCase cases[] = {
1212       {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR},
1213       {"<!ENTITY % p1 '%p1;'>"
1214        "<!ENTITY % p1 'first declaration wins'>",
1215        XML_STATUS_ERROR},
1216       {"<!ENTITY % p1 'first declaration wins'>"
1217        "<!ENTITY % p1 '%p1;'>",
1218        XML_STATUS_OK},
1219       {"<!ENTITY % p1 '&#37;p1;'>", XML_STATUS_OK},
1220   };
1221 
1222   for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1223     const char *const doc = cases[i].doc;
1224     const enum XML_Status expectedStatus = cases[i].expectedStatus;
1225     set_subtest("%s", doc);
1226 
1227     XML_Parser parser = XML_ParserCreate(NULL);
1228     assert_true(parser != NULL);
1229 
1230     XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
1231     assert_true(ext_parser != NULL);
1232 
1233     const enum XML_Status actualStatus
1234         = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE);
1235 
1236     assert_true(actualStatus == expectedStatus);
1237     if (actualStatus != XML_STATUS_OK) {
1238       assert_true(XML_GetErrorCode(ext_parser)
1239                   == XML_ERROR_RECURSIVE_ENTITY_REF);
1240     }
1241 
1242     XML_ParserFree(ext_parser);
1243     XML_ParserFree(parser);
1244   }
1245 }
1246 END_TEST
1247 
1248 /* Test incomplete external entities are faulted */
START_TEST(test_ext_entity_invalid_parse)1249 START_TEST(test_ext_entity_invalid_parse) {
1250   const char *text = "<!DOCTYPE doc [\n"
1251                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1252                      "]>\n"
1253                      "<doc>&en;</doc>";
1254   const ExtFaults faults[]
1255       = {{"<", "Incomplete element declaration not faulted", NULL,
1256           XML_ERROR_UNCLOSED_TOKEN},
1257          {"<\xe2\x82", /* First two bytes of a three-byte char */
1258           "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
1259          {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL,
1260           XML_ERROR_PARTIAL_CHAR},
1261          {NULL, NULL, NULL, XML_ERROR_NONE}};
1262   const ExtFaults *fault = faults;
1263 
1264   for (; fault->parse_text != NULL; fault++) {
1265     set_subtest("\"%s\"", fault->parse_text);
1266     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1267     XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1268     XML_SetUserData(g_parser, (void *)fault);
1269     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1270                    "Parser did not report external entity error");
1271     XML_ParserReset(g_parser, NULL);
1272   }
1273 }
1274 END_TEST
1275 
1276 /* Regression test for SF bug #483514. */
START_TEST(test_dtd_default_handling)1277 START_TEST(test_dtd_default_handling) {
1278   const char *text = "<!DOCTYPE doc [\n"
1279                      "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
1280                      "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
1281                      "<!ELEMENT doc EMPTY>\n"
1282                      "<!ATTLIST doc a CDATA #IMPLIED>\n"
1283                      "<?pi in dtd?>\n"
1284                      "<!--comment in dtd-->\n"
1285                      "]><doc/>";
1286 
1287   XML_SetDefaultHandler(g_parser, accumulate_characters);
1288   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
1289   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
1290   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
1291   XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
1292   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
1293   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
1294   XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler);
1295   XML_SetCommentHandler(g_parser, dummy_comment_handler);
1296   XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1297   XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1298   run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>"));
1299 }
1300 END_TEST
1301 
1302 /* Test handling of attribute declarations */
START_TEST(test_dtd_attr_handling)1303 START_TEST(test_dtd_attr_handling) {
1304   const char *prolog = "<!DOCTYPE doc [\n"
1305                        "<!ELEMENT doc EMPTY>\n";
1306   AttTest attr_data[]
1307       = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
1308           "]>"
1309           "<doc a='two'/>",
1310           XCS("doc"), XCS("a"),
1311           XCS("(one|two|three)"), /* Extraneous spaces will be removed */
1312           NULL, XML_TRUE},
1313          {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
1314           "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
1315           "]>"
1316           "<doc/>",
1317           XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE},
1318          {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
1319           "]>"
1320           "<doc/>",
1321           XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE},
1322          {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
1323           "]>"
1324           "<doc/>",
1325           XCS("doc"), XCS("a"), XCS("CDATA"),
1326 #ifdef XML_UNICODE
1327           XCS("\x06f2"),
1328 #else
1329           XCS("\xdb\xb2"),
1330 #endif
1331           XML_FALSE},
1332          {NULL, NULL, NULL, NULL, NULL, XML_FALSE}};
1333   AttTest *test;
1334 
1335   for (test = attr_data; test->definition != NULL; test++) {
1336     set_subtest("%s", test->definition);
1337     XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler);
1338     XML_SetUserData(g_parser, test);
1339     if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
1340                                 XML_FALSE)
1341         == XML_STATUS_ERROR)
1342       xml_failure(g_parser);
1343     if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition,
1344                                 (int)strlen(test->definition), XML_TRUE)
1345         == XML_STATUS_ERROR)
1346       xml_failure(g_parser);
1347     XML_ParserReset(g_parser, NULL);
1348   }
1349 }
1350 END_TEST
1351 
1352 /* See related SF bug #673791.
1353    When namespace processing is enabled, setting the namespace URI for
1354    a prefix is not allowed; this test ensures that it *is* allowed
1355    when namespace processing is not enabled.
1356    (See Namespaces in XML, section 2.)
1357 */
START_TEST(test_empty_ns_without_namespaces)1358 START_TEST(test_empty_ns_without_namespaces) {
1359   const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
1360                      "  <e xmlns:prefix=''/>\n"
1361                      "</doc>";
1362 
1363   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1364       == XML_STATUS_ERROR)
1365     xml_failure(g_parser);
1366 }
1367 END_TEST
1368 
1369 /* Regression test for SF bug #824420.
1370    Checks that an xmlns:prefix attribute set in an attribute's default
1371    value isn't misinterpreted.
1372 */
START_TEST(test_ns_in_attribute_default_without_namespaces)1373 START_TEST(test_ns_in_attribute_default_without_namespaces) {
1374   const char *text = "<!DOCTYPE e:element [\n"
1375                      "  <!ATTLIST e:element\n"
1376                      "    xmlns:e CDATA 'http://example.org/'>\n"
1377                      "      ]>\n"
1378                      "<e:element/>";
1379 
1380   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1381       == XML_STATUS_ERROR)
1382     xml_failure(g_parser);
1383 }
1384 END_TEST
1385 
1386 /* Regression test for SF bug #1515266: missing check of stopped
1387    parser in doContext() 'for' loop. */
START_TEST(test_stop_parser_between_char_data_calls)1388 START_TEST(test_stop_parser_between_char_data_calls) {
1389   /* The sample data must be big enough that there are two calls to
1390      the character data handler from within the inner "for" loop of
1391      the XML_TOK_DATA_CHARS case in doContent(), and the character
1392      handler must stop the parser and clear the character data
1393      handler.
1394   */
1395   const char *text = long_character_data_text;
1396 
1397   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1398   g_resumable = XML_FALSE;
1399   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1400       != XML_STATUS_ERROR)
1401     xml_failure(g_parser);
1402   if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
1403     xml_failure(g_parser);
1404 }
1405 END_TEST
1406 
1407 /* Regression test for SF bug #1515266: missing check of stopped
1408    parser in doContext() 'for' loop. */
START_TEST(test_suspend_parser_between_char_data_calls)1409 START_TEST(test_suspend_parser_between_char_data_calls) {
1410   /* The sample data must be big enough that there are two calls to
1411      the character data handler from within the inner "for" loop of
1412      the XML_TOK_DATA_CHARS case in doContent(), and the character
1413      handler must stop the parser and clear the character data
1414      handler.
1415   */
1416   const char *text = long_character_data_text;
1417 
1418   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1419   g_resumable = XML_TRUE;
1420   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1421       != XML_STATUS_SUSPENDED)
1422     xml_failure(g_parser);
1423   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1424     xml_failure(g_parser);
1425   /* Try parsing directly */
1426   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1427       != XML_STATUS_ERROR)
1428     fail("Attempt to continue parse while suspended not faulted");
1429   if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
1430     fail("Suspended parse not faulted with correct error");
1431 }
1432 END_TEST
1433 
1434 /* Test repeated calls to XML_StopParser are handled correctly */
START_TEST(test_repeated_stop_parser_between_char_data_calls)1435 START_TEST(test_repeated_stop_parser_between_char_data_calls) {
1436   const char *text = long_character_data_text;
1437 
1438   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1439   g_resumable = XML_FALSE;
1440   g_abortable = XML_FALSE;
1441   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1442       != XML_STATUS_ERROR)
1443     fail("Failed to double-stop parser");
1444 
1445   XML_ParserReset(g_parser, NULL);
1446   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1447   g_resumable = XML_TRUE;
1448   g_abortable = XML_FALSE;
1449   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1450       != XML_STATUS_SUSPENDED)
1451     fail("Failed to double-suspend parser");
1452 
1453   XML_ParserReset(g_parser, NULL);
1454   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1455   g_resumable = XML_TRUE;
1456   g_abortable = XML_TRUE;
1457   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1458       != XML_STATUS_ERROR)
1459     fail("Failed to suspend-abort parser");
1460 }
1461 END_TEST
1462 
START_TEST(test_good_cdata_ascii)1463 START_TEST(test_good_cdata_ascii) {
1464   const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
1465   const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>");
1466 
1467   CharData storage;
1468   CharData_Init(&storage);
1469   XML_SetUserData(g_parser, &storage);
1470   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1471   /* Add start and end handlers for coverage */
1472   XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1473   XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1474 
1475   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1476       == XML_STATUS_ERROR)
1477     xml_failure(g_parser);
1478   CharData_CheckXMLChars(&storage, expected);
1479 
1480   /* Try again, this time with a default handler */
1481   XML_ParserReset(g_parser, NULL);
1482   CharData_Init(&storage);
1483   XML_SetUserData(g_parser, &storage);
1484   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1485   XML_SetDefaultHandler(g_parser, dummy_default_handler);
1486 
1487   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1488       == XML_STATUS_ERROR)
1489     xml_failure(g_parser);
1490   CharData_CheckXMLChars(&storage, expected);
1491 }
1492 END_TEST
1493 
START_TEST(test_good_cdata_utf16)1494 START_TEST(test_good_cdata_utf16) {
1495   /* Test data is:
1496    *   <?xml version='1.0' encoding='utf-16'?>
1497    *   <a><![CDATA[hello]]></a>
1498    */
1499   const char text[]
1500       = "\0<\0?\0x\0m\0l\0"
1501         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1502         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1503         "1\0"
1504         "6\0'"
1505         "\0?\0>\0\n"
1506         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
1507   const XML_Char *expected = XCS("hello");
1508 
1509   CharData storage;
1510   CharData_Init(&storage);
1511   XML_SetUserData(g_parser, &storage);
1512   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1513 
1514   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1515       == XML_STATUS_ERROR)
1516     xml_failure(g_parser);
1517   CharData_CheckXMLChars(&storage, expected);
1518 }
1519 END_TEST
1520 
START_TEST(test_good_cdata_utf16_le)1521 START_TEST(test_good_cdata_utf16_le) {
1522   /* Test data is:
1523    *   <?xml version='1.0' encoding='utf-16'?>
1524    *   <a><![CDATA[hello]]></a>
1525    */
1526   const char text[]
1527       = "<\0?\0x\0m\0l\0"
1528         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1529         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1530         "1\0"
1531         "6\0'"
1532         "\0?\0>\0\n"
1533         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
1534   const XML_Char *expected = XCS("hello");
1535 
1536   CharData storage;
1537   CharData_Init(&storage);
1538   XML_SetUserData(g_parser, &storage);
1539   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1540 
1541   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1542       == XML_STATUS_ERROR)
1543     xml_failure(g_parser);
1544   CharData_CheckXMLChars(&storage, expected);
1545 }
1546 END_TEST
1547 
1548 /* Test UTF16 conversion of a long cdata string */
1549 
1550 /* 16 characters: handy macro to reduce visual clutter */
1551 #define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"
1552 
START_TEST(test_long_cdata_utf16)1553 START_TEST(test_long_cdata_utf16) {
1554   /* Test data is:
1555    * <?xlm version='1.0' encoding='utf-16'?>
1556    * <a><![CDATA[
1557    * ABCDEFGHIJKLMNOP
1558    * ]]></a>
1559    */
1560   const char text[]
1561       = "\0<\0?\0x\0m\0l\0 "
1562         "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
1563         "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
1564         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1565       /* 64 characters per line */
1566       /* clang-format off */
1567         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1568         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1569         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1570         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1571         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1572         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1573         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1574         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1575         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1576         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1577         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1578         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1579         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1580         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1581         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1582         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1583         A_TO_P_IN_UTF16
1584         /* clang-format on */
1585         "\0]\0]\0>\0<\0/\0a\0>";
1586   const XML_Char *expected =
1587       /* clang-format off */
1588         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1589         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1590         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1591         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1592         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1593         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1594         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1595         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1596         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1597         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1598         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1599         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1600         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1601         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1602         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1603         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1604         XCS("ABCDEFGHIJKLMNOP");
1605   /* clang-format on */
1606   CharData storage;
1607   void *buffer;
1608 
1609   CharData_Init(&storage);
1610   XML_SetUserData(g_parser, &storage);
1611   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1612   buffer = XML_GetBuffer(g_parser, sizeof(text) - 1);
1613   if (buffer == NULL)
1614     fail("Could not allocate parse buffer");
1615   assert(buffer != NULL);
1616   memcpy(buffer, text, sizeof(text) - 1);
1617   if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
1618     xml_failure(g_parser);
1619   CharData_CheckXMLChars(&storage, expected);
1620 }
1621 END_TEST
1622 
1623 /* Test handling of multiple unit UTF-16 characters */
START_TEST(test_multichar_cdata_utf16)1624 START_TEST(test_multichar_cdata_utf16) {
1625   /* Test data is:
1626    *   <?xml version='1.0' encoding='utf-16'?>
1627    *   <a><![CDATA[{MINIM}{CROTCHET}]]></a>
1628    *
1629    * where {MINIM} is U+1d15e (a minim or half-note)
1630    *   UTF-16: 0xd834 0xdd5e
1631    *   UTF-8:  0xf0 0x9d 0x85 0x9e
1632    * and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
1633    *   UTF-16: 0xd834 0xdd5f
1634    *   UTF-8:  0xf0 0x9d 0x85 0x9f
1635    */
1636   const char text[] = "\0<\0?\0x\0m\0l\0"
1637                       " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1638                       " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1639                       "1\0"
1640                       "6\0'"
1641                       "\0?\0>\0\n"
1642                       "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1643                       "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
1644                       "\0]\0]\0>\0<\0/\0a\0>";
1645 #ifdef XML_UNICODE
1646   const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f");
1647 #else
1648   const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
1649 #endif
1650   CharData storage;
1651 
1652   CharData_Init(&storage);
1653   XML_SetUserData(g_parser, &storage);
1654   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1655 
1656   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1657       == XML_STATUS_ERROR)
1658     xml_failure(g_parser);
1659   CharData_CheckXMLChars(&storage, expected);
1660 }
1661 END_TEST
1662 
1663 /* Test that an element name with a UTF-16 surrogate pair is rejected */
START_TEST(test_utf16_bad_surrogate_pair)1664 START_TEST(test_utf16_bad_surrogate_pair) {
1665   /* Test data is:
1666    *   <?xml version='1.0' encoding='utf-16'?>
1667    *   <a><![CDATA[{BADLINB}]]></a>
1668    *
1669    * where {BADLINB} is U+10000 (the first Linear B character)
1670    * with the UTF-16 surrogate pair in the wrong order, i.e.
1671    *   0xdc00 0xd800
1672    */
1673   const char text[] = "\0<\0?\0x\0m\0l\0"
1674                       " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1675                       " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1676                       "1\0"
1677                       "6\0'"
1678                       "\0?\0>\0\n"
1679                       "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1680                       "\xdc\x00\xd8\x00"
1681                       "\0]\0]\0>\0<\0/\0a\0>";
1682 
1683   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1684       != XML_STATUS_ERROR)
1685     fail("Reversed UTF-16 surrogate pair not faulted");
1686   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
1687     xml_failure(g_parser);
1688 }
1689 END_TEST
1690 
START_TEST(test_bad_cdata)1691 START_TEST(test_bad_cdata) {
1692   struct CaseData {
1693     const char *text;
1694     enum XML_Error expectedError;
1695   };
1696 
1697   struct CaseData cases[]
1698       = {{"<a><", XML_ERROR_UNCLOSED_TOKEN},
1699          {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
1700          {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
1701          {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
1702          {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
1703          {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
1704          {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
1705          {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
1706 
1707          {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1708          {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1709          {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1710 
1711          {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
1712          {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN},  /* ?! */
1713          {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1714          {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
1715          {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
1716          {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
1717          {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
1718 
1719          {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1720          {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1721          {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}};
1722 
1723   size_t i = 0;
1724   for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1725     set_subtest("%s", cases[i].text);
1726     const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
1727         g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
1728     const enum XML_Error actualError = XML_GetErrorCode(g_parser);
1729 
1730     assert(actualStatus == XML_STATUS_ERROR);
1731 
1732     if (actualError != cases[i].expectedError) {
1733       char message[100];
1734       snprintf(message, sizeof(message),
1735                "Expected error %d but got error %d for case %u: \"%s\"\n",
1736                cases[i].expectedError, actualError, (unsigned int)i + 1,
1737                cases[i].text);
1738       fail(message);
1739     }
1740 
1741     XML_ParserReset(g_parser, NULL);
1742   }
1743 }
1744 END_TEST
1745 
1746 /* Test failures in UTF-16 CDATA */
START_TEST(test_bad_cdata_utf16)1747 START_TEST(test_bad_cdata_utf16) {
1748   struct CaseData {
1749     size_t text_bytes;
1750     const char *text;
1751     enum XML_Error expected_error;
1752   };
1753 
1754   const char prolog[] = "\0<\0?\0x\0m\0l\0"
1755                         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1756                         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1757                         "1\0"
1758                         "6\0'"
1759                         "\0?\0>\0\n"
1760                         "\0<\0a\0>";
1761   struct CaseData cases[] = {
1762       {1, "\0", XML_ERROR_UNCLOSED_TOKEN},
1763       {2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
1764       {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
1765       {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
1766       {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
1767       {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
1768       {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
1769       {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
1770       {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
1771       {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
1772       {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
1773       {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
1774       {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1775       {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
1776       {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
1777       {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
1778       {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1779       {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1780       {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION},
1781       {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION},
1782       /* Now add a four-byte UTF-16 character */
1783       {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
1784        XML_ERROR_UNCLOSED_CDATA_SECTION},
1785       {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR},
1786       {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
1787        XML_ERROR_PARTIAL_CHAR},
1788       {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
1789        XML_ERROR_UNCLOSED_CDATA_SECTION}};
1790   size_t i;
1791 
1792   for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1793     set_subtest("case %lu", (long unsigned)(i + 1));
1794     enum XML_Status actual_status;
1795     enum XML_Error actual_error;
1796 
1797     if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1,
1798                                 XML_FALSE)
1799         == XML_STATUS_ERROR)
1800       xml_failure(g_parser);
1801     actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text,
1802                                             (int)cases[i].text_bytes, XML_TRUE);
1803     assert(actual_status == XML_STATUS_ERROR);
1804     actual_error = XML_GetErrorCode(g_parser);
1805     if (actual_error != cases[i].expected_error) {
1806       char message[1024];
1807 
1808       snprintf(message, sizeof(message),
1809                "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR
1810                ") for case %lu\n",
1811                cases[i].expected_error,
1812                XML_ErrorString(cases[i].expected_error), actual_error,
1813                XML_ErrorString(actual_error), (long unsigned)(i + 1));
1814       fail(message);
1815     }
1816     XML_ParserReset(g_parser, NULL);
1817   }
1818 }
1819 END_TEST
1820 
1821 /* Test stopping the parser in cdata handler */
START_TEST(test_stop_parser_between_cdata_calls)1822 START_TEST(test_stop_parser_between_cdata_calls) {
1823   const char *text = long_cdata_text;
1824 
1825   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1826   g_resumable = XML_FALSE;
1827   expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler");
1828 }
1829 END_TEST
1830 
1831 /* Test suspending the parser in cdata handler */
START_TEST(test_suspend_parser_between_cdata_calls)1832 START_TEST(test_suspend_parser_between_cdata_calls) {
1833   const char *text = long_cdata_text;
1834   enum XML_Status result;
1835 
1836   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1837   g_resumable = XML_TRUE;
1838   result = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
1839   if (result != XML_STATUS_SUSPENDED) {
1840     if (result == XML_STATUS_ERROR)
1841       xml_failure(g_parser);
1842     fail("Parse not suspended in CDATA handler");
1843   }
1844   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1845     xml_failure(g_parser);
1846 }
1847 END_TEST
1848 
1849 /* Test memory allocation functions */
START_TEST(test_memory_allocation)1850 START_TEST(test_memory_allocation) {
1851   char *buffer = (char *)XML_MemMalloc(g_parser, 256);
1852   char *p;
1853 
1854   if (buffer == NULL) {
1855     fail("Allocation failed");
1856   } else {
1857     /* Try writing to memory; some OSes try to cheat! */
1858     buffer[0] = 'T';
1859     buffer[1] = 'E';
1860     buffer[2] = 'S';
1861     buffer[3] = 'T';
1862     buffer[4] = '\0';
1863     if (strcmp(buffer, "TEST") != 0) {
1864       fail("Memory not writable");
1865     } else {
1866       p = (char *)XML_MemRealloc(g_parser, buffer, 512);
1867       if (p == NULL) {
1868         fail("Reallocation failed");
1869       } else {
1870         /* Write again, just to be sure */
1871         buffer = p;
1872         buffer[0] = 'V';
1873         if (strcmp(buffer, "VEST") != 0) {
1874           fail("Reallocated memory not writable");
1875         }
1876       }
1877     }
1878     XML_MemFree(g_parser, buffer);
1879   }
1880 }
1881 END_TEST
1882 
1883 /* Test XML_DefaultCurrent() passes handling on correctly */
START_TEST(test_default_current)1884 START_TEST(test_default_current) {
1885   const char *text = "<doc>hell]</doc>";
1886   const char *entity_text = "<!DOCTYPE doc [\n"
1887                             "<!ENTITY entity '&#37;'>\n"
1888                             "]>\n"
1889                             "<doc>&entity;</doc>";
1890 
1891   set_subtest("with defaulting");
1892   {
1893     struct handler_record_list storage;
1894     storage.count = 0;
1895     XML_SetDefaultHandler(g_parser, record_default_handler);
1896     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1897     XML_SetUserData(g_parser, &storage);
1898     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1899         == XML_STATUS_ERROR)
1900       xml_failure(g_parser);
1901     int i = 0;
1902     assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1903     // we should have gotten one or more cdata callbacks, totaling 5 chars
1904     int cdata_len_remaining = 5;
1905     while (cdata_len_remaining > 0) {
1906       const struct handler_record_entry *c_entry
1907           = handler_record_get(&storage, i++);
1908       assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
1909       assert_true(c_entry->arg > 0);
1910       assert_true(c_entry->arg <= cdata_len_remaining);
1911       cdata_len_remaining -= c_entry->arg;
1912       // default handler must follow, with the exact same len argument.
1913       assert_record_handler_called(&storage, i++, "record_default_handler",
1914                                    c_entry->arg);
1915     }
1916     assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1917     assert_true(storage.count == i);
1918   }
1919 
1920   /* Again, without the defaulting */
1921   set_subtest("no defaulting");
1922   {
1923     struct handler_record_list storage;
1924     storage.count = 0;
1925     XML_ParserReset(g_parser, NULL);
1926     XML_SetDefaultHandler(g_parser, record_default_handler);
1927     XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
1928     XML_SetUserData(g_parser, &storage);
1929     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1930         == XML_STATUS_ERROR)
1931       xml_failure(g_parser);
1932     int i = 0;
1933     assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1934     // we should have gotten one or more cdata callbacks, totaling 5 chars
1935     int cdata_len_remaining = 5;
1936     while (cdata_len_remaining > 0) {
1937       const struct handler_record_entry *c_entry
1938           = handler_record_get(&storage, i++);
1939       assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
1940       assert_true(c_entry->arg > 0);
1941       assert_true(c_entry->arg <= cdata_len_remaining);
1942       cdata_len_remaining -= c_entry->arg;
1943     }
1944     assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1945     assert_true(storage.count == i);
1946   }
1947 
1948   /* Now with an internal entity to complicate matters */
1949   set_subtest("with internal entity");
1950   {
1951     struct handler_record_list storage;
1952     storage.count = 0;
1953     XML_ParserReset(g_parser, NULL);
1954     XML_SetDefaultHandler(g_parser, record_default_handler);
1955     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1956     XML_SetUserData(g_parser, &storage);
1957     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1958                                 XML_TRUE)
1959         == XML_STATUS_ERROR)
1960       xml_failure(g_parser);
1961     /* The default handler suppresses the entity */
1962     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
1963     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
1964     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
1965     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
1966     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
1967     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
1968     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
1969     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
1970     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
1971     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
1972     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
1973     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
1974     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
1975     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
1976     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
1977     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
1978     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
1979     assert_record_handler_called(&storage, 17, "record_default_handler", 8);
1980     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
1981     assert_true(storage.count == 19);
1982   }
1983 
1984   /* Again, with a skip handler */
1985   set_subtest("with skip handler");
1986   {
1987     struct handler_record_list storage;
1988     storage.count = 0;
1989     XML_ParserReset(g_parser, NULL);
1990     XML_SetDefaultHandler(g_parser, record_default_handler);
1991     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1992     XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
1993     XML_SetUserData(g_parser, &storage);
1994     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1995                                 XML_TRUE)
1996         == XML_STATUS_ERROR)
1997       xml_failure(g_parser);
1998     /* The default handler suppresses the entity */
1999     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2000     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2001     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2002     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2003     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2004     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2005     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2006     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2007     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2008     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2009     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2010     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2011     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2012     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2013     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2014     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2015     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2016     assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
2017     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2018     assert_true(storage.count == 19);
2019   }
2020 
2021   /* This time, allow the entity through */
2022   set_subtest("allow entity");
2023   {
2024     struct handler_record_list storage;
2025     storage.count = 0;
2026     XML_ParserReset(g_parser, NULL);
2027     XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2028     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2029     XML_SetUserData(g_parser, &storage);
2030     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2031                                 XML_TRUE)
2032         == XML_STATUS_ERROR)
2033       xml_failure(g_parser);
2034     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2035     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2036     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2037     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2038     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2039     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2040     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2041     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2042     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2043     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2044     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2045     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2046     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2047     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2048     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2049     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2050     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2051     assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
2052     assert_record_handler_called(&storage, 18, "record_default_handler", 1);
2053     assert_record_handler_called(&storage, 19, "record_default_handler", 6);
2054     assert_true(storage.count == 20);
2055   }
2056 
2057   /* Finally, without passing the cdata to the default handler */
2058   set_subtest("not passing cdata");
2059   {
2060     struct handler_record_list storage;
2061     storage.count = 0;
2062     XML_ParserReset(g_parser, NULL);
2063     XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2064     XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2065     XML_SetUserData(g_parser, &storage);
2066     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2067                                 XML_TRUE)
2068         == XML_STATUS_ERROR)
2069       xml_failure(g_parser);
2070     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2071     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2072     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2073     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2074     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2075     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2076     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2077     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2078     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2079     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2080     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2081     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2082     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2083     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2084     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2085     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2086     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2087     assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
2088                                  1);
2089     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2090     assert_true(storage.count == 19);
2091   }
2092 }
2093 END_TEST
2094 
2095 /* Test DTD element parsing code paths */
START_TEST(test_dtd_elements)2096 START_TEST(test_dtd_elements) {
2097   const char *text = "<!DOCTYPE doc [\n"
2098                      "<!ELEMENT doc (chapter)>\n"
2099                      "<!ELEMENT chapter (#PCDATA)>\n"
2100                      "]>\n"
2101                      "<doc><chapter>Wombats are go</chapter></doc>";
2102 
2103   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
2104   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2105       == XML_STATUS_ERROR)
2106     xml_failure(g_parser);
2107 }
2108 END_TEST
2109 
2110 static void XMLCALL
element_decl_check_model(void * userData,const XML_Char * name,XML_Content * model)2111 element_decl_check_model(void *userData, const XML_Char *name,
2112                          XML_Content *model) {
2113   UNUSED_P(userData);
2114   uint32_t errorFlags = 0;
2115 
2116   /* Expected model array structure is this:
2117    * [0] (type 6, quant 0)
2118    *   [1] (type 5, quant 0)
2119    *     [3] (type 4, quant 0, name "bar")
2120    *     [4] (type 4, quant 0, name "foo")
2121    *     [5] (type 4, quant 3, name "xyz")
2122    *   [2] (type 4, quant 2, name "zebra")
2123    */
2124   errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
2125   errorFlags |= ((model != NULL) ? 0 : (1u << 1));
2126 
2127   if (model != NULL) {
2128     errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
2129     errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
2130     errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
2131     errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
2132     errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
2133 
2134     errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
2135     errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
2136     errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
2137     errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
2138     errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
2139 
2140     errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
2141     errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
2142     errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
2143     errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
2144     errorFlags
2145         |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
2146 
2147     errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
2148     errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
2149     errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
2150     errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
2151     errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
2152 
2153     errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
2154     errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
2155     errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
2156     errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
2157     errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
2158 
2159     errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
2160     errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
2161     errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
2162     errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
2163     errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
2164   }
2165 
2166   XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
2167   XML_FreeContentModel(g_parser, model);
2168 }
2169 
START_TEST(test_dtd_elements_nesting)2170 START_TEST(test_dtd_elements_nesting) {
2171   // Payload inspired by a test in Perl's XML::Parser
2172   const char *text = "<!DOCTYPE foo [\n"
2173                      "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
2174                      "]>\n"
2175                      "<foo/>";
2176 
2177   XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
2178 
2179   XML_SetElementDeclHandler(g_parser, element_decl_check_model);
2180   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2181       == XML_STATUS_ERROR)
2182     xml_failure(g_parser);
2183 
2184   if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
2185     fail("Element declaration model regression detected");
2186 }
2187 END_TEST
2188 
2189 /* Test foreign DTD handling */
START_TEST(test_set_foreign_dtd)2190 START_TEST(test_set_foreign_dtd) {
2191   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
2192   const char *text2 = "<doc>&entity;</doc>";
2193   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2194 
2195   /* Check hash salt is passed through too */
2196   XML_SetHashSalt(g_parser, 0x12345678);
2197   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2198   XML_SetUserData(g_parser, &test_data);
2199   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2200   /* Add a default handler to exercise more code paths */
2201   XML_SetDefaultHandler(g_parser, dummy_default_handler);
2202   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2203     fail("Could not set foreign DTD");
2204   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2205       == XML_STATUS_ERROR)
2206     xml_failure(g_parser);
2207 
2208   /* Ensure that trying to set the DTD after parsing has started
2209    * is faulted, even if it's the same setting.
2210    */
2211   if (XML_UseForeignDTD(g_parser, XML_TRUE)
2212       != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2213     fail("Failed to reject late foreign DTD setting");
2214   /* Ditto for the hash salt */
2215   if (XML_SetHashSalt(g_parser, 0x23456789))
2216     fail("Failed to reject late hash salt change");
2217 
2218   /* Now finish the parse */
2219   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2220       == XML_STATUS_ERROR)
2221     xml_failure(g_parser);
2222 }
2223 END_TEST
2224 
2225 /* Test foreign DTD handling with a failing NotStandalone handler */
START_TEST(test_foreign_dtd_not_standalone)2226 START_TEST(test_foreign_dtd_not_standalone) {
2227   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2228                      "<doc>&entity;</doc>";
2229   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2230 
2231   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2232   XML_SetUserData(g_parser, &test_data);
2233   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2234   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
2235   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2236     fail("Could not set foreign DTD");
2237   expect_failure(text, XML_ERROR_NOT_STANDALONE,
2238                  "NotStandalonehandler failed to reject");
2239 }
2240 END_TEST
2241 
2242 /* Test invalid character in a foreign DTD is faulted */
START_TEST(test_invalid_foreign_dtd)2243 START_TEST(test_invalid_foreign_dtd) {
2244   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2245                      "<doc>&entity;</doc>";
2246   ExtFaults test_data
2247       = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN};
2248 
2249   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2250   XML_SetUserData(g_parser, &test_data);
2251   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
2252   XML_UseForeignDTD(g_parser, XML_TRUE);
2253   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2254                  "Bad DTD should not have been accepted");
2255 }
2256 END_TEST
2257 
2258 /* Test foreign DTD use with a doctype */
START_TEST(test_foreign_dtd_with_doctype)2259 START_TEST(test_foreign_dtd_with_doctype) {
2260   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
2261                       "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
2262   const char *text2 = "<doc>&entity;</doc>";
2263   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2264 
2265   /* Check hash salt is passed through too */
2266   XML_SetHashSalt(g_parser, 0x12345678);
2267   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2268   XML_SetUserData(g_parser, &test_data);
2269   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2270   /* Add a default handler to exercise more code paths */
2271   XML_SetDefaultHandler(g_parser, dummy_default_handler);
2272   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2273     fail("Could not set foreign DTD");
2274   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2275       == XML_STATUS_ERROR)
2276     xml_failure(g_parser);
2277 
2278   /* Ensure that trying to set the DTD after parsing has started
2279    * is faulted, even if it's the same setting.
2280    */
2281   if (XML_UseForeignDTD(g_parser, XML_TRUE)
2282       != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2283     fail("Failed to reject late foreign DTD setting");
2284   /* Ditto for the hash salt */
2285   if (XML_SetHashSalt(g_parser, 0x23456789))
2286     fail("Failed to reject late hash salt change");
2287 
2288   /* Now finish the parse */
2289   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2290       == XML_STATUS_ERROR)
2291     xml_failure(g_parser);
2292 }
2293 END_TEST
2294 
2295 /* Test XML_UseForeignDTD with no external subset present */
START_TEST(test_foreign_dtd_without_external_subset)2296 START_TEST(test_foreign_dtd_without_external_subset) {
2297   const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n"
2298                      "<doc>&foo;</doc>";
2299 
2300   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2301   XML_SetUserData(g_parser, NULL);
2302   XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2303   XML_UseForeignDTD(g_parser, XML_TRUE);
2304   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2305       == XML_STATUS_ERROR)
2306     xml_failure(g_parser);
2307 }
2308 END_TEST
2309 
START_TEST(test_empty_foreign_dtd)2310 START_TEST(test_empty_foreign_dtd) {
2311   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2312                      "<doc>&entity;</doc>";
2313 
2314   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2315   XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2316   XML_UseForeignDTD(g_parser, XML_TRUE);
2317   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
2318                  "Undefined entity not faulted");
2319 }
2320 END_TEST
2321 
2322 /* Test XML Base is set and unset appropriately */
START_TEST(test_set_base)2323 START_TEST(test_set_base) {
2324   const XML_Char *old_base;
2325   const XML_Char *new_base = XCS("/local/file/name.xml");
2326 
2327   old_base = XML_GetBase(g_parser);
2328   if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK)
2329     fail("Unable to set base");
2330   if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0)
2331     fail("Base setting not correct");
2332   if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK)
2333     fail("Unable to NULL base");
2334   if (XML_GetBase(g_parser) != NULL)
2335     fail("Base setting not nulled");
2336   XML_SetBase(g_parser, old_base);
2337 }
2338 END_TEST
2339 
2340 /* Test attribute counts, indexing, etc */
START_TEST(test_attributes)2341 START_TEST(test_attributes) {
2342   const char *text = "<!DOCTYPE doc [\n"
2343                      "<!ELEMENT doc (tag)>\n"
2344                      "<!ATTLIST doc id ID #REQUIRED>\n"
2345                      "]>"
2346                      "<doc a='1' id='one' b='2'>"
2347                      "<tag c='3'/>"
2348                      "</doc>";
2349   AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
2350                          {XCS("b"), XCS("2")},
2351                          {XCS("id"), XCS("one")},
2352                          {NULL, NULL}};
2353   AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
2354   ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL},
2355                         {XCS("tag"), 1, NULL, NULL},
2356                         {NULL, 0, NULL, NULL}};
2357   info[0].attributes = doc_info;
2358   info[1].attributes = tag_info;
2359 
2360   XML_SetStartElementHandler(g_parser, counting_start_element_handler);
2361   XML_SetUserData(g_parser, info);
2362   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2363       == XML_STATUS_ERROR)
2364     xml_failure(g_parser);
2365 }
2366 END_TEST
2367 
2368 /* Test reset works correctly in the middle of processing an internal
2369  * entity.  Exercises some obscure code in XML_ParserReset().
2370  */
START_TEST(test_reset_in_entity)2371 START_TEST(test_reset_in_entity) {
2372   const char *text = "<!DOCTYPE doc [\n"
2373                      "<!ENTITY wombat 'wom'>\n"
2374                      "<!ENTITY entity 'hi &wom; there'>\n"
2375                      "]>\n"
2376                      "<doc>&entity;</doc>";
2377   XML_ParsingStatus status;
2378 
2379   g_resumable = XML_TRUE;
2380   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2381   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2382       == XML_STATUS_ERROR)
2383     xml_failure(g_parser);
2384   XML_GetParsingStatus(g_parser, &status);
2385   if (status.parsing != XML_SUSPENDED)
2386     fail("Parsing status not SUSPENDED");
2387   XML_ParserReset(g_parser, NULL);
2388   XML_GetParsingStatus(g_parser, &status);
2389   if (status.parsing != XML_INITIALIZED)
2390     fail("Parsing status doesn't reset to INITIALIZED");
2391 }
2392 END_TEST
2393 
2394 /* Test that resume correctly passes through parse errors */
START_TEST(test_resume_invalid_parse)2395 START_TEST(test_resume_invalid_parse) {
2396   const char *text = "<doc>Hello</doc"; /* Missing closing wedge */
2397 
2398   g_resumable = XML_TRUE;
2399   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2400   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2401       == XML_STATUS_ERROR)
2402     xml_failure(g_parser);
2403   if (XML_ResumeParser(g_parser) == XML_STATUS_OK)
2404     fail("Resumed invalid parse not faulted");
2405   if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN)
2406     fail("Invalid parse not correctly faulted");
2407 }
2408 END_TEST
2409 
2410 /* Test that re-suspended parses are correctly passed through */
START_TEST(test_resume_resuspended)2411 START_TEST(test_resume_resuspended) {
2412   const char *text = "<doc>Hello<meep/>world</doc>";
2413 
2414   g_resumable = XML_TRUE;
2415   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2416   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2417       == XML_STATUS_ERROR)
2418     xml_failure(g_parser);
2419   g_resumable = XML_TRUE;
2420   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2421   if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
2422     fail("Resumption not suspended");
2423   /* This one should succeed and finish up */
2424   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
2425     xml_failure(g_parser);
2426 }
2427 END_TEST
2428 
2429 /* Test that CDATA shows up correctly through a default handler */
START_TEST(test_cdata_default)2430 START_TEST(test_cdata_default) {
2431   const char *text = "<doc><![CDATA[Hello\nworld]]></doc>";
2432   const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>");
2433   CharData storage;
2434 
2435   CharData_Init(&storage);
2436   XML_SetUserData(g_parser, &storage);
2437   XML_SetDefaultHandler(g_parser, accumulate_characters);
2438 
2439   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2440       == XML_STATUS_ERROR)
2441     xml_failure(g_parser);
2442   CharData_CheckXMLChars(&storage, expected);
2443 }
2444 END_TEST
2445 
2446 /* Test resetting a subordinate parser does exactly nothing */
START_TEST(test_subordinate_reset)2447 START_TEST(test_subordinate_reset) {
2448   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2449                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2450                      "<doc>&entity;</doc>";
2451 
2452   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2453   XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter);
2454   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2455       == XML_STATUS_ERROR)
2456     xml_failure(g_parser);
2457 }
2458 END_TEST
2459 
2460 /* Test suspending a subordinate parser */
START_TEST(test_subordinate_suspend)2461 START_TEST(test_subordinate_suspend) {
2462   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2463                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2464                      "<doc>&entity;</doc>";
2465 
2466   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2467   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender);
2468   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2469       == XML_STATUS_ERROR)
2470     xml_failure(g_parser);
2471 }
2472 END_TEST
2473 
2474 /* Test suspending a subordinate parser from an XML declaration */
2475 /* Increases code coverage of the tests */
2476 
START_TEST(test_subordinate_xdecl_suspend)2477 START_TEST(test_subordinate_xdecl_suspend) {
2478   const char *text
2479       = "<!DOCTYPE doc [\n"
2480         "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2481         "]>\n"
2482         "<doc>&entity;</doc>";
2483 
2484   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2485   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2486   g_resumable = XML_TRUE;
2487   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2488       == XML_STATUS_ERROR)
2489     xml_failure(g_parser);
2490 }
2491 END_TEST
2492 
START_TEST(test_subordinate_xdecl_abort)2493 START_TEST(test_subordinate_xdecl_abort) {
2494   const char *text
2495       = "<!DOCTYPE doc [\n"
2496         "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2497         "]>\n"
2498         "<doc>&entity;</doc>";
2499 
2500   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2501   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2502   g_resumable = XML_FALSE;
2503   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2504       == XML_STATUS_ERROR)
2505     xml_failure(g_parser);
2506 }
2507 END_TEST
2508 
2509 /* Test external entity fault handling with suspension */
START_TEST(test_ext_entity_invalid_suspended_parse)2510 START_TEST(test_ext_entity_invalid_suspended_parse) {
2511   const char *text = "<!DOCTYPE doc [\n"
2512                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2513                      "]>\n"
2514                      "<doc>&en;</doc>";
2515   ExtFaults faults[]
2516       = {{"<?xml version='1.0' encoding='us-ascii'?><",
2517           "Incomplete element declaration not faulted", NULL,
2518           XML_ERROR_UNCLOSED_TOKEN},
2519          {/* First two bytes of a three-byte char */
2520           "<?xml version='1.0' encoding='utf-8'?>\xe2\x82",
2521           "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
2522          {NULL, NULL, NULL, XML_ERROR_NONE}};
2523   ExtFaults *fault;
2524 
2525   for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
2526     set_subtest("%s", fault->parse_text);
2527     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2528     XML_SetExternalEntityRefHandler(g_parser,
2529                                     external_entity_suspending_faulter);
2530     XML_SetUserData(g_parser, fault);
2531     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2532                    "Parser did not report external entity error");
2533     XML_ParserReset(g_parser, NULL);
2534   }
2535 }
2536 END_TEST
2537 
2538 /* Test setting an explicit encoding */
START_TEST(test_explicit_encoding)2539 START_TEST(test_explicit_encoding) {
2540   const char *text1 = "<doc>Hello ";
2541   const char *text2 = " World</doc>";
2542 
2543   /* Just check that we can set the encoding to NULL before starting */
2544   if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2545     fail("Failed to initialise encoding to NULL");
2546   /* Say we are UTF-8 */
2547   if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK)
2548     fail("Failed to set explicit encoding");
2549   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2550       == XML_STATUS_ERROR)
2551     xml_failure(g_parser);
2552   /* Try to switch encodings mid-parse */
2553   if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR)
2554     fail("Allowed encoding change");
2555   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2556       == XML_STATUS_ERROR)
2557     xml_failure(g_parser);
2558   /* Try now the parse is over */
2559   if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2560     fail("Failed to unset encoding");
2561 }
2562 END_TEST
2563 
2564 /* Test handling of trailing CR (rather than newline) */
START_TEST(test_trailing_cr)2565 START_TEST(test_trailing_cr) {
2566   const char *text = "<doc>\r";
2567   int found_cr;
2568 
2569   /* Try with a character handler, for code coverage */
2570   XML_SetCharacterDataHandler(g_parser, cr_cdata_handler);
2571   XML_SetUserData(g_parser, &found_cr);
2572   found_cr = 0;
2573   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2574       == XML_STATUS_OK)
2575     fail("Failed to fault unclosed doc");
2576   if (found_cr == 0)
2577     fail("Did not catch the carriage return");
2578   XML_ParserReset(g_parser, NULL);
2579 
2580   /* Now with a default handler instead */
2581   XML_SetDefaultHandler(g_parser, cr_cdata_handler);
2582   XML_SetUserData(g_parser, &found_cr);
2583   found_cr = 0;
2584   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2585       == XML_STATUS_OK)
2586     fail("Failed to fault unclosed doc");
2587   if (found_cr == 0)
2588     fail("Did not catch default carriage return");
2589 }
2590 END_TEST
2591 
2592 /* Test trailing CR in an external entity parse */
START_TEST(test_ext_entity_trailing_cr)2593 START_TEST(test_ext_entity_trailing_cr) {
2594   const char *text = "<!DOCTYPE doc [\n"
2595                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2596                      "]>\n"
2597                      "<doc>&en;</doc>";
2598   int found_cr;
2599 
2600   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2601   XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher);
2602   XML_SetUserData(g_parser, &found_cr);
2603   found_cr = 0;
2604   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2605       != XML_STATUS_OK)
2606     xml_failure(g_parser);
2607   if (found_cr == 0)
2608     fail("No carriage return found");
2609   XML_ParserReset(g_parser, NULL);
2610 
2611   /* Try again with a different trailing CR */
2612   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2613   XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher);
2614   XML_SetUserData(g_parser, &found_cr);
2615   found_cr = 0;
2616   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2617       != XML_STATUS_OK)
2618     xml_failure(g_parser);
2619   if (found_cr == 0)
2620     fail("No carriage return found");
2621 }
2622 END_TEST
2623 
2624 /* Test handling of trailing square bracket */
START_TEST(test_trailing_rsqb)2625 START_TEST(test_trailing_rsqb) {
2626   const char *text8 = "<doc>]";
2627   const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000";
2628   int found_rsqb;
2629   int text8_len = (int)strlen(text8);
2630 
2631   XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2632   XML_SetUserData(g_parser, &found_rsqb);
2633   found_rsqb = 0;
2634   if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE)
2635       == XML_STATUS_OK)
2636     fail("Failed to fault unclosed doc");
2637   if (found_rsqb == 0)
2638     fail("Did not catch the right square bracket");
2639 
2640   /* Try again with a different encoding */
2641   XML_ParserReset(g_parser, NULL);
2642   XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2643   XML_SetUserData(g_parser, &found_rsqb);
2644   found_rsqb = 0;
2645   if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2646                               XML_TRUE)
2647       == XML_STATUS_OK)
2648     fail("Failed to fault unclosed doc");
2649   if (found_rsqb == 0)
2650     fail("Did not catch the right square bracket");
2651 
2652   /* And finally with a default handler */
2653   XML_ParserReset(g_parser, NULL);
2654   XML_SetDefaultHandler(g_parser, rsqb_handler);
2655   XML_SetUserData(g_parser, &found_rsqb);
2656   found_rsqb = 0;
2657   if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2658                               XML_TRUE)
2659       == XML_STATUS_OK)
2660     fail("Failed to fault unclosed doc");
2661   if (found_rsqb == 0)
2662     fail("Did not catch the right square bracket");
2663 }
2664 END_TEST
2665 
2666 /* Test trailing right square bracket in an external entity parse */
START_TEST(test_ext_entity_trailing_rsqb)2667 START_TEST(test_ext_entity_trailing_rsqb) {
2668   const char *text = "<!DOCTYPE doc [\n"
2669                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2670                      "]>\n"
2671                      "<doc>&en;</doc>";
2672   int found_rsqb;
2673 
2674   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2675   XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher);
2676   XML_SetUserData(g_parser, &found_rsqb);
2677   found_rsqb = 0;
2678   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2679       != XML_STATUS_OK)
2680     xml_failure(g_parser);
2681   if (found_rsqb == 0)
2682     fail("No right square bracket found");
2683 }
2684 END_TEST
2685 
2686 /* Test CDATA handling in an external entity */
START_TEST(test_ext_entity_good_cdata)2687 START_TEST(test_ext_entity_good_cdata) {
2688   const char *text = "<!DOCTYPE doc [\n"
2689                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2690                      "]>\n"
2691                      "<doc>&en;</doc>";
2692 
2693   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2694   XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii);
2695   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2696       != XML_STATUS_OK)
2697     xml_failure(g_parser);
2698 }
2699 END_TEST
2700 
2701 /* Test user parameter settings */
START_TEST(test_user_parameters)2702 START_TEST(test_user_parameters) {
2703   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2704                      "<!-- Primary parse -->\n"
2705                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2706                      "<doc>&entity;";
2707   const char *epilog = "<!-- Back to primary parser -->\n"
2708                        "</doc>";
2709 
2710   g_comment_count = 0;
2711   g_skip_count = 0;
2712   g_xdecl_count = 0;
2713   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2714   XML_SetXmlDeclHandler(g_parser, xml_decl_handler);
2715   XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker);
2716   XML_SetCommentHandler(g_parser, data_check_comment_handler);
2717   XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler);
2718   XML_UseParserAsHandlerArg(g_parser);
2719   XML_SetUserData(g_parser, (void *)1);
2720   g_handler_data = g_parser;
2721   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2722       == XML_STATUS_ERROR)
2723     xml_failure(g_parser);
2724   /* Ensure we can't change policy mid-parse */
2725   if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER))
2726     fail("Changed param entity parsing policy while parsing");
2727   if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
2728       == XML_STATUS_ERROR)
2729     xml_failure(g_parser);
2730   if (g_comment_count != 3)
2731     fail("Comment handler not invoked enough times");
2732   if (g_skip_count != 1)
2733     fail("Skip handler not invoked enough times");
2734   if (g_xdecl_count != 1)
2735     fail("XML declaration handler not invoked");
2736 }
2737 END_TEST
2738 
2739 /* Test that an explicit external entity handler argument replaces
2740  * the parser as the first argument.
2741  *
2742  * We do not call the first parameter to the external entity handler
2743  * 'parser' for once, since the first time the handler is called it
2744  * will actually be a text string.  We need to be able to access the
2745  * global 'parser' variable to create our external entity parser from,
2746  * since there are code paths we need to ensure get executed.
2747  */
START_TEST(test_ext_entity_ref_parameter)2748 START_TEST(test_ext_entity_ref_parameter) {
2749   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2750                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2751                      "<doc>&entity;</doc>";
2752 
2753   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2754   XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2755   /* Set a handler arg that is not NULL and not parser (which is
2756    * what NULL would cause to be passed.
2757    */
2758   XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text);
2759   g_handler_data = text;
2760   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2761       == XML_STATUS_ERROR)
2762     xml_failure(g_parser);
2763 
2764   /* Now try again with unset args */
2765   XML_ParserReset(g_parser, NULL);
2766   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2767   XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2768   XML_SetExternalEntityRefHandlerArg(g_parser, NULL);
2769   g_handler_data = g_parser;
2770   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2771       == XML_STATUS_ERROR)
2772     xml_failure(g_parser);
2773 }
2774 END_TEST
2775 
2776 /* Test the parsing of an empty string */
START_TEST(test_empty_parse)2777 START_TEST(test_empty_parse) {
2778   const char *text = "<doc></doc>";
2779   const char *partial = "<doc>";
2780 
2781   if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR)
2782     fail("Parsing empty string faulted");
2783   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2784     fail("Parsing final empty string not faulted");
2785   if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS)
2786     fail("Parsing final empty string faulted for wrong reason");
2787 
2788   /* Now try with valid text before the empty end */
2789   XML_ParserReset(g_parser, NULL);
2790   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2791       == XML_STATUS_ERROR)
2792     xml_failure(g_parser);
2793   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR)
2794     fail("Parsing final empty string faulted");
2795 
2796   /* Now try with invalid text before the empty end */
2797   XML_ParserReset(g_parser, NULL);
2798   if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial),
2799                               XML_FALSE)
2800       == XML_STATUS_ERROR)
2801     xml_failure(g_parser);
2802   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2803     fail("Parsing final incomplete empty string not faulted");
2804 }
2805 END_TEST
2806 
2807 /* Test odd corners of the XML_GetBuffer interface */
2808 static enum XML_Status
get_feature(enum XML_FeatureEnum feature_id,long * presult)2809 get_feature(enum XML_FeatureEnum feature_id, long *presult) {
2810   const XML_Feature *feature = XML_GetFeatureList();
2811 
2812   if (feature == NULL)
2813     return XML_STATUS_ERROR;
2814   for (; feature->feature != XML_FEATURE_END; feature++) {
2815     if (feature->feature == feature_id) {
2816       *presult = feature->value;
2817       return XML_STATUS_OK;
2818     }
2819   }
2820   return XML_STATUS_ERROR;
2821 }
2822 
2823 /* Test odd corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_1)2824 START_TEST(test_get_buffer_1) {
2825   const char *text = get_buffer_test_text;
2826   void *buffer;
2827   long context_bytes;
2828 
2829   /* Attempt to allocate a negative length buffer */
2830   if (XML_GetBuffer(g_parser, -12) != NULL)
2831     fail("Negative length buffer not failed");
2832 
2833   /* Now get a small buffer and extend it past valid length */
2834   buffer = XML_GetBuffer(g_parser, 1536);
2835   if (buffer == NULL)
2836     fail("1.5K buffer failed");
2837   assert(buffer != NULL);
2838   memcpy(buffer, text, strlen(text));
2839   if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2840       == XML_STATUS_ERROR)
2841     xml_failure(g_parser);
2842   if (XML_GetBuffer(g_parser, INT_MAX) != NULL)
2843     fail("INT_MAX buffer not failed");
2844 
2845   /* Now try extending it a more reasonable but still too large
2846    * amount.  The allocator in XML_GetBuffer() doubles the buffer
2847    * size until it exceeds the requested amount or INT_MAX.  If it
2848    * exceeds INT_MAX, it rejects the request, so we want a request
2849    * between INT_MAX and INT_MAX/2.  A gap of 1K seems comfortable,
2850    * with an extra byte just to ensure that the request is off any
2851    * boundary.  The request will be inflated internally by
2852    * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
2853    * request.
2854    */
2855   if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK)
2856     context_bytes = 0;
2857   if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL)
2858     fail("INT_MAX- buffer not failed");
2859 
2860   /* Now try extending it a carefully crafted amount */
2861   if (XML_GetBuffer(g_parser, 1000) == NULL)
2862     fail("1000 buffer failed");
2863 }
2864 END_TEST
2865 
2866 /* Test more corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_2)2867 START_TEST(test_get_buffer_2) {
2868   const char *text = get_buffer_test_text;
2869   void *buffer;
2870 
2871   /* Now get a decent buffer */
2872   buffer = XML_GetBuffer(g_parser, 1536);
2873   if (buffer == NULL)
2874     fail("1.5K buffer failed");
2875   assert(buffer != NULL);
2876   memcpy(buffer, text, strlen(text));
2877   if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2878       == XML_STATUS_ERROR)
2879     xml_failure(g_parser);
2880 
2881   /* Extend it, to catch a different code path */
2882   if (XML_GetBuffer(g_parser, 1024) == NULL)
2883     fail("1024 buffer failed");
2884 }
2885 END_TEST
2886 
/* Regression test for the signed integer overflow of CVE-2022-23852;
 * only built when XML_CONTEXT_BYTES is positive.
 */
#if XML_CONTEXT_BYTES > 0
START_TEST(test_get_buffer_3_overflow) {
  XML_Parser parser = XML_ParserCreate(NULL);
  assert(parser != NULL);

  const char *const input = "\n";
  const int input_len = (int)strlen(input);
  const int expected_keep = input_len;

  // Once this parse has run, the variable "keep" inside XML_GetBuffer
  // will hold the value expected_keep
  const enum XML_Status status
      = _XML_Parse_SINGLE_BYTES(parser, input, input_len,
                                XML_FALSE /* isFinal */);
  if (status == XML_STATUS_ERROR)
    xml_failure(parser);

  assert(expected_keep > 0);
  // The requested length plus the kept bytes would exceed INT_MAX,
  // so the request has to be refused
  if (XML_GetBuffer(parser, INT_MAX - expected_keep + 1) != NULL)
    fail("enlarging buffer not failed");

  XML_ParserFree(parser);
}
END_TEST
#endif // XML_CONTEXT_BYTES > 0
2911 
START_TEST(test_buffer_can_grow_to_max)2912 START_TEST(test_buffer_can_grow_to_max) {
2913   const char *const prefixes[] = {
2914       "",
2915       "<",
2916       "<x a='",
2917       "<doc><x a='",
2918       "<document><x a='",
2919       "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
2920       "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
2921       "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
2922       "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
2923       "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
2924   const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
2925   int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
2926 #if defined(__MINGW32__) && ! defined(__MINGW64__)
2927   // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
2928   // Can we make a big allocation?
2929   void *big = malloc(maxbuf);
2930   if (! big) {
2931     // The big allocation failed. Let's be a little lenient.
2932     maxbuf = maxbuf / 2;
2933   }
2934   free(big);
2935 #endif
2936 
2937   for (int i = 0; i < num_prefixes; ++i) {
2938     set_subtest("\"%s\"", prefixes[i]);
2939     XML_Parser parser = XML_ParserCreate(NULL);
2940     const int prefix_len = (int)strlen(prefixes[i]);
2941     const enum XML_Status s
2942         = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
2943     if (s != XML_STATUS_OK)
2944       xml_failure(parser);
2945 
2946     // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
2947     // subtracting the whole prefix is easiest, and close enough.
2948     assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
2949     // The limit should be consistent; no prefix should allow us to
2950     // reach above the max buffer size.
2951     assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
2952     XML_ParserFree(parser);
2953   }
2954 }
2955 END_TEST
2956 
START_TEST(test_getbuffer_allocates_on_zero_len)2957 START_TEST(test_getbuffer_allocates_on_zero_len) {
2958   for (int first_len = 1; first_len >= 0; first_len--) {
2959     set_subtest("with len=%d first", first_len);
2960     XML_Parser parser = XML_ParserCreate(NULL);
2961     assert_true(parser != NULL);
2962     assert_true(XML_GetBuffer(parser, first_len) != NULL);
2963     assert_true(XML_GetBuffer(parser, 0) != NULL);
2964     if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK)
2965       xml_failure(parser);
2966     XML_ParserFree(parser);
2967   }
2968 }
2969 END_TEST
2970 
2971 /* Test position information macros */
START_TEST(test_byte_info_at_end)2972 START_TEST(test_byte_info_at_end) {
2973   const char *text = "<doc></doc>";
2974 
2975   if (XML_GetCurrentByteIndex(g_parser) != -1
2976       || XML_GetCurrentByteCount(g_parser) != 0)
2977     fail("Byte index/count incorrect at start of parse");
2978   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2979       == XML_STATUS_ERROR)
2980     xml_failure(g_parser);
2981   /* At end, the count will be zero and the index the end of string */
2982   if (XML_GetCurrentByteCount(g_parser) != 0)
2983     fail("Terminal byte count incorrect");
2984   if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text))
2985     fail("Terminal byte index incorrect");
2986 }
2987 END_TEST
2988 
2989 /* Test position information from errors */
2990 #define PRE_ERROR_STR "<doc></"
2991 #define POST_ERROR_STR "wombat></doc>"
START_TEST(test_byte_info_at_error)2992 START_TEST(test_byte_info_at_error) {
2993   const char *text = PRE_ERROR_STR POST_ERROR_STR;
2994 
2995   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2996       == XML_STATUS_OK)
2997     fail("Syntax error not faulted");
2998   if (XML_GetCurrentByteCount(g_parser) != 0)
2999     fail("Error byte count incorrect");
3000   if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR))
3001     fail("Error byte index incorrect");
3002 }
3003 END_TEST
3004 #undef PRE_ERROR_STR
3005 #undef POST_ERROR_STR
3006 
3007 /* Test position information in handler */
3008 #define START_ELEMENT "<e>"
3009 #define CDATA_TEXT "Hello"
3010 #define END_ELEMENT "</e>"
START_TEST(test_byte_info_at_cdata)3011 START_TEST(test_byte_info_at_cdata) {
3012   const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT;
3013   int offset, size;
3014   ByteTestData data;
3015 
3016   /* Check initial context is empty */
3017   if (XML_GetInputContext(g_parser, &offset, &size) != NULL)
3018     fail("Unexpected context at start of parse");
3019 
3020   data.start_element_len = (int)strlen(START_ELEMENT);
3021   data.cdata_len = (int)strlen(CDATA_TEXT);
3022   data.total_string_len = (int)strlen(text);
3023   XML_SetCharacterDataHandler(g_parser, byte_character_handler);
3024   XML_SetUserData(g_parser, &data);
3025   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
3026     xml_failure(g_parser);
3027 }
3028 END_TEST
3029 #undef START_ELEMENT
3030 #undef CDATA_TEXT
3031 #undef END_ELEMENT
3032 
3033 /* Test predefined entities are correctly recognised */
START_TEST(test_predefined_entities)3034 START_TEST(test_predefined_entities) {
3035   const char *text = "<doc>&lt;&gt;&amp;&quot;&apos;</doc>";
3036   const XML_Char *expected = XCS("<doc>&lt;&gt;&amp;&quot;&apos;</doc>");
3037   const XML_Char *result = XCS("<>&\"'");
3038   CharData storage;
3039 
3040   XML_SetDefaultHandler(g_parser, accumulate_characters);
3041   /* run_character_check uses XML_SetCharacterDataHandler(), which
3042    * unfortunately heads off a code path that we need to exercise.
3043    */
3044   CharData_Init(&storage);
3045   XML_SetUserData(g_parser, &storage);
3046   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3047       == XML_STATUS_ERROR)
3048     xml_failure(g_parser);
3049   /* The default handler doesn't translate the entities */
3050   CharData_CheckXMLChars(&storage, expected);
3051 
3052   /* Now try again and check the translation */
3053   XML_ParserReset(g_parser, NULL);
3054   run_character_check(text, result);
3055 }
3056 END_TEST
3057 
3058 /* Regression test that an invalid tag in an external parameter
3059  * reference in an external DTD is correctly faulted.
3060  *
3061  * Only a few specific tags are legal in DTDs ignoring comments and
3062  * processing instructions, all of which begin with an exclamation
3063  * mark.  "<el/>" is not one of them, so the parser should raise an
3064  * error on encountering it.
3065  */
START_TEST(test_invalid_tag_in_dtd)3066 START_TEST(test_invalid_tag_in_dtd) {
3067   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3068                      "<doc></doc>\n";
3069 
3070   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3071   XML_SetExternalEntityRefHandler(g_parser, external_entity_param);
3072   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3073                  "Invalid tag IN DTD external param not rejected");
3074 }
3075 END_TEST
3076 
3077 /* Test entities not quite the predefined ones are not mis-recognised */
START_TEST(test_not_predefined_entities)3078 START_TEST(test_not_predefined_entities) {
3079   const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>",
3080                         "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL};
3081   int i = 0;
3082 
3083   while (text[i] != NULL) {
3084     expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY,
3085                    "Undefined entity not rejected");
3086     XML_ParserReset(g_parser, NULL);
3087     i++;
3088   }
3089 }
3090 END_TEST
3091 
3092 /* Test conditional inclusion (IGNORE) */
START_TEST(test_ignore_section)3093 START_TEST(test_ignore_section) {
3094   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3095                      "<doc><e>&entity;</e></doc>";
3096   const XML_Char *expected
3097       = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;");
3098   CharData storage;
3099 
3100   CharData_Init(&storage);
3101   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3102   XML_SetUserData(g_parser, &storage);
3103   XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore);
3104   XML_SetDefaultHandler(g_parser, accumulate_characters);
3105   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3106   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3107   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3108   XML_SetStartElementHandler(g_parser, dummy_start_element);
3109   XML_SetEndElementHandler(g_parser, dummy_end_element);
3110   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3111       == XML_STATUS_ERROR)
3112     xml_failure(g_parser);
3113   CharData_CheckXMLChars(&storage, expected);
3114 }
3115 END_TEST
3116 
START_TEST(test_ignore_section_utf16)3117 START_TEST(test_ignore_section_utf16) {
3118   const char text[] =
3119       /* <!DOCTYPE d SYSTEM 's'> */
3120       "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3121       "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
3122       /* <d><e>&en;</e></d> */
3123       "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
3124   const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3125   CharData storage;
3126 
3127   CharData_Init(&storage);
3128   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3129   XML_SetUserData(g_parser, &storage);
3130   XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16);
3131   XML_SetDefaultHandler(g_parser, accumulate_characters);
3132   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3133   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3134   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3135   XML_SetStartElementHandler(g_parser, dummy_start_element);
3136   XML_SetEndElementHandler(g_parser, dummy_end_element);
3137   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3138       == XML_STATUS_ERROR)
3139     xml_failure(g_parser);
3140   CharData_CheckXMLChars(&storage, expected);
3141 }
3142 END_TEST
3143 
START_TEST(test_ignore_section_utf16_be)3144 START_TEST(test_ignore_section_utf16_be) {
3145   const char text[] =
3146       /* <!DOCTYPE d SYSTEM 's'> */
3147       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3148       "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n"
3149       /* <d><e>&en;</e></d> */
3150       "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>";
3151   const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3152   CharData storage;
3153 
3154   CharData_Init(&storage);
3155   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3156   XML_SetUserData(g_parser, &storage);
3157   XML_SetExternalEntityRefHandler(g_parser,
3158                                   external_entity_load_ignore_utf16_be);
3159   XML_SetDefaultHandler(g_parser, accumulate_characters);
3160   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3161   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3162   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3163   XML_SetStartElementHandler(g_parser, dummy_start_element);
3164   XML_SetEndElementHandler(g_parser, dummy_end_element);
3165   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3166       == XML_STATUS_ERROR)
3167     xml_failure(g_parser);
3168   CharData_CheckXMLChars(&storage, expected);
3169 }
3170 END_TEST
3171 
3172 /* Test mis-formatted conditional exclusion */
START_TEST(test_bad_ignore_section)3173 START_TEST(test_bad_ignore_section) {
3174   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3175                      "<doc><e>&entity;</e></doc>";
3176   ExtFaults faults[]
3177       = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL,
3178           XML_ERROR_SYNTAX},
3179          {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL,
3180           XML_ERROR_INVALID_TOKEN},
3181          {/* FIrst two bytes of a three-byte char */
3182           "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL,
3183           XML_ERROR_PARTIAL_CHAR},
3184          {NULL, NULL, NULL, XML_ERROR_NONE}};
3185   ExtFaults *fault;
3186 
3187   for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
3188     set_subtest("%s", fault->parse_text);
3189     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3190     XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3191     XML_SetUserData(g_parser, fault);
3192     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3193                    "Incomplete IGNORE section not failed");
3194     XML_ParserReset(g_parser, NULL);
3195   }
3196 }
3197 END_TEST
3198 
/* User data shared between test_external_bom_consumed and its external
 * entity reference handler, external_bom_checker.
 */
struct bom_testdata {
  /* Text that external_bom_checker parses for system ID "004-2.ent" */
  const char *external;
  /* Number of leading bytes of `external` parsed in the first, non-final
   * call; the remainder is parsed in a second, final call. */
  int split;
  /* Set to XML_TRUE by external_bom_checker when it handles "004-2.ent" */
  XML_Bool nested_callback_happened;
};
3204 
3205 static int XMLCALL
external_bom_checker(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)3206 external_bom_checker(XML_Parser parser, const XML_Char *context,
3207                      const XML_Char *base, const XML_Char *systemId,
3208                      const XML_Char *publicId) {
3209   const char *text;
3210   UNUSED_P(base);
3211   UNUSED_P(systemId);
3212   UNUSED_P(publicId);
3213 
3214   XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
3215   if (ext_parser == NULL)
3216     fail("Could not create external entity parser");
3217 
3218   if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
3219     struct bom_testdata *const testdata
3220         = (struct bom_testdata *)XML_GetUserData(parser);
3221     const char *const external = testdata->external;
3222     const int split = testdata->split;
3223     testdata->nested_callback_happened = XML_TRUE;
3224 
3225     if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE)
3226         != XML_STATUS_OK) {
3227       xml_failure(ext_parser);
3228     }
3229     text = external + split; // the parse below will continue where we left off.
3230   } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
3231     text = "<!ELEMENT doc EMPTY>\n"
3232            "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
3233            "<!ENTITY % e2 '%e1;'>\n";
3234   } else {
3235     fail("unknown systemId");
3236   }
3237 
3238   if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE)
3239       != XML_STATUS_OK)
3240     xml_failure(ext_parser);
3241 
3242   XML_ParserFree(ext_parser);
3243   return XML_STATUS_OK;
3244 }
3245 
3246 /* regression test: BOM should be consumed when followed by a partial token. */
START_TEST(test_external_bom_consumed)3247 START_TEST(test_external_bom_consumed) {
3248   const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3249                            "<doc></doc>\n";
3250   const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
3251   const int len = (int)strlen(external);
3252   for (int split = 0; split <= len; ++split) {
3253     set_subtest("split at byte %d", split);
3254 
3255     struct bom_testdata testdata;
3256     testdata.external = external;
3257     testdata.split = split;
3258     testdata.nested_callback_happened = XML_FALSE;
3259 
3260     XML_Parser parser = XML_ParserCreate(NULL);
3261     if (parser == NULL) {
3262       fail("Couldn't create parser");
3263     }
3264     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3265     XML_SetExternalEntityRefHandler(parser, external_bom_checker);
3266     XML_SetUserData(parser, &testdata);
3267     if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
3268         == XML_STATUS_ERROR)
3269       xml_failure(parser);
3270     if (! testdata.nested_callback_happened) {
3271       fail("ref handler not called");
3272     }
3273     XML_ParserFree(parser);
3274   }
3275 }
3276 END_TEST
3277 
3278 /* Test recursive parsing */
START_TEST(test_external_entity_values)3279 START_TEST(test_external_entity_values) {
3280   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3281                      "<doc></doc>\n";
3282   ExtFaults data_004_2[] = {
3283       {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE},
3284       {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL,
3285        XML_ERROR_INVALID_TOKEN},
3286       {"'wombat", "Unterminated string not faulted", NULL,
3287        XML_ERROR_UNCLOSED_TOKEN},
3288       {"\xe2\x82", "Partial UTF-8 character not faulted", NULL,
3289        XML_ERROR_PARTIAL_CHAR},
3290       {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE},
3291       {"<?xml?>", "Malformed XML declaration not faulted", NULL,
3292        XML_ERROR_XML_DECL},
3293       {/* UTF-8 BOM */
3294        "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL,
3295        XML_ERROR_NONE},
3296       {"<?xml version='1.0' encoding='utf-8'?>\n$",
3297        "Invalid token after text declaration not faulted", NULL,
3298        XML_ERROR_INVALID_TOKEN},
3299       {"<?xml version='1.0' encoding='utf-8'?>\n'wombat",
3300        "Unterminated string after text decl not faulted", NULL,
3301        XML_ERROR_UNCLOSED_TOKEN},
3302       {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82",
3303        "Partial UTF-8 character after text decl not faulted", NULL,
3304        XML_ERROR_PARTIAL_CHAR},
3305       {"%e1;", "Recursive parameter entity not faulted", NULL,
3306        XML_ERROR_RECURSIVE_ENTITY_REF},
3307       {NULL, NULL, NULL, XML_ERROR_NONE}};
3308   int i;
3309 
3310   for (i = 0; data_004_2[i].parse_text != NULL; i++) {
3311     set_subtest("%s", data_004_2[i].parse_text);
3312     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3313     XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer);
3314     XML_SetUserData(g_parser, &data_004_2[i]);
3315     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3316         == XML_STATUS_ERROR)
3317       xml_failure(g_parser);
3318     XML_ParserReset(g_parser, NULL);
3319   }
3320 }
3321 END_TEST
3322 
3323 /* Test the recursive parse interacts with a not standalone handler */
START_TEST(test_ext_entity_not_standalone)3324 START_TEST(test_ext_entity_not_standalone) {
3325   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3326                      "<doc></doc>";
3327 
3328   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3329   XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone);
3330   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3331                  "Standalone rejection not caught");
3332 }
3333 END_TEST
3334 
START_TEST(test_ext_entity_value_abort)3335 START_TEST(test_ext_entity_value_abort) {
3336   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3337                      "<doc></doc>\n";
3338 
3339   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3340   XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter);
3341   g_resumable = XML_FALSE;
3342   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3343       == XML_STATUS_ERROR)
3344     xml_failure(g_parser);
3345 }
3346 END_TEST
3347 
START_TEST(test_bad_public_doctype)3348 START_TEST(test_bad_public_doctype) {
3349   const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
3350                      "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n"
3351                      "<doc></doc>";
3352 
3353   /* Setting a handler provokes a particular code path */
3354   XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler,
3355                             dummy_end_doctype_handler);
3356   expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed");
3357 }
3358 END_TEST
3359 
3360 /* Test based on ibm/valid/P32/ibm32v04.xml */
START_TEST(test_attribute_enum_value)3361 START_TEST(test_attribute_enum_value) {
3362   const char *text = "<?xml version='1.0' standalone='no'?>\n"
3363                      "<!DOCTYPE animal SYSTEM 'test.dtd'>\n"
3364                      "<animal>This is a \n    <a/>  \n\nyellow tiger</animal>";
3365   ExtTest dtd_data
3366       = {"<!ELEMENT animal (#PCDATA|a)*>\n"
3367          "<!ELEMENT a EMPTY>\n"
3368          "<!ATTLIST animal xml:space (default|preserve) 'preserve'>",
3369          NULL, NULL};
3370   const XML_Char *expected = XCS("This is a \n      \n\nyellow tiger");
3371 
3372   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3373   XML_SetUserData(g_parser, &dtd_data);
3374   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3375   /* An attribute list handler provokes a different code path */
3376   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
3377   run_ext_character_check(text, &dtd_data, expected);
3378 }
3379 END_TEST
3380 
3381 /* Slightly bizarrely, the library seems to silently ignore entity
3382  * definitions for predefined entities, even when they are wrong.  The
3383  * language of the XML 1.0 spec is somewhat unhelpful as to what ought
3384  * to happen, so this is currently treated as acceptable.
3385  */
START_TEST(test_predefined_entity_redefinition)3386 START_TEST(test_predefined_entity_redefinition) {
3387   const char *text = "<!DOCTYPE doc [\n"
3388                      "<!ENTITY apos 'foo'>\n"
3389                      "]>\n"
3390                      "<doc>&apos;</doc>";
3391   run_character_check(text, XCS("'"));
3392 }
3393 END_TEST
3394 
3395 /* Test that the parser stops processing the DTD after an unresolved
3396  * parameter entity is encountered.
3397  */
START_TEST(test_dtd_stop_processing)3398 START_TEST(test_dtd_stop_processing) {
3399   const char *text = "<!DOCTYPE doc [\n"
3400                      "%foo;\n"
3401                      "<!ENTITY bar 'bas'>\n"
3402                      "]><doc/>";
3403 
3404   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
3405   init_dummy_handlers();
3406   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3407       == XML_STATUS_ERROR)
3408     xml_failure(g_parser);
3409   if (get_dummy_handler_flags() != 0)
3410     fail("DTD processing still going after undefined PE");
3411 }
3412 END_TEST
3413 
3414 /* Test public notations with no system ID */
START_TEST(test_public_notation_no_sysid)3415 START_TEST(test_public_notation_no_sysid) {
3416   const char *text = "<!DOCTYPE doc [\n"
3417                      "<!NOTATION note PUBLIC 'foo'>\n"
3418                      "<!ELEMENT doc EMPTY>\n"
3419                      "]>\n<doc/>";
3420 
3421   init_dummy_handlers();
3422   XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
3423   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3424       == XML_STATUS_ERROR)
3425     xml_failure(g_parser);
3426   if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG)
3427     fail("Notation declaration handler not called");
3428 }
3429 END_TEST
3430 
START_TEST(test_nested_groups)3431 START_TEST(test_nested_groups) {
3432   const char *text
3433       = "<!DOCTYPE doc [\n"
3434         "<!ELEMENT doc "
3435         /* Sixteen elements per line */
3436         "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,"
3437         "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?"
3438         "))))))))))))))))))))))))))))))))>\n"
3439         "<!ELEMENT e EMPTY>"
3440         "]>\n"
3441         "<doc><e/></doc>";
3442   CharData storage;
3443 
3444   CharData_Init(&storage);
3445   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3446   XML_SetStartElementHandler(g_parser, record_element_start_handler);
3447   XML_SetUserData(g_parser, &storage);
3448   init_dummy_handlers();
3449   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3450       == XML_STATUS_ERROR)
3451     xml_failure(g_parser);
3452   CharData_CheckXMLChars(&storage, XCS("doce"));
3453   if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3454     fail("Element handler not fired");
3455 }
3456 END_TEST
3457 
START_TEST(test_group_choice)3458 START_TEST(test_group_choice) {
3459   const char *text = "<!DOCTYPE doc [\n"
3460                      "<!ELEMENT doc (a|b|c)+>\n"
3461                      "<!ELEMENT a EMPTY>\n"
3462                      "<!ELEMENT b (#PCDATA)>\n"
3463                      "<!ELEMENT c ANY>\n"
3464                      "]>\n"
3465                      "<doc>\n"
3466                      "<a/>\n"
3467                      "<b attr='foo'>This is a foo</b>\n"
3468                      "<c></c>\n"
3469                      "</doc>\n";
3470 
3471   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3472   init_dummy_handlers();
3473   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3474       == XML_STATUS_ERROR)
3475     xml_failure(g_parser);
3476   if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3477     fail("Element handler flag not raised");
3478 }
3479 END_TEST
3480 
START_TEST(test_standalone_parameter_entity)3481 START_TEST(test_standalone_parameter_entity) {
3482   const char *text = "<?xml version='1.0' standalone='yes'?>\n"
3483                      "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n"
3484                      "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n"
3485                      "%entity;\n"
3486                      "]>\n"
3487                      "<doc></doc>";
3488   char dtd_data[] = "<!ENTITY % e1 'foo'>\n";
3489 
3490   XML_SetUserData(g_parser, dtd_data);
3491   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3492   XML_SetExternalEntityRefHandler(g_parser, external_entity_public);
3493   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3494       == XML_STATUS_ERROR)
3495     xml_failure(g_parser);
3496 }
3497 END_TEST
3498 
3499 /* Test skipping of parameter entity in an external DTD */
3500 /* Derived from ibm/invalid/P69/ibm69i01.xml */
START_TEST(test_skipped_parameter_entity)3501 START_TEST(test_skipped_parameter_entity) {
3502   const char *text = "<?xml version='1.0'?>\n"
3503                      "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3504                      "<!ELEMENT root (#PCDATA|a)* >\n"
3505                      "]>\n"
3506                      "<root></root>";
3507   ExtTest dtd_data = {"%pe2;", NULL, NULL};
3508 
3509   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3510   XML_SetUserData(g_parser, &dtd_data);
3511   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3512   XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler);
3513   init_dummy_handlers();
3514   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3515       == XML_STATUS_ERROR)
3516     xml_failure(g_parser);
3517   if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG)
3518     fail("Skip handler not executed");
3519 }
3520 END_TEST
3521 
3522 /* Test recursive parameter entity definition rejected in external DTD */
START_TEST(test_recursive_external_parameter_entity)3523 START_TEST(test_recursive_external_parameter_entity) {
3524   const char *text = "<?xml version='1.0'?>\n"
3525                      "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3526                      "<!ELEMENT root (#PCDATA|a)* >\n"
3527                      "]>\n"
3528                      "<root></root>";
3529   ExtFaults dtd_data = {"<!ENTITY % pe2 '&#37;pe2;'>\n%pe2;",
3530                         "Recursive external parameter entity not faulted", NULL,
3531                         XML_ERROR_RECURSIVE_ENTITY_REF};
3532 
3533   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3534   XML_SetUserData(g_parser, &dtd_data);
3535   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3536   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3537                  "Recursive external parameter not spotted");
3538 }
3539 END_TEST
3540 
3541 /* Test undefined parameter entity in external entity handler */
START_TEST(test_undefined_ext_entity_in_external_dtd)3542 START_TEST(test_undefined_ext_entity_in_external_dtd) {
3543   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3544                      "<doc></doc>\n";
3545 
3546   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3547   XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3548   XML_SetUserData(g_parser, NULL);
3549   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3550       == XML_STATUS_ERROR)
3551     xml_failure(g_parser);
3552 
3553   /* Now repeat without the external entity ref handler invoking
3554    * another copy of itself.
3555    */
3556   XML_ParserReset(g_parser, NULL);
3557   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3558   XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3559   XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */
3560   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3561       == XML_STATUS_ERROR)
3562     xml_failure(g_parser);
3563 }
3564 END_TEST
3565 
3566 /* Test suspending the parse on receiving an XML declaration works */
START_TEST(test_suspend_xdecl)3567 START_TEST(test_suspend_xdecl) {
3568   const char *text = long_character_data_text;
3569 
3570   XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
3571   XML_SetUserData(g_parser, g_parser);
3572   g_resumable = XML_TRUE;
3573   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3574       != XML_STATUS_SUSPENDED)
3575     xml_failure(g_parser);
3576   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
3577     xml_failure(g_parser);
3578   /* Attempt to start a new parse while suspended */
3579   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3580       != XML_STATUS_ERROR)
3581     fail("Attempt to parse while suspended not faulted");
3582   if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
3583     fail("Suspended parse not faulted with correct error");
3584 }
3585 END_TEST
3586 
3587 /* Test aborting the parse in an epilog works */
START_TEST(test_abort_epilog)3588 START_TEST(test_abort_epilog) {
3589   const char *text = "<doc></doc>\n\r\n";
3590   XML_Char trigger_char = XCS('\r');
3591 
3592   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3593   XML_SetUserData(g_parser, &trigger_char);
3594   g_resumable = XML_FALSE;
3595   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3596       != XML_STATUS_ERROR)
3597     fail("Abort not triggered");
3598   if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
3599     xml_failure(g_parser);
3600 }
3601 END_TEST
3602 
3603 /* Test a different code path for abort in the epilog */
START_TEST(test_abort_epilog_2)3604 START_TEST(test_abort_epilog_2) {
3605   const char *text = "<doc></doc>\n";
3606   XML_Char trigger_char = XCS('\n');
3607 
3608   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3609   XML_SetUserData(g_parser, &trigger_char);
3610   g_resumable = XML_FALSE;
3611   expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered");
3612 }
3613 END_TEST
3614 
3615 /* Test suspension from the epilog */
START_TEST(test_suspend_epilog)3616 START_TEST(test_suspend_epilog) {
3617   const char *text = "<doc></doc>\n";
3618   XML_Char trigger_char = XCS('\n');
3619 
3620   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3621   XML_SetUserData(g_parser, &trigger_char);
3622   g_resumable = XML_TRUE;
3623   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3624       != XML_STATUS_SUSPENDED)
3625     xml_failure(g_parser);
3626 }
3627 END_TEST
3628 
START_TEST(test_suspend_in_sole_empty_tag)3629 START_TEST(test_suspend_in_sole_empty_tag) {
3630   const char *text = "<doc/>";
3631   enum XML_Status rc;
3632 
3633   XML_SetEndElementHandler(g_parser, suspending_end_handler);
3634   XML_SetUserData(g_parser, g_parser);
3635   rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
3636   if (rc == XML_STATUS_ERROR)
3637     xml_failure(g_parser);
3638   else if (rc != XML_STATUS_SUSPENDED)
3639     fail("Suspend not triggered");
3640   rc = XML_ResumeParser(g_parser);
3641   if (rc == XML_STATUS_ERROR)
3642     xml_failure(g_parser);
3643   else if (rc != XML_STATUS_OK)
3644     fail("Resume failed");
3645 }
3646 END_TEST
3647 
START_TEST(test_unfinished_epilog)3648 START_TEST(test_unfinished_epilog) {
3649   const char *text = "<doc></doc><";
3650 
3651   expect_failure(text, XML_ERROR_UNCLOSED_TOKEN,
3652                  "Incomplete epilog entry not faulted");
3653 }
3654 END_TEST
3655 
START_TEST(test_partial_char_in_epilog)3656 START_TEST(test_partial_char_in_epilog) {
3657   const char *text = "<doc></doc>\xe2\x82";
3658 
3659   /* First check that no fault is raised if the parse is not finished */
3660   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3661       == XML_STATUS_ERROR)
3662     xml_failure(g_parser);
3663   /* Now check that it is faulted once we finish */
3664   if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR)
3665     fail("Partial character in epilog not faulted");
3666   if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR)
3667     xml_failure(g_parser);
3668 }
3669 END_TEST
3670 
3671 /* Test resuming a parse suspended in entity substitution */
START_TEST(test_suspend_resume_internal_entity)3672 START_TEST(test_suspend_resume_internal_entity) {
3673   const char *text
3674       = "<!DOCTYPE doc [\n"
3675         "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n"
3676         "]>\n"
3677         "<doc>&foo;</doc>\n";
3678   const XML_Char *expected1 = XCS("Hi");
3679   const XML_Char *expected2 = XCS("HiHo");
3680   CharData storage;
3681 
3682   CharData_Init(&storage);
3683   XML_SetStartElementHandler(g_parser, start_element_suspender);
3684   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3685   XML_SetUserData(g_parser, &storage);
3686   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
3687   // we won't know exactly how much input we actually managed to give Expat.
3688   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3689       != XML_STATUS_SUSPENDED)
3690     xml_failure(g_parser);
3691   CharData_CheckXMLChars(&storage, XCS(""));
3692   if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
3693     xml_failure(g_parser);
3694   CharData_CheckXMLChars(&storage, expected1);
3695   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3696     xml_failure(g_parser);
3697   CharData_CheckXMLChars(&storage, expected2);
3698 }
3699 END_TEST
3700 
START_TEST(test_suspend_resume_internal_entity_issue_629)3701 START_TEST(test_suspend_resume_internal_entity_issue_629) {
3702   const char *const text
3703       = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n"
3704         "<"
3705         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3706         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3707         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3708         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3709         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3710         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3711         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3712         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3713         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3714         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3715         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3716         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3717         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3718         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3719         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3720         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3721         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3722         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3723         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3724         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3725         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3726         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3727         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3728         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3729         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3730         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3731         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3732         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3733         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3734         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3735         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3736         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3737         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3738         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3739         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3740         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3741         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3742         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3743         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3744         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3745         "/>"
3746         "</b></a>";
3747   const size_t firstChunkSizeBytes = 54;
3748 
3749   XML_Parser parser = XML_ParserCreate(NULL);
3750   XML_SetUserData(parser, parser);
3751   XML_SetCommentHandler(parser, suspending_comment_handler);
3752 
3753   if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE)
3754       != XML_STATUS_SUSPENDED)
3755     xml_failure(parser);
3756   if (XML_ResumeParser(parser) != XML_STATUS_OK)
3757     xml_failure(parser);
3758   if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes,
3759                               (int)(strlen(text) - firstChunkSizeBytes),
3760                               XML_TRUE)
3761       != XML_STATUS_OK)
3762     xml_failure(parser);
3763   XML_ParserFree(parser);
3764 }
3765 END_TEST
3766 
3767 /* Test syntax error is caught at parse resumption */
START_TEST(test_resume_entity_with_syntax_error)3768 START_TEST(test_resume_entity_with_syntax_error) {
3769   const char *text = "<!DOCTYPE doc [\n"
3770                      "<!ENTITY foo '<suspend>Hi</wombat>'>\n"
3771                      "]>\n"
3772                      "<doc>&foo;</doc>\n";
3773 
3774   XML_SetStartElementHandler(g_parser, start_element_suspender);
3775   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3776       != XML_STATUS_SUSPENDED)
3777     xml_failure(g_parser);
3778   if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
3779     fail("Syntax error in entity not faulted");
3780   if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH)
3781     xml_failure(g_parser);
3782 }
3783 END_TEST
3784 
3785 /* Test suspending and resuming in a parameter entity substitution */
START_TEST(test_suspend_resume_parameter_entity)3786 START_TEST(test_suspend_resume_parameter_entity) {
3787   const char *text = "<!DOCTYPE doc [\n"
3788                      "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n"
3789                      "%foo;\n"
3790                      "]>\n"
3791                      "<doc>Hello, world</doc>";
3792   const XML_Char *expected = XCS("Hello, world");
3793   CharData storage;
3794 
3795   CharData_Init(&storage);
3796   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3797   XML_SetElementDeclHandler(g_parser, element_decl_suspender);
3798   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3799   XML_SetUserData(g_parser, &storage);
3800   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3801       != XML_STATUS_SUSPENDED)
3802     xml_failure(g_parser);
3803   CharData_CheckXMLChars(&storage, XCS(""));
3804   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3805     xml_failure(g_parser);
3806   CharData_CheckXMLChars(&storage, expected);
3807 }
3808 END_TEST
3809 
3810 /* Test attempting to use parser after an error is faulted */
START_TEST(test_restart_on_error)3811 START_TEST(test_restart_on_error) {
3812   const char *text = "<$doc><doc></doc>";
3813 
3814   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3815       != XML_STATUS_ERROR)
3816     fail("Invalid tag name not faulted");
3817   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3818     xml_failure(g_parser);
3819   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
3820     fail("Restarting invalid parse not faulted");
3821   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3822     xml_failure(g_parser);
3823 }
3824 END_TEST
3825 
3826 /* Test that angle brackets in an attribute default value are faulted */
START_TEST(test_reject_lt_in_attribute_value)3827 START_TEST(test_reject_lt_in_attribute_value) {
3828   const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n"
3829                      "<doc></doc>";
3830 
3831   expect_failure(text, XML_ERROR_INVALID_TOKEN,
3832                  "Bad attribute default not faulted");
3833 }
3834 END_TEST
3835 
START_TEST(test_reject_unfinished_param_in_att_value)3836 START_TEST(test_reject_unfinished_param_in_att_value) {
3837   const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n"
3838                      "<doc></doc>";
3839 
3840   expect_failure(text, XML_ERROR_INVALID_TOKEN,
3841                  "Bad attribute default not faulted");
3842 }
3843 END_TEST
3844 
START_TEST(test_trailing_cr_in_att_value)3845 START_TEST(test_trailing_cr_in_att_value) {
3846   const char *text = "<doc a='value\r'/>";
3847 
3848   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3849       == XML_STATUS_ERROR)
3850     xml_failure(g_parser);
3851 }
3852 END_TEST
3853 
3854 /* Try parsing a general entity within a parameter entity in a
3855  * standalone internal DTD.  Covers a corner case in the parser.
3856  */
START_TEST(test_standalone_internal_entity)3857 START_TEST(test_standalone_internal_entity) {
3858   const char *text = "<?xml version='1.0' standalone='yes' ?>\n"
3859                      "<!DOCTYPE doc [\n"
3860                      "  <!ELEMENT doc (#PCDATA)>\n"
3861                      "  <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"&ge;\">'>\n"
3862                      "  <!ENTITY ge 'AttDefaultValue'>\n"
3863                      "  %pe;\n"
3864                      "]>\n"
3865                      "<doc att2='any'/>";
3866 
3867   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3868   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3869       == XML_STATUS_ERROR)
3870     xml_failure(g_parser);
3871 }
3872 END_TEST
3873 
3874 /* Test that a reference to an unknown external entity is skipped */
START_TEST(test_skipped_external_entity)3875 START_TEST(test_skipped_external_entity) {
3876   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3877                      "<doc></doc>\n";
3878   ExtTest test_data = {"<!ELEMENT doc EMPTY>\n"
3879                        "<!ENTITY % e2 '%e1;'>\n",
3880                        NULL, NULL};
3881 
3882   XML_SetUserData(g_parser, &test_data);
3883   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3884   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3885   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3886       == XML_STATUS_ERROR)
3887     xml_failure(g_parser);
3888 }
3889 END_TEST
3890 
3891 /* Test a different form of unknown external entity */
START_TEST(test_skipped_null_loaded_ext_entity)3892 START_TEST(test_skipped_null_loaded_ext_entity) {
3893   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3894                      "<doc />";
3895   ExtHdlrData test_data
3896       = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3897          "<!ENTITY % pe2 '%pe1;'>\n"
3898          "%pe2;\n",
3899          external_entity_null_loader};
3900 
3901   XML_SetUserData(g_parser, &test_data);
3902   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3903   XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3904   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3905       == XML_STATUS_ERROR)
3906     xml_failure(g_parser);
3907 }
3908 END_TEST
3909 
START_TEST(test_skipped_unloaded_ext_entity)3910 START_TEST(test_skipped_unloaded_ext_entity) {
3911   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3912                      "<doc />";
3913   ExtHdlrData test_data
3914       = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3915          "<!ENTITY % pe2 '%pe1;'>\n"
3916          "%pe2;\n",
3917          NULL};
3918 
3919   XML_SetUserData(g_parser, &test_data);
3920   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3921   XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3922   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3923       == XML_STATUS_ERROR)
3924     xml_failure(g_parser);
3925 }
3926 END_TEST
3927 
3928 /* Test that a parameter entity value ending with a carriage return
3929  * has it translated internally into a newline.
3930  */
START_TEST(test_param_entity_with_trailing_cr)3931 START_TEST(test_param_entity_with_trailing_cr) {
3932 #define PARAM_ENTITY_NAME "pe"
3933 #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">"
3934   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3935                      "<doc/>";
3936   ExtTest test_data
3937       = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n"
3938          "%" PARAM_ENTITY_NAME ";\n",
3939          NULL, NULL};
3940 
3941   XML_SetUserData(g_parser, &test_data);
3942   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3943   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3944   XML_SetEntityDeclHandler(g_parser, param_entity_match_handler);
3945   param_entity_match_init(XCS(PARAM_ENTITY_NAME),
3946                           XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n"));
3947   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3948       == XML_STATUS_ERROR)
3949     xml_failure(g_parser);
3950   int entity_match_flag = get_param_entity_match_flag();
3951   if (entity_match_flag == ENTITY_MATCH_FAIL)
3952     fail("Parameter entity CR->NEWLINE conversion failed");
3953   else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND)
3954     fail("Parameter entity not parsed");
3955 }
3956 #undef PARAM_ENTITY_NAME
3957 #undef PARAM_ENTITY_CORE_VALUE
3958 END_TEST
3959 
START_TEST(test_invalid_character_entity)3960 START_TEST(test_invalid_character_entity) {
3961   const char *text = "<!DOCTYPE doc [\n"
3962                      "  <!ENTITY entity '&#x110000;'>\n"
3963                      "]>\n"
3964                      "<doc>&entity;</doc>";
3965 
3966   expect_failure(text, XML_ERROR_BAD_CHAR_REF,
3967                  "Out of range character reference not faulted");
3968 }
3969 END_TEST
3970 
START_TEST(test_invalid_character_entity_2)3971 START_TEST(test_invalid_character_entity_2) {
3972   const char *text = "<!DOCTYPE doc [\n"
3973                      "  <!ENTITY entity '&#xg0;'>\n"
3974                      "]>\n"
3975                      "<doc>&entity;</doc>";
3976 
3977   expect_failure(text, XML_ERROR_INVALID_TOKEN,
3978                  "Out of range character reference not faulted");
3979 }
3980 END_TEST
3981 
START_TEST(test_invalid_character_entity_3)3982 START_TEST(test_invalid_character_entity_3) {
3983   const char text[] =
3984       /* <!DOCTYPE doc [\n */
3985       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
3986       /* U+0E04 = KHO KHWAI
3987        * U+0E08 = CHO CHAN */
3988       /* <!ENTITY entity '&\u0e04\u0e08;'>\n */
3989       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 "
3990       "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n"
3991       /* ]>\n */
3992       "\0]\0>\0\n"
3993       /* <doc>&entity;</doc> */
3994       "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>";
3995 
3996   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3997       != XML_STATUS_ERROR)
3998     fail("Invalid start of entity name not faulted");
3999   if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY)
4000     xml_failure(g_parser);
4001 }
4002 END_TEST
4003 
START_TEST(test_invalid_character_entity_4)4004 START_TEST(test_invalid_character_entity_4) {
4005   const char *text = "<!DOCTYPE doc [\n"
4006                      "  <!ENTITY entity '&#1114112;'>\n" /* = &#x110000 */
4007                      "]>\n"
4008                      "<doc>&entity;</doc>";
4009 
4010   expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4011                  "Out of range character reference not faulted");
4012 }
4013 END_TEST
4014 
4015 /* Test that processing instructions are picked up by a default handler */
START_TEST(test_pi_handled_in_default)4016 START_TEST(test_pi_handled_in_default) {
4017   const char *text = "<?test processing instruction?>\n<doc/>";
4018   const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>");
4019   CharData storage;
4020 
4021   CharData_Init(&storage);
4022   XML_SetDefaultHandler(g_parser, accumulate_characters);
4023   XML_SetUserData(g_parser, &storage);
4024   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4025       == XML_STATUS_ERROR)
4026     xml_failure(g_parser);
4027   CharData_CheckXMLChars(&storage, expected);
4028 }
4029 END_TEST
4030 
4031 /* Test that comments are picked up by a default handler */
START_TEST(test_comment_handled_in_default)4032 START_TEST(test_comment_handled_in_default) {
4033   const char *text = "<!-- This is a comment -->\n<doc/>";
4034   const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>");
4035   CharData storage;
4036 
4037   CharData_Init(&storage);
4038   XML_SetDefaultHandler(g_parser, accumulate_characters);
4039   XML_SetUserData(g_parser, &storage);
4040   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4041       == XML_STATUS_ERROR)
4042     xml_failure(g_parser);
4043   CharData_CheckXMLChars(&storage, expected);
4044 }
4045 END_TEST
4046 
4047 /* Test PIs that look almost but not quite like XML declarations */
START_TEST(test_pi_yml)4048 START_TEST(test_pi_yml) {
4049   const char *text = "<?yml something like data?><doc/>";
4050   const XML_Char *expected = XCS("yml: something like data\n");
4051   CharData storage;
4052 
4053   CharData_Init(&storage);
4054   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4055   XML_SetUserData(g_parser, &storage);
4056   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4057       == XML_STATUS_ERROR)
4058     xml_failure(g_parser);
4059   CharData_CheckXMLChars(&storage, expected);
4060 }
4061 END_TEST
4062 
START_TEST(test_pi_xnl)4063 START_TEST(test_pi_xnl) {
4064   const char *text = "<?xnl nothing like data?><doc/>";
4065   const XML_Char *expected = XCS("xnl: nothing like data\n");
4066   CharData storage;
4067 
4068   CharData_Init(&storage);
4069   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4070   XML_SetUserData(g_parser, &storage);
4071   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4072       == XML_STATUS_ERROR)
4073     xml_failure(g_parser);
4074   CharData_CheckXMLChars(&storage, expected);
4075 }
4076 END_TEST
4077 
START_TEST(test_pi_xmm)4078 START_TEST(test_pi_xmm) {
4079   const char *text = "<?xmm everything like data?><doc/>";
4080   const XML_Char *expected = XCS("xmm: everything like data\n");
4081   CharData storage;
4082 
4083   CharData_Init(&storage);
4084   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4085   XML_SetUserData(g_parser, &storage);
4086   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4087       == XML_STATUS_ERROR)
4088     xml_failure(g_parser);
4089   CharData_CheckXMLChars(&storage, expected);
4090 }
4091 END_TEST
4092 
START_TEST(test_utf16_pi)4093 START_TEST(test_utf16_pi) {
4094   const char text[] =
4095       /* <?{KHO KHWAI}{CHO CHAN}?>
4096        * where {KHO KHWAI} = U+0E04
4097        * and   {CHO CHAN}  = U+0E08
4098        */
4099       "<\0?\0\x04\x0e\x08\x0e?\0>\0"
4100       /* <q/> */
4101       "<\0q\0/\0>\0";
4102 #ifdef XML_UNICODE
4103   const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4104 #else
4105   const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4106 #endif
4107   CharData storage;
4108 
4109   CharData_Init(&storage);
4110   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4111   XML_SetUserData(g_parser, &storage);
4112   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4113       == XML_STATUS_ERROR)
4114     xml_failure(g_parser);
4115   CharData_CheckXMLChars(&storage, expected);
4116 }
4117 END_TEST
4118 
START_TEST(test_utf16_be_pi)4119 START_TEST(test_utf16_be_pi) {
4120   const char text[] =
4121       /* <?{KHO KHWAI}{CHO CHAN}?>
4122        * where {KHO KHWAI} = U+0E04
4123        * and   {CHO CHAN}  = U+0E08
4124        */
4125       "\0<\0?\x0e\x04\x0e\x08\0?\0>"
4126       /* <q/> */
4127       "\0<\0q\0/\0>";
4128 #ifdef XML_UNICODE
4129   const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4130 #else
4131   const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4132 #endif
4133   CharData storage;
4134 
4135   CharData_Init(&storage);
4136   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4137   XML_SetUserData(g_parser, &storage);
4138   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4139       == XML_STATUS_ERROR)
4140     xml_failure(g_parser);
4141   CharData_CheckXMLChars(&storage, expected);
4142 }
4143 END_TEST
4144 
4145 /* Test that comments can be picked up and translated */
START_TEST(test_utf16_be_comment)4146 START_TEST(test_utf16_be_comment) {
4147   const char text[] =
4148       /* <!-- Comment A --> */
4149       "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
4150       /* <doc/> */
4151       "\0<\0d\0o\0c\0/\0>";
4152   const XML_Char *expected = XCS(" Comment A ");
4153   CharData storage;
4154 
4155   CharData_Init(&storage);
4156   XML_SetCommentHandler(g_parser, accumulate_comment);
4157   XML_SetUserData(g_parser, &storage);
4158   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4159       == XML_STATUS_ERROR)
4160     xml_failure(g_parser);
4161   CharData_CheckXMLChars(&storage, expected);
4162 }
4163 END_TEST
4164 
START_TEST(test_utf16_le_comment)4165 START_TEST(test_utf16_le_comment) {
4166   const char text[] =
4167       /* <!-- Comment B --> */
4168       "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
4169       /* <doc/> */
4170       "<\0d\0o\0c\0/\0>\0";
4171   const XML_Char *expected = XCS(" Comment B ");
4172   CharData storage;
4173 
4174   CharData_Init(&storage);
4175   XML_SetCommentHandler(g_parser, accumulate_comment);
4176   XML_SetUserData(g_parser, &storage);
4177   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4178       == XML_STATUS_ERROR)
4179     xml_failure(g_parser);
4180   CharData_CheckXMLChars(&storage, expected);
4181 }
4182 END_TEST
4183 
4184 /* Test that the unknown encoding handler with map entries that expect
4185  * conversion but no conversion function is faulted
4186  */
START_TEST(test_missing_encoding_conversion_fn)4187 START_TEST(test_missing_encoding_conversion_fn) {
4188   const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
4189                      "<doc>\x81</doc>";
4190 
4191   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4192   /* MiscEncodingHandler sets up an encoding with every top-bit-set
4193    * character introducing a two-byte sequence.  For this, it
4194    * requires a convert function.  The above function call doesn't
4195    * pass one through, so when BadEncodingHandler actually gets
4196    * called it should supply an invalid encoding.
4197    */
4198   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4199                  "Encoding with missing convert() not faulted");
4200 }
4201 END_TEST
4202 
START_TEST(test_failing_encoding_conversion_fn)4203 START_TEST(test_failing_encoding_conversion_fn) {
4204   const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
4205                      "<doc>\x81</doc>";
4206 
4207   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4208   /* BadEncodingHandler sets up an encoding with every top-bit-set
4209    * character introducing a two-byte sequence.  For this, it
4210    * requires a convert function.  The above function call passes
4211    * one that insists all possible sequences are invalid anyway.
4212    */
4213   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4214                  "Encoding with failing convert() not faulted");
4215 }
4216 END_TEST
4217 
4218 /* Test unknown encoding conversions */
START_TEST(test_unknown_encoding_success)4219 START_TEST(test_unknown_encoding_success) {
4220   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4221                      /* Equivalent to <eoc>Hello, world</eoc> */
4222                      "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";
4223 
4224   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4225   run_character_check(text, XCS("Hello, world"));
4226 }
4227 END_TEST
4228 
4229 /* Test bad name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name)4230 START_TEST(test_unknown_encoding_bad_name) {
4231   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4232                      "<\xff\x64oc>Hello, world</\xff\x64oc>";
4233 
4234   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4235   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4236                  "Bad name start in unknown encoding not faulted");
4237 }
4238 END_TEST
4239 
4240 /* Test bad mid-name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name_2)4241 START_TEST(test_unknown_encoding_bad_name_2) {
4242   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4243                      "<d\xffoc>Hello, world</d\xffoc>";
4244 
4245   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4246   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4247                  "Bad name in unknown encoding not faulted");
4248 }
4249 END_TEST
4250 
4251 /* Test element name that is long enough to fill the conversion buffer
4252  * in an unknown encoding, finishing with an encoded character.
4253  */
START_TEST(test_unknown_encoding_long_name_1)4254 START_TEST(test_unknown_encoding_long_name_1) {
4255   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4256                      "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
4257                      "Hi"
4258                      "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
4259   const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4260   CharData storage;
4261 
4262   CharData_Init(&storage);
4263   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4264   XML_SetStartElementHandler(g_parser, record_element_start_handler);
4265   XML_SetUserData(g_parser, &storage);
4266   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4267       == XML_STATUS_ERROR)
4268     xml_failure(g_parser);
4269   CharData_CheckXMLChars(&storage, expected);
4270 }
4271 END_TEST
4272 
4273 /* Test element name that is long enough to fill the conversion buffer
4274  * in an unknown encoding, finishing with an simple character.
4275  */
START_TEST(test_unknown_encoding_long_name_2)4276 START_TEST(test_unknown_encoding_long_name_2) {
4277   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4278                      "<abcdefghabcdefghabcdefghijklmnop>"
4279                      "Hi"
4280                      "</abcdefghabcdefghabcdefghijklmnop>";
4281   const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4282   CharData storage;
4283 
4284   CharData_Init(&storage);
4285   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4286   XML_SetStartElementHandler(g_parser, record_element_start_handler);
4287   XML_SetUserData(g_parser, &storage);
4288   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4289       == XML_STATUS_ERROR)
4290     xml_failure(g_parser);
4291   CharData_CheckXMLChars(&storage, expected);
4292 }
4293 END_TEST
4294 
START_TEST(test_invalid_unknown_encoding)4295 START_TEST(test_invalid_unknown_encoding) {
4296   const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
4297                      "<doc>Hello world</doc>";
4298 
4299   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4300   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4301                  "Invalid unknown encoding not faulted");
4302 }
4303 END_TEST
4304 
START_TEST(test_unknown_ascii_encoding_ok)4305 START_TEST(test_unknown_ascii_encoding_ok) {
4306   const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4307                      "<doc>Hello, world</doc>";
4308 
4309   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4310   run_character_check(text, XCS("Hello, world"));
4311 }
4312 END_TEST
4313 
START_TEST(test_unknown_ascii_encoding_fail)4314 START_TEST(test_unknown_ascii_encoding_fail) {
4315   const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4316                      "<doc>Hello, \x80 world</doc>";
4317 
4318   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4319   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4320                  "Invalid character not faulted");
4321 }
4322 END_TEST
4323 
START_TEST(test_unknown_encoding_invalid_length)4324 START_TEST(test_unknown_encoding_invalid_length) {
4325   const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
4326                      "<doc>Hello, world</doc>";
4327 
4328   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4329   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4330                  "Invalid unknown encoding not faulted");
4331 }
4332 END_TEST
4333 
START_TEST(test_unknown_encoding_invalid_topbit)4334 START_TEST(test_unknown_encoding_invalid_topbit) {
4335   const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
4336                      "<doc>Hello, world</doc>";
4337 
4338   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4339   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4340                  "Invalid unknown encoding not faulted");
4341 }
4342 END_TEST
4343 
START_TEST(test_unknown_encoding_invalid_surrogate)4344 START_TEST(test_unknown_encoding_invalid_surrogate) {
4345   const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
4346                      "<doc>Hello, \x82 world</doc>";
4347 
4348   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4349   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4350                  "Invalid unknown encoding not faulted");
4351 }
4352 END_TEST
4353 
START_TEST(test_unknown_encoding_invalid_high)4354 START_TEST(test_unknown_encoding_invalid_high) {
4355   const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
4356                      "<doc>Hello, world</doc>";
4357 
4358   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4359   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4360                  "Invalid unknown encoding not faulted");
4361 }
4362 END_TEST
4363 
START_TEST(test_unknown_encoding_invalid_attr_value)4364 START_TEST(test_unknown_encoding_invalid_attr_value) {
4365   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4366                      "<doc attr='\xff\x30'/>";
4367 
4368   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4369   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4370                  "Invalid attribute valid not faulted");
4371 }
4372 END_TEST
4373 
4374 /* Test an external entity parser set to use latin-1 detects UTF-16
4375  * BOMs correctly.
4376  */
4377 /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */
START_TEST(test_ext_entity_latin1_utf16le_bom)4378 START_TEST(test_ext_entity_latin1_utf16le_bom) {
4379   const char *text = "<!DOCTYPE doc [\n"
4380                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4381                      "]>\n"
4382                      "<doc>&en;</doc>";
4383   ExtTest2 test_data
4384       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4385          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4386           *   0x4c = L and 0x20 is a space
4387           */
4388          "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4389 #ifdef XML_UNICODE
4390   const XML_Char *expected = XCS("\x00ff\x00feL ");
4391 #else
4392   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4393   const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4394 #endif
4395   CharData storage;
4396 
4397   CharData_Init(&storage);
4398   test_data.storage = &storage;
4399   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4400   XML_SetUserData(g_parser, &test_data);
4401   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4402   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4403       == XML_STATUS_ERROR)
4404     xml_failure(g_parser);
4405   CharData_CheckXMLChars(&storage, expected);
4406 }
4407 END_TEST
4408 
START_TEST(test_ext_entity_latin1_utf16be_bom)4409 START_TEST(test_ext_entity_latin1_utf16be_bom) {
4410   const char *text = "<!DOCTYPE doc [\n"
4411                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4412                      "]>\n"
4413                      "<doc>&en;</doc>";
4414   ExtTest2 test_data
4415       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4416          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4417           *   0x4c = L and 0x20 is a space
4418           */
4419          "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4420 #ifdef XML_UNICODE
4421   const XML_Char *expected = XCS("\x00fe\x00ff L");
4422 #else
4423   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4424   const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L");
4425 #endif
4426   CharData storage;
4427 
4428   CharData_Init(&storage);
4429   test_data.storage = &storage;
4430   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4431   XML_SetUserData(g_parser, &test_data);
4432   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4433   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4434       == XML_STATUS_ERROR)
4435     xml_failure(g_parser);
4436   CharData_CheckXMLChars(&storage, expected);
4437 }
4438 END_TEST
4439 
4440 /* Parsing the full buffer rather than a byte at a time makes a
4441  * difference to the encoding scanning code, so repeat the above tests
4442  * without breaking them down by byte.
4443  */
START_TEST(test_ext_entity_latin1_utf16le_bom2)4444 START_TEST(test_ext_entity_latin1_utf16le_bom2) {
4445   const char *text = "<!DOCTYPE doc [\n"
4446                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4447                      "]>\n"
4448                      "<doc>&en;</doc>";
4449   ExtTest2 test_data
4450       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4451          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4452           *   0x4c = L and 0x20 is a space
4453           */
4454          "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4455 #ifdef XML_UNICODE
4456   const XML_Char *expected = XCS("\x00ff\x00feL ");
4457 #else
4458   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4459   const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4460 #endif
4461   CharData storage;
4462 
4463   CharData_Init(&storage);
4464   test_data.storage = &storage;
4465   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4466   XML_SetUserData(g_parser, &test_data);
4467   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4468   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4469       == XML_STATUS_ERROR)
4470     xml_failure(g_parser);
4471   CharData_CheckXMLChars(&storage, expected);
4472 }
4473 END_TEST
4474 
START_TEST(test_ext_entity_latin1_utf16be_bom2)4475 START_TEST(test_ext_entity_latin1_utf16be_bom2) {
4476   const char *text = "<!DOCTYPE doc [\n"
4477                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4478                      "]>\n"
4479                      "<doc>&en;</doc>";
4480   ExtTest2 test_data
4481       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4482          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4483           *   0x4c = L and 0x20 is a space
4484           */
4485          "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4486 #ifdef XML_UNICODE
4487   const XML_Char *expected = XCS("\x00fe\x00ff L");
4488 #else
4489   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4490   const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
4491 #endif
4492   CharData storage;
4493 
4494   CharData_Init(&storage);
4495   test_data.storage = &storage;
4496   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4497   XML_SetUserData(g_parser, &test_data);
4498   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4499   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4500       == XML_STATUS_ERROR)
4501     xml_failure(g_parser);
4502   CharData_CheckXMLChars(&storage, expected);
4503 }
4504 END_TEST
4505 
4506 /* Test little-endian UTF-16 given an explicit big-endian encoding */
START_TEST(test_ext_entity_utf16_be)4507 START_TEST(test_ext_entity_utf16_be) {
4508   const char *text = "<!DOCTYPE doc [\n"
4509                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4510                      "]>\n"
4511                      "<doc>&en;</doc>";
4512   ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL};
4513 #ifdef XML_UNICODE
4514   const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4515 #else
4516   const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
4517                                  "\xe6\x94\x80"   /* U+6500 */
4518                                  "\xe2\xbc\x80"   /* U+2F00 */
4519                                  "\xe3\xb8\x80"); /* U+3E00 */
4520 #endif
4521   CharData storage;
4522 
4523   CharData_Init(&storage);
4524   test_data.storage = &storage;
4525   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4526   XML_SetUserData(g_parser, &test_data);
4527   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4528   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4529       == XML_STATUS_ERROR)
4530     xml_failure(g_parser);
4531   CharData_CheckXMLChars(&storage, expected);
4532 }
4533 END_TEST
4534 
4535 /* Test big-endian UTF-16 given an explicit little-endian encoding */
START_TEST(test_ext_entity_utf16_le)4536 START_TEST(test_ext_entity_utf16_le) {
4537   const char *text = "<!DOCTYPE doc [\n"
4538                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4539                      "]>\n"
4540                      "<doc>&en;</doc>";
4541   ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
4542 #ifdef XML_UNICODE
4543   const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4544 #else
4545   const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
4546                                  "\xe6\x94\x80"   /* U+6500 */
4547                                  "\xe2\xbc\x80"   /* U+2F00 */
4548                                  "\xe3\xb8\x80"); /* U+3E00 */
4549 #endif
4550   CharData storage;
4551 
4552   CharData_Init(&storage);
4553   test_data.storage = &storage;
4554   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4555   XML_SetUserData(g_parser, &test_data);
4556   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4557   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4558       == XML_STATUS_ERROR)
4559     xml_failure(g_parser);
4560   CharData_CheckXMLChars(&storage, expected);
4561 }
4562 END_TEST
4563 
4564 /* Test little-endian UTF-16 given no explicit encoding.
4565  * The existing default encoding (UTF-8) is assumed to hold without a
4566  * BOM to contradict it, so the entity value will in fact provoke an
4567  * error because 0x00 is not a valid XML character.  We parse the
4568  * whole buffer in one go rather than feeding it in byte by byte to
4569  * exercise different code paths in the initial scanning routines.
4570  */
START_TEST(test_ext_entity_utf16_unknown)4571 START_TEST(test_ext_entity_utf16_unknown) {
4572   const char *text = "<!DOCTYPE doc [\n"
4573                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4574                      "]>\n"
4575                      "<doc>&en;</doc>";
4576   ExtFaults2 test_data
4577       = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
4578          XML_ERROR_INVALID_TOKEN};
4579 
4580   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
4581   XML_SetUserData(g_parser, &test_data);
4582   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4583                  "Invalid character should not have been accepted");
4584 }
4585 END_TEST
4586 
4587 /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */
START_TEST(test_ext_entity_utf8_non_bom)4588 START_TEST(test_ext_entity_utf8_non_bom) {
4589   const char *text = "<!DOCTYPE doc [\n"
4590                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4591                      "]>\n"
4592                      "<doc>&en;</doc>";
4593   ExtTest2 test_data
4594       = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
4595          3, NULL, NULL};
4596 #ifdef XML_UNICODE
4597   const XML_Char *expected = XCS("\xfec0");
4598 #else
4599   const XML_Char *expected = XCS("\xef\xbb\x80");
4600 #endif
4601   CharData storage;
4602 
4603   CharData_Init(&storage);
4604   test_data.storage = &storage;
4605   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4606   XML_SetUserData(g_parser, &test_data);
4607   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4608   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4609       == XML_STATUS_ERROR)
4610     xml_failure(g_parser);
4611   CharData_CheckXMLChars(&storage, expected);
4612 }
4613 END_TEST
4614 
4615 /* Test that UTF-8 in a CDATA section is correctly passed through */
START_TEST(test_utf8_in_cdata_section)4616 START_TEST(test_utf8_in_cdata_section) {
4617   const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
4618 #ifdef XML_UNICODE
4619   const XML_Char *expected = XCS("one \x00e9 two");
4620 #else
4621   const XML_Char *expected = XCS("one \xc3\xa9 two");
4622 #endif
4623 
4624   run_character_check(text, expected);
4625 }
4626 END_TEST
4627 
4628 /* Test that little-endian UTF-16 in a CDATA section is handled */
START_TEST(test_utf8_in_cdata_section_2)4629 START_TEST(test_utf8_in_cdata_section_2) {
4630   const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
4631 #ifdef XML_UNICODE
4632   const XML_Char *expected = XCS("\x00e9]\x00e9two");
4633 #else
4634   const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
4635 #endif
4636 
4637   run_character_check(text, expected);
4638 }
4639 END_TEST
4640 
START_TEST(test_utf8_in_start_tags)4641 START_TEST(test_utf8_in_start_tags) {
4642   struct test_case {
4643     bool goodName;
4644     bool goodNameStart;
4645     const char *tagName;
4646   };
4647 
4648   // The idea with the tests below is this:
4649   // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
4650   // go to isNever and are hence not a concern.
4651   //
4652   // We start with a character that is a valid name character
4653   // (or even name-start character, see XML 1.0r4 spec) and then we flip
4654   // single bits at places where (1) the result leaves the UTF-8 encoding space
4655   // and (2) we stay in the same n-byte sequence family.
4656   //
4657   // The flipped bits are highlighted in angle brackets in comments,
4658   // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
4659   // the most significant bit to 1 to leave UTF-8 encoding space.
4660   struct test_case cases[] = {
4661       // 1-byte UTF-8: [0xxx xxxx]
4662       {true, true, "\x3A"},   // [0011 1010] = ASCII colon ':'
4663       {false, false, "\xBA"}, // [<1>011 1010]
4664       {true, false, "\x39"},  // [0011 1001] = ASCII nine '9'
4665       {false, false, "\xB9"}, // [<1>011 1001]
4666 
4667       // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
4668       {true, true, "\xDB\xA5"},   // [1101 1011] [1010 0101] =
4669                                   // Arabic small waw U+06E5
4670       {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
4671       {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
4672       {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
4673       {true, false, "\xCC\x81"},  // [1100 1100] [1000 0001] =
4674                                   // combining char U+0301
4675       {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
4676       {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
4677       {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
4678 
4679       // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
4680       {true, true, "\xE0\xA4\x85"},   // [1110 0000] [1010 0100] [1000 0101] =
4681                                       // Devanagari Letter A U+0905
4682       {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
4683       {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
4684       {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
4685       {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
4686       {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
4687       {true, false, "\xE0\xA4\x81"},  // [1110 0000] [1010 0100] [1000 0001] =
4688                                       // combining char U+0901
4689       {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
4690       {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
4691       {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
4692       {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
4693       {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
4694   };
4695   const bool atNameStart[] = {true, false};
4696 
4697   size_t i = 0;
4698   char doc[1024];
4699   size_t failCount = 0;
4700 
4701   // we need all the bytes to be parsed, but we don't want the errors that can
4702   // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
4703   if (g_reparseDeferralEnabledDefault) {
4704     return;
4705   }
4706 
4707   for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
4708     size_t j = 0;
4709     for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
4710       const bool expectedSuccess
4711           = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
4712       snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
4713                cases[i].tagName);
4714       XML_Parser parser = XML_ParserCreate(NULL);
4715 
4716       const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
4717           parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
4718 
4719       bool success = true;
4720       if ((status == XML_STATUS_OK) != expectedSuccess) {
4721         success = false;
4722       }
4723       if ((status == XML_STATUS_ERROR)
4724           && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
4725         success = false;
4726       }
4727 
4728       if (! success) {
4729         fprintf(
4730             stderr,
4731             "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
4732             (unsigned)i + 1u, atNameStart[j] ? "    " : "not ",
4733             (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
4734         failCount++;
4735       }
4736 
4737       XML_ParserFree(parser);
4738     }
4739   }
4740 
4741   if (failCount > 0) {
4742     fail("UTF-8 regression detected");
4743   }
4744 }
4745 END_TEST
4746 
4747 /* Test trailing spaces in elements are accepted */
START_TEST(test_trailing_spaces_in_elements)4748 START_TEST(test_trailing_spaces_in_elements) {
4749   const char *text = "<doc   >Hi</doc >";
4750   const XML_Char *expected = XCS("doc/doc");
4751   CharData storage;
4752 
4753   CharData_Init(&storage);
4754   XML_SetElementHandler(g_parser, record_element_start_handler,
4755                         record_element_end_handler);
4756   XML_SetUserData(g_parser, &storage);
4757   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4758       == XML_STATUS_ERROR)
4759     xml_failure(g_parser);
4760   CharData_CheckXMLChars(&storage, expected);
4761 }
4762 END_TEST
4763 
START_TEST(test_utf16_attribute)4764 START_TEST(test_utf16_attribute) {
4765   const char text[] =
4766       /* <d {KHO KHWAI}{CHO CHAN}='a'/>
4767        * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4768        * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4769        */
4770       "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
4771   const XML_Char *expected = XCS("a");
4772   CharData storage;
4773 
4774   CharData_Init(&storage);
4775   XML_SetStartElementHandler(g_parser, accumulate_attribute);
4776   XML_SetUserData(g_parser, &storage);
4777   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4778       == XML_STATUS_ERROR)
4779     xml_failure(g_parser);
4780   CharData_CheckXMLChars(&storage, expected);
4781 }
4782 END_TEST
4783 
START_TEST(test_utf16_second_attr)4784 START_TEST(test_utf16_second_attr) {
4785   /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
4786    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4787    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4788    */
4789   const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
4790                       "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
4791   const XML_Char *expected = XCS("1");
4792   CharData storage;
4793 
4794   CharData_Init(&storage);
4795   XML_SetStartElementHandler(g_parser, accumulate_attribute);
4796   XML_SetUserData(g_parser, &storage);
4797   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4798       == XML_STATUS_ERROR)
4799     xml_failure(g_parser);
4800   CharData_CheckXMLChars(&storage, expected);
4801 }
4802 END_TEST
4803 
START_TEST(test_attr_after_solidus)4804 START_TEST(test_attr_after_solidus) {
4805   const char *text = "<doc attr1='a' / attr2='b'>";
4806 
4807   expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
4808 }
4809 END_TEST
4810 
START_TEST(test_utf16_pe)4811 START_TEST(test_utf16_pe) {
4812   /* <!DOCTYPE doc [
4813    * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
4814    * %{KHO KHWAI}{CHO CHAN};
4815    * ]>
4816    * <doc></doc>
4817    *
4818    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4819    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4820    */
4821   const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4822                       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
4823                       "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
4824                       "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
4825                       "\0%\x0e\x04\x0e\x08\0;\0\n"
4826                       "\0]\0>\0\n"
4827                       "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
4828 #ifdef XML_UNICODE
4829   const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
4830 #else
4831   const XML_Char *expected
4832       = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
4833 #endif
4834   CharData storage;
4835 
4836   CharData_Init(&storage);
4837   XML_SetUserData(g_parser, &storage);
4838   XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl);
4839   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4840       == XML_STATUS_ERROR)
4841     xml_failure(g_parser);
4842   CharData_CheckXMLChars(&storage, expected);
4843 }
4844 END_TEST
4845 
4846 /* Test that duff attribute description keywords are rejected */
START_TEST(test_bad_attr_desc_keyword)4847 START_TEST(test_bad_attr_desc_keyword) {
4848   const char *text = "<!DOCTYPE doc [\n"
4849                      "  <!ATTLIST doc attr CDATA #!IMPLIED>\n"
4850                      "]>\n"
4851                      "<doc />";
4852 
4853   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4854                  "Bad keyword !IMPLIED not faulted");
4855 }
4856 END_TEST
4857 
4858 /* Test that an invalid attribute description keyword consisting of
4859  * UTF-16 characters with their top bytes non-zero are correctly
4860  * faulted
4861  */
START_TEST(test_bad_attr_desc_keyword_utf16)4862 START_TEST(test_bad_attr_desc_keyword_utf16) {
4863   /* <!DOCTYPE d [
4864    * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
4865    * ]><d/>
4866    *
4867    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4868    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4869    */
4870   const char text[]
4871       = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
4872         "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
4873         "\0#\x0e\x04\x0e\x08\0>\0\n"
4874         "\0]\0>\0<\0d\0/\0>";
4875 
4876   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4877       != XML_STATUS_ERROR)
4878     fail("Invalid UTF16 attribute keyword not faulted");
4879   if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4880     xml_failure(g_parser);
4881 }
4882 END_TEST
4883 
4884 /* Test that invalid syntax in a <!DOCTYPE> is rejected.  Do this
4885  * using prefix-encoding (see above) to trigger specific code paths
4886  */
START_TEST(test_bad_doctype)4887 START_TEST(test_bad_doctype) {
4888   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4889                      "<!DOCTYPE doc [ \x80\x44 ]><doc/>";
4890 
4891   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4892   expect_failure(text, XML_ERROR_SYNTAX,
4893                  "Invalid bytes in DOCTYPE not faulted");
4894 }
4895 END_TEST
4896 
START_TEST(test_bad_doctype_utf8)4897 START_TEST(test_bad_doctype_utf8) {
4898   const char *text = "<!DOCTYPE \xDB\x25"
4899                      "doc><doc/>"; // [1101 1011] [<0>010 0101]
4900   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4901                  "Invalid UTF-8 in DOCTYPE not faulted");
4902 }
4903 END_TEST
4904 
START_TEST(test_bad_doctype_utf16)4905 START_TEST(test_bad_doctype_utf16) {
4906   const char text[] =
4907       /* <!DOCTYPE doc [ \x06f2 ]><doc/>
4908        *
4909        * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
4910        * (name character) but not a valid letter (name start character)
4911        */
4912       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
4913       "\x06\xf2"
4914       "\0 \0]\0>\0<\0d\0o\0c\0/\0>";
4915 
4916   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4917       != XML_STATUS_ERROR)
4918     fail("Invalid bytes in DOCTYPE not faulted");
4919   if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4920     xml_failure(g_parser);
4921 }
4922 END_TEST
4923 
START_TEST(test_bad_doctype_plus)4924 START_TEST(test_bad_doctype_plus) {
4925   const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
4926                      "<1+>&foo;</1+>";
4927 
4928   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4929                  "'+' in document name not faulted");
4930 }
4931 END_TEST
4932 
START_TEST(test_bad_doctype_star)4933 START_TEST(test_bad_doctype_star) {
4934   const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
4935                      "<1*>&foo;</1*>";
4936 
4937   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4938                  "'*' in document name not faulted");
4939 }
4940 END_TEST
4941 
START_TEST(test_bad_doctype_query)4942 START_TEST(test_bad_doctype_query) {
4943   const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
4944                      "<1?>&foo;</1?>";
4945 
4946   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4947                  "'?' in document name not faulted");
4948 }
4949 END_TEST
4950 
START_TEST(test_unknown_encoding_bad_ignore)4951 START_TEST(test_unknown_encoding_bad_ignore) {
4952   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
4953                      "<!DOCTYPE doc SYSTEM 'foo'>"
4954                      "<doc><e>&entity;</e></doc>";
4955   ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
4956                      "Invalid character not faulted", XCS("prefix-conv"),
4957                      XML_ERROR_INVALID_TOKEN};
4958 
4959   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4960   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4961   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
4962   XML_SetUserData(g_parser, &fault);
4963   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4964                  "Bad IGNORE section with unknown encoding not failed");
4965 }
4966 END_TEST
4967 
START_TEST(test_entity_in_utf16_be_attr)4968 START_TEST(test_entity_in_utf16_be_attr) {
4969   const char text[] =
4970       /* <e a='&#228; &#x00E4;'></e> */
4971       "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
4972       "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
4973 #ifdef XML_UNICODE
4974   const XML_Char *expected = XCS("\x00e4 \x00e4");
4975 #else
4976   const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
4977 #endif
4978   CharData storage;
4979 
4980   CharData_Init(&storage);
4981   XML_SetUserData(g_parser, &storage);
4982   XML_SetStartElementHandler(g_parser, accumulate_attribute);
4983   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4984       == XML_STATUS_ERROR)
4985     xml_failure(g_parser);
4986   CharData_CheckXMLChars(&storage, expected);
4987 }
4988 END_TEST
4989 
START_TEST(test_entity_in_utf16_le_attr)4990 START_TEST(test_entity_in_utf16_le_attr) {
4991   const char text[] =
4992       /* <e a='&#228; &#x00E4;'></e> */
4993       "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
4994       "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
4995 #ifdef XML_UNICODE
4996   const XML_Char *expected = XCS("\x00e4 \x00e4");
4997 #else
4998   const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
4999 #endif
5000   CharData storage;
5001 
5002   CharData_Init(&storage);
5003   XML_SetUserData(g_parser, &storage);
5004   XML_SetStartElementHandler(g_parser, accumulate_attribute);
5005   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5006       == XML_STATUS_ERROR)
5007     xml_failure(g_parser);
5008   CharData_CheckXMLChars(&storage, expected);
5009 }
5010 END_TEST
5011 
START_TEST(test_entity_public_utf16_be)5012 START_TEST(test_entity_public_utf16_be) {
5013   const char text[] =
5014       /* <!DOCTYPE d [ */
5015       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5016       /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5017       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
5018       "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
5019       /* %e; */
5020       "\0%\0e\0;\0\n"
5021       /* ]> */
5022       "\0]\0>\0\n"
5023       /* <d>&j;</d> */
5024       "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
5025   ExtTest2 test_data
5026       = {/* <!ENTITY j 'baz'> */
5027          "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
5028   const XML_Char *expected = XCS("baz");
5029   CharData storage;
5030 
5031   CharData_Init(&storage);
5032   test_data.storage = &storage;
5033   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5034   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5035   XML_SetUserData(g_parser, &test_data);
5036   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5037   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5038       == XML_STATUS_ERROR)
5039     xml_failure(g_parser);
5040   CharData_CheckXMLChars(&storage, expected);
5041 }
5042 END_TEST
5043 
START_TEST(test_entity_public_utf16_le)5044 START_TEST(test_entity_public_utf16_le) {
5045   const char text[] =
5046       /* <!DOCTYPE d [ */
5047       "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
5048       /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5049       "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
5050       "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
5051       /* %e; */
5052       "%\0e\0;\0\n\0"
5053       /* ]> */
5054       "]\0>\0\n\0"
5055       /* <d>&j;</d> */
5056       "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
5057   ExtTest2 test_data
5058       = {/* <!ENTITY j 'baz'> */
5059          "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
5060   const XML_Char *expected = XCS("baz");
5061   CharData storage;
5062 
5063   CharData_Init(&storage);
5064   test_data.storage = &storage;
5065   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5066   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5067   XML_SetUserData(g_parser, &test_data);
5068   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5069   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5070       == XML_STATUS_ERROR)
5071     xml_failure(g_parser);
5072   CharData_CheckXMLChars(&storage, expected);
5073 }
5074 END_TEST
5075 
5076 /* Test that a doctype with neither an internal nor external subset is
5077  * faulted
5078  */
START_TEST(test_short_doctype)5079 START_TEST(test_short_doctype) {
5080   const char *text = "<!DOCTYPE doc></doc>";
5081   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5082                  "DOCTYPE without subset not rejected");
5083 }
5084 END_TEST
5085 
START_TEST(test_short_doctype_2)5086 START_TEST(test_short_doctype_2) {
5087   const char *text = "<!DOCTYPE doc PUBLIC></doc>";
5088   expect_failure(text, XML_ERROR_SYNTAX,
5089                  "DOCTYPE without Public ID not rejected");
5090 }
5091 END_TEST
5092 
START_TEST(test_short_doctype_3)5093 START_TEST(test_short_doctype_3) {
5094   const char *text = "<!DOCTYPE doc SYSTEM></doc>";
5095   expect_failure(text, XML_ERROR_SYNTAX,
5096                  "DOCTYPE without System ID not rejected");
5097 }
5098 END_TEST
5099 
START_TEST(test_long_doctype)5100 START_TEST(test_long_doctype) {
5101   const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
5102   expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
5103 }
5104 END_TEST
5105 
START_TEST(test_bad_entity)5106 START_TEST(test_bad_entity) {
5107   const char *text = "<!DOCTYPE doc [\n"
5108                      "  <!ENTITY foo PUBLIC>\n"
5109                      "]>\n"
5110                      "<doc/>";
5111   expect_failure(text, XML_ERROR_SYNTAX,
5112                  "ENTITY without Public ID is not rejected");
5113 }
5114 END_TEST
5115 
5116 /* Test unquoted value is faulted */
START_TEST(test_bad_entity_2)5117 START_TEST(test_bad_entity_2) {
5118   const char *text = "<!DOCTYPE doc [\n"
5119                      "  <!ENTITY % foo bar>\n"
5120                      "]>\n"
5121                      "<doc/>";
5122   expect_failure(text, XML_ERROR_SYNTAX,
5123                  "ENTITY without Public ID is not rejected");
5124 }
5125 END_TEST
5126 
START_TEST(test_bad_entity_3)5127 START_TEST(test_bad_entity_3) {
5128   const char *text = "<!DOCTYPE doc [\n"
5129                      "  <!ENTITY % foo PUBLIC>\n"
5130                      "]>\n"
5131                      "<doc/>";
5132   expect_failure(text, XML_ERROR_SYNTAX,
5133                  "Parameter ENTITY without Public ID is not rejected");
5134 }
5135 END_TEST
5136 
START_TEST(test_bad_entity_4)5137 START_TEST(test_bad_entity_4) {
5138   const char *text = "<!DOCTYPE doc [\n"
5139                      "  <!ENTITY % foo SYSTEM>\n"
5140                      "]>\n"
5141                      "<doc/>";
5142   expect_failure(text, XML_ERROR_SYNTAX,
5143                  "Parameter ENTITY without Public ID is not rejected");
5144 }
5145 END_TEST
5146 
START_TEST(test_bad_notation)5147 START_TEST(test_bad_notation) {
5148   const char *text = "<!DOCTYPE doc [\n"
5149                      "  <!NOTATION n SYSTEM>\n"
5150                      "]>\n"
5151                      "<doc/>";
5152   expect_failure(text, XML_ERROR_SYNTAX,
5153                  "Notation without System ID is not rejected");
5154 }
5155 END_TEST
5156 
5157 /* Test for issue #11, wrongly suppressed default handler */
START_TEST(test_default_doctype_handler)5158 START_TEST(test_default_doctype_handler) {
5159   const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n"
5160                      "  <!ENTITY foo 'bar'>\n"
5161                      "]>\n"
5162                      "<doc>&foo;</doc>";
5163   DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE},
5164                               {XCS("'test.dtd'"), 10, XML_FALSE},
5165                               {NULL, 0, XML_FALSE}};
5166   int i;
5167 
5168   XML_SetUserData(g_parser, &test_data);
5169   XML_SetDefaultHandler(g_parser, checking_default_handler);
5170   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
5171   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5172       == XML_STATUS_ERROR)
5173     xml_failure(g_parser);
5174   for (i = 0; test_data[i].expected != NULL; i++)
5175     if (! test_data[i].seen)
5176       fail("Default handler not run for public !DOCTYPE");
5177 }
5178 END_TEST
5179 
START_TEST(test_empty_element_abort)5180 START_TEST(test_empty_element_abort) {
5181   const char *text = "<abort/>";
5182 
5183   XML_SetStartElementHandler(g_parser, start_element_suspender);
5184   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5185       != XML_STATUS_ERROR)
5186     fail("Expected to error on abort");
5187 }
5188 END_TEST
5189 
5190 /* Regression test for GH issue #612: unfinished m_declAttributeType
5191  * allocation in ->m_tempPool can corrupt following allocation.
5192  */
START_TEST(test_pool_integrity_with_unfinished_attr)5193 START_TEST(test_pool_integrity_with_unfinished_attr) {
5194   const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
5195                      "<!DOCTYPE foo [\n"
5196                      "<!ELEMENT foo ANY>\n"
5197                      "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
5198                      "%entp;\n"
5199                      "]>\n"
5200                      "<a></a>\n";
5201   const XML_Char *expected = XCS("COMMENT");
5202   CharData storage;
5203 
5204   CharData_Init(&storage);
5205   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5206   XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist);
5207   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
5208   XML_SetCommentHandler(g_parser, accumulate_comment);
5209   XML_SetUserData(g_parser, &storage);
5210   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5211       == XML_STATUS_ERROR)
5212     xml_failure(g_parser);
5213   CharData_CheckXMLChars(&storage, expected);
5214 }
5215 END_TEST
5216 
START_TEST(test_nested_entity_suspend)5217 START_TEST(test_nested_entity_suspend) {
5218   const char *const text = "<!DOCTYPE a [\n"
5219                            "  <!ENTITY e1 '<!--e1-->'>\n"
5220                            "  <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
5221                            "  <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
5222                            "]>\n"
5223                            "<a><!--start-->&e3;<!--end--></a>";
5224   const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
5225       XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
5226   CharData storage;
5227   CharData_Init(&storage);
5228   XML_Parser parser = XML_ParserCreate(NULL);
5229   ParserPlusStorage parserPlusStorage = {parser, &storage};
5230 
5231   XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5232   XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
5233   XML_SetUserData(parser, &parserPlusStorage);
5234 
5235   enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5236   while (status == XML_STATUS_SUSPENDED) {
5237     status = XML_ResumeParser(parser);
5238   }
5239   if (status != XML_STATUS_OK)
5240     xml_failure(parser);
5241 
5242   CharData_CheckXMLChars(&storage, expected);
5243   XML_ParserFree(parser);
5244 }
5245 END_TEST
5246 
5247 /* Regression test for quadratic parsing on large tokens */
START_TEST(test_big_tokens_scale_linearly)5248 START_TEST(test_big_tokens_scale_linearly) {
5249   const struct {
5250     const char *pre;
5251     const char *post;
5252   } text[] = {
5253       {"<a>", "</a>"},                      // assumed good, used as baseline
5254       {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
5255       {"<c attr='", "'></c>"},              // big attribute, used to be O(N²)
5256       {"<d><!-- ", " --></d>"},             // long comment, used to be O(N²)
5257       {"<e><", "/></e>"},                   // big elem name, used to be O(N²)
5258   };
5259   const int num_cases = sizeof(text) / sizeof(text[0]);
5260   char aaaaaa[4096];
5261   const int fillsize = (int)sizeof(aaaaaa);
5262   const int fillcount = 100;
5263   const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
5264   const unsigned max_factor = 4;
5265   const unsigned max_scanned = max_factor * approx_bytes;
5266 
5267   memset(aaaaaa, 'a', fillsize);
5268 
5269   if (! g_reparseDeferralEnabledDefault) {
5270     return; // heuristic is disabled; we would get O(n^2) and fail.
5271   }
5272 
5273   for (int i = 0; i < num_cases; ++i) {
5274     XML_Parser parser = XML_ParserCreate(NULL);
5275     assert_true(parser != NULL);
5276     enum XML_Status status;
5277     set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);
5278 
5279     // parse the start text
5280     g_bytesScanned = 0;
5281     status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
5282                                      (int)strlen(text[i].pre), XML_FALSE);
5283     if (status != XML_STATUS_OK) {
5284       xml_failure(parser);
5285     }
5286 
5287     // parse lots of 'a', failing the test early if it takes too long
5288     unsigned past_max_count = 0;
5289     for (int f = 0; f < fillcount; ++f) {
5290       status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
5291       if (status != XML_STATUS_OK) {
5292         xml_failure(parser);
5293       }
5294       if (g_bytesScanned > max_scanned) {
5295         // We're not done, and have already passed the limit -- the test will
5296         // definitely fail. This block allows us to save time by failing early.
5297         const unsigned pushed
5298             = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
5299         fprintf(
5300             stderr,
5301             "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
5302             f + 1, fillcount, pushed, g_bytesScanned,
5303             g_bytesScanned / (double)pushed, max_scanned, max_factor);
5304         past_max_count++;
5305         // We are failing, but allow a few log prints first. If we don't reach
5306         // a count of five, the test will fail after the loop instead.
5307         assert_true(past_max_count < 5);
5308       }
5309     }
5310 
5311     // parse the end text
5312     status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
5313                                      (int)strlen(text[i].post), XML_TRUE);
5314     if (status != XML_STATUS_OK) {
5315       xml_failure(parser);
5316     }
5317 
5318     assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
5319     if (g_bytesScanned > max_scanned) {
5320       fprintf(
5321           stderr,
5322           "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
5323           g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
5324           max_factor);
5325       fail("scanned too many bytes");
5326     }
5327 
5328     XML_ParserFree(parser);
5329   }
5330 }
5331 END_TEST
5332 
START_TEST(test_set_reparse_deferral)5333 START_TEST(test_set_reparse_deferral) {
5334   const char *const pre = "<d>";
5335   const char *const start = "<x attr='";
5336   const char *const end = "'></x>";
5337   char eeeeee[100];
5338   const int fillsize = (int)sizeof(eeeeee);
5339   memset(eeeeee, 'e', fillsize);
5340 
5341   for (int enabled = 0; enabled <= 1; enabled += 1) {
5342     set_subtest("deferral=%d", enabled);
5343 
5344     XML_Parser parser = XML_ParserCreate(NULL);
5345     assert_true(parser != NULL);
5346     assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5347     // pre-grow the buffer to avoid reparsing due to almost-fullness
5348     assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5349 
5350     CharData storage;
5351     CharData_Init(&storage);
5352     XML_SetUserData(parser, &storage);
5353     XML_SetStartElementHandler(parser, start_element_event_handler);
5354 
5355     enum XML_Status status;
5356     // parse the start text
5357     status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5358     if (status != XML_STATUS_OK) {
5359       xml_failure(parser);
5360     }
5361     CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5362 
5363     // ..and the start of the token
5364     status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5365     if (status != XML_STATUS_OK) {
5366       xml_failure(parser);
5367     }
5368     CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
5369 
5370     // try to parse lots of 'e', but the token isn't finished
5371     for (int c = 0; c < 100; ++c) {
5372       status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5373       if (status != XML_STATUS_OK) {
5374         xml_failure(parser);
5375       }
5376     }
5377     CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5378 
5379     // end the <x> token.
5380     status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5381     if (status != XML_STATUS_OK) {
5382       xml_failure(parser);
5383     }
5384 
5385     if (enabled) {
5386       // In general, we may need to push more data to trigger a reparse attempt,
5387       // but in this test, the data is constructed to always require it.
5388       CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
5389       // 2x the token length should suffice; the +1 covers the start and end.
5390       for (int c = 0; c < 101; ++c) {
5391         status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5392         if (status != XML_STATUS_OK) {
5393           xml_failure(parser);
5394         }
5395       }
5396     }
5397     CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
5398 
5399     XML_ParserFree(parser);
5400   }
5401 }
5402 END_TEST
5403 
5404 struct element_decl_data {
5405   XML_Parser parser;
5406   int count;
5407 };
5408 
5409 static void
element_decl_counter(void * userData,const XML_Char * name,XML_Content * model)5410 element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
5411   UNUSED_P(name);
5412   struct element_decl_data *testdata = (struct element_decl_data *)userData;
5413   testdata->count += 1;
5414   XML_FreeContentModel(testdata->parser, model);
5415 }
5416 
5417 static int
external_inherited_parser(XML_Parser p,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)5418 external_inherited_parser(XML_Parser p, const XML_Char *context,
5419                           const XML_Char *base, const XML_Char *systemId,
5420                           const XML_Char *publicId) {
5421   UNUSED_P(base);
5422   UNUSED_P(systemId);
5423   UNUSED_P(publicId);
5424   const char *const pre = "<!ELEMENT document ANY>\n";
5425   const char *const start = "<!ELEMENT ";
5426   const char *const end = " ANY>\n";
5427   const char *const post = "<!ELEMENT xyz ANY>\n";
5428   const int enabled = *(int *)XML_GetUserData(p);
5429   char eeeeee[100];
5430   char spaces[100];
5431   const int fillsize = (int)sizeof(eeeeee);
5432   assert_true(fillsize == (int)sizeof(spaces));
5433   memset(eeeeee, 'e', fillsize);
5434   memset(spaces, ' ', fillsize);
5435 
5436   XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
5437   assert_true(parser != NULL);
5438   // pre-grow the buffer to avoid reparsing due to almost-fullness
5439   assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5440 
5441   struct element_decl_data testdata;
5442   testdata.parser = parser;
5443   testdata.count = 0;
5444   XML_SetUserData(parser, &testdata);
5445   XML_SetElementDeclHandler(parser, element_decl_counter);
5446 
5447   enum XML_Status status;
5448   // parse the initial text
5449   status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5450   if (status != XML_STATUS_OK) {
5451     xml_failure(parser);
5452   }
5453   assert_true(testdata.count == 1); // first element should be done
5454 
5455   // ..and the start of the big token
5456   status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5457   if (status != XML_STATUS_OK) {
5458     xml_failure(parser);
5459   }
5460   assert_true(testdata.count == 1); // still just the first one
5461 
5462   // try to parse lots of 'e', but the token isn't finished
5463   for (int c = 0; c < 100; ++c) {
5464     status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5465     if (status != XML_STATUS_OK) {
5466       xml_failure(parser);
5467     }
5468   }
5469   assert_true(testdata.count == 1); // *still* just the first one
5470 
5471   // end the big token.
5472   status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5473   if (status != XML_STATUS_OK) {
5474     xml_failure(parser);
5475   }
5476 
5477   if (enabled) {
5478     // In general, we may need to push more data to trigger a reparse attempt,
5479     // but in this test, the data is constructed to always require it.
5480     assert_true(testdata.count == 1); // or the test is incorrect
5481     // 2x the token length should suffice; the +1 covers the start and end.
5482     for (int c = 0; c < 101; ++c) {
5483       status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
5484       if (status != XML_STATUS_OK) {
5485         xml_failure(parser);
5486       }
5487     }
5488   }
5489   assert_true(testdata.count == 2); // the big token should be done
5490 
5491   // parse the final text
5492   status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
5493   if (status != XML_STATUS_OK) {
5494     xml_failure(parser);
5495   }
5496   assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
5497 
5498   XML_ParserFree(parser);
5499   return XML_STATUS_OK;
5500 }
5501 
START_TEST(test_reparse_deferral_is_inherited)5502 START_TEST(test_reparse_deferral_is_inherited) {
5503   const char *const text
5504       = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
5505   for (int enabled = 0; enabled <= 1; ++enabled) {
5506     set_subtest("deferral=%d", enabled);
5507 
5508     XML_Parser parser = XML_ParserCreate(NULL);
5509     assert_true(parser != NULL);
5510     XML_SetUserData(parser, (void *)&enabled);
5511     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5512     // this handler creates a sub-parser and checks that its deferral behavior
5513     // is what we expected, based on the value of `enabled` (in userdata).
5514     XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
5515     assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5516     if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
5517       xml_failure(parser);
5518 
5519     XML_ParserFree(parser);
5520   }
5521 }
5522 END_TEST
5523 
START_TEST(test_set_reparse_deferral_on_null_parser)5524 START_TEST(test_set_reparse_deferral_on_null_parser) {
5525   assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
5526   assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
5527   assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
5528   assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
5529   assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
5530               == XML_FALSE);
5531   assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
5532               == XML_FALSE);
5533 }
5534 END_TEST
5535 
START_TEST(test_set_reparse_deferral_on_the_fly)5536 START_TEST(test_set_reparse_deferral_on_the_fly) {
5537   const char *const pre = "<d><x attr='";
5538   const char *const end = "'></x>";
5539   char iiiiii[100];
5540   const int fillsize = (int)sizeof(iiiiii);
5541   memset(iiiiii, 'i', fillsize);
5542 
5543   XML_Parser parser = XML_ParserCreate(NULL);
5544   assert_true(parser != NULL);
5545   assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
5546 
5547   CharData storage;
5548   CharData_Init(&storage);
5549   XML_SetUserData(parser, &storage);
5550   XML_SetStartElementHandler(parser, start_element_event_handler);
5551 
5552   enum XML_Status status;
5553   // parse the start text
5554   status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5555   if (status != XML_STATUS_OK) {
5556     xml_failure(parser);
5557   }
5558   CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5559 
5560   // try to parse some 'i', but the token isn't finished
5561   status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
5562   if (status != XML_STATUS_OK) {
5563     xml_failure(parser);
5564   }
5565   CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5566 
5567   // end the <x> token.
5568   status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5569   if (status != XML_STATUS_OK) {
5570     xml_failure(parser);
5571   }
5572   CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
5573 
5574   // now change the heuristic setting and add *no* data
5575   assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
5576   // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
5577   status = XML_Parse(parser, "", 0, XML_FALSE);
5578   if (status != XML_STATUS_OK) {
5579     xml_failure(parser);
5580   }
5581   CharData_CheckXMLChars(&storage, XCS("dx"));
5582 
5583   XML_ParserFree(parser);
5584 }
5585 END_TEST
5586 
START_TEST(test_set_bad_reparse_option)5587 START_TEST(test_set_bad_reparse_option) {
5588   XML_Parser parser = XML_ParserCreate(NULL);
5589   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
5590   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
5591   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
5592   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
5593   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
5594   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
5595   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
5596   assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
5597   assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
5598   XML_ParserFree(parser);
5599 }
5600 END_TEST
5601 
/* Running sum of every size requested through the counting allocators. */
static size_t g_totalAlloc = 0;
/* Largest single size requested through the counting allocators. */
static size_t g_biggestAlloc = 0;

/* realloc() wrapper that records the requested size in g_totalAlloc and
 * g_biggestAlloc before forwarding the call.  The size is recorded whether
 * or not the underlying realloc() succeeds.
 */
static void *
counting_realloc(void *ptr, size_t size) {
  if (g_biggestAlloc < size) {
    g_biggestAlloc = size;
  }
  g_totalAlloc += size;
  return realloc(ptr, size);
}

/* malloc()-shaped entry point: a fresh allocation is a counting_realloc()
 * of a NULL pointer, so it is tallied the same way.
 */
static void *
counting_malloc(size_t size) {
  void *const fresh = counting_realloc(NULL, size);
  return fresh;
}
5618 
START_TEST(test_bypass_heuristic_when_close_to_bufsize)5619 START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
5620   if (g_chunkSize != 0) {
5621     // this test does not use SINGLE_BYTES, because it depends on very precise
5622     // buffer fills.
5623     return;
5624   }
5625   if (! g_reparseDeferralEnabledDefault) {
5626     return; // this test is irrelevant when the deferral heuristic is disabled.
5627   }
5628 
5629   const int document_length = 65536;
5630   char *const document = (char *)malloc(document_length);
5631 
5632   const XML_Memory_Handling_Suite memfuncs = {
5633       counting_malloc,
5634       counting_realloc,
5635       free,
5636   };
5637 
5638   const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
5639   const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
5640   const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};
5641 
5642   for (const int *leading = leading_list; *leading >= 0; leading++) {
5643     for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
5644       for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
5645         set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
5646                     *fillsize);
5647         // start by checking that the test looks reasonably valid
5648         assert_true(*leading + *bigtoken <= document_length);
5649 
5650         // put 'x' everywhere; some will be overwritten by elements.
5651         memset(document, 'x', document_length);
5652         // maybe add an initial tag
5653         if (*leading) {
5654           assert_true(*leading >= 3); // or the test case is invalid
5655           memcpy(document, "<a>", 3);
5656         }
5657         // add the large token
5658         document[*leading + 0] = '<';
5659         document[*leading + 1] = 'b';
5660         memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
5661         document[*leading + *bigtoken - 1] = '>';
5662 
5663         // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
5664         const int expected_elem_total = 1 + (*leading ? 1 : 0);
5665 
5666         XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
5667         assert_true(parser != NULL);
5668 
5669         CharData storage;
5670         CharData_Init(&storage);
5671         XML_SetUserData(parser, &storage);
5672         XML_SetStartElementHandler(parser, start_element_event_handler);
5673 
5674         g_biggestAlloc = 0;
5675         g_totalAlloc = 0;
5676         int offset = 0;
5677         // fill data until the big token is covered (but not necessarily parsed)
5678         while (offset < *leading + *bigtoken) {
5679           assert_true(offset + *fillsize <= document_length);
5680           const enum XML_Status status
5681               = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5682           if (status != XML_STATUS_OK) {
5683             xml_failure(parser);
5684           }
5685           offset += *fillsize;
5686         }
5687         // Now, check that we've had a buffer allocation that could fit the
5688         // context bytes and our big token. In order to detect a special case,
5689         // we need to know how many bytes of our big token were included in the
5690         // first push that contained _any_ bytes of the big token:
5691         const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
5692         if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
5693           // Special case: we aren't saving any context, and the whole big token
5694           // was covered by a single fill, so Expat may have parsed directly
5695           // from our input pointer, without allocating an internal buffer.
5696         } else if (*leading < XML_CONTEXT_BYTES) {
5697           assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
5698         } else {
5699           assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
5700         }
5701         // fill data until the big token is actually parsed
5702         while (storage.count < expected_elem_total) {
5703           const size_t alloc_before = g_totalAlloc;
5704           assert_true(offset + *fillsize <= document_length);
5705           const enum XML_Status status
5706               = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5707           if (status != XML_STATUS_OK) {
5708             xml_failure(parser);
5709           }
5710           offset += *fillsize;
5711           // since all the bytes of the big token are already in the buffer,
5712           // the bufsize ceiling should make us finish its parsing without any
5713           // further buffer allocations. We assume that there will be no other
5714           // large allocations in this test.
5715           assert_true(g_totalAlloc - alloc_before < 4096);
5716         }
5717         // test-the-test: was our alloc even called?
5718         assert_true(g_totalAlloc > 0);
5719         // test-the-test: there shouldn't be any extra start elements
5720         assert_true(storage.count == expected_elem_total);
5721 
5722         XML_ParserFree(parser);
5723       }
5724     }
5725   }
5726   free(document);
5727 }
5728 END_TEST
5729 
START_TEST(test_varying_buffer_fills)5730 START_TEST(test_varying_buffer_fills) {
5731   const int KiB = 1024;
5732   const int MiB = 1024 * KiB;
5733   const int document_length = 16 * MiB;
5734   const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB
5735 
5736   if (g_chunkSize != 0) {
5737     return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
5738   }
5739 
5740   char *const document = (char *)malloc(document_length);
5741   assert_true(document != NULL);
5742   memset(document, 'x', document_length);
5743   document[0] = '<';
5744   document[1] = 't';
5745   memset(&document[2], ' ', big - 2); // a very spacy token
5746   document[big - 1] = '>';
5747 
5748   // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
5749   // When reparse deferral is enabled, the final (negated) value is the expected
5750   // maximum number of bytes scanned in parse attempts.
5751   const int testcases[][30] = {
5752       {8 * MiB, -8 * MiB},
5753       {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
5754       // zero-size fills shouldn't trigger the bypass
5755       {4 * MiB, 0, 4 * MiB, -12 * MiB},
5756       {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
5757       {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
5758       // try to hit the buffer ceiling only once (at the end)
5759       {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
5760       // try to hit the same buffer ceiling multiple times
5761       {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},
5762 
5763       // try to hit every ceiling, by always landing 1K shy of the buffer size
5764       {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
5765        128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},
5766 
5767       // try to avoid every ceiling, by always landing 1B past the buffer size
5768       // the normal 2x heuristic threshold still forces parse attempts.
5769       {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
5770        2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
5771        8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
5772        32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
5773        128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
5774        512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
5775        2 * MiB, 4 * MiB,     // will attempt 8MiB + 1 ==> total 10M + 682K + 7
5776        -(10 * MiB + 682 * KiB + 7)},
5777       // try to avoid every ceiling again, except on our last fill.
5778       {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
5779        2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
5780        8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
5781        32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
5782        128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
5783        512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
5784        2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
5785        -(10 * MiB + 682 * KiB + 6)},
5786 
5787       // try to hit ceilings on the way multiple times
5788       {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
5789        512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
5790        1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1,   // 4 MiB buffer
5791        2 * MiB + 1, 1 * MiB, 512 * KiB,                    // 8 MiB buffer
5792        // we'll make a parse attempt at every parse call
5793        -(45 * MiB + 12)},
5794   };
5795   const int testcount = sizeof(testcases) / sizeof(testcases[0]);
5796   for (int test_i = 0; test_i < testcount; test_i++) {
5797     const int *fillsize = testcases[test_i];
5798     set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1],
5799                 fillsize[2], fillsize[3]);
5800     XML_Parser parser = XML_ParserCreate(NULL);
5801     assert_true(parser != NULL);
5802 
5803     CharData storage;
5804     CharData_Init(&storage);
5805     XML_SetUserData(parser, &storage);
5806     XML_SetStartElementHandler(parser, start_element_event_handler);
5807 
5808     g_bytesScanned = 0;
5809     int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
5810     int offset = 0;
5811     while (*fillsize >= 0) {
5812       assert_true(offset + *fillsize <= document_length); // or test is invalid
5813       const enum XML_Status status
5814           = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5815       if (status != XML_STATUS_OK) {
5816         xml_failure(parser);
5817       }
5818       offset += *fillsize;
5819       fillsize++;
5820       assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
5821       worstcase_bytes += offset; // we might've tried to parse all pending bytes
5822     }
5823     assert_true(storage.count == 1); // the big token should've been parsed
5824     assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
5825     if (g_reparseDeferralEnabledDefault) {
5826       // heuristic is enabled; some XML_Parse calls may have deferred reparsing
5827       const unsigned max_bytes_scanned = -*fillsize;
5828       if (g_bytesScanned > max_bytes_scanned) {
5829         fprintf(stderr,
5830                 "bytes scanned in parse attempts: actual=%u limit=%u \n",
5831                 g_bytesScanned, max_bytes_scanned);
5832         fail("too many bytes scanned in parse attempts");
5833       }
5834     }
5835     assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);
5836 
5837     XML_ParserFree(parser);
5838   }
5839   free(document);
5840 }
5841 END_TEST
5842 
5843 void
make_basic_test_case(Suite * s)5844 make_basic_test_case(Suite *s) {
5845   TCase *tc_basic = tcase_create("basic tests");
5846 
5847   suite_add_tcase(s, tc_basic);
5848   tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);
5849 
5850   tcase_add_test(tc_basic, test_nul_byte);
5851   tcase_add_test(tc_basic, test_u0000_char);
5852   tcase_add_test(tc_basic, test_siphash_self);
5853   tcase_add_test(tc_basic, test_siphash_spec);
5854   tcase_add_test(tc_basic, test_bom_utf8);
5855   tcase_add_test(tc_basic, test_bom_utf16_be);
5856   tcase_add_test(tc_basic, test_bom_utf16_le);
5857   tcase_add_test(tc_basic, test_nobom_utf16_le);
5858   tcase_add_test(tc_basic, test_hash_collision);
5859   tcase_add_test(tc_basic, test_illegal_utf8);
5860   tcase_add_test(tc_basic, test_utf8_auto_align);
5861   tcase_add_test(tc_basic, test_utf16);
5862   tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
5863   tcase_add_test(tc_basic, test_not_utf16);
5864   tcase_add_test(tc_basic, test_bad_encoding);
5865   tcase_add_test(tc_basic, test_latin1_umlauts);
5866   tcase_add_test(tc_basic, test_long_utf8_character);
5867   tcase_add_test(tc_basic, test_long_latin1_attribute);
5868   tcase_add_test(tc_basic, test_long_ascii_attribute);
5869   /* Regression test for SF bug #491986. */
5870   tcase_add_test(tc_basic, test_danish_latin1);
5871   /* Regression test for SF bug #514281. */
5872   tcase_add_test(tc_basic, test_french_charref_hexidecimal);
5873   tcase_add_test(tc_basic, test_french_charref_decimal);
5874   tcase_add_test(tc_basic, test_french_latin1);
5875   tcase_add_test(tc_basic, test_french_utf8);
5876   tcase_add_test(tc_basic, test_utf8_false_rejection);
5877   tcase_add_test(tc_basic, test_line_number_after_parse);
5878   tcase_add_test(tc_basic, test_column_number_after_parse);
5879   tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
5880   tcase_add_test(tc_basic, test_line_number_after_error);
5881   tcase_add_test(tc_basic, test_column_number_after_error);
5882   tcase_add_test(tc_basic, test_really_long_lines);
5883   tcase_add_test(tc_basic, test_really_long_encoded_lines);
5884   tcase_add_test(tc_basic, test_end_element_events);
5885   tcase_add_test(tc_basic, test_helper_is_whitespace_normalized);
5886   tcase_add_test(tc_basic, test_attr_whitespace_normalization);
5887   tcase_add_test(tc_basic, test_xmldecl_misplaced);
5888   tcase_add_test(tc_basic, test_xmldecl_invalid);
5889   tcase_add_test(tc_basic, test_xmldecl_missing_attr);
5890   tcase_add_test(tc_basic, test_xmldecl_missing_value);
5891   tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity);
5892   tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity);
5893   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding);
5894   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler);
5895   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom);
5896   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding);
5897   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2);
5898   tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset);
5899   tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
5900   tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
5901   tcase_add_test(tc_basic,
5902                  test_wfc_undeclared_entity_with_external_subset_standalone);
5903   tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone);
5904   tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
5905   tcase_add_test(tc_basic, test_not_standalone_handler_reject);
5906   tcase_add_test(tc_basic, test_not_standalone_handler_accept);
5907   tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
5908   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
5909   tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
5910   tcase_add_test(tc_basic, test_dtd_attr_handling);
5911   tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
5912   tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
5913   tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
5914   tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
5915   tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls);
5916   tcase_add_test(tc_basic, test_good_cdata_ascii);
5917   tcase_add_test(tc_basic, test_good_cdata_utf16);
5918   tcase_add_test(tc_basic, test_good_cdata_utf16_le);
5919   tcase_add_test(tc_basic, test_long_cdata_utf16);
5920   tcase_add_test(tc_basic, test_multichar_cdata_utf16);
5921   tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair);
5922   tcase_add_test(tc_basic, test_bad_cdata);
5923   tcase_add_test(tc_basic, test_bad_cdata_utf16);
5924   tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls);
5925   tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls);
5926   tcase_add_test(tc_basic, test_memory_allocation);
5927   tcase_add_test__if_xml_ge(tc_basic, test_default_current);
5928   tcase_add_test(tc_basic, test_dtd_elements);
5929   tcase_add_test(tc_basic, test_dtd_elements_nesting);
5930   tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
5931   tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
5932   tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
5933   tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype);
5934   tcase_add_test__ifdef_xml_dtd(tc_basic,
5935                                 test_foreign_dtd_without_external_subset);
5936   tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd);
5937   tcase_add_test(tc_basic, test_set_base);
5938   tcase_add_test(tc_basic, test_attributes);
5939   tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity);
5940   tcase_add_test(tc_basic, test_resume_invalid_parse);
5941   tcase_add_test(tc_basic, test_resume_resuspended);
5942   tcase_add_test(tc_basic, test_cdata_default);
5943   tcase_add_test(tc_basic, test_subordinate_reset);
5944   tcase_add_test(tc_basic, test_subordinate_suspend);
5945   tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend);
5946   tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort);
5947   tcase_add_test__ifdef_xml_dtd(tc_basic,
5948                                 test_ext_entity_invalid_suspended_parse);
5949   tcase_add_test(tc_basic, test_explicit_encoding);
5950   tcase_add_test(tc_basic, test_trailing_cr);
5951   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr);
5952   tcase_add_test(tc_basic, test_trailing_rsqb);
5953   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb);
5954   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata);
5955   tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters);
5956   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter);
5957   tcase_add_test(tc_basic, test_empty_parse);
5958   tcase_add_test(tc_basic, test_get_buffer_1);
5959   tcase_add_test(tc_basic, test_get_buffer_2);
5960 #if XML_CONTEXT_BYTES > 0
5961   tcase_add_test(tc_basic, test_get_buffer_3_overflow);
5962 #endif
5963   tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
5964   tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
5965   tcase_add_test(tc_basic, test_byte_info_at_end);
5966   tcase_add_test(tc_basic, test_byte_info_at_error);
5967   tcase_add_test(tc_basic, test_byte_info_at_cdata);
5968   tcase_add_test(tc_basic, test_predefined_entities);
5969   tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd);
5970   tcase_add_test(tc_basic, test_not_predefined_entities);
5971   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section);
5972   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
5973   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
5974   tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
5975   tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
5976   tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
5977   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
5978   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
5979   tcase_add_test(tc_basic, test_bad_public_doctype);
5980   tcase_add_test(tc_basic, test_attribute_enum_value);
5981   tcase_add_test(tc_basic, test_predefined_entity_redefinition);
5982   tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing);
5983   tcase_add_test(tc_basic, test_public_notation_no_sysid);
5984   tcase_add_test(tc_basic, test_nested_groups);
5985   tcase_add_test(tc_basic, test_group_choice);
5986   tcase_add_test(tc_basic, test_standalone_parameter_entity);
5987   tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
5988   tcase_add_test__ifdef_xml_dtd(tc_basic,
5989                                 test_recursive_external_parameter_entity);
5990   tcase_add_test__ifdef_xml_dtd(tc_basic,
5991                                 test_recursive_external_parameter_entity_2);
5992   tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
5993   tcase_add_test(tc_basic, test_suspend_xdecl);
5994   tcase_add_test(tc_basic, test_abort_epilog);
5995   tcase_add_test(tc_basic, test_abort_epilog_2);
5996   tcase_add_test(tc_basic, test_suspend_epilog);
5997   tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag);
5998   tcase_add_test(tc_basic, test_unfinished_epilog);
5999   tcase_add_test(tc_basic, test_partial_char_in_epilog);
6000   tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity);
6001   tcase_add_test__ifdef_xml_dtd(tc_basic,
6002                                 test_suspend_resume_internal_entity_issue_629);
6003   tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error);
6004   tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity);
6005   tcase_add_test(tc_basic, test_restart_on_error);
6006   tcase_add_test(tc_basic, test_reject_lt_in_attribute_value);
6007   tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value);
6008   tcase_add_test(tc_basic, test_trailing_cr_in_att_value);
6009   tcase_add_test(tc_basic, test_standalone_internal_entity);
6010   tcase_add_test(tc_basic, test_skipped_external_entity);
6011   tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity);
6012   tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity);
6013   tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr);
6014   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity);
6015   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2);
6016   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3);
6017   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4);
6018   tcase_add_test(tc_basic, test_pi_handled_in_default);
6019   tcase_add_test(tc_basic, test_comment_handled_in_default);
6020   tcase_add_test(tc_basic, test_pi_yml);
6021   tcase_add_test(tc_basic, test_pi_xnl);
6022   tcase_add_test(tc_basic, test_pi_xmm);
6023   tcase_add_test(tc_basic, test_utf16_pi);
6024   tcase_add_test(tc_basic, test_utf16_be_pi);
6025   tcase_add_test(tc_basic, test_utf16_be_comment);
6026   tcase_add_test(tc_basic, test_utf16_le_comment);
6027   tcase_add_test(tc_basic, test_missing_encoding_conversion_fn);
6028   tcase_add_test(tc_basic, test_failing_encoding_conversion_fn);
6029   tcase_add_test(tc_basic, test_unknown_encoding_success);
6030   tcase_add_test(tc_basic, test_unknown_encoding_bad_name);
6031   tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2);
6032   tcase_add_test(tc_basic, test_unknown_encoding_long_name_1);
6033   tcase_add_test(tc_basic, test_unknown_encoding_long_name_2);
6034   tcase_add_test(tc_basic, test_invalid_unknown_encoding);
6035   tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok);
6036   tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail);
6037   tcase_add_test(tc_basic, test_unknown_encoding_invalid_length);
6038   tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit);
6039   tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate);
6040   tcase_add_test(tc_basic, test_unknown_encoding_invalid_high);
6041   tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value);
6042   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom);
6043   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom);
6044   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2);
6045   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2);
6046   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be);
6047   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le);
6048   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown);
6049   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom);
6050   tcase_add_test(tc_basic, test_utf8_in_cdata_section);
6051   tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
6052   tcase_add_test(tc_basic, test_utf8_in_start_tags);
6053   tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
6054   tcase_add_test(tc_basic, test_utf16_attribute);
6055   tcase_add_test(tc_basic, test_utf16_second_attr);
6056   tcase_add_test(tc_basic, test_attr_after_solidus);
6057   tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe);
6058   tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
6059   tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
6060   tcase_add_test(tc_basic, test_bad_doctype);
6061   tcase_add_test(tc_basic, test_bad_doctype_utf8);
6062   tcase_add_test(tc_basic, test_bad_doctype_utf16);
6063   tcase_add_test(tc_basic, test_bad_doctype_plus);
6064   tcase_add_test(tc_basic, test_bad_doctype_star);
6065   tcase_add_test(tc_basic, test_bad_doctype_query);
6066   tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore);
6067   tcase_add_test(tc_basic, test_entity_in_utf16_be_attr);
6068   tcase_add_test(tc_basic, test_entity_in_utf16_le_attr);
6069   tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be);
6070   tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le);
6071   tcase_add_test(tc_basic, test_short_doctype);
6072   tcase_add_test(tc_basic, test_short_doctype_2);
6073   tcase_add_test(tc_basic, test_short_doctype_3);
6074   tcase_add_test(tc_basic, test_long_doctype);
6075   tcase_add_test(tc_basic, test_bad_entity);
6076   tcase_add_test(tc_basic, test_bad_entity_2);
6077   tcase_add_test(tc_basic, test_bad_entity_3);
6078   tcase_add_test(tc_basic, test_bad_entity_4);
6079   tcase_add_test(tc_basic, test_bad_notation);
6080   tcase_add_test(tc_basic, test_default_doctype_handler);
6081   tcase_add_test(tc_basic, test_empty_element_abort);
6082   tcase_add_test__ifdef_xml_dtd(tc_basic,
6083                                 test_pool_integrity_with_unfinished_attr);
6084   tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
6085   tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
6086   tcase_add_test(tc_basic, test_set_reparse_deferral);
6087   tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
6088   tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
6089   tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
6090   tcase_add_test(tc_basic, test_set_bad_reparse_option);
6091   tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
6092   tcase_add_test(tc_basic, test_varying_buffer_fills);
6093 }
6094