1 /* Tests in the "basic" test case for the Expat test suite
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10 Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net>
11 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12 Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13 Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org>
14 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15 Copyright (c) 2017 Joe Orton <jorton@redhat.com>
16 Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com>
17 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
18 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
19 Copyright (c) 2020 Tim Gates <tim.gates@iress.com>
20 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
21 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
22 Copyright (c) 2024-2025 Berkay Eren Ürün <berkay.ueruen@siemens.com>
23 Licensed under the MIT license:
24
25 Permission is hereby granted, free of charge, to any person obtaining
26 a copy of this software and associated documentation files (the
27 "Software"), to deal in the Software without restriction, including
28 without limitation the rights to use, copy, modify, merge, publish,
29 distribute, sublicense, and/or sell copies of the Software, and to permit
30 persons to whom the Software is furnished to do so, subject to the
31 following conditions:
32
33 The above copyright notice and this permission notice shall be included
34 in all copies or substantial portions of the Software.
35
36 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
37 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
38 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
39 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
40 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
41 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
42 USE OR OTHER DEALINGS IN THE SOFTWARE.
43 */
44
45 #if defined(NDEBUG)
46 # undef NDEBUG /* because test suite relies on assert(...) at the moment */
47 #endif
48
49 #include <assert.h>
50
51 #include <stdio.h>
52 #include <string.h>
53 #include <time.h>
54
55 #if ! defined(__cplusplus)
56 # include <stdbool.h>
57 #endif
58
59 #include "expat_config.h"
60
61 #include "expat.h"
62 #include "internal.h"
63 #include "minicheck.h"
64 #include "structdata.h"
65 #include "common.h"
66 #include "dummy.h"
67 #include "handlers.h"
68 #include "siphash.h"
69 #include "basic_tests.h"
70
71 static void
basic_setup(void)72 basic_setup(void) {
73 g_parser = XML_ParserCreate(NULL);
74 if (g_parser == NULL)
75 fail("Parser not created.");
76 }
77
78 /*
79 * Character & encoding tests.
80 */
81
START_TEST(test_nul_byte)82 START_TEST(test_nul_byte) {
83 char text[] = "<doc>\0</doc>";
84
85 /* test that a NUL byte (in US-ASCII data) is an error */
86 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
87 == XML_STATUS_OK)
88 fail("Parser did not report error on NUL-byte.");
89 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
90 xml_failure(g_parser);
91 }
92 END_TEST
93
START_TEST(test_u0000_char)94 START_TEST(test_u0000_char) {
95 /* test that a NUL byte (in US-ASCII data) is an error */
96 expect_failure("<doc>�</doc>", XML_ERROR_BAD_CHAR_REF,
97 "Parser did not report error on NUL-byte.");
98 }
99 END_TEST
100
START_TEST(test_siphash_self)101 START_TEST(test_siphash_self) {
102 if (! sip24_valid())
103 fail("SipHash self-test failed");
104 }
105 END_TEST
106
START_TEST(test_siphash_spec)107 START_TEST(test_siphash_spec) {
108 /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
109 const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
110 "\x0a\x0b\x0c\x0d\x0e";
111 const size_t len = sizeof(message) - 1;
112 const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U);
113 struct siphash state;
114 struct sipkey key;
115
116 sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
117 "\x0a\x0b\x0c\x0d\x0e\x0f");
118 sip24_init(&state, &key);
119
120 /* Cover spread across calls */
121 sip24_update(&state, message, 4);
122 sip24_update(&state, message + 4, len - 4);
123
124 /* Cover null length */
125 sip24_update(&state, message, 0);
126
127 if (sip24_final(&state) != expected)
128 fail("sip24_final failed spec test\n");
129
130 /* Cover wrapper */
131 if (siphash24(message, len, &key) != expected)
132 fail("siphash24 failed spec test\n");
133 }
134 END_TEST
135
START_TEST(test_bom_utf8)136 START_TEST(test_bom_utf8) {
137 /* This test is really just making sure we don't core on a UTF-8 BOM. */
138 const char *text = "\357\273\277<e/>";
139
140 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
141 == XML_STATUS_ERROR)
142 xml_failure(g_parser);
143 }
144 END_TEST
145
START_TEST(test_bom_utf16_be)146 START_TEST(test_bom_utf16_be) {
147 char text[] = "\376\377\0<\0e\0/\0>";
148
149 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
150 == XML_STATUS_ERROR)
151 xml_failure(g_parser);
152 }
153 END_TEST
154
START_TEST(test_bom_utf16_le)155 START_TEST(test_bom_utf16_le) {
156 char text[] = "\377\376<\0e\0/\0>\0";
157
158 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
159 == XML_STATUS_ERROR)
160 xml_failure(g_parser);
161 }
162 END_TEST
163
START_TEST(test_nobom_utf16_le)164 START_TEST(test_nobom_utf16_le) {
165 char text[] = " \0<\0e\0/\0>\0";
166
167 if (g_chunkSize == 1) {
168 // TODO: with just the first byte, we can't tell the difference between
169 // UTF-16-LE and UTF-8. Avoid the failure for now.
170 return;
171 }
172
173 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
174 == XML_STATUS_ERROR)
175 xml_failure(g_parser);
176 }
177 END_TEST
178
START_TEST(test_hash_collision)179 START_TEST(test_hash_collision) {
180 /* For full coverage of the lookup routine, we need to ensure a
181 * hash collision even though we can only tell that we have one
182 * through breakpoint debugging or coverage statistics. The
183 * following will cause a hash collision on machines with a 64-bit
184 * long type; others will have to experiment. The full coverage
185 * tests invoked from qa.sh usually provide a hash collision, but
186 * not always. This is an attempt to provide insurance.
187 */
188 #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U)
189 const char *text
190 = "<doc>\n"
191 "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n"
192 "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n"
193 "<b5></b5><b6></b6><b7></b7><b8></b8>\n"
194 "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n"
195 "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n"
196 "<d8>This triggers the table growth and collides with b2</d8>\n"
197 "</doc>\n";
198
199 XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT);
200 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
201 == XML_STATUS_ERROR)
202 xml_failure(g_parser);
203 }
204 END_TEST
205 #undef COLLIDING_HASH_SALT
206
207 /* Regression test for SF bug #491986. */
START_TEST(test_danish_latin1)208 START_TEST(test_danish_latin1) {
209 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
210 "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
211 #ifdef XML_UNICODE
212 const XML_Char *expected
213 = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
214 #else
215 const XML_Char *expected
216 = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
217 #endif
218 run_character_check(text, expected);
219 }
220 END_TEST
221
222 /* Regression test for SF bug #514281. */
START_TEST(test_french_charref_hexidecimal)223 START_TEST(test_french_charref_hexidecimal) {
224 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
225 "<doc>éèàçêÈ</doc>";
226 #ifdef XML_UNICODE
227 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
228 #else
229 const XML_Char *expected
230 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
231 #endif
232 run_character_check(text, expected);
233 }
234 END_TEST
235
START_TEST(test_french_charref_decimal)236 START_TEST(test_french_charref_decimal) {
237 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
238 "<doc>éèàçêÈ</doc>";
239 #ifdef XML_UNICODE
240 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
241 #else
242 const XML_Char *expected
243 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
244 #endif
245 run_character_check(text, expected);
246 }
247 END_TEST
248
START_TEST(test_french_latin1)249 START_TEST(test_french_latin1) {
250 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
251 "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
252 #ifdef XML_UNICODE
253 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
254 #else
255 const XML_Char *expected
256 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
257 #endif
258 run_character_check(text, expected);
259 }
260 END_TEST
261
START_TEST(test_french_utf8)262 START_TEST(test_french_utf8) {
263 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
264 "<doc>\xC3\xA9</doc>";
265 #ifdef XML_UNICODE
266 const XML_Char *expected = XCS("\x00e9");
267 #else
268 const XML_Char *expected = XCS("\xC3\xA9");
269 #endif
270 run_character_check(text, expected);
271 }
272 END_TEST
273
274 /* Regression test for SF bug #600479.
275 XXX There should be a test that exercises all legal XML Unicode
276 characters as PCDATA and attribute value content, and XML Name
277 characters as part of element and attribute names.
278 */
START_TEST(test_utf8_false_rejection)279 START_TEST(test_utf8_false_rejection) {
280 const char *text = "<doc>\xEF\xBA\xBF</doc>";
281 #ifdef XML_UNICODE
282 const XML_Char *expected = XCS("\xfebf");
283 #else
284 const XML_Char *expected = XCS("\xEF\xBA\xBF");
285 #endif
286 run_character_check(text, expected);
287 }
288 END_TEST
289
290 /* Regression test for SF bug #477667.
291 This test assures that any 8-bit character followed by a 7-bit
292 character will not be mistakenly interpreted as a valid UTF-8
293 sequence.
294 */
START_TEST(test_illegal_utf8)295 START_TEST(test_illegal_utf8) {
296 char text[100];
297 int i;
298
299 for (i = 128; i <= 255; ++i) {
300 snprintf(text, sizeof(text), "<e>%ccd</e>", i);
301 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
302 == XML_STATUS_OK) {
303 snprintf(text, sizeof(text),
304 "expected token error for '%c' (ordinal %d) in UTF-8 text", i,
305 i);
306 fail(text);
307 } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
308 xml_failure(g_parser);
309 /* Reset the parser since we use the same parser repeatedly. */
310 XML_ParserReset(g_parser, NULL);
311 }
312 }
313 END_TEST
314
315 /* Examples, not masks: */
316 #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
317 #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
318 #define UTF8_LEAD_3 "\xef" /* 0b11101111 */
319 #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
320 #define UTF8_FOLLOW "\xbf" /* 0b10111111 */
321
START_TEST(test_utf8_auto_align)322 START_TEST(test_utf8_auto_align) {
323 struct TestCase {
324 ptrdiff_t expectedMovementInChars;
325 const char *input;
326 };
327
328 struct TestCase cases[] = {
329 {00, ""},
330
331 {00, UTF8_LEAD_1},
332
333 {-1, UTF8_LEAD_2},
334 {00, UTF8_LEAD_2 UTF8_FOLLOW},
335
336 {-1, UTF8_LEAD_3},
337 {-2, UTF8_LEAD_3 UTF8_FOLLOW},
338 {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
339
340 {-1, UTF8_LEAD_4},
341 {-2, UTF8_LEAD_4 UTF8_FOLLOW},
342 {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
343 {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
344 };
345
346 size_t i = 0;
347 bool success = true;
348 for (; i < sizeof(cases) / sizeof(*cases); i++) {
349 const char *fromLim = cases[i].input + strlen(cases[i].input);
350 const char *const fromLimInitially = fromLim;
351 ptrdiff_t actualMovementInChars;
352
353 _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim);
354
355 actualMovementInChars = (fromLim - fromLimInitially);
356 if (actualMovementInChars != cases[i].expectedMovementInChars) {
357 size_t j = 0;
358 success = false;
359 printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
360 ", actually moved by %2d chars: \"",
361 (unsigned)(i + 1), (int)cases[i].expectedMovementInChars,
362 (int)actualMovementInChars);
363 for (; j < strlen(cases[i].input); j++) {
364 printf("\\x%02x", (unsigned char)cases[i].input[j]);
365 }
366 printf("\"\n");
367 }
368 }
369
370 if (! success) {
371 fail("UTF-8 auto-alignment is not bullet-proof\n");
372 }
373 }
374 END_TEST
375
START_TEST(test_utf16)376 START_TEST(test_utf16) {
377 /* <?xml version="1.0" encoding="UTF-16"?>
378 * <doc a='123'>some {A} text</doc>
379 *
380 * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
381 */
382 char text[]
383 = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
384 "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
385 "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
386 "\000'\000?\000>\000\n"
387 "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
388 "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
389 "<\000/\000d\000o\000c\000>";
390 #ifdef XML_UNICODE
391 const XML_Char *expected = XCS("some \xff21 text");
392 #else
393 const XML_Char *expected = XCS("some \357\274\241 text");
394 #endif
395 CharData storage;
396
397 CharData_Init(&storage);
398 XML_SetUserData(g_parser, &storage);
399 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
400 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
401 == XML_STATUS_ERROR)
402 xml_failure(g_parser);
403 CharData_CheckXMLChars(&storage, expected);
404 }
405 END_TEST
406
START_TEST(test_utf16_le_epilog_newline)407 START_TEST(test_utf16_le_epilog_newline) {
408 unsigned int first_chunk_bytes = 17;
409 char text[] = "\xFF\xFE" /* BOM */
410 "<\000e\000/\000>\000" /* document element */
411 "\r\000\n\000\r\000\n\000"; /* epilog */
412
413 if (first_chunk_bytes >= sizeof(text) - 1)
414 fail("bad value of first_chunk_bytes");
415 if (_XML_Parse_SINGLE_BYTES(g_parser, text, first_chunk_bytes, XML_FALSE)
416 == XML_STATUS_ERROR)
417 xml_failure(g_parser);
418 else {
419 enum XML_Status rc;
420 rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes,
421 sizeof(text) - first_chunk_bytes - 1,
422 XML_TRUE);
423 if (rc == XML_STATUS_ERROR)
424 xml_failure(g_parser);
425 }
426 }
427 END_TEST
428
429 /* Test that an outright lie in the encoding is faulted */
START_TEST(test_not_utf16)430 START_TEST(test_not_utf16) {
431 const char *text = "<?xml version='1.0' encoding='utf-16'?>"
432 "<doc>Hi</doc>";
433
434 /* Use a handler to provoke the appropriate code paths */
435 XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler);
436 expect_failure(text, XML_ERROR_INCORRECT_ENCODING,
437 "UTF-16 declared in UTF-8 not faulted");
438 }
439 END_TEST
440
441 /* Test that an unknown encoding is rejected */
START_TEST(test_bad_encoding)442 START_TEST(test_bad_encoding) {
443 const char *text = "<doc>Hi</doc>";
444
445 if (! XML_SetEncoding(g_parser, XCS("unknown-encoding")))
446 fail("XML_SetEncoding failed");
447 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
448 "Unknown encoding not faulted");
449 }
450 END_TEST
451
452 /* Regression test for SF bug #481609, #774028. */
START_TEST(test_latin1_umlauts)453 START_TEST(test_latin1_umlauts) {
454 const char *text
455 = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
456 "<e a='\xE4 \xF6 \xFC ä ö ü ä ö ü >'\n"
457 " >\xE4 \xF6 \xFC ä ö ü ä ö ü ></e>";
458 #ifdef XML_UNICODE
459 /* Expected results in UTF-16 */
460 const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ")
461 XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >");
462 #else
463 /* Expected results in UTF-8 */
464 const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
465 XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
466 #endif
467
468 run_character_check(text, expected);
469 XML_ParserReset(g_parser, NULL);
470 run_attribute_check(text, expected);
471 /* Repeat with a default handler */
472 XML_ParserReset(g_parser, NULL);
473 XML_SetDefaultHandler(g_parser, dummy_default_handler);
474 run_character_check(text, expected);
475 XML_ParserReset(g_parser, NULL);
476 XML_SetDefaultHandler(g_parser, dummy_default_handler);
477 run_attribute_check(text, expected);
478 }
479 END_TEST
480
481 /* Test that an element name with a 4-byte UTF-8 character is rejected */
START_TEST(test_long_utf8_character)482 START_TEST(test_long_utf8_character) {
483 const char *text
484 = "<?xml version='1.0' encoding='utf-8'?>\n"
485 /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
486 "<do\xf0\x90\x80\x80/>";
487 expect_failure(text, XML_ERROR_INVALID_TOKEN,
488 "4-byte UTF-8 character in element name not faulted");
489 }
490 END_TEST
491
492 /* Test that a long latin-1 attribute (too long to convert in one go)
493 * is correctly converted
494 */
START_TEST(test_long_latin1_attribute)495 START_TEST(test_long_latin1_attribute) {
496 const char *text
497 = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
498 "<doc att='"
499 /* 64 characters per line */
500 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
501 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
502 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
503 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
504 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
505 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
506 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
507 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
508 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
509 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
510 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
511 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
512 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
513 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
514 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
515 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
516 /* Last character splits across a buffer boundary */
517 "\xe4'>\n</doc>";
518
519 const XML_Char *expected =
520 /* 64 characters per line */
521 /* clang-format off */
522 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
523 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
524 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
525 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
526 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
527 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
528 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
529 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
530 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
531 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
532 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
533 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
534 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
535 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
536 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
537 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
538 /* clang-format on */
539 #ifdef XML_UNICODE
540 XCS("\x00e4");
541 #else
542 XCS("\xc3\xa4");
543 #endif
544
545 run_attribute_check(text, expected);
546 }
547 END_TEST
548
549 /* Test that a long ASCII attribute (too long to convert in one go)
550 * is correctly converted
551 */
START_TEST(test_long_ascii_attribute)552 START_TEST(test_long_ascii_attribute) {
553 const char *text
554 = "<?xml version='1.0' encoding='us-ascii'?>\n"
555 "<doc att='"
556 /* 64 characters per line */
557 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
558 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
559 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
560 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
561 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
562 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
563 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
564 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
565 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
566 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
567 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
568 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
569 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
570 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
571 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
572 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
573 "01234'>\n</doc>";
574 const XML_Char *expected =
575 /* 64 characters per line */
576 /* clang-format off */
577 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
578 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
579 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
580 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
581 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
582 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
583 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
584 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
585 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
586 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
587 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
588 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
589 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
590 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
591 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
592 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
593 XCS("01234");
594 /* clang-format on */
595
596 run_attribute_check(text, expected);
597 }
598 END_TEST
599
600 /* Regression test #1 for SF bug #653180. */
START_TEST(test_line_number_after_parse)601 START_TEST(test_line_number_after_parse) {
602 const char *text = "<tag>\n"
603 "\n"
604 "\n</tag>";
605 XML_Size lineno;
606
607 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
608 == XML_STATUS_ERROR)
609 xml_failure(g_parser);
610 lineno = XML_GetCurrentLineNumber(g_parser);
611 if (lineno != 4) {
612 char buffer[100];
613 snprintf(buffer, sizeof(buffer),
614 "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
615 fail(buffer);
616 }
617 }
618 END_TEST
619
620 /* Regression test #2 for SF bug #653180. */
START_TEST(test_column_number_after_parse)621 START_TEST(test_column_number_after_parse) {
622 const char *text = "<tag></tag>";
623 XML_Size colno;
624
625 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
626 == XML_STATUS_ERROR)
627 xml_failure(g_parser);
628 colno = XML_GetCurrentColumnNumber(g_parser);
629 if (colno != 11) {
630 char buffer[100];
631 snprintf(buffer, sizeof(buffer),
632 "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
633 fail(buffer);
634 }
635 }
636 END_TEST
637
638 /* Regression test #3 for SF bug #653180. */
START_TEST(test_line_and_column_numbers_inside_handlers)639 START_TEST(test_line_and_column_numbers_inside_handlers) {
640 const char *text = "<a>\n" /* Unix end-of-line */
641 " <b>\r\n" /* Windows end-of-line */
642 " <c/>\r" /* Mac OS end-of-line */
643 " </b>\n"
644 " <d>\n"
645 " <f/>\n"
646 " </d>\n"
647 "</a>";
648 const StructDataEntry expected[]
649 = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG},
650 {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG},
651 {XCS("b"), 2, 4, STRUCT_END_TAG}, {XCS("d"), 2, 5, STRUCT_START_TAG},
652 {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG},
653 {XCS("d"), 2, 7, STRUCT_END_TAG}, {XCS("a"), 0, 8, STRUCT_END_TAG}};
654 const int expected_count = sizeof(expected) / sizeof(StructDataEntry);
655 StructData storage;
656
657 StructData_Init(&storage);
658 XML_SetUserData(g_parser, &storage);
659 XML_SetStartElementHandler(g_parser, start_element_event_handler2);
660 XML_SetEndElementHandler(g_parser, end_element_event_handler2);
661 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
662 == XML_STATUS_ERROR)
663 xml_failure(g_parser);
664
665 StructData_CheckItems(&storage, expected, expected_count);
666 StructData_Dispose(&storage);
667 }
668 END_TEST
669
670 /* Regression test #4 for SF bug #653180. */
START_TEST(test_line_number_after_error)671 START_TEST(test_line_number_after_error) {
672 const char *text = "<a>\n"
673 " <b>\n"
674 " </a>"; /* missing </b> */
675 XML_Size lineno;
676 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
677 != XML_STATUS_ERROR)
678 fail("Expected a parse error");
679
680 lineno = XML_GetCurrentLineNumber(g_parser);
681 if (lineno != 3) {
682 char buffer[100];
683 snprintf(buffer, sizeof(buffer),
684 "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
685 fail(buffer);
686 }
687 }
688 END_TEST
689
690 /* Regression test #5 for SF bug #653180. */
START_TEST(test_column_number_after_error)691 START_TEST(test_column_number_after_error) {
692 const char *text = "<a>\n"
693 " <b>\n"
694 " </a>"; /* missing </b> */
695 XML_Size colno;
696 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
697 != XML_STATUS_ERROR)
698 fail("Expected a parse error");
699
700 colno = XML_GetCurrentColumnNumber(g_parser);
701 if (colno != 4) {
702 char buffer[100];
703 snprintf(buffer, sizeof(buffer),
704 "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
705 fail(buffer);
706 }
707 }
708 END_TEST
709
710 /* Regression test for SF bug #478332. */
START_TEST(test_really_long_lines)711 START_TEST(test_really_long_lines) {
712 /* This parses an input line longer than INIT_DATA_BUF_SIZE
713 characters long (defined to be 1024 in xmlparse.c). We take a
714 really cheesy approach to building the input buffer, because
715 this avoids writing bugs in buffer-filling code.
716 */
717 const char *text
718 = "<e>"
719 /* 64 chars */
720 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
721 /* until we have at least 1024 characters on the line: */
722 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
723 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
724 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
725 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
726 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
727 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
728 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
729 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
730 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
731 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
732 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
733 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
734 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
735 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
736 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
737 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
738 "</e>";
739 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
740 == XML_STATUS_ERROR)
741 xml_failure(g_parser);
742 }
743 END_TEST
744
745 /* Test cdata processing across a buffer boundary */
START_TEST(test_really_long_encoded_lines)746 START_TEST(test_really_long_encoded_lines) {
747 /* As above, except that we want to provoke an output buffer
748 * overflow with a non-trivial encoding. For this we need to pass
749 * the whole cdata in one go, not byte-by-byte.
750 */
751 void *buffer;
752 const char *text
753 = "<?xml version='1.0' encoding='iso-8859-1'?>"
754 "<e>"
755 /* 64 chars */
756 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
757 /* until we have at least 1024 characters on the line: */
758 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
759 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
760 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
761 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
762 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
763 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
764 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
765 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
766 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
767 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
768 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
769 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
770 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
771 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
772 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
773 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
774 "</e>";
775 int parse_len = (int)strlen(text);
776
777 /* Need a cdata handler to provoke the code path we want to test */
778 XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler);
779 buffer = XML_GetBuffer(g_parser, parse_len);
780 if (buffer == NULL)
781 fail("Could not allocate parse buffer");
782 assert(buffer != NULL);
783 memcpy(buffer, text, parse_len);
784 if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR)
785 xml_failure(g_parser);
786 }
787 END_TEST
788
789 /*
790 * Element event tests.
791 */
792
START_TEST(test_end_element_events)793 START_TEST(test_end_element_events) {
794 const char *text = "<a><b><c/></b><d><f/></d></a>";
795 const XML_Char *expected = XCS("/c/b/f/d/a");
796 CharData storage;
797
798 CharData_Init(&storage);
799 XML_SetUserData(g_parser, &storage);
800 XML_SetEndElementHandler(g_parser, end_element_event_handler);
801 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
802 == XML_STATUS_ERROR)
803 xml_failure(g_parser);
804 CharData_CheckXMLChars(&storage, expected);
805 }
806 END_TEST
807
808 /*
809 * Attribute tests.
810 */
811
812 /* Helper used by the following tests; this checks any "attr" and "refs"
813 attributes to make sure whitespace has been normalized.
814
815 Return true if whitespace has been normalized in a string, using
816 the rules for attribute value normalization. The 'is_cdata' flag
817 is needed since CDATA attributes don't need to have multiple
818 whitespace characters collapsed to a single space, while other
819 attribute data types do. (Section 3.3.3 of the recommendation.)
820 */
821 static int
is_whitespace_normalized(const XML_Char * s,int is_cdata)822 is_whitespace_normalized(const XML_Char *s, int is_cdata) {
823 int blanks = 0;
824 int at_start = 1;
825 while (*s) {
826 if (*s == XCS(' '))
827 ++blanks;
828 else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r'))
829 return 0;
830 else {
831 if (at_start) {
832 at_start = 0;
833 if (blanks && ! is_cdata)
834 /* illegal leading blanks */
835 return 0;
836 } else if (blanks > 1 && ! is_cdata)
837 return 0;
838 blanks = 0;
839 }
840 ++s;
841 }
842 if (blanks && ! is_cdata)
843 return 0;
844 return 1;
845 }
846
847 /* Check the attribute whitespace checker: */
START_TEST(test_helper_is_whitespace_normalized)848 START_TEST(test_helper_is_whitespace_normalized) {
849 assert(is_whitespace_normalized(XCS("abc"), 0));
850 assert(is_whitespace_normalized(XCS("abc"), 1));
851 assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
852 assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
853 assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0));
854 assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
855 assert(! is_whitespace_normalized(XCS("abc def ghi"), 0));
856 assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
857 assert(! is_whitespace_normalized(XCS("abc def ghi "), 0));
858 assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
859 assert(! is_whitespace_normalized(XCS(" "), 0));
860 assert(is_whitespace_normalized(XCS(" "), 1));
861 assert(! is_whitespace_normalized(XCS("\t"), 0));
862 assert(! is_whitespace_normalized(XCS("\t"), 1));
863 assert(! is_whitespace_normalized(XCS("\n"), 0));
864 assert(! is_whitespace_normalized(XCS("\n"), 1));
865 assert(! is_whitespace_normalized(XCS("\r"), 0));
866 assert(! is_whitespace_normalized(XCS("\r"), 1));
867 assert(! is_whitespace_normalized(XCS("abc\t def"), 1));
868 }
869 END_TEST
870
871 static void XMLCALL
check_attr_contains_normalized_whitespace(void * userData,const XML_Char * name,const XML_Char ** atts)872 check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name,
873 const XML_Char **atts) {
874 int i;
875 UNUSED_P(userData);
876 UNUSED_P(name);
877 for (i = 0; atts[i] != NULL; i += 2) {
878 const XML_Char *attrname = atts[i];
879 const XML_Char *value = atts[i + 1];
880 if (xcstrcmp(XCS("attr"), attrname) == 0
881 || xcstrcmp(XCS("ents"), attrname) == 0
882 || xcstrcmp(XCS("refs"), attrname) == 0) {
883 if (! is_whitespace_normalized(value, 0)) {
884 char buffer[256];
885 snprintf(buffer, sizeof(buffer),
886 "attribute value not normalized: %" XML_FMT_STR
887 "='%" XML_FMT_STR "'",
888 attrname, value);
889 fail(buffer);
890 }
891 }
892 }
893 }
894
START_TEST(test_attr_whitespace_normalization)895 START_TEST(test_attr_whitespace_normalization) {
896 const char *text
897 = "<!DOCTYPE doc [\n"
898 " <!ATTLIST doc\n"
899 " attr NMTOKENS #REQUIRED\n"
900 " ents ENTITIES #REQUIRED\n"
901 " refs IDREFS #REQUIRED>\n"
902 "]>\n"
903 "<doc attr=' a b c\t\td\te\t' refs=' id-1 \t id-2\t\t' \n"
904 " ents=' ent-1 \t\r\n"
905 " ent-2 ' >\n"
906 " <e id='id-1'/>\n"
907 " <e id='id-2'/>\n"
908 "</doc>";
909
910 XML_SetStartElementHandler(g_parser,
911 check_attr_contains_normalized_whitespace);
912 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
913 == XML_STATUS_ERROR)
914 xml_failure(g_parser);
915 }
916 END_TEST
917
918 /*
919 * XML declaration tests.
920 */
921
START_TEST(test_xmldecl_misplaced)922 START_TEST(test_xmldecl_misplaced) {
923 expect_failure("\n"
924 "<?xml version='1.0'?>\n"
925 "<a/>",
926 XML_ERROR_MISPLACED_XML_PI,
927 "failed to report misplaced XML declaration");
928 }
929 END_TEST
930
START_TEST(test_xmldecl_invalid)931 START_TEST(test_xmldecl_invalid) {
932 expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL,
933 "Failed to report invalid XML declaration");
934 }
935 END_TEST
936
START_TEST(test_xmldecl_missing_attr)937 START_TEST(test_xmldecl_missing_attr) {
938 expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL,
939 "Failed to report missing XML declaration attribute");
940 }
941 END_TEST
942
START_TEST(test_xmldecl_missing_value)943 START_TEST(test_xmldecl_missing_value) {
944 expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n"
945 "<doc/>",
946 XML_ERROR_XML_DECL,
947 "Failed to report missing attribute value");
948 }
949 END_TEST
950
951 /* Regression test for SF bug #584832. */
START_TEST(test_unknown_encoding_internal_entity)952 START_TEST(test_unknown_encoding_internal_entity) {
953 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
954 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
955 "<test a='&foo;'/>";
956
957 XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL);
958 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
959 == XML_STATUS_ERROR)
960 xml_failure(g_parser);
961 }
962 END_TEST
963
964 /* Test unrecognised encoding handler */
START_TEST(test_unrecognised_encoding_internal_entity)965 START_TEST(test_unrecognised_encoding_internal_entity) {
966 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
967 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
968 "<test a='&foo;'/>";
969
970 XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL);
971 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
972 != XML_STATUS_ERROR)
973 fail("Unrecognised encoding not rejected");
974 }
975 END_TEST
976
977 /* Regression test for SF bug #620106. */
START_TEST(test_ext_entity_set_encoding)978 START_TEST(test_ext_entity_set_encoding) {
979 const char *text = "<!DOCTYPE doc [\n"
980 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
981 "]>\n"
982 "<doc>&en;</doc>";
983 ExtTest test_data
984 = {/* This text says it's an unsupported encoding, but it's really
985 UTF-8, which we tell Expat using XML_SetEncoding().
986 */
987 "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL};
988 #ifdef XML_UNICODE
989 const XML_Char *expected = XCS("\x00e9");
990 #else
991 const XML_Char *expected = XCS("\xc3\xa9");
992 #endif
993
994 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
995 run_ext_character_check(text, &test_data, expected);
996 }
997 END_TEST
998
999 /* Test external entities with no handler */
START_TEST(test_ext_entity_no_handler)1000 START_TEST(test_ext_entity_no_handler) {
1001 const char *text = "<!DOCTYPE doc [\n"
1002 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1003 "]>\n"
1004 "<doc>&en;</doc>";
1005
1006 XML_SetDefaultHandler(g_parser, dummy_default_handler);
1007 run_character_check(text, XCS(""));
1008 }
1009 END_TEST
1010
1011 /* Test UTF-8 BOM is accepted */
START_TEST(test_ext_entity_set_bom)1012 START_TEST(test_ext_entity_set_bom) {
1013 const char *text = "<!DOCTYPE doc [\n"
1014 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1015 "]>\n"
1016 "<doc>&en;</doc>";
1017 ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */
1018 "<?xml encoding='iso-8859-3'?>"
1019 "\xC3\xA9",
1020 XCS("utf-8"), NULL};
1021 #ifdef XML_UNICODE
1022 const XML_Char *expected = XCS("\x00e9");
1023 #else
1024 const XML_Char *expected = XCS("\xc3\xa9");
1025 #endif
1026
1027 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1028 run_ext_character_check(text, &test_data, expected);
1029 }
1030 END_TEST
1031
1032 /* Test that bad encodings are faulted */
START_TEST(test_ext_entity_bad_encoding)1033 START_TEST(test_ext_entity_bad_encoding) {
1034 const char *text = "<!DOCTYPE doc [\n"
1035 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1036 "]>\n"
1037 "<doc>&en;</doc>";
1038 ExtFaults fault
1039 = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted",
1040 XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING};
1041
1042 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1043 XML_SetUserData(g_parser, &fault);
1044 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1045 "Bad encoding should not have been accepted");
1046 }
1047 END_TEST
1048
1049 /* Try handing an invalid encoding to an external entity parser */
START_TEST(test_ext_entity_bad_encoding_2)1050 START_TEST(test_ext_entity_bad_encoding_2) {
1051 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1052 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1053 "<doc>&entity;</doc>";
1054 ExtFaults fault
1055 = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted",
1056 XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING};
1057
1058 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1059 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1060 XML_SetUserData(g_parser, &fault);
1061 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1062 "Bad encoding not faulted in external entity handler");
1063 }
1064 END_TEST
1065
1066 /* Test that no error is reported for unknown entities if we don't
1067 read an external subset. This was fixed in Expat 1.95.5.
1068 */
START_TEST(test_wfc_undeclared_entity_unread_external_subset)1069 START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
1070 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
1071 "<doc>&entity;</doc>";
1072
1073 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1074 == XML_STATUS_ERROR)
1075 xml_failure(g_parser);
1076 }
1077 END_TEST
1078
1079 /* Test that an error is reported for unknown entities if we don't
1080 have an external subset.
1081 */
START_TEST(test_wfc_undeclared_entity_no_external_subset)1082 START_TEST(test_wfc_undeclared_entity_no_external_subset) {
1083 expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY,
1084 "Parser did not report undefined entity w/out a DTD.");
1085 }
1086 END_TEST
1087
1088 /* Test that an error is reported for unknown entities if we don't
1089 read an external subset, but have been declared standalone.
1090 */
START_TEST(test_wfc_undeclared_entity_standalone)1091 START_TEST(test_wfc_undeclared_entity_standalone) {
1092 const char *text
1093 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1094 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1095 "<doc>&entity;</doc>";
1096
1097 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1098 "Parser did not report undefined entity (standalone).");
1099 }
1100 END_TEST
1101
1102 /* Test that an error is reported for unknown entities if we have read
1103 an external subset, and standalone is true.
1104 */
START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone)1105 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
1106 const char *text
1107 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1108 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1109 "<doc>&entity;</doc>";
1110 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1111
1112 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1113 XML_SetUserData(g_parser, &test_data);
1114 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1115 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1116 "Parser did not report undefined entity (external DTD).");
1117 }
1118 END_TEST
1119
1120 /* Test that external entity handling is not done if the parsing flag
1121 * is set to UNLESS_STANDALONE
1122 */
START_TEST(test_entity_with_external_subset_unless_standalone)1123 START_TEST(test_entity_with_external_subset_unless_standalone) {
1124 const char *text
1125 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1126 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1127 "<doc>&entity;</doc>";
1128 ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL};
1129
1130 XML_SetParamEntityParsing(g_parser,
1131 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1132 XML_SetUserData(g_parser, &test_data);
1133 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1134 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1135 "Parser did not report undefined entity");
1136 }
1137 END_TEST
1138
1139 /* Test that no error is reported for unknown entities if we have read
1140 an external subset, and standalone is false.
1141 */
START_TEST(test_wfc_undeclared_entity_with_external_subset)1142 START_TEST(test_wfc_undeclared_entity_with_external_subset) {
1143 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1144 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1145 "<doc>&entity;</doc>";
1146 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1147
1148 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1149 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1150 run_ext_character_check(text, &test_data, XCS(""));
1151 }
1152 END_TEST
1153
1154 /* Test that an error is reported if our NotStandalone handler fails */
START_TEST(test_not_standalone_handler_reject)1155 START_TEST(test_not_standalone_handler_reject) {
1156 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1157 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1158 "<doc>&entity;</doc>";
1159 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1160
1161 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1162 XML_SetUserData(g_parser, &test_data);
1163 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1164 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1165 expect_failure(text, XML_ERROR_NOT_STANDALONE,
1166 "NotStandalone handler failed to reject");
1167
1168 /* Try again but without external entity handling */
1169 XML_ParserReset(g_parser, NULL);
1170 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1171 expect_failure(text, XML_ERROR_NOT_STANDALONE,
1172 "NotStandalone handler failed to reject");
1173 }
1174 END_TEST
1175
1176 /* Test that no error is reported if our NotStandalone handler succeeds */
START_TEST(test_not_standalone_handler_accept)1177 START_TEST(test_not_standalone_handler_accept) {
1178 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1179 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1180 "<doc>&entity;</doc>";
1181 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1182
1183 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1184 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1185 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1186 run_ext_character_check(text, &test_data, XCS(""));
1187
1188 /* Repeat without the external entity handler */
1189 XML_ParserReset(g_parser, NULL);
1190 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1191 run_character_check(text, XCS(""));
1192 }
1193 END_TEST
1194
START_TEST(test_wfc_no_recursive_entity_refs)1195 START_TEST(test_wfc_no_recursive_entity_refs) {
1196 const char *text = "<!DOCTYPE doc [\n"
1197 " <!ENTITY entity '&entity;'>\n"
1198 "]>\n"
1199 "<doc>&entity;</doc>";
1200
1201 expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF,
1202 "Parser did not report recursive entity reference.");
1203 }
1204 END_TEST
1205
START_TEST(test_recursive_external_parameter_entity_2)1206 START_TEST(test_recursive_external_parameter_entity_2) {
1207 struct TestCase {
1208 const char *doc;
1209 enum XML_Status expectedStatus;
1210 };
1211
1212 struct TestCase cases[] = {
1213 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR},
1214 {"<!ENTITY % p1 '%p1;'>"
1215 "<!ENTITY % p1 'first declaration wins'>",
1216 XML_STATUS_ERROR},
1217 {"<!ENTITY % p1 'first declaration wins'>"
1218 "<!ENTITY % p1 '%p1;'>",
1219 XML_STATUS_OK},
1220 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_OK},
1221 };
1222
1223 for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1224 const char *const doc = cases[i].doc;
1225 const enum XML_Status expectedStatus = cases[i].expectedStatus;
1226 set_subtest("%s", doc);
1227
1228 XML_Parser parser = XML_ParserCreate(NULL);
1229 assert_true(parser != NULL);
1230
1231 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
1232 assert_true(ext_parser != NULL);
1233
1234 const enum XML_Status actualStatus
1235 = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE);
1236
1237 assert_true(actualStatus == expectedStatus);
1238 if (actualStatus != XML_STATUS_OK) {
1239 assert_true(XML_GetErrorCode(ext_parser)
1240 == XML_ERROR_RECURSIVE_ENTITY_REF);
1241 }
1242
1243 XML_ParserFree(ext_parser);
1244 XML_ParserFree(parser);
1245 }
1246 }
1247 END_TEST
1248
1249 /* Test incomplete external entities are faulted */
START_TEST(test_ext_entity_invalid_parse)1250 START_TEST(test_ext_entity_invalid_parse) {
1251 const char *text = "<!DOCTYPE doc [\n"
1252 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1253 "]>\n"
1254 "<doc>&en;</doc>";
1255 const ExtFaults faults[]
1256 = {{"<", "Incomplete element declaration not faulted", NULL,
1257 XML_ERROR_UNCLOSED_TOKEN},
1258 {"<\xe2\x82", /* First two bytes of a three-byte char */
1259 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
1260 {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL,
1261 XML_ERROR_PARTIAL_CHAR},
1262 {NULL, NULL, NULL, XML_ERROR_NONE}};
1263 const ExtFaults *fault = faults;
1264
1265 for (; fault->parse_text != NULL; fault++) {
1266 set_subtest("\"%s\"", fault->parse_text);
1267 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1268 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1269 XML_SetUserData(g_parser, (void *)fault);
1270 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1271 "Parser did not report external entity error");
1272 XML_ParserReset(g_parser, NULL);
1273 }
1274 }
1275 END_TEST
1276
1277 /* Regression test for SF bug #483514. */
START_TEST(test_dtd_default_handling)1278 START_TEST(test_dtd_default_handling) {
1279 const char *text = "<!DOCTYPE doc [\n"
1280 "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
1281 "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
1282 "<!ELEMENT doc EMPTY>\n"
1283 "<!ATTLIST doc a CDATA #IMPLIED>\n"
1284 "<?pi in dtd?>\n"
1285 "<!--comment in dtd-->\n"
1286 "]><doc/>";
1287
1288 XML_SetDefaultHandler(g_parser, accumulate_characters);
1289 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
1290 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
1291 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
1292 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
1293 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
1294 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
1295 XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler);
1296 XML_SetCommentHandler(g_parser, dummy_comment_handler);
1297 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1298 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1299 run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>"));
1300 }
1301 END_TEST
1302
1303 /* Test handling of attribute declarations */
START_TEST(test_dtd_attr_handling)1304 START_TEST(test_dtd_attr_handling) {
1305 const char *prolog = "<!DOCTYPE doc [\n"
1306 "<!ELEMENT doc EMPTY>\n";
1307 AttTest attr_data[]
1308 = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
1309 "]>"
1310 "<doc a='two'/>",
1311 XCS("doc"), XCS("a"),
1312 XCS("(one|two|three)"), /* Extraneous spaces will be removed */
1313 NULL, XML_TRUE},
1314 {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
1315 "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
1316 "]>"
1317 "<doc/>",
1318 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE},
1319 {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
1320 "]>"
1321 "<doc/>",
1322 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE},
1323 {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
1324 "]>"
1325 "<doc/>",
1326 XCS("doc"), XCS("a"), XCS("CDATA"),
1327 #ifdef XML_UNICODE
1328 XCS("\x06f2"),
1329 #else
1330 XCS("\xdb\xb2"),
1331 #endif
1332 XML_FALSE},
1333 {NULL, NULL, NULL, NULL, NULL, XML_FALSE}};
1334 AttTest *test;
1335
1336 for (test = attr_data; test->definition != NULL; test++) {
1337 set_subtest("%s", test->definition);
1338 XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler);
1339 XML_SetUserData(g_parser, test);
1340 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
1341 XML_FALSE)
1342 == XML_STATUS_ERROR)
1343 xml_failure(g_parser);
1344 if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition,
1345 (int)strlen(test->definition), XML_TRUE)
1346 == XML_STATUS_ERROR)
1347 xml_failure(g_parser);
1348 XML_ParserReset(g_parser, NULL);
1349 }
1350 }
1351 END_TEST
1352
1353 /* See related SF bug #673791.
1354 When namespace processing is enabled, setting the namespace URI for
1355 a prefix is not allowed; this test ensures that it *is* allowed
1356 when namespace processing is not enabled.
1357 (See Namespaces in XML, section 2.)
1358 */
START_TEST(test_empty_ns_without_namespaces)1359 START_TEST(test_empty_ns_without_namespaces) {
1360 const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
1361 " <e xmlns:prefix=''/>\n"
1362 "</doc>";
1363
1364 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1365 == XML_STATUS_ERROR)
1366 xml_failure(g_parser);
1367 }
1368 END_TEST
1369
1370 /* Regression test for SF bug #824420.
1371 Checks that an xmlns:prefix attribute set in an attribute's default
1372 value isn't misinterpreted.
1373 */
START_TEST(test_ns_in_attribute_default_without_namespaces)1374 START_TEST(test_ns_in_attribute_default_without_namespaces) {
1375 const char *text = "<!DOCTYPE e:element [\n"
1376 " <!ATTLIST e:element\n"
1377 " xmlns:e CDATA 'http://example.org/'>\n"
1378 " ]>\n"
1379 "<e:element/>";
1380
1381 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1382 == XML_STATUS_ERROR)
1383 xml_failure(g_parser);
1384 }
1385 END_TEST
1386
/* Regression test for SF bug #1515266: missing check of stopped
   parser in doContent() 'for' loop. */
START_TEST(test_stop_parser_between_char_data_calls)1389 START_TEST(test_stop_parser_between_char_data_calls) {
1390 /* The sample data must be big enough that there are two calls to
1391 the character data handler from within the inner "for" loop of
1392 the XML_TOK_DATA_CHARS case in doContent(), and the character
1393 handler must stop the parser and clear the character data
1394 handler.
1395 */
1396 const char *text = long_character_data_text;
1397
1398 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1399 g_resumable = XML_FALSE;
1400 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1401 != XML_STATUS_ERROR)
1402 xml_failure(g_parser);
1403 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
1404 xml_failure(g_parser);
1405 }
1406 END_TEST
1407
/* Regression test for SF bug #1515266: missing check of stopped
   parser in doContent() 'for' loop. */
START_TEST(test_suspend_parser_between_char_data_calls)1410 START_TEST(test_suspend_parser_between_char_data_calls) {
1411 /* The sample data must be big enough that there are two calls to
1412 the character data handler from within the inner "for" loop of
1413 the XML_TOK_DATA_CHARS case in doContent(), and the character
1414 handler must stop the parser and clear the character data
1415 handler.
1416 */
1417 const char *text = long_character_data_text;
1418
1419 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1420 g_resumable = XML_TRUE;
1421 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1422 != XML_STATUS_SUSPENDED)
1423 xml_failure(g_parser);
1424 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1425 xml_failure(g_parser);
1426 /* Try parsing directly */
1427 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1428 != XML_STATUS_ERROR)
1429 fail("Attempt to continue parse while suspended not faulted");
1430 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
1431 fail("Suspended parse not faulted with correct error");
1432 }
1433 END_TEST
1434
1435 /* Test repeated calls to XML_StopParser are handled correctly */
START_TEST(test_repeated_stop_parser_between_char_data_calls)1436 START_TEST(test_repeated_stop_parser_between_char_data_calls) {
1437 const char *text = long_character_data_text;
1438
1439 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1440 g_resumable = XML_FALSE;
1441 g_abortable = XML_FALSE;
1442 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1443 != XML_STATUS_ERROR)
1444 fail("Failed to double-stop parser");
1445
1446 XML_ParserReset(g_parser, NULL);
1447 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1448 g_resumable = XML_TRUE;
1449 g_abortable = XML_FALSE;
1450 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1451 != XML_STATUS_SUSPENDED)
1452 fail("Failed to double-suspend parser");
1453
1454 XML_ParserReset(g_parser, NULL);
1455 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1456 g_resumable = XML_TRUE;
1457 g_abortable = XML_TRUE;
1458 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1459 != XML_STATUS_ERROR)
1460 fail("Failed to suspend-abort parser");
1461 }
1462 END_TEST
1463
START_TEST(test_good_cdata_ascii)1464 START_TEST(test_good_cdata_ascii) {
1465 const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
1466 const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>");
1467
1468 CharData storage;
1469 CharData_Init(&storage);
1470 XML_SetUserData(g_parser, &storage);
1471 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1472 /* Add start and end handlers for coverage */
1473 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1474 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1475
1476 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1477 == XML_STATUS_ERROR)
1478 xml_failure(g_parser);
1479 CharData_CheckXMLChars(&storage, expected);
1480
1481 /* Try again, this time with a default handler */
1482 XML_ParserReset(g_parser, NULL);
1483 CharData_Init(&storage);
1484 XML_SetUserData(g_parser, &storage);
1485 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1486 XML_SetDefaultHandler(g_parser, dummy_default_handler);
1487
1488 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1489 == XML_STATUS_ERROR)
1490 xml_failure(g_parser);
1491 CharData_CheckXMLChars(&storage, expected);
1492 }
1493 END_TEST
1494
START_TEST(test_good_cdata_utf16)1495 START_TEST(test_good_cdata_utf16) {
1496 /* Test data is:
1497 * <?xml version='1.0' encoding='utf-16'?>
1498 * <a><![CDATA[hello]]></a>
1499 */
1500 const char text[]
1501 = "\0<\0?\0x\0m\0l\0"
1502 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1503 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1504 "1\0"
1505 "6\0'"
1506 "\0?\0>\0\n"
1507 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
1508 const XML_Char *expected = XCS("hello");
1509
1510 CharData storage;
1511 CharData_Init(&storage);
1512 XML_SetUserData(g_parser, &storage);
1513 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1514
1515 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1516 == XML_STATUS_ERROR)
1517 xml_failure(g_parser);
1518 CharData_CheckXMLChars(&storage, expected);
1519 }
1520 END_TEST
1521
START_TEST(test_good_cdata_utf16_le)1522 START_TEST(test_good_cdata_utf16_le) {
1523 /* Test data is:
1524 * <?xml version='1.0' encoding='utf-16'?>
1525 * <a><![CDATA[hello]]></a>
1526 */
1527 const char text[]
1528 = "<\0?\0x\0m\0l\0"
1529 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1530 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1531 "1\0"
1532 "6\0'"
1533 "\0?\0>\0\n"
1534 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
1535 const XML_Char *expected = XCS("hello");
1536
1537 CharData storage;
1538 CharData_Init(&storage);
1539 XML_SetUserData(g_parser, &storage);
1540 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1541
1542 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1543 == XML_STATUS_ERROR)
1544 xml_failure(g_parser);
1545 CharData_CheckXMLChars(&storage, expected);
1546 }
1547 END_TEST
1548
1549 /* Test UTF16 conversion of a long cdata string */
1550
1551 /* 16 characters: handy macro to reduce visual clutter */
1552 #define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"
1553
START_TEST(test_long_cdata_utf16)1554 START_TEST(test_long_cdata_utf16) {
1555 /* Test data is:
1556 * <?xlm version='1.0' encoding='utf-16'?>
1557 * <a><![CDATA[
1558 * ABCDEFGHIJKLMNOP
1559 * ]]></a>
1560 */
1561 const char text[]
1562 = "\0<\0?\0x\0m\0l\0 "
1563 "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
1564 "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
1565 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1566 /* 64 characters per line */
1567 /* clang-format off */
1568 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1569 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1570 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1571 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1572 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1573 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1574 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1575 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1576 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1577 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1578 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1579 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1580 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1581 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1582 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1583 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1584 A_TO_P_IN_UTF16
1585 /* clang-format on */
1586 "\0]\0]\0>\0<\0/\0a\0>";
1587 const XML_Char *expected =
1588 /* clang-format off */
1589 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1590 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1591 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1592 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1593 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1594 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1595 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1596 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1597 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1598 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1599 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1600 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1601 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1602 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1603 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1604 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1605 XCS("ABCDEFGHIJKLMNOP");
1606 /* clang-format on */
1607 CharData storage;
1608 void *buffer;
1609
1610 CharData_Init(&storage);
1611 XML_SetUserData(g_parser, &storage);
1612 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1613 buffer = XML_GetBuffer(g_parser, sizeof(text) - 1);
1614 if (buffer == NULL)
1615 fail("Could not allocate parse buffer");
1616 assert(buffer != NULL);
1617 memcpy(buffer, text, sizeof(text) - 1);
1618 if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
1619 xml_failure(g_parser);
1620 CharData_CheckXMLChars(&storage, expected);
1621 }
1622 END_TEST
1623
1624 /* Test handling of multiple unit UTF-16 characters */
START_TEST(test_multichar_cdata_utf16)1625 START_TEST(test_multichar_cdata_utf16) {
1626 /* Test data is:
1627 * <?xml version='1.0' encoding='utf-16'?>
1628 * <a><![CDATA[{MINIM}{CROTCHET}]]></a>
1629 *
1630 * where {MINIM} is U+1d15e (a minim or half-note)
1631 * UTF-16: 0xd834 0xdd5e
1632 * UTF-8: 0xf0 0x9d 0x85 0x9e
1633 * and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
1634 * UTF-16: 0xd834 0xdd5f
1635 * UTF-8: 0xf0 0x9d 0x85 0x9f
1636 */
1637 const char text[] = "\0<\0?\0x\0m\0l\0"
1638 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1639 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1640 "1\0"
1641 "6\0'"
1642 "\0?\0>\0\n"
1643 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1644 "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
1645 "\0]\0]\0>\0<\0/\0a\0>";
1646 #ifdef XML_UNICODE
1647 const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f");
1648 #else
1649 const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
1650 #endif
1651 CharData storage;
1652
1653 CharData_Init(&storage);
1654 XML_SetUserData(g_parser, &storage);
1655 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1656
1657 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1658 == XML_STATUS_ERROR)
1659 xml_failure(g_parser);
1660 CharData_CheckXMLChars(&storage, expected);
1661 }
1662 END_TEST
1663
1664 /* Test that an element name with a UTF-16 surrogate pair is rejected */
START_TEST(test_utf16_bad_surrogate_pair)1665 START_TEST(test_utf16_bad_surrogate_pair) {
1666 /* Test data is:
1667 * <?xml version='1.0' encoding='utf-16'?>
1668 * <a><![CDATA[{BADLINB}]]></a>
1669 *
1670 * where {BADLINB} is U+10000 (the first Linear B character)
1671 * with the UTF-16 surrogate pair in the wrong order, i.e.
1672 * 0xdc00 0xd800
1673 */
1674 const char text[] = "\0<\0?\0x\0m\0l\0"
1675 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1676 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1677 "1\0"
1678 "6\0'"
1679 "\0?\0>\0\n"
1680 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1681 "\xdc\x00\xd8\x00"
1682 "\0]\0]\0>\0<\0/\0a\0>";
1683
1684 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1685 != XML_STATUS_ERROR)
1686 fail("Reversed UTF-16 surrogate pair not faulted");
1687 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
1688 xml_failure(g_parser);
1689 }
1690 END_TEST
1691
START_TEST(test_bad_cdata)1692 START_TEST(test_bad_cdata) {
1693 struct CaseData {
1694 const char *text;
1695 enum XML_Error expectedError;
1696 };
1697
1698 struct CaseData cases[]
1699 = {{"<a><", XML_ERROR_UNCLOSED_TOKEN},
1700 {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
1701 {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
1702 {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
1703 {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
1704 {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
1705 {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
1706 {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
1707
1708 {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1709 {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1710 {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1711
1712 {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
1713 {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1714 {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1715 {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
1716 {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
1717 {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
1718 {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
1719
1720 {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1721 {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1722 {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}};
1723
1724 size_t i = 0;
1725 for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1726 set_subtest("%s", cases[i].text);
1727 const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
1728 g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
1729 const enum XML_Error actualError = XML_GetErrorCode(g_parser);
1730
1731 assert(actualStatus == XML_STATUS_ERROR);
1732
1733 if (actualError != cases[i].expectedError) {
1734 char message[100];
1735 snprintf(message, sizeof(message),
1736 "Expected error %d but got error %d for case %u: \"%s\"\n",
1737 cases[i].expectedError, actualError, (unsigned int)i + 1,
1738 cases[i].text);
1739 fail(message);
1740 }
1741
1742 XML_ParserReset(g_parser, NULL);
1743 }
1744 }
1745 END_TEST
1746
1747 /* Test failures in UTF-16 CDATA */
START_TEST(test_bad_cdata_utf16)1748 START_TEST(test_bad_cdata_utf16) {
1749 struct CaseData {
1750 size_t text_bytes;
1751 const char *text;
1752 enum XML_Error expected_error;
1753 };
1754
1755 const char prolog[] = "\0<\0?\0x\0m\0l\0"
1756 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1757 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1758 "1\0"
1759 "6\0'"
1760 "\0?\0>\0\n"
1761 "\0<\0a\0>";
1762 struct CaseData cases[] = {
1763 {1, "\0", XML_ERROR_UNCLOSED_TOKEN},
1764 {2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
1765 {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
1766 {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
1767 {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
1768 {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
1769 {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
1770 {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
1771 {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
1772 {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
1773 {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
1774 {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
1775 {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1776 {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
1777 {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
1778 {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
1779 {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1780 {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1781 {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION},
1782 {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION},
1783 /* Now add a four-byte UTF-16 character */
1784 {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
1785 XML_ERROR_UNCLOSED_CDATA_SECTION},
1786 {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR},
1787 {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
1788 XML_ERROR_PARTIAL_CHAR},
1789 {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
1790 XML_ERROR_UNCLOSED_CDATA_SECTION}};
1791 size_t i;
1792
1793 for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1794 set_subtest("case %lu", (long unsigned)(i + 1));
1795 enum XML_Status actual_status;
1796 enum XML_Error actual_error;
1797
1798 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1,
1799 XML_FALSE)
1800 == XML_STATUS_ERROR)
1801 xml_failure(g_parser);
1802 actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text,
1803 (int)cases[i].text_bytes, XML_TRUE);
1804 assert(actual_status == XML_STATUS_ERROR);
1805 actual_error = XML_GetErrorCode(g_parser);
1806 if (actual_error != cases[i].expected_error) {
1807 char message[1024];
1808
1809 snprintf(message, sizeof(message),
1810 "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR
1811 ") for case %lu\n",
1812 cases[i].expected_error,
1813 XML_ErrorString(cases[i].expected_error), actual_error,
1814 XML_ErrorString(actual_error), (long unsigned)(i + 1));
1815 fail(message);
1816 }
1817 XML_ParserReset(g_parser, NULL);
1818 }
1819 }
1820 END_TEST
1821
1822 /* Test stopping the parser in cdata handler */
START_TEST(test_stop_parser_between_cdata_calls)1823 START_TEST(test_stop_parser_between_cdata_calls) {
1824 const char *text = long_cdata_text;
1825
1826 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1827 g_resumable = XML_FALSE;
1828 expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler");
1829 }
1830 END_TEST
1831
1832 /* Test suspending the parser in cdata handler */
START_TEST(test_suspend_parser_between_cdata_calls)1833 START_TEST(test_suspend_parser_between_cdata_calls) {
1834 const char *text = long_cdata_text;
1835 enum XML_Status result;
1836
1837 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1838 g_resumable = XML_TRUE;
1839 result = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
1840 if (result != XML_STATUS_SUSPENDED) {
1841 if (result == XML_STATUS_ERROR)
1842 xml_failure(g_parser);
1843 fail("Parse not suspended in CDATA handler");
1844 }
1845 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1846 xml_failure(g_parser);
1847 }
1848 END_TEST
1849
1850 /* Test memory allocation functions */
START_TEST(test_memory_allocation)1851 START_TEST(test_memory_allocation) {
1852 char *buffer = (char *)XML_MemMalloc(g_parser, 256);
1853 char *p;
1854
1855 if (buffer == NULL) {
1856 fail("Allocation failed");
1857 } else {
1858 /* Try writing to memory; some OSes try to cheat! */
1859 buffer[0] = 'T';
1860 buffer[1] = 'E';
1861 buffer[2] = 'S';
1862 buffer[3] = 'T';
1863 buffer[4] = '\0';
1864 if (strcmp(buffer, "TEST") != 0) {
1865 fail("Memory not writable");
1866 } else {
1867 p = (char *)XML_MemRealloc(g_parser, buffer, 512);
1868 if (p == NULL) {
1869 fail("Reallocation failed");
1870 } else {
1871 /* Write again, just to be sure */
1872 buffer = p;
1873 buffer[0] = 'V';
1874 if (strcmp(buffer, "VEST") != 0) {
1875 fail("Reallocated memory not writable");
1876 }
1877 }
1878 }
1879 XML_MemFree(g_parser, buffer);
1880 }
1881 }
1882 END_TEST
1883
1884 /* Test XML_DefaultCurrent() passes handling on correctly */
START_TEST(test_default_current)1885 START_TEST(test_default_current) {
1886 const char *text = "<doc>hell]</doc>";
1887 const char *entity_text = "<!DOCTYPE doc [\n"
1888 "<!ENTITY entity '%'>\n"
1889 "]>\n"
1890 "<doc>&entity;</doc>";
1891
1892 set_subtest("with defaulting");
1893 {
1894 struct handler_record_list storage;
1895 storage.count = 0;
1896 XML_SetDefaultHandler(g_parser, record_default_handler);
1897 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1898 XML_SetUserData(g_parser, &storage);
1899 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1900 == XML_STATUS_ERROR)
1901 xml_failure(g_parser);
1902 int i = 0;
1903 assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1904 // we should have gotten one or more cdata callbacks, totaling 5 chars
1905 int cdata_len_remaining = 5;
1906 while (cdata_len_remaining > 0) {
1907 const struct handler_record_entry *c_entry
1908 = handler_record_get(&storage, i++);
1909 assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
1910 assert_true(c_entry->arg > 0);
1911 assert_true(c_entry->arg <= cdata_len_remaining);
1912 cdata_len_remaining -= c_entry->arg;
1913 // default handler must follow, with the exact same len argument.
1914 assert_record_handler_called(&storage, i++, "record_default_handler",
1915 c_entry->arg);
1916 }
1917 assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1918 assert_true(storage.count == i);
1919 }
1920
1921 /* Again, without the defaulting */
1922 set_subtest("no defaulting");
1923 {
1924 struct handler_record_list storage;
1925 storage.count = 0;
1926 XML_ParserReset(g_parser, NULL);
1927 XML_SetDefaultHandler(g_parser, record_default_handler);
1928 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
1929 XML_SetUserData(g_parser, &storage);
1930 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1931 == XML_STATUS_ERROR)
1932 xml_failure(g_parser);
1933 int i = 0;
1934 assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1935 // we should have gotten one or more cdata callbacks, totaling 5 chars
1936 int cdata_len_remaining = 5;
1937 while (cdata_len_remaining > 0) {
1938 const struct handler_record_entry *c_entry
1939 = handler_record_get(&storage, i++);
1940 assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
1941 assert_true(c_entry->arg > 0);
1942 assert_true(c_entry->arg <= cdata_len_remaining);
1943 cdata_len_remaining -= c_entry->arg;
1944 }
1945 assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1946 assert_true(storage.count == i);
1947 }
1948
1949 /* Now with an internal entity to complicate matters */
1950 set_subtest("with internal entity");
1951 {
1952 struct handler_record_list storage;
1953 storage.count = 0;
1954 XML_ParserReset(g_parser, NULL);
1955 XML_SetDefaultHandler(g_parser, record_default_handler);
1956 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1957 XML_SetUserData(g_parser, &storage);
1958 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1959 XML_TRUE)
1960 == XML_STATUS_ERROR)
1961 xml_failure(g_parser);
1962 /* The default handler suppresses the entity */
1963 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
1964 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
1965 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
1966 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
1967 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
1968 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
1969 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
1970 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
1971 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
1972 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
1973 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
1974 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
1975 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
1976 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
1977 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
1978 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
1979 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
1980 assert_record_handler_called(&storage, 17, "record_default_handler", 8);
1981 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
1982 assert_true(storage.count == 19);
1983 }
1984
1985 /* Again, with a skip handler */
1986 set_subtest("with skip handler");
1987 {
1988 struct handler_record_list storage;
1989 storage.count = 0;
1990 XML_ParserReset(g_parser, NULL);
1991 XML_SetDefaultHandler(g_parser, record_default_handler);
1992 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1993 XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
1994 XML_SetUserData(g_parser, &storage);
1995 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1996 XML_TRUE)
1997 == XML_STATUS_ERROR)
1998 xml_failure(g_parser);
1999 /* The default handler suppresses the entity */
2000 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2001 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2002 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2003 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2004 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2005 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2006 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2007 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2008 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2009 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2010 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2011 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2012 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2013 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2014 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2015 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2016 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2017 assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
2018 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2019 assert_true(storage.count == 19);
2020 }
2021
2022 /* This time, allow the entity through */
2023 set_subtest("allow entity");
2024 {
2025 struct handler_record_list storage;
2026 storage.count = 0;
2027 XML_ParserReset(g_parser, NULL);
2028 XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2029 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2030 XML_SetUserData(g_parser, &storage);
2031 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2032 XML_TRUE)
2033 == XML_STATUS_ERROR)
2034 xml_failure(g_parser);
2035 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2036 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2037 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2038 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2039 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2040 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2041 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2042 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2043 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2044 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2045 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2046 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2047 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2048 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2049 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2050 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2051 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2052 assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
2053 assert_record_handler_called(&storage, 18, "record_default_handler", 1);
2054 assert_record_handler_called(&storage, 19, "record_default_handler", 6);
2055 assert_true(storage.count == 20);
2056 }
2057
2058 /* Finally, without passing the cdata to the default handler */
2059 set_subtest("not passing cdata");
2060 {
2061 struct handler_record_list storage;
2062 storage.count = 0;
2063 XML_ParserReset(g_parser, NULL);
2064 XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2065 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2066 XML_SetUserData(g_parser, &storage);
2067 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2068 XML_TRUE)
2069 == XML_STATUS_ERROR)
2070 xml_failure(g_parser);
2071 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2072 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2073 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2074 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2075 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2076 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2077 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2078 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2079 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2080 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2081 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2082 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2083 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2084 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2085 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2086 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2087 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2088 assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
2089 1);
2090 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2091 assert_true(storage.count == 19);
2092 }
2093 }
2094 END_TEST
2095
2096 /* Test DTD element parsing code paths */
START_TEST(test_dtd_elements)2097 START_TEST(test_dtd_elements) {
2098 const char *text = "<!DOCTYPE doc [\n"
2099 "<!ELEMENT doc (chapter)>\n"
2100 "<!ELEMENT chapter (#PCDATA)>\n"
2101 "]>\n"
2102 "<doc><chapter>Wombats are go</chapter></doc>";
2103
2104 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
2105 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2106 == XML_STATUS_ERROR)
2107 xml_failure(g_parser);
2108 }
2109 END_TEST
2110
2111 static void XMLCALL
element_decl_check_model(void * userData,const XML_Char * name,XML_Content * model)2112 element_decl_check_model(void *userData, const XML_Char *name,
2113 XML_Content *model) {
2114 UNUSED_P(userData);
2115 uint32_t errorFlags = 0;
2116
2117 /* Expected model array structure is this:
2118 * [0] (type 6, quant 0)
2119 * [1] (type 5, quant 0)
2120 * [3] (type 4, quant 0, name "bar")
2121 * [4] (type 4, quant 0, name "foo")
2122 * [5] (type 4, quant 3, name "xyz")
2123 * [2] (type 4, quant 2, name "zebra")
2124 */
2125 errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
2126 errorFlags |= ((model != NULL) ? 0 : (1u << 1));
2127
2128 if (model != NULL) {
2129 errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
2130 errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
2131 errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
2132 errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
2133 errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
2134
2135 errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
2136 errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
2137 errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
2138 errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
2139 errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
2140
2141 errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
2142 errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
2143 errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
2144 errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
2145 errorFlags
2146 |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
2147
2148 errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
2149 errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
2150 errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
2151 errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
2152 errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
2153
2154 errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
2155 errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
2156 errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
2157 errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
2158 errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
2159
2160 errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
2161 errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
2162 errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
2163 errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
2164 errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
2165 }
2166
2167 XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
2168 XML_FreeContentModel(g_parser, model);
2169 }
2170
START_TEST(test_dtd_elements_nesting)2171 START_TEST(test_dtd_elements_nesting) {
2172 // Payload inspired by a test in Perl's XML::Parser
2173 const char *text = "<!DOCTYPE foo [\n"
2174 "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
2175 "]>\n"
2176 "<foo/>";
2177
2178 XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
2179
2180 XML_SetElementDeclHandler(g_parser, element_decl_check_model);
2181 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2182 == XML_STATUS_ERROR)
2183 xml_failure(g_parser);
2184
2185 if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
2186 fail("Element declaration model regression detected");
2187 }
2188 END_TEST
2189
2190 /* Test foreign DTD handling */
START_TEST(test_set_foreign_dtd)2191 START_TEST(test_set_foreign_dtd) {
2192 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
2193 const char *text2 = "<doc>&entity;</doc>";
2194 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2195
2196 /* Check hash salt is passed through too */
2197 XML_SetHashSalt(g_parser, 0x12345678);
2198 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2199 XML_SetUserData(g_parser, &test_data);
2200 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2201 /* Add a default handler to exercise more code paths */
2202 XML_SetDefaultHandler(g_parser, dummy_default_handler);
2203 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2204 fail("Could not set foreign DTD");
2205 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2206 == XML_STATUS_ERROR)
2207 xml_failure(g_parser);
2208
2209 /* Ensure that trying to set the DTD after parsing has started
2210 * is faulted, even if it's the same setting.
2211 */
2212 if (XML_UseForeignDTD(g_parser, XML_TRUE)
2213 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2214 fail("Failed to reject late foreign DTD setting");
2215 /* Ditto for the hash salt */
2216 if (XML_SetHashSalt(g_parser, 0x23456789))
2217 fail("Failed to reject late hash salt change");
2218
2219 /* Now finish the parse */
2220 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2221 == XML_STATUS_ERROR)
2222 xml_failure(g_parser);
2223 }
2224 END_TEST
2225
2226 /* Test foreign DTD handling with a failing NotStandalone handler */
START_TEST(test_foreign_dtd_not_standalone)2227 START_TEST(test_foreign_dtd_not_standalone) {
2228 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2229 "<doc>&entity;</doc>";
2230 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2231
2232 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2233 XML_SetUserData(g_parser, &test_data);
2234 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2235 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
2236 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2237 fail("Could not set foreign DTD");
2238 expect_failure(text, XML_ERROR_NOT_STANDALONE,
2239 "NotStandalonehandler failed to reject");
2240 }
2241 END_TEST
2242
2243 /* Test invalid character in a foreign DTD is faulted */
START_TEST(test_invalid_foreign_dtd)2244 START_TEST(test_invalid_foreign_dtd) {
2245 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2246 "<doc>&entity;</doc>";
2247 ExtFaults test_data
2248 = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN};
2249
2250 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2251 XML_SetUserData(g_parser, &test_data);
2252 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
2253 XML_UseForeignDTD(g_parser, XML_TRUE);
2254 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2255 "Bad DTD should not have been accepted");
2256 }
2257 END_TEST
2258
2259 /* Test foreign DTD use with a doctype */
START_TEST(test_foreign_dtd_with_doctype)2260 START_TEST(test_foreign_dtd_with_doctype) {
2261 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
2262 "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
2263 const char *text2 = "<doc>&entity;</doc>";
2264 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2265
2266 /* Check hash salt is passed through too */
2267 XML_SetHashSalt(g_parser, 0x12345678);
2268 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2269 XML_SetUserData(g_parser, &test_data);
2270 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2271 /* Add a default handler to exercise more code paths */
2272 XML_SetDefaultHandler(g_parser, dummy_default_handler);
2273 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2274 fail("Could not set foreign DTD");
2275 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2276 == XML_STATUS_ERROR)
2277 xml_failure(g_parser);
2278
2279 /* Ensure that trying to set the DTD after parsing has started
2280 * is faulted, even if it's the same setting.
2281 */
2282 if (XML_UseForeignDTD(g_parser, XML_TRUE)
2283 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2284 fail("Failed to reject late foreign DTD setting");
2285 /* Ditto for the hash salt */
2286 if (XML_SetHashSalt(g_parser, 0x23456789))
2287 fail("Failed to reject late hash salt change");
2288
2289 /* Now finish the parse */
2290 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2291 == XML_STATUS_ERROR)
2292 xml_failure(g_parser);
2293 }
2294 END_TEST
2295
2296 /* Test XML_UseForeignDTD with no external subset present */
START_TEST(test_foreign_dtd_without_external_subset)2297 START_TEST(test_foreign_dtd_without_external_subset) {
2298 const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n"
2299 "<doc>&foo;</doc>";
2300
2301 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2302 XML_SetUserData(g_parser, NULL);
2303 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2304 XML_UseForeignDTD(g_parser, XML_TRUE);
2305 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2306 == XML_STATUS_ERROR)
2307 xml_failure(g_parser);
2308 }
2309 END_TEST
2310
START_TEST(test_empty_foreign_dtd)2311 START_TEST(test_empty_foreign_dtd) {
2312 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2313 "<doc>&entity;</doc>";
2314
2315 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2316 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2317 XML_UseForeignDTD(g_parser, XML_TRUE);
2318 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
2319 "Undefined entity not faulted");
2320 }
2321 END_TEST
2322
2323 /* Test XML Base is set and unset appropriately */
START_TEST(test_set_base)2324 START_TEST(test_set_base) {
2325 const XML_Char *old_base;
2326 const XML_Char *new_base = XCS("/local/file/name.xml");
2327
2328 old_base = XML_GetBase(g_parser);
2329 if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK)
2330 fail("Unable to set base");
2331 if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0)
2332 fail("Base setting not correct");
2333 if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK)
2334 fail("Unable to NULL base");
2335 if (XML_GetBase(g_parser) != NULL)
2336 fail("Base setting not nulled");
2337 XML_SetBase(g_parser, old_base);
2338 }
2339 END_TEST
2340
2341 /* Test attribute counts, indexing, etc */
START_TEST(test_attributes)2342 START_TEST(test_attributes) {
2343 const char *text = "<!DOCTYPE doc [\n"
2344 "<!ELEMENT doc (tag)>\n"
2345 "<!ATTLIST doc id ID #REQUIRED>\n"
2346 "]>"
2347 "<doc a='1' id='one' b='2'>"
2348 "<tag c='3'/>"
2349 "</doc>";
2350 AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
2351 {XCS("b"), XCS("2")},
2352 {XCS("id"), XCS("one")},
2353 {NULL, NULL}};
2354 AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
2355 ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL},
2356 {XCS("tag"), 1, NULL, NULL},
2357 {NULL, 0, NULL, NULL}};
2358 info[0].attributes = doc_info;
2359 info[1].attributes = tag_info;
2360
2361 XML_SetStartElementHandler(g_parser, counting_start_element_handler);
2362 XML_SetUserData(g_parser, info);
2363 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2364 == XML_STATUS_ERROR)
2365 xml_failure(g_parser);
2366 }
2367 END_TEST
2368
2369 /* Test reset works correctly in the middle of processing an internal
2370 * entity. Exercises some obscure code in XML_ParserReset().
2371 */
START_TEST(test_reset_in_entity)2372 START_TEST(test_reset_in_entity) {
2373 const char *text = "<!DOCTYPE doc [\n"
2374 "<!ENTITY wombat 'wom'>\n"
2375 "<!ENTITY entity 'hi &wom; there'>\n"
2376 "]>\n"
2377 "<doc>&entity;</doc>";
2378 XML_ParsingStatus status;
2379
2380 g_resumable = XML_TRUE;
2381 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2382 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2383 == XML_STATUS_ERROR)
2384 xml_failure(g_parser);
2385 XML_GetParsingStatus(g_parser, &status);
2386 if (status.parsing != XML_SUSPENDED)
2387 fail("Parsing status not SUSPENDED");
2388 XML_ParserReset(g_parser, NULL);
2389 XML_GetParsingStatus(g_parser, &status);
2390 if (status.parsing != XML_INITIALIZED)
2391 fail("Parsing status doesn't reset to INITIALIZED");
2392 }
2393 END_TEST
2394
2395 /* Test that resume correctly passes through parse errors */
START_TEST(test_resume_invalid_parse)2396 START_TEST(test_resume_invalid_parse) {
2397 const char *text = "<doc>Hello</doc"; /* Missing closing wedge */
2398
2399 g_resumable = XML_TRUE;
2400 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2401 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2402 == XML_STATUS_ERROR)
2403 xml_failure(g_parser);
2404 if (XML_ResumeParser(g_parser) == XML_STATUS_OK)
2405 fail("Resumed invalid parse not faulted");
2406 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN)
2407 fail("Invalid parse not correctly faulted");
2408 }
2409 END_TEST
2410
2411 /* Test that re-suspended parses are correctly passed through */
START_TEST(test_resume_resuspended)2412 START_TEST(test_resume_resuspended) {
2413 const char *text = "<doc>Hello<meep/>world</doc>";
2414
2415 g_resumable = XML_TRUE;
2416 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2417 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2418 == XML_STATUS_ERROR)
2419 xml_failure(g_parser);
2420 g_resumable = XML_TRUE;
2421 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2422 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
2423 fail("Resumption not suspended");
2424 /* This one should succeed and finish up */
2425 if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
2426 xml_failure(g_parser);
2427 }
2428 END_TEST
2429
2430 /* Test that CDATA shows up correctly through a default handler */
START_TEST(test_cdata_default)2431 START_TEST(test_cdata_default) {
2432 const char *text = "<doc><![CDATA[Hello\nworld]]></doc>";
2433 const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>");
2434 CharData storage;
2435
2436 CharData_Init(&storage);
2437 XML_SetUserData(g_parser, &storage);
2438 XML_SetDefaultHandler(g_parser, accumulate_characters);
2439
2440 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2441 == XML_STATUS_ERROR)
2442 xml_failure(g_parser);
2443 CharData_CheckXMLChars(&storage, expected);
2444 }
2445 END_TEST
2446
2447 /* Test resetting a subordinate parser does exactly nothing */
START_TEST(test_subordinate_reset)2448 START_TEST(test_subordinate_reset) {
2449 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2450 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2451 "<doc>&entity;</doc>";
2452
2453 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2454 XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter);
2455 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2456 == XML_STATUS_ERROR)
2457 xml_failure(g_parser);
2458 }
2459 END_TEST
2460
2461 /* Test suspending a subordinate parser */
START_TEST(test_subordinate_suspend)2462 START_TEST(test_subordinate_suspend) {
2463 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2464 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2465 "<doc>&entity;</doc>";
2466
2467 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2468 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender);
2469 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2470 == XML_STATUS_ERROR)
2471 xml_failure(g_parser);
2472 }
2473 END_TEST
2474
2475 /* Test suspending a subordinate parser from an XML declaration */
2476 /* Increases code coverage of the tests */
2477
START_TEST(test_subordinate_xdecl_suspend)2478 START_TEST(test_subordinate_xdecl_suspend) {
2479 const char *text
2480 = "<!DOCTYPE doc [\n"
2481 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2482 "]>\n"
2483 "<doc>&entity;</doc>";
2484
2485 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2486 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2487 g_resumable = XML_TRUE;
2488 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2489 == XML_STATUS_ERROR)
2490 xml_failure(g_parser);
2491 }
2492 END_TEST
2493
START_TEST(test_subordinate_xdecl_abort)2494 START_TEST(test_subordinate_xdecl_abort) {
2495 const char *text
2496 = "<!DOCTYPE doc [\n"
2497 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2498 "]>\n"
2499 "<doc>&entity;</doc>";
2500
2501 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2502 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2503 g_resumable = XML_FALSE;
2504 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2505 == XML_STATUS_ERROR)
2506 xml_failure(g_parser);
2507 }
2508 END_TEST
2509
2510 /* Test external entity fault handling with suspension */
START_TEST(test_ext_entity_invalid_suspended_parse)2511 START_TEST(test_ext_entity_invalid_suspended_parse) {
2512 const char *text = "<!DOCTYPE doc [\n"
2513 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2514 "]>\n"
2515 "<doc>&en;</doc>";
2516 ExtFaults faults[]
2517 = {{"<?xml version='1.0' encoding='us-ascii'?><",
2518 "Incomplete element declaration not faulted", NULL,
2519 XML_ERROR_UNCLOSED_TOKEN},
2520 {/* First two bytes of a three-byte char */
2521 "<?xml version='1.0' encoding='utf-8'?>\xe2\x82",
2522 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
2523 {NULL, NULL, NULL, XML_ERROR_NONE}};
2524 ExtFaults *fault;
2525
2526 for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
2527 set_subtest("%s", fault->parse_text);
2528 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2529 XML_SetExternalEntityRefHandler(g_parser,
2530 external_entity_suspending_faulter);
2531 XML_SetUserData(g_parser, fault);
2532 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2533 "Parser did not report external entity error");
2534 XML_ParserReset(g_parser, NULL);
2535 }
2536 }
2537 END_TEST
2538
2539 /* Test setting an explicit encoding */
START_TEST(test_explicit_encoding)2540 START_TEST(test_explicit_encoding) {
2541 const char *text1 = "<doc>Hello ";
2542 const char *text2 = " World</doc>";
2543
2544 /* Just check that we can set the encoding to NULL before starting */
2545 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2546 fail("Failed to initialise encoding to NULL");
2547 /* Say we are UTF-8 */
2548 if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK)
2549 fail("Failed to set explicit encoding");
2550 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2551 == XML_STATUS_ERROR)
2552 xml_failure(g_parser);
2553 /* Try to switch encodings mid-parse */
2554 if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR)
2555 fail("Allowed encoding change");
2556 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2557 == XML_STATUS_ERROR)
2558 xml_failure(g_parser);
2559 /* Try now the parse is over */
2560 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2561 fail("Failed to unset encoding");
2562 }
2563 END_TEST
2564
2565 /* Test handling of trailing CR (rather than newline) */
START_TEST(test_trailing_cr)2566 START_TEST(test_trailing_cr) {
2567 const char *text = "<doc>\r";
2568 int found_cr;
2569
2570 /* Try with a character handler, for code coverage */
2571 XML_SetCharacterDataHandler(g_parser, cr_cdata_handler);
2572 XML_SetUserData(g_parser, &found_cr);
2573 found_cr = 0;
2574 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2575 == XML_STATUS_OK)
2576 fail("Failed to fault unclosed doc");
2577 if (found_cr == 0)
2578 fail("Did not catch the carriage return");
2579 XML_ParserReset(g_parser, NULL);
2580
2581 /* Now with a default handler instead */
2582 XML_SetDefaultHandler(g_parser, cr_cdata_handler);
2583 XML_SetUserData(g_parser, &found_cr);
2584 found_cr = 0;
2585 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2586 == XML_STATUS_OK)
2587 fail("Failed to fault unclosed doc");
2588 if (found_cr == 0)
2589 fail("Did not catch default carriage return");
2590 }
2591 END_TEST
2592
2593 /* Test trailing CR in an external entity parse */
START_TEST(test_ext_entity_trailing_cr)2594 START_TEST(test_ext_entity_trailing_cr) {
2595 const char *text = "<!DOCTYPE doc [\n"
2596 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2597 "]>\n"
2598 "<doc>&en;</doc>";
2599 int found_cr;
2600
2601 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2602 XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher);
2603 XML_SetUserData(g_parser, &found_cr);
2604 found_cr = 0;
2605 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2606 != XML_STATUS_OK)
2607 xml_failure(g_parser);
2608 if (found_cr == 0)
2609 fail("No carriage return found");
2610 XML_ParserReset(g_parser, NULL);
2611
2612 /* Try again with a different trailing CR */
2613 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2614 XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher);
2615 XML_SetUserData(g_parser, &found_cr);
2616 found_cr = 0;
2617 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2618 != XML_STATUS_OK)
2619 xml_failure(g_parser);
2620 if (found_cr == 0)
2621 fail("No carriage return found");
2622 }
2623 END_TEST
2624
2625 /* Test handling of trailing square bracket */
START_TEST(test_trailing_rsqb)2626 START_TEST(test_trailing_rsqb) {
2627 const char *text8 = "<doc>]";
2628 const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000";
2629 int found_rsqb;
2630 int text8_len = (int)strlen(text8);
2631
2632 XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2633 XML_SetUserData(g_parser, &found_rsqb);
2634 found_rsqb = 0;
2635 if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE)
2636 == XML_STATUS_OK)
2637 fail("Failed to fault unclosed doc");
2638 if (found_rsqb == 0)
2639 fail("Did not catch the right square bracket");
2640
2641 /* Try again with a different encoding */
2642 XML_ParserReset(g_parser, NULL);
2643 XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2644 XML_SetUserData(g_parser, &found_rsqb);
2645 found_rsqb = 0;
2646 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2647 XML_TRUE)
2648 == XML_STATUS_OK)
2649 fail("Failed to fault unclosed doc");
2650 if (found_rsqb == 0)
2651 fail("Did not catch the right square bracket");
2652
2653 /* And finally with a default handler */
2654 XML_ParserReset(g_parser, NULL);
2655 XML_SetDefaultHandler(g_parser, rsqb_handler);
2656 XML_SetUserData(g_parser, &found_rsqb);
2657 found_rsqb = 0;
2658 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2659 XML_TRUE)
2660 == XML_STATUS_OK)
2661 fail("Failed to fault unclosed doc");
2662 if (found_rsqb == 0)
2663 fail("Did not catch the right square bracket");
2664 }
2665 END_TEST
2666
2667 /* Test trailing right square bracket in an external entity parse */
START_TEST(test_ext_entity_trailing_rsqb)2668 START_TEST(test_ext_entity_trailing_rsqb) {
2669 const char *text = "<!DOCTYPE doc [\n"
2670 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2671 "]>\n"
2672 "<doc>&en;</doc>";
2673 int found_rsqb;
2674
2675 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2676 XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher);
2677 XML_SetUserData(g_parser, &found_rsqb);
2678 found_rsqb = 0;
2679 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2680 != XML_STATUS_OK)
2681 xml_failure(g_parser);
2682 if (found_rsqb == 0)
2683 fail("No right square bracket found");
2684 }
2685 END_TEST
2686
2687 /* Test CDATA handling in an external entity */
START_TEST(test_ext_entity_good_cdata)2688 START_TEST(test_ext_entity_good_cdata) {
2689 const char *text = "<!DOCTYPE doc [\n"
2690 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2691 "]>\n"
2692 "<doc>&en;</doc>";
2693
2694 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2695 XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii);
2696 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2697 != XML_STATUS_OK)
2698 xml_failure(g_parser);
2699 }
2700 END_TEST
2701
2702 /* Test user parameter settings */
START_TEST(test_user_parameters)2703 START_TEST(test_user_parameters) {
2704 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2705 "<!-- Primary parse -->\n"
2706 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2707 "<doc>&entity;";
2708 const char *epilog = "<!-- Back to primary parser -->\n"
2709 "</doc>";
2710
2711 g_comment_count = 0;
2712 g_skip_count = 0;
2713 g_xdecl_count = 0;
2714 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2715 XML_SetXmlDeclHandler(g_parser, xml_decl_handler);
2716 XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker);
2717 XML_SetCommentHandler(g_parser, data_check_comment_handler);
2718 XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler);
2719 XML_UseParserAsHandlerArg(g_parser);
2720 XML_SetUserData(g_parser, (void *)1);
2721 g_handler_data = g_parser;
2722 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2723 == XML_STATUS_ERROR)
2724 xml_failure(g_parser);
2725 /* Ensure we can't change policy mid-parse */
2726 if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER))
2727 fail("Changed param entity parsing policy while parsing");
2728 if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
2729 == XML_STATUS_ERROR)
2730 xml_failure(g_parser);
2731 if (g_comment_count != 3)
2732 fail("Comment handler not invoked enough times");
2733 if (g_skip_count != 1)
2734 fail("Skip handler not invoked enough times");
2735 if (g_xdecl_count != 1)
2736 fail("XML declaration handler not invoked");
2737 }
2738 END_TEST
2739
2740 /* Test that an explicit external entity handler argument replaces
2741 * the parser as the first argument.
2742 *
2743 * We do not call the first parameter to the external entity handler
2744 * 'parser' for once, since the first time the handler is called it
2745 * will actually be a text string. We need to be able to access the
2746 * global 'parser' variable to create our external entity parser from,
2747 * since there are code paths we need to ensure get executed.
2748 */
START_TEST(test_ext_entity_ref_parameter)2749 START_TEST(test_ext_entity_ref_parameter) {
2750 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2751 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2752 "<doc>&entity;</doc>";
2753
2754 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2755 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2756 /* Set a handler arg that is not NULL and not parser (which is
2757 * what NULL would cause to be passed.
2758 */
2759 XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text);
2760 g_handler_data = text;
2761 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2762 == XML_STATUS_ERROR)
2763 xml_failure(g_parser);
2764
2765 /* Now try again with unset args */
2766 XML_ParserReset(g_parser, NULL);
2767 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2768 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2769 XML_SetExternalEntityRefHandlerArg(g_parser, NULL);
2770 g_handler_data = g_parser;
2771 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2772 == XML_STATUS_ERROR)
2773 xml_failure(g_parser);
2774 }
2775 END_TEST
2776
2777 /* Test the parsing of an empty string */
START_TEST(test_empty_parse)2778 START_TEST(test_empty_parse) {
2779 const char *text = "<doc></doc>";
2780 const char *partial = "<doc>";
2781
2782 if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR)
2783 fail("Parsing empty string faulted");
2784 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2785 fail("Parsing final empty string not faulted");
2786 if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS)
2787 fail("Parsing final empty string faulted for wrong reason");
2788
2789 /* Now try with valid text before the empty end */
2790 XML_ParserReset(g_parser, NULL);
2791 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2792 == XML_STATUS_ERROR)
2793 xml_failure(g_parser);
2794 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR)
2795 fail("Parsing final empty string faulted");
2796
2797 /* Now try with invalid text before the empty end */
2798 XML_ParserReset(g_parser, NULL);
2799 if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial),
2800 XML_FALSE)
2801 == XML_STATUS_ERROR)
2802 xml_failure(g_parser);
2803 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2804 fail("Parsing final incomplete empty string not faulted");
2805 }
2806 END_TEST
2807
2808 /* Test XML_Parse for len < 0 */
START_TEST(test_negative_len_parse)2809 START_TEST(test_negative_len_parse) {
2810 const char *const doc = "<root/>";
2811 for (int isFinal = 0; isFinal < 2; isFinal++) {
2812 set_subtest("isFinal=%d", isFinal);
2813
2814 XML_Parser parser = XML_ParserCreate(NULL);
2815
2816 if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
2817 fail("There was not supposed to be any initial parse error.");
2818
2819 const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal);
2820
2821 if (status != XML_STATUS_ERROR)
2822 fail("Negative len was expected to fail the parse but did not.");
2823
2824 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
2825 fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
2826
2827 XML_ParserFree(parser);
2828 }
2829 }
2830 END_TEST
2831
2832 /* Test XML_ParseBuffer for len < 0 */
START_TEST(test_negative_len_parse_buffer)2833 START_TEST(test_negative_len_parse_buffer) {
2834 const char *const doc = "<root/>";
2835 for (int isFinal = 0; isFinal < 2; isFinal++) {
2836 set_subtest("isFinal=%d", isFinal);
2837
2838 XML_Parser parser = XML_ParserCreate(NULL);
2839
2840 if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
2841 fail("There was not supposed to be any initial parse error.");
2842
2843 void *const buffer = XML_GetBuffer(parser, (int)strlen(doc));
2844
2845 if (buffer == NULL)
2846 fail("XML_GetBuffer failed.");
2847
2848 memcpy(buffer, doc, strlen(doc));
2849
2850 const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal);
2851
2852 if (status != XML_STATUS_ERROR)
2853 fail("Negative len was expected to fail the parse but did not.");
2854
2855 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
2856 fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
2857
2858 XML_ParserFree(parser);
2859 }
2860 }
2861 END_TEST
2862
2863 /* Test odd corners of the XML_GetBuffer interface */
2864 static enum XML_Status
get_feature(enum XML_FeatureEnum feature_id,long * presult)2865 get_feature(enum XML_FeatureEnum feature_id, long *presult) {
2866 const XML_Feature *feature = XML_GetFeatureList();
2867
2868 if (feature == NULL)
2869 return XML_STATUS_ERROR;
2870 for (; feature->feature != XML_FEATURE_END; feature++) {
2871 if (feature->feature == feature_id) {
2872 *presult = feature->value;
2873 return XML_STATUS_OK;
2874 }
2875 }
2876 return XML_STATUS_ERROR;
2877 }
2878
2879 /* Test odd corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_1)2880 START_TEST(test_get_buffer_1) {
2881 const char *text = get_buffer_test_text;
2882 void *buffer;
2883 long context_bytes;
2884
2885 /* Attempt to allocate a negative length buffer */
2886 if (XML_GetBuffer(g_parser, -12) != NULL)
2887 fail("Negative length buffer not failed");
2888
2889 /* Now get a small buffer and extend it past valid length */
2890 buffer = XML_GetBuffer(g_parser, 1536);
2891 if (buffer == NULL)
2892 fail("1.5K buffer failed");
2893 assert(buffer != NULL);
2894 memcpy(buffer, text, strlen(text));
2895 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2896 == XML_STATUS_ERROR)
2897 xml_failure(g_parser);
2898 if (XML_GetBuffer(g_parser, INT_MAX) != NULL)
2899 fail("INT_MAX buffer not failed");
2900
2901 /* Now try extending it a more reasonable but still too large
2902 * amount. The allocator in XML_GetBuffer() doubles the buffer
2903 * size until it exceeds the requested amount or INT_MAX. If it
2904 * exceeds INT_MAX, it rejects the request, so we want a request
2905 * between INT_MAX and INT_MAX/2. A gap of 1K seems comfortable,
2906 * with an extra byte just to ensure that the request is off any
2907 * boundary. The request will be inflated internally by
2908 * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
2909 * request.
2910 */
2911 if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK)
2912 context_bytes = 0;
2913 if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL)
2914 fail("INT_MAX- buffer not failed");
2915
2916 /* Now try extending it a carefully crafted amount */
2917 if (XML_GetBuffer(g_parser, 1000) == NULL)
2918 fail("1000 buffer failed");
2919 }
2920 END_TEST
2921
2922 /* Test more corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_2)2923 START_TEST(test_get_buffer_2) {
2924 const char *text = get_buffer_test_text;
2925 void *buffer;
2926
2927 /* Now get a decent buffer */
2928 buffer = XML_GetBuffer(g_parser, 1536);
2929 if (buffer == NULL)
2930 fail("1.5K buffer failed");
2931 assert(buffer != NULL);
2932 memcpy(buffer, text, strlen(text));
2933 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2934 == XML_STATUS_ERROR)
2935 xml_failure(g_parser);
2936
2937 /* Extend it, to catch a different code path */
2938 if (XML_GetBuffer(g_parser, 1024) == NULL)
2939 fail("1024 buffer failed");
2940 }
2941 END_TEST
2942
2943 /* Test for signed integer overflow CVE-2022-23852 */
2944 #if XML_CONTEXT_BYTES > 0
START_TEST(test_get_buffer_3_overflow)2945 START_TEST(test_get_buffer_3_overflow) {
2946 XML_Parser parser = XML_ParserCreate(NULL);
2947 assert(parser != NULL);
2948
2949 const char *const text = "\n";
2950 const int expectedKeepValue = (int)strlen(text);
2951
2952 // After this call, variable "keep" in XML_GetBuffer will
2953 // have value expectedKeepValue
2954 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
2955 XML_FALSE /* isFinal */)
2956 == XML_STATUS_ERROR)
2957 xml_failure(parser);
2958
2959 assert(expectedKeepValue > 0);
2960 if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL)
2961 fail("enlarging buffer not failed");
2962
2963 XML_ParserFree(parser);
2964 }
2965 END_TEST
2966 #endif // XML_CONTEXT_BYTES > 0
2967
START_TEST(test_buffer_can_grow_to_max)2968 START_TEST(test_buffer_can_grow_to_max) {
2969 const char *const prefixes[] = {
2970 "",
2971 "<",
2972 "<x a='",
2973 "<doc><x a='",
2974 "<document><x a='",
2975 "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
2976 "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
2977 "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
2978 "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
2979 "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
2980 const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
2981 int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
2982 #if defined(__MINGW32__) && ! defined(__MINGW64__)
2983 // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
2984 // Can we make a big allocation?
2985 void *big = malloc(maxbuf);
2986 if (! big) {
2987 // The big allocation failed. Let's be a little lenient.
2988 maxbuf = maxbuf / 2;
2989 }
2990 free(big);
2991 #endif
2992
2993 for (int i = 0; i < num_prefixes; ++i) {
2994 set_subtest("\"%s\"", prefixes[i]);
2995 XML_Parser parser = XML_ParserCreate(NULL);
2996 const int prefix_len = (int)strlen(prefixes[i]);
2997 const enum XML_Status s
2998 = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
2999 if (s != XML_STATUS_OK)
3000 xml_failure(parser);
3001
3002 // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
3003 // subtracting the whole prefix is easiest, and close enough.
3004 assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
3005 // The limit should be consistent; no prefix should allow us to
3006 // reach above the max buffer size.
3007 assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
3008 XML_ParserFree(parser);
3009 }
3010 }
3011 END_TEST
3012
START_TEST(test_getbuffer_allocates_on_zero_len)3013 START_TEST(test_getbuffer_allocates_on_zero_len) {
3014 for (int first_len = 1; first_len >= 0; first_len--) {
3015 set_subtest("with len=%d first", first_len);
3016 XML_Parser parser = XML_ParserCreate(NULL);
3017 assert_true(parser != NULL);
3018 assert_true(XML_GetBuffer(parser, first_len) != NULL);
3019 assert_true(XML_GetBuffer(parser, 0) != NULL);
3020 if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK)
3021 xml_failure(parser);
3022 XML_ParserFree(parser);
3023 }
3024 }
3025 END_TEST
3026
3027 /* Test position information macros */
START_TEST(test_byte_info_at_end)3028 START_TEST(test_byte_info_at_end) {
3029 const char *text = "<doc></doc>";
3030
3031 if (XML_GetCurrentByteIndex(g_parser) != -1
3032 || XML_GetCurrentByteCount(g_parser) != 0)
3033 fail("Byte index/count incorrect at start of parse");
3034 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3035 == XML_STATUS_ERROR)
3036 xml_failure(g_parser);
3037 /* At end, the count will be zero and the index the end of string */
3038 if (XML_GetCurrentByteCount(g_parser) != 0)
3039 fail("Terminal byte count incorrect");
3040 if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text))
3041 fail("Terminal byte index incorrect");
3042 }
3043 END_TEST
3044
3045 /* Test position information from errors */
3046 #define PRE_ERROR_STR "<doc></"
3047 #define POST_ERROR_STR "wombat></doc>"
START_TEST(test_byte_info_at_error)3048 START_TEST(test_byte_info_at_error) {
3049 const char *text = PRE_ERROR_STR POST_ERROR_STR;
3050
3051 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3052 == XML_STATUS_OK)
3053 fail("Syntax error not faulted");
3054 if (XML_GetCurrentByteCount(g_parser) != 0)
3055 fail("Error byte count incorrect");
3056 if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR))
3057 fail("Error byte index incorrect");
3058 }
3059 END_TEST
3060 #undef PRE_ERROR_STR
3061 #undef POST_ERROR_STR
3062
3063 /* Test position information in handler */
3064 #define START_ELEMENT "<e>"
3065 #define CDATA_TEXT "Hello"
3066 #define END_ELEMENT "</e>"
START_TEST(test_byte_info_at_cdata)3067 START_TEST(test_byte_info_at_cdata) {
3068 const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT;
3069 int offset, size;
3070 ByteTestData data;
3071
3072 /* Check initial context is empty */
3073 if (XML_GetInputContext(g_parser, &offset, &size) != NULL)
3074 fail("Unexpected context at start of parse");
3075
3076 data.start_element_len = (int)strlen(START_ELEMENT);
3077 data.cdata_len = (int)strlen(CDATA_TEXT);
3078 data.total_string_len = (int)strlen(text);
3079 XML_SetCharacterDataHandler(g_parser, byte_character_handler);
3080 XML_SetUserData(g_parser, &data);
3081 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
3082 xml_failure(g_parser);
3083 }
3084 END_TEST
3085 #undef START_ELEMENT
3086 #undef CDATA_TEXT
3087 #undef END_ELEMENT
3088
3089 /* Test predefined entities are correctly recognised */
START_TEST(test_predefined_entities)3090 START_TEST(test_predefined_entities) {
3091 const char *text = "<doc><>&"'</doc>";
3092 const XML_Char *expected = XCS("<doc><>&"'</doc>");
3093 const XML_Char *result = XCS("<>&\"'");
3094 CharData storage;
3095
3096 XML_SetDefaultHandler(g_parser, accumulate_characters);
3097 /* run_character_check uses XML_SetCharacterDataHandler(), which
3098 * unfortunately heads off a code path that we need to exercise.
3099 */
3100 CharData_Init(&storage);
3101 XML_SetUserData(g_parser, &storage);
3102 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3103 == XML_STATUS_ERROR)
3104 xml_failure(g_parser);
3105 /* The default handler doesn't translate the entities */
3106 CharData_CheckXMLChars(&storage, expected);
3107
3108 /* Now try again and check the translation */
3109 XML_ParserReset(g_parser, NULL);
3110 run_character_check(text, result);
3111 }
3112 END_TEST
3113
3114 /* Regression test that an invalid tag in an external parameter
3115 * reference in an external DTD is correctly faulted.
3116 *
3117 * Only a few specific tags are legal in DTDs ignoring comments and
3118 * processing instructions, all of which begin with an exclamation
3119 * mark. "<el/>" is not one of them, so the parser should raise an
3120 * error on encountering it.
3121 */
START_TEST(test_invalid_tag_in_dtd)3122 START_TEST(test_invalid_tag_in_dtd) {
3123 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3124 "<doc></doc>\n";
3125
3126 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3127 XML_SetExternalEntityRefHandler(g_parser, external_entity_param);
3128 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3129 "Invalid tag IN DTD external param not rejected");
3130 }
3131 END_TEST
3132
3133 /* Test entities not quite the predefined ones are not mis-recognised */
START_TEST(test_not_predefined_entities)3134 START_TEST(test_not_predefined_entities) {
3135 const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>",
3136 "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL};
3137 int i = 0;
3138
3139 while (text[i] != NULL) {
3140 expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY,
3141 "Undefined entity not rejected");
3142 XML_ParserReset(g_parser, NULL);
3143 i++;
3144 }
3145 }
3146 END_TEST
3147
3148 /* Test conditional inclusion (IGNORE) */
START_TEST(test_ignore_section)3149 START_TEST(test_ignore_section) {
3150 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3151 "<doc><e>&entity;</e></doc>";
3152 const XML_Char *expected
3153 = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;");
3154 CharData storage;
3155
3156 CharData_Init(&storage);
3157 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3158 XML_SetUserData(g_parser, &storage);
3159 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore);
3160 XML_SetDefaultHandler(g_parser, accumulate_characters);
3161 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3162 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3163 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3164 XML_SetStartElementHandler(g_parser, dummy_start_element);
3165 XML_SetEndElementHandler(g_parser, dummy_end_element);
3166 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3167 == XML_STATUS_ERROR)
3168 xml_failure(g_parser);
3169 CharData_CheckXMLChars(&storage, expected);
3170 }
3171 END_TEST
3172
START_TEST(test_ignore_section_utf16)3173 START_TEST(test_ignore_section_utf16) {
3174 const char text[] =
3175 /* <!DOCTYPE d SYSTEM 's'> */
3176 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3177 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
3178 /* <d><e>&en;</e></d> */
3179 "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
3180 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3181 CharData storage;
3182
3183 CharData_Init(&storage);
3184 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3185 XML_SetUserData(g_parser, &storage);
3186 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16);
3187 XML_SetDefaultHandler(g_parser, accumulate_characters);
3188 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3189 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3190 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3191 XML_SetStartElementHandler(g_parser, dummy_start_element);
3192 XML_SetEndElementHandler(g_parser, dummy_end_element);
3193 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3194 == XML_STATUS_ERROR)
3195 xml_failure(g_parser);
3196 CharData_CheckXMLChars(&storage, expected);
3197 }
3198 END_TEST
3199
START_TEST(test_ignore_section_utf16_be)3200 START_TEST(test_ignore_section_utf16_be) {
3201 const char text[] =
3202 /* <!DOCTYPE d SYSTEM 's'> */
3203 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3204 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n"
3205 /* <d><e>&en;</e></d> */
3206 "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>";
3207 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3208 CharData storage;
3209
3210 CharData_Init(&storage);
3211 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3212 XML_SetUserData(g_parser, &storage);
3213 XML_SetExternalEntityRefHandler(g_parser,
3214 external_entity_load_ignore_utf16_be);
3215 XML_SetDefaultHandler(g_parser, accumulate_characters);
3216 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3217 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3218 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3219 XML_SetStartElementHandler(g_parser, dummy_start_element);
3220 XML_SetEndElementHandler(g_parser, dummy_end_element);
3221 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3222 == XML_STATUS_ERROR)
3223 xml_failure(g_parser);
3224 CharData_CheckXMLChars(&storage, expected);
3225 }
3226 END_TEST
3227
3228 /* Test mis-formatted conditional exclusion */
START_TEST(test_bad_ignore_section)3229 START_TEST(test_bad_ignore_section) {
3230 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3231 "<doc><e>&entity;</e></doc>";
3232 ExtFaults faults[]
3233 = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL,
3234 XML_ERROR_SYNTAX},
3235 {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL,
3236 XML_ERROR_INVALID_TOKEN},
3237 {/* FIrst two bytes of a three-byte char */
3238 "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL,
3239 XML_ERROR_PARTIAL_CHAR},
3240 {NULL, NULL, NULL, XML_ERROR_NONE}};
3241 ExtFaults *fault;
3242
3243 for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
3244 set_subtest("%s", fault->parse_text);
3245 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3246 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3247 XML_SetUserData(g_parser, fault);
3248 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3249 "Incomplete IGNORE section not failed");
3250 XML_ParserReset(g_parser, NULL);
3251 }
3252 }
3253 END_TEST
3254
/* State shared between test_external_bom_consumed and its external entity
 * ref handler, external_bom_checker, via the parser's user data.
 */
struct bom_testdata {
  /* Text that the handler feeds to the nested parser for '004-2.ent' */
  const char *external;
  /* Number of leading bytes of `external` passed in the first, non-final
   * parse call; the remainder is passed in a second, final call.
   */
  int split;
  /* Set to XML_TRUE by the handler when it is invoked for '004-2.ent' */
  XML_Bool nested_callback_happened;
};
3260
3261 static int XMLCALL
external_bom_checker(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)3262 external_bom_checker(XML_Parser parser, const XML_Char *context,
3263 const XML_Char *base, const XML_Char *systemId,
3264 const XML_Char *publicId) {
3265 const char *text;
3266 UNUSED_P(base);
3267 UNUSED_P(systemId);
3268 UNUSED_P(publicId);
3269
3270 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
3271 if (ext_parser == NULL)
3272 fail("Could not create external entity parser");
3273
3274 if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
3275 struct bom_testdata *const testdata
3276 = (struct bom_testdata *)XML_GetUserData(parser);
3277 const char *const external = testdata->external;
3278 const int split = testdata->split;
3279 testdata->nested_callback_happened = XML_TRUE;
3280
3281 if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE)
3282 != XML_STATUS_OK) {
3283 xml_failure(ext_parser);
3284 }
3285 text = external + split; // the parse below will continue where we left off.
3286 } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
3287 text = "<!ELEMENT doc EMPTY>\n"
3288 "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
3289 "<!ENTITY % e2 '%e1;'>\n";
3290 } else {
3291 fail("unknown systemId");
3292 }
3293
3294 if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE)
3295 != XML_STATUS_OK)
3296 xml_failure(ext_parser);
3297
3298 XML_ParserFree(ext_parser);
3299 return XML_STATUS_OK;
3300 }
3301
3302 /* regression test: BOM should be consumed when followed by a partial token. */
START_TEST(test_external_bom_consumed)3303 START_TEST(test_external_bom_consumed) {
3304 const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3305 "<doc></doc>\n";
3306 const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
3307 const int len = (int)strlen(external);
3308 for (int split = 0; split <= len; ++split) {
3309 set_subtest("split at byte %d", split);
3310
3311 struct bom_testdata testdata;
3312 testdata.external = external;
3313 testdata.split = split;
3314 testdata.nested_callback_happened = XML_FALSE;
3315
3316 XML_Parser parser = XML_ParserCreate(NULL);
3317 if (parser == NULL) {
3318 fail("Couldn't create parser");
3319 }
3320 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3321 XML_SetExternalEntityRefHandler(parser, external_bom_checker);
3322 XML_SetUserData(parser, &testdata);
3323 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
3324 == XML_STATUS_ERROR)
3325 xml_failure(parser);
3326 if (! testdata.nested_callback_happened) {
3327 fail("ref handler not called");
3328 }
3329 XML_ParserFree(parser);
3330 }
3331 }
3332 END_TEST
3333
3334 /* Test recursive parsing */
START_TEST(test_external_entity_values)3335 START_TEST(test_external_entity_values) {
3336 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3337 "<doc></doc>\n";
3338 ExtFaults data_004_2[] = {
3339 {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE},
3340 {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL,
3341 XML_ERROR_INVALID_TOKEN},
3342 {"'wombat", "Unterminated string not faulted", NULL,
3343 XML_ERROR_UNCLOSED_TOKEN},
3344 {"\xe2\x82", "Partial UTF-8 character not faulted", NULL,
3345 XML_ERROR_PARTIAL_CHAR},
3346 {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE},
3347 {"<?xml?>", "Malformed XML declaration not faulted", NULL,
3348 XML_ERROR_XML_DECL},
3349 {/* UTF-8 BOM */
3350 "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL,
3351 XML_ERROR_NONE},
3352 {"<?xml version='1.0' encoding='utf-8'?>\n$",
3353 "Invalid token after text declaration not faulted", NULL,
3354 XML_ERROR_INVALID_TOKEN},
3355 {"<?xml version='1.0' encoding='utf-8'?>\n'wombat",
3356 "Unterminated string after text decl not faulted", NULL,
3357 XML_ERROR_UNCLOSED_TOKEN},
3358 {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82",
3359 "Partial UTF-8 character after text decl not faulted", NULL,
3360 XML_ERROR_PARTIAL_CHAR},
3361 {"%e1;", "Recursive parameter entity not faulted", NULL,
3362 XML_ERROR_RECURSIVE_ENTITY_REF},
3363 {NULL, NULL, NULL, XML_ERROR_NONE}};
3364 int i;
3365
3366 for (i = 0; data_004_2[i].parse_text != NULL; i++) {
3367 set_subtest("%s", data_004_2[i].parse_text);
3368 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3369 XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer);
3370 XML_SetUserData(g_parser, &data_004_2[i]);
3371 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3372 == XML_STATUS_ERROR)
3373 xml_failure(g_parser);
3374 XML_ParserReset(g_parser, NULL);
3375 }
3376 }
3377 END_TEST
3378
3379 /* Test the recursive parse interacts with a not standalone handler */
START_TEST(test_ext_entity_not_standalone)3380 START_TEST(test_ext_entity_not_standalone) {
3381 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3382 "<doc></doc>";
3383
3384 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3385 XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone);
3386 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3387 "Standalone rejection not caught");
3388 }
3389 END_TEST
3390
START_TEST(test_ext_entity_value_abort)3391 START_TEST(test_ext_entity_value_abort) {
3392 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3393 "<doc></doc>\n";
3394
3395 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3396 XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter);
3397 g_resumable = XML_FALSE;
3398 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3399 == XML_STATUS_ERROR)
3400 xml_failure(g_parser);
3401 }
3402 END_TEST
3403
START_TEST(test_bad_public_doctype)3404 START_TEST(test_bad_public_doctype) {
3405 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
3406 "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n"
3407 "<doc></doc>";
3408
3409 /* Setting a handler provokes a particular code path */
3410 XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler,
3411 dummy_end_doctype_handler);
3412 expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed");
3413 }
3414 END_TEST
3415
3416 /* Test based on ibm/valid/P32/ibm32v04.xml */
START_TEST(test_attribute_enum_value)3417 START_TEST(test_attribute_enum_value) {
3418 const char *text = "<?xml version='1.0' standalone='no'?>\n"
3419 "<!DOCTYPE animal SYSTEM 'test.dtd'>\n"
3420 "<animal>This is a \n <a/> \n\nyellow tiger</animal>";
3421 ExtTest dtd_data
3422 = {"<!ELEMENT animal (#PCDATA|a)*>\n"
3423 "<!ELEMENT a EMPTY>\n"
3424 "<!ATTLIST animal xml:space (default|preserve) 'preserve'>",
3425 NULL, NULL};
3426 const XML_Char *expected = XCS("This is a \n \n\nyellow tiger");
3427
3428 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3429 XML_SetUserData(g_parser, &dtd_data);
3430 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3431 /* An attribute list handler provokes a different code path */
3432 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
3433 run_ext_character_check(text, &dtd_data, expected);
3434 }
3435 END_TEST
3436
3437 /* Slightly bizarrely, the library seems to silently ignore entity
3438 * definitions for predefined entities, even when they are wrong. The
3439 * language of the XML 1.0 spec is somewhat unhelpful as to what ought
3440 * to happen, so this is currently treated as acceptable.
3441 */
START_TEST(test_predefined_entity_redefinition)3442 START_TEST(test_predefined_entity_redefinition) {
3443 const char *text = "<!DOCTYPE doc [\n"
3444 "<!ENTITY apos 'foo'>\n"
3445 "]>\n"
3446 "<doc>'</doc>";
3447 run_character_check(text, XCS("'"));
3448 }
3449 END_TEST
3450
3451 /* Test that the parser stops processing the DTD after an unresolved
3452 * parameter entity is encountered.
3453 */
START_TEST(test_dtd_stop_processing)3454 START_TEST(test_dtd_stop_processing) {
3455 const char *text = "<!DOCTYPE doc [\n"
3456 "%foo;\n"
3457 "<!ENTITY bar 'bas'>\n"
3458 "]><doc/>";
3459
3460 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
3461 init_dummy_handlers();
3462 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3463 == XML_STATUS_ERROR)
3464 xml_failure(g_parser);
3465 if (get_dummy_handler_flags() != 0)
3466 fail("DTD processing still going after undefined PE");
3467 }
3468 END_TEST
3469
3470 /* Test public notations with no system ID */
START_TEST(test_public_notation_no_sysid)3471 START_TEST(test_public_notation_no_sysid) {
3472 const char *text = "<!DOCTYPE doc [\n"
3473 "<!NOTATION note PUBLIC 'foo'>\n"
3474 "<!ELEMENT doc EMPTY>\n"
3475 "]>\n<doc/>";
3476
3477 init_dummy_handlers();
3478 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
3479 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3480 == XML_STATUS_ERROR)
3481 xml_failure(g_parser);
3482 if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG)
3483 fail("Notation declaration handler not called");
3484 }
3485 END_TEST
3486
START_TEST(test_nested_groups)3487 START_TEST(test_nested_groups) {
3488 const char *text
3489 = "<!DOCTYPE doc [\n"
3490 "<!ELEMENT doc "
3491 /* Sixteen elements per line */
3492 "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,"
3493 "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?"
3494 "))))))))))))))))))))))))))))))))>\n"
3495 "<!ELEMENT e EMPTY>"
3496 "]>\n"
3497 "<doc><e/></doc>";
3498 CharData storage;
3499
3500 CharData_Init(&storage);
3501 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3502 XML_SetStartElementHandler(g_parser, record_element_start_handler);
3503 XML_SetUserData(g_parser, &storage);
3504 init_dummy_handlers();
3505 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3506 == XML_STATUS_ERROR)
3507 xml_failure(g_parser);
3508 CharData_CheckXMLChars(&storage, XCS("doce"));
3509 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3510 fail("Element handler not fired");
3511 }
3512 END_TEST
3513
START_TEST(test_group_choice)3514 START_TEST(test_group_choice) {
3515 const char *text = "<!DOCTYPE doc [\n"
3516 "<!ELEMENT doc (a|b|c)+>\n"
3517 "<!ELEMENT a EMPTY>\n"
3518 "<!ELEMENT b (#PCDATA)>\n"
3519 "<!ELEMENT c ANY>\n"
3520 "]>\n"
3521 "<doc>\n"
3522 "<a/>\n"
3523 "<b attr='foo'>This is a foo</b>\n"
3524 "<c></c>\n"
3525 "</doc>\n";
3526
3527 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3528 init_dummy_handlers();
3529 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3530 == XML_STATUS_ERROR)
3531 xml_failure(g_parser);
3532 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3533 fail("Element handler flag not raised");
3534 }
3535 END_TEST
3536
START_TEST(test_standalone_parameter_entity)3537 START_TEST(test_standalone_parameter_entity) {
3538 const char *text = "<?xml version='1.0' standalone='yes'?>\n"
3539 "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n"
3540 "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n"
3541 "%entity;\n"
3542 "]>\n"
3543 "<doc></doc>";
3544 char dtd_data[] = "<!ENTITY % e1 'foo'>\n";
3545
3546 XML_SetUserData(g_parser, dtd_data);
3547 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3548 XML_SetExternalEntityRefHandler(g_parser, external_entity_public);
3549 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3550 == XML_STATUS_ERROR)
3551 xml_failure(g_parser);
3552 }
3553 END_TEST
3554
3555 /* Test skipping of parameter entity in an external DTD */
3556 /* Derived from ibm/invalid/P69/ibm69i01.xml */
START_TEST(test_skipped_parameter_entity)3557 START_TEST(test_skipped_parameter_entity) {
3558 const char *text = "<?xml version='1.0'?>\n"
3559 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3560 "<!ELEMENT root (#PCDATA|a)* >\n"
3561 "]>\n"
3562 "<root></root>";
3563 ExtTest dtd_data = {"%pe2;", NULL, NULL};
3564
3565 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3566 XML_SetUserData(g_parser, &dtd_data);
3567 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3568 XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler);
3569 init_dummy_handlers();
3570 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3571 == XML_STATUS_ERROR)
3572 xml_failure(g_parser);
3573 if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG)
3574 fail("Skip handler not executed");
3575 }
3576 END_TEST
3577
3578 /* Test recursive parameter entity definition rejected in external DTD */
START_TEST(test_recursive_external_parameter_entity)3579 START_TEST(test_recursive_external_parameter_entity) {
3580 const char *text = "<?xml version='1.0'?>\n"
3581 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3582 "<!ELEMENT root (#PCDATA|a)* >\n"
3583 "]>\n"
3584 "<root></root>";
3585 ExtFaults dtd_data = {"<!ENTITY % pe2 '%pe2;'>\n%pe2;",
3586 "Recursive external parameter entity not faulted", NULL,
3587 XML_ERROR_RECURSIVE_ENTITY_REF};
3588
3589 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3590 XML_SetUserData(g_parser, &dtd_data);
3591 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3592 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3593 "Recursive external parameter not spotted");
3594 }
3595 END_TEST
3596
3597 /* Test undefined parameter entity in external entity handler */
START_TEST(test_undefined_ext_entity_in_external_dtd)3598 START_TEST(test_undefined_ext_entity_in_external_dtd) {
3599 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3600 "<doc></doc>\n";
3601
3602 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3603 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3604 XML_SetUserData(g_parser, NULL);
3605 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3606 == XML_STATUS_ERROR)
3607 xml_failure(g_parser);
3608
3609 /* Now repeat without the external entity ref handler invoking
3610 * another copy of itself.
3611 */
3612 XML_ParserReset(g_parser, NULL);
3613 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3614 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3615 XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */
3616 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3617 == XML_STATUS_ERROR)
3618 xml_failure(g_parser);
3619 }
3620 END_TEST
3621
3622 /* Test suspending the parse on receiving an XML declaration works */
START_TEST(test_suspend_xdecl)3623 START_TEST(test_suspend_xdecl) {
3624 const char *text = long_character_data_text;
3625
3626 XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
3627 XML_SetUserData(g_parser, g_parser);
3628 g_resumable = XML_TRUE;
3629 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3630 != XML_STATUS_SUSPENDED)
3631 xml_failure(g_parser);
3632 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
3633 xml_failure(g_parser);
3634 /* Attempt to start a new parse while suspended */
3635 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3636 != XML_STATUS_ERROR)
3637 fail("Attempt to parse while suspended not faulted");
3638 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
3639 fail("Suspended parse not faulted with correct error");
3640 }
3641 END_TEST
3642
3643 /* Test aborting the parse in an epilog works */
START_TEST(test_abort_epilog)3644 START_TEST(test_abort_epilog) {
3645 const char *text = "<doc></doc>\n\r\n";
3646 XML_Char trigger_char = XCS('\r');
3647
3648 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3649 XML_SetUserData(g_parser, &trigger_char);
3650 g_resumable = XML_FALSE;
3651 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3652 != XML_STATUS_ERROR)
3653 fail("Abort not triggered");
3654 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
3655 xml_failure(g_parser);
3656 }
3657 END_TEST
3658
3659 /* Test a different code path for abort in the epilog */
START_TEST(test_abort_epilog_2)3660 START_TEST(test_abort_epilog_2) {
3661 const char *text = "<doc></doc>\n";
3662 XML_Char trigger_char = XCS('\n');
3663
3664 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3665 XML_SetUserData(g_parser, &trigger_char);
3666 g_resumable = XML_FALSE;
3667 expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered");
3668 }
3669 END_TEST
3670
3671 /* Test suspension from the epilog */
START_TEST(test_suspend_epilog)3672 START_TEST(test_suspend_epilog) {
3673 const char *text = "<doc></doc>\n";
3674 XML_Char trigger_char = XCS('\n');
3675
3676 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3677 XML_SetUserData(g_parser, &trigger_char);
3678 g_resumable = XML_TRUE;
3679 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3680 != XML_STATUS_SUSPENDED)
3681 xml_failure(g_parser);
3682 }
3683 END_TEST
3684
START_TEST(test_suspend_in_sole_empty_tag)3685 START_TEST(test_suspend_in_sole_empty_tag) {
3686 const char *text = "<doc/>";
3687 enum XML_Status rc;
3688
3689 XML_SetEndElementHandler(g_parser, suspending_end_handler);
3690 XML_SetUserData(g_parser, g_parser);
3691 rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
3692 if (rc == XML_STATUS_ERROR)
3693 xml_failure(g_parser);
3694 else if (rc != XML_STATUS_SUSPENDED)
3695 fail("Suspend not triggered");
3696 rc = XML_ResumeParser(g_parser);
3697 if (rc == XML_STATUS_ERROR)
3698 xml_failure(g_parser);
3699 else if (rc != XML_STATUS_OK)
3700 fail("Resume failed");
3701 }
3702 END_TEST
3703
START_TEST(test_unfinished_epilog)3704 START_TEST(test_unfinished_epilog) {
3705 const char *text = "<doc></doc><";
3706
3707 expect_failure(text, XML_ERROR_UNCLOSED_TOKEN,
3708 "Incomplete epilog entry not faulted");
3709 }
3710 END_TEST
3711
START_TEST(test_partial_char_in_epilog)3712 START_TEST(test_partial_char_in_epilog) {
3713 const char *text = "<doc></doc>\xe2\x82";
3714
3715 /* First check that no fault is raised if the parse is not finished */
3716 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3717 == XML_STATUS_ERROR)
3718 xml_failure(g_parser);
3719 /* Now check that it is faulted once we finish */
3720 if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR)
3721 fail("Partial character in epilog not faulted");
3722 if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR)
3723 xml_failure(g_parser);
3724 }
3725 END_TEST
3726
3727 /* Test resuming a parse suspended in entity substitution */
START_TEST(test_suspend_resume_internal_entity)3728 START_TEST(test_suspend_resume_internal_entity) {
3729 const char *text
3730 = "<!DOCTYPE doc [\n"
3731 "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n"
3732 "]>\n"
3733 "<doc>&foo;</doc>\n";
3734 const XML_Char *expected1 = XCS("Hi");
3735 const XML_Char *expected2 = XCS("HiHo");
3736 CharData storage;
3737
3738 CharData_Init(&storage);
3739 XML_SetStartElementHandler(g_parser, start_element_suspender);
3740 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3741 XML_SetUserData(g_parser, &storage);
3742 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
3743 // we won't know exactly how much input we actually managed to give Expat.
3744 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3745 != XML_STATUS_SUSPENDED)
3746 xml_failure(g_parser);
3747 CharData_CheckXMLChars(&storage, XCS(""));
3748 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
3749 xml_failure(g_parser);
3750 CharData_CheckXMLChars(&storage, expected1);
3751 if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3752 xml_failure(g_parser);
3753 CharData_CheckXMLChars(&storage, expected2);
3754 }
3755 END_TEST
3756
START_TEST(test_suspend_resume_internal_entity_issue_629)3757 START_TEST(test_suspend_resume_internal_entity_issue_629) {
3758 const char *const text
3759 = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n"
3760 "<"
3761 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3762 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3763 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3764 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3765 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3766 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3767 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3768 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3769 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3770 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3771 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3772 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3773 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3774 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3775 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3776 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3777 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3778 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3779 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3780 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3781 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3782 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3783 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3784 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3785 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3786 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3787 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3788 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3789 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3790 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3791 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3792 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3793 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3794 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3795 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3796 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3797 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3798 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3799 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3800 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3801 "/>"
3802 "</b></a>";
3803 const size_t firstChunkSizeBytes = 54;
3804
3805 XML_Parser parser = XML_ParserCreate(NULL);
3806 XML_SetUserData(parser, parser);
3807 XML_SetCommentHandler(parser, suspending_comment_handler);
3808
3809 if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE)
3810 != XML_STATUS_SUSPENDED)
3811 xml_failure(parser);
3812 if (XML_ResumeParser(parser) != XML_STATUS_OK)
3813 xml_failure(parser);
3814 if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes,
3815 (int)(strlen(text) - firstChunkSizeBytes),
3816 XML_TRUE)
3817 != XML_STATUS_OK)
3818 xml_failure(parser);
3819 XML_ParserFree(parser);
3820 }
3821 END_TEST
3822
3823 /* Test syntax error is caught at parse resumption */
START_TEST(test_resume_entity_with_syntax_error)3824 START_TEST(test_resume_entity_with_syntax_error) {
3825 const char *text = "<!DOCTYPE doc [\n"
3826 "<!ENTITY foo '<suspend>Hi</wombat>'>\n"
3827 "]>\n"
3828 "<doc>&foo;</doc>\n";
3829
3830 XML_SetStartElementHandler(g_parser, start_element_suspender);
3831 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3832 != XML_STATUS_SUSPENDED)
3833 xml_failure(g_parser);
3834 if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
3835 fail("Syntax error in entity not faulted");
3836 if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH)
3837 xml_failure(g_parser);
3838 }
3839 END_TEST
3840
3841 /* Test suspending and resuming in a parameter entity substitution */
START_TEST(test_suspend_resume_parameter_entity)3842 START_TEST(test_suspend_resume_parameter_entity) {
3843 const char *text = "<!DOCTYPE doc [\n"
3844 "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n"
3845 "%foo;\n"
3846 "]>\n"
3847 "<doc>Hello, world</doc>";
3848 const XML_Char *expected = XCS("Hello, world");
3849 CharData storage;
3850
3851 CharData_Init(&storage);
3852 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3853 XML_SetElementDeclHandler(g_parser, element_decl_suspender);
3854 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3855 XML_SetUserData(g_parser, &storage);
3856 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3857 != XML_STATUS_SUSPENDED)
3858 xml_failure(g_parser);
3859 CharData_CheckXMLChars(&storage, XCS(""));
3860 if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3861 xml_failure(g_parser);
3862 CharData_CheckXMLChars(&storage, expected);
3863 }
3864 END_TEST
3865
3866 /* Test attempting to use parser after an error is faulted */
START_TEST(test_restart_on_error)3867 START_TEST(test_restart_on_error) {
3868 const char *text = "<$doc><doc></doc>";
3869
3870 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3871 != XML_STATUS_ERROR)
3872 fail("Invalid tag name not faulted");
3873 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3874 xml_failure(g_parser);
3875 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
3876 fail("Restarting invalid parse not faulted");
3877 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3878 xml_failure(g_parser);
3879 }
3880 END_TEST
3881
3882 /* Test that angle brackets in an attribute default value are faulted */
START_TEST(test_reject_lt_in_attribute_value)3883 START_TEST(test_reject_lt_in_attribute_value) {
3884 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n"
3885 "<doc></doc>";
3886
3887 expect_failure(text, XML_ERROR_INVALID_TOKEN,
3888 "Bad attribute default not faulted");
3889 }
3890 END_TEST
3891
START_TEST(test_reject_unfinished_param_in_att_value)3892 START_TEST(test_reject_unfinished_param_in_att_value) {
3893 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n"
3894 "<doc></doc>";
3895
3896 expect_failure(text, XML_ERROR_INVALID_TOKEN,
3897 "Bad attribute default not faulted");
3898 }
3899 END_TEST
3900
START_TEST(test_trailing_cr_in_att_value)3901 START_TEST(test_trailing_cr_in_att_value) {
3902 const char *text = "<doc a='value\r'/>";
3903
3904 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3905 == XML_STATUS_ERROR)
3906 xml_failure(g_parser);
3907 }
3908 END_TEST
3909
3910 /* Try parsing a general entity within a parameter entity in a
3911 * standalone internal DTD. Covers a corner case in the parser.
3912 */
START_TEST(test_standalone_internal_entity)3913 START_TEST(test_standalone_internal_entity) {
3914 const char *text = "<?xml version='1.0' standalone='yes' ?>\n"
3915 "<!DOCTYPE doc [\n"
3916 " <!ELEMENT doc (#PCDATA)>\n"
3917 " <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"≥\">'>\n"
3918 " <!ENTITY ge 'AttDefaultValue'>\n"
3919 " %pe;\n"
3920 "]>\n"
3921 "<doc att2='any'/>";
3922
3923 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3924 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3925 == XML_STATUS_ERROR)
3926 xml_failure(g_parser);
3927 }
3928 END_TEST
3929
3930 /* Test that a reference to an unknown external entity is skipped */
START_TEST(test_skipped_external_entity)3931 START_TEST(test_skipped_external_entity) {
3932 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3933 "<doc></doc>\n";
3934 ExtTest test_data = {"<!ELEMENT doc EMPTY>\n"
3935 "<!ENTITY % e2 '%e1;'>\n",
3936 NULL, NULL};
3937
3938 XML_SetUserData(g_parser, &test_data);
3939 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3940 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3941 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3942 == XML_STATUS_ERROR)
3943 xml_failure(g_parser);
3944 }
3945 END_TEST
3946
3947 /* Test a different form of unknown external entity */
START_TEST(test_skipped_null_loaded_ext_entity)3948 START_TEST(test_skipped_null_loaded_ext_entity) {
3949 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3950 "<doc />";
3951 ExtHdlrData test_data
3952 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3953 "<!ENTITY % pe2 '%pe1;'>\n"
3954 "%pe2;\n",
3955 external_entity_null_loader, NULL};
3956
3957 XML_SetUserData(g_parser, &test_data);
3958 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3959 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3960 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3961 == XML_STATUS_ERROR)
3962 xml_failure(g_parser);
3963 }
3964 END_TEST
3965
START_TEST(test_skipped_unloaded_ext_entity)3966 START_TEST(test_skipped_unloaded_ext_entity) {
3967 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3968 "<doc />";
3969 ExtHdlrData test_data
3970 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3971 "<!ENTITY % pe2 '%pe1;'>\n"
3972 "%pe2;\n",
3973 NULL, NULL};
3974
3975 XML_SetUserData(g_parser, &test_data);
3976 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3977 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3978 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3979 == XML_STATUS_ERROR)
3980 xml_failure(g_parser);
3981 }
3982 END_TEST
3983
3984 /* Test that a parameter entity value ending with a carriage return
3985 * has it translated internally into a newline.
3986 */
START_TEST(test_param_entity_with_trailing_cr)3987 START_TEST(test_param_entity_with_trailing_cr) {
3988 #define PARAM_ENTITY_NAME "pe"
3989 #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">"
3990 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3991 "<doc/>";
3992 ExtTest test_data
3993 = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n"
3994 "%" PARAM_ENTITY_NAME ";\n",
3995 NULL, NULL};
3996
3997 XML_SetUserData(g_parser, &test_data);
3998 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3999 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
4000 XML_SetEntityDeclHandler(g_parser, param_entity_match_handler);
4001 param_entity_match_init(XCS(PARAM_ENTITY_NAME),
4002 XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n"));
4003 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4004 == XML_STATUS_ERROR)
4005 xml_failure(g_parser);
4006 int entity_match_flag = get_param_entity_match_flag();
4007 if (entity_match_flag == ENTITY_MATCH_FAIL)
4008 fail("Parameter entity CR->NEWLINE conversion failed");
4009 else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND)
4010 fail("Parameter entity not parsed");
4011 }
4012 #undef PARAM_ENTITY_NAME
4013 #undef PARAM_ENTITY_CORE_VALUE
4014 END_TEST
4015
START_TEST(test_invalid_character_entity)4016 START_TEST(test_invalid_character_entity) {
4017 const char *text = "<!DOCTYPE doc [\n"
4018 " <!ENTITY entity '�'>\n"
4019 "]>\n"
4020 "<doc>&entity;</doc>";
4021
4022 expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4023 "Out of range character reference not faulted");
4024 }
4025 END_TEST
4026
START_TEST(test_invalid_character_entity_2)4027 START_TEST(test_invalid_character_entity_2) {
4028 const char *text = "<!DOCTYPE doc [\n"
4029 " <!ENTITY entity '&#xg0;'>\n"
4030 "]>\n"
4031 "<doc>&entity;</doc>";
4032
4033 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4034 "Out of range character reference not faulted");
4035 }
4036 END_TEST
4037
START_TEST(test_invalid_character_entity_3)4038 START_TEST(test_invalid_character_entity_3) {
4039 const char text[] =
4040 /* <!DOCTYPE doc [\n */
4041 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4042 /* U+0E04 = KHO KHWAI
4043 * U+0E08 = CHO CHAN */
4044 /* <!ENTITY entity '&\u0e04\u0e08;'>\n */
4045 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 "
4046 "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n"
4047 /* ]>\n */
4048 "\0]\0>\0\n"
4049 /* <doc>&entity;</doc> */
4050 "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>";
4051
4052 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4053 != XML_STATUS_ERROR)
4054 fail("Invalid start of entity name not faulted");
4055 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY)
4056 xml_failure(g_parser);
4057 }
4058 END_TEST
4059
START_TEST(test_invalid_character_entity_4)4060 START_TEST(test_invalid_character_entity_4) {
4061 const char *text = "<!DOCTYPE doc [\n"
4062 " <!ENTITY entity '�'>\n" /* = � */
4063 "]>\n"
4064 "<doc>&entity;</doc>";
4065
4066 expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4067 "Out of range character reference not faulted");
4068 }
4069 END_TEST
4070
4071 /* Test that processing instructions are picked up by a default handler */
START_TEST(test_pi_handled_in_default)4072 START_TEST(test_pi_handled_in_default) {
4073 const char *text = "<?test processing instruction?>\n<doc/>";
4074 const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>");
4075 CharData storage;
4076
4077 CharData_Init(&storage);
4078 XML_SetDefaultHandler(g_parser, accumulate_characters);
4079 XML_SetUserData(g_parser, &storage);
4080 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4081 == XML_STATUS_ERROR)
4082 xml_failure(g_parser);
4083 CharData_CheckXMLChars(&storage, expected);
4084 }
4085 END_TEST
4086
4087 /* Test that comments are picked up by a default handler */
START_TEST(test_comment_handled_in_default)4088 START_TEST(test_comment_handled_in_default) {
4089 const char *text = "<!-- This is a comment -->\n<doc/>";
4090 const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>");
4091 CharData storage;
4092
4093 CharData_Init(&storage);
4094 XML_SetDefaultHandler(g_parser, accumulate_characters);
4095 XML_SetUserData(g_parser, &storage);
4096 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4097 == XML_STATUS_ERROR)
4098 xml_failure(g_parser);
4099 CharData_CheckXMLChars(&storage, expected);
4100 }
4101 END_TEST
4102
4103 /* Test PIs that look almost but not quite like XML declarations */
START_TEST(test_pi_yml)4104 START_TEST(test_pi_yml) {
4105 const char *text = "<?yml something like data?><doc/>";
4106 const XML_Char *expected = XCS("yml: something like data\n");
4107 CharData storage;
4108
4109 CharData_Init(&storage);
4110 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4111 XML_SetUserData(g_parser, &storage);
4112 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4113 == XML_STATUS_ERROR)
4114 xml_failure(g_parser);
4115 CharData_CheckXMLChars(&storage, expected);
4116 }
4117 END_TEST
4118
START_TEST(test_pi_xnl)4119 START_TEST(test_pi_xnl) {
4120 const char *text = "<?xnl nothing like data?><doc/>";
4121 const XML_Char *expected = XCS("xnl: nothing like data\n");
4122 CharData storage;
4123
4124 CharData_Init(&storage);
4125 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4126 XML_SetUserData(g_parser, &storage);
4127 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4128 == XML_STATUS_ERROR)
4129 xml_failure(g_parser);
4130 CharData_CheckXMLChars(&storage, expected);
4131 }
4132 END_TEST
4133
START_TEST(test_pi_xmm)4134 START_TEST(test_pi_xmm) {
4135 const char *text = "<?xmm everything like data?><doc/>";
4136 const XML_Char *expected = XCS("xmm: everything like data\n");
4137 CharData storage;
4138
4139 CharData_Init(&storage);
4140 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4141 XML_SetUserData(g_parser, &storage);
4142 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4143 == XML_STATUS_ERROR)
4144 xml_failure(g_parser);
4145 CharData_CheckXMLChars(&storage, expected);
4146 }
4147 END_TEST
4148
START_TEST(test_utf16_pi)4149 START_TEST(test_utf16_pi) {
4150 const char text[] =
4151 /* <?{KHO KHWAI}{CHO CHAN}?>
4152 * where {KHO KHWAI} = U+0E04
4153 * and {CHO CHAN} = U+0E08
4154 */
4155 "<\0?\0\x04\x0e\x08\x0e?\0>\0"
4156 /* <q/> */
4157 "<\0q\0/\0>\0";
4158 #ifdef XML_UNICODE
4159 const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4160 #else
4161 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4162 #endif
4163 CharData storage;
4164
4165 CharData_Init(&storage);
4166 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4167 XML_SetUserData(g_parser, &storage);
4168 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4169 == XML_STATUS_ERROR)
4170 xml_failure(g_parser);
4171 CharData_CheckXMLChars(&storage, expected);
4172 }
4173 END_TEST
4174
START_TEST(test_utf16_be_pi)4175 START_TEST(test_utf16_be_pi) {
4176 const char text[] =
4177 /* <?{KHO KHWAI}{CHO CHAN}?>
4178 * where {KHO KHWAI} = U+0E04
4179 * and {CHO CHAN} = U+0E08
4180 */
4181 "\0<\0?\x0e\x04\x0e\x08\0?\0>"
4182 /* <q/> */
4183 "\0<\0q\0/\0>";
4184 #ifdef XML_UNICODE
4185 const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4186 #else
4187 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4188 #endif
4189 CharData storage;
4190
4191 CharData_Init(&storage);
4192 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4193 XML_SetUserData(g_parser, &storage);
4194 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4195 == XML_STATUS_ERROR)
4196 xml_failure(g_parser);
4197 CharData_CheckXMLChars(&storage, expected);
4198 }
4199 END_TEST
4200
4201 /* Test that comments can be picked up and translated */
START_TEST(test_utf16_be_comment)4202 START_TEST(test_utf16_be_comment) {
4203 const char text[] =
4204 /* <!-- Comment A --> */
4205 "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
4206 /* <doc/> */
4207 "\0<\0d\0o\0c\0/\0>";
4208 const XML_Char *expected = XCS(" Comment A ");
4209 CharData storage;
4210
4211 CharData_Init(&storage);
4212 XML_SetCommentHandler(g_parser, accumulate_comment);
4213 XML_SetUserData(g_parser, &storage);
4214 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4215 == XML_STATUS_ERROR)
4216 xml_failure(g_parser);
4217 CharData_CheckXMLChars(&storage, expected);
4218 }
4219 END_TEST
4220
START_TEST(test_utf16_le_comment)4221 START_TEST(test_utf16_le_comment) {
4222 const char text[] =
4223 /* <!-- Comment B --> */
4224 "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
4225 /* <doc/> */
4226 "<\0d\0o\0c\0/\0>\0";
4227 const XML_Char *expected = XCS(" Comment B ");
4228 CharData storage;
4229
4230 CharData_Init(&storage);
4231 XML_SetCommentHandler(g_parser, accumulate_comment);
4232 XML_SetUserData(g_parser, &storage);
4233 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4234 == XML_STATUS_ERROR)
4235 xml_failure(g_parser);
4236 CharData_CheckXMLChars(&storage, expected);
4237 }
4238 END_TEST
4239
4240 /* Test that the unknown encoding handler with map entries that expect
4241 * conversion but no conversion function is faulted
4242 */
START_TEST(test_missing_encoding_conversion_fn)4243 START_TEST(test_missing_encoding_conversion_fn) {
4244 const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
4245 "<doc>\x81</doc>";
4246
4247 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4248 /* MiscEncodingHandler sets up an encoding with every top-bit-set
4249 * character introducing a two-byte sequence. For this, it
4250 * requires a convert function. The above function call doesn't
4251 * pass one through, so when BadEncodingHandler actually gets
4252 * called it should supply an invalid encoding.
4253 */
4254 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4255 "Encoding with missing convert() not faulted");
4256 }
4257 END_TEST
4258
START_TEST(test_failing_encoding_conversion_fn)4259 START_TEST(test_failing_encoding_conversion_fn) {
4260 const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
4261 "<doc>\x81</doc>";
4262
4263 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4264 /* BadEncodingHandler sets up an encoding with every top-bit-set
4265 * character introducing a two-byte sequence. For this, it
4266 * requires a convert function. The above function call passes
4267 * one that insists all possible sequences are invalid anyway.
4268 */
4269 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4270 "Encoding with failing convert() not faulted");
4271 }
4272 END_TEST
4273
4274 /* Test unknown encoding conversions */
START_TEST(test_unknown_encoding_success)4275 START_TEST(test_unknown_encoding_success) {
4276 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4277 /* Equivalent to <eoc>Hello, world</eoc> */
4278 "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";
4279
4280 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4281 run_character_check(text, XCS("Hello, world"));
4282 }
4283 END_TEST
4284
4285 /* Test bad name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name)4286 START_TEST(test_unknown_encoding_bad_name) {
4287 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4288 "<\xff\x64oc>Hello, world</\xff\x64oc>";
4289
4290 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4291 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4292 "Bad name start in unknown encoding not faulted");
4293 }
4294 END_TEST
4295
4296 /* Test bad mid-name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name_2)4297 START_TEST(test_unknown_encoding_bad_name_2) {
4298 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4299 "<d\xffoc>Hello, world</d\xffoc>";
4300
4301 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4302 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4303 "Bad name in unknown encoding not faulted");
4304 }
4305 END_TEST
4306
4307 /* Test element name that is long enough to fill the conversion buffer
4308 * in an unknown encoding, finishing with an encoded character.
4309 */
START_TEST(test_unknown_encoding_long_name_1)4310 START_TEST(test_unknown_encoding_long_name_1) {
4311 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4312 "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
4313 "Hi"
4314 "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
4315 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4316 CharData storage;
4317
4318 CharData_Init(&storage);
4319 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4320 XML_SetStartElementHandler(g_parser, record_element_start_handler);
4321 XML_SetUserData(g_parser, &storage);
4322 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4323 == XML_STATUS_ERROR)
4324 xml_failure(g_parser);
4325 CharData_CheckXMLChars(&storage, expected);
4326 }
4327 END_TEST
4328
/* Test element name that is long enough to fill the conversion buffer
 * in an unknown encoding, finishing with a simple character.
 */
START_TEST(test_unknown_encoding_long_name_2)4332 START_TEST(test_unknown_encoding_long_name_2) {
4333 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4334 "<abcdefghabcdefghabcdefghijklmnop>"
4335 "Hi"
4336 "</abcdefghabcdefghabcdefghijklmnop>";
4337 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4338 CharData storage;
4339
4340 CharData_Init(&storage);
4341 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4342 XML_SetStartElementHandler(g_parser, record_element_start_handler);
4343 XML_SetUserData(g_parser, &storage);
4344 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4345 == XML_STATUS_ERROR)
4346 xml_failure(g_parser);
4347 CharData_CheckXMLChars(&storage, expected);
4348 }
4349 END_TEST
4350
START_TEST(test_invalid_unknown_encoding)4351 START_TEST(test_invalid_unknown_encoding) {
4352 const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
4353 "<doc>Hello world</doc>";
4354
4355 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4356 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4357 "Invalid unknown encoding not faulted");
4358 }
4359 END_TEST
4360
START_TEST(test_unknown_ascii_encoding_ok)4361 START_TEST(test_unknown_ascii_encoding_ok) {
4362 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4363 "<doc>Hello, world</doc>";
4364
4365 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4366 run_character_check(text, XCS("Hello, world"));
4367 }
4368 END_TEST
4369
START_TEST(test_unknown_ascii_encoding_fail)4370 START_TEST(test_unknown_ascii_encoding_fail) {
4371 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4372 "<doc>Hello, \x80 world</doc>";
4373
4374 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4375 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4376 "Invalid character not faulted");
4377 }
4378 END_TEST
4379
START_TEST(test_unknown_encoding_invalid_length)4380 START_TEST(test_unknown_encoding_invalid_length) {
4381 const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
4382 "<doc>Hello, world</doc>";
4383
4384 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4385 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4386 "Invalid unknown encoding not faulted");
4387 }
4388 END_TEST
4389
START_TEST(test_unknown_encoding_invalid_topbit)4390 START_TEST(test_unknown_encoding_invalid_topbit) {
4391 const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
4392 "<doc>Hello, world</doc>";
4393
4394 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4395 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4396 "Invalid unknown encoding not faulted");
4397 }
4398 END_TEST
4399
START_TEST(test_unknown_encoding_invalid_surrogate)4400 START_TEST(test_unknown_encoding_invalid_surrogate) {
4401 const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
4402 "<doc>Hello, \x82 world</doc>";
4403
4404 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4405 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4406 "Invalid unknown encoding not faulted");
4407 }
4408 END_TEST
4409
START_TEST(test_unknown_encoding_invalid_high)4410 START_TEST(test_unknown_encoding_invalid_high) {
4411 const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
4412 "<doc>Hello, world</doc>";
4413
4414 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4415 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4416 "Invalid unknown encoding not faulted");
4417 }
4418 END_TEST
4419
START_TEST(test_unknown_encoding_invalid_attr_value)4420 START_TEST(test_unknown_encoding_invalid_attr_value) {
4421 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4422 "<doc attr='\xff\x30'/>";
4423
4424 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4425 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4426 "Invalid attribute valid not faulted");
4427 }
4428 END_TEST
4429
/* Test that an external entity parser given an explicit Latin-1
 * encoding is not switched to UTF-16 by a UTF-16 BOM at the start of
 * the entity: the BOM bytes must be decoded as Latin-1 characters.
 */
START_TEST(test_ext_entity_latin1_utf16le_bom)4434 START_TEST(test_ext_entity_latin1_utf16le_bom) {
4435 const char *text = "<!DOCTYPE doc [\n"
4436 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4437 "]>\n"
4438 "<doc>&en;</doc>";
4439 ExtTest2 test_data
4440 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4441 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4442 * 0x4c = L and 0x20 is a space
4443 */
4444 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4445 #ifdef XML_UNICODE
4446 const XML_Char *expected = XCS("\x00ff\x00feL ");
4447 #else
4448 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4449 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4450 #endif
4451 CharData storage;
4452
4453 CharData_Init(&storage);
4454 test_data.storage = &storage;
4455 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4456 XML_SetUserData(g_parser, &test_data);
4457 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4458 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4459 == XML_STATUS_ERROR)
4460 xml_failure(g_parser);
4461 CharData_CheckXMLChars(&storage, expected);
4462 }
4463 END_TEST
4464
START_TEST(test_ext_entity_latin1_utf16be_bom)4465 START_TEST(test_ext_entity_latin1_utf16be_bom) {
4466 const char *text = "<!DOCTYPE doc [\n"
4467 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4468 "]>\n"
4469 "<doc>&en;</doc>";
4470 ExtTest2 test_data
4471 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4472 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4473 * 0x4c = L and 0x20 is a space
4474 */
4475 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4476 #ifdef XML_UNICODE
4477 const XML_Char *expected = XCS("\x00fe\x00ff L");
4478 #else
4479 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4480 const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L");
4481 #endif
4482 CharData storage;
4483
4484 CharData_Init(&storage);
4485 test_data.storage = &storage;
4486 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4487 XML_SetUserData(g_parser, &test_data);
4488 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4489 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4490 == XML_STATUS_ERROR)
4491 xml_failure(g_parser);
4492 CharData_CheckXMLChars(&storage, expected);
4493 }
4494 END_TEST
4495
4496 /* Parsing the full buffer rather than a byte at a time makes a
4497 * difference to the encoding scanning code, so repeat the above tests
4498 * without breaking them down by byte.
4499 */
START_TEST(test_ext_entity_latin1_utf16le_bom2)4500 START_TEST(test_ext_entity_latin1_utf16le_bom2) {
4501 const char *text = "<!DOCTYPE doc [\n"
4502 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4503 "]>\n"
4504 "<doc>&en;</doc>";
4505 ExtTest2 test_data
4506 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4507 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4508 * 0x4c = L and 0x20 is a space
4509 */
4510 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4511 #ifdef XML_UNICODE
4512 const XML_Char *expected = XCS("\x00ff\x00feL ");
4513 #else
4514 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4515 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4516 #endif
4517 CharData storage;
4518
4519 CharData_Init(&storage);
4520 test_data.storage = &storage;
4521 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4522 XML_SetUserData(g_parser, &test_data);
4523 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4524 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4525 == XML_STATUS_ERROR)
4526 xml_failure(g_parser);
4527 CharData_CheckXMLChars(&storage, expected);
4528 }
4529 END_TEST
4530
START_TEST(test_ext_entity_latin1_utf16be_bom2)4531 START_TEST(test_ext_entity_latin1_utf16be_bom2) {
4532 const char *text = "<!DOCTYPE doc [\n"
4533 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4534 "]>\n"
4535 "<doc>&en;</doc>";
4536 ExtTest2 test_data
4537 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4538 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4539 * 0x4c = L and 0x20 is a space
4540 */
4541 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4542 #ifdef XML_UNICODE
4543 const XML_Char *expected = XCS("\x00fe\x00ff L");
4544 #else
4545 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4546 const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
4547 #endif
4548 CharData storage;
4549
4550 CharData_Init(&storage);
4551 test_data.storage = &storage;
4552 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4553 XML_SetUserData(g_parser, &test_data);
4554 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4555 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4556 == XML_STATUS_ERROR)
4557 xml_failure(g_parser);
4558 CharData_CheckXMLChars(&storage, expected);
4559 }
4560 END_TEST
4561
4562 /* Test little-endian UTF-16 given an explicit big-endian encoding */
START_TEST(test_ext_entity_utf16_be)4563 START_TEST(test_ext_entity_utf16_be) {
4564 const char *text = "<!DOCTYPE doc [\n"
4565 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4566 "]>\n"
4567 "<doc>&en;</doc>";
4568 ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL};
4569 #ifdef XML_UNICODE
4570 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4571 #else
4572 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */
4573 "\xe6\x94\x80" /* U+6500 */
4574 "\xe2\xbc\x80" /* U+2F00 */
4575 "\xe3\xb8\x80"); /* U+3E00 */
4576 #endif
4577 CharData storage;
4578
4579 CharData_Init(&storage);
4580 test_data.storage = &storage;
4581 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4582 XML_SetUserData(g_parser, &test_data);
4583 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4584 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4585 == XML_STATUS_ERROR)
4586 xml_failure(g_parser);
4587 CharData_CheckXMLChars(&storage, expected);
4588 }
4589 END_TEST
4590
4591 /* Test big-endian UTF-16 given an explicit little-endian encoding */
START_TEST(test_ext_entity_utf16_le)4592 START_TEST(test_ext_entity_utf16_le) {
4593 const char *text = "<!DOCTYPE doc [\n"
4594 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4595 "]>\n"
4596 "<doc>&en;</doc>";
4597 ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
4598 #ifdef XML_UNICODE
4599 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4600 #else
4601 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */
4602 "\xe6\x94\x80" /* U+6500 */
4603 "\xe2\xbc\x80" /* U+2F00 */
4604 "\xe3\xb8\x80"); /* U+3E00 */
4605 #endif
4606 CharData storage;
4607
4608 CharData_Init(&storage);
4609 test_data.storage = &storage;
4610 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4611 XML_SetUserData(g_parser, &test_data);
4612 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4613 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4614 == XML_STATUS_ERROR)
4615 xml_failure(g_parser);
4616 CharData_CheckXMLChars(&storage, expected);
4617 }
4618 END_TEST
4619
4620 /* Test little-endian UTF-16 given no explicit encoding.
4621 * The existing default encoding (UTF-8) is assumed to hold without a
4622 * BOM to contradict it, so the entity value will in fact provoke an
4623 * error because 0x00 is not a valid XML character. We parse the
4624 * whole buffer in one go rather than feeding it in byte by byte to
4625 * exercise different code paths in the initial scanning routines.
4626 */
START_TEST(test_ext_entity_utf16_unknown)4627 START_TEST(test_ext_entity_utf16_unknown) {
4628 const char *text = "<!DOCTYPE doc [\n"
4629 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4630 "]>\n"
4631 "<doc>&en;</doc>";
4632 ExtFaults2 test_data
4633 = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
4634 XML_ERROR_INVALID_TOKEN};
4635
4636 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
4637 XML_SetUserData(g_parser, &test_data);
4638 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4639 "Invalid character should not have been accepted");
4640 }
4641 END_TEST
4642
4643 /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */
START_TEST(test_ext_entity_utf8_non_bom)4644 START_TEST(test_ext_entity_utf8_non_bom) {
4645 const char *text = "<!DOCTYPE doc [\n"
4646 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4647 "]>\n"
4648 "<doc>&en;</doc>";
4649 ExtTest2 test_data
4650 = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
4651 3, NULL, NULL};
4652 #ifdef XML_UNICODE
4653 const XML_Char *expected = XCS("\xfec0");
4654 #else
4655 const XML_Char *expected = XCS("\xef\xbb\x80");
4656 #endif
4657 CharData storage;
4658
4659 CharData_Init(&storage);
4660 test_data.storage = &storage;
4661 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4662 XML_SetUserData(g_parser, &test_data);
4663 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4664 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4665 == XML_STATUS_ERROR)
4666 xml_failure(g_parser);
4667 CharData_CheckXMLChars(&storage, expected);
4668 }
4669 END_TEST
4670
4671 /* Test that UTF-8 in a CDATA section is correctly passed through */
START_TEST(test_utf8_in_cdata_section)4672 START_TEST(test_utf8_in_cdata_section) {
4673 const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
4674 #ifdef XML_UNICODE
4675 const XML_Char *expected = XCS("one \x00e9 two");
4676 #else
4677 const XML_Char *expected = XCS("one \xc3\xa9 two");
4678 #endif
4679
4680 run_character_check(text, expected);
4681 }
4682 END_TEST
4683
/* Test that UTF-8 around a lone ']' in a CDATA section is handled */
START_TEST(test_utf8_in_cdata_section_2)4685 START_TEST(test_utf8_in_cdata_section_2) {
4686 const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
4687 #ifdef XML_UNICODE
4688 const XML_Char *expected = XCS("\x00e9]\x00e9two");
4689 #else
4690 const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
4691 #endif
4692
4693 run_character_check(text, expected);
4694 }
4695 END_TEST
4696
START_TEST(test_utf8_in_start_tags)4697 START_TEST(test_utf8_in_start_tags) {
4698 struct test_case {
4699 bool goodName;
4700 bool goodNameStart;
4701 const char *tagName;
4702 };
4703
4704 // The idea with the tests below is this:
4705 // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
4706 // go to isNever and are hence not a concern.
4707 //
4708 // We start with a character that is a valid name character
4709 // (or even name-start character, see XML 1.0r4 spec) and then we flip
4710 // single bits at places where (1) the result leaves the UTF-8 encoding space
4711 // and (2) we stay in the same n-byte sequence family.
4712 //
4713 // The flipped bits are highlighted in angle brackets in comments,
4714 // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
4715 // the most significant bit to 1 to leave UTF-8 encoding space.
4716 struct test_case cases[] = {
4717 // 1-byte UTF-8: [0xxx xxxx]
4718 {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':'
4719 {false, false, "\xBA"}, // [<1>011 1010]
4720 {true, false, "\x39"}, // [0011 1001] = ASCII nine '9'
4721 {false, false, "\xB9"}, // [<1>011 1001]
4722
4723 // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
4724 {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] =
4725 // Arabic small waw U+06E5
4726 {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
4727 {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
4728 {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
4729 {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] =
4730 // combining char U+0301
4731 {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
4732 {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
4733 {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
4734
4735 // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
4736 {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] =
4737 // Devanagari Letter A U+0905
4738 {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
4739 {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
4740 {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
4741 {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
4742 {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
4743 {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] =
4744 // combining char U+0901
4745 {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
4746 {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
4747 {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
4748 {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
4749 {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
4750 };
4751 const bool atNameStart[] = {true, false};
4752
4753 size_t i = 0;
4754 char doc[1024];
4755 size_t failCount = 0;
4756
4757 // we need all the bytes to be parsed, but we don't want the errors that can
4758 // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
4759 if (g_reparseDeferralEnabledDefault) {
4760 return;
4761 }
4762
4763 for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
4764 size_t j = 0;
4765 for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
4766 const bool expectedSuccess
4767 = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
4768 snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
4769 cases[i].tagName);
4770 XML_Parser parser = XML_ParserCreate(NULL);
4771
4772 const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
4773 parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
4774
4775 bool success = true;
4776 if ((status == XML_STATUS_OK) != expectedSuccess) {
4777 success = false;
4778 }
4779 if ((status == XML_STATUS_ERROR)
4780 && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
4781 success = false;
4782 }
4783
4784 if (! success) {
4785 fprintf(
4786 stderr,
4787 "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
4788 (unsigned)i + 1u, atNameStart[j] ? " " : "not ",
4789 (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
4790 failCount++;
4791 }
4792
4793 XML_ParserFree(parser);
4794 }
4795 }
4796
4797 if (failCount > 0) {
4798 fail("UTF-8 regression detected");
4799 }
4800 }
4801 END_TEST
4802
4803 /* Test trailing spaces in elements are accepted */
START_TEST(test_trailing_spaces_in_elements)4804 START_TEST(test_trailing_spaces_in_elements) {
4805 const char *text = "<doc >Hi</doc >";
4806 const XML_Char *expected = XCS("doc/doc");
4807 CharData storage;
4808
4809 CharData_Init(&storage);
4810 XML_SetElementHandler(g_parser, record_element_start_handler,
4811 record_element_end_handler);
4812 XML_SetUserData(g_parser, &storage);
4813 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4814 == XML_STATUS_ERROR)
4815 xml_failure(g_parser);
4816 CharData_CheckXMLChars(&storage, expected);
4817 }
4818 END_TEST
4819
START_TEST(test_utf16_attribute)4820 START_TEST(test_utf16_attribute) {
4821 const char text[] =
4822 /* <d {KHO KHWAI}{CHO CHAN}='a'/>
4823 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4824 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4825 */
4826 "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
4827 const XML_Char *expected = XCS("a");
4828 CharData storage;
4829
4830 CharData_Init(&storage);
4831 XML_SetStartElementHandler(g_parser, accumulate_attribute);
4832 XML_SetUserData(g_parser, &storage);
4833 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4834 == XML_STATUS_ERROR)
4835 xml_failure(g_parser);
4836 CharData_CheckXMLChars(&storage, expected);
4837 }
4838 END_TEST
4839
START_TEST(test_utf16_second_attr)4840 START_TEST(test_utf16_second_attr) {
4841 /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
4842 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4843 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4844 */
4845 const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
4846 "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
4847 const XML_Char *expected = XCS("1");
4848 CharData storage;
4849
4850 CharData_Init(&storage);
4851 XML_SetStartElementHandler(g_parser, accumulate_attribute);
4852 XML_SetUserData(g_parser, &storage);
4853 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4854 == XML_STATUS_ERROR)
4855 xml_failure(g_parser);
4856 CharData_CheckXMLChars(&storage, expected);
4857 }
4858 END_TEST
4859
START_TEST(test_attr_after_solidus)4860 START_TEST(test_attr_after_solidus) {
4861 const char *text = "<doc attr1='a' / attr2='b'>";
4862
4863 expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
4864 }
4865 END_TEST
4866
START_TEST(test_utf16_pe)4867 START_TEST(test_utf16_pe) {
4868 /* <!DOCTYPE doc [
4869 * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
4870 * %{KHO KHWAI}{CHO CHAN};
4871 * ]>
4872 * <doc></doc>
4873 *
4874 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4875 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4876 */
4877 const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4878 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
4879 "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
4880 "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
4881 "\0%\x0e\x04\x0e\x08\0;\0\n"
4882 "\0]\0>\0\n"
4883 "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
4884 #ifdef XML_UNICODE
4885 const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
4886 #else
4887 const XML_Char *expected
4888 = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
4889 #endif
4890 CharData storage;
4891
4892 CharData_Init(&storage);
4893 XML_SetUserData(g_parser, &storage);
4894 XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl);
4895 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4896 == XML_STATUS_ERROR)
4897 xml_failure(g_parser);
4898 CharData_CheckXMLChars(&storage, expected);
4899 }
4900 END_TEST
4901
4902 /* Test that duff attribute description keywords are rejected */
START_TEST(test_bad_attr_desc_keyword)4903 START_TEST(test_bad_attr_desc_keyword) {
4904 const char *text = "<!DOCTYPE doc [\n"
4905 " <!ATTLIST doc attr CDATA #!IMPLIED>\n"
4906 "]>\n"
4907 "<doc />";
4908
4909 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4910 "Bad keyword !IMPLIED not faulted");
4911 }
4912 END_TEST
4913
4914 /* Test that an invalid attribute description keyword consisting of
4915 * UTF-16 characters with their top bytes non-zero are correctly
4916 * faulted
4917 */
START_TEST(test_bad_attr_desc_keyword_utf16)4918 START_TEST(test_bad_attr_desc_keyword_utf16) {
4919 /* <!DOCTYPE d [
4920 * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
4921 * ]><d/>
4922 *
4923 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4924 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4925 */
4926 const char text[]
4927 = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
4928 "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
4929 "\0#\x0e\x04\x0e\x08\0>\0\n"
4930 "\0]\0>\0<\0d\0/\0>";
4931
4932 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4933 != XML_STATUS_ERROR)
4934 fail("Invalid UTF16 attribute keyword not faulted");
4935 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4936 xml_failure(g_parser);
4937 }
4938 END_TEST
4939
4940 /* Test that invalid syntax in a <!DOCTYPE> is rejected. Do this
4941 * using prefix-encoding (see above) to trigger specific code paths
4942 */
START_TEST(test_bad_doctype)4943 START_TEST(test_bad_doctype) {
4944 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4945 "<!DOCTYPE doc [ \x80\x44 ]><doc/>";
4946
4947 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4948 expect_failure(text, XML_ERROR_SYNTAX,
4949 "Invalid bytes in DOCTYPE not faulted");
4950 }
4951 END_TEST
4952
START_TEST(test_bad_doctype_utf8)4953 START_TEST(test_bad_doctype_utf8) {
4954 const char *text = "<!DOCTYPE \xDB\x25"
4955 "doc><doc/>"; // [1101 1011] [<0>010 0101]
4956 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4957 "Invalid UTF-8 in DOCTYPE not faulted");
4958 }
4959 END_TEST
4960
START_TEST(test_bad_doctype_utf16)4961 START_TEST(test_bad_doctype_utf16) {
4962 const char text[] =
4963 /* <!DOCTYPE doc [ \x06f2 ]><doc/>
4964 *
4965 * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
4966 * (name character) but not a valid letter (name start character)
4967 */
4968 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
4969 "\x06\xf2"
4970 "\0 \0]\0>\0<\0d\0o\0c\0/\0>";
4971
4972 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4973 != XML_STATUS_ERROR)
4974 fail("Invalid bytes in DOCTYPE not faulted");
4975 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4976 xml_failure(g_parser);
4977 }
4978 END_TEST
4979
START_TEST(test_bad_doctype_plus)4980 START_TEST(test_bad_doctype_plus) {
4981 const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
4982 "<1+>&foo;</1+>";
4983
4984 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4985 "'+' in document name not faulted");
4986 }
4987 END_TEST
4988
START_TEST(test_bad_doctype_star)4989 START_TEST(test_bad_doctype_star) {
4990 const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
4991 "<1*>&foo;</1*>";
4992
4993 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4994 "'*' in document name not faulted");
4995 }
4996 END_TEST
4997
START_TEST(test_bad_doctype_query)4998 START_TEST(test_bad_doctype_query) {
4999 const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
5000 "<1?>&foo;</1?>";
5001
5002 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5003 "'?' in document name not faulted");
5004 }
5005 END_TEST
5006
START_TEST(test_unknown_encoding_bad_ignore)5007 START_TEST(test_unknown_encoding_bad_ignore) {
5008 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
5009 "<!DOCTYPE doc SYSTEM 'foo'>"
5010 "<doc><e>&entity;</e></doc>";
5011 ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
5012 "Invalid character not faulted", XCS("prefix-conv"),
5013 XML_ERROR_INVALID_TOKEN};
5014
5015 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
5016 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5017 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
5018 XML_SetUserData(g_parser, &fault);
5019 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
5020 "Bad IGNORE section with unknown encoding not failed");
5021 }
5022 END_TEST
5023
START_TEST(test_entity_in_utf16_be_attr)5024 START_TEST(test_entity_in_utf16_be_attr) {
5025 const char text[] =
5026 /* <e a='ä ä'></e> */
5027 "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
5028 "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
5029 #ifdef XML_UNICODE
5030 const XML_Char *expected = XCS("\x00e4 \x00e4");
5031 #else
5032 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5033 #endif
5034 CharData storage;
5035
5036 CharData_Init(&storage);
5037 XML_SetUserData(g_parser, &storage);
5038 XML_SetStartElementHandler(g_parser, accumulate_attribute);
5039 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5040 == XML_STATUS_ERROR)
5041 xml_failure(g_parser);
5042 CharData_CheckXMLChars(&storage, expected);
5043 }
5044 END_TEST
5045
START_TEST(test_entity_in_utf16_le_attr)5046 START_TEST(test_entity_in_utf16_le_attr) {
5047 const char text[] =
5048 /* <e a='ä ä'></e> */
5049 "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
5050 "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
5051 #ifdef XML_UNICODE
5052 const XML_Char *expected = XCS("\x00e4 \x00e4");
5053 #else
5054 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5055 #endif
5056 CharData storage;
5057
5058 CharData_Init(&storage);
5059 XML_SetUserData(g_parser, &storage);
5060 XML_SetStartElementHandler(g_parser, accumulate_attribute);
5061 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5062 == XML_STATUS_ERROR)
5063 xml_failure(g_parser);
5064 CharData_CheckXMLChars(&storage, expected);
5065 }
5066 END_TEST
5067
START_TEST(test_entity_public_utf16_be)5068 START_TEST(test_entity_public_utf16_be) {
5069 const char text[] =
5070 /* <!DOCTYPE d [ */
5071 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5072 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5073 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
5074 "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
5075 /* %e; */
5076 "\0%\0e\0;\0\n"
5077 /* ]> */
5078 "\0]\0>\0\n"
5079 /* <d>&j;</d> */
5080 "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
5081 ExtTest2 test_data
5082 = {/* <!ENTITY j 'baz'> */
5083 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
5084 const XML_Char *expected = XCS("baz");
5085 CharData storage;
5086
5087 CharData_Init(&storage);
5088 test_data.storage = &storage;
5089 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5090 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5091 XML_SetUserData(g_parser, &test_data);
5092 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5093 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5094 == XML_STATUS_ERROR)
5095 xml_failure(g_parser);
5096 CharData_CheckXMLChars(&storage, expected);
5097 }
5098 END_TEST
5099
START_TEST(test_entity_public_utf16_le)5100 START_TEST(test_entity_public_utf16_le) {
5101 const char text[] =
5102 /* <!DOCTYPE d [ */
5103 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
5104 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5105 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
5106 "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
5107 /* %e; */
5108 "%\0e\0;\0\n\0"
5109 /* ]> */
5110 "]\0>\0\n\0"
5111 /* <d>&j;</d> */
5112 "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
5113 ExtTest2 test_data
5114 = {/* <!ENTITY j 'baz'> */
5115 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
5116 const XML_Char *expected = XCS("baz");
5117 CharData storage;
5118
5119 CharData_Init(&storage);
5120 test_data.storage = &storage;
5121 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5122 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5123 XML_SetUserData(g_parser, &test_data);
5124 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5125 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5126 == XML_STATUS_ERROR)
5127 xml_failure(g_parser);
5128 CharData_CheckXMLChars(&storage, expected);
5129 }
5130 END_TEST
5131
5132 /* Test that a doctype with neither an internal nor external subset is
5133 * faulted
5134 */
START_TEST(test_short_doctype)5135 START_TEST(test_short_doctype) {
5136 const char *text = "<!DOCTYPE doc></doc>";
5137 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5138 "DOCTYPE without subset not rejected");
5139 }
5140 END_TEST
5141
START_TEST(test_short_doctype_2)5142 START_TEST(test_short_doctype_2) {
5143 const char *text = "<!DOCTYPE doc PUBLIC></doc>";
5144 expect_failure(text, XML_ERROR_SYNTAX,
5145 "DOCTYPE without Public ID not rejected");
5146 }
5147 END_TEST
5148
START_TEST(test_short_doctype_3)5149 START_TEST(test_short_doctype_3) {
5150 const char *text = "<!DOCTYPE doc SYSTEM></doc>";
5151 expect_failure(text, XML_ERROR_SYNTAX,
5152 "DOCTYPE without System ID not rejected");
5153 }
5154 END_TEST
5155
START_TEST(test_long_doctype)5156 START_TEST(test_long_doctype) {
5157 const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
5158 expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
5159 }
5160 END_TEST
5161
START_TEST(test_bad_entity)5162 START_TEST(test_bad_entity) {
5163 const char *text = "<!DOCTYPE doc [\n"
5164 " <!ENTITY foo PUBLIC>\n"
5165 "]>\n"
5166 "<doc/>";
5167 expect_failure(text, XML_ERROR_SYNTAX,
5168 "ENTITY without Public ID is not rejected");
5169 }
5170 END_TEST
5171
5172 /* Test unquoted value is faulted */
START_TEST(test_bad_entity_2)5173 START_TEST(test_bad_entity_2) {
5174 const char *text = "<!DOCTYPE doc [\n"
5175 " <!ENTITY % foo bar>\n"
5176 "]>\n"
5177 "<doc/>";
5178 expect_failure(text, XML_ERROR_SYNTAX,
5179 "ENTITY without Public ID is not rejected");
5180 }
5181 END_TEST
5182
START_TEST(test_bad_entity_3)5183 START_TEST(test_bad_entity_3) {
5184 const char *text = "<!DOCTYPE doc [\n"
5185 " <!ENTITY % foo PUBLIC>\n"
5186 "]>\n"
5187 "<doc/>";
5188 expect_failure(text, XML_ERROR_SYNTAX,
5189 "Parameter ENTITY without Public ID is not rejected");
5190 }
5191 END_TEST
5192
START_TEST(test_bad_entity_4)5193 START_TEST(test_bad_entity_4) {
5194 const char *text = "<!DOCTYPE doc [\n"
5195 " <!ENTITY % foo SYSTEM>\n"
5196 "]>\n"
5197 "<doc/>";
5198 expect_failure(text, XML_ERROR_SYNTAX,
5199 "Parameter ENTITY without Public ID is not rejected");
5200 }
5201 END_TEST
5202
START_TEST(test_bad_notation)5203 START_TEST(test_bad_notation) {
5204 const char *text = "<!DOCTYPE doc [\n"
5205 " <!NOTATION n SYSTEM>\n"
5206 "]>\n"
5207 "<doc/>";
5208 expect_failure(text, XML_ERROR_SYNTAX,
5209 "Notation without System ID is not rejected");
5210 }
5211 END_TEST
5212
5213 /* Test for issue #11, wrongly suppressed default handler */
START_TEST(test_default_doctype_handler)5214 START_TEST(test_default_doctype_handler) {
5215 const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n"
5216 " <!ENTITY foo 'bar'>\n"
5217 "]>\n"
5218 "<doc>&foo;</doc>";
5219 DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE},
5220 {XCS("'test.dtd'"), 10, XML_FALSE},
5221 {NULL, 0, XML_FALSE}};
5222 int i;
5223
5224 XML_SetUserData(g_parser, &test_data);
5225 XML_SetDefaultHandler(g_parser, checking_default_handler);
5226 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
5227 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5228 == XML_STATUS_ERROR)
5229 xml_failure(g_parser);
5230 for (i = 0; test_data[i].expected != NULL; i++)
5231 if (! test_data[i].seen)
5232 fail("Default handler not run for public !DOCTYPE");
5233 }
5234 END_TEST
5235
START_TEST(test_empty_element_abort)5236 START_TEST(test_empty_element_abort) {
5237 const char *text = "<abort/>";
5238
5239 XML_SetStartElementHandler(g_parser, start_element_suspender);
5240 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5241 != XML_STATUS_ERROR)
5242 fail("Expected to error on abort");
5243 }
5244 END_TEST
5245
5246 /* Regression test for GH issue #612: unfinished m_declAttributeType
5247 * allocation in ->m_tempPool can corrupt following allocation.
5248 */
START_TEST(test_pool_integrity_with_unfinished_attr)5249 START_TEST(test_pool_integrity_with_unfinished_attr) {
5250 const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
5251 "<!DOCTYPE foo [\n"
5252 "<!ELEMENT foo ANY>\n"
5253 "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
5254 "%entp;\n"
5255 "]>\n"
5256 "<a></a>\n";
5257 const XML_Char *expected = XCS("COMMENT");
5258 CharData storage;
5259
5260 CharData_Init(&storage);
5261 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5262 XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist);
5263 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
5264 XML_SetCommentHandler(g_parser, accumulate_comment);
5265 XML_SetUserData(g_parser, &storage);
5266 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5267 == XML_STATUS_ERROR)
5268 xml_failure(g_parser);
5269 CharData_CheckXMLChars(&storage, expected);
5270 }
5271 END_TEST
5272
5273 /* Test a possible early return location in internalEntityProcessor */
START_TEST(test_entity_ref_no_elements)5274 START_TEST(test_entity_ref_no_elements) {
5275 const char *const text = "<!DOCTYPE foo [\n"
5276 "<!ENTITY e1 \"test\">\n"
5277 "]> <foo>&e1;"; // intentionally missing newline
5278
5279 XML_Parser parser = XML_ParserCreate(NULL);
5280 assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5281 == XML_STATUS_ERROR);
5282 assert_true(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS);
5283 XML_ParserFree(parser);
5284 }
5285 END_TEST
5286
5287 /* Tests if chained entity references lead to unbounded recursion */
START_TEST(test_deep_nested_entity)5288 START_TEST(test_deep_nested_entity) {
5289 const size_t N_LINES = 60000;
5290 const size_t SIZE_PER_LINE = 50;
5291
5292 char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
5293 if (text == NULL) {
5294 fail("malloc failed");
5295 }
5296
5297 char *textPtr = text;
5298
5299 // Create the XML
5300 textPtr += snprintf(textPtr, SIZE_PER_LINE,
5301 "<!DOCTYPE foo [\n"
5302 " <!ENTITY s0 'deepText'>\n");
5303
5304 for (size_t i = 1; i < N_LINES; ++i) {
5305 textPtr += snprintf(textPtr, SIZE_PER_LINE, " <!ENTITY s%lu '&s%lu;'>\n",
5306 (long unsigned)i, (long unsigned)(i - 1));
5307 }
5308
5309 snprintf(textPtr, SIZE_PER_LINE, "]> <foo>&s%lu;</foo>\n",
5310 (long unsigned)(N_LINES - 1));
5311
5312 const XML_Char *const expected = XCS("deepText");
5313
5314 CharData storage;
5315 CharData_Init(&storage);
5316
5317 XML_Parser parser = XML_ParserCreate(NULL);
5318
5319 XML_SetCharacterDataHandler(parser, accumulate_characters);
5320 XML_SetUserData(parser, &storage);
5321
5322 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5323 == XML_STATUS_ERROR)
5324 xml_failure(parser);
5325
5326 CharData_CheckXMLChars(&storage, expected);
5327 XML_ParserFree(parser);
5328 free(text);
5329 }
5330 END_TEST
5331
5332 /* Tests if chained entity references in attributes
5333 lead to unbounded recursion */
START_TEST(test_deep_nested_attribute_entity)5334 START_TEST(test_deep_nested_attribute_entity) {
5335 const size_t N_LINES = 60000;
5336 const size_t SIZE_PER_LINE = 100;
5337
5338 char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
5339 if (text == NULL) {
5340 fail("malloc failed");
5341 }
5342
5343 char *textPtr = text;
5344
5345 // Create the XML
5346 textPtr += snprintf(textPtr, SIZE_PER_LINE,
5347 "<!DOCTYPE foo [\n"
5348 " <!ENTITY s0 'deepText'>\n");
5349
5350 for (size_t i = 1; i < N_LINES; ++i) {
5351 textPtr += snprintf(textPtr, SIZE_PER_LINE, " <!ENTITY s%lu '&s%lu;'>\n",
5352 (long unsigned)i, (long unsigned)(i - 1));
5353 }
5354
5355 snprintf(textPtr, SIZE_PER_LINE, "]> <foo name='&s%lu;'>mainText</foo>\n",
5356 (long unsigned)(N_LINES - 1));
5357
5358 AttrInfo doc_info[] = {{XCS("name"), XCS("deepText")}, {NULL, NULL}};
5359 ElementInfo info[] = {{XCS("foo"), 1, NULL, NULL}, {NULL, 0, NULL, NULL}};
5360 info[0].attributes = doc_info;
5361
5362 XML_Parser parser = XML_ParserCreate(NULL);
5363 ParserAndElementInfo parserPlusElemenInfo = {parser, info};
5364
5365 XML_SetStartElementHandler(parser, counting_start_element_handler);
5366 XML_SetUserData(parser, &parserPlusElemenInfo);
5367
5368 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5369 == XML_STATUS_ERROR)
5370 xml_failure(parser);
5371
5372 XML_ParserFree(parser);
5373 free(text);
5374 }
5375 END_TEST
5376
START_TEST(test_deep_nested_entity_delayed_interpretation)5377 START_TEST(test_deep_nested_entity_delayed_interpretation) {
5378 const size_t N_LINES = 70000;
5379 const size_t SIZE_PER_LINE = 100;
5380
5381 char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
5382 if (text == NULL) {
5383 fail("malloc failed");
5384 }
5385
5386 char *textPtr = text;
5387
5388 // Create the XML
5389 textPtr += snprintf(textPtr, SIZE_PER_LINE,
5390 "<!DOCTYPE foo [\n"
5391 " <!ENTITY %% s0 'deepText'>\n");
5392
5393 for (size_t i = 1; i < N_LINES; ++i) {
5394 textPtr += snprintf(textPtr, SIZE_PER_LINE,
5395 " <!ENTITY %% s%lu '%s%lu;'>\n", (long unsigned)i,
5396 (long unsigned)(i - 1));
5397 }
5398
5399 snprintf(textPtr, SIZE_PER_LINE,
5400 " <!ENTITY %% define_g \"<!ENTITY g '%s%lu;'>\">\n"
5401 " %%define_g;\n"
5402 "]>\n"
5403 "<foo/>\n",
5404 (long unsigned)(N_LINES - 1));
5405
5406 XML_Parser parser = XML_ParserCreate(NULL);
5407
5408 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5409 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5410 == XML_STATUS_ERROR)
5411 xml_failure(parser);
5412
5413 XML_ParserFree(parser);
5414 free(text);
5415 }
5416 END_TEST
5417
START_TEST(test_nested_entity_suspend)5418 START_TEST(test_nested_entity_suspend) {
5419 const char *const text = "<!DOCTYPE a [\n"
5420 " <!ENTITY e1 '<!--e1-->'>\n"
5421 " <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
5422 " <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
5423 "]>\n"
5424 "<a><!--start-->&e3;<!--end--></a>";
5425 const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
5426 XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
5427 CharData storage;
5428 CharData_Init(&storage);
5429 XML_Parser parser = XML_ParserCreate(NULL);
5430 ParserPlusStorage parserPlusStorage = {parser, &storage};
5431
5432 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5433 XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
5434 XML_SetUserData(parser, &parserPlusStorage);
5435
5436 enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5437 while (status == XML_STATUS_SUSPENDED) {
5438 status = XML_ResumeParser(parser);
5439 }
5440 if (status != XML_STATUS_OK)
5441 xml_failure(parser);
5442
5443 CharData_CheckXMLChars(&storage, expected);
5444 XML_ParserFree(parser);
5445 }
5446 END_TEST
5447
START_TEST(test_nested_entity_suspend_2)5448 START_TEST(test_nested_entity_suspend_2) {
5449 const char *const text = "<!DOCTYPE doc [\n"
5450 " <!ENTITY ge1 'head1Ztail1'>\n"
5451 " <!ENTITY ge2 'head2&ge1;tail2'>\n"
5452 " <!ENTITY ge3 'head3&ge2;tail3'>\n"
5453 "]>\n"
5454 "<doc>&ge3;</doc>";
5455 const XML_Char *const expected = XCS("head3") XCS("head2") XCS("head1")
5456 XCS("Z") XCS("tail1") XCS("tail2") XCS("tail3");
5457 CharData storage;
5458 CharData_Init(&storage);
5459 XML_Parser parser = XML_ParserCreate(NULL);
5460 ParserPlusStorage parserPlusStorage = {parser, &storage};
5461
5462 XML_SetCharacterDataHandler(parser, accumulate_char_data_and_suspend);
5463 XML_SetUserData(parser, &parserPlusStorage);
5464
5465 enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5466 while (status == XML_STATUS_SUSPENDED) {
5467 status = XML_ResumeParser(parser);
5468 }
5469 if (status != XML_STATUS_OK)
5470 xml_failure(parser);
5471
5472 CharData_CheckXMLChars(&storage, expected);
5473 XML_ParserFree(parser);
5474 }
5475 END_TEST
5476
5477 /* Regression test for quadratic parsing on large tokens */
/* For each kind of token listed in text[], a fresh parser is fed a short
 * prefix, then fillcount chunks of fillsize 'a' bytes, then a short
 * suffix.  The global g_bytesScanned is reset before each case and must
 * end up above approx_bytes but at most max_factor times approx_bytes.
 * The whole test is skipped when g_reparseDeferralEnabledDefault is false.
 */
START_TEST(test_big_tokens_scale_linearly) {
  const struct {
    const char *pre;
    const char *post;
  } text[] = {
      {"<a>", "</a>"}, // assumed good, used as baseline
      {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
      {"<c attr='", "'></c>"}, // big attribute, used to be O(N²)
      {"<d><!-- ", " --></d>"}, // long comment, used to be O(N²)
      {"<e><", "/></e>"}, // big elem name, used to be O(N²)
  };
  const int num_cases = sizeof(text) / sizeof(text[0]);
  char aaaaaa[4096];
  const int fillsize = (int)sizeof(aaaaaa);
  const int fillcount = 100;
  const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
  const unsigned max_factor = 4;
  // upper limit for g_bytesScanned per case
  const unsigned max_scanned = max_factor * approx_bytes;

  memset(aaaaaa, 'a', fillsize);

  if (! g_reparseDeferralEnabledDefault) {
    return; // heuristic is disabled; we would get O(n^2) and fail.
  }

  for (int i = 0; i < num_cases; ++i) {
    XML_Parser parser = XML_ParserCreate(NULL);
    assert_true(parser != NULL);
    enum XML_Status status;
    set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);

    // parse the start text
    g_bytesScanned = 0;
    status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
                                     (int)strlen(text[i].pre), XML_FALSE);
    if (status != XML_STATUS_OK) {
      xml_failure(parser);
    }

    // parse lots of 'a', failing the test early if it takes too long
    unsigned past_max_count = 0;
    for (int f = 0; f < fillcount; ++f) {
      status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
      if (status != XML_STATUS_OK) {
        xml_failure(parser);
      }
      if (g_bytesScanned > max_scanned) {
        // We're not done, and have already passed the limit -- the test will
        // definitely fail. This block allows us to save time by failing early.
        const unsigned pushed
            = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
        fprintf(
            stderr,
            "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
            f + 1, fillcount, pushed, g_bytesScanned,
            g_bytesScanned / (double)pushed, max_scanned, max_factor);
        past_max_count++;
        // We are failing, but allow a few log prints first. If we don't reach
        // a count of five, the test will fail after the loop instead.
        assert_true(past_max_count < 5);
      }
    }

    // parse the end text
    status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
                                     (int)strlen(text[i].post), XML_TRUE);
    if (status != XML_STATUS_OK) {
      xml_failure(parser);
    }

    assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
    if (g_bytesScanned > max_scanned) {
      fprintf(
          stderr,
          "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
          g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
          max_factor);
      fail("scanned too many bytes");
    }

    XML_ParserFree(parser);
  }
}
END_TEST
5562
/* Runs the same scenario twice, once with reparse deferral switched off
 * (enabled == 0) and once with it switched on (enabled == 1) through
 * XML_SetReparseDeferralEnabled().  A small <d> element is followed by an
 * <x> element with a large attribute value that is pushed in many chunks.
 * The start element events gathered in the CharData are checked after
 * each stage: "d" while <x> is incomplete, "dx" at the end.  With
 * deferral on, more data is pushed after the closing text before "dx" is
 * expected.
 */
START_TEST(test_set_reparse_deferral) {
  const char *const pre = "<d>";
  const char *const start = "<x attr='";
  const char *const end = "'></x>";
  char eeeeee[100];
  const int fillsize = (int)sizeof(eeeeee);
  memset(eeeeee, 'e', fillsize);

  for (int enabled = 0; enabled <= 1; enabled += 1) {
    set_subtest("deferral=%d", enabled);

    XML_Parser parser = XML_ParserCreate(NULL);
    assert_true(parser != NULL);
    assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
    // pre-grow the buffer to avoid reparsing due to almost-fullness
    assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);

    CharData storage;
    CharData_Init(&storage);
    XML_SetUserData(parser, &storage);
    XML_SetStartElementHandler(parser, start_element_event_handler);

    enum XML_Status status;
    // parse the start text
    status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
    if (status != XML_STATUS_OK) {
      xml_failure(parser);
    }
    CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done

    // ..and the start of the token
    status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
    if (status != XML_STATUS_OK) {
      xml_failure(parser);
    }
    CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one

    // try to parse lots of 'e', but the token isn't finished
    for (int c = 0; c < 100; ++c) {
      status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
      if (status != XML_STATUS_OK) {
        xml_failure(parser);
      }
    }
    CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one

    // end the <x> token.
    status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
    if (status != XML_STATUS_OK) {
      xml_failure(parser);
    }

    if (enabled) {
      // In general, we may need to push more data to trigger a reparse attempt,
      // but in this test, the data is constructed to always require it.
      CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
      // 2x the token length should suffice; the +1 covers the start and end.
      for (int c = 0; c < 101; ++c) {
        status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
        if (status != XML_STATUS_OK) {
          xml_failure(parser);
        }
      }
    }
    CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done

    XML_ParserFree(parser);
  }
}
END_TEST
5633
5634 struct element_decl_data {
5635 XML_Parser parser;
5636 int count;
5637 };
5638
5639 static void
element_decl_counter(void * userData,const XML_Char * name,XML_Content * model)5640 element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
5641 UNUSED_P(name);
5642 struct element_decl_data *testdata = (struct element_decl_data *)userData;
5643 testdata->count += 1;
5644 XML_FreeContentModel(testdata->parser, model);
5645 }
5646
5647 static int
external_inherited_parser(XML_Parser p,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)5648 external_inherited_parser(XML_Parser p, const XML_Char *context,
5649 const XML_Char *base, const XML_Char *systemId,
5650 const XML_Char *publicId) {
5651 UNUSED_P(base);
5652 UNUSED_P(systemId);
5653 UNUSED_P(publicId);
5654 const char *const pre = "<!ELEMENT document ANY>\n";
5655 const char *const start = "<!ELEMENT ";
5656 const char *const end = " ANY>\n";
5657 const char *const post = "<!ELEMENT xyz ANY>\n";
5658 const int enabled = *(int *)XML_GetUserData(p);
5659 char eeeeee[100];
5660 char spaces[100];
5661 const int fillsize = (int)sizeof(eeeeee);
5662 assert_true(fillsize == (int)sizeof(spaces));
5663 memset(eeeeee, 'e', fillsize);
5664 memset(spaces, ' ', fillsize);
5665
5666 XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
5667 assert_true(parser != NULL);
5668 // pre-grow the buffer to avoid reparsing due to almost-fullness
5669 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5670
5671 struct element_decl_data testdata;
5672 testdata.parser = parser;
5673 testdata.count = 0;
5674 XML_SetUserData(parser, &testdata);
5675 XML_SetElementDeclHandler(parser, element_decl_counter);
5676
5677 enum XML_Status status;
5678 // parse the initial text
5679 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5680 if (status != XML_STATUS_OK) {
5681 xml_failure(parser);
5682 }
5683 assert_true(testdata.count == 1); // first element should be done
5684
5685 // ..and the start of the big token
5686 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5687 if (status != XML_STATUS_OK) {
5688 xml_failure(parser);
5689 }
5690 assert_true(testdata.count == 1); // still just the first one
5691
5692 // try to parse lots of 'e', but the token isn't finished
5693 for (int c = 0; c < 100; ++c) {
5694 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5695 if (status != XML_STATUS_OK) {
5696 xml_failure(parser);
5697 }
5698 }
5699 assert_true(testdata.count == 1); // *still* just the first one
5700
5701 // end the big token.
5702 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5703 if (status != XML_STATUS_OK) {
5704 xml_failure(parser);
5705 }
5706
5707 if (enabled) {
5708 // In general, we may need to push more data to trigger a reparse attempt,
5709 // but in this test, the data is constructed to always require it.
5710 assert_true(testdata.count == 1); // or the test is incorrect
5711 // 2x the token length should suffice; the +1 covers the start and end.
5712 for (int c = 0; c < 101; ++c) {
5713 status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
5714 if (status != XML_STATUS_OK) {
5715 xml_failure(parser);
5716 }
5717 }
5718 }
5719 assert_true(testdata.count == 2); // the big token should be done
5720
5721 // parse the final text
5722 status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
5723 if (status != XML_STATUS_OK) {
5724 xml_failure(parser);
5725 }
5726 assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
5727
5728 XML_ParserFree(parser);
5729 return XML_STATUS_OK;
5730 }
5731
START_TEST(test_reparse_deferral_is_inherited)5732 START_TEST(test_reparse_deferral_is_inherited) {
5733 const char *const text
5734 = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
5735 for (int enabled = 0; enabled <= 1; ++enabled) {
5736 set_subtest("deferral=%d", enabled);
5737
5738 XML_Parser parser = XML_ParserCreate(NULL);
5739 assert_true(parser != NULL);
5740 XML_SetUserData(parser, (void *)&enabled);
5741 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5742 // this handler creates a sub-parser and checks that its deferral behavior
5743 // is what we expected, based on the value of `enabled` (in userdata).
5744 XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
5745 assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5746 if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
5747 xml_failure(parser);
5748
5749 XML_ParserFree(parser);
5750 }
5751 }
5752 END_TEST
5753
START_TEST(test_set_reparse_deferral_on_null_parser)5754 START_TEST(test_set_reparse_deferral_on_null_parser) {
5755 assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
5756 assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
5757 assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
5758 assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
5759 assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
5760 == XML_FALSE);
5761 assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
5762 == XML_FALSE);
5763 }
5764 END_TEST
5765
START_TEST(test_set_reparse_deferral_on_the_fly)5766 START_TEST(test_set_reparse_deferral_on_the_fly) {
5767 const char *const pre = "<d><x attr='";
5768 const char *const end = "'></x>";
5769 char iiiiii[100];
5770 const int fillsize = (int)sizeof(iiiiii);
5771 memset(iiiiii, 'i', fillsize);
5772
5773 XML_Parser parser = XML_ParserCreate(NULL);
5774 assert_true(parser != NULL);
5775 assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
5776
5777 CharData storage;
5778 CharData_Init(&storage);
5779 XML_SetUserData(parser, &storage);
5780 XML_SetStartElementHandler(parser, start_element_event_handler);
5781
5782 enum XML_Status status;
5783 // parse the start text
5784 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5785 if (status != XML_STATUS_OK) {
5786 xml_failure(parser);
5787 }
5788 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5789
5790 // try to parse some 'i', but the token isn't finished
5791 status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
5792 if (status != XML_STATUS_OK) {
5793 xml_failure(parser);
5794 }
5795 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5796
5797 // end the <x> token.
5798 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5799 if (status != XML_STATUS_OK) {
5800 xml_failure(parser);
5801 }
5802 CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
5803
5804 // now change the heuristic setting and add *no* data
5805 assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
5806 // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
5807 status = XML_Parse(parser, "", 0, XML_FALSE);
5808 if (status != XML_STATUS_OK) {
5809 xml_failure(parser);
5810 }
5811 CharData_CheckXMLChars(&storage, XCS("dx"));
5812
5813 XML_ParserFree(parser);
5814 }
5815 END_TEST
5816
START_TEST(test_set_bad_reparse_option)5817 START_TEST(test_set_bad_reparse_option) {
5818 XML_Parser parser = XML_ParserCreate(NULL);
5819 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
5820 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
5821 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
5822 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
5823 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
5824 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
5825 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
5826 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
5827 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
5828 XML_ParserFree(parser);
5829 }
5830 END_TEST
5831
/* Allocation statistics gathered by counting_realloc()/counting_malloc():
   the sum of all requested sizes, and the largest single request. */
static size_t g_totalAlloc = 0;
static size_t g_biggestAlloc = 0;

/* realloc() wrapper that records the requested size in g_totalAlloc and
   g_biggestAlloc before forwarding the call.  The request is counted
   whether or not the underlying realloc() succeeds. */
static void *
counting_realloc(void *ptr, size_t size) {
  if (g_biggestAlloc < size) {
    g_biggestAlloc = size;
  }
  g_totalAlloc = g_totalAlloc + size;
  return realloc(ptr, size);
}

/* malloc()-shaped entry point; routed through counting_realloc() with a NULL
   pointer so that both allocators share the same bookkeeping. */
static void *
counting_malloc(size_t size) {
  void *const no_previous_block = NULL;
  return counting_realloc(no_previous_block, size);
}
5848
START_TEST(test_bypass_heuristic_when_close_to_bufsize)5849 START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
5850 if (g_chunkSize != 0) {
5851 // this test does not use SINGLE_BYTES, because it depends on very precise
5852 // buffer fills.
5853 return;
5854 }
5855 if (! g_reparseDeferralEnabledDefault) {
5856 return; // this test is irrelevant when the deferral heuristic is disabled.
5857 }
5858
5859 const int document_length = 65536;
5860 char *const document = (char *)malloc(document_length);
5861
5862 const XML_Memory_Handling_Suite memfuncs = {
5863 counting_malloc,
5864 counting_realloc,
5865 free,
5866 };
5867
5868 const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
5869 const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
5870 const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};
5871
5872 for (const int *leading = leading_list; *leading >= 0; leading++) {
5873 for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
5874 for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
5875 set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
5876 *fillsize);
5877 // start by checking that the test looks reasonably valid
5878 assert_true(*leading + *bigtoken <= document_length);
5879
5880 // put 'x' everywhere; some will be overwritten by elements.
5881 memset(document, 'x', document_length);
5882 // maybe add an initial tag
5883 if (*leading) {
5884 assert_true(*leading >= 3); // or the test case is invalid
5885 memcpy(document, "<a>", 3);
5886 }
5887 // add the large token
5888 document[*leading + 0] = '<';
5889 document[*leading + 1] = 'b';
5890 memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
5891 document[*leading + *bigtoken - 1] = '>';
5892
5893 // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
5894 const int expected_elem_total = 1 + (*leading ? 1 : 0);
5895
5896 XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
5897 assert_true(parser != NULL);
5898
5899 CharData storage;
5900 CharData_Init(&storage);
5901 XML_SetUserData(parser, &storage);
5902 XML_SetStartElementHandler(parser, start_element_event_handler);
5903
5904 g_biggestAlloc = 0;
5905 g_totalAlloc = 0;
5906 int offset = 0;
5907 // fill data until the big token is covered (but not necessarily parsed)
5908 while (offset < *leading + *bigtoken) {
5909 assert_true(offset + *fillsize <= document_length);
5910 const enum XML_Status status
5911 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5912 if (status != XML_STATUS_OK) {
5913 xml_failure(parser);
5914 }
5915 offset += *fillsize;
5916 }
5917 // Now, check that we've had a buffer allocation that could fit the
5918 // context bytes and our big token. In order to detect a special case,
5919 // we need to know how many bytes of our big token were included in the
5920 // first push that contained _any_ bytes of the big token:
5921 const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
5922 if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
5923 // Special case: we aren't saving any context, and the whole big token
5924 // was covered by a single fill, so Expat may have parsed directly
5925 // from our input pointer, without allocating an internal buffer.
5926 } else if (*leading < XML_CONTEXT_BYTES) {
5927 assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
5928 } else {
5929 assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
5930 }
5931 // fill data until the big token is actually parsed
5932 while (storage.count < expected_elem_total) {
5933 const size_t alloc_before = g_totalAlloc;
5934 assert_true(offset + *fillsize <= document_length);
5935 const enum XML_Status status
5936 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5937 if (status != XML_STATUS_OK) {
5938 xml_failure(parser);
5939 }
5940 offset += *fillsize;
5941 // since all the bytes of the big token are already in the buffer,
5942 // the bufsize ceiling should make us finish its parsing without any
5943 // further buffer allocations. We assume that there will be no other
5944 // large allocations in this test.
5945 assert_true(g_totalAlloc - alloc_before < 4096);
5946 }
5947 // test-the-test: was our alloc even called?
5948 assert_true(g_totalAlloc > 0);
5949 // test-the-test: there shouldn't be any extra start elements
5950 assert_true(storage.count == expected_elem_total);
5951
5952 XML_ParserFree(parser);
5953 }
5954 }
5955 }
5956 free(document);
5957 }
5958 END_TEST
5959
START_TEST(test_varying_buffer_fills)5960 START_TEST(test_varying_buffer_fills) {
5961 const int KiB = 1024;
5962 const int MiB = 1024 * KiB;
5963 const int document_length = 16 * MiB;
5964 const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB
5965
5966 if (g_chunkSize != 0) {
5967 return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
5968 }
5969
5970 char *const document = (char *)malloc(document_length);
5971 assert_true(document != NULL);
5972 memset(document, 'x', document_length);
5973 document[0] = '<';
5974 document[1] = 't';
5975 memset(&document[2], ' ', big - 2); // a very spacy token
5976 document[big - 1] = '>';
5977
5978 // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
5979 // When reparse deferral is enabled, the final (negated) value is the expected
5980 // maximum number of bytes scanned in parse attempts.
5981 const int testcases[][30] = {
5982 {8 * MiB, -8 * MiB},
5983 {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
5984 // zero-size fills shouldn't trigger the bypass
5985 {4 * MiB, 0, 4 * MiB, -12 * MiB},
5986 {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
5987 {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
5988 // try to hit the buffer ceiling only once (at the end)
5989 {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
5990 // try to hit the same buffer ceiling multiple times
5991 {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},
5992
5993 // try to hit every ceiling, by always landing 1K shy of the buffer size
5994 {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
5995 128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},
5996
5997 // try to avoid every ceiling, by always landing 1B past the buffer size
5998 // the normal 2x heuristic threshold still forces parse attempts.
5999 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1
6000 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2
6001 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3
6002 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4
6003 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
6004 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6
6005 2 * MiB, 4 * MiB, // will attempt 8MiB + 1 ==> total 10M + 682K + 7
6006 -(10 * MiB + 682 * KiB + 7)},
6007 // try to avoid every ceiling again, except on our last fill.
6008 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1
6009 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2
6010 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3
6011 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4
6012 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
6013 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6
6014 2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
6015 -(10 * MiB + 682 * KiB + 6)},
6016
6017 // try to hit ceilings on the way multiple times
6018 {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
6019 512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
6020 1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1, // 4 MiB buffer
6021 2 * MiB + 1, 1 * MiB, 512 * KiB, // 8 MiB buffer
6022 // we'll make a parse attempt at every parse call
6023 -(45 * MiB + 12)},
6024 };
6025 const int testcount = sizeof(testcases) / sizeof(testcases[0]);
6026 for (int test_i = 0; test_i < testcount; test_i++) {
6027 const int *fillsize = testcases[test_i];
6028 set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1],
6029 fillsize[2], fillsize[3]);
6030 XML_Parser parser = XML_ParserCreate(NULL);
6031 assert_true(parser != NULL);
6032
6033 CharData storage;
6034 CharData_Init(&storage);
6035 XML_SetUserData(parser, &storage);
6036 XML_SetStartElementHandler(parser, start_element_event_handler);
6037
6038 g_bytesScanned = 0;
6039 int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
6040 int offset = 0;
6041 while (*fillsize >= 0) {
6042 assert_true(offset + *fillsize <= document_length); // or test is invalid
6043 const enum XML_Status status
6044 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
6045 if (status != XML_STATUS_OK) {
6046 xml_failure(parser);
6047 }
6048 offset += *fillsize;
6049 fillsize++;
6050 assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
6051 worstcase_bytes += offset; // we might've tried to parse all pending bytes
6052 }
6053 assert_true(storage.count == 1); // the big token should've been parsed
6054 assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
6055 if (g_reparseDeferralEnabledDefault) {
6056 // heuristic is enabled; some XML_Parse calls may have deferred reparsing
6057 const unsigned max_bytes_scanned = -*fillsize;
6058 if (g_bytesScanned > max_bytes_scanned) {
6059 fprintf(stderr,
6060 "bytes scanned in parse attempts: actual=%u limit=%u \n",
6061 g_bytesScanned, max_bytes_scanned);
6062 fail("too many bytes scanned in parse attempts");
6063 }
6064 }
6065 assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);
6066
6067 XML_ParserFree(parser);
6068 }
6069 free(document);
6070 }
6071 END_TEST
6072
6073 void
make_basic_test_case(Suite * s)6074 make_basic_test_case(Suite *s) {
6075 TCase *tc_basic = tcase_create("basic tests");
6076
6077 suite_add_tcase(s, tc_basic);
6078 tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);
6079
6080 tcase_add_test(tc_basic, test_nul_byte);
6081 tcase_add_test(tc_basic, test_u0000_char);
6082 tcase_add_test(tc_basic, test_siphash_self);
6083 tcase_add_test(tc_basic, test_siphash_spec);
6084 tcase_add_test(tc_basic, test_bom_utf8);
6085 tcase_add_test(tc_basic, test_bom_utf16_be);
6086 tcase_add_test(tc_basic, test_bom_utf16_le);
6087 tcase_add_test(tc_basic, test_nobom_utf16_le);
6088 tcase_add_test(tc_basic, test_hash_collision);
6089 tcase_add_test(tc_basic, test_illegal_utf8);
6090 tcase_add_test(tc_basic, test_utf8_auto_align);
6091 tcase_add_test(tc_basic, test_utf16);
6092 tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
6093 tcase_add_test(tc_basic, test_not_utf16);
6094 tcase_add_test(tc_basic, test_bad_encoding);
6095 tcase_add_test(tc_basic, test_latin1_umlauts);
6096 tcase_add_test(tc_basic, test_long_utf8_character);
6097 tcase_add_test(tc_basic, test_long_latin1_attribute);
6098 tcase_add_test(tc_basic, test_long_ascii_attribute);
6099 /* Regression test for SF bug #491986. */
6100 tcase_add_test(tc_basic, test_danish_latin1);
6101 /* Regression test for SF bug #514281. */
6102 tcase_add_test(tc_basic, test_french_charref_hexidecimal);
6103 tcase_add_test(tc_basic, test_french_charref_decimal);
6104 tcase_add_test(tc_basic, test_french_latin1);
6105 tcase_add_test(tc_basic, test_french_utf8);
6106 tcase_add_test(tc_basic, test_utf8_false_rejection);
6107 tcase_add_test(tc_basic, test_line_number_after_parse);
6108 tcase_add_test(tc_basic, test_column_number_after_parse);
6109 tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
6110 tcase_add_test(tc_basic, test_line_number_after_error);
6111 tcase_add_test(tc_basic, test_column_number_after_error);
6112 tcase_add_test(tc_basic, test_really_long_lines);
6113 tcase_add_test(tc_basic, test_really_long_encoded_lines);
6114 tcase_add_test(tc_basic, test_end_element_events);
6115 tcase_add_test(tc_basic, test_helper_is_whitespace_normalized);
6116 tcase_add_test(tc_basic, test_attr_whitespace_normalization);
6117 tcase_add_test(tc_basic, test_xmldecl_misplaced);
6118 tcase_add_test(tc_basic, test_xmldecl_invalid);
6119 tcase_add_test(tc_basic, test_xmldecl_missing_attr);
6120 tcase_add_test(tc_basic, test_xmldecl_missing_value);
6121 tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity);
6122 tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity);
6123 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding);
6124 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler);
6125 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom);
6126 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding);
6127 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2);
6128 tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset);
6129 tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
6130 tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
6131 tcase_add_test(tc_basic,
6132 test_wfc_undeclared_entity_with_external_subset_standalone);
6133 tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone);
6134 tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
6135 tcase_add_test(tc_basic, test_not_standalone_handler_reject);
6136 tcase_add_test(tc_basic, test_not_standalone_handler_accept);
6137 tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
6138 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
6139 tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
6140 tcase_add_test(tc_basic, test_dtd_attr_handling);
6141 tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
6142 tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
6143 tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
6144 tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
6145 tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls);
6146 tcase_add_test(tc_basic, test_good_cdata_ascii);
6147 tcase_add_test(tc_basic, test_good_cdata_utf16);
6148 tcase_add_test(tc_basic, test_good_cdata_utf16_le);
6149 tcase_add_test(tc_basic, test_long_cdata_utf16);
6150 tcase_add_test(tc_basic, test_multichar_cdata_utf16);
6151 tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair);
6152 tcase_add_test(tc_basic, test_bad_cdata);
6153 tcase_add_test(tc_basic, test_bad_cdata_utf16);
6154 tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls);
6155 tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls);
6156 tcase_add_test(tc_basic, test_memory_allocation);
6157 tcase_add_test__if_xml_ge(tc_basic, test_default_current);
6158 tcase_add_test(tc_basic, test_dtd_elements);
6159 tcase_add_test(tc_basic, test_dtd_elements_nesting);
6160 tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
6161 tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
6162 tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
6163 tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype);
6164 tcase_add_test__ifdef_xml_dtd(tc_basic,
6165 test_foreign_dtd_without_external_subset);
6166 tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd);
6167 tcase_add_test(tc_basic, test_set_base);
6168 tcase_add_test(tc_basic, test_attributes);
6169 tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity);
6170 tcase_add_test(tc_basic, test_resume_invalid_parse);
6171 tcase_add_test(tc_basic, test_resume_resuspended);
6172 tcase_add_test(tc_basic, test_cdata_default);
6173 tcase_add_test(tc_basic, test_subordinate_reset);
6174 tcase_add_test(tc_basic, test_subordinate_suspend);
6175 tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend);
6176 tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort);
6177 tcase_add_test__ifdef_xml_dtd(tc_basic,
6178 test_ext_entity_invalid_suspended_parse);
6179 tcase_add_test(tc_basic, test_explicit_encoding);
6180 tcase_add_test(tc_basic, test_trailing_cr);
6181 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr);
6182 tcase_add_test(tc_basic, test_trailing_rsqb);
6183 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb);
6184 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata);
6185 tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters);
6186 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter);
6187 tcase_add_test(tc_basic, test_empty_parse);
6188 tcase_add_test(tc_basic, test_negative_len_parse);
6189 tcase_add_test(tc_basic, test_negative_len_parse_buffer);
6190 tcase_add_test(tc_basic, test_get_buffer_1);
6191 tcase_add_test(tc_basic, test_get_buffer_2);
6192 #if XML_CONTEXT_BYTES > 0
6193 tcase_add_test(tc_basic, test_get_buffer_3_overflow);
6194 #endif
6195 tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
6196 tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
6197 tcase_add_test(tc_basic, test_byte_info_at_end);
6198 tcase_add_test(tc_basic, test_byte_info_at_error);
6199 tcase_add_test(tc_basic, test_byte_info_at_cdata);
6200 tcase_add_test(tc_basic, test_predefined_entities);
6201 tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd);
6202 tcase_add_test(tc_basic, test_not_predefined_entities);
6203 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section);
6204 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
6205 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
6206 tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
6207 tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
6208 tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
6209 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
6210 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
6211 tcase_add_test(tc_basic, test_bad_public_doctype);
6212 tcase_add_test(tc_basic, test_attribute_enum_value);
6213 tcase_add_test(tc_basic, test_predefined_entity_redefinition);
6214 tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing);
6215 tcase_add_test(tc_basic, test_public_notation_no_sysid);
6216 tcase_add_test(tc_basic, test_nested_groups);
6217 tcase_add_test(tc_basic, test_group_choice);
6218 tcase_add_test(tc_basic, test_standalone_parameter_entity);
6219 tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
6220 tcase_add_test__ifdef_xml_dtd(tc_basic,
6221 test_recursive_external_parameter_entity);
6222 tcase_add_test__ifdef_xml_dtd(tc_basic,
6223 test_recursive_external_parameter_entity_2);
6224 tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
6225 tcase_add_test(tc_basic, test_suspend_xdecl);
6226 tcase_add_test(tc_basic, test_abort_epilog);
6227 tcase_add_test(tc_basic, test_abort_epilog_2);
6228 tcase_add_test(tc_basic, test_suspend_epilog);
6229 tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag);
6230 tcase_add_test(tc_basic, test_unfinished_epilog);
6231 tcase_add_test(tc_basic, test_partial_char_in_epilog);
6232 tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity);
6233 tcase_add_test__ifdef_xml_dtd(tc_basic,
6234 test_suspend_resume_internal_entity_issue_629);
6235 tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error);
6236 tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity);
6237 tcase_add_test(tc_basic, test_restart_on_error);
6238 tcase_add_test(tc_basic, test_reject_lt_in_attribute_value);
6239 tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value);
6240 tcase_add_test(tc_basic, test_trailing_cr_in_att_value);
6241 tcase_add_test(tc_basic, test_standalone_internal_entity);
6242 tcase_add_test(tc_basic, test_skipped_external_entity);
6243 tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity);
6244 tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity);
6245 tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr);
6246 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity);
6247 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2);
6248 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3);
6249 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4);
6250 tcase_add_test(tc_basic, test_pi_handled_in_default);
6251 tcase_add_test(tc_basic, test_comment_handled_in_default);
6252 tcase_add_test(tc_basic, test_pi_yml);
6253 tcase_add_test(tc_basic, test_pi_xnl);
6254 tcase_add_test(tc_basic, test_pi_xmm);
6255 tcase_add_test(tc_basic, test_utf16_pi);
6256 tcase_add_test(tc_basic, test_utf16_be_pi);
6257 tcase_add_test(tc_basic, test_utf16_be_comment);
6258 tcase_add_test(tc_basic, test_utf16_le_comment);
6259 tcase_add_test(tc_basic, test_missing_encoding_conversion_fn);
6260 tcase_add_test(tc_basic, test_failing_encoding_conversion_fn);
6261 tcase_add_test(tc_basic, test_unknown_encoding_success);
6262 tcase_add_test(tc_basic, test_unknown_encoding_bad_name);
6263 tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2);
6264 tcase_add_test(tc_basic, test_unknown_encoding_long_name_1);
6265 tcase_add_test(tc_basic, test_unknown_encoding_long_name_2);
6266 tcase_add_test(tc_basic, test_invalid_unknown_encoding);
6267 tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok);
6268 tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail);
6269 tcase_add_test(tc_basic, test_unknown_encoding_invalid_length);
6270 tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit);
6271 tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate);
6272 tcase_add_test(tc_basic, test_unknown_encoding_invalid_high);
6273 tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value);
6274 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom);
6275 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom);
6276 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2);
6277 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2);
6278 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be);
6279 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le);
6280 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown);
6281 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom);
6282 tcase_add_test(tc_basic, test_utf8_in_cdata_section);
6283 tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
6284 tcase_add_test(tc_basic, test_utf8_in_start_tags);
6285 tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
6286 tcase_add_test(tc_basic, test_utf16_attribute);
6287 tcase_add_test(tc_basic, test_utf16_second_attr);
6288 tcase_add_test(tc_basic, test_attr_after_solidus);
6289 tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe);
6290 tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
6291 tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
6292 tcase_add_test(tc_basic, test_bad_doctype);
6293 tcase_add_test(tc_basic, test_bad_doctype_utf8);
6294 tcase_add_test(tc_basic, test_bad_doctype_utf16);
6295 tcase_add_test(tc_basic, test_bad_doctype_plus);
6296 tcase_add_test(tc_basic, test_bad_doctype_star);
6297 tcase_add_test(tc_basic, test_bad_doctype_query);
6298 tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore);
6299 tcase_add_test(tc_basic, test_entity_in_utf16_be_attr);
6300 tcase_add_test(tc_basic, test_entity_in_utf16_le_attr);
6301 tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be);
6302 tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le);
6303 tcase_add_test(tc_basic, test_short_doctype);
6304 tcase_add_test(tc_basic, test_short_doctype_2);
6305 tcase_add_test(tc_basic, test_short_doctype_3);
6306 tcase_add_test(tc_basic, test_long_doctype);
6307 tcase_add_test(tc_basic, test_bad_entity);
6308 tcase_add_test(tc_basic, test_bad_entity_2);
6309 tcase_add_test(tc_basic, test_bad_entity_3);
6310 tcase_add_test(tc_basic, test_bad_entity_4);
6311 tcase_add_test(tc_basic, test_bad_notation);
6312 tcase_add_test(tc_basic, test_default_doctype_handler);
6313 tcase_add_test(tc_basic, test_empty_element_abort);
6314 tcase_add_test__ifdef_xml_dtd(tc_basic,
6315 test_pool_integrity_with_unfinished_attr);
6316 tcase_add_test__if_xml_ge(tc_basic, test_entity_ref_no_elements);
6317 tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_entity);
6318 tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_attribute_entity);
6319 tcase_add_test__if_xml_ge(tc_basic,
6320 test_deep_nested_entity_delayed_interpretation);
6321 tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
6322 tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend_2);
6323 tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
6324 tcase_add_test(tc_basic, test_set_reparse_deferral);
6325 tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
6326 tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
6327 tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
6328 tcase_add_test(tc_basic, test_set_bad_reparse_option);
6329 tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
6330 tcase_add_test(tc_basic, test_varying_buffer_fills);
6331 }
6332