1 /* 8539b9040d9d901366a62560a064af7cb99811335784b363abc039c5b0ebc416 (2.4.1+)
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
11 Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12 Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
13 Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
14 Copyright (c) 2005-2009 Steven Solie <ssolie@users.sourceforge.net>
15 Copyright (c) 2016 Eric Rahm <erahm@mozilla.com>
16 Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org>
17 Copyright (c) 2016 Gaurav <g.gupta@samsung.com>
18 Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
19 Copyright (c) 2016 Gustavo Grieco <gustavo.grieco@imag.fr>
20 Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com>
21 Copyright (c) 2016 Ed Schouten <ed@nuxi.nl>
22 Copyright (c) 2017-2018 Rhodri James <rhodri@wildebeest.org.uk>
23 Copyright (c) 2017 Václav Slavík <vaclav@slavik.io>
24 Copyright (c) 2017 Viktor Szakats <commit@vsz.me>
25 Copyright (c) 2017 Chanho Park <chanho61.park@samsung.com>
26 Copyright (c) 2017 Rolf Eike Beer <eike@sf-mail.de>
27 Copyright (c) 2017 Hans Wennborg <hans@chromium.org>
28 Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com>
29 Copyright (c) 2018 Benjamin Peterson <benjamin@python.org>
30 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
31 Copyright (c) 2018 Mariusz Zaborski <oshogbo@vexillium.org>
32 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
33 Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org>
34 Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org>
35 Licensed under the MIT license:
36
37 Permission is hereby granted, free of charge, to any person obtaining
38 a copy of this software and associated documentation files (the
39 "Software"), to deal in the Software without restriction, including
40 without limitation the rights to use, copy, modify, merge, publish,
41 distribute, sublicense, and/or sell copies of the Software, and to permit
42 persons to whom the Software is furnished to do so, subject to the
43 following conditions:
44
45 The above copyright notice and this permission notice shall be included
46 in all copies or substantial portions of the Software.
47
48 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
49 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
50 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
51 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
52 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
53 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
54 USE OR OTHER DEALINGS IN THE SOFTWARE.
55 */
56
57 #if ! defined(_GNU_SOURCE)
58 # define _GNU_SOURCE 1 /* syscall prototype */
59 #endif
60
61 #ifdef _WIN32
62 /* force stdlib to define rand_s() */
63 # if ! defined(_CRT_RAND_S)
64 # define _CRT_RAND_S
65 # endif
66 #endif
67
68 #include <stddef.h>
69 #include <string.h> /* memset(), memcpy() */
70 #include <assert.h>
71 #include <limits.h> /* UINT_MAX */
72 #include <stdio.h> /* fprintf */
73 #include <stdlib.h> /* getenv, rand_s */
74 #include <stdint.h> /* uintptr_t */
75 #include <math.h> /* isnan */
76
77 #ifdef _WIN32
78 # define getpid GetCurrentProcessId
79 #else
80 # include <sys/time.h> /* gettimeofday() */
81 # include <sys/types.h> /* getpid() */
82 # include <unistd.h> /* getpid() */
83 # include <fcntl.h> /* O_RDONLY */
84 # include <errno.h>
85 #endif
86
87 #define XML_BUILDING_EXPAT 1
88
89 #ifdef _WIN32
90 # include "winconfig.h"
91 #elif defined(HAVE_EXPAT_CONFIG_H)
92 # include <expat_config.h>
93 #endif /* ndef _WIN32 */
94
95 #include "ascii.h"
96 #include "expat.h"
97 #include "siphash.h"
98
99 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
100 # if defined(HAVE_GETRANDOM)
101 # include <sys/random.h> /* getrandom */
102 # else
103 # include <unistd.h> /* syscall */
104 # include <sys/syscall.h> /* SYS_getrandom */
105 # endif
106 # if ! defined(GRND_NONBLOCK)
107 # define GRND_NONBLOCK 0x0001
108 # endif /* defined(GRND_NONBLOCK) */
109 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
110
111 #if defined(HAVE_LIBBSD) \
112 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
113 # include <bsd/stdlib.h>
114 #endif
115
116 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
117 # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
118 #endif
119
120 #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \
121 && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \
122 && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \
123 && ! defined(XML_POOR_ENTROPY)
124 # error You do not have support for any sources of high quality entropy \
125 enabled. For end user security, that is probably not what you want. \
126 \
127 Your options include: \
128 * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
129 * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
130 * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
131 * BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \
132 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
133 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
134 * Linux (including <3.17) / BSD / macOS (including <10.7) (/dev/urandom): XML_DEV_URANDOM, \
135 * Windows >=Vista (rand_s): _WIN32. \
136 \
    If you insist on not using any of these, bypass this error by defining \
138 XML_POOR_ENTROPY; you have been warned. \
139 \
140 If you have reasons to patch this detection code away or need changes \
141 to the build system, please open a bug. Thank you!
142 #endif
143
/* Bind the generic Xml* encoding helpers to their UTF-16 or UTF-8
   implementation, depending on whether XML_UNICODE is defined, and pick
   the matching ICHAR type. */
#ifdef XML_UNICODE
#  define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#  define XmlConvert XmlUtf16Convert
#  define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#  define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
#  define XmlEncode XmlUtf16Encode
/* Non-zero unless enc is flagged isUtf16 and s has an even address. */
#  define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
typedef unsigned short ICHAR;
#else
#  define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#  define XmlConvert XmlUtf8Convert
#  define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#  define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
#  define XmlEncode XmlUtf8Encode
/* Non-zero unless enc is flagged isUtf8; s is not inspected. */
#  define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
typedef char ICHAR;
#endif
161
/* Without XML_NS the namespace-aware (...NS) entry points are simply
   aliases of their plain counterparts. */
#ifndef XML_NS

#  define XmlInitEncodingNS XmlInitEncoding
#  define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
/* Drop any earlier definition before re-pointing the NS variant. */
#  undef XmlGetInternalEncodingNS
#  define XmlGetInternalEncodingNS XmlGetInternalEncoding
#  define XmlParseXmlDeclNS XmlParseXmlDecl

#endif
171
/* XML_T(x) and XML_L(x) wrap a literal x:
     - XML_UNICODE + XML_UNICODE_WCHAR_T: XML_T casts to const wchar_t and
       XML_L pastes an L prefix onto x;
     - XML_UNICODE only: XML_T casts to const unsigned short, XML_L is x;
     - otherwise: both expand to x unchanged. */
#ifdef XML_UNICODE

#  ifdef XML_UNICODE_WCHAR_T
#    define XML_T(x) (const wchar_t) x
#    define XML_L(x) L##x
#  else
#    define XML_T(x) (const unsigned short)x
#    define XML_L(x) x
#  endif

#else

#  define XML_T(x) x
#  define XML_L(x) x

#endif
188
/* ROUND_UP(n, sz): smallest multiple of sz that is >= n.
   Only valid when sz is a power of 2 (relies on bit masking). */
#define ROUND_UP(n, sz) (((n) + ((sz)-1)) & ~((sz)-1))

/* EXPAT_SAFE_PTR_DIFF(p, q): p - q when both pointers are non-NULL,
   otherwise 0, so that NULL never takes part in pointer arithmetic. */
#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
194
195 #include "internal.h"
196 #include "xmltok.h"
197 #include "xmlrole.h"
198
199 typedef const XML_Char *KEY;
200
201 typedef struct {
202 KEY name;
203 } NAMED;
204
205 typedef struct {
206 NAMED **v;
207 unsigned char power;
208 size_t size;
209 size_t used;
210 const XML_Memory_Handling_Suite *mem;
211 } HASH_TABLE;
212
213 static size_t keylen(KEY s);
214
215 static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
216
/* Collision probing uses double hashing. The step size has to be
   relatively prime to the table size, which is a power of 2. A second
   hash value comes for free: it is taken from the bits of the first hash
   that were masked out when the slot index was computed
   (index = hash & mask, with mask = table->size - 1). The step is capped
   at table->size / 4 (mask >> 2) and forced to be odd, because every odd
   number is relatively prime to a power of 2.
*/
#define SECOND_HASH(hash, mask, power)                                         \
  ((((hash) & ~(mask)) >> ((power)-1)) & ((mask) >> 2))
#define PROBE_STEP(hash, mask, power)                                          \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
229
230 typedef struct {
231 NAMED **p;
232 NAMED **end;
233 } HASH_TABLE_ITER;
234
/* Initial sizes and markers used by the parser. */
#define INIT_TAG_BUF_SIZE 32 /* keep this a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE 1024
#define INIT_ATTS_SIZE 16
#define INIT_ATTS_VERSION 0xFFFFFFFF
#define INIT_BLOCK_SIZE 1024
#define INIT_BUFFER_SIZE 1024

#define EXPAND_SPARE 24
243
244 typedef struct binding {
245 struct prefix *prefix;
246 struct binding *nextTagBinding;
247 struct binding *prevPrefixBinding;
248 const struct attribute_id *attId;
249 XML_Char *uri;
250 int uriLen;
251 int uriAlloc;
252 } BINDING;
253
254 typedef struct prefix {
255 const XML_Char *name;
256 BINDING *binding;
257 } PREFIX;
258
259 typedef struct {
260 const XML_Char *str;
261 const XML_Char *localPart;
262 const XML_Char *prefix;
263 int strLen;
264 int uriLen;
265 int prefixLen;
266 } TAG_NAME;
267
268 /* TAG represents an open element.
269 The name of the element is stored in both the document and API
270 encodings. The memory buffer 'buf' is a separately-allocated
271 memory area which stores the name. During the XML_Parse()/
272 XMLParseBuffer() when the element is open, the memory for the 'raw'
273 version of the name (in the document encoding) is shared with the
274 document buffer. If the element is open across calls to
275 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
276 contain the 'raw' name as well.
277
278 A parser re-uses these structures, maintaining a list of allocated
279 TAG objects in a free list.
280 */
281 typedef struct tag {
282 struct tag *parent; /* parent of this element */
283 const char *rawName; /* tagName in the original encoding */
284 int rawNameLength;
285 TAG_NAME name; /* tagName in the API encoding */
286 char *buf; /* buffer for name components */
287 char *bufEnd; /* end of the buffer */
288 BINDING *bindings;
289 } TAG;
290
291 typedef struct {
292 const XML_Char *name;
293 const XML_Char *textPtr;
294 int textLen; /* length in XML_Chars */
295 int processed; /* # of processed bytes - when suspended */
296 const XML_Char *systemId;
297 const XML_Char *base;
298 const XML_Char *publicId;
299 const XML_Char *notation;
300 XML_Bool open;
301 XML_Bool is_param;
302 XML_Bool is_internal; /* true if declared in internal subset outside PE */
303 } ENTITY;
304
305 typedef struct {
306 enum XML_Content_Type type;
307 enum XML_Content_Quant quant;
308 const XML_Char *name;
309 int firstchild;
310 int lastchild;
311 int childcnt;
312 int nextsib;
313 } CONTENT_SCAFFOLD;
314
315 #define INIT_SCAFFOLD_ELEMENTS 32
316
317 typedef struct block {
318 struct block *next;
319 int size;
320 XML_Char s[1];
321 } BLOCK;
322
323 typedef struct {
324 BLOCK *blocks;
325 BLOCK *freeBlocks;
326 const XML_Char *end;
327 XML_Char *ptr;
328 XML_Char *start;
329 const XML_Memory_Handling_Suite *mem;
330 } STRING_POOL;
331
332 /* The XML_Char before the name is used to determine whether
333 an attribute has been specified. */
334 typedef struct attribute_id {
335 XML_Char *name;
336 PREFIX *prefix;
337 XML_Bool maybeTokenized;
338 XML_Bool xmlns;
339 } ATTRIBUTE_ID;
340
341 typedef struct {
342 const ATTRIBUTE_ID *id;
343 XML_Bool isCdata;
344 const XML_Char *value;
345 } DEFAULT_ATTRIBUTE;
346
347 typedef struct {
348 unsigned long version;
349 unsigned long hash;
350 const XML_Char *uriName;
351 } NS_ATT;
352
353 typedef struct {
354 const XML_Char *name;
355 PREFIX *prefix;
356 const ATTRIBUTE_ID *idAtt;
357 int nDefaultAtts;
358 int allocDefaultAtts;
359 DEFAULT_ATTRIBUTE *defaultAtts;
360 } ELEMENT_TYPE;
361
362 typedef struct {
363 HASH_TABLE generalEntities;
364 HASH_TABLE elementTypes;
365 HASH_TABLE attributeIds;
366 HASH_TABLE prefixes;
367 STRING_POOL pool;
368 STRING_POOL entityValuePool;
369 /* false once a parameter entity reference has been skipped */
370 XML_Bool keepProcessing;
371 /* true once an internal or external PE reference has been encountered;
372 this includes the reference to an external subset */
373 XML_Bool hasParamEntityRefs;
374 XML_Bool standalone;
375 #ifdef XML_DTD
376 /* indicates if external PE has been read */
377 XML_Bool paramEntityRead;
378 HASH_TABLE paramEntities;
379 #endif /* XML_DTD */
380 PREFIX defaultPrefix;
381 /* === scaffolding for building content model === */
382 XML_Bool in_eldecl;
383 CONTENT_SCAFFOLD *scaffold;
384 unsigned contentStringLen;
385 unsigned scaffSize;
386 unsigned scaffCount;
387 int scaffLevel;
388 int *scaffIndex;
389 } DTD;
390
391 typedef struct open_internal_entity {
392 const char *internalEventPtr;
393 const char *internalEventEndPtr;
394 struct open_internal_entity *next;
395 ENTITY *entity;
396 int startTagLevel;
397 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
398 } OPEN_INTERNAL_ENTITY;
399
/* How a range of bytes is to be attributed by the accounting code. */
enum XML_Account {
  /* bytes handed directly to the Expat parser */
  XML_ACCOUNT_DIRECT,
  /* intermediate bytes produced during entity expansion */
  XML_ACCOUNT_ENTITY_EXPANSION,
  /* do not account: these bytes were accounted for already */
  XML_ACCOUNT_NONE
};
406
#ifdef XML_DTD
typedef unsigned long long XmlBigCount;

/* Byte counters (direct and indirect) plus the configured limits. */
typedef struct accounting {
  XmlBigCount countBytesDirect;
  XmlBigCount countBytesIndirect;
  int debugLevel;
  float maximumAmplificationFactor; /* >=1.0 */
  unsigned long long activationThresholdBytes;
} ACCOUNTING;

/* Counters describing how many entities were opened and how deep. */
typedef struct entity_stats {
  unsigned int countEverOpened;
  unsigned int currentDepth;
  unsigned int maximumDepthSeen;
  int debugLevel;
} ENTITY_STATS;
#endif /* XML_DTD */
424
425 typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
426 const char *end, const char **endPtr);
427
428 static Processor prologProcessor;
429 static Processor prologInitProcessor;
430 static Processor contentProcessor;
431 static Processor cdataSectionProcessor;
432 #ifdef XML_DTD
433 static Processor ignoreSectionProcessor;
434 static Processor externalParEntProcessor;
435 static Processor externalParEntInitProcessor;
436 static Processor entityValueProcessor;
437 static Processor entityValueInitProcessor;
438 #endif /* XML_DTD */
439 static Processor epilogProcessor;
440 static Processor errorProcessor;
441 static Processor externalEntityInitProcessor;
442 static Processor externalEntityInitProcessor2;
443 static Processor externalEntityInitProcessor3;
444 static Processor externalEntityContentProcessor;
445 static Processor internalEntityProcessor;
446
447 static enum XML_Error handleUnknownEncoding(XML_Parser parser,
448 const XML_Char *encodingName);
449 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
450 const char *s, const char *next);
451 static enum XML_Error initializeEncoding(XML_Parser parser);
452 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
453 const char *s, const char *end, int tok,
454 const char *next, const char **nextPtr,
455 XML_Bool haveMore, XML_Bool allowClosingDoctype,
456 enum XML_Account account);
457 static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity,
458 XML_Bool betweenDecl);
459 static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
460 const ENCODING *enc, const char *start,
461 const char *end, const char **endPtr,
462 XML_Bool haveMore, enum XML_Account account);
463 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *,
464 const char **startPtr, const char *end,
465 const char **nextPtr, XML_Bool haveMore,
466 enum XML_Account account);
467 #ifdef XML_DTD
468 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *,
469 const char **startPtr, const char *end,
470 const char **nextPtr, XML_Bool haveMore);
471 #endif /* XML_DTD */
472
473 static void freeBindings(XML_Parser parser, BINDING *bindings);
474 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *,
475 const char *s, TAG_NAME *tagNamePtr,
476 BINDING **bindingsPtr,
477 enum XML_Account account);
478 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
479 const ATTRIBUTE_ID *attId, const XML_Char *uri,
480 BINDING **bindingsPtr);
481 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
482 XML_Bool isId, const XML_Char *dfltValue,
483 XML_Parser parser);
484 static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *,
485 XML_Bool isCdata, const char *,
486 const char *, STRING_POOL *,
487 enum XML_Account account);
488 static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *,
489 XML_Bool isCdata, const char *,
490 const char *, STRING_POOL *,
491 enum XML_Account account);
492 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
493 const char *start, const char *end);
494 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
495 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
496 const char *start, const char *end,
497 enum XML_Account account);
498 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
499 const char *start, const char *end);
500 static int reportComment(XML_Parser parser, const ENCODING *enc,
501 const char *start, const char *end);
502 static void reportDefault(XML_Parser parser, const ENCODING *enc,
503 const char *start, const char *end);
504
505 static const XML_Char *getContext(XML_Parser parser);
506 static XML_Bool setContext(XML_Parser parser, const XML_Char *context);
507
508 static void FASTCALL normalizePublicId(XML_Char *s);
509
510 static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms);
511 /* do not call if m_parentParser != NULL */
512 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
513 static void dtdDestroy(DTD *p, XML_Bool isDocEntity,
514 const XML_Memory_Handling_Suite *ms);
515 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
516 const XML_Memory_Handling_Suite *ms);
517 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *, STRING_POOL *,
518 const HASH_TABLE *);
519 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
520 size_t createSize);
521 static void FASTCALL hashTableInit(HASH_TABLE *,
522 const XML_Memory_Handling_Suite *ms);
523 static void FASTCALL hashTableClear(HASH_TABLE *);
524 static void FASTCALL hashTableDestroy(HASH_TABLE *);
525 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *);
526 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *);
527
528 static void FASTCALL poolInit(STRING_POOL *,
529 const XML_Memory_Handling_Suite *ms);
530 static void FASTCALL poolClear(STRING_POOL *);
531 static void FASTCALL poolDestroy(STRING_POOL *);
532 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
533 const char *ptr, const char *end);
534 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
535 const char *ptr, const char *end);
536 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
537 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
538 const XML_Char *s);
539 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
540 int n);
541 static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
542 const XML_Char *s);
543
544 static int FASTCALL nextScaffoldPart(XML_Parser parser);
545 static XML_Content *build_model(XML_Parser parser);
546 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
547 const char *ptr, const char *end);
548
549 static XML_Char *copyString(const XML_Char *s,
550 const XML_Memory_Handling_Suite *memsuite);
551
552 static unsigned long generate_hash_secret_salt(XML_Parser parser);
553 static XML_Bool startParsing(XML_Parser parser);
554
555 static XML_Parser parserCreate(const XML_Char *encodingName,
556 const XML_Memory_Handling_Suite *memsuite,
557 const XML_Char *nameSep, DTD *dtd);
558
559 static void parserInit(XML_Parser parser, const XML_Char *encodingName);
560
/* File-local prototypes: accounting and entity tracking (XML_DTD builds
   only), followed by the debug level lookup. */
#ifdef XML_DTD
static float
accountingGetCurrentAmplification(XML_Parser rootParser);

static void
accountingReportStats(XML_Parser originParser, const char *epilog);

static void
accountingOnAbort(XML_Parser originParser);

static void
accountingReportDiff(XML_Parser rootParser,
                     unsigned int levelsAwayFromRootParser, const char *before,
                     const char *after, ptrdiff_t bytesMore, int source_line,
                     enum XML_Account account);

static XML_Bool
accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
                        const char *after, int source_line,
                        enum XML_Account account);

static void
entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
                          const char *action, int sourceLine);

static void
entityTrackingOnOpen(XML_Parser parser, ENTITY *entity, int sourceLine);

static void
entityTrackingOnClose(XML_Parser parser, ENTITY *entity, int sourceLine);

static XML_Parser
getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff);
#endif /* XML_DTD */

static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel);
588
/* Accessors for a STRING_POOL given by pointer. Every use of the macro
   argument is parenthesized so that any pointer expression (for example
   &parser->m_tempPool) can be passed safely.

   poolStart      - start of the string currently being built
   poolEnd        - current write position
   poolLength     - number of XML_Chars between start and ptr
   poolChop       - step the write position back by one XML_Char
   poolLastChar   - the XML_Char just before the write position
   poolDiscard    - reset the write position to start
   poolFinish     - move start up to the write position
   poolAppendChar - store c at the write position and advance; when the
                    pool is full, poolGrow() is called first. Evaluates
                    to 0 if poolGrow() returns false, otherwise to 1. */
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
600
601 struct XML_ParserStruct {
602 /* The first member must be m_userData so that the XML_GetUserData
603 macro works. */
604 void *m_userData;
605 void *m_handlerArg;
606 char *m_buffer;
607 const XML_Memory_Handling_Suite m_mem;
608 /* first character to be parsed */
609 const char *m_bufferPtr;
610 /* past last character to be parsed */
611 char *m_bufferEnd;
612 /* allocated end of m_buffer */
613 const char *m_bufferLim;
614 XML_Index m_parseEndByteIndex;
615 const char *m_parseEndPtr;
616 XML_Char *m_dataBuf;
617 XML_Char *m_dataBufEnd;
618 XML_StartElementHandler m_startElementHandler;
619 XML_EndElementHandler m_endElementHandler;
620 XML_CharacterDataHandler m_characterDataHandler;
621 XML_ProcessingInstructionHandler m_processingInstructionHandler;
622 XML_CommentHandler m_commentHandler;
623 XML_StartCdataSectionHandler m_startCdataSectionHandler;
624 XML_EndCdataSectionHandler m_endCdataSectionHandler;
625 XML_DefaultHandler m_defaultHandler;
626 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
627 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
628 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
629 XML_NotationDeclHandler m_notationDeclHandler;
630 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
631 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
632 XML_NotStandaloneHandler m_notStandaloneHandler;
633 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
634 XML_Parser m_externalEntityRefHandlerArg;
635 XML_SkippedEntityHandler m_skippedEntityHandler;
636 XML_UnknownEncodingHandler m_unknownEncodingHandler;
637 XML_ElementDeclHandler m_elementDeclHandler;
638 XML_AttlistDeclHandler m_attlistDeclHandler;
639 XML_EntityDeclHandler m_entityDeclHandler;
640 XML_XmlDeclHandler m_xmlDeclHandler;
641 const ENCODING *m_encoding;
642 INIT_ENCODING m_initEncoding;
643 const ENCODING *m_internalEncoding;
644 const XML_Char *m_protocolEncodingName;
645 XML_Bool m_ns;
646 XML_Bool m_ns_triplets;
647 void *m_unknownEncodingMem;
648 void *m_unknownEncodingData;
649 void *m_unknownEncodingHandlerData;
650 void(XMLCALL *m_unknownEncodingRelease)(void *);
651 PROLOG_STATE m_prologState;
652 Processor *m_processor;
653 enum XML_Error m_errorCode;
654 const char *m_eventPtr;
655 const char *m_eventEndPtr;
656 const char *m_positionPtr;
657 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
658 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
659 XML_Bool m_defaultExpandInternalEntities;
660 int m_tagLevel;
661 ENTITY *m_declEntity;
662 const XML_Char *m_doctypeName;
663 const XML_Char *m_doctypeSysid;
664 const XML_Char *m_doctypePubid;
665 const XML_Char *m_declAttributeType;
666 const XML_Char *m_declNotationName;
667 const XML_Char *m_declNotationPublicId;
668 ELEMENT_TYPE *m_declElementType;
669 ATTRIBUTE_ID *m_declAttributeId;
670 XML_Bool m_declAttributeIsCdata;
671 XML_Bool m_declAttributeIsId;
672 DTD *m_dtd;
673 const XML_Char *m_curBase;
674 TAG *m_tagStack;
675 TAG *m_freeTagList;
676 BINDING *m_inheritedBindings;
677 BINDING *m_freeBindingList;
678 int m_attsSize;
679 int m_nSpecifiedAtts;
680 int m_idAttIndex;
681 ATTRIBUTE *m_atts;
682 NS_ATT *m_nsAtts;
683 unsigned long m_nsAttsVersion;
684 unsigned char m_nsAttsPower;
685 #ifdef XML_ATTR_INFO
686 XML_AttrInfo *m_attInfo;
687 #endif
688 POSITION m_position;
689 STRING_POOL m_tempPool;
690 STRING_POOL m_temp2Pool;
691 char *m_groupConnector;
692 unsigned int m_groupSize;
693 XML_Char m_namespaceSeparator;
694 XML_Parser m_parentParser;
695 XML_ParsingStatus m_parsingStatus;
696 #ifdef XML_DTD
697 XML_Bool m_isParamEntity;
698 XML_Bool m_useForeignDTD;
699 enum XML_ParamEntityParsing m_paramEntityParsing;
700 #endif
701 unsigned long m_hash_secret_salt;
702 #ifdef XML_DTD
703 ACCOUNTING m_accounting;
704 ENTITY_STATS m_entity_stats;
705 #endif
706 };
707
/* Allocation shorthands that dispatch through the parser's m_mem
   function table. The parser argument is parenthesized so that any
   pointer expression (e.g. &obj) can be passed, not just an identifier. */
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
711
712 XML_Parser XMLCALL
XML_ParserCreate(const XML_Char * encodingName)713 XML_ParserCreate(const XML_Char *encodingName) {
714 return XML_ParserCreate_MM(encodingName, NULL, NULL);
715 }
716
717 XML_Parser XMLCALL
XML_ParserCreateNS(const XML_Char * encodingName,XML_Char nsSep)718 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
719 XML_Char tmp[2] = {nsSep, 0};
720 return XML_ParserCreate_MM(encodingName, NULL, tmp);
721 }
722
723 static const XML_Char implicitContext[]
724 = {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h,
725 ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
726 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD,
727 ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r,
728 ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
729 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8,
730 ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
731 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e,
732 '\0'};
733
734 /* To avoid warnings about unused functions: */
735 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
736
737 # if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
738
739 /* Obtain entropy on Linux 3.17+ */
740 static int
writeRandomBytes_getrandom_nonblock(void * target,size_t count)741 writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
742 int success = 0; /* full count bytes written? */
743 size_t bytesWrittenTotal = 0;
744 const unsigned int getrandomFlags = GRND_NONBLOCK;
745
746 do {
747 void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
748 const size_t bytesToWrite = count - bytesWrittenTotal;
749
750 const int bytesWrittenMore =
751 # if defined(HAVE_GETRANDOM)
752 getrandom(currentTarget, bytesToWrite, getrandomFlags);
753 # else
754 syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
755 # endif
756
757 if (bytesWrittenMore > 0) {
758 bytesWrittenTotal += bytesWrittenMore;
759 if (bytesWrittenTotal >= count)
760 success = 1;
761 }
762 } while (! success && (errno == EINTR));
763
764 return success;
765 }
766
767 # endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
768
769 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
770
/* Extract entropy from /dev/urandom.

   target: buffer to fill; count: number of bytes wanted.
   Returns 1 once exactly count bytes have been read (trivially so for
   count == 0, provided the device could be opened), 0 if the device
   cannot be opened or reading fails.

   read() is retried only when it failed with EINTR; short reads continue
   with the remaining bytes. errno is never consulted after a successful
   read, where its value would be stale. */
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  int success = 1; /* cleared on the first unrecoverable read result */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  while (bytesWrittenTotal < count) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
    } else if (bytesWrittenMore == 0 || errno != EINTR) {
      /* unexpected end of file, or an error other than EINTR */
      success = 0;
      break;
    }
  }

  close(fd);
  return success;
}
798
799 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
800
801 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
802
803 #if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)
804
/* Fill target with count bytes taken from successive arc4random()
   results. Each 32-bit result is consumed least significant byte
   first; unused bytes of the last result are dropped. */
static void
writeRandomBytes_arc4random(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t filled = 0;

  while (filled < count) {
    const uint32_t word = arc4random();
    size_t byteIndex;

    for (byteIndex = 0; byteIndex < sizeof(word) && filled < count;
         byteIndex++) {
      out[filled++] = (uint8_t)(word >> (byteIndex * 8));
    }
  }
}
820
821 #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
822
823 #ifdef _WIN32
824
/* MinGW-32 (but not MinGW-64, which has it) did not declare rand_s() in
   its stdlib.h before version 5.3.0 of the runtime package (mingwrt), so
   declare it here for those versions. Upstream fix:
   https://osdn.net/projects/mingw/ticket/39658 . */
#  if defined(__MINGW32__) && defined(__MINGW32_VERSION)                       \
      && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
__declspec(dllimport) int rand_s(unsigned int *);
#  endif

/* Obtain entropy on Windows through rand_s(), which generates
 * cryptographically secure random numbers. Internally it uses the
 * RtlGenRandom API, present in Windows XP and later.
 *
 * Fills target with count bytes; each rand_s() result is consumed least
 * significant byte first. Returns 1 on success, 0 as soon as rand_s()
 * reports an error.
 */
static int
writeRandomBytes_rand_s(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t filled = 0;

  while (filled < count) {
    unsigned int word = 0;
    size_t byteIndex;

    if (rand_s(&word) != 0) {
      return 0; /* failure */
    }

    for (byteIndex = 0; byteIndex < sizeof(word) && filled < count;
         byteIndex++) {
      out[filled++] = (uint8_t)(word >> (byteIndex * 8));
    }
  }
  return 1; /* success */
}
857
858 #endif /* _WIN32 */
859
860 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
861
/* Low quality entropy derived from the current time.
   Windows: XOR of the two halves of the current FILETIME.
   Elsewhere: the microseconds field reported by gettimeofday(). */
static unsigned long
gather_time_entropy(void) {
#  ifdef _WIN32
  FILETIME now;
  GetSystemTimeAsFileTime(&now); /* never fails */
  return now.dwHighDateTime ^ now.dwLowDateTime;
#  else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

#    if defined(NDEBUG)
  (void)rc; /* only inspected by the assertion below */
#    else
  assert(rc == 0);
#    endif /* defined(NDEBUG) */

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#  endif
}
884
885 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
886
/* Pass-through helper for entropy values.
 *
 * When the debug level read via getDebugLevel("EXPAT_ENTROPY_DEBUG", 0)
 * is at least 1, prints `label`, the value in zero-padded hex, and the
 * value's size in bytes to stderr.  Always returns `entropy` unchanged.
 */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  const int hexDigits = (int)sizeof(entropy) * 2;
  const unsigned long byteCount = (unsigned long)sizeof(entropy);

  if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) < 1u) {
    return entropy;
  }

  fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
          hexDigits, entropy, byteCount);
  return entropy;
}
895
896 static unsigned long
generate_hash_secret_salt(XML_Parser parser)897 generate_hash_secret_salt(XML_Parser parser) {
898 unsigned long entropy;
899 (void)parser;
900
901 /* "Failproof" high quality providers: */
902 #if defined(HAVE_ARC4RANDOM_BUF)
903 arc4random_buf(&entropy, sizeof(entropy));
904 return ENTROPY_DEBUG("arc4random_buf", entropy);
905 #elif defined(HAVE_ARC4RANDOM)
906 writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
907 return ENTROPY_DEBUG("arc4random", entropy);
908 #else
909 /* Try high quality providers first .. */
910 # ifdef _WIN32
911 if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
912 return ENTROPY_DEBUG("rand_s", entropy);
913 }
914 # elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
915 if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
916 return ENTROPY_DEBUG("getrandom", entropy);
917 }
918 # endif
919 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
920 if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
921 return ENTROPY_DEBUG("/dev/urandom", entropy);
922 }
923 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
924 /* .. and self-made low quality for backup: */
925
926 /* Process ID is 0 bits entropy if attacker has local access */
927 entropy = gather_time_entropy() ^ getpid();
928
929 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
930 if (sizeof(unsigned long) == 4) {
931 return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
932 } else {
933 return ENTROPY_DEBUG("fallback(8)",
934 entropy * (unsigned long)2305843009213693951ULL);
935 }
936 #endif
937 }
938
939 static unsigned long
get_hash_secret_salt(XML_Parser parser)940 get_hash_secret_salt(XML_Parser parser) {
941 if (parser->m_parentParser != NULL)
942 return get_hash_secret_salt(parser->m_parentParser);
943 return parser->m_hash_secret_salt;
944 }
945
946 static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser)947 startParsing(XML_Parser parser) {
948 /* hash functions must be initialized before setContext() is called */
949 if (parser->m_hash_secret_salt == 0)
950 parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
951 if (parser->m_ns) {
952 /* implicit context only set for root parser, since child
953 parsers (i.e. external entity parsers) will inherit it
954 */
955 return setContext(parser, implicitContext);
956 }
957 return XML_TRUE;
958 }
959
960 XML_Parser XMLCALL
XML_ParserCreate_MM(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep)961 XML_ParserCreate_MM(const XML_Char *encodingName,
962 const XML_Memory_Handling_Suite *memsuite,
963 const XML_Char *nameSep) {
964 return parserCreate(encodingName, memsuite, nameSep, NULL);
965 }
966
967 static XML_Parser
parserCreate(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep,DTD * dtd)968 parserCreate(const XML_Char *encodingName,
969 const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
970 DTD *dtd) {
971 XML_Parser parser;
972
973 if (memsuite) {
974 XML_Memory_Handling_Suite *mtemp;
975 parser = (XML_Parser)memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
976 if (parser != NULL) {
977 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
978 mtemp->malloc_fcn = memsuite->malloc_fcn;
979 mtemp->realloc_fcn = memsuite->realloc_fcn;
980 mtemp->free_fcn = memsuite->free_fcn;
981 }
982 } else {
983 XML_Memory_Handling_Suite *mtemp;
984 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
985 if (parser != NULL) {
986 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
987 mtemp->malloc_fcn = malloc;
988 mtemp->realloc_fcn = realloc;
989 mtemp->free_fcn = free;
990 }
991 }
992
993 if (! parser)
994 return parser;
995
996 parser->m_buffer = NULL;
997 parser->m_bufferLim = NULL;
998
999 parser->m_attsSize = INIT_ATTS_SIZE;
1000 parser->m_atts
1001 = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
1002 if (parser->m_atts == NULL) {
1003 FREE(parser, parser);
1004 return NULL;
1005 }
1006 #ifdef XML_ATTR_INFO
1007 parser->m_attInfo = (XML_AttrInfo *)MALLOC(
1008 parser, parser->m_attsSize * sizeof(XML_AttrInfo));
1009 if (parser->m_attInfo == NULL) {
1010 FREE(parser, parser->m_atts);
1011 FREE(parser, parser);
1012 return NULL;
1013 }
1014 #endif
1015 parser->m_dataBuf
1016 = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
1017 if (parser->m_dataBuf == NULL) {
1018 FREE(parser, parser->m_atts);
1019 #ifdef XML_ATTR_INFO
1020 FREE(parser, parser->m_attInfo);
1021 #endif
1022 FREE(parser, parser);
1023 return NULL;
1024 }
1025 parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
1026
1027 if (dtd)
1028 parser->m_dtd = dtd;
1029 else {
1030 parser->m_dtd = dtdCreate(&parser->m_mem);
1031 if (parser->m_dtd == NULL) {
1032 FREE(parser, parser->m_dataBuf);
1033 FREE(parser, parser->m_atts);
1034 #ifdef XML_ATTR_INFO
1035 FREE(parser, parser->m_attInfo);
1036 #endif
1037 FREE(parser, parser);
1038 return NULL;
1039 }
1040 }
1041
1042 parser->m_freeBindingList = NULL;
1043 parser->m_freeTagList = NULL;
1044 parser->m_freeInternalEntities = NULL;
1045
1046 parser->m_groupSize = 0;
1047 parser->m_groupConnector = NULL;
1048
1049 parser->m_unknownEncodingHandler = NULL;
1050 parser->m_unknownEncodingHandlerData = NULL;
1051
1052 parser->m_namespaceSeparator = ASCII_EXCL;
1053 parser->m_ns = XML_FALSE;
1054 parser->m_ns_triplets = XML_FALSE;
1055
1056 parser->m_nsAtts = NULL;
1057 parser->m_nsAttsVersion = 0;
1058 parser->m_nsAttsPower = 0;
1059
1060 parser->m_protocolEncodingName = NULL;
1061
1062 poolInit(&parser->m_tempPool, &(parser->m_mem));
1063 poolInit(&parser->m_temp2Pool, &(parser->m_mem));
1064 parserInit(parser, encodingName);
1065
1066 if (encodingName && ! parser->m_protocolEncodingName) {
1067 if (dtd) {
1068 // We need to stop the upcoming call to XML_ParserFree from happily
1069 // destroying parser->m_dtd because the DTD is shared with the parent
1070 // parser and the only guard that keeps XML_ParserFree from destroying
1071 // parser->m_dtd is parser->m_isParamEntity but it will be set to
1072 // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
1073 parser->m_dtd = NULL;
1074 }
1075 XML_ParserFree(parser);
1076 return NULL;
1077 }
1078
1079 if (nameSep) {
1080 parser->m_ns = XML_TRUE;
1081 parser->m_internalEncoding = XmlGetInternalEncodingNS();
1082 parser->m_namespaceSeparator = *nameSep;
1083 } else {
1084 parser->m_internalEncoding = XmlGetInternalEncoding();
1085 }
1086
1087 return parser;
1088 }
1089
1090 static void
parserInit(XML_Parser parser,const XML_Char * encodingName)1091 parserInit(XML_Parser parser, const XML_Char *encodingName) {
1092 parser->m_processor = prologInitProcessor;
1093 XmlPrologStateInit(&parser->m_prologState);
1094 if (encodingName != NULL) {
1095 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1096 }
1097 parser->m_curBase = NULL;
1098 XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1099 parser->m_userData = NULL;
1100 parser->m_handlerArg = NULL;
1101 parser->m_startElementHandler = NULL;
1102 parser->m_endElementHandler = NULL;
1103 parser->m_characterDataHandler = NULL;
1104 parser->m_processingInstructionHandler = NULL;
1105 parser->m_commentHandler = NULL;
1106 parser->m_startCdataSectionHandler = NULL;
1107 parser->m_endCdataSectionHandler = NULL;
1108 parser->m_defaultHandler = NULL;
1109 parser->m_startDoctypeDeclHandler = NULL;
1110 parser->m_endDoctypeDeclHandler = NULL;
1111 parser->m_unparsedEntityDeclHandler = NULL;
1112 parser->m_notationDeclHandler = NULL;
1113 parser->m_startNamespaceDeclHandler = NULL;
1114 parser->m_endNamespaceDeclHandler = NULL;
1115 parser->m_notStandaloneHandler = NULL;
1116 parser->m_externalEntityRefHandler = NULL;
1117 parser->m_externalEntityRefHandlerArg = parser;
1118 parser->m_skippedEntityHandler = NULL;
1119 parser->m_elementDeclHandler = NULL;
1120 parser->m_attlistDeclHandler = NULL;
1121 parser->m_entityDeclHandler = NULL;
1122 parser->m_xmlDeclHandler = NULL;
1123 parser->m_bufferPtr = parser->m_buffer;
1124 parser->m_bufferEnd = parser->m_buffer;
1125 parser->m_parseEndByteIndex = 0;
1126 parser->m_parseEndPtr = NULL;
1127 parser->m_declElementType = NULL;
1128 parser->m_declAttributeId = NULL;
1129 parser->m_declEntity = NULL;
1130 parser->m_doctypeName = NULL;
1131 parser->m_doctypeSysid = NULL;
1132 parser->m_doctypePubid = NULL;
1133 parser->m_declAttributeType = NULL;
1134 parser->m_declNotationName = NULL;
1135 parser->m_declNotationPublicId = NULL;
1136 parser->m_declAttributeIsCdata = XML_FALSE;
1137 parser->m_declAttributeIsId = XML_FALSE;
1138 memset(&parser->m_position, 0, sizeof(POSITION));
1139 parser->m_errorCode = XML_ERROR_NONE;
1140 parser->m_eventPtr = NULL;
1141 parser->m_eventEndPtr = NULL;
1142 parser->m_positionPtr = NULL;
1143 parser->m_openInternalEntities = NULL;
1144 parser->m_defaultExpandInternalEntities = XML_TRUE;
1145 parser->m_tagLevel = 0;
1146 parser->m_tagStack = NULL;
1147 parser->m_inheritedBindings = NULL;
1148 parser->m_nSpecifiedAtts = 0;
1149 parser->m_unknownEncodingMem = NULL;
1150 parser->m_unknownEncodingRelease = NULL;
1151 parser->m_unknownEncodingData = NULL;
1152 parser->m_parentParser = NULL;
1153 parser->m_parsingStatus.parsing = XML_INITIALIZED;
1154 #ifdef XML_DTD
1155 parser->m_isParamEntity = XML_FALSE;
1156 parser->m_useForeignDTD = XML_FALSE;
1157 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1158 #endif
1159 parser->m_hash_secret_salt = 0;
1160
1161 #ifdef XML_DTD
1162 memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
1163 parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
1164 parser->m_accounting.maximumAmplificationFactor
1165 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
1166 parser->m_accounting.activationThresholdBytes
1167 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;
1168
1169 memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
1170 parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
1171 #endif
1172 }
1173
1174 /* moves list of bindings to m_freeBindingList */
1175 static void FASTCALL
moveToFreeBindingList(XML_Parser parser,BINDING * bindings)1176 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
1177 while (bindings) {
1178 BINDING *b = bindings;
1179 bindings = bindings->nextTagBinding;
1180 b->nextTagBinding = parser->m_freeBindingList;
1181 parser->m_freeBindingList = b;
1182 }
1183 }
1184
1185 XML_Bool XMLCALL
XML_ParserReset(XML_Parser parser,const XML_Char * encodingName)1186 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
1187 TAG *tStk;
1188 OPEN_INTERNAL_ENTITY *openEntityList;
1189
1190 if (parser == NULL)
1191 return XML_FALSE;
1192
1193 if (parser->m_parentParser)
1194 return XML_FALSE;
1195 /* move m_tagStack to m_freeTagList */
1196 tStk = parser->m_tagStack;
1197 while (tStk) {
1198 TAG *tag = tStk;
1199 tStk = tStk->parent;
1200 tag->parent = parser->m_freeTagList;
1201 moveToFreeBindingList(parser, tag->bindings);
1202 tag->bindings = NULL;
1203 parser->m_freeTagList = tag;
1204 }
1205 /* move m_openInternalEntities to m_freeInternalEntities */
1206 openEntityList = parser->m_openInternalEntities;
1207 while (openEntityList) {
1208 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1209 openEntityList = openEntity->next;
1210 openEntity->next = parser->m_freeInternalEntities;
1211 parser->m_freeInternalEntities = openEntity;
1212 }
1213 moveToFreeBindingList(parser, parser->m_inheritedBindings);
1214 FREE(parser, parser->m_unknownEncodingMem);
1215 if (parser->m_unknownEncodingRelease)
1216 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1217 poolClear(&parser->m_tempPool);
1218 poolClear(&parser->m_temp2Pool);
1219 FREE(parser, (void *)parser->m_protocolEncodingName);
1220 parser->m_protocolEncodingName = NULL;
1221 parserInit(parser, encodingName);
1222 dtdReset(parser->m_dtd, &parser->m_mem);
1223 return XML_TRUE;
1224 }
1225
1226 enum XML_Status XMLCALL
XML_SetEncoding(XML_Parser parser,const XML_Char * encodingName)1227 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
1228 if (parser == NULL)
1229 return XML_STATUS_ERROR;
1230 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1231 XXX There's no way for the caller to determine which of the
1232 XXX possible error cases caused the XML_STATUS_ERROR return.
1233 */
1234 if (parser->m_parsingStatus.parsing == XML_PARSING
1235 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1236 return XML_STATUS_ERROR;
1237
1238 /* Get rid of any previous encoding name */
1239 FREE(parser, (void *)parser->m_protocolEncodingName);
1240
1241 if (encodingName == NULL)
1242 /* No new encoding name */
1243 parser->m_protocolEncodingName = NULL;
1244 else {
1245 /* Copy the new encoding name into allocated memory */
1246 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1247 if (! parser->m_protocolEncodingName)
1248 return XML_STATUS_ERROR;
1249 }
1250 return XML_STATUS_OK;
1251 }
1252
1253 XML_Parser XMLCALL
XML_ExternalEntityParserCreate(XML_Parser oldParser,const XML_Char * context,const XML_Char * encodingName)1254 XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
1255 const XML_Char *encodingName) {
1256 XML_Parser parser = oldParser;
1257 DTD *newDtd = NULL;
1258 DTD *oldDtd;
1259 XML_StartElementHandler oldStartElementHandler;
1260 XML_EndElementHandler oldEndElementHandler;
1261 XML_CharacterDataHandler oldCharacterDataHandler;
1262 XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1263 XML_CommentHandler oldCommentHandler;
1264 XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1265 XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1266 XML_DefaultHandler oldDefaultHandler;
1267 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1268 XML_NotationDeclHandler oldNotationDeclHandler;
1269 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1270 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1271 XML_NotStandaloneHandler oldNotStandaloneHandler;
1272 XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1273 XML_SkippedEntityHandler oldSkippedEntityHandler;
1274 XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1275 XML_ElementDeclHandler oldElementDeclHandler;
1276 XML_AttlistDeclHandler oldAttlistDeclHandler;
1277 XML_EntityDeclHandler oldEntityDeclHandler;
1278 XML_XmlDeclHandler oldXmlDeclHandler;
1279 ELEMENT_TYPE *oldDeclElementType;
1280
1281 void *oldUserData;
1282 void *oldHandlerArg;
1283 XML_Bool oldDefaultExpandInternalEntities;
1284 XML_Parser oldExternalEntityRefHandlerArg;
1285 #ifdef XML_DTD
1286 enum XML_ParamEntityParsing oldParamEntityParsing;
1287 int oldInEntityValue;
1288 #endif
1289 XML_Bool oldns_triplets;
1290 /* Note that the new parser shares the same hash secret as the old
1291 parser, so that dtdCopy and copyEntityTable can lookup values
1292 from hash tables associated with either parser without us having
1293 to worry which hash secrets each table has.
1294 */
1295 unsigned long oldhash_secret_salt;
1296
1297 /* Validate the oldParser parameter before we pull everything out of it */
1298 if (oldParser == NULL)
1299 return NULL;
1300
1301 /* Stash the original parser contents on the stack */
1302 oldDtd = parser->m_dtd;
1303 oldStartElementHandler = parser->m_startElementHandler;
1304 oldEndElementHandler = parser->m_endElementHandler;
1305 oldCharacterDataHandler = parser->m_characterDataHandler;
1306 oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1307 oldCommentHandler = parser->m_commentHandler;
1308 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1309 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1310 oldDefaultHandler = parser->m_defaultHandler;
1311 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1312 oldNotationDeclHandler = parser->m_notationDeclHandler;
1313 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1314 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1315 oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1316 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1317 oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1318 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1319 oldElementDeclHandler = parser->m_elementDeclHandler;
1320 oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1321 oldEntityDeclHandler = parser->m_entityDeclHandler;
1322 oldXmlDeclHandler = parser->m_xmlDeclHandler;
1323 oldDeclElementType = parser->m_declElementType;
1324
1325 oldUserData = parser->m_userData;
1326 oldHandlerArg = parser->m_handlerArg;
1327 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1328 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
1329 #ifdef XML_DTD
1330 oldParamEntityParsing = parser->m_paramEntityParsing;
1331 oldInEntityValue = parser->m_prologState.inEntityValue;
1332 #endif
1333 oldns_triplets = parser->m_ns_triplets;
1334 /* Note that the new parser shares the same hash secret as the old
1335 parser, so that dtdCopy and copyEntityTable can lookup values
1336 from hash tables associated with either parser without us having
1337 to worry which hash secrets each table has.
1338 */
1339 oldhash_secret_salt = parser->m_hash_secret_salt;
1340
1341 #ifdef XML_DTD
1342 if (! context)
1343 newDtd = oldDtd;
1344 #endif /* XML_DTD */
1345
1346 /* Note that the magical uses of the pre-processor to make field
1347 access look more like C++ require that `parser' be overwritten
1348 here. This makes this function more painful to follow than it
1349 would be otherwise.
1350 */
1351 if (parser->m_ns) {
1352 XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
1353 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
1354 } else {
1355 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
1356 }
1357
1358 if (! parser)
1359 return NULL;
1360
1361 parser->m_startElementHandler = oldStartElementHandler;
1362 parser->m_endElementHandler = oldEndElementHandler;
1363 parser->m_characterDataHandler = oldCharacterDataHandler;
1364 parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1365 parser->m_commentHandler = oldCommentHandler;
1366 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1367 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1368 parser->m_defaultHandler = oldDefaultHandler;
1369 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1370 parser->m_notationDeclHandler = oldNotationDeclHandler;
1371 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1372 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1373 parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1374 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1375 parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1376 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1377 parser->m_elementDeclHandler = oldElementDeclHandler;
1378 parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1379 parser->m_entityDeclHandler = oldEntityDeclHandler;
1380 parser->m_xmlDeclHandler = oldXmlDeclHandler;
1381 parser->m_declElementType = oldDeclElementType;
1382 parser->m_userData = oldUserData;
1383 if (oldUserData == oldHandlerArg)
1384 parser->m_handlerArg = parser->m_userData;
1385 else
1386 parser->m_handlerArg = parser;
1387 if (oldExternalEntityRefHandlerArg != oldParser)
1388 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1389 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1390 parser->m_ns_triplets = oldns_triplets;
1391 parser->m_hash_secret_salt = oldhash_secret_salt;
1392 parser->m_parentParser = oldParser;
1393 #ifdef XML_DTD
1394 parser->m_paramEntityParsing = oldParamEntityParsing;
1395 parser->m_prologState.inEntityValue = oldInEntityValue;
1396 if (context) {
1397 #endif /* XML_DTD */
1398 if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
1399 || ! setContext(parser, context)) {
1400 XML_ParserFree(parser);
1401 return NULL;
1402 }
1403 parser->m_processor = externalEntityInitProcessor;
1404 #ifdef XML_DTD
1405 } else {
1406 /* The DTD instance referenced by parser->m_dtd is shared between the
1407 document's root parser and external PE parsers, therefore one does not
1408 need to call setContext. In addition, one also *must* not call
1409 setContext, because this would overwrite existing prefix->binding
1410 pointers in parser->m_dtd with ones that get destroyed with the external
1411 PE parser. This would leave those prefixes with dangling pointers.
1412 */
1413 parser->m_isParamEntity = XML_TRUE;
1414 XmlPrologStateInitExternalEntity(&parser->m_prologState);
1415 parser->m_processor = externalParEntInitProcessor;
1416 }
1417 #endif /* XML_DTD */
1418 return parser;
1419 }
1420
1421 static void FASTCALL
destroyBindings(BINDING * bindings,XML_Parser parser)1422 destroyBindings(BINDING *bindings, XML_Parser parser) {
1423 for (;;) {
1424 BINDING *b = bindings;
1425 if (! b)
1426 break;
1427 bindings = b->nextTagBinding;
1428 FREE(parser, b->uri);
1429 FREE(parser, b);
1430 }
1431 }
1432
1433 void XMLCALL
XML_ParserFree(XML_Parser parser)1434 XML_ParserFree(XML_Parser parser) {
1435 TAG *tagList;
1436 OPEN_INTERNAL_ENTITY *entityList;
1437 if (parser == NULL)
1438 return;
1439 /* free m_tagStack and m_freeTagList */
1440 tagList = parser->m_tagStack;
1441 for (;;) {
1442 TAG *p;
1443 if (tagList == NULL) {
1444 if (parser->m_freeTagList == NULL)
1445 break;
1446 tagList = parser->m_freeTagList;
1447 parser->m_freeTagList = NULL;
1448 }
1449 p = tagList;
1450 tagList = tagList->parent;
1451 FREE(parser, p->buf);
1452 destroyBindings(p->bindings, parser);
1453 FREE(parser, p);
1454 }
1455 /* free m_openInternalEntities and m_freeInternalEntities */
1456 entityList = parser->m_openInternalEntities;
1457 for (;;) {
1458 OPEN_INTERNAL_ENTITY *openEntity;
1459 if (entityList == NULL) {
1460 if (parser->m_freeInternalEntities == NULL)
1461 break;
1462 entityList = parser->m_freeInternalEntities;
1463 parser->m_freeInternalEntities = NULL;
1464 }
1465 openEntity = entityList;
1466 entityList = entityList->next;
1467 FREE(parser, openEntity);
1468 }
1469
1470 destroyBindings(parser->m_freeBindingList, parser);
1471 destroyBindings(parser->m_inheritedBindings, parser);
1472 poolDestroy(&parser->m_tempPool);
1473 poolDestroy(&parser->m_temp2Pool);
1474 FREE(parser, (void *)parser->m_protocolEncodingName);
1475 #ifdef XML_DTD
1476 /* external parameter entity parsers share the DTD structure
1477 parser->m_dtd with the root parser, so we must not destroy it
1478 */
1479 if (! parser->m_isParamEntity && parser->m_dtd)
1480 #else
1481 if (parser->m_dtd)
1482 #endif /* XML_DTD */
1483 dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser,
1484 &parser->m_mem);
1485 FREE(parser, (void *)parser->m_atts);
1486 #ifdef XML_ATTR_INFO
1487 FREE(parser, (void *)parser->m_attInfo);
1488 #endif
1489 FREE(parser, parser->m_groupConnector);
1490 FREE(parser, parser->m_buffer);
1491 FREE(parser, parser->m_dataBuf);
1492 FREE(parser, parser->m_nsAtts);
1493 FREE(parser, parser->m_unknownEncodingMem);
1494 if (parser->m_unknownEncodingRelease)
1495 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1496 FREE(parser, parser);
1497 }
1498
1499 void XMLCALL
XML_UseParserAsHandlerArg(XML_Parser parser)1500 XML_UseParserAsHandlerArg(XML_Parser parser) {
1501 if (parser != NULL)
1502 parser->m_handlerArg = parser;
1503 }
1504
1505 enum XML_Error XMLCALL
XML_UseForeignDTD(XML_Parser parser,XML_Bool useDTD)1506 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
1507 if (parser == NULL)
1508 return XML_ERROR_INVALID_ARGUMENT;
1509 #ifdef XML_DTD
1510 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1511 if (parser->m_parsingStatus.parsing == XML_PARSING
1512 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1513 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1514 parser->m_useForeignDTD = useDTD;
1515 return XML_ERROR_NONE;
1516 #else
1517 UNUSED_P(useDTD);
1518 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1519 #endif
1520 }
1521
1522 void XMLCALL
XML_SetReturnNSTriplet(XML_Parser parser,int do_nst)1523 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
1524 if (parser == NULL)
1525 return;
1526 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1527 if (parser->m_parsingStatus.parsing == XML_PARSING
1528 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1529 return;
1530 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1531 }
1532
1533 void XMLCALL
XML_SetUserData(XML_Parser parser,void * p)1534 XML_SetUserData(XML_Parser parser, void *p) {
1535 if (parser == NULL)
1536 return;
1537 if (parser->m_handlerArg == parser->m_userData)
1538 parser->m_handlerArg = parser->m_userData = p;
1539 else
1540 parser->m_userData = p;
1541 }
1542
1543 enum XML_Status XMLCALL
XML_SetBase(XML_Parser parser,const XML_Char * p)1544 XML_SetBase(XML_Parser parser, const XML_Char *p) {
1545 if (parser == NULL)
1546 return XML_STATUS_ERROR;
1547 if (p) {
1548 p = poolCopyString(&parser->m_dtd->pool, p);
1549 if (! p)
1550 return XML_STATUS_ERROR;
1551 parser->m_curBase = p;
1552 } else
1553 parser->m_curBase = NULL;
1554 return XML_STATUS_OK;
1555 }
1556
1557 const XML_Char *XMLCALL
XML_GetBase(XML_Parser parser)1558 XML_GetBase(XML_Parser parser) {
1559 if (parser == NULL)
1560 return NULL;
1561 return parser->m_curBase;
1562 }
1563
1564 int XMLCALL
XML_GetSpecifiedAttributeCount(XML_Parser parser)1565 XML_GetSpecifiedAttributeCount(XML_Parser parser) {
1566 if (parser == NULL)
1567 return -1;
1568 return parser->m_nSpecifiedAtts;
1569 }
1570
1571 int XMLCALL
XML_GetIdAttributeIndex(XML_Parser parser)1572 XML_GetIdAttributeIndex(XML_Parser parser) {
1573 if (parser == NULL)
1574 return -1;
1575 return parser->m_idAttIndex;
1576 }
1577
#ifdef XML_ATTR_INFO
/* Return the parser's attribute info array (m_attInfo), or NULL when
   `parser` is NULL.  Only built when XML_ATTR_INFO is defined. */
const XML_AttrInfo *XMLCALL
XML_GetAttributeInfo(XML_Parser parser) {
  return (parser != NULL) ? parser->m_attInfo : NULL;
}
#endif
1586
1587 void XMLCALL
XML_SetElementHandler(XML_Parser parser,XML_StartElementHandler start,XML_EndElementHandler end)1588 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
1589 XML_EndElementHandler end) {
1590 if (parser == NULL)
1591 return;
1592 parser->m_startElementHandler = start;
1593 parser->m_endElementHandler = end;
1594 }
1595
1596 void XMLCALL
XML_SetStartElementHandler(XML_Parser parser,XML_StartElementHandler start)1597 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
1598 if (parser != NULL)
1599 parser->m_startElementHandler = start;
1600 }
1601
1602 void XMLCALL
XML_SetEndElementHandler(XML_Parser parser,XML_EndElementHandler end)1603 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
1604 if (parser != NULL)
1605 parser->m_endElementHandler = end;
1606 }
1607
1608 void XMLCALL
XML_SetCharacterDataHandler(XML_Parser parser,XML_CharacterDataHandler handler)1609 XML_SetCharacterDataHandler(XML_Parser parser,
1610 XML_CharacterDataHandler handler) {
1611 if (parser != NULL)
1612 parser->m_characterDataHandler = handler;
1613 }
1614
1615 void XMLCALL
XML_SetProcessingInstructionHandler(XML_Parser parser,XML_ProcessingInstructionHandler handler)1616 XML_SetProcessingInstructionHandler(XML_Parser parser,
1617 XML_ProcessingInstructionHandler handler) {
1618 if (parser != NULL)
1619 parser->m_processingInstructionHandler = handler;
1620 }
1621
1622 void XMLCALL
XML_SetCommentHandler(XML_Parser parser,XML_CommentHandler handler)1623 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
1624 if (parser != NULL)
1625 parser->m_commentHandler = handler;
1626 }
1627
1628 void XMLCALL
XML_SetCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start,XML_EndCdataSectionHandler end)1629 XML_SetCdataSectionHandler(XML_Parser parser,
1630 XML_StartCdataSectionHandler start,
1631 XML_EndCdataSectionHandler end) {
1632 if (parser == NULL)
1633 return;
1634 parser->m_startCdataSectionHandler = start;
1635 parser->m_endCdataSectionHandler = end;
1636 }
1637
1638 void XMLCALL
XML_SetStartCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start)1639 XML_SetStartCdataSectionHandler(XML_Parser parser,
1640 XML_StartCdataSectionHandler start) {
1641 if (parser != NULL)
1642 parser->m_startCdataSectionHandler = start;
1643 }
1644
1645 void XMLCALL
XML_SetEndCdataSectionHandler(XML_Parser parser,XML_EndCdataSectionHandler end)1646 XML_SetEndCdataSectionHandler(XML_Parser parser,
1647 XML_EndCdataSectionHandler end) {
1648 if (parser != NULL)
1649 parser->m_endCdataSectionHandler = end;
1650 }
1651
1652 void XMLCALL
XML_SetDefaultHandler(XML_Parser parser,XML_DefaultHandler handler)1653 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
1654 if (parser == NULL)
1655 return;
1656 parser->m_defaultHandler = handler;
1657 parser->m_defaultExpandInternalEntities = XML_FALSE;
1658 }
1659
1660 void XMLCALL
XML_SetDefaultHandlerExpand(XML_Parser parser,XML_DefaultHandler handler)1661 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
1662 if (parser == NULL)
1663 return;
1664 parser->m_defaultHandler = handler;
1665 parser->m_defaultExpandInternalEntities = XML_TRUE;
1666 }
1667
1668 void XMLCALL
XML_SetDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start,XML_EndDoctypeDeclHandler end)1669 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
1670 XML_EndDoctypeDeclHandler end) {
1671 if (parser == NULL)
1672 return;
1673 parser->m_startDoctypeDeclHandler = start;
1674 parser->m_endDoctypeDeclHandler = end;
1675 }
1676
1677 void XMLCALL
XML_SetStartDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start)1678 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1679 XML_StartDoctypeDeclHandler start) {
1680 if (parser != NULL)
1681 parser->m_startDoctypeDeclHandler = start;
1682 }
1683
1684 void XMLCALL
XML_SetEndDoctypeDeclHandler(XML_Parser parser,XML_EndDoctypeDeclHandler end)1685 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
1686 if (parser != NULL)
1687 parser->m_endDoctypeDeclHandler = end;
1688 }
1689
1690 void XMLCALL
XML_SetUnparsedEntityDeclHandler(XML_Parser parser,XML_UnparsedEntityDeclHandler handler)1691 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1692 XML_UnparsedEntityDeclHandler handler) {
1693 if (parser != NULL)
1694 parser->m_unparsedEntityDeclHandler = handler;
1695 }
1696
1697 void XMLCALL
XML_SetNotationDeclHandler(XML_Parser parser,XML_NotationDeclHandler handler)1698 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
1699 if (parser != NULL)
1700 parser->m_notationDeclHandler = handler;
1701 }
1702
1703 void XMLCALL
XML_SetNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start,XML_EndNamespaceDeclHandler end)1704 XML_SetNamespaceDeclHandler(XML_Parser parser,
1705 XML_StartNamespaceDeclHandler start,
1706 XML_EndNamespaceDeclHandler end) {
1707 if (parser == NULL)
1708 return;
1709 parser->m_startNamespaceDeclHandler = start;
1710 parser->m_endNamespaceDeclHandler = end;
1711 }
1712
1713 void XMLCALL
XML_SetStartNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start)1714 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1715 XML_StartNamespaceDeclHandler start) {
1716 if (parser != NULL)
1717 parser->m_startNamespaceDeclHandler = start;
1718 }
1719
1720 void XMLCALL
XML_SetEndNamespaceDeclHandler(XML_Parser parser,XML_EndNamespaceDeclHandler end)1721 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1722 XML_EndNamespaceDeclHandler end) {
1723 if (parser != NULL)
1724 parser->m_endNamespaceDeclHandler = end;
1725 }
1726
1727 void XMLCALL
XML_SetNotStandaloneHandler(XML_Parser parser,XML_NotStandaloneHandler handler)1728 XML_SetNotStandaloneHandler(XML_Parser parser,
1729 XML_NotStandaloneHandler handler) {
1730 if (parser != NULL)
1731 parser->m_notStandaloneHandler = handler;
1732 }
1733
1734 void XMLCALL
XML_SetExternalEntityRefHandler(XML_Parser parser,XML_ExternalEntityRefHandler handler)1735 XML_SetExternalEntityRefHandler(XML_Parser parser,
1736 XML_ExternalEntityRefHandler handler) {
1737 if (parser != NULL)
1738 parser->m_externalEntityRefHandler = handler;
1739 }
1740
1741 void XMLCALL
XML_SetExternalEntityRefHandlerArg(XML_Parser parser,void * arg)1742 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
1743 if (parser == NULL)
1744 return;
1745 if (arg)
1746 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
1747 else
1748 parser->m_externalEntityRefHandlerArg = parser;
1749 }
1750
1751 void XMLCALL
XML_SetSkippedEntityHandler(XML_Parser parser,XML_SkippedEntityHandler handler)1752 XML_SetSkippedEntityHandler(XML_Parser parser,
1753 XML_SkippedEntityHandler handler) {
1754 if (parser != NULL)
1755 parser->m_skippedEntityHandler = handler;
1756 }
1757
1758 void XMLCALL
XML_SetUnknownEncodingHandler(XML_Parser parser,XML_UnknownEncodingHandler handler,void * data)1759 XML_SetUnknownEncodingHandler(XML_Parser parser,
1760 XML_UnknownEncodingHandler handler, void *data) {
1761 if (parser == NULL)
1762 return;
1763 parser->m_unknownEncodingHandler = handler;
1764 parser->m_unknownEncodingHandlerData = data;
1765 }
1766
1767 void XMLCALL
XML_SetElementDeclHandler(XML_Parser parser,XML_ElementDeclHandler eldecl)1768 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
1769 if (parser != NULL)
1770 parser->m_elementDeclHandler = eldecl;
1771 }
1772
1773 void XMLCALL
XML_SetAttlistDeclHandler(XML_Parser parser,XML_AttlistDeclHandler attdecl)1774 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
1775 if (parser != NULL)
1776 parser->m_attlistDeclHandler = attdecl;
1777 }
1778
1779 void XMLCALL
XML_SetEntityDeclHandler(XML_Parser parser,XML_EntityDeclHandler handler)1780 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
1781 if (parser != NULL)
1782 parser->m_entityDeclHandler = handler;
1783 }
1784
1785 void XMLCALL
XML_SetXmlDeclHandler(XML_Parser parser,XML_XmlDeclHandler handler)1786 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
1787 if (parser != NULL)
1788 parser->m_xmlDeclHandler = handler;
1789 }
1790
1791 int XMLCALL
XML_SetParamEntityParsing(XML_Parser parser,enum XML_ParamEntityParsing peParsing)1792 XML_SetParamEntityParsing(XML_Parser parser,
1793 enum XML_ParamEntityParsing peParsing) {
1794 if (parser == NULL)
1795 return 0;
1796 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1797 if (parser->m_parsingStatus.parsing == XML_PARSING
1798 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1799 return 0;
1800 #ifdef XML_DTD
1801 parser->m_paramEntityParsing = peParsing;
1802 return 1;
1803 #else
1804 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
1805 #endif
1806 }
1807
1808 int XMLCALL
XML_SetHashSalt(XML_Parser parser,unsigned long hash_salt)1809 XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
1810 if (parser == NULL)
1811 return 0;
1812 if (parser->m_parentParser)
1813 return XML_SetHashSalt(parser->m_parentParser, hash_salt);
1814 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1815 if (parser->m_parsingStatus.parsing == XML_PARSING
1816 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1817 return 0;
1818 parser->m_hash_secret_salt = hash_salt;
1819 return 1;
1820 }
1821
/* Parse `len` bytes starting at `s`; a non-zero `isFinal` marks the last
   chunk of input.

   Returns XML_STATUS_ERROR for invalid arguments (NULL parser, negative
   len, or NULL `s` with non-zero len), for a suspended or finished parser,
   when startParsing() fails, when memory cannot be obtained, or when the
   current processor reports an error; m_errorCode is set in each of these
   cases as long as parser is non-NULL. Returns XML_STATUS_SUSPENDED when
   the parser is left in state XML_SUSPENDED and XML_STATUS_OK otherwise.
*/
enum XML_Status XMLCALL
XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
  if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
    if (parser != NULL)
      parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
    return XML_STATUS_ERROR;
  }
  switch (parser->m_parsingStatus.parsing) {
  case XML_SUSPENDED:
    parser->m_errorCode = XML_ERROR_SUSPENDED;
    return XML_STATUS_ERROR;
  case XML_FINISHED:
    parser->m_errorCode = XML_ERROR_FINISHED;
    return XML_STATUS_ERROR;
  case XML_INITIALIZED:
    /* startParsing() is only invoked for a parser without a parent. */
    if (parser->m_parentParser == NULL && ! startParsing(parser)) {
      parser->m_errorCode = XML_ERROR_NO_MEMORY;
      return XML_STATUS_ERROR;
    }
    /* fall through */
  default:
    parser->m_parsingStatus.parsing = XML_PARSING;
  }

  /* Path 1: no new bytes. Only the finality flag can change; when it is
     set, whatever is still sitting in the internal buffer is re-processed. */
  if (len == 0) {
    parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
    if (! isFinal)
      return XML_STATUS_OK;
    parser->m_positionPtr = parser->m_bufferPtr;
    parser->m_parseEndPtr = parser->m_bufferEnd;

    /* If data are left over from last buffer, and we now know that these
       data are the final chunk of input, then we have to check them again
       to detect errors based on that fact.
    */
    parser->m_errorCode
        = parser->m_processor(parser, parser->m_bufferPtr,
                              parser->m_parseEndPtr, &parser->m_bufferPtr);

    if (parser->m_errorCode == XML_ERROR_NONE) {
      switch (parser->m_parsingStatus.parsing) {
      case XML_SUSPENDED:
        /* It is hard to be certain, but it seems that this case
         * cannot occur.  This code is cleaning up a previous parse
         * with no new data (since len == 0).  Changing the parsing
         * state requires getting to execute a handler function, and
         * there doesn't seem to be an opportunity for that while in
         * this circumstance.
         *
         * Given the uncertainty, we retain the code but exclude it
         * from coverage tests.
         *
         * LCOV_EXCL_START
         */
        XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
                          parser->m_bufferPtr, &parser->m_position);
        parser->m_positionPtr = parser->m_bufferPtr;
        return XML_STATUS_SUSPENDED;
        /* LCOV_EXCL_STOP */
      case XML_INITIALIZED:
      case XML_PARSING:
        parser->m_parsingStatus.parsing = XML_FINISHED;
        /* fall through */
      default:
        return XML_STATUS_OK;
      }
    }
    /* Processor failed: switch to errorProcessor for later calls. */
    parser->m_eventEndPtr = parser->m_eventPtr;
    parser->m_processor = errorProcessor;
    return XML_STATUS_ERROR;
  }
#ifndef XML_CONTEXT_BYTES
  /* Path 2 (only without XML_CONTEXT_BYTES): the internal buffer holds no
     pending data, so the processor runs directly on the caller's memory and
     only the unconsumed tail is copied into m_buffer afterwards. */
  else if (parser->m_bufferPtr == parser->m_bufferEnd) {
    const char *end;
    int nLeftOver;
    enum XML_Status result;
    /* Detect overflow (a+b > MAX <==> b > MAX-a) */
    if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
      parser->m_errorCode = XML_ERROR_NO_MEMORY;
      parser->m_eventPtr = parser->m_eventEndPtr = NULL;
      parser->m_processor = errorProcessor;
      return XML_STATUS_ERROR;
    }
    parser->m_parseEndByteIndex += len;
    parser->m_positionPtr = s;
    parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;

    parser->m_errorCode
        = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end);

    if (parser->m_errorCode != XML_ERROR_NONE) {
      parser->m_eventEndPtr = parser->m_eventPtr;
      parser->m_processor = errorProcessor;
      return XML_STATUS_ERROR;
    } else {
      switch (parser->m_parsingStatus.parsing) {
      case XML_SUSPENDED:
        result = XML_STATUS_SUSPENDED;
        break;
      case XML_INITIALIZED:
      case XML_PARSING:
        if (isFinal) {
          parser->m_parsingStatus.parsing = XML_FINISHED;
          return XML_STATUS_OK;
        }
      /* fall through */
      default:
        result = XML_STATUS_OK;
      }
    }

    XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
                      &parser->m_position);
    /* Bytes between `end` and the end of the input were not consumed. */
    nLeftOver = s + len - end;
    if (nLeftOver) {
      if (parser->m_buffer == NULL
          || nLeftOver > parser->m_bufferLim - parser->m_buffer) {
        /* avoid _signed_ integer overflow */
        char *temp = NULL;
        const int bytesToAllocate = (int)((unsigned)len * 2U);
        /* A non-positive size means the doubling did not fit in int;
           temp then stays NULL and the failure path below is taken. */
        if (bytesToAllocate > 0) {
          temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate);
        }
        if (temp == NULL) {
          parser->m_errorCode = XML_ERROR_NO_MEMORY;
          parser->m_eventPtr = parser->m_eventEndPtr = NULL;
          parser->m_processor = errorProcessor;
          return XML_STATUS_ERROR;
        }
        parser->m_buffer = temp;
        parser->m_bufferLim = parser->m_buffer + bytesToAllocate;
      }
      memcpy(parser->m_buffer, end, nLeftOver);
    }
    /* Re-point all buffer-relative pointers at the saved tail. */
    parser->m_bufferPtr = parser->m_buffer;
    parser->m_bufferEnd = parser->m_buffer + nLeftOver;
    parser->m_positionPtr = parser->m_bufferPtr;
    parser->m_parseEndPtr = parser->m_bufferEnd;
    parser->m_eventPtr = parser->m_bufferPtr;
    parser->m_eventEndPtr = parser->m_bufferPtr;
    return result;
  }
#endif /* not defined XML_CONTEXT_BYTES */
  /* Path 3: append the input to the internal buffer and parse from there. */
  else {
    void *buff = XML_GetBuffer(parser, len);
    if (buff == NULL)
      return XML_STATUS_ERROR;
    else {
      memcpy(buff, s, len);
      return XML_ParseBuffer(parser, len, isFinal);
    }
  }
}
1975
1976 enum XML_Status XMLCALL
XML_ParseBuffer(XML_Parser parser,int len,int isFinal)1977 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
1978 const char *start;
1979 enum XML_Status result = XML_STATUS_OK;
1980
1981 if (parser == NULL)
1982 return XML_STATUS_ERROR;
1983 switch (parser->m_parsingStatus.parsing) {
1984 case XML_SUSPENDED:
1985 parser->m_errorCode = XML_ERROR_SUSPENDED;
1986 return XML_STATUS_ERROR;
1987 case XML_FINISHED:
1988 parser->m_errorCode = XML_ERROR_FINISHED;
1989 return XML_STATUS_ERROR;
1990 case XML_INITIALIZED:
1991 /* Has someone called XML_GetBuffer successfully before? */
1992 if (! parser->m_bufferPtr) {
1993 parser->m_errorCode = XML_ERROR_NO_BUFFER;
1994 return XML_STATUS_ERROR;
1995 }
1996
1997 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
1998 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1999 return XML_STATUS_ERROR;
2000 }
2001 /* fall through */
2002 default:
2003 parser->m_parsingStatus.parsing = XML_PARSING;
2004 }
2005
2006 start = parser->m_bufferPtr;
2007 parser->m_positionPtr = start;
2008 parser->m_bufferEnd += len;
2009 parser->m_parseEndPtr = parser->m_bufferEnd;
2010 parser->m_parseEndByteIndex += len;
2011 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2012
2013 parser->m_errorCode = parser->m_processor(
2014 parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr);
2015
2016 if (parser->m_errorCode != XML_ERROR_NONE) {
2017 parser->m_eventEndPtr = parser->m_eventPtr;
2018 parser->m_processor = errorProcessor;
2019 return XML_STATUS_ERROR;
2020 } else {
2021 switch (parser->m_parsingStatus.parsing) {
2022 case XML_SUSPENDED:
2023 result = XML_STATUS_SUSPENDED;
2024 break;
2025 case XML_INITIALIZED:
2026 case XML_PARSING:
2027 if (isFinal) {
2028 parser->m_parsingStatus.parsing = XML_FINISHED;
2029 return result;
2030 }
2031 default:; /* should not happen */
2032 }
2033 }
2034
2035 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2036 parser->m_bufferPtr, &parser->m_position);
2037 parser->m_positionPtr = parser->m_bufferPtr;
2038 return result;
2039 }
2040
/* Return a pointer (m_bufferEnd) at which the caller may write `len` bytes
   of input, compacting or re-allocating the internal buffer first when
   fewer than `len` bytes are free behind m_bufferEnd.

   Returns NULL when parser is NULL. Otherwise returns NULL with m_errorCode
   set when len is negative, the parser is suspended or finished, a size
   computation does not fit in int, or the allocation fails.
*/
void *XMLCALL
XML_GetBuffer(XML_Parser parser, int len) {
  if (parser == NULL)
    return NULL;
  if (len < 0) {
    parser->m_errorCode = XML_ERROR_NO_MEMORY;
    return NULL;
  }
  switch (parser->m_parsingStatus.parsing) {
  case XML_SUSPENDED:
    parser->m_errorCode = XML_ERROR_SUSPENDED;
    return NULL;
  case XML_FINISHED:
    parser->m_errorCode = XML_ERROR_FINISHED;
    return NULL;
  default:;
  }

  /* Not enough free space behind the data currently in the buffer. */
  if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) {
#ifdef XML_CONTEXT_BYTES
    int keep;
#endif /* defined XML_CONTEXT_BYTES */
    /* Do not invoke signed arithmetic overflow: */
    int neededSize = (int)((unsigned)len
                           + (unsigned)EXPAT_SAFE_PTR_DIFF(
                               parser->m_bufferEnd, parser->m_bufferPtr));
    if (neededSize < 0) {
      parser->m_errorCode = XML_ERROR_NO_MEMORY;
      return NULL;
    }
#ifdef XML_CONTEXT_BYTES
    /* keep: bytes in front of m_bufferPtr that are retained, capped at
       XML_CONTEXT_BYTES. */
    keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
    if (keep > XML_CONTEXT_BYTES)
      keep = XML_CONTEXT_BYTES;
    /* Detect and prevent integer overflow */
    if (keep > INT_MAX - neededSize) {
      parser->m_errorCode = XML_ERROR_NO_MEMORY;
      return NULL;
    }
    neededSize += keep;
#endif /* defined XML_CONTEXT_BYTES */
    /* The existing allocation is big enough: slide the live data towards
       the front of the buffer instead of allocating. */
    if (neededSize
        <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
#ifdef XML_CONTEXT_BYTES
      if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
        int offset
            = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
              - keep;
        /* The buffer pointers cannot be NULL here; we have at least some bytes
         * in the buffer */
        memmove(parser->m_buffer, &parser->m_buffer[offset],
                parser->m_bufferEnd - parser->m_bufferPtr + keep);
        parser->m_bufferEnd -= offset;
        parser->m_bufferPtr -= offset;
      }
#else
      if (parser->m_buffer && parser->m_bufferPtr) {
        memmove(parser->m_buffer, parser->m_bufferPtr,
                EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
        parser->m_bufferEnd
            = parser->m_buffer
              + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
        parser->m_bufferPtr = parser->m_buffer;
      }
#endif /* not defined XML_CONTEXT_BYTES */
    } else {
      /* Allocate a new, larger buffer and copy the live data across. */
      char *newBuf;
      int bufferSize
          = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr);
      if (bufferSize == 0)
        bufferSize = INIT_BUFFER_SIZE;
      /* Double until neededSize fits; the (bufferSize > 0) test ends the
         loop if the doubling no longer yields a positive int. */
      do {
        /* Do not invoke signed arithmetic overflow: */
        bufferSize = (int)(2U * (unsigned)bufferSize);
      } while (bufferSize < neededSize && bufferSize > 0);
      if (bufferSize <= 0) {
        parser->m_errorCode = XML_ERROR_NO_MEMORY;
        return NULL;
      }
      newBuf = (char *)MALLOC(parser, bufferSize);
      if (newBuf == 0) {
        parser->m_errorCode = XML_ERROR_NO_MEMORY;
        return NULL;
      }
      parser->m_bufferLim = newBuf + bufferSize;
#ifdef XML_CONTEXT_BYTES
      if (parser->m_bufferPtr) {
        /* Copy the `keep` context bytes plus the unparsed data. */
        memcpy(newBuf, &parser->m_bufferPtr[-keep],
               EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
                   + keep);
        FREE(parser, parser->m_buffer);
        parser->m_buffer = newBuf;
        parser->m_bufferEnd
            = parser->m_buffer
              + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
              + keep;
        parser->m_bufferPtr = parser->m_buffer + keep;
      } else {
        /* This must be a brand new buffer with no data in it yet */
        parser->m_bufferEnd = newBuf;
        parser->m_bufferPtr = parser->m_buffer = newBuf;
      }
#else
      if (parser->m_bufferPtr) {
        memcpy(newBuf, parser->m_bufferPtr,
               EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
        FREE(parser, parser->m_buffer);
        parser->m_bufferEnd
            = newBuf
              + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
      } else {
        /* This must be a brand new buffer with no data in it yet */
        parser->m_bufferEnd = newBuf;
      }
      parser->m_bufferPtr = parser->m_buffer = newBuf;
#endif /* not defined XML_CONTEXT_BYTES */
    }
    /* The event and position pointers are reset after either path above. */
    parser->m_eventPtr = parser->m_eventEndPtr = NULL;
    parser->m_positionPtr = NULL;
  }
  return parser->m_bufferEnd;
}
2163
2164 enum XML_Status XMLCALL
XML_StopParser(XML_Parser parser,XML_Bool resumable)2165 XML_StopParser(XML_Parser parser, XML_Bool resumable) {
2166 if (parser == NULL)
2167 return XML_STATUS_ERROR;
2168 switch (parser->m_parsingStatus.parsing) {
2169 case XML_SUSPENDED:
2170 if (resumable) {
2171 parser->m_errorCode = XML_ERROR_SUSPENDED;
2172 return XML_STATUS_ERROR;
2173 }
2174 parser->m_parsingStatus.parsing = XML_FINISHED;
2175 break;
2176 case XML_FINISHED:
2177 parser->m_errorCode = XML_ERROR_FINISHED;
2178 return XML_STATUS_ERROR;
2179 default:
2180 if (resumable) {
2181 #ifdef XML_DTD
2182 if (parser->m_isParamEntity) {
2183 parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2184 return XML_STATUS_ERROR;
2185 }
2186 #endif
2187 parser->m_parsingStatus.parsing = XML_SUSPENDED;
2188 } else
2189 parser->m_parsingStatus.parsing = XML_FINISHED;
2190 }
2191 return XML_STATUS_OK;
2192 }
2193
2194 enum XML_Status XMLCALL
XML_ResumeParser(XML_Parser parser)2195 XML_ResumeParser(XML_Parser parser) {
2196 enum XML_Status result = XML_STATUS_OK;
2197
2198 if (parser == NULL)
2199 return XML_STATUS_ERROR;
2200 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2201 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2202 return XML_STATUS_ERROR;
2203 }
2204 parser->m_parsingStatus.parsing = XML_PARSING;
2205
2206 parser->m_errorCode = parser->m_processor(
2207 parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2208
2209 if (parser->m_errorCode != XML_ERROR_NONE) {
2210 parser->m_eventEndPtr = parser->m_eventPtr;
2211 parser->m_processor = errorProcessor;
2212 return XML_STATUS_ERROR;
2213 } else {
2214 switch (parser->m_parsingStatus.parsing) {
2215 case XML_SUSPENDED:
2216 result = XML_STATUS_SUSPENDED;
2217 break;
2218 case XML_INITIALIZED:
2219 case XML_PARSING:
2220 if (parser->m_parsingStatus.finalBuffer) {
2221 parser->m_parsingStatus.parsing = XML_FINISHED;
2222 return result;
2223 }
2224 default:;
2225 }
2226 }
2227
2228 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2229 parser->m_bufferPtr, &parser->m_position);
2230 parser->m_positionPtr = parser->m_bufferPtr;
2231 return result;
2232 }
2233
2234 void XMLCALL
XML_GetParsingStatus(XML_Parser parser,XML_ParsingStatus * status)2235 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
2236 if (parser == NULL)
2237 return;
2238 assert(status != NULL);
2239 *status = parser->m_parsingStatus;
2240 }
2241
2242 enum XML_Error XMLCALL
XML_GetErrorCode(XML_Parser parser)2243 XML_GetErrorCode(XML_Parser parser) {
2244 if (parser == NULL)
2245 return XML_ERROR_INVALID_ARGUMENT;
2246 return parser->m_errorCode;
2247 }
2248
2249 XML_Index XMLCALL
XML_GetCurrentByteIndex(XML_Parser parser)2250 XML_GetCurrentByteIndex(XML_Parser parser) {
2251 if (parser == NULL)
2252 return -1;
2253 if (parser->m_eventPtr)
2254 return (XML_Index)(parser->m_parseEndByteIndex
2255 - (parser->m_parseEndPtr - parser->m_eventPtr));
2256 return -1;
2257 }
2258
2259 int XMLCALL
XML_GetCurrentByteCount(XML_Parser parser)2260 XML_GetCurrentByteCount(XML_Parser parser) {
2261 if (parser == NULL)
2262 return 0;
2263 if (parser->m_eventEndPtr && parser->m_eventPtr)
2264 return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2265 return 0;
2266 }
2267
2268 const char *XMLCALL
XML_GetInputContext(XML_Parser parser,int * offset,int * size)2269 XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
2270 #ifdef XML_CONTEXT_BYTES
2271 if (parser == NULL)
2272 return NULL;
2273 if (parser->m_eventPtr && parser->m_buffer) {
2274 if (offset != NULL)
2275 *offset = (int)(parser->m_eventPtr - parser->m_buffer);
2276 if (size != NULL)
2277 *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2278 return parser->m_buffer;
2279 }
2280 #else
2281 (void)parser;
2282 (void)offset;
2283 (void)size;
2284 #endif /* defined XML_CONTEXT_BYTES */
2285 return (const char *)0;
2286 }
2287
2288 XML_Size XMLCALL
XML_GetCurrentLineNumber(XML_Parser parser)2289 XML_GetCurrentLineNumber(XML_Parser parser) {
2290 if (parser == NULL)
2291 return 0;
2292 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2293 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2294 parser->m_eventPtr, &parser->m_position);
2295 parser->m_positionPtr = parser->m_eventPtr;
2296 }
2297 return parser->m_position.lineNumber + 1;
2298 }
2299
2300 XML_Size XMLCALL
XML_GetCurrentColumnNumber(XML_Parser parser)2301 XML_GetCurrentColumnNumber(XML_Parser parser) {
2302 if (parser == NULL)
2303 return 0;
2304 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2305 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2306 parser->m_eventPtr, &parser->m_position);
2307 parser->m_positionPtr = parser->m_eventPtr;
2308 }
2309 return parser->m_position.columnNumber;
2310 }
2311
2312 void XMLCALL
XML_FreeContentModel(XML_Parser parser,XML_Content * model)2313 XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
2314 if (parser != NULL)
2315 FREE(parser, model);
2316 }
2317
2318 void *XMLCALL
XML_MemMalloc(XML_Parser parser,size_t size)2319 XML_MemMalloc(XML_Parser parser, size_t size) {
2320 if (parser == NULL)
2321 return NULL;
2322 return MALLOC(parser, size);
2323 }
2324
2325 void *XMLCALL
XML_MemRealloc(XML_Parser parser,void * ptr,size_t size)2326 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
2327 if (parser == NULL)
2328 return NULL;
2329 return REALLOC(parser, ptr, size);
2330 }
2331
2332 void XMLCALL
XML_MemFree(XML_Parser parser,void * ptr)2333 XML_MemFree(XML_Parser parser, void *ptr) {
2334 if (parser != NULL)
2335 FREE(parser, ptr);
2336 }
2337
2338 void XMLCALL
XML_DefaultCurrent(XML_Parser parser)2339 XML_DefaultCurrent(XML_Parser parser) {
2340 if (parser == NULL)
2341 return;
2342 if (parser->m_defaultHandler) {
2343 if (parser->m_openInternalEntities)
2344 reportDefault(parser, parser->m_internalEncoding,
2345 parser->m_openInternalEntities->internalEventPtr,
2346 parser->m_openInternalEntities->internalEventEndPtr);
2347 else
2348 reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2349 parser->m_eventEndPtr);
2350 }
2351 }
2352
2353 const XML_LChar *XMLCALL
XML_ErrorString(enum XML_Error code)2354 XML_ErrorString(enum XML_Error code) {
2355 switch (code) {
2356 case XML_ERROR_NONE:
2357 return NULL;
2358 case XML_ERROR_NO_MEMORY:
2359 return XML_L("out of memory");
2360 case XML_ERROR_SYNTAX:
2361 return XML_L("syntax error");
2362 case XML_ERROR_NO_ELEMENTS:
2363 return XML_L("no element found");
2364 case XML_ERROR_INVALID_TOKEN:
2365 return XML_L("not well-formed (invalid token)");
2366 case XML_ERROR_UNCLOSED_TOKEN:
2367 return XML_L("unclosed token");
2368 case XML_ERROR_PARTIAL_CHAR:
2369 return XML_L("partial character");
2370 case XML_ERROR_TAG_MISMATCH:
2371 return XML_L("mismatched tag");
2372 case XML_ERROR_DUPLICATE_ATTRIBUTE:
2373 return XML_L("duplicate attribute");
2374 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2375 return XML_L("junk after document element");
2376 case XML_ERROR_PARAM_ENTITY_REF:
2377 return XML_L("illegal parameter entity reference");
2378 case XML_ERROR_UNDEFINED_ENTITY:
2379 return XML_L("undefined entity");
2380 case XML_ERROR_RECURSIVE_ENTITY_REF:
2381 return XML_L("recursive entity reference");
2382 case XML_ERROR_ASYNC_ENTITY:
2383 return XML_L("asynchronous entity");
2384 case XML_ERROR_BAD_CHAR_REF:
2385 return XML_L("reference to invalid character number");
2386 case XML_ERROR_BINARY_ENTITY_REF:
2387 return XML_L("reference to binary entity");
2388 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2389 return XML_L("reference to external entity in attribute");
2390 case XML_ERROR_MISPLACED_XML_PI:
2391 return XML_L("XML or text declaration not at start of entity");
2392 case XML_ERROR_UNKNOWN_ENCODING:
2393 return XML_L("unknown encoding");
2394 case XML_ERROR_INCORRECT_ENCODING:
2395 return XML_L("encoding specified in XML declaration is incorrect");
2396 case XML_ERROR_UNCLOSED_CDATA_SECTION:
2397 return XML_L("unclosed CDATA section");
2398 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2399 return XML_L("error in processing external entity reference");
2400 case XML_ERROR_NOT_STANDALONE:
2401 return XML_L("document is not standalone");
2402 case XML_ERROR_UNEXPECTED_STATE:
2403 return XML_L("unexpected parser state - please send a bug report");
2404 case XML_ERROR_ENTITY_DECLARED_IN_PE:
2405 return XML_L("entity declared in parameter entity");
2406 case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2407 return XML_L("requested feature requires XML_DTD support in Expat");
2408 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2409 return XML_L("cannot change setting once parsing has begun");
2410 /* Added in 1.95.7. */
2411 case XML_ERROR_UNBOUND_PREFIX:
2412 return XML_L("unbound prefix");
2413 /* Added in 1.95.8. */
2414 case XML_ERROR_UNDECLARING_PREFIX:
2415 return XML_L("must not undeclare prefix");
2416 case XML_ERROR_INCOMPLETE_PE:
2417 return XML_L("incomplete markup in parameter entity");
2418 case XML_ERROR_XML_DECL:
2419 return XML_L("XML declaration not well-formed");
2420 case XML_ERROR_TEXT_DECL:
2421 return XML_L("text declaration not well-formed");
2422 case XML_ERROR_PUBLICID:
2423 return XML_L("illegal character(s) in public id");
2424 case XML_ERROR_SUSPENDED:
2425 return XML_L("parser suspended");
2426 case XML_ERROR_NOT_SUSPENDED:
2427 return XML_L("parser not suspended");
2428 case XML_ERROR_ABORTED:
2429 return XML_L("parsing aborted");
2430 case XML_ERROR_FINISHED:
2431 return XML_L("parsing finished");
2432 case XML_ERROR_SUSPEND_PE:
2433 return XML_L("cannot suspend in external parameter entity");
2434 /* Added in 2.0.0. */
2435 case XML_ERROR_RESERVED_PREFIX_XML:
2436 return XML_L(
2437 "reserved prefix (xml) must not be undeclared or bound to another namespace name");
2438 case XML_ERROR_RESERVED_PREFIX_XMLNS:
2439 return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2440 case XML_ERROR_RESERVED_NAMESPACE_URI:
2441 return XML_L(
2442 "prefix must not be bound to one of the reserved namespace names");
2443 /* Added in 2.2.5. */
2444 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2445 return XML_L("invalid argument");
2446 /* Added in 2.3.0. */
2447 case XML_ERROR_NO_BUFFER:
2448 return XML_L(
2449 "a successful prior call to function XML_GetBuffer is required");
2450 /* Added in 2.4.0. */
2451 case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
2452 return XML_L(
2453 "limit on input amplification factor (from DTD and entities) breached");
2454 }
2455 return NULL;
2456 }
2457
2458 const XML_LChar *XMLCALL
XML_ExpatVersion(void)2459 XML_ExpatVersion(void) {
2460 /* V1 is used to string-ize the version number. However, it would
2461 string-ize the actual version macro *names* unless we get them
2462 substituted before being passed to V1. CPP is defined to expand
2463 a macro, then rescan for more expansions. Thus, we use V2 to expand
2464 the version macros, then CPP will expand the resulting V1() macro
2465 with the correct numerals. */
2466 /* ### I'm assuming cpp is portable in this respect... */
2467
2468 #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
2469 #define V2(a, b, c) XML_L("expat_") V1(a, b, c)
2470
2471 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2472
2473 #undef V1
2474 #undef V2
2475 }
2476
2477 XML_Expat_Version XMLCALL
XML_ExpatVersionInfo(void)2478 XML_ExpatVersionInfo(void) {
2479 XML_Expat_Version version;
2480
2481 version.major = XML_MAJOR_VERSION;
2482 version.minor = XML_MINOR_VERSION;
2483 version.micro = XML_MICRO_VERSION;
2484
2485 return version;
2486 }
2487
2488 const XML_Feature *XMLCALL
XML_GetFeatureList(void)2489 XML_GetFeatureList(void) {
2490 static const XML_Feature features[] = {
2491 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2492 sizeof(XML_Char)},
2493 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2494 sizeof(XML_LChar)},
2495 #ifdef XML_UNICODE
2496 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
2497 #endif
2498 #ifdef XML_UNICODE_WCHAR_T
2499 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
2500 #endif
2501 #ifdef XML_DTD
2502 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
2503 #endif
2504 #ifdef XML_CONTEXT_BYTES
2505 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2506 XML_CONTEXT_BYTES},
2507 #endif
2508 #ifdef XML_MIN_SIZE
2509 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
2510 #endif
2511 #ifdef XML_NS
2512 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2513 #endif
2514 #ifdef XML_LARGE_SIZE
2515 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2516 #endif
2517 #ifdef XML_ATTR_INFO
2518 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2519 #endif
2520 #ifdef XML_DTD
2521 /* Added in Expat 2.4.0. */
2522 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
2523 XML_L("XML_BLAP_MAX_AMP"),
2524 (long int)
2525 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
2526 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
2527 XML_L("XML_BLAP_ACT_THRES"),
2528 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
2529 #endif
2530 {XML_FEATURE_END, NULL, 0}};
2531
2532 return features;
2533 }
2534
2535 #ifdef XML_DTD
2536 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionMaximumAmplification(XML_Parser parser,float maximumAmplificationFactor)2537 XML_SetBillionLaughsAttackProtectionMaximumAmplification(
2538 XML_Parser parser, float maximumAmplificationFactor) {
2539 if ((parser == NULL) || (parser->m_parentParser != NULL)
2540 || isnan(maximumAmplificationFactor)
2541 || (maximumAmplificationFactor < 1.0f)) {
2542 return XML_FALSE;
2543 }
2544 parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
2545 return XML_TRUE;
2546 }
2547
2548 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser parser,unsigned long long activationThresholdBytes)2549 XML_SetBillionLaughsAttackProtectionActivationThreshold(
2550 XML_Parser parser, unsigned long long activationThresholdBytes) {
2551 if ((parser == NULL) || (parser->m_parentParser != NULL)) {
2552 return XML_FALSE;
2553 }
2554 parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
2555 return XML_TRUE;
2556 }
2557 #endif /* XML_DTD */
2558
2559 /* Initially tag->rawName always points into the parse buffer;
2560 for those TAG instances opened while the current parse buffer was
2561 processed, and not yet closed, we need to store tag->rawName in a more
2562 permanent location, since the parse buffer is about to be discarded.
2563 */
2564 static XML_Bool
storeRawNames(XML_Parser parser)2565 storeRawNames(XML_Parser parser) {
2566 TAG *tag = parser->m_tagStack;
2567 while (tag) {
2568 int bufSize;
2569 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2570 size_t rawNameLen;
2571 char *rawNameBuf = tag->buf + nameLen;
2572 /* Stop if already stored. Since m_tagStack is a stack, we can stop
2573 at the first entry that has already been copied; everything
2574 below it in the stack is already been accounted for in a
2575 previous call to this function.
2576 */
2577 if (tag->rawName == rawNameBuf)
2578 break;
2579 /* For re-use purposes we need to ensure that the
2580 size of tag->buf is a multiple of sizeof(XML_Char).
2581 */
2582 rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2583 /* Detect and prevent integer overflow. */
2584 if (rawNameLen > (size_t)INT_MAX - nameLen)
2585 return XML_FALSE;
2586 bufSize = nameLen + (int)rawNameLen;
2587 if (bufSize > tag->bufEnd - tag->buf) {
2588 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2589 if (temp == NULL)
2590 return XML_FALSE;
2591 /* if tag->name.str points to tag->buf (only when namespace
2592 processing is off) then we have to update it
2593 */
2594 if (tag->name.str == (XML_Char *)tag->buf)
2595 tag->name.str = (XML_Char *)temp;
2596 /* if tag->name.localPart is set (when namespace processing is on)
2597 then update it as well, since it will always point into tag->buf
2598 */
2599 if (tag->name.localPart)
2600 tag->name.localPart
2601 = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf);
2602 tag->buf = temp;
2603 tag->bufEnd = temp + bufSize;
2604 rawNameBuf = temp + nameLen;
2605 }
2606 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2607 tag->rawName = rawNameBuf;
2608 tag = tag->parent;
2609 }
2610 return XML_TRUE;
2611 }
2612
2613 static enum XML_Error PTRCALL
contentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2614 contentProcessor(XML_Parser parser, const char *start, const char *end,
2615 const char **endPtr) {
2616 enum XML_Error result = doContent(
2617 parser, 0, parser->m_encoding, start, end, endPtr,
2618 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
2619 if (result == XML_ERROR_NONE) {
2620 if (! storeRawNames(parser))
2621 return XML_ERROR_NO_MEMORY;
2622 }
2623 return result;
2624 }
2625
2626 static enum XML_Error PTRCALL
externalEntityInitProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2627 externalEntityInitProcessor(XML_Parser parser, const char *start,
2628 const char *end, const char **endPtr) {
2629 enum XML_Error result = initializeEncoding(parser);
2630 if (result != XML_ERROR_NONE)
2631 return result;
2632 parser->m_processor = externalEntityInitProcessor2;
2633 return externalEntityInitProcessor2(parser, start, end, endPtr);
2634 }
2635
2636 static enum XML_Error PTRCALL
externalEntityInitProcessor2(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2637 externalEntityInitProcessor2(XML_Parser parser, const char *start,
2638 const char *end, const char **endPtr) {
2639 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2640 int tok = XmlContentTok(parser->m_encoding, start, end, &next);
2641 switch (tok) {
2642 case XML_TOK_BOM:
2643 #ifdef XML_DTD
2644 if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
2645 XML_ACCOUNT_DIRECT)) {
2646 accountingOnAbort(parser);
2647 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2648 }
2649 #endif /* XML_DTD */
2650
2651 /* If we are at the end of the buffer, this would cause the next stage,
2652 i.e. externalEntityInitProcessor3, to pass control directly to
2653 doContent (by detecting XML_TOK_NONE) without processing any xml text
2654 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2655 */
2656 if (next == end && ! parser->m_parsingStatus.finalBuffer) {
2657 *endPtr = next;
2658 return XML_ERROR_NONE;
2659 }
2660 start = next;
2661 break;
2662 case XML_TOK_PARTIAL:
2663 if (! parser->m_parsingStatus.finalBuffer) {
2664 *endPtr = start;
2665 return XML_ERROR_NONE;
2666 }
2667 parser->m_eventPtr = start;
2668 return XML_ERROR_UNCLOSED_TOKEN;
2669 case XML_TOK_PARTIAL_CHAR:
2670 if (! parser->m_parsingStatus.finalBuffer) {
2671 *endPtr = start;
2672 return XML_ERROR_NONE;
2673 }
2674 parser->m_eventPtr = start;
2675 return XML_ERROR_PARTIAL_CHAR;
2676 }
2677 parser->m_processor = externalEntityInitProcessor3;
2678 return externalEntityInitProcessor3(parser, start, end, endPtr);
2679 }
2680
2681 static enum XML_Error PTRCALL
externalEntityInitProcessor3(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2682 externalEntityInitProcessor3(XML_Parser parser, const char *start,
2683 const char *end, const char **endPtr) {
2684 int tok;
2685 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2686 parser->m_eventPtr = start;
2687 tok = XmlContentTok(parser->m_encoding, start, end, &next);
2688 /* Note: These bytes are accounted later in:
2689 - processXmlDecl
2690 - externalEntityContentProcessor
2691 */
2692 parser->m_eventEndPtr = next;
2693
2694 switch (tok) {
2695 case XML_TOK_XML_DECL: {
2696 enum XML_Error result;
2697 result = processXmlDecl(parser, 1, start, next);
2698 if (result != XML_ERROR_NONE)
2699 return result;
2700 switch (parser->m_parsingStatus.parsing) {
2701 case XML_SUSPENDED:
2702 *endPtr = next;
2703 return XML_ERROR_NONE;
2704 case XML_FINISHED:
2705 return XML_ERROR_ABORTED;
2706 default:
2707 start = next;
2708 }
2709 } break;
2710 case XML_TOK_PARTIAL:
2711 if (! parser->m_parsingStatus.finalBuffer) {
2712 *endPtr = start;
2713 return XML_ERROR_NONE;
2714 }
2715 return XML_ERROR_UNCLOSED_TOKEN;
2716 case XML_TOK_PARTIAL_CHAR:
2717 if (! parser->m_parsingStatus.finalBuffer) {
2718 *endPtr = start;
2719 return XML_ERROR_NONE;
2720 }
2721 return XML_ERROR_PARTIAL_CHAR;
2722 }
2723 parser->m_processor = externalEntityContentProcessor;
2724 parser->m_tagLevel = 1;
2725 return externalEntityContentProcessor(parser, start, end, endPtr);
2726 }
2727
2728 static enum XML_Error PTRCALL
externalEntityContentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2729 externalEntityContentProcessor(XML_Parser parser, const char *start,
2730 const char *end, const char **endPtr) {
2731 enum XML_Error result
2732 = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
2733 (XML_Bool)! parser->m_parsingStatus.finalBuffer,
2734 XML_ACCOUNT_ENTITY_EXPANSION);
2735 if (result == XML_ERROR_NONE) {
2736 if (! storeRawNames(parser))
2737 return XML_ERROR_NO_MEMORY;
2738 }
2739 return result;
2740 }
2741
/* Tokenizes the input between s and end with XmlContentTok() and handles
   one token per loop iteration, calling the handlers registered on the
   parser.  The loop only ends through one of the return statements.

   parser        - parser whose handlers and state are used
   startTagLevel - tag level the parser is expected to be at when the
                   input ends; the processors in this file pass 0
                   (contentProcessor) or 1 (externalEntityContentProcessor)
   enc           - encoding used for tokenizing; if it is not
                   parser->m_encoding, event positions are recorded in
                   parser->m_openInternalEntities instead of in the parser
   s, end        - bounds of the input to process
   nextPtr       - set to the position where processing stopped on the
                   XML_ERROR_NONE return paths of this function
   haveMore      - if true, incomplete trailing tokens are left unconsumed
                   (XML_ERROR_NONE is returned with *nextPtr at the token
                   start) instead of being treated as final
   account       - accounting category handed to accountingDiffTolerated(),
                   storeAtts() and doCdataSection()

   Returns XML_ERROR_NONE or the XML_Error describing why processing
   stopped.
*/
static enum XML_Error
doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
          const char *s, const char *end, const char **nextPtr,
          XML_Bool haveMore, enum XML_Account account) {
  /* save one level of indirection */
  DTD *const dtd = parser->m_dtd;

  /* Where to record the start and end of the current event: in the parser
     itself when tokenizing in the parser's encoding, otherwise in
     parser->m_openInternalEntities.
  */
  const char **eventPP;
  const char **eventEndPP;
  if (enc == parser->m_encoding) {
    eventPP = &parser->m_eventPtr;
    eventEndPP = &parser->m_eventEndPtr;
  } else {
    eventPP = &(parser->m_openInternalEntities->internalEventPtr);
    eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
  }
  *eventPP = s;

  for (;;) {
    const char *next = s; /* XmlContentTok doesn't always set the last arg */
    int tok = XmlContentTok(enc, s, end, &next);
#ifdef XML_DTD
    /* For a trailing ']' or CR, account either nothing (the token is left
       for the next call) or everything up to end (final input); for all
       other tokens account up to the end of the token.
    */
    const char *accountAfter
        = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
              ? (haveMore ? s /* i.e. 0 bytes */ : end)
              : next;
    if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
                                  account)) {
      accountingOnAbort(parser);
      return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
    }
#endif
    *eventEndPP = next;
    switch (tok) {
    case XML_TOK_TRAILING_CR:
      /* With more input to come, leave the CR unconsumed. */
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      *eventEndPP = end;
      /* The character data handler receives a single 0xA character. */
      if (parser->m_characterDataHandler) {
        XML_Char c = 0xA;
        parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, end);
      /* We are at the end of the final buffer, should we check for
         XML_SUSPENDED, XML_FINISHED?
      */
      if (startTagLevel == 0)
        return XML_ERROR_NO_ELEMENTS;
      if (parser->m_tagLevel != startTagLevel)
        return XML_ERROR_ASYNC_ENTITY;
      *nextPtr = end;
      return XML_ERROR_NONE;
    case XML_TOK_NONE:
      /* No further token in the input. */
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      if (startTagLevel > 0) {
        if (parser->m_tagLevel != startTagLevel)
          return XML_ERROR_ASYNC_ENTITY;
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_NO_ELEMENTS;
    case XML_TOK_INVALID:
      *eventPP = next;
      return XML_ERROR_INVALID_TOKEN;
    case XML_TOK_PARTIAL:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_UNCLOSED_TOKEN;
    case XML_TOK_PARTIAL_CHAR:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_PARTIAL_CHAR;
    case XML_TOK_ENTITY_REF: {
      const XML_Char *name;
      ENTITY *entity;
      /* Non-zero if the reference names a predefined entity; ch is then
         the single character passed to the character data handler.
      */
      XML_Char ch = (XML_Char)XmlPredefinedEntityName(
          enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
      if (ch) {
#ifdef XML_DTD
        /* NOTE: We are replacing 4-6 characters original input for 1 character
         *       so there is no amplification and hence recording without
         *       protection. */
        accountingDiffTolerated(parser, tok, (char *)&ch,
                                ((char *)&ch) + sizeof(XML_Char), __LINE__,
                                XML_ACCOUNT_ENTITY_EXPANSION);
#endif /* XML_DTD */
        if (parser->m_characterDataHandler)
          parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
        else if (parser->m_defaultHandler)
          reportDefault(parser, enc, s, next);
        break;
      }
      /* The name is stored in the DTD pool only for the lookup and is
         discarded again right afterwards.
      */
      name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
                             next - enc->minBytesPerChar);
      if (! name)
        return XML_ERROR_NO_MEMORY;
      entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
      poolDiscard(&dtd->pool);
      /* First, determine if a check for an existing declaration is needed;
         if yes, check that the entity exists, and that it is internal,
         otherwise call the skipped entity or default handler.
      */
      if (! dtd->hasParamEntityRefs || dtd->standalone) {
        if (! entity)
          return XML_ERROR_UNDEFINED_ENTITY;
        else if (! entity->is_internal)
          return XML_ERROR_ENTITY_DECLARED_IN_PE;
      } else if (! entity) {
        if (parser->m_skippedEntityHandler)
          parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
        else if (parser->m_defaultHandler)
          reportDefault(parser, enc, s, next);
        break;
      }
      if (entity->open)
        return XML_ERROR_RECURSIVE_ENTITY_REF;
      if (entity->notation)
        return XML_ERROR_BINARY_ENTITY_REF;
      if (entity->textPtr) {
        /* Entity with text available: expand it, unless expansion of
           internal entities is switched off, in which case it is reported
           as skipped.
        */
        enum XML_Error result;
        if (! parser->m_defaultExpandInternalEntities) {
          if (parser->m_skippedEntityHandler)
            parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
                                           0);
          else if (parser->m_defaultHandler)
            reportDefault(parser, enc, s, next);
          break;
        }
        result = processInternalEntity(parser, entity, XML_FALSE);
        if (result != XML_ERROR_NONE)
          return result;
      } else if (parser->m_externalEntityRefHandler) {
        const XML_Char *context;
        /* The entity is marked open only while the context is built. */
        entity->open = XML_TRUE;
        context = getContext(parser);
        entity->open = XML_FALSE;
        if (! context)
          return XML_ERROR_NO_MEMORY;
        if (! parser->m_externalEntityRefHandler(
                parser->m_externalEntityRefHandlerArg, context, entity->base,
                entity->systemId, entity->publicId))
          return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
        poolDiscard(&parser->m_tempPool);
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      break;
    }
    case XML_TOK_START_TAG_NO_ATTS:
      /* fall through */
    case XML_TOK_START_TAG_WITH_ATTS: {
      TAG *tag;
      enum XML_Error result;
      XML_Char *toPtr;
      /* Take a TAG from the free list if there is one, else allocate a
         new TAG together with its initial buffer.
      */
      if (parser->m_freeTagList) {
        tag = parser->m_freeTagList;
        parser->m_freeTagList = parser->m_freeTagList->parent;
      } else {
        tag = (TAG *)MALLOC(parser, sizeof(TAG));
        if (! tag)
          return XML_ERROR_NO_MEMORY;
        tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
        if (! tag->buf) {
          FREE(parser, tag);
          return XML_ERROR_NO_MEMORY;
        }
        tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
      }
      /* Push the tag; its raw name points into the input for now. */
      tag->bindings = NULL;
      tag->parent = parser->m_tagStack;
      parser->m_tagStack = tag;
      tag->name.localPart = NULL;
      tag->name.prefix = NULL;
      tag->rawName = s + enc->minBytesPerChar;
      tag->rawNameLength = XmlNameLength(enc, tag->rawName);
      ++parser->m_tagLevel;
      {
        /* Convert the raw name into tag->buf, doubling the buffer and
           retrying for as long as the conversion has not consumed the
           whole name.
        */
        const char *rawNameEnd = tag->rawName + tag->rawNameLength;
        const char *fromPtr = tag->rawName;
        toPtr = (XML_Char *)tag->buf;
        for (;;) {
          int bufSize;
          int convLen;
          const enum XML_Convert_Result convert_res
              = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
                           (ICHAR *)tag->bufEnd - 1);
          convLen = (int)(toPtr - (XML_Char *)tag->buf);
          if ((fromPtr >= rawNameEnd)
              || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
            tag->name.strLen = convLen;
            break;
          }
          bufSize = (int)(tag->bufEnd - tag->buf) << 1;
          {
            char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
            if (temp == NULL)
              return XML_ERROR_NO_MEMORY;
            tag->buf = temp;
            tag->bufEnd = temp + bufSize;
            toPtr = (XML_Char *)temp + convLen;
          }
        }
      }
      tag->name.str = (XML_Char *)tag->buf;
      *toPtr = XML_T('\0');
      result
          = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
      if (result)
        return result;
      if (parser->m_startElementHandler)
        parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
                                      (const XML_Char **)parser->m_atts);
      else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      poolClear(&parser->m_tempPool);
      break;
    }
    case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
      /* fall through */
    case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
      /* No TAG is pushed for an empty element; its name is kept in the
         temporary pool and the bindings in a local list.
      */
      const char *rawName = s + enc->minBytesPerChar;
      enum XML_Error result;
      BINDING *bindings = NULL;
      XML_Bool noElmHandlers = XML_TRUE;
      TAG_NAME name;
      name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
                                 rawName + XmlNameLength(enc, rawName));
      if (! name.str)
        return XML_ERROR_NO_MEMORY;
      poolFinish(&parser->m_tempPool);
      result = storeAtts(parser, enc, s, &name, &bindings,
                         XML_ACCOUNT_NONE /* token spans whole start tag */);
      if (result != XML_ERROR_NONE) {
        freeBindings(parser, bindings);
        return result;
      }
      poolFinish(&parser->m_tempPool);
      if (parser->m_startElementHandler) {
        parser->m_startElementHandler(parser->m_handlerArg, name.str,
                                      (const XML_Char **)parser->m_atts);
        noElmHandlers = XML_FALSE;
      }
      if (parser->m_endElementHandler) {
        /* If the start handler was called too, move the event start to
           the event end before calling the end handler.
        */
        if (parser->m_startElementHandler)
          *eventPP = *eventEndPP;
        parser->m_endElementHandler(parser->m_handlerArg, name.str);
        noElmHandlers = XML_FALSE;
      }
      if (noElmHandlers && parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      poolClear(&parser->m_tempPool);
      freeBindings(parser, bindings);
    }
      /* At tag level 0, continue with epilogProcessor: directly, or by
         installing it as the processor if parsing was suspended.
      */
      if ((parser->m_tagLevel == 0)
          && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
        if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
          parser->m_processor = epilogProcessor;
        else
          return epilogProcessor(parser, next, end, nextPtr);
      }
      break;
    case XML_TOK_END_TAG:
      if (parser->m_tagLevel == startTagLevel)
        return XML_ERROR_ASYNC_ENTITY;
      else {
        int len;
        const char *rawName;
        /* Pop the tag and put it on the free list, then check that the
           raw name of the end tag matches the one stored for the tag.
        */
        TAG *tag = parser->m_tagStack;
        parser->m_tagStack = tag->parent;
        tag->parent = parser->m_freeTagList;
        parser->m_freeTagList = tag;
        rawName = s + enc->minBytesPerChar * 2;
        len = XmlNameLength(enc, rawName);
        if (len != tag->rawNameLength
            || memcmp(tag->rawName, rawName, len) != 0) {
          *eventPP = rawName;
          return XML_ERROR_TAG_MISMATCH;
        }
        --parser->m_tagLevel;
        if (parser->m_endElementHandler) {
          const XML_Char *localPart;
          const XML_Char *prefix;
          XML_Char *uri;
          localPart = tag->name.localPart;
          if (parser->m_ns && localPart) {
            /* localPart and prefix may have been overwritten in
               tag->name.str, since this points to the binding->uri
               buffer which gets re-used; so we have to add them again
            */
            uri = (XML_Char *)tag->name.str + tag->name.uriLen;
            /* don't need to check for space - already done in storeAtts() */
            while (*localPart)
              *uri++ = *localPart++;
            prefix = (XML_Char *)tag->name.prefix;
            if (parser->m_ns_triplets && prefix) {
              *uri++ = parser->m_namespaceSeparator;
              while (*prefix)
                *uri++ = *prefix++;
            }
            *uri = XML_T('\0');
          }
          parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
        } else if (parser->m_defaultHandler)
          reportDefault(parser, enc, s, next);
        /* Release the tag's bindings to the free binding list, restoring
           the previous binding of each prefix.
        */
        while (tag->bindings) {
          BINDING *b = tag->bindings;
          if (parser->m_endNamespaceDeclHandler)
            parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
                                              b->prefix->name);
          tag->bindings = tag->bindings->nextTagBinding;
          b->nextTagBinding = parser->m_freeBindingList;
          parser->m_freeBindingList = b;
          b->prefix->binding = b->prevPrefixBinding;
        }
        /* At tag level 0, continue with epilogProcessor: directly, or by
           installing it as the processor if parsing was suspended.
        */
        if ((parser->m_tagLevel == 0)
            && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
          if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
            parser->m_processor = epilogProcessor;
          else
            return epilogProcessor(parser, next, end, nextPtr);
        }
      }
      break;
    case XML_TOK_CHAR_REF: {
      int n = XmlCharRefNumber(enc, s);
      if (n < 0)
        return XML_ERROR_BAD_CHAR_REF;
      if (parser->m_characterDataHandler) {
        /* Encode the referenced character into a local buffer and pass
           it on as character data.
        */
        XML_Char buf[XML_ENCODE_MAX];
        parser->m_characterDataHandler(parser->m_handlerArg, buf,
                                       XmlEncode(n, (ICHAR *)buf));
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
    } break;
    case XML_TOK_XML_DECL:
      return XML_ERROR_MISPLACED_XML_PI;
    case XML_TOK_DATA_NEWLINE:
      /* The character data handler receives a single 0xA character. */
      if (parser->m_characterDataHandler) {
        XML_Char c = 0xA;
        parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      break;
    case XML_TOK_CDATA_SECT_OPEN: {
      enum XML_Error result;
      if (parser->m_startCdataSectionHandler)
        parser->m_startCdataSectionHandler(parser->m_handlerArg);
      /* BEGIN disabled code */
      /* Suppose you are doing a transformation on a document that involves
         changing only the character data.  You set up a defaultHandler
         and a characterDataHandler.  The defaultHandler simply copies
         characters through.  The characterDataHandler does the
         transformation and writes the characters out escaping them as
         necessary.  This case will fail to work if we leave out the
         following two lines (because & and < inside CDATA sections will
         be incorrectly escaped).

         However, now we have a start/endCdataSectionHandler, so it seems
         easier to let the user deal with this.
      */
      else if (0 && parser->m_characterDataHandler)
        parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
                                       0);
      /* END disabled code */
      else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      result
          = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
      if (result != XML_ERROR_NONE)
        return result;
      else if (! next) {
        /* doCdataSection() left next as NULL: continue in
           cdataSectionProcessor on the next call.
        */
        parser->m_processor = cdataSectionProcessor;
        return result;
      }
    } break;
    case XML_TOK_TRAILING_RSQB:
      /* With more input to come, leave the token unconsumed. */
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      /* Final input: report everything from s to end as character data,
         converting it into the parser's data buffer first if required.
      */
      if (parser->m_characterDataHandler) {
        if (MUST_CONVERT(enc, s)) {
          ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
          XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
          parser->m_characterDataHandler(
              parser->m_handlerArg, parser->m_dataBuf,
              (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
        } else
          parser->m_characterDataHandler(
              parser->m_handlerArg, (XML_Char *)s,
              (int)((XML_Char *)end - (XML_Char *)s));
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, end);
      /* We are at the end of the final buffer, should we check for
         XML_SUSPENDED, XML_FINISHED?
      */
      if (startTagLevel == 0) {
        *eventPP = end;
        return XML_ERROR_NO_ELEMENTS;
      }
      if (parser->m_tagLevel != startTagLevel) {
        *eventPP = end;
        return XML_ERROR_ASYNC_ENTITY;
      }
      *nextPtr = end;
      return XML_ERROR_NONE;
    case XML_TOK_DATA_CHARS: {
      XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
      if (charDataHandler) {
        if (MUST_CONVERT(enc, s)) {
          /* Convert and report the data piece by piece through the
             parser's data buffer, moving the event start and end along
             with each piece.
          */
          for (;;) {
            ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
            const enum XML_Convert_Result convert_res = XmlConvert(
                enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
            *eventEndPP = s;
            charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
                            (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
            if ((convert_res == XML_CONVERT_COMPLETED)
                || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
              break;
            *eventPP = s;
          }
        } else
          charDataHandler(parser->m_handlerArg, (XML_Char *)s,
                          (int)((XML_Char *)next - (XML_Char *)s));
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
    } break;
    case XML_TOK_PI:
      if (! reportProcessingInstruction(parser, enc, s, next))
        return XML_ERROR_NO_MEMORY;
      break;
    case XML_TOK_COMMENT:
      if (! reportComment(parser, enc, s, next))
        return XML_ERROR_NO_MEMORY;
      break;
    default:
      /* All of the tokens produced by XmlContentTok() have their own
       * explicit cases, so this default is not strictly necessary.
       * However it is a useful safety net, so we retain the code and
       * simply exclude it from the coverage tests.
       *
       * LCOV_EXCL_START
       */
      if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      break;
      /* LCOV_EXCL_STOP */
    }
    /* Move on to the next token, unless the parsing status now says
       suspended (report the resume position) or finished (abort).
    */
    *eventPP = s = next;
    switch (parser->m_parsingStatus.parsing) {
    case XML_SUSPENDED:
      *nextPtr = next;
      return XML_ERROR_NONE;
    case XML_FINISHED:
      return XML_ERROR_ABORTED;
    default:;
    }
  }
  /* not reached */
}
3211
3212 /* This function does not call free() on the allocated memory, merely
3213 * moving it to the parser's m_freeBindingList where it can be freed or
3214 * reused as appropriate.
3215 */
3216 static void
freeBindings(XML_Parser parser,BINDING * bindings)3217 freeBindings(XML_Parser parser, BINDING *bindings) {
3218 while (bindings) {
3219 BINDING *b = bindings;
3220
3221 /* m_startNamespaceDeclHandler will have been called for this
3222 * binding in addBindings(), so call the end handler now.
3223 */
3224 if (parser->m_endNamespaceDeclHandler)
3225 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3226
3227 bindings = bindings->nextTagBinding;
3228 b->nextTagBinding = parser->m_freeBindingList;
3229 parser->m_freeBindingList = b;
3230 b->prefix->binding = b->prevPrefixBinding;
3231 }
3232 }
3233
3234 /* Precondition: all arguments must be non-NULL;
3235 Purpose:
3236 - normalize attributes
3237 - check attributes for well-formedness
3238 - generate namespace aware attribute names (URI, prefix)
3239 - build list of attributes for startElementHandler
3240 - default attributes
3241 - process namespace declarations (check and report them)
3242 - generate namespace aware element name (URI, prefix)
3243 */
3244 static enum XML_Error
storeAtts(XML_Parser parser,const ENCODING * enc,const char * attStr,TAG_NAME * tagNamePtr,BINDING ** bindingsPtr,enum XML_Account account)3245 storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
3246 TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
3247 enum XML_Account account) {
3248 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
3249 ELEMENT_TYPE *elementType;
3250 int nDefaultAtts;
3251 const XML_Char **appAtts; /* the attribute list for the application */
3252 int attIndex = 0;
3253 int prefixLen;
3254 int i;
3255 int n;
3256 XML_Char *uri;
3257 int nPrefixes = 0;
3258 BINDING *binding;
3259 const XML_Char *localPart;
3260
3261 /* lookup the element type name */
3262 elementType
3263 = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
3264 if (! elementType) {
3265 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3266 if (! name)
3267 return XML_ERROR_NO_MEMORY;
3268 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
3269 sizeof(ELEMENT_TYPE));
3270 if (! elementType)
3271 return XML_ERROR_NO_MEMORY;
3272 if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
3273 return XML_ERROR_NO_MEMORY;
3274 }
3275 nDefaultAtts = elementType->nDefaultAtts;
3276
3277 /* get the attributes from the tokenizer */
3278 n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3279
3280 /* Detect and prevent integer overflow */
3281 if (n > INT_MAX - nDefaultAtts) {
3282 return XML_ERROR_NO_MEMORY;
3283 }
3284
3285 if (n + nDefaultAtts > parser->m_attsSize) {
3286 int oldAttsSize = parser->m_attsSize;
3287 ATTRIBUTE *temp;
3288 #ifdef XML_ATTR_INFO
3289 XML_AttrInfo *temp2;
3290 #endif
3291
3292 /* Detect and prevent integer overflow */
3293 if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
3294 || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
3295 return XML_ERROR_NO_MEMORY;
3296 }
3297
3298 parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
3299
3300 /* Detect and prevent integer overflow.
3301 * The preprocessor guard addresses the "always false" warning
3302 * from -Wtype-limits on platforms where
3303 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3304 #if UINT_MAX >= SIZE_MAX
3305 if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
3306 parser->m_attsSize = oldAttsSize;
3307 return XML_ERROR_NO_MEMORY;
3308 }
3309 #endif
3310
3311 temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
3312 parser->m_attsSize * sizeof(ATTRIBUTE));
3313 if (temp == NULL) {
3314 parser->m_attsSize = oldAttsSize;
3315 return XML_ERROR_NO_MEMORY;
3316 }
3317 parser->m_atts = temp;
3318 #ifdef XML_ATTR_INFO
3319 /* Detect and prevent integer overflow.
3320 * The preprocessor guard addresses the "always false" warning
3321 * from -Wtype-limits on platforms where
3322 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3323 # if UINT_MAX >= SIZE_MAX
3324 if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
3325 parser->m_attsSize = oldAttsSize;
3326 return XML_ERROR_NO_MEMORY;
3327 }
3328 # endif
3329
3330 temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
3331 parser->m_attsSize * sizeof(XML_AttrInfo));
3332 if (temp2 == NULL) {
3333 parser->m_attsSize = oldAttsSize;
3334 return XML_ERROR_NO_MEMORY;
3335 }
3336 parser->m_attInfo = temp2;
3337 #endif
3338 if (n > oldAttsSize)
3339 XmlGetAttributes(enc, attStr, n, parser->m_atts);
3340 }
3341
3342 appAtts = (const XML_Char **)parser->m_atts;
3343 for (i = 0; i < n; i++) {
3344 ATTRIBUTE *currAtt = &parser->m_atts[i];
3345 #ifdef XML_ATTR_INFO
3346 XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
3347 #endif
3348 /* add the name and value to the attribute list */
3349 ATTRIBUTE_ID *attId
3350 = getAttributeId(parser, enc, currAtt->name,
3351 currAtt->name + XmlNameLength(enc, currAtt->name));
3352 if (! attId)
3353 return XML_ERROR_NO_MEMORY;
3354 #ifdef XML_ATTR_INFO
3355 currAttInfo->nameStart
3356 = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
3357 currAttInfo->nameEnd
3358 = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
3359 currAttInfo->valueStart = parser->m_parseEndByteIndex
3360 - (parser->m_parseEndPtr - currAtt->valuePtr);
3361 currAttInfo->valueEnd = parser->m_parseEndByteIndex
3362 - (parser->m_parseEndPtr - currAtt->valueEnd);
3363 #endif
3364 /* Detect duplicate attributes by their QNames. This does not work when
3365 namespace processing is turned on and different prefixes for the same
3366 namespace are used. For this case we have a check further down.
3367 */
3368 if ((attId->name)[-1]) {
3369 if (enc == parser->m_encoding)
3370 parser->m_eventPtr = parser->m_atts[i].name;
3371 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3372 }
3373 (attId->name)[-1] = 1;
3374 appAtts[attIndex++] = attId->name;
3375 if (! parser->m_atts[i].normalized) {
3376 enum XML_Error result;
3377 XML_Bool isCdata = XML_TRUE;
3378
3379 /* figure out whether declared as other than CDATA */
3380 if (attId->maybeTokenized) {
3381 int j;
3382 for (j = 0; j < nDefaultAtts; j++) {
3383 if (attId == elementType->defaultAtts[j].id) {
3384 isCdata = elementType->defaultAtts[j].isCdata;
3385 break;
3386 }
3387 }
3388 }
3389
3390 /* normalize the attribute value */
3391 result = storeAttributeValue(
3392 parser, enc, isCdata, parser->m_atts[i].valuePtr,
3393 parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
3394 if (result)
3395 return result;
3396 appAtts[attIndex] = poolStart(&parser->m_tempPool);
3397 poolFinish(&parser->m_tempPool);
3398 } else {
3399 /* the value did not need normalizing */
3400 appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
3401 parser->m_atts[i].valuePtr,
3402 parser->m_atts[i].valueEnd);
3403 if (appAtts[attIndex] == 0)
3404 return XML_ERROR_NO_MEMORY;
3405 poolFinish(&parser->m_tempPool);
3406 }
3407 /* handle prefixed attribute names */
3408 if (attId->prefix) {
3409 if (attId->xmlns) {
3410 /* deal with namespace declarations here */
3411 enum XML_Error result = addBinding(parser, attId->prefix, attId,
3412 appAtts[attIndex], bindingsPtr);
3413 if (result)
3414 return result;
3415 --attIndex;
3416 } else {
3417 /* deal with other prefixed names later */
3418 attIndex++;
3419 nPrefixes++;
3420 (attId->name)[-1] = 2;
3421 }
3422 } else
3423 attIndex++;
3424 }
3425
3426 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
3427 parser->m_nSpecifiedAtts = attIndex;
3428 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3429 for (i = 0; i < attIndex; i += 2)
3430 if (appAtts[i] == elementType->idAtt->name) {
3431 parser->m_idAttIndex = i;
3432 break;
3433 }
3434 } else
3435 parser->m_idAttIndex = -1;
3436
3437 /* do attribute defaulting */
3438 for (i = 0; i < nDefaultAtts; i++) {
3439 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
3440 if (! (da->id->name)[-1] && da->value) {
3441 if (da->id->prefix) {
3442 if (da->id->xmlns) {
3443 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3444 da->value, bindingsPtr);
3445 if (result)
3446 return result;
3447 } else {
3448 (da->id->name)[-1] = 2;
3449 nPrefixes++;
3450 appAtts[attIndex++] = da->id->name;
3451 appAtts[attIndex++] = da->value;
3452 }
3453 } else {
3454 (da->id->name)[-1] = 1;
3455 appAtts[attIndex++] = da->id->name;
3456 appAtts[attIndex++] = da->value;
3457 }
3458 }
3459 }
3460 appAtts[attIndex] = 0;
3461
3462 /* expand prefixed attribute names, check for duplicates,
3463 and clear flags that say whether attributes were specified */
3464 i = 0;
3465 if (nPrefixes) {
3466 int j; /* hash table index */
3467 unsigned long version = parser->m_nsAttsVersion;
3468
3469 /* Detect and prevent invalid shift */
3470 if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
3471 return XML_ERROR_NO_MEMORY;
3472 }
3473
3474 unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
3475 unsigned char oldNsAttsPower = parser->m_nsAttsPower;
3476 /* size of hash table must be at least 2 * (# of prefixed attributes) */
3477 if ((nPrefixes << 1)
3478 >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
3479 NS_ATT *temp;
3480 /* hash table size must also be a power of 2 and >= 8 */
3481 while (nPrefixes >> parser->m_nsAttsPower++)
3482 ;
3483 if (parser->m_nsAttsPower < 3)
3484 parser->m_nsAttsPower = 3;
3485
3486 /* Detect and prevent invalid shift */
3487 if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
3488 /* Restore actual size of memory in m_nsAtts */
3489 parser->m_nsAttsPower = oldNsAttsPower;
3490 return XML_ERROR_NO_MEMORY;
3491 }
3492
3493 nsAttsSize = 1u << parser->m_nsAttsPower;
3494
3495 /* Detect and prevent integer overflow.
3496 * The preprocessor guard addresses the "always false" warning
3497 * from -Wtype-limits on platforms where
3498 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3499 #if UINT_MAX >= SIZE_MAX
3500 if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
3501 /* Restore actual size of memory in m_nsAtts */
3502 parser->m_nsAttsPower = oldNsAttsPower;
3503 return XML_ERROR_NO_MEMORY;
3504 }
3505 #endif
3506
3507 temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
3508 nsAttsSize * sizeof(NS_ATT));
3509 if (! temp) {
3510 /* Restore actual size of memory in m_nsAtts */
3511 parser->m_nsAttsPower = oldNsAttsPower;
3512 return XML_ERROR_NO_MEMORY;
3513 }
3514 parser->m_nsAtts = temp;
3515 version = 0; /* force re-initialization of m_nsAtts hash table */
3516 }
3517 /* using a version flag saves us from initializing m_nsAtts every time */
3518 if (! version) { /* initialize version flags when version wraps around */
3519 version = INIT_ATTS_VERSION;
3520 for (j = nsAttsSize; j != 0;)
3521 parser->m_nsAtts[--j].version = version;
3522 }
3523 parser->m_nsAttsVersion = --version;
3524
3525 /* expand prefixed names and check for duplicates */
3526 for (; i < attIndex; i += 2) {
3527 const XML_Char *s = appAtts[i];
3528 if (s[-1] == 2) { /* prefixed */
3529 ATTRIBUTE_ID *id;
3530 const BINDING *b;
3531 unsigned long uriHash;
3532 struct siphash sip_state;
3533 struct sipkey sip_key;
3534
3535 copy_salt_to_sipkey(parser, &sip_key);
3536 sip24_init(&sip_state, &sip_key);
3537
3538 ((XML_Char *)s)[-1] = 0; /* clear flag */
3539 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
3540 if (! id || ! id->prefix) {
3541 /* This code is walking through the appAtts array, dealing
3542 * with (in this case) a prefixed attribute name. To be in
3543 * the array, the attribute must have already been bound, so
3544 * has to have passed through the hash table lookup once
3545 * already. That implies that an entry for it already
3546 * exists, so the lookup above will return a pointer to
3547 * already allocated memory. There is no opportunaity for
3548 * the allocator to fail, so the condition above cannot be
3549 * fulfilled.
3550 *
3551 * Since it is difficult to be certain that the above
3552 * analysis is complete, we retain the test and merely
3553 * remove the code from coverage tests.
3554 */
3555 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3556 }
3557 b = id->prefix->binding;
3558 if (! b)
3559 return XML_ERROR_UNBOUND_PREFIX;
3560
3561 for (j = 0; j < b->uriLen; j++) {
3562 const XML_Char c = b->uri[j];
3563 if (! poolAppendChar(&parser->m_tempPool, c))
3564 return XML_ERROR_NO_MEMORY;
3565 }
3566
3567 sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3568
3569 while (*s++ != XML_T(ASCII_COLON))
3570 ;
3571
3572 sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3573
3574 do { /* copies null terminator */
3575 if (! poolAppendChar(&parser->m_tempPool, *s))
3576 return XML_ERROR_NO_MEMORY;
3577 } while (*s++);
3578
3579 uriHash = (unsigned long)sip24_final(&sip_state);
3580
3581 { /* Check hash table for duplicate of expanded name (uriName).
3582 Derived from code in lookup(parser, HASH_TABLE *table, ...).
3583 */
3584 unsigned char step = 0;
3585 unsigned long mask = nsAttsSize - 1;
3586 j = uriHash & mask; /* index into hash table */
3587 while (parser->m_nsAtts[j].version == version) {
3588 /* for speed we compare stored hash values first */
3589 if (uriHash == parser->m_nsAtts[j].hash) {
3590 const XML_Char *s1 = poolStart(&parser->m_tempPool);
3591 const XML_Char *s2 = parser->m_nsAtts[j].uriName;
3592 /* s1 is null terminated, but not s2 */
3593 for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
3594 ;
3595 if (*s1 == 0)
3596 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3597 }
3598 if (! step)
3599 step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
3600 j < step ? (j += nsAttsSize - step) : (j -= step);
3601 }
3602 }
3603
3604 if (parser->m_ns_triplets) { /* append namespace separator and prefix */
3605 parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
3606 s = b->prefix->name;
3607 do {
3608 if (! poolAppendChar(&parser->m_tempPool, *s))
3609 return XML_ERROR_NO_MEMORY;
3610 } while (*s++);
3611 }
3612
3613 /* store expanded name in attribute list */
3614 s = poolStart(&parser->m_tempPool);
3615 poolFinish(&parser->m_tempPool);
3616 appAtts[i] = s;
3617
3618 /* fill empty slot with new version, uriName and hash value */
3619 parser->m_nsAtts[j].version = version;
3620 parser->m_nsAtts[j].hash = uriHash;
3621 parser->m_nsAtts[j].uriName = s;
3622
3623 if (! --nPrefixes) {
3624 i += 2;
3625 break;
3626 }
3627 } else /* not prefixed */
3628 ((XML_Char *)s)[-1] = 0; /* clear flag */
3629 }
3630 }
3631 /* clear flags for the remaining attributes */
3632 for (; i < attIndex; i += 2)
3633 ((XML_Char *)(appAtts[i]))[-1] = 0;
3634 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3635 binding->attId->name[-1] = 0;
3636
3637 if (! parser->m_ns)
3638 return XML_ERROR_NONE;
3639
3640 /* expand the element type name */
3641 if (elementType->prefix) {
3642 binding = elementType->prefix->binding;
3643 if (! binding)
3644 return XML_ERROR_UNBOUND_PREFIX;
3645 localPart = tagNamePtr->str;
3646 while (*localPart++ != XML_T(ASCII_COLON))
3647 ;
3648 } else if (dtd->defaultPrefix.binding) {
3649 binding = dtd->defaultPrefix.binding;
3650 localPart = tagNamePtr->str;
3651 } else
3652 return XML_ERROR_NONE;
3653 prefixLen = 0;
3654 if (parser->m_ns_triplets && binding->prefix->name) {
3655 for (; binding->prefix->name[prefixLen++];)
3656 ; /* prefixLen includes null terminator */
3657 }
3658 tagNamePtr->localPart = localPart;
3659 tagNamePtr->uriLen = binding->uriLen;
3660 tagNamePtr->prefix = binding->prefix->name;
3661 tagNamePtr->prefixLen = prefixLen;
3662 for (i = 0; localPart[i++];)
3663 ; /* i includes null terminator */
3664
3665 /* Detect and prevent integer overflow */
3666 if (binding->uriLen > INT_MAX - prefixLen
3667 || i > INT_MAX - (binding->uriLen + prefixLen)) {
3668 return XML_ERROR_NO_MEMORY;
3669 }
3670
3671 n = i + binding->uriLen + prefixLen;
3672 if (n > binding->uriAlloc) {
3673 TAG *p;
3674
3675 /* Detect and prevent integer overflow */
3676 if (n > INT_MAX - EXPAND_SPARE) {
3677 return XML_ERROR_NO_MEMORY;
3678 }
3679 /* Detect and prevent integer overflow.
3680 * The preprocessor guard addresses the "always false" warning
3681 * from -Wtype-limits on platforms where
3682 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3683 #if UINT_MAX >= SIZE_MAX
3684 if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3685 return XML_ERROR_NO_MEMORY;
3686 }
3687 #endif
3688
3689 uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
3690 if (! uri)
3691 return XML_ERROR_NO_MEMORY;
3692 binding->uriAlloc = n + EXPAND_SPARE;
3693 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3694 for (p = parser->m_tagStack; p; p = p->parent)
3695 if (p->name.str == binding->uri)
3696 p->name.str = uri;
3697 FREE(parser, binding->uri);
3698 binding->uri = uri;
3699 }
3700 /* if m_namespaceSeparator != '\0' then uri includes it already */
3701 uri = binding->uri + binding->uriLen;
3702 memcpy(uri, localPart, i * sizeof(XML_Char));
3703 /* we always have a namespace separator between localPart and prefix */
3704 if (prefixLen) {
3705 uri += i - 1;
3706 *uri = parser->m_namespaceSeparator; /* replace null terminator */
3707 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3708 }
3709 tagNamePtr->str = binding->uri;
3710 return XML_ERROR_NONE;
3711 }
3712
3713 /* addBinding() overwrites the value of prefix->binding without checking.
3714 Therefore one must keep track of the old value outside of addBinding().
3715 */
3716 static enum XML_Error
addBinding(XML_Parser parser,PREFIX * prefix,const ATTRIBUTE_ID * attId,const XML_Char * uri,BINDING ** bindingsPtr)3717 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3718 const XML_Char *uri, BINDING **bindingsPtr) {
3719 static const XML_Char xmlNamespace[]
3720 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
3721 ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
3722 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o,
3723 ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M,
3724 ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9,
3725 ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m,
3726 ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3727 ASCII_e, '\0'};
3728 static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
3729 static const XML_Char xmlnsNamespace[]
3730 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
3731 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
3732 ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH,
3733 ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x,
3734 ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'};
3735 static const int xmlnsLen
3736 = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
3737
3738 XML_Bool mustBeXML = XML_FALSE;
3739 XML_Bool isXML = XML_TRUE;
3740 XML_Bool isXMLNS = XML_TRUE;
3741
3742 BINDING *b;
3743 int len;
3744
3745 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
3746 if (*uri == XML_T('\0') && prefix->name)
3747 return XML_ERROR_UNDECLARING_PREFIX;
3748
3749 if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
3750 && prefix->name[1] == XML_T(ASCII_m)
3751 && prefix->name[2] == XML_T(ASCII_l)) {
3752 /* Not allowed to bind xmlns */
3753 if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
3754 && prefix->name[5] == XML_T('\0'))
3755 return XML_ERROR_RESERVED_PREFIX_XMLNS;
3756
3757 if (prefix->name[3] == XML_T('\0'))
3758 mustBeXML = XML_TRUE;
3759 }
3760
3761 for (len = 0; uri[len]; len++) {
3762 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3763 isXML = XML_FALSE;
3764
3765 if (! mustBeXML && isXMLNS
3766 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3767 isXMLNS = XML_FALSE;
3768
3769 // NOTE: While Expat does not validate namespace URIs against RFC 3986,
3770 // we have to at least make sure that the XML processor on top of
3771 // Expat (that is splitting tag names by namespace separator into
3772 // 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
3773 // by an attacker putting additional namespace separator characters
3774 // into namespace declarations. That would be ambiguous and not to
3775 // be expected.
3776 if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) {
3777 return XML_ERROR_SYNTAX;
3778 }
3779 }
3780 isXML = isXML && len == xmlLen;
3781 isXMLNS = isXMLNS && len == xmlnsLen;
3782
3783 if (mustBeXML != isXML)
3784 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3785 : XML_ERROR_RESERVED_NAMESPACE_URI;
3786
3787 if (isXMLNS)
3788 return XML_ERROR_RESERVED_NAMESPACE_URI;
3789
3790 if (parser->m_namespaceSeparator)
3791 len++;
3792 if (parser->m_freeBindingList) {
3793 b = parser->m_freeBindingList;
3794 if (len > b->uriAlloc) {
3795 /* Detect and prevent integer overflow */
3796 if (len > INT_MAX - EXPAND_SPARE) {
3797 return XML_ERROR_NO_MEMORY;
3798 }
3799
3800 /* Detect and prevent integer overflow.
3801 * The preprocessor guard addresses the "always false" warning
3802 * from -Wtype-limits on platforms where
3803 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3804 #if UINT_MAX >= SIZE_MAX
3805 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3806 return XML_ERROR_NO_MEMORY;
3807 }
3808 #endif
3809
3810 XML_Char *temp = (XML_Char *)REALLOC(
3811 parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
3812 if (temp == NULL)
3813 return XML_ERROR_NO_MEMORY;
3814 b->uri = temp;
3815 b->uriAlloc = len + EXPAND_SPARE;
3816 }
3817 parser->m_freeBindingList = b->nextTagBinding;
3818 } else {
3819 b = (BINDING *)MALLOC(parser, sizeof(BINDING));
3820 if (! b)
3821 return XML_ERROR_NO_MEMORY;
3822
3823 /* Detect and prevent integer overflow */
3824 if (len > INT_MAX - EXPAND_SPARE) {
3825 return XML_ERROR_NO_MEMORY;
3826 }
3827 /* Detect and prevent integer overflow.
3828 * The preprocessor guard addresses the "always false" warning
3829 * from -Wtype-limits on platforms where
3830 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3831 #if UINT_MAX >= SIZE_MAX
3832 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3833 return XML_ERROR_NO_MEMORY;
3834 }
3835 #endif
3836
3837 b->uri
3838 = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
3839 if (! b->uri) {
3840 FREE(parser, b);
3841 return XML_ERROR_NO_MEMORY;
3842 }
3843 b->uriAlloc = len + EXPAND_SPARE;
3844 }
3845 b->uriLen = len;
3846 memcpy(b->uri, uri, len * sizeof(XML_Char));
3847 if (parser->m_namespaceSeparator)
3848 b->uri[len - 1] = parser->m_namespaceSeparator;
3849 b->prefix = prefix;
3850 b->attId = attId;
3851 b->prevPrefixBinding = prefix->binding;
3852 /* NULL binding when default namespace undeclared */
3853 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
3854 prefix->binding = NULL;
3855 else
3856 prefix->binding = b;
3857 b->nextTagBinding = *bindingsPtr;
3858 *bindingsPtr = b;
3859 /* if attId == NULL then we are not starting a namespace scope */
3860 if (attId && parser->m_startNamespaceDeclHandler)
3861 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
3862 prefix->binding ? uri : 0);
3863 return XML_ERROR_NONE;
3864 }
3865
3866 /* The idea here is to avoid using stack for each CDATA section when
3867 the whole file is parsed with one call.
3868 */
3869 static enum XML_Error PTRCALL
cdataSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)3870 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
3871 const char **endPtr) {
3872 enum XML_Error result = doCdataSection(
3873 parser, parser->m_encoding, &start, end, endPtr,
3874 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
3875 if (result != XML_ERROR_NONE)
3876 return result;
3877 if (start) {
3878 if (parser->m_parentParser) { /* we are parsing an external entity */
3879 parser->m_processor = externalEntityContentProcessor;
3880 return externalEntityContentProcessor(parser, start, end, endPtr);
3881 } else {
3882 parser->m_processor = contentProcessor;
3883 return contentProcessor(parser, start, end, endPtr);
3884 }
3885 }
3886 return result;
3887 }
3888
3889 /* startPtr gets set to non-null if the section is closed, and to null if
3890 the section is not yet closed.
3891 */
3892 static enum XML_Error
doCdataSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore,enum XML_Account account)3893 doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
3894 const char *end, const char **nextPtr, XML_Bool haveMore,
3895 enum XML_Account account) {
3896 const char *s = *startPtr;
3897 const char **eventPP;
3898 const char **eventEndPP;
3899 if (enc == parser->m_encoding) {
3900 eventPP = &parser->m_eventPtr;
3901 *eventPP = s;
3902 eventEndPP = &parser->m_eventEndPtr;
3903 } else {
3904 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3905 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
3906 }
3907 *eventPP = s;
3908 *startPtr = NULL;
3909
3910 for (;;) {
3911 const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
3912 int tok = XmlCdataSectionTok(enc, s, end, &next);
3913 #ifdef XML_DTD
3914 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
3915 accountingOnAbort(parser);
3916 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
3917 }
3918 #else
3919 UNUSED_P(account);
3920 #endif
3921 *eventEndPP = next;
3922 switch (tok) {
3923 case XML_TOK_CDATA_SECT_CLOSE:
3924 if (parser->m_endCdataSectionHandler)
3925 parser->m_endCdataSectionHandler(parser->m_handlerArg);
3926 /* BEGIN disabled code */
3927 /* see comment under XML_TOK_CDATA_SECT_OPEN */
3928 else if (0 && parser->m_characterDataHandler)
3929 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3930 0);
3931 /* END disabled code */
3932 else if (parser->m_defaultHandler)
3933 reportDefault(parser, enc, s, next);
3934 *startPtr = next;
3935 *nextPtr = next;
3936 if (parser->m_parsingStatus.parsing == XML_FINISHED)
3937 return XML_ERROR_ABORTED;
3938 else
3939 return XML_ERROR_NONE;
3940 case XML_TOK_DATA_NEWLINE:
3941 if (parser->m_characterDataHandler) {
3942 XML_Char c = 0xA;
3943 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3944 } else if (parser->m_defaultHandler)
3945 reportDefault(parser, enc, s, next);
3946 break;
3947 case XML_TOK_DATA_CHARS: {
3948 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3949 if (charDataHandler) {
3950 if (MUST_CONVERT(enc, s)) {
3951 for (;;) {
3952 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3953 const enum XML_Convert_Result convert_res = XmlConvert(
3954 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3955 *eventEndPP = next;
3956 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3957 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3958 if ((convert_res == XML_CONVERT_COMPLETED)
3959 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3960 break;
3961 *eventPP = s;
3962 }
3963 } else
3964 charDataHandler(parser->m_handlerArg, (XML_Char *)s,
3965 (int)((XML_Char *)next - (XML_Char *)s));
3966 } else if (parser->m_defaultHandler)
3967 reportDefault(parser, enc, s, next);
3968 } break;
3969 case XML_TOK_INVALID:
3970 *eventPP = next;
3971 return XML_ERROR_INVALID_TOKEN;
3972 case XML_TOK_PARTIAL_CHAR:
3973 if (haveMore) {
3974 *nextPtr = s;
3975 return XML_ERROR_NONE;
3976 }
3977 return XML_ERROR_PARTIAL_CHAR;
3978 case XML_TOK_PARTIAL:
3979 case XML_TOK_NONE:
3980 if (haveMore) {
3981 *nextPtr = s;
3982 return XML_ERROR_NONE;
3983 }
3984 return XML_ERROR_UNCLOSED_CDATA_SECTION;
3985 default:
3986 /* Every token returned by XmlCdataSectionTok() has its own
3987 * explicit case, so this default case will never be executed.
3988 * We retain it as a safety net and exclude it from the coverage
3989 * statistics.
3990 *
3991 * LCOV_EXCL_START
3992 */
3993 *eventPP = next;
3994 return XML_ERROR_UNEXPECTED_STATE;
3995 /* LCOV_EXCL_STOP */
3996 }
3997
3998 *eventPP = s = next;
3999 switch (parser->m_parsingStatus.parsing) {
4000 case XML_SUSPENDED:
4001 *nextPtr = next;
4002 return XML_ERROR_NONE;
4003 case XML_FINISHED:
4004 return XML_ERROR_ABORTED;
4005 default:;
4006 }
4007 }
4008 /* not reached */
4009 }
4010
4011 #ifdef XML_DTD
4012
4013 /* The idea here is to avoid using stack for each IGNORE section when
4014 the whole file is parsed with one call.
4015 */
4016 static enum XML_Error PTRCALL
ignoreSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)4017 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
4018 const char **endPtr) {
4019 enum XML_Error result
4020 = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
4021 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
4022 if (result != XML_ERROR_NONE)
4023 return result;
4024 if (start) {
4025 parser->m_processor = prologProcessor;
4026 return prologProcessor(parser, start, end, endPtr);
4027 }
4028 return result;
4029 }
4030
4031 /* startPtr gets set to non-null is the section is closed, and to null
4032 if the section is not yet closed.
4033 */
4034 static enum XML_Error
doIgnoreSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore)4035 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4036 const char *end, const char **nextPtr, XML_Bool haveMore) {
4037 const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4038 int tok;
4039 const char *s = *startPtr;
4040 const char **eventPP;
4041 const char **eventEndPP;
4042 if (enc == parser->m_encoding) {
4043 eventPP = &parser->m_eventPtr;
4044 *eventPP = s;
4045 eventEndPP = &parser->m_eventEndPtr;
4046 } else {
4047 /* It's not entirely clear, but it seems the following two lines
4048 * of code cannot be executed. The only occasions on which 'enc'
4049 * is not 'encoding' are when this function is called
4050 * from the internal entity processing, and IGNORE sections are an
4051 * error in internal entities.
4052 *
4053 * Since it really isn't clear that this is true, we keep the code
4054 * and just remove it from our coverage tests.
4055 *
4056 * LCOV_EXCL_START
4057 */
4058 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4059 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4060 /* LCOV_EXCL_STOP */
4061 }
4062 *eventPP = s;
4063 *startPtr = NULL;
4064 tok = XmlIgnoreSectionTok(enc, s, end, &next);
4065 # ifdef XML_DTD
4066 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4067 XML_ACCOUNT_DIRECT)) {
4068 accountingOnAbort(parser);
4069 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4070 }
4071 # endif
4072 *eventEndPP = next;
4073 switch (tok) {
4074 case XML_TOK_IGNORE_SECT:
4075 if (parser->m_defaultHandler)
4076 reportDefault(parser, enc, s, next);
4077 *startPtr = next;
4078 *nextPtr = next;
4079 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4080 return XML_ERROR_ABORTED;
4081 else
4082 return XML_ERROR_NONE;
4083 case XML_TOK_INVALID:
4084 *eventPP = next;
4085 return XML_ERROR_INVALID_TOKEN;
4086 case XML_TOK_PARTIAL_CHAR:
4087 if (haveMore) {
4088 *nextPtr = s;
4089 return XML_ERROR_NONE;
4090 }
4091 return XML_ERROR_PARTIAL_CHAR;
4092 case XML_TOK_PARTIAL:
4093 case XML_TOK_NONE:
4094 if (haveMore) {
4095 *nextPtr = s;
4096 return XML_ERROR_NONE;
4097 }
4098 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
4099 default:
4100 /* All of the tokens that XmlIgnoreSectionTok() returns have
4101 * explicit cases to handle them, so this default case is never
4102 * executed. We keep it as a safety net anyway, and remove it
4103 * from our test coverage statistics.
4104 *
4105 * LCOV_EXCL_START
4106 */
4107 *eventPP = next;
4108 return XML_ERROR_UNEXPECTED_STATE;
4109 /* LCOV_EXCL_STOP */
4110 }
4111 /* not reached */
4112 }
4113
4114 #endif /* XML_DTD */
4115
4116 static enum XML_Error
initializeEncoding(XML_Parser parser)4117 initializeEncoding(XML_Parser parser) {
4118 const char *s;
4119 #ifdef XML_UNICODE
4120 char encodingBuf[128];
4121 /* See comments abount `protoclEncodingName` in parserInit() */
4122 if (! parser->m_protocolEncodingName)
4123 s = NULL;
4124 else {
4125 int i;
4126 for (i = 0; parser->m_protocolEncodingName[i]; i++) {
4127 if (i == sizeof(encodingBuf) - 1
4128 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
4129 encodingBuf[0] = '\0';
4130 break;
4131 }
4132 encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
4133 }
4134 encodingBuf[i] = '\0';
4135 s = encodingBuf;
4136 }
4137 #else
4138 s = parser->m_protocolEncodingName;
4139 #endif
4140 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
4141 &parser->m_initEncoding, &parser->m_encoding, s))
4142 return XML_ERROR_NONE;
4143 return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
4144 }
4145
4146 static enum XML_Error
processXmlDecl(XML_Parser parser,int isGeneralTextEntity,const char * s,const char * next)4147 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
4148 const char *next) {
4149 const char *encodingName = NULL;
4150 const XML_Char *storedEncName = NULL;
4151 const ENCODING *newEncoding = NULL;
4152 const char *version = NULL;
4153 const char *versionend;
4154 const XML_Char *storedversion = NULL;
4155 int standalone = -1;
4156
4157 #ifdef XML_DTD
4158 if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
4159 XML_ACCOUNT_DIRECT)) {
4160 accountingOnAbort(parser);
4161 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4162 }
4163 #endif
4164
4165 if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
4166 isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
4167 &version, &versionend, &encodingName, &newEncoding, &standalone)) {
4168 if (isGeneralTextEntity)
4169 return XML_ERROR_TEXT_DECL;
4170 else
4171 return XML_ERROR_XML_DECL;
4172 }
4173 if (! isGeneralTextEntity && standalone == 1) {
4174 parser->m_dtd->standalone = XML_TRUE;
4175 #ifdef XML_DTD
4176 if (parser->m_paramEntityParsing
4177 == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
4178 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
4179 #endif /* XML_DTD */
4180 }
4181 if (parser->m_xmlDeclHandler) {
4182 if (encodingName != NULL) {
4183 storedEncName = poolStoreString(
4184 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4185 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4186 if (! storedEncName)
4187 return XML_ERROR_NO_MEMORY;
4188 poolFinish(&parser->m_temp2Pool);
4189 }
4190 if (version) {
4191 storedversion
4192 = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
4193 versionend - parser->m_encoding->minBytesPerChar);
4194 if (! storedversion)
4195 return XML_ERROR_NO_MEMORY;
4196 }
4197 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
4198 standalone);
4199 } else if (parser->m_defaultHandler)
4200 reportDefault(parser, parser->m_encoding, s, next);
4201 if (parser->m_protocolEncodingName == NULL) {
4202 if (newEncoding) {
4203 /* Check that the specified encoding does not conflict with what
4204 * the parser has already deduced. Do we have the same number
4205 * of bytes in the smallest representation of a character? If
4206 * this is UTF-16, is it the same endianness?
4207 */
4208 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
4209 || (newEncoding->minBytesPerChar == 2
4210 && newEncoding != parser->m_encoding)) {
4211 parser->m_eventPtr = encodingName;
4212 return XML_ERROR_INCORRECT_ENCODING;
4213 }
4214 parser->m_encoding = newEncoding;
4215 } else if (encodingName) {
4216 enum XML_Error result;
4217 if (! storedEncName) {
4218 storedEncName = poolStoreString(
4219 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4220 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4221 if (! storedEncName)
4222 return XML_ERROR_NO_MEMORY;
4223 }
4224 result = handleUnknownEncoding(parser, storedEncName);
4225 poolClear(&parser->m_temp2Pool);
4226 if (result == XML_ERROR_UNKNOWN_ENCODING)
4227 parser->m_eventPtr = encodingName;
4228 return result;
4229 }
4230 }
4231
4232 if (storedEncName || storedversion)
4233 poolClear(&parser->m_temp2Pool);
4234
4235 return XML_ERROR_NONE;
4236 }
4237
4238 static enum XML_Error
handleUnknownEncoding(XML_Parser parser,const XML_Char * encodingName)4239 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
4240 if (parser->m_unknownEncodingHandler) {
4241 XML_Encoding info;
4242 int i;
4243 for (i = 0; i < 256; i++)
4244 info.map[i] = -1;
4245 info.convert = NULL;
4246 info.data = NULL;
4247 info.release = NULL;
4248 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
4249 encodingName, &info)) {
4250 ENCODING *enc;
4251 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4252 if (! parser->m_unknownEncodingMem) {
4253 if (info.release)
4254 info.release(info.data);
4255 return XML_ERROR_NO_MEMORY;
4256 }
4257 enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
4258 parser->m_unknownEncodingMem, info.map, info.convert, info.data);
4259 if (enc) {
4260 parser->m_unknownEncodingData = info.data;
4261 parser->m_unknownEncodingRelease = info.release;
4262 parser->m_encoding = enc;
4263 return XML_ERROR_NONE;
4264 }
4265 }
4266 if (info.release != NULL)
4267 info.release(info.data);
4268 }
4269 return XML_ERROR_UNKNOWN_ENCODING;
4270 }
4271
4272 static enum XML_Error PTRCALL
prologInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4273 prologInitProcessor(XML_Parser parser, const char *s, const char *end,
4274 const char **nextPtr) {
4275 enum XML_Error result = initializeEncoding(parser);
4276 if (result != XML_ERROR_NONE)
4277 return result;
4278 parser->m_processor = prologProcessor;
4279 return prologProcessor(parser, s, end, nextPtr);
4280 }
4281
4282 #ifdef XML_DTD
4283
4284 static enum XML_Error PTRCALL
externalParEntInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4285 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
4286 const char **nextPtr) {
4287 enum XML_Error result = initializeEncoding(parser);
4288 if (result != XML_ERROR_NONE)
4289 return result;
4290
4291 /* we know now that XML_Parse(Buffer) has been called,
4292 so we consider the external parameter entity read */
4293 parser->m_dtd->paramEntityRead = XML_TRUE;
4294
4295 if (parser->m_prologState.inEntityValue) {
4296 parser->m_processor = entityValueInitProcessor;
4297 return entityValueInitProcessor(parser, s, end, nextPtr);
4298 } else {
4299 parser->m_processor = externalParEntProcessor;
4300 return externalParEntProcessor(parser, s, end, nextPtr);
4301 }
4302 }
4303
4304 static enum XML_Error PTRCALL
entityValueInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4305 entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
4306 const char **nextPtr) {
4307 int tok;
4308 const char *start = s;
4309 const char *next = start;
4310 parser->m_eventPtr = start;
4311
4312 for (;;) {
4313 tok = XmlPrologTok(parser->m_encoding, start, end, &next);
4314 /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
4315 - storeEntityValue
4316 - processXmlDecl
4317 */
4318 parser->m_eventEndPtr = next;
4319 if (tok <= 0) {
4320 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4321 *nextPtr = s;
4322 return XML_ERROR_NONE;
4323 }
4324 switch (tok) {
4325 case XML_TOK_INVALID:
4326 return XML_ERROR_INVALID_TOKEN;
4327 case XML_TOK_PARTIAL:
4328 return XML_ERROR_UNCLOSED_TOKEN;
4329 case XML_TOK_PARTIAL_CHAR:
4330 return XML_ERROR_PARTIAL_CHAR;
4331 case XML_TOK_NONE: /* start == end */
4332 default:
4333 break;
4334 }
4335 /* found end of entity value - can store it now */
4336 return storeEntityValue(parser, parser->m_encoding, s, end,
4337 XML_ACCOUNT_DIRECT);
4338 } else if (tok == XML_TOK_XML_DECL) {
4339 enum XML_Error result;
4340 result = processXmlDecl(parser, 0, start, next);
4341 if (result != XML_ERROR_NONE)
4342 return result;
4343 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For
4344 * that to happen, a parameter entity parsing handler must have attempted
4345 * to suspend the parser, which fails and raises an error. The parser can
4346 * be aborted, but can't be suspended.
4347 */
4348 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4349 return XML_ERROR_ABORTED;
4350 *nextPtr = next;
4351 /* stop scanning for text declaration - we found one */
4352 parser->m_processor = entityValueProcessor;
4353 return entityValueProcessor(parser, next, end, nextPtr);
4354 }
4355 /* If we are at the end of the buffer, this would cause XmlPrologTok to
4356 return XML_TOK_NONE on the next call, which would then cause the
4357 function to exit with *nextPtr set to s - that is what we want for other
4358 tokens, but not for the BOM - we would rather like to skip it;
4359 then, when this routine is entered the next time, XmlPrologTok will
4360 return XML_TOK_INVALID, since the BOM is still in the buffer
4361 */
4362 else if (tok == XML_TOK_BOM && next == end
4363 && ! parser->m_parsingStatus.finalBuffer) {
4364 # ifdef XML_DTD
4365 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4366 XML_ACCOUNT_DIRECT)) {
4367 accountingOnAbort(parser);
4368 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4369 }
4370 # endif
4371
4372 *nextPtr = next;
4373 return XML_ERROR_NONE;
4374 }
4375 /* If we get this token, we have the start of what might be a
4376 normal tag, but not a declaration (i.e. it doesn't begin with
4377 "<!"). In a DTD context, that isn't legal.
4378 */
4379 else if (tok == XML_TOK_INSTANCE_START) {
4380 *nextPtr = next;
4381 return XML_ERROR_SYNTAX;
4382 }
4383 start = next;
4384 parser->m_eventPtr = start;
4385 }
4386 }
4387
4388 static enum XML_Error PTRCALL
externalParEntProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4389 externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
4390 const char **nextPtr) {
4391 const char *next = s;
4392 int tok;
4393
4394 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4395 if (tok <= 0) {
4396 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4397 *nextPtr = s;
4398 return XML_ERROR_NONE;
4399 }
4400 switch (tok) {
4401 case XML_TOK_INVALID:
4402 return XML_ERROR_INVALID_TOKEN;
4403 case XML_TOK_PARTIAL:
4404 return XML_ERROR_UNCLOSED_TOKEN;
4405 case XML_TOK_PARTIAL_CHAR:
4406 return XML_ERROR_PARTIAL_CHAR;
4407 case XML_TOK_NONE: /* start == end */
4408 default:
4409 break;
4410 }
4411 }
4412 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4413 However, when parsing an external subset, doProlog will not accept a BOM
4414 as valid, and report a syntax error, so we have to skip the BOM, and
4415 account for the BOM bytes.
4416 */
4417 else if (tok == XML_TOK_BOM) {
4418 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4419 XML_ACCOUNT_DIRECT)) {
4420 accountingOnAbort(parser);
4421 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4422 }
4423
4424 s = next;
4425 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4426 }
4427
4428 parser->m_processor = prologProcessor;
4429 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4430 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4431 XML_ACCOUNT_DIRECT);
4432 }
4433
4434 static enum XML_Error PTRCALL
entityValueProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4435 entityValueProcessor(XML_Parser parser, const char *s, const char *end,
4436 const char **nextPtr) {
4437 const char *start = s;
4438 const char *next = s;
4439 const ENCODING *enc = parser->m_encoding;
4440 int tok;
4441
4442 for (;;) {
4443 tok = XmlPrologTok(enc, start, end, &next);
4444 /* Note: These bytes are accounted later in:
4445 - storeEntityValue
4446 */
4447 if (tok <= 0) {
4448 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4449 *nextPtr = s;
4450 return XML_ERROR_NONE;
4451 }
4452 switch (tok) {
4453 case XML_TOK_INVALID:
4454 return XML_ERROR_INVALID_TOKEN;
4455 case XML_TOK_PARTIAL:
4456 return XML_ERROR_UNCLOSED_TOKEN;
4457 case XML_TOK_PARTIAL_CHAR:
4458 return XML_ERROR_PARTIAL_CHAR;
4459 case XML_TOK_NONE: /* start == end */
4460 default:
4461 break;
4462 }
4463 /* found end of entity value - can store it now */
4464 return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT);
4465 }
4466 start = next;
4467 }
4468 }
4469
4470 #endif /* XML_DTD */
4471
4472 static enum XML_Error PTRCALL
prologProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4473 prologProcessor(XML_Parser parser, const char *s, const char *end,
4474 const char **nextPtr) {
4475 const char *next = s;
4476 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4477 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4478 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4479 XML_ACCOUNT_DIRECT);
4480 }
4481
4482 static enum XML_Error
doProlog(XML_Parser parser,const ENCODING * enc,const char * s,const char * end,int tok,const char * next,const char ** nextPtr,XML_Bool haveMore,XML_Bool allowClosingDoctype,enum XML_Account account)4483 doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
4484 int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
4485 XML_Bool allowClosingDoctype, enum XML_Account account) {
4486 #ifdef XML_DTD
4487 static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
4488 #endif /* XML_DTD */
4489 static const XML_Char atypeCDATA[]
4490 = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
4491 static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
4492 static const XML_Char atypeIDREF[]
4493 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
4494 static const XML_Char atypeIDREFS[]
4495 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
4496 static const XML_Char atypeENTITY[]
4497 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
4498 static const XML_Char atypeENTITIES[]
4499 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
4500 ASCII_I, ASCII_E, ASCII_S, '\0'};
4501 static const XML_Char atypeNMTOKEN[]
4502 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
4503 static const XML_Char atypeNMTOKENS[]
4504 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
4505 ASCII_E, ASCII_N, ASCII_S, '\0'};
4506 static const XML_Char notationPrefix[]
4507 = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
4508 ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
4509 static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
4510 static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
4511
4512 #ifndef XML_DTD
4513 UNUSED_P(account);
4514 #endif
4515
4516 /* save one level of indirection */
4517 DTD *const dtd = parser->m_dtd;
4518
4519 const char **eventPP;
4520 const char **eventEndPP;
4521 enum XML_Content_Quant quant;
4522
4523 if (enc == parser->m_encoding) {
4524 eventPP = &parser->m_eventPtr;
4525 eventEndPP = &parser->m_eventEndPtr;
4526 } else {
4527 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4528 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4529 }
4530
4531 for (;;) {
4532 int role;
4533 XML_Bool handleDefault = XML_TRUE;
4534 *eventPP = s;
4535 *eventEndPP = next;
4536 if (tok <= 0) {
4537 if (haveMore && tok != XML_TOK_INVALID) {
4538 *nextPtr = s;
4539 return XML_ERROR_NONE;
4540 }
4541 switch (tok) {
4542 case XML_TOK_INVALID:
4543 *eventPP = next;
4544 return XML_ERROR_INVALID_TOKEN;
4545 case XML_TOK_PARTIAL:
4546 return XML_ERROR_UNCLOSED_TOKEN;
4547 case XML_TOK_PARTIAL_CHAR:
4548 return XML_ERROR_PARTIAL_CHAR;
4549 case -XML_TOK_PROLOG_S:
4550 tok = -tok;
4551 break;
4552 case XML_TOK_NONE:
4553 #ifdef XML_DTD
4554 /* for internal PE NOT referenced between declarations */
4555 if (enc != parser->m_encoding
4556 && ! parser->m_openInternalEntities->betweenDecl) {
4557 *nextPtr = s;
4558 return XML_ERROR_NONE;
4559 }
4560 /* WFC: PE Between Declarations - must check that PE contains
4561 complete markup, not only for external PEs, but also for
4562 internal PEs if the reference occurs between declarations.
4563 */
4564 if (parser->m_isParamEntity || enc != parser->m_encoding) {
4565 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
4566 == XML_ROLE_ERROR)
4567 return XML_ERROR_INCOMPLETE_PE;
4568 *nextPtr = s;
4569 return XML_ERROR_NONE;
4570 }
4571 #endif /* XML_DTD */
4572 return XML_ERROR_NO_ELEMENTS;
4573 default:
4574 tok = -tok;
4575 next = end;
4576 break;
4577 }
4578 }
4579 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
4580 #ifdef XML_DTD
4581 switch (role) {
4582 case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
4583 case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl
4584 case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
4585 break;
4586 default:
4587 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4588 accountingOnAbort(parser);
4589 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4590 }
4591 }
4592 #endif
4593 switch (role) {
4594 case XML_ROLE_XML_DECL: {
4595 enum XML_Error result = processXmlDecl(parser, 0, s, next);
4596 if (result != XML_ERROR_NONE)
4597 return result;
4598 enc = parser->m_encoding;
4599 handleDefault = XML_FALSE;
4600 } break;
4601 case XML_ROLE_DOCTYPE_NAME:
4602 if (parser->m_startDoctypeDeclHandler) {
4603 parser->m_doctypeName
4604 = poolStoreString(&parser->m_tempPool, enc, s, next);
4605 if (! parser->m_doctypeName)
4606 return XML_ERROR_NO_MEMORY;
4607 poolFinish(&parser->m_tempPool);
4608 parser->m_doctypePubid = NULL;
4609 handleDefault = XML_FALSE;
4610 }
4611 parser->m_doctypeSysid = NULL; /* always initialize to NULL */
4612 break;
4613 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
4614 if (parser->m_startDoctypeDeclHandler) {
4615 parser->m_startDoctypeDeclHandler(
4616 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4617 parser->m_doctypePubid, 1);
4618 parser->m_doctypeName = NULL;
4619 poolClear(&parser->m_tempPool);
4620 handleDefault = XML_FALSE;
4621 }
4622 break;
4623 #ifdef XML_DTD
4624 case XML_ROLE_TEXT_DECL: {
4625 enum XML_Error result = processXmlDecl(parser, 1, s, next);
4626 if (result != XML_ERROR_NONE)
4627 return result;
4628 enc = parser->m_encoding;
4629 handleDefault = XML_FALSE;
4630 } break;
4631 #endif /* XML_DTD */
4632 case XML_ROLE_DOCTYPE_PUBLIC_ID:
4633 #ifdef XML_DTD
4634 parser->m_useForeignDTD = XML_FALSE;
4635 parser->m_declEntity = (ENTITY *)lookup(
4636 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4637 if (! parser->m_declEntity)
4638 return XML_ERROR_NO_MEMORY;
4639 #endif /* XML_DTD */
4640 dtd->hasParamEntityRefs = XML_TRUE;
4641 if (parser->m_startDoctypeDeclHandler) {
4642 XML_Char *pubId;
4643 if (! XmlIsPublicId(enc, s, next, eventPP))
4644 return XML_ERROR_PUBLICID;
4645 pubId = poolStoreString(&parser->m_tempPool, enc,
4646 s + enc->minBytesPerChar,
4647 next - enc->minBytesPerChar);
4648 if (! pubId)
4649 return XML_ERROR_NO_MEMORY;
4650 normalizePublicId(pubId);
4651 poolFinish(&parser->m_tempPool);
4652 parser->m_doctypePubid = pubId;
4653 handleDefault = XML_FALSE;
4654 goto alreadyChecked;
4655 }
4656 /* fall through */
4657 case XML_ROLE_ENTITY_PUBLIC_ID:
4658 if (! XmlIsPublicId(enc, s, next, eventPP))
4659 return XML_ERROR_PUBLICID;
4660 alreadyChecked:
4661 if (dtd->keepProcessing && parser->m_declEntity) {
4662 XML_Char *tem
4663 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4664 next - enc->minBytesPerChar);
4665 if (! tem)
4666 return XML_ERROR_NO_MEMORY;
4667 normalizePublicId(tem);
4668 parser->m_declEntity->publicId = tem;
4669 poolFinish(&dtd->pool);
4670 /* Don't suppress the default handler if we fell through from
4671 * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
4672 */
4673 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
4674 handleDefault = XML_FALSE;
4675 }
4676 break;
4677 case XML_ROLE_DOCTYPE_CLOSE:
4678 if (allowClosingDoctype != XML_TRUE) {
4679 /* Must not close doctype from within expanded parameter entities */
4680 return XML_ERROR_INVALID_TOKEN;
4681 }
4682
4683 if (parser->m_doctypeName) {
4684 parser->m_startDoctypeDeclHandler(
4685 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4686 parser->m_doctypePubid, 0);
4687 poolClear(&parser->m_tempPool);
4688 handleDefault = XML_FALSE;
4689 }
4690 /* parser->m_doctypeSysid will be non-NULL in the case of a previous
4691 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
4692 was not set, indicating an external subset
4693 */
4694 #ifdef XML_DTD
4695 if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
4696 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4697 dtd->hasParamEntityRefs = XML_TRUE;
4698 if (parser->m_paramEntityParsing
4699 && parser->m_externalEntityRefHandler) {
4700 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4701 externalSubsetName, sizeof(ENTITY));
4702 if (! entity) {
4703 /* The external subset name "#" will have already been
4704 * inserted into the hash table at the start of the
4705 * external entity parsing, so no allocation will happen
4706 * and lookup() cannot fail.
4707 */
4708 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4709 }
4710 if (parser->m_useForeignDTD)
4711 entity->base = parser->m_curBase;
4712 dtd->paramEntityRead = XML_FALSE;
4713 if (! parser->m_externalEntityRefHandler(
4714 parser->m_externalEntityRefHandlerArg, 0, entity->base,
4715 entity->systemId, entity->publicId))
4716 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4717 if (dtd->paramEntityRead) {
4718 if (! dtd->standalone && parser->m_notStandaloneHandler
4719 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4720 return XML_ERROR_NOT_STANDALONE;
4721 }
4722 /* if we didn't read the foreign DTD then this means that there
4723 is no external subset and we must reset dtd->hasParamEntityRefs
4724 */
4725 else if (! parser->m_doctypeSysid)
4726 dtd->hasParamEntityRefs = hadParamEntityRefs;
4727 /* end of DTD - no need to update dtd->keepProcessing */
4728 }
4729 parser->m_useForeignDTD = XML_FALSE;
4730 }
4731 #endif /* XML_DTD */
4732 if (parser->m_endDoctypeDeclHandler) {
4733 parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
4734 handleDefault = XML_FALSE;
4735 }
4736 break;
4737 case XML_ROLE_INSTANCE_START:
4738 #ifdef XML_DTD
4739 /* if there is no DOCTYPE declaration then now is the
4740 last chance to read the foreign DTD
4741 */
4742 if (parser->m_useForeignDTD) {
4743 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4744 dtd->hasParamEntityRefs = XML_TRUE;
4745 if (parser->m_paramEntityParsing
4746 && parser->m_externalEntityRefHandler) {
4747 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4748 externalSubsetName, sizeof(ENTITY));
4749 if (! entity)
4750 return XML_ERROR_NO_MEMORY;
4751 entity->base = parser->m_curBase;
4752 dtd->paramEntityRead = XML_FALSE;
4753 if (! parser->m_externalEntityRefHandler(
4754 parser->m_externalEntityRefHandlerArg, 0, entity->base,
4755 entity->systemId, entity->publicId))
4756 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4757 if (dtd->paramEntityRead) {
4758 if (! dtd->standalone && parser->m_notStandaloneHandler
4759 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4760 return XML_ERROR_NOT_STANDALONE;
4761 }
4762 /* if we didn't read the foreign DTD then this means that there
4763 is no external subset and we must reset dtd->hasParamEntityRefs
4764 */
4765 else
4766 dtd->hasParamEntityRefs = hadParamEntityRefs;
4767 /* end of DTD - no need to update dtd->keepProcessing */
4768 }
4769 }
4770 #endif /* XML_DTD */
4771 parser->m_processor = contentProcessor;
4772 return contentProcessor(parser, s, end, nextPtr);
4773 case XML_ROLE_ATTLIST_ELEMENT_NAME:
4774 parser->m_declElementType = getElementType(parser, enc, s, next);
4775 if (! parser->m_declElementType)
4776 return XML_ERROR_NO_MEMORY;
4777 goto checkAttListDeclHandler;
4778 case XML_ROLE_ATTRIBUTE_NAME:
4779 parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
4780 if (! parser->m_declAttributeId)
4781 return XML_ERROR_NO_MEMORY;
4782 parser->m_declAttributeIsCdata = XML_FALSE;
4783 parser->m_declAttributeType = NULL;
4784 parser->m_declAttributeIsId = XML_FALSE;
4785 goto checkAttListDeclHandler;
4786 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
4787 parser->m_declAttributeIsCdata = XML_TRUE;
4788 parser->m_declAttributeType = atypeCDATA;
4789 goto checkAttListDeclHandler;
4790 case XML_ROLE_ATTRIBUTE_TYPE_ID:
4791 parser->m_declAttributeIsId = XML_TRUE;
4792 parser->m_declAttributeType = atypeID;
4793 goto checkAttListDeclHandler;
4794 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
4795 parser->m_declAttributeType = atypeIDREF;
4796 goto checkAttListDeclHandler;
4797 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
4798 parser->m_declAttributeType = atypeIDREFS;
4799 goto checkAttListDeclHandler;
4800 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
4801 parser->m_declAttributeType = atypeENTITY;
4802 goto checkAttListDeclHandler;
4803 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
4804 parser->m_declAttributeType = atypeENTITIES;
4805 goto checkAttListDeclHandler;
4806 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
4807 parser->m_declAttributeType = atypeNMTOKEN;
4808 goto checkAttListDeclHandler;
4809 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
4810 parser->m_declAttributeType = atypeNMTOKENS;
4811 checkAttListDeclHandler:
4812 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
4813 handleDefault = XML_FALSE;
4814 break;
4815 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
4816 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
4817 if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
4818 const XML_Char *prefix;
4819 if (parser->m_declAttributeType) {
4820 prefix = enumValueSep;
4821 } else {
4822 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
4823 : enumValueStart);
4824 }
4825 if (! poolAppendString(&parser->m_tempPool, prefix))
4826 return XML_ERROR_NO_MEMORY;
4827 if (! poolAppend(&parser->m_tempPool, enc, s, next))
4828 return XML_ERROR_NO_MEMORY;
4829 parser->m_declAttributeType = parser->m_tempPool.start;
4830 handleDefault = XML_FALSE;
4831 }
4832 break;
4833 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
4834 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
4835 if (dtd->keepProcessing) {
4836 if (! defineAttribute(parser->m_declElementType,
4837 parser->m_declAttributeId,
4838 parser->m_declAttributeIsCdata,
4839 parser->m_declAttributeIsId, 0, parser))
4840 return XML_ERROR_NO_MEMORY;
4841 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4842 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4843 || (*parser->m_declAttributeType == XML_T(ASCII_N)
4844 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
4845 /* Enumerated or Notation type */
4846 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4847 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
4848 return XML_ERROR_NO_MEMORY;
4849 parser->m_declAttributeType = parser->m_tempPool.start;
4850 poolFinish(&parser->m_tempPool);
4851 }
4852 *eventEndPP = s;
4853 parser->m_attlistDeclHandler(
4854 parser->m_handlerArg, parser->m_declElementType->name,
4855 parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
4856 role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
4857 poolClear(&parser->m_tempPool);
4858 handleDefault = XML_FALSE;
4859 }
4860 }
4861 break;
4862 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
4863 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
4864 if (dtd->keepProcessing) {
4865 const XML_Char *attVal;
4866 enum XML_Error result = storeAttributeValue(
4867 parser, enc, parser->m_declAttributeIsCdata,
4868 s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
4869 XML_ACCOUNT_NONE);
4870 if (result)
4871 return result;
4872 attVal = poolStart(&dtd->pool);
4873 poolFinish(&dtd->pool);
4874 /* ID attributes aren't allowed to have a default */
4875 if (! defineAttribute(
4876 parser->m_declElementType, parser->m_declAttributeId,
4877 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
4878 return XML_ERROR_NO_MEMORY;
4879 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4880 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4881 || (*parser->m_declAttributeType == XML_T(ASCII_N)
4882 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
4883 /* Enumerated or Notation type */
4884 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4885 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
4886 return XML_ERROR_NO_MEMORY;
4887 parser->m_declAttributeType = parser->m_tempPool.start;
4888 poolFinish(&parser->m_tempPool);
4889 }
4890 *eventEndPP = s;
4891 parser->m_attlistDeclHandler(
4892 parser->m_handlerArg, parser->m_declElementType->name,
4893 parser->m_declAttributeId->name, parser->m_declAttributeType,
4894 attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
4895 poolClear(&parser->m_tempPool);
4896 handleDefault = XML_FALSE;
4897 }
4898 }
4899 break;
4900 case XML_ROLE_ENTITY_VALUE:
4901 if (dtd->keepProcessing) {
4902 enum XML_Error result
4903 = storeEntityValue(parser, enc, s + enc->minBytesPerChar,
4904 next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
4905 if (parser->m_declEntity) {
4906 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
4907 parser->m_declEntity->textLen
4908 = (int)(poolLength(&dtd->entityValuePool));
4909 poolFinish(&dtd->entityValuePool);
4910 if (parser->m_entityDeclHandler) {
4911 *eventEndPP = s;
4912 parser->m_entityDeclHandler(
4913 parser->m_handlerArg, parser->m_declEntity->name,
4914 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
4915 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
4916 handleDefault = XML_FALSE;
4917 }
4918 } else
4919 poolDiscard(&dtd->entityValuePool);
4920 if (result != XML_ERROR_NONE)
4921 return result;
4922 }
4923 break;
4924 case XML_ROLE_DOCTYPE_SYSTEM_ID:
4925 #ifdef XML_DTD
4926 parser->m_useForeignDTD = XML_FALSE;
4927 #endif /* XML_DTD */
4928 dtd->hasParamEntityRefs = XML_TRUE;
4929 if (parser->m_startDoctypeDeclHandler) {
4930 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
4931 s + enc->minBytesPerChar,
4932 next - enc->minBytesPerChar);
4933 if (parser->m_doctypeSysid == NULL)
4934 return XML_ERROR_NO_MEMORY;
4935 poolFinish(&parser->m_tempPool);
4936 handleDefault = XML_FALSE;
4937 }
4938 #ifdef XML_DTD
4939 else
4940 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
4941 for the case where no parser->m_startDoctypeDeclHandler is set */
4942 parser->m_doctypeSysid = externalSubsetName;
4943 #endif /* XML_DTD */
4944 if (! dtd->standalone
4945 #ifdef XML_DTD
4946 && ! parser->m_paramEntityParsing
4947 #endif /* XML_DTD */
4948 && parser->m_notStandaloneHandler
4949 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4950 return XML_ERROR_NOT_STANDALONE;
4951 #ifndef XML_DTD
4952 break;
4953 #else /* XML_DTD */
4954 if (! parser->m_declEntity) {
4955 parser->m_declEntity = (ENTITY *)lookup(
4956 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4957 if (! parser->m_declEntity)
4958 return XML_ERROR_NO_MEMORY;
4959 parser->m_declEntity->publicId = NULL;
4960 }
4961 #endif /* XML_DTD */
4962 /* fall through */
4963 case XML_ROLE_ENTITY_SYSTEM_ID:
4964 if (dtd->keepProcessing && parser->m_declEntity) {
4965 parser->m_declEntity->systemId
4966 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4967 next - enc->minBytesPerChar);
4968 if (! parser->m_declEntity->systemId)
4969 return XML_ERROR_NO_MEMORY;
4970 parser->m_declEntity->base = parser->m_curBase;
4971 poolFinish(&dtd->pool);
4972 /* Don't suppress the default handler if we fell through from
4973 * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
4974 */
4975 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
4976 handleDefault = XML_FALSE;
4977 }
4978 break;
4979 case XML_ROLE_ENTITY_COMPLETE:
4980 if (dtd->keepProcessing && parser->m_declEntity
4981 && parser->m_entityDeclHandler) {
4982 *eventEndPP = s;
4983 parser->m_entityDeclHandler(
4984 parser->m_handlerArg, parser->m_declEntity->name,
4985 parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
4986 parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
4987 handleDefault = XML_FALSE;
4988 }
4989 break;
4990 case XML_ROLE_ENTITY_NOTATION_NAME:
4991 if (dtd->keepProcessing && parser->m_declEntity) {
4992 parser->m_declEntity->notation
4993 = poolStoreString(&dtd->pool, enc, s, next);
4994 if (! parser->m_declEntity->notation)
4995 return XML_ERROR_NO_MEMORY;
4996 poolFinish(&dtd->pool);
4997 if (parser->m_unparsedEntityDeclHandler) {
4998 *eventEndPP = s;
4999 parser->m_unparsedEntityDeclHandler(
5000 parser->m_handlerArg, parser->m_declEntity->name,
5001 parser->m_declEntity->base, parser->m_declEntity->systemId,
5002 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5003 handleDefault = XML_FALSE;
5004 } else if (parser->m_entityDeclHandler) {
5005 *eventEndPP = s;
5006 parser->m_entityDeclHandler(
5007 parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
5008 parser->m_declEntity->base, parser->m_declEntity->systemId,
5009 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5010 handleDefault = XML_FALSE;
5011 }
5012 }
5013 break;
5014 case XML_ROLE_GENERAL_ENTITY_NAME: {
5015 if (XmlPredefinedEntityName(enc, s, next)) {
5016 parser->m_declEntity = NULL;
5017 break;
5018 }
5019 if (dtd->keepProcessing) {
5020 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5021 if (! name)
5022 return XML_ERROR_NO_MEMORY;
5023 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
5024 name, sizeof(ENTITY));
5025 if (! parser->m_declEntity)
5026 return XML_ERROR_NO_MEMORY;
5027 if (parser->m_declEntity->name != name) {
5028 poolDiscard(&dtd->pool);
5029 parser->m_declEntity = NULL;
5030 } else {
5031 poolFinish(&dtd->pool);
5032 parser->m_declEntity->publicId = NULL;
5033 parser->m_declEntity->is_param = XML_FALSE;
5034 /* if we have a parent parser or are reading an internal parameter
5035 entity, then the entity declaration is not considered "internal"
5036 */
5037 parser->m_declEntity->is_internal
5038 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5039 if (parser->m_entityDeclHandler)
5040 handleDefault = XML_FALSE;
5041 }
5042 } else {
5043 poolDiscard(&dtd->pool);
5044 parser->m_declEntity = NULL;
5045 }
5046 } break;
5047 case XML_ROLE_PARAM_ENTITY_NAME:
5048 #ifdef XML_DTD
5049 if (dtd->keepProcessing) {
5050 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5051 if (! name)
5052 return XML_ERROR_NO_MEMORY;
5053 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5054 name, sizeof(ENTITY));
5055 if (! parser->m_declEntity)
5056 return XML_ERROR_NO_MEMORY;
5057 if (parser->m_declEntity->name != name) {
5058 poolDiscard(&dtd->pool);
5059 parser->m_declEntity = NULL;
5060 } else {
5061 poolFinish(&dtd->pool);
5062 parser->m_declEntity->publicId = NULL;
5063 parser->m_declEntity->is_param = XML_TRUE;
5064 /* if we have a parent parser or are reading an internal parameter
5065 entity, then the entity declaration is not considered "internal"
5066 */
5067 parser->m_declEntity->is_internal
5068 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5069 if (parser->m_entityDeclHandler)
5070 handleDefault = XML_FALSE;
5071 }
5072 } else {
5073 poolDiscard(&dtd->pool);
5074 parser->m_declEntity = NULL;
5075 }
5076 #else /* not XML_DTD */
5077 parser->m_declEntity = NULL;
5078 #endif /* XML_DTD */
5079 break;
5080 case XML_ROLE_NOTATION_NAME:
5081 parser->m_declNotationPublicId = NULL;
5082 parser->m_declNotationName = NULL;
5083 if (parser->m_notationDeclHandler) {
5084 parser->m_declNotationName
5085 = poolStoreString(&parser->m_tempPool, enc, s, next);
5086 if (! parser->m_declNotationName)
5087 return XML_ERROR_NO_MEMORY;
5088 poolFinish(&parser->m_tempPool);
5089 handleDefault = XML_FALSE;
5090 }
5091 break;
5092 case XML_ROLE_NOTATION_PUBLIC_ID:
5093 if (! XmlIsPublicId(enc, s, next, eventPP))
5094 return XML_ERROR_PUBLICID;
5095 if (parser
5096 ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
5097 XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
5098 s + enc->minBytesPerChar,
5099 next - enc->minBytesPerChar);
5100 if (! tem)
5101 return XML_ERROR_NO_MEMORY;
5102 normalizePublicId(tem);
5103 parser->m_declNotationPublicId = tem;
5104 poolFinish(&parser->m_tempPool);
5105 handleDefault = XML_FALSE;
5106 }
5107 break;
5108 case XML_ROLE_NOTATION_SYSTEM_ID:
5109 if (parser->m_declNotationName && parser->m_notationDeclHandler) {
5110 const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
5111 s + enc->minBytesPerChar,
5112 next - enc->minBytesPerChar);
5113 if (! systemId)
5114 return XML_ERROR_NO_MEMORY;
5115 *eventEndPP = s;
5116 parser->m_notationDeclHandler(
5117 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5118 systemId, parser->m_declNotationPublicId);
5119 handleDefault = XML_FALSE;
5120 }
5121 poolClear(&parser->m_tempPool);
5122 break;
5123 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
5124 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
5125 *eventEndPP = s;
5126 parser->m_notationDeclHandler(
5127 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5128 0, parser->m_declNotationPublicId);
5129 handleDefault = XML_FALSE;
5130 }
5131 poolClear(&parser->m_tempPool);
5132 break;
5133 case XML_ROLE_ERROR:
5134 switch (tok) {
5135 case XML_TOK_PARAM_ENTITY_REF:
5136 /* PE references in internal subset are
5137 not allowed within declarations. */
5138 return XML_ERROR_PARAM_ENTITY_REF;
5139 case XML_TOK_XML_DECL:
5140 return XML_ERROR_MISPLACED_XML_PI;
5141 default:
5142 return XML_ERROR_SYNTAX;
5143 }
5144 #ifdef XML_DTD
5145 case XML_ROLE_IGNORE_SECT: {
5146 enum XML_Error result;
5147 if (parser->m_defaultHandler)
5148 reportDefault(parser, enc, s, next);
5149 handleDefault = XML_FALSE;
5150 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
5151 if (result != XML_ERROR_NONE)
5152 return result;
5153 else if (! next) {
5154 parser->m_processor = ignoreSectionProcessor;
5155 return result;
5156 }
5157 } break;
5158 #endif /* XML_DTD */
5159 case XML_ROLE_GROUP_OPEN:
5160 if (parser->m_prologState.level >= parser->m_groupSize) {
5161 if (parser->m_groupSize) {
5162 {
5163 /* Detect and prevent integer overflow */
5164 if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
5165 return XML_ERROR_NO_MEMORY;
5166 }
5167
5168 char *const new_connector = (char *)REALLOC(
5169 parser, parser->m_groupConnector, parser->m_groupSize *= 2);
5170 if (new_connector == NULL) {
5171 parser->m_groupSize /= 2;
5172 return XML_ERROR_NO_MEMORY;
5173 }
5174 parser->m_groupConnector = new_connector;
5175 }
5176
5177 if (dtd->scaffIndex) {
5178 /* Detect and prevent integer overflow.
5179 * The preprocessor guard addresses the "always false" warning
5180 * from -Wtype-limits on platforms where
5181 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
5182 #if UINT_MAX >= SIZE_MAX
5183 if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
5184 return XML_ERROR_NO_MEMORY;
5185 }
5186 #endif
5187
5188 int *const new_scaff_index = (int *)REALLOC(
5189 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
5190 if (new_scaff_index == NULL)
5191 return XML_ERROR_NO_MEMORY;
5192 dtd->scaffIndex = new_scaff_index;
5193 }
5194 } else {
5195 parser->m_groupConnector
5196 = (char *)MALLOC(parser, parser->m_groupSize = 32);
5197 if (! parser->m_groupConnector) {
5198 parser->m_groupSize = 0;
5199 return XML_ERROR_NO_MEMORY;
5200 }
5201 }
5202 }
5203 parser->m_groupConnector[parser->m_prologState.level] = 0;
5204 if (dtd->in_eldecl) {
5205 int myindex = nextScaffoldPart(parser);
5206 if (myindex < 0)
5207 return XML_ERROR_NO_MEMORY;
5208 assert(dtd->scaffIndex != NULL);
5209 dtd->scaffIndex[dtd->scaffLevel] = myindex;
5210 dtd->scaffLevel++;
5211 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5212 if (parser->m_elementDeclHandler)
5213 handleDefault = XML_FALSE;
5214 }
5215 break;
5216 case XML_ROLE_GROUP_SEQUENCE:
5217 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5218 return XML_ERROR_SYNTAX;
5219 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5220 if (dtd->in_eldecl && parser->m_elementDeclHandler)
5221 handleDefault = XML_FALSE;
5222 break;
5223 case XML_ROLE_GROUP_CHOICE:
5224 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5225 return XML_ERROR_SYNTAX;
5226 if (dtd->in_eldecl
5227 && ! parser->m_groupConnector[parser->m_prologState.level]
5228 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5229 != XML_CTYPE_MIXED)) {
5230 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5231 = XML_CTYPE_CHOICE;
5232 if (parser->m_elementDeclHandler)
5233 handleDefault = XML_FALSE;
5234 }
5235 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5236 break;
5237 case XML_ROLE_PARAM_ENTITY_REF:
5238 #ifdef XML_DTD
5239 case XML_ROLE_INNER_PARAM_ENTITY_REF:
5240 dtd->hasParamEntityRefs = XML_TRUE;
5241 if (! parser->m_paramEntityParsing)
5242 dtd->keepProcessing = dtd->standalone;
5243 else {
5244 const XML_Char *name;
5245 ENTITY *entity;
5246 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5247 next - enc->minBytesPerChar);
5248 if (! name)
5249 return XML_ERROR_NO_MEMORY;
5250 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5251 poolDiscard(&dtd->pool);
5252 /* first, determine if a check for an existing declaration is needed;
5253 if yes, check that the entity exists, and that it is internal,
5254 otherwise call the skipped entity handler
5255 */
5256 if (parser->m_prologState.documentEntity
5257 && (dtd->standalone ? ! parser->m_openInternalEntities
5258 : ! dtd->hasParamEntityRefs)) {
5259 if (! entity)
5260 return XML_ERROR_UNDEFINED_ENTITY;
5261 else if (! entity->is_internal) {
5262 /* It's hard to exhaustively search the code to be sure,
5263 * but there doesn't seem to be a way of executing the
5264 * following line. There are two cases:
5265 *
5266 * If 'standalone' is false, the DTD must have no
5267 * parameter entities or we wouldn't have passed the outer
5268 * 'if' statement. That means the only entity in the hash
5269 * table is the external subset name "#" which cannot be
5270 * given as a parameter entity name in XML syntax, so the
5271 * lookup must have returned NULL and we don't even reach
5272 * the test for an internal entity.
5273 *
5274 * If 'standalone' is true, it does not seem to be
5275 * possible to create entities taking this code path that
5276 * are not internal entities, so fail the test above.
5277 *
5278 * Because this analysis is very uncertain, the code is
5279 * being left in place and merely removed from the
5280 * coverage test statistics.
5281 */
5282 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5283 }
5284 } else if (! entity) {
5285 dtd->keepProcessing = dtd->standalone;
5286 /* cannot report skipped entities in declarations */
5287 if ((role == XML_ROLE_PARAM_ENTITY_REF)
5288 && parser->m_skippedEntityHandler) {
5289 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
5290 handleDefault = XML_FALSE;
5291 }
5292 break;
5293 }
5294 if (entity->open)
5295 return XML_ERROR_RECURSIVE_ENTITY_REF;
5296 if (entity->textPtr) {
5297 enum XML_Error result;
5298 XML_Bool betweenDecl
5299 = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5300 result = processInternalEntity(parser, entity, betweenDecl);
5301 if (result != XML_ERROR_NONE)
5302 return result;
5303 handleDefault = XML_FALSE;
5304 break;
5305 }
5306 if (parser->m_externalEntityRefHandler) {
5307 dtd->paramEntityRead = XML_FALSE;
5308 entity->open = XML_TRUE;
5309 entityTrackingOnOpen(parser, entity, __LINE__);
5310 if (! parser->m_externalEntityRefHandler(
5311 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5312 entity->systemId, entity->publicId)) {
5313 entityTrackingOnClose(parser, entity, __LINE__);
5314 entity->open = XML_FALSE;
5315 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5316 }
5317 entityTrackingOnClose(parser, entity, __LINE__);
5318 entity->open = XML_FALSE;
5319 handleDefault = XML_FALSE;
5320 if (! dtd->paramEntityRead) {
5321 dtd->keepProcessing = dtd->standalone;
5322 break;
5323 }
5324 } else {
5325 dtd->keepProcessing = dtd->standalone;
5326 break;
5327 }
5328 }
5329 #endif /* XML_DTD */
5330 if (! dtd->standalone && parser->m_notStandaloneHandler
5331 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5332 return XML_ERROR_NOT_STANDALONE;
5333 break;
5334
5335 /* Element declaration stuff */
5336
5337 case XML_ROLE_ELEMENT_NAME:
5338 if (parser->m_elementDeclHandler) {
5339 parser->m_declElementType = getElementType(parser, enc, s, next);
5340 if (! parser->m_declElementType)
5341 return XML_ERROR_NO_MEMORY;
5342 dtd->scaffLevel = 0;
5343 dtd->scaffCount = 0;
5344 dtd->in_eldecl = XML_TRUE;
5345 handleDefault = XML_FALSE;
5346 }
5347 break;
5348
5349 case XML_ROLE_CONTENT_ANY:
5350 case XML_ROLE_CONTENT_EMPTY:
5351 if (dtd->in_eldecl) {
5352 if (parser->m_elementDeclHandler) {
5353 XML_Content *content
5354 = (XML_Content *)MALLOC(parser, sizeof(XML_Content));
5355 if (! content)
5356 return XML_ERROR_NO_MEMORY;
5357 content->quant = XML_CQUANT_NONE;
5358 content->name = NULL;
5359 content->numchildren = 0;
5360 content->children = NULL;
5361 content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
5362 : XML_CTYPE_EMPTY);
5363 *eventEndPP = s;
5364 parser->m_elementDeclHandler(
5365 parser->m_handlerArg, parser->m_declElementType->name, content);
5366 handleDefault = XML_FALSE;
5367 }
5368 dtd->in_eldecl = XML_FALSE;
5369 }
5370 break;
5371
5372 case XML_ROLE_CONTENT_PCDATA:
5373 if (dtd->in_eldecl) {
5374 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5375 = XML_CTYPE_MIXED;
5376 if (parser->m_elementDeclHandler)
5377 handleDefault = XML_FALSE;
5378 }
5379 break;
5380
5381 case XML_ROLE_CONTENT_ELEMENT:
5382 quant = XML_CQUANT_NONE;
5383 goto elementContent;
5384 case XML_ROLE_CONTENT_ELEMENT_OPT:
5385 quant = XML_CQUANT_OPT;
5386 goto elementContent;
5387 case XML_ROLE_CONTENT_ELEMENT_REP:
5388 quant = XML_CQUANT_REP;
5389 goto elementContent;
5390 case XML_ROLE_CONTENT_ELEMENT_PLUS:
5391 quant = XML_CQUANT_PLUS;
5392 elementContent:
5393 if (dtd->in_eldecl) {
5394 ELEMENT_TYPE *el;
5395 const XML_Char *name;
5396 size_t nameLen;
5397 const char *nxt
5398 = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
5399 int myindex = nextScaffoldPart(parser);
5400 if (myindex < 0)
5401 return XML_ERROR_NO_MEMORY;
5402 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5403 dtd->scaffold[myindex].quant = quant;
5404 el = getElementType(parser, enc, s, nxt);
5405 if (! el)
5406 return XML_ERROR_NO_MEMORY;
5407 name = el->name;
5408 dtd->scaffold[myindex].name = name;
5409 nameLen = 0;
5410 for (; name[nameLen++];)
5411 ;
5412
5413 /* Detect and prevent integer overflow */
5414 if (nameLen > UINT_MAX - dtd->contentStringLen) {
5415 return XML_ERROR_NO_MEMORY;
5416 }
5417
5418 dtd->contentStringLen += (unsigned)nameLen;
5419 if (parser->m_elementDeclHandler)
5420 handleDefault = XML_FALSE;
5421 }
5422 break;
5423
5424 case XML_ROLE_GROUP_CLOSE:
5425 quant = XML_CQUANT_NONE;
5426 goto closeGroup;
5427 case XML_ROLE_GROUP_CLOSE_OPT:
5428 quant = XML_CQUANT_OPT;
5429 goto closeGroup;
5430 case XML_ROLE_GROUP_CLOSE_REP:
5431 quant = XML_CQUANT_REP;
5432 goto closeGroup;
5433 case XML_ROLE_GROUP_CLOSE_PLUS:
5434 quant = XML_CQUANT_PLUS;
5435 closeGroup:
5436 if (dtd->in_eldecl) {
5437 if (parser->m_elementDeclHandler)
5438 handleDefault = XML_FALSE;
5439 dtd->scaffLevel--;
5440 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5441 if (dtd->scaffLevel == 0) {
5442 if (! handleDefault) {
5443 XML_Content *model = build_model(parser);
5444 if (! model)
5445 return XML_ERROR_NO_MEMORY;
5446 *eventEndPP = s;
5447 parser->m_elementDeclHandler(
5448 parser->m_handlerArg, parser->m_declElementType->name, model);
5449 }
5450 dtd->in_eldecl = XML_FALSE;
5451 dtd->contentStringLen = 0;
5452 }
5453 }
5454 break;
5455 /* End element declaration stuff */
5456
5457 case XML_ROLE_PI:
5458 if (! reportProcessingInstruction(parser, enc, s, next))
5459 return XML_ERROR_NO_MEMORY;
5460 handleDefault = XML_FALSE;
5461 break;
5462 case XML_ROLE_COMMENT:
5463 if (! reportComment(parser, enc, s, next))
5464 return XML_ERROR_NO_MEMORY;
5465 handleDefault = XML_FALSE;
5466 break;
5467 case XML_ROLE_NONE:
5468 switch (tok) {
5469 case XML_TOK_BOM:
5470 handleDefault = XML_FALSE;
5471 break;
5472 }
5473 break;
5474 case XML_ROLE_DOCTYPE_NONE:
5475 if (parser->m_startDoctypeDeclHandler)
5476 handleDefault = XML_FALSE;
5477 break;
5478 case XML_ROLE_ENTITY_NONE:
5479 if (dtd->keepProcessing && parser->m_entityDeclHandler)
5480 handleDefault = XML_FALSE;
5481 break;
5482 case XML_ROLE_NOTATION_NONE:
5483 if (parser->m_notationDeclHandler)
5484 handleDefault = XML_FALSE;
5485 break;
5486 case XML_ROLE_ATTLIST_NONE:
5487 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5488 handleDefault = XML_FALSE;
5489 break;
5490 case XML_ROLE_ELEMENT_NONE:
5491 if (parser->m_elementDeclHandler)
5492 handleDefault = XML_FALSE;
5493 break;
5494 } /* end of big switch */
5495
5496 if (handleDefault && parser->m_defaultHandler)
5497 reportDefault(parser, enc, s, next);
5498
5499 switch (parser->m_parsingStatus.parsing) {
5500 case XML_SUSPENDED:
5501 *nextPtr = next;
5502 return XML_ERROR_NONE;
5503 case XML_FINISHED:
5504 return XML_ERROR_ABORTED;
5505 default:
5506 s = next;
5507 tok = XmlPrologTok(enc, s, end, &next);
5508 }
5509 }
5510 /* not reached */
5511 }
5512
5513 static enum XML_Error PTRCALL
epilogProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5514 epilogProcessor(XML_Parser parser, const char *s, const char *end,
5515 const char **nextPtr) {
5516 parser->m_processor = epilogProcessor;
5517 parser->m_eventPtr = s;
5518 for (;;) {
5519 const char *next = NULL;
5520 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5521 #ifdef XML_DTD
5522 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5523 XML_ACCOUNT_DIRECT)) {
5524 accountingOnAbort(parser);
5525 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5526 }
5527 #endif
5528 parser->m_eventEndPtr = next;
5529 switch (tok) {
5530 /* report partial linebreak - it might be the last token */
5531 case -XML_TOK_PROLOG_S:
5532 if (parser->m_defaultHandler) {
5533 reportDefault(parser, parser->m_encoding, s, next);
5534 if (parser->m_parsingStatus.parsing == XML_FINISHED)
5535 return XML_ERROR_ABORTED;
5536 }
5537 *nextPtr = next;
5538 return XML_ERROR_NONE;
5539 case XML_TOK_NONE:
5540 *nextPtr = s;
5541 return XML_ERROR_NONE;
5542 case XML_TOK_PROLOG_S:
5543 if (parser->m_defaultHandler)
5544 reportDefault(parser, parser->m_encoding, s, next);
5545 break;
5546 case XML_TOK_PI:
5547 if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
5548 return XML_ERROR_NO_MEMORY;
5549 break;
5550 case XML_TOK_COMMENT:
5551 if (! reportComment(parser, parser->m_encoding, s, next))
5552 return XML_ERROR_NO_MEMORY;
5553 break;
5554 case XML_TOK_INVALID:
5555 parser->m_eventPtr = next;
5556 return XML_ERROR_INVALID_TOKEN;
5557 case XML_TOK_PARTIAL:
5558 if (! parser->m_parsingStatus.finalBuffer) {
5559 *nextPtr = s;
5560 return XML_ERROR_NONE;
5561 }
5562 return XML_ERROR_UNCLOSED_TOKEN;
5563 case XML_TOK_PARTIAL_CHAR:
5564 if (! parser->m_parsingStatus.finalBuffer) {
5565 *nextPtr = s;
5566 return XML_ERROR_NONE;
5567 }
5568 return XML_ERROR_PARTIAL_CHAR;
5569 default:
5570 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5571 }
5572 parser->m_eventPtr = s = next;
5573 switch (parser->m_parsingStatus.parsing) {
5574 case XML_SUSPENDED:
5575 *nextPtr = next;
5576 return XML_ERROR_NONE;
5577 case XML_FINISHED:
5578 return XML_ERROR_ABORTED;
5579 default:;
5580 }
5581 }
5582 }
5583
5584 static enum XML_Error
processInternalEntity(XML_Parser parser,ENTITY * entity,XML_Bool betweenDecl)5585 processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
5586 const char *textStart, *textEnd;
5587 const char *next;
5588 enum XML_Error result;
5589 OPEN_INTERNAL_ENTITY *openEntity;
5590
5591 if (parser->m_freeInternalEntities) {
5592 openEntity = parser->m_freeInternalEntities;
5593 parser->m_freeInternalEntities = openEntity->next;
5594 } else {
5595 openEntity
5596 = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
5597 if (! openEntity)
5598 return XML_ERROR_NO_MEMORY;
5599 }
5600 entity->open = XML_TRUE;
5601 #ifdef XML_DTD
5602 entityTrackingOnOpen(parser, entity, __LINE__);
5603 #endif
5604 entity->processed = 0;
5605 openEntity->next = parser->m_openInternalEntities;
5606 parser->m_openInternalEntities = openEntity;
5607 openEntity->entity = entity;
5608 openEntity->startTagLevel = parser->m_tagLevel;
5609 openEntity->betweenDecl = betweenDecl;
5610 openEntity->internalEventPtr = NULL;
5611 openEntity->internalEventEndPtr = NULL;
5612 textStart = (const char *)entity->textPtr;
5613 textEnd = (const char *)(entity->textPtr + entity->textLen);
5614 /* Set a safe default value in case 'next' does not get set */
5615 next = textStart;
5616
5617 #ifdef XML_DTD
5618 if (entity->is_param) {
5619 int tok
5620 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5621 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5622 tok, next, &next, XML_FALSE, XML_FALSE,
5623 XML_ACCOUNT_ENTITY_EXPANSION);
5624 } else
5625 #endif /* XML_DTD */
5626 result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
5627 textStart, textEnd, &next, XML_FALSE,
5628 XML_ACCOUNT_ENTITY_EXPANSION);
5629
5630 if (result == XML_ERROR_NONE) {
5631 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5632 entity->processed = (int)(next - textStart);
5633 parser->m_processor = internalEntityProcessor;
5634 } else {
5635 #ifdef XML_DTD
5636 entityTrackingOnClose(parser, entity, __LINE__);
5637 #endif /* XML_DTD */
5638 entity->open = XML_FALSE;
5639 parser->m_openInternalEntities = openEntity->next;
5640 /* put openEntity back in list of free instances */
5641 openEntity->next = parser->m_freeInternalEntities;
5642 parser->m_freeInternalEntities = openEntity;
5643 }
5644 }
5645 return result;
5646 }
5647
5648 static enum XML_Error PTRCALL
internalEntityProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5649 internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
5650 const char **nextPtr) {
5651 ENTITY *entity;
5652 const char *textStart, *textEnd;
5653 const char *next;
5654 enum XML_Error result;
5655 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
5656 if (! openEntity)
5657 return XML_ERROR_UNEXPECTED_STATE;
5658
5659 entity = openEntity->entity;
5660 textStart = ((const char *)entity->textPtr) + entity->processed;
5661 textEnd = (const char *)(entity->textPtr + entity->textLen);
5662 /* Set a safe default value in case 'next' does not get set */
5663 next = textStart;
5664
5665 #ifdef XML_DTD
5666 if (entity->is_param) {
5667 int tok
5668 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5669 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5670 tok, next, &next, XML_FALSE, XML_TRUE,
5671 XML_ACCOUNT_ENTITY_EXPANSION);
5672 } else
5673 #endif /* XML_DTD */
5674 result = doContent(parser, openEntity->startTagLevel,
5675 parser->m_internalEncoding, textStart, textEnd, &next,
5676 XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
5677
5678 if (result != XML_ERROR_NONE)
5679 return result;
5680 else if (textEnd != next
5681 && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5682 entity->processed = (int)(next - (const char *)entity->textPtr);
5683 return result;
5684 } else {
5685 #ifdef XML_DTD
5686 entityTrackingOnClose(parser, entity, __LINE__);
5687 #endif
5688 entity->open = XML_FALSE;
5689 parser->m_openInternalEntities = openEntity->next;
5690 /* put openEntity back in list of free instances */
5691 openEntity->next = parser->m_freeInternalEntities;
5692 parser->m_freeInternalEntities = openEntity;
5693 }
5694
5695 #ifdef XML_DTD
5696 if (entity->is_param) {
5697 int tok;
5698 parser->m_processor = prologProcessor;
5699 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5700 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5701 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5702 XML_ACCOUNT_DIRECT);
5703 } else
5704 #endif /* XML_DTD */
5705 {
5706 parser->m_processor = contentProcessor;
5707 /* see externalEntityContentProcessor vs contentProcessor */
5708 result = doContent(parser, parser->m_parentParser ? 1 : 0,
5709 parser->m_encoding, s, end, nextPtr,
5710 (XML_Bool)! parser->m_parsingStatus.finalBuffer,
5711 XML_ACCOUNT_DIRECT);
5712 if (result == XML_ERROR_NONE) {
5713 if (! storeRawNames(parser))
5714 return XML_ERROR_NO_MEMORY;
5715 }
5716 return result;
5717 }
5718 }
5719
5720 static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5721 errorProcessor(XML_Parser parser, const char *s, const char *end,
5722 const char **nextPtr) {
5723 UNUSED_P(s);
5724 UNUSED_P(end);
5725 UNUSED_P(nextPtr);
5726 return parser->m_errorCode;
5727 }
5728
5729 static enum XML_Error
storeAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)5730 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5731 const char *ptr, const char *end, STRING_POOL *pool,
5732 enum XML_Account account) {
5733 enum XML_Error result
5734 = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account);
5735 if (result)
5736 return result;
5737 if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
5738 poolChop(pool);
5739 if (! poolAppendChar(pool, XML_T('\0')))
5740 return XML_ERROR_NO_MEMORY;
5741 return XML_ERROR_NONE;
5742 }
5743
5744 static enum XML_Error
appendAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)5745 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5746 const char *ptr, const char *end, STRING_POOL *pool,
5747 enum XML_Account account) {
5748 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
5749 #ifndef XML_DTD
5750 UNUSED_P(account);
5751 #endif
5752
5753 for (;;) {
5754 const char *next
5755 = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
5756 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
5757 #ifdef XML_DTD
5758 if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
5759 accountingOnAbort(parser);
5760 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5761 }
5762 #endif
5763 switch (tok) {
5764 case XML_TOK_NONE:
5765 return XML_ERROR_NONE;
5766 case XML_TOK_INVALID:
5767 if (enc == parser->m_encoding)
5768 parser->m_eventPtr = next;
5769 return XML_ERROR_INVALID_TOKEN;
5770 case XML_TOK_PARTIAL:
5771 if (enc == parser->m_encoding)
5772 parser->m_eventPtr = ptr;
5773 return XML_ERROR_INVALID_TOKEN;
5774 case XML_TOK_CHAR_REF: {
5775 XML_Char buf[XML_ENCODE_MAX];
5776 int i;
5777 int n = XmlCharRefNumber(enc, ptr);
5778 if (n < 0) {
5779 if (enc == parser->m_encoding)
5780 parser->m_eventPtr = ptr;
5781 return XML_ERROR_BAD_CHAR_REF;
5782 }
5783 if (! isCdata && n == 0x20 /* space */
5784 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5785 break;
5786 n = XmlEncode(n, (ICHAR *)buf);
5787 /* The XmlEncode() functions can never return 0 here. That
5788 * error return happens if the code point passed in is either
5789 * negative or greater than or equal to 0x110000. The
5790 * XmlCharRefNumber() functions will all return a number
5791 * strictly less than 0x110000 or a negative value if an error
5792 * occurred. The negative value is intercepted above, so
5793 * XmlEncode() is never passed a value it might return an
5794 * error for.
5795 */
5796 for (i = 0; i < n; i++) {
5797 if (! poolAppendChar(pool, buf[i]))
5798 return XML_ERROR_NO_MEMORY;
5799 }
5800 } break;
5801 case XML_TOK_DATA_CHARS:
5802 if (! poolAppend(pool, enc, ptr, next))
5803 return XML_ERROR_NO_MEMORY;
5804 break;
5805 case XML_TOK_TRAILING_CR:
5806 next = ptr + enc->minBytesPerChar;
5807 /* fall through */
5808 case XML_TOK_ATTRIBUTE_VALUE_S:
5809 case XML_TOK_DATA_NEWLINE:
5810 if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5811 break;
5812 if (! poolAppendChar(pool, 0x20))
5813 return XML_ERROR_NO_MEMORY;
5814 break;
5815 case XML_TOK_ENTITY_REF: {
5816 const XML_Char *name;
5817 ENTITY *entity;
5818 char checkEntityDecl;
5819 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
5820 enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
5821 if (ch) {
5822 #ifdef XML_DTD
5823 /* NOTE: We are replacing 4-6 characters original input for 1 character
5824 * so there is no amplification and hence recording without
5825 * protection. */
5826 accountingDiffTolerated(parser, tok, (char *)&ch,
5827 ((char *)&ch) + sizeof(XML_Char), __LINE__,
5828 XML_ACCOUNT_ENTITY_EXPANSION);
5829 #endif /* XML_DTD */
5830 if (! poolAppendChar(pool, ch))
5831 return XML_ERROR_NO_MEMORY;
5832 break;
5833 }
5834 name = poolStoreString(&parser->m_temp2Pool, enc,
5835 ptr + enc->minBytesPerChar,
5836 next - enc->minBytesPerChar);
5837 if (! name)
5838 return XML_ERROR_NO_MEMORY;
5839 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
5840 poolDiscard(&parser->m_temp2Pool);
5841 /* First, determine if a check for an existing declaration is needed;
5842 if yes, check that the entity exists, and that it is internal.
5843 */
5844 if (pool == &dtd->pool) /* are we called from prolog? */
5845 checkEntityDecl =
5846 #ifdef XML_DTD
5847 parser->m_prologState.documentEntity &&
5848 #endif /* XML_DTD */
5849 (dtd->standalone ? ! parser->m_openInternalEntities
5850 : ! dtd->hasParamEntityRefs);
5851 else /* if (pool == &parser->m_tempPool): we are called from content */
5852 checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
5853 if (checkEntityDecl) {
5854 if (! entity)
5855 return XML_ERROR_UNDEFINED_ENTITY;
5856 else if (! entity->is_internal)
5857 return XML_ERROR_ENTITY_DECLARED_IN_PE;
5858 } else if (! entity) {
5859 /* Cannot report skipped entity here - see comments on
5860 parser->m_skippedEntityHandler.
5861 if (parser->m_skippedEntityHandler)
5862 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
5863 */
5864 /* Cannot call the default handler because this would be
5865 out of sync with the call to the startElementHandler.
5866 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
5867 reportDefault(parser, enc, ptr, next);
5868 */
5869 break;
5870 }
5871 if (entity->open) {
5872 if (enc == parser->m_encoding) {
5873 /* It does not appear that this line can be executed.
5874 *
5875 * The "if (entity->open)" check catches recursive entity
5876 * definitions. In order to be called with an open
5877 * entity, it must have gone through this code before and
5878 * been through the recursive call to
5879 * appendAttributeValue() some lines below. That call
5880 * sets the local encoding ("enc") to the parser's
5881 * internal encoding (internal_utf8 or internal_utf16),
5882 * which can never be the same as the principle encoding.
5883 * It doesn't appear there is another code path that gets
5884 * here with entity->open being TRUE.
5885 *
5886 * Since it is not certain that this logic is watertight,
5887 * we keep the line and merely exclude it from coverage
5888 * tests.
5889 */
5890 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
5891 }
5892 return XML_ERROR_RECURSIVE_ENTITY_REF;
5893 }
5894 if (entity->notation) {
5895 if (enc == parser->m_encoding)
5896 parser->m_eventPtr = ptr;
5897 return XML_ERROR_BINARY_ENTITY_REF;
5898 }
5899 if (! entity->textPtr) {
5900 if (enc == parser->m_encoding)
5901 parser->m_eventPtr = ptr;
5902 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
5903 } else {
5904 enum XML_Error result;
5905 const XML_Char *textEnd = entity->textPtr + entity->textLen;
5906 entity->open = XML_TRUE;
5907 #ifdef XML_DTD
5908 entityTrackingOnOpen(parser, entity, __LINE__);
5909 #endif
5910 result = appendAttributeValue(parser, parser->m_internalEncoding,
5911 isCdata, (const char *)entity->textPtr,
5912 (const char *)textEnd, pool,
5913 XML_ACCOUNT_ENTITY_EXPANSION);
5914 #ifdef XML_DTD
5915 entityTrackingOnClose(parser, entity, __LINE__);
5916 #endif
5917 entity->open = XML_FALSE;
5918 if (result)
5919 return result;
5920 }
5921 } break;
5922 default:
5923 /* The only token returned by XmlAttributeValueTok() that does
5924 * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
5925 * Getting that would require an entity name to contain an
5926 * incomplete XML character (e.g. \xE2\x82); however previous
5927 * tokenisers will have already recognised and rejected such
5928 * names before XmlAttributeValueTok() gets a look-in. This
5929 * default case should be retained as a safety net, but the code
5930 * excluded from coverage tests.
5931 *
5932 * LCOV_EXCL_START
5933 */
5934 if (enc == parser->m_encoding)
5935 parser->m_eventPtr = ptr;
5936 return XML_ERROR_UNEXPECTED_STATE;
5937 /* LCOV_EXCL_STOP */
5938 }
5939 ptr = next;
5940 }
5941 /* not reached */
5942 }
5943
5944 static enum XML_Error
storeEntityValue(XML_Parser parser,const ENCODING * enc,const char * entityTextPtr,const char * entityTextEnd,enum XML_Account account)5945 storeEntityValue(XML_Parser parser, const ENCODING *enc,
5946 const char *entityTextPtr, const char *entityTextEnd,
5947 enum XML_Account account) {
5948 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
5949 STRING_POOL *pool = &(dtd->entityValuePool);
5950 enum XML_Error result = XML_ERROR_NONE;
5951 #ifdef XML_DTD
5952 int oldInEntityValue = parser->m_prologState.inEntityValue;
5953 parser->m_prologState.inEntityValue = 1;
5954 #else
5955 UNUSED_P(account);
5956 #endif /* XML_DTD */
5957 /* never return Null for the value argument in EntityDeclHandler,
5958 since this would indicate an external entity; therefore we
5959 have to make sure that entityValuePool.start is not null */
5960 if (! pool->blocks) {
5961 if (! poolGrow(pool))
5962 return XML_ERROR_NO_MEMORY;
5963 }
5964
5965 for (;;) {
5966 const char *next
5967 = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
5968 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
5969
5970 #ifdef XML_DTD
5971 if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
5972 account)) {
5973 accountingOnAbort(parser);
5974 result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5975 goto endEntityValue;
5976 }
5977 #endif
5978
5979 switch (tok) {
5980 case XML_TOK_PARAM_ENTITY_REF:
5981 #ifdef XML_DTD
5982 if (parser->m_isParamEntity || enc != parser->m_encoding) {
5983 const XML_Char *name;
5984 ENTITY *entity;
5985 name = poolStoreString(&parser->m_tempPool, enc,
5986 entityTextPtr + enc->minBytesPerChar,
5987 next - enc->minBytesPerChar);
5988 if (! name) {
5989 result = XML_ERROR_NO_MEMORY;
5990 goto endEntityValue;
5991 }
5992 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5993 poolDiscard(&parser->m_tempPool);
5994 if (! entity) {
5995 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
5996 /* cannot report skipped entity here - see comments on
5997 parser->m_skippedEntityHandler
5998 if (parser->m_skippedEntityHandler)
5999 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6000 */
6001 dtd->keepProcessing = dtd->standalone;
6002 goto endEntityValue;
6003 }
6004 if (entity->open) {
6005 if (enc == parser->m_encoding)
6006 parser->m_eventPtr = entityTextPtr;
6007 result = XML_ERROR_RECURSIVE_ENTITY_REF;
6008 goto endEntityValue;
6009 }
6010 if (entity->systemId) {
6011 if (parser->m_externalEntityRefHandler) {
6012 dtd->paramEntityRead = XML_FALSE;
6013 entity->open = XML_TRUE;
6014 entityTrackingOnOpen(parser, entity, __LINE__);
6015 if (! parser->m_externalEntityRefHandler(
6016 parser->m_externalEntityRefHandlerArg, 0, entity->base,
6017 entity->systemId, entity->publicId)) {
6018 entityTrackingOnClose(parser, entity, __LINE__);
6019 entity->open = XML_FALSE;
6020 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
6021 goto endEntityValue;
6022 }
6023 entityTrackingOnClose(parser, entity, __LINE__);
6024 entity->open = XML_FALSE;
6025 if (! dtd->paramEntityRead)
6026 dtd->keepProcessing = dtd->standalone;
6027 } else
6028 dtd->keepProcessing = dtd->standalone;
6029 } else {
6030 entity->open = XML_TRUE;
6031 entityTrackingOnOpen(parser, entity, __LINE__);
6032 result = storeEntityValue(
6033 parser, parser->m_internalEncoding, (const char *)entity->textPtr,
6034 (const char *)(entity->textPtr + entity->textLen),
6035 XML_ACCOUNT_ENTITY_EXPANSION);
6036 entityTrackingOnClose(parser, entity, __LINE__);
6037 entity->open = XML_FALSE;
6038 if (result)
6039 goto endEntityValue;
6040 }
6041 break;
6042 }
6043 #endif /* XML_DTD */
6044 /* In the internal subset, PE references are not legal
6045 within markup declarations, e.g entity values in this case. */
6046 parser->m_eventPtr = entityTextPtr;
6047 result = XML_ERROR_PARAM_ENTITY_REF;
6048 goto endEntityValue;
6049 case XML_TOK_NONE:
6050 result = XML_ERROR_NONE;
6051 goto endEntityValue;
6052 case XML_TOK_ENTITY_REF:
6053 case XML_TOK_DATA_CHARS:
6054 if (! poolAppend(pool, enc, entityTextPtr, next)) {
6055 result = XML_ERROR_NO_MEMORY;
6056 goto endEntityValue;
6057 }
6058 break;
6059 case XML_TOK_TRAILING_CR:
6060 next = entityTextPtr + enc->minBytesPerChar;
6061 /* fall through */
6062 case XML_TOK_DATA_NEWLINE:
6063 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6064 result = XML_ERROR_NO_MEMORY;
6065 goto endEntityValue;
6066 }
6067 *(pool->ptr)++ = 0xA;
6068 break;
6069 case XML_TOK_CHAR_REF: {
6070 XML_Char buf[XML_ENCODE_MAX];
6071 int i;
6072 int n = XmlCharRefNumber(enc, entityTextPtr);
6073 if (n < 0) {
6074 if (enc == parser->m_encoding)
6075 parser->m_eventPtr = entityTextPtr;
6076 result = XML_ERROR_BAD_CHAR_REF;
6077 goto endEntityValue;
6078 }
6079 n = XmlEncode(n, (ICHAR *)buf);
6080 /* The XmlEncode() functions can never return 0 here. That
6081 * error return happens if the code point passed in is either
6082 * negative or greater than or equal to 0x110000. The
6083 * XmlCharRefNumber() functions will all return a number
6084 * strictly less than 0x110000 or a negative value if an error
6085 * occurred. The negative value is intercepted above, so
6086 * XmlEncode() is never passed a value it might return an
6087 * error for.
6088 */
6089 for (i = 0; i < n; i++) {
6090 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6091 result = XML_ERROR_NO_MEMORY;
6092 goto endEntityValue;
6093 }
6094 *(pool->ptr)++ = buf[i];
6095 }
6096 } break;
6097 case XML_TOK_PARTIAL:
6098 if (enc == parser->m_encoding)
6099 parser->m_eventPtr = entityTextPtr;
6100 result = XML_ERROR_INVALID_TOKEN;
6101 goto endEntityValue;
6102 case XML_TOK_INVALID:
6103 if (enc == parser->m_encoding)
6104 parser->m_eventPtr = next;
6105 result = XML_ERROR_INVALID_TOKEN;
6106 goto endEntityValue;
6107 default:
6108 /* This default case should be unnecessary -- all the tokens
6109 * that XmlEntityValueTok() can return have their own explicit
6110 * cases -- but should be retained for safety. We do however
6111 * exclude it from the coverage statistics.
6112 *
6113 * LCOV_EXCL_START
6114 */
6115 if (enc == parser->m_encoding)
6116 parser->m_eventPtr = entityTextPtr;
6117 result = XML_ERROR_UNEXPECTED_STATE;
6118 goto endEntityValue;
6119 /* LCOV_EXCL_STOP */
6120 }
6121 entityTextPtr = next;
6122 }
6123 endEntityValue:
6124 #ifdef XML_DTD
6125 parser->m_prologState.inEntityValue = oldInEntityValue;
6126 #endif /* XML_DTD */
6127 return result;
6128 }
6129
6130 static void FASTCALL
normalizeLines(XML_Char * s)6131 normalizeLines(XML_Char *s) {
6132 XML_Char *p;
6133 for (;; s++) {
6134 if (*s == XML_T('\0'))
6135 return;
6136 if (*s == 0xD)
6137 break;
6138 }
6139 p = s;
6140 do {
6141 if (*s == 0xD) {
6142 *p++ = 0xA;
6143 if (*++s == 0xA)
6144 s++;
6145 } else
6146 *p++ = *s++;
6147 } while (*s);
6148 *p = XML_T('\0');
6149 }
6150
6151 static int
reportProcessingInstruction(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6152 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
6153 const char *start, const char *end) {
6154 const XML_Char *target;
6155 XML_Char *data;
6156 const char *tem;
6157 if (! parser->m_processingInstructionHandler) {
6158 if (parser->m_defaultHandler)
6159 reportDefault(parser, enc, start, end);
6160 return 1;
6161 }
6162 start += enc->minBytesPerChar * 2;
6163 tem = start + XmlNameLength(enc, start);
6164 target = poolStoreString(&parser->m_tempPool, enc, start, tem);
6165 if (! target)
6166 return 0;
6167 poolFinish(&parser->m_tempPool);
6168 data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
6169 end - enc->minBytesPerChar * 2);
6170 if (! data)
6171 return 0;
6172 normalizeLines(data);
6173 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
6174 poolClear(&parser->m_tempPool);
6175 return 1;
6176 }
6177
6178 static int
reportComment(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6179 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
6180 const char *end) {
6181 XML_Char *data;
6182 if (! parser->m_commentHandler) {
6183 if (parser->m_defaultHandler)
6184 reportDefault(parser, enc, start, end);
6185 return 1;
6186 }
6187 data = poolStoreString(&parser->m_tempPool, enc,
6188 start + enc->minBytesPerChar * 4,
6189 end - enc->minBytesPerChar * 3);
6190 if (! data)
6191 return 0;
6192 normalizeLines(data);
6193 parser->m_commentHandler(parser->m_handlerArg, data);
6194 poolClear(&parser->m_tempPool);
6195 return 1;
6196 }
6197
6198 static void
reportDefault(XML_Parser parser,const ENCODING * enc,const char * s,const char * end)6199 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
6200 const char *end) {
6201 if (MUST_CONVERT(enc, s)) {
6202 enum XML_Convert_Result convert_res;
6203 const char **eventPP;
6204 const char **eventEndPP;
6205 if (enc == parser->m_encoding) {
6206 eventPP = &parser->m_eventPtr;
6207 eventEndPP = &parser->m_eventEndPtr;
6208 } else {
6209 /* To get here, two things must be true; the parser must be
6210 * using a character encoding that is not the same as the
6211 * encoding passed in, and the encoding passed in must need
6212 * conversion to the internal format (UTF-8 unless XML_UNICODE
6213 * is defined). The only occasions on which the encoding passed
6214 * in is not the same as the parser's encoding are when it is
6215 * the internal encoding (e.g. a previously defined parameter
6216 * entity, already converted to internal format). This by
6217 * definition doesn't need conversion, so the whole branch never
6218 * gets executed.
6219 *
6220 * For safety's sake we don't delete these lines and merely
6221 * exclude them from coverage statistics.
6222 *
6223 * LCOV_EXCL_START
6224 */
6225 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
6226 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
6227 /* LCOV_EXCL_STOP */
6228 }
6229 do {
6230 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
6231 convert_res
6232 = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
6233 *eventEndPP = s;
6234 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
6235 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
6236 *eventPP = s;
6237 } while ((convert_res != XML_CONVERT_COMPLETED)
6238 && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
6239 } else
6240 parser->m_defaultHandler(parser->m_handlerArg, (XML_Char *)s,
6241 (int)((XML_Char *)end - (XML_Char *)s));
6242 }
6243
6244 static int
defineAttribute(ELEMENT_TYPE * type,ATTRIBUTE_ID * attId,XML_Bool isCdata,XML_Bool isId,const XML_Char * value,XML_Parser parser)6245 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
6246 XML_Bool isId, const XML_Char *value, XML_Parser parser) {
6247 DEFAULT_ATTRIBUTE *att;
6248 if (value || isId) {
6249 /* The handling of default attributes gets messed up if we have
6250 a default which duplicates a non-default. */
6251 int i;
6252 for (i = 0; i < type->nDefaultAtts; i++)
6253 if (attId == type->defaultAtts[i].id)
6254 return 1;
6255 if (isId && ! type->idAtt && ! attId->xmlns)
6256 type->idAtt = attId;
6257 }
6258 if (type->nDefaultAtts == type->allocDefaultAtts) {
6259 if (type->allocDefaultAtts == 0) {
6260 type->allocDefaultAtts = 8;
6261 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(
6262 parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6263 if (! type->defaultAtts) {
6264 type->allocDefaultAtts = 0;
6265 return 0;
6266 }
6267 } else {
6268 DEFAULT_ATTRIBUTE *temp;
6269
6270 /* Detect and prevent integer overflow */
6271 if (type->allocDefaultAtts > INT_MAX / 2) {
6272 return 0;
6273 }
6274
6275 int count = type->allocDefaultAtts * 2;
6276
6277 /* Detect and prevent integer overflow.
6278 * The preprocessor guard addresses the "always false" warning
6279 * from -Wtype-limits on platforms where
6280 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
6281 #if UINT_MAX >= SIZE_MAX
6282 if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) {
6283 return 0;
6284 }
6285 #endif
6286
6287 temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts,
6288 (count * sizeof(DEFAULT_ATTRIBUTE)));
6289 if (temp == NULL)
6290 return 0;
6291 type->allocDefaultAtts = count;
6292 type->defaultAtts = temp;
6293 }
6294 }
6295 att = type->defaultAtts + type->nDefaultAtts;
6296 att->id = attId;
6297 att->value = value;
6298 att->isCdata = isCdata;
6299 if (! isCdata)
6300 attId->maybeTokenized = XML_TRUE;
6301 type->nDefaultAtts += 1;
6302 return 1;
6303 }
6304
6305 static int
setElementTypePrefix(XML_Parser parser,ELEMENT_TYPE * elementType)6306 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
6307 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6308 const XML_Char *name;
6309 for (name = elementType->name; *name; name++) {
6310 if (*name == XML_T(ASCII_COLON)) {
6311 PREFIX *prefix;
6312 const XML_Char *s;
6313 for (s = elementType->name; s != name; s++) {
6314 if (! poolAppendChar(&dtd->pool, *s))
6315 return 0;
6316 }
6317 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6318 return 0;
6319 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
6320 sizeof(PREFIX));
6321 if (! prefix)
6322 return 0;
6323 if (prefix->name == poolStart(&dtd->pool))
6324 poolFinish(&dtd->pool);
6325 else
6326 poolDiscard(&dtd->pool);
6327 elementType->prefix = prefix;
6328 break;
6329 }
6330 }
6331 return 1;
6332 }
6333
6334 static ATTRIBUTE_ID *
getAttributeId(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6335 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
6336 const char *end) {
6337 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6338 ATTRIBUTE_ID *id;
6339 const XML_Char *name;
6340 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6341 return NULL;
6342 name = poolStoreString(&dtd->pool, enc, start, end);
6343 if (! name)
6344 return NULL;
6345 /* skip quotation mark - its storage will be re-used (like in name[-1]) */
6346 ++name;
6347 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
6348 sizeof(ATTRIBUTE_ID));
6349 if (! id)
6350 return NULL;
6351 if (id->name != name)
6352 poolDiscard(&dtd->pool);
6353 else {
6354 poolFinish(&dtd->pool);
6355 if (! parser->m_ns)
6356 ;
6357 else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
6358 && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
6359 && name[4] == XML_T(ASCII_s)
6360 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
6361 if (name[5] == XML_T('\0'))
6362 id->prefix = &dtd->defaultPrefix;
6363 else
6364 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
6365 sizeof(PREFIX));
6366 id->xmlns = XML_TRUE;
6367 } else {
6368 int i;
6369 for (i = 0; name[i]; i++) {
6370 /* attributes without prefix are *not* in the default namespace */
6371 if (name[i] == XML_T(ASCII_COLON)) {
6372 int j;
6373 for (j = 0; j < i; j++) {
6374 if (! poolAppendChar(&dtd->pool, name[j]))
6375 return NULL;
6376 }
6377 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6378 return NULL;
6379 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
6380 poolStart(&dtd->pool), sizeof(PREFIX));
6381 if (! id->prefix)
6382 return NULL;
6383 if (id->prefix->name == poolStart(&dtd->pool))
6384 poolFinish(&dtd->pool);
6385 else
6386 poolDiscard(&dtd->pool);
6387 break;
6388 }
6389 }
6390 }
6391 }
6392 return id;
6393 }
6394
6395 #define CONTEXT_SEP XML_T(ASCII_FF)
6396
6397 static const XML_Char *
getContext(XML_Parser parser)6398 getContext(XML_Parser parser) {
6399 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6400 HASH_TABLE_ITER iter;
6401 XML_Bool needSep = XML_FALSE;
6402
6403 if (dtd->defaultPrefix.binding) {
6404 int i;
6405 int len;
6406 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6407 return NULL;
6408 len = dtd->defaultPrefix.binding->uriLen;
6409 if (parser->m_namespaceSeparator)
6410 len--;
6411 for (i = 0; i < len; i++) {
6412 if (! poolAppendChar(&parser->m_tempPool,
6413 dtd->defaultPrefix.binding->uri[i])) {
6414 /* Because of memory caching, I don't believe this line can be
6415 * executed.
6416 *
6417 * This is part of a loop copying the default prefix binding
6418 * URI into the parser's temporary string pool. Previously,
6419 * that URI was copied into the same string pool, with a
6420 * terminating NUL character, as part of setContext(). When
6421 * the pool was cleared, that leaves a block definitely big
6422 * enough to hold the URI on the free block list of the pool.
6423 * The URI copy in getContext() therefore cannot run out of
6424 * memory.
6425 *
6426 * If the pool is used between the setContext() and
6427 * getContext() calls, the worst it can do is leave a bigger
6428 * block on the front of the free list. Given that this is
6429 * all somewhat inobvious and program logic can be changed, we
6430 * don't delete the line but we do exclude it from the test
6431 * coverage statistics.
6432 */
6433 return NULL; /* LCOV_EXCL_LINE */
6434 }
6435 }
6436 needSep = XML_TRUE;
6437 }
6438
6439 hashTableIterInit(&iter, &(dtd->prefixes));
6440 for (;;) {
6441 int i;
6442 int len;
6443 const XML_Char *s;
6444 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6445 if (! prefix)
6446 break;
6447 if (! prefix->binding) {
6448 /* This test appears to be (justifiable) paranoia. There does
6449 * not seem to be a way of injecting a prefix without a binding
6450 * that doesn't get errored long before this function is called.
6451 * The test should remain for safety's sake, so we instead
6452 * exclude the following line from the coverage statistics.
6453 */
6454 continue; /* LCOV_EXCL_LINE */
6455 }
6456 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6457 return NULL;
6458 for (s = prefix->name; *s; s++)
6459 if (! poolAppendChar(&parser->m_tempPool, *s))
6460 return NULL;
6461 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6462 return NULL;
6463 len = prefix->binding->uriLen;
6464 if (parser->m_namespaceSeparator)
6465 len--;
6466 for (i = 0; i < len; i++)
6467 if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
6468 return NULL;
6469 needSep = XML_TRUE;
6470 }
6471
6472 hashTableIterInit(&iter, &(dtd->generalEntities));
6473 for (;;) {
6474 const XML_Char *s;
6475 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
6476 if (! e)
6477 break;
6478 if (! e->open)
6479 continue;
6480 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6481 return NULL;
6482 for (s = e->name; *s; s++)
6483 if (! poolAppendChar(&parser->m_tempPool, *s))
6484 return 0;
6485 needSep = XML_TRUE;
6486 }
6487
6488 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6489 return NULL;
6490 return parser->m_tempPool.start;
6491 }
6492
6493 static XML_Bool
setContext(XML_Parser parser,const XML_Char * context)6494 setContext(XML_Parser parser, const XML_Char *context) {
6495 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6496 const XML_Char *s = context;
6497
6498 while (*context != XML_T('\0')) {
6499 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
6500 ENTITY *e;
6501 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6502 return XML_FALSE;
6503 e = (ENTITY *)lookup(parser, &dtd->generalEntities,
6504 poolStart(&parser->m_tempPool), 0);
6505 if (e)
6506 e->open = XML_TRUE;
6507 if (*s != XML_T('\0'))
6508 s++;
6509 context = s;
6510 poolDiscard(&parser->m_tempPool);
6511 } else if (*s == XML_T(ASCII_EQUALS)) {
6512 PREFIX *prefix;
6513 if (poolLength(&parser->m_tempPool) == 0)
6514 prefix = &dtd->defaultPrefix;
6515 else {
6516 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6517 return XML_FALSE;
6518 prefix
6519 = (PREFIX *)lookup(parser, &dtd->prefixes,
6520 poolStart(&parser->m_tempPool), sizeof(PREFIX));
6521 if (! prefix)
6522 return XML_FALSE;
6523 if (prefix->name == poolStart(&parser->m_tempPool)) {
6524 prefix->name = poolCopyString(&dtd->pool, prefix->name);
6525 if (! prefix->name)
6526 return XML_FALSE;
6527 }
6528 poolDiscard(&parser->m_tempPool);
6529 }
6530 for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
6531 context++)
6532 if (! poolAppendChar(&parser->m_tempPool, *context))
6533 return XML_FALSE;
6534 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6535 return XML_FALSE;
6536 if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
6537 &parser->m_inheritedBindings)
6538 != XML_ERROR_NONE)
6539 return XML_FALSE;
6540 poolDiscard(&parser->m_tempPool);
6541 if (*context != XML_T('\0'))
6542 ++context;
6543 s = context;
6544 } else {
6545 if (! poolAppendChar(&parser->m_tempPool, *s))
6546 return XML_FALSE;
6547 s++;
6548 }
6549 }
6550 return XML_TRUE;
6551 }
6552
6553 static void FASTCALL
normalizePublicId(XML_Char * publicId)6554 normalizePublicId(XML_Char *publicId) {
6555 XML_Char *p = publicId;
6556 XML_Char *s;
6557 for (s = publicId; *s; s++) {
6558 switch (*s) {
6559 case 0x20:
6560 case 0xD:
6561 case 0xA:
6562 if (p != publicId && p[-1] != 0x20)
6563 *p++ = 0x20;
6564 break;
6565 default:
6566 *p++ = *s;
6567 }
6568 }
6569 if (p != publicId && p[-1] == 0x20)
6570 --p;
6571 *p = XML_T('\0');
6572 }
6573
6574 static DTD *
dtdCreate(const XML_Memory_Handling_Suite * ms)6575 dtdCreate(const XML_Memory_Handling_Suite *ms) {
6576 DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD));
6577 if (p == NULL)
6578 return p;
6579 poolInit(&(p->pool), ms);
6580 poolInit(&(p->entityValuePool), ms);
6581 hashTableInit(&(p->generalEntities), ms);
6582 hashTableInit(&(p->elementTypes), ms);
6583 hashTableInit(&(p->attributeIds), ms);
6584 hashTableInit(&(p->prefixes), ms);
6585 #ifdef XML_DTD
6586 p->paramEntityRead = XML_FALSE;
6587 hashTableInit(&(p->paramEntities), ms);
6588 #endif /* XML_DTD */
6589 p->defaultPrefix.name = NULL;
6590 p->defaultPrefix.binding = NULL;
6591
6592 p->in_eldecl = XML_FALSE;
6593 p->scaffIndex = NULL;
6594 p->scaffold = NULL;
6595 p->scaffLevel = 0;
6596 p->scaffSize = 0;
6597 p->scaffCount = 0;
6598 p->contentStringLen = 0;
6599
6600 p->keepProcessing = XML_TRUE;
6601 p->hasParamEntityRefs = XML_FALSE;
6602 p->standalone = XML_FALSE;
6603 return p;
6604 }
6605
6606 static void
dtdReset(DTD * p,const XML_Memory_Handling_Suite * ms)6607 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) {
6608 HASH_TABLE_ITER iter;
6609 hashTableIterInit(&iter, &(p->elementTypes));
6610 for (;;) {
6611 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6612 if (! e)
6613 break;
6614 if (e->allocDefaultAtts != 0)
6615 ms->free_fcn(e->defaultAtts);
6616 }
6617 hashTableClear(&(p->generalEntities));
6618 #ifdef XML_DTD
6619 p->paramEntityRead = XML_FALSE;
6620 hashTableClear(&(p->paramEntities));
6621 #endif /* XML_DTD */
6622 hashTableClear(&(p->elementTypes));
6623 hashTableClear(&(p->attributeIds));
6624 hashTableClear(&(p->prefixes));
6625 poolClear(&(p->pool));
6626 poolClear(&(p->entityValuePool));
6627 p->defaultPrefix.name = NULL;
6628 p->defaultPrefix.binding = NULL;
6629
6630 p->in_eldecl = XML_FALSE;
6631
6632 ms->free_fcn(p->scaffIndex);
6633 p->scaffIndex = NULL;
6634 ms->free_fcn(p->scaffold);
6635 p->scaffold = NULL;
6636
6637 p->scaffLevel = 0;
6638 p->scaffSize = 0;
6639 p->scaffCount = 0;
6640 p->contentStringLen = 0;
6641
6642 p->keepProcessing = XML_TRUE;
6643 p->hasParamEntityRefs = XML_FALSE;
6644 p->standalone = XML_FALSE;
6645 }
6646
6647 static void
dtdDestroy(DTD * p,XML_Bool isDocEntity,const XML_Memory_Handling_Suite * ms)6648 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) {
6649 HASH_TABLE_ITER iter;
6650 hashTableIterInit(&iter, &(p->elementTypes));
6651 for (;;) {
6652 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6653 if (! e)
6654 break;
6655 if (e->allocDefaultAtts != 0)
6656 ms->free_fcn(e->defaultAtts);
6657 }
6658 hashTableDestroy(&(p->generalEntities));
6659 #ifdef XML_DTD
6660 hashTableDestroy(&(p->paramEntities));
6661 #endif /* XML_DTD */
6662 hashTableDestroy(&(p->elementTypes));
6663 hashTableDestroy(&(p->attributeIds));
6664 hashTableDestroy(&(p->prefixes));
6665 poolDestroy(&(p->pool));
6666 poolDestroy(&(p->entityValuePool));
6667 if (isDocEntity) {
6668 ms->free_fcn(p->scaffIndex);
6669 ms->free_fcn(p->scaffold);
6670 }
6671 ms->free_fcn(p);
6672 }
6673
6674 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
6675 The new DTD has already been initialized.
6676 */
6677 static int
dtdCopy(XML_Parser oldParser,DTD * newDtd,const DTD * oldDtd,const XML_Memory_Handling_Suite * ms)6678 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
6679 const XML_Memory_Handling_Suite *ms) {
6680 HASH_TABLE_ITER iter;
6681
6682 /* Copy the prefix table. */
6683
6684 hashTableIterInit(&iter, &(oldDtd->prefixes));
6685 for (;;) {
6686 const XML_Char *name;
6687 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
6688 if (! oldP)
6689 break;
6690 name = poolCopyString(&(newDtd->pool), oldP->name);
6691 if (! name)
6692 return 0;
6693 if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
6694 return 0;
6695 }
6696
6697 hashTableIterInit(&iter, &(oldDtd->attributeIds));
6698
6699 /* Copy the attribute id table. */
6700
6701 for (;;) {
6702 ATTRIBUTE_ID *newA;
6703 const XML_Char *name;
6704 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
6705
6706 if (! oldA)
6707 break;
6708 /* Remember to allocate the scratch byte before the name. */
6709 if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
6710 return 0;
6711 name = poolCopyString(&(newDtd->pool), oldA->name);
6712 if (! name)
6713 return 0;
6714 ++name;
6715 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
6716 sizeof(ATTRIBUTE_ID));
6717 if (! newA)
6718 return 0;
6719 newA->maybeTokenized = oldA->maybeTokenized;
6720 if (oldA->prefix) {
6721 newA->xmlns = oldA->xmlns;
6722 if (oldA->prefix == &oldDtd->defaultPrefix)
6723 newA->prefix = &newDtd->defaultPrefix;
6724 else
6725 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
6726 oldA->prefix->name, 0);
6727 }
6728 }
6729
6730 /* Copy the element type table. */
6731
6732 hashTableIterInit(&iter, &(oldDtd->elementTypes));
6733
6734 for (;;) {
6735 int i;
6736 ELEMENT_TYPE *newE;
6737 const XML_Char *name;
6738 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6739 if (! oldE)
6740 break;
6741 name = poolCopyString(&(newDtd->pool), oldE->name);
6742 if (! name)
6743 return 0;
6744 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
6745 sizeof(ELEMENT_TYPE));
6746 if (! newE)
6747 return 0;
6748 if (oldE->nDefaultAtts) {
6749 newE->defaultAtts = (DEFAULT_ATTRIBUTE *)ms->malloc_fcn(
6750 oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6751 if (! newE->defaultAtts) {
6752 return 0;
6753 }
6754 }
6755 if (oldE->idAtt)
6756 newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
6757 oldE->idAtt->name, 0);
6758 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
6759 if (oldE->prefix)
6760 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
6761 oldE->prefix->name, 0);
6762 for (i = 0; i < newE->nDefaultAtts; i++) {
6763 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
6764 oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
6765 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
6766 if (oldE->defaultAtts[i].value) {
6767 newE->defaultAtts[i].value
6768 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
6769 if (! newE->defaultAtts[i].value)
6770 return 0;
6771 } else
6772 newE->defaultAtts[i].value = NULL;
6773 }
6774 }
6775
6776 /* Copy the entity tables. */
6777 if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
6778 &(oldDtd->generalEntities)))
6779 return 0;
6780
6781 #ifdef XML_DTD
6782 if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
6783 &(oldDtd->paramEntities)))
6784 return 0;
6785 newDtd->paramEntityRead = oldDtd->paramEntityRead;
6786 #endif /* XML_DTD */
6787
6788 newDtd->keepProcessing = oldDtd->keepProcessing;
6789 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
6790 newDtd->standalone = oldDtd->standalone;
6791
6792 /* Don't want deep copying for scaffolding */
6793 newDtd->in_eldecl = oldDtd->in_eldecl;
6794 newDtd->scaffold = oldDtd->scaffold;
6795 newDtd->contentStringLen = oldDtd->contentStringLen;
6796 newDtd->scaffSize = oldDtd->scaffSize;
6797 newDtd->scaffLevel = oldDtd->scaffLevel;
6798 newDtd->scaffIndex = oldDtd->scaffIndex;
6799
6800 return 1;
6801 } /* End dtdCopy */
6802
6803 static int
copyEntityTable(XML_Parser oldParser,HASH_TABLE * newTable,STRING_POOL * newPool,const HASH_TABLE * oldTable)6804 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
6805 STRING_POOL *newPool, const HASH_TABLE *oldTable) {
6806 HASH_TABLE_ITER iter;
6807 const XML_Char *cachedOldBase = NULL;
6808 const XML_Char *cachedNewBase = NULL;
6809
6810 hashTableIterInit(&iter, oldTable);
6811
6812 for (;;) {
6813 ENTITY *newE;
6814 const XML_Char *name;
6815 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
6816 if (! oldE)
6817 break;
6818 name = poolCopyString(newPool, oldE->name);
6819 if (! name)
6820 return 0;
6821 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
6822 if (! newE)
6823 return 0;
6824 if (oldE->systemId) {
6825 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
6826 if (! tem)
6827 return 0;
6828 newE->systemId = tem;
6829 if (oldE->base) {
6830 if (oldE->base == cachedOldBase)
6831 newE->base = cachedNewBase;
6832 else {
6833 cachedOldBase = oldE->base;
6834 tem = poolCopyString(newPool, cachedOldBase);
6835 if (! tem)
6836 return 0;
6837 cachedNewBase = newE->base = tem;
6838 }
6839 }
6840 if (oldE->publicId) {
6841 tem = poolCopyString(newPool, oldE->publicId);
6842 if (! tem)
6843 return 0;
6844 newE->publicId = tem;
6845 }
6846 } else {
6847 const XML_Char *tem
6848 = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
6849 if (! tem)
6850 return 0;
6851 newE->textPtr = tem;
6852 newE->textLen = oldE->textLen;
6853 }
6854 if (oldE->notation) {
6855 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
6856 if (! tem)
6857 return 0;
6858 newE->notation = tem;
6859 }
6860 newE->is_param = oldE->is_param;
6861 newE->is_internal = oldE->is_internal;
6862 }
6863 return 1;
6864 }
6865
6866 #define INIT_POWER 6
6867
6868 static XML_Bool FASTCALL
keyeq(KEY s1,KEY s2)6869 keyeq(KEY s1, KEY s2) {
6870 for (; *s1 == *s2; s1++, s2++)
6871 if (*s1 == 0)
6872 return XML_TRUE;
6873 return XML_FALSE;
6874 }
6875
6876 static size_t
keylen(KEY s)6877 keylen(KEY s) {
6878 size_t len = 0;
6879 for (; *s; s++, len++)
6880 ;
6881 return len;
6882 }
6883
6884 static void
copy_salt_to_sipkey(XML_Parser parser,struct sipkey * key)6885 copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
6886 key->k[0] = 0;
6887 key->k[1] = get_hash_secret_salt(parser);
6888 }
6889
6890 static unsigned long FASTCALL
hash(XML_Parser parser,KEY s)6891 hash(XML_Parser parser, KEY s) {
6892 struct siphash state;
6893 struct sipkey key;
6894 (void)sip24_valid;
6895 copy_salt_to_sipkey(parser, &key);
6896 sip24_init(&state, &key);
6897 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
6898 return (unsigned long)sip24_final(&state);
6899 }
6900
6901 static NAMED *
lookup(XML_Parser parser,HASH_TABLE * table,KEY name,size_t createSize)6902 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
6903 size_t i;
6904 if (table->size == 0) {
6905 size_t tsize;
6906 if (! createSize)
6907 return NULL;
6908 table->power = INIT_POWER;
6909 /* table->size is a power of 2 */
6910 table->size = (size_t)1 << INIT_POWER;
6911 tsize = table->size * sizeof(NAMED *);
6912 table->v = (NAMED **)table->mem->malloc_fcn(tsize);
6913 if (! table->v) {
6914 table->size = 0;
6915 return NULL;
6916 }
6917 memset(table->v, 0, tsize);
6918 i = hash(parser, name) & ((unsigned long)table->size - 1);
6919 } else {
6920 unsigned long h = hash(parser, name);
6921 unsigned long mask = (unsigned long)table->size - 1;
6922 unsigned char step = 0;
6923 i = h & mask;
6924 while (table->v[i]) {
6925 if (keyeq(name, table->v[i]->name))
6926 return table->v[i];
6927 if (! step)
6928 step = PROBE_STEP(h, mask, table->power);
6929 i < step ? (i += table->size - step) : (i -= step);
6930 }
6931 if (! createSize)
6932 return NULL;
6933
6934 /* check for overflow (table is half full) */
6935 if (table->used >> (table->power - 1)) {
6936 unsigned char newPower = table->power + 1;
6937
6938 /* Detect and prevent invalid shift */
6939 if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
6940 return NULL;
6941 }
6942
6943 size_t newSize = (size_t)1 << newPower;
6944 unsigned long newMask = (unsigned long)newSize - 1;
6945
6946 /* Detect and prevent integer overflow */
6947 if (newSize > (size_t)(-1) / sizeof(NAMED *)) {
6948 return NULL;
6949 }
6950
6951 size_t tsize = newSize * sizeof(NAMED *);
6952 NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize);
6953 if (! newV)
6954 return NULL;
6955 memset(newV, 0, tsize);
6956 for (i = 0; i < table->size; i++)
6957 if (table->v[i]) {
6958 unsigned long newHash = hash(parser, table->v[i]->name);
6959 size_t j = newHash & newMask;
6960 step = 0;
6961 while (newV[j]) {
6962 if (! step)
6963 step = PROBE_STEP(newHash, newMask, newPower);
6964 j < step ? (j += newSize - step) : (j -= step);
6965 }
6966 newV[j] = table->v[i];
6967 }
6968 table->mem->free_fcn(table->v);
6969 table->v = newV;
6970 table->power = newPower;
6971 table->size = newSize;
6972 i = h & newMask;
6973 step = 0;
6974 while (table->v[i]) {
6975 if (! step)
6976 step = PROBE_STEP(h, newMask, newPower);
6977 i < step ? (i += newSize - step) : (i -= step);
6978 }
6979 }
6980 }
6981 table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize);
6982 if (! table->v[i])
6983 return NULL;
6984 memset(table->v[i], 0, createSize);
6985 table->v[i]->name = name;
6986 (table->used)++;
6987 return table->v[i];
6988 }
6989
6990 static void FASTCALL
hashTableClear(HASH_TABLE * table)6991 hashTableClear(HASH_TABLE *table) {
6992 size_t i;
6993 for (i = 0; i < table->size; i++) {
6994 table->mem->free_fcn(table->v[i]);
6995 table->v[i] = NULL;
6996 }
6997 table->used = 0;
6998 }
6999
7000 static void FASTCALL
hashTableDestroy(HASH_TABLE * table)7001 hashTableDestroy(HASH_TABLE *table) {
7002 size_t i;
7003 for (i = 0; i < table->size; i++)
7004 table->mem->free_fcn(table->v[i]);
7005 table->mem->free_fcn(table->v);
7006 }
7007
7008 static void FASTCALL
hashTableInit(HASH_TABLE * p,const XML_Memory_Handling_Suite * ms)7009 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) {
7010 p->power = 0;
7011 p->size = 0;
7012 p->used = 0;
7013 p->v = NULL;
7014 p->mem = ms;
7015 }
7016
7017 static void FASTCALL
hashTableIterInit(HASH_TABLE_ITER * iter,const HASH_TABLE * table)7018 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
7019 iter->p = table->v;
7020 iter->end = iter->p ? iter->p + table->size : NULL;
7021 }
7022
7023 static NAMED *FASTCALL
hashTableIterNext(HASH_TABLE_ITER * iter)7024 hashTableIterNext(HASH_TABLE_ITER *iter) {
7025 while (iter->p != iter->end) {
7026 NAMED *tem = *(iter->p)++;
7027 if (tem)
7028 return tem;
7029 }
7030 return NULL;
7031 }
7032
7033 static void FASTCALL
poolInit(STRING_POOL * pool,const XML_Memory_Handling_Suite * ms)7034 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) {
7035 pool->blocks = NULL;
7036 pool->freeBlocks = NULL;
7037 pool->start = NULL;
7038 pool->ptr = NULL;
7039 pool->end = NULL;
7040 pool->mem = ms;
7041 }
7042
7043 static void FASTCALL
poolClear(STRING_POOL * pool)7044 poolClear(STRING_POOL *pool) {
7045 if (! pool->freeBlocks)
7046 pool->freeBlocks = pool->blocks;
7047 else {
7048 BLOCK *p = pool->blocks;
7049 while (p) {
7050 BLOCK *tem = p->next;
7051 p->next = pool->freeBlocks;
7052 pool->freeBlocks = p;
7053 p = tem;
7054 }
7055 }
7056 pool->blocks = NULL;
7057 pool->start = NULL;
7058 pool->ptr = NULL;
7059 pool->end = NULL;
7060 }
7061
7062 static void FASTCALL
poolDestroy(STRING_POOL * pool)7063 poolDestroy(STRING_POOL *pool) {
7064 BLOCK *p = pool->blocks;
7065 while (p) {
7066 BLOCK *tem = p->next;
7067 pool->mem->free_fcn(p);
7068 p = tem;
7069 }
7070 p = pool->freeBlocks;
7071 while (p) {
7072 BLOCK *tem = p->next;
7073 pool->mem->free_fcn(p);
7074 p = tem;
7075 }
7076 }
7077
7078 static XML_Char *
poolAppend(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7079 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7080 const char *end) {
7081 if (! pool->ptr && ! poolGrow(pool))
7082 return NULL;
7083 for (;;) {
7084 const enum XML_Convert_Result convert_res = XmlConvert(
7085 enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
7086 if ((convert_res == XML_CONVERT_COMPLETED)
7087 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
7088 break;
7089 if (! poolGrow(pool))
7090 return NULL;
7091 }
7092 return pool->start;
7093 }
7094
7095 static const XML_Char *FASTCALL
poolCopyString(STRING_POOL * pool,const XML_Char * s)7096 poolCopyString(STRING_POOL *pool, const XML_Char *s) {
7097 do {
7098 if (! poolAppendChar(pool, *s))
7099 return NULL;
7100 } while (*s++);
7101 s = pool->start;
7102 poolFinish(pool);
7103 return s;
7104 }
7105
7106 static const XML_Char *
poolCopyStringN(STRING_POOL * pool,const XML_Char * s,int n)7107 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
7108 if (! pool->ptr && ! poolGrow(pool)) {
7109 /* The following line is unreachable given the current usage of
7110 * poolCopyStringN(). Currently it is called from exactly one
7111 * place to copy the text of a simple general entity. By that
7112 * point, the name of the entity is already stored in the pool, so
7113 * pool->ptr cannot be NULL.
7114 *
7115 * If poolCopyStringN() is used elsewhere as it well might be,
7116 * this line may well become executable again. Regardless, this
7117 * sort of check shouldn't be removed lightly, so we just exclude
7118 * it from the coverage statistics.
7119 */
7120 return NULL; /* LCOV_EXCL_LINE */
7121 }
7122 for (; n > 0; --n, s++) {
7123 if (! poolAppendChar(pool, *s))
7124 return NULL;
7125 }
7126 s = pool->start;
7127 poolFinish(pool);
7128 return s;
7129 }
7130
7131 static const XML_Char *FASTCALL
poolAppendString(STRING_POOL * pool,const XML_Char * s)7132 poolAppendString(STRING_POOL *pool, const XML_Char *s) {
7133 while (*s) {
7134 if (! poolAppendChar(pool, *s))
7135 return NULL;
7136 s++;
7137 }
7138 return pool->start;
7139 }
7140
7141 static XML_Char *
poolStoreString(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7142 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7143 const char *end) {
7144 if (! poolAppend(pool, enc, ptr, end))
7145 return NULL;
7146 if (pool->ptr == pool->end && ! poolGrow(pool))
7147 return NULL;
7148 *(pool->ptr)++ = 0;
7149 return pool->start;
7150 }
7151
7152 static size_t
poolBytesToAllocateFor(int blockSize)7153 poolBytesToAllocateFor(int blockSize) {
7154 /* Unprotected math would be:
7155 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
7156 **
7157 ** Detect overflow, avoiding _signed_ overflow undefined behavior
7158 ** For a + b * c we check b * c in isolation first, so that addition of a
7159 ** on top has no chance of making us accept a small non-negative number
7160 */
7161 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
7162
7163 if (blockSize <= 0)
7164 return 0;
7165
7166 if (blockSize > (int)(INT_MAX / stretch))
7167 return 0;
7168
7169 {
7170 const int stretchedBlockSize = blockSize * (int)stretch;
7171 const int bytesToAllocate
7172 = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
7173 if (bytesToAllocate < 0)
7174 return 0;
7175
7176 return (size_t)bytesToAllocate;
7177 }
7178 }
7179
7180 static XML_Bool FASTCALL
poolGrow(STRING_POOL * pool)7181 poolGrow(STRING_POOL *pool) {
7182 if (pool->freeBlocks) {
7183 if (pool->start == 0) {
7184 pool->blocks = pool->freeBlocks;
7185 pool->freeBlocks = pool->freeBlocks->next;
7186 pool->blocks->next = NULL;
7187 pool->start = pool->blocks->s;
7188 pool->end = pool->start + pool->blocks->size;
7189 pool->ptr = pool->start;
7190 return XML_TRUE;
7191 }
7192 if (pool->end - pool->start < pool->freeBlocks->size) {
7193 BLOCK *tem = pool->freeBlocks->next;
7194 pool->freeBlocks->next = pool->blocks;
7195 pool->blocks = pool->freeBlocks;
7196 pool->freeBlocks = tem;
7197 memcpy(pool->blocks->s, pool->start,
7198 (pool->end - pool->start) * sizeof(XML_Char));
7199 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
7200 pool->start = pool->blocks->s;
7201 pool->end = pool->start + pool->blocks->size;
7202 return XML_TRUE;
7203 }
7204 }
7205 if (pool->blocks && pool->start == pool->blocks->s) {
7206 BLOCK *temp;
7207 int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
7208 size_t bytesToAllocate;
7209
7210 /* NOTE: Needs to be calculated prior to calling `realloc`
7211 to avoid dangling pointers: */
7212 const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
7213
7214 if (blockSize < 0) {
7215 /* This condition traps a situation where either more than
7216 * INT_MAX/2 bytes have already been allocated. This isn't
7217 * readily testable, since it is unlikely that an average
7218 * machine will have that much memory, so we exclude it from the
7219 * coverage statistics.
7220 */
7221 return XML_FALSE; /* LCOV_EXCL_LINE */
7222 }
7223
7224 bytesToAllocate = poolBytesToAllocateFor(blockSize);
7225 if (bytesToAllocate == 0)
7226 return XML_FALSE;
7227
7228 temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks,
7229 (unsigned)bytesToAllocate);
7230 if (temp == NULL)
7231 return XML_FALSE;
7232 pool->blocks = temp;
7233 pool->blocks->size = blockSize;
7234 pool->ptr = pool->blocks->s + offsetInsideBlock;
7235 pool->start = pool->blocks->s;
7236 pool->end = pool->start + blockSize;
7237 } else {
7238 BLOCK *tem;
7239 int blockSize = (int)(pool->end - pool->start);
7240 size_t bytesToAllocate;
7241
7242 if (blockSize < 0) {
7243 /* This condition traps a situation where either more than
7244 * INT_MAX bytes have already been allocated (which is prevented
7245 * by various pieces of program logic, not least this one, never
7246 * mind the unlikelihood of actually having that much memory) or
7247 * the pool control fields have been corrupted (which could
7248 * conceivably happen in an extremely buggy user handler
7249 * function). Either way it isn't readily testable, so we
7250 * exclude it from the coverage statistics.
7251 */
7252 return XML_FALSE; /* LCOV_EXCL_LINE */
7253 }
7254
7255 if (blockSize < INIT_BLOCK_SIZE)
7256 blockSize = INIT_BLOCK_SIZE;
7257 else {
7258 /* Detect overflow, avoiding _signed_ overflow undefined behavior */
7259 if ((int)((unsigned)blockSize * 2U) < 0) {
7260 return XML_FALSE;
7261 }
7262 blockSize *= 2;
7263 }
7264
7265 bytesToAllocate = poolBytesToAllocateFor(blockSize);
7266 if (bytesToAllocate == 0)
7267 return XML_FALSE;
7268
7269 tem = (BLOCK *)pool->mem->malloc_fcn(bytesToAllocate);
7270 if (! tem)
7271 return XML_FALSE;
7272 tem->size = blockSize;
7273 tem->next = pool->blocks;
7274 pool->blocks = tem;
7275 if (pool->ptr != pool->start)
7276 memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
7277 pool->ptr = tem->s + (pool->ptr - pool->start);
7278 pool->start = tem->s;
7279 pool->end = tem->s + blockSize;
7280 }
7281 return XML_TRUE;
7282 }
7283
7284 static int FASTCALL
nextScaffoldPart(XML_Parser parser)7285 nextScaffoldPart(XML_Parser parser) {
7286 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7287 CONTENT_SCAFFOLD *me;
7288 int next;
7289
7290 if (! dtd->scaffIndex) {
7291 dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
7292 if (! dtd->scaffIndex)
7293 return -1;
7294 dtd->scaffIndex[0] = 0;
7295 }
7296
7297 if (dtd->scaffCount >= dtd->scaffSize) {
7298 CONTENT_SCAFFOLD *temp;
7299 if (dtd->scaffold) {
7300 /* Detect and prevent integer overflow */
7301 if (dtd->scaffSize > UINT_MAX / 2u) {
7302 return -1;
7303 }
7304 /* Detect and prevent integer overflow.
7305 * The preprocessor guard addresses the "always false" warning
7306 * from -Wtype-limits on platforms where
7307 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7308 #if UINT_MAX >= SIZE_MAX
7309 if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) {
7310 return -1;
7311 }
7312 #endif
7313
7314 temp = (CONTENT_SCAFFOLD *)REALLOC(
7315 parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
7316 if (temp == NULL)
7317 return -1;
7318 dtd->scaffSize *= 2;
7319 } else {
7320 temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
7321 * sizeof(CONTENT_SCAFFOLD));
7322 if (temp == NULL)
7323 return -1;
7324 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
7325 }
7326 dtd->scaffold = temp;
7327 }
7328 next = dtd->scaffCount++;
7329 me = &dtd->scaffold[next];
7330 if (dtd->scaffLevel) {
7331 CONTENT_SCAFFOLD *parent
7332 = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
7333 if (parent->lastchild) {
7334 dtd->scaffold[parent->lastchild].nextsib = next;
7335 }
7336 if (! parent->childcnt)
7337 parent->firstchild = next;
7338 parent->lastchild = next;
7339 parent->childcnt++;
7340 }
7341 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
7342 return next;
7343 }
7344
7345 static XML_Content *
build_model(XML_Parser parser)7346 build_model(XML_Parser parser) {
7347 /* Function build_model transforms the existing parser->m_dtd->scaffold
7348 * array of CONTENT_SCAFFOLD tree nodes into a new array of
7349 * XML_Content tree nodes followed by a gapless list of zero-terminated
7350 * strings. */
7351 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7352 XML_Content *ret;
7353 XML_Char *str; /* the current string writing location */
7354
7355 /* Detect and prevent integer overflow.
7356 * The preprocessor guard addresses the "always false" warning
7357 * from -Wtype-limits on platforms where
7358 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7359 #if UINT_MAX >= SIZE_MAX
7360 if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) {
7361 return NULL;
7362 }
7363 if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) {
7364 return NULL;
7365 }
7366 #endif
7367 if (dtd->scaffCount * sizeof(XML_Content)
7368 > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) {
7369 return NULL;
7370 }
7371
7372 const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
7373 + (dtd->contentStringLen * sizeof(XML_Char)));
7374
7375 ret = (XML_Content *)MALLOC(parser, allocsize);
7376 if (! ret)
7377 return NULL;
7378
7379 /* What follows is an iterative implementation (of what was previously done
7380 * recursively in a dedicated function called "build_node". The old recursive
7381 * build_node could be forced into stack exhaustion from input as small as a
7382 * few megabyte, and so that was a security issue. Hence, a function call
7383 * stack is avoided now by resolving recursion.)
7384 *
7385 * The iterative approach works as follows:
7386 *
7387 * - We use space in the target array for building a temporary stack structure
7388 * while that space is still unused.
7389 * The stack grows from the array's end downwards and the "actual data"
7390 * grows from the start upwards, sequentially.
7391 * (Because stack grows downwards, pushing onto the stack is a decrement
7392 * while popping off the stack is an increment.)
7393 *
7394 * - A stack element appears as a regular XML_Content node on the outside,
7395 * but only uses a single field -- numchildren -- to store the source
7396 * tree node array index. These are the breadcrumbs leading the way back
7397 * during pre-order (node first) depth-first traversal.
7398 *
7399 * - The reason we know the stack will never grow into (or overlap with)
7400 * the area with data of value at the start of the array is because
7401 * the overall number of elements to process matches the size of the array,
7402 * and the sum of fully processed nodes and yet-to-be processed nodes
7403 * on the stack, cannot be more than the total number of nodes.
7404 * It is possible for the top of the stack and the about-to-write node
7405 * to meet, but that is safe because we get the source index out
7406 * before doing any writes on that node.
7407 */
7408 XML_Content *dest = ret; /* tree node writing location, moves upwards */
7409 XML_Content *const destLimit = &ret[dtd->scaffCount];
7410 XML_Content *const stackBottom = &ret[dtd->scaffCount];
7411 XML_Content *stackTop = stackBottom; /* i.e. stack is initially empty */
7412 str = (XML_Char *)&ret[dtd->scaffCount];
7413
7414 /* Push source tree root node index onto the stack */
7415 (--stackTop)->numchildren = 0;
7416
7417 for (; dest < destLimit; dest++) {
7418 /* Pop source tree node index off the stack */
7419 const int src_node = (int)(stackTop++)->numchildren;
7420
7421 /* Convert item */
7422 dest->type = dtd->scaffold[src_node].type;
7423 dest->quant = dtd->scaffold[src_node].quant;
7424 if (dest->type == XML_CTYPE_NAME) {
7425 const XML_Char *src;
7426 dest->name = str;
7427 src = dtd->scaffold[src_node].name;
7428 for (;;) {
7429 *str++ = *src;
7430 if (! *src)
7431 break;
7432 src++;
7433 }
7434 dest->numchildren = 0;
7435 dest->children = NULL;
7436 } else {
7437 unsigned int i;
7438 int cn;
7439 dest->name = NULL;
7440 dest->numchildren = dtd->scaffold[src_node].childcnt;
7441 dest->children = &dest[1];
7442
7443 /* Push children to the stack
7444 * in a way where the first child ends up at the top of the
7445 * (downwards growing) stack, in order to be processed first. */
7446 stackTop -= dest->numchildren;
7447 for (i = 0, cn = dtd->scaffold[src_node].firstchild;
7448 i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) {
7449 (stackTop + i)->numchildren = (unsigned int)cn;
7450 }
7451 }
7452 }
7453
7454 return ret;
7455 }
7456
7457 static ELEMENT_TYPE *
getElementType(XML_Parser parser,const ENCODING * enc,const char * ptr,const char * end)7458 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
7459 const char *end) {
7460 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7461 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
7462 ELEMENT_TYPE *ret;
7463
7464 if (! name)
7465 return NULL;
7466 ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
7467 sizeof(ELEMENT_TYPE));
7468 if (! ret)
7469 return NULL;
7470 if (ret->name != name)
7471 poolDiscard(&dtd->pool);
7472 else {
7473 poolFinish(&dtd->pool);
7474 if (! setElementTypePrefix(parser, ret))
7475 return NULL;
7476 }
7477 return ret;
7478 }
7479
7480 static XML_Char *
copyString(const XML_Char * s,const XML_Memory_Handling_Suite * memsuite)7481 copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
7482 size_t charsRequired = 0;
7483 XML_Char *result;
7484
7485 /* First determine how long the string is */
7486 while (s[charsRequired] != 0) {
7487 charsRequired++;
7488 }
7489 /* Include the terminator */
7490 charsRequired++;
7491
7492 /* Now allocate space for the copy */
7493 result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
7494 if (result == NULL)
7495 return NULL;
7496 /* Copy the original into place */
7497 memcpy(result, s, charsRequired * sizeof(XML_Char));
7498 return result;
7499 }
7500
7501 #ifdef XML_DTD
7502
7503 static float
accountingGetCurrentAmplification(XML_Parser rootParser)7504 accountingGetCurrentAmplification(XML_Parser rootParser) {
7505 const XmlBigCount countBytesOutput
7506 = rootParser->m_accounting.countBytesDirect
7507 + rootParser->m_accounting.countBytesIndirect;
7508 const float amplificationFactor
7509 = rootParser->m_accounting.countBytesDirect
7510 ? (countBytesOutput
7511 / (float)(rootParser->m_accounting.countBytesDirect))
7512 : 1.0f;
7513 assert(! rootParser->m_parentParser);
7514 return amplificationFactor;
7515 }
7516
7517 static void
accountingReportStats(XML_Parser originParser,const char * epilog)7518 accountingReportStats(XML_Parser originParser, const char *epilog) {
7519 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7520 assert(! rootParser->m_parentParser);
7521
7522 if (rootParser->m_accounting.debugLevel < 1) {
7523 return;
7524 }
7525
7526 const float amplificationFactor
7527 = accountingGetCurrentAmplification(rootParser);
7528 fprintf(stderr,
7529 "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
7530 "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
7531 (void *)rootParser, rootParser->m_accounting.countBytesDirect,
7532 rootParser->m_accounting.countBytesIndirect,
7533 (double)amplificationFactor, epilog);
7534 }
7535
7536 static void
accountingOnAbort(XML_Parser originParser)7537 accountingOnAbort(XML_Parser originParser) {
7538 accountingReportStats(originParser, " ABORTING\n");
7539 }
7540
7541 static void
accountingReportDiff(XML_Parser rootParser,unsigned int levelsAwayFromRootParser,const char * before,const char * after,ptrdiff_t bytesMore,int source_line,enum XML_Account account)7542 accountingReportDiff(XML_Parser rootParser,
7543 unsigned int levelsAwayFromRootParser, const char *before,
7544 const char *after, ptrdiff_t bytesMore, int source_line,
7545 enum XML_Account account) {
7546 assert(! rootParser->m_parentParser);
7547
7548 fprintf(stderr,
7549 " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"",
7550 bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
7551 levelsAwayFromRootParser, source_line, 10, "");
7552
7553 const char ellipis[] = "[..]";
7554 const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
7555 const unsigned int contextLength = 10;
7556
7557 /* Note: Performance is of no concern here */
7558 const char *walker = before;
7559 if ((rootParser->m_accounting.debugLevel >= 3)
7560 || (after - before)
7561 <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
7562 for (; walker < after; walker++) {
7563 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7564 }
7565 } else {
7566 for (; walker < before + contextLength; walker++) {
7567 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7568 }
7569 fprintf(stderr, ellipis);
7570 walker = after - contextLength;
7571 for (; walker < after; walker++) {
7572 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7573 }
7574 }
7575 fprintf(stderr, "\"\n");
7576 }
7577
7578 static XML_Bool
accountingDiffTolerated(XML_Parser originParser,int tok,const char * before,const char * after,int source_line,enum XML_Account account)7579 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
7580 const char *after, int source_line,
7581 enum XML_Account account) {
7582 /* Note: We need to check the token type *first* to be sure that
7583 * we can even access variable <after>, safely.
7584 * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
7585 switch (tok) {
7586 case XML_TOK_INVALID:
7587 case XML_TOK_PARTIAL:
7588 case XML_TOK_PARTIAL_CHAR:
7589 case XML_TOK_NONE:
7590 return XML_TRUE;
7591 }
7592
7593 if (account == XML_ACCOUNT_NONE)
7594 return XML_TRUE; /* because these bytes have been accounted for, already */
7595
7596 unsigned int levelsAwayFromRootParser;
7597 const XML_Parser rootParser
7598 = getRootParserOf(originParser, &levelsAwayFromRootParser);
7599 assert(! rootParser->m_parentParser);
7600
7601 const int isDirect
7602 = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
7603 const ptrdiff_t bytesMore = after - before;
7604
7605 XmlBigCount *const additionTarget
7606 = isDirect ? &rootParser->m_accounting.countBytesDirect
7607 : &rootParser->m_accounting.countBytesIndirect;
7608
7609 /* Detect and avoid integer overflow */
7610 if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
7611 return XML_FALSE;
7612 *additionTarget += bytesMore;
7613
7614 const XmlBigCount countBytesOutput
7615 = rootParser->m_accounting.countBytesDirect
7616 + rootParser->m_accounting.countBytesIndirect;
7617 const float amplificationFactor
7618 = accountingGetCurrentAmplification(rootParser);
7619 const XML_Bool tolerated
7620 = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
7621 || (amplificationFactor
7622 <= rootParser->m_accounting.maximumAmplificationFactor);
7623
7624 if (rootParser->m_accounting.debugLevel >= 2) {
7625 accountingReportStats(rootParser, "");
7626 accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
7627 bytesMore, source_line, account);
7628 }
7629
7630 return tolerated;
7631 }
7632
7633 unsigned long long
testingAccountingGetCountBytesDirect(XML_Parser parser)7634 testingAccountingGetCountBytesDirect(XML_Parser parser) {
7635 if (! parser)
7636 return 0;
7637 return parser->m_accounting.countBytesDirect;
7638 }
7639
7640 unsigned long long
testingAccountingGetCountBytesIndirect(XML_Parser parser)7641 testingAccountingGetCountBytesIndirect(XML_Parser parser) {
7642 if (! parser)
7643 return 0;
7644 return parser->m_accounting.countBytesIndirect;
7645 }
7646
7647 static void
entityTrackingReportStats(XML_Parser rootParser,ENTITY * entity,const char * action,int sourceLine)7648 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
7649 const char *action, int sourceLine) {
7650 assert(! rootParser->m_parentParser);
7651 if (rootParser->m_entity_stats.debugLevel < 1)
7652 return;
7653
7654 # if defined(XML_UNICODE)
7655 const char *const entityName = "[..]";
7656 # else
7657 const char *const entityName = entity->name;
7658 # endif
7659
7660 fprintf(
7661 stderr,
7662 "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n",
7663 (void *)rootParser, rootParser->m_entity_stats.countEverOpened,
7664 rootParser->m_entity_stats.currentDepth,
7665 rootParser->m_entity_stats.maximumDepthSeen,
7666 (rootParser->m_entity_stats.currentDepth - 1) * 2, "",
7667 entity->is_param ? "%" : "&", entityName, action, entity->textLen,
7668 sourceLine);
7669 }
7670
7671 static void
entityTrackingOnOpen(XML_Parser originParser,ENTITY * entity,int sourceLine)7672 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7673 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7674 assert(! rootParser->m_parentParser);
7675
7676 rootParser->m_entity_stats.countEverOpened++;
7677 rootParser->m_entity_stats.currentDepth++;
7678 if (rootParser->m_entity_stats.currentDepth
7679 > rootParser->m_entity_stats.maximumDepthSeen) {
7680 rootParser->m_entity_stats.maximumDepthSeen++;
7681 }
7682
7683 entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
7684 }
7685
7686 static void
entityTrackingOnClose(XML_Parser originParser,ENTITY * entity,int sourceLine)7687 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7688 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7689 assert(! rootParser->m_parentParser);
7690
7691 entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
7692 rootParser->m_entity_stats.currentDepth--;
7693 }
7694
7695 static XML_Parser
getRootParserOf(XML_Parser parser,unsigned int * outLevelDiff)7696 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
7697 XML_Parser rootParser = parser;
7698 unsigned int stepsTakenUpwards = 0;
7699 while (rootParser->m_parentParser) {
7700 rootParser = rootParser->m_parentParser;
7701 stepsTakenUpwards++;
7702 }
7703 assert(! rootParser->m_parentParser);
7704 if (outLevelDiff != NULL) {
7705 *outLevelDiff = stepsTakenUpwards;
7706 }
7707 return rootParser;
7708 }
7709
/* Maps a byte to a constant, zero-terminated, printable representation:
 *  - 0 yields "\0"; tab, line feed and carriage return yield "\t", "\n"
 *    and "\r" (each as backslash plus letter/digit);
 *  - double quote and backslash are preceded by a backslash;
 *  - all other bytes from 32 to 126 are returned as themselves;
 *  - everything else yields "\x" followed by the value in uppercase
 *    hexadecimal without leading zero.
 * The returned string must not be modified.
 */
const char *
unsignedCharToPrintable(unsigned char c) {
  /* One entry per byte value, indexed by the byte itself, 8 per row */
  static const char *const printable[256] = {
      /*   0 */ "\\0",   "\\x1",  "\\x2",  "\\x3",  "\\x4",  "\\x5",  "\\x6",  "\\x7",
      /*   8 */ "\\x8",  "\\t",   "\\n",   "\\xB",  "\\xC",  "\\r",   "\\xE",  "\\xF",
      /*  16 */ "\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17",
      /*  24 */ "\\x18", "\\x19", "\\x1A", "\\x1B", "\\x1C", "\\x1D", "\\x1E", "\\x1F",
      /*  32 */ " ",     "!",     "\\\"",  "#",     "$",     "%",     "&",     "'",
      /*  40 */ "(",     ")",     "*",     "+",     ",",     "-",     ".",     "/",
      /*  48 */ "0",     "1",     "2",     "3",     "4",     "5",     "6",     "7",
      /*  56 */ "8",     "9",     ":",     ";",     "<",     "=",     ">",     "?",
      /*  64 */ "@",     "A",     "B",     "C",     "D",     "E",     "F",     "G",
      /*  72 */ "H",     "I",     "J",     "K",     "L",     "M",     "N",     "O",
      /*  80 */ "P",     "Q",     "R",     "S",     "T",     "U",     "V",     "W",
      /*  88 */ "X",     "Y",     "Z",     "[",     "\\\\",  "]",     "^",     "_",
      /*  96 */ "`",     "a",     "b",     "c",     "d",     "e",     "f",     "g",
      /* 104 */ "h",     "i",     "j",     "k",     "l",     "m",     "n",     "o",
      /* 112 */ "p",     "q",     "r",     "s",     "t",     "u",     "v",     "w",
      /* 120 */ "x",     "y",     "z",     "{",     "|",     "}",     "~",     "\\x7F",
      /* 128 */ "\\x80", "\\x81", "\\x82", "\\x83", "\\x84", "\\x85", "\\x86", "\\x87",
      /* 136 */ "\\x88", "\\x89", "\\x8A", "\\x8B", "\\x8C", "\\x8D", "\\x8E", "\\x8F",
      /* 144 */ "\\x90", "\\x91", "\\x92", "\\x93", "\\x94", "\\x95", "\\x96", "\\x97",
      /* 152 */ "\\x98", "\\x99", "\\x9A", "\\x9B", "\\x9C", "\\x9D", "\\x9E", "\\x9F",
      /* 160 */ "\\xA0", "\\xA1", "\\xA2", "\\xA3", "\\xA4", "\\xA5", "\\xA6", "\\xA7",
      /* 168 */ "\\xA8", "\\xA9", "\\xAA", "\\xAB", "\\xAC", "\\xAD", "\\xAE", "\\xAF",
      /* 176 */ "\\xB0", "\\xB1", "\\xB2", "\\xB3", "\\xB4", "\\xB5", "\\xB6", "\\xB7",
      /* 184 */ "\\xB8", "\\xB9", "\\xBA", "\\xBB", "\\xBC", "\\xBD", "\\xBE", "\\xBF",
      /* 192 */ "\\xC0", "\\xC1", "\\xC2", "\\xC3", "\\xC4", "\\xC5", "\\xC6", "\\xC7",
      /* 200 */ "\\xC8", "\\xC9", "\\xCA", "\\xCB", "\\xCC", "\\xCD", "\\xCE", "\\xCF",
      /* 208 */ "\\xD0", "\\xD1", "\\xD2", "\\xD3", "\\xD4", "\\xD5", "\\xD6", "\\xD7",
      /* 216 */ "\\xD8", "\\xD9", "\\xDA", "\\xDB", "\\xDC", "\\xDD", "\\xDE", "\\xDF",
      /* 224 */ "\\xE0", "\\xE1", "\\xE2", "\\xE3", "\\xE4", "\\xE5", "\\xE6", "\\xE7",
      /* 232 */ "\\xE8", "\\xE9", "\\xEA", "\\xEB", "\\xEC", "\\xED", "\\xEE", "\\xEF",
      /* 240 */ "\\xF0", "\\xF1", "\\xF2", "\\xF3", "\\xF4", "\\xF5", "\\xF6", "\\xF7",
      /* 248 */ "\\xF8", "\\xF9", "\\xFA", "\\xFB", "\\xFC", "\\xFD", "\\xFE", "\\xFF",
  };

#  if UCHAR_MAX > 255
  /* Only on platforms with bytes wider than 8 bits can c leave the table */
  if (c > 255) {
    assert(0); /* never gets here */
    return "dead code";
  }
#  endif
  return printable[c];
}
8231
8232 #endif /* XML_DTD */
8233
/* Reads a debug level from the environment variable variableName.
 *
 * Returns the variable's value parsed as a base-10 unsigned long.
 * Returns defaultDebugLevel instead if the variable is not set, if strtoul
 * reports an error through errno, if the value contains no convertible
 * digits at all (e.g. it is empty), or if anything follows the number.
 *
 * Side effect: errno is reset to 0 whenever the variable is set.
 */
static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
  const char *const valueOrNull = getenv(variableName);
  if (valueOrNull == NULL) {
    return defaultDebugLevel;
  }
  const char *const value = valueOrNull;

  errno = 0;
  char *afterValue = (char *)value;
  unsigned long debugLevel = strtoul(value, &afterValue, 10);
  /* strtoul leaves afterValue at the start of the input when it could not
     convert anything; without that check an empty value would pass the
     trailing-garbage test and be mistaken for an explicit level 0. */
  if ((errno != 0) || (afterValue == value) || (afterValue[0] != '\0')) {
    errno = 0;
    return defaultDebugLevel;
  }

  return debugLevel;
}
8252