• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* 8539b9040d9d901366a62560a064af7cb99811335784b363abc039c5b0ebc416 (2.4.1+)
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
11    Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12    Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
13    Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
14    Copyright (c) 2005-2009 Steven Solie <ssolie@users.sourceforge.net>
15    Copyright (c) 2016      Eric Rahm <erahm@mozilla.com>
16    Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org>
17    Copyright (c) 2016      Gaurav <g.gupta@samsung.com>
18    Copyright (c) 2016      Thomas Beutlich <tc@tbeu.de>
19    Copyright (c) 2016      Gustavo Grieco <gustavo.grieco@imag.fr>
20    Copyright (c) 2016      Pascal Cuoq <cuoq@trust-in-soft.com>
21    Copyright (c) 2016      Ed Schouten <ed@nuxi.nl>
22    Copyright (c) 2017-2018 Rhodri James <rhodri@wildebeest.org.uk>
23    Copyright (c) 2017      Václav Slavík <vaclav@slavik.io>
24    Copyright (c) 2017      Viktor Szakats <commit@vsz.me>
25    Copyright (c) 2017      Chanho Park <chanho61.park@samsung.com>
26    Copyright (c) 2017      Rolf Eike Beer <eike@sf-mail.de>
27    Copyright (c) 2017      Hans Wennborg <hans@chromium.org>
28    Copyright (c) 2018      Anton Maklakov <antmak.pub@gmail.com>
29    Copyright (c) 2018      Benjamin Peterson <benjamin@python.org>
30    Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
31    Copyright (c) 2018      Mariusz Zaborski <oshogbo@vexillium.org>
32    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
33    Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org>
34    Copyright (c) 2019      Vadim Zeitlin <vadim@zeitlins.org>
35    Licensed under the MIT license:
36 
37    Permission is  hereby granted,  free of charge,  to any  person obtaining
38    a  copy  of  this  software   and  associated  documentation  files  (the
39    "Software"),  to  deal in  the  Software  without restriction,  including
40    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
41    distribute, sublicense, and/or sell copies of the Software, and to permit
42    persons  to whom  the Software  is  furnished to  do so,  subject to  the
43    following conditions:
44 
45    The above copyright  notice and this permission notice  shall be included
46    in all copies or substantial portions of the Software.
47 
48    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
49    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
50    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
51    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
52    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
53    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
54    USE OR OTHER DEALINGS IN THE SOFTWARE.
55 */
56 
57 #if ! defined(_GNU_SOURCE)
58 #  define _GNU_SOURCE 1 /* syscall prototype */
59 #endif
60 
61 #ifdef _WIN32
62 /* force stdlib to define rand_s() */
63 #  if ! defined(_CRT_RAND_S)
64 #    define _CRT_RAND_S
65 #  endif
66 #endif
67 
68 #include <stddef.h>
69 #include <string.h> /* memset(), memcpy() */
70 #include <assert.h>
71 #include <limits.h> /* UINT_MAX */
72 #include <stdio.h>  /* fprintf */
73 #include <stdlib.h> /* getenv, rand_s */
74 #include <stdint.h> /* uintptr_t */
75 #include <math.h>   /* isnan */
76 
77 #ifdef _WIN32
78 #  define getpid GetCurrentProcessId
79 #else
80 #  include <sys/time.h>  /* gettimeofday() */
81 #  include <sys/types.h> /* getpid() */
82 #  include <unistd.h>    /* getpid() */
83 #  include <fcntl.h>     /* O_RDONLY */
84 #  include <errno.h>
85 #endif
86 
87 #define XML_BUILDING_EXPAT 1
88 
89 #ifdef _WIN32
90 #  include "winconfig.h"
91 #elif defined(HAVE_EXPAT_CONFIG_H)
92 #  include <expat_config.h>
93 #endif /* ndef _WIN32 */
94 
95 #include "ascii.h"
96 #include "expat.h"
97 #include "siphash.h"
98 
99 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
100 #  if defined(HAVE_GETRANDOM)
101 #    include <sys/random.h> /* getrandom */
102 #  else
103 #    include <unistd.h>      /* syscall */
104 #    include <sys/syscall.h> /* SYS_getrandom */
105 #  endif
106 #  if ! defined(GRND_NONBLOCK)
107 #    define GRND_NONBLOCK 0x0001
108 #  endif /* defined(GRND_NONBLOCK) */
109 #endif   /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
110 
111 #if defined(HAVE_LIBBSD)                                                       \
112     && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
113 #  include <bsd/stdlib.h>
114 #endif
115 
116 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
117 #  define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
118 #endif
119 
120 #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM)             \
121     && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)            \
122     && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32)                         \
123     && ! defined(XML_POOR_ENTROPY)
124 #  error You do not have support for any sources of high quality entropy \
125     enabled.  For end user security, that is probably not what you want. \
126     \
127     Your options include: \
128       * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
129       * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
130       * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
131       * BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \
132       * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
133       * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
134       * Linux (including <3.17) / BSD / macOS (including <10.7) (/dev/urandom): XML_DEV_URANDOM, \
135       * Windows >=Vista (rand_s): _WIN32. \
136     \
137     If insist on not using any of these, bypass this error by defining \
138     XML_POOR_ENTROPY; you have been warned. \
139     \
140     If you have reasons to patch this detection code away or need changes \
141     to the build system, please open a bug.  Thank you!
142 #endif
143 
144 #ifdef XML_UNICODE
145 #  define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
146 #  define XmlConvert XmlUtf16Convert
147 #  define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
148 #  define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
149 #  define XmlEncode XmlUtf16Encode
150 #  define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
151 typedef unsigned short ICHAR;
152 #else
153 #  define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
154 #  define XmlConvert XmlUtf8Convert
155 #  define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
156 #  define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
157 #  define XmlEncode XmlUtf8Encode
158 #  define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
159 typedef char ICHAR;
160 #endif
161 
162 #ifndef XML_NS
163 
164 #  define XmlInitEncodingNS XmlInitEncoding
165 #  define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
166 #  undef XmlGetInternalEncodingNS
167 #  define XmlGetInternalEncodingNS XmlGetInternalEncoding
168 #  define XmlParseXmlDeclNS XmlParseXmlDecl
169 
170 #endif
171 
172 #ifdef XML_UNICODE
173 
174 #  ifdef XML_UNICODE_WCHAR_T
175 #    define XML_T(x) (const wchar_t) x
176 #    define XML_L(x) L##x
177 #  else
178 #    define XML_T(x) (const unsigned short)x
179 #    define XML_L(x) x
180 #  endif
181 
182 #else
183 
184 #  define XML_T(x) x
185 #  define XML_L(x) x
186 
187 #endif
188 
189 /* Round up n to be a multiple of sz, where sz is a power of 2. */
190 #define ROUND_UP(n, sz) (((n) + ((sz)-1)) & ~((sz)-1))
191 
192 /* Do safe (NULL-aware) pointer arithmetic */
193 #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
194 
195 #include "internal.h"
196 #include "xmltok.h"
197 #include "xmlrole.h"
198 
199 typedef const XML_Char *KEY;
200 
201 typedef struct {
202   KEY name;
203 } NAMED;
204 
/* Hash table storing pointers to NAMED entries (entries are keyed by their
   leading KEY name field). */
205 typedef struct {
206   NAMED **v; /* presumably the slot array -- TODO confirm in lookup() */
207   unsigned char power; /* presumably log2(size) -- TODO confirm in lookup() */
208   size_t size; /* table size; a power of 2 per the probing notes below */
209   size_t used; /* presumably the number of occupied slots -- TODO confirm */
210   const XML_Memory_Handling_Suite *mem; /* presumably the suite given to
                                               hashTableInit() -- confirm */
211 } HASH_TABLE;
212 
213 static size_t keylen(KEY s);
214 
215 static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
216 
217 /* For probing (after a collision) we need a step size relatively prime
218    to the hash table size, which is a power of 2. We use double-hashing,
219    since we can calculate a second hash value cheaply by taking those bits
220    of the first hash value that were discarded (masked out) when the table
221    index was calculated: index = hash & mask, where mask = table->size - 1.
222    We limit the maximum step size to table->size / 4 (mask >> 2) and make
223    it odd, since odd numbers are always relatively prime to a power of 2.
224 */
225 #define SECOND_HASH(hash, mask, power)                                         \
226   ((((hash) & ~(mask)) >> ((power)-1)) & ((mask) >> 2))
227 #define PROBE_STEP(hash, mask, power)                                          \
228   ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
229 
/* Iteration state over a HASH_TABLE; this is the type taken by
   hashTableIterInit() and hashTableIterNext() (declared further down). */
230 typedef struct {
231   NAMED **p;   /* presumably the current slot -- TODO confirm */
232   NAMED **end; /* presumably one past the last slot -- TODO confirm */
233 } HASH_TABLE_ITER;
234 
235 #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
236 #define INIT_DATA_BUF_SIZE 1024
237 #define INIT_ATTS_SIZE 16
238 #define INIT_ATTS_VERSION 0xFFFFFFFF
239 #define INIT_BLOCK_SIZE 1024
240 #define INIT_BUFFER_SIZE 1024
241 
242 #define EXPAND_SPARE 24
243 
244 typedef struct binding {
245   struct prefix *prefix;
246   struct binding *nextTagBinding;
247   struct binding *prevPrefixBinding;
248   const struct attribute_id *attId;
249   XML_Char *uri;
250   int uriLen;
251   int uriAlloc;
252 } BINDING;
253 
254 typedef struct prefix {
255   const XML_Char *name;
256   BINDING *binding;
257 } PREFIX;
258 
259 typedef struct {
260   const XML_Char *str;
261   const XML_Char *localPart;
262   const XML_Char *prefix;
263   int strLen;
264   int uriLen;
265   int prefixLen;
266 } TAG_NAME;
267 
268 /* TAG represents an open element.
269    The name of the element is stored in both the document and API
270    encodings.  The memory buffer 'buf' is a separately-allocated
271    memory area which stores the name.  During the XML_Parse()/
272    XML_ParseBuffer() when the element is open, the memory for the 'raw'
273    version of the name (in the document encoding) is shared with the
274    document buffer.  If the element is open across calls to
275    XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
276    contain the 'raw' name as well.
277 
278    A parser re-uses these structures, maintaining a list of allocated
279    TAG objects in a free list.
280 */
281 typedef struct tag {
282   struct tag *parent;  /* parent of this element */
283   const char *rawName; /* tagName in the original encoding */
284   int rawNameLength;   /* length of rawName; presumably in bytes -- confirm */
285   TAG_NAME name; /* tagName in the API encoding */
286   char *buf;     /* buffer for name components */
287   char *bufEnd;  /* end of the buffer */
288   BINDING *bindings; /* presumably the namespace bindings declared on this
                            element, chained via nextTagBinding -- confirm */
289 } TAG;
290 
291 typedef struct {
292   const XML_Char *name;
293   const XML_Char *textPtr;
294   int textLen;   /* length in XML_Chars */
295   int processed; /* # of processed bytes - when suspended */
296   const XML_Char *systemId;
297   const XML_Char *base;
298   const XML_Char *publicId;
299   const XML_Char *notation;
300   XML_Bool open;
301   XML_Bool is_param;
302   XML_Bool is_internal; /* true if declared in internal subset outside PE */
303 } ENTITY;
304 
305 typedef struct {
306   enum XML_Content_Type type;
307   enum XML_Content_Quant quant;
308   const XML_Char *name;
309   int firstchild;
310   int lastchild;
311   int childcnt;
312   int nextsib;
313 } CONTENT_SCAFFOLD;
314 
315 #define INIT_SCAFFOLD_ELEMENTS 32
316 
317 typedef struct block {
318   struct block *next;
319   int size;
320   XML_Char s[1];
321 } BLOCK;
322 
/* String storage built from BLOCKs.  As the pool* accessor macros in this
   file show, [start, ptr) is the string currently being assembled, and
   poolAppendChar() calls poolGrow() once ptr reaches end. */
323 typedef struct {
324   BLOCK *blocks;
325   BLOCK *freeBlocks; /* presumably blocks retained for reuse -- confirm */
326   const XML_Char *end; /* limit checked by poolAppendChar() before writing */
327   XML_Char *ptr;       /* next write position (see poolEnd) */
328   XML_Char *start;     /* start of the current string (see poolStart) */
329   const XML_Memory_Handling_Suite *mem;
330 } STRING_POOL;
331 
332 /* The XML_Char before the name is used to determine whether
333    an attribute has been specified. */
334 typedef struct attribute_id {
335   XML_Char *name;
336   PREFIX *prefix;
337   XML_Bool maybeTokenized;
338   XML_Bool xmlns;
339 } ATTRIBUTE_ID;
340 
341 typedef struct {
342   const ATTRIBUTE_ID *id;
343   XML_Bool isCdata;
344   const XML_Char *value;
345 } DEFAULT_ATTRIBUTE;
346 
347 typedef struct {
348   unsigned long version;
349   unsigned long hash;
350   const XML_Char *uriName;
351 } NS_ATT;
352 
353 typedef struct {
354   const XML_Char *name;
355   PREFIX *prefix;
356   const ATTRIBUTE_ID *idAtt;
357   int nDefaultAtts;
358   int allocDefaultAtts;
359   DEFAULT_ATTRIBUTE *defaultAtts;
360 } ELEMENT_TYPE;
361 
362 typedef struct {
363   HASH_TABLE generalEntities;
364   HASH_TABLE elementTypes;
365   HASH_TABLE attributeIds;
366   HASH_TABLE prefixes;
367   STRING_POOL pool;
368   STRING_POOL entityValuePool;
369   /* false once a parameter entity reference has been skipped */
370   XML_Bool keepProcessing;
371   /* true once an internal or external PE reference has been encountered;
372      this includes the reference to an external subset */
373   XML_Bool hasParamEntityRefs;
374   XML_Bool standalone;
375 #ifdef XML_DTD
376   /* indicates if external PE has been read */
377   XML_Bool paramEntityRead;
378   HASH_TABLE paramEntities;
379 #endif /* XML_DTD */
380   PREFIX defaultPrefix;
381   /* === scaffolding for building content model === */
382   XML_Bool in_eldecl;
383   CONTENT_SCAFFOLD *scaffold;
384   unsigned contentStringLen;
385   unsigned scaffSize;
386   unsigned scaffCount;
387   int scaffLevel;
388   int *scaffIndex;
389 } DTD;
390 
391 typedef struct open_internal_entity {
392   const char *internalEventPtr;
393   const char *internalEventEndPtr;
394   struct open_internal_entity *next;
395   ENTITY *entity;
396   int startTagLevel;
397   XML_Bool betweenDecl; /* WFC: PE Between Declarations */
398 } OPEN_INTERNAL_ENTITY;
399 
400 enum XML_Account {
401   XML_ACCOUNT_DIRECT,           /* bytes directly passed to the Expat parser */
402   XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity
403                                    expansion */
404   XML_ACCOUNT_NONE              /* i.e. do not account, was accounted already */
405 };
406 
407 #ifdef XML_DTD
408 typedef unsigned long long XmlBigCount;
409 typedef struct accounting {
410   XmlBigCount countBytesDirect;
411   XmlBigCount countBytesIndirect;
412   int debugLevel;
413   float maximumAmplificationFactor; // >=1.0
414   unsigned long long activationThresholdBytes;
415 } ACCOUNTING;
416 
417 typedef struct entity_stats {
418   unsigned int countEverOpened;
419   unsigned int currentDepth;
420   unsigned int maximumDepthSeen;
421   int debugLevel;
422 } ENTITY_STATS;
423 #endif /* XML_DTD */
424 
425 typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
426                                          const char *end, const char **endPtr);
427 
428 static Processor prologProcessor;
429 static Processor prologInitProcessor;
430 static Processor contentProcessor;
431 static Processor cdataSectionProcessor;
432 #ifdef XML_DTD
433 static Processor ignoreSectionProcessor;
434 static Processor externalParEntProcessor;
435 static Processor externalParEntInitProcessor;
436 static Processor entityValueProcessor;
437 static Processor entityValueInitProcessor;
438 #endif /* XML_DTD */
439 static Processor epilogProcessor;
440 static Processor errorProcessor;
441 static Processor externalEntityInitProcessor;
442 static Processor externalEntityInitProcessor2;
443 static Processor externalEntityInitProcessor3;
444 static Processor externalEntityContentProcessor;
445 static Processor internalEntityProcessor;
446 
447 static enum XML_Error handleUnknownEncoding(XML_Parser parser,
448                                             const XML_Char *encodingName);
449 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
450                                      const char *s, const char *next);
451 static enum XML_Error initializeEncoding(XML_Parser parser);
452 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
453                                const char *s, const char *end, int tok,
454                                const char *next, const char **nextPtr,
455                                XML_Bool haveMore, XML_Bool allowClosingDoctype,
456                                enum XML_Account account);
457 static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity,
458                                             XML_Bool betweenDecl);
459 static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
460                                 const ENCODING *enc, const char *start,
461                                 const char *end, const char **endPtr,
462                                 XML_Bool haveMore, enum XML_Account account);
463 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *,
464                                      const char **startPtr, const char *end,
465                                      const char **nextPtr, XML_Bool haveMore,
466                                      enum XML_Account account);
467 #ifdef XML_DTD
468 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *,
469                                       const char **startPtr, const char *end,
470                                       const char **nextPtr, XML_Bool haveMore);
471 #endif /* XML_DTD */
472 
473 static void freeBindings(XML_Parser parser, BINDING *bindings);
474 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *,
475                                 const char *s, TAG_NAME *tagNamePtr,
476                                 BINDING **bindingsPtr,
477                                 enum XML_Account account);
478 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
479                                  const ATTRIBUTE_ID *attId, const XML_Char *uri,
480                                  BINDING **bindingsPtr);
481 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
482                            XML_Bool isId, const XML_Char *dfltValue,
483                            XML_Parser parser);
484 static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *,
485                                           XML_Bool isCdata, const char *,
486                                           const char *, STRING_POOL *,
487                                           enum XML_Account account);
488 static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *,
489                                            XML_Bool isCdata, const char *,
490                                            const char *, STRING_POOL *,
491                                            enum XML_Account account);
492 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
493                                     const char *start, const char *end);
494 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
495 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
496                                        const char *start, const char *end,
497                                        enum XML_Account account);
498 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
499                                        const char *start, const char *end);
500 static int reportComment(XML_Parser parser, const ENCODING *enc,
501                          const char *start, const char *end);
502 static void reportDefault(XML_Parser parser, const ENCODING *enc,
503                           const char *start, const char *end);
504 
505 static const XML_Char *getContext(XML_Parser parser);
506 static XML_Bool setContext(XML_Parser parser, const XML_Char *context);
507 
508 static void FASTCALL normalizePublicId(XML_Char *s);
509 
510 static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms);
511 /* do not call if m_parentParser != NULL */
512 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
513 static void dtdDestroy(DTD *p, XML_Bool isDocEntity,
514                        const XML_Memory_Handling_Suite *ms);
515 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
516                    const XML_Memory_Handling_Suite *ms);
517 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *, STRING_POOL *,
518                            const HASH_TABLE *);
519 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
520                      size_t createSize);
521 static void FASTCALL hashTableInit(HASH_TABLE *,
522                                    const XML_Memory_Handling_Suite *ms);
523 static void FASTCALL hashTableClear(HASH_TABLE *);
524 static void FASTCALL hashTableDestroy(HASH_TABLE *);
525 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *);
526 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *);
527 
528 static void FASTCALL poolInit(STRING_POOL *,
529                               const XML_Memory_Handling_Suite *ms);
530 static void FASTCALL poolClear(STRING_POOL *);
531 static void FASTCALL poolDestroy(STRING_POOL *);
532 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
533                             const char *ptr, const char *end);
534 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
535                                  const char *ptr, const char *end);
536 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
537 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
538                                                const XML_Char *s);
539 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
540                                        int n);
541 static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
542                                                  const XML_Char *s);
543 
544 static int FASTCALL nextScaffoldPart(XML_Parser parser);
545 static XML_Content *build_model(XML_Parser parser);
546 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
547                                     const char *ptr, const char *end);
548 
549 static XML_Char *copyString(const XML_Char *s,
550                             const XML_Memory_Handling_Suite *memsuite);
551 
552 static unsigned long generate_hash_secret_salt(XML_Parser parser);
553 static XML_Bool startParsing(XML_Parser parser);
554 
555 static XML_Parser parserCreate(const XML_Char *encodingName,
556                                const XML_Memory_Handling_Suite *memsuite,
557                                const XML_Char *nameSep, DTD *dtd);
558 
559 static void parserInit(XML_Parser parser, const XML_Char *encodingName);
560 
561 #ifdef XML_DTD
562 static float accountingGetCurrentAmplification(XML_Parser rootParser);
563 static void accountingReportStats(XML_Parser originParser, const char *epilog);
564 static void accountingOnAbort(XML_Parser originParser);
565 static void accountingReportDiff(XML_Parser rootParser,
566                                  unsigned int levelsAwayFromRootParser,
567                                  const char *before, const char *after,
568                                  ptrdiff_t bytesMore, int source_line,
569                                  enum XML_Account account);
570 static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
571                                         const char *before, const char *after,
572                                         int source_line,
573                                         enum XML_Account account);
574 
575 static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
576                                       const char *action, int sourceLine);
577 static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
578                                  int sourceLine);
579 static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
580                                   int sourceLine);
581 
582 static XML_Parser getRootParserOf(XML_Parser parser,
583                                   unsigned int *outLevelDiff);
584 #endif /* XML_DTD */
585 
586 static unsigned long getDebugLevel(const char *variableName,
587                                    unsigned long defaultDebugLevel);
588 
/* Accessors for a STRING_POOL.  The string under construction occupies
   [start, ptr); 'end' is the limit at which more room must be obtained.

   poolStart      - first character of the string under construction
   poolEnd        - one past its last character (the next write position)
   poolLength     - number of XML_Chars written so far
   poolChop       - drop the last character
   poolLastChar   - the last character written
   poolDiscard    - abandon the string under construction
   poolFinish     - commit it; the next string starts at the current position
   poolAppendChar - append one character, calling poolGrow() when ptr has
                    reached end; evaluates to 0 if growing fails, else 1

   Every use of a macro parameter is parenthesized so that arguments such as
   '&pool' or 'pools + i' expand correctly.  The pool argument is evaluated
   more than once, so it must not have side effects. */
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
600 
601 struct XML_ParserStruct {
602   /* The first member must be m_userData so that the XML_GetUserData
603      macro works. */
604   void *m_userData;
605   void *m_handlerArg;
606   char *m_buffer;
607   const XML_Memory_Handling_Suite m_mem;
608   /* first character to be parsed */
609   const char *m_bufferPtr;
610   /* past last character to be parsed */
611   char *m_bufferEnd;
612   /* allocated end of m_buffer */
613   const char *m_bufferLim;
614   XML_Index m_parseEndByteIndex;
615   const char *m_parseEndPtr;
616   XML_Char *m_dataBuf;
617   XML_Char *m_dataBufEnd;
618   XML_StartElementHandler m_startElementHandler;
619   XML_EndElementHandler m_endElementHandler;
620   XML_CharacterDataHandler m_characterDataHandler;
621   XML_ProcessingInstructionHandler m_processingInstructionHandler;
622   XML_CommentHandler m_commentHandler;
623   XML_StartCdataSectionHandler m_startCdataSectionHandler;
624   XML_EndCdataSectionHandler m_endCdataSectionHandler;
625   XML_DefaultHandler m_defaultHandler;
626   XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
627   XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
628   XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
629   XML_NotationDeclHandler m_notationDeclHandler;
630   XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
631   XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
632   XML_NotStandaloneHandler m_notStandaloneHandler;
633   XML_ExternalEntityRefHandler m_externalEntityRefHandler;
634   XML_Parser m_externalEntityRefHandlerArg;
635   XML_SkippedEntityHandler m_skippedEntityHandler;
636   XML_UnknownEncodingHandler m_unknownEncodingHandler;
637   XML_ElementDeclHandler m_elementDeclHandler;
638   XML_AttlistDeclHandler m_attlistDeclHandler;
639   XML_EntityDeclHandler m_entityDeclHandler;
640   XML_XmlDeclHandler m_xmlDeclHandler;
641   const ENCODING *m_encoding;
642   INIT_ENCODING m_initEncoding;
643   const ENCODING *m_internalEncoding;
644   const XML_Char *m_protocolEncodingName;
645   XML_Bool m_ns;
646   XML_Bool m_ns_triplets;
647   void *m_unknownEncodingMem;
648   void *m_unknownEncodingData;
649   void *m_unknownEncodingHandlerData;
650   void(XMLCALL *m_unknownEncodingRelease)(void *);
651   PROLOG_STATE m_prologState;
652   Processor *m_processor;
653   enum XML_Error m_errorCode;
654   const char *m_eventPtr;
655   const char *m_eventEndPtr;
656   const char *m_positionPtr;
657   OPEN_INTERNAL_ENTITY *m_openInternalEntities;
658   OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
659   XML_Bool m_defaultExpandInternalEntities;
660   int m_tagLevel;
661   ENTITY *m_declEntity;
662   const XML_Char *m_doctypeName;
663   const XML_Char *m_doctypeSysid;
664   const XML_Char *m_doctypePubid;
665   const XML_Char *m_declAttributeType;
666   const XML_Char *m_declNotationName;
667   const XML_Char *m_declNotationPublicId;
668   ELEMENT_TYPE *m_declElementType;
669   ATTRIBUTE_ID *m_declAttributeId;
670   XML_Bool m_declAttributeIsCdata;
671   XML_Bool m_declAttributeIsId;
672   DTD *m_dtd;
673   const XML_Char *m_curBase;
674   TAG *m_tagStack;
675   TAG *m_freeTagList;
676   BINDING *m_inheritedBindings;
677   BINDING *m_freeBindingList;
678   int m_attsSize;
679   int m_nSpecifiedAtts;
680   int m_idAttIndex;
681   ATTRIBUTE *m_atts;
682   NS_ATT *m_nsAtts;
683   unsigned long m_nsAttsVersion;
684   unsigned char m_nsAttsPower;
685 #ifdef XML_ATTR_INFO
686   XML_AttrInfo *m_attInfo;
687 #endif
688   POSITION m_position;
689   STRING_POOL m_tempPool;
690   STRING_POOL m_temp2Pool;
691   char *m_groupConnector;
692   unsigned int m_groupSize;
693   XML_Char m_namespaceSeparator;
694   XML_Parser m_parentParser;
695   XML_ParsingStatus m_parsingStatus;
696 #ifdef XML_DTD
697   XML_Bool m_isParamEntity;
698   XML_Bool m_useForeignDTD;
699   enum XML_ParamEntityParsing m_paramEntityParsing;
700 #endif
701   unsigned long m_hash_secret_salt;
702 #ifdef XML_DTD
703   ACCOUNTING m_accounting;
704   ENTITY_STATS m_entity_stats;
705 #endif
706 };
707 
/* Allocate, resize and release memory through the parser's own memory
   handling suite (m_mem).  The parser argument is parenthesized so that
   expressions such as '&obj' or 'base + i' can be passed safely. */
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
711 
712 XML_Parser XMLCALL
XML_ParserCreate(const XML_Char * encodingName)713 XML_ParserCreate(const XML_Char *encodingName) {
714   return XML_ParserCreate_MM(encodingName, NULL, NULL);
715 }
716 
717 XML_Parser XMLCALL
XML_ParserCreateNS(const XML_Char * encodingName,XML_Char nsSep)718 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
719   XML_Char tmp[2] = {nsSep, 0};
720   return XML_ParserCreate_MM(encodingName, NULL, tmp);
721 }
722 
/* Zero-terminated XML_Char string built character by character so that it
   works for any XML_Char width.  Assuming each ASCII_* macro from ascii.h
   expands to the character it is named after, it spells
   "xml=http://www.w3.org/XML/1998/namespace". */
723 static const XML_Char implicitContext[]
724     = {ASCII_x,     ASCII_m,     ASCII_l,      ASCII_EQUALS, ASCII_h,
725        ASCII_t,     ASCII_t,     ASCII_p,      ASCII_COLON,  ASCII_SLASH,
726        ASCII_SLASH, ASCII_w,     ASCII_w,      ASCII_w,      ASCII_PERIOD,
727        ASCII_w,     ASCII_3,     ASCII_PERIOD, ASCII_o,      ASCII_r,
728        ASCII_g,     ASCII_SLASH, ASCII_X,      ASCII_M,      ASCII_L,
729        ASCII_SLASH, ASCII_1,     ASCII_9,      ASCII_9,      ASCII_8,
730        ASCII_SLASH, ASCII_n,     ASCII_a,      ASCII_m,      ASCII_e,
731        ASCII_s,     ASCII_p,     ASCII_a,      ASCII_c,      ASCII_e,
732        '\0'};
733 
734 /* To avoid warnings about unused functions: */
735 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
736 
737 #  if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
738 
/* Obtain entropy on Linux 3.17+ via getrandom(2) in non-blocking mode.

   Fills target with count random bytes.  Returns 1 if all count bytes were
   written, 0 otherwise (including when count is 0, because no call ever
   reports progress).

   The call is repeated after a short read and after a failure with
   errno == EINTR.  errno is only consulted when the call itself reported
   failure, since a successful call leaves errno at whatever stale value it
   had before.  The result is kept in an ssize_t so it is not narrowed. */
static int
writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
  int success = 0; /* full count bytes written? */
  size_t bytesWrittenTotal = 0;
  const unsigned int getrandomFlags = GRND_NONBLOCK;

  while (! success) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore =
#    if defined(HAVE_GETRANDOM)
        getrandom(currentTarget, bytesToWrite, getrandomFlags);
#    else
        (ssize_t)syscall(SYS_getrandom, currentTarget, bytesToWrite,
                         getrandomFlags);
#    endif

    if (bytesWrittenMore > 0) {
      /* Progress was made; loop again if the request is not yet complete. */
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count)
        success = 1;
    } else if (! (bytesWrittenMore < 0 && errno == EINTR)) {
      /* Real failure (e.g. EAGAIN, ENOSYS) or no progress: give up. */
      break;
    }
  }

  return success;
}
766 
767 #  endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
768 
769 #  if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
770 
/* Extract entropy from /dev/urandom.
 *
 * Opens the device read-only and reads until `count` bytes have been stored
 * at `target`.  Returns 1 if all `count` bytes were read and 0 otherwise
 * (including when the device cannot be opened).
 *
 * errno is consulted only when read() actually failed (negative return), so
 * a stale errno value cannot influence the retry decision: a short read is
 * continued, EINTR is retried, and a zero-length read or any other error
 * ends the attempt.
 */
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  int success = 0; /* full count bytes written? */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  for (;;) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count) {
        success = 1;
        break;
      }
      continue; /* short read: fetch the remainder */
    }

    if ((bytesWrittenMore < 0) && (errno == EINTR))
      continue; /* interrupted before any data arrived: retry */

    break; /* zero-length read or hard error: give up */
  }

  close(fd);
  return success;
}
798 
799 #  endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
800 
801 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
802 
803 #if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)
804 
/* Fill `count` bytes at `target` using arc4random().  Each 32-bit result is
   emitted least-significant byte first; a trailing partial word is
   truncated to the bytes still needed. */
static void
writeRandomBytes_arc4random(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t written = 0;

  while (written < count) {
    const uint32_t word = arc4random();
    size_t byteIndex;

    for (byteIndex = 0; (byteIndex < sizeof(word)) && (written < count);
         byteIndex++) {
      out[written] = (uint8_t)(word >> (byteIndex * 8));
      written++;
    }
  }
}
820 
821 #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
822 
823 #ifdef _WIN32
824 
825 /* Provide declaration of rand_s() for MinGW-32 (not 64, which has it),
826    as it didn't declare it in its header prior to version 5.3.0 of its
827    runtime package (mingwrt, containing stdlib.h).  The upstream fix
828    was introduced at https://osdn.net/projects/mingw/ticket/39658 . */
829 #  if defined(__MINGW32__) && defined(__MINGW32_VERSION)                       \
830       && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
831 __declspec(dllimport) int rand_s(unsigned int *);
832 #  endif
833 
/* Obtain entropy on Windows using the rand_s() function which
 * generates cryptographically secure random numbers.  Internally it
 * uses RtlGenRandom API which is present in Windows XP and later.
 *
 * Fills `count` bytes at `target`, taking the bytes of each rand_s() result
 * least-significant first.  Returns 1 on success, 0 as soon as rand_s()
 * reports an error (non-zero return).
 */
static int
writeRandomBytes_rand_s(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t written = 0;

  while (written < count) {
    unsigned int word = 0;
    size_t byteIndex;

    if (rand_s(&word) != 0)
      return 0; /* failure */

    for (byteIndex = 0; (byteIndex < sizeof(word)) && (written < count);
         byteIndex++) {
      out[written] = (uint8_t)(word >> (byteIndex * 8));
      written++;
    }
  }
  return 1; /* success */
}
857 
858 #endif /* _WIN32 */
859 
860 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
861 
/* Low-quality, time-derived entropy used only by the fallback path of
   generate_hash_secret_salt().  On Windows: XOR of the two halves of the
   current FILETIME.  Elsewhere: the microsecond field of gettimeofday(). */
static unsigned long
gather_time_entropy(void) {
#  ifdef _WIN32
  FILETIME now;
  GetSystemTimeAsFileTime(&now); /* never fails */
  return now.dwHighDateTime ^ now.dwLowDateTime;
#  else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  /* Checked in debug builds only; the cast keeps NDEBUG builds free of
     unused-variable warnings. */
  assert(rc == 0);
  (void)rc;

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#  endif
}
884 
885 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
886 
/* Pass-through helper: returns `entropy` unchanged.  When
   getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) yields at least 1 (presumably
   driven by an environment variable of that name -- confirm in
   getDebugLevel), the value and the provider `label` are also written to
   stderr. */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  const int hexDigits = (int)sizeof(entropy) * 2;
  const unsigned long byteCount = (unsigned long)sizeof(entropy);

  if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) < 1u)
    return entropy;

  fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
          hexDigits, entropy, byteCount);
  return entropy;
}
895 
896 static unsigned long
generate_hash_secret_salt(XML_Parser parser)897 generate_hash_secret_salt(XML_Parser parser) {
898   unsigned long entropy;
899   (void)parser;
900 
901   /* "Failproof" high quality providers: */
902 #if defined(HAVE_ARC4RANDOM_BUF)
903   arc4random_buf(&entropy, sizeof(entropy));
904   return ENTROPY_DEBUG("arc4random_buf", entropy);
905 #elif defined(HAVE_ARC4RANDOM)
906   writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
907   return ENTROPY_DEBUG("arc4random", entropy);
908 #else
909   /* Try high quality providers first .. */
910 #  ifdef _WIN32
911   if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
912     return ENTROPY_DEBUG("rand_s", entropy);
913   }
914 #  elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
915   if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
916     return ENTROPY_DEBUG("getrandom", entropy);
917   }
918 #  endif
919 #  if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
920   if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
921     return ENTROPY_DEBUG("/dev/urandom", entropy);
922   }
923 #  endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
924   /* .. and self-made low quality for backup: */
925 
926   /* Process ID is 0 bits entropy if attacker has local access */
927   entropy = gather_time_entropy() ^ getpid();
928 
929   /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
930   if (sizeof(unsigned long) == 4) {
931     return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
932   } else {
933     return ENTROPY_DEBUG("fallback(8)",
934                          entropy * (unsigned long)2305843009213693951ULL);
935   }
936 #endif
937 }
938 
939 static unsigned long
get_hash_secret_salt(XML_Parser parser)940 get_hash_secret_salt(XML_Parser parser) {
941   if (parser->m_parentParser != NULL)
942     return get_hash_secret_salt(parser->m_parentParser);
943   return parser->m_hash_secret_salt;
944 }
945 
946 static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser)947 startParsing(XML_Parser parser) {
948   /* hash functions must be initialized before setContext() is called */
949   if (parser->m_hash_secret_salt == 0)
950     parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
951   if (parser->m_ns) {
952     /* implicit context only set for root parser, since child
953        parsers (i.e. external entity parsers) will inherit it
954     */
955     return setContext(parser, implicitContext);
956   }
957   return XML_TRUE;
958 }
959 
960 XML_Parser XMLCALL
XML_ParserCreate_MM(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep)961 XML_ParserCreate_MM(const XML_Char *encodingName,
962                     const XML_Memory_Handling_Suite *memsuite,
963                     const XML_Char *nameSep) {
964   return parserCreate(encodingName, memsuite, nameSep, NULL);
965 }
966 
967 static XML_Parser
parserCreate(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep,DTD * dtd)968 parserCreate(const XML_Char *encodingName,
969              const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
970              DTD *dtd) {
971   XML_Parser parser;
972 
973   if (memsuite) {
974     XML_Memory_Handling_Suite *mtemp;
975     parser = (XML_Parser)memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
976     if (parser != NULL) {
977       mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
978       mtemp->malloc_fcn = memsuite->malloc_fcn;
979       mtemp->realloc_fcn = memsuite->realloc_fcn;
980       mtemp->free_fcn = memsuite->free_fcn;
981     }
982   } else {
983     XML_Memory_Handling_Suite *mtemp;
984     parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
985     if (parser != NULL) {
986       mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
987       mtemp->malloc_fcn = malloc;
988       mtemp->realloc_fcn = realloc;
989       mtemp->free_fcn = free;
990     }
991   }
992 
993   if (! parser)
994     return parser;
995 
996   parser->m_buffer = NULL;
997   parser->m_bufferLim = NULL;
998 
999   parser->m_attsSize = INIT_ATTS_SIZE;
1000   parser->m_atts
1001       = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
1002   if (parser->m_atts == NULL) {
1003     FREE(parser, parser);
1004     return NULL;
1005   }
1006 #ifdef XML_ATTR_INFO
1007   parser->m_attInfo = (XML_AttrInfo *)MALLOC(
1008       parser, parser->m_attsSize * sizeof(XML_AttrInfo));
1009   if (parser->m_attInfo == NULL) {
1010     FREE(parser, parser->m_atts);
1011     FREE(parser, parser);
1012     return NULL;
1013   }
1014 #endif
1015   parser->m_dataBuf
1016       = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
1017   if (parser->m_dataBuf == NULL) {
1018     FREE(parser, parser->m_atts);
1019 #ifdef XML_ATTR_INFO
1020     FREE(parser, parser->m_attInfo);
1021 #endif
1022     FREE(parser, parser);
1023     return NULL;
1024   }
1025   parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
1026 
1027   if (dtd)
1028     parser->m_dtd = dtd;
1029   else {
1030     parser->m_dtd = dtdCreate(&parser->m_mem);
1031     if (parser->m_dtd == NULL) {
1032       FREE(parser, parser->m_dataBuf);
1033       FREE(parser, parser->m_atts);
1034 #ifdef XML_ATTR_INFO
1035       FREE(parser, parser->m_attInfo);
1036 #endif
1037       FREE(parser, parser);
1038       return NULL;
1039     }
1040   }
1041 
1042   parser->m_freeBindingList = NULL;
1043   parser->m_freeTagList = NULL;
1044   parser->m_freeInternalEntities = NULL;
1045 
1046   parser->m_groupSize = 0;
1047   parser->m_groupConnector = NULL;
1048 
1049   parser->m_unknownEncodingHandler = NULL;
1050   parser->m_unknownEncodingHandlerData = NULL;
1051 
1052   parser->m_namespaceSeparator = ASCII_EXCL;
1053   parser->m_ns = XML_FALSE;
1054   parser->m_ns_triplets = XML_FALSE;
1055 
1056   parser->m_nsAtts = NULL;
1057   parser->m_nsAttsVersion = 0;
1058   parser->m_nsAttsPower = 0;
1059 
1060   parser->m_protocolEncodingName = NULL;
1061 
1062   poolInit(&parser->m_tempPool, &(parser->m_mem));
1063   poolInit(&parser->m_temp2Pool, &(parser->m_mem));
1064   parserInit(parser, encodingName);
1065 
1066   if (encodingName && ! parser->m_protocolEncodingName) {
1067     if (dtd) {
1068       // We need to stop the upcoming call to XML_ParserFree from happily
1069       // destroying parser->m_dtd because the DTD is shared with the parent
1070       // parser and the only guard that keeps XML_ParserFree from destroying
1071       // parser->m_dtd is parser->m_isParamEntity but it will be set to
1072       // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
1073       parser->m_dtd = NULL;
1074     }
1075     XML_ParserFree(parser);
1076     return NULL;
1077   }
1078 
1079   if (nameSep) {
1080     parser->m_ns = XML_TRUE;
1081     parser->m_internalEncoding = XmlGetInternalEncodingNS();
1082     parser->m_namespaceSeparator = *nameSep;
1083   } else {
1084     parser->m_internalEncoding = XmlGetInternalEncoding();
1085   }
1086 
1087   return parser;
1088 }
1089 
1090 static void
parserInit(XML_Parser parser,const XML_Char * encodingName)1091 parserInit(XML_Parser parser, const XML_Char *encodingName) {
1092   parser->m_processor = prologInitProcessor;
1093   XmlPrologStateInit(&parser->m_prologState);
1094   if (encodingName != NULL) {
1095     parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1096   }
1097   parser->m_curBase = NULL;
1098   XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1099   parser->m_userData = NULL;
1100   parser->m_handlerArg = NULL;
1101   parser->m_startElementHandler = NULL;
1102   parser->m_endElementHandler = NULL;
1103   parser->m_characterDataHandler = NULL;
1104   parser->m_processingInstructionHandler = NULL;
1105   parser->m_commentHandler = NULL;
1106   parser->m_startCdataSectionHandler = NULL;
1107   parser->m_endCdataSectionHandler = NULL;
1108   parser->m_defaultHandler = NULL;
1109   parser->m_startDoctypeDeclHandler = NULL;
1110   parser->m_endDoctypeDeclHandler = NULL;
1111   parser->m_unparsedEntityDeclHandler = NULL;
1112   parser->m_notationDeclHandler = NULL;
1113   parser->m_startNamespaceDeclHandler = NULL;
1114   parser->m_endNamespaceDeclHandler = NULL;
1115   parser->m_notStandaloneHandler = NULL;
1116   parser->m_externalEntityRefHandler = NULL;
1117   parser->m_externalEntityRefHandlerArg = parser;
1118   parser->m_skippedEntityHandler = NULL;
1119   parser->m_elementDeclHandler = NULL;
1120   parser->m_attlistDeclHandler = NULL;
1121   parser->m_entityDeclHandler = NULL;
1122   parser->m_xmlDeclHandler = NULL;
1123   parser->m_bufferPtr = parser->m_buffer;
1124   parser->m_bufferEnd = parser->m_buffer;
1125   parser->m_parseEndByteIndex = 0;
1126   parser->m_parseEndPtr = NULL;
1127   parser->m_declElementType = NULL;
1128   parser->m_declAttributeId = NULL;
1129   parser->m_declEntity = NULL;
1130   parser->m_doctypeName = NULL;
1131   parser->m_doctypeSysid = NULL;
1132   parser->m_doctypePubid = NULL;
1133   parser->m_declAttributeType = NULL;
1134   parser->m_declNotationName = NULL;
1135   parser->m_declNotationPublicId = NULL;
1136   parser->m_declAttributeIsCdata = XML_FALSE;
1137   parser->m_declAttributeIsId = XML_FALSE;
1138   memset(&parser->m_position, 0, sizeof(POSITION));
1139   parser->m_errorCode = XML_ERROR_NONE;
1140   parser->m_eventPtr = NULL;
1141   parser->m_eventEndPtr = NULL;
1142   parser->m_positionPtr = NULL;
1143   parser->m_openInternalEntities = NULL;
1144   parser->m_defaultExpandInternalEntities = XML_TRUE;
1145   parser->m_tagLevel = 0;
1146   parser->m_tagStack = NULL;
1147   parser->m_inheritedBindings = NULL;
1148   parser->m_nSpecifiedAtts = 0;
1149   parser->m_unknownEncodingMem = NULL;
1150   parser->m_unknownEncodingRelease = NULL;
1151   parser->m_unknownEncodingData = NULL;
1152   parser->m_parentParser = NULL;
1153   parser->m_parsingStatus.parsing = XML_INITIALIZED;
1154 #ifdef XML_DTD
1155   parser->m_isParamEntity = XML_FALSE;
1156   parser->m_useForeignDTD = XML_FALSE;
1157   parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1158 #endif
1159   parser->m_hash_secret_salt = 0;
1160 
1161 #ifdef XML_DTD
1162   memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
1163   parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
1164   parser->m_accounting.maximumAmplificationFactor
1165       = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
1166   parser->m_accounting.activationThresholdBytes
1167       = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;
1168 
1169   memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
1170   parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
1171 #endif
1172 }
1173 
1174 /* moves list of bindings to m_freeBindingList */
1175 static void FASTCALL
moveToFreeBindingList(XML_Parser parser,BINDING * bindings)1176 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
1177   while (bindings) {
1178     BINDING *b = bindings;
1179     bindings = bindings->nextTagBinding;
1180     b->nextTagBinding = parser->m_freeBindingList;
1181     parser->m_freeBindingList = b;
1182   }
1183 }
1184 
1185 XML_Bool XMLCALL
XML_ParserReset(XML_Parser parser,const XML_Char * encodingName)1186 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
1187   TAG *tStk;
1188   OPEN_INTERNAL_ENTITY *openEntityList;
1189 
1190   if (parser == NULL)
1191     return XML_FALSE;
1192 
1193   if (parser->m_parentParser)
1194     return XML_FALSE;
1195   /* move m_tagStack to m_freeTagList */
1196   tStk = parser->m_tagStack;
1197   while (tStk) {
1198     TAG *tag = tStk;
1199     tStk = tStk->parent;
1200     tag->parent = parser->m_freeTagList;
1201     moveToFreeBindingList(parser, tag->bindings);
1202     tag->bindings = NULL;
1203     parser->m_freeTagList = tag;
1204   }
1205   /* move m_openInternalEntities to m_freeInternalEntities */
1206   openEntityList = parser->m_openInternalEntities;
1207   while (openEntityList) {
1208     OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1209     openEntityList = openEntity->next;
1210     openEntity->next = parser->m_freeInternalEntities;
1211     parser->m_freeInternalEntities = openEntity;
1212   }
1213   moveToFreeBindingList(parser, parser->m_inheritedBindings);
1214   FREE(parser, parser->m_unknownEncodingMem);
1215   if (parser->m_unknownEncodingRelease)
1216     parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1217   poolClear(&parser->m_tempPool);
1218   poolClear(&parser->m_temp2Pool);
1219   FREE(parser, (void *)parser->m_protocolEncodingName);
1220   parser->m_protocolEncodingName = NULL;
1221   parserInit(parser, encodingName);
1222   dtdReset(parser->m_dtd, &parser->m_mem);
1223   return XML_TRUE;
1224 }
1225 
1226 enum XML_Status XMLCALL
XML_SetEncoding(XML_Parser parser,const XML_Char * encodingName)1227 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
1228   if (parser == NULL)
1229     return XML_STATUS_ERROR;
1230   /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1231      XXX There's no way for the caller to determine which of the
1232      XXX possible error cases caused the XML_STATUS_ERROR return.
1233   */
1234   if (parser->m_parsingStatus.parsing == XML_PARSING
1235       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1236     return XML_STATUS_ERROR;
1237 
1238   /* Get rid of any previous encoding name */
1239   FREE(parser, (void *)parser->m_protocolEncodingName);
1240 
1241   if (encodingName == NULL)
1242     /* No new encoding name */
1243     parser->m_protocolEncodingName = NULL;
1244   else {
1245     /* Copy the new encoding name into allocated memory */
1246     parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1247     if (! parser->m_protocolEncodingName)
1248       return XML_STATUS_ERROR;
1249   }
1250   return XML_STATUS_OK;
1251 }
1252 
1253 XML_Parser XMLCALL
XML_ExternalEntityParserCreate(XML_Parser oldParser,const XML_Char * context,const XML_Char * encodingName)1254 XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
1255                                const XML_Char *encodingName) {
1256   XML_Parser parser = oldParser;
1257   DTD *newDtd = NULL;
1258   DTD *oldDtd;
1259   XML_StartElementHandler oldStartElementHandler;
1260   XML_EndElementHandler oldEndElementHandler;
1261   XML_CharacterDataHandler oldCharacterDataHandler;
1262   XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1263   XML_CommentHandler oldCommentHandler;
1264   XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1265   XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1266   XML_DefaultHandler oldDefaultHandler;
1267   XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1268   XML_NotationDeclHandler oldNotationDeclHandler;
1269   XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1270   XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1271   XML_NotStandaloneHandler oldNotStandaloneHandler;
1272   XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1273   XML_SkippedEntityHandler oldSkippedEntityHandler;
1274   XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1275   XML_ElementDeclHandler oldElementDeclHandler;
1276   XML_AttlistDeclHandler oldAttlistDeclHandler;
1277   XML_EntityDeclHandler oldEntityDeclHandler;
1278   XML_XmlDeclHandler oldXmlDeclHandler;
1279   ELEMENT_TYPE *oldDeclElementType;
1280 
1281   void *oldUserData;
1282   void *oldHandlerArg;
1283   XML_Bool oldDefaultExpandInternalEntities;
1284   XML_Parser oldExternalEntityRefHandlerArg;
1285 #ifdef XML_DTD
1286   enum XML_ParamEntityParsing oldParamEntityParsing;
1287   int oldInEntityValue;
1288 #endif
1289   XML_Bool oldns_triplets;
1290   /* Note that the new parser shares the same hash secret as the old
1291      parser, so that dtdCopy and copyEntityTable can lookup values
1292      from hash tables associated with either parser without us having
1293      to worry which hash secrets each table has.
1294   */
1295   unsigned long oldhash_secret_salt;
1296 
1297   /* Validate the oldParser parameter before we pull everything out of it */
1298   if (oldParser == NULL)
1299     return NULL;
1300 
1301   /* Stash the original parser contents on the stack */
1302   oldDtd = parser->m_dtd;
1303   oldStartElementHandler = parser->m_startElementHandler;
1304   oldEndElementHandler = parser->m_endElementHandler;
1305   oldCharacterDataHandler = parser->m_characterDataHandler;
1306   oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1307   oldCommentHandler = parser->m_commentHandler;
1308   oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1309   oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1310   oldDefaultHandler = parser->m_defaultHandler;
1311   oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1312   oldNotationDeclHandler = parser->m_notationDeclHandler;
1313   oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1314   oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1315   oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1316   oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1317   oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1318   oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1319   oldElementDeclHandler = parser->m_elementDeclHandler;
1320   oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1321   oldEntityDeclHandler = parser->m_entityDeclHandler;
1322   oldXmlDeclHandler = parser->m_xmlDeclHandler;
1323   oldDeclElementType = parser->m_declElementType;
1324 
1325   oldUserData = parser->m_userData;
1326   oldHandlerArg = parser->m_handlerArg;
1327   oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1328   oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
1329 #ifdef XML_DTD
1330   oldParamEntityParsing = parser->m_paramEntityParsing;
1331   oldInEntityValue = parser->m_prologState.inEntityValue;
1332 #endif
1333   oldns_triplets = parser->m_ns_triplets;
1334   /* Note that the new parser shares the same hash secret as the old
1335      parser, so that dtdCopy and copyEntityTable can lookup values
1336      from hash tables associated with either parser without us having
1337      to worry which hash secrets each table has.
1338   */
1339   oldhash_secret_salt = parser->m_hash_secret_salt;
1340 
1341 #ifdef XML_DTD
1342   if (! context)
1343     newDtd = oldDtd;
1344 #endif /* XML_DTD */
1345 
1346   /* Note that the magical uses of the pre-processor to make field
1347      access look more like C++ require that `parser' be overwritten
1348      here.  This makes this function more painful to follow than it
1349      would be otherwise.
1350   */
1351   if (parser->m_ns) {
1352     XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
1353     parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
1354   } else {
1355     parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
1356   }
1357 
1358   if (! parser)
1359     return NULL;
1360 
1361   parser->m_startElementHandler = oldStartElementHandler;
1362   parser->m_endElementHandler = oldEndElementHandler;
1363   parser->m_characterDataHandler = oldCharacterDataHandler;
1364   parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1365   parser->m_commentHandler = oldCommentHandler;
1366   parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1367   parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1368   parser->m_defaultHandler = oldDefaultHandler;
1369   parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1370   parser->m_notationDeclHandler = oldNotationDeclHandler;
1371   parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1372   parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1373   parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1374   parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1375   parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1376   parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1377   parser->m_elementDeclHandler = oldElementDeclHandler;
1378   parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1379   parser->m_entityDeclHandler = oldEntityDeclHandler;
1380   parser->m_xmlDeclHandler = oldXmlDeclHandler;
1381   parser->m_declElementType = oldDeclElementType;
1382   parser->m_userData = oldUserData;
1383   if (oldUserData == oldHandlerArg)
1384     parser->m_handlerArg = parser->m_userData;
1385   else
1386     parser->m_handlerArg = parser;
1387   if (oldExternalEntityRefHandlerArg != oldParser)
1388     parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1389   parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1390   parser->m_ns_triplets = oldns_triplets;
1391   parser->m_hash_secret_salt = oldhash_secret_salt;
1392   parser->m_parentParser = oldParser;
1393 #ifdef XML_DTD
1394   parser->m_paramEntityParsing = oldParamEntityParsing;
1395   parser->m_prologState.inEntityValue = oldInEntityValue;
1396   if (context) {
1397 #endif /* XML_DTD */
1398     if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
1399         || ! setContext(parser, context)) {
1400       XML_ParserFree(parser);
1401       return NULL;
1402     }
1403     parser->m_processor = externalEntityInitProcessor;
1404 #ifdef XML_DTD
1405   } else {
1406     /* The DTD instance referenced by parser->m_dtd is shared between the
1407        document's root parser and external PE parsers, therefore one does not
1408        need to call setContext. In addition, one also *must* not call
1409        setContext, because this would overwrite existing prefix->binding
1410        pointers in parser->m_dtd with ones that get destroyed with the external
1411        PE parser. This would leave those prefixes with dangling pointers.
1412     */
1413     parser->m_isParamEntity = XML_TRUE;
1414     XmlPrologStateInitExternalEntity(&parser->m_prologState);
1415     parser->m_processor = externalParEntInitProcessor;
1416   }
1417 #endif /* XML_DTD */
1418   return parser;
1419 }
1420 
1421 static void FASTCALL
destroyBindings(BINDING * bindings,XML_Parser parser)1422 destroyBindings(BINDING *bindings, XML_Parser parser) {
1423   for (;;) {
1424     BINDING *b = bindings;
1425     if (! b)
1426       break;
1427     bindings = b->nextTagBinding;
1428     FREE(parser, b->uri);
1429     FREE(parser, b);
1430   }
1431 }
1432 
1433 void XMLCALL
XML_ParserFree(XML_Parser parser)1434 XML_ParserFree(XML_Parser parser) {
1435   TAG *tagList;
1436   OPEN_INTERNAL_ENTITY *entityList;
1437   if (parser == NULL)
1438     return;
1439   /* free m_tagStack and m_freeTagList */
1440   tagList = parser->m_tagStack;
1441   for (;;) {
1442     TAG *p;
1443     if (tagList == NULL) {
1444       if (parser->m_freeTagList == NULL)
1445         break;
1446       tagList = parser->m_freeTagList;
1447       parser->m_freeTagList = NULL;
1448     }
1449     p = tagList;
1450     tagList = tagList->parent;
1451     FREE(parser, p->buf);
1452     destroyBindings(p->bindings, parser);
1453     FREE(parser, p);
1454   }
1455   /* free m_openInternalEntities and m_freeInternalEntities */
1456   entityList = parser->m_openInternalEntities;
1457   for (;;) {
1458     OPEN_INTERNAL_ENTITY *openEntity;
1459     if (entityList == NULL) {
1460       if (parser->m_freeInternalEntities == NULL)
1461         break;
1462       entityList = parser->m_freeInternalEntities;
1463       parser->m_freeInternalEntities = NULL;
1464     }
1465     openEntity = entityList;
1466     entityList = entityList->next;
1467     FREE(parser, openEntity);
1468   }
1469 
1470   destroyBindings(parser->m_freeBindingList, parser);
1471   destroyBindings(parser->m_inheritedBindings, parser);
1472   poolDestroy(&parser->m_tempPool);
1473   poolDestroy(&parser->m_temp2Pool);
1474   FREE(parser, (void *)parser->m_protocolEncodingName);
1475 #ifdef XML_DTD
1476   /* external parameter entity parsers share the DTD structure
1477      parser->m_dtd with the root parser, so we must not destroy it
1478   */
1479   if (! parser->m_isParamEntity && parser->m_dtd)
1480 #else
1481   if (parser->m_dtd)
1482 #endif /* XML_DTD */
1483     dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser,
1484                &parser->m_mem);
1485   FREE(parser, (void *)parser->m_atts);
1486 #ifdef XML_ATTR_INFO
1487   FREE(parser, (void *)parser->m_attInfo);
1488 #endif
1489   FREE(parser, parser->m_groupConnector);
1490   FREE(parser, parser->m_buffer);
1491   FREE(parser, parser->m_dataBuf);
1492   FREE(parser, parser->m_nsAtts);
1493   FREE(parser, parser->m_unknownEncodingMem);
1494   if (parser->m_unknownEncodingRelease)
1495     parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1496   FREE(parser, parser);
1497 }
1498 
1499 void XMLCALL
XML_UseParserAsHandlerArg(XML_Parser parser)1500 XML_UseParserAsHandlerArg(XML_Parser parser) {
1501   if (parser != NULL)
1502     parser->m_handlerArg = parser;
1503 }
1504 
1505 enum XML_Error XMLCALL
XML_UseForeignDTD(XML_Parser parser,XML_Bool useDTD)1506 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
1507   if (parser == NULL)
1508     return XML_ERROR_INVALID_ARGUMENT;
1509 #ifdef XML_DTD
1510   /* block after XML_Parse()/XML_ParseBuffer() has been called */
1511   if (parser->m_parsingStatus.parsing == XML_PARSING
1512       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1513     return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1514   parser->m_useForeignDTD = useDTD;
1515   return XML_ERROR_NONE;
1516 #else
1517   UNUSED_P(useDTD);
1518   return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1519 #endif
1520 }
1521 
1522 void XMLCALL
XML_SetReturnNSTriplet(XML_Parser parser,int do_nst)1523 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
1524   if (parser == NULL)
1525     return;
1526   /* block after XML_Parse()/XML_ParseBuffer() has been called */
1527   if (parser->m_parsingStatus.parsing == XML_PARSING
1528       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1529     return;
1530   parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1531 }
1532 
1533 void XMLCALL
XML_SetUserData(XML_Parser parser,void * p)1534 XML_SetUserData(XML_Parser parser, void *p) {
1535   if (parser == NULL)
1536     return;
1537   if (parser->m_handlerArg == parser->m_userData)
1538     parser->m_handlerArg = parser->m_userData = p;
1539   else
1540     parser->m_userData = p;
1541 }
1542 
1543 enum XML_Status XMLCALL
XML_SetBase(XML_Parser parser,const XML_Char * p)1544 XML_SetBase(XML_Parser parser, const XML_Char *p) {
1545   if (parser == NULL)
1546     return XML_STATUS_ERROR;
1547   if (p) {
1548     p = poolCopyString(&parser->m_dtd->pool, p);
1549     if (! p)
1550       return XML_STATUS_ERROR;
1551     parser->m_curBase = p;
1552   } else
1553     parser->m_curBase = NULL;
1554   return XML_STATUS_OK;
1555 }
1556 
1557 const XML_Char *XMLCALL
XML_GetBase(XML_Parser parser)1558 XML_GetBase(XML_Parser parser) {
1559   if (parser == NULL)
1560     return NULL;
1561   return parser->m_curBase;
1562 }
1563 
1564 int XMLCALL
XML_GetSpecifiedAttributeCount(XML_Parser parser)1565 XML_GetSpecifiedAttributeCount(XML_Parser parser) {
1566   if (parser == NULL)
1567     return -1;
1568   return parser->m_nSpecifiedAtts;
1569 }
1570 
1571 int XMLCALL
XML_GetIdAttributeIndex(XML_Parser parser)1572 XML_GetIdAttributeIndex(XML_Parser parser) {
1573   if (parser == NULL)
1574     return -1;
1575   return parser->m_idAttIndex;
1576 }
1577 
#ifdef XML_ATTR_INFO
/* Return the parser's attribute info array (m_attInfo), or NULL for a NULL
   parser.  Only compiled when XML_ATTR_INFO is defined. */
const XML_AttrInfo *XMLCALL
XML_GetAttributeInfo(XML_Parser parser) {
  if (parser != NULL) {
    return parser->m_attInfo;
  }
  return NULL;
}
#endif
1586 
1587 void XMLCALL
XML_SetElementHandler(XML_Parser parser,XML_StartElementHandler start,XML_EndElementHandler end)1588 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
1589                       XML_EndElementHandler end) {
1590   if (parser == NULL)
1591     return;
1592   parser->m_startElementHandler = start;
1593   parser->m_endElementHandler = end;
1594 }
1595 
1596 void XMLCALL
XML_SetStartElementHandler(XML_Parser parser,XML_StartElementHandler start)1597 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
1598   if (parser != NULL)
1599     parser->m_startElementHandler = start;
1600 }
1601 
1602 void XMLCALL
XML_SetEndElementHandler(XML_Parser parser,XML_EndElementHandler end)1603 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
1604   if (parser != NULL)
1605     parser->m_endElementHandler = end;
1606 }
1607 
1608 void XMLCALL
XML_SetCharacterDataHandler(XML_Parser parser,XML_CharacterDataHandler handler)1609 XML_SetCharacterDataHandler(XML_Parser parser,
1610                             XML_CharacterDataHandler handler) {
1611   if (parser != NULL)
1612     parser->m_characterDataHandler = handler;
1613 }
1614 
1615 void XMLCALL
XML_SetProcessingInstructionHandler(XML_Parser parser,XML_ProcessingInstructionHandler handler)1616 XML_SetProcessingInstructionHandler(XML_Parser parser,
1617                                     XML_ProcessingInstructionHandler handler) {
1618   if (parser != NULL)
1619     parser->m_processingInstructionHandler = handler;
1620 }
1621 
1622 void XMLCALL
XML_SetCommentHandler(XML_Parser parser,XML_CommentHandler handler)1623 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
1624   if (parser != NULL)
1625     parser->m_commentHandler = handler;
1626 }
1627 
1628 void XMLCALL
XML_SetCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start,XML_EndCdataSectionHandler end)1629 XML_SetCdataSectionHandler(XML_Parser parser,
1630                            XML_StartCdataSectionHandler start,
1631                            XML_EndCdataSectionHandler end) {
1632   if (parser == NULL)
1633     return;
1634   parser->m_startCdataSectionHandler = start;
1635   parser->m_endCdataSectionHandler = end;
1636 }
1637 
1638 void XMLCALL
XML_SetStartCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start)1639 XML_SetStartCdataSectionHandler(XML_Parser parser,
1640                                 XML_StartCdataSectionHandler start) {
1641   if (parser != NULL)
1642     parser->m_startCdataSectionHandler = start;
1643 }
1644 
1645 void XMLCALL
XML_SetEndCdataSectionHandler(XML_Parser parser,XML_EndCdataSectionHandler end)1646 XML_SetEndCdataSectionHandler(XML_Parser parser,
1647                               XML_EndCdataSectionHandler end) {
1648   if (parser != NULL)
1649     parser->m_endCdataSectionHandler = end;
1650 }
1651 
1652 void XMLCALL
XML_SetDefaultHandler(XML_Parser parser,XML_DefaultHandler handler)1653 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
1654   if (parser == NULL)
1655     return;
1656   parser->m_defaultHandler = handler;
1657   parser->m_defaultExpandInternalEntities = XML_FALSE;
1658 }
1659 
1660 void XMLCALL
XML_SetDefaultHandlerExpand(XML_Parser parser,XML_DefaultHandler handler)1661 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
1662   if (parser == NULL)
1663     return;
1664   parser->m_defaultHandler = handler;
1665   parser->m_defaultExpandInternalEntities = XML_TRUE;
1666 }
1667 
1668 void XMLCALL
XML_SetDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start,XML_EndDoctypeDeclHandler end)1669 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
1670                           XML_EndDoctypeDeclHandler end) {
1671   if (parser == NULL)
1672     return;
1673   parser->m_startDoctypeDeclHandler = start;
1674   parser->m_endDoctypeDeclHandler = end;
1675 }
1676 
1677 void XMLCALL
XML_SetStartDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start)1678 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1679                                XML_StartDoctypeDeclHandler start) {
1680   if (parser != NULL)
1681     parser->m_startDoctypeDeclHandler = start;
1682 }
1683 
1684 void XMLCALL
XML_SetEndDoctypeDeclHandler(XML_Parser parser,XML_EndDoctypeDeclHandler end)1685 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
1686   if (parser != NULL)
1687     parser->m_endDoctypeDeclHandler = end;
1688 }
1689 
1690 void XMLCALL
XML_SetUnparsedEntityDeclHandler(XML_Parser parser,XML_UnparsedEntityDeclHandler handler)1691 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1692                                  XML_UnparsedEntityDeclHandler handler) {
1693   if (parser != NULL)
1694     parser->m_unparsedEntityDeclHandler = handler;
1695 }
1696 
1697 void XMLCALL
XML_SetNotationDeclHandler(XML_Parser parser,XML_NotationDeclHandler handler)1698 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
1699   if (parser != NULL)
1700     parser->m_notationDeclHandler = handler;
1701 }
1702 
1703 void XMLCALL
XML_SetNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start,XML_EndNamespaceDeclHandler end)1704 XML_SetNamespaceDeclHandler(XML_Parser parser,
1705                             XML_StartNamespaceDeclHandler start,
1706                             XML_EndNamespaceDeclHandler end) {
1707   if (parser == NULL)
1708     return;
1709   parser->m_startNamespaceDeclHandler = start;
1710   parser->m_endNamespaceDeclHandler = end;
1711 }
1712 
1713 void XMLCALL
XML_SetStartNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start)1714 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1715                                  XML_StartNamespaceDeclHandler start) {
1716   if (parser != NULL)
1717     parser->m_startNamespaceDeclHandler = start;
1718 }
1719 
1720 void XMLCALL
XML_SetEndNamespaceDeclHandler(XML_Parser parser,XML_EndNamespaceDeclHandler end)1721 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1722                                XML_EndNamespaceDeclHandler end) {
1723   if (parser != NULL)
1724     parser->m_endNamespaceDeclHandler = end;
1725 }
1726 
1727 void XMLCALL
XML_SetNotStandaloneHandler(XML_Parser parser,XML_NotStandaloneHandler handler)1728 XML_SetNotStandaloneHandler(XML_Parser parser,
1729                             XML_NotStandaloneHandler handler) {
1730   if (parser != NULL)
1731     parser->m_notStandaloneHandler = handler;
1732 }
1733 
1734 void XMLCALL
XML_SetExternalEntityRefHandler(XML_Parser parser,XML_ExternalEntityRefHandler handler)1735 XML_SetExternalEntityRefHandler(XML_Parser parser,
1736                                 XML_ExternalEntityRefHandler handler) {
1737   if (parser != NULL)
1738     parser->m_externalEntityRefHandler = handler;
1739 }
1740 
1741 void XMLCALL
XML_SetExternalEntityRefHandlerArg(XML_Parser parser,void * arg)1742 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
1743   if (parser == NULL)
1744     return;
1745   if (arg)
1746     parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
1747   else
1748     parser->m_externalEntityRefHandlerArg = parser;
1749 }
1750 
1751 void XMLCALL
XML_SetSkippedEntityHandler(XML_Parser parser,XML_SkippedEntityHandler handler)1752 XML_SetSkippedEntityHandler(XML_Parser parser,
1753                             XML_SkippedEntityHandler handler) {
1754   if (parser != NULL)
1755     parser->m_skippedEntityHandler = handler;
1756 }
1757 
1758 void XMLCALL
XML_SetUnknownEncodingHandler(XML_Parser parser,XML_UnknownEncodingHandler handler,void * data)1759 XML_SetUnknownEncodingHandler(XML_Parser parser,
1760                               XML_UnknownEncodingHandler handler, void *data) {
1761   if (parser == NULL)
1762     return;
1763   parser->m_unknownEncodingHandler = handler;
1764   parser->m_unknownEncodingHandlerData = data;
1765 }
1766 
1767 void XMLCALL
XML_SetElementDeclHandler(XML_Parser parser,XML_ElementDeclHandler eldecl)1768 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
1769   if (parser != NULL)
1770     parser->m_elementDeclHandler = eldecl;
1771 }
1772 
1773 void XMLCALL
XML_SetAttlistDeclHandler(XML_Parser parser,XML_AttlistDeclHandler attdecl)1774 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
1775   if (parser != NULL)
1776     parser->m_attlistDeclHandler = attdecl;
1777 }
1778 
1779 void XMLCALL
XML_SetEntityDeclHandler(XML_Parser parser,XML_EntityDeclHandler handler)1780 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
1781   if (parser != NULL)
1782     parser->m_entityDeclHandler = handler;
1783 }
1784 
1785 void XMLCALL
XML_SetXmlDeclHandler(XML_Parser parser,XML_XmlDeclHandler handler)1786 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
1787   if (parser != NULL)
1788     parser->m_xmlDeclHandler = handler;
1789 }
1790 
1791 int XMLCALL
XML_SetParamEntityParsing(XML_Parser parser,enum XML_ParamEntityParsing peParsing)1792 XML_SetParamEntityParsing(XML_Parser parser,
1793                           enum XML_ParamEntityParsing peParsing) {
1794   if (parser == NULL)
1795     return 0;
1796   /* block after XML_Parse()/XML_ParseBuffer() has been called */
1797   if (parser->m_parsingStatus.parsing == XML_PARSING
1798       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1799     return 0;
1800 #ifdef XML_DTD
1801   parser->m_paramEntityParsing = peParsing;
1802   return 1;
1803 #else
1804   return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
1805 #endif
1806 }
1807 
1808 int XMLCALL
XML_SetHashSalt(XML_Parser parser,unsigned long hash_salt)1809 XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
1810   if (parser == NULL)
1811     return 0;
1812   if (parser->m_parentParser)
1813     return XML_SetHashSalt(parser->m_parentParser, hash_salt);
1814   /* block after XML_Parse()/XML_ParseBuffer() has been called */
1815   if (parser->m_parsingStatus.parsing == XML_PARSING
1816       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1817     return 0;
1818   parser->m_hash_secret_salt = hash_salt;
1819   return 1;
1820 }
1821 
1822 enum XML_Status XMLCALL
XML_Parse(XML_Parser parser,const char * s,int len,int isFinal)1823 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
1824   if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
1825     if (parser != NULL)
1826       parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
1827     return XML_STATUS_ERROR;
1828   }
1829   switch (parser->m_parsingStatus.parsing) {
1830   case XML_SUSPENDED:
1831     parser->m_errorCode = XML_ERROR_SUSPENDED;
1832     return XML_STATUS_ERROR;
1833   case XML_FINISHED:
1834     parser->m_errorCode = XML_ERROR_FINISHED;
1835     return XML_STATUS_ERROR;
1836   case XML_INITIALIZED:
1837     if (parser->m_parentParser == NULL && ! startParsing(parser)) {
1838       parser->m_errorCode = XML_ERROR_NO_MEMORY;
1839       return XML_STATUS_ERROR;
1840     }
1841     /* fall through */
1842   default:
1843     parser->m_parsingStatus.parsing = XML_PARSING;
1844   }
1845 
1846   if (len == 0) {
1847     parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1848     if (! isFinal)
1849       return XML_STATUS_OK;
1850     parser->m_positionPtr = parser->m_bufferPtr;
1851     parser->m_parseEndPtr = parser->m_bufferEnd;
1852 
1853     /* If data are left over from last buffer, and we now know that these
1854        data are the final chunk of input, then we have to check them again
1855        to detect errors based on that fact.
1856     */
1857     parser->m_errorCode
1858         = parser->m_processor(parser, parser->m_bufferPtr,
1859                               parser->m_parseEndPtr, &parser->m_bufferPtr);
1860 
1861     if (parser->m_errorCode == XML_ERROR_NONE) {
1862       switch (parser->m_parsingStatus.parsing) {
1863       case XML_SUSPENDED:
1864         /* It is hard to be certain, but it seems that this case
1865          * cannot occur.  This code is cleaning up a previous parse
1866          * with no new data (since len == 0).  Changing the parsing
1867          * state requires getting to execute a handler function, and
1868          * there doesn't seem to be an opportunity for that while in
1869          * this circumstance.
1870          *
1871          * Given the uncertainty, we retain the code but exclude it
1872          * from coverage tests.
1873          *
1874          * LCOV_EXCL_START
1875          */
1876         XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
1877                           parser->m_bufferPtr, &parser->m_position);
1878         parser->m_positionPtr = parser->m_bufferPtr;
1879         return XML_STATUS_SUSPENDED;
1880         /* LCOV_EXCL_STOP */
1881       case XML_INITIALIZED:
1882       case XML_PARSING:
1883         parser->m_parsingStatus.parsing = XML_FINISHED;
1884         /* fall through */
1885       default:
1886         return XML_STATUS_OK;
1887       }
1888     }
1889     parser->m_eventEndPtr = parser->m_eventPtr;
1890     parser->m_processor = errorProcessor;
1891     return XML_STATUS_ERROR;
1892   }
1893 #ifndef XML_CONTEXT_BYTES
1894   else if (parser->m_bufferPtr == parser->m_bufferEnd) {
1895     const char *end;
1896     int nLeftOver;
1897     enum XML_Status result;
1898     /* Detect overflow (a+b > MAX <==> b > MAX-a) */
1899     if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
1900       parser->m_errorCode = XML_ERROR_NO_MEMORY;
1901       parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1902       parser->m_processor = errorProcessor;
1903       return XML_STATUS_ERROR;
1904     }
1905     parser->m_parseEndByteIndex += len;
1906     parser->m_positionPtr = s;
1907     parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1908 
1909     parser->m_errorCode
1910         = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end);
1911 
1912     if (parser->m_errorCode != XML_ERROR_NONE) {
1913       parser->m_eventEndPtr = parser->m_eventPtr;
1914       parser->m_processor = errorProcessor;
1915       return XML_STATUS_ERROR;
1916     } else {
1917       switch (parser->m_parsingStatus.parsing) {
1918       case XML_SUSPENDED:
1919         result = XML_STATUS_SUSPENDED;
1920         break;
1921       case XML_INITIALIZED:
1922       case XML_PARSING:
1923         if (isFinal) {
1924           parser->m_parsingStatus.parsing = XML_FINISHED;
1925           return XML_STATUS_OK;
1926         }
1927       /* fall through */
1928       default:
1929         result = XML_STATUS_OK;
1930       }
1931     }
1932 
1933     XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
1934                       &parser->m_position);
1935     nLeftOver = s + len - end;
1936     if (nLeftOver) {
1937       if (parser->m_buffer == NULL
1938           || nLeftOver > parser->m_bufferLim - parser->m_buffer) {
1939         /* avoid _signed_ integer overflow */
1940         char *temp = NULL;
1941         const int bytesToAllocate = (int)((unsigned)len * 2U);
1942         if (bytesToAllocate > 0) {
1943           temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate);
1944         }
1945         if (temp == NULL) {
1946           parser->m_errorCode = XML_ERROR_NO_MEMORY;
1947           parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1948           parser->m_processor = errorProcessor;
1949           return XML_STATUS_ERROR;
1950         }
1951         parser->m_buffer = temp;
1952         parser->m_bufferLim = parser->m_buffer + bytesToAllocate;
1953       }
1954       memcpy(parser->m_buffer, end, nLeftOver);
1955     }
1956     parser->m_bufferPtr = parser->m_buffer;
1957     parser->m_bufferEnd = parser->m_buffer + nLeftOver;
1958     parser->m_positionPtr = parser->m_bufferPtr;
1959     parser->m_parseEndPtr = parser->m_bufferEnd;
1960     parser->m_eventPtr = parser->m_bufferPtr;
1961     parser->m_eventEndPtr = parser->m_bufferPtr;
1962     return result;
1963   }
1964 #endif /* not defined XML_CONTEXT_BYTES */
1965   else {
1966     void *buff = XML_GetBuffer(parser, len);
1967     if (buff == NULL)
1968       return XML_STATUS_ERROR;
1969     else {
1970       memcpy(buff, s, len);
1971       return XML_ParseBuffer(parser, len, isFinal);
1972     }
1973   }
1974 }
1975 
1976 enum XML_Status XMLCALL
XML_ParseBuffer(XML_Parser parser,int len,int isFinal)1977 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
1978   const char *start;
1979   enum XML_Status result = XML_STATUS_OK;
1980 
1981   if (parser == NULL)
1982     return XML_STATUS_ERROR;
1983   switch (parser->m_parsingStatus.parsing) {
1984   case XML_SUSPENDED:
1985     parser->m_errorCode = XML_ERROR_SUSPENDED;
1986     return XML_STATUS_ERROR;
1987   case XML_FINISHED:
1988     parser->m_errorCode = XML_ERROR_FINISHED;
1989     return XML_STATUS_ERROR;
1990   case XML_INITIALIZED:
1991     /* Has someone called XML_GetBuffer successfully before? */
1992     if (! parser->m_bufferPtr) {
1993       parser->m_errorCode = XML_ERROR_NO_BUFFER;
1994       return XML_STATUS_ERROR;
1995     }
1996 
1997     if (parser->m_parentParser == NULL && ! startParsing(parser)) {
1998       parser->m_errorCode = XML_ERROR_NO_MEMORY;
1999       return XML_STATUS_ERROR;
2000     }
2001     /* fall through */
2002   default:
2003     parser->m_parsingStatus.parsing = XML_PARSING;
2004   }
2005 
2006   start = parser->m_bufferPtr;
2007   parser->m_positionPtr = start;
2008   parser->m_bufferEnd += len;
2009   parser->m_parseEndPtr = parser->m_bufferEnd;
2010   parser->m_parseEndByteIndex += len;
2011   parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2012 
2013   parser->m_errorCode = parser->m_processor(
2014       parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr);
2015 
2016   if (parser->m_errorCode != XML_ERROR_NONE) {
2017     parser->m_eventEndPtr = parser->m_eventPtr;
2018     parser->m_processor = errorProcessor;
2019     return XML_STATUS_ERROR;
2020   } else {
2021     switch (parser->m_parsingStatus.parsing) {
2022     case XML_SUSPENDED:
2023       result = XML_STATUS_SUSPENDED;
2024       break;
2025     case XML_INITIALIZED:
2026     case XML_PARSING:
2027       if (isFinal) {
2028         parser->m_parsingStatus.parsing = XML_FINISHED;
2029         return result;
2030       }
2031     default:; /* should not happen */
2032     }
2033   }
2034 
2035   XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2036                     parser->m_bufferPtr, &parser->m_position);
2037   parser->m_positionPtr = parser->m_bufferPtr;
2038   return result;
2039 }
2040 
2041 void *XMLCALL
XML_GetBuffer(XML_Parser parser,int len)2042 XML_GetBuffer(XML_Parser parser, int len) {
2043   if (parser == NULL)
2044     return NULL;
2045   if (len < 0) {
2046     parser->m_errorCode = XML_ERROR_NO_MEMORY;
2047     return NULL;
2048   }
2049   switch (parser->m_parsingStatus.parsing) {
2050   case XML_SUSPENDED:
2051     parser->m_errorCode = XML_ERROR_SUSPENDED;
2052     return NULL;
2053   case XML_FINISHED:
2054     parser->m_errorCode = XML_ERROR_FINISHED;
2055     return NULL;
2056   default:;
2057   }
2058 
2059   if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) {
2060 #ifdef XML_CONTEXT_BYTES
2061     int keep;
2062 #endif /* defined XML_CONTEXT_BYTES */
2063     /* Do not invoke signed arithmetic overflow: */
2064     int neededSize = (int)((unsigned)len
2065                            + (unsigned)EXPAT_SAFE_PTR_DIFF(
2066                                parser->m_bufferEnd, parser->m_bufferPtr));
2067     if (neededSize < 0) {
2068       parser->m_errorCode = XML_ERROR_NO_MEMORY;
2069       return NULL;
2070     }
2071 #ifdef XML_CONTEXT_BYTES
2072     keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
2073     if (keep > XML_CONTEXT_BYTES)
2074       keep = XML_CONTEXT_BYTES;
2075     /* Detect and prevent integer overflow */
2076     if (keep > INT_MAX - neededSize) {
2077       parser->m_errorCode = XML_ERROR_NO_MEMORY;
2078       return NULL;
2079     }
2080     neededSize += keep;
2081 #endif /* defined XML_CONTEXT_BYTES */
2082     if (neededSize
2083         <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
2084 #ifdef XML_CONTEXT_BYTES
2085       if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
2086         int offset
2087             = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
2088               - keep;
2089         /* The buffer pointers cannot be NULL here; we have at least some bytes
2090          * in the buffer */
2091         memmove(parser->m_buffer, &parser->m_buffer[offset],
2092                 parser->m_bufferEnd - parser->m_bufferPtr + keep);
2093         parser->m_bufferEnd -= offset;
2094         parser->m_bufferPtr -= offset;
2095       }
2096 #else
2097       if (parser->m_buffer && parser->m_bufferPtr) {
2098         memmove(parser->m_buffer, parser->m_bufferPtr,
2099                 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2100         parser->m_bufferEnd
2101             = parser->m_buffer
2102               + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2103         parser->m_bufferPtr = parser->m_buffer;
2104       }
2105 #endif /* not defined XML_CONTEXT_BYTES */
2106     } else {
2107       char *newBuf;
2108       int bufferSize
2109           = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr);
2110       if (bufferSize == 0)
2111         bufferSize = INIT_BUFFER_SIZE;
2112       do {
2113         /* Do not invoke signed arithmetic overflow: */
2114         bufferSize = (int)(2U * (unsigned)bufferSize);
2115       } while (bufferSize < neededSize && bufferSize > 0);
2116       if (bufferSize <= 0) {
2117         parser->m_errorCode = XML_ERROR_NO_MEMORY;
2118         return NULL;
2119       }
2120       newBuf = (char *)MALLOC(parser, bufferSize);
2121       if (newBuf == 0) {
2122         parser->m_errorCode = XML_ERROR_NO_MEMORY;
2123         return NULL;
2124       }
2125       parser->m_bufferLim = newBuf + bufferSize;
2126 #ifdef XML_CONTEXT_BYTES
2127       if (parser->m_bufferPtr) {
2128         memcpy(newBuf, &parser->m_bufferPtr[-keep],
2129                EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2130                    + keep);
2131         FREE(parser, parser->m_buffer);
2132         parser->m_buffer = newBuf;
2133         parser->m_bufferEnd
2134             = parser->m_buffer
2135               + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2136               + keep;
2137         parser->m_bufferPtr = parser->m_buffer + keep;
2138       } else {
2139         /* This must be a brand new buffer with no data in it yet */
2140         parser->m_bufferEnd = newBuf;
2141         parser->m_bufferPtr = parser->m_buffer = newBuf;
2142       }
2143 #else
2144       if (parser->m_bufferPtr) {
2145         memcpy(newBuf, parser->m_bufferPtr,
2146                EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2147         FREE(parser, parser->m_buffer);
2148         parser->m_bufferEnd
2149             = newBuf
2150               + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2151       } else {
2152         /* This must be a brand new buffer with no data in it yet */
2153         parser->m_bufferEnd = newBuf;
2154       }
2155       parser->m_bufferPtr = parser->m_buffer = newBuf;
2156 #endif /* not defined XML_CONTEXT_BYTES */
2157     }
2158     parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2159     parser->m_positionPtr = NULL;
2160   }
2161   return parser->m_bufferEnd;
2162 }
2163 
2164 enum XML_Status XMLCALL
XML_StopParser(XML_Parser parser,XML_Bool resumable)2165 XML_StopParser(XML_Parser parser, XML_Bool resumable) {
2166   if (parser == NULL)
2167     return XML_STATUS_ERROR;
2168   switch (parser->m_parsingStatus.parsing) {
2169   case XML_SUSPENDED:
2170     if (resumable) {
2171       parser->m_errorCode = XML_ERROR_SUSPENDED;
2172       return XML_STATUS_ERROR;
2173     }
2174     parser->m_parsingStatus.parsing = XML_FINISHED;
2175     break;
2176   case XML_FINISHED:
2177     parser->m_errorCode = XML_ERROR_FINISHED;
2178     return XML_STATUS_ERROR;
2179   default:
2180     if (resumable) {
2181 #ifdef XML_DTD
2182       if (parser->m_isParamEntity) {
2183         parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2184         return XML_STATUS_ERROR;
2185       }
2186 #endif
2187       parser->m_parsingStatus.parsing = XML_SUSPENDED;
2188     } else
2189       parser->m_parsingStatus.parsing = XML_FINISHED;
2190   }
2191   return XML_STATUS_OK;
2192 }
2193 
2194 enum XML_Status XMLCALL
XML_ResumeParser(XML_Parser parser)2195 XML_ResumeParser(XML_Parser parser) {
2196   enum XML_Status result = XML_STATUS_OK;
2197 
2198   if (parser == NULL)
2199     return XML_STATUS_ERROR;
2200   if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2201     parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2202     return XML_STATUS_ERROR;
2203   }
2204   parser->m_parsingStatus.parsing = XML_PARSING;
2205 
2206   parser->m_errorCode = parser->m_processor(
2207       parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2208 
2209   if (parser->m_errorCode != XML_ERROR_NONE) {
2210     parser->m_eventEndPtr = parser->m_eventPtr;
2211     parser->m_processor = errorProcessor;
2212     return XML_STATUS_ERROR;
2213   } else {
2214     switch (parser->m_parsingStatus.parsing) {
2215     case XML_SUSPENDED:
2216       result = XML_STATUS_SUSPENDED;
2217       break;
2218     case XML_INITIALIZED:
2219     case XML_PARSING:
2220       if (parser->m_parsingStatus.finalBuffer) {
2221         parser->m_parsingStatus.parsing = XML_FINISHED;
2222         return result;
2223       }
2224     default:;
2225     }
2226   }
2227 
2228   XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2229                     parser->m_bufferPtr, &parser->m_position);
2230   parser->m_positionPtr = parser->m_bufferPtr;
2231   return result;
2232 }
2233 
2234 void XMLCALL
XML_GetParsingStatus(XML_Parser parser,XML_ParsingStatus * status)2235 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
2236   if (parser == NULL)
2237     return;
2238   assert(status != NULL);
2239   *status = parser->m_parsingStatus;
2240 }
2241 
2242 enum XML_Error XMLCALL
XML_GetErrorCode(XML_Parser parser)2243 XML_GetErrorCode(XML_Parser parser) {
2244   if (parser == NULL)
2245     return XML_ERROR_INVALID_ARGUMENT;
2246   return parser->m_errorCode;
2247 }
2248 
2249 XML_Index XMLCALL
XML_GetCurrentByteIndex(XML_Parser parser)2250 XML_GetCurrentByteIndex(XML_Parser parser) {
2251   if (parser == NULL)
2252     return -1;
2253   if (parser->m_eventPtr)
2254     return (XML_Index)(parser->m_parseEndByteIndex
2255                        - (parser->m_parseEndPtr - parser->m_eventPtr));
2256   return -1;
2257 }
2258 
2259 int XMLCALL
XML_GetCurrentByteCount(XML_Parser parser)2260 XML_GetCurrentByteCount(XML_Parser parser) {
2261   if (parser == NULL)
2262     return 0;
2263   if (parser->m_eventEndPtr && parser->m_eventPtr)
2264     return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2265   return 0;
2266 }
2267 
2268 const char *XMLCALL
XML_GetInputContext(XML_Parser parser,int * offset,int * size)2269 XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
2270 #ifdef XML_CONTEXT_BYTES
2271   if (parser == NULL)
2272     return NULL;
2273   if (parser->m_eventPtr && parser->m_buffer) {
2274     if (offset != NULL)
2275       *offset = (int)(parser->m_eventPtr - parser->m_buffer);
2276     if (size != NULL)
2277       *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2278     return parser->m_buffer;
2279   }
2280 #else
2281   (void)parser;
2282   (void)offset;
2283   (void)size;
2284 #endif /* defined XML_CONTEXT_BYTES */
2285   return (const char *)0;
2286 }
2287 
2288 XML_Size XMLCALL
XML_GetCurrentLineNumber(XML_Parser parser)2289 XML_GetCurrentLineNumber(XML_Parser parser) {
2290   if (parser == NULL)
2291     return 0;
2292   if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2293     XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2294                       parser->m_eventPtr, &parser->m_position);
2295     parser->m_positionPtr = parser->m_eventPtr;
2296   }
2297   return parser->m_position.lineNumber + 1;
2298 }
2299 
2300 XML_Size XMLCALL
XML_GetCurrentColumnNumber(XML_Parser parser)2301 XML_GetCurrentColumnNumber(XML_Parser parser) {
2302   if (parser == NULL)
2303     return 0;
2304   if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2305     XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2306                       parser->m_eventPtr, &parser->m_position);
2307     parser->m_positionPtr = parser->m_eventPtr;
2308   }
2309   return parser->m_position.columnNumber;
2310 }
2311 
2312 void XMLCALL
XML_FreeContentModel(XML_Parser parser,XML_Content * model)2313 XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
2314   if (parser != NULL)
2315     FREE(parser, model);
2316 }
2317 
2318 void *XMLCALL
XML_MemMalloc(XML_Parser parser,size_t size)2319 XML_MemMalloc(XML_Parser parser, size_t size) {
2320   if (parser == NULL)
2321     return NULL;
2322   return MALLOC(parser, size);
2323 }
2324 
2325 void *XMLCALL
XML_MemRealloc(XML_Parser parser,void * ptr,size_t size)2326 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
2327   if (parser == NULL)
2328     return NULL;
2329   return REALLOC(parser, ptr, size);
2330 }
2331 
2332 void XMLCALL
XML_MemFree(XML_Parser parser,void * ptr)2333 XML_MemFree(XML_Parser parser, void *ptr) {
2334   if (parser != NULL)
2335     FREE(parser, ptr);
2336 }
2337 
2338 void XMLCALL
XML_DefaultCurrent(XML_Parser parser)2339 XML_DefaultCurrent(XML_Parser parser) {
2340   if (parser == NULL)
2341     return;
2342   if (parser->m_defaultHandler) {
2343     if (parser->m_openInternalEntities)
2344       reportDefault(parser, parser->m_internalEncoding,
2345                     parser->m_openInternalEntities->internalEventPtr,
2346                     parser->m_openInternalEntities->internalEventEndPtr);
2347     else
2348       reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2349                     parser->m_eventEndPtr);
2350   }
2351 }
2352 
2353 const XML_LChar *XMLCALL
XML_ErrorString(enum XML_Error code)2354 XML_ErrorString(enum XML_Error code) {
2355   switch (code) {
2356   case XML_ERROR_NONE:
2357     return NULL;
2358   case XML_ERROR_NO_MEMORY:
2359     return XML_L("out of memory");
2360   case XML_ERROR_SYNTAX:
2361     return XML_L("syntax error");
2362   case XML_ERROR_NO_ELEMENTS:
2363     return XML_L("no element found");
2364   case XML_ERROR_INVALID_TOKEN:
2365     return XML_L("not well-formed (invalid token)");
2366   case XML_ERROR_UNCLOSED_TOKEN:
2367     return XML_L("unclosed token");
2368   case XML_ERROR_PARTIAL_CHAR:
2369     return XML_L("partial character");
2370   case XML_ERROR_TAG_MISMATCH:
2371     return XML_L("mismatched tag");
2372   case XML_ERROR_DUPLICATE_ATTRIBUTE:
2373     return XML_L("duplicate attribute");
2374   case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2375     return XML_L("junk after document element");
2376   case XML_ERROR_PARAM_ENTITY_REF:
2377     return XML_L("illegal parameter entity reference");
2378   case XML_ERROR_UNDEFINED_ENTITY:
2379     return XML_L("undefined entity");
2380   case XML_ERROR_RECURSIVE_ENTITY_REF:
2381     return XML_L("recursive entity reference");
2382   case XML_ERROR_ASYNC_ENTITY:
2383     return XML_L("asynchronous entity");
2384   case XML_ERROR_BAD_CHAR_REF:
2385     return XML_L("reference to invalid character number");
2386   case XML_ERROR_BINARY_ENTITY_REF:
2387     return XML_L("reference to binary entity");
2388   case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2389     return XML_L("reference to external entity in attribute");
2390   case XML_ERROR_MISPLACED_XML_PI:
2391     return XML_L("XML or text declaration not at start of entity");
2392   case XML_ERROR_UNKNOWN_ENCODING:
2393     return XML_L("unknown encoding");
2394   case XML_ERROR_INCORRECT_ENCODING:
2395     return XML_L("encoding specified in XML declaration is incorrect");
2396   case XML_ERROR_UNCLOSED_CDATA_SECTION:
2397     return XML_L("unclosed CDATA section");
2398   case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2399     return XML_L("error in processing external entity reference");
2400   case XML_ERROR_NOT_STANDALONE:
2401     return XML_L("document is not standalone");
2402   case XML_ERROR_UNEXPECTED_STATE:
2403     return XML_L("unexpected parser state - please send a bug report");
2404   case XML_ERROR_ENTITY_DECLARED_IN_PE:
2405     return XML_L("entity declared in parameter entity");
2406   case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2407     return XML_L("requested feature requires XML_DTD support in Expat");
2408   case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2409     return XML_L("cannot change setting once parsing has begun");
2410   /* Added in 1.95.7. */
2411   case XML_ERROR_UNBOUND_PREFIX:
2412     return XML_L("unbound prefix");
2413   /* Added in 1.95.8. */
2414   case XML_ERROR_UNDECLARING_PREFIX:
2415     return XML_L("must not undeclare prefix");
2416   case XML_ERROR_INCOMPLETE_PE:
2417     return XML_L("incomplete markup in parameter entity");
2418   case XML_ERROR_XML_DECL:
2419     return XML_L("XML declaration not well-formed");
2420   case XML_ERROR_TEXT_DECL:
2421     return XML_L("text declaration not well-formed");
2422   case XML_ERROR_PUBLICID:
2423     return XML_L("illegal character(s) in public id");
2424   case XML_ERROR_SUSPENDED:
2425     return XML_L("parser suspended");
2426   case XML_ERROR_NOT_SUSPENDED:
2427     return XML_L("parser not suspended");
2428   case XML_ERROR_ABORTED:
2429     return XML_L("parsing aborted");
2430   case XML_ERROR_FINISHED:
2431     return XML_L("parsing finished");
2432   case XML_ERROR_SUSPEND_PE:
2433     return XML_L("cannot suspend in external parameter entity");
2434   /* Added in 2.0.0. */
2435   case XML_ERROR_RESERVED_PREFIX_XML:
2436     return XML_L(
2437         "reserved prefix (xml) must not be undeclared or bound to another namespace name");
2438   case XML_ERROR_RESERVED_PREFIX_XMLNS:
2439     return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2440   case XML_ERROR_RESERVED_NAMESPACE_URI:
2441     return XML_L(
2442         "prefix must not be bound to one of the reserved namespace names");
2443   /* Added in 2.2.5. */
2444   case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2445     return XML_L("invalid argument");
2446     /* Added in 2.3.0. */
2447   case XML_ERROR_NO_BUFFER:
2448     return XML_L(
2449         "a successful prior call to function XML_GetBuffer is required");
2450   /* Added in 2.4.0. */
2451   case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
2452     return XML_L(
2453         "limit on input amplification factor (from DTD and entities) breached");
2454   }
2455   return NULL;
2456 }
2457 
2458 const XML_LChar *XMLCALL
XML_ExpatVersion(void)2459 XML_ExpatVersion(void) {
2460   /* V1 is used to string-ize the version number. However, it would
2461      string-ize the actual version macro *names* unless we get them
2462      substituted before being passed to V1. CPP is defined to expand
2463      a macro, then rescan for more expansions. Thus, we use V2 to expand
2464      the version macros, then CPP will expand the resulting V1() macro
2465      with the correct numerals. */
2466   /* ### I'm assuming cpp is portable in this respect... */
2467 
2468 #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
2469 #define V2(a, b, c) XML_L("expat_") V1(a, b, c)
2470 
2471   return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2472 
2473 #undef V1
2474 #undef V2
2475 }
2476 
2477 XML_Expat_Version XMLCALL
XML_ExpatVersionInfo(void)2478 XML_ExpatVersionInfo(void) {
2479   XML_Expat_Version version;
2480 
2481   version.major = XML_MAJOR_VERSION;
2482   version.minor = XML_MINOR_VERSION;
2483   version.micro = XML_MICRO_VERSION;
2484 
2485   return version;
2486 }
2487 
2488 const XML_Feature *XMLCALL
XML_GetFeatureList(void)2489 XML_GetFeatureList(void) {
2490   static const XML_Feature features[] = {
2491       {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2492        sizeof(XML_Char)},
2493       {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2494        sizeof(XML_LChar)},
2495 #ifdef XML_UNICODE
2496       {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
2497 #endif
2498 #ifdef XML_UNICODE_WCHAR_T
2499       {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
2500 #endif
2501 #ifdef XML_DTD
2502       {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
2503 #endif
2504 #ifdef XML_CONTEXT_BYTES
2505       {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2506        XML_CONTEXT_BYTES},
2507 #endif
2508 #ifdef XML_MIN_SIZE
2509       {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
2510 #endif
2511 #ifdef XML_NS
2512       {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2513 #endif
2514 #ifdef XML_LARGE_SIZE
2515       {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2516 #endif
2517 #ifdef XML_ATTR_INFO
2518       {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2519 #endif
2520 #ifdef XML_DTD
2521       /* Added in Expat 2.4.0. */
2522       {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
2523        XML_L("XML_BLAP_MAX_AMP"),
2524        (long int)
2525            EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
2526       {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
2527        XML_L("XML_BLAP_ACT_THRES"),
2528        EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
2529 #endif
2530       {XML_FEATURE_END, NULL, 0}};
2531 
2532   return features;
2533 }
2534 
2535 #ifdef XML_DTD
2536 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionMaximumAmplification(XML_Parser parser,float maximumAmplificationFactor)2537 XML_SetBillionLaughsAttackProtectionMaximumAmplification(
2538     XML_Parser parser, float maximumAmplificationFactor) {
2539   if ((parser == NULL) || (parser->m_parentParser != NULL)
2540       || isnan(maximumAmplificationFactor)
2541       || (maximumAmplificationFactor < 1.0f)) {
2542     return XML_FALSE;
2543   }
2544   parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
2545   return XML_TRUE;
2546 }
2547 
2548 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser parser,unsigned long long activationThresholdBytes)2549 XML_SetBillionLaughsAttackProtectionActivationThreshold(
2550     XML_Parser parser, unsigned long long activationThresholdBytes) {
2551   if ((parser == NULL) || (parser->m_parentParser != NULL)) {
2552     return XML_FALSE;
2553   }
2554   parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
2555   return XML_TRUE;
2556 }
2557 #endif /* XML_DTD */
2558 
2559 /* Initially tag->rawName always points into the parse buffer;
2560    for those TAG instances opened while the current parse buffer was
2561    processed, and not yet closed, we need to store tag->rawName in a more
2562    permanent location, since the parse buffer is about to be discarded.
2563 */
2564 static XML_Bool
storeRawNames(XML_Parser parser)2565 storeRawNames(XML_Parser parser) {
2566   TAG *tag = parser->m_tagStack;
2567   while (tag) {
2568     int bufSize;
2569     int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2570     size_t rawNameLen;
2571     char *rawNameBuf = tag->buf + nameLen;
2572     /* Stop if already stored.  Since m_tagStack is a stack, we can stop
2573        at the first entry that has already been copied; everything
2574        below it in the stack is already been accounted for in a
2575        previous call to this function.
2576     */
2577     if (tag->rawName == rawNameBuf)
2578       break;
2579     /* For re-use purposes we need to ensure that the
2580        size of tag->buf is a multiple of sizeof(XML_Char).
2581     */
2582     rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2583     /* Detect and prevent integer overflow. */
2584     if (rawNameLen > (size_t)INT_MAX - nameLen)
2585       return XML_FALSE;
2586     bufSize = nameLen + (int)rawNameLen;
2587     if (bufSize > tag->bufEnd - tag->buf) {
2588       char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2589       if (temp == NULL)
2590         return XML_FALSE;
2591       /* if tag->name.str points to tag->buf (only when namespace
2592          processing is off) then we have to update it
2593       */
2594       if (tag->name.str == (XML_Char *)tag->buf)
2595         tag->name.str = (XML_Char *)temp;
2596       /* if tag->name.localPart is set (when namespace processing is on)
2597          then update it as well, since it will always point into tag->buf
2598       */
2599       if (tag->name.localPart)
2600         tag->name.localPart
2601             = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf);
2602       tag->buf = temp;
2603       tag->bufEnd = temp + bufSize;
2604       rawNameBuf = temp + nameLen;
2605     }
2606     memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2607     tag->rawName = rawNameBuf;
2608     tag = tag->parent;
2609   }
2610   return XML_TRUE;
2611 }
2612 
2613 static enum XML_Error PTRCALL
contentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2614 contentProcessor(XML_Parser parser, const char *start, const char *end,
2615                  const char **endPtr) {
2616   enum XML_Error result = doContent(
2617       parser, 0, parser->m_encoding, start, end, endPtr,
2618       (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
2619   if (result == XML_ERROR_NONE) {
2620     if (! storeRawNames(parser))
2621       return XML_ERROR_NO_MEMORY;
2622   }
2623   return result;
2624 }
2625 
2626 static enum XML_Error PTRCALL
externalEntityInitProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2627 externalEntityInitProcessor(XML_Parser parser, const char *start,
2628                             const char *end, const char **endPtr) {
2629   enum XML_Error result = initializeEncoding(parser);
2630   if (result != XML_ERROR_NONE)
2631     return result;
2632   parser->m_processor = externalEntityInitProcessor2;
2633   return externalEntityInitProcessor2(parser, start, end, endPtr);
2634 }
2635 
2636 static enum XML_Error PTRCALL
externalEntityInitProcessor2(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2637 externalEntityInitProcessor2(XML_Parser parser, const char *start,
2638                              const char *end, const char **endPtr) {
2639   const char *next = start; /* XmlContentTok doesn't always set the last arg */
2640   int tok = XmlContentTok(parser->m_encoding, start, end, &next);
2641   switch (tok) {
2642   case XML_TOK_BOM:
2643 #ifdef XML_DTD
2644     if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
2645                                   XML_ACCOUNT_DIRECT)) {
2646       accountingOnAbort(parser);
2647       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2648     }
2649 #endif /* XML_DTD */
2650 
2651     /* If we are at the end of the buffer, this would cause the next stage,
2652        i.e. externalEntityInitProcessor3, to pass control directly to
2653        doContent (by detecting XML_TOK_NONE) without processing any xml text
2654        declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2655     */
2656     if (next == end && ! parser->m_parsingStatus.finalBuffer) {
2657       *endPtr = next;
2658       return XML_ERROR_NONE;
2659     }
2660     start = next;
2661     break;
2662   case XML_TOK_PARTIAL:
2663     if (! parser->m_parsingStatus.finalBuffer) {
2664       *endPtr = start;
2665       return XML_ERROR_NONE;
2666     }
2667     parser->m_eventPtr = start;
2668     return XML_ERROR_UNCLOSED_TOKEN;
2669   case XML_TOK_PARTIAL_CHAR:
2670     if (! parser->m_parsingStatus.finalBuffer) {
2671       *endPtr = start;
2672       return XML_ERROR_NONE;
2673     }
2674     parser->m_eventPtr = start;
2675     return XML_ERROR_PARTIAL_CHAR;
2676   }
2677   parser->m_processor = externalEntityInitProcessor3;
2678   return externalEntityInitProcessor3(parser, start, end, endPtr);
2679 }
2680 
2681 static enum XML_Error PTRCALL
externalEntityInitProcessor3(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2682 externalEntityInitProcessor3(XML_Parser parser, const char *start,
2683                              const char *end, const char **endPtr) {
2684   int tok;
2685   const char *next = start; /* XmlContentTok doesn't always set the last arg */
2686   parser->m_eventPtr = start;
2687   tok = XmlContentTok(parser->m_encoding, start, end, &next);
2688   /* Note: These bytes are accounted later in:
2689            - processXmlDecl
2690            - externalEntityContentProcessor
2691   */
2692   parser->m_eventEndPtr = next;
2693 
2694   switch (tok) {
2695   case XML_TOK_XML_DECL: {
2696     enum XML_Error result;
2697     result = processXmlDecl(parser, 1, start, next);
2698     if (result != XML_ERROR_NONE)
2699       return result;
2700     switch (parser->m_parsingStatus.parsing) {
2701     case XML_SUSPENDED:
2702       *endPtr = next;
2703       return XML_ERROR_NONE;
2704     case XML_FINISHED:
2705       return XML_ERROR_ABORTED;
2706     default:
2707       start = next;
2708     }
2709   } break;
2710   case XML_TOK_PARTIAL:
2711     if (! parser->m_parsingStatus.finalBuffer) {
2712       *endPtr = start;
2713       return XML_ERROR_NONE;
2714     }
2715     return XML_ERROR_UNCLOSED_TOKEN;
2716   case XML_TOK_PARTIAL_CHAR:
2717     if (! parser->m_parsingStatus.finalBuffer) {
2718       *endPtr = start;
2719       return XML_ERROR_NONE;
2720     }
2721     return XML_ERROR_PARTIAL_CHAR;
2722   }
2723   parser->m_processor = externalEntityContentProcessor;
2724   parser->m_tagLevel = 1;
2725   return externalEntityContentProcessor(parser, start, end, endPtr);
2726 }
2727 
2728 static enum XML_Error PTRCALL
externalEntityContentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2729 externalEntityContentProcessor(XML_Parser parser, const char *start,
2730                                const char *end, const char **endPtr) {
2731   enum XML_Error result
2732       = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
2733                   (XML_Bool)! parser->m_parsingStatus.finalBuffer,
2734                   XML_ACCOUNT_ENTITY_EXPANSION);
2735   if (result == XML_ERROR_NONE) {
2736     if (! storeRawNames(parser))
2737       return XML_ERROR_NO_MEMORY;
2738   }
2739   return result;
2740 }
2741 
2742 static enum XML_Error
doContent(XML_Parser parser,int startTagLevel,const ENCODING * enc,const char * s,const char * end,const char ** nextPtr,XML_Bool haveMore,enum XML_Account account)2743 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
2744           const char *s, const char *end, const char **nextPtr,
2745           XML_Bool haveMore, enum XML_Account account) {
2746   /* save one level of indirection */
2747   DTD *const dtd = parser->m_dtd;
2748 
2749   const char **eventPP;
2750   const char **eventEndPP;
2751   if (enc == parser->m_encoding) {
2752     eventPP = &parser->m_eventPtr;
2753     eventEndPP = &parser->m_eventEndPtr;
2754   } else {
2755     eventPP = &(parser->m_openInternalEntities->internalEventPtr);
2756     eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
2757   }
2758   *eventPP = s;
2759 
2760   for (;;) {
2761     const char *next = s; /* XmlContentTok doesn't always set the last arg */
2762     int tok = XmlContentTok(enc, s, end, &next);
2763 #ifdef XML_DTD
2764     const char *accountAfter
2765         = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
2766               ? (haveMore ? s /* i.e. 0 bytes */ : end)
2767               : next;
2768     if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
2769                                   account)) {
2770       accountingOnAbort(parser);
2771       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2772     }
2773 #endif
2774     *eventEndPP = next;
2775     switch (tok) {
2776     case XML_TOK_TRAILING_CR:
2777       if (haveMore) {
2778         *nextPtr = s;
2779         return XML_ERROR_NONE;
2780       }
2781       *eventEndPP = end;
2782       if (parser->m_characterDataHandler) {
2783         XML_Char c = 0xA;
2784         parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
2785       } else if (parser->m_defaultHandler)
2786         reportDefault(parser, enc, s, end);
2787       /* We are at the end of the final buffer, should we check for
2788          XML_SUSPENDED, XML_FINISHED?
2789       */
2790       if (startTagLevel == 0)
2791         return XML_ERROR_NO_ELEMENTS;
2792       if (parser->m_tagLevel != startTagLevel)
2793         return XML_ERROR_ASYNC_ENTITY;
2794       *nextPtr = end;
2795       return XML_ERROR_NONE;
2796     case XML_TOK_NONE:
2797       if (haveMore) {
2798         *nextPtr = s;
2799         return XML_ERROR_NONE;
2800       }
2801       if (startTagLevel > 0) {
2802         if (parser->m_tagLevel != startTagLevel)
2803           return XML_ERROR_ASYNC_ENTITY;
2804         *nextPtr = s;
2805         return XML_ERROR_NONE;
2806       }
2807       return XML_ERROR_NO_ELEMENTS;
2808     case XML_TOK_INVALID:
2809       *eventPP = next;
2810       return XML_ERROR_INVALID_TOKEN;
2811     case XML_TOK_PARTIAL:
2812       if (haveMore) {
2813         *nextPtr = s;
2814         return XML_ERROR_NONE;
2815       }
2816       return XML_ERROR_UNCLOSED_TOKEN;
2817     case XML_TOK_PARTIAL_CHAR:
2818       if (haveMore) {
2819         *nextPtr = s;
2820         return XML_ERROR_NONE;
2821       }
2822       return XML_ERROR_PARTIAL_CHAR;
2823     case XML_TOK_ENTITY_REF: {
2824       const XML_Char *name;
2825       ENTITY *entity;
2826       XML_Char ch = (XML_Char)XmlPredefinedEntityName(
2827           enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
2828       if (ch) {
2829 #ifdef XML_DTD
2830         /* NOTE: We are replacing 4-6 characters original input for 1 character
2831          *       so there is no amplification and hence recording without
2832          *       protection. */
2833         accountingDiffTolerated(parser, tok, (char *)&ch,
2834                                 ((char *)&ch) + sizeof(XML_Char), __LINE__,
2835                                 XML_ACCOUNT_ENTITY_EXPANSION);
2836 #endif /* XML_DTD */
2837         if (parser->m_characterDataHandler)
2838           parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
2839         else if (parser->m_defaultHandler)
2840           reportDefault(parser, enc, s, next);
2841         break;
2842       }
2843       name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
2844                              next - enc->minBytesPerChar);
2845       if (! name)
2846         return XML_ERROR_NO_MEMORY;
2847       entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
2848       poolDiscard(&dtd->pool);
2849       /* First, determine if a check for an existing declaration is needed;
2850          if yes, check that the entity exists, and that it is internal,
2851          otherwise call the skipped entity or default handler.
2852       */
2853       if (! dtd->hasParamEntityRefs || dtd->standalone) {
2854         if (! entity)
2855           return XML_ERROR_UNDEFINED_ENTITY;
2856         else if (! entity->is_internal)
2857           return XML_ERROR_ENTITY_DECLARED_IN_PE;
2858       } else if (! entity) {
2859         if (parser->m_skippedEntityHandler)
2860           parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
2861         else if (parser->m_defaultHandler)
2862           reportDefault(parser, enc, s, next);
2863         break;
2864       }
2865       if (entity->open)
2866         return XML_ERROR_RECURSIVE_ENTITY_REF;
2867       if (entity->notation)
2868         return XML_ERROR_BINARY_ENTITY_REF;
2869       if (entity->textPtr) {
2870         enum XML_Error result;
2871         if (! parser->m_defaultExpandInternalEntities) {
2872           if (parser->m_skippedEntityHandler)
2873             parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
2874                                            0);
2875           else if (parser->m_defaultHandler)
2876             reportDefault(parser, enc, s, next);
2877           break;
2878         }
2879         result = processInternalEntity(parser, entity, XML_FALSE);
2880         if (result != XML_ERROR_NONE)
2881           return result;
2882       } else if (parser->m_externalEntityRefHandler) {
2883         const XML_Char *context;
2884         entity->open = XML_TRUE;
2885         context = getContext(parser);
2886         entity->open = XML_FALSE;
2887         if (! context)
2888           return XML_ERROR_NO_MEMORY;
2889         if (! parser->m_externalEntityRefHandler(
2890                 parser->m_externalEntityRefHandlerArg, context, entity->base,
2891                 entity->systemId, entity->publicId))
2892           return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2893         poolDiscard(&parser->m_tempPool);
2894       } else if (parser->m_defaultHandler)
2895         reportDefault(parser, enc, s, next);
2896       break;
2897     }
2898     case XML_TOK_START_TAG_NO_ATTS:
2899       /* fall through */
2900     case XML_TOK_START_TAG_WITH_ATTS: {
2901       TAG *tag;
2902       enum XML_Error result;
2903       XML_Char *toPtr;
2904       if (parser->m_freeTagList) {
2905         tag = parser->m_freeTagList;
2906         parser->m_freeTagList = parser->m_freeTagList->parent;
2907       } else {
2908         tag = (TAG *)MALLOC(parser, sizeof(TAG));
2909         if (! tag)
2910           return XML_ERROR_NO_MEMORY;
2911         tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
2912         if (! tag->buf) {
2913           FREE(parser, tag);
2914           return XML_ERROR_NO_MEMORY;
2915         }
2916         tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
2917       }
2918       tag->bindings = NULL;
2919       tag->parent = parser->m_tagStack;
2920       parser->m_tagStack = tag;
2921       tag->name.localPart = NULL;
2922       tag->name.prefix = NULL;
2923       tag->rawName = s + enc->minBytesPerChar;
2924       tag->rawNameLength = XmlNameLength(enc, tag->rawName);
2925       ++parser->m_tagLevel;
2926       {
2927         const char *rawNameEnd = tag->rawName + tag->rawNameLength;
2928         const char *fromPtr = tag->rawName;
2929         toPtr = (XML_Char *)tag->buf;
2930         for (;;) {
2931           int bufSize;
2932           int convLen;
2933           const enum XML_Convert_Result convert_res
2934               = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
2935                            (ICHAR *)tag->bufEnd - 1);
2936           convLen = (int)(toPtr - (XML_Char *)tag->buf);
2937           if ((fromPtr >= rawNameEnd)
2938               || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
2939             tag->name.strLen = convLen;
2940             break;
2941           }
2942           bufSize = (int)(tag->bufEnd - tag->buf) << 1;
2943           {
2944             char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2945             if (temp == NULL)
2946               return XML_ERROR_NO_MEMORY;
2947             tag->buf = temp;
2948             tag->bufEnd = temp + bufSize;
2949             toPtr = (XML_Char *)temp + convLen;
2950           }
2951         }
2952       }
2953       tag->name.str = (XML_Char *)tag->buf;
2954       *toPtr = XML_T('\0');
2955       result
2956           = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
2957       if (result)
2958         return result;
2959       if (parser->m_startElementHandler)
2960         parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
2961                                       (const XML_Char **)parser->m_atts);
2962       else if (parser->m_defaultHandler)
2963         reportDefault(parser, enc, s, next);
2964       poolClear(&parser->m_tempPool);
2965       break;
2966     }
2967     case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
2968       /* fall through */
2969     case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
2970       const char *rawName = s + enc->minBytesPerChar;
2971       enum XML_Error result;
2972       BINDING *bindings = NULL;
2973       XML_Bool noElmHandlers = XML_TRUE;
2974       TAG_NAME name;
2975       name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
2976                                  rawName + XmlNameLength(enc, rawName));
2977       if (! name.str)
2978         return XML_ERROR_NO_MEMORY;
2979       poolFinish(&parser->m_tempPool);
2980       result = storeAtts(parser, enc, s, &name, &bindings,
2981                          XML_ACCOUNT_NONE /* token spans whole start tag */);
2982       if (result != XML_ERROR_NONE) {
2983         freeBindings(parser, bindings);
2984         return result;
2985       }
2986       poolFinish(&parser->m_tempPool);
2987       if (parser->m_startElementHandler) {
2988         parser->m_startElementHandler(parser->m_handlerArg, name.str,
2989                                       (const XML_Char **)parser->m_atts);
2990         noElmHandlers = XML_FALSE;
2991       }
2992       if (parser->m_endElementHandler) {
2993         if (parser->m_startElementHandler)
2994           *eventPP = *eventEndPP;
2995         parser->m_endElementHandler(parser->m_handlerArg, name.str);
2996         noElmHandlers = XML_FALSE;
2997       }
2998       if (noElmHandlers && parser->m_defaultHandler)
2999         reportDefault(parser, enc, s, next);
3000       poolClear(&parser->m_tempPool);
3001       freeBindings(parser, bindings);
3002     }
3003       if ((parser->m_tagLevel == 0)
3004           && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3005         if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3006           parser->m_processor = epilogProcessor;
3007         else
3008           return epilogProcessor(parser, next, end, nextPtr);
3009       }
3010       break;
3011     case XML_TOK_END_TAG:
3012       if (parser->m_tagLevel == startTagLevel)
3013         return XML_ERROR_ASYNC_ENTITY;
3014       else {
3015         int len;
3016         const char *rawName;
3017         TAG *tag = parser->m_tagStack;
3018         parser->m_tagStack = tag->parent;
3019         tag->parent = parser->m_freeTagList;
3020         parser->m_freeTagList = tag;
3021         rawName = s + enc->minBytesPerChar * 2;
3022         len = XmlNameLength(enc, rawName);
3023         if (len != tag->rawNameLength
3024             || memcmp(tag->rawName, rawName, len) != 0) {
3025           *eventPP = rawName;
3026           return XML_ERROR_TAG_MISMATCH;
3027         }
3028         --parser->m_tagLevel;
3029         if (parser->m_endElementHandler) {
3030           const XML_Char *localPart;
3031           const XML_Char *prefix;
3032           XML_Char *uri;
3033           localPart = tag->name.localPart;
3034           if (parser->m_ns && localPart) {
3035             /* localPart and prefix may have been overwritten in
3036                tag->name.str, since this points to the binding->uri
3037                buffer which gets re-used; so we have to add them again
3038             */
3039             uri = (XML_Char *)tag->name.str + tag->name.uriLen;
3040             /* don't need to check for space - already done in storeAtts() */
3041             while (*localPart)
3042               *uri++ = *localPart++;
3043             prefix = (XML_Char *)tag->name.prefix;
3044             if (parser->m_ns_triplets && prefix) {
3045               *uri++ = parser->m_namespaceSeparator;
3046               while (*prefix)
3047                 *uri++ = *prefix++;
3048             }
3049             *uri = XML_T('\0');
3050           }
3051           parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
3052         } else if (parser->m_defaultHandler)
3053           reportDefault(parser, enc, s, next);
3054         while (tag->bindings) {
3055           BINDING *b = tag->bindings;
3056           if (parser->m_endNamespaceDeclHandler)
3057             parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
3058                                               b->prefix->name);
3059           tag->bindings = tag->bindings->nextTagBinding;
3060           b->nextTagBinding = parser->m_freeBindingList;
3061           parser->m_freeBindingList = b;
3062           b->prefix->binding = b->prevPrefixBinding;
3063         }
3064         if ((parser->m_tagLevel == 0)
3065             && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3066           if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3067             parser->m_processor = epilogProcessor;
3068           else
3069             return epilogProcessor(parser, next, end, nextPtr);
3070         }
3071       }
3072       break;
3073     case XML_TOK_CHAR_REF: {
3074       int n = XmlCharRefNumber(enc, s);
3075       if (n < 0)
3076         return XML_ERROR_BAD_CHAR_REF;
3077       if (parser->m_characterDataHandler) {
3078         XML_Char buf[XML_ENCODE_MAX];
3079         parser->m_characterDataHandler(parser->m_handlerArg, buf,
3080                                        XmlEncode(n, (ICHAR *)buf));
3081       } else if (parser->m_defaultHandler)
3082         reportDefault(parser, enc, s, next);
3083     } break;
3084     case XML_TOK_XML_DECL:
3085       return XML_ERROR_MISPLACED_XML_PI;
3086     case XML_TOK_DATA_NEWLINE:
3087       if (parser->m_characterDataHandler) {
3088         XML_Char c = 0xA;
3089         parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3090       } else if (parser->m_defaultHandler)
3091         reportDefault(parser, enc, s, next);
3092       break;
3093     case XML_TOK_CDATA_SECT_OPEN: {
3094       enum XML_Error result;
3095       if (parser->m_startCdataSectionHandler)
3096         parser->m_startCdataSectionHandler(parser->m_handlerArg);
3097       /* BEGIN disabled code */
3098       /* Suppose you doing a transformation on a document that involves
3099          changing only the character data.  You set up a defaultHandler
3100          and a characterDataHandler.  The defaultHandler simply copies
3101          characters through.  The characterDataHandler does the
3102          transformation and writes the characters out escaping them as
3103          necessary.  This case will fail to work if we leave out the
3104          following two lines (because & and < inside CDATA sections will
3105          be incorrectly escaped).
3106 
3107          However, now we have a start/endCdataSectionHandler, so it seems
3108          easier to let the user deal with this.
3109       */
3110       else if (0 && parser->m_characterDataHandler)
3111         parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3112                                        0);
3113       /* END disabled code */
3114       else if (parser->m_defaultHandler)
3115         reportDefault(parser, enc, s, next);
3116       result
3117           = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
3118       if (result != XML_ERROR_NONE)
3119         return result;
3120       else if (! next) {
3121         parser->m_processor = cdataSectionProcessor;
3122         return result;
3123       }
3124     } break;
3125     case XML_TOK_TRAILING_RSQB:
3126       if (haveMore) {
3127         *nextPtr = s;
3128         return XML_ERROR_NONE;
3129       }
3130       if (parser->m_characterDataHandler) {
3131         if (MUST_CONVERT(enc, s)) {
3132           ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3133           XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3134           parser->m_characterDataHandler(
3135               parser->m_handlerArg, parser->m_dataBuf,
3136               (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3137         } else
3138           parser->m_characterDataHandler(
3139               parser->m_handlerArg, (XML_Char *)s,
3140               (int)((XML_Char *)end - (XML_Char *)s));
3141       } else if (parser->m_defaultHandler)
3142         reportDefault(parser, enc, s, end);
3143       /* We are at the end of the final buffer, should we check for
3144          XML_SUSPENDED, XML_FINISHED?
3145       */
3146       if (startTagLevel == 0) {
3147         *eventPP = end;
3148         return XML_ERROR_NO_ELEMENTS;
3149       }
3150       if (parser->m_tagLevel != startTagLevel) {
3151         *eventPP = end;
3152         return XML_ERROR_ASYNC_ENTITY;
3153       }
3154       *nextPtr = end;
3155       return XML_ERROR_NONE;
3156     case XML_TOK_DATA_CHARS: {
3157       XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3158       if (charDataHandler) {
3159         if (MUST_CONVERT(enc, s)) {
3160           for (;;) {
3161             ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3162             const enum XML_Convert_Result convert_res = XmlConvert(
3163                 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3164             *eventEndPP = s;
3165             charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3166                             (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3167             if ((convert_res == XML_CONVERT_COMPLETED)
3168                 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3169               break;
3170             *eventPP = s;
3171           }
3172         } else
3173           charDataHandler(parser->m_handlerArg, (XML_Char *)s,
3174                           (int)((XML_Char *)next - (XML_Char *)s));
3175       } else if (parser->m_defaultHandler)
3176         reportDefault(parser, enc, s, next);
3177     } break;
3178     case XML_TOK_PI:
3179       if (! reportProcessingInstruction(parser, enc, s, next))
3180         return XML_ERROR_NO_MEMORY;
3181       break;
3182     case XML_TOK_COMMENT:
3183       if (! reportComment(parser, enc, s, next))
3184         return XML_ERROR_NO_MEMORY;
3185       break;
3186     default:
3187       /* All of the tokens produced by XmlContentTok() have their own
3188        * explicit cases, so this default is not strictly necessary.
3189        * However it is a useful safety net, so we retain the code and
3190        * simply exclude it from the coverage tests.
3191        *
3192        * LCOV_EXCL_START
3193        */
3194       if (parser->m_defaultHandler)
3195         reportDefault(parser, enc, s, next);
3196       break;
3197       /* LCOV_EXCL_STOP */
3198     }
3199     *eventPP = s = next;
3200     switch (parser->m_parsingStatus.parsing) {
3201     case XML_SUSPENDED:
3202       *nextPtr = next;
3203       return XML_ERROR_NONE;
3204     case XML_FINISHED:
3205       return XML_ERROR_ABORTED;
3206     default:;
3207     }
3208   }
3209   /* not reached */
3210 }
3211 
3212 /* This function does not call free() on the allocated memory, merely
3213  * moving it to the parser's m_freeBindingList where it can be freed or
3214  * reused as appropriate.
3215  */
3216 static void
freeBindings(XML_Parser parser,BINDING * bindings)3217 freeBindings(XML_Parser parser, BINDING *bindings) {
3218   while (bindings) {
3219     BINDING *b = bindings;
3220 
3221     /* m_startNamespaceDeclHandler will have been called for this
3222      * binding in addBindings(), so call the end handler now.
3223      */
3224     if (parser->m_endNamespaceDeclHandler)
3225       parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3226 
3227     bindings = bindings->nextTagBinding;
3228     b->nextTagBinding = parser->m_freeBindingList;
3229     parser->m_freeBindingList = b;
3230     b->prefix->binding = b->prevPrefixBinding;
3231   }
3232 }
3233 
3234 /* Precondition: all arguments must be non-NULL;
3235    Purpose:
3236    - normalize attributes
3237    - check attributes for well-formedness
3238    - generate namespace aware attribute names (URI, prefix)
3239    - build list of attributes for startElementHandler
3240    - default attributes
3241    - process namespace declarations (check and report them)
3242    - generate namespace aware element name (URI, prefix)
3243 */
3244 static enum XML_Error
storeAtts(XML_Parser parser,const ENCODING * enc,const char * attStr,TAG_NAME * tagNamePtr,BINDING ** bindingsPtr,enum XML_Account account)3245 storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
3246           TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
3247           enum XML_Account account) {
3248   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
3249   ELEMENT_TYPE *elementType;
3250   int nDefaultAtts;
3251   const XML_Char **appAtts; /* the attribute list for the application */
3252   int attIndex = 0;
3253   int prefixLen;
3254   int i;
3255   int n;
3256   XML_Char *uri;
3257   int nPrefixes = 0;
3258   BINDING *binding;
3259   const XML_Char *localPart;
3260 
3261   /* lookup the element type name */
3262   elementType
3263       = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
3264   if (! elementType) {
3265     const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3266     if (! name)
3267       return XML_ERROR_NO_MEMORY;
3268     elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
3269                                          sizeof(ELEMENT_TYPE));
3270     if (! elementType)
3271       return XML_ERROR_NO_MEMORY;
3272     if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
3273       return XML_ERROR_NO_MEMORY;
3274   }
3275   nDefaultAtts = elementType->nDefaultAtts;
3276 
3277   /* get the attributes from the tokenizer */
3278   n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3279 
3280   /* Detect and prevent integer overflow */
3281   if (n > INT_MAX - nDefaultAtts) {
3282     return XML_ERROR_NO_MEMORY;
3283   }
3284 
3285   if (n + nDefaultAtts > parser->m_attsSize) {
3286     int oldAttsSize = parser->m_attsSize;
3287     ATTRIBUTE *temp;
3288 #ifdef XML_ATTR_INFO
3289     XML_AttrInfo *temp2;
3290 #endif
3291 
3292     /* Detect and prevent integer overflow */
3293     if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
3294         || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
3295       return XML_ERROR_NO_MEMORY;
3296     }
3297 
3298     parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
3299 
3300     /* Detect and prevent integer overflow.
3301      * The preprocessor guard addresses the "always false" warning
3302      * from -Wtype-limits on platforms where
3303      * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3304 #if UINT_MAX >= SIZE_MAX
3305     if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
3306       parser->m_attsSize = oldAttsSize;
3307       return XML_ERROR_NO_MEMORY;
3308     }
3309 #endif
3310 
3311     temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
3312                                 parser->m_attsSize * sizeof(ATTRIBUTE));
3313     if (temp == NULL) {
3314       parser->m_attsSize = oldAttsSize;
3315       return XML_ERROR_NO_MEMORY;
3316     }
3317     parser->m_atts = temp;
3318 #ifdef XML_ATTR_INFO
3319     /* Detect and prevent integer overflow.
3320      * The preprocessor guard addresses the "always false" warning
3321      * from -Wtype-limits on platforms where
3322      * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3323 #  if UINT_MAX >= SIZE_MAX
3324     if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
3325       parser->m_attsSize = oldAttsSize;
3326       return XML_ERROR_NO_MEMORY;
3327     }
3328 #  endif
3329 
3330     temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
3331                                     parser->m_attsSize * sizeof(XML_AttrInfo));
3332     if (temp2 == NULL) {
3333       parser->m_attsSize = oldAttsSize;
3334       return XML_ERROR_NO_MEMORY;
3335     }
3336     parser->m_attInfo = temp2;
3337 #endif
3338     if (n > oldAttsSize)
3339       XmlGetAttributes(enc, attStr, n, parser->m_atts);
3340   }
3341 
3342   appAtts = (const XML_Char **)parser->m_atts;
3343   for (i = 0; i < n; i++) {
3344     ATTRIBUTE *currAtt = &parser->m_atts[i];
3345 #ifdef XML_ATTR_INFO
3346     XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
3347 #endif
3348     /* add the name and value to the attribute list */
3349     ATTRIBUTE_ID *attId
3350         = getAttributeId(parser, enc, currAtt->name,
3351                          currAtt->name + XmlNameLength(enc, currAtt->name));
3352     if (! attId)
3353       return XML_ERROR_NO_MEMORY;
3354 #ifdef XML_ATTR_INFO
3355     currAttInfo->nameStart
3356         = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
3357     currAttInfo->nameEnd
3358         = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
3359     currAttInfo->valueStart = parser->m_parseEndByteIndex
3360                               - (parser->m_parseEndPtr - currAtt->valuePtr);
3361     currAttInfo->valueEnd = parser->m_parseEndByteIndex
3362                             - (parser->m_parseEndPtr - currAtt->valueEnd);
3363 #endif
3364     /* Detect duplicate attributes by their QNames. This does not work when
3365        namespace processing is turned on and different prefixes for the same
3366        namespace are used. For this case we have a check further down.
3367     */
3368     if ((attId->name)[-1]) {
3369       if (enc == parser->m_encoding)
3370         parser->m_eventPtr = parser->m_atts[i].name;
3371       return XML_ERROR_DUPLICATE_ATTRIBUTE;
3372     }
3373     (attId->name)[-1] = 1;
3374     appAtts[attIndex++] = attId->name;
3375     if (! parser->m_atts[i].normalized) {
3376       enum XML_Error result;
3377       XML_Bool isCdata = XML_TRUE;
3378 
3379       /* figure out whether declared as other than CDATA */
3380       if (attId->maybeTokenized) {
3381         int j;
3382         for (j = 0; j < nDefaultAtts; j++) {
3383           if (attId == elementType->defaultAtts[j].id) {
3384             isCdata = elementType->defaultAtts[j].isCdata;
3385             break;
3386           }
3387         }
3388       }
3389 
3390       /* normalize the attribute value */
3391       result = storeAttributeValue(
3392           parser, enc, isCdata, parser->m_atts[i].valuePtr,
3393           parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
3394       if (result)
3395         return result;
3396       appAtts[attIndex] = poolStart(&parser->m_tempPool);
3397       poolFinish(&parser->m_tempPool);
3398     } else {
3399       /* the value did not need normalizing */
3400       appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
3401                                           parser->m_atts[i].valuePtr,
3402                                           parser->m_atts[i].valueEnd);
3403       if (appAtts[attIndex] == 0)
3404         return XML_ERROR_NO_MEMORY;
3405       poolFinish(&parser->m_tempPool);
3406     }
3407     /* handle prefixed attribute names */
3408     if (attId->prefix) {
3409       if (attId->xmlns) {
3410         /* deal with namespace declarations here */
3411         enum XML_Error result = addBinding(parser, attId->prefix, attId,
3412                                            appAtts[attIndex], bindingsPtr);
3413         if (result)
3414           return result;
3415         --attIndex;
3416       } else {
3417         /* deal with other prefixed names later */
3418         attIndex++;
3419         nPrefixes++;
3420         (attId->name)[-1] = 2;
3421       }
3422     } else
3423       attIndex++;
3424   }
3425 
3426   /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
3427   parser->m_nSpecifiedAtts = attIndex;
3428   if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3429     for (i = 0; i < attIndex; i += 2)
3430       if (appAtts[i] == elementType->idAtt->name) {
3431         parser->m_idAttIndex = i;
3432         break;
3433       }
3434   } else
3435     parser->m_idAttIndex = -1;
3436 
3437   /* do attribute defaulting */
3438   for (i = 0; i < nDefaultAtts; i++) {
3439     const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
3440     if (! (da->id->name)[-1] && da->value) {
3441       if (da->id->prefix) {
3442         if (da->id->xmlns) {
3443           enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3444                                              da->value, bindingsPtr);
3445           if (result)
3446             return result;
3447         } else {
3448           (da->id->name)[-1] = 2;
3449           nPrefixes++;
3450           appAtts[attIndex++] = da->id->name;
3451           appAtts[attIndex++] = da->value;
3452         }
3453       } else {
3454         (da->id->name)[-1] = 1;
3455         appAtts[attIndex++] = da->id->name;
3456         appAtts[attIndex++] = da->value;
3457       }
3458     }
3459   }
3460   appAtts[attIndex] = 0;
3461 
3462   /* expand prefixed attribute names, check for duplicates,
3463      and clear flags that say whether attributes were specified */
3464   i = 0;
3465   if (nPrefixes) {
3466     int j; /* hash table index */
3467     unsigned long version = parser->m_nsAttsVersion;
3468 
3469     /* Detect and prevent invalid shift */
3470     if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
3471       return XML_ERROR_NO_MEMORY;
3472     }
3473 
3474     unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
3475     unsigned char oldNsAttsPower = parser->m_nsAttsPower;
3476     /* size of hash table must be at least 2 * (# of prefixed attributes) */
3477     if ((nPrefixes << 1)
3478         >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
3479       NS_ATT *temp;
3480       /* hash table size must also be a power of 2 and >= 8 */
3481       while (nPrefixes >> parser->m_nsAttsPower++)
3482         ;
3483       if (parser->m_nsAttsPower < 3)
3484         parser->m_nsAttsPower = 3;
3485 
3486       /* Detect and prevent invalid shift */
3487       if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
3488         /* Restore actual size of memory in m_nsAtts */
3489         parser->m_nsAttsPower = oldNsAttsPower;
3490         return XML_ERROR_NO_MEMORY;
3491       }
3492 
3493       nsAttsSize = 1u << parser->m_nsAttsPower;
3494 
3495       /* Detect and prevent integer overflow.
3496        * The preprocessor guard addresses the "always false" warning
3497        * from -Wtype-limits on platforms where
3498        * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3499 #if UINT_MAX >= SIZE_MAX
3500       if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
3501         /* Restore actual size of memory in m_nsAtts */
3502         parser->m_nsAttsPower = oldNsAttsPower;
3503         return XML_ERROR_NO_MEMORY;
3504       }
3505 #endif
3506 
3507       temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
3508                                nsAttsSize * sizeof(NS_ATT));
3509       if (! temp) {
3510         /* Restore actual size of memory in m_nsAtts */
3511         parser->m_nsAttsPower = oldNsAttsPower;
3512         return XML_ERROR_NO_MEMORY;
3513       }
3514       parser->m_nsAtts = temp;
3515       version = 0; /* force re-initialization of m_nsAtts hash table */
3516     }
3517     /* using a version flag saves us from initializing m_nsAtts every time */
3518     if (! version) { /* initialize version flags when version wraps around */
3519       version = INIT_ATTS_VERSION;
3520       for (j = nsAttsSize; j != 0;)
3521         parser->m_nsAtts[--j].version = version;
3522     }
3523     parser->m_nsAttsVersion = --version;
3524 
3525     /* expand prefixed names and check for duplicates */
3526     for (; i < attIndex; i += 2) {
3527       const XML_Char *s = appAtts[i];
3528       if (s[-1] == 2) { /* prefixed */
3529         ATTRIBUTE_ID *id;
3530         const BINDING *b;
3531         unsigned long uriHash;
3532         struct siphash sip_state;
3533         struct sipkey sip_key;
3534 
3535         copy_salt_to_sipkey(parser, &sip_key);
3536         sip24_init(&sip_state, &sip_key);
3537 
3538         ((XML_Char *)s)[-1] = 0; /* clear flag */
3539         id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
3540         if (! id || ! id->prefix) {
3541           /* This code is walking through the appAtts array, dealing
3542            * with (in this case) a prefixed attribute name.  To be in
3543            * the array, the attribute must have already been bound, so
3544            * has to have passed through the hash table lookup once
3545            * already.  That implies that an entry for it already
3546            * exists, so the lookup above will return a pointer to
3547            * already allocated memory.  There is no opportunaity for
3548            * the allocator to fail, so the condition above cannot be
3549            * fulfilled.
3550            *
3551            * Since it is difficult to be certain that the above
3552            * analysis is complete, we retain the test and merely
3553            * remove the code from coverage tests.
3554            */
3555           return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3556         }
3557         b = id->prefix->binding;
3558         if (! b)
3559           return XML_ERROR_UNBOUND_PREFIX;
3560 
3561         for (j = 0; j < b->uriLen; j++) {
3562           const XML_Char c = b->uri[j];
3563           if (! poolAppendChar(&parser->m_tempPool, c))
3564             return XML_ERROR_NO_MEMORY;
3565         }
3566 
3567         sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3568 
3569         while (*s++ != XML_T(ASCII_COLON))
3570           ;
3571 
3572         sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3573 
3574         do { /* copies null terminator */
3575           if (! poolAppendChar(&parser->m_tempPool, *s))
3576             return XML_ERROR_NO_MEMORY;
3577         } while (*s++);
3578 
3579         uriHash = (unsigned long)sip24_final(&sip_state);
3580 
3581         { /* Check hash table for duplicate of expanded name (uriName).
3582              Derived from code in lookup(parser, HASH_TABLE *table, ...).
3583           */
3584           unsigned char step = 0;
3585           unsigned long mask = nsAttsSize - 1;
3586           j = uriHash & mask; /* index into hash table */
3587           while (parser->m_nsAtts[j].version == version) {
3588             /* for speed we compare stored hash values first */
3589             if (uriHash == parser->m_nsAtts[j].hash) {
3590               const XML_Char *s1 = poolStart(&parser->m_tempPool);
3591               const XML_Char *s2 = parser->m_nsAtts[j].uriName;
3592               /* s1 is null terminated, but not s2 */
3593               for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
3594                 ;
3595               if (*s1 == 0)
3596                 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3597             }
3598             if (! step)
3599               step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
3600             j < step ? (j += nsAttsSize - step) : (j -= step);
3601           }
3602         }
3603 
3604         if (parser->m_ns_triplets) { /* append namespace separator and prefix */
3605           parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
3606           s = b->prefix->name;
3607           do {
3608             if (! poolAppendChar(&parser->m_tempPool, *s))
3609               return XML_ERROR_NO_MEMORY;
3610           } while (*s++);
3611         }
3612 
3613         /* store expanded name in attribute list */
3614         s = poolStart(&parser->m_tempPool);
3615         poolFinish(&parser->m_tempPool);
3616         appAtts[i] = s;
3617 
3618         /* fill empty slot with new version, uriName and hash value */
3619         parser->m_nsAtts[j].version = version;
3620         parser->m_nsAtts[j].hash = uriHash;
3621         parser->m_nsAtts[j].uriName = s;
3622 
3623         if (! --nPrefixes) {
3624           i += 2;
3625           break;
3626         }
3627       } else                     /* not prefixed */
3628         ((XML_Char *)s)[-1] = 0; /* clear flag */
3629     }
3630   }
3631   /* clear flags for the remaining attributes */
3632   for (; i < attIndex; i += 2)
3633     ((XML_Char *)(appAtts[i]))[-1] = 0;
3634   for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3635     binding->attId->name[-1] = 0;
3636 
3637   if (! parser->m_ns)
3638     return XML_ERROR_NONE;
3639 
3640   /* expand the element type name */
3641   if (elementType->prefix) {
3642     binding = elementType->prefix->binding;
3643     if (! binding)
3644       return XML_ERROR_UNBOUND_PREFIX;
3645     localPart = tagNamePtr->str;
3646     while (*localPart++ != XML_T(ASCII_COLON))
3647       ;
3648   } else if (dtd->defaultPrefix.binding) {
3649     binding = dtd->defaultPrefix.binding;
3650     localPart = tagNamePtr->str;
3651   } else
3652     return XML_ERROR_NONE;
3653   prefixLen = 0;
3654   if (parser->m_ns_triplets && binding->prefix->name) {
3655     for (; binding->prefix->name[prefixLen++];)
3656       ; /* prefixLen includes null terminator */
3657   }
3658   tagNamePtr->localPart = localPart;
3659   tagNamePtr->uriLen = binding->uriLen;
3660   tagNamePtr->prefix = binding->prefix->name;
3661   tagNamePtr->prefixLen = prefixLen;
3662   for (i = 0; localPart[i++];)
3663     ; /* i includes null terminator */
3664 
3665   /* Detect and prevent integer overflow */
3666   if (binding->uriLen > INT_MAX - prefixLen
3667       || i > INT_MAX - (binding->uriLen + prefixLen)) {
3668     return XML_ERROR_NO_MEMORY;
3669   }
3670 
3671   n = i + binding->uriLen + prefixLen;
3672   if (n > binding->uriAlloc) {
3673     TAG *p;
3674 
3675     /* Detect and prevent integer overflow */
3676     if (n > INT_MAX - EXPAND_SPARE) {
3677       return XML_ERROR_NO_MEMORY;
3678     }
3679     /* Detect and prevent integer overflow.
3680      * The preprocessor guard addresses the "always false" warning
3681      * from -Wtype-limits on platforms where
3682      * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3683 #if UINT_MAX >= SIZE_MAX
3684     if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3685       return XML_ERROR_NO_MEMORY;
3686     }
3687 #endif
3688 
3689     uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
3690     if (! uri)
3691       return XML_ERROR_NO_MEMORY;
3692     binding->uriAlloc = n + EXPAND_SPARE;
3693     memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3694     for (p = parser->m_tagStack; p; p = p->parent)
3695       if (p->name.str == binding->uri)
3696         p->name.str = uri;
3697     FREE(parser, binding->uri);
3698     binding->uri = uri;
3699   }
3700   /* if m_namespaceSeparator != '\0' then uri includes it already */
3701   uri = binding->uri + binding->uriLen;
3702   memcpy(uri, localPart, i * sizeof(XML_Char));
3703   /* we always have a namespace separator between localPart and prefix */
3704   if (prefixLen) {
3705     uri += i - 1;
3706     *uri = parser->m_namespaceSeparator; /* replace null terminator */
3707     memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3708   }
3709   tagNamePtr->str = binding->uri;
3710   return XML_ERROR_NONE;
3711 }
3712 
3713 /* addBinding() overwrites the value of prefix->binding without checking.
3714    Therefore one must keep track of the old value outside of addBinding().
3715 */
3716 static enum XML_Error
addBinding(XML_Parser parser,PREFIX * prefix,const ATTRIBUTE_ID * attId,const XML_Char * uri,BINDING ** bindingsPtr)3717 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3718            const XML_Char *uri, BINDING **bindingsPtr) {
3719   static const XML_Char xmlNamespace[]
3720       = {ASCII_h,      ASCII_t,     ASCII_t,     ASCII_p,      ASCII_COLON,
3721          ASCII_SLASH,  ASCII_SLASH, ASCII_w,     ASCII_w,      ASCII_w,
3722          ASCII_PERIOD, ASCII_w,     ASCII_3,     ASCII_PERIOD, ASCII_o,
3723          ASCII_r,      ASCII_g,     ASCII_SLASH, ASCII_X,      ASCII_M,
3724          ASCII_L,      ASCII_SLASH, ASCII_1,     ASCII_9,      ASCII_9,
3725          ASCII_8,      ASCII_SLASH, ASCII_n,     ASCII_a,      ASCII_m,
3726          ASCII_e,      ASCII_s,     ASCII_p,     ASCII_a,      ASCII_c,
3727          ASCII_e,      '\0'};
3728   static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
3729   static const XML_Char xmlnsNamespace[]
3730       = {ASCII_h,     ASCII_t,      ASCII_t, ASCII_p, ASCII_COLON,  ASCII_SLASH,
3731          ASCII_SLASH, ASCII_w,      ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
3732          ASCII_3,     ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g,      ASCII_SLASH,
3733          ASCII_2,     ASCII_0,      ASCII_0, ASCII_0, ASCII_SLASH,  ASCII_x,
3734          ASCII_m,     ASCII_l,      ASCII_n, ASCII_s, ASCII_SLASH,  '\0'};
3735   static const int xmlnsLen
3736       = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
3737 
3738   XML_Bool mustBeXML = XML_FALSE;
3739   XML_Bool isXML = XML_TRUE;
3740   XML_Bool isXMLNS = XML_TRUE;
3741 
3742   BINDING *b;
3743   int len;
3744 
3745   /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
3746   if (*uri == XML_T('\0') && prefix->name)
3747     return XML_ERROR_UNDECLARING_PREFIX;
3748 
3749   if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
3750       && prefix->name[1] == XML_T(ASCII_m)
3751       && prefix->name[2] == XML_T(ASCII_l)) {
3752     /* Not allowed to bind xmlns */
3753     if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
3754         && prefix->name[5] == XML_T('\0'))
3755       return XML_ERROR_RESERVED_PREFIX_XMLNS;
3756 
3757     if (prefix->name[3] == XML_T('\0'))
3758       mustBeXML = XML_TRUE;
3759   }
3760 
3761   for (len = 0; uri[len]; len++) {
3762     if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3763       isXML = XML_FALSE;
3764 
3765     if (! mustBeXML && isXMLNS
3766         && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3767       isXMLNS = XML_FALSE;
3768 
3769     // NOTE: While Expat does not validate namespace URIs against RFC 3986,
3770     //       we have to at least make sure that the XML processor on top of
3771     //       Expat (that is splitting tag names by namespace separator into
3772     //       2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
3773     //       by an attacker putting additional namespace separator characters
3774     //       into namespace declarations.  That would be ambiguous and not to
3775     //       be expected.
3776     if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) {
3777       return XML_ERROR_SYNTAX;
3778     }
3779   }
3780   isXML = isXML && len == xmlLen;
3781   isXMLNS = isXMLNS && len == xmlnsLen;
3782 
3783   if (mustBeXML != isXML)
3784     return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3785                      : XML_ERROR_RESERVED_NAMESPACE_URI;
3786 
3787   if (isXMLNS)
3788     return XML_ERROR_RESERVED_NAMESPACE_URI;
3789 
3790   if (parser->m_namespaceSeparator)
3791     len++;
3792   if (parser->m_freeBindingList) {
3793     b = parser->m_freeBindingList;
3794     if (len > b->uriAlloc) {
3795       /* Detect and prevent integer overflow */
3796       if (len > INT_MAX - EXPAND_SPARE) {
3797         return XML_ERROR_NO_MEMORY;
3798       }
3799 
3800       /* Detect and prevent integer overflow.
3801        * The preprocessor guard addresses the "always false" warning
3802        * from -Wtype-limits on platforms where
3803        * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3804 #if UINT_MAX >= SIZE_MAX
3805       if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3806         return XML_ERROR_NO_MEMORY;
3807       }
3808 #endif
3809 
3810       XML_Char *temp = (XML_Char *)REALLOC(
3811           parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
3812       if (temp == NULL)
3813         return XML_ERROR_NO_MEMORY;
3814       b->uri = temp;
3815       b->uriAlloc = len + EXPAND_SPARE;
3816     }
3817     parser->m_freeBindingList = b->nextTagBinding;
3818   } else {
3819     b = (BINDING *)MALLOC(parser, sizeof(BINDING));
3820     if (! b)
3821       return XML_ERROR_NO_MEMORY;
3822 
3823     /* Detect and prevent integer overflow */
3824     if (len > INT_MAX - EXPAND_SPARE) {
3825       return XML_ERROR_NO_MEMORY;
3826     }
3827     /* Detect and prevent integer overflow.
3828      * The preprocessor guard addresses the "always false" warning
3829      * from -Wtype-limits on platforms where
3830      * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3831 #if UINT_MAX >= SIZE_MAX
3832     if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3833       return XML_ERROR_NO_MEMORY;
3834     }
3835 #endif
3836 
3837     b->uri
3838         = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
3839     if (! b->uri) {
3840       FREE(parser, b);
3841       return XML_ERROR_NO_MEMORY;
3842     }
3843     b->uriAlloc = len + EXPAND_SPARE;
3844   }
3845   b->uriLen = len;
3846   memcpy(b->uri, uri, len * sizeof(XML_Char));
3847   if (parser->m_namespaceSeparator)
3848     b->uri[len - 1] = parser->m_namespaceSeparator;
3849   b->prefix = prefix;
3850   b->attId = attId;
3851   b->prevPrefixBinding = prefix->binding;
3852   /* NULL binding when default namespace undeclared */
3853   if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
3854     prefix->binding = NULL;
3855   else
3856     prefix->binding = b;
3857   b->nextTagBinding = *bindingsPtr;
3858   *bindingsPtr = b;
3859   /* if attId == NULL then we are not starting a namespace scope */
3860   if (attId && parser->m_startNamespaceDeclHandler)
3861     parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
3862                                         prefix->binding ? uri : 0);
3863   return XML_ERROR_NONE;
3864 }
3865 
3866 /* The idea here is to avoid using stack for each CDATA section when
3867    the whole file is parsed with one call.
3868 */
3869 static enum XML_Error PTRCALL
cdataSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)3870 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
3871                       const char **endPtr) {
3872   enum XML_Error result = doCdataSection(
3873       parser, parser->m_encoding, &start, end, endPtr,
3874       (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
3875   if (result != XML_ERROR_NONE)
3876     return result;
3877   if (start) {
3878     if (parser->m_parentParser) { /* we are parsing an external entity */
3879       parser->m_processor = externalEntityContentProcessor;
3880       return externalEntityContentProcessor(parser, start, end, endPtr);
3881     } else {
3882       parser->m_processor = contentProcessor;
3883       return contentProcessor(parser, start, end, endPtr);
3884     }
3885   }
3886   return result;
3887 }
3888 
3889 /* startPtr gets set to non-null if the section is closed, and to null if
3890    the section is not yet closed.
3891 */
3892 static enum XML_Error
doCdataSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore,enum XML_Account account)3893 doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
3894                const char *end, const char **nextPtr, XML_Bool haveMore,
3895                enum XML_Account account) {
3896   const char *s = *startPtr;
3897   const char **eventPP;
3898   const char **eventEndPP;
3899   if (enc == parser->m_encoding) {
3900     eventPP = &parser->m_eventPtr;
3901     *eventPP = s;
3902     eventEndPP = &parser->m_eventEndPtr;
3903   } else {
3904     eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3905     eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
3906   }
3907   *eventPP = s;
3908   *startPtr = NULL;
3909 
3910   for (;;) {
3911     const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
3912     int tok = XmlCdataSectionTok(enc, s, end, &next);
3913 #ifdef XML_DTD
3914     if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
3915       accountingOnAbort(parser);
3916       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
3917     }
3918 #else
3919     UNUSED_P(account);
3920 #endif
3921     *eventEndPP = next;
3922     switch (tok) {
3923     case XML_TOK_CDATA_SECT_CLOSE:
3924       if (parser->m_endCdataSectionHandler)
3925         parser->m_endCdataSectionHandler(parser->m_handlerArg);
3926       /* BEGIN disabled code */
3927       /* see comment under XML_TOK_CDATA_SECT_OPEN */
3928       else if (0 && parser->m_characterDataHandler)
3929         parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3930                                        0);
3931       /* END disabled code */
3932       else if (parser->m_defaultHandler)
3933         reportDefault(parser, enc, s, next);
3934       *startPtr = next;
3935       *nextPtr = next;
3936       if (parser->m_parsingStatus.parsing == XML_FINISHED)
3937         return XML_ERROR_ABORTED;
3938       else
3939         return XML_ERROR_NONE;
3940     case XML_TOK_DATA_NEWLINE:
3941       if (parser->m_characterDataHandler) {
3942         XML_Char c = 0xA;
3943         parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3944       } else if (parser->m_defaultHandler)
3945         reportDefault(parser, enc, s, next);
3946       break;
3947     case XML_TOK_DATA_CHARS: {
3948       XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3949       if (charDataHandler) {
3950         if (MUST_CONVERT(enc, s)) {
3951           for (;;) {
3952             ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3953             const enum XML_Convert_Result convert_res = XmlConvert(
3954                 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3955             *eventEndPP = next;
3956             charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3957                             (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3958             if ((convert_res == XML_CONVERT_COMPLETED)
3959                 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3960               break;
3961             *eventPP = s;
3962           }
3963         } else
3964           charDataHandler(parser->m_handlerArg, (XML_Char *)s,
3965                           (int)((XML_Char *)next - (XML_Char *)s));
3966       } else if (parser->m_defaultHandler)
3967         reportDefault(parser, enc, s, next);
3968     } break;
3969     case XML_TOK_INVALID:
3970       *eventPP = next;
3971       return XML_ERROR_INVALID_TOKEN;
3972     case XML_TOK_PARTIAL_CHAR:
3973       if (haveMore) {
3974         *nextPtr = s;
3975         return XML_ERROR_NONE;
3976       }
3977       return XML_ERROR_PARTIAL_CHAR;
3978     case XML_TOK_PARTIAL:
3979     case XML_TOK_NONE:
3980       if (haveMore) {
3981         *nextPtr = s;
3982         return XML_ERROR_NONE;
3983       }
3984       return XML_ERROR_UNCLOSED_CDATA_SECTION;
3985     default:
3986       /* Every token returned by XmlCdataSectionTok() has its own
3987        * explicit case, so this default case will never be executed.
3988        * We retain it as a safety net and exclude it from the coverage
3989        * statistics.
3990        *
3991        * LCOV_EXCL_START
3992        */
3993       *eventPP = next;
3994       return XML_ERROR_UNEXPECTED_STATE;
3995       /* LCOV_EXCL_STOP */
3996     }
3997 
3998     *eventPP = s = next;
3999     switch (parser->m_parsingStatus.parsing) {
4000     case XML_SUSPENDED:
4001       *nextPtr = next;
4002       return XML_ERROR_NONE;
4003     case XML_FINISHED:
4004       return XML_ERROR_ABORTED;
4005     default:;
4006     }
4007   }
4008   /* not reached */
4009 }
4010 
4011 #ifdef XML_DTD
4012 
4013 /* The idea here is to avoid using stack for each IGNORE section when
4014    the whole file is parsed with one call.
4015 */
4016 static enum XML_Error PTRCALL
ignoreSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)4017 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
4018                        const char **endPtr) {
4019   enum XML_Error result
4020       = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
4021                         (XML_Bool)! parser->m_parsingStatus.finalBuffer);
4022   if (result != XML_ERROR_NONE)
4023     return result;
4024   if (start) {
4025     parser->m_processor = prologProcessor;
4026     return prologProcessor(parser, start, end, endPtr);
4027   }
4028   return result;
4029 }
4030 
4031 /* startPtr gets set to non-null is the section is closed, and to null
4032    if the section is not yet closed.
4033 */
4034 static enum XML_Error
doIgnoreSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore)4035 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4036                 const char *end, const char **nextPtr, XML_Bool haveMore) {
4037   const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4038   int tok;
4039   const char *s = *startPtr;
4040   const char **eventPP;
4041   const char **eventEndPP;
4042   if (enc == parser->m_encoding) {
4043     eventPP = &parser->m_eventPtr;
4044     *eventPP = s;
4045     eventEndPP = &parser->m_eventEndPtr;
4046   } else {
4047     /* It's not entirely clear, but it seems the following two lines
4048      * of code cannot be executed.  The only occasions on which 'enc'
4049      * is not 'encoding' are when this function is called
4050      * from the internal entity processing, and IGNORE sections are an
4051      * error in internal entities.
4052      *
4053      * Since it really isn't clear that this is true, we keep the code
4054      * and just remove it from our coverage tests.
4055      *
4056      * LCOV_EXCL_START
4057      */
4058     eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4059     eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4060     /* LCOV_EXCL_STOP */
4061   }
4062   *eventPP = s;
4063   *startPtr = NULL;
4064   tok = XmlIgnoreSectionTok(enc, s, end, &next);
4065 #  ifdef XML_DTD
4066   if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4067                                 XML_ACCOUNT_DIRECT)) {
4068     accountingOnAbort(parser);
4069     return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4070   }
4071 #  endif
4072   *eventEndPP = next;
4073   switch (tok) {
4074   case XML_TOK_IGNORE_SECT:
4075     if (parser->m_defaultHandler)
4076       reportDefault(parser, enc, s, next);
4077     *startPtr = next;
4078     *nextPtr = next;
4079     if (parser->m_parsingStatus.parsing == XML_FINISHED)
4080       return XML_ERROR_ABORTED;
4081     else
4082       return XML_ERROR_NONE;
4083   case XML_TOK_INVALID:
4084     *eventPP = next;
4085     return XML_ERROR_INVALID_TOKEN;
4086   case XML_TOK_PARTIAL_CHAR:
4087     if (haveMore) {
4088       *nextPtr = s;
4089       return XML_ERROR_NONE;
4090     }
4091     return XML_ERROR_PARTIAL_CHAR;
4092   case XML_TOK_PARTIAL:
4093   case XML_TOK_NONE:
4094     if (haveMore) {
4095       *nextPtr = s;
4096       return XML_ERROR_NONE;
4097     }
4098     return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
4099   default:
4100     /* All of the tokens that XmlIgnoreSectionTok() returns have
4101      * explicit cases to handle them, so this default case is never
4102      * executed.  We keep it as a safety net anyway, and remove it
4103      * from our test coverage statistics.
4104      *
4105      * LCOV_EXCL_START
4106      */
4107     *eventPP = next;
4108     return XML_ERROR_UNEXPECTED_STATE;
4109     /* LCOV_EXCL_STOP */
4110   }
4111   /* not reached */
4112 }
4113 
4114 #endif /* XML_DTD */
4115 
4116 static enum XML_Error
initializeEncoding(XML_Parser parser)4117 initializeEncoding(XML_Parser parser) {
4118   const char *s;
4119 #ifdef XML_UNICODE
4120   char encodingBuf[128];
4121   /* See comments abount `protoclEncodingName` in parserInit() */
4122   if (! parser->m_protocolEncodingName)
4123     s = NULL;
4124   else {
4125     int i;
4126     for (i = 0; parser->m_protocolEncodingName[i]; i++) {
4127       if (i == sizeof(encodingBuf) - 1
4128           || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
4129         encodingBuf[0] = '\0';
4130         break;
4131       }
4132       encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
4133     }
4134     encodingBuf[i] = '\0';
4135     s = encodingBuf;
4136   }
4137 #else
4138   s = parser->m_protocolEncodingName;
4139 #endif
4140   if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
4141           &parser->m_initEncoding, &parser->m_encoding, s))
4142     return XML_ERROR_NONE;
4143   return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
4144 }
4145 
4146 static enum XML_Error
processXmlDecl(XML_Parser parser,int isGeneralTextEntity,const char * s,const char * next)4147 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
4148                const char *next) {
4149   const char *encodingName = NULL;
4150   const XML_Char *storedEncName = NULL;
4151   const ENCODING *newEncoding = NULL;
4152   const char *version = NULL;
4153   const char *versionend;
4154   const XML_Char *storedversion = NULL;
4155   int standalone = -1;
4156 
4157 #ifdef XML_DTD
4158   if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
4159                                 XML_ACCOUNT_DIRECT)) {
4160     accountingOnAbort(parser);
4161     return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4162   }
4163 #endif
4164 
4165   if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
4166           isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
4167           &version, &versionend, &encodingName, &newEncoding, &standalone)) {
4168     if (isGeneralTextEntity)
4169       return XML_ERROR_TEXT_DECL;
4170     else
4171       return XML_ERROR_XML_DECL;
4172   }
4173   if (! isGeneralTextEntity && standalone == 1) {
4174     parser->m_dtd->standalone = XML_TRUE;
4175 #ifdef XML_DTD
4176     if (parser->m_paramEntityParsing
4177         == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
4178       parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
4179 #endif /* XML_DTD */
4180   }
4181   if (parser->m_xmlDeclHandler) {
4182     if (encodingName != NULL) {
4183       storedEncName = poolStoreString(
4184           &parser->m_temp2Pool, parser->m_encoding, encodingName,
4185           encodingName + XmlNameLength(parser->m_encoding, encodingName));
4186       if (! storedEncName)
4187         return XML_ERROR_NO_MEMORY;
4188       poolFinish(&parser->m_temp2Pool);
4189     }
4190     if (version) {
4191       storedversion
4192           = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
4193                             versionend - parser->m_encoding->minBytesPerChar);
4194       if (! storedversion)
4195         return XML_ERROR_NO_MEMORY;
4196     }
4197     parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
4198                              standalone);
4199   } else if (parser->m_defaultHandler)
4200     reportDefault(parser, parser->m_encoding, s, next);
4201   if (parser->m_protocolEncodingName == NULL) {
4202     if (newEncoding) {
4203       /* Check that the specified encoding does not conflict with what
4204        * the parser has already deduced.  Do we have the same number
4205        * of bytes in the smallest representation of a character?  If
4206        * this is UTF-16, is it the same endianness?
4207        */
4208       if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
4209           || (newEncoding->minBytesPerChar == 2
4210               && newEncoding != parser->m_encoding)) {
4211         parser->m_eventPtr = encodingName;
4212         return XML_ERROR_INCORRECT_ENCODING;
4213       }
4214       parser->m_encoding = newEncoding;
4215     } else if (encodingName) {
4216       enum XML_Error result;
4217       if (! storedEncName) {
4218         storedEncName = poolStoreString(
4219             &parser->m_temp2Pool, parser->m_encoding, encodingName,
4220             encodingName + XmlNameLength(parser->m_encoding, encodingName));
4221         if (! storedEncName)
4222           return XML_ERROR_NO_MEMORY;
4223       }
4224       result = handleUnknownEncoding(parser, storedEncName);
4225       poolClear(&parser->m_temp2Pool);
4226       if (result == XML_ERROR_UNKNOWN_ENCODING)
4227         parser->m_eventPtr = encodingName;
4228       return result;
4229     }
4230   }
4231 
4232   if (storedEncName || storedversion)
4233     poolClear(&parser->m_temp2Pool);
4234 
4235   return XML_ERROR_NONE;
4236 }
4237 
4238 static enum XML_Error
handleUnknownEncoding(XML_Parser parser,const XML_Char * encodingName)4239 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
4240   if (parser->m_unknownEncodingHandler) {
4241     XML_Encoding info;
4242     int i;
4243     for (i = 0; i < 256; i++)
4244       info.map[i] = -1;
4245     info.convert = NULL;
4246     info.data = NULL;
4247     info.release = NULL;
4248     if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
4249                                          encodingName, &info)) {
4250       ENCODING *enc;
4251       parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4252       if (! parser->m_unknownEncodingMem) {
4253         if (info.release)
4254           info.release(info.data);
4255         return XML_ERROR_NO_MEMORY;
4256       }
4257       enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
4258           parser->m_unknownEncodingMem, info.map, info.convert, info.data);
4259       if (enc) {
4260         parser->m_unknownEncodingData = info.data;
4261         parser->m_unknownEncodingRelease = info.release;
4262         parser->m_encoding = enc;
4263         return XML_ERROR_NONE;
4264       }
4265     }
4266     if (info.release != NULL)
4267       info.release(info.data);
4268   }
4269   return XML_ERROR_UNKNOWN_ENCODING;
4270 }
4271 
4272 static enum XML_Error PTRCALL
prologInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4273 prologInitProcessor(XML_Parser parser, const char *s, const char *end,
4274                     const char **nextPtr) {
4275   enum XML_Error result = initializeEncoding(parser);
4276   if (result != XML_ERROR_NONE)
4277     return result;
4278   parser->m_processor = prologProcessor;
4279   return prologProcessor(parser, s, end, nextPtr);
4280 }
4281 
4282 #ifdef XML_DTD
4283 
4284 static enum XML_Error PTRCALL
externalParEntInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4285 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
4286                             const char **nextPtr) {
4287   enum XML_Error result = initializeEncoding(parser);
4288   if (result != XML_ERROR_NONE)
4289     return result;
4290 
4291   /* we know now that XML_Parse(Buffer) has been called,
4292      so we consider the external parameter entity read */
4293   parser->m_dtd->paramEntityRead = XML_TRUE;
4294 
4295   if (parser->m_prologState.inEntityValue) {
4296     parser->m_processor = entityValueInitProcessor;
4297     return entityValueInitProcessor(parser, s, end, nextPtr);
4298   } else {
4299     parser->m_processor = externalParEntProcessor;
4300     return externalParEntProcessor(parser, s, end, nextPtr);
4301   }
4302 }
4303 
4304 static enum XML_Error PTRCALL
entityValueInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4305 entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
4306                          const char **nextPtr) {
4307   int tok;
4308   const char *start = s;
4309   const char *next = start;
4310   parser->m_eventPtr = start;
4311 
4312   for (;;) {
4313     tok = XmlPrologTok(parser->m_encoding, start, end, &next);
4314     /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
4315              - storeEntityValue
4316              - processXmlDecl
4317     */
4318     parser->m_eventEndPtr = next;
4319     if (tok <= 0) {
4320       if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4321         *nextPtr = s;
4322         return XML_ERROR_NONE;
4323       }
4324       switch (tok) {
4325       case XML_TOK_INVALID:
4326         return XML_ERROR_INVALID_TOKEN;
4327       case XML_TOK_PARTIAL:
4328         return XML_ERROR_UNCLOSED_TOKEN;
4329       case XML_TOK_PARTIAL_CHAR:
4330         return XML_ERROR_PARTIAL_CHAR;
4331       case XML_TOK_NONE: /* start == end */
4332       default:
4333         break;
4334       }
4335       /* found end of entity value - can store it now */
4336       return storeEntityValue(parser, parser->m_encoding, s, end,
4337                               XML_ACCOUNT_DIRECT);
4338     } else if (tok == XML_TOK_XML_DECL) {
4339       enum XML_Error result;
4340       result = processXmlDecl(parser, 0, start, next);
4341       if (result != XML_ERROR_NONE)
4342         return result;
4343       /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED.  For
4344        * that to happen, a parameter entity parsing handler must have attempted
4345        * to suspend the parser, which fails and raises an error.  The parser can
4346        * be aborted, but can't be suspended.
4347        */
4348       if (parser->m_parsingStatus.parsing == XML_FINISHED)
4349         return XML_ERROR_ABORTED;
4350       *nextPtr = next;
4351       /* stop scanning for text declaration - we found one */
4352       parser->m_processor = entityValueProcessor;
4353       return entityValueProcessor(parser, next, end, nextPtr);
4354     }
4355     /* If we are at the end of the buffer, this would cause XmlPrologTok to
4356        return XML_TOK_NONE on the next call, which would then cause the
4357        function to exit with *nextPtr set to s - that is what we want for other
4358        tokens, but not for the BOM - we would rather like to skip it;
4359        then, when this routine is entered the next time, XmlPrologTok will
4360        return XML_TOK_INVALID, since the BOM is still in the buffer
4361     */
4362     else if (tok == XML_TOK_BOM && next == end
4363              && ! parser->m_parsingStatus.finalBuffer) {
4364 #  ifdef XML_DTD
4365       if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4366                                     XML_ACCOUNT_DIRECT)) {
4367         accountingOnAbort(parser);
4368         return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4369       }
4370 #  endif
4371 
4372       *nextPtr = next;
4373       return XML_ERROR_NONE;
4374     }
4375     /* If we get this token, we have the start of what might be a
4376        normal tag, but not a declaration (i.e. it doesn't begin with
4377        "<!").  In a DTD context, that isn't legal.
4378     */
4379     else if (tok == XML_TOK_INSTANCE_START) {
4380       *nextPtr = next;
4381       return XML_ERROR_SYNTAX;
4382     }
4383     start = next;
4384     parser->m_eventPtr = start;
4385   }
4386 }
4387 
4388 static enum XML_Error PTRCALL
externalParEntProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4389 externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
4390                         const char **nextPtr) {
4391   const char *next = s;
4392   int tok;
4393 
4394   tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4395   if (tok <= 0) {
4396     if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4397       *nextPtr = s;
4398       return XML_ERROR_NONE;
4399     }
4400     switch (tok) {
4401     case XML_TOK_INVALID:
4402       return XML_ERROR_INVALID_TOKEN;
4403     case XML_TOK_PARTIAL:
4404       return XML_ERROR_UNCLOSED_TOKEN;
4405     case XML_TOK_PARTIAL_CHAR:
4406       return XML_ERROR_PARTIAL_CHAR;
4407     case XML_TOK_NONE: /* start == end */
4408     default:
4409       break;
4410     }
4411   }
4412   /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4413      However, when parsing an external subset, doProlog will not accept a BOM
4414      as valid, and report a syntax error, so we have to skip the BOM, and
4415      account for the BOM bytes.
4416   */
4417   else if (tok == XML_TOK_BOM) {
4418     if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4419                                   XML_ACCOUNT_DIRECT)) {
4420       accountingOnAbort(parser);
4421       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4422     }
4423 
4424     s = next;
4425     tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4426   }
4427 
4428   parser->m_processor = prologProcessor;
4429   return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4430                   (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4431                   XML_ACCOUNT_DIRECT);
4432 }
4433 
4434 static enum XML_Error PTRCALL
entityValueProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4435 entityValueProcessor(XML_Parser parser, const char *s, const char *end,
4436                      const char **nextPtr) {
4437   const char *start = s;
4438   const char *next = s;
4439   const ENCODING *enc = parser->m_encoding;
4440   int tok;
4441 
4442   for (;;) {
4443     tok = XmlPrologTok(enc, start, end, &next);
4444     /* Note: These bytes are accounted later in:
4445              - storeEntityValue
4446     */
4447     if (tok <= 0) {
4448       if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4449         *nextPtr = s;
4450         return XML_ERROR_NONE;
4451       }
4452       switch (tok) {
4453       case XML_TOK_INVALID:
4454         return XML_ERROR_INVALID_TOKEN;
4455       case XML_TOK_PARTIAL:
4456         return XML_ERROR_UNCLOSED_TOKEN;
4457       case XML_TOK_PARTIAL_CHAR:
4458         return XML_ERROR_PARTIAL_CHAR;
4459       case XML_TOK_NONE: /* start == end */
4460       default:
4461         break;
4462       }
4463       /* found end of entity value - can store it now */
4464       return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT);
4465     }
4466     start = next;
4467   }
4468 }
4469 
4470 #endif /* XML_DTD */
4471 
4472 static enum XML_Error PTRCALL
prologProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4473 prologProcessor(XML_Parser parser, const char *s, const char *end,
4474                 const char **nextPtr) {
4475   const char *next = s;
4476   int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4477   return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4478                   (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4479                   XML_ACCOUNT_DIRECT);
4480 }
4481 
4482 static enum XML_Error
doProlog(XML_Parser parser,const ENCODING * enc,const char * s,const char * end,int tok,const char * next,const char ** nextPtr,XML_Bool haveMore,XML_Bool allowClosingDoctype,enum XML_Account account)4483 doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
4484          int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
4485          XML_Bool allowClosingDoctype, enum XML_Account account) {
4486 #ifdef XML_DTD
4487   static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
4488 #endif /* XML_DTD */
4489   static const XML_Char atypeCDATA[]
4490       = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
4491   static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
4492   static const XML_Char atypeIDREF[]
4493       = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
4494   static const XML_Char atypeIDREFS[]
4495       = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
4496   static const XML_Char atypeENTITY[]
4497       = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
4498   static const XML_Char atypeENTITIES[]
4499       = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
4500          ASCII_I, ASCII_E, ASCII_S, '\0'};
4501   static const XML_Char atypeNMTOKEN[]
4502       = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
4503   static const XML_Char atypeNMTOKENS[]
4504       = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
4505          ASCII_E, ASCII_N, ASCII_S, '\0'};
4506   static const XML_Char notationPrefix[]
4507       = {ASCII_N, ASCII_O, ASCII_T, ASCII_A,      ASCII_T,
4508          ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
4509   static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
4510   static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
4511 
4512 #ifndef XML_DTD
4513   UNUSED_P(account);
4514 #endif
4515 
4516   /* save one level of indirection */
4517   DTD *const dtd = parser->m_dtd;
4518 
4519   const char **eventPP;
4520   const char **eventEndPP;
4521   enum XML_Content_Quant quant;
4522 
4523   if (enc == parser->m_encoding) {
4524     eventPP = &parser->m_eventPtr;
4525     eventEndPP = &parser->m_eventEndPtr;
4526   } else {
4527     eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4528     eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4529   }
4530 
4531   for (;;) {
4532     int role;
4533     XML_Bool handleDefault = XML_TRUE;
4534     *eventPP = s;
4535     *eventEndPP = next;
4536     if (tok <= 0) {
4537       if (haveMore && tok != XML_TOK_INVALID) {
4538         *nextPtr = s;
4539         return XML_ERROR_NONE;
4540       }
4541       switch (tok) {
4542       case XML_TOK_INVALID:
4543         *eventPP = next;
4544         return XML_ERROR_INVALID_TOKEN;
4545       case XML_TOK_PARTIAL:
4546         return XML_ERROR_UNCLOSED_TOKEN;
4547       case XML_TOK_PARTIAL_CHAR:
4548         return XML_ERROR_PARTIAL_CHAR;
4549       case -XML_TOK_PROLOG_S:
4550         tok = -tok;
4551         break;
4552       case XML_TOK_NONE:
4553 #ifdef XML_DTD
4554         /* for internal PE NOT referenced between declarations */
4555         if (enc != parser->m_encoding
4556             && ! parser->m_openInternalEntities->betweenDecl) {
4557           *nextPtr = s;
4558           return XML_ERROR_NONE;
4559         }
4560         /* WFC: PE Between Declarations - must check that PE contains
4561            complete markup, not only for external PEs, but also for
4562            internal PEs if the reference occurs between declarations.
4563         */
4564         if (parser->m_isParamEntity || enc != parser->m_encoding) {
4565           if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
4566               == XML_ROLE_ERROR)
4567             return XML_ERROR_INCOMPLETE_PE;
4568           *nextPtr = s;
4569           return XML_ERROR_NONE;
4570         }
4571 #endif /* XML_DTD */
4572         return XML_ERROR_NO_ELEMENTS;
4573       default:
4574         tok = -tok;
4575         next = end;
4576         break;
4577       }
4578     }
4579     role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
4580 #ifdef XML_DTD
4581     switch (role) {
4582     case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
4583     case XML_ROLE_XML_DECL:       // bytes accounted in processXmlDecl
4584     case XML_ROLE_TEXT_DECL:      // bytes accounted in processXmlDecl
4585       break;
4586     default:
4587       if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4588         accountingOnAbort(parser);
4589         return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4590       }
4591     }
4592 #endif
4593     switch (role) {
4594     case XML_ROLE_XML_DECL: {
4595       enum XML_Error result = processXmlDecl(parser, 0, s, next);
4596       if (result != XML_ERROR_NONE)
4597         return result;
4598       enc = parser->m_encoding;
4599       handleDefault = XML_FALSE;
4600     } break;
4601     case XML_ROLE_DOCTYPE_NAME:
4602       if (parser->m_startDoctypeDeclHandler) {
4603         parser->m_doctypeName
4604             = poolStoreString(&parser->m_tempPool, enc, s, next);
4605         if (! parser->m_doctypeName)
4606           return XML_ERROR_NO_MEMORY;
4607         poolFinish(&parser->m_tempPool);
4608         parser->m_doctypePubid = NULL;
4609         handleDefault = XML_FALSE;
4610       }
4611       parser->m_doctypeSysid = NULL; /* always initialize to NULL */
4612       break;
4613     case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
4614       if (parser->m_startDoctypeDeclHandler) {
4615         parser->m_startDoctypeDeclHandler(
4616             parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4617             parser->m_doctypePubid, 1);
4618         parser->m_doctypeName = NULL;
4619         poolClear(&parser->m_tempPool);
4620         handleDefault = XML_FALSE;
4621       }
4622       break;
4623 #ifdef XML_DTD
4624     case XML_ROLE_TEXT_DECL: {
4625       enum XML_Error result = processXmlDecl(parser, 1, s, next);
4626       if (result != XML_ERROR_NONE)
4627         return result;
4628       enc = parser->m_encoding;
4629       handleDefault = XML_FALSE;
4630     } break;
4631 #endif /* XML_DTD */
4632     case XML_ROLE_DOCTYPE_PUBLIC_ID:
4633 #ifdef XML_DTD
4634       parser->m_useForeignDTD = XML_FALSE;
4635       parser->m_declEntity = (ENTITY *)lookup(
4636           parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4637       if (! parser->m_declEntity)
4638         return XML_ERROR_NO_MEMORY;
4639 #endif /* XML_DTD */
4640       dtd->hasParamEntityRefs = XML_TRUE;
4641       if (parser->m_startDoctypeDeclHandler) {
4642         XML_Char *pubId;
4643         if (! XmlIsPublicId(enc, s, next, eventPP))
4644           return XML_ERROR_PUBLICID;
4645         pubId = poolStoreString(&parser->m_tempPool, enc,
4646                                 s + enc->minBytesPerChar,
4647                                 next - enc->minBytesPerChar);
4648         if (! pubId)
4649           return XML_ERROR_NO_MEMORY;
4650         normalizePublicId(pubId);
4651         poolFinish(&parser->m_tempPool);
4652         parser->m_doctypePubid = pubId;
4653         handleDefault = XML_FALSE;
4654         goto alreadyChecked;
4655       }
4656       /* fall through */
4657     case XML_ROLE_ENTITY_PUBLIC_ID:
4658       if (! XmlIsPublicId(enc, s, next, eventPP))
4659         return XML_ERROR_PUBLICID;
4660     alreadyChecked:
4661       if (dtd->keepProcessing && parser->m_declEntity) {
4662         XML_Char *tem
4663             = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4664                               next - enc->minBytesPerChar);
4665         if (! tem)
4666           return XML_ERROR_NO_MEMORY;
4667         normalizePublicId(tem);
4668         parser->m_declEntity->publicId = tem;
4669         poolFinish(&dtd->pool);
4670         /* Don't suppress the default handler if we fell through from
4671          * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
4672          */
4673         if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
4674           handleDefault = XML_FALSE;
4675       }
4676       break;
4677     case XML_ROLE_DOCTYPE_CLOSE:
4678       if (allowClosingDoctype != XML_TRUE) {
4679         /* Must not close doctype from within expanded parameter entities */
4680         return XML_ERROR_INVALID_TOKEN;
4681       }
4682 
4683       if (parser->m_doctypeName) {
4684         parser->m_startDoctypeDeclHandler(
4685             parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4686             parser->m_doctypePubid, 0);
4687         poolClear(&parser->m_tempPool);
4688         handleDefault = XML_FALSE;
4689       }
4690       /* parser->m_doctypeSysid will be non-NULL in the case of a previous
4691          XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
4692          was not set, indicating an external subset
4693       */
4694 #ifdef XML_DTD
4695       if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
4696         XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4697         dtd->hasParamEntityRefs = XML_TRUE;
4698         if (parser->m_paramEntityParsing
4699             && parser->m_externalEntityRefHandler) {
4700           ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4701                                             externalSubsetName, sizeof(ENTITY));
4702           if (! entity) {
4703             /* The external subset name "#" will have already been
4704              * inserted into the hash table at the start of the
4705              * external entity parsing, so no allocation will happen
4706              * and lookup() cannot fail.
4707              */
4708             return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4709           }
4710           if (parser->m_useForeignDTD)
4711             entity->base = parser->m_curBase;
4712           dtd->paramEntityRead = XML_FALSE;
4713           if (! parser->m_externalEntityRefHandler(
4714                   parser->m_externalEntityRefHandlerArg, 0, entity->base,
4715                   entity->systemId, entity->publicId))
4716             return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4717           if (dtd->paramEntityRead) {
4718             if (! dtd->standalone && parser->m_notStandaloneHandler
4719                 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4720               return XML_ERROR_NOT_STANDALONE;
4721           }
4722           /* if we didn't read the foreign DTD then this means that there
4723              is no external subset and we must reset dtd->hasParamEntityRefs
4724           */
4725           else if (! parser->m_doctypeSysid)
4726             dtd->hasParamEntityRefs = hadParamEntityRefs;
4727           /* end of DTD - no need to update dtd->keepProcessing */
4728         }
4729         parser->m_useForeignDTD = XML_FALSE;
4730       }
4731 #endif /* XML_DTD */
4732       if (parser->m_endDoctypeDeclHandler) {
4733         parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
4734         handleDefault = XML_FALSE;
4735       }
4736       break;
4737     case XML_ROLE_INSTANCE_START:
4738 #ifdef XML_DTD
4739       /* if there is no DOCTYPE declaration then now is the
4740          last chance to read the foreign DTD
4741       */
4742       if (parser->m_useForeignDTD) {
4743         XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4744         dtd->hasParamEntityRefs = XML_TRUE;
4745         if (parser->m_paramEntityParsing
4746             && parser->m_externalEntityRefHandler) {
4747           ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4748                                             externalSubsetName, sizeof(ENTITY));
4749           if (! entity)
4750             return XML_ERROR_NO_MEMORY;
4751           entity->base = parser->m_curBase;
4752           dtd->paramEntityRead = XML_FALSE;
4753           if (! parser->m_externalEntityRefHandler(
4754                   parser->m_externalEntityRefHandlerArg, 0, entity->base,
4755                   entity->systemId, entity->publicId))
4756             return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4757           if (dtd->paramEntityRead) {
4758             if (! dtd->standalone && parser->m_notStandaloneHandler
4759                 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4760               return XML_ERROR_NOT_STANDALONE;
4761           }
4762           /* if we didn't read the foreign DTD then this means that there
4763              is no external subset and we must reset dtd->hasParamEntityRefs
4764           */
4765           else
4766             dtd->hasParamEntityRefs = hadParamEntityRefs;
4767           /* end of DTD - no need to update dtd->keepProcessing */
4768         }
4769       }
4770 #endif /* XML_DTD */
4771       parser->m_processor = contentProcessor;
4772       return contentProcessor(parser, s, end, nextPtr);
4773     case XML_ROLE_ATTLIST_ELEMENT_NAME:
4774       parser->m_declElementType = getElementType(parser, enc, s, next);
4775       if (! parser->m_declElementType)
4776         return XML_ERROR_NO_MEMORY;
4777       goto checkAttListDeclHandler;
4778     case XML_ROLE_ATTRIBUTE_NAME:
4779       parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
4780       if (! parser->m_declAttributeId)
4781         return XML_ERROR_NO_MEMORY;
4782       parser->m_declAttributeIsCdata = XML_FALSE;
4783       parser->m_declAttributeType = NULL;
4784       parser->m_declAttributeIsId = XML_FALSE;
4785       goto checkAttListDeclHandler;
4786     case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
4787       parser->m_declAttributeIsCdata = XML_TRUE;
4788       parser->m_declAttributeType = atypeCDATA;
4789       goto checkAttListDeclHandler;
4790     case XML_ROLE_ATTRIBUTE_TYPE_ID:
4791       parser->m_declAttributeIsId = XML_TRUE;
4792       parser->m_declAttributeType = atypeID;
4793       goto checkAttListDeclHandler;
4794     case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
4795       parser->m_declAttributeType = atypeIDREF;
4796       goto checkAttListDeclHandler;
4797     case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
4798       parser->m_declAttributeType = atypeIDREFS;
4799       goto checkAttListDeclHandler;
4800     case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
4801       parser->m_declAttributeType = atypeENTITY;
4802       goto checkAttListDeclHandler;
4803     case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
4804       parser->m_declAttributeType = atypeENTITIES;
4805       goto checkAttListDeclHandler;
4806     case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
4807       parser->m_declAttributeType = atypeNMTOKEN;
4808       goto checkAttListDeclHandler;
4809     case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
4810       parser->m_declAttributeType = atypeNMTOKENS;
4811     checkAttListDeclHandler:
4812       if (dtd->keepProcessing && parser->m_attlistDeclHandler)
4813         handleDefault = XML_FALSE;
4814       break;
4815     case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
4816     case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
4817       if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
4818         const XML_Char *prefix;
4819         if (parser->m_declAttributeType) {
4820           prefix = enumValueSep;
4821         } else {
4822           prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
4823                                                               : enumValueStart);
4824         }
4825         if (! poolAppendString(&parser->m_tempPool, prefix))
4826           return XML_ERROR_NO_MEMORY;
4827         if (! poolAppend(&parser->m_tempPool, enc, s, next))
4828           return XML_ERROR_NO_MEMORY;
4829         parser->m_declAttributeType = parser->m_tempPool.start;
4830         handleDefault = XML_FALSE;
4831       }
4832       break;
4833     case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
4834     case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
4835       if (dtd->keepProcessing) {
4836         if (! defineAttribute(parser->m_declElementType,
4837                               parser->m_declAttributeId,
4838                               parser->m_declAttributeIsCdata,
4839                               parser->m_declAttributeIsId, 0, parser))
4840           return XML_ERROR_NO_MEMORY;
4841         if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4842           if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4843               || (*parser->m_declAttributeType == XML_T(ASCII_N)
4844                   && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
4845             /* Enumerated or Notation type */
4846             if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4847                 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
4848               return XML_ERROR_NO_MEMORY;
4849             parser->m_declAttributeType = parser->m_tempPool.start;
4850             poolFinish(&parser->m_tempPool);
4851           }
4852           *eventEndPP = s;
4853           parser->m_attlistDeclHandler(
4854               parser->m_handlerArg, parser->m_declElementType->name,
4855               parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
4856               role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
4857           poolClear(&parser->m_tempPool);
4858           handleDefault = XML_FALSE;
4859         }
4860       }
4861       break;
4862     case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
4863     case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
4864       if (dtd->keepProcessing) {
4865         const XML_Char *attVal;
4866         enum XML_Error result = storeAttributeValue(
4867             parser, enc, parser->m_declAttributeIsCdata,
4868             s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
4869             XML_ACCOUNT_NONE);
4870         if (result)
4871           return result;
4872         attVal = poolStart(&dtd->pool);
4873         poolFinish(&dtd->pool);
4874         /* ID attributes aren't allowed to have a default */
4875         if (! defineAttribute(
4876                 parser->m_declElementType, parser->m_declAttributeId,
4877                 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
4878           return XML_ERROR_NO_MEMORY;
4879         if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4880           if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4881               || (*parser->m_declAttributeType == XML_T(ASCII_N)
4882                   && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
4883             /* Enumerated or Notation type */
4884             if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4885                 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
4886               return XML_ERROR_NO_MEMORY;
4887             parser->m_declAttributeType = parser->m_tempPool.start;
4888             poolFinish(&parser->m_tempPool);
4889           }
4890           *eventEndPP = s;
4891           parser->m_attlistDeclHandler(
4892               parser->m_handlerArg, parser->m_declElementType->name,
4893               parser->m_declAttributeId->name, parser->m_declAttributeType,
4894               attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
4895           poolClear(&parser->m_tempPool);
4896           handleDefault = XML_FALSE;
4897         }
4898       }
4899       break;
4900     case XML_ROLE_ENTITY_VALUE:
4901       if (dtd->keepProcessing) {
4902         enum XML_Error result
4903             = storeEntityValue(parser, enc, s + enc->minBytesPerChar,
4904                                next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
4905         if (parser->m_declEntity) {
4906           parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
4907           parser->m_declEntity->textLen
4908               = (int)(poolLength(&dtd->entityValuePool));
4909           poolFinish(&dtd->entityValuePool);
4910           if (parser->m_entityDeclHandler) {
4911             *eventEndPP = s;
4912             parser->m_entityDeclHandler(
4913                 parser->m_handlerArg, parser->m_declEntity->name,
4914                 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
4915                 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
4916             handleDefault = XML_FALSE;
4917           }
4918         } else
4919           poolDiscard(&dtd->entityValuePool);
4920         if (result != XML_ERROR_NONE)
4921           return result;
4922       }
4923       break;
4924     case XML_ROLE_DOCTYPE_SYSTEM_ID:
4925 #ifdef XML_DTD
4926       parser->m_useForeignDTD = XML_FALSE;
4927 #endif /* XML_DTD */
4928       dtd->hasParamEntityRefs = XML_TRUE;
4929       if (parser->m_startDoctypeDeclHandler) {
4930         parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
4931                                                  s + enc->minBytesPerChar,
4932                                                  next - enc->minBytesPerChar);
4933         if (parser->m_doctypeSysid == NULL)
4934           return XML_ERROR_NO_MEMORY;
4935         poolFinish(&parser->m_tempPool);
4936         handleDefault = XML_FALSE;
4937       }
4938 #ifdef XML_DTD
4939       else
4940         /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
4941            for the case where no parser->m_startDoctypeDeclHandler is set */
4942         parser->m_doctypeSysid = externalSubsetName;
4943 #endif /* XML_DTD */
4944       if (! dtd->standalone
4945 #ifdef XML_DTD
4946           && ! parser->m_paramEntityParsing
4947 #endif /* XML_DTD */
4948           && parser->m_notStandaloneHandler
4949           && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4950         return XML_ERROR_NOT_STANDALONE;
4951 #ifndef XML_DTD
4952       break;
4953 #else  /* XML_DTD */
4954       if (! parser->m_declEntity) {
4955         parser->m_declEntity = (ENTITY *)lookup(
4956             parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4957         if (! parser->m_declEntity)
4958           return XML_ERROR_NO_MEMORY;
4959         parser->m_declEntity->publicId = NULL;
4960       }
4961 #endif /* XML_DTD */
4962       /* fall through */
4963     case XML_ROLE_ENTITY_SYSTEM_ID:
4964       if (dtd->keepProcessing && parser->m_declEntity) {
4965         parser->m_declEntity->systemId
4966             = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4967                               next - enc->minBytesPerChar);
4968         if (! parser->m_declEntity->systemId)
4969           return XML_ERROR_NO_MEMORY;
4970         parser->m_declEntity->base = parser->m_curBase;
4971         poolFinish(&dtd->pool);
4972         /* Don't suppress the default handler if we fell through from
4973          * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
4974          */
4975         if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
4976           handleDefault = XML_FALSE;
4977       }
4978       break;
4979     case XML_ROLE_ENTITY_COMPLETE:
4980       if (dtd->keepProcessing && parser->m_declEntity
4981           && parser->m_entityDeclHandler) {
4982         *eventEndPP = s;
4983         parser->m_entityDeclHandler(
4984             parser->m_handlerArg, parser->m_declEntity->name,
4985             parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
4986             parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
4987         handleDefault = XML_FALSE;
4988       }
4989       break;
4990     case XML_ROLE_ENTITY_NOTATION_NAME:
4991       if (dtd->keepProcessing && parser->m_declEntity) {
4992         parser->m_declEntity->notation
4993             = poolStoreString(&dtd->pool, enc, s, next);
4994         if (! parser->m_declEntity->notation)
4995           return XML_ERROR_NO_MEMORY;
4996         poolFinish(&dtd->pool);
4997         if (parser->m_unparsedEntityDeclHandler) {
4998           *eventEndPP = s;
4999           parser->m_unparsedEntityDeclHandler(
5000               parser->m_handlerArg, parser->m_declEntity->name,
5001               parser->m_declEntity->base, parser->m_declEntity->systemId,
5002               parser->m_declEntity->publicId, parser->m_declEntity->notation);
5003           handleDefault = XML_FALSE;
5004         } else if (parser->m_entityDeclHandler) {
5005           *eventEndPP = s;
5006           parser->m_entityDeclHandler(
5007               parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
5008               parser->m_declEntity->base, parser->m_declEntity->systemId,
5009               parser->m_declEntity->publicId, parser->m_declEntity->notation);
5010           handleDefault = XML_FALSE;
5011         }
5012       }
5013       break;
5014     case XML_ROLE_GENERAL_ENTITY_NAME: {
5015       if (XmlPredefinedEntityName(enc, s, next)) {
5016         parser->m_declEntity = NULL;
5017         break;
5018       }
5019       if (dtd->keepProcessing) {
5020         const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5021         if (! name)
5022           return XML_ERROR_NO_MEMORY;
5023         parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
5024                                                 name, sizeof(ENTITY));
5025         if (! parser->m_declEntity)
5026           return XML_ERROR_NO_MEMORY;
5027         if (parser->m_declEntity->name != name) {
5028           poolDiscard(&dtd->pool);
5029           parser->m_declEntity = NULL;
5030         } else {
5031           poolFinish(&dtd->pool);
5032           parser->m_declEntity->publicId = NULL;
5033           parser->m_declEntity->is_param = XML_FALSE;
5034           /* if we have a parent parser or are reading an internal parameter
5035              entity, then the entity declaration is not considered "internal"
5036           */
5037           parser->m_declEntity->is_internal
5038               = ! (parser->m_parentParser || parser->m_openInternalEntities);
5039           if (parser->m_entityDeclHandler)
5040             handleDefault = XML_FALSE;
5041         }
5042       } else {
5043         poolDiscard(&dtd->pool);
5044         parser->m_declEntity = NULL;
5045       }
5046     } break;
5047     case XML_ROLE_PARAM_ENTITY_NAME:
5048 #ifdef XML_DTD
5049       if (dtd->keepProcessing) {
5050         const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5051         if (! name)
5052           return XML_ERROR_NO_MEMORY;
5053         parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5054                                                 name, sizeof(ENTITY));
5055         if (! parser->m_declEntity)
5056           return XML_ERROR_NO_MEMORY;
5057         if (parser->m_declEntity->name != name) {
5058           poolDiscard(&dtd->pool);
5059           parser->m_declEntity = NULL;
5060         } else {
5061           poolFinish(&dtd->pool);
5062           parser->m_declEntity->publicId = NULL;
5063           parser->m_declEntity->is_param = XML_TRUE;
5064           /* if we have a parent parser or are reading an internal parameter
5065              entity, then the entity declaration is not considered "internal"
5066           */
5067           parser->m_declEntity->is_internal
5068               = ! (parser->m_parentParser || parser->m_openInternalEntities);
5069           if (parser->m_entityDeclHandler)
5070             handleDefault = XML_FALSE;
5071         }
5072       } else {
5073         poolDiscard(&dtd->pool);
5074         parser->m_declEntity = NULL;
5075       }
5076 #else  /* not XML_DTD */
5077       parser->m_declEntity = NULL;
5078 #endif /* XML_DTD */
5079       break;
5080     case XML_ROLE_NOTATION_NAME:
5081       parser->m_declNotationPublicId = NULL;
5082       parser->m_declNotationName = NULL;
5083       if (parser->m_notationDeclHandler) {
5084         parser->m_declNotationName
5085             = poolStoreString(&parser->m_tempPool, enc, s, next);
5086         if (! parser->m_declNotationName)
5087           return XML_ERROR_NO_MEMORY;
5088         poolFinish(&parser->m_tempPool);
5089         handleDefault = XML_FALSE;
5090       }
5091       break;
5092     case XML_ROLE_NOTATION_PUBLIC_ID:
5093       if (! XmlIsPublicId(enc, s, next, eventPP))
5094         return XML_ERROR_PUBLICID;
5095       if (parser
5096               ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
5097         XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
5098                                         s + enc->minBytesPerChar,
5099                                         next - enc->minBytesPerChar);
5100         if (! tem)
5101           return XML_ERROR_NO_MEMORY;
5102         normalizePublicId(tem);
5103         parser->m_declNotationPublicId = tem;
5104         poolFinish(&parser->m_tempPool);
5105         handleDefault = XML_FALSE;
5106       }
5107       break;
5108     case XML_ROLE_NOTATION_SYSTEM_ID:
5109       if (parser->m_declNotationName && parser->m_notationDeclHandler) {
5110         const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
5111                                                    s + enc->minBytesPerChar,
5112                                                    next - enc->minBytesPerChar);
5113         if (! systemId)
5114           return XML_ERROR_NO_MEMORY;
5115         *eventEndPP = s;
5116         parser->m_notationDeclHandler(
5117             parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5118             systemId, parser->m_declNotationPublicId);
5119         handleDefault = XML_FALSE;
5120       }
5121       poolClear(&parser->m_tempPool);
5122       break;
5123     case XML_ROLE_NOTATION_NO_SYSTEM_ID:
5124       if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
5125         *eventEndPP = s;
5126         parser->m_notationDeclHandler(
5127             parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5128             0, parser->m_declNotationPublicId);
5129         handleDefault = XML_FALSE;
5130       }
5131       poolClear(&parser->m_tempPool);
5132       break;
5133     case XML_ROLE_ERROR:
5134       switch (tok) {
5135       case XML_TOK_PARAM_ENTITY_REF:
5136         /* PE references in internal subset are
5137            not allowed within declarations. */
5138         return XML_ERROR_PARAM_ENTITY_REF;
5139       case XML_TOK_XML_DECL:
5140         return XML_ERROR_MISPLACED_XML_PI;
5141       default:
5142         return XML_ERROR_SYNTAX;
5143       }
5144 #ifdef XML_DTD
5145     case XML_ROLE_IGNORE_SECT: {
5146       enum XML_Error result;
5147       if (parser->m_defaultHandler)
5148         reportDefault(parser, enc, s, next);
5149       handleDefault = XML_FALSE;
5150       result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
5151       if (result != XML_ERROR_NONE)
5152         return result;
5153       else if (! next) {
5154         parser->m_processor = ignoreSectionProcessor;
5155         return result;
5156       }
5157     } break;
5158 #endif /* XML_DTD */
5159     case XML_ROLE_GROUP_OPEN:
5160       if (parser->m_prologState.level >= parser->m_groupSize) {
5161         if (parser->m_groupSize) {
5162           {
5163             /* Detect and prevent integer overflow */
5164             if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
5165               return XML_ERROR_NO_MEMORY;
5166             }
5167 
5168             char *const new_connector = (char *)REALLOC(
5169                 parser, parser->m_groupConnector, parser->m_groupSize *= 2);
5170             if (new_connector == NULL) {
5171               parser->m_groupSize /= 2;
5172               return XML_ERROR_NO_MEMORY;
5173             }
5174             parser->m_groupConnector = new_connector;
5175           }
5176 
5177           if (dtd->scaffIndex) {
5178             /* Detect and prevent integer overflow.
5179              * The preprocessor guard addresses the "always false" warning
5180              * from -Wtype-limits on platforms where
5181              * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
5182 #if UINT_MAX >= SIZE_MAX
5183             if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
5184               return XML_ERROR_NO_MEMORY;
5185             }
5186 #endif
5187 
5188             int *const new_scaff_index = (int *)REALLOC(
5189                 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
5190             if (new_scaff_index == NULL)
5191               return XML_ERROR_NO_MEMORY;
5192             dtd->scaffIndex = new_scaff_index;
5193           }
5194         } else {
5195           parser->m_groupConnector
5196               = (char *)MALLOC(parser, parser->m_groupSize = 32);
5197           if (! parser->m_groupConnector) {
5198             parser->m_groupSize = 0;
5199             return XML_ERROR_NO_MEMORY;
5200           }
5201         }
5202       }
5203       parser->m_groupConnector[parser->m_prologState.level] = 0;
5204       if (dtd->in_eldecl) {
5205         int myindex = nextScaffoldPart(parser);
5206         if (myindex < 0)
5207           return XML_ERROR_NO_MEMORY;
5208         assert(dtd->scaffIndex != NULL);
5209         dtd->scaffIndex[dtd->scaffLevel] = myindex;
5210         dtd->scaffLevel++;
5211         dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5212         if (parser->m_elementDeclHandler)
5213           handleDefault = XML_FALSE;
5214       }
5215       break;
5216     case XML_ROLE_GROUP_SEQUENCE:
5217       if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5218         return XML_ERROR_SYNTAX;
5219       parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5220       if (dtd->in_eldecl && parser->m_elementDeclHandler)
5221         handleDefault = XML_FALSE;
5222       break;
5223     case XML_ROLE_GROUP_CHOICE:
5224       if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5225         return XML_ERROR_SYNTAX;
5226       if (dtd->in_eldecl
5227           && ! parser->m_groupConnector[parser->m_prologState.level]
5228           && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5229               != XML_CTYPE_MIXED)) {
5230         dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5231             = XML_CTYPE_CHOICE;
5232         if (parser->m_elementDeclHandler)
5233           handleDefault = XML_FALSE;
5234       }
5235       parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5236       break;
5237     case XML_ROLE_PARAM_ENTITY_REF:
5238 #ifdef XML_DTD
5239     case XML_ROLE_INNER_PARAM_ENTITY_REF:
5240       dtd->hasParamEntityRefs = XML_TRUE;
5241       if (! parser->m_paramEntityParsing)
5242         dtd->keepProcessing = dtd->standalone;
5243       else {
5244         const XML_Char *name;
5245         ENTITY *entity;
5246         name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5247                                next - enc->minBytesPerChar);
5248         if (! name)
5249           return XML_ERROR_NO_MEMORY;
5250         entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5251         poolDiscard(&dtd->pool);
5252         /* first, determine if a check for an existing declaration is needed;
5253            if yes, check that the entity exists, and that it is internal,
5254            otherwise call the skipped entity handler
5255         */
5256         if (parser->m_prologState.documentEntity
5257             && (dtd->standalone ? ! parser->m_openInternalEntities
5258                                 : ! dtd->hasParamEntityRefs)) {
5259           if (! entity)
5260             return XML_ERROR_UNDEFINED_ENTITY;
5261           else if (! entity->is_internal) {
5262             /* It's hard to exhaustively search the code to be sure,
5263              * but there doesn't seem to be a way of executing the
5264              * following line.  There are two cases:
5265              *
5266              * If 'standalone' is false, the DTD must have no
5267              * parameter entities or we wouldn't have passed the outer
5268              * 'if' statement.  That means the only entity in the hash
5269              * table is the external subset name "#" which cannot be
5270              * given as a parameter entity name in XML syntax, so the
5271              * lookup must have returned NULL and we don't even reach
5272              * the test for an internal entity.
5273              *
5274              * If 'standalone' is true, it does not seem to be
5275              * possible to create entities taking this code path that
5276              * are not internal entities, so fail the test above.
5277              *
5278              * Because this analysis is very uncertain, the code is
5279              * being left in place and merely removed from the
5280              * coverage test statistics.
5281              */
5282             return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5283           }
5284         } else if (! entity) {
5285           dtd->keepProcessing = dtd->standalone;
5286           /* cannot report skipped entities in declarations */
5287           if ((role == XML_ROLE_PARAM_ENTITY_REF)
5288               && parser->m_skippedEntityHandler) {
5289             parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
5290             handleDefault = XML_FALSE;
5291           }
5292           break;
5293         }
5294         if (entity->open)
5295           return XML_ERROR_RECURSIVE_ENTITY_REF;
5296         if (entity->textPtr) {
5297           enum XML_Error result;
5298           XML_Bool betweenDecl
5299               = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5300           result = processInternalEntity(parser, entity, betweenDecl);
5301           if (result != XML_ERROR_NONE)
5302             return result;
5303           handleDefault = XML_FALSE;
5304           break;
5305         }
5306         if (parser->m_externalEntityRefHandler) {
5307           dtd->paramEntityRead = XML_FALSE;
5308           entity->open = XML_TRUE;
5309           entityTrackingOnOpen(parser, entity, __LINE__);
5310           if (! parser->m_externalEntityRefHandler(
5311                   parser->m_externalEntityRefHandlerArg, 0, entity->base,
5312                   entity->systemId, entity->publicId)) {
5313             entityTrackingOnClose(parser, entity, __LINE__);
5314             entity->open = XML_FALSE;
5315             return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5316           }
5317           entityTrackingOnClose(parser, entity, __LINE__);
5318           entity->open = XML_FALSE;
5319           handleDefault = XML_FALSE;
5320           if (! dtd->paramEntityRead) {
5321             dtd->keepProcessing = dtd->standalone;
5322             break;
5323           }
5324         } else {
5325           dtd->keepProcessing = dtd->standalone;
5326           break;
5327         }
5328       }
5329 #endif /* XML_DTD */
5330       if (! dtd->standalone && parser->m_notStandaloneHandler
5331           && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5332         return XML_ERROR_NOT_STANDALONE;
5333       break;
5334 
5335       /* Element declaration stuff */
5336 
5337     case XML_ROLE_ELEMENT_NAME:
5338       if (parser->m_elementDeclHandler) {
5339         parser->m_declElementType = getElementType(parser, enc, s, next);
5340         if (! parser->m_declElementType)
5341           return XML_ERROR_NO_MEMORY;
5342         dtd->scaffLevel = 0;
5343         dtd->scaffCount = 0;
5344         dtd->in_eldecl = XML_TRUE;
5345         handleDefault = XML_FALSE;
5346       }
5347       break;
5348 
5349     case XML_ROLE_CONTENT_ANY:
5350     case XML_ROLE_CONTENT_EMPTY:
5351       if (dtd->in_eldecl) {
5352         if (parser->m_elementDeclHandler) {
5353           XML_Content *content
5354               = (XML_Content *)MALLOC(parser, sizeof(XML_Content));
5355           if (! content)
5356             return XML_ERROR_NO_MEMORY;
5357           content->quant = XML_CQUANT_NONE;
5358           content->name = NULL;
5359           content->numchildren = 0;
5360           content->children = NULL;
5361           content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
5362                                                           : XML_CTYPE_EMPTY);
5363           *eventEndPP = s;
5364           parser->m_elementDeclHandler(
5365               parser->m_handlerArg, parser->m_declElementType->name, content);
5366           handleDefault = XML_FALSE;
5367         }
5368         dtd->in_eldecl = XML_FALSE;
5369       }
5370       break;
5371 
5372     case XML_ROLE_CONTENT_PCDATA:
5373       if (dtd->in_eldecl) {
5374         dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5375             = XML_CTYPE_MIXED;
5376         if (parser->m_elementDeclHandler)
5377           handleDefault = XML_FALSE;
5378       }
5379       break;
5380 
5381     case XML_ROLE_CONTENT_ELEMENT:
5382       quant = XML_CQUANT_NONE;
5383       goto elementContent;
5384     case XML_ROLE_CONTENT_ELEMENT_OPT:
5385       quant = XML_CQUANT_OPT;
5386       goto elementContent;
5387     case XML_ROLE_CONTENT_ELEMENT_REP:
5388       quant = XML_CQUANT_REP;
5389       goto elementContent;
5390     case XML_ROLE_CONTENT_ELEMENT_PLUS:
5391       quant = XML_CQUANT_PLUS;
5392     elementContent:
5393       if (dtd->in_eldecl) {
5394         ELEMENT_TYPE *el;
5395         const XML_Char *name;
5396         size_t nameLen;
5397         const char *nxt
5398             = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
5399         int myindex = nextScaffoldPart(parser);
5400         if (myindex < 0)
5401           return XML_ERROR_NO_MEMORY;
5402         dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5403         dtd->scaffold[myindex].quant = quant;
5404         el = getElementType(parser, enc, s, nxt);
5405         if (! el)
5406           return XML_ERROR_NO_MEMORY;
5407         name = el->name;
5408         dtd->scaffold[myindex].name = name;
5409         nameLen = 0;
5410         for (; name[nameLen++];)
5411           ;
5412 
5413         /* Detect and prevent integer overflow */
5414         if (nameLen > UINT_MAX - dtd->contentStringLen) {
5415           return XML_ERROR_NO_MEMORY;
5416         }
5417 
5418         dtd->contentStringLen += (unsigned)nameLen;
5419         if (parser->m_elementDeclHandler)
5420           handleDefault = XML_FALSE;
5421       }
5422       break;
5423 
5424     case XML_ROLE_GROUP_CLOSE:
5425       quant = XML_CQUANT_NONE;
5426       goto closeGroup;
5427     case XML_ROLE_GROUP_CLOSE_OPT:
5428       quant = XML_CQUANT_OPT;
5429       goto closeGroup;
5430     case XML_ROLE_GROUP_CLOSE_REP:
5431       quant = XML_CQUANT_REP;
5432       goto closeGroup;
5433     case XML_ROLE_GROUP_CLOSE_PLUS:
5434       quant = XML_CQUANT_PLUS;
5435     closeGroup:
5436       if (dtd->in_eldecl) {
5437         if (parser->m_elementDeclHandler)
5438           handleDefault = XML_FALSE;
5439         dtd->scaffLevel--;
5440         dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5441         if (dtd->scaffLevel == 0) {
5442           if (! handleDefault) {
5443             XML_Content *model = build_model(parser);
5444             if (! model)
5445               return XML_ERROR_NO_MEMORY;
5446             *eventEndPP = s;
5447             parser->m_elementDeclHandler(
5448                 parser->m_handlerArg, parser->m_declElementType->name, model);
5449           }
5450           dtd->in_eldecl = XML_FALSE;
5451           dtd->contentStringLen = 0;
5452         }
5453       }
5454       break;
5455       /* End element declaration stuff */
5456 
5457     case XML_ROLE_PI:
5458       if (! reportProcessingInstruction(parser, enc, s, next))
5459         return XML_ERROR_NO_MEMORY;
5460       handleDefault = XML_FALSE;
5461       break;
5462     case XML_ROLE_COMMENT:
5463       if (! reportComment(parser, enc, s, next))
5464         return XML_ERROR_NO_MEMORY;
5465       handleDefault = XML_FALSE;
5466       break;
5467     case XML_ROLE_NONE:
5468       switch (tok) {
5469       case XML_TOK_BOM:
5470         handleDefault = XML_FALSE;
5471         break;
5472       }
5473       break;
5474     case XML_ROLE_DOCTYPE_NONE:
5475       if (parser->m_startDoctypeDeclHandler)
5476         handleDefault = XML_FALSE;
5477       break;
5478     case XML_ROLE_ENTITY_NONE:
5479       if (dtd->keepProcessing && parser->m_entityDeclHandler)
5480         handleDefault = XML_FALSE;
5481       break;
5482     case XML_ROLE_NOTATION_NONE:
5483       if (parser->m_notationDeclHandler)
5484         handleDefault = XML_FALSE;
5485       break;
5486     case XML_ROLE_ATTLIST_NONE:
5487       if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5488         handleDefault = XML_FALSE;
5489       break;
5490     case XML_ROLE_ELEMENT_NONE:
5491       if (parser->m_elementDeclHandler)
5492         handleDefault = XML_FALSE;
5493       break;
5494     } /* end of big switch */
5495 
5496     if (handleDefault && parser->m_defaultHandler)
5497       reportDefault(parser, enc, s, next);
5498 
5499     switch (parser->m_parsingStatus.parsing) {
5500     case XML_SUSPENDED:
5501       *nextPtr = next;
5502       return XML_ERROR_NONE;
5503     case XML_FINISHED:
5504       return XML_ERROR_ABORTED;
5505     default:
5506       s = next;
5507       tok = XmlPrologTok(enc, s, end, &next);
5508     }
5509   }
5510   /* not reached */
5511 }
5512 
5513 static enum XML_Error PTRCALL
epilogProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5514 epilogProcessor(XML_Parser parser, const char *s, const char *end,
5515                 const char **nextPtr) {
5516   parser->m_processor = epilogProcessor;
5517   parser->m_eventPtr = s;
5518   for (;;) {
5519     const char *next = NULL;
5520     int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5521 #ifdef XML_DTD
5522     if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5523                                   XML_ACCOUNT_DIRECT)) {
5524       accountingOnAbort(parser);
5525       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5526     }
5527 #endif
5528     parser->m_eventEndPtr = next;
5529     switch (tok) {
5530     /* report partial linebreak - it might be the last token */
5531     case -XML_TOK_PROLOG_S:
5532       if (parser->m_defaultHandler) {
5533         reportDefault(parser, parser->m_encoding, s, next);
5534         if (parser->m_parsingStatus.parsing == XML_FINISHED)
5535           return XML_ERROR_ABORTED;
5536       }
5537       *nextPtr = next;
5538       return XML_ERROR_NONE;
5539     case XML_TOK_NONE:
5540       *nextPtr = s;
5541       return XML_ERROR_NONE;
5542     case XML_TOK_PROLOG_S:
5543       if (parser->m_defaultHandler)
5544         reportDefault(parser, parser->m_encoding, s, next);
5545       break;
5546     case XML_TOK_PI:
5547       if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
5548         return XML_ERROR_NO_MEMORY;
5549       break;
5550     case XML_TOK_COMMENT:
5551       if (! reportComment(parser, parser->m_encoding, s, next))
5552         return XML_ERROR_NO_MEMORY;
5553       break;
5554     case XML_TOK_INVALID:
5555       parser->m_eventPtr = next;
5556       return XML_ERROR_INVALID_TOKEN;
5557     case XML_TOK_PARTIAL:
5558       if (! parser->m_parsingStatus.finalBuffer) {
5559         *nextPtr = s;
5560         return XML_ERROR_NONE;
5561       }
5562       return XML_ERROR_UNCLOSED_TOKEN;
5563     case XML_TOK_PARTIAL_CHAR:
5564       if (! parser->m_parsingStatus.finalBuffer) {
5565         *nextPtr = s;
5566         return XML_ERROR_NONE;
5567       }
5568       return XML_ERROR_PARTIAL_CHAR;
5569     default:
5570       return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5571     }
5572     parser->m_eventPtr = s = next;
5573     switch (parser->m_parsingStatus.parsing) {
5574     case XML_SUSPENDED:
5575       *nextPtr = next;
5576       return XML_ERROR_NONE;
5577     case XML_FINISHED:
5578       return XML_ERROR_ABORTED;
5579     default:;
5580     }
5581   }
5582 }
5583 
5584 static enum XML_Error
processInternalEntity(XML_Parser parser,ENTITY * entity,XML_Bool betweenDecl)5585 processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
5586   const char *textStart, *textEnd;
5587   const char *next;
5588   enum XML_Error result;
5589   OPEN_INTERNAL_ENTITY *openEntity;
5590 
5591   if (parser->m_freeInternalEntities) {
5592     openEntity = parser->m_freeInternalEntities;
5593     parser->m_freeInternalEntities = openEntity->next;
5594   } else {
5595     openEntity
5596         = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
5597     if (! openEntity)
5598       return XML_ERROR_NO_MEMORY;
5599   }
5600   entity->open = XML_TRUE;
5601 #ifdef XML_DTD
5602   entityTrackingOnOpen(parser, entity, __LINE__);
5603 #endif
5604   entity->processed = 0;
5605   openEntity->next = parser->m_openInternalEntities;
5606   parser->m_openInternalEntities = openEntity;
5607   openEntity->entity = entity;
5608   openEntity->startTagLevel = parser->m_tagLevel;
5609   openEntity->betweenDecl = betweenDecl;
5610   openEntity->internalEventPtr = NULL;
5611   openEntity->internalEventEndPtr = NULL;
5612   textStart = (const char *)entity->textPtr;
5613   textEnd = (const char *)(entity->textPtr + entity->textLen);
5614   /* Set a safe default value in case 'next' does not get set */
5615   next = textStart;
5616 
5617 #ifdef XML_DTD
5618   if (entity->is_param) {
5619     int tok
5620         = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5621     result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5622                       tok, next, &next, XML_FALSE, XML_FALSE,
5623                       XML_ACCOUNT_ENTITY_EXPANSION);
5624   } else
5625 #endif /* XML_DTD */
5626     result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
5627                        textStart, textEnd, &next, XML_FALSE,
5628                        XML_ACCOUNT_ENTITY_EXPANSION);
5629 
5630   if (result == XML_ERROR_NONE) {
5631     if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5632       entity->processed = (int)(next - textStart);
5633       parser->m_processor = internalEntityProcessor;
5634     } else {
5635 #ifdef XML_DTD
5636       entityTrackingOnClose(parser, entity, __LINE__);
5637 #endif /* XML_DTD */
5638       entity->open = XML_FALSE;
5639       parser->m_openInternalEntities = openEntity->next;
5640       /* put openEntity back in list of free instances */
5641       openEntity->next = parser->m_freeInternalEntities;
5642       parser->m_freeInternalEntities = openEntity;
5643     }
5644   }
5645   return result;
5646 }
5647 
5648 static enum XML_Error PTRCALL
internalEntityProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5649 internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
5650                         const char **nextPtr) {
5651   ENTITY *entity;
5652   const char *textStart, *textEnd;
5653   const char *next;
5654   enum XML_Error result;
5655   OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
5656   if (! openEntity)
5657     return XML_ERROR_UNEXPECTED_STATE;
5658 
5659   entity = openEntity->entity;
5660   textStart = ((const char *)entity->textPtr) + entity->processed;
5661   textEnd = (const char *)(entity->textPtr + entity->textLen);
5662   /* Set a safe default value in case 'next' does not get set */
5663   next = textStart;
5664 
5665 #ifdef XML_DTD
5666   if (entity->is_param) {
5667     int tok
5668         = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5669     result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5670                       tok, next, &next, XML_FALSE, XML_TRUE,
5671                       XML_ACCOUNT_ENTITY_EXPANSION);
5672   } else
5673 #endif /* XML_DTD */
5674     result = doContent(parser, openEntity->startTagLevel,
5675                        parser->m_internalEncoding, textStart, textEnd, &next,
5676                        XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
5677 
5678   if (result != XML_ERROR_NONE)
5679     return result;
5680   else if (textEnd != next
5681            && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5682     entity->processed = (int)(next - (const char *)entity->textPtr);
5683     return result;
5684   } else {
5685 #ifdef XML_DTD
5686     entityTrackingOnClose(parser, entity, __LINE__);
5687 #endif
5688     entity->open = XML_FALSE;
5689     parser->m_openInternalEntities = openEntity->next;
5690     /* put openEntity back in list of free instances */
5691     openEntity->next = parser->m_freeInternalEntities;
5692     parser->m_freeInternalEntities = openEntity;
5693   }
5694 
5695 #ifdef XML_DTD
5696   if (entity->is_param) {
5697     int tok;
5698     parser->m_processor = prologProcessor;
5699     tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5700     return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5701                     (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5702                     XML_ACCOUNT_DIRECT);
5703   } else
5704 #endif /* XML_DTD */
5705   {
5706     parser->m_processor = contentProcessor;
5707     /* see externalEntityContentProcessor vs contentProcessor */
5708     result = doContent(parser, parser->m_parentParser ? 1 : 0,
5709                        parser->m_encoding, s, end, nextPtr,
5710                        (XML_Bool)! parser->m_parsingStatus.finalBuffer,
5711                        XML_ACCOUNT_DIRECT);
5712     if (result == XML_ERROR_NONE) {
5713       if (! storeRawNames(parser))
5714         return XML_ERROR_NO_MEMORY;
5715     }
5716     return result;
5717   }
5718 }
5719 
5720 static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5721 errorProcessor(XML_Parser parser, const char *s, const char *end,
5722                const char **nextPtr) {
5723   UNUSED_P(s);
5724   UNUSED_P(end);
5725   UNUSED_P(nextPtr);
5726   return parser->m_errorCode;
5727 }
5728 
5729 static enum XML_Error
storeAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)5730 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5731                     const char *ptr, const char *end, STRING_POOL *pool,
5732                     enum XML_Account account) {
5733   enum XML_Error result
5734       = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account);
5735   if (result)
5736     return result;
5737   if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
5738     poolChop(pool);
5739   if (! poolAppendChar(pool, XML_T('\0')))
5740     return XML_ERROR_NO_MEMORY;
5741   return XML_ERROR_NONE;
5742 }
5743 
5744 static enum XML_Error
appendAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)5745 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5746                      const char *ptr, const char *end, STRING_POOL *pool,
5747                      enum XML_Account account) {
5748   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
5749 #ifndef XML_DTD
5750   UNUSED_P(account);
5751 #endif
5752 
5753   for (;;) {
5754     const char *next
5755         = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
5756     int tok = XmlAttributeValueTok(enc, ptr, end, &next);
5757 #ifdef XML_DTD
5758     if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
5759       accountingOnAbort(parser);
5760       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5761     }
5762 #endif
5763     switch (tok) {
5764     case XML_TOK_NONE:
5765       return XML_ERROR_NONE;
5766     case XML_TOK_INVALID:
5767       if (enc == parser->m_encoding)
5768         parser->m_eventPtr = next;
5769       return XML_ERROR_INVALID_TOKEN;
5770     case XML_TOK_PARTIAL:
5771       if (enc == parser->m_encoding)
5772         parser->m_eventPtr = ptr;
5773       return XML_ERROR_INVALID_TOKEN;
5774     case XML_TOK_CHAR_REF: {
5775       XML_Char buf[XML_ENCODE_MAX];
5776       int i;
5777       int n = XmlCharRefNumber(enc, ptr);
5778       if (n < 0) {
5779         if (enc == parser->m_encoding)
5780           parser->m_eventPtr = ptr;
5781         return XML_ERROR_BAD_CHAR_REF;
5782       }
5783       if (! isCdata && n == 0x20 /* space */
5784           && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5785         break;
5786       n = XmlEncode(n, (ICHAR *)buf);
5787       /* The XmlEncode() functions can never return 0 here.  That
5788        * error return happens if the code point passed in is either
5789        * negative or greater than or equal to 0x110000.  The
5790        * XmlCharRefNumber() functions will all return a number
5791        * strictly less than 0x110000 or a negative value if an error
5792        * occurred.  The negative value is intercepted above, so
5793        * XmlEncode() is never passed a value it might return an
5794        * error for.
5795        */
5796       for (i = 0; i < n; i++) {
5797         if (! poolAppendChar(pool, buf[i]))
5798           return XML_ERROR_NO_MEMORY;
5799       }
5800     } break;
5801     case XML_TOK_DATA_CHARS:
5802       if (! poolAppend(pool, enc, ptr, next))
5803         return XML_ERROR_NO_MEMORY;
5804       break;
5805     case XML_TOK_TRAILING_CR:
5806       next = ptr + enc->minBytesPerChar;
5807       /* fall through */
5808     case XML_TOK_ATTRIBUTE_VALUE_S:
5809     case XML_TOK_DATA_NEWLINE:
5810       if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5811         break;
5812       if (! poolAppendChar(pool, 0x20))
5813         return XML_ERROR_NO_MEMORY;
5814       break;
5815     case XML_TOK_ENTITY_REF: {
5816       const XML_Char *name;
5817       ENTITY *entity;
5818       char checkEntityDecl;
5819       XML_Char ch = (XML_Char)XmlPredefinedEntityName(
5820           enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
5821       if (ch) {
5822 #ifdef XML_DTD
5823         /* NOTE: We are replacing 4-6 characters original input for 1 character
5824          *       so there is no amplification and hence recording without
5825          *       protection. */
5826         accountingDiffTolerated(parser, tok, (char *)&ch,
5827                                 ((char *)&ch) + sizeof(XML_Char), __LINE__,
5828                                 XML_ACCOUNT_ENTITY_EXPANSION);
5829 #endif /* XML_DTD */
5830         if (! poolAppendChar(pool, ch))
5831           return XML_ERROR_NO_MEMORY;
5832         break;
5833       }
5834       name = poolStoreString(&parser->m_temp2Pool, enc,
5835                              ptr + enc->minBytesPerChar,
5836                              next - enc->minBytesPerChar);
5837       if (! name)
5838         return XML_ERROR_NO_MEMORY;
5839       entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
5840       poolDiscard(&parser->m_temp2Pool);
5841       /* First, determine if a check for an existing declaration is needed;
5842          if yes, check that the entity exists, and that it is internal.
5843       */
5844       if (pool == &dtd->pool) /* are we called from prolog? */
5845         checkEntityDecl =
5846 #ifdef XML_DTD
5847             parser->m_prologState.documentEntity &&
5848 #endif /* XML_DTD */
5849             (dtd->standalone ? ! parser->m_openInternalEntities
5850                              : ! dtd->hasParamEntityRefs);
5851       else /* if (pool == &parser->m_tempPool): we are called from content */
5852         checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
5853       if (checkEntityDecl) {
5854         if (! entity)
5855           return XML_ERROR_UNDEFINED_ENTITY;
5856         else if (! entity->is_internal)
5857           return XML_ERROR_ENTITY_DECLARED_IN_PE;
5858       } else if (! entity) {
5859         /* Cannot report skipped entity here - see comments on
5860            parser->m_skippedEntityHandler.
5861         if (parser->m_skippedEntityHandler)
5862           parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
5863         */
5864         /* Cannot call the default handler because this would be
5865            out of sync with the call to the startElementHandler.
5866         if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
5867           reportDefault(parser, enc, ptr, next);
5868         */
5869         break;
5870       }
5871       if (entity->open) {
5872         if (enc == parser->m_encoding) {
5873           /* It does not appear that this line can be executed.
5874            *
5875            * The "if (entity->open)" check catches recursive entity
5876            * definitions.  In order to be called with an open
5877            * entity, it must have gone through this code before and
5878            * been through the recursive call to
5879            * appendAttributeValue() some lines below.  That call
5880            * sets the local encoding ("enc") to the parser's
5881            * internal encoding (internal_utf8 or internal_utf16),
5882            * which can never be the same as the principle encoding.
5883            * It doesn't appear there is another code path that gets
5884            * here with entity->open being TRUE.
5885            *
5886            * Since it is not certain that this logic is watertight,
5887            * we keep the line and merely exclude it from coverage
5888            * tests.
5889            */
5890           parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
5891         }
5892         return XML_ERROR_RECURSIVE_ENTITY_REF;
5893       }
5894       if (entity->notation) {
5895         if (enc == parser->m_encoding)
5896           parser->m_eventPtr = ptr;
5897         return XML_ERROR_BINARY_ENTITY_REF;
5898       }
5899       if (! entity->textPtr) {
5900         if (enc == parser->m_encoding)
5901           parser->m_eventPtr = ptr;
5902         return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
5903       } else {
5904         enum XML_Error result;
5905         const XML_Char *textEnd = entity->textPtr + entity->textLen;
5906         entity->open = XML_TRUE;
5907 #ifdef XML_DTD
5908         entityTrackingOnOpen(parser, entity, __LINE__);
5909 #endif
5910         result = appendAttributeValue(parser, parser->m_internalEncoding,
5911                                       isCdata, (const char *)entity->textPtr,
5912                                       (const char *)textEnd, pool,
5913                                       XML_ACCOUNT_ENTITY_EXPANSION);
5914 #ifdef XML_DTD
5915         entityTrackingOnClose(parser, entity, __LINE__);
5916 #endif
5917         entity->open = XML_FALSE;
5918         if (result)
5919           return result;
5920       }
5921     } break;
5922     default:
5923       /* The only token returned by XmlAttributeValueTok() that does
5924        * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
5925        * Getting that would require an entity name to contain an
5926        * incomplete XML character (e.g. \xE2\x82); however previous
5927        * tokenisers will have already recognised and rejected such
5928        * names before XmlAttributeValueTok() gets a look-in.  This
5929        * default case should be retained as a safety net, but the code
5930        * excluded from coverage tests.
5931        *
5932        * LCOV_EXCL_START
5933        */
5934       if (enc == parser->m_encoding)
5935         parser->m_eventPtr = ptr;
5936       return XML_ERROR_UNEXPECTED_STATE;
5937       /* LCOV_EXCL_STOP */
5938     }
5939     ptr = next;
5940   }
5941   /* not reached */
5942 }
5943 
5944 static enum XML_Error
storeEntityValue(XML_Parser parser,const ENCODING * enc,const char * entityTextPtr,const char * entityTextEnd,enum XML_Account account)5945 storeEntityValue(XML_Parser parser, const ENCODING *enc,
5946                  const char *entityTextPtr, const char *entityTextEnd,
5947                  enum XML_Account account) {
5948   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
5949   STRING_POOL *pool = &(dtd->entityValuePool);
5950   enum XML_Error result = XML_ERROR_NONE;
5951 #ifdef XML_DTD
5952   int oldInEntityValue = parser->m_prologState.inEntityValue;
5953   parser->m_prologState.inEntityValue = 1;
5954 #else
5955   UNUSED_P(account);
5956 #endif /* XML_DTD */
5957   /* never return Null for the value argument in EntityDeclHandler,
5958      since this would indicate an external entity; therefore we
5959      have to make sure that entityValuePool.start is not null */
5960   if (! pool->blocks) {
5961     if (! poolGrow(pool))
5962       return XML_ERROR_NO_MEMORY;
5963   }
5964 
5965   for (;;) {
5966     const char *next
5967         = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
5968     int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
5969 
5970 #ifdef XML_DTD
5971     if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
5972                                   account)) {
5973       accountingOnAbort(parser);
5974       result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5975       goto endEntityValue;
5976     }
5977 #endif
5978 
5979     switch (tok) {
5980     case XML_TOK_PARAM_ENTITY_REF:
5981 #ifdef XML_DTD
5982       if (parser->m_isParamEntity || enc != parser->m_encoding) {
5983         const XML_Char *name;
5984         ENTITY *entity;
5985         name = poolStoreString(&parser->m_tempPool, enc,
5986                                entityTextPtr + enc->minBytesPerChar,
5987                                next - enc->minBytesPerChar);
5988         if (! name) {
5989           result = XML_ERROR_NO_MEMORY;
5990           goto endEntityValue;
5991         }
5992         entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5993         poolDiscard(&parser->m_tempPool);
5994         if (! entity) {
5995           /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
5996           /* cannot report skipped entity here - see comments on
5997              parser->m_skippedEntityHandler
5998           if (parser->m_skippedEntityHandler)
5999             parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6000           */
6001           dtd->keepProcessing = dtd->standalone;
6002           goto endEntityValue;
6003         }
6004         if (entity->open) {
6005           if (enc == parser->m_encoding)
6006             parser->m_eventPtr = entityTextPtr;
6007           result = XML_ERROR_RECURSIVE_ENTITY_REF;
6008           goto endEntityValue;
6009         }
6010         if (entity->systemId) {
6011           if (parser->m_externalEntityRefHandler) {
6012             dtd->paramEntityRead = XML_FALSE;
6013             entity->open = XML_TRUE;
6014             entityTrackingOnOpen(parser, entity, __LINE__);
6015             if (! parser->m_externalEntityRefHandler(
6016                     parser->m_externalEntityRefHandlerArg, 0, entity->base,
6017                     entity->systemId, entity->publicId)) {
6018               entityTrackingOnClose(parser, entity, __LINE__);
6019               entity->open = XML_FALSE;
6020               result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
6021               goto endEntityValue;
6022             }
6023             entityTrackingOnClose(parser, entity, __LINE__);
6024             entity->open = XML_FALSE;
6025             if (! dtd->paramEntityRead)
6026               dtd->keepProcessing = dtd->standalone;
6027           } else
6028             dtd->keepProcessing = dtd->standalone;
6029         } else {
6030           entity->open = XML_TRUE;
6031           entityTrackingOnOpen(parser, entity, __LINE__);
6032           result = storeEntityValue(
6033               parser, parser->m_internalEncoding, (const char *)entity->textPtr,
6034               (const char *)(entity->textPtr + entity->textLen),
6035               XML_ACCOUNT_ENTITY_EXPANSION);
6036           entityTrackingOnClose(parser, entity, __LINE__);
6037           entity->open = XML_FALSE;
6038           if (result)
6039             goto endEntityValue;
6040         }
6041         break;
6042       }
6043 #endif /* XML_DTD */
6044       /* In the internal subset, PE references are not legal
6045          within markup declarations, e.g entity values in this case. */
6046       parser->m_eventPtr = entityTextPtr;
6047       result = XML_ERROR_PARAM_ENTITY_REF;
6048       goto endEntityValue;
6049     case XML_TOK_NONE:
6050       result = XML_ERROR_NONE;
6051       goto endEntityValue;
6052     case XML_TOK_ENTITY_REF:
6053     case XML_TOK_DATA_CHARS:
6054       if (! poolAppend(pool, enc, entityTextPtr, next)) {
6055         result = XML_ERROR_NO_MEMORY;
6056         goto endEntityValue;
6057       }
6058       break;
6059     case XML_TOK_TRAILING_CR:
6060       next = entityTextPtr + enc->minBytesPerChar;
6061       /* fall through */
6062     case XML_TOK_DATA_NEWLINE:
6063       if (pool->end == pool->ptr && ! poolGrow(pool)) {
6064         result = XML_ERROR_NO_MEMORY;
6065         goto endEntityValue;
6066       }
6067       *(pool->ptr)++ = 0xA;
6068       break;
6069     case XML_TOK_CHAR_REF: {
6070       XML_Char buf[XML_ENCODE_MAX];
6071       int i;
6072       int n = XmlCharRefNumber(enc, entityTextPtr);
6073       if (n < 0) {
6074         if (enc == parser->m_encoding)
6075           parser->m_eventPtr = entityTextPtr;
6076         result = XML_ERROR_BAD_CHAR_REF;
6077         goto endEntityValue;
6078       }
6079       n = XmlEncode(n, (ICHAR *)buf);
6080       /* The XmlEncode() functions can never return 0 here.  That
6081        * error return happens if the code point passed in is either
6082        * negative or greater than or equal to 0x110000.  The
6083        * XmlCharRefNumber() functions will all return a number
6084        * strictly less than 0x110000 or a negative value if an error
6085        * occurred.  The negative value is intercepted above, so
6086        * XmlEncode() is never passed a value it might return an
6087        * error for.
6088        */
6089       for (i = 0; i < n; i++) {
6090         if (pool->end == pool->ptr && ! poolGrow(pool)) {
6091           result = XML_ERROR_NO_MEMORY;
6092           goto endEntityValue;
6093         }
6094         *(pool->ptr)++ = buf[i];
6095       }
6096     } break;
6097     case XML_TOK_PARTIAL:
6098       if (enc == parser->m_encoding)
6099         parser->m_eventPtr = entityTextPtr;
6100       result = XML_ERROR_INVALID_TOKEN;
6101       goto endEntityValue;
6102     case XML_TOK_INVALID:
6103       if (enc == parser->m_encoding)
6104         parser->m_eventPtr = next;
6105       result = XML_ERROR_INVALID_TOKEN;
6106       goto endEntityValue;
6107     default:
6108       /* This default case should be unnecessary -- all the tokens
6109        * that XmlEntityValueTok() can return have their own explicit
6110        * cases -- but should be retained for safety.  We do however
6111        * exclude it from the coverage statistics.
6112        *
6113        * LCOV_EXCL_START
6114        */
6115       if (enc == parser->m_encoding)
6116         parser->m_eventPtr = entityTextPtr;
6117       result = XML_ERROR_UNEXPECTED_STATE;
6118       goto endEntityValue;
6119       /* LCOV_EXCL_STOP */
6120     }
6121     entityTextPtr = next;
6122   }
6123 endEntityValue:
6124 #ifdef XML_DTD
6125   parser->m_prologState.inEntityValue = oldInEntityValue;
6126 #endif /* XML_DTD */
6127   return result;
6128 }
6129 
6130 static void FASTCALL
normalizeLines(XML_Char * s)6131 normalizeLines(XML_Char *s) {
6132   XML_Char *p;
6133   for (;; s++) {
6134     if (*s == XML_T('\0'))
6135       return;
6136     if (*s == 0xD)
6137       break;
6138   }
6139   p = s;
6140   do {
6141     if (*s == 0xD) {
6142       *p++ = 0xA;
6143       if (*++s == 0xA)
6144         s++;
6145     } else
6146       *p++ = *s++;
6147   } while (*s);
6148   *p = XML_T('\0');
6149 }
6150 
6151 static int
reportProcessingInstruction(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6152 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
6153                             const char *start, const char *end) {
6154   const XML_Char *target;
6155   XML_Char *data;
6156   const char *tem;
6157   if (! parser->m_processingInstructionHandler) {
6158     if (parser->m_defaultHandler)
6159       reportDefault(parser, enc, start, end);
6160     return 1;
6161   }
6162   start += enc->minBytesPerChar * 2;
6163   tem = start + XmlNameLength(enc, start);
6164   target = poolStoreString(&parser->m_tempPool, enc, start, tem);
6165   if (! target)
6166     return 0;
6167   poolFinish(&parser->m_tempPool);
6168   data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
6169                          end - enc->minBytesPerChar * 2);
6170   if (! data)
6171     return 0;
6172   normalizeLines(data);
6173   parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
6174   poolClear(&parser->m_tempPool);
6175   return 1;
6176 }
6177 
6178 static int
reportComment(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6179 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
6180               const char *end) {
6181   XML_Char *data;
6182   if (! parser->m_commentHandler) {
6183     if (parser->m_defaultHandler)
6184       reportDefault(parser, enc, start, end);
6185     return 1;
6186   }
6187   data = poolStoreString(&parser->m_tempPool, enc,
6188                          start + enc->minBytesPerChar * 4,
6189                          end - enc->minBytesPerChar * 3);
6190   if (! data)
6191     return 0;
6192   normalizeLines(data);
6193   parser->m_commentHandler(parser->m_handlerArg, data);
6194   poolClear(&parser->m_tempPool);
6195   return 1;
6196 }
6197 
6198 static void
reportDefault(XML_Parser parser,const ENCODING * enc,const char * s,const char * end)6199 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
6200               const char *end) {
6201   if (MUST_CONVERT(enc, s)) {
6202     enum XML_Convert_Result convert_res;
6203     const char **eventPP;
6204     const char **eventEndPP;
6205     if (enc == parser->m_encoding) {
6206       eventPP = &parser->m_eventPtr;
6207       eventEndPP = &parser->m_eventEndPtr;
6208     } else {
6209       /* To get here, two things must be true; the parser must be
6210        * using a character encoding that is not the same as the
6211        * encoding passed in, and the encoding passed in must need
6212        * conversion to the internal format (UTF-8 unless XML_UNICODE
6213        * is defined).  The only occasions on which the encoding passed
6214        * in is not the same as the parser's encoding are when it is
6215        * the internal encoding (e.g. a previously defined parameter
6216        * entity, already converted to internal format).  This by
6217        * definition doesn't need conversion, so the whole branch never
6218        * gets executed.
6219        *
6220        * For safety's sake we don't delete these lines and merely
6221        * exclude them from coverage statistics.
6222        *
6223        * LCOV_EXCL_START
6224        */
6225       eventPP = &(parser->m_openInternalEntities->internalEventPtr);
6226       eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
6227       /* LCOV_EXCL_STOP */
6228     }
6229     do {
6230       ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
6231       convert_res
6232           = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
6233       *eventEndPP = s;
6234       parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
6235                                (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
6236       *eventPP = s;
6237     } while ((convert_res != XML_CONVERT_COMPLETED)
6238              && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
6239   } else
6240     parser->m_defaultHandler(parser->m_handlerArg, (XML_Char *)s,
6241                              (int)((XML_Char *)end - (XML_Char *)s));
6242 }
6243 
6244 static int
defineAttribute(ELEMENT_TYPE * type,ATTRIBUTE_ID * attId,XML_Bool isCdata,XML_Bool isId,const XML_Char * value,XML_Parser parser)6245 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
6246                 XML_Bool isId, const XML_Char *value, XML_Parser parser) {
6247   DEFAULT_ATTRIBUTE *att;
6248   if (value || isId) {
6249     /* The handling of default attributes gets messed up if we have
6250        a default which duplicates a non-default. */
6251     int i;
6252     for (i = 0; i < type->nDefaultAtts; i++)
6253       if (attId == type->defaultAtts[i].id)
6254         return 1;
6255     if (isId && ! type->idAtt && ! attId->xmlns)
6256       type->idAtt = attId;
6257   }
6258   if (type->nDefaultAtts == type->allocDefaultAtts) {
6259     if (type->allocDefaultAtts == 0) {
6260       type->allocDefaultAtts = 8;
6261       type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(
6262           parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6263       if (! type->defaultAtts) {
6264         type->allocDefaultAtts = 0;
6265         return 0;
6266       }
6267     } else {
6268       DEFAULT_ATTRIBUTE *temp;
6269 
6270       /* Detect and prevent integer overflow */
6271       if (type->allocDefaultAtts > INT_MAX / 2) {
6272         return 0;
6273       }
6274 
6275       int count = type->allocDefaultAtts * 2;
6276 
6277       /* Detect and prevent integer overflow.
6278        * The preprocessor guard addresses the "always false" warning
6279        * from -Wtype-limits on platforms where
6280        * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
6281 #if UINT_MAX >= SIZE_MAX
6282       if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) {
6283         return 0;
6284       }
6285 #endif
6286 
6287       temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts,
6288                                           (count * sizeof(DEFAULT_ATTRIBUTE)));
6289       if (temp == NULL)
6290         return 0;
6291       type->allocDefaultAtts = count;
6292       type->defaultAtts = temp;
6293     }
6294   }
6295   att = type->defaultAtts + type->nDefaultAtts;
6296   att->id = attId;
6297   att->value = value;
6298   att->isCdata = isCdata;
6299   if (! isCdata)
6300     attId->maybeTokenized = XML_TRUE;
6301   type->nDefaultAtts += 1;
6302   return 1;
6303 }
6304 
6305 static int
setElementTypePrefix(XML_Parser parser,ELEMENT_TYPE * elementType)6306 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
6307   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6308   const XML_Char *name;
6309   for (name = elementType->name; *name; name++) {
6310     if (*name == XML_T(ASCII_COLON)) {
6311       PREFIX *prefix;
6312       const XML_Char *s;
6313       for (s = elementType->name; s != name; s++) {
6314         if (! poolAppendChar(&dtd->pool, *s))
6315           return 0;
6316       }
6317       if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6318         return 0;
6319       prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
6320                                 sizeof(PREFIX));
6321       if (! prefix)
6322         return 0;
6323       if (prefix->name == poolStart(&dtd->pool))
6324         poolFinish(&dtd->pool);
6325       else
6326         poolDiscard(&dtd->pool);
6327       elementType->prefix = prefix;
6328       break;
6329     }
6330   }
6331   return 1;
6332 }
6333 
6334 static ATTRIBUTE_ID *
getAttributeId(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6335 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
6336                const char *end) {
6337   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6338   ATTRIBUTE_ID *id;
6339   const XML_Char *name;
6340   if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6341     return NULL;
6342   name = poolStoreString(&dtd->pool, enc, start, end);
6343   if (! name)
6344     return NULL;
6345   /* skip quotation mark - its storage will be re-used (like in name[-1]) */
6346   ++name;
6347   id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
6348                               sizeof(ATTRIBUTE_ID));
6349   if (! id)
6350     return NULL;
6351   if (id->name != name)
6352     poolDiscard(&dtd->pool);
6353   else {
6354     poolFinish(&dtd->pool);
6355     if (! parser->m_ns)
6356       ;
6357     else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
6358              && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
6359              && name[4] == XML_T(ASCII_s)
6360              && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
6361       if (name[5] == XML_T('\0'))
6362         id->prefix = &dtd->defaultPrefix;
6363       else
6364         id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
6365                                       sizeof(PREFIX));
6366       id->xmlns = XML_TRUE;
6367     } else {
6368       int i;
6369       for (i = 0; name[i]; i++) {
6370         /* attributes without prefix are *not* in the default namespace */
6371         if (name[i] == XML_T(ASCII_COLON)) {
6372           int j;
6373           for (j = 0; j < i; j++) {
6374             if (! poolAppendChar(&dtd->pool, name[j]))
6375               return NULL;
6376           }
6377           if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6378             return NULL;
6379           id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
6380                                         poolStart(&dtd->pool), sizeof(PREFIX));
6381           if (! id->prefix)
6382             return NULL;
6383           if (id->prefix->name == poolStart(&dtd->pool))
6384             poolFinish(&dtd->pool);
6385           else
6386             poolDiscard(&dtd->pool);
6387           break;
6388         }
6389       }
6390     }
6391   }
6392   return id;
6393 }
6394 
6395 #define CONTEXT_SEP XML_T(ASCII_FF)
6396 
6397 static const XML_Char *
getContext(XML_Parser parser)6398 getContext(XML_Parser parser) {
6399   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6400   HASH_TABLE_ITER iter;
6401   XML_Bool needSep = XML_FALSE;
6402 
6403   if (dtd->defaultPrefix.binding) {
6404     int i;
6405     int len;
6406     if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6407       return NULL;
6408     len = dtd->defaultPrefix.binding->uriLen;
6409     if (parser->m_namespaceSeparator)
6410       len--;
6411     for (i = 0; i < len; i++) {
6412       if (! poolAppendChar(&parser->m_tempPool,
6413                            dtd->defaultPrefix.binding->uri[i])) {
6414         /* Because of memory caching, I don't believe this line can be
6415          * executed.
6416          *
6417          * This is part of a loop copying the default prefix binding
6418          * URI into the parser's temporary string pool.  Previously,
6419          * that URI was copied into the same string pool, with a
6420          * terminating NUL character, as part of setContext().  When
6421          * the pool was cleared, that leaves a block definitely big
6422          * enough to hold the URI on the free block list of the pool.
6423          * The URI copy in getContext() therefore cannot run out of
6424          * memory.
6425          *
6426          * If the pool is used between the setContext() and
6427          * getContext() calls, the worst it can do is leave a bigger
6428          * block on the front of the free list.  Given that this is
6429          * all somewhat inobvious and program logic can be changed, we
6430          * don't delete the line but we do exclude it from the test
6431          * coverage statistics.
6432          */
6433         return NULL; /* LCOV_EXCL_LINE */
6434       }
6435     }
6436     needSep = XML_TRUE;
6437   }
6438 
6439   hashTableIterInit(&iter, &(dtd->prefixes));
6440   for (;;) {
6441     int i;
6442     int len;
6443     const XML_Char *s;
6444     PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6445     if (! prefix)
6446       break;
6447     if (! prefix->binding) {
6448       /* This test appears to be (justifiable) paranoia.  There does
6449        * not seem to be a way of injecting a prefix without a binding
6450        * that doesn't get errored long before this function is called.
6451        * The test should remain for safety's sake, so we instead
6452        * exclude the following line from the coverage statistics.
6453        */
6454       continue; /* LCOV_EXCL_LINE */
6455     }
6456     if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6457       return NULL;
6458     for (s = prefix->name; *s; s++)
6459       if (! poolAppendChar(&parser->m_tempPool, *s))
6460         return NULL;
6461     if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6462       return NULL;
6463     len = prefix->binding->uriLen;
6464     if (parser->m_namespaceSeparator)
6465       len--;
6466     for (i = 0; i < len; i++)
6467       if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
6468         return NULL;
6469     needSep = XML_TRUE;
6470   }
6471 
6472   hashTableIterInit(&iter, &(dtd->generalEntities));
6473   for (;;) {
6474     const XML_Char *s;
6475     ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
6476     if (! e)
6477       break;
6478     if (! e->open)
6479       continue;
6480     if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6481       return NULL;
6482     for (s = e->name; *s; s++)
6483       if (! poolAppendChar(&parser->m_tempPool, *s))
6484         return 0;
6485     needSep = XML_TRUE;
6486   }
6487 
6488   if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6489     return NULL;
6490   return parser->m_tempPool.start;
6491 }
6492 
6493 static XML_Bool
setContext(XML_Parser parser,const XML_Char * context)6494 setContext(XML_Parser parser, const XML_Char *context) {
6495   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6496   const XML_Char *s = context;
6497 
6498   while (*context != XML_T('\0')) {
6499     if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
6500       ENTITY *e;
6501       if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6502         return XML_FALSE;
6503       e = (ENTITY *)lookup(parser, &dtd->generalEntities,
6504                            poolStart(&parser->m_tempPool), 0);
6505       if (e)
6506         e->open = XML_TRUE;
6507       if (*s != XML_T('\0'))
6508         s++;
6509       context = s;
6510       poolDiscard(&parser->m_tempPool);
6511     } else if (*s == XML_T(ASCII_EQUALS)) {
6512       PREFIX *prefix;
6513       if (poolLength(&parser->m_tempPool) == 0)
6514         prefix = &dtd->defaultPrefix;
6515       else {
6516         if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6517           return XML_FALSE;
6518         prefix
6519             = (PREFIX *)lookup(parser, &dtd->prefixes,
6520                                poolStart(&parser->m_tempPool), sizeof(PREFIX));
6521         if (! prefix)
6522           return XML_FALSE;
6523         if (prefix->name == poolStart(&parser->m_tempPool)) {
6524           prefix->name = poolCopyString(&dtd->pool, prefix->name);
6525           if (! prefix->name)
6526             return XML_FALSE;
6527         }
6528         poolDiscard(&parser->m_tempPool);
6529       }
6530       for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
6531            context++)
6532         if (! poolAppendChar(&parser->m_tempPool, *context))
6533           return XML_FALSE;
6534       if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6535         return XML_FALSE;
6536       if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
6537                      &parser->m_inheritedBindings)
6538           != XML_ERROR_NONE)
6539         return XML_FALSE;
6540       poolDiscard(&parser->m_tempPool);
6541       if (*context != XML_T('\0'))
6542         ++context;
6543       s = context;
6544     } else {
6545       if (! poolAppendChar(&parser->m_tempPool, *s))
6546         return XML_FALSE;
6547       s++;
6548     }
6549   }
6550   return XML_TRUE;
6551 }
6552 
6553 static void FASTCALL
normalizePublicId(XML_Char * publicId)6554 normalizePublicId(XML_Char *publicId) {
6555   XML_Char *p = publicId;
6556   XML_Char *s;
6557   for (s = publicId; *s; s++) {
6558     switch (*s) {
6559     case 0x20:
6560     case 0xD:
6561     case 0xA:
6562       if (p != publicId && p[-1] != 0x20)
6563         *p++ = 0x20;
6564       break;
6565     default:
6566       *p++ = *s;
6567     }
6568   }
6569   if (p != publicId && p[-1] == 0x20)
6570     --p;
6571   *p = XML_T('\0');
6572 }
6573 
6574 static DTD *
dtdCreate(const XML_Memory_Handling_Suite * ms)6575 dtdCreate(const XML_Memory_Handling_Suite *ms) {
6576   DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD));
6577   if (p == NULL)
6578     return p;
6579   poolInit(&(p->pool), ms);
6580   poolInit(&(p->entityValuePool), ms);
6581   hashTableInit(&(p->generalEntities), ms);
6582   hashTableInit(&(p->elementTypes), ms);
6583   hashTableInit(&(p->attributeIds), ms);
6584   hashTableInit(&(p->prefixes), ms);
6585 #ifdef XML_DTD
6586   p->paramEntityRead = XML_FALSE;
6587   hashTableInit(&(p->paramEntities), ms);
6588 #endif /* XML_DTD */
6589   p->defaultPrefix.name = NULL;
6590   p->defaultPrefix.binding = NULL;
6591 
6592   p->in_eldecl = XML_FALSE;
6593   p->scaffIndex = NULL;
6594   p->scaffold = NULL;
6595   p->scaffLevel = 0;
6596   p->scaffSize = 0;
6597   p->scaffCount = 0;
6598   p->contentStringLen = 0;
6599 
6600   p->keepProcessing = XML_TRUE;
6601   p->hasParamEntityRefs = XML_FALSE;
6602   p->standalone = XML_FALSE;
6603   return p;
6604 }
6605 
6606 static void
dtdReset(DTD * p,const XML_Memory_Handling_Suite * ms)6607 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) {
6608   HASH_TABLE_ITER iter;
6609   hashTableIterInit(&iter, &(p->elementTypes));
6610   for (;;) {
6611     ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6612     if (! e)
6613       break;
6614     if (e->allocDefaultAtts != 0)
6615       ms->free_fcn(e->defaultAtts);
6616   }
6617   hashTableClear(&(p->generalEntities));
6618 #ifdef XML_DTD
6619   p->paramEntityRead = XML_FALSE;
6620   hashTableClear(&(p->paramEntities));
6621 #endif /* XML_DTD */
6622   hashTableClear(&(p->elementTypes));
6623   hashTableClear(&(p->attributeIds));
6624   hashTableClear(&(p->prefixes));
6625   poolClear(&(p->pool));
6626   poolClear(&(p->entityValuePool));
6627   p->defaultPrefix.name = NULL;
6628   p->defaultPrefix.binding = NULL;
6629 
6630   p->in_eldecl = XML_FALSE;
6631 
6632   ms->free_fcn(p->scaffIndex);
6633   p->scaffIndex = NULL;
6634   ms->free_fcn(p->scaffold);
6635   p->scaffold = NULL;
6636 
6637   p->scaffLevel = 0;
6638   p->scaffSize = 0;
6639   p->scaffCount = 0;
6640   p->contentStringLen = 0;
6641 
6642   p->keepProcessing = XML_TRUE;
6643   p->hasParamEntityRefs = XML_FALSE;
6644   p->standalone = XML_FALSE;
6645 }
6646 
6647 static void
dtdDestroy(DTD * p,XML_Bool isDocEntity,const XML_Memory_Handling_Suite * ms)6648 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) {
6649   HASH_TABLE_ITER iter;
6650   hashTableIterInit(&iter, &(p->elementTypes));
6651   for (;;) {
6652     ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6653     if (! e)
6654       break;
6655     if (e->allocDefaultAtts != 0)
6656       ms->free_fcn(e->defaultAtts);
6657   }
6658   hashTableDestroy(&(p->generalEntities));
6659 #ifdef XML_DTD
6660   hashTableDestroy(&(p->paramEntities));
6661 #endif /* XML_DTD */
6662   hashTableDestroy(&(p->elementTypes));
6663   hashTableDestroy(&(p->attributeIds));
6664   hashTableDestroy(&(p->prefixes));
6665   poolDestroy(&(p->pool));
6666   poolDestroy(&(p->entityValuePool));
6667   if (isDocEntity) {
6668     ms->free_fcn(p->scaffIndex);
6669     ms->free_fcn(p->scaffold);
6670   }
6671   ms->free_fcn(p);
6672 }
6673 
6674 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
6675    The new DTD has already been initialized.
6676 */
6677 static int
dtdCopy(XML_Parser oldParser,DTD * newDtd,const DTD * oldDtd,const XML_Memory_Handling_Suite * ms)6678 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
6679         const XML_Memory_Handling_Suite *ms) {
6680   HASH_TABLE_ITER iter;
6681 
6682   /* Copy the prefix table. */
6683 
6684   hashTableIterInit(&iter, &(oldDtd->prefixes));
6685   for (;;) {
6686     const XML_Char *name;
6687     const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
6688     if (! oldP)
6689       break;
6690     name = poolCopyString(&(newDtd->pool), oldP->name);
6691     if (! name)
6692       return 0;
6693     if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
6694       return 0;
6695   }
6696 
6697   hashTableIterInit(&iter, &(oldDtd->attributeIds));
6698 
6699   /* Copy the attribute id table. */
6700 
6701   for (;;) {
6702     ATTRIBUTE_ID *newA;
6703     const XML_Char *name;
6704     const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
6705 
6706     if (! oldA)
6707       break;
6708     /* Remember to allocate the scratch byte before the name. */
6709     if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
6710       return 0;
6711     name = poolCopyString(&(newDtd->pool), oldA->name);
6712     if (! name)
6713       return 0;
6714     ++name;
6715     newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
6716                                   sizeof(ATTRIBUTE_ID));
6717     if (! newA)
6718       return 0;
6719     newA->maybeTokenized = oldA->maybeTokenized;
6720     if (oldA->prefix) {
6721       newA->xmlns = oldA->xmlns;
6722       if (oldA->prefix == &oldDtd->defaultPrefix)
6723         newA->prefix = &newDtd->defaultPrefix;
6724       else
6725         newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
6726                                         oldA->prefix->name, 0);
6727     }
6728   }
6729 
6730   /* Copy the element type table. */
6731 
6732   hashTableIterInit(&iter, &(oldDtd->elementTypes));
6733 
6734   for (;;) {
6735     int i;
6736     ELEMENT_TYPE *newE;
6737     const XML_Char *name;
6738     const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6739     if (! oldE)
6740       break;
6741     name = poolCopyString(&(newDtd->pool), oldE->name);
6742     if (! name)
6743       return 0;
6744     newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
6745                                   sizeof(ELEMENT_TYPE));
6746     if (! newE)
6747       return 0;
6748     if (oldE->nDefaultAtts) {
6749       newE->defaultAtts = (DEFAULT_ATTRIBUTE *)ms->malloc_fcn(
6750           oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6751       if (! newE->defaultAtts) {
6752         return 0;
6753       }
6754     }
6755     if (oldE->idAtt)
6756       newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
6757                                            oldE->idAtt->name, 0);
6758     newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
6759     if (oldE->prefix)
6760       newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
6761                                       oldE->prefix->name, 0);
6762     for (i = 0; i < newE->nDefaultAtts; i++) {
6763       newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
6764           oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
6765       newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
6766       if (oldE->defaultAtts[i].value) {
6767         newE->defaultAtts[i].value
6768             = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
6769         if (! newE->defaultAtts[i].value)
6770           return 0;
6771       } else
6772         newE->defaultAtts[i].value = NULL;
6773     }
6774   }
6775 
6776   /* Copy the entity tables. */
6777   if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
6778                         &(oldDtd->generalEntities)))
6779     return 0;
6780 
6781 #ifdef XML_DTD
6782   if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
6783                         &(oldDtd->paramEntities)))
6784     return 0;
6785   newDtd->paramEntityRead = oldDtd->paramEntityRead;
6786 #endif /* XML_DTD */
6787 
6788   newDtd->keepProcessing = oldDtd->keepProcessing;
6789   newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
6790   newDtd->standalone = oldDtd->standalone;
6791 
6792   /* Don't want deep copying for scaffolding */
6793   newDtd->in_eldecl = oldDtd->in_eldecl;
6794   newDtd->scaffold = oldDtd->scaffold;
6795   newDtd->contentStringLen = oldDtd->contentStringLen;
6796   newDtd->scaffSize = oldDtd->scaffSize;
6797   newDtd->scaffLevel = oldDtd->scaffLevel;
6798   newDtd->scaffIndex = oldDtd->scaffIndex;
6799 
6800   return 1;
6801 } /* End dtdCopy */
6802 
6803 static int
copyEntityTable(XML_Parser oldParser,HASH_TABLE * newTable,STRING_POOL * newPool,const HASH_TABLE * oldTable)6804 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
6805                 STRING_POOL *newPool, const HASH_TABLE *oldTable) {
6806   HASH_TABLE_ITER iter;
6807   const XML_Char *cachedOldBase = NULL;
6808   const XML_Char *cachedNewBase = NULL;
6809 
6810   hashTableIterInit(&iter, oldTable);
6811 
6812   for (;;) {
6813     ENTITY *newE;
6814     const XML_Char *name;
6815     const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
6816     if (! oldE)
6817       break;
6818     name = poolCopyString(newPool, oldE->name);
6819     if (! name)
6820       return 0;
6821     newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
6822     if (! newE)
6823       return 0;
6824     if (oldE->systemId) {
6825       const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
6826       if (! tem)
6827         return 0;
6828       newE->systemId = tem;
6829       if (oldE->base) {
6830         if (oldE->base == cachedOldBase)
6831           newE->base = cachedNewBase;
6832         else {
6833           cachedOldBase = oldE->base;
6834           tem = poolCopyString(newPool, cachedOldBase);
6835           if (! tem)
6836             return 0;
6837           cachedNewBase = newE->base = tem;
6838         }
6839       }
6840       if (oldE->publicId) {
6841         tem = poolCopyString(newPool, oldE->publicId);
6842         if (! tem)
6843           return 0;
6844         newE->publicId = tem;
6845       }
6846     } else {
6847       const XML_Char *tem
6848           = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
6849       if (! tem)
6850         return 0;
6851       newE->textPtr = tem;
6852       newE->textLen = oldE->textLen;
6853     }
6854     if (oldE->notation) {
6855       const XML_Char *tem = poolCopyString(newPool, oldE->notation);
6856       if (! tem)
6857         return 0;
6858       newE->notation = tem;
6859     }
6860     newE->is_param = oldE->is_param;
6861     newE->is_internal = oldE->is_internal;
6862   }
6863   return 1;
6864 }
6865 
6866 #define INIT_POWER 6
6867 
6868 static XML_Bool FASTCALL
keyeq(KEY s1,KEY s2)6869 keyeq(KEY s1, KEY s2) {
6870   for (; *s1 == *s2; s1++, s2++)
6871     if (*s1 == 0)
6872       return XML_TRUE;
6873   return XML_FALSE;
6874 }
6875 
6876 static size_t
keylen(KEY s)6877 keylen(KEY s) {
6878   size_t len = 0;
6879   for (; *s; s++, len++)
6880     ;
6881   return len;
6882 }
6883 
6884 static void
copy_salt_to_sipkey(XML_Parser parser,struct sipkey * key)6885 copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
6886   key->k[0] = 0;
6887   key->k[1] = get_hash_secret_salt(parser);
6888 }
6889 
6890 static unsigned long FASTCALL
hash(XML_Parser parser,KEY s)6891 hash(XML_Parser parser, KEY s) {
6892   struct siphash state;
6893   struct sipkey key;
6894   (void)sip24_valid;
6895   copy_salt_to_sipkey(parser, &key);
6896   sip24_init(&state, &key);
6897   sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
6898   return (unsigned long)sip24_final(&state);
6899 }
6900 
6901 static NAMED *
lookup(XML_Parser parser,HASH_TABLE * table,KEY name,size_t createSize)6902 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
6903   size_t i;
6904   if (table->size == 0) {
6905     size_t tsize;
6906     if (! createSize)
6907       return NULL;
6908     table->power = INIT_POWER;
6909     /* table->size is a power of 2 */
6910     table->size = (size_t)1 << INIT_POWER;
6911     tsize = table->size * sizeof(NAMED *);
6912     table->v = (NAMED **)table->mem->malloc_fcn(tsize);
6913     if (! table->v) {
6914       table->size = 0;
6915       return NULL;
6916     }
6917     memset(table->v, 0, tsize);
6918     i = hash(parser, name) & ((unsigned long)table->size - 1);
6919   } else {
6920     unsigned long h = hash(parser, name);
6921     unsigned long mask = (unsigned long)table->size - 1;
6922     unsigned char step = 0;
6923     i = h & mask;
6924     while (table->v[i]) {
6925       if (keyeq(name, table->v[i]->name))
6926         return table->v[i];
6927       if (! step)
6928         step = PROBE_STEP(h, mask, table->power);
6929       i < step ? (i += table->size - step) : (i -= step);
6930     }
6931     if (! createSize)
6932       return NULL;
6933 
6934     /* check for overflow (table is half full) */
6935     if (table->used >> (table->power - 1)) {
6936       unsigned char newPower = table->power + 1;
6937 
6938       /* Detect and prevent invalid shift */
6939       if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
6940         return NULL;
6941       }
6942 
6943       size_t newSize = (size_t)1 << newPower;
6944       unsigned long newMask = (unsigned long)newSize - 1;
6945 
6946       /* Detect and prevent integer overflow */
6947       if (newSize > (size_t)(-1) / sizeof(NAMED *)) {
6948         return NULL;
6949       }
6950 
6951       size_t tsize = newSize * sizeof(NAMED *);
6952       NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize);
6953       if (! newV)
6954         return NULL;
6955       memset(newV, 0, tsize);
6956       for (i = 0; i < table->size; i++)
6957         if (table->v[i]) {
6958           unsigned long newHash = hash(parser, table->v[i]->name);
6959           size_t j = newHash & newMask;
6960           step = 0;
6961           while (newV[j]) {
6962             if (! step)
6963               step = PROBE_STEP(newHash, newMask, newPower);
6964             j < step ? (j += newSize - step) : (j -= step);
6965           }
6966           newV[j] = table->v[i];
6967         }
6968       table->mem->free_fcn(table->v);
6969       table->v = newV;
6970       table->power = newPower;
6971       table->size = newSize;
6972       i = h & newMask;
6973       step = 0;
6974       while (table->v[i]) {
6975         if (! step)
6976           step = PROBE_STEP(h, newMask, newPower);
6977         i < step ? (i += newSize - step) : (i -= step);
6978       }
6979     }
6980   }
6981   table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize);
6982   if (! table->v[i])
6983     return NULL;
6984   memset(table->v[i], 0, createSize);
6985   table->v[i]->name = name;
6986   (table->used)++;
6987   return table->v[i];
6988 }
6989 
6990 static void FASTCALL
hashTableClear(HASH_TABLE * table)6991 hashTableClear(HASH_TABLE *table) {
6992   size_t i;
6993   for (i = 0; i < table->size; i++) {
6994     table->mem->free_fcn(table->v[i]);
6995     table->v[i] = NULL;
6996   }
6997   table->used = 0;
6998 }
6999 
7000 static void FASTCALL
hashTableDestroy(HASH_TABLE * table)7001 hashTableDestroy(HASH_TABLE *table) {
7002   size_t i;
7003   for (i = 0; i < table->size; i++)
7004     table->mem->free_fcn(table->v[i]);
7005   table->mem->free_fcn(table->v);
7006 }
7007 
7008 static void FASTCALL
hashTableInit(HASH_TABLE * p,const XML_Memory_Handling_Suite * ms)7009 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) {
7010   p->power = 0;
7011   p->size = 0;
7012   p->used = 0;
7013   p->v = NULL;
7014   p->mem = ms;
7015 }
7016 
7017 static void FASTCALL
hashTableIterInit(HASH_TABLE_ITER * iter,const HASH_TABLE * table)7018 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
7019   iter->p = table->v;
7020   iter->end = iter->p ? iter->p + table->size : NULL;
7021 }
7022 
7023 static NAMED *FASTCALL
hashTableIterNext(HASH_TABLE_ITER * iter)7024 hashTableIterNext(HASH_TABLE_ITER *iter) {
7025   while (iter->p != iter->end) {
7026     NAMED *tem = *(iter->p)++;
7027     if (tem)
7028       return tem;
7029   }
7030   return NULL;
7031 }
7032 
7033 static void FASTCALL
poolInit(STRING_POOL * pool,const XML_Memory_Handling_Suite * ms)7034 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) {
7035   pool->blocks = NULL;
7036   pool->freeBlocks = NULL;
7037   pool->start = NULL;
7038   pool->ptr = NULL;
7039   pool->end = NULL;
7040   pool->mem = ms;
7041 }
7042 
7043 static void FASTCALL
poolClear(STRING_POOL * pool)7044 poolClear(STRING_POOL *pool) {
7045   if (! pool->freeBlocks)
7046     pool->freeBlocks = pool->blocks;
7047   else {
7048     BLOCK *p = pool->blocks;
7049     while (p) {
7050       BLOCK *tem = p->next;
7051       p->next = pool->freeBlocks;
7052       pool->freeBlocks = p;
7053       p = tem;
7054     }
7055   }
7056   pool->blocks = NULL;
7057   pool->start = NULL;
7058   pool->ptr = NULL;
7059   pool->end = NULL;
7060 }
7061 
7062 static void FASTCALL
poolDestroy(STRING_POOL * pool)7063 poolDestroy(STRING_POOL *pool) {
7064   BLOCK *p = pool->blocks;
7065   while (p) {
7066     BLOCK *tem = p->next;
7067     pool->mem->free_fcn(p);
7068     p = tem;
7069   }
7070   p = pool->freeBlocks;
7071   while (p) {
7072     BLOCK *tem = p->next;
7073     pool->mem->free_fcn(p);
7074     p = tem;
7075   }
7076 }
7077 
7078 static XML_Char *
poolAppend(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7079 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7080            const char *end) {
7081   if (! pool->ptr && ! poolGrow(pool))
7082     return NULL;
7083   for (;;) {
7084     const enum XML_Convert_Result convert_res = XmlConvert(
7085         enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
7086     if ((convert_res == XML_CONVERT_COMPLETED)
7087         || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
7088       break;
7089     if (! poolGrow(pool))
7090       return NULL;
7091   }
7092   return pool->start;
7093 }
7094 
7095 static const XML_Char *FASTCALL
poolCopyString(STRING_POOL * pool,const XML_Char * s)7096 poolCopyString(STRING_POOL *pool, const XML_Char *s) {
7097   do {
7098     if (! poolAppendChar(pool, *s))
7099       return NULL;
7100   } while (*s++);
7101   s = pool->start;
7102   poolFinish(pool);
7103   return s;
7104 }
7105 
7106 static const XML_Char *
poolCopyStringN(STRING_POOL * pool,const XML_Char * s,int n)7107 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
7108   if (! pool->ptr && ! poolGrow(pool)) {
7109     /* The following line is unreachable given the current usage of
7110      * poolCopyStringN().  Currently it is called from exactly one
7111      * place to copy the text of a simple general entity.  By that
7112      * point, the name of the entity is already stored in the pool, so
7113      * pool->ptr cannot be NULL.
7114      *
7115      * If poolCopyStringN() is used elsewhere as it well might be,
7116      * this line may well become executable again.  Regardless, this
7117      * sort of check shouldn't be removed lightly, so we just exclude
7118      * it from the coverage statistics.
7119      */
7120     return NULL; /* LCOV_EXCL_LINE */
7121   }
7122   for (; n > 0; --n, s++) {
7123     if (! poolAppendChar(pool, *s))
7124       return NULL;
7125   }
7126   s = pool->start;
7127   poolFinish(pool);
7128   return s;
7129 }
7130 
7131 static const XML_Char *FASTCALL
poolAppendString(STRING_POOL * pool,const XML_Char * s)7132 poolAppendString(STRING_POOL *pool, const XML_Char *s) {
7133   while (*s) {
7134     if (! poolAppendChar(pool, *s))
7135       return NULL;
7136     s++;
7137   }
7138   return pool->start;
7139 }
7140 
7141 static XML_Char *
poolStoreString(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7142 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7143                 const char *end) {
7144   if (! poolAppend(pool, enc, ptr, end))
7145     return NULL;
7146   if (pool->ptr == pool->end && ! poolGrow(pool))
7147     return NULL;
7148   *(pool->ptr)++ = 0;
7149   return pool->start;
7150 }
7151 
7152 static size_t
poolBytesToAllocateFor(int blockSize)7153 poolBytesToAllocateFor(int blockSize) {
7154   /* Unprotected math would be:
7155   ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
7156   **
7157   ** Detect overflow, avoiding _signed_ overflow undefined behavior
7158   ** For a + b * c we check b * c in isolation first, so that addition of a
7159   ** on top has no chance of making us accept a small non-negative number
7160   */
7161   const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
7162 
7163   if (blockSize <= 0)
7164     return 0;
7165 
7166   if (blockSize > (int)(INT_MAX / stretch))
7167     return 0;
7168 
7169   {
7170     const int stretchedBlockSize = blockSize * (int)stretch;
7171     const int bytesToAllocate
7172         = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
7173     if (bytesToAllocate < 0)
7174       return 0;
7175 
7176     return (size_t)bytesToAllocate;
7177   }
7178 }
7179 
7180 static XML_Bool FASTCALL
poolGrow(STRING_POOL * pool)7181 poolGrow(STRING_POOL *pool) {
7182   if (pool->freeBlocks) {
7183     if (pool->start == 0) {
7184       pool->blocks = pool->freeBlocks;
7185       pool->freeBlocks = pool->freeBlocks->next;
7186       pool->blocks->next = NULL;
7187       pool->start = pool->blocks->s;
7188       pool->end = pool->start + pool->blocks->size;
7189       pool->ptr = pool->start;
7190       return XML_TRUE;
7191     }
7192     if (pool->end - pool->start < pool->freeBlocks->size) {
7193       BLOCK *tem = pool->freeBlocks->next;
7194       pool->freeBlocks->next = pool->blocks;
7195       pool->blocks = pool->freeBlocks;
7196       pool->freeBlocks = tem;
7197       memcpy(pool->blocks->s, pool->start,
7198              (pool->end - pool->start) * sizeof(XML_Char));
7199       pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
7200       pool->start = pool->blocks->s;
7201       pool->end = pool->start + pool->blocks->size;
7202       return XML_TRUE;
7203     }
7204   }
7205   if (pool->blocks && pool->start == pool->blocks->s) {
7206     BLOCK *temp;
7207     int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
7208     size_t bytesToAllocate;
7209 
7210     /* NOTE: Needs to be calculated prior to calling `realloc`
7211              to avoid dangling pointers: */
7212     const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
7213 
7214     if (blockSize < 0) {
7215       /* This condition traps a situation where either more than
7216        * INT_MAX/2 bytes have already been allocated.  This isn't
7217        * readily testable, since it is unlikely that an average
7218        * machine will have that much memory, so we exclude it from the
7219        * coverage statistics.
7220        */
7221       return XML_FALSE; /* LCOV_EXCL_LINE */
7222     }
7223 
7224     bytesToAllocate = poolBytesToAllocateFor(blockSize);
7225     if (bytesToAllocate == 0)
7226       return XML_FALSE;
7227 
7228     temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks,
7229                                            (unsigned)bytesToAllocate);
7230     if (temp == NULL)
7231       return XML_FALSE;
7232     pool->blocks = temp;
7233     pool->blocks->size = blockSize;
7234     pool->ptr = pool->blocks->s + offsetInsideBlock;
7235     pool->start = pool->blocks->s;
7236     pool->end = pool->start + blockSize;
7237   } else {
7238     BLOCK *tem;
7239     int blockSize = (int)(pool->end - pool->start);
7240     size_t bytesToAllocate;
7241 
7242     if (blockSize < 0) {
7243       /* This condition traps a situation where either more than
7244        * INT_MAX bytes have already been allocated (which is prevented
7245        * by various pieces of program logic, not least this one, never
7246        * mind the unlikelihood of actually having that much memory) or
7247        * the pool control fields have been corrupted (which could
7248        * conceivably happen in an extremely buggy user handler
7249        * function).  Either way it isn't readily testable, so we
7250        * exclude it from the coverage statistics.
7251        */
7252       return XML_FALSE; /* LCOV_EXCL_LINE */
7253     }
7254 
7255     if (blockSize < INIT_BLOCK_SIZE)
7256       blockSize = INIT_BLOCK_SIZE;
7257     else {
7258       /* Detect overflow, avoiding _signed_ overflow undefined behavior */
7259       if ((int)((unsigned)blockSize * 2U) < 0) {
7260         return XML_FALSE;
7261       }
7262       blockSize *= 2;
7263     }
7264 
7265     bytesToAllocate = poolBytesToAllocateFor(blockSize);
7266     if (bytesToAllocate == 0)
7267       return XML_FALSE;
7268 
7269     tem = (BLOCK *)pool->mem->malloc_fcn(bytesToAllocate);
7270     if (! tem)
7271       return XML_FALSE;
7272     tem->size = blockSize;
7273     tem->next = pool->blocks;
7274     pool->blocks = tem;
7275     if (pool->ptr != pool->start)
7276       memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
7277     pool->ptr = tem->s + (pool->ptr - pool->start);
7278     pool->start = tem->s;
7279     pool->end = tem->s + blockSize;
7280   }
7281   return XML_TRUE;
7282 }
7283 
7284 static int FASTCALL
nextScaffoldPart(XML_Parser parser)7285 nextScaffoldPart(XML_Parser parser) {
7286   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7287   CONTENT_SCAFFOLD *me;
7288   int next;
7289 
7290   if (! dtd->scaffIndex) {
7291     dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
7292     if (! dtd->scaffIndex)
7293       return -1;
7294     dtd->scaffIndex[0] = 0;
7295   }
7296 
7297   if (dtd->scaffCount >= dtd->scaffSize) {
7298     CONTENT_SCAFFOLD *temp;
7299     if (dtd->scaffold) {
7300       /* Detect and prevent integer overflow */
7301       if (dtd->scaffSize > UINT_MAX / 2u) {
7302         return -1;
7303       }
7304       /* Detect and prevent integer overflow.
7305        * The preprocessor guard addresses the "always false" warning
7306        * from -Wtype-limits on platforms where
7307        * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7308 #if UINT_MAX >= SIZE_MAX
7309       if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) {
7310         return -1;
7311       }
7312 #endif
7313 
7314       temp = (CONTENT_SCAFFOLD *)REALLOC(
7315           parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
7316       if (temp == NULL)
7317         return -1;
7318       dtd->scaffSize *= 2;
7319     } else {
7320       temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
7321                                                     * sizeof(CONTENT_SCAFFOLD));
7322       if (temp == NULL)
7323         return -1;
7324       dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
7325     }
7326     dtd->scaffold = temp;
7327   }
7328   next = dtd->scaffCount++;
7329   me = &dtd->scaffold[next];
7330   if (dtd->scaffLevel) {
7331     CONTENT_SCAFFOLD *parent
7332         = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
7333     if (parent->lastchild) {
7334       dtd->scaffold[parent->lastchild].nextsib = next;
7335     }
7336     if (! parent->childcnt)
7337       parent->firstchild = next;
7338     parent->lastchild = next;
7339     parent->childcnt++;
7340   }
7341   me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
7342   return next;
7343 }
7344 
7345 static XML_Content *
build_model(XML_Parser parser)7346 build_model(XML_Parser parser) {
7347   /* Function build_model transforms the existing parser->m_dtd->scaffold
7348    * array of CONTENT_SCAFFOLD tree nodes into a new array of
7349    * XML_Content tree nodes followed by a gapless list of zero-terminated
7350    * strings. */
7351   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7352   XML_Content *ret;
7353   XML_Char *str; /* the current string writing location */
7354 
7355   /* Detect and prevent integer overflow.
7356    * The preprocessor guard addresses the "always false" warning
7357    * from -Wtype-limits on platforms where
7358    * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7359 #if UINT_MAX >= SIZE_MAX
7360   if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) {
7361     return NULL;
7362   }
7363   if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) {
7364     return NULL;
7365   }
7366 #endif
7367   if (dtd->scaffCount * sizeof(XML_Content)
7368       > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) {
7369     return NULL;
7370   }
7371 
7372   const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
7373                             + (dtd->contentStringLen * sizeof(XML_Char)));
7374 
7375   ret = (XML_Content *)MALLOC(parser, allocsize);
7376   if (! ret)
7377     return NULL;
7378 
7379   /* What follows is an iterative implementation (of what was previously done
7380    * recursively in a dedicated function called "build_node".  The old recursive
7381    * build_node could be forced into stack exhaustion from input as small as a
7382    * few megabyte, and so that was a security issue.  Hence, a function call
7383    * stack is avoided now by resolving recursion.)
7384    *
7385    * The iterative approach works as follows:
7386    *
7387    * - We use space in the target array for building a temporary stack structure
7388    *   while that space is still unused.
7389    *   The stack grows from the array's end downwards and the "actual data"
7390    *   grows from the start upwards, sequentially.
7391    *   (Because stack grows downwards, pushing onto the stack is a decrement
7392    *   while popping off the stack is an increment.)
7393    *
7394    * - A stack element appears as a regular XML_Content node on the outside,
7395    *   but only uses a single field -- numchildren -- to store the source
7396    *   tree node array index.  These are the breadcrumbs leading the way back
7397    *   during pre-order (node first) depth-first traversal.
7398    *
7399    * - The reason we know the stack will never grow into (or overlap with)
7400    *   the area with data of value at the start of the array is because
7401    *   the overall number of elements to process matches the size of the array,
7402    *   and the sum of fully processed nodes and yet-to-be processed nodes
7403    *   on the stack, cannot be more than the total number of nodes.
7404    *   It is possible for the top of the stack and the about-to-write node
7405    *   to meet, but that is safe because we get the source index out
7406    *   before doing any writes on that node.
7407    */
7408   XML_Content *dest = ret; /* tree node writing location, moves upwards */
7409   XML_Content *const destLimit = &ret[dtd->scaffCount];
7410   XML_Content *const stackBottom = &ret[dtd->scaffCount];
7411   XML_Content *stackTop = stackBottom; /* i.e. stack is initially empty */
7412   str = (XML_Char *)&ret[dtd->scaffCount];
7413 
7414   /* Push source tree root node index onto the stack */
7415   (--stackTop)->numchildren = 0;
7416 
7417   for (; dest < destLimit; dest++) {
7418     /* Pop source tree node index off the stack */
7419     const int src_node = (int)(stackTop++)->numchildren;
7420 
7421     /* Convert item */
7422     dest->type = dtd->scaffold[src_node].type;
7423     dest->quant = dtd->scaffold[src_node].quant;
7424     if (dest->type == XML_CTYPE_NAME) {
7425       const XML_Char *src;
7426       dest->name = str;
7427       src = dtd->scaffold[src_node].name;
7428       for (;;) {
7429         *str++ = *src;
7430         if (! *src)
7431           break;
7432         src++;
7433       }
7434       dest->numchildren = 0;
7435       dest->children = NULL;
7436     } else {
7437       unsigned int i;
7438       int cn;
7439       dest->name = NULL;
7440       dest->numchildren = dtd->scaffold[src_node].childcnt;
7441       dest->children = &dest[1];
7442 
7443       /* Push children to the stack
7444        * in a way where the first child ends up at the top of the
7445        * (downwards growing) stack, in order to be processed first. */
7446       stackTop -= dest->numchildren;
7447       for (i = 0, cn = dtd->scaffold[src_node].firstchild;
7448            i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) {
7449         (stackTop + i)->numchildren = (unsigned int)cn;
7450       }
7451     }
7452   }
7453 
7454   return ret;
7455 }
7456 
7457 static ELEMENT_TYPE *
getElementType(XML_Parser parser,const ENCODING * enc,const char * ptr,const char * end)7458 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
7459                const char *end) {
7460   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7461   const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
7462   ELEMENT_TYPE *ret;
7463 
7464   if (! name)
7465     return NULL;
7466   ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
7467                                sizeof(ELEMENT_TYPE));
7468   if (! ret)
7469     return NULL;
7470   if (ret->name != name)
7471     poolDiscard(&dtd->pool);
7472   else {
7473     poolFinish(&dtd->pool);
7474     if (! setElementTypePrefix(parser, ret))
7475       return NULL;
7476   }
7477   return ret;
7478 }
7479 
7480 static XML_Char *
copyString(const XML_Char * s,const XML_Memory_Handling_Suite * memsuite)7481 copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
7482   size_t charsRequired = 0;
7483   XML_Char *result;
7484 
7485   /* First determine how long the string is */
7486   while (s[charsRequired] != 0) {
7487     charsRequired++;
7488   }
7489   /* Include the terminator */
7490   charsRequired++;
7491 
7492   /* Now allocate space for the copy */
7493   result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
7494   if (result == NULL)
7495     return NULL;
7496   /* Copy the original into place */
7497   memcpy(result, s, charsRequired * sizeof(XML_Char));
7498   return result;
7499 }
7500 
7501 #ifdef XML_DTD
7502 
7503 static float
accountingGetCurrentAmplification(XML_Parser rootParser)7504 accountingGetCurrentAmplification(XML_Parser rootParser) {
7505   const XmlBigCount countBytesOutput
7506       = rootParser->m_accounting.countBytesDirect
7507         + rootParser->m_accounting.countBytesIndirect;
7508   const float amplificationFactor
7509       = rootParser->m_accounting.countBytesDirect
7510             ? (countBytesOutput
7511                / (float)(rootParser->m_accounting.countBytesDirect))
7512             : 1.0f;
7513   assert(! rootParser->m_parentParser);
7514   return amplificationFactor;
7515 }
7516 
7517 static void
accountingReportStats(XML_Parser originParser,const char * epilog)7518 accountingReportStats(XML_Parser originParser, const char *epilog) {
7519   const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7520   assert(! rootParser->m_parentParser);
7521 
7522   if (rootParser->m_accounting.debugLevel < 1) {
7523     return;
7524   }
7525 
7526   const float amplificationFactor
7527       = accountingGetCurrentAmplification(rootParser);
7528   fprintf(stderr,
7529           "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
7530               "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
7531           (void *)rootParser, rootParser->m_accounting.countBytesDirect,
7532           rootParser->m_accounting.countBytesIndirect,
7533           (double)amplificationFactor, epilog);
7534 }
7535 
7536 static void
accountingOnAbort(XML_Parser originParser)7537 accountingOnAbort(XML_Parser originParser) {
7538   accountingReportStats(originParser, " ABORTING\n");
7539 }
7540 
7541 static void
accountingReportDiff(XML_Parser rootParser,unsigned int levelsAwayFromRootParser,const char * before,const char * after,ptrdiff_t bytesMore,int source_line,enum XML_Account account)7542 accountingReportDiff(XML_Parser rootParser,
7543                      unsigned int levelsAwayFromRootParser, const char *before,
7544                      const char *after, ptrdiff_t bytesMore, int source_line,
7545                      enum XML_Account account) {
7546   assert(! rootParser->m_parentParser);
7547 
7548   fprintf(stderr,
7549           " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"",
7550           bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
7551           levelsAwayFromRootParser, source_line, 10, "");
7552 
7553   const char ellipis[] = "[..]";
7554   const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
7555   const unsigned int contextLength = 10;
7556 
7557   /* Note: Performance is of no concern here */
7558   const char *walker = before;
7559   if ((rootParser->m_accounting.debugLevel >= 3)
7560       || (after - before)
7561              <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
7562     for (; walker < after; walker++) {
7563       fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7564     }
7565   } else {
7566     for (; walker < before + contextLength; walker++) {
7567       fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7568     }
7569     fprintf(stderr, ellipis);
7570     walker = after - contextLength;
7571     for (; walker < after; walker++) {
7572       fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7573     }
7574   }
7575   fprintf(stderr, "\"\n");
7576 }
7577 
7578 static XML_Bool
accountingDiffTolerated(XML_Parser originParser,int tok,const char * before,const char * after,int source_line,enum XML_Account account)7579 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
7580                         const char *after, int source_line,
7581                         enum XML_Account account) {
7582   /* Note: We need to check the token type *first* to be sure that
7583    *       we can even access variable <after>, safely.
7584    *       E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
7585   switch (tok) {
7586   case XML_TOK_INVALID:
7587   case XML_TOK_PARTIAL:
7588   case XML_TOK_PARTIAL_CHAR:
7589   case XML_TOK_NONE:
7590     return XML_TRUE;
7591   }
7592 
7593   if (account == XML_ACCOUNT_NONE)
7594     return XML_TRUE; /* because these bytes have been accounted for, already */
7595 
7596   unsigned int levelsAwayFromRootParser;
7597   const XML_Parser rootParser
7598       = getRootParserOf(originParser, &levelsAwayFromRootParser);
7599   assert(! rootParser->m_parentParser);
7600 
7601   const int isDirect
7602       = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
7603   const ptrdiff_t bytesMore = after - before;
7604 
7605   XmlBigCount *const additionTarget
7606       = isDirect ? &rootParser->m_accounting.countBytesDirect
7607                  : &rootParser->m_accounting.countBytesIndirect;
7608 
7609   /* Detect and avoid integer overflow */
7610   if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
7611     return XML_FALSE;
7612   *additionTarget += bytesMore;
7613 
7614   const XmlBigCount countBytesOutput
7615       = rootParser->m_accounting.countBytesDirect
7616         + rootParser->m_accounting.countBytesIndirect;
7617   const float amplificationFactor
7618       = accountingGetCurrentAmplification(rootParser);
7619   const XML_Bool tolerated
7620       = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
7621         || (amplificationFactor
7622             <= rootParser->m_accounting.maximumAmplificationFactor);
7623 
7624   if (rootParser->m_accounting.debugLevel >= 2) {
7625     accountingReportStats(rootParser, "");
7626     accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
7627                          bytesMore, source_line, account);
7628   }
7629 
7630   return tolerated;
7631 }
7632 
7633 unsigned long long
testingAccountingGetCountBytesDirect(XML_Parser parser)7634 testingAccountingGetCountBytesDirect(XML_Parser parser) {
7635   if (! parser)
7636     return 0;
7637   return parser->m_accounting.countBytesDirect;
7638 }
7639 
7640 unsigned long long
testingAccountingGetCountBytesIndirect(XML_Parser parser)7641 testingAccountingGetCountBytesIndirect(XML_Parser parser) {
7642   if (! parser)
7643     return 0;
7644   return parser->m_accounting.countBytesIndirect;
7645 }
7646 
7647 static void
entityTrackingReportStats(XML_Parser rootParser,ENTITY * entity,const char * action,int sourceLine)7648 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
7649                           const char *action, int sourceLine) {
7650   assert(! rootParser->m_parentParser);
7651   if (rootParser->m_entity_stats.debugLevel < 1)
7652     return;
7653 
7654 #  if defined(XML_UNICODE)
7655   const char *const entityName = "[..]";
7656 #  else
7657   const char *const entityName = entity->name;
7658 #  endif
7659 
7660   fprintf(
7661       stderr,
7662       "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n",
7663       (void *)rootParser, rootParser->m_entity_stats.countEverOpened,
7664       rootParser->m_entity_stats.currentDepth,
7665       rootParser->m_entity_stats.maximumDepthSeen,
7666       (rootParser->m_entity_stats.currentDepth - 1) * 2, "",
7667       entity->is_param ? "%" : "&", entityName, action, entity->textLen,
7668       sourceLine);
7669 }
7670 
7671 static void
entityTrackingOnOpen(XML_Parser originParser,ENTITY * entity,int sourceLine)7672 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7673   const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7674   assert(! rootParser->m_parentParser);
7675 
7676   rootParser->m_entity_stats.countEverOpened++;
7677   rootParser->m_entity_stats.currentDepth++;
7678   if (rootParser->m_entity_stats.currentDepth
7679       > rootParser->m_entity_stats.maximumDepthSeen) {
7680     rootParser->m_entity_stats.maximumDepthSeen++;
7681   }
7682 
7683   entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
7684 }
7685 
7686 static void
entityTrackingOnClose(XML_Parser originParser,ENTITY * entity,int sourceLine)7687 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7688   const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7689   assert(! rootParser->m_parentParser);
7690 
7691   entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
7692   rootParser->m_entity_stats.currentDepth--;
7693 }
7694 
7695 static XML_Parser
getRootParserOf(XML_Parser parser,unsigned int * outLevelDiff)7696 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
7697   XML_Parser rootParser = parser;
7698   unsigned int stepsTakenUpwards = 0;
7699   while (rootParser->m_parentParser) {
7700     rootParser = rootParser->m_parentParser;
7701     stepsTakenUpwards++;
7702   }
7703   assert(! rootParser->m_parentParser);
7704   if (outLevelDiff != NULL) {
7705     *outLevelDiff = stepsTakenUpwards;
7706   }
7707   return rootParser;
7708 }
7709 
7710 const char *
unsignedCharToPrintable(unsigned char c)7711 unsignedCharToPrintable(unsigned char c) {
7712   switch (c) {
7713   case 0:
7714     return "\\0";
7715   case 1:
7716     return "\\x1";
7717   case 2:
7718     return "\\x2";
7719   case 3:
7720     return "\\x3";
7721   case 4:
7722     return "\\x4";
7723   case 5:
7724     return "\\x5";
7725   case 6:
7726     return "\\x6";
7727   case 7:
7728     return "\\x7";
7729   case 8:
7730     return "\\x8";
7731   case 9:
7732     return "\\t";
7733   case 10:
7734     return "\\n";
7735   case 11:
7736     return "\\xB";
7737   case 12:
7738     return "\\xC";
7739   case 13:
7740     return "\\r";
7741   case 14:
7742     return "\\xE";
7743   case 15:
7744     return "\\xF";
7745   case 16:
7746     return "\\x10";
7747   case 17:
7748     return "\\x11";
7749   case 18:
7750     return "\\x12";
7751   case 19:
7752     return "\\x13";
7753   case 20:
7754     return "\\x14";
7755   case 21:
7756     return "\\x15";
7757   case 22:
7758     return "\\x16";
7759   case 23:
7760     return "\\x17";
7761   case 24:
7762     return "\\x18";
7763   case 25:
7764     return "\\x19";
7765   case 26:
7766     return "\\x1A";
7767   case 27:
7768     return "\\x1B";
7769   case 28:
7770     return "\\x1C";
7771   case 29:
7772     return "\\x1D";
7773   case 30:
7774     return "\\x1E";
7775   case 31:
7776     return "\\x1F";
7777   case 32:
7778     return " ";
7779   case 33:
7780     return "!";
7781   case 34:
7782     return "\\\"";
7783   case 35:
7784     return "#";
7785   case 36:
7786     return "$";
7787   case 37:
7788     return "%";
7789   case 38:
7790     return "&";
7791   case 39:
7792     return "'";
7793   case 40:
7794     return "(";
7795   case 41:
7796     return ")";
7797   case 42:
7798     return "*";
7799   case 43:
7800     return "+";
7801   case 44:
7802     return ",";
7803   case 45:
7804     return "-";
7805   case 46:
7806     return ".";
7807   case 47:
7808     return "/";
7809   case 48:
7810     return "0";
7811   case 49:
7812     return "1";
7813   case 50:
7814     return "2";
7815   case 51:
7816     return "3";
7817   case 52:
7818     return "4";
7819   case 53:
7820     return "5";
7821   case 54:
7822     return "6";
7823   case 55:
7824     return "7";
7825   case 56:
7826     return "8";
7827   case 57:
7828     return "9";
7829   case 58:
7830     return ":";
7831   case 59:
7832     return ";";
7833   case 60:
7834     return "<";
7835   case 61:
7836     return "=";
7837   case 62:
7838     return ">";
7839   case 63:
7840     return "?";
7841   case 64:
7842     return "@";
7843   case 65:
7844     return "A";
7845   case 66:
7846     return "B";
7847   case 67:
7848     return "C";
7849   case 68:
7850     return "D";
7851   case 69:
7852     return "E";
7853   case 70:
7854     return "F";
7855   case 71:
7856     return "G";
7857   case 72:
7858     return "H";
7859   case 73:
7860     return "I";
7861   case 74:
7862     return "J";
7863   case 75:
7864     return "K";
7865   case 76:
7866     return "L";
7867   case 77:
7868     return "M";
7869   case 78:
7870     return "N";
7871   case 79:
7872     return "O";
7873   case 80:
7874     return "P";
7875   case 81:
7876     return "Q";
7877   case 82:
7878     return "R";
7879   case 83:
7880     return "S";
7881   case 84:
7882     return "T";
7883   case 85:
7884     return "U";
7885   case 86:
7886     return "V";
7887   case 87:
7888     return "W";
7889   case 88:
7890     return "X";
7891   case 89:
7892     return "Y";
7893   case 90:
7894     return "Z";
7895   case 91:
7896     return "[";
7897   case 92:
7898     return "\\\\";
7899   case 93:
7900     return "]";
7901   case 94:
7902     return "^";
7903   case 95:
7904     return "_";
7905   case 96:
7906     return "`";
7907   case 97:
7908     return "a";
7909   case 98:
7910     return "b";
7911   case 99:
7912     return "c";
7913   case 100:
7914     return "d";
7915   case 101:
7916     return "e";
7917   case 102:
7918     return "f";
7919   case 103:
7920     return "g";
7921   case 104:
7922     return "h";
7923   case 105:
7924     return "i";
7925   case 106:
7926     return "j";
7927   case 107:
7928     return "k";
7929   case 108:
7930     return "l";
7931   case 109:
7932     return "m";
7933   case 110:
7934     return "n";
7935   case 111:
7936     return "o";
7937   case 112:
7938     return "p";
7939   case 113:
7940     return "q";
7941   case 114:
7942     return "r";
7943   case 115:
7944     return "s";
7945   case 116:
7946     return "t";
7947   case 117:
7948     return "u";
7949   case 118:
7950     return "v";
7951   case 119:
7952     return "w";
7953   case 120:
7954     return "x";
7955   case 121:
7956     return "y";
7957   case 122:
7958     return "z";
7959   case 123:
7960     return "{";
7961   case 124:
7962     return "|";
7963   case 125:
7964     return "}";
7965   case 126:
7966     return "~";
7967   case 127:
7968     return "\\x7F";
7969   case 128:
7970     return "\\x80";
7971   case 129:
7972     return "\\x81";
7973   case 130:
7974     return "\\x82";
7975   case 131:
7976     return "\\x83";
7977   case 132:
7978     return "\\x84";
7979   case 133:
7980     return "\\x85";
7981   case 134:
7982     return "\\x86";
7983   case 135:
7984     return "\\x87";
7985   case 136:
7986     return "\\x88";
7987   case 137:
7988     return "\\x89";
7989   case 138:
7990     return "\\x8A";
7991   case 139:
7992     return "\\x8B";
7993   case 140:
7994     return "\\x8C";
7995   case 141:
7996     return "\\x8D";
7997   case 142:
7998     return "\\x8E";
7999   case 143:
8000     return "\\x8F";
8001   case 144:
8002     return "\\x90";
8003   case 145:
8004     return "\\x91";
8005   case 146:
8006     return "\\x92";
8007   case 147:
8008     return "\\x93";
8009   case 148:
8010     return "\\x94";
8011   case 149:
8012     return "\\x95";
8013   case 150:
8014     return "\\x96";
8015   case 151:
8016     return "\\x97";
8017   case 152:
8018     return "\\x98";
8019   case 153:
8020     return "\\x99";
8021   case 154:
8022     return "\\x9A";
8023   case 155:
8024     return "\\x9B";
8025   case 156:
8026     return "\\x9C";
8027   case 157:
8028     return "\\x9D";
8029   case 158:
8030     return "\\x9E";
8031   case 159:
8032     return "\\x9F";
8033   case 160:
8034     return "\\xA0";
8035   case 161:
8036     return "\\xA1";
8037   case 162:
8038     return "\\xA2";
8039   case 163:
8040     return "\\xA3";
8041   case 164:
8042     return "\\xA4";
8043   case 165:
8044     return "\\xA5";
8045   case 166:
8046     return "\\xA6";
8047   case 167:
8048     return "\\xA7";
8049   case 168:
8050     return "\\xA8";
8051   case 169:
8052     return "\\xA9";
8053   case 170:
8054     return "\\xAA";
8055   case 171:
8056     return "\\xAB";
8057   case 172:
8058     return "\\xAC";
8059   case 173:
8060     return "\\xAD";
8061   case 174:
8062     return "\\xAE";
8063   case 175:
8064     return "\\xAF";
8065   case 176:
8066     return "\\xB0";
8067   case 177:
8068     return "\\xB1";
8069   case 178:
8070     return "\\xB2";
8071   case 179:
8072     return "\\xB3";
8073   case 180:
8074     return "\\xB4";
8075   case 181:
8076     return "\\xB5";
8077   case 182:
8078     return "\\xB6";
8079   case 183:
8080     return "\\xB7";
8081   case 184:
8082     return "\\xB8";
8083   case 185:
8084     return "\\xB9";
8085   case 186:
8086     return "\\xBA";
8087   case 187:
8088     return "\\xBB";
8089   case 188:
8090     return "\\xBC";
8091   case 189:
8092     return "\\xBD";
8093   case 190:
8094     return "\\xBE";
8095   case 191:
8096     return "\\xBF";
8097   case 192:
8098     return "\\xC0";
8099   case 193:
8100     return "\\xC1";
8101   case 194:
8102     return "\\xC2";
8103   case 195:
8104     return "\\xC3";
8105   case 196:
8106     return "\\xC4";
8107   case 197:
8108     return "\\xC5";
8109   case 198:
8110     return "\\xC6";
8111   case 199:
8112     return "\\xC7";
8113   case 200:
8114     return "\\xC8";
8115   case 201:
8116     return "\\xC9";
8117   case 202:
8118     return "\\xCA";
8119   case 203:
8120     return "\\xCB";
8121   case 204:
8122     return "\\xCC";
8123   case 205:
8124     return "\\xCD";
8125   case 206:
8126     return "\\xCE";
8127   case 207:
8128     return "\\xCF";
8129   case 208:
8130     return "\\xD0";
8131   case 209:
8132     return "\\xD1";
8133   case 210:
8134     return "\\xD2";
8135   case 211:
8136     return "\\xD3";
8137   case 212:
8138     return "\\xD4";
8139   case 213:
8140     return "\\xD5";
8141   case 214:
8142     return "\\xD6";
8143   case 215:
8144     return "\\xD7";
8145   case 216:
8146     return "\\xD8";
8147   case 217:
8148     return "\\xD9";
8149   case 218:
8150     return "\\xDA";
8151   case 219:
8152     return "\\xDB";
8153   case 220:
8154     return "\\xDC";
8155   case 221:
8156     return "\\xDD";
8157   case 222:
8158     return "\\xDE";
8159   case 223:
8160     return "\\xDF";
8161   case 224:
8162     return "\\xE0";
8163   case 225:
8164     return "\\xE1";
8165   case 226:
8166     return "\\xE2";
8167   case 227:
8168     return "\\xE3";
8169   case 228:
8170     return "\\xE4";
8171   case 229:
8172     return "\\xE5";
8173   case 230:
8174     return "\\xE6";
8175   case 231:
8176     return "\\xE7";
8177   case 232:
8178     return "\\xE8";
8179   case 233:
8180     return "\\xE9";
8181   case 234:
8182     return "\\xEA";
8183   case 235:
8184     return "\\xEB";
8185   case 236:
8186     return "\\xEC";
8187   case 237:
8188     return "\\xED";
8189   case 238:
8190     return "\\xEE";
8191   case 239:
8192     return "\\xEF";
8193   case 240:
8194     return "\\xF0";
8195   case 241:
8196     return "\\xF1";
8197   case 242:
8198     return "\\xF2";
8199   case 243:
8200     return "\\xF3";
8201   case 244:
8202     return "\\xF4";
8203   case 245:
8204     return "\\xF5";
8205   case 246:
8206     return "\\xF6";
8207   case 247:
8208     return "\\xF7";
8209   case 248:
8210     return "\\xF8";
8211   case 249:
8212     return "\\xF9";
8213   case 250:
8214     return "\\xFA";
8215   case 251:
8216     return "\\xFB";
8217   case 252:
8218     return "\\xFC";
8219   case 253:
8220     return "\\xFD";
8221   case 254:
8222     return "\\xFE";
8223   case 255:
8224     return "\\xFF";
8225   default:
8226     assert(0); /* never gets here */
8227     return "dead code";
8228   }
8229   assert(0); /* never gets here */
8230 }
8231 
8232 #endif /* XML_DTD */
8233 
/* Reads a debug level from the environment variable named variableName.

   Returns defaultDebugLevel when the variable is unset, is the empty
   string, contains anything other than a single base-10 number accepted
   by strtoul, or when strtoul reports an error through errno (e.g. the
   value does not fit into unsigned long).  Otherwise returns the parsed
   number.

   Note that strtoul itself accepts leading whitespace and a sign, so
   e.g. "-1" is parsed without error and yields ULONG_MAX.

   errno is reset to 0 before parsing, and again before returning the
   default after a failed parse. */
static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
  const char *const valueOrNull = getenv(variableName);
  if (valueOrNull == NULL) {
    return defaultDebugLevel;
  }
  const char *const value = valueOrNull;

  errno = 0; /* so that a non-zero errno below is known to come from strtoul */
  char *afterValue = NULL;
  unsigned long debugLevel = strtoul(value, &afterValue, 10);
  /* afterValue == value means no digits were consumed at all (e.g. the
     variable is set to ""); without that check an empty value would be
     taken as level 0 because afterValue[0] is then the terminator. */
  if ((errno != 0) || (afterValue == value) || (afterValue[0] != '\0')) {
    errno = 0;
    return defaultDebugLevel;
  }

  return debugLevel;
}
8252