1 /* 19ac4776051591216f1874e34ee99b6a43a3784c8bd7d70efeb9258dd22b906a (2.2.6+)
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000-2017 Expat development team
11 Licensed under the MIT license:
12
13 Permission is hereby granted, free of charge, to any person obtaining
14 a copy of this software and associated documentation files (the
15 "Software"), to deal in the Software without restriction, including
16 without limitation the rights to use, copy, modify, merge, publish,
17 distribute, sublicense, and/or sell copies of the Software, and to permit
18 persons to whom the Software is furnished to do so, subject to the
19 following conditions:
20
21 The above copyright notice and this permission notice shall be included
22 in all copies or substantial portions of the Software.
23
24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
29 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30 USE OR OTHER DEALINGS IN THE SOFTWARE.
31 */
32
33 #if !defined(_GNU_SOURCE)
34 # define _GNU_SOURCE 1 /* syscall prototype */
35 #endif
36
#include <stddef.h>
#include <string.h> /* memset(), memcpy() */
#include <assert.h>
#include <limits.h> /* UINT_MAX */
#include <stdint.h> /* uintptr_t */
#include <stdio.h>  /* fprintf */
#include <stdlib.h> /* getenv */
43
44 #ifdef _WIN32
45 #define getpid GetCurrentProcessId
46 #else
47 #include <sys/time.h> /* gettimeofday() */
48 #include <sys/types.h> /* getpid() */
49 #include <unistd.h> /* getpid() */
50 #include <fcntl.h> /* O_RDONLY */
51 #include <errno.h>
52 #endif
53
54 #define XML_BUILDING_EXPAT 1
55
56 #ifdef _WIN32
57 #include "winconfig.h"
58 #elif defined(HAVE_EXPAT_CONFIG_H)
59 #include <expat_config.h>
60 #endif /* ndef _WIN32 */
61
62 #include "ascii.h"
63 #include "expat.h"
64 #include "siphash.h"
65
66 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
67 # if defined(HAVE_GETRANDOM)
68 # include <sys/random.h> /* getrandom */
69 # else
70 # include <unistd.h> /* syscall */
71 # include <sys/syscall.h> /* SYS_getrandom */
72 # endif
73 # if ! defined(GRND_NONBLOCK)
74 # define GRND_NONBLOCK 0x0001
75 # endif /* defined(GRND_NONBLOCK) */
76 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
77
78 #if defined(HAVE_LIBBSD) \
79 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
80 # include <bsd/stdlib.h>
81 #endif
82
83 #if defined(_WIN32) && !defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
84 # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
85 #endif
86
87 #if !defined(HAVE_GETRANDOM) && !defined(HAVE_SYSCALL_GETRANDOM) \
88 && !defined(HAVE_ARC4RANDOM_BUF) && !defined(HAVE_ARC4RANDOM) \
89 && !defined(XML_DEV_URANDOM) \
90 && !defined(_WIN32) \
91 && !defined(XML_POOR_ENTROPY)
92 # error \
93 You do not have support for any sources of high quality entropy \
94 enabled. For end user security, that is probably not what you want. \
95 \
96 Your options include: \
97 * Linux + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
98 * Linux + glibc <2.25 (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
99 * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
100 * BSD / macOS <10.7 (arc4random): HAVE_ARC4RANDOM, \
101 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
102 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
103 * Linux / BSD / macOS (/dev/urandom): XML_DEV_URANDOM \
104 * Windows (RtlGenRandom): _WIN32. \
105 \
106 If insist on not using any of these, bypass this error by defining \
107 XML_POOR_ENTROPY; you have been warned. \
108 \
109 If you have reasons to patch this detection code away or need changes \
110 to the build system, please open a bug. Thank you!
111 #endif
112
113
/* Select the tokenizer helpers and the internal character type according
   to the API encoding: the UTF-16 variants when XML_UNICODE is defined,
   the UTF-8 variants otherwise. */
#ifdef XML_UNICODE
#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#define XmlConvert XmlUtf16Convert
#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
#define XmlEncode XmlUtf16Encode
/* Input must be converted unless the encoding is UTF-16 and the pointer
   has an even address.  The address is inspected through uintptr_t;
   subtracting a null pointer from it would be undefined behavior. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((uintptr_t)(s)) & 1))
typedef unsigned short ICHAR;
#else
#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#define XmlConvert XmlUtf8Convert
#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
#define XmlEncode XmlUtf8Encode
/* Input must be converted unless the encoding is already UTF-8. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
typedef char ICHAR;
#endif
132
133
/* When XML_NS is not defined, map the *NS tokenizer entry points onto
   their plain counterparts. */
#ifndef XML_NS

#define XmlInitEncodingNS XmlInitEncoding
#define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
#undef XmlGetInternalEncodingNS
#define XmlGetInternalEncodingNS XmlGetInternalEncoding
#define XmlParseXmlDeclNS XmlParseXmlDecl

#endif
143
/* XML_T(x) adapts a character constant and XML_L(x) a string literal to
   the configured character type.  The cast forms are fully parenthesized
   so they stay a single operand inside larger expressions. */
#ifdef XML_UNICODE

#ifdef XML_UNICODE_WCHAR_T
#define XML_T(x) ((const wchar_t)(x))
#define XML_L(x) L ## x
#else
#define XML_T(x) ((const unsigned short)(x))
#define XML_L(x) x
#endif

#else

#define XML_T(x) x
#define XML_L(x) x

#endif
160
/* Round up n to be a multiple of sz, where sz is a power of 2. */
#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))

/* Do safe (NULL-aware) pointer arithmetic: yields 0 when either
   operand is a null pointer, otherwise the difference p - q. */
#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
166
167 /* Handle the case where memmove() doesn't exist. */
168 #ifndef HAVE_MEMMOVE
169 #ifdef HAVE_BCOPY
170 #define memmove(d,s,l) bcopy((s),(d),(l))
171 #else
172 #error memmove does not exist on this platform, nor is a substitute available
173 #endif /* HAVE_BCOPY */
174 #endif /* HAVE_MEMMOVE */
175
176 #include "internal.h"
177 #include "xmltok.h"
178 #include "xmlrole.h"
179
180 typedef const XML_Char *KEY;
181
182 typedef struct {
183 KEY name;
184 } NAMED;
185
186 typedef struct {
187 NAMED **v;
188 unsigned char power;
189 size_t size;
190 size_t used;
191 const XML_Memory_Handling_Suite *mem;
192 } HASH_TABLE;
193
194 static size_t
195 keylen(KEY s);
196
197 static void
198 copy_salt_to_sipkey(XML_Parser parser, struct sipkey * key);
199
/* For probing (after a collision) we need a step size relative prime
   to the hash table size, which is a power of 2. We use double-hashing,
   since we can calculate a second hash value cheaply by taking those bits
   of the first hash value that were discarded (masked out) when the table
   index was calculated: index = hash & mask, where mask = table->size - 1.
   We limit the maximum step size to table->size / 4 (mask >> 2) and make
   it odd, since odd numbers are always relative prime to a power of 2.
*/
#define SECOND_HASH(hash, mask, power) \
  ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
#define PROBE_STEP(hash, mask, power) \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
212
213 typedef struct {
214 NAMED **p;
215 NAMED **end;
216 } HASH_TABLE_ITER;
217
/* Initial sizes used when the parser first allocates its buffers. */
#define INIT_TAG_BUF_SIZE 32  /* must be a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE 1024
#define INIT_ATTS_SIZE 16
#define INIT_ATTS_VERSION 0xFFFFFFFF
#define INIT_BLOCK_SIZE 1024
#define INIT_BUFFER_SIZE 1024

#define EXPAND_SPARE 24
226
227 typedef struct binding {
228 struct prefix *prefix;
229 struct binding *nextTagBinding;
230 struct binding *prevPrefixBinding;
231 const struct attribute_id *attId;
232 XML_Char *uri;
233 int uriLen;
234 int uriAlloc;
235 } BINDING;
236
237 typedef struct prefix {
238 const XML_Char *name;
239 BINDING *binding;
240 } PREFIX;
241
242 typedef struct {
243 const XML_Char *str;
244 const XML_Char *localPart;
245 const XML_Char *prefix;
246 int strLen;
247 int uriLen;
248 int prefixLen;
249 } TAG_NAME;
250
251 /* TAG represents an open element.
252 The name of the element is stored in both the document and API
253 encodings. The memory buffer 'buf' is a separately-allocated
254 memory area which stores the name. During the XML_Parse()/
255 XMLParseBuffer() when the element is open, the memory for the 'raw'
256 version of the name (in the document encoding) is shared with the
257 document buffer. If the element is open across calls to
258 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
259 contain the 'raw' name as well.
260
261 A parser re-uses these structures, maintaining a list of allocated
262 TAG objects in a free list.
263 */
264 typedef struct tag {
265 struct tag *parent; /* parent of this element */
266 const char *rawName; /* tagName in the original encoding */
267 int rawNameLength;
268 TAG_NAME name; /* tagName in the API encoding */
269 char *buf; /* buffer for name components */
270 char *bufEnd; /* end of the buffer */
271 BINDING *bindings;
272 } TAG;
273
274 typedef struct {
275 const XML_Char *name;
276 const XML_Char *textPtr;
277 int textLen; /* length in XML_Chars */
278 int processed; /* # of processed bytes - when suspended */
279 const XML_Char *systemId;
280 const XML_Char *base;
281 const XML_Char *publicId;
282 const XML_Char *notation;
283 XML_Bool open;
284 XML_Bool is_param;
285 XML_Bool is_internal; /* true if declared in internal subset outside PE */
286 } ENTITY;
287
288 typedef struct {
289 enum XML_Content_Type type;
290 enum XML_Content_Quant quant;
291 const XML_Char * name;
292 int firstchild;
293 int lastchild;
294 int childcnt;
295 int nextsib;
296 } CONTENT_SCAFFOLD;
297
298 #define INIT_SCAFFOLD_ELEMENTS 32
299
300 typedef struct block {
301 struct block *next;
302 int size;
303 XML_Char s[1];
304 } BLOCK;
305
306 typedef struct {
307 BLOCK *blocks;
308 BLOCK *freeBlocks;
309 const XML_Char *end;
310 XML_Char *ptr;
311 XML_Char *start;
312 const XML_Memory_Handling_Suite *mem;
313 } STRING_POOL;
314
315 /* The XML_Char before the name is used to determine whether
316 an attribute has been specified. */
317 typedef struct attribute_id {
318 XML_Char *name;
319 PREFIX *prefix;
320 XML_Bool maybeTokenized;
321 XML_Bool xmlns;
322 } ATTRIBUTE_ID;
323
324 typedef struct {
325 const ATTRIBUTE_ID *id;
326 XML_Bool isCdata;
327 const XML_Char *value;
328 } DEFAULT_ATTRIBUTE;
329
330 typedef struct {
331 unsigned long version;
332 unsigned long hash;
333 const XML_Char *uriName;
334 } NS_ATT;
335
336 typedef struct {
337 const XML_Char *name;
338 PREFIX *prefix;
339 const ATTRIBUTE_ID *idAtt;
340 int nDefaultAtts;
341 int allocDefaultAtts;
342 DEFAULT_ATTRIBUTE *defaultAtts;
343 } ELEMENT_TYPE;
344
345 typedef struct {
346 HASH_TABLE generalEntities;
347 HASH_TABLE elementTypes;
348 HASH_TABLE attributeIds;
349 HASH_TABLE prefixes;
350 STRING_POOL pool;
351 STRING_POOL entityValuePool;
352 /* false once a parameter entity reference has been skipped */
353 XML_Bool keepProcessing;
354 /* true once an internal or external PE reference has been encountered;
355 this includes the reference to an external subset */
356 XML_Bool hasParamEntityRefs;
357 XML_Bool standalone;
358 #ifdef XML_DTD
359 /* indicates if external PE has been read */
360 XML_Bool paramEntityRead;
361 HASH_TABLE paramEntities;
362 #endif /* XML_DTD */
363 PREFIX defaultPrefix;
364 /* === scaffolding for building content model === */
365 XML_Bool in_eldecl;
366 CONTENT_SCAFFOLD *scaffold;
367 unsigned contentStringLen;
368 unsigned scaffSize;
369 unsigned scaffCount;
370 int scaffLevel;
371 int *scaffIndex;
372 } DTD;
373
374 typedef struct open_internal_entity {
375 const char *internalEventPtr;
376 const char *internalEventEndPtr;
377 struct open_internal_entity *next;
378 ENTITY *entity;
379 int startTagLevel;
380 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
381 } OPEN_INTERNAL_ENTITY;
382
383 typedef enum XML_Error PTRCALL Processor(XML_Parser parser,
384 const char *start,
385 const char *end,
386 const char **endPtr);
387
388 static Processor prologProcessor;
389 static Processor prologInitProcessor;
390 static Processor contentProcessor;
391 static Processor cdataSectionProcessor;
392 #ifdef XML_DTD
393 static Processor ignoreSectionProcessor;
394 static Processor externalParEntProcessor;
395 static Processor externalParEntInitProcessor;
396 static Processor entityValueProcessor;
397 static Processor entityValueInitProcessor;
398 #endif /* XML_DTD */
399 static Processor epilogProcessor;
400 static Processor errorProcessor;
401 static Processor externalEntityInitProcessor;
402 static Processor externalEntityInitProcessor2;
403 static Processor externalEntityInitProcessor3;
404 static Processor externalEntityContentProcessor;
405 static Processor internalEntityProcessor;
406
407 static enum XML_Error
408 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
409 static enum XML_Error
410 processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
411 const char *s, const char *next);
412 static enum XML_Error
413 initializeEncoding(XML_Parser parser);
414 static enum XML_Error
415 doProlog(XML_Parser parser, const ENCODING *enc, const char *s,
416 const char *end, int tok, const char *next, const char **nextPtr,
417 XML_Bool haveMore);
418 static enum XML_Error
419 processInternalEntity(XML_Parser parser, ENTITY *entity,
420 XML_Bool betweenDecl);
421 static enum XML_Error
422 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
423 const char *start, const char *end, const char **endPtr,
424 XML_Bool haveMore);
425 static enum XML_Error
426 doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr,
427 const char *end, const char **nextPtr, XML_Bool haveMore);
428 #ifdef XML_DTD
429 static enum XML_Error
430 doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr,
431 const char *end, const char **nextPtr, XML_Bool haveMore);
432 #endif /* XML_DTD */
433
434 static void
435 freeBindings(XML_Parser parser, BINDING *bindings);
436 static enum XML_Error
437 storeAtts(XML_Parser parser, const ENCODING *, const char *s,
438 TAG_NAME *tagNamePtr, BINDING **bindingsPtr);
439 static enum XML_Error
440 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
441 const XML_Char *uri, BINDING **bindingsPtr);
442 static int
443 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
444 XML_Bool isId, const XML_Char *dfltValue, XML_Parser parser);
445 static enum XML_Error
446 storeAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
447 const char *, const char *, STRING_POOL *);
448 static enum XML_Error
449 appendAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
450 const char *, const char *, STRING_POOL *);
451 static ATTRIBUTE_ID *
452 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
453 const char *end);
454 static int
455 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
456 static enum XML_Error
457 storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start,
458 const char *end);
459 static int
460 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
461 const char *start, const char *end);
462 static int
463 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
464 const char *end);
465 static void
466 reportDefault(XML_Parser parser, const ENCODING *enc, const char *start,
467 const char *end);
468
469 static const XML_Char * getContext(XML_Parser parser);
470 static XML_Bool
471 setContext(XML_Parser parser, const XML_Char *context);
472
473 static void FASTCALL normalizePublicId(XML_Char *s);
474
475 static DTD * dtdCreate(const XML_Memory_Handling_Suite *ms);
476 /* do not call if m_parentParser != NULL */
477 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
478 static void
479 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms);
480 static int
481 dtdCopy(XML_Parser oldParser,
482 DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms);
483 static int
484 copyEntityTable(XML_Parser oldParser,
485 HASH_TABLE *, STRING_POOL *, const HASH_TABLE *);
486 static NAMED *
487 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize);
488 static void FASTCALL
489 hashTableInit(HASH_TABLE *, const XML_Memory_Handling_Suite *ms);
490 static void FASTCALL hashTableClear(HASH_TABLE *);
491 static void FASTCALL hashTableDestroy(HASH_TABLE *);
492 static void FASTCALL
493 hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *);
494 static NAMED * FASTCALL hashTableIterNext(HASH_TABLE_ITER *);
495
496 static void FASTCALL
497 poolInit(STRING_POOL *, const XML_Memory_Handling_Suite *ms);
498 static void FASTCALL poolClear(STRING_POOL *);
499 static void FASTCALL poolDestroy(STRING_POOL *);
500 static XML_Char *
501 poolAppend(STRING_POOL *pool, const ENCODING *enc,
502 const char *ptr, const char *end);
503 static XML_Char *
504 poolStoreString(STRING_POOL *pool, const ENCODING *enc,
505 const char *ptr, const char *end);
506 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
507 static const XML_Char * FASTCALL
508 poolCopyString(STRING_POOL *pool, const XML_Char *s);
509 static const XML_Char *
510 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
511 static const XML_Char * FASTCALL
512 poolAppendString(STRING_POOL *pool, const XML_Char *s);
513
514 static int FASTCALL nextScaffoldPart(XML_Parser parser);
515 static XML_Content * build_model(XML_Parser parser);
516 static ELEMENT_TYPE *
517 getElementType(XML_Parser parser, const ENCODING *enc,
518 const char *ptr, const char *end);
519
520 static XML_Char *copyString(const XML_Char *s,
521 const XML_Memory_Handling_Suite *memsuite);
522
523 static unsigned long generate_hash_secret_salt(XML_Parser parser);
524 static XML_Bool startParsing(XML_Parser parser);
525
526 static XML_Parser
527 parserCreate(const XML_Char *encodingName,
528 const XML_Memory_Handling_Suite *memsuite,
529 const XML_Char *nameSep,
530 DTD *dtd);
531
532 static void
533 parserInit(XML_Parser parser, const XML_Char *encodingName);
534
/* Accessors for the string under construction in a STRING_POOL, i.e. the
   range [start, ptr).  Every macro parenthesizes its arguments so that
   non-trivial expressions (e.g. 'pools + 1') expand correctly. */
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Drop the last appended character. */
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
/* Abandon the string under construction. */
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Close the string under construction; the next one starts at ptr. */
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Append c, calling poolGrow() first when the pool is full; evaluates to
   0 if poolGrow() fails and to 1 otherwise. */
#define poolAppendChar(pool, c) \
  (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
   ? 0 \
   : ((*((pool)->ptr)++ = (c)), 1))
546
547 struct XML_ParserStruct {
548 /* The first member must be m_userData so that the XML_GetUserData
549 macro works. */
550 void *m_userData;
551 void *m_handlerArg;
552 char *m_buffer;
553 const XML_Memory_Handling_Suite m_mem;
554 /* first character to be parsed */
555 const char *m_bufferPtr;
556 /* past last character to be parsed */
557 char *m_bufferEnd;
558 /* allocated end of m_buffer */
559 const char *m_bufferLim;
560 XML_Index m_parseEndByteIndex;
561 const char *m_parseEndPtr;
562 XML_Char *m_dataBuf;
563 XML_Char *m_dataBufEnd;
564 XML_StartElementHandler m_startElementHandler;
565 XML_EndElementHandler m_endElementHandler;
566 XML_CharacterDataHandler m_characterDataHandler;
567 XML_ProcessingInstructionHandler m_processingInstructionHandler;
568 XML_CommentHandler m_commentHandler;
569 XML_StartCdataSectionHandler m_startCdataSectionHandler;
570 XML_EndCdataSectionHandler m_endCdataSectionHandler;
571 XML_DefaultHandler m_defaultHandler;
572 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
573 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
574 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
575 XML_NotationDeclHandler m_notationDeclHandler;
576 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
577 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
578 XML_NotStandaloneHandler m_notStandaloneHandler;
579 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
580 XML_Parser m_externalEntityRefHandlerArg;
581 XML_SkippedEntityHandler m_skippedEntityHandler;
582 XML_UnknownEncodingHandler m_unknownEncodingHandler;
583 XML_ElementDeclHandler m_elementDeclHandler;
584 XML_AttlistDeclHandler m_attlistDeclHandler;
585 XML_EntityDeclHandler m_entityDeclHandler;
586 XML_XmlDeclHandler m_xmlDeclHandler;
587 const ENCODING *m_encoding;
588 INIT_ENCODING m_initEncoding;
589 const ENCODING *m_internalEncoding;
590 const XML_Char *m_protocolEncodingName;
591 XML_Bool m_ns;
592 XML_Bool m_ns_triplets;
593 void *m_unknownEncodingMem;
594 void *m_unknownEncodingData;
595 void *m_unknownEncodingHandlerData;
596 void (XMLCALL *m_unknownEncodingRelease)(void *);
597 PROLOG_STATE m_prologState;
598 Processor *m_processor;
599 enum XML_Error m_errorCode;
600 const char *m_eventPtr;
601 const char *m_eventEndPtr;
602 const char *m_positionPtr;
603 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
604 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
605 XML_Bool m_defaultExpandInternalEntities;
606 int m_tagLevel;
607 ENTITY *m_declEntity;
608 const XML_Char *m_doctypeName;
609 const XML_Char *m_doctypeSysid;
610 const XML_Char *m_doctypePubid;
611 const XML_Char *m_declAttributeType;
612 const XML_Char *m_declNotationName;
613 const XML_Char *m_declNotationPublicId;
614 ELEMENT_TYPE *m_declElementType;
615 ATTRIBUTE_ID *m_declAttributeId;
616 XML_Bool m_declAttributeIsCdata;
617 XML_Bool m_declAttributeIsId;
618 DTD *m_dtd;
619 const XML_Char *m_curBase;
620 TAG *m_tagStack;
621 TAG *m_freeTagList;
622 BINDING *m_inheritedBindings;
623 BINDING *m_freeBindingList;
624 int m_attsSize;
625 int m_nSpecifiedAtts;
626 int m_idAttIndex;
627 ATTRIBUTE *m_atts;
628 NS_ATT *m_nsAtts;
629 unsigned long m_nsAttsVersion;
630 unsigned char m_nsAttsPower;
631 #ifdef XML_ATTR_INFO
632 XML_AttrInfo *m_attInfo;
633 #endif
634 POSITION m_position;
635 STRING_POOL m_tempPool;
636 STRING_POOL m_temp2Pool;
637 char *m_groupConnector;
638 unsigned int m_groupSize;
639 XML_Char m_namespaceSeparator;
640 XML_Parser m_parentParser;
641 XML_ParsingStatus m_parsingStatus;
642 #ifdef XML_DTD
643 XML_Bool m_isParamEntity;
644 XML_Bool m_useForeignDTD;
645 enum XML_ParamEntityParsing m_paramEntityParsing;
646 #endif
647 unsigned long m_hash_secret_salt;
648 };
649
/* Allocate, resize and free through the parser's own memory suite.  The
   parser argument is parenthesized so any pointer expression may be
   passed, not just a plain identifier. */
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
653
654
655 XML_Parser XMLCALL
XML_ParserCreate(const XML_Char * encodingName)656 XML_ParserCreate(const XML_Char *encodingName)
657 {
658 return XML_ParserCreate_MM(encodingName, NULL, NULL);
659 }
660
661 XML_Parser XMLCALL
XML_ParserCreateNS(const XML_Char * encodingName,XML_Char nsSep)662 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep)
663 {
664 XML_Char tmp[2];
665 *tmp = nsSep;
666 return XML_ParserCreate_MM(encodingName, NULL, tmp);
667 }
668
669 static const XML_Char implicitContext[] = {
670 ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, ASCII_t, ASCII_t, ASCII_p,
671 ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
672 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g,
673 ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9,
674 ASCII_9, ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
675 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0'
676 };
677
678
679 /* To avoid warnings about unused functions: */
680 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
681
682 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
683
/* Obtain entropy on Linux 3.17+.

   Fills 'target' with 'count' random bytes using getrandom() (or the raw
   SYS_getrandom syscall) in non-blocking mode.  Returns 1 once all bytes
   have been written and 0 on failure.

   errno is consulted only right after a call that reported an error, so a
   stale value can neither end the loop early after a short write nor keep
   it spinning.  A request for 0 bytes succeeds trivially. */
static int
writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
  size_t bytesWrittenTotal = 0;
  const unsigned int getrandomFlags = GRND_NONBLOCK;

  while (bytesWrittenTotal < count) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    /* long holds both ssize_t (getrandom) and long (syscall) results */
    const long bytesWrittenMore =
#if defined(HAVE_GETRANDOM)
        (long)getrandom(currentTarget, bytesToWrite, getrandomFlags);
#else
        (long)syscall(SYS_getrandom, currentTarget, bytesToWrite,
                      getrandomFlags);
#endif

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
    } else if (bytesWrittenMore == 0 || errno != EINTR) {
      /* no progress, or a real error: give up; only EINTR is retried */
      return 0;
    }
  }

  return 1;
}
711
712 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
713
714
715 #if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
716
/* Extract entropy from /dev/urandom.

   Fills 'target' with 'count' bytes read from /dev/urandom.  Returns 1
   once all bytes have been read, and 0 if the device cannot be opened or
   a read fails.

   Short reads are continued, and errno is examined only after read()
   returned an error: a read interrupted by a signal (EINTR) is retried,
   any other error or an unexpected end-of-file aborts.  A request for 0
   bytes succeeds trivially. */
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  int success = 1; /* cleared on the first unrecoverable read */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  while (bytesWrittenTotal < count) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
    } else if (bytesWrittenMore == 0 || errno != EINTR) {
      success = 0;
      break;
    }
  }

  close(fd);
  return success;
}
744
745 #endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
746
747 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
748
749
750 #if defined(HAVE_ARC4RANDOM)
751
/* Fill 'target' with 'count' random bytes drawn from arc4random().
   Each 32-bit result is split into bytes, least significant first; a
   trailing partial word is used only as far as needed. */
static void
writeRandomBytes_arc4random(void *target, size_t count) {
  size_t bytesWrittenTotal = 0;

  while (bytesWrittenTotal < count) {
    const uint32_t random32 = arc4random();
    size_t i = 0;

    for (; (i < sizeof(random32)) && (bytesWrittenTotal < count);
         i++, bytesWrittenTotal++) {
      const uint8_t random8 = (uint8_t)(random32 >> (i * 8));
      ((uint8_t *)target)[bytesWrittenTotal] = random8;
    }
  }
}
767
768 #endif /* defined(HAVE_ARC4RANDOM) */
769
770
771 #ifdef _WIN32
772
773 typedef BOOLEAN (APIENTRY *RTLGENRANDOM_FUNC)(PVOID, ULONG);
774 HMODULE _Expat_LoadLibrary(LPCTSTR filename); /* see loadlibrary.c */
775
776 /* Obtain entropy on Windows XP / Windows Server 2003 and later.
777 * Hint on RtlGenRandom and the following article from libsodium.
778 *
779 * Michael Howard: Cryptographically Secure Random number on Windows without using CryptoAPI
780 * https://blogs.msdn.microsoft.com/michael_howard/2005/01/14/cryptographically-secure-random-number-on-windows-without-using-cryptoapi/
781 */
782 static int
writeRandomBytes_RtlGenRandom(void * target,size_t count)783 writeRandomBytes_RtlGenRandom(void * target, size_t count) {
784 int success = 0; /* full count bytes written? */
785 const HMODULE advapi32 = _Expat_LoadLibrary(TEXT("ADVAPI32.DLL"));
786
787 if (advapi32) {
788 const RTLGENRANDOM_FUNC RtlGenRandom
789 = (RTLGENRANDOM_FUNC)GetProcAddress(advapi32, "SystemFunction036");
790 if (RtlGenRandom) {
791 if (RtlGenRandom((PVOID)target, (ULONG)count) == TRUE) {
792 success = 1;
793 }
794 }
795 FreeLibrary(advapi32);
796 }
797
798 return success;
799 }
800
801 #endif /* _WIN32 */
802
803
804 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
805
/* Low-quality, time-based entropy used only as a fallback.
   On Windows: XOR of the two halves of the current FILETIME.
   Elsewhere: the microsecond field of gettimeofday(). */
static unsigned long
gather_time_entropy(void)
{
#ifdef _WIN32
  FILETIME ft;
  GetSystemTimeAsFileTime(&ft); /* never fails */
  return ft.dwHighDateTime ^ ft.dwLowDateTime;
#else
  struct timeval tv;
  int gettimeofday_res;

  gettimeofday_res = gettimeofday(&tv, NULL);

#if defined(NDEBUG)
  (void)gettimeofday_res;
#else
  assert (gettimeofday_res == 0);
#endif /* defined(NDEBUG) */

  /* Microseconds time is <20 bits entropy */
  return tv.tv_usec;
#endif
}
829
830 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
831
832
/* Pass-through helper: returns 'entropy' unchanged.  When the environment
   variable EXPAT_ENTROPY_DEBUG is exactly "1", it first prints the label,
   the value in zero-padded hex and its size in bytes to stderr. */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  const char *const EXPAT_ENTROPY_DEBUG = getenv("EXPAT_ENTROPY_DEBUG");
  if (EXPAT_ENTROPY_DEBUG && ! strcmp(EXPAT_ENTROPY_DEBUG, "1")) {
    fprintf(stderr, "Entropy: %s --> 0x%0*lx (%lu bytes)\n",
            label,
            (int)sizeof(entropy) * 2, entropy,
            (unsigned long)sizeof(entropy));
  }
  return entropy;
}
844
845 static unsigned long
generate_hash_secret_salt(XML_Parser parser)846 generate_hash_secret_salt(XML_Parser parser)
847 {
848 unsigned long entropy;
849 (void)parser;
850
851 /* "Failproof" high quality providers: */
852 #if defined(HAVE_ARC4RANDOM_BUF)
853 arc4random_buf(&entropy, sizeof(entropy));
854 return ENTROPY_DEBUG("arc4random_buf", entropy);
855 #elif defined(HAVE_ARC4RANDOM)
856 writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
857 return ENTROPY_DEBUG("arc4random", entropy);
858 #else
859 /* Try high quality providers first .. */
860 #ifdef _WIN32
861 if (writeRandomBytes_RtlGenRandom((void *)&entropy, sizeof(entropy))) {
862 return ENTROPY_DEBUG("RtlGenRandom", entropy);
863 }
864 #elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
865 if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
866 return ENTROPY_DEBUG("getrandom", entropy);
867 }
868 #endif
869 #if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
870 if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
871 return ENTROPY_DEBUG("/dev/urandom", entropy);
872 }
873 #endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
874 /* .. and self-made low quality for backup: */
875
876 /* Process ID is 0 bits entropy if attacker has local access */
877 entropy = gather_time_entropy() ^ getpid();
878
879 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
880 if (sizeof(unsigned long) == 4) {
881 return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
882 } else {
883 return ENTROPY_DEBUG("fallback(8)",
884 entropy * (unsigned long)2305843009213693951ULL);
885 }
886 #endif
887 }
888
889 static unsigned long
get_hash_secret_salt(XML_Parser parser)890 get_hash_secret_salt(XML_Parser parser) {
891 if (parser->m_parentParser != NULL)
892 return get_hash_secret_salt(parser->m_parentParser);
893 return parser->m_hash_secret_salt;
894 }
895
896 static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser)897 startParsing(XML_Parser parser)
898 {
899 /* hash functions must be initialized before setContext() is called */
900 if (parser->m_hash_secret_salt == 0)
901 parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
902 if (parser->m_ns) {
903 /* implicit context only set for root parser, since child
904 parsers (i.e. external entity parsers) will inherit it
905 */
906 return setContext(parser, implicitContext);
907 }
908 return XML_TRUE;
909 }
910
911 XML_Parser XMLCALL
XML_ParserCreate_MM(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep)912 XML_ParserCreate_MM(const XML_Char *encodingName,
913 const XML_Memory_Handling_Suite *memsuite,
914 const XML_Char *nameSep)
915 {
916 return parserCreate(encodingName, memsuite, nameSep, NULL);
917 }
918
919 static XML_Parser
parserCreate(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep,DTD * dtd)920 parserCreate(const XML_Char *encodingName,
921 const XML_Memory_Handling_Suite *memsuite,
922 const XML_Char *nameSep,
923 DTD *dtd)
924 {
925 XML_Parser parser;
926
927 if (memsuite) {
928 XML_Memory_Handling_Suite *mtemp;
929 parser = (XML_Parser)
930 memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
931 if (parser != NULL) {
932 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
933 mtemp->malloc_fcn = memsuite->malloc_fcn;
934 mtemp->realloc_fcn = memsuite->realloc_fcn;
935 mtemp->free_fcn = memsuite->free_fcn;
936 }
937 }
938 else {
939 XML_Memory_Handling_Suite *mtemp;
940 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
941 if (parser != NULL) {
942 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
943 mtemp->malloc_fcn = malloc;
944 mtemp->realloc_fcn = realloc;
945 mtemp->free_fcn = free;
946 }
947 }
948
949 if (!parser)
950 return parser;
951
952 parser->m_buffer = NULL;
953 parser->m_bufferLim = NULL;
954
955 parser->m_attsSize = INIT_ATTS_SIZE;
956 parser->m_atts = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
957 if (parser->m_atts == NULL) {
958 FREE(parser, parser);
959 return NULL;
960 }
961 #ifdef XML_ATTR_INFO
962 parser->m_attInfo = (XML_AttrInfo*)MALLOC(parser, parser->m_attsSize * sizeof(XML_AttrInfo));
963 if (parser->m_attInfo == NULL) {
964 FREE(parser, parser->m_atts);
965 FREE(parser, parser);
966 return NULL;
967 }
968 #endif
969 parser->m_dataBuf = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
970 if (parser->m_dataBuf == NULL) {
971 FREE(parser, parser->m_atts);
972 #ifdef XML_ATTR_INFO
973 FREE(parser, parser->m_attInfo);
974 #endif
975 FREE(parser, parser);
976 return NULL;
977 }
978 parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
979
980 if (dtd)
981 parser->m_dtd = dtd;
982 else {
983 parser->m_dtd = dtdCreate(&parser->m_mem);
984 if (parser->m_dtd == NULL) {
985 FREE(parser, parser->m_dataBuf);
986 FREE(parser, parser->m_atts);
987 #ifdef XML_ATTR_INFO
988 FREE(parser, parser->m_attInfo);
989 #endif
990 FREE(parser, parser);
991 return NULL;
992 }
993 }
994
995 parser->m_freeBindingList = NULL;
996 parser->m_freeTagList = NULL;
997 parser->m_freeInternalEntities = NULL;
998
999 parser->m_groupSize = 0;
1000 parser->m_groupConnector = NULL;
1001
1002 parser->m_unknownEncodingHandler = NULL;
1003 parser->m_unknownEncodingHandlerData = NULL;
1004
1005 parser->m_namespaceSeparator = ASCII_EXCL;
1006 parser->m_ns = XML_FALSE;
1007 parser->m_ns_triplets = XML_FALSE;
1008
1009 parser->m_nsAtts = NULL;
1010 parser->m_nsAttsVersion = 0;
1011 parser->m_nsAttsPower = 0;
1012
1013 parser->m_protocolEncodingName = NULL;
1014
1015 poolInit(&parser->m_tempPool, &(parser->m_mem));
1016 poolInit(&parser->m_temp2Pool, &(parser->m_mem));
1017 parserInit(parser, encodingName);
1018
1019 if (encodingName && !parser->m_protocolEncodingName) {
1020 XML_ParserFree(parser);
1021 return NULL;
1022 }
1023
1024 if (nameSep) {
1025 parser->m_ns = XML_TRUE;
1026 parser->m_internalEncoding = XmlGetInternalEncodingNS();
1027 parser->m_namespaceSeparator = *nameSep;
1028 }
1029 else {
1030 parser->m_internalEncoding = XmlGetInternalEncoding();
1031 }
1032
1033 return parser;
1034 }
1035
1036 static void
parserInit(XML_Parser parser,const XML_Char * encodingName)1037 parserInit(XML_Parser parser, const XML_Char *encodingName)
1038 {
1039 parser->m_processor = prologInitProcessor;
1040 XmlPrologStateInit(&parser->m_prologState);
1041 if (encodingName != NULL) {
1042 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1043 }
1044 parser->m_curBase = NULL;
1045 XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1046 parser->m_userData = NULL;
1047 parser->m_handlerArg = NULL;
1048 parser->m_startElementHandler = NULL;
1049 parser->m_endElementHandler = NULL;
1050 parser->m_characterDataHandler = NULL;
1051 parser->m_processingInstructionHandler = NULL;
1052 parser->m_commentHandler = NULL;
1053 parser->m_startCdataSectionHandler = NULL;
1054 parser->m_endCdataSectionHandler = NULL;
1055 parser->m_defaultHandler = NULL;
1056 parser->m_startDoctypeDeclHandler = NULL;
1057 parser->m_endDoctypeDeclHandler = NULL;
1058 parser->m_unparsedEntityDeclHandler = NULL;
1059 parser->m_notationDeclHandler = NULL;
1060 parser->m_startNamespaceDeclHandler = NULL;
1061 parser->m_endNamespaceDeclHandler = NULL;
1062 parser->m_notStandaloneHandler = NULL;
1063 parser->m_externalEntityRefHandler = NULL;
1064 parser->m_externalEntityRefHandlerArg = parser;
1065 parser->m_skippedEntityHandler = NULL;
1066 parser->m_elementDeclHandler = NULL;
1067 parser->m_attlistDeclHandler = NULL;
1068 parser->m_entityDeclHandler = NULL;
1069 parser->m_xmlDeclHandler = NULL;
1070 parser->m_bufferPtr = parser->m_buffer;
1071 parser->m_bufferEnd = parser->m_buffer;
1072 parser->m_parseEndByteIndex = 0;
1073 parser->m_parseEndPtr = NULL;
1074 parser->m_declElementType = NULL;
1075 parser->m_declAttributeId = NULL;
1076 parser->m_declEntity = NULL;
1077 parser->m_doctypeName = NULL;
1078 parser->m_doctypeSysid = NULL;
1079 parser->m_doctypePubid = NULL;
1080 parser->m_declAttributeType = NULL;
1081 parser->m_declNotationName = NULL;
1082 parser->m_declNotationPublicId = NULL;
1083 parser->m_declAttributeIsCdata = XML_FALSE;
1084 parser->m_declAttributeIsId = XML_FALSE;
1085 memset(&parser->m_position, 0, sizeof(POSITION));
1086 parser->m_errorCode = XML_ERROR_NONE;
1087 parser->m_eventPtr = NULL;
1088 parser->m_eventEndPtr = NULL;
1089 parser->m_positionPtr = NULL;
1090 parser->m_openInternalEntities = NULL;
1091 parser->m_defaultExpandInternalEntities = XML_TRUE;
1092 parser->m_tagLevel = 0;
1093 parser->m_tagStack = NULL;
1094 parser->m_inheritedBindings = NULL;
1095 parser->m_nSpecifiedAtts = 0;
1096 parser->m_unknownEncodingMem = NULL;
1097 parser->m_unknownEncodingRelease = NULL;
1098 parser->m_unknownEncodingData = NULL;
1099 parser->m_parentParser = NULL;
1100 parser->m_parsingStatus.parsing = XML_INITIALIZED;
1101 #ifdef XML_DTD
1102 parser->m_isParamEntity = XML_FALSE;
1103 parser->m_useForeignDTD = XML_FALSE;
1104 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1105 #endif
1106 parser->m_hash_secret_salt = 0;
1107 }
1108
1109 /* moves list of bindings to m_freeBindingList */
1110 static void FASTCALL
moveToFreeBindingList(XML_Parser parser,BINDING * bindings)1111 moveToFreeBindingList(XML_Parser parser, BINDING *bindings)
1112 {
1113 while (bindings) {
1114 BINDING *b = bindings;
1115 bindings = bindings->nextTagBinding;
1116 b->nextTagBinding = parser->m_freeBindingList;
1117 parser->m_freeBindingList = b;
1118 }
1119 }
1120
1121 XML_Bool XMLCALL
XML_ParserReset(XML_Parser parser,const XML_Char * encodingName)1122 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName)
1123 {
1124 TAG *tStk;
1125 OPEN_INTERNAL_ENTITY *openEntityList;
1126
1127 if (parser == NULL)
1128 return XML_FALSE;
1129
1130 if (parser->m_parentParser)
1131 return XML_FALSE;
1132 /* move m_tagStack to m_freeTagList */
1133 tStk = parser->m_tagStack;
1134 while (tStk) {
1135 TAG *tag = tStk;
1136 tStk = tStk->parent;
1137 tag->parent = parser->m_freeTagList;
1138 moveToFreeBindingList(parser, tag->bindings);
1139 tag->bindings = NULL;
1140 parser->m_freeTagList = tag;
1141 }
1142 /* move m_openInternalEntities to m_freeInternalEntities */
1143 openEntityList = parser->m_openInternalEntities;
1144 while (openEntityList) {
1145 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1146 openEntityList = openEntity->next;
1147 openEntity->next = parser->m_freeInternalEntities;
1148 parser->m_freeInternalEntities = openEntity;
1149 }
1150 moveToFreeBindingList(parser, parser->m_inheritedBindings);
1151 FREE(parser, parser->m_unknownEncodingMem);
1152 if (parser->m_unknownEncodingRelease)
1153 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1154 poolClear(&parser->m_tempPool);
1155 poolClear(&parser->m_temp2Pool);
1156 FREE(parser, (void *)parser->m_protocolEncodingName);
1157 parser->m_protocolEncodingName = NULL;
1158 parserInit(parser, encodingName);
1159 dtdReset(parser->m_dtd, &parser->m_mem);
1160 return XML_TRUE;
1161 }
1162
1163 enum XML_Status XMLCALL
XML_SetEncoding(XML_Parser parser,const XML_Char * encodingName)1164 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
1165 {
1166 if (parser == NULL)
1167 return XML_STATUS_ERROR;
1168 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1169 XXX There's no way for the caller to determine which of the
1170 XXX possible error cases caused the XML_STATUS_ERROR return.
1171 */
1172 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1173 return XML_STATUS_ERROR;
1174
1175 /* Get rid of any previous encoding name */
1176 FREE(parser, (void *)parser->m_protocolEncodingName);
1177
1178 if (encodingName == NULL)
1179 /* No new encoding name */
1180 parser->m_protocolEncodingName = NULL;
1181 else {
1182 /* Copy the new encoding name into allocated memory */
1183 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1184 if (!parser->m_protocolEncodingName)
1185 return XML_STATUS_ERROR;
1186 }
1187 return XML_STATUS_OK;
1188 }
1189
1190 XML_Parser XMLCALL
XML_ExternalEntityParserCreate(XML_Parser oldParser,const XML_Char * context,const XML_Char * encodingName)1191 XML_ExternalEntityParserCreate(XML_Parser oldParser,
1192 const XML_Char *context,
1193 const XML_Char *encodingName)
1194 {
1195 XML_Parser parser = oldParser;
1196 DTD *newDtd = NULL;
1197 DTD *oldDtd;
1198 XML_StartElementHandler oldStartElementHandler;
1199 XML_EndElementHandler oldEndElementHandler;
1200 XML_CharacterDataHandler oldCharacterDataHandler;
1201 XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1202 XML_CommentHandler oldCommentHandler;
1203 XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1204 XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1205 XML_DefaultHandler oldDefaultHandler;
1206 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1207 XML_NotationDeclHandler oldNotationDeclHandler;
1208 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1209 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1210 XML_NotStandaloneHandler oldNotStandaloneHandler;
1211 XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1212 XML_SkippedEntityHandler oldSkippedEntityHandler;
1213 XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1214 XML_ElementDeclHandler oldElementDeclHandler;
1215 XML_AttlistDeclHandler oldAttlistDeclHandler;
1216 XML_EntityDeclHandler oldEntityDeclHandler;
1217 XML_XmlDeclHandler oldXmlDeclHandler;
1218 ELEMENT_TYPE * oldDeclElementType;
1219
1220 void *oldUserData;
1221 void *oldHandlerArg;
1222 XML_Bool oldDefaultExpandInternalEntities;
1223 XML_Parser oldExternalEntityRefHandlerArg;
1224 #ifdef XML_DTD
1225 enum XML_ParamEntityParsing oldParamEntityParsing;
1226 int oldInEntityValue;
1227 #endif
1228 XML_Bool oldns_triplets;
1229 /* Note that the new parser shares the same hash secret as the old
1230 parser, so that dtdCopy and copyEntityTable can lookup values
1231 from hash tables associated with either parser without us having
1232 to worry which hash secrets each table has.
1233 */
1234 unsigned long oldhash_secret_salt;
1235
1236 /* Validate the oldParser parameter before we pull everything out of it */
1237 if (oldParser == NULL)
1238 return NULL;
1239
1240 /* Stash the original parser contents on the stack */
1241 oldDtd = parser->m_dtd;
1242 oldStartElementHandler = parser->m_startElementHandler;
1243 oldEndElementHandler = parser->m_endElementHandler;
1244 oldCharacterDataHandler = parser->m_characterDataHandler;
1245 oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1246 oldCommentHandler = parser->m_commentHandler;
1247 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1248 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1249 oldDefaultHandler = parser->m_defaultHandler;
1250 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1251 oldNotationDeclHandler = parser->m_notationDeclHandler;
1252 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1253 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1254 oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1255 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1256 oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1257 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1258 oldElementDeclHandler = parser->m_elementDeclHandler;
1259 oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1260 oldEntityDeclHandler = parser->m_entityDeclHandler;
1261 oldXmlDeclHandler = parser->m_xmlDeclHandler;
1262 oldDeclElementType = parser->m_declElementType;
1263
1264 oldUserData = parser->m_userData;
1265 oldHandlerArg = parser->m_handlerArg;
1266 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1267 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
1268 #ifdef XML_DTD
1269 oldParamEntityParsing = parser->m_paramEntityParsing;
1270 oldInEntityValue = parser->m_prologState.inEntityValue;
1271 #endif
1272 oldns_triplets = parser->m_ns_triplets;
1273 /* Note that the new parser shares the same hash secret as the old
1274 parser, so that dtdCopy and copyEntityTable can lookup values
1275 from hash tables associated with either parser without us having
1276 to worry which hash secrets each table has.
1277 */
1278 oldhash_secret_salt = parser->m_hash_secret_salt;
1279
1280 #ifdef XML_DTD
1281 if (!context)
1282 newDtd = oldDtd;
1283 #endif /* XML_DTD */
1284
1285 /* Note that the magical uses of the pre-processor to make field
1286 access look more like C++ require that `parser' be overwritten
1287 here. This makes this function more painful to follow than it
1288 would be otherwise.
1289 */
1290 if (parser->m_ns) {
1291 XML_Char tmp[2];
1292 *tmp = parser->m_namespaceSeparator;
1293 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
1294 }
1295 else {
1296 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
1297 }
1298
1299 if (!parser)
1300 return NULL;
1301
1302 parser->m_startElementHandler = oldStartElementHandler;
1303 parser->m_endElementHandler = oldEndElementHandler;
1304 parser->m_characterDataHandler = oldCharacterDataHandler;
1305 parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1306 parser->m_commentHandler = oldCommentHandler;
1307 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1308 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1309 parser->m_defaultHandler = oldDefaultHandler;
1310 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1311 parser->m_notationDeclHandler = oldNotationDeclHandler;
1312 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1313 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1314 parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1315 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1316 parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1317 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1318 parser->m_elementDeclHandler = oldElementDeclHandler;
1319 parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1320 parser->m_entityDeclHandler = oldEntityDeclHandler;
1321 parser->m_xmlDeclHandler = oldXmlDeclHandler;
1322 parser->m_declElementType = oldDeclElementType;
1323 parser->m_userData = oldUserData;
1324 if (oldUserData == oldHandlerArg)
1325 parser->m_handlerArg = parser->m_userData;
1326 else
1327 parser->m_handlerArg = parser;
1328 if (oldExternalEntityRefHandlerArg != oldParser)
1329 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1330 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1331 parser->m_ns_triplets = oldns_triplets;
1332 parser->m_hash_secret_salt = oldhash_secret_salt;
1333 parser->m_parentParser = oldParser;
1334 #ifdef XML_DTD
1335 parser->m_paramEntityParsing = oldParamEntityParsing;
1336 parser->m_prologState.inEntityValue = oldInEntityValue;
1337 if (context) {
1338 #endif /* XML_DTD */
1339 if (!dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
1340 || !setContext(parser, context)) {
1341 XML_ParserFree(parser);
1342 return NULL;
1343 }
1344 parser->m_processor = externalEntityInitProcessor;
1345 #ifdef XML_DTD
1346 }
1347 else {
1348 /* The DTD instance referenced by parser->m_dtd is shared between the document's
1349 root parser and external PE parsers, therefore one does not need to
1350 call setContext. In addition, one also *must* not call setContext,
1351 because this would overwrite existing prefix->binding pointers in
1352 parser->m_dtd with ones that get destroyed with the external PE parser.
1353 This would leave those prefixes with dangling pointers.
1354 */
1355 parser->m_isParamEntity = XML_TRUE;
1356 XmlPrologStateInitExternalEntity(&parser->m_prologState);
1357 parser->m_processor = externalParEntInitProcessor;
1358 }
1359 #endif /* XML_DTD */
1360 return parser;
1361 }
1362
1363 static void FASTCALL
destroyBindings(BINDING * bindings,XML_Parser parser)1364 destroyBindings(BINDING *bindings, XML_Parser parser)
1365 {
1366 for (;;) {
1367 BINDING *b = bindings;
1368 if (!b)
1369 break;
1370 bindings = b->nextTagBinding;
1371 FREE(parser, b->uri);
1372 FREE(parser, b);
1373 }
1374 }
1375
1376 void XMLCALL
XML_ParserFree(XML_Parser parser)1377 XML_ParserFree(XML_Parser parser)
1378 {
1379 TAG *tagList;
1380 OPEN_INTERNAL_ENTITY *entityList;
1381 if (parser == NULL)
1382 return;
1383 /* free m_tagStack and m_freeTagList */
1384 tagList = parser->m_tagStack;
1385 for (;;) {
1386 TAG *p;
1387 if (tagList == NULL) {
1388 if (parser->m_freeTagList == NULL)
1389 break;
1390 tagList = parser->m_freeTagList;
1391 parser->m_freeTagList = NULL;
1392 }
1393 p = tagList;
1394 tagList = tagList->parent;
1395 FREE(parser, p->buf);
1396 destroyBindings(p->bindings, parser);
1397 FREE(parser, p);
1398 }
1399 /* free m_openInternalEntities and m_freeInternalEntities */
1400 entityList = parser->m_openInternalEntities;
1401 for (;;) {
1402 OPEN_INTERNAL_ENTITY *openEntity;
1403 if (entityList == NULL) {
1404 if (parser->m_freeInternalEntities == NULL)
1405 break;
1406 entityList = parser->m_freeInternalEntities;
1407 parser->m_freeInternalEntities = NULL;
1408 }
1409 openEntity = entityList;
1410 entityList = entityList->next;
1411 FREE(parser, openEntity);
1412 }
1413
1414 destroyBindings(parser->m_freeBindingList, parser);
1415 destroyBindings(parser->m_inheritedBindings, parser);
1416 poolDestroy(&parser->m_tempPool);
1417 poolDestroy(&parser->m_temp2Pool);
1418 FREE(parser, (void *)parser->m_protocolEncodingName);
1419 #ifdef XML_DTD
1420 /* external parameter entity parsers share the DTD structure
1421 parser->m_dtd with the root parser, so we must not destroy it
1422 */
1423 if (!parser->m_isParamEntity && parser->m_dtd)
1424 #else
1425 if (parser->m_dtd)
1426 #endif /* XML_DTD */
1427 dtdDestroy(parser->m_dtd, (XML_Bool)!parser->m_parentParser, &parser->m_mem);
1428 FREE(parser, (void *)parser->m_atts);
1429 #ifdef XML_ATTR_INFO
1430 FREE(parser, (void *)parser->m_attInfo);
1431 #endif
1432 FREE(parser, parser->m_groupConnector);
1433 FREE(parser, parser->m_buffer);
1434 FREE(parser, parser->m_dataBuf);
1435 FREE(parser, parser->m_nsAtts);
1436 FREE(parser, parser->m_unknownEncodingMem);
1437 if (parser->m_unknownEncodingRelease)
1438 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1439 FREE(parser, parser);
1440 }
1441
1442 void XMLCALL
XML_UseParserAsHandlerArg(XML_Parser parser)1443 XML_UseParserAsHandlerArg(XML_Parser parser)
1444 {
1445 if (parser != NULL)
1446 parser->m_handlerArg = parser;
1447 }
1448
1449 enum XML_Error XMLCALL
XML_UseForeignDTD(XML_Parser parser,XML_Bool useDTD)1450 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD)
1451 {
1452 if (parser == NULL)
1453 return XML_ERROR_INVALID_ARGUMENT;
1454 #ifdef XML_DTD
1455 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1456 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1457 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1458 parser->m_useForeignDTD = useDTD;
1459 return XML_ERROR_NONE;
1460 #else
1461 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1462 #endif
1463 }
1464
1465 void XMLCALL
XML_SetReturnNSTriplet(XML_Parser parser,int do_nst)1466 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst)
1467 {
1468 if (parser == NULL)
1469 return;
1470 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1471 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1472 return;
1473 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1474 }
1475
1476 void XMLCALL
XML_SetUserData(XML_Parser parser,void * p)1477 XML_SetUserData(XML_Parser parser, void *p)
1478 {
1479 if (parser == NULL)
1480 return;
1481 if (parser->m_handlerArg == parser->m_userData)
1482 parser->m_handlerArg = parser->m_userData = p;
1483 else
1484 parser->m_userData = p;
1485 }
1486
1487 enum XML_Status XMLCALL
XML_SetBase(XML_Parser parser,const XML_Char * p)1488 XML_SetBase(XML_Parser parser, const XML_Char *p)
1489 {
1490 if (parser == NULL)
1491 return XML_STATUS_ERROR;
1492 if (p) {
1493 p = poolCopyString(&parser->m_dtd->pool, p);
1494 if (!p)
1495 return XML_STATUS_ERROR;
1496 parser->m_curBase = p;
1497 }
1498 else
1499 parser->m_curBase = NULL;
1500 return XML_STATUS_OK;
1501 }
1502
1503 const XML_Char * XMLCALL
XML_GetBase(XML_Parser parser)1504 XML_GetBase(XML_Parser parser)
1505 {
1506 if (parser == NULL)
1507 return NULL;
1508 return parser->m_curBase;
1509 }
1510
1511 int XMLCALL
XML_GetSpecifiedAttributeCount(XML_Parser parser)1512 XML_GetSpecifiedAttributeCount(XML_Parser parser)
1513 {
1514 if (parser == NULL)
1515 return -1;
1516 return parser->m_nSpecifiedAtts;
1517 }
1518
1519 int XMLCALL
XML_GetIdAttributeIndex(XML_Parser parser)1520 XML_GetIdAttributeIndex(XML_Parser parser)
1521 {
1522 if (parser == NULL)
1523 return -1;
1524 return parser->m_idAttIndex;
1525 }
1526
#ifdef XML_ATTR_INFO
/* Returns the parser's attribute info array (m_attInfo), or NULL for a
   NULL parser.  Only built when XML_ATTR_INFO is defined.
*/
const XML_AttrInfo * XMLCALL
XML_GetAttributeInfo(XML_Parser parser)
{
  if (parser != NULL)
    return parser->m_attInfo;
  return NULL;
}
#endif
1536
1537 void XMLCALL
XML_SetElementHandler(XML_Parser parser,XML_StartElementHandler start,XML_EndElementHandler end)1538 XML_SetElementHandler(XML_Parser parser,
1539 XML_StartElementHandler start,
1540 XML_EndElementHandler end)
1541 {
1542 if (parser == NULL)
1543 return;
1544 parser->m_startElementHandler = start;
1545 parser->m_endElementHandler = end;
1546 }
1547
1548 void XMLCALL
XML_SetStartElementHandler(XML_Parser parser,XML_StartElementHandler start)1549 XML_SetStartElementHandler(XML_Parser parser,
1550 XML_StartElementHandler start) {
1551 if (parser != NULL)
1552 parser->m_startElementHandler = start;
1553 }
1554
1555 void XMLCALL
XML_SetEndElementHandler(XML_Parser parser,XML_EndElementHandler end)1556 XML_SetEndElementHandler(XML_Parser parser,
1557 XML_EndElementHandler end) {
1558 if (parser != NULL)
1559 parser->m_endElementHandler = end;
1560 }
1561
1562 void XMLCALL
XML_SetCharacterDataHandler(XML_Parser parser,XML_CharacterDataHandler handler)1563 XML_SetCharacterDataHandler(XML_Parser parser,
1564 XML_CharacterDataHandler handler)
1565 {
1566 if (parser != NULL)
1567 parser->m_characterDataHandler = handler;
1568 }
1569
1570 void XMLCALL
XML_SetProcessingInstructionHandler(XML_Parser parser,XML_ProcessingInstructionHandler handler)1571 XML_SetProcessingInstructionHandler(XML_Parser parser,
1572 XML_ProcessingInstructionHandler handler)
1573 {
1574 if (parser != NULL)
1575 parser->m_processingInstructionHandler = handler;
1576 }
1577
1578 void XMLCALL
XML_SetCommentHandler(XML_Parser parser,XML_CommentHandler handler)1579 XML_SetCommentHandler(XML_Parser parser,
1580 XML_CommentHandler handler)
1581 {
1582 if (parser != NULL)
1583 parser->m_commentHandler = handler;
1584 }
1585
1586 void XMLCALL
XML_SetCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start,XML_EndCdataSectionHandler end)1587 XML_SetCdataSectionHandler(XML_Parser parser,
1588 XML_StartCdataSectionHandler start,
1589 XML_EndCdataSectionHandler end)
1590 {
1591 if (parser == NULL)
1592 return;
1593 parser->m_startCdataSectionHandler = start;
1594 parser->m_endCdataSectionHandler = end;
1595 }
1596
1597 void XMLCALL
XML_SetStartCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start)1598 XML_SetStartCdataSectionHandler(XML_Parser parser,
1599 XML_StartCdataSectionHandler start) {
1600 if (parser != NULL)
1601 parser->m_startCdataSectionHandler = start;
1602 }
1603
1604 void XMLCALL
XML_SetEndCdataSectionHandler(XML_Parser parser,XML_EndCdataSectionHandler end)1605 XML_SetEndCdataSectionHandler(XML_Parser parser,
1606 XML_EndCdataSectionHandler end) {
1607 if (parser != NULL)
1608 parser->m_endCdataSectionHandler = end;
1609 }
1610
1611 void XMLCALL
XML_SetDefaultHandler(XML_Parser parser,XML_DefaultHandler handler)1612 XML_SetDefaultHandler(XML_Parser parser,
1613 XML_DefaultHandler handler)
1614 {
1615 if (parser == NULL)
1616 return;
1617 parser->m_defaultHandler = handler;
1618 parser->m_defaultExpandInternalEntities = XML_FALSE;
1619 }
1620
1621 void XMLCALL
XML_SetDefaultHandlerExpand(XML_Parser parser,XML_DefaultHandler handler)1622 XML_SetDefaultHandlerExpand(XML_Parser parser,
1623 XML_DefaultHandler handler)
1624 {
1625 if (parser == NULL)
1626 return;
1627 parser->m_defaultHandler = handler;
1628 parser->m_defaultExpandInternalEntities = XML_TRUE;
1629 }
1630
1631 void XMLCALL
XML_SetDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start,XML_EndDoctypeDeclHandler end)1632 XML_SetDoctypeDeclHandler(XML_Parser parser,
1633 XML_StartDoctypeDeclHandler start,
1634 XML_EndDoctypeDeclHandler end)
1635 {
1636 if (parser == NULL)
1637 return;
1638 parser->m_startDoctypeDeclHandler = start;
1639 parser->m_endDoctypeDeclHandler = end;
1640 }
1641
1642 void XMLCALL
XML_SetStartDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start)1643 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1644 XML_StartDoctypeDeclHandler start) {
1645 if (parser != NULL)
1646 parser->m_startDoctypeDeclHandler = start;
1647 }
1648
1649 void XMLCALL
XML_SetEndDoctypeDeclHandler(XML_Parser parser,XML_EndDoctypeDeclHandler end)1650 XML_SetEndDoctypeDeclHandler(XML_Parser parser,
1651 XML_EndDoctypeDeclHandler end) {
1652 if (parser != NULL)
1653 parser->m_endDoctypeDeclHandler = end;
1654 }
1655
1656 void XMLCALL
XML_SetUnparsedEntityDeclHandler(XML_Parser parser,XML_UnparsedEntityDeclHandler handler)1657 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1658 XML_UnparsedEntityDeclHandler handler)
1659 {
1660 if (parser != NULL)
1661 parser->m_unparsedEntityDeclHandler = handler;
1662 }
1663
1664 void XMLCALL
XML_SetNotationDeclHandler(XML_Parser parser,XML_NotationDeclHandler handler)1665 XML_SetNotationDeclHandler(XML_Parser parser,
1666 XML_NotationDeclHandler handler)
1667 {
1668 if (parser != NULL)
1669 parser->m_notationDeclHandler = handler;
1670 }
1671
1672 void XMLCALL
XML_SetNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start,XML_EndNamespaceDeclHandler end)1673 XML_SetNamespaceDeclHandler(XML_Parser parser,
1674 XML_StartNamespaceDeclHandler start,
1675 XML_EndNamespaceDeclHandler end)
1676 {
1677 if (parser == NULL)
1678 return;
1679 parser->m_startNamespaceDeclHandler = start;
1680 parser->m_endNamespaceDeclHandler = end;
1681 }
1682
1683 void XMLCALL
XML_SetStartNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start)1684 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1685 XML_StartNamespaceDeclHandler start) {
1686 if (parser != NULL)
1687 parser->m_startNamespaceDeclHandler = start;
1688 }
1689
1690 void XMLCALL
XML_SetEndNamespaceDeclHandler(XML_Parser parser,XML_EndNamespaceDeclHandler end)1691 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1692 XML_EndNamespaceDeclHandler end) {
1693 if (parser != NULL)
1694 parser->m_endNamespaceDeclHandler = end;
1695 }
1696
1697 void XMLCALL
XML_SetNotStandaloneHandler(XML_Parser parser,XML_NotStandaloneHandler handler)1698 XML_SetNotStandaloneHandler(XML_Parser parser,
1699 XML_NotStandaloneHandler handler)
1700 {
1701 if (parser != NULL)
1702 parser->m_notStandaloneHandler = handler;
1703 }
1704
1705 void XMLCALL
XML_SetExternalEntityRefHandler(XML_Parser parser,XML_ExternalEntityRefHandler handler)1706 XML_SetExternalEntityRefHandler(XML_Parser parser,
1707 XML_ExternalEntityRefHandler handler)
1708 {
1709 if (parser != NULL)
1710 parser->m_externalEntityRefHandler = handler;
1711 }
1712
1713 void XMLCALL
XML_SetExternalEntityRefHandlerArg(XML_Parser parser,void * arg)1714 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg)
1715 {
1716 if (parser == NULL)
1717 return;
1718 if (arg)
1719 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
1720 else
1721 parser->m_externalEntityRefHandlerArg = parser;
1722 }
1723
1724 void XMLCALL
XML_SetSkippedEntityHandler(XML_Parser parser,XML_SkippedEntityHandler handler)1725 XML_SetSkippedEntityHandler(XML_Parser parser,
1726 XML_SkippedEntityHandler handler)
1727 {
1728 if (parser != NULL)
1729 parser->m_skippedEntityHandler = handler;
1730 }
1731
1732 void XMLCALL
XML_SetUnknownEncodingHandler(XML_Parser parser,XML_UnknownEncodingHandler handler,void * data)1733 XML_SetUnknownEncodingHandler(XML_Parser parser,
1734 XML_UnknownEncodingHandler handler,
1735 void *data)
1736 {
1737 if (parser == NULL)
1738 return;
1739 parser->m_unknownEncodingHandler = handler;
1740 parser->m_unknownEncodingHandlerData = data;
1741 }
1742
1743 void XMLCALL
XML_SetElementDeclHandler(XML_Parser parser,XML_ElementDeclHandler eldecl)1744 XML_SetElementDeclHandler(XML_Parser parser,
1745 XML_ElementDeclHandler eldecl)
1746 {
1747 if (parser != NULL)
1748 parser->m_elementDeclHandler = eldecl;
1749 }
1750
1751 void XMLCALL
XML_SetAttlistDeclHandler(XML_Parser parser,XML_AttlistDeclHandler attdecl)1752 XML_SetAttlistDeclHandler(XML_Parser parser,
1753 XML_AttlistDeclHandler attdecl)
1754 {
1755 if (parser != NULL)
1756 parser->m_attlistDeclHandler = attdecl;
1757 }
1758
1759 void XMLCALL
XML_SetEntityDeclHandler(XML_Parser parser,XML_EntityDeclHandler handler)1760 XML_SetEntityDeclHandler(XML_Parser parser,
1761 XML_EntityDeclHandler handler)
1762 {
1763 if (parser != NULL)
1764 parser->m_entityDeclHandler = handler;
1765 }
1766
1767 void XMLCALL
XML_SetXmlDeclHandler(XML_Parser parser,XML_XmlDeclHandler handler)1768 XML_SetXmlDeclHandler(XML_Parser parser,
1769 XML_XmlDeclHandler handler) {
1770 if (parser != NULL)
1771 parser->m_xmlDeclHandler = handler;
1772 }
1773
1774 int XMLCALL
XML_SetParamEntityParsing(XML_Parser parser,enum XML_ParamEntityParsing peParsing)1775 XML_SetParamEntityParsing(XML_Parser parser,
1776 enum XML_ParamEntityParsing peParsing)
1777 {
1778 if (parser == NULL)
1779 return 0;
1780 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1781 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1782 return 0;
1783 #ifdef XML_DTD
1784 parser->m_paramEntityParsing = peParsing;
1785 return 1;
1786 #else
1787 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
1788 #endif
1789 }
1790
1791 int XMLCALL
XML_SetHashSalt(XML_Parser parser,unsigned long hash_salt)1792 XML_SetHashSalt(XML_Parser parser,
1793 unsigned long hash_salt)
1794 {
1795 if (parser == NULL)
1796 return 0;
1797 if (parser->m_parentParser)
1798 return XML_SetHashSalt(parser->m_parentParser, hash_salt);
1799 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1800 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1801 return 0;
1802 parser->m_hash_secret_salt = hash_salt;
1803 return 1;
1804 }
1805
1806 enum XML_Status XMLCALL
XML_Parse(XML_Parser parser,const char * s,int len,int isFinal)1807 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
1808 {
1809 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
1810 if (parser != NULL)
1811 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
1812 return XML_STATUS_ERROR;
1813 }
1814 switch (parser->m_parsingStatus.parsing) {
1815 case XML_SUSPENDED:
1816 parser->m_errorCode = XML_ERROR_SUSPENDED;
1817 return XML_STATUS_ERROR;
1818 case XML_FINISHED:
1819 parser->m_errorCode = XML_ERROR_FINISHED;
1820 return XML_STATUS_ERROR;
1821 case XML_INITIALIZED:
1822 if (parser->m_parentParser == NULL && !startParsing(parser)) {
1823 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1824 return XML_STATUS_ERROR;
1825 }
1826 /* fall through */
1827 default:
1828 parser->m_parsingStatus.parsing = XML_PARSING;
1829 }
1830
1831 if (len == 0) {
1832 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1833 if (!isFinal)
1834 return XML_STATUS_OK;
1835 parser->m_positionPtr = parser->m_bufferPtr;
1836 parser->m_parseEndPtr = parser->m_bufferEnd;
1837
1838 /* If data are left over from last buffer, and we now know that these
1839 data are the final chunk of input, then we have to check them again
1840 to detect errors based on that fact.
1841 */
1842 parser->m_errorCode = parser->m_processor(parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
1843
1844 if (parser->m_errorCode == XML_ERROR_NONE) {
1845 switch (parser->m_parsingStatus.parsing) {
1846 case XML_SUSPENDED:
1847 /* It is hard to be certain, but it seems that this case
1848 * cannot occur. This code is cleaning up a previous parse
1849 * with no new data (since len == 0). Changing the parsing
1850 * state requires getting to execute a handler function, and
1851 * there doesn't seem to be an opportunity for that while in
1852 * this circumstance.
1853 *
1854 * Given the uncertainty, we retain the code but exclude it
1855 * from coverage tests.
1856 *
1857 * LCOV_EXCL_START
1858 */
1859 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_bufferPtr, &parser->m_position);
1860 parser->m_positionPtr = parser->m_bufferPtr;
1861 return XML_STATUS_SUSPENDED;
1862 /* LCOV_EXCL_STOP */
1863 case XML_INITIALIZED:
1864 case XML_PARSING:
1865 parser->m_parsingStatus.parsing = XML_FINISHED;
1866 /* fall through */
1867 default:
1868 return XML_STATUS_OK;
1869 }
1870 }
1871 parser->m_eventEndPtr = parser->m_eventPtr;
1872 parser->m_processor = errorProcessor;
1873 return XML_STATUS_ERROR;
1874 }
1875 #ifndef XML_CONTEXT_BYTES
1876 else if (parser->m_bufferPtr == parser->m_bufferEnd) {
1877 const char *end;
1878 int nLeftOver;
1879 enum XML_Status result;
1880 /* Detect overflow (a+b > MAX <==> b > MAX-a) */
1881 if (len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
1882 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1883 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1884 parser->m_processor = errorProcessor;
1885 return XML_STATUS_ERROR;
1886 }
1887 parser->m_parseEndByteIndex += len;
1888 parser->m_positionPtr = s;
1889 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1890
1891 parser->m_errorCode = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end);
1892
1893 if (parser->m_errorCode != XML_ERROR_NONE) {
1894 parser->m_eventEndPtr = parser->m_eventPtr;
1895 parser->m_processor = errorProcessor;
1896 return XML_STATUS_ERROR;
1897 }
1898 else {
1899 switch (parser->m_parsingStatus.parsing) {
1900 case XML_SUSPENDED:
1901 result = XML_STATUS_SUSPENDED;
1902 break;
1903 case XML_INITIALIZED:
1904 case XML_PARSING:
1905 if (isFinal) {
1906 parser->m_parsingStatus.parsing = XML_FINISHED;
1907 return XML_STATUS_OK;
1908 }
1909 /* fall through */
1910 default:
1911 result = XML_STATUS_OK;
1912 }
1913 }
1914
1915 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end, &parser->m_position);
1916 nLeftOver = s + len - end;
1917 if (nLeftOver) {
1918 if (parser->m_buffer == NULL || nLeftOver > parser->m_bufferLim - parser->m_buffer) {
1919 /* avoid _signed_ integer overflow */
1920 char *temp = NULL;
1921 const int bytesToAllocate = (int)((unsigned)len * 2U);
1922 if (bytesToAllocate > 0) {
1923 temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate);
1924 }
1925 if (temp == NULL) {
1926 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1927 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1928 parser->m_processor = errorProcessor;
1929 return XML_STATUS_ERROR;
1930 }
1931 parser->m_buffer = temp;
1932 parser->m_bufferLim = parser->m_buffer + bytesToAllocate;
1933 }
1934 memcpy(parser->m_buffer, end, nLeftOver);
1935 }
1936 parser->m_bufferPtr = parser->m_buffer;
1937 parser->m_bufferEnd = parser->m_buffer + nLeftOver;
1938 parser->m_positionPtr = parser->m_bufferPtr;
1939 parser->m_parseEndPtr = parser->m_bufferEnd;
1940 parser->m_eventPtr = parser->m_bufferPtr;
1941 parser->m_eventEndPtr = parser->m_bufferPtr;
1942 return result;
1943 }
1944 #endif /* not defined XML_CONTEXT_BYTES */
1945 else {
1946 void *buff = XML_GetBuffer(parser, len);
1947 if (buff == NULL)
1948 return XML_STATUS_ERROR;
1949 else {
1950 memcpy(buff, s, len);
1951 return XML_ParseBuffer(parser, len, isFinal);
1952 }
1953 }
1954 }
1955
1956 enum XML_Status XMLCALL
XML_ParseBuffer(XML_Parser parser,int len,int isFinal)1957 XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
1958 {
1959 const char *start;
1960 enum XML_Status result = XML_STATUS_OK;
1961
1962 if (parser == NULL)
1963 return XML_STATUS_ERROR;
1964 switch (parser->m_parsingStatus.parsing) {
1965 case XML_SUSPENDED:
1966 parser->m_errorCode = XML_ERROR_SUSPENDED;
1967 return XML_STATUS_ERROR;
1968 case XML_FINISHED:
1969 parser->m_errorCode = XML_ERROR_FINISHED;
1970 return XML_STATUS_ERROR;
1971 case XML_INITIALIZED:
1972 if (parser->m_parentParser == NULL && !startParsing(parser)) {
1973 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1974 return XML_STATUS_ERROR;
1975 }
1976 /* fall through */
1977 default:
1978 parser->m_parsingStatus.parsing = XML_PARSING;
1979 }
1980
1981 start = parser->m_bufferPtr;
1982 parser->m_positionPtr = start;
1983 parser->m_bufferEnd += len;
1984 parser->m_parseEndPtr = parser->m_bufferEnd;
1985 parser->m_parseEndByteIndex += len;
1986 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1987
1988 parser->m_errorCode = parser->m_processor(parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr);
1989
1990 if (parser->m_errorCode != XML_ERROR_NONE) {
1991 parser->m_eventEndPtr = parser->m_eventPtr;
1992 parser->m_processor = errorProcessor;
1993 return XML_STATUS_ERROR;
1994 }
1995 else {
1996 switch (parser->m_parsingStatus.parsing) {
1997 case XML_SUSPENDED:
1998 result = XML_STATUS_SUSPENDED;
1999 break;
2000 case XML_INITIALIZED:
2001 case XML_PARSING:
2002 if (isFinal) {
2003 parser->m_parsingStatus.parsing = XML_FINISHED;
2004 return result;
2005 }
2006 default: ; /* should not happen */
2007 }
2008 }
2009
2010 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_bufferPtr, &parser->m_position);
2011 parser->m_positionPtr = parser->m_bufferPtr;
2012 return result;
2013 }
2014
2015 void * XMLCALL
XML_GetBuffer(XML_Parser parser,int len)2016 XML_GetBuffer(XML_Parser parser, int len)
2017 {
2018 if (parser == NULL)
2019 return NULL;
2020 if (len < 0) {
2021 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2022 return NULL;
2023 }
2024 switch (parser->m_parsingStatus.parsing) {
2025 case XML_SUSPENDED:
2026 parser->m_errorCode = XML_ERROR_SUSPENDED;
2027 return NULL;
2028 case XML_FINISHED:
2029 parser->m_errorCode = XML_ERROR_FINISHED;
2030 return NULL;
2031 default: ;
2032 }
2033
2034 if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) {
2035 #ifdef XML_CONTEXT_BYTES
2036 int keep;
2037 #endif /* defined XML_CONTEXT_BYTES */
2038 /* Do not invoke signed arithmetic overflow: */
2039 int neededSize = (int) ((unsigned)len +
2040 (unsigned)EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd,
2041 parser->m_bufferPtr));
2042 if (neededSize < 0) {
2043 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2044 return NULL;
2045 }
2046 #ifdef XML_CONTEXT_BYTES
2047 keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
2048 if (keep > XML_CONTEXT_BYTES)
2049 keep = XML_CONTEXT_BYTES;
2050 neededSize += keep;
2051 #endif /* defined XML_CONTEXT_BYTES */
2052 if (neededSize <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
2053 #ifdef XML_CONTEXT_BYTES
2054 if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
2055 int offset = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer) - keep;
2056 /* The buffer pointers cannot be NULL here; we have at least some bytes in the buffer */
2057 memmove(parser->m_buffer, &parser->m_buffer[offset], parser->m_bufferEnd - parser->m_bufferPtr + keep);
2058 parser->m_bufferEnd -= offset;
2059 parser->m_bufferPtr -= offset;
2060 }
2061 #else
2062 if (parser->m_buffer && parser->m_bufferPtr) {
2063 memmove(parser->m_buffer, parser->m_bufferPtr,
2064 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2065 parser->m_bufferEnd = parser->m_buffer +
2066 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2067 parser->m_bufferPtr = parser->m_buffer;
2068 }
2069 #endif /* not defined XML_CONTEXT_BYTES */
2070 }
2071 else {
2072 char *newBuf;
2073 int bufferSize = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr);
2074 if (bufferSize == 0)
2075 bufferSize = INIT_BUFFER_SIZE;
2076 do {
2077 /* Do not invoke signed arithmetic overflow: */
2078 bufferSize = (int) (2U * (unsigned) bufferSize);
2079 } while (bufferSize < neededSize && bufferSize > 0);
2080 if (bufferSize <= 0) {
2081 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2082 return NULL;
2083 }
2084 newBuf = (char *)MALLOC(parser, bufferSize);
2085 if (newBuf == 0) {
2086 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2087 return NULL;
2088 }
2089 parser->m_bufferLim = newBuf + bufferSize;
2090 #ifdef XML_CONTEXT_BYTES
2091 if (parser->m_bufferPtr) {
2092 int keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
2093 if (keep > XML_CONTEXT_BYTES)
2094 keep = XML_CONTEXT_BYTES;
2095 memcpy(newBuf, &parser->m_bufferPtr[-keep],
2096 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) + keep);
2097 FREE(parser, parser->m_buffer);
2098 parser->m_buffer = newBuf;
2099 parser->m_bufferEnd = parser->m_buffer +
2100 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) + keep;
2101 parser->m_bufferPtr = parser->m_buffer + keep;
2102 }
2103 else {
2104 /* This must be a brand new buffer with no data in it yet */
2105 parser->m_bufferEnd = newBuf;
2106 parser->m_bufferPtr = parser->m_buffer = newBuf;
2107 }
2108 #else
2109 if (parser->m_bufferPtr) {
2110 memcpy(newBuf, parser->m_bufferPtr,
2111 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2112 FREE(parser, parser->m_buffer);
2113 parser->m_bufferEnd = newBuf +
2114 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2115 }
2116 else {
2117 /* This must be a brand new buffer with no data in it yet */
2118 parser->m_bufferEnd = newBuf;
2119 }
2120 parser->m_bufferPtr = parser->m_buffer = newBuf;
2121 #endif /* not defined XML_CONTEXT_BYTES */
2122 }
2123 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2124 parser->m_positionPtr = NULL;
2125 }
2126 return parser->m_bufferEnd;
2127 }
2128
2129 enum XML_Status XMLCALL
XML_StopParser(XML_Parser parser,XML_Bool resumable)2130 XML_StopParser(XML_Parser parser, XML_Bool resumable)
2131 {
2132 if (parser == NULL)
2133 return XML_STATUS_ERROR;
2134 switch (parser->m_parsingStatus.parsing) {
2135 case XML_SUSPENDED:
2136 if (resumable) {
2137 parser->m_errorCode = XML_ERROR_SUSPENDED;
2138 return XML_STATUS_ERROR;
2139 }
2140 parser->m_parsingStatus.parsing = XML_FINISHED;
2141 break;
2142 case XML_FINISHED:
2143 parser->m_errorCode = XML_ERROR_FINISHED;
2144 return XML_STATUS_ERROR;
2145 default:
2146 if (resumable) {
2147 #ifdef XML_DTD
2148 if (parser->m_isParamEntity) {
2149 parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2150 return XML_STATUS_ERROR;
2151 }
2152 #endif
2153 parser->m_parsingStatus.parsing = XML_SUSPENDED;
2154 }
2155 else
2156 parser->m_parsingStatus.parsing = XML_FINISHED;
2157 }
2158 return XML_STATUS_OK;
2159 }
2160
2161 enum XML_Status XMLCALL
XML_ResumeParser(XML_Parser parser)2162 XML_ResumeParser(XML_Parser parser)
2163 {
2164 enum XML_Status result = XML_STATUS_OK;
2165
2166 if (parser == NULL)
2167 return XML_STATUS_ERROR;
2168 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2169 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2170 return XML_STATUS_ERROR;
2171 }
2172 parser->m_parsingStatus.parsing = XML_PARSING;
2173
2174 parser->m_errorCode = parser->m_processor(parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2175
2176 if (parser->m_errorCode != XML_ERROR_NONE) {
2177 parser->m_eventEndPtr = parser->m_eventPtr;
2178 parser->m_processor = errorProcessor;
2179 return XML_STATUS_ERROR;
2180 }
2181 else {
2182 switch (parser->m_parsingStatus.parsing) {
2183 case XML_SUSPENDED:
2184 result = XML_STATUS_SUSPENDED;
2185 break;
2186 case XML_INITIALIZED:
2187 case XML_PARSING:
2188 if (parser->m_parsingStatus.finalBuffer) {
2189 parser->m_parsingStatus.parsing = XML_FINISHED;
2190 return result;
2191 }
2192 default: ;
2193 }
2194 }
2195
2196 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_bufferPtr, &parser->m_position);
2197 parser->m_positionPtr = parser->m_bufferPtr;
2198 return result;
2199 }
2200
2201 void XMLCALL
XML_GetParsingStatus(XML_Parser parser,XML_ParsingStatus * status)2202 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status)
2203 {
2204 if (parser == NULL)
2205 return;
2206 assert(status != NULL);
2207 *status = parser->m_parsingStatus;
2208 }
2209
2210 enum XML_Error XMLCALL
XML_GetErrorCode(XML_Parser parser)2211 XML_GetErrorCode(XML_Parser parser)
2212 {
2213 if (parser == NULL)
2214 return XML_ERROR_INVALID_ARGUMENT;
2215 return parser->m_errorCode;
2216 }
2217
2218 XML_Index XMLCALL
XML_GetCurrentByteIndex(XML_Parser parser)2219 XML_GetCurrentByteIndex(XML_Parser parser)
2220 {
2221 if (parser == NULL)
2222 return -1;
2223 if (parser->m_eventPtr)
2224 return (XML_Index)(parser->m_parseEndByteIndex - (parser->m_parseEndPtr - parser->m_eventPtr));
2225 return -1;
2226 }
2227
2228 int XMLCALL
XML_GetCurrentByteCount(XML_Parser parser)2229 XML_GetCurrentByteCount(XML_Parser parser)
2230 {
2231 if (parser == NULL)
2232 return 0;
2233 if (parser->m_eventEndPtr && parser->m_eventPtr)
2234 return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2235 return 0;
2236 }
2237
2238 const char * XMLCALL
XML_GetInputContext(XML_Parser parser,int * offset,int * size)2239 XML_GetInputContext(XML_Parser parser, int *offset, int *size)
2240 {
2241 #ifdef XML_CONTEXT_BYTES
2242 if (parser == NULL)
2243 return NULL;
2244 if (parser->m_eventPtr && parser->m_buffer) {
2245 if (offset != NULL)
2246 *offset = (int)(parser->m_eventPtr - parser->m_buffer);
2247 if (size != NULL)
2248 *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2249 return parser->m_buffer;
2250 }
2251 #else
2252 (void)parser;
2253 (void)offset;
2254 (void)size;
2255 #endif /* defined XML_CONTEXT_BYTES */
2256 return (char *) 0;
2257 }
2258
2259 XML_Size XMLCALL
XML_GetCurrentLineNumber(XML_Parser parser)2260 XML_GetCurrentLineNumber(XML_Parser parser)
2261 {
2262 if (parser == NULL)
2263 return 0;
2264 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2265 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_eventPtr, &parser->m_position);
2266 parser->m_positionPtr = parser->m_eventPtr;
2267 }
2268 return parser->m_position.lineNumber + 1;
2269 }
2270
2271 XML_Size XMLCALL
XML_GetCurrentColumnNumber(XML_Parser parser)2272 XML_GetCurrentColumnNumber(XML_Parser parser)
2273 {
2274 if (parser == NULL)
2275 return 0;
2276 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2277 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_eventPtr, &parser->m_position);
2278 parser->m_positionPtr = parser->m_eventPtr;
2279 }
2280 return parser->m_position.columnNumber;
2281 }
2282
2283 void XMLCALL
XML_FreeContentModel(XML_Parser parser,XML_Content * model)2284 XML_FreeContentModel(XML_Parser parser, XML_Content *model)
2285 {
2286 if (parser != NULL)
2287 FREE(parser, model);
2288 }
2289
2290 void * XMLCALL
XML_MemMalloc(XML_Parser parser,size_t size)2291 XML_MemMalloc(XML_Parser parser, size_t size)
2292 {
2293 if (parser == NULL)
2294 return NULL;
2295 return MALLOC(parser, size);
2296 }
2297
2298 void * XMLCALL
XML_MemRealloc(XML_Parser parser,void * ptr,size_t size)2299 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size)
2300 {
2301 if (parser == NULL)
2302 return NULL;
2303 return REALLOC(parser, ptr, size);
2304 }
2305
2306 void XMLCALL
XML_MemFree(XML_Parser parser,void * ptr)2307 XML_MemFree(XML_Parser parser, void *ptr)
2308 {
2309 if (parser != NULL)
2310 FREE(parser, ptr);
2311 }
2312
2313 void XMLCALL
XML_DefaultCurrent(XML_Parser parser)2314 XML_DefaultCurrent(XML_Parser parser)
2315 {
2316 if (parser == NULL)
2317 return;
2318 if (parser->m_defaultHandler) {
2319 if (parser->m_openInternalEntities)
2320 reportDefault(parser,
2321 parser->m_internalEncoding,
2322 parser->m_openInternalEntities->internalEventPtr,
2323 parser->m_openInternalEntities->internalEventEndPtr);
2324 else
2325 reportDefault(parser, parser->m_encoding, parser->m_eventPtr, parser->m_eventEndPtr);
2326 }
2327 }
2328
2329 const XML_LChar * XMLCALL
XML_ErrorString(enum XML_Error code)2330 XML_ErrorString(enum XML_Error code)
2331 {
2332 switch (code) {
2333 case XML_ERROR_NONE:
2334 return NULL;
2335 case XML_ERROR_NO_MEMORY:
2336 return XML_L("out of memory");
2337 case XML_ERROR_SYNTAX:
2338 return XML_L("syntax error");
2339 case XML_ERROR_NO_ELEMENTS:
2340 return XML_L("no element found");
2341 case XML_ERROR_INVALID_TOKEN:
2342 return XML_L("not well-formed (invalid token)");
2343 case XML_ERROR_UNCLOSED_TOKEN:
2344 return XML_L("unclosed token");
2345 case XML_ERROR_PARTIAL_CHAR:
2346 return XML_L("partial character");
2347 case XML_ERROR_TAG_MISMATCH:
2348 return XML_L("mismatched tag");
2349 case XML_ERROR_DUPLICATE_ATTRIBUTE:
2350 return XML_L("duplicate attribute");
2351 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2352 return XML_L("junk after document element");
2353 case XML_ERROR_PARAM_ENTITY_REF:
2354 return XML_L("illegal parameter entity reference");
2355 case XML_ERROR_UNDEFINED_ENTITY:
2356 return XML_L("undefined entity");
2357 case XML_ERROR_RECURSIVE_ENTITY_REF:
2358 return XML_L("recursive entity reference");
2359 case XML_ERROR_ASYNC_ENTITY:
2360 return XML_L("asynchronous entity");
2361 case XML_ERROR_BAD_CHAR_REF:
2362 return XML_L("reference to invalid character number");
2363 case XML_ERROR_BINARY_ENTITY_REF:
2364 return XML_L("reference to binary entity");
2365 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2366 return XML_L("reference to external entity in attribute");
2367 case XML_ERROR_MISPLACED_XML_PI:
2368 return XML_L("XML or text declaration not at start of entity");
2369 case XML_ERROR_UNKNOWN_ENCODING:
2370 return XML_L("unknown encoding");
2371 case XML_ERROR_INCORRECT_ENCODING:
2372 return XML_L("encoding specified in XML declaration is incorrect");
2373 case XML_ERROR_UNCLOSED_CDATA_SECTION:
2374 return XML_L("unclosed CDATA section");
2375 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2376 return XML_L("error in processing external entity reference");
2377 case XML_ERROR_NOT_STANDALONE:
2378 return XML_L("document is not standalone");
2379 case XML_ERROR_UNEXPECTED_STATE:
2380 return XML_L("unexpected parser state - please send a bug report");
2381 case XML_ERROR_ENTITY_DECLARED_IN_PE:
2382 return XML_L("entity declared in parameter entity");
2383 case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2384 return XML_L("requested feature requires XML_DTD support in Expat");
2385 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2386 return XML_L("cannot change setting once parsing has begun");
2387 /* Added in 1.95.7. */
2388 case XML_ERROR_UNBOUND_PREFIX:
2389 return XML_L("unbound prefix");
2390 /* Added in 1.95.8. */
2391 case XML_ERROR_UNDECLARING_PREFIX:
2392 return XML_L("must not undeclare prefix");
2393 case XML_ERROR_INCOMPLETE_PE:
2394 return XML_L("incomplete markup in parameter entity");
2395 case XML_ERROR_XML_DECL:
2396 return XML_L("XML declaration not well-formed");
2397 case XML_ERROR_TEXT_DECL:
2398 return XML_L("text declaration not well-formed");
2399 case XML_ERROR_PUBLICID:
2400 return XML_L("illegal character(s) in public id");
2401 case XML_ERROR_SUSPENDED:
2402 return XML_L("parser suspended");
2403 case XML_ERROR_NOT_SUSPENDED:
2404 return XML_L("parser not suspended");
2405 case XML_ERROR_ABORTED:
2406 return XML_L("parsing aborted");
2407 case XML_ERROR_FINISHED:
2408 return XML_L("parsing finished");
2409 case XML_ERROR_SUSPEND_PE:
2410 return XML_L("cannot suspend in external parameter entity");
2411 /* Added in 2.0.0. */
2412 case XML_ERROR_RESERVED_PREFIX_XML:
2413 return XML_L("reserved prefix (xml) must not be undeclared or bound to another namespace name");
2414 case XML_ERROR_RESERVED_PREFIX_XMLNS:
2415 return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2416 case XML_ERROR_RESERVED_NAMESPACE_URI:
2417 return XML_L("prefix must not be bound to one of the reserved namespace names");
2418 /* Added in 2.2.5. */
2419 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2420 return XML_L("invalid argument");
2421 }
2422 return NULL;
2423 }
2424
2425 const XML_LChar * XMLCALL
XML_ExpatVersion(void)2426 XML_ExpatVersion(void) {
2427
2428 /* V1 is used to string-ize the version number. However, it would
2429 string-ize the actual version macro *names* unless we get them
2430 substituted before being passed to V1. CPP is defined to expand
2431 a macro, then rescan for more expansions. Thus, we use V2 to expand
2432 the version macros, then CPP will expand the resulting V1() macro
2433 with the correct numerals. */
2434 /* ### I'm assuming cpp is portable in this respect... */
2435
2436 #define V1(a,b,c) XML_L(#a)XML_L(".")XML_L(#b)XML_L(".")XML_L(#c)
2437 #define V2(a,b,c) XML_L("expat_")V1(a,b,c)
2438
2439 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2440
2441 #undef V1
2442 #undef V2
2443 }
2444
2445 XML_Expat_Version XMLCALL
XML_ExpatVersionInfo(void)2446 XML_ExpatVersionInfo(void)
2447 {
2448 XML_Expat_Version version;
2449
2450 version.major = XML_MAJOR_VERSION;
2451 version.minor = XML_MINOR_VERSION;
2452 version.micro = XML_MICRO_VERSION;
2453
2454 return version;
2455 }
2456
2457 const XML_Feature * XMLCALL
XML_GetFeatureList(void)2458 XML_GetFeatureList(void)
2459 {
2460 static const XML_Feature features[] = {
2461 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2462 sizeof(XML_Char)},
2463 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2464 sizeof(XML_LChar)},
2465 #ifdef XML_UNICODE
2466 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
2467 #endif
2468 #ifdef XML_UNICODE_WCHAR_T
2469 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
2470 #endif
2471 #ifdef XML_DTD
2472 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
2473 #endif
2474 #ifdef XML_CONTEXT_BYTES
2475 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2476 XML_CONTEXT_BYTES},
2477 #endif
2478 #ifdef XML_MIN_SIZE
2479 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
2480 #endif
2481 #ifdef XML_NS
2482 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2483 #endif
2484 #ifdef XML_LARGE_SIZE
2485 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2486 #endif
2487 #ifdef XML_ATTR_INFO
2488 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2489 #endif
2490 {XML_FEATURE_END, NULL, 0}
2491 };
2492
2493 return features;
2494 }
2495
2496 /* Initially tag->rawName always points into the parse buffer;
2497 for those TAG instances opened while the current parse buffer was
2498 processed, and not yet closed, we need to store tag->rawName in a more
2499 permanent location, since the parse buffer is about to be discarded.
2500 */
2501 static XML_Bool
storeRawNames(XML_Parser parser)2502 storeRawNames(XML_Parser parser)
2503 {
2504 TAG *tag = parser->m_tagStack;
2505 while (tag) {
2506 int bufSize;
2507 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2508 char *rawNameBuf = tag->buf + nameLen;
2509 /* Stop if already stored. Since m_tagStack is a stack, we can stop
2510 at the first entry that has already been copied; everything
2511 below it in the stack is already been accounted for in a
2512 previous call to this function.
2513 */
2514 if (tag->rawName == rawNameBuf)
2515 break;
2516 /* For re-use purposes we need to ensure that the
2517 size of tag->buf is a multiple of sizeof(XML_Char).
2518 */
2519 bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2520 if (bufSize > tag->bufEnd - tag->buf) {
2521 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2522 if (temp == NULL)
2523 return XML_FALSE;
2524 /* if tag->name.str points to tag->buf (only when namespace
2525 processing is off) then we have to update it
2526 */
2527 if (tag->name.str == (XML_Char *)tag->buf)
2528 tag->name.str = (XML_Char *)temp;
2529 /* if tag->name.localPart is set (when namespace processing is on)
2530 then update it as well, since it will always point into tag->buf
2531 */
2532 if (tag->name.localPart)
2533 tag->name.localPart = (XML_Char *)temp + (tag->name.localPart -
2534 (XML_Char *)tag->buf);
2535 tag->buf = temp;
2536 tag->bufEnd = temp + bufSize;
2537 rawNameBuf = temp + nameLen;
2538 }
2539 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2540 tag->rawName = rawNameBuf;
2541 tag = tag->parent;
2542 }
2543 return XML_TRUE;
2544 }
2545
2546 static enum XML_Error PTRCALL
contentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2547 contentProcessor(XML_Parser parser,
2548 const char *start,
2549 const char *end,
2550 const char **endPtr)
2551 {
2552 enum XML_Error result = doContent(parser, 0, parser->m_encoding, start, end,
2553 endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
2554 if (result == XML_ERROR_NONE) {
2555 if (!storeRawNames(parser))
2556 return XML_ERROR_NO_MEMORY;
2557 }
2558 return result;
2559 }
2560
2561 static enum XML_Error PTRCALL
externalEntityInitProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2562 externalEntityInitProcessor(XML_Parser parser,
2563 const char *start,
2564 const char *end,
2565 const char **endPtr)
2566 {
2567 enum XML_Error result = initializeEncoding(parser);
2568 if (result != XML_ERROR_NONE)
2569 return result;
2570 parser->m_processor = externalEntityInitProcessor2;
2571 return externalEntityInitProcessor2(parser, start, end, endPtr);
2572 }
2573
2574 static enum XML_Error PTRCALL
externalEntityInitProcessor2(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2575 externalEntityInitProcessor2(XML_Parser parser,
2576 const char *start,
2577 const char *end,
2578 const char **endPtr)
2579 {
2580 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2581 int tok = XmlContentTok(parser->m_encoding, start, end, &next);
2582 switch (tok) {
2583 case XML_TOK_BOM:
2584 /* If we are at the end of the buffer, this would cause the next stage,
2585 i.e. externalEntityInitProcessor3, to pass control directly to
2586 doContent (by detecting XML_TOK_NONE) without processing any xml text
2587 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2588 */
2589 if (next == end && !parser->m_parsingStatus.finalBuffer) {
2590 *endPtr = next;
2591 return XML_ERROR_NONE;
2592 }
2593 start = next;
2594 break;
2595 case XML_TOK_PARTIAL:
2596 if (!parser->m_parsingStatus.finalBuffer) {
2597 *endPtr = start;
2598 return XML_ERROR_NONE;
2599 }
2600 parser->m_eventPtr = start;
2601 return XML_ERROR_UNCLOSED_TOKEN;
2602 case XML_TOK_PARTIAL_CHAR:
2603 if (!parser->m_parsingStatus.finalBuffer) {
2604 *endPtr = start;
2605 return XML_ERROR_NONE;
2606 }
2607 parser->m_eventPtr = start;
2608 return XML_ERROR_PARTIAL_CHAR;
2609 }
2610 parser->m_processor = externalEntityInitProcessor3;
2611 return externalEntityInitProcessor3(parser, start, end, endPtr);
2612 }
2613
2614 static enum XML_Error PTRCALL
externalEntityInitProcessor3(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2615 externalEntityInitProcessor3(XML_Parser parser,
2616 const char *start,
2617 const char *end,
2618 const char **endPtr)
2619 {
2620 int tok;
2621 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2622 parser->m_eventPtr = start;
2623 tok = XmlContentTok(parser->m_encoding, start, end, &next);
2624 parser->m_eventEndPtr = next;
2625
2626 switch (tok) {
2627 case XML_TOK_XML_DECL:
2628 {
2629 enum XML_Error result;
2630 result = processXmlDecl(parser, 1, start, next);
2631 if (result != XML_ERROR_NONE)
2632 return result;
2633 switch (parser->m_parsingStatus.parsing) {
2634 case XML_SUSPENDED:
2635 *endPtr = next;
2636 return XML_ERROR_NONE;
2637 case XML_FINISHED:
2638 return XML_ERROR_ABORTED;
2639 default:
2640 start = next;
2641 }
2642 }
2643 break;
2644 case XML_TOK_PARTIAL:
2645 if (!parser->m_parsingStatus.finalBuffer) {
2646 *endPtr = start;
2647 return XML_ERROR_NONE;
2648 }
2649 return XML_ERROR_UNCLOSED_TOKEN;
2650 case XML_TOK_PARTIAL_CHAR:
2651 if (!parser->m_parsingStatus.finalBuffer) {
2652 *endPtr = start;
2653 return XML_ERROR_NONE;
2654 }
2655 return XML_ERROR_PARTIAL_CHAR;
2656 }
2657 parser->m_processor = externalEntityContentProcessor;
2658 parser->m_tagLevel = 1;
2659 return externalEntityContentProcessor(parser, start, end, endPtr);
2660 }
2661
2662 static enum XML_Error PTRCALL
externalEntityContentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2663 externalEntityContentProcessor(XML_Parser parser,
2664 const char *start,
2665 const char *end,
2666 const char **endPtr)
2667 {
2668 enum XML_Error result = doContent(parser, 1, parser->m_encoding, start, end,
2669 endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
2670 if (result == XML_ERROR_NONE) {
2671 if (!storeRawNames(parser))
2672 return XML_ERROR_NO_MEMORY;
2673 }
2674 return result;
2675 }
2676
2677 static enum XML_Error
doContent(XML_Parser parser,int startTagLevel,const ENCODING * enc,const char * s,const char * end,const char ** nextPtr,XML_Bool haveMore)2678 doContent(XML_Parser parser,
2679 int startTagLevel,
2680 const ENCODING *enc,
2681 const char *s,
2682 const char *end,
2683 const char **nextPtr,
2684 XML_Bool haveMore)
2685 {
2686 /* save one level of indirection */
2687 DTD * const dtd = parser->m_dtd;
2688
2689 const char **eventPP;
2690 const char **eventEndPP;
2691 if (enc == parser->m_encoding) {
2692 eventPP = &parser->m_eventPtr;
2693 eventEndPP = &parser->m_eventEndPtr;
2694 }
2695 else {
2696 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
2697 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
2698 }
2699 *eventPP = s;
2700
2701 for (;;) {
2702 const char *next = s; /* XmlContentTok doesn't always set the last arg */
2703 int tok = XmlContentTok(enc, s, end, &next);
2704 *eventEndPP = next;
2705 switch (tok) {
2706 case XML_TOK_TRAILING_CR:
2707 if (haveMore) {
2708 *nextPtr = s;
2709 return XML_ERROR_NONE;
2710 }
2711 *eventEndPP = end;
2712 if (parser->m_characterDataHandler) {
2713 XML_Char c = 0xA;
2714 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
2715 }
2716 else if (parser->m_defaultHandler)
2717 reportDefault(parser, enc, s, end);
2718 /* We are at the end of the final buffer, should we check for
2719 XML_SUSPENDED, XML_FINISHED?
2720 */
2721 if (startTagLevel == 0)
2722 return XML_ERROR_NO_ELEMENTS;
2723 if (parser->m_tagLevel != startTagLevel)
2724 return XML_ERROR_ASYNC_ENTITY;
2725 *nextPtr = end;
2726 return XML_ERROR_NONE;
2727 case XML_TOK_NONE:
2728 if (haveMore) {
2729 *nextPtr = s;
2730 return XML_ERROR_NONE;
2731 }
2732 if (startTagLevel > 0) {
2733 if (parser->m_tagLevel != startTagLevel)
2734 return XML_ERROR_ASYNC_ENTITY;
2735 *nextPtr = s;
2736 return XML_ERROR_NONE;
2737 }
2738 return XML_ERROR_NO_ELEMENTS;
2739 case XML_TOK_INVALID:
2740 *eventPP = next;
2741 return XML_ERROR_INVALID_TOKEN;
2742 case XML_TOK_PARTIAL:
2743 if (haveMore) {
2744 *nextPtr = s;
2745 return XML_ERROR_NONE;
2746 }
2747 return XML_ERROR_UNCLOSED_TOKEN;
2748 case XML_TOK_PARTIAL_CHAR:
2749 if (haveMore) {
2750 *nextPtr = s;
2751 return XML_ERROR_NONE;
2752 }
2753 return XML_ERROR_PARTIAL_CHAR;
2754 case XML_TOK_ENTITY_REF:
2755 {
2756 const XML_Char *name;
2757 ENTITY *entity;
2758 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
2759 s + enc->minBytesPerChar,
2760 next - enc->minBytesPerChar);
2761 if (ch) {
2762 if (parser->m_characterDataHandler)
2763 parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
2764 else if (parser->m_defaultHandler)
2765 reportDefault(parser, enc, s, next);
2766 break;
2767 }
2768 name = poolStoreString(&dtd->pool, enc,
2769 s + enc->minBytesPerChar,
2770 next - enc->minBytesPerChar);
2771 if (!name)
2772 return XML_ERROR_NO_MEMORY;
2773 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
2774 poolDiscard(&dtd->pool);
2775 /* First, determine if a check for an existing declaration is needed;
2776 if yes, check that the entity exists, and that it is internal,
2777 otherwise call the skipped entity or default handler.
2778 */
2779 if (!dtd->hasParamEntityRefs || dtd->standalone) {
2780 if (!entity)
2781 return XML_ERROR_UNDEFINED_ENTITY;
2782 else if (!entity->is_internal)
2783 return XML_ERROR_ENTITY_DECLARED_IN_PE;
2784 }
2785 else if (!entity) {
2786 if (parser->m_skippedEntityHandler)
2787 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
2788 else if (parser->m_defaultHandler)
2789 reportDefault(parser, enc, s, next);
2790 break;
2791 }
2792 if (entity->open)
2793 return XML_ERROR_RECURSIVE_ENTITY_REF;
2794 if (entity->notation)
2795 return XML_ERROR_BINARY_ENTITY_REF;
2796 if (entity->textPtr) {
2797 enum XML_Error result;
2798 if (!parser->m_defaultExpandInternalEntities) {
2799 if (parser->m_skippedEntityHandler)
2800 parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name, 0);
2801 else if (parser->m_defaultHandler)
2802 reportDefault(parser, enc, s, next);
2803 break;
2804 }
2805 result = processInternalEntity(parser, entity, XML_FALSE);
2806 if (result != XML_ERROR_NONE)
2807 return result;
2808 }
2809 else if (parser->m_externalEntityRefHandler) {
2810 const XML_Char *context;
2811 entity->open = XML_TRUE;
2812 context = getContext(parser);
2813 entity->open = XML_FALSE;
2814 if (!context)
2815 return XML_ERROR_NO_MEMORY;
2816 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
2817 context,
2818 entity->base,
2819 entity->systemId,
2820 entity->publicId))
2821 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2822 poolDiscard(&parser->m_tempPool);
2823 }
2824 else if (parser->m_defaultHandler)
2825 reportDefault(parser, enc, s, next);
2826 break;
2827 }
2828 case XML_TOK_START_TAG_NO_ATTS:
2829 /* fall through */
2830 case XML_TOK_START_TAG_WITH_ATTS:
2831 {
2832 TAG *tag;
2833 enum XML_Error result;
2834 XML_Char *toPtr;
2835 if (parser->m_freeTagList) {
2836 tag = parser->m_freeTagList;
2837 parser->m_freeTagList = parser->m_freeTagList->parent;
2838 }
2839 else {
2840 tag = (TAG *)MALLOC(parser, sizeof(TAG));
2841 if (!tag)
2842 return XML_ERROR_NO_MEMORY;
2843 tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
2844 if (!tag->buf) {
2845 FREE(parser, tag);
2846 return XML_ERROR_NO_MEMORY;
2847 }
2848 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
2849 }
2850 tag->bindings = NULL;
2851 tag->parent = parser->m_tagStack;
2852 parser->m_tagStack = tag;
2853 tag->name.localPart = NULL;
2854 tag->name.prefix = NULL;
2855 tag->rawName = s + enc->minBytesPerChar;
2856 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
2857 ++parser->m_tagLevel;
2858 {
2859 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
2860 const char *fromPtr = tag->rawName;
2861 toPtr = (XML_Char *)tag->buf;
2862 for (;;) {
2863 int bufSize;
2864 int convLen;
2865 const enum XML_Convert_Result convert_res = XmlConvert(enc,
2866 &fromPtr, rawNameEnd,
2867 (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
2868 convLen = (int)(toPtr - (XML_Char *)tag->buf);
2869 if ((fromPtr >= rawNameEnd) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
2870 tag->name.strLen = convLen;
2871 break;
2872 }
2873 bufSize = (int)(tag->bufEnd - tag->buf) << 1;
2874 {
2875 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2876 if (temp == NULL)
2877 return XML_ERROR_NO_MEMORY;
2878 tag->buf = temp;
2879 tag->bufEnd = temp + bufSize;
2880 toPtr = (XML_Char *)temp + convLen;
2881 }
2882 }
2883 }
2884 tag->name.str = (XML_Char *)tag->buf;
2885 *toPtr = XML_T('\0');
2886 result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
2887 if (result)
2888 return result;
2889 if (parser->m_startElementHandler)
2890 parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
2891 (const XML_Char **)parser->m_atts);
2892 else if (parser->m_defaultHandler)
2893 reportDefault(parser, enc, s, next);
2894 poolClear(&parser->m_tempPool);
2895 break;
2896 }
2897 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
2898 /* fall through */
2899 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
2900 {
2901 const char *rawName = s + enc->minBytesPerChar;
2902 enum XML_Error result;
2903 BINDING *bindings = NULL;
2904 XML_Bool noElmHandlers = XML_TRUE;
2905 TAG_NAME name;
2906 name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
2907 rawName + XmlNameLength(enc, rawName));
2908 if (!name.str)
2909 return XML_ERROR_NO_MEMORY;
2910 poolFinish(&parser->m_tempPool);
2911 result = storeAtts(parser, enc, s, &name, &bindings);
2912 if (result != XML_ERROR_NONE) {
2913 freeBindings(parser, bindings);
2914 return result;
2915 }
2916 poolFinish(&parser->m_tempPool);
2917 if (parser->m_startElementHandler) {
2918 parser->m_startElementHandler(parser->m_handlerArg, name.str, (const XML_Char **)parser->m_atts);
2919 noElmHandlers = XML_FALSE;
2920 }
2921 if (parser->m_endElementHandler) {
2922 if (parser->m_startElementHandler)
2923 *eventPP = *eventEndPP;
2924 parser->m_endElementHandler(parser->m_handlerArg, name.str);
2925 noElmHandlers = XML_FALSE;
2926 }
2927 if (noElmHandlers && parser->m_defaultHandler)
2928 reportDefault(parser, enc, s, next);
2929 poolClear(&parser->m_tempPool);
2930 freeBindings(parser, bindings);
2931 }
2932 if ((parser->m_tagLevel == 0) && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
2933 if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
2934 parser->m_processor = epilogProcessor;
2935 else
2936 return epilogProcessor(parser, next, end, nextPtr);
2937 }
2938 break;
2939 case XML_TOK_END_TAG:
2940 if (parser->m_tagLevel == startTagLevel)
2941 return XML_ERROR_ASYNC_ENTITY;
2942 else {
2943 int len;
2944 const char *rawName;
2945 TAG *tag = parser->m_tagStack;
2946 parser->m_tagStack = tag->parent;
2947 tag->parent = parser->m_freeTagList;
2948 parser->m_freeTagList = tag;
2949 rawName = s + enc->minBytesPerChar*2;
2950 len = XmlNameLength(enc, rawName);
2951 if (len != tag->rawNameLength
2952 || memcmp(tag->rawName, rawName, len) != 0) {
2953 *eventPP = rawName;
2954 return XML_ERROR_TAG_MISMATCH;
2955 }
2956 --parser->m_tagLevel;
2957 if (parser->m_endElementHandler) {
2958 const XML_Char *localPart;
2959 const XML_Char *prefix;
2960 XML_Char *uri;
2961 localPart = tag->name.localPart;
2962 if (parser->m_ns && localPart) {
2963 /* localPart and prefix may have been overwritten in
2964 tag->name.str, since this points to the binding->uri
2965 buffer which gets re-used; so we have to add them again
2966 */
2967 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
2968 /* don't need to check for space - already done in storeAtts() */
2969 while (*localPart) *uri++ = *localPart++;
2970 prefix = (XML_Char *)tag->name.prefix;
2971 if (parser->m_ns_triplets && prefix) {
2972 *uri++ = parser->m_namespaceSeparator;
2973 while (*prefix) *uri++ = *prefix++;
2974 }
2975 *uri = XML_T('\0');
2976 }
2977 parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
2978 }
2979 else if (parser->m_defaultHandler)
2980 reportDefault(parser, enc, s, next);
2981 while (tag->bindings) {
2982 BINDING *b = tag->bindings;
2983 if (parser->m_endNamespaceDeclHandler)
2984 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
2985 tag->bindings = tag->bindings->nextTagBinding;
2986 b->nextTagBinding = parser->m_freeBindingList;
2987 parser->m_freeBindingList = b;
2988 b->prefix->binding = b->prevPrefixBinding;
2989 }
2990 if (parser->m_tagLevel == 0)
2991 return epilogProcessor(parser, next, end, nextPtr);
2992 }
2993 break;
2994 case XML_TOK_CHAR_REF:
2995 {
2996 int n = XmlCharRefNumber(enc, s);
2997 if (n < 0)
2998 return XML_ERROR_BAD_CHAR_REF;
2999 if (parser->m_characterDataHandler) {
3000 XML_Char buf[XML_ENCODE_MAX];
3001 parser->m_characterDataHandler(parser->m_handlerArg, buf, XmlEncode(n, (ICHAR *)buf));
3002 }
3003 else if (parser->m_defaultHandler)
3004 reportDefault(parser, enc, s, next);
3005 }
3006 break;
3007 case XML_TOK_XML_DECL:
3008 return XML_ERROR_MISPLACED_XML_PI;
3009 case XML_TOK_DATA_NEWLINE:
3010 if (parser->m_characterDataHandler) {
3011 XML_Char c = 0xA;
3012 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3013 }
3014 else if (parser->m_defaultHandler)
3015 reportDefault(parser, enc, s, next);
3016 break;
3017 case XML_TOK_CDATA_SECT_OPEN:
3018 {
3019 enum XML_Error result;
3020 if (parser->m_startCdataSectionHandler)
3021 parser->m_startCdataSectionHandler(parser->m_handlerArg);
3022 #if 0
3023 /* Suppose you doing a transformation on a document that involves
3024 changing only the character data. You set up a defaultHandler
3025 and a characterDataHandler. The defaultHandler simply copies
3026 characters through. The characterDataHandler does the
3027 transformation and writes the characters out escaping them as
3028 necessary. This case will fail to work if we leave out the
3029 following two lines (because & and < inside CDATA sections will
3030 be incorrectly escaped).
3031
3032 However, now we have a start/endCdataSectionHandler, so it seems
3033 easier to let the user deal with this.
3034 */
3035 else if (parser->m_characterDataHandler)
3036 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 0);
3037 #endif
3038 else if (parser->m_defaultHandler)
3039 reportDefault(parser, enc, s, next);
3040 result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore);
3041 if (result != XML_ERROR_NONE)
3042 return result;
3043 else if (!next) {
3044 parser->m_processor = cdataSectionProcessor;
3045 return result;
3046 }
3047 }
3048 break;
3049 case XML_TOK_TRAILING_RSQB:
3050 if (haveMore) {
3051 *nextPtr = s;
3052 return XML_ERROR_NONE;
3053 }
3054 if (parser->m_characterDataHandler) {
3055 if (MUST_CONVERT(enc, s)) {
3056 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3057 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3058 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3059 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3060 }
3061 else
3062 parser->m_characterDataHandler(parser->m_handlerArg,
3063 (XML_Char *)s,
3064 (int)((XML_Char *)end - (XML_Char *)s));
3065 }
3066 else if (parser->m_defaultHandler)
3067 reportDefault(parser, enc, s, end);
3068 /* We are at the end of the final buffer, should we check for
3069 XML_SUSPENDED, XML_FINISHED?
3070 */
3071 if (startTagLevel == 0) {
3072 *eventPP = end;
3073 return XML_ERROR_NO_ELEMENTS;
3074 }
3075 if (parser->m_tagLevel != startTagLevel) {
3076 *eventPP = end;
3077 return XML_ERROR_ASYNC_ENTITY;
3078 }
3079 *nextPtr = end;
3080 return XML_ERROR_NONE;
3081 case XML_TOK_DATA_CHARS:
3082 {
3083 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3084 if (charDataHandler) {
3085 if (MUST_CONVERT(enc, s)) {
3086 for (;;) {
3087 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3088 const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3089 *eventEndPP = s;
3090 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3091 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3092 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3093 break;
3094 *eventPP = s;
3095 }
3096 }
3097 else
3098 charDataHandler(parser->m_handlerArg,
3099 (XML_Char *)s,
3100 (int)((XML_Char *)next - (XML_Char *)s));
3101 }
3102 else if (parser->m_defaultHandler)
3103 reportDefault(parser, enc, s, next);
3104 }
3105 break;
3106 case XML_TOK_PI:
3107 if (!reportProcessingInstruction(parser, enc, s, next))
3108 return XML_ERROR_NO_MEMORY;
3109 break;
3110 case XML_TOK_COMMENT:
3111 if (!reportComment(parser, enc, s, next))
3112 return XML_ERROR_NO_MEMORY;
3113 break;
3114 default:
3115 /* All of the tokens produced by XmlContentTok() have their own
3116 * explicit cases, so this default is not strictly necessary.
3117 * However it is a useful safety net, so we retain the code and
3118 * simply exclude it from the coverage tests.
3119 *
3120 * LCOV_EXCL_START
3121 */
3122 if (parser->m_defaultHandler)
3123 reportDefault(parser, enc, s, next);
3124 break;
3125 /* LCOV_EXCL_STOP */
3126 }
3127 *eventPP = s = next;
3128 switch (parser->m_parsingStatus.parsing) {
3129 case XML_SUSPENDED:
3130 *nextPtr = next;
3131 return XML_ERROR_NONE;
3132 case XML_FINISHED:
3133 return XML_ERROR_ABORTED;
3134 default: ;
3135 }
3136 }
3137 /* not reached */
3138 }
3139
3140 /* This function does not call free() on the allocated memory, merely
3141 * moving it to the parser's m_freeBindingList where it can be freed or
3142 * reused as appropriate.
3143 */
3144 static void
freeBindings(XML_Parser parser,BINDING * bindings)3145 freeBindings(XML_Parser parser, BINDING *bindings)
3146 {
3147 while (bindings) {
3148 BINDING *b = bindings;
3149
3150 /* m_startNamespaceDeclHandler will have been called for this
3151 * binding in addBindings(), so call the end handler now.
3152 */
3153 if (parser->m_endNamespaceDeclHandler)
3154 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3155
3156 bindings = bindings->nextTagBinding;
3157 b->nextTagBinding = parser->m_freeBindingList;
3158 parser->m_freeBindingList = b;
3159 b->prefix->binding = b->prevPrefixBinding;
3160 }
3161 }
3162
3163 /* Precondition: all arguments must be non-NULL;
3164 Purpose:
3165 - normalize attributes
3166 - check attributes for well-formedness
3167 - generate namespace aware attribute names (URI, prefix)
3168 - build list of attributes for startElementHandler
3169 - default attributes
3170 - process namespace declarations (check and report them)
3171 - generate namespace aware element name (URI, prefix)
3172 */
3173 static enum XML_Error
storeAtts(XML_Parser parser,const ENCODING * enc,const char * attStr,TAG_NAME * tagNamePtr,BINDING ** bindingsPtr)3174 storeAtts(XML_Parser parser, const ENCODING *enc,
3175 const char *attStr, TAG_NAME *tagNamePtr,
3176 BINDING **bindingsPtr)
3177 {
3178 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
3179 ELEMENT_TYPE *elementType;
3180 int nDefaultAtts;
3181 const XML_Char **appAtts; /* the attribute list for the application */
3182 int attIndex = 0;
3183 int prefixLen;
3184 int i;
3185 int n;
3186 XML_Char *uri;
3187 int nPrefixes = 0;
3188 BINDING *binding;
3189 const XML_Char *localPart;
3190
3191 /* lookup the element type name */
3192 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str,0);
3193 if (!elementType) {
3194 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3195 if (!name)
3196 return XML_ERROR_NO_MEMORY;
3197 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
3198 sizeof(ELEMENT_TYPE));
3199 if (!elementType)
3200 return XML_ERROR_NO_MEMORY;
3201 if (parser->m_ns && !setElementTypePrefix(parser, elementType))
3202 return XML_ERROR_NO_MEMORY;
3203 }
3204 nDefaultAtts = elementType->nDefaultAtts;
3205
3206 /* get the attributes from the tokenizer */
3207 n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3208 if (n + nDefaultAtts > parser->m_attsSize) {
3209 int oldAttsSize = parser->m_attsSize;
3210 ATTRIBUTE *temp;
3211 #ifdef XML_ATTR_INFO
3212 XML_AttrInfo *temp2;
3213 #endif
3214 parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
3215 temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts, parser->m_attsSize * sizeof(ATTRIBUTE));
3216 if (temp == NULL) {
3217 parser->m_attsSize = oldAttsSize;
3218 return XML_ERROR_NO_MEMORY;
3219 }
3220 parser->m_atts = temp;
3221 #ifdef XML_ATTR_INFO
3222 temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo, parser->m_attsSize * sizeof(XML_AttrInfo));
3223 if (temp2 == NULL) {
3224 parser->m_attsSize = oldAttsSize;
3225 return XML_ERROR_NO_MEMORY;
3226 }
3227 parser->m_attInfo = temp2;
3228 #endif
3229 if (n > oldAttsSize)
3230 XmlGetAttributes(enc, attStr, n, parser->m_atts);
3231 }
3232
3233 appAtts = (const XML_Char **)parser->m_atts;
3234 for (i = 0; i < n; i++) {
3235 ATTRIBUTE *currAtt = &parser->m_atts[i];
3236 #ifdef XML_ATTR_INFO
3237 XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
3238 #endif
3239 /* add the name and value to the attribute list */
3240 ATTRIBUTE_ID *attId = getAttributeId(parser, enc, currAtt->name,
3241 currAtt->name
3242 + XmlNameLength(enc, currAtt->name));
3243 if (!attId)
3244 return XML_ERROR_NO_MEMORY;
3245 #ifdef XML_ATTR_INFO
3246 currAttInfo->nameStart = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
3247 currAttInfo->nameEnd = currAttInfo->nameStart +
3248 XmlNameLength(enc, currAtt->name);
3249 currAttInfo->valueStart = parser->m_parseEndByteIndex -
3250 (parser->m_parseEndPtr - currAtt->valuePtr);
3251 currAttInfo->valueEnd = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->valueEnd);
3252 #endif
3253 /* Detect duplicate attributes by their QNames. This does not work when
3254 namespace processing is turned on and different prefixes for the same
3255 namespace are used. For this case we have a check further down.
3256 */
3257 if ((attId->name)[-1]) {
3258 if (enc == parser->m_encoding)
3259 parser->m_eventPtr = parser->m_atts[i].name;
3260 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3261 }
3262 (attId->name)[-1] = 1;
3263 appAtts[attIndex++] = attId->name;
3264 if (!parser->m_atts[i].normalized) {
3265 enum XML_Error result;
3266 XML_Bool isCdata = XML_TRUE;
3267
3268 /* figure out whether declared as other than CDATA */
3269 if (attId->maybeTokenized) {
3270 int j;
3271 for (j = 0; j < nDefaultAtts; j++) {
3272 if (attId == elementType->defaultAtts[j].id) {
3273 isCdata = elementType->defaultAtts[j].isCdata;
3274 break;
3275 }
3276 }
3277 }
3278
3279 /* normalize the attribute value */
3280 result = storeAttributeValue(parser, enc, isCdata,
3281 parser->m_atts[i].valuePtr, parser->m_atts[i].valueEnd,
3282 &parser->m_tempPool);
3283 if (result)
3284 return result;
3285 appAtts[attIndex] = poolStart(&parser->m_tempPool);
3286 poolFinish(&parser->m_tempPool);
3287 }
3288 else {
3289 /* the value did not need normalizing */
3290 appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc, parser->m_atts[i].valuePtr,
3291 parser->m_atts[i].valueEnd);
3292 if (appAtts[attIndex] == 0)
3293 return XML_ERROR_NO_MEMORY;
3294 poolFinish(&parser->m_tempPool);
3295 }
3296 /* handle prefixed attribute names */
3297 if (attId->prefix) {
3298 if (attId->xmlns) {
3299 /* deal with namespace declarations here */
3300 enum XML_Error result = addBinding(parser, attId->prefix, attId,
3301 appAtts[attIndex], bindingsPtr);
3302 if (result)
3303 return result;
3304 --attIndex;
3305 }
3306 else {
3307 /* deal with other prefixed names later */
3308 attIndex++;
3309 nPrefixes++;
3310 (attId->name)[-1] = 2;
3311 }
3312 }
3313 else
3314 attIndex++;
3315 }
3316
3317 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
3318 parser->m_nSpecifiedAtts = attIndex;
3319 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3320 for (i = 0; i < attIndex; i += 2)
3321 if (appAtts[i] == elementType->idAtt->name) {
3322 parser->m_idAttIndex = i;
3323 break;
3324 }
3325 }
3326 else
3327 parser->m_idAttIndex = -1;
3328
3329 /* do attribute defaulting */
3330 for (i = 0; i < nDefaultAtts; i++) {
3331 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
3332 if (!(da->id->name)[-1] && da->value) {
3333 if (da->id->prefix) {
3334 if (da->id->xmlns) {
3335 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3336 da->value, bindingsPtr);
3337 if (result)
3338 return result;
3339 }
3340 else {
3341 (da->id->name)[-1] = 2;
3342 nPrefixes++;
3343 appAtts[attIndex++] = da->id->name;
3344 appAtts[attIndex++] = da->value;
3345 }
3346 }
3347 else {
3348 (da->id->name)[-1] = 1;
3349 appAtts[attIndex++] = da->id->name;
3350 appAtts[attIndex++] = da->value;
3351 }
3352 }
3353 }
3354 appAtts[attIndex] = 0;
3355
3356 /* expand prefixed attribute names, check for duplicates,
3357 and clear flags that say whether attributes were specified */
3358 i = 0;
3359 if (nPrefixes) {
3360 int j; /* hash table index */
3361 unsigned long version = parser->m_nsAttsVersion;
3362 int nsAttsSize = (int)1 << parser->m_nsAttsPower;
3363 unsigned char oldNsAttsPower = parser->m_nsAttsPower;
3364 /* size of hash table must be at least 2 * (# of prefixed attributes) */
3365 if ((nPrefixes << 1) >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
3366 NS_ATT *temp;
3367 /* hash table size must also be a power of 2 and >= 8 */
3368 while (nPrefixes >> parser->m_nsAttsPower++);
3369 if (parser->m_nsAttsPower < 3)
3370 parser->m_nsAttsPower = 3;
3371 nsAttsSize = (int)1 << parser->m_nsAttsPower;
3372 temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts, nsAttsSize * sizeof(NS_ATT));
3373 if (!temp) {
3374 /* Restore actual size of memory in m_nsAtts */
3375 parser->m_nsAttsPower = oldNsAttsPower;
3376 return XML_ERROR_NO_MEMORY;
3377 }
3378 parser->m_nsAtts = temp;
3379 version = 0; /* force re-initialization of m_nsAtts hash table */
3380 }
3381 /* using a version flag saves us from initializing m_nsAtts every time */
3382 if (!version) { /* initialize version flags when version wraps around */
3383 version = INIT_ATTS_VERSION;
3384 for (j = nsAttsSize; j != 0; )
3385 parser->m_nsAtts[--j].version = version;
3386 }
3387 parser->m_nsAttsVersion = --version;
3388
3389 /* expand prefixed names and check for duplicates */
3390 for (; i < attIndex; i += 2) {
3391 const XML_Char *s = appAtts[i];
3392 if (s[-1] == 2) { /* prefixed */
3393 ATTRIBUTE_ID *id;
3394 const BINDING *b;
3395 unsigned long uriHash;
3396 struct siphash sip_state;
3397 struct sipkey sip_key;
3398
3399 copy_salt_to_sipkey(parser, &sip_key);
3400 sip24_init(&sip_state, &sip_key);
3401
3402 ((XML_Char *)s)[-1] = 0; /* clear flag */
3403 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
3404 if (!id || !id->prefix) {
3405 /* This code is walking through the appAtts array, dealing
3406 * with (in this case) a prefixed attribute name. To be in
3407 * the array, the attribute must have already been bound, so
3408 * has to have passed through the hash table lookup once
3409 * already. That implies that an entry for it already
3410 * exists, so the lookup above will return a pointer to
3411 * already allocated memory. There is no opportunaity for
3412 * the allocator to fail, so the condition above cannot be
3413 * fulfilled.
3414 *
3415 * Since it is difficult to be certain that the above
3416 * analysis is complete, we retain the test and merely
3417 * remove the code from coverage tests.
3418 */
3419 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3420 }
3421 b = id->prefix->binding;
3422 if (!b)
3423 return XML_ERROR_UNBOUND_PREFIX;
3424
3425 for (j = 0; j < b->uriLen; j++) {
3426 const XML_Char c = b->uri[j];
3427 if (!poolAppendChar(&parser->m_tempPool, c))
3428 return XML_ERROR_NO_MEMORY;
3429 }
3430
3431 sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3432
3433 while (*s++ != XML_T(ASCII_COLON))
3434 ;
3435
3436 sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3437
3438 do { /* copies null terminator */
3439 if (!poolAppendChar(&parser->m_tempPool, *s))
3440 return XML_ERROR_NO_MEMORY;
3441 } while (*s++);
3442
3443 uriHash = (unsigned long)sip24_final(&sip_state);
3444
3445 { /* Check hash table for duplicate of expanded name (uriName).
3446 Derived from code in lookup(parser, HASH_TABLE *table, ...).
3447 */
3448 unsigned char step = 0;
3449 unsigned long mask = nsAttsSize - 1;
3450 j = uriHash & mask; /* index into hash table */
3451 while (parser->m_nsAtts[j].version == version) {
3452 /* for speed we compare stored hash values first */
3453 if (uriHash == parser->m_nsAtts[j].hash) {
3454 const XML_Char *s1 = poolStart(&parser->m_tempPool);
3455 const XML_Char *s2 = parser->m_nsAtts[j].uriName;
3456 /* s1 is null terminated, but not s2 */
3457 for (; *s1 == *s2 && *s1 != 0; s1++, s2++);
3458 if (*s1 == 0)
3459 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3460 }
3461 if (!step)
3462 step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
3463 j < step ? (j += nsAttsSize - step) : (j -= step);
3464 }
3465 }
3466
3467 if (parser->m_ns_triplets) { /* append namespace separator and prefix */
3468 parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
3469 s = b->prefix->name;
3470 do {
3471 if (!poolAppendChar(&parser->m_tempPool, *s))
3472 return XML_ERROR_NO_MEMORY;
3473 } while (*s++);
3474 }
3475
3476 /* store expanded name in attribute list */
3477 s = poolStart(&parser->m_tempPool);
3478 poolFinish(&parser->m_tempPool);
3479 appAtts[i] = s;
3480
3481 /* fill empty slot with new version, uriName and hash value */
3482 parser->m_nsAtts[j].version = version;
3483 parser->m_nsAtts[j].hash = uriHash;
3484 parser->m_nsAtts[j].uriName = s;
3485
3486 if (!--nPrefixes) {
3487 i += 2;
3488 break;
3489 }
3490 }
3491 else /* not prefixed */
3492 ((XML_Char *)s)[-1] = 0; /* clear flag */
3493 }
3494 }
3495 /* clear flags for the remaining attributes */
3496 for (; i < attIndex; i += 2)
3497 ((XML_Char *)(appAtts[i]))[-1] = 0;
3498 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3499 binding->attId->name[-1] = 0;
3500
3501 if (!parser->m_ns)
3502 return XML_ERROR_NONE;
3503
3504 /* expand the element type name */
3505 if (elementType->prefix) {
3506 binding = elementType->prefix->binding;
3507 if (!binding)
3508 return XML_ERROR_UNBOUND_PREFIX;
3509 localPart = tagNamePtr->str;
3510 while (*localPart++ != XML_T(ASCII_COLON))
3511 ;
3512 }
3513 else if (dtd->defaultPrefix.binding) {
3514 binding = dtd->defaultPrefix.binding;
3515 localPart = tagNamePtr->str;
3516 }
3517 else
3518 return XML_ERROR_NONE;
3519 prefixLen = 0;
3520 if (parser->m_ns_triplets && binding->prefix->name) {
3521 for (; binding->prefix->name[prefixLen++];)
3522 ; /* prefixLen includes null terminator */
3523 }
3524 tagNamePtr->localPart = localPart;
3525 tagNamePtr->uriLen = binding->uriLen;
3526 tagNamePtr->prefix = binding->prefix->name;
3527 tagNamePtr->prefixLen = prefixLen;
3528 for (i = 0; localPart[i++];)
3529 ; /* i includes null terminator */
3530 n = i + binding->uriLen + prefixLen;
3531 if (n > binding->uriAlloc) {
3532 TAG *p;
3533 uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
3534 if (!uri)
3535 return XML_ERROR_NO_MEMORY;
3536 binding->uriAlloc = n + EXPAND_SPARE;
3537 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3538 for (p = parser->m_tagStack; p; p = p->parent)
3539 if (p->name.str == binding->uri)
3540 p->name.str = uri;
3541 FREE(parser, binding->uri);
3542 binding->uri = uri;
3543 }
3544 /* if m_namespaceSeparator != '\0' then uri includes it already */
3545 uri = binding->uri + binding->uriLen;
3546 memcpy(uri, localPart, i * sizeof(XML_Char));
3547 /* we always have a namespace separator between localPart and prefix */
3548 if (prefixLen) {
3549 uri += i - 1;
3550 *uri = parser->m_namespaceSeparator; /* replace null terminator */
3551 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3552 }
3553 tagNamePtr->str = binding->uri;
3554 return XML_ERROR_NONE;
3555 }
3556
3557 /* addBinding() overwrites the value of prefix->binding without checking.
3558 Therefore one must keep track of the old value outside of addBinding().
3559 */
3560 static enum XML_Error
addBinding(XML_Parser parser,PREFIX * prefix,const ATTRIBUTE_ID * attId,const XML_Char * uri,BINDING ** bindingsPtr)3561 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3562 const XML_Char *uri, BINDING **bindingsPtr)
3563 {
3564 static const XML_Char xmlNamespace[] = {
3565 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3566 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3567 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
3568 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, ASCII_SLASH,
3569 ASCII_n, ASCII_a, ASCII_m, ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3570 ASCII_e, '\0'
3571 };
3572 static const int xmlLen =
3573 (int)sizeof(xmlNamespace)/sizeof(XML_Char) - 1;
3574 static const XML_Char xmlnsNamespace[] = {
3575 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3576 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3577 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_2, ASCII_0, ASCII_0,
3578 ASCII_0, ASCII_SLASH, ASCII_x, ASCII_m, ASCII_l, ASCII_n, ASCII_s,
3579 ASCII_SLASH, '\0'
3580 };
3581 static const int xmlnsLen =
3582 (int)sizeof(xmlnsNamespace)/sizeof(XML_Char) - 1;
3583
3584 XML_Bool mustBeXML = XML_FALSE;
3585 XML_Bool isXML = XML_TRUE;
3586 XML_Bool isXMLNS = XML_TRUE;
3587
3588 BINDING *b;
3589 int len;
3590
3591 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
3592 if (*uri == XML_T('\0') && prefix->name)
3593 return XML_ERROR_UNDECLARING_PREFIX;
3594
3595 if (prefix->name
3596 && prefix->name[0] == XML_T(ASCII_x)
3597 && prefix->name[1] == XML_T(ASCII_m)
3598 && prefix->name[2] == XML_T(ASCII_l)) {
3599
3600 /* Not allowed to bind xmlns */
3601 if (prefix->name[3] == XML_T(ASCII_n)
3602 && prefix->name[4] == XML_T(ASCII_s)
3603 && prefix->name[5] == XML_T('\0'))
3604 return XML_ERROR_RESERVED_PREFIX_XMLNS;
3605
3606 if (prefix->name[3] == XML_T('\0'))
3607 mustBeXML = XML_TRUE;
3608 }
3609
3610 for (len = 0; uri[len]; len++) {
3611 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3612 isXML = XML_FALSE;
3613
3614 if (!mustBeXML && isXMLNS
3615 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3616 isXMLNS = XML_FALSE;
3617 }
3618 isXML = isXML && len == xmlLen;
3619 isXMLNS = isXMLNS && len == xmlnsLen;
3620
3621 if (mustBeXML != isXML)
3622 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3623 : XML_ERROR_RESERVED_NAMESPACE_URI;
3624
3625 if (isXMLNS)
3626 return XML_ERROR_RESERVED_NAMESPACE_URI;
3627
3628 if (parser->m_namespaceSeparator)
3629 len++;
3630 if (parser->m_freeBindingList) {
3631 b = parser->m_freeBindingList;
3632 if (len > b->uriAlloc) {
3633 XML_Char *temp = (XML_Char *)REALLOC(parser, b->uri,
3634 sizeof(XML_Char) * (len + EXPAND_SPARE));
3635 if (temp == NULL)
3636 return XML_ERROR_NO_MEMORY;
3637 b->uri = temp;
3638 b->uriAlloc = len + EXPAND_SPARE;
3639 }
3640 parser->m_freeBindingList = b->nextTagBinding;
3641 }
3642 else {
3643 b = (BINDING *)MALLOC(parser, sizeof(BINDING));
3644 if (!b)
3645 return XML_ERROR_NO_MEMORY;
3646 b->uri = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
3647 if (!b->uri) {
3648 FREE(parser, b);
3649 return XML_ERROR_NO_MEMORY;
3650 }
3651 b->uriAlloc = len + EXPAND_SPARE;
3652 }
3653 b->uriLen = len;
3654 memcpy(b->uri, uri, len * sizeof(XML_Char));
3655 if (parser->m_namespaceSeparator)
3656 b->uri[len - 1] = parser->m_namespaceSeparator;
3657 b->prefix = prefix;
3658 b->attId = attId;
3659 b->prevPrefixBinding = prefix->binding;
3660 /* NULL binding when default namespace undeclared */
3661 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
3662 prefix->binding = NULL;
3663 else
3664 prefix->binding = b;
3665 b->nextTagBinding = *bindingsPtr;
3666 *bindingsPtr = b;
3667 /* if attId == NULL then we are not starting a namespace scope */
3668 if (attId && parser->m_startNamespaceDeclHandler)
3669 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
3670 prefix->binding ? uri : 0);
3671 return XML_ERROR_NONE;
3672 }
3673
3674 /* The idea here is to avoid using stack for each CDATA section when
3675 the whole file is parsed with one call.
3676 */
3677 static enum XML_Error PTRCALL
cdataSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)3678 cdataSectionProcessor(XML_Parser parser,
3679 const char *start,
3680 const char *end,
3681 const char **endPtr)
3682 {
3683 enum XML_Error result = doCdataSection(parser, parser->m_encoding, &start, end,
3684 endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
3685 if (result != XML_ERROR_NONE)
3686 return result;
3687 if (start) {
3688 if (parser->m_parentParser) { /* we are parsing an external entity */
3689 parser->m_processor = externalEntityContentProcessor;
3690 return externalEntityContentProcessor(parser, start, end, endPtr);
3691 }
3692 else {
3693 parser->m_processor = contentProcessor;
3694 return contentProcessor(parser, start, end, endPtr);
3695 }
3696 }
3697 return result;
3698 }
3699
3700 /* startPtr gets set to non-null if the section is closed, and to null if
3701 the section is not yet closed.
3702 */
3703 static enum XML_Error
doCdataSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore)3704 doCdataSection(XML_Parser parser,
3705 const ENCODING *enc,
3706 const char **startPtr,
3707 const char *end,
3708 const char **nextPtr,
3709 XML_Bool haveMore)
3710 {
3711 const char *s = *startPtr;
3712 const char **eventPP;
3713 const char **eventEndPP;
3714 if (enc == parser->m_encoding) {
3715 eventPP = &parser->m_eventPtr;
3716 *eventPP = s;
3717 eventEndPP = &parser->m_eventEndPtr;
3718 }
3719 else {
3720 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3721 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
3722 }
3723 *eventPP = s;
3724 *startPtr = NULL;
3725
3726 for (;;) {
3727 const char *next;
3728 int tok = XmlCdataSectionTok(enc, s, end, &next);
3729 *eventEndPP = next;
3730 switch (tok) {
3731 case XML_TOK_CDATA_SECT_CLOSE:
3732 if (parser->m_endCdataSectionHandler)
3733 parser->m_endCdataSectionHandler(parser->m_handlerArg);
3734 #if 0
3735 /* see comment under XML_TOK_CDATA_SECT_OPEN */
3736 else if (parser->m_characterDataHandler)
3737 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 0);
3738 #endif
3739 else if (parser->m_defaultHandler)
3740 reportDefault(parser, enc, s, next);
3741 *startPtr = next;
3742 *nextPtr = next;
3743 if (parser->m_parsingStatus.parsing == XML_FINISHED)
3744 return XML_ERROR_ABORTED;
3745 else
3746 return XML_ERROR_NONE;
3747 case XML_TOK_DATA_NEWLINE:
3748 if (parser->m_characterDataHandler) {
3749 XML_Char c = 0xA;
3750 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3751 }
3752 else if (parser->m_defaultHandler)
3753 reportDefault(parser, enc, s, next);
3754 break;
3755 case XML_TOK_DATA_CHARS:
3756 {
3757 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3758 if (charDataHandler) {
3759 if (MUST_CONVERT(enc, s)) {
3760 for (;;) {
3761 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3762 const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3763 *eventEndPP = next;
3764 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3765 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3766 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3767 break;
3768 *eventPP = s;
3769 }
3770 }
3771 else
3772 charDataHandler(parser->m_handlerArg,
3773 (XML_Char *)s,
3774 (int)((XML_Char *)next - (XML_Char *)s));
3775 }
3776 else if (parser->m_defaultHandler)
3777 reportDefault(parser, enc, s, next);
3778 }
3779 break;
3780 case XML_TOK_INVALID:
3781 *eventPP = next;
3782 return XML_ERROR_INVALID_TOKEN;
3783 case XML_TOK_PARTIAL_CHAR:
3784 if (haveMore) {
3785 *nextPtr = s;
3786 return XML_ERROR_NONE;
3787 }
3788 return XML_ERROR_PARTIAL_CHAR;
3789 case XML_TOK_PARTIAL:
3790 case XML_TOK_NONE:
3791 if (haveMore) {
3792 *nextPtr = s;
3793 return XML_ERROR_NONE;
3794 }
3795 return XML_ERROR_UNCLOSED_CDATA_SECTION;
3796 default:
3797 /* Every token returned by XmlCdataSectionTok() has its own
3798 * explicit case, so this default case will never be executed.
3799 * We retain it as a safety net and exclude it from the coverage
3800 * statistics.
3801 *
3802 * LCOV_EXCL_START
3803 */
3804 *eventPP = next;
3805 return XML_ERROR_UNEXPECTED_STATE;
3806 /* LCOV_EXCL_STOP */
3807 }
3808
3809 *eventPP = s = next;
3810 switch (parser->m_parsingStatus.parsing) {
3811 case XML_SUSPENDED:
3812 *nextPtr = next;
3813 return XML_ERROR_NONE;
3814 case XML_FINISHED:
3815 return XML_ERROR_ABORTED;
3816 default: ;
3817 }
3818 }
3819 /* not reached */
3820 }
3821
3822 #ifdef XML_DTD
3823
3824 /* The idea here is to avoid using stack for each IGNORE section when
3825 the whole file is parsed with one call.
3826 */
3827 static enum XML_Error PTRCALL
ignoreSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)3828 ignoreSectionProcessor(XML_Parser parser,
3829 const char *start,
3830 const char *end,
3831 const char **endPtr)
3832 {
3833 enum XML_Error result = doIgnoreSection(parser, parser->m_encoding, &start, end,
3834 endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
3835 if (result != XML_ERROR_NONE)
3836 return result;
3837 if (start) {
3838 parser->m_processor = prologProcessor;
3839 return prologProcessor(parser, start, end, endPtr);
3840 }
3841 return result;
3842 }
3843
3844 /* startPtr gets set to non-null is the section is closed, and to null
3845 if the section is not yet closed.
3846 */
3847 static enum XML_Error
doIgnoreSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore)3848 doIgnoreSection(XML_Parser parser,
3849 const ENCODING *enc,
3850 const char **startPtr,
3851 const char *end,
3852 const char **nextPtr,
3853 XML_Bool haveMore)
3854 {
3855 const char *next;
3856 int tok;
3857 const char *s = *startPtr;
3858 const char **eventPP;
3859 const char **eventEndPP;
3860 if (enc == parser->m_encoding) {
3861 eventPP = &parser->m_eventPtr;
3862 *eventPP = s;
3863 eventEndPP = &parser->m_eventEndPtr;
3864 }
3865 else {
3866 /* It's not entirely clear, but it seems the following two lines
3867 * of code cannot be executed. The only occasions on which 'enc'
3868 * is not 'encoding' are when this function is called
3869 * from the internal entity processing, and IGNORE sections are an
3870 * error in internal entities.
3871 *
3872 * Since it really isn't clear that this is true, we keep the code
3873 * and just remove it from our coverage tests.
3874 *
3875 * LCOV_EXCL_START
3876 */
3877 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3878 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
3879 /* LCOV_EXCL_STOP */
3880 }
3881 *eventPP = s;
3882 *startPtr = NULL;
3883 tok = XmlIgnoreSectionTok(enc, s, end, &next);
3884 *eventEndPP = next;
3885 switch (tok) {
3886 case XML_TOK_IGNORE_SECT:
3887 if (parser->m_defaultHandler)
3888 reportDefault(parser, enc, s, next);
3889 *startPtr = next;
3890 *nextPtr = next;
3891 if (parser->m_parsingStatus.parsing == XML_FINISHED)
3892 return XML_ERROR_ABORTED;
3893 else
3894 return XML_ERROR_NONE;
3895 case XML_TOK_INVALID:
3896 *eventPP = next;
3897 return XML_ERROR_INVALID_TOKEN;
3898 case XML_TOK_PARTIAL_CHAR:
3899 if (haveMore) {
3900 *nextPtr = s;
3901 return XML_ERROR_NONE;
3902 }
3903 return XML_ERROR_PARTIAL_CHAR;
3904 case XML_TOK_PARTIAL:
3905 case XML_TOK_NONE:
3906 if (haveMore) {
3907 *nextPtr = s;
3908 return XML_ERROR_NONE;
3909 }
3910 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
3911 default:
3912 /* All of the tokens that XmlIgnoreSectionTok() returns have
3913 * explicit cases to handle them, so this default case is never
3914 * executed. We keep it as a safety net anyway, and remove it
3915 * from our test coverage statistics.
3916 *
3917 * LCOV_EXCL_START
3918 */
3919 *eventPP = next;
3920 return XML_ERROR_UNEXPECTED_STATE;
3921 /* LCOV_EXCL_STOP */
3922 }
3923 /* not reached */
3924 }
3925
3926 #endif /* XML_DTD */
3927
3928 static enum XML_Error
initializeEncoding(XML_Parser parser)3929 initializeEncoding(XML_Parser parser)
3930 {
3931 const char *s;
3932 #ifdef XML_UNICODE
3933 char encodingBuf[128];
3934 /* See comments abount `protoclEncodingName` in parserInit() */
3935 if (!parser->m_protocolEncodingName)
3936 s = NULL;
3937 else {
3938 int i;
3939 for (i = 0; parser->m_protocolEncodingName[i]; i++) {
3940 if (i == sizeof(encodingBuf) - 1
3941 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
3942 encodingBuf[0] = '\0';
3943 break;
3944 }
3945 encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
3946 }
3947 encodingBuf[i] = '\0';
3948 s = encodingBuf;
3949 }
3950 #else
3951 s = parser->m_protocolEncodingName;
3952 #endif
3953 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(&parser->m_initEncoding, &parser->m_encoding, s))
3954 return XML_ERROR_NONE;
3955 return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
3956 }
3957
3958 static enum XML_Error
processXmlDecl(XML_Parser parser,int isGeneralTextEntity,const char * s,const char * next)3959 processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
3960 const char *s, const char *next)
3961 {
3962 const char *encodingName = NULL;
3963 const XML_Char *storedEncName = NULL;
3964 const ENCODING *newEncoding = NULL;
3965 const char *version = NULL;
3966 const char *versionend;
3967 const XML_Char *storedversion = NULL;
3968 int standalone = -1;
3969 if (!(parser->m_ns
3970 ? XmlParseXmlDeclNS
3971 : XmlParseXmlDecl)(isGeneralTextEntity,
3972 parser->m_encoding,
3973 s,
3974 next,
3975 &parser->m_eventPtr,
3976 &version,
3977 &versionend,
3978 &encodingName,
3979 &newEncoding,
3980 &standalone)) {
3981 if (isGeneralTextEntity)
3982 return XML_ERROR_TEXT_DECL;
3983 else
3984 return XML_ERROR_XML_DECL;
3985 }
3986 if (!isGeneralTextEntity && standalone == 1) {
3987 parser->m_dtd->standalone = XML_TRUE;
3988 #ifdef XML_DTD
3989 if (parser->m_paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
3990 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
3991 #endif /* XML_DTD */
3992 }
3993 if (parser->m_xmlDeclHandler) {
3994 if (encodingName != NULL) {
3995 storedEncName = poolStoreString(&parser->m_temp2Pool,
3996 parser->m_encoding,
3997 encodingName,
3998 encodingName
3999 + XmlNameLength(parser->m_encoding, encodingName));
4000 if (!storedEncName)
4001 return XML_ERROR_NO_MEMORY;
4002 poolFinish(&parser->m_temp2Pool);
4003 }
4004 if (version) {
4005 storedversion = poolStoreString(&parser->m_temp2Pool,
4006 parser->m_encoding,
4007 version,
4008 versionend - parser->m_encoding->minBytesPerChar);
4009 if (!storedversion)
4010 return XML_ERROR_NO_MEMORY;
4011 }
4012 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName, standalone);
4013 }
4014 else if (parser->m_defaultHandler)
4015 reportDefault(parser, parser->m_encoding, s, next);
4016 if (parser->m_protocolEncodingName == NULL) {
4017 if (newEncoding) {
4018 /* Check that the specified encoding does not conflict with what
4019 * the parser has already deduced. Do we have the same number
4020 * of bytes in the smallest representation of a character? If
4021 * this is UTF-16, is it the same endianness?
4022 */
4023 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
4024 || (newEncoding->minBytesPerChar == 2 &&
4025 newEncoding != parser->m_encoding)) {
4026 parser->m_eventPtr = encodingName;
4027 return XML_ERROR_INCORRECT_ENCODING;
4028 }
4029 parser->m_encoding = newEncoding;
4030 }
4031 else if (encodingName) {
4032 enum XML_Error result;
4033 if (!storedEncName) {
4034 storedEncName = poolStoreString(
4035 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4036 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4037 if (!storedEncName)
4038 return XML_ERROR_NO_MEMORY;
4039 }
4040 result = handleUnknownEncoding(parser, storedEncName);
4041 poolClear(&parser->m_temp2Pool);
4042 if (result == XML_ERROR_UNKNOWN_ENCODING)
4043 parser->m_eventPtr = encodingName;
4044 return result;
4045 }
4046 }
4047
4048 if (storedEncName || storedversion)
4049 poolClear(&parser->m_temp2Pool);
4050
4051 return XML_ERROR_NONE;
4052 }
4053
4054 static enum XML_Error
handleUnknownEncoding(XML_Parser parser,const XML_Char * encodingName)4055 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
4056 {
4057 if (parser->m_unknownEncodingHandler) {
4058 XML_Encoding info;
4059 int i;
4060 for (i = 0; i < 256; i++)
4061 info.map[i] = -1;
4062 info.convert = NULL;
4063 info.data = NULL;
4064 info.release = NULL;
4065 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData, encodingName,
4066 &info)) {
4067 ENCODING *enc;
4068 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4069 if (!parser->m_unknownEncodingMem) {
4070 if (info.release)
4071 info.release(info.data);
4072 return XML_ERROR_NO_MEMORY;
4073 }
4074 enc = (parser->m_ns
4075 ? XmlInitUnknownEncodingNS
4076 : XmlInitUnknownEncoding)(parser->m_unknownEncodingMem,
4077 info.map,
4078 info.convert,
4079 info.data);
4080 if (enc) {
4081 parser->m_unknownEncodingData = info.data;
4082 parser->m_unknownEncodingRelease = info.release;
4083 parser->m_encoding = enc;
4084 return XML_ERROR_NONE;
4085 }
4086 }
4087 if (info.release != NULL)
4088 info.release(info.data);
4089 }
4090 return XML_ERROR_UNKNOWN_ENCODING;
4091 }
4092
4093 static enum XML_Error PTRCALL
prologInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4094 prologInitProcessor(XML_Parser parser,
4095 const char *s,
4096 const char *end,
4097 const char **nextPtr)
4098 {
4099 enum XML_Error result = initializeEncoding(parser);
4100 if (result != XML_ERROR_NONE)
4101 return result;
4102 parser->m_processor = prologProcessor;
4103 return prologProcessor(parser, s, end, nextPtr);
4104 }
4105
4106 #ifdef XML_DTD
4107
4108 static enum XML_Error PTRCALL
externalParEntInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4109 externalParEntInitProcessor(XML_Parser parser,
4110 const char *s,
4111 const char *end,
4112 const char **nextPtr)
4113 {
4114 enum XML_Error result = initializeEncoding(parser);
4115 if (result != XML_ERROR_NONE)
4116 return result;
4117
4118 /* we know now that XML_Parse(Buffer) has been called,
4119 so we consider the external parameter entity read */
4120 parser->m_dtd->paramEntityRead = XML_TRUE;
4121
4122 if (parser->m_prologState.inEntityValue) {
4123 parser->m_processor = entityValueInitProcessor;
4124 return entityValueInitProcessor(parser, s, end, nextPtr);
4125 }
4126 else {
4127 parser->m_processor = externalParEntProcessor;
4128 return externalParEntProcessor(parser, s, end, nextPtr);
4129 }
4130 }
4131
4132 static enum XML_Error PTRCALL
entityValueInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4133 entityValueInitProcessor(XML_Parser parser,
4134 const char *s,
4135 const char *end,
4136 const char **nextPtr)
4137 {
4138 int tok;
4139 const char *start = s;
4140 const char *next = start;
4141 parser->m_eventPtr = start;
4142
4143 for (;;) {
4144 tok = XmlPrologTok(parser->m_encoding, start, end, &next);
4145 parser->m_eventEndPtr = next;
4146 if (tok <= 0) {
4147 if (!parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4148 *nextPtr = s;
4149 return XML_ERROR_NONE;
4150 }
4151 switch (tok) {
4152 case XML_TOK_INVALID:
4153 return XML_ERROR_INVALID_TOKEN;
4154 case XML_TOK_PARTIAL:
4155 return XML_ERROR_UNCLOSED_TOKEN;
4156 case XML_TOK_PARTIAL_CHAR:
4157 return XML_ERROR_PARTIAL_CHAR;
4158 case XML_TOK_NONE: /* start == end */
4159 default:
4160 break;
4161 }
4162 /* found end of entity value - can store it now */
4163 return storeEntityValue(parser, parser->m_encoding, s, end);
4164 }
4165 else if (tok == XML_TOK_XML_DECL) {
4166 enum XML_Error result;
4167 result = processXmlDecl(parser, 0, start, next);
4168 if (result != XML_ERROR_NONE)
4169 return result;
4170 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For that
4171 * to happen, a parameter entity parsing handler must have
4172 * attempted to suspend the parser, which fails and raises an
4173 * error. The parser can be aborted, but can't be suspended.
4174 */
4175 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4176 return XML_ERROR_ABORTED;
4177 *nextPtr = next;
4178 /* stop scanning for text declaration - we found one */
4179 parser->m_processor = entityValueProcessor;
4180 return entityValueProcessor(parser, next, end, nextPtr);
4181 }
4182 /* If we are at the end of the buffer, this would cause XmlPrologTok to
4183 return XML_TOK_NONE on the next call, which would then cause the
4184 function to exit with *nextPtr set to s - that is what we want for other
4185 tokens, but not for the BOM - we would rather like to skip it;
4186 then, when this routine is entered the next time, XmlPrologTok will
4187 return XML_TOK_INVALID, since the BOM is still in the buffer
4188 */
4189 else if (tok == XML_TOK_BOM && next == end && !parser->m_parsingStatus.finalBuffer) {
4190 *nextPtr = next;
4191 return XML_ERROR_NONE;
4192 }
4193 /* If we get this token, we have the start of what might be a
4194 normal tag, but not a declaration (i.e. it doesn't begin with
4195 "<!"). In a DTD context, that isn't legal.
4196 */
4197 else if (tok == XML_TOK_INSTANCE_START) {
4198 *nextPtr = next;
4199 return XML_ERROR_SYNTAX;
4200 }
4201 start = next;
4202 parser->m_eventPtr = start;
4203 }
4204 }
4205
4206 static enum XML_Error PTRCALL
externalParEntProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4207 externalParEntProcessor(XML_Parser parser,
4208 const char *s,
4209 const char *end,
4210 const char **nextPtr)
4211 {
4212 const char *next = s;
4213 int tok;
4214
4215 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4216 if (tok <= 0) {
4217 if (!parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4218 *nextPtr = s;
4219 return XML_ERROR_NONE;
4220 }
4221 switch (tok) {
4222 case XML_TOK_INVALID:
4223 return XML_ERROR_INVALID_TOKEN;
4224 case XML_TOK_PARTIAL:
4225 return XML_ERROR_UNCLOSED_TOKEN;
4226 case XML_TOK_PARTIAL_CHAR:
4227 return XML_ERROR_PARTIAL_CHAR;
4228 case XML_TOK_NONE: /* start == end */
4229 default:
4230 break;
4231 }
4232 }
4233 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4234 However, when parsing an external subset, doProlog will not accept a BOM
4235 as valid, and report a syntax error, so we have to skip the BOM
4236 */
4237 else if (tok == XML_TOK_BOM) {
4238 s = next;
4239 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4240 }
4241
4242 parser->m_processor = prologProcessor;
4243 return doProlog(parser, parser->m_encoding, s, end, tok, next,
4244 nextPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
4245 }
4246
4247 static enum XML_Error PTRCALL
entityValueProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4248 entityValueProcessor(XML_Parser parser,
4249 const char *s,
4250 const char *end,
4251 const char **nextPtr)
4252 {
4253 const char *start = s;
4254 const char *next = s;
4255 const ENCODING *enc = parser->m_encoding;
4256 int tok;
4257
4258 for (;;) {
4259 tok = XmlPrologTok(enc, start, end, &next);
4260 if (tok <= 0) {
4261 if (!parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4262 *nextPtr = s;
4263 return XML_ERROR_NONE;
4264 }
4265 switch (tok) {
4266 case XML_TOK_INVALID:
4267 return XML_ERROR_INVALID_TOKEN;
4268 case XML_TOK_PARTIAL:
4269 return XML_ERROR_UNCLOSED_TOKEN;
4270 case XML_TOK_PARTIAL_CHAR:
4271 return XML_ERROR_PARTIAL_CHAR;
4272 case XML_TOK_NONE: /* start == end */
4273 default:
4274 break;
4275 }
4276 /* found end of entity value - can store it now */
4277 return storeEntityValue(parser, enc, s, end);
4278 }
4279 start = next;
4280 }
4281 }
4282
4283 #endif /* XML_DTD */
4284
4285 static enum XML_Error PTRCALL
prologProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4286 prologProcessor(XML_Parser parser,
4287 const char *s,
4288 const char *end,
4289 const char **nextPtr)
4290 {
4291 const char *next = s;
4292 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4293 return doProlog(parser, parser->m_encoding, s, end, tok, next,
4294 nextPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
4295 }
4296
4297 static enum XML_Error
doProlog(XML_Parser parser,const ENCODING * enc,const char * s,const char * end,int tok,const char * next,const char ** nextPtr,XML_Bool haveMore)4298 doProlog(XML_Parser parser,
4299 const ENCODING *enc,
4300 const char *s,
4301 const char *end,
4302 int tok,
4303 const char *next,
4304 const char **nextPtr,
4305 XML_Bool haveMore)
4306 {
4307 #ifdef XML_DTD
4308 static const XML_Char externalSubsetName[] = { ASCII_HASH , '\0' };
4309 #endif /* XML_DTD */
4310 static const XML_Char atypeCDATA[] =
4311 { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
4312 static const XML_Char atypeID[] = { ASCII_I, ASCII_D, '\0' };
4313 static const XML_Char atypeIDREF[] =
4314 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
4315 static const XML_Char atypeIDREFS[] =
4316 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
4317 static const XML_Char atypeENTITY[] =
4318 { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
4319 static const XML_Char atypeENTITIES[] = { ASCII_E, ASCII_N,
4320 ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' };
4321 static const XML_Char atypeNMTOKEN[] = {
4322 ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
4323 static const XML_Char atypeNMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T,
4324 ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' };
4325 static const XML_Char notationPrefix[] = { ASCII_N, ASCII_O, ASCII_T,
4326 ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0' };
4327 static const XML_Char enumValueSep[] = { ASCII_PIPE, '\0' };
4328 static const XML_Char enumValueStart[] = { ASCII_LPAREN, '\0' };
4329
4330 /* save one level of indirection */
4331 DTD * const dtd = parser->m_dtd;
4332
4333 const char **eventPP;
4334 const char **eventEndPP;
4335 enum XML_Content_Quant quant;
4336
4337 if (enc == parser->m_encoding) {
4338 eventPP = &parser->m_eventPtr;
4339 eventEndPP = &parser->m_eventEndPtr;
4340 }
4341 else {
4342 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4343 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4344 }
4345
4346 for (;;) {
4347 int role;
4348 XML_Bool handleDefault = XML_TRUE;
4349 *eventPP = s;
4350 *eventEndPP = next;
4351 if (tok <= 0) {
4352 if (haveMore && tok != XML_TOK_INVALID) {
4353 *nextPtr = s;
4354 return XML_ERROR_NONE;
4355 }
4356 switch (tok) {
4357 case XML_TOK_INVALID:
4358 *eventPP = next;
4359 return XML_ERROR_INVALID_TOKEN;
4360 case XML_TOK_PARTIAL:
4361 return XML_ERROR_UNCLOSED_TOKEN;
4362 case XML_TOK_PARTIAL_CHAR:
4363 return XML_ERROR_PARTIAL_CHAR;
4364 case -XML_TOK_PROLOG_S:
4365 tok = -tok;
4366 break;
4367 case XML_TOK_NONE:
4368 #ifdef XML_DTD
4369 /* for internal PE NOT referenced between declarations */
4370 if (enc != parser->m_encoding && !parser->m_openInternalEntities->betweenDecl) {
4371 *nextPtr = s;
4372 return XML_ERROR_NONE;
4373 }
4374 /* WFC: PE Between Declarations - must check that PE contains
4375 complete markup, not only for external PEs, but also for
4376 internal PEs if the reference occurs between declarations.
4377 */
4378 if (parser->m_isParamEntity || enc != parser->m_encoding) {
4379 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
4380 == XML_ROLE_ERROR)
4381 return XML_ERROR_INCOMPLETE_PE;
4382 *nextPtr = s;
4383 return XML_ERROR_NONE;
4384 }
4385 #endif /* XML_DTD */
4386 return XML_ERROR_NO_ELEMENTS;
4387 default:
4388 tok = -tok;
4389 next = end;
4390 break;
4391 }
4392 }
4393 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
4394 switch (role) {
4395 case XML_ROLE_XML_DECL:
4396 {
4397 enum XML_Error result = processXmlDecl(parser, 0, s, next);
4398 if (result != XML_ERROR_NONE)
4399 return result;
4400 enc = parser->m_encoding;
4401 handleDefault = XML_FALSE;
4402 }
4403 break;
4404 case XML_ROLE_DOCTYPE_NAME:
4405 if (parser->m_startDoctypeDeclHandler) {
4406 parser->m_doctypeName = poolStoreString(&parser->m_tempPool, enc, s, next);
4407 if (!parser->m_doctypeName)
4408 return XML_ERROR_NO_MEMORY;
4409 poolFinish(&parser->m_tempPool);
4410 parser->m_doctypePubid = NULL;
4411 handleDefault = XML_FALSE;
4412 }
4413 parser->m_doctypeSysid = NULL; /* always initialize to NULL */
4414 break;
4415 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
4416 if (parser->m_startDoctypeDeclHandler) {
4417 parser->m_startDoctypeDeclHandler(parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4418 parser->m_doctypePubid, 1);
4419 parser->m_doctypeName = NULL;
4420 poolClear(&parser->m_tempPool);
4421 handleDefault = XML_FALSE;
4422 }
4423 break;
4424 #ifdef XML_DTD
4425 case XML_ROLE_TEXT_DECL:
4426 {
4427 enum XML_Error result = processXmlDecl(parser, 1, s, next);
4428 if (result != XML_ERROR_NONE)
4429 return result;
4430 enc = parser->m_encoding;
4431 handleDefault = XML_FALSE;
4432 }
4433 break;
4434 #endif /* XML_DTD */
4435 case XML_ROLE_DOCTYPE_PUBLIC_ID:
4436 #ifdef XML_DTD
4437 parser->m_useForeignDTD = XML_FALSE;
4438 parser->m_declEntity = (ENTITY *)lookup(parser,
4439 &dtd->paramEntities,
4440 externalSubsetName,
4441 sizeof(ENTITY));
4442 if (!parser->m_declEntity)
4443 return XML_ERROR_NO_MEMORY;
4444 #endif /* XML_DTD */
4445 dtd->hasParamEntityRefs = XML_TRUE;
4446 if (parser->m_startDoctypeDeclHandler) {
4447 XML_Char *pubId;
4448 if (!XmlIsPublicId(enc, s, next, eventPP))
4449 return XML_ERROR_PUBLICID;
4450 pubId = poolStoreString(&parser->m_tempPool, enc,
4451 s + enc->minBytesPerChar,
4452 next - enc->minBytesPerChar);
4453 if (!pubId)
4454 return XML_ERROR_NO_MEMORY;
4455 normalizePublicId(pubId);
4456 poolFinish(&parser->m_tempPool);
4457 parser->m_doctypePubid = pubId;
4458 handleDefault = XML_FALSE;
4459 goto alreadyChecked;
4460 }
4461 /* fall through */
4462 case XML_ROLE_ENTITY_PUBLIC_ID:
4463 if (!XmlIsPublicId(enc, s, next, eventPP))
4464 return XML_ERROR_PUBLICID;
4465 alreadyChecked:
4466 if (dtd->keepProcessing && parser->m_declEntity) {
4467 XML_Char *tem = poolStoreString(&dtd->pool,
4468 enc,
4469 s + enc->minBytesPerChar,
4470 next - enc->minBytesPerChar);
4471 if (!tem)
4472 return XML_ERROR_NO_MEMORY;
4473 normalizePublicId(tem);
4474 parser->m_declEntity->publicId = tem;
4475 poolFinish(&dtd->pool);
4476 /* Don't suppress the default handler if we fell through from
4477 * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
4478 */
4479 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
4480 handleDefault = XML_FALSE;
4481 }
4482 break;
4483 case XML_ROLE_DOCTYPE_CLOSE:
4484 if (parser->m_doctypeName) {
4485 parser->m_startDoctypeDeclHandler(parser->m_handlerArg, parser->m_doctypeName,
4486 parser->m_doctypeSysid, parser->m_doctypePubid, 0);
4487 poolClear(&parser->m_tempPool);
4488 handleDefault = XML_FALSE;
4489 }
4490 /* parser->m_doctypeSysid will be non-NULL in the case of a previous
4491 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
4492 was not set, indicating an external subset
4493 */
4494 #ifdef XML_DTD
4495 if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
4496 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4497 dtd->hasParamEntityRefs = XML_TRUE;
4498 if (parser->m_paramEntityParsing && parser->m_externalEntityRefHandler) {
4499 ENTITY *entity = (ENTITY *)lookup(parser,
4500 &dtd->paramEntities,
4501 externalSubsetName,
4502 sizeof(ENTITY));
4503 if (!entity) {
4504 /* The external subset name "#" will have already been
4505 * inserted into the hash table at the start of the
4506 * external entity parsing, so no allocation will happen
4507 * and lookup() cannot fail.
4508 */
4509 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4510 }
4511 if (parser->m_useForeignDTD)
4512 entity->base = parser->m_curBase;
4513 dtd->paramEntityRead = XML_FALSE;
4514 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
4515 0,
4516 entity->base,
4517 entity->systemId,
4518 entity->publicId))
4519 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4520 if (dtd->paramEntityRead) {
4521 if (!dtd->standalone &&
4522 parser->m_notStandaloneHandler &&
4523 !parser->m_notStandaloneHandler(parser->m_handlerArg))
4524 return XML_ERROR_NOT_STANDALONE;
4525 }
4526 /* if we didn't read the foreign DTD then this means that there
4527 is no external subset and we must reset dtd->hasParamEntityRefs
4528 */
4529 else if (!parser->m_doctypeSysid)
4530 dtd->hasParamEntityRefs = hadParamEntityRefs;
4531 /* end of DTD - no need to update dtd->keepProcessing */
4532 }
4533 parser->m_useForeignDTD = XML_FALSE;
4534 }
4535 #endif /* XML_DTD */
4536 if (parser->m_endDoctypeDeclHandler) {
4537 parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
4538 handleDefault = XML_FALSE;
4539 }
4540 break;
4541 case XML_ROLE_INSTANCE_START:
4542 #ifdef XML_DTD
4543 /* if there is no DOCTYPE declaration then now is the
4544 last chance to read the foreign DTD
4545 */
4546 if (parser->m_useForeignDTD) {
4547 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4548 dtd->hasParamEntityRefs = XML_TRUE;
4549 if (parser->m_paramEntityParsing && parser->m_externalEntityRefHandler) {
4550 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4551 externalSubsetName,
4552 sizeof(ENTITY));
4553 if (!entity)
4554 return XML_ERROR_NO_MEMORY;
4555 entity->base = parser->m_curBase;
4556 dtd->paramEntityRead = XML_FALSE;
4557 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
4558 0,
4559 entity->base,
4560 entity->systemId,
4561 entity->publicId))
4562 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4563 if (dtd->paramEntityRead) {
4564 if (!dtd->standalone &&
4565 parser->m_notStandaloneHandler &&
4566 !parser->m_notStandaloneHandler(parser->m_handlerArg))
4567 return XML_ERROR_NOT_STANDALONE;
4568 }
4569 /* if we didn't read the foreign DTD then this means that there
4570 is no external subset and we must reset dtd->hasParamEntityRefs
4571 */
4572 else
4573 dtd->hasParamEntityRefs = hadParamEntityRefs;
4574 /* end of DTD - no need to update dtd->keepProcessing */
4575 }
4576 }
4577 #endif /* XML_DTD */
4578 parser->m_processor = contentProcessor;
4579 return contentProcessor(parser, s, end, nextPtr);
4580 case XML_ROLE_ATTLIST_ELEMENT_NAME:
4581 parser->m_declElementType = getElementType(parser, enc, s, next);
4582 if (!parser->m_declElementType)
4583 return XML_ERROR_NO_MEMORY;
4584 goto checkAttListDeclHandler;
4585 case XML_ROLE_ATTRIBUTE_NAME:
4586 parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
4587 if (!parser->m_declAttributeId)
4588 return XML_ERROR_NO_MEMORY;
4589 parser->m_declAttributeIsCdata = XML_FALSE;
4590 parser->m_declAttributeType = NULL;
4591 parser->m_declAttributeIsId = XML_FALSE;
4592 goto checkAttListDeclHandler;
4593 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
4594 parser->m_declAttributeIsCdata = XML_TRUE;
4595 parser->m_declAttributeType = atypeCDATA;
4596 goto checkAttListDeclHandler;
4597 case XML_ROLE_ATTRIBUTE_TYPE_ID:
4598 parser->m_declAttributeIsId = XML_TRUE;
4599 parser->m_declAttributeType = atypeID;
4600 goto checkAttListDeclHandler;
4601 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
4602 parser->m_declAttributeType = atypeIDREF;
4603 goto checkAttListDeclHandler;
4604 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
4605 parser->m_declAttributeType = atypeIDREFS;
4606 goto checkAttListDeclHandler;
4607 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
4608 parser->m_declAttributeType = atypeENTITY;
4609 goto checkAttListDeclHandler;
4610 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
4611 parser->m_declAttributeType = atypeENTITIES;
4612 goto checkAttListDeclHandler;
4613 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
4614 parser->m_declAttributeType = atypeNMTOKEN;
4615 goto checkAttListDeclHandler;
4616 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
4617 parser->m_declAttributeType = atypeNMTOKENS;
4618 checkAttListDeclHandler:
4619 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
4620 handleDefault = XML_FALSE;
4621 break;
4622 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
4623 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
4624 if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
4625 const XML_Char *prefix;
4626 if (parser->m_declAttributeType) {
4627 prefix = enumValueSep;
4628 }
4629 else {
4630 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE
4631 ? notationPrefix
4632 : enumValueStart);
4633 }
4634 if (!poolAppendString(&parser->m_tempPool, prefix))
4635 return XML_ERROR_NO_MEMORY;
4636 if (!poolAppend(&parser->m_tempPool, enc, s, next))
4637 return XML_ERROR_NO_MEMORY;
4638 parser->m_declAttributeType = parser->m_tempPool.start;
4639 handleDefault = XML_FALSE;
4640 }
4641 break;
4642 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
4643 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
4644 if (dtd->keepProcessing) {
4645 if (!defineAttribute(parser->m_declElementType, parser->m_declAttributeId,
4646 parser->m_declAttributeIsCdata, parser->m_declAttributeIsId,
4647 0, parser))
4648 return XML_ERROR_NO_MEMORY;
4649 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4650 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4651 || (*parser->m_declAttributeType == XML_T(ASCII_N)
4652 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
4653 /* Enumerated or Notation type */
4654 if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4655 || !poolAppendChar(&parser->m_tempPool, XML_T('\0')))
4656 return XML_ERROR_NO_MEMORY;
4657 parser->m_declAttributeType = parser->m_tempPool.start;
4658 poolFinish(&parser->m_tempPool);
4659 }
4660 *eventEndPP = s;
4661 parser->m_attlistDeclHandler(parser->m_handlerArg, parser->m_declElementType->name,
4662 parser->m_declAttributeId->name, parser->m_declAttributeType,
4663 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
4664 poolClear(&parser->m_tempPool);
4665 handleDefault = XML_FALSE;
4666 }
4667 }
4668 break;
4669 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
4670 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
4671 if (dtd->keepProcessing) {
4672 const XML_Char *attVal;
4673 enum XML_Error result =
4674 storeAttributeValue(parser, enc, parser->m_declAttributeIsCdata,
4675 s + enc->minBytesPerChar,
4676 next - enc->minBytesPerChar,
4677 &dtd->pool);
4678 if (result)
4679 return result;
4680 attVal = poolStart(&dtd->pool);
4681 poolFinish(&dtd->pool);
4682 /* ID attributes aren't allowed to have a default */
4683 if (!defineAttribute(parser->m_declElementType, parser->m_declAttributeId,
4684 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
4685 return XML_ERROR_NO_MEMORY;
4686 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4687 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4688 || (*parser->m_declAttributeType == XML_T(ASCII_N)
4689 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
4690 /* Enumerated or Notation type */
4691 if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4692 || !poolAppendChar(&parser->m_tempPool, XML_T('\0')))
4693 return XML_ERROR_NO_MEMORY;
4694 parser->m_declAttributeType = parser->m_tempPool.start;
4695 poolFinish(&parser->m_tempPool);
4696 }
4697 *eventEndPP = s;
4698 parser->m_attlistDeclHandler(parser->m_handlerArg, parser->m_declElementType->name,
4699 parser->m_declAttributeId->name, parser->m_declAttributeType,
4700 attVal,
4701 role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
4702 poolClear(&parser->m_tempPool);
4703 handleDefault = XML_FALSE;
4704 }
4705 }
4706 break;
4707 case XML_ROLE_ENTITY_VALUE:
4708 if (dtd->keepProcessing) {
4709 enum XML_Error result = storeEntityValue(parser, enc,
4710 s + enc->minBytesPerChar,
4711 next - enc->minBytesPerChar);
4712 if (parser->m_declEntity) {
4713 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
4714 parser->m_declEntity->textLen = (int)(poolLength(&dtd->entityValuePool));
4715 poolFinish(&dtd->entityValuePool);
4716 if (parser->m_entityDeclHandler) {
4717 *eventEndPP = s;
4718 parser->m_entityDeclHandler(parser->m_handlerArg,
4719 parser->m_declEntity->name,
4720 parser->m_declEntity->is_param,
4721 parser->m_declEntity->textPtr,
4722 parser->m_declEntity->textLen,
4723 parser->m_curBase, 0, 0, 0);
4724 handleDefault = XML_FALSE;
4725 }
4726 }
4727 else
4728 poolDiscard(&dtd->entityValuePool);
4729 if (result != XML_ERROR_NONE)
4730 return result;
4731 }
4732 break;
4733 case XML_ROLE_DOCTYPE_SYSTEM_ID:
4734 #ifdef XML_DTD
4735 parser->m_useForeignDTD = XML_FALSE;
4736 #endif /* XML_DTD */
4737 dtd->hasParamEntityRefs = XML_TRUE;
4738 if (parser->m_startDoctypeDeclHandler) {
4739 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
4740 s + enc->minBytesPerChar,
4741 next - enc->minBytesPerChar);
4742 if (parser->m_doctypeSysid == NULL)
4743 return XML_ERROR_NO_MEMORY;
4744 poolFinish(&parser->m_tempPool);
4745 handleDefault = XML_FALSE;
4746 }
4747 #ifdef XML_DTD
4748 else
4749 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
4750 for the case where no parser->m_startDoctypeDeclHandler is set */
4751 parser->m_doctypeSysid = externalSubsetName;
4752 #endif /* XML_DTD */
4753 if (!dtd->standalone
4754 #ifdef XML_DTD
4755 && !parser->m_paramEntityParsing
4756 #endif /* XML_DTD */
4757 && parser->m_notStandaloneHandler
4758 && !parser->m_notStandaloneHandler(parser->m_handlerArg))
4759 return XML_ERROR_NOT_STANDALONE;
4760 #ifndef XML_DTD
4761 break;
4762 #else /* XML_DTD */
4763 if (!parser->m_declEntity) {
4764 parser->m_declEntity = (ENTITY *)lookup(parser,
4765 &dtd->paramEntities,
4766 externalSubsetName,
4767 sizeof(ENTITY));
4768 if (!parser->m_declEntity)
4769 return XML_ERROR_NO_MEMORY;
4770 parser->m_declEntity->publicId = NULL;
4771 }
4772 #endif /* XML_DTD */
4773 /* fall through */
4774 case XML_ROLE_ENTITY_SYSTEM_ID:
4775 if (dtd->keepProcessing && parser->m_declEntity) {
4776 parser->m_declEntity->systemId = poolStoreString(&dtd->pool, enc,
4777 s + enc->minBytesPerChar,
4778 next - enc->minBytesPerChar);
4779 if (!parser->m_declEntity->systemId)
4780 return XML_ERROR_NO_MEMORY;
4781 parser->m_declEntity->base = parser->m_curBase;
4782 poolFinish(&dtd->pool);
4783 /* Don't suppress the default handler if we fell through from
4784 * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
4785 */
4786 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
4787 handleDefault = XML_FALSE;
4788 }
4789 break;
4790 case XML_ROLE_ENTITY_COMPLETE:
4791 if (dtd->keepProcessing && parser->m_declEntity && parser->m_entityDeclHandler) {
4792 *eventEndPP = s;
4793 parser->m_entityDeclHandler(parser->m_handlerArg,
4794 parser->m_declEntity->name,
4795 parser->m_declEntity->is_param,
4796 0,0,
4797 parser->m_declEntity->base,
4798 parser->m_declEntity->systemId,
4799 parser->m_declEntity->publicId,
4800 0);
4801 handleDefault = XML_FALSE;
4802 }
4803 break;
4804 case XML_ROLE_ENTITY_NOTATION_NAME:
4805 if (dtd->keepProcessing && parser->m_declEntity) {
4806 parser->m_declEntity->notation = poolStoreString(&dtd->pool, enc, s, next);
4807 if (!parser->m_declEntity->notation)
4808 return XML_ERROR_NO_MEMORY;
4809 poolFinish(&dtd->pool);
4810 if (parser->m_unparsedEntityDeclHandler) {
4811 *eventEndPP = s;
4812 parser->m_unparsedEntityDeclHandler(parser->m_handlerArg,
4813 parser->m_declEntity->name,
4814 parser->m_declEntity->base,
4815 parser->m_declEntity->systemId,
4816 parser->m_declEntity->publicId,
4817 parser->m_declEntity->notation);
4818 handleDefault = XML_FALSE;
4819 }
4820 else if (parser->m_entityDeclHandler) {
4821 *eventEndPP = s;
4822 parser->m_entityDeclHandler(parser->m_handlerArg,
4823 parser->m_declEntity->name,
4824 0,0,0,
4825 parser->m_declEntity->base,
4826 parser->m_declEntity->systemId,
4827 parser->m_declEntity->publicId,
4828 parser->m_declEntity->notation);
4829 handleDefault = XML_FALSE;
4830 }
4831 }
4832 break;
4833 case XML_ROLE_GENERAL_ENTITY_NAME:
4834 {
4835 if (XmlPredefinedEntityName(enc, s, next)) {
4836 parser->m_declEntity = NULL;
4837 break;
4838 }
4839 if (dtd->keepProcessing) {
4840 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4841 if (!name)
4842 return XML_ERROR_NO_MEMORY;
4843 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, name,
4844 sizeof(ENTITY));
4845 if (!parser->m_declEntity)
4846 return XML_ERROR_NO_MEMORY;
4847 if (parser->m_declEntity->name != name) {
4848 poolDiscard(&dtd->pool);
4849 parser->m_declEntity = NULL;
4850 }
4851 else {
4852 poolFinish(&dtd->pool);
4853 parser->m_declEntity->publicId = NULL;
4854 parser->m_declEntity->is_param = XML_FALSE;
4855 /* if we have a parent parser or are reading an internal parameter
4856 entity, then the entity declaration is not considered "internal"
4857 */
4858 parser->m_declEntity->is_internal = !(parser->m_parentParser || parser->m_openInternalEntities);
4859 if (parser->m_entityDeclHandler)
4860 handleDefault = XML_FALSE;
4861 }
4862 }
4863 else {
4864 poolDiscard(&dtd->pool);
4865 parser->m_declEntity = NULL;
4866 }
4867 }
4868 break;
4869 case XML_ROLE_PARAM_ENTITY_NAME:
4870 #ifdef XML_DTD
4871 if (dtd->keepProcessing) {
4872 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4873 if (!name)
4874 return XML_ERROR_NO_MEMORY;
4875 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4876 name, sizeof(ENTITY));
4877 if (!parser->m_declEntity)
4878 return XML_ERROR_NO_MEMORY;
4879 if (parser->m_declEntity->name != name) {
4880 poolDiscard(&dtd->pool);
4881 parser->m_declEntity = NULL;
4882 }
4883 else {
4884 poolFinish(&dtd->pool);
4885 parser->m_declEntity->publicId = NULL;
4886 parser->m_declEntity->is_param = XML_TRUE;
4887 /* if we have a parent parser or are reading an internal parameter
4888 entity, then the entity declaration is not considered "internal"
4889 */
4890 parser->m_declEntity->is_internal = !(parser->m_parentParser || parser->m_openInternalEntities);
4891 if (parser->m_entityDeclHandler)
4892 handleDefault = XML_FALSE;
4893 }
4894 }
4895 else {
4896 poolDiscard(&dtd->pool);
4897 parser->m_declEntity = NULL;
4898 }
4899 #else /* not XML_DTD */
4900 parser->m_declEntity = NULL;
4901 #endif /* XML_DTD */
4902 break;
4903 case XML_ROLE_NOTATION_NAME:
4904 parser->m_declNotationPublicId = NULL;
4905 parser->m_declNotationName = NULL;
4906 if (parser->m_notationDeclHandler) {
4907 parser->m_declNotationName = poolStoreString(&parser->m_tempPool, enc, s, next);
4908 if (!parser->m_declNotationName)
4909 return XML_ERROR_NO_MEMORY;
4910 poolFinish(&parser->m_tempPool);
4911 handleDefault = XML_FALSE;
4912 }
4913 break;
4914 case XML_ROLE_NOTATION_PUBLIC_ID:
4915 if (!XmlIsPublicId(enc, s, next, eventPP))
4916 return XML_ERROR_PUBLICID;
4917 if (parser->m_declNotationName) { /* means m_notationDeclHandler != NULL */
4918 XML_Char *tem = poolStoreString(&parser->m_tempPool,
4919 enc,
4920 s + enc->minBytesPerChar,
4921 next - enc->minBytesPerChar);
4922 if (!tem)
4923 return XML_ERROR_NO_MEMORY;
4924 normalizePublicId(tem);
4925 parser->m_declNotationPublicId = tem;
4926 poolFinish(&parser->m_tempPool);
4927 handleDefault = XML_FALSE;
4928 }
4929 break;
4930 case XML_ROLE_NOTATION_SYSTEM_ID:
4931 if (parser->m_declNotationName && parser->m_notationDeclHandler) {
4932 const XML_Char *systemId
4933 = poolStoreString(&parser->m_tempPool, enc,
4934 s + enc->minBytesPerChar,
4935 next - enc->minBytesPerChar);
4936 if (!systemId)
4937 return XML_ERROR_NO_MEMORY;
4938 *eventEndPP = s;
4939 parser->m_notationDeclHandler(parser->m_handlerArg,
4940 parser->m_declNotationName,
4941 parser->m_curBase,
4942 systemId,
4943 parser->m_declNotationPublicId);
4944 handleDefault = XML_FALSE;
4945 }
4946 poolClear(&parser->m_tempPool);
4947 break;
4948 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
4949 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
4950 *eventEndPP = s;
4951 parser->m_notationDeclHandler(parser->m_handlerArg,
4952 parser->m_declNotationName,
4953 parser->m_curBase,
4954 0,
4955 parser->m_declNotationPublicId);
4956 handleDefault = XML_FALSE;
4957 }
4958 poolClear(&parser->m_tempPool);
4959 break;
4960 case XML_ROLE_ERROR:
4961 switch (tok) {
4962 case XML_TOK_PARAM_ENTITY_REF:
4963 /* PE references in internal subset are
4964 not allowed within declarations. */
4965 return XML_ERROR_PARAM_ENTITY_REF;
4966 case XML_TOK_XML_DECL:
4967 return XML_ERROR_MISPLACED_XML_PI;
4968 default:
4969 return XML_ERROR_SYNTAX;
4970 }
4971 #ifdef XML_DTD
4972 case XML_ROLE_IGNORE_SECT:
4973 {
4974 enum XML_Error result;
4975 if (parser->m_defaultHandler)
4976 reportDefault(parser, enc, s, next);
4977 handleDefault = XML_FALSE;
4978 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
4979 if (result != XML_ERROR_NONE)
4980 return result;
4981 else if (!next) {
4982 parser->m_processor = ignoreSectionProcessor;
4983 return result;
4984 }
4985 }
4986 break;
4987 #endif /* XML_DTD */
4988 case XML_ROLE_GROUP_OPEN:
4989 if (parser->m_prologState.level >= parser->m_groupSize) {
4990 if (parser->m_groupSize) {
4991 char *temp = (char *)REALLOC(parser, parser->m_groupConnector, parser->m_groupSize *= 2);
4992 if (temp == NULL) {
4993 parser->m_groupSize /= 2;
4994 return XML_ERROR_NO_MEMORY;
4995 }
4996 parser->m_groupConnector = temp;
4997 if (dtd->scaffIndex) {
4998 int *temp = (int *)REALLOC(parser, dtd->scaffIndex,
4999 parser->m_groupSize * sizeof(int));
5000 if (temp == NULL)
5001 return XML_ERROR_NO_MEMORY;
5002 dtd->scaffIndex = temp;
5003 }
5004 }
5005 else {
5006 parser->m_groupConnector = (char *)MALLOC(parser, parser->m_groupSize = 32);
5007 if (!parser->m_groupConnector) {
5008 parser->m_groupSize = 0;
5009 return XML_ERROR_NO_MEMORY;
5010 }
5011 }
5012 }
5013 parser->m_groupConnector[parser->m_prologState.level] = 0;
5014 if (dtd->in_eldecl) {
5015 int myindex = nextScaffoldPart(parser);
5016 if (myindex < 0)
5017 return XML_ERROR_NO_MEMORY;
5018 dtd->scaffIndex[dtd->scaffLevel] = myindex;
5019 dtd->scaffLevel++;
5020 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5021 if (parser->m_elementDeclHandler)
5022 handleDefault = XML_FALSE;
5023 }
5024 break;
5025 case XML_ROLE_GROUP_SEQUENCE:
5026 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5027 return XML_ERROR_SYNTAX;
5028 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5029 if (dtd->in_eldecl && parser->m_elementDeclHandler)
5030 handleDefault = XML_FALSE;
5031 break;
5032 case XML_ROLE_GROUP_CHOICE:
5033 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5034 return XML_ERROR_SYNTAX;
5035 if (dtd->in_eldecl
5036 && !parser->m_groupConnector[parser->m_prologState.level]
5037 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5038 != XML_CTYPE_MIXED)
5039 ) {
5040 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5041 = XML_CTYPE_CHOICE;
5042 if (parser->m_elementDeclHandler)
5043 handleDefault = XML_FALSE;
5044 }
5045 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5046 break;
5047 case XML_ROLE_PARAM_ENTITY_REF:
5048 #ifdef XML_DTD
5049 case XML_ROLE_INNER_PARAM_ENTITY_REF:
5050 dtd->hasParamEntityRefs = XML_TRUE;
5051 if (!parser->m_paramEntityParsing)
5052 dtd->keepProcessing = dtd->standalone;
5053 else {
5054 const XML_Char *name;
5055 ENTITY *entity;
5056 name = poolStoreString(&dtd->pool, enc,
5057 s + enc->minBytesPerChar,
5058 next - enc->minBytesPerChar);
5059 if (!name)
5060 return XML_ERROR_NO_MEMORY;
5061 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5062 poolDiscard(&dtd->pool);
5063 /* first, determine if a check for an existing declaration is needed;
5064 if yes, check that the entity exists, and that it is internal,
5065 otherwise call the skipped entity handler
5066 */
5067 if (parser->m_prologState.documentEntity &&
5068 (dtd->standalone
5069 ? !parser->m_openInternalEntities
5070 : !dtd->hasParamEntityRefs)) {
5071 if (!entity)
5072 return XML_ERROR_UNDEFINED_ENTITY;
5073 else if (!entity->is_internal) {
5074 /* It's hard to exhaustively search the code to be sure,
5075 * but there doesn't seem to be a way of executing the
5076 * following line. There are two cases:
5077 *
5078 * If 'standalone' is false, the DTD must have no
5079 * parameter entities or we wouldn't have passed the outer
5080 * 'if' statement. That means the only entity in the hash
5081 * table is the external subset name "#" which cannot be
5082 * given as a parameter entity name in XML syntax, so the
5083 * lookup must have returned NULL and we don't even reach
5084 * the test for an internal entity.
5085 *
5086 * If 'standalone' is true, it does not seem to be
5087 * possible to create entities taking this code path that
5088 * are not internal entities, so fail the test above.
5089 *
5090 * Because this analysis is very uncertain, the code is
5091 * being left in place and merely removed from the
5092 * coverage test statistics.
5093 */
5094 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5095 }
5096 }
5097 else if (!entity) {
5098 dtd->keepProcessing = dtd->standalone;
5099 /* cannot report skipped entities in declarations */
5100 if ((role == XML_ROLE_PARAM_ENTITY_REF) && parser->m_skippedEntityHandler) {
5101 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
5102 handleDefault = XML_FALSE;
5103 }
5104 break;
5105 }
5106 if (entity->open)
5107 return XML_ERROR_RECURSIVE_ENTITY_REF;
5108 if (entity->textPtr) {
5109 enum XML_Error result;
5110 XML_Bool betweenDecl =
5111 (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5112 result = processInternalEntity(parser, entity, betweenDecl);
5113 if (result != XML_ERROR_NONE)
5114 return result;
5115 handleDefault = XML_FALSE;
5116 break;
5117 }
5118 if (parser->m_externalEntityRefHandler) {
5119 dtd->paramEntityRead = XML_FALSE;
5120 entity->open = XML_TRUE;
5121 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
5122 0,
5123 entity->base,
5124 entity->systemId,
5125 entity->publicId)) {
5126 entity->open = XML_FALSE;
5127 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5128 }
5129 entity->open = XML_FALSE;
5130 handleDefault = XML_FALSE;
5131 if (!dtd->paramEntityRead) {
5132 dtd->keepProcessing = dtd->standalone;
5133 break;
5134 }
5135 }
5136 else {
5137 dtd->keepProcessing = dtd->standalone;
5138 break;
5139 }
5140 }
5141 #endif /* XML_DTD */
5142 if (!dtd->standalone &&
5143 parser->m_notStandaloneHandler &&
5144 !parser->m_notStandaloneHandler(parser->m_handlerArg))
5145 return XML_ERROR_NOT_STANDALONE;
5146 break;
5147
5148 /* Element declaration stuff */
5149
5150 case XML_ROLE_ELEMENT_NAME:
5151 if (parser->m_elementDeclHandler) {
5152 parser->m_declElementType = getElementType(parser, enc, s, next);
5153 if (!parser->m_declElementType)
5154 return XML_ERROR_NO_MEMORY;
5155 dtd->scaffLevel = 0;
5156 dtd->scaffCount = 0;
5157 dtd->in_eldecl = XML_TRUE;
5158 handleDefault = XML_FALSE;
5159 }
5160 break;
5161
5162 case XML_ROLE_CONTENT_ANY:
5163 case XML_ROLE_CONTENT_EMPTY:
5164 if (dtd->in_eldecl) {
5165 if (parser->m_elementDeclHandler) {
5166 XML_Content * content = (XML_Content *) MALLOC(parser, sizeof(XML_Content));
5167 if (!content)
5168 return XML_ERROR_NO_MEMORY;
5169 content->quant = XML_CQUANT_NONE;
5170 content->name = NULL;
5171 content->numchildren = 0;
5172 content->children = NULL;
5173 content->type = ((role == XML_ROLE_CONTENT_ANY) ?
5174 XML_CTYPE_ANY :
5175 XML_CTYPE_EMPTY);
5176 *eventEndPP = s;
5177 parser->m_elementDeclHandler(parser->m_handlerArg, parser->m_declElementType->name, content);
5178 handleDefault = XML_FALSE;
5179 }
5180 dtd->in_eldecl = XML_FALSE;
5181 }
5182 break;
5183
5184 case XML_ROLE_CONTENT_PCDATA:
5185 if (dtd->in_eldecl) {
5186 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5187 = XML_CTYPE_MIXED;
5188 if (parser->m_elementDeclHandler)
5189 handleDefault = XML_FALSE;
5190 }
5191 break;
5192
5193 case XML_ROLE_CONTENT_ELEMENT:
5194 quant = XML_CQUANT_NONE;
5195 goto elementContent;
5196 case XML_ROLE_CONTENT_ELEMENT_OPT:
5197 quant = XML_CQUANT_OPT;
5198 goto elementContent;
5199 case XML_ROLE_CONTENT_ELEMENT_REP:
5200 quant = XML_CQUANT_REP;
5201 goto elementContent;
5202 case XML_ROLE_CONTENT_ELEMENT_PLUS:
5203 quant = XML_CQUANT_PLUS;
5204 elementContent:
5205 if (dtd->in_eldecl) {
5206 ELEMENT_TYPE *el;
5207 const XML_Char *name;
5208 int nameLen;
5209 const char *nxt = (quant == XML_CQUANT_NONE
5210 ? next
5211 : next - enc->minBytesPerChar);
5212 int myindex = nextScaffoldPart(parser);
5213 if (myindex < 0)
5214 return XML_ERROR_NO_MEMORY;
5215 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5216 dtd->scaffold[myindex].quant = quant;
5217 el = getElementType(parser, enc, s, nxt);
5218 if (!el)
5219 return XML_ERROR_NO_MEMORY;
5220 name = el->name;
5221 dtd->scaffold[myindex].name = name;
5222 nameLen = 0;
5223 for (; name[nameLen++]; );
5224 dtd->contentStringLen += nameLen;
5225 if (parser->m_elementDeclHandler)
5226 handleDefault = XML_FALSE;
5227 }
5228 break;
5229
5230 case XML_ROLE_GROUP_CLOSE:
5231 quant = XML_CQUANT_NONE;
5232 goto closeGroup;
5233 case XML_ROLE_GROUP_CLOSE_OPT:
5234 quant = XML_CQUANT_OPT;
5235 goto closeGroup;
5236 case XML_ROLE_GROUP_CLOSE_REP:
5237 quant = XML_CQUANT_REP;
5238 goto closeGroup;
5239 case XML_ROLE_GROUP_CLOSE_PLUS:
5240 quant = XML_CQUANT_PLUS;
5241 closeGroup:
5242 if (dtd->in_eldecl) {
5243 if (parser->m_elementDeclHandler)
5244 handleDefault = XML_FALSE;
5245 dtd->scaffLevel--;
5246 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5247 if (dtd->scaffLevel == 0) {
5248 if (!handleDefault) {
5249 XML_Content *model = build_model(parser);
5250 if (!model)
5251 return XML_ERROR_NO_MEMORY;
5252 *eventEndPP = s;
5253 parser->m_elementDeclHandler(parser->m_handlerArg, parser->m_declElementType->name, model);
5254 }
5255 dtd->in_eldecl = XML_FALSE;
5256 dtd->contentStringLen = 0;
5257 }
5258 }
5259 break;
5260 /* End element declaration stuff */
5261
5262 case XML_ROLE_PI:
5263 if (!reportProcessingInstruction(parser, enc, s, next))
5264 return XML_ERROR_NO_MEMORY;
5265 handleDefault = XML_FALSE;
5266 break;
5267 case XML_ROLE_COMMENT:
5268 if (!reportComment(parser, enc, s, next))
5269 return XML_ERROR_NO_MEMORY;
5270 handleDefault = XML_FALSE;
5271 break;
5272 case XML_ROLE_NONE:
5273 switch (tok) {
5274 case XML_TOK_BOM:
5275 handleDefault = XML_FALSE;
5276 break;
5277 }
5278 break;
5279 case XML_ROLE_DOCTYPE_NONE:
5280 if (parser->m_startDoctypeDeclHandler)
5281 handleDefault = XML_FALSE;
5282 break;
5283 case XML_ROLE_ENTITY_NONE:
5284 if (dtd->keepProcessing && parser->m_entityDeclHandler)
5285 handleDefault = XML_FALSE;
5286 break;
5287 case XML_ROLE_NOTATION_NONE:
5288 if (parser->m_notationDeclHandler)
5289 handleDefault = XML_FALSE;
5290 break;
5291 case XML_ROLE_ATTLIST_NONE:
5292 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5293 handleDefault = XML_FALSE;
5294 break;
5295 case XML_ROLE_ELEMENT_NONE:
5296 if (parser->m_elementDeclHandler)
5297 handleDefault = XML_FALSE;
5298 break;
5299 } /* end of big switch */
5300
5301 if (handleDefault && parser->m_defaultHandler)
5302 reportDefault(parser, enc, s, next);
5303
5304 switch (parser->m_parsingStatus.parsing) {
5305 case XML_SUSPENDED:
5306 *nextPtr = next;
5307 return XML_ERROR_NONE;
5308 case XML_FINISHED:
5309 return XML_ERROR_ABORTED;
5310 default:
5311 s = next;
5312 tok = XmlPrologTok(enc, s, end, &next);
5313 }
5314 }
5315 /* not reached */
5316 }
5317
5318 static enum XML_Error PTRCALL
epilogProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5319 epilogProcessor(XML_Parser parser,
5320 const char *s,
5321 const char *end,
5322 const char **nextPtr)
5323 {
5324 parser->m_processor = epilogProcessor;
5325 parser->m_eventPtr = s;
5326 for (;;) {
5327 const char *next = NULL;
5328 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5329 parser->m_eventEndPtr = next;
5330 switch (tok) {
5331 /* report partial linebreak - it might be the last token */
5332 case -XML_TOK_PROLOG_S:
5333 if (parser->m_defaultHandler) {
5334 reportDefault(parser, parser->m_encoding, s, next);
5335 if (parser->m_parsingStatus.parsing == XML_FINISHED)
5336 return XML_ERROR_ABORTED;
5337 }
5338 *nextPtr = next;
5339 return XML_ERROR_NONE;
5340 case XML_TOK_NONE:
5341 *nextPtr = s;
5342 return XML_ERROR_NONE;
5343 case XML_TOK_PROLOG_S:
5344 if (parser->m_defaultHandler)
5345 reportDefault(parser, parser->m_encoding, s, next);
5346 break;
5347 case XML_TOK_PI:
5348 if (!reportProcessingInstruction(parser, parser->m_encoding, s, next))
5349 return XML_ERROR_NO_MEMORY;
5350 break;
5351 case XML_TOK_COMMENT:
5352 if (!reportComment(parser, parser->m_encoding, s, next))
5353 return XML_ERROR_NO_MEMORY;
5354 break;
5355 case XML_TOK_INVALID:
5356 parser->m_eventPtr = next;
5357 return XML_ERROR_INVALID_TOKEN;
5358 case XML_TOK_PARTIAL:
5359 if (!parser->m_parsingStatus.finalBuffer) {
5360 *nextPtr = s;
5361 return XML_ERROR_NONE;
5362 }
5363 return XML_ERROR_UNCLOSED_TOKEN;
5364 case XML_TOK_PARTIAL_CHAR:
5365 if (!parser->m_parsingStatus.finalBuffer) {
5366 *nextPtr = s;
5367 return XML_ERROR_NONE;
5368 }
5369 return XML_ERROR_PARTIAL_CHAR;
5370 default:
5371 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5372 }
5373 parser->m_eventPtr = s = next;
5374 switch (parser->m_parsingStatus.parsing) {
5375 case XML_SUSPENDED:
5376 *nextPtr = next;
5377 return XML_ERROR_NONE;
5378 case XML_FINISHED:
5379 return XML_ERROR_ABORTED;
5380 default: ;
5381 }
5382 }
5383 }
5384
5385 static enum XML_Error
processInternalEntity(XML_Parser parser,ENTITY * entity,XML_Bool betweenDecl)5386 processInternalEntity(XML_Parser parser, ENTITY *entity,
5387 XML_Bool betweenDecl)
5388 {
5389 const char *textStart, *textEnd;
5390 const char *next;
5391 enum XML_Error result;
5392 OPEN_INTERNAL_ENTITY *openEntity;
5393
5394 if (parser->m_freeInternalEntities) {
5395 openEntity = parser->m_freeInternalEntities;
5396 parser->m_freeInternalEntities = openEntity->next;
5397 }
5398 else {
5399 openEntity = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
5400 if (!openEntity)
5401 return XML_ERROR_NO_MEMORY;
5402 }
5403 entity->open = XML_TRUE;
5404 entity->processed = 0;
5405 openEntity->next = parser->m_openInternalEntities;
5406 parser->m_openInternalEntities = openEntity;
5407 openEntity->entity = entity;
5408 openEntity->startTagLevel = parser->m_tagLevel;
5409 openEntity->betweenDecl = betweenDecl;
5410 openEntity->internalEventPtr = NULL;
5411 openEntity->internalEventEndPtr = NULL;
5412 textStart = (char *)entity->textPtr;
5413 textEnd = (char *)(entity->textPtr + entity->textLen);
5414 /* Set a safe default value in case 'next' does not get set */
5415 next = textStart;
5416
5417 #ifdef XML_DTD
5418 if (entity->is_param) {
5419 int tok = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5420 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, tok,
5421 next, &next, XML_FALSE);
5422 }
5423 else
5424 #endif /* XML_DTD */
5425 result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding, textStart,
5426 textEnd, &next, XML_FALSE);
5427
5428 if (result == XML_ERROR_NONE) {
5429 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5430 entity->processed = (int)(next - textStart);
5431 parser->m_processor = internalEntityProcessor;
5432 }
5433 else {
5434 entity->open = XML_FALSE;
5435 parser->m_openInternalEntities = openEntity->next;
5436 /* put openEntity back in list of free instances */
5437 openEntity->next = parser->m_freeInternalEntities;
5438 parser->m_freeInternalEntities = openEntity;
5439 }
5440 }
5441 return result;
5442 }
5443
5444 static enum XML_Error PTRCALL
internalEntityProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5445 internalEntityProcessor(XML_Parser parser,
5446 const char *s,
5447 const char *end,
5448 const char **nextPtr)
5449 {
5450 ENTITY *entity;
5451 const char *textStart, *textEnd;
5452 const char *next;
5453 enum XML_Error result;
5454 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
5455 if (!openEntity)
5456 return XML_ERROR_UNEXPECTED_STATE;
5457
5458 entity = openEntity->entity;
5459 textStart = ((char *)entity->textPtr) + entity->processed;
5460 textEnd = (char *)(entity->textPtr + entity->textLen);
5461 /* Set a safe default value in case 'next' does not get set */
5462 next = textStart;
5463
5464 #ifdef XML_DTD
5465 if (entity->is_param) {
5466 int tok = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5467 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, tok,
5468 next, &next, XML_FALSE);
5469 }
5470 else
5471 #endif /* XML_DTD */
5472 result = doContent(parser, openEntity->startTagLevel, parser->m_internalEncoding,
5473 textStart, textEnd, &next, XML_FALSE);
5474
5475 if (result != XML_ERROR_NONE)
5476 return result;
5477 else if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5478 entity->processed = (int)(next - (char *)entity->textPtr);
5479 return result;
5480 }
5481 else {
5482 entity->open = XML_FALSE;
5483 parser->m_openInternalEntities = openEntity->next;
5484 /* put openEntity back in list of free instances */
5485 openEntity->next = parser->m_freeInternalEntities;
5486 parser->m_freeInternalEntities = openEntity;
5487 }
5488
5489 #ifdef XML_DTD
5490 if (entity->is_param) {
5491 int tok;
5492 parser->m_processor = prologProcessor;
5493 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5494 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5495 (XML_Bool)!parser->m_parsingStatus.finalBuffer);
5496 }
5497 else
5498 #endif /* XML_DTD */
5499 {
5500 parser->m_processor = contentProcessor;
5501 /* see externalEntityContentProcessor vs contentProcessor */
5502 return doContent(parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, s, end,
5503 nextPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
5504 }
5505 }
5506
5507 static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser,const char * UNUSED_P (s),const char * UNUSED_P (end),const char ** UNUSED_P (nextPtr))5508 errorProcessor(XML_Parser parser,
5509 const char *UNUSED_P(s),
5510 const char *UNUSED_P(end),
5511 const char **UNUSED_P(nextPtr))
5512 {
5513 return parser->m_errorCode;
5514 }
5515
5516 static enum XML_Error
storeAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool)5517 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5518 const char *ptr, const char *end,
5519 STRING_POOL *pool)
5520 {
5521 enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr,
5522 end, pool);
5523 if (result)
5524 return result;
5525 if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
5526 poolChop(pool);
5527 if (!poolAppendChar(pool, XML_T('\0')))
5528 return XML_ERROR_NO_MEMORY;
5529 return XML_ERROR_NONE;
5530 }
5531
5532 static enum XML_Error
appendAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool)5533 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5534 const char *ptr, const char *end,
5535 STRING_POOL *pool)
5536 {
5537 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
5538 for (;;) {
5539 const char *next;
5540 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
5541 switch (tok) {
5542 case XML_TOK_NONE:
5543 return XML_ERROR_NONE;
5544 case XML_TOK_INVALID:
5545 if (enc == parser->m_encoding)
5546 parser->m_eventPtr = next;
5547 return XML_ERROR_INVALID_TOKEN;
5548 case XML_TOK_PARTIAL:
5549 if (enc == parser->m_encoding)
5550 parser->m_eventPtr = ptr;
5551 return XML_ERROR_INVALID_TOKEN;
5552 case XML_TOK_CHAR_REF:
5553 {
5554 XML_Char buf[XML_ENCODE_MAX];
5555 int i;
5556 int n = XmlCharRefNumber(enc, ptr);
5557 if (n < 0) {
5558 if (enc == parser->m_encoding)
5559 parser->m_eventPtr = ptr;
5560 return XML_ERROR_BAD_CHAR_REF;
5561 }
5562 if (!isCdata
5563 && n == 0x20 /* space */
5564 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5565 break;
5566 n = XmlEncode(n, (ICHAR *)buf);
5567 /* The XmlEncode() functions can never return 0 here. That
5568 * error return happens if the code point passed in is either
5569 * negative or greater than or equal to 0x110000. The
5570 * XmlCharRefNumber() functions will all return a number
5571 * strictly less than 0x110000 or a negative value if an error
5572 * occurred. The negative value is intercepted above, so
5573 * XmlEncode() is never passed a value it might return an
5574 * error for.
5575 */
5576 for (i = 0; i < n; i++) {
5577 if (!poolAppendChar(pool, buf[i]))
5578 return XML_ERROR_NO_MEMORY;
5579 }
5580 }
5581 break;
5582 case XML_TOK_DATA_CHARS:
5583 if (!poolAppend(pool, enc, ptr, next))
5584 return XML_ERROR_NO_MEMORY;
5585 break;
5586 case XML_TOK_TRAILING_CR:
5587 next = ptr + enc->minBytesPerChar;
5588 /* fall through */
5589 case XML_TOK_ATTRIBUTE_VALUE_S:
5590 case XML_TOK_DATA_NEWLINE:
5591 if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5592 break;
5593 if (!poolAppendChar(pool, 0x20))
5594 return XML_ERROR_NO_MEMORY;
5595 break;
5596 case XML_TOK_ENTITY_REF:
5597 {
5598 const XML_Char *name;
5599 ENTITY *entity;
5600 char checkEntityDecl;
5601 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
5602 ptr + enc->minBytesPerChar,
5603 next - enc->minBytesPerChar);
5604 if (ch) {
5605 if (!poolAppendChar(pool, ch))
5606 return XML_ERROR_NO_MEMORY;
5607 break;
5608 }
5609 name = poolStoreString(&parser->m_temp2Pool, enc,
5610 ptr + enc->minBytesPerChar,
5611 next - enc->minBytesPerChar);
5612 if (!name)
5613 return XML_ERROR_NO_MEMORY;
5614 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
5615 poolDiscard(&parser->m_temp2Pool);
5616 /* First, determine if a check for an existing declaration is needed;
5617 if yes, check that the entity exists, and that it is internal.
5618 */
5619 if (pool == &dtd->pool) /* are we called from prolog? */
5620 checkEntityDecl =
5621 #ifdef XML_DTD
5622 parser->m_prologState.documentEntity &&
5623 #endif /* XML_DTD */
5624 (dtd->standalone
5625 ? !parser->m_openInternalEntities
5626 : !dtd->hasParamEntityRefs);
5627 else /* if (pool == &parser->m_tempPool): we are called from content */
5628 checkEntityDecl = !dtd->hasParamEntityRefs || dtd->standalone;
5629 if (checkEntityDecl) {
5630 if (!entity)
5631 return XML_ERROR_UNDEFINED_ENTITY;
5632 else if (!entity->is_internal)
5633 return XML_ERROR_ENTITY_DECLARED_IN_PE;
5634 }
5635 else if (!entity) {
5636 /* Cannot report skipped entity here - see comments on
5637 parser->m_skippedEntityHandler.
5638 if (parser->m_skippedEntityHandler)
5639 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
5640 */
5641 /* Cannot call the default handler because this would be
5642 out of sync with the call to the startElementHandler.
5643 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
5644 reportDefault(parser, enc, ptr, next);
5645 */
5646 break;
5647 }
5648 if (entity->open) {
5649 if (enc == parser->m_encoding) {
5650 /* It does not appear that this line can be executed.
5651 *
5652 * The "if (entity->open)" check catches recursive entity
5653 * definitions. In order to be called with an open
5654 * entity, it must have gone through this code before and
5655 * been through the recursive call to
5656 * appendAttributeValue() some lines below. That call
5657 * sets the local encoding ("enc") to the parser's
5658 * internal encoding (internal_utf8 or internal_utf16),
5659 * which can never be the same as the principle encoding.
5660 * It doesn't appear there is another code path that gets
5661 * here with entity->open being TRUE.
5662 *
5663 * Since it is not certain that this logic is watertight,
5664 * we keep the line and merely exclude it from coverage
5665 * tests.
5666 */
5667 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
5668 }
5669 return XML_ERROR_RECURSIVE_ENTITY_REF;
5670 }
5671 if (entity->notation) {
5672 if (enc == parser->m_encoding)
5673 parser->m_eventPtr = ptr;
5674 return XML_ERROR_BINARY_ENTITY_REF;
5675 }
5676 if (!entity->textPtr) {
5677 if (enc == parser->m_encoding)
5678 parser->m_eventPtr = ptr;
5679 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
5680 }
5681 else {
5682 enum XML_Error result;
5683 const XML_Char *textEnd = entity->textPtr + entity->textLen;
5684 entity->open = XML_TRUE;
5685 result = appendAttributeValue(parser, parser->m_internalEncoding, isCdata,
5686 (char *)entity->textPtr,
5687 (char *)textEnd, pool);
5688 entity->open = XML_FALSE;
5689 if (result)
5690 return result;
5691 }
5692 }
5693 break;
5694 default:
5695 /* The only token returned by XmlAttributeValueTok() that does
5696 * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
5697 * Getting that would require an entity name to contain an
5698 * incomplete XML character (e.g. \xE2\x82); however previous
5699 * tokenisers will have already recognised and rejected such
5700 * names before XmlAttributeValueTok() gets a look-in. This
5701 * default case should be retained as a safety net, but the code
5702 * excluded from coverage tests.
5703 *
5704 * LCOV_EXCL_START
5705 */
5706 if (enc == parser->m_encoding)
5707 parser->m_eventPtr = ptr;
5708 return XML_ERROR_UNEXPECTED_STATE;
5709 /* LCOV_EXCL_STOP */
5710 }
5711 ptr = next;
5712 }
5713 /* not reached */
5714 }
5715
5716 static enum XML_Error
storeEntityValue(XML_Parser parser,const ENCODING * enc,const char * entityTextPtr,const char * entityTextEnd)5717 storeEntityValue(XML_Parser parser,
5718 const ENCODING *enc,
5719 const char *entityTextPtr,
5720 const char *entityTextEnd)
5721 {
5722 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
5723 STRING_POOL *pool = &(dtd->entityValuePool);
5724 enum XML_Error result = XML_ERROR_NONE;
5725 #ifdef XML_DTD
5726 int oldInEntityValue = parser->m_prologState.inEntityValue;
5727 parser->m_prologState.inEntityValue = 1;
5728 #endif /* XML_DTD */
5729 /* never return Null for the value argument in EntityDeclHandler,
5730 since this would indicate an external entity; therefore we
5731 have to make sure that entityValuePool.start is not null */
5732 if (!pool->blocks) {
5733 if (!poolGrow(pool))
5734 return XML_ERROR_NO_MEMORY;
5735 }
5736
5737 for (;;) {
5738 const char *next;
5739 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
5740 switch (tok) {
5741 case XML_TOK_PARAM_ENTITY_REF:
5742 #ifdef XML_DTD
5743 if (parser->m_isParamEntity || enc != parser->m_encoding) {
5744 const XML_Char *name;
5745 ENTITY *entity;
5746 name = poolStoreString(&parser->m_tempPool, enc,
5747 entityTextPtr + enc->minBytesPerChar,
5748 next - enc->minBytesPerChar);
5749 if (!name) {
5750 result = XML_ERROR_NO_MEMORY;
5751 goto endEntityValue;
5752 }
5753 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5754 poolDiscard(&parser->m_tempPool);
5755 if (!entity) {
5756 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
5757 /* cannot report skipped entity here - see comments on
5758 parser->m_skippedEntityHandler
5759 if (parser->m_skippedEntityHandler)
5760 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
5761 */
5762 dtd->keepProcessing = dtd->standalone;
5763 goto endEntityValue;
5764 }
5765 if (entity->open) {
5766 if (enc == parser->m_encoding)
5767 parser->m_eventPtr = entityTextPtr;
5768 result = XML_ERROR_RECURSIVE_ENTITY_REF;
5769 goto endEntityValue;
5770 }
5771 if (entity->systemId) {
5772 if (parser->m_externalEntityRefHandler) {
5773 dtd->paramEntityRead = XML_FALSE;
5774 entity->open = XML_TRUE;
5775 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
5776 0,
5777 entity->base,
5778 entity->systemId,
5779 entity->publicId)) {
5780 entity->open = XML_FALSE;
5781 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5782 goto endEntityValue;
5783 }
5784 entity->open = XML_FALSE;
5785 if (!dtd->paramEntityRead)
5786 dtd->keepProcessing = dtd->standalone;
5787 }
5788 else
5789 dtd->keepProcessing = dtd->standalone;
5790 }
5791 else {
5792 entity->open = XML_TRUE;
5793 result = storeEntityValue(parser,
5794 parser->m_internalEncoding,
5795 (char *)entity->textPtr,
5796 (char *)(entity->textPtr
5797 + entity->textLen));
5798 entity->open = XML_FALSE;
5799 if (result)
5800 goto endEntityValue;
5801 }
5802 break;
5803 }
5804 #endif /* XML_DTD */
5805 /* In the internal subset, PE references are not legal
5806 within markup declarations, e.g entity values in this case. */
5807 parser->m_eventPtr = entityTextPtr;
5808 result = XML_ERROR_PARAM_ENTITY_REF;
5809 goto endEntityValue;
5810 case XML_TOK_NONE:
5811 result = XML_ERROR_NONE;
5812 goto endEntityValue;
5813 case XML_TOK_ENTITY_REF:
5814 case XML_TOK_DATA_CHARS:
5815 if (!poolAppend(pool, enc, entityTextPtr, next)) {
5816 result = XML_ERROR_NO_MEMORY;
5817 goto endEntityValue;
5818 }
5819 break;
5820 case XML_TOK_TRAILING_CR:
5821 next = entityTextPtr + enc->minBytesPerChar;
5822 /* fall through */
5823 case XML_TOK_DATA_NEWLINE:
5824 if (pool->end == pool->ptr && !poolGrow(pool)) {
5825 result = XML_ERROR_NO_MEMORY;
5826 goto endEntityValue;
5827 }
5828 *(pool->ptr)++ = 0xA;
5829 break;
5830 case XML_TOK_CHAR_REF:
5831 {
5832 XML_Char buf[XML_ENCODE_MAX];
5833 int i;
5834 int n = XmlCharRefNumber(enc, entityTextPtr);
5835 if (n < 0) {
5836 if (enc == parser->m_encoding)
5837 parser->m_eventPtr = entityTextPtr;
5838 result = XML_ERROR_BAD_CHAR_REF;
5839 goto endEntityValue;
5840 }
5841 n = XmlEncode(n, (ICHAR *)buf);
5842 /* The XmlEncode() functions can never return 0 here. That
5843 * error return happens if the code point passed in is either
5844 * negative or greater than or equal to 0x110000. The
5845 * XmlCharRefNumber() functions will all return a number
5846 * strictly less than 0x110000 or a negative value if an error
5847 * occurred. The negative value is intercepted above, so
5848 * XmlEncode() is never passed a value it might return an
5849 * error for.
5850 */
5851 for (i = 0; i < n; i++) {
5852 if (pool->end == pool->ptr && !poolGrow(pool)) {
5853 result = XML_ERROR_NO_MEMORY;
5854 goto endEntityValue;
5855 }
5856 *(pool->ptr)++ = buf[i];
5857 }
5858 }
5859 break;
5860 case XML_TOK_PARTIAL:
5861 if (enc == parser->m_encoding)
5862 parser->m_eventPtr = entityTextPtr;
5863 result = XML_ERROR_INVALID_TOKEN;
5864 goto endEntityValue;
5865 case XML_TOK_INVALID:
5866 if (enc == parser->m_encoding)
5867 parser->m_eventPtr = next;
5868 result = XML_ERROR_INVALID_TOKEN;
5869 goto endEntityValue;
5870 default:
5871 /* This default case should be unnecessary -- all the tokens
5872 * that XmlEntityValueTok() can return have their own explicit
5873 * cases -- but should be retained for safety. We do however
5874 * exclude it from the coverage statistics.
5875 *
5876 * LCOV_EXCL_START
5877 */
5878 if (enc == parser->m_encoding)
5879 parser->m_eventPtr = entityTextPtr;
5880 result = XML_ERROR_UNEXPECTED_STATE;
5881 goto endEntityValue;
5882 /* LCOV_EXCL_STOP */
5883 }
5884 entityTextPtr = next;
5885 }
5886 endEntityValue:
5887 #ifdef XML_DTD
5888 parser->m_prologState.inEntityValue = oldInEntityValue;
5889 #endif /* XML_DTD */
5890 return result;
5891 }
5892
5893 static void FASTCALL
normalizeLines(XML_Char * s)5894 normalizeLines(XML_Char *s)
5895 {
5896 XML_Char *p;
5897 for (;; s++) {
5898 if (*s == XML_T('\0'))
5899 return;
5900 if (*s == 0xD)
5901 break;
5902 }
5903 p = s;
5904 do {
5905 if (*s == 0xD) {
5906 *p++ = 0xA;
5907 if (*++s == 0xA)
5908 s++;
5909 }
5910 else
5911 *p++ = *s++;
5912 } while (*s);
5913 *p = XML_T('\0');
5914 }
5915
5916 static int
reportProcessingInstruction(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)5917 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
5918 const char *start, const char *end)
5919 {
5920 const XML_Char *target;
5921 XML_Char *data;
5922 const char *tem;
5923 if (!parser->m_processingInstructionHandler) {
5924 if (parser->m_defaultHandler)
5925 reportDefault(parser, enc, start, end);
5926 return 1;
5927 }
5928 start += enc->minBytesPerChar * 2;
5929 tem = start + XmlNameLength(enc, start);
5930 target = poolStoreString(&parser->m_tempPool, enc, start, tem);
5931 if (!target)
5932 return 0;
5933 poolFinish(&parser->m_tempPool);
5934 data = poolStoreString(&parser->m_tempPool, enc,
5935 XmlSkipS(enc, tem),
5936 end - enc->minBytesPerChar*2);
5937 if (!data)
5938 return 0;
5939 normalizeLines(data);
5940 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
5941 poolClear(&parser->m_tempPool);
5942 return 1;
5943 }
5944
5945 static int
reportComment(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)5946 reportComment(XML_Parser parser, const ENCODING *enc,
5947 const char *start, const char *end)
5948 {
5949 XML_Char *data;
5950 if (!parser->m_commentHandler) {
5951 if (parser->m_defaultHandler)
5952 reportDefault(parser, enc, start, end);
5953 return 1;
5954 }
5955 data = poolStoreString(&parser->m_tempPool,
5956 enc,
5957 start + enc->minBytesPerChar * 4,
5958 end - enc->minBytesPerChar * 3);
5959 if (!data)
5960 return 0;
5961 normalizeLines(data);
5962 parser->m_commentHandler(parser->m_handlerArg, data);
5963 poolClear(&parser->m_tempPool);
5964 return 1;
5965 }
5966
5967 static void
reportDefault(XML_Parser parser,const ENCODING * enc,const char * s,const char * end)5968 reportDefault(XML_Parser parser, const ENCODING *enc,
5969 const char *s, const char *end)
5970 {
5971 if (MUST_CONVERT(enc, s)) {
5972 enum XML_Convert_Result convert_res;
5973 const char **eventPP;
5974 const char **eventEndPP;
5975 if (enc == parser->m_encoding) {
5976 eventPP = &parser->m_eventPtr;
5977 eventEndPP = &parser->m_eventEndPtr;
5978 }
5979 else {
5980 /* To get here, two things must be true; the parser must be
5981 * using a character encoding that is not the same as the
5982 * encoding passed in, and the encoding passed in must need
5983 * conversion to the internal format (UTF-8 unless XML_UNICODE
5984 * is defined). The only occasions on which the encoding passed
5985 * in is not the same as the parser's encoding are when it is
5986 * the internal encoding (e.g. a previously defined parameter
5987 * entity, already converted to internal format). This by
5988 * definition doesn't need conversion, so the whole branch never
5989 * gets executed.
5990 *
5991 * For safety's sake we don't delete these lines and merely
5992 * exclude them from coverage statistics.
5993 *
5994 * LCOV_EXCL_START
5995 */
5996 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
5997 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
5998 /* LCOV_EXCL_STOP */
5999 }
6000 do {
6001 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
6002 convert_res = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
6003 *eventEndPP = s;
6004 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf, (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
6005 *eventPP = s;
6006 } while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
6007 }
6008 else
6009 parser->m_defaultHandler(parser->m_handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s));
6010 }
6011
6012
6013 static int
defineAttribute(ELEMENT_TYPE * type,ATTRIBUTE_ID * attId,XML_Bool isCdata,XML_Bool isId,const XML_Char * value,XML_Parser parser)6014 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
6015 XML_Bool isId, const XML_Char *value, XML_Parser parser)
6016 {
6017 DEFAULT_ATTRIBUTE *att;
6018 if (value || isId) {
6019 /* The handling of default attributes gets messed up if we have
6020 a default which duplicates a non-default. */
6021 int i;
6022 for (i = 0; i < type->nDefaultAtts; i++)
6023 if (attId == type->defaultAtts[i].id)
6024 return 1;
6025 if (isId && !type->idAtt && !attId->xmlns)
6026 type->idAtt = attId;
6027 }
6028 if (type->nDefaultAtts == type->allocDefaultAtts) {
6029 if (type->allocDefaultAtts == 0) {
6030 type->allocDefaultAtts = 8;
6031 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(parser, type->allocDefaultAtts
6032 * sizeof(DEFAULT_ATTRIBUTE));
6033 if (!type->defaultAtts) {
6034 type->allocDefaultAtts = 0;
6035 return 0;
6036 }
6037 }
6038 else {
6039 DEFAULT_ATTRIBUTE *temp;
6040 int count = type->allocDefaultAtts * 2;
6041 temp = (DEFAULT_ATTRIBUTE *)
6042 REALLOC(parser, type->defaultAtts, (count * sizeof(DEFAULT_ATTRIBUTE)));
6043 if (temp == NULL)
6044 return 0;
6045 type->allocDefaultAtts = count;
6046 type->defaultAtts = temp;
6047 }
6048 }
6049 att = type->defaultAtts + type->nDefaultAtts;
6050 att->id = attId;
6051 att->value = value;
6052 att->isCdata = isCdata;
6053 if (!isCdata)
6054 attId->maybeTokenized = XML_TRUE;
6055 type->nDefaultAtts += 1;
6056 return 1;
6057 }
6058
6059 static int
setElementTypePrefix(XML_Parser parser,ELEMENT_TYPE * elementType)6060 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType)
6061 {
6062 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
6063 const XML_Char *name;
6064 for (name = elementType->name; *name; name++) {
6065 if (*name == XML_T(ASCII_COLON)) {
6066 PREFIX *prefix;
6067 const XML_Char *s;
6068 for (s = elementType->name; s != name; s++) {
6069 if (!poolAppendChar(&dtd->pool, *s))
6070 return 0;
6071 }
6072 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
6073 return 0;
6074 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
6075 sizeof(PREFIX));
6076 if (!prefix)
6077 return 0;
6078 if (prefix->name == poolStart(&dtd->pool))
6079 poolFinish(&dtd->pool);
6080 else
6081 poolDiscard(&dtd->pool);
6082 elementType->prefix = prefix;
6083
6084 }
6085 }
6086 return 1;
6087 }
6088
6089 static ATTRIBUTE_ID *
getAttributeId(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6090 getAttributeId(XML_Parser parser, const ENCODING *enc,
6091 const char *start, const char *end)
6092 {
6093 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
6094 ATTRIBUTE_ID *id;
6095 const XML_Char *name;
6096 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
6097 return NULL;
6098 name = poolStoreString(&dtd->pool, enc, start, end);
6099 if (!name)
6100 return NULL;
6101 /* skip quotation mark - its storage will be re-used (like in name[-1]) */
6102 ++name;
6103 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, sizeof(ATTRIBUTE_ID));
6104 if (!id)
6105 return NULL;
6106 if (id->name != name)
6107 poolDiscard(&dtd->pool);
6108 else {
6109 poolFinish(&dtd->pool);
6110 if (!parser->m_ns)
6111 ;
6112 else if (name[0] == XML_T(ASCII_x)
6113 && name[1] == XML_T(ASCII_m)
6114 && name[2] == XML_T(ASCII_l)
6115 && name[3] == XML_T(ASCII_n)
6116 && name[4] == XML_T(ASCII_s)
6117 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
6118 if (name[5] == XML_T('\0'))
6119 id->prefix = &dtd->defaultPrefix;
6120 else
6121 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, sizeof(PREFIX));
6122 id->xmlns = XML_TRUE;
6123 }
6124 else {
6125 int i;
6126 for (i = 0; name[i]; i++) {
6127 /* attributes without prefix are *not* in the default namespace */
6128 if (name[i] == XML_T(ASCII_COLON)) {
6129 int j;
6130 for (j = 0; j < i; j++) {
6131 if (!poolAppendChar(&dtd->pool, name[j]))
6132 return NULL;
6133 }
6134 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
6135 return NULL;
6136 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
6137 sizeof(PREFIX));
6138 if (!id->prefix)
6139 return NULL;
6140 if (id->prefix->name == poolStart(&dtd->pool))
6141 poolFinish(&dtd->pool);
6142 else
6143 poolDiscard(&dtd->pool);
6144 break;
6145 }
6146 }
6147 }
6148 }
6149 return id;
6150 }
6151
6152 #define CONTEXT_SEP XML_T(ASCII_FF)
6153
6154 static const XML_Char *
getContext(XML_Parser parser)6155 getContext(XML_Parser parser)
6156 {
6157 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
6158 HASH_TABLE_ITER iter;
6159 XML_Bool needSep = XML_FALSE;
6160
6161 if (dtd->defaultPrefix.binding) {
6162 int i;
6163 int len;
6164 if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6165 return NULL;
6166 len = dtd->defaultPrefix.binding->uriLen;
6167 if (parser->m_namespaceSeparator)
6168 len--;
6169 for (i = 0; i < len; i++) {
6170 if (!poolAppendChar(&parser->m_tempPool, dtd->defaultPrefix.binding->uri[i])) {
6171 /* Because of memory caching, I don't believe this line can be
6172 * executed.
6173 *
6174 * This is part of a loop copying the default prefix binding
6175 * URI into the parser's temporary string pool. Previously,
6176 * that URI was copied into the same string pool, with a
6177 * terminating NUL character, as part of setContext(). When
6178 * the pool was cleared, that leaves a block definitely big
6179 * enough to hold the URI on the free block list of the pool.
6180 * The URI copy in getContext() therefore cannot run out of
6181 * memory.
6182 *
6183 * If the pool is used between the setContext() and
6184 * getContext() calls, the worst it can do is leave a bigger
6185 * block on the front of the free list. Given that this is
6186 * all somewhat inobvious and program logic can be changed, we
6187 * don't delete the line but we do exclude it from the test
6188 * coverage statistics.
6189 */
6190 return NULL; /* LCOV_EXCL_LINE */
6191 }
6192 }
6193 needSep = XML_TRUE;
6194 }
6195
6196 hashTableIterInit(&iter, &(dtd->prefixes));
6197 for (;;) {
6198 int i;
6199 int len;
6200 const XML_Char *s;
6201 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6202 if (!prefix)
6203 break;
6204 if (!prefix->binding) {
6205 /* This test appears to be (justifiable) paranoia. There does
6206 * not seem to be a way of injecting a prefix without a binding
6207 * that doesn't get errored long before this function is called.
6208 * The test should remain for safety's sake, so we instead
6209 * exclude the following line from the coverage statistics.
6210 */
6211 continue; /* LCOV_EXCL_LINE */
6212 }
6213 if (needSep && !poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6214 return NULL;
6215 for (s = prefix->name; *s; s++)
6216 if (!poolAppendChar(&parser->m_tempPool, *s))
6217 return NULL;
6218 if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6219 return NULL;
6220 len = prefix->binding->uriLen;
6221 if (parser->m_namespaceSeparator)
6222 len--;
6223 for (i = 0; i < len; i++)
6224 if (!poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
6225 return NULL;
6226 needSep = XML_TRUE;
6227 }
6228
6229
6230 hashTableIterInit(&iter, &(dtd->generalEntities));
6231 for (;;) {
6232 const XML_Char *s;
6233 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
6234 if (!e)
6235 break;
6236 if (!e->open)
6237 continue;
6238 if (needSep && !poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6239 return NULL;
6240 for (s = e->name; *s; s++)
6241 if (!poolAppendChar(&parser->m_tempPool, *s))
6242 return 0;
6243 needSep = XML_TRUE;
6244 }
6245
6246 if (!poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6247 return NULL;
6248 return parser->m_tempPool.start;
6249 }
6250
6251 static XML_Bool
setContext(XML_Parser parser,const XML_Char * context)6252 setContext(XML_Parser parser, const XML_Char *context)
6253 {
6254 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
6255 const XML_Char *s = context;
6256
6257 while (*context != XML_T('\0')) {
6258 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
6259 ENTITY *e;
6260 if (!poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6261 return XML_FALSE;
6262 e = (ENTITY *)lookup(parser, &dtd->generalEntities, poolStart(&parser->m_tempPool), 0);
6263 if (e)
6264 e->open = XML_TRUE;
6265 if (*s != XML_T('\0'))
6266 s++;
6267 context = s;
6268 poolDiscard(&parser->m_tempPool);
6269 }
6270 else if (*s == XML_T(ASCII_EQUALS)) {
6271 PREFIX *prefix;
6272 if (poolLength(&parser->m_tempPool) == 0)
6273 prefix = &dtd->defaultPrefix;
6274 else {
6275 if (!poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6276 return XML_FALSE;
6277 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&parser->m_tempPool),
6278 sizeof(PREFIX));
6279 if (!prefix)
6280 return XML_FALSE;
6281 if (prefix->name == poolStart(&parser->m_tempPool)) {
6282 prefix->name = poolCopyString(&dtd->pool, prefix->name);
6283 if (!prefix->name)
6284 return XML_FALSE;
6285 }
6286 poolDiscard(&parser->m_tempPool);
6287 }
6288 for (context = s + 1;
6289 *context != CONTEXT_SEP && *context != XML_T('\0');
6290 context++)
6291 if (!poolAppendChar(&parser->m_tempPool, *context))
6292 return XML_FALSE;
6293 if (!poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6294 return XML_FALSE;
6295 if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
6296 &parser->m_inheritedBindings) != XML_ERROR_NONE)
6297 return XML_FALSE;
6298 poolDiscard(&parser->m_tempPool);
6299 if (*context != XML_T('\0'))
6300 ++context;
6301 s = context;
6302 }
6303 else {
6304 if (!poolAppendChar(&parser->m_tempPool, *s))
6305 return XML_FALSE;
6306 s++;
6307 }
6308 }
6309 return XML_TRUE;
6310 }
6311
6312 static void FASTCALL
normalizePublicId(XML_Char * publicId)6313 normalizePublicId(XML_Char *publicId)
6314 {
6315 XML_Char *p = publicId;
6316 XML_Char *s;
6317 for (s = publicId; *s; s++) {
6318 switch (*s) {
6319 case 0x20:
6320 case 0xD:
6321 case 0xA:
6322 if (p != publicId && p[-1] != 0x20)
6323 *p++ = 0x20;
6324 break;
6325 default:
6326 *p++ = *s;
6327 }
6328 }
6329 if (p != publicId && p[-1] == 0x20)
6330 --p;
6331 *p = XML_T('\0');
6332 }
6333
6334 static DTD *
dtdCreate(const XML_Memory_Handling_Suite * ms)6335 dtdCreate(const XML_Memory_Handling_Suite *ms)
6336 {
6337 DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD));
6338 if (p == NULL)
6339 return p;
6340 poolInit(&(p->pool), ms);
6341 poolInit(&(p->entityValuePool), ms);
6342 hashTableInit(&(p->generalEntities), ms);
6343 hashTableInit(&(p->elementTypes), ms);
6344 hashTableInit(&(p->attributeIds), ms);
6345 hashTableInit(&(p->prefixes), ms);
6346 #ifdef XML_DTD
6347 p->paramEntityRead = XML_FALSE;
6348 hashTableInit(&(p->paramEntities), ms);
6349 #endif /* XML_DTD */
6350 p->defaultPrefix.name = NULL;
6351 p->defaultPrefix.binding = NULL;
6352
6353 p->in_eldecl = XML_FALSE;
6354 p->scaffIndex = NULL;
6355 p->scaffold = NULL;
6356 p->scaffLevel = 0;
6357 p->scaffSize = 0;
6358 p->scaffCount = 0;
6359 p->contentStringLen = 0;
6360
6361 p->keepProcessing = XML_TRUE;
6362 p->hasParamEntityRefs = XML_FALSE;
6363 p->standalone = XML_FALSE;
6364 return p;
6365 }
6366
6367 static void
dtdReset(DTD * p,const XML_Memory_Handling_Suite * ms)6368 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms)
6369 {
6370 HASH_TABLE_ITER iter;
6371 hashTableIterInit(&iter, &(p->elementTypes));
6372 for (;;) {
6373 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6374 if (!e)
6375 break;
6376 if (e->allocDefaultAtts != 0)
6377 ms->free_fcn(e->defaultAtts);
6378 }
6379 hashTableClear(&(p->generalEntities));
6380 #ifdef XML_DTD
6381 p->paramEntityRead = XML_FALSE;
6382 hashTableClear(&(p->paramEntities));
6383 #endif /* XML_DTD */
6384 hashTableClear(&(p->elementTypes));
6385 hashTableClear(&(p->attributeIds));
6386 hashTableClear(&(p->prefixes));
6387 poolClear(&(p->pool));
6388 poolClear(&(p->entityValuePool));
6389 p->defaultPrefix.name = NULL;
6390 p->defaultPrefix.binding = NULL;
6391
6392 p->in_eldecl = XML_FALSE;
6393
6394 ms->free_fcn(p->scaffIndex);
6395 p->scaffIndex = NULL;
6396 ms->free_fcn(p->scaffold);
6397 p->scaffold = NULL;
6398
6399 p->scaffLevel = 0;
6400 p->scaffSize = 0;
6401 p->scaffCount = 0;
6402 p->contentStringLen = 0;
6403
6404 p->keepProcessing = XML_TRUE;
6405 p->hasParamEntityRefs = XML_FALSE;
6406 p->standalone = XML_FALSE;
6407 }
6408
6409 static void
dtdDestroy(DTD * p,XML_Bool isDocEntity,const XML_Memory_Handling_Suite * ms)6410 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms)
6411 {
6412 HASH_TABLE_ITER iter;
6413 hashTableIterInit(&iter, &(p->elementTypes));
6414 for (;;) {
6415 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6416 if (!e)
6417 break;
6418 if (e->allocDefaultAtts != 0)
6419 ms->free_fcn(e->defaultAtts);
6420 }
6421 hashTableDestroy(&(p->generalEntities));
6422 #ifdef XML_DTD
6423 hashTableDestroy(&(p->paramEntities));
6424 #endif /* XML_DTD */
6425 hashTableDestroy(&(p->elementTypes));
6426 hashTableDestroy(&(p->attributeIds));
6427 hashTableDestroy(&(p->prefixes));
6428 poolDestroy(&(p->pool));
6429 poolDestroy(&(p->entityValuePool));
6430 if (isDocEntity) {
6431 ms->free_fcn(p->scaffIndex);
6432 ms->free_fcn(p->scaffold);
6433 }
6434 ms->free_fcn(p);
6435 }
6436
6437 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
6438 The new DTD has already been initialized.
6439 */
6440 static int
dtdCopy(XML_Parser oldParser,DTD * newDtd,const DTD * oldDtd,const XML_Memory_Handling_Suite * ms)6441 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms)
6442 {
6443 HASH_TABLE_ITER iter;
6444
6445 /* Copy the prefix table. */
6446
6447 hashTableIterInit(&iter, &(oldDtd->prefixes));
6448 for (;;) {
6449 const XML_Char *name;
6450 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
6451 if (!oldP)
6452 break;
6453 name = poolCopyString(&(newDtd->pool), oldP->name);
6454 if (!name)
6455 return 0;
6456 if (!lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
6457 return 0;
6458 }
6459
6460 hashTableIterInit(&iter, &(oldDtd->attributeIds));
6461
6462 /* Copy the attribute id table. */
6463
6464 for (;;) {
6465 ATTRIBUTE_ID *newA;
6466 const XML_Char *name;
6467 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
6468
6469 if (!oldA)
6470 break;
6471 /* Remember to allocate the scratch byte before the name. */
6472 if (!poolAppendChar(&(newDtd->pool), XML_T('\0')))
6473 return 0;
6474 name = poolCopyString(&(newDtd->pool), oldA->name);
6475 if (!name)
6476 return 0;
6477 ++name;
6478 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
6479 sizeof(ATTRIBUTE_ID));
6480 if (!newA)
6481 return 0;
6482 newA->maybeTokenized = oldA->maybeTokenized;
6483 if (oldA->prefix) {
6484 newA->xmlns = oldA->xmlns;
6485 if (oldA->prefix == &oldDtd->defaultPrefix)
6486 newA->prefix = &newDtd->defaultPrefix;
6487 else
6488 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
6489 oldA->prefix->name, 0);
6490 }
6491 }
6492
6493 /* Copy the element type table. */
6494
6495 hashTableIterInit(&iter, &(oldDtd->elementTypes));
6496
6497 for (;;) {
6498 int i;
6499 ELEMENT_TYPE *newE;
6500 const XML_Char *name;
6501 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6502 if (!oldE)
6503 break;
6504 name = poolCopyString(&(newDtd->pool), oldE->name);
6505 if (!name)
6506 return 0;
6507 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
6508 sizeof(ELEMENT_TYPE));
6509 if (!newE)
6510 return 0;
6511 if (oldE->nDefaultAtts) {
6512 newE->defaultAtts = (DEFAULT_ATTRIBUTE *)
6513 ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6514 if (!newE->defaultAtts) {
6515 return 0;
6516 }
6517 }
6518 if (oldE->idAtt)
6519 newE->idAtt = (ATTRIBUTE_ID *)
6520 lookup(oldParser, &(newDtd->attributeIds), oldE->idAtt->name, 0);
6521 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
6522 if (oldE->prefix)
6523 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
6524 oldE->prefix->name, 0);
6525 for (i = 0; i < newE->nDefaultAtts; i++) {
6526 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)
6527 lookup(oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
6528 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
6529 if (oldE->defaultAtts[i].value) {
6530 newE->defaultAtts[i].value
6531 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
6532 if (!newE->defaultAtts[i].value)
6533 return 0;
6534 }
6535 else
6536 newE->defaultAtts[i].value = NULL;
6537 }
6538 }
6539
6540 /* Copy the entity tables. */
6541 if (!copyEntityTable(oldParser,
6542 &(newDtd->generalEntities),
6543 &(newDtd->pool),
6544 &(oldDtd->generalEntities)))
6545 return 0;
6546
6547 #ifdef XML_DTD
6548 if (!copyEntityTable(oldParser,
6549 &(newDtd->paramEntities),
6550 &(newDtd->pool),
6551 &(oldDtd->paramEntities)))
6552 return 0;
6553 newDtd->paramEntityRead = oldDtd->paramEntityRead;
6554 #endif /* XML_DTD */
6555
6556 newDtd->keepProcessing = oldDtd->keepProcessing;
6557 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
6558 newDtd->standalone = oldDtd->standalone;
6559
6560 /* Don't want deep copying for scaffolding */
6561 newDtd->in_eldecl = oldDtd->in_eldecl;
6562 newDtd->scaffold = oldDtd->scaffold;
6563 newDtd->contentStringLen = oldDtd->contentStringLen;
6564 newDtd->scaffSize = oldDtd->scaffSize;
6565 newDtd->scaffLevel = oldDtd->scaffLevel;
6566 newDtd->scaffIndex = oldDtd->scaffIndex;
6567
6568 return 1;
6569 } /* End dtdCopy */
6570
6571 static int
copyEntityTable(XML_Parser oldParser,HASH_TABLE * newTable,STRING_POOL * newPool,const HASH_TABLE * oldTable)6572 copyEntityTable(XML_Parser oldParser,
6573 HASH_TABLE *newTable,
6574 STRING_POOL *newPool,
6575 const HASH_TABLE *oldTable)
6576 {
6577 HASH_TABLE_ITER iter;
6578 const XML_Char *cachedOldBase = NULL;
6579 const XML_Char *cachedNewBase = NULL;
6580
6581 hashTableIterInit(&iter, oldTable);
6582
6583 for (;;) {
6584 ENTITY *newE;
6585 const XML_Char *name;
6586 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
6587 if (!oldE)
6588 break;
6589 name = poolCopyString(newPool, oldE->name);
6590 if (!name)
6591 return 0;
6592 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
6593 if (!newE)
6594 return 0;
6595 if (oldE->systemId) {
6596 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
6597 if (!tem)
6598 return 0;
6599 newE->systemId = tem;
6600 if (oldE->base) {
6601 if (oldE->base == cachedOldBase)
6602 newE->base = cachedNewBase;
6603 else {
6604 cachedOldBase = oldE->base;
6605 tem = poolCopyString(newPool, cachedOldBase);
6606 if (!tem)
6607 return 0;
6608 cachedNewBase = newE->base = tem;
6609 }
6610 }
6611 if (oldE->publicId) {
6612 tem = poolCopyString(newPool, oldE->publicId);
6613 if (!tem)
6614 return 0;
6615 newE->publicId = tem;
6616 }
6617 }
6618 else {
6619 const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr,
6620 oldE->textLen);
6621 if (!tem)
6622 return 0;
6623 newE->textPtr = tem;
6624 newE->textLen = oldE->textLen;
6625 }
6626 if (oldE->notation) {
6627 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
6628 if (!tem)
6629 return 0;
6630 newE->notation = tem;
6631 }
6632 newE->is_param = oldE->is_param;
6633 newE->is_internal = oldE->is_internal;
6634 }
6635 return 1;
6636 }
6637
/* Base-2 logarithm of the number of slots a hash table is given when
   lookup() creates its first entry (1 << INIT_POWER slots). */
#define INIT_POWER 6
6639
6640 static XML_Bool FASTCALL
keyeq(KEY s1,KEY s2)6641 keyeq(KEY s1, KEY s2)
6642 {
6643 for (; *s1 == *s2; s1++, s2++)
6644 if (*s1 == 0)
6645 return XML_TRUE;
6646 return XML_FALSE;
6647 }
6648
6649 static size_t
keylen(KEY s)6650 keylen(KEY s)
6651 {
6652 size_t len = 0;
6653 for (; *s; s++, len++);
6654 return len;
6655 }
6656
6657 static void
copy_salt_to_sipkey(XML_Parser parser,struct sipkey * key)6658 copy_salt_to_sipkey(XML_Parser parser, struct sipkey * key)
6659 {
6660 key->k[0] = 0;
6661 key->k[1] = get_hash_secret_salt(parser);
6662 }
6663
6664 static unsigned long FASTCALL
hash(XML_Parser parser,KEY s)6665 hash(XML_Parser parser, KEY s)
6666 {
6667 struct siphash state;
6668 struct sipkey key;
6669 (void)sip24_valid;
6670 copy_salt_to_sipkey(parser, &key);
6671 sip24_init(&state, &key);
6672 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
6673 return (unsigned long)sip24_final(&state);
6674 }
6675
6676 static NAMED *
lookup(XML_Parser parser,HASH_TABLE * table,KEY name,size_t createSize)6677 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize)
6678 {
6679 size_t i;
6680 if (table->size == 0) {
6681 size_t tsize;
6682 if (!createSize)
6683 return NULL;
6684 table->power = INIT_POWER;
6685 /* table->size is a power of 2 */
6686 table->size = (size_t)1 << INIT_POWER;
6687 tsize = table->size * sizeof(NAMED *);
6688 table->v = (NAMED **)table->mem->malloc_fcn(tsize);
6689 if (!table->v) {
6690 table->size = 0;
6691 return NULL;
6692 }
6693 memset(table->v, 0, tsize);
6694 i = hash(parser, name) & ((unsigned long)table->size - 1);
6695 }
6696 else {
6697 unsigned long h = hash(parser, name);
6698 unsigned long mask = (unsigned long)table->size - 1;
6699 unsigned char step = 0;
6700 i = h & mask;
6701 while (table->v[i]) {
6702 if (keyeq(name, table->v[i]->name))
6703 return table->v[i];
6704 if (!step)
6705 step = PROBE_STEP(h, mask, table->power);
6706 i < step ? (i += table->size - step) : (i -= step);
6707 }
6708 if (!createSize)
6709 return NULL;
6710
6711 /* check for overflow (table is half full) */
6712 if (table->used >> (table->power - 1)) {
6713 unsigned char newPower = table->power + 1;
6714 size_t newSize = (size_t)1 << newPower;
6715 unsigned long newMask = (unsigned long)newSize - 1;
6716 size_t tsize = newSize * sizeof(NAMED *);
6717 NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize);
6718 if (!newV)
6719 return NULL;
6720 memset(newV, 0, tsize);
6721 for (i = 0; i < table->size; i++)
6722 if (table->v[i]) {
6723 unsigned long newHash = hash(parser, table->v[i]->name);
6724 size_t j = newHash & newMask;
6725 step = 0;
6726 while (newV[j]) {
6727 if (!step)
6728 step = PROBE_STEP(newHash, newMask, newPower);
6729 j < step ? (j += newSize - step) : (j -= step);
6730 }
6731 newV[j] = table->v[i];
6732 }
6733 table->mem->free_fcn(table->v);
6734 table->v = newV;
6735 table->power = newPower;
6736 table->size = newSize;
6737 i = h & newMask;
6738 step = 0;
6739 while (table->v[i]) {
6740 if (!step)
6741 step = PROBE_STEP(h, newMask, newPower);
6742 i < step ? (i += newSize - step) : (i -= step);
6743 }
6744 }
6745 }
6746 table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize);
6747 if (!table->v[i])
6748 return NULL;
6749 memset(table->v[i], 0, createSize);
6750 table->v[i]->name = name;
6751 (table->used)++;
6752 return table->v[i];
6753 }
6754
6755 static void FASTCALL
hashTableClear(HASH_TABLE * table)6756 hashTableClear(HASH_TABLE *table)
6757 {
6758 size_t i;
6759 for (i = 0; i < table->size; i++) {
6760 table->mem->free_fcn(table->v[i]);
6761 table->v[i] = NULL;
6762 }
6763 table->used = 0;
6764 }
6765
6766 static void FASTCALL
hashTableDestroy(HASH_TABLE * table)6767 hashTableDestroy(HASH_TABLE *table)
6768 {
6769 size_t i;
6770 for (i = 0; i < table->size; i++)
6771 table->mem->free_fcn(table->v[i]);
6772 table->mem->free_fcn(table->v);
6773 }
6774
6775 static void FASTCALL
hashTableInit(HASH_TABLE * p,const XML_Memory_Handling_Suite * ms)6776 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms)
6777 {
6778 p->power = 0;
6779 p->size = 0;
6780 p->used = 0;
6781 p->v = NULL;
6782 p->mem = ms;
6783 }
6784
6785 static void FASTCALL
hashTableIterInit(HASH_TABLE_ITER * iter,const HASH_TABLE * table)6786 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table)
6787 {
6788 iter->p = table->v;
6789 iter->end = iter->p + table->size;
6790 }
6791
6792 static NAMED * FASTCALL
hashTableIterNext(HASH_TABLE_ITER * iter)6793 hashTableIterNext(HASH_TABLE_ITER *iter)
6794 {
6795 while (iter->p != iter->end) {
6796 NAMED *tem = *(iter->p)++;
6797 if (tem)
6798 return tem;
6799 }
6800 return NULL;
6801 }
6802
6803 static void FASTCALL
poolInit(STRING_POOL * pool,const XML_Memory_Handling_Suite * ms)6804 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms)
6805 {
6806 pool->blocks = NULL;
6807 pool->freeBlocks = NULL;
6808 pool->start = NULL;
6809 pool->ptr = NULL;
6810 pool->end = NULL;
6811 pool->mem = ms;
6812 }
6813
6814 static void FASTCALL
poolClear(STRING_POOL * pool)6815 poolClear(STRING_POOL *pool)
6816 {
6817 if (!pool->freeBlocks)
6818 pool->freeBlocks = pool->blocks;
6819 else {
6820 BLOCK *p = pool->blocks;
6821 while (p) {
6822 BLOCK *tem = p->next;
6823 p->next = pool->freeBlocks;
6824 pool->freeBlocks = p;
6825 p = tem;
6826 }
6827 }
6828 pool->blocks = NULL;
6829 pool->start = NULL;
6830 pool->ptr = NULL;
6831 pool->end = NULL;
6832 }
6833
6834 static void FASTCALL
poolDestroy(STRING_POOL * pool)6835 poolDestroy(STRING_POOL *pool)
6836 {
6837 BLOCK *p = pool->blocks;
6838 while (p) {
6839 BLOCK *tem = p->next;
6840 pool->mem->free_fcn(p);
6841 p = tem;
6842 }
6843 p = pool->freeBlocks;
6844 while (p) {
6845 BLOCK *tem = p->next;
6846 pool->mem->free_fcn(p);
6847 p = tem;
6848 }
6849 }
6850
6851 static XML_Char *
poolAppend(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)6852 poolAppend(STRING_POOL *pool, const ENCODING *enc,
6853 const char *ptr, const char *end)
6854 {
6855 if (!pool->ptr && !poolGrow(pool))
6856 return NULL;
6857 for (;;) {
6858 const enum XML_Convert_Result convert_res = XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
6859 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
6860 break;
6861 if (!poolGrow(pool))
6862 return NULL;
6863 }
6864 return pool->start;
6865 }
6866
6867 static const XML_Char * FASTCALL
poolCopyString(STRING_POOL * pool,const XML_Char * s)6868 poolCopyString(STRING_POOL *pool, const XML_Char *s)
6869 {
6870 do {
6871 if (!poolAppendChar(pool, *s))
6872 return NULL;
6873 } while (*s++);
6874 s = pool->start;
6875 poolFinish(pool);
6876 return s;
6877 }
6878
6879 static const XML_Char *
poolCopyStringN(STRING_POOL * pool,const XML_Char * s,int n)6880 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
6881 {
6882 if (!pool->ptr && !poolGrow(pool)) {
6883 /* The following line is unreachable given the current usage of
6884 * poolCopyStringN(). Currently it is called from exactly one
6885 * place to copy the text of a simple general entity. By that
6886 * point, the name of the entity is already stored in the pool, so
6887 * pool->ptr cannot be NULL.
6888 *
6889 * If poolCopyStringN() is used elsewhere as it well might be,
6890 * this line may well become executable again. Regardless, this
6891 * sort of check shouldn't be removed lightly, so we just exclude
6892 * it from the coverage statistics.
6893 */
6894 return NULL; /* LCOV_EXCL_LINE */
6895 }
6896 for (; n > 0; --n, s++) {
6897 if (!poolAppendChar(pool, *s))
6898 return NULL;
6899 }
6900 s = pool->start;
6901 poolFinish(pool);
6902 return s;
6903 }
6904
6905 static const XML_Char * FASTCALL
poolAppendString(STRING_POOL * pool,const XML_Char * s)6906 poolAppendString(STRING_POOL *pool, const XML_Char *s)
6907 {
6908 while (*s) {
6909 if (!poolAppendChar(pool, *s))
6910 return NULL;
6911 s++;
6912 }
6913 return pool->start;
6914 }
6915
6916 static XML_Char *
poolStoreString(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)6917 poolStoreString(STRING_POOL *pool, const ENCODING *enc,
6918 const char *ptr, const char *end)
6919 {
6920 if (!poolAppend(pool, enc, ptr, end))
6921 return NULL;
6922 if (pool->ptr == pool->end && !poolGrow(pool))
6923 return NULL;
6924 *(pool->ptr)++ = 0;
6925 return pool->start;
6926 }
6927
6928 static size_t
poolBytesToAllocateFor(int blockSize)6929 poolBytesToAllocateFor(int blockSize)
6930 {
6931 /* Unprotected math would be:
6932 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
6933 **
6934 ** Detect overflow, avoiding _signed_ overflow undefined behavior
6935 ** For a + b * c we check b * c in isolation first, so that addition of a
6936 ** on top has no chance of making us accept a small non-negative number
6937 */
6938 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
6939
6940 if (blockSize <= 0)
6941 return 0;
6942
6943 if (blockSize > (int)(INT_MAX / stretch))
6944 return 0;
6945
6946 {
6947 const int stretchedBlockSize = blockSize * (int)stretch;
6948 const int bytesToAllocate = (int)(
6949 offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
6950 if (bytesToAllocate < 0)
6951 return 0;
6952
6953 return (size_t)bytesToAllocate;
6954 }
6955 }
6956
6957 static XML_Bool FASTCALL
poolGrow(STRING_POOL * pool)6958 poolGrow(STRING_POOL *pool)
6959 {
6960 if (pool->freeBlocks) {
6961 if (pool->start == 0) {
6962 pool->blocks = pool->freeBlocks;
6963 pool->freeBlocks = pool->freeBlocks->next;
6964 pool->blocks->next = NULL;
6965 pool->start = pool->blocks->s;
6966 pool->end = pool->start + pool->blocks->size;
6967 pool->ptr = pool->start;
6968 return XML_TRUE;
6969 }
6970 if (pool->end - pool->start < pool->freeBlocks->size) {
6971 BLOCK *tem = pool->freeBlocks->next;
6972 pool->freeBlocks->next = pool->blocks;
6973 pool->blocks = pool->freeBlocks;
6974 pool->freeBlocks = tem;
6975 memcpy(pool->blocks->s, pool->start,
6976 (pool->end - pool->start) * sizeof(XML_Char));
6977 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
6978 pool->start = pool->blocks->s;
6979 pool->end = pool->start + pool->blocks->size;
6980 return XML_TRUE;
6981 }
6982 }
6983 if (pool->blocks && pool->start == pool->blocks->s) {
6984 BLOCK *temp;
6985 int blockSize = (int)((unsigned)(pool->end - pool->start)*2U);
6986 size_t bytesToAllocate;
6987
6988 /* NOTE: Needs to be calculated prior to calling `realloc`
6989 to avoid dangling pointers: */
6990 const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
6991
6992 if (blockSize < 0) {
6993 /* This condition traps a situation where either more than
6994 * INT_MAX/2 bytes have already been allocated. This isn't
6995 * readily testable, since it is unlikely that an average
6996 * machine will have that much memory, so we exclude it from the
6997 * coverage statistics.
6998 */
6999 return XML_FALSE; /* LCOV_EXCL_LINE */
7000 }
7001
7002 bytesToAllocate = poolBytesToAllocateFor(blockSize);
7003 if (bytesToAllocate == 0)
7004 return XML_FALSE;
7005
7006 temp = (BLOCK *)
7007 pool->mem->realloc_fcn(pool->blocks, (unsigned)bytesToAllocate);
7008 if (temp == NULL)
7009 return XML_FALSE;
7010 pool->blocks = temp;
7011 pool->blocks->size = blockSize;
7012 pool->ptr = pool->blocks->s + offsetInsideBlock;
7013 pool->start = pool->blocks->s;
7014 pool->end = pool->start + blockSize;
7015 }
7016 else {
7017 BLOCK *tem;
7018 int blockSize = (int)(pool->end - pool->start);
7019 size_t bytesToAllocate;
7020
7021 if (blockSize < 0) {
7022 /* This condition traps a situation where either more than
7023 * INT_MAX bytes have already been allocated (which is prevented
7024 * by various pieces of program logic, not least this one, never
7025 * mind the unlikelihood of actually having that much memory) or
7026 * the pool control fields have been corrupted (which could
7027 * conceivably happen in an extremely buggy user handler
7028 * function). Either way it isn't readily testable, so we
7029 * exclude it from the coverage statistics.
7030 */
7031 return XML_FALSE; /* LCOV_EXCL_LINE */
7032 }
7033
7034 if (blockSize < INIT_BLOCK_SIZE)
7035 blockSize = INIT_BLOCK_SIZE;
7036 else {
7037 /* Detect overflow, avoiding _signed_ overflow undefined behavior */
7038 if ((int)((unsigned)blockSize * 2U) < 0) {
7039 return XML_FALSE;
7040 }
7041 blockSize *= 2;
7042 }
7043
7044 bytesToAllocate = poolBytesToAllocateFor(blockSize);
7045 if (bytesToAllocate == 0)
7046 return XML_FALSE;
7047
7048 tem = (BLOCK *)pool->mem->malloc_fcn(bytesToAllocate);
7049 if (!tem)
7050 return XML_FALSE;
7051 tem->size = blockSize;
7052 tem->next = pool->blocks;
7053 pool->blocks = tem;
7054 if (pool->ptr != pool->start)
7055 memcpy(tem->s, pool->start,
7056 (pool->ptr - pool->start) * sizeof(XML_Char));
7057 pool->ptr = tem->s + (pool->ptr - pool->start);
7058 pool->start = tem->s;
7059 pool->end = tem->s + blockSize;
7060 }
7061 return XML_TRUE;
7062 }
7063
7064 static int FASTCALL
nextScaffoldPart(XML_Parser parser)7065 nextScaffoldPart(XML_Parser parser)
7066 {
7067 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
7068 CONTENT_SCAFFOLD * me;
7069 int next;
7070
7071 if (!dtd->scaffIndex) {
7072 dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
7073 if (!dtd->scaffIndex)
7074 return -1;
7075 dtd->scaffIndex[0] = 0;
7076 }
7077
7078 if (dtd->scaffCount >= dtd->scaffSize) {
7079 CONTENT_SCAFFOLD *temp;
7080 if (dtd->scaffold) {
7081 temp = (CONTENT_SCAFFOLD *)
7082 REALLOC(parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
7083 if (temp == NULL)
7084 return -1;
7085 dtd->scaffSize *= 2;
7086 }
7087 else {
7088 temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
7089 * sizeof(CONTENT_SCAFFOLD));
7090 if (temp == NULL)
7091 return -1;
7092 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
7093 }
7094 dtd->scaffold = temp;
7095 }
7096 next = dtd->scaffCount++;
7097 me = &dtd->scaffold[next];
7098 if (dtd->scaffLevel) {
7099 CONTENT_SCAFFOLD *parent = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel-1]];
7100 if (parent->lastchild) {
7101 dtd->scaffold[parent->lastchild].nextsib = next;
7102 }
7103 if (!parent->childcnt)
7104 parent->firstchild = next;
7105 parent->lastchild = next;
7106 parent->childcnt++;
7107 }
7108 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
7109 return next;
7110 }
7111
7112 static void
build_node(XML_Parser parser,int src_node,XML_Content * dest,XML_Content ** contpos,XML_Char ** strpos)7113 build_node(XML_Parser parser,
7114 int src_node,
7115 XML_Content *dest,
7116 XML_Content **contpos,
7117 XML_Char **strpos)
7118 {
7119 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
7120 dest->type = dtd->scaffold[src_node].type;
7121 dest->quant = dtd->scaffold[src_node].quant;
7122 if (dest->type == XML_CTYPE_NAME) {
7123 const XML_Char *src;
7124 dest->name = *strpos;
7125 src = dtd->scaffold[src_node].name;
7126 for (;;) {
7127 *(*strpos)++ = *src;
7128 if (!*src)
7129 break;
7130 src++;
7131 }
7132 dest->numchildren = 0;
7133 dest->children = NULL;
7134 }
7135 else {
7136 unsigned int i;
7137 int cn;
7138 dest->numchildren = dtd->scaffold[src_node].childcnt;
7139 dest->children = *contpos;
7140 *contpos += dest->numchildren;
7141 for (i = 0, cn = dtd->scaffold[src_node].firstchild;
7142 i < dest->numchildren;
7143 i++, cn = dtd->scaffold[cn].nextsib) {
7144 build_node(parser, cn, &(dest->children[i]), contpos, strpos);
7145 }
7146 dest->name = NULL;
7147 }
7148 }
7149
7150 static XML_Content *
build_model(XML_Parser parser)7151 build_model (XML_Parser parser)
7152 {
7153 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
7154 XML_Content *ret;
7155 XML_Content *cpos;
7156 XML_Char * str;
7157 int allocsize = (dtd->scaffCount * sizeof(XML_Content)
7158 + (dtd->contentStringLen * sizeof(XML_Char)));
7159
7160 ret = (XML_Content *)MALLOC(parser, allocsize);
7161 if (!ret)
7162 return NULL;
7163
7164 str = (XML_Char *) (&ret[dtd->scaffCount]);
7165 cpos = &ret[1];
7166
7167 build_node(parser, 0, ret, &cpos, &str);
7168 return ret;
7169 }
7170
7171 static ELEMENT_TYPE *
getElementType(XML_Parser parser,const ENCODING * enc,const char * ptr,const char * end)7172 getElementType(XML_Parser parser,
7173 const ENCODING *enc,
7174 const char *ptr,
7175 const char *end)
7176 {
7177 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
7178 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
7179 ELEMENT_TYPE *ret;
7180
7181 if (!name)
7182 return NULL;
7183 ret = (ELEMENT_TYPE *) lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE));
7184 if (!ret)
7185 return NULL;
7186 if (ret->name != name)
7187 poolDiscard(&dtd->pool);
7188 else {
7189 poolFinish(&dtd->pool);
7190 if (!setElementTypePrefix(parser, ret))
7191 return NULL;
7192 }
7193 return ret;
7194 }
7195
7196 static XML_Char *
copyString(const XML_Char * s,const XML_Memory_Handling_Suite * memsuite)7197 copyString(const XML_Char *s,
7198 const XML_Memory_Handling_Suite *memsuite)
7199 {
7200 int charsRequired = 0;
7201 XML_Char *result;
7202
7203 /* First determine how long the string is */
7204 while (s[charsRequired] != 0) {
7205 charsRequired++;
7206 }
7207 /* Include the terminator */
7208 charsRequired++;
7209
7210 /* Now allocate space for the copy */
7211 result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
7212 if (result == NULL)
7213 return NULL;
7214 /* Copy the original into place */
7215 memcpy(result, s, charsRequired * sizeof(XML_Char));
7216 return result;
7217 }
7218