1 /* f2d0ab6d1d4422a08cf1cf3bbdfba96b49dea42fb5ff4615e03a2a25c306e769 (2.2.8+)
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000-2017 Expat development team
11 Licensed under the MIT license:
12
13 Permission is hereby granted, free of charge, to any person obtaining
14 a copy of this software and associated documentation files (the
15 "Software"), to deal in the Software without restriction, including
16 without limitation the rights to use, copy, modify, merge, publish,
17 distribute, sublicense, and/or sell copies of the Software, and to permit
18 persons to whom the Software is furnished to do so, subject to the
19 following conditions:
20
21 The above copyright notice and this permission notice shall be included
22 in all copies or substantial portions of the Software.
23
24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
29 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30 USE OR OTHER DEALINGS IN THE SOFTWARE.
31 */
32
33 #if ! defined(_GNU_SOURCE)
34 # define _GNU_SOURCE 1 /* syscall prototype */
35 #endif
36
37 #ifdef _WIN32
38 /* force stdlib to define rand_s() */
39 # define _CRT_RAND_S
40 #endif
41
#include <stddef.h>
#include <string.h> /* memset(), memcpy() */
#include <assert.h>
#include <limits.h> /* UINT_MAX */
#include <stdint.h> /* uintptr_t */
#include <stdio.h>  /* fprintf */
#include <stdlib.h> /* getenv, rand_s */
48
49 #ifdef _WIN32
50 # define getpid GetCurrentProcessId
51 #else
52 # include <sys/time.h> /* gettimeofday() */
53 # include <sys/types.h> /* getpid() */
54 # include <unistd.h> /* getpid() */
55 # include <fcntl.h> /* O_RDONLY */
56 # include <errno.h>
57 #endif
58
59 #define XML_BUILDING_EXPAT 1
60
61 #ifdef _WIN32
62 # include "winconfig.h"
63 #elif defined(HAVE_EXPAT_CONFIG_H)
64 # include <expat_config.h>
65 #endif /* ndef _WIN32 */
66
67 #include "ascii.h"
68 #include "expat.h"
69 #include "siphash.h"
70
71 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
72 # if defined(HAVE_GETRANDOM)
73 # include <sys/random.h> /* getrandom */
74 # else
75 # include <unistd.h> /* syscall */
76 # include <sys/syscall.h> /* SYS_getrandom */
77 # endif
78 # if ! defined(GRND_NONBLOCK)
79 # define GRND_NONBLOCK 0x0001
80 # endif /* defined(GRND_NONBLOCK) */
81 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
82
83 #if defined(HAVE_LIBBSD) \
84 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
85 # include <bsd/stdlib.h>
86 #endif
87
88 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
89 # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
90 #endif
91
92 #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \
93 && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \
94 && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \
95 && ! defined(XML_POOR_ENTROPY)
96 # error You do not have support for any sources of high quality entropy \
97 enabled. For end user security, that is probably not what you want. \
98 \
99 Your options include: \
100 * Linux + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
101 * Linux + glibc <2.25 (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
102 * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
103 * BSD / macOS <10.7 (arc4random): HAVE_ARC4RANDOM, \
104 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
105 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
106 * Linux / BSD / macOS (/dev/urandom): XML_DEV_URANDOM \
107 * Windows (rand_s): _WIN32. \
108 \
109 If insist on not using any of these, bypass this error by defining \
110 XML_POOR_ENTROPY; you have been warned. \
111 \
112 If you have reasons to patch this detection code away or need changes \
113 to the build system, please open a bug. Thank you!
114 #endif
115
/* Select the internal (API-side) character representation.

   With XML_UNICODE the generic Xml* names are mapped onto the UTF-16
   variants and ICHAR is a 16-bit unit; otherwise they are mapped onto the
   UTF-8 variants and ICHAR is a plain char.

   MUST_CONVERT(enc, s) is non-zero when input at address s in encoding enc
   has to be converted before use. */
#ifdef XML_UNICODE
#  define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#  define XmlConvert XmlUtf16Convert
#  define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#  define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
#  define XmlEncode XmlUtf16Encode
/* Conversion is needed when the input is not UTF-16, or when s sits on an
   odd address.  The address is inspected through uintptr_t: subtracting a
   null pointer from s (as done previously) is undefined behavior. */
#  define MUST_CONVERT(enc, s)                                                 \
    (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
typedef unsigned short ICHAR;
#else
#  define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#  define XmlConvert XmlUtf8Convert
#  define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#  define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
#  define XmlEncode XmlUtf8Encode
/* In the UTF-8 build only the encoding matters; s is not examined. */
#  define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
typedef char ICHAR;
#endif
135
#ifndef XML_NS

/* Built without XML_NS: map every "NS" tokenizer entry point used in this
   file onto its plain counterpart.  XmlGetInternalEncodingNS was already
   defined by the XML_UNICODE block above, hence the #undef first. */
#  define XmlInitEncodingNS XmlInitEncoding
#  define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
#  undef XmlGetInternalEncodingNS
#  define XmlGetInternalEncodingNS XmlGetInternalEncoding
#  define XmlParseXmlDeclNS XmlParseXmlDecl

#endif
145
/* XML_T(x) adapts a character constant and XML_L(x) a literal to the build's
   character type:
     - XML_UNICODE + XML_UNICODE_WCHAR_T: cast to wchar_t / add an L prefix;
     - XML_UNICODE only: cast to unsigned short / literal left as written;
     - otherwise: both macros expand to their argument unchanged.
   Note that the cast forms do not parenthesize x. */
#ifdef XML_UNICODE

#  ifdef XML_UNICODE_WCHAR_T
#    define XML_T(x) (const wchar_t) x
#    define XML_L(x) L##x
#  else
#    define XML_T(x) (const unsigned short)x
#    define XML_L(x) x
#  endif

#else

#  define XML_T(x) x
#  define XML_L(x) x

#endif
162
/* Round up n to be a multiple of sz, where sz is a power of 2.
   sz is expanded twice, so it must not have side effects. */
#define ROUND_UP(n, sz) (((n) + ((sz)-1)) & ~((sz)-1))

/* Do safe (NULL-aware) pointer arithmetic: yields p - q when both pointers
   are non-NULL and 0 otherwise.  Each argument is expanded twice. */
#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
168
169 #include "internal.h"
170 #include "xmltok.h"
171 #include "xmlrole.h"
172
/* Hash table key: pointer to constant XML_Char data. */
typedef const XML_Char *KEY;

/* Common leading shape of a hash table entry; only the key is declared
   here. */
typedef struct {
  KEY name;
} NAMED;

/* Hash table of NAMED entries.  Per the probing comment further below, the
   table size is a power of 2 and indices are computed as hash & (size - 1). */
typedef struct {
  NAMED **v;           /* slot array of entry pointers */
  unsigned char power; /* presumably log2(size) -- TODO confirm in lookup() */
  size_t size;         /* number of slots */
  size_t used;         /* presumably number of occupied slots -- TODO confirm */
  const XML_Memory_Handling_Suite *mem; /* allocator used for this table */
} HASH_TABLE;
186
187 static size_t keylen(KEY s);
188
189 static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
190
/* For probing (after a collision) we need a step size relatively prime
   to the hash table size, which is a power of 2. We use double-hashing,
   since we can calculate a second hash value cheaply by taking those bits
   of the first hash value that were discarded (masked out) when the table
   index was calculated: index = hash & mask, where mask = table->size - 1.
   We limit the maximum step size to table->size / 4 (mask >> 2) and make
   it odd, since odd numbers are always relatively prime to a power of 2.

   SECOND_HASH extracts the discarded bits and clamps them to mask >> 2;
   PROBE_STEP forces the low bit on and truncates to unsigned char.
   The macro arguments are expanded more than once.
*/
#define SECOND_HASH(hash, mask, power)                                         \
  ((((hash) & ~(mask)) >> ((power)-1)) & ((mask) >> 2))
#define PROBE_STEP(hash, mask, power)                                          \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
203
/* Cursor over the slots of a HASH_TABLE; set up by hashTableIterInit() and
   advanced by hashTableIterNext() (both declared below). */
typedef struct {
  NAMED **p;   /* current slot */
  NAMED **end; /* presumably one past the last slot -- TODO confirm */
} HASH_TABLE_ITER;
208
/* Initial sizes for parser-owned buffers and tables.  Within this part of
   the file, INIT_ATTS_SIZE seeds m_attsSize and INIT_DATA_BUF_SIZE is the
   length of m_dataBuf in XML_Chars (see parserCreate); the remaining
   constants are consumed elsewhere. */
#define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE 1024
#define INIT_ATTS_SIZE 16
#define INIT_ATTS_VERSION 0xFFFFFFFF
#define INIT_BLOCK_SIZE 1024
#define INIT_BUFFER_SIZE 1024

#define EXPAND_SPARE 24
217
/* A namespace binding record.  Bindings are linked two ways: through
   nextTagBinding (a per-tag list, cf. TAG.bindings) and through
   prevPrefixBinding (a chain hanging off the prefix). */
typedef struct binding {
  struct prefix *prefix;             /* prefix this binding belongs to */
  struct binding *nextTagBinding;    /* next binding in the same list */
  struct binding *prevPrefixBinding; /* presumably the binding this one
                                        shadows -- TODO confirm */
  const struct attribute_id *attId;
  XML_Char *uri;
  int uriLen;   /* presumably length of uri in XML_Chars -- TODO confirm */
  int uriAlloc; /* presumably allocated capacity of uri -- TODO confirm */
} BINDING;

/* A namespace prefix together with its current binding. */
typedef struct prefix {
  const XML_Char *name;
  BINDING *binding;
} PREFIX;
232
/* Element name in the API encoding (see TAG.name), with pointers to its
   parts and a length for each.  The exact units of the length fields are
   not visible here. */
typedef struct {
  const XML_Char *str;
  const XML_Char *localPart;
  const XML_Char *prefix;
  int strLen;
  int uriLen;
  int prefixLen;
} TAG_NAME;
241
/* TAG represents an open element.
   The name of the element is stored in both the document and API
   encodings.  The memory buffer 'buf' is a separately-allocated
   memory area which stores the name.  During the XML_Parse()/
   XML_ParseBuffer() when the element is open, the memory for the 'raw'
   version of the name (in the document encoding) is shared with the
   document buffer.  If the element is open across calls to
   XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
   contain the 'raw' name as well.

   A parser re-uses these structures, maintaining a list of allocated
   TAG objects in a free list.
*/
typedef struct tag {
  struct tag *parent;  /* parent of this element */
  const char *rawName; /* tagName in the original encoding */
  int rawNameLength;
  TAG_NAME name; /* tagName in the API encoding */
  char *buf;     /* buffer for name components */
  char *bufEnd;  /* end of the buffer */
  BINDING *bindings;
} TAG;
264
/* One declared entity.  The first member is the name, matching the layout
   of NAMED.  Which of the text and identifier fields are populated for a
   given entity kind is decided by code outside this part of the file. */
typedef struct {
  const XML_Char *name;
  const XML_Char *textPtr;
  int textLen;   /* length in XML_Chars */
  int processed; /* # of processed bytes - when suspended */
  const XML_Char *systemId;
  const XML_Char *base;
  const XML_Char *publicId;
  const XML_Char *notation;
  XML_Bool open;
  XML_Bool is_param;
  XML_Bool is_internal; /* true if declared in internal subset outside PE */
} ENTITY;
278
/* One node of the temporary content-model tree (the DTD's "scaffold").
   Nodes refer to each other by integer index rather than by pointer;
   presumably these index DTD.scaffold -- TODO confirm. */
typedef struct {
  enum XML_Content_Type type;
  enum XML_Content_Quant quant;
  const XML_Char *name;
  int firstchild;
  int lastchild;
  int childcnt;
  int nextsib;
} CONTENT_SCAFFOLD;

/* Presumably the initial scaffold capacity -- TODO confirm at use site. */
#define INIT_SCAFFOLD_ELEMENTS 32
290
/* One chunk of string pool storage, chained through 'next'.  The trailing
   one-element array 's' marks where the character storage begins;
   presumably more than one element is allocated -- TODO confirm in
   poolGrow(). */
typedef struct block {
  struct block *next;
  int size;
  XML_Char s[1];
} BLOCK;

/* String pool.  As used by the pool* macros below, 'start' marks the
   beginning of the string under construction, 'ptr' is the next write
   position and 'end' the limit at which poolGrow() must be called. */
typedef struct {
  BLOCK *blocks;
  BLOCK *freeBlocks;
  const XML_Char *end;
  XML_Char *ptr;
  XML_Char *start;
  const XML_Memory_Handling_Suite *mem; /* allocator used for the blocks */
} STRING_POOL;
305
/* The XML_Char before the name is used to determine whether
   an attribute has been specified. */
typedef struct attribute_id {
  XML_Char *name;
  PREFIX *prefix;
  XML_Bool maybeTokenized;
  XML_Bool xmlns;
} ATTRIBUTE_ID;

/* An attribute default: the attribute's identity, a CDATA flag and the
   default value string. */
typedef struct {
  const ATTRIBUTE_ID *id;
  XML_Bool isCdata;
  const XML_Char *value;
} DEFAULT_ATTRIBUTE;

/* Slot of the parser's m_nsAtts table.  NOTE(review): 'version' is
   presumably compared against m_nsAttsVersion to detect stale slots --
   confirm in storeAtts(). */
typedef struct {
  unsigned long version;
  unsigned long hash;
  const XML_Char *uriName;
} NS_ATT;

/* A declared element type with its attribute defaults.  defaultAtts is an
   array with nDefaultAtts entries in use; allocDefaultAtts is presumably
   its allocated capacity -- TODO confirm in defineAttribute(). */
typedef struct {
  const XML_Char *name;
  PREFIX *prefix;
  const ATTRIBUTE_ID *idAtt;
  int nDefaultAtts;
  int allocDefaultAtts;
  DEFAULT_ATTRIBUTE *defaultAtts;
} ELEMENT_TYPE;
335
/* Everything the parser records about a document type: hash tables for
   entities, element types, attribute ids and prefixes, two string pools,
   a few status flags and the scaffolding used while building a content
   model.  Created by dtdCreate() (see parserCreate) unless an existing DTD
   is handed in. */
typedef struct {
  HASH_TABLE generalEntities;
  HASH_TABLE elementTypes;
  HASH_TABLE attributeIds;
  HASH_TABLE prefixes;
  STRING_POOL pool;
  STRING_POOL entityValuePool;
  /* false once a parameter entity reference has been skipped */
  XML_Bool keepProcessing;
  /* true once an internal or external PE reference has been encountered;
     this includes the reference to an external subset */
  XML_Bool hasParamEntityRefs;
  XML_Bool standalone;
#ifdef XML_DTD
  /* indicates if external PE has been read */
  XML_Bool paramEntityRead;
  HASH_TABLE paramEntities;
#endif /* XML_DTD */
  PREFIX defaultPrefix;
  /* === scaffolding for building content model === */
  XML_Bool in_eldecl;
  CONTENT_SCAFFOLD *scaffold;
  unsigned contentStringLen;
  unsigned scaffSize;
  unsigned scaffCount;
  int scaffLevel;
  int *scaffIndex;
} DTD;
364
/* Record for an internal entity that is currently being processed.
   Records are chained through 'next'; the parser keeps one list of open
   records (m_openInternalEntities) and one of spare records
   (m_freeInternalEntities). */
typedef struct open_internal_entity {
  const char *internalEventPtr;
  const char *internalEventEndPtr;
  struct open_internal_entity *next;
  ENTITY *entity;
  int startTagLevel;
  XML_Bool betweenDecl; /* WFC: PE Between Declarations */
} OPEN_INTERNAL_ENTITY;
373
/* Signature shared by all processing-stage functions declared below.  A
   processor receives the input range [start, end), reports a position back
   through *endPtr and returns an XML_Error code.  The parser keeps a
   pointer to the current one in m_processor. */
typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
                                         const char *end, const char **endPtr);
376
377 static Processor prologProcessor;
378 static Processor prologInitProcessor;
379 static Processor contentProcessor;
380 static Processor cdataSectionProcessor;
381 #ifdef XML_DTD
382 static Processor ignoreSectionProcessor;
383 static Processor externalParEntProcessor;
384 static Processor externalParEntInitProcessor;
385 static Processor entityValueProcessor;
386 static Processor entityValueInitProcessor;
387 #endif /* XML_DTD */
388 static Processor epilogProcessor;
389 static Processor errorProcessor;
390 static Processor externalEntityInitProcessor;
391 static Processor externalEntityInitProcessor2;
392 static Processor externalEntityInitProcessor3;
393 static Processor externalEntityContentProcessor;
394 static Processor internalEntityProcessor;
395
396 static enum XML_Error handleUnknownEncoding(XML_Parser parser,
397 const XML_Char *encodingName);
398 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
399 const char *s, const char *next);
400 static enum XML_Error initializeEncoding(XML_Parser parser);
401 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
402 const char *s, const char *end, int tok,
403 const char *next, const char **nextPtr,
404 XML_Bool haveMore, XML_Bool allowClosingDoctype);
405 static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity,
406 XML_Bool betweenDecl);
407 static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
408 const ENCODING *enc, const char *start,
409 const char *end, const char **endPtr,
410 XML_Bool haveMore);
411 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *,
412 const char **startPtr, const char *end,
413 const char **nextPtr, XML_Bool haveMore);
414 #ifdef XML_DTD
415 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *,
416 const char **startPtr, const char *end,
417 const char **nextPtr, XML_Bool haveMore);
418 #endif /* XML_DTD */
419
420 static void freeBindings(XML_Parser parser, BINDING *bindings);
421 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *,
422 const char *s, TAG_NAME *tagNamePtr,
423 BINDING **bindingsPtr);
424 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
425 const ATTRIBUTE_ID *attId, const XML_Char *uri,
426 BINDING **bindingsPtr);
427 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
428 XML_Bool isId, const XML_Char *dfltValue,
429 XML_Parser parser);
430 static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *,
431 XML_Bool isCdata, const char *,
432 const char *, STRING_POOL *);
433 static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *,
434 XML_Bool isCdata, const char *,
435 const char *, STRING_POOL *);
436 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
437 const char *start, const char *end);
438 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
439 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
440 const char *start, const char *end);
441 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
442 const char *start, const char *end);
443 static int reportComment(XML_Parser parser, const ENCODING *enc,
444 const char *start, const char *end);
445 static void reportDefault(XML_Parser parser, const ENCODING *enc,
446 const char *start, const char *end);
447
448 static const XML_Char *getContext(XML_Parser parser);
449 static XML_Bool setContext(XML_Parser parser, const XML_Char *context);
450
451 static void FASTCALL normalizePublicId(XML_Char *s);
452
453 static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms);
454 /* do not call if m_parentParser != NULL */
455 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
456 static void dtdDestroy(DTD *p, XML_Bool isDocEntity,
457 const XML_Memory_Handling_Suite *ms);
458 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
459 const XML_Memory_Handling_Suite *ms);
460 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *, STRING_POOL *,
461 const HASH_TABLE *);
462 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
463 size_t createSize);
464 static void FASTCALL hashTableInit(HASH_TABLE *,
465 const XML_Memory_Handling_Suite *ms);
466 static void FASTCALL hashTableClear(HASH_TABLE *);
467 static void FASTCALL hashTableDestroy(HASH_TABLE *);
468 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *);
469 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *);
470
471 static void FASTCALL poolInit(STRING_POOL *,
472 const XML_Memory_Handling_Suite *ms);
473 static void FASTCALL poolClear(STRING_POOL *);
474 static void FASTCALL poolDestroy(STRING_POOL *);
475 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
476 const char *ptr, const char *end);
477 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
478 const char *ptr, const char *end);
479 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
480 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
481 const XML_Char *s);
482 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
483 int n);
484 static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
485 const XML_Char *s);
486
487 static int FASTCALL nextScaffoldPart(XML_Parser parser);
488 static XML_Content *build_model(XML_Parser parser);
489 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
490 const char *ptr, const char *end);
491
492 static XML_Char *copyString(const XML_Char *s,
493 const XML_Memory_Handling_Suite *memsuite);
494
495 static unsigned long generate_hash_secret_salt(XML_Parser parser);
496 static XML_Bool startParsing(XML_Parser parser);
497
498 static XML_Parser parserCreate(const XML_Char *encodingName,
499 const XML_Memory_Handling_Suite *memsuite,
500 const XML_Char *nameSep, DTD *dtd);
501
502 static void parserInit(XML_Parser parser, const XML_Char *encodingName);
503
/* Helpers operating on a STRING_POOL pointer.

   The string under construction occupies [start, ptr):
     poolStart / poolEnd   bounds of the string under construction
     poolLength            number of XML_Chars accumulated so far
     poolChop              drop the last character
     poolLastChar          the most recently appended character
     poolDiscard           abandon the string under construction
     poolFinish            commit it; the next string starts at ptr
     poolAppendChar        append c, growing the pool via poolGrow() when
                           full; evaluates to 1 on success, 0 on failure

   Every use of a macro argument is parenthesized so that expressions such
   as 'pools + 1' or '&pool' can be passed.  The pool argument is expanded
   more than once and therefore must not have side effects. */
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
515
/* Complete state of one parser instance (XML_Parser points to this).

   Allocated in parserCreate() through the caller-supplied memory suite or,
   when none is given, through malloc().  The allocator triple is then
   copied into m_mem; because that member is declared const, parserCreate()
   writes it through a cast.

   Member order matters: see the note on m_userData. */
struct XML_ParserStruct {
  /* The first member must be m_userData so that the XML_GetUserData
     macro works. */
  void *m_userData;
  void *m_handlerArg;
  char *m_buffer;
  /* allocator functions used by the MALLOC/REALLOC/FREE macros */
  const XML_Memory_Handling_Suite m_mem;
  /* first character to be parsed */
  const char *m_bufferPtr;
  /* past last character to be parsed */
  char *m_bufferEnd;
  /* allocated end of m_buffer */
  const char *m_bufferLim;
  XML_Index m_parseEndByteIndex;
  const char *m_parseEndPtr;
  /* m_dataBuf holds INIT_DATA_BUF_SIZE XML_Chars; m_dataBufEnd points just
     past them (both set in parserCreate) */
  XML_Char *m_dataBuf;
  XML_Char *m_dataBufEnd;
  /* application callbacks */
  XML_StartElementHandler m_startElementHandler;
  XML_EndElementHandler m_endElementHandler;
  XML_CharacterDataHandler m_characterDataHandler;
  XML_ProcessingInstructionHandler m_processingInstructionHandler;
  XML_CommentHandler m_commentHandler;
  XML_StartCdataSectionHandler m_startCdataSectionHandler;
  XML_EndCdataSectionHandler m_endCdataSectionHandler;
  XML_DefaultHandler m_defaultHandler;
  XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
  XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
  XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
  XML_NotationDeclHandler m_notationDeclHandler;
  XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
  XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
  XML_NotStandaloneHandler m_notStandaloneHandler;
  XML_ExternalEntityRefHandler m_externalEntityRefHandler;
  XML_Parser m_externalEntityRefHandlerArg;
  XML_SkippedEntityHandler m_skippedEntityHandler;
  XML_UnknownEncodingHandler m_unknownEncodingHandler;
  XML_ElementDeclHandler m_elementDeclHandler;
  XML_AttlistDeclHandler m_attlistDeclHandler;
  XML_EntityDeclHandler m_entityDeclHandler;
  XML_XmlDeclHandler m_xmlDeclHandler;
  /* encoding state */
  const ENCODING *m_encoding;
  INIT_ENCODING m_initEncoding;
  const ENCODING *m_internalEncoding;
  const XML_Char *m_protocolEncodingName;
  /* namespace processing flag; when set, startParsing() installs the
     implicit "xml" prefix context */
  XML_Bool m_ns;
  XML_Bool m_ns_triplets;
  void *m_unknownEncodingMem;
  void *m_unknownEncodingData;
  void *m_unknownEncodingHandlerData;
  void(XMLCALL *m_unknownEncodingRelease)(void *);
  PROLOG_STATE m_prologState;
  /* processing function currently in charge (see the Processor typedef) */
  Processor *m_processor;
  enum XML_Error m_errorCode;
  const char *m_eventPtr;
  const char *m_eventEndPtr;
  const char *m_positionPtr;
  OPEN_INTERNAL_ENTITY *m_openInternalEntities;
  OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
  XML_Bool m_defaultExpandInternalEntities;
  int m_tagLevel;
  /* state of the declaration currently being parsed */
  ENTITY *m_declEntity;
  const XML_Char *m_doctypeName;
  const XML_Char *m_doctypeSysid;
  const XML_Char *m_doctypePubid;
  const XML_Char *m_declAttributeType;
  const XML_Char *m_declNotationName;
  const XML_Char *m_declNotationPublicId;
  ELEMENT_TYPE *m_declElementType;
  ATTRIBUTE_ID *m_declAttributeId;
  XML_Bool m_declAttributeIsCdata;
  XML_Bool m_declAttributeIsId;
  /* either handed to parserCreate() or created there via dtdCreate() */
  DTD *m_dtd;
  const XML_Char *m_curBase;
  TAG *m_tagStack;
  TAG *m_freeTagList;
  BINDING *m_inheritedBindings;
  BINDING *m_freeBindingList;
  /* m_atts has room for m_attsSize entries (initially INIT_ATTS_SIZE) */
  int m_attsSize;
  int m_nSpecifiedAtts;
  int m_idAttIndex;
  ATTRIBUTE *m_atts;
  NS_ATT *m_nsAtts;
  unsigned long m_nsAttsVersion;
  unsigned char m_nsAttsPower;
#ifdef XML_ATTR_INFO
  /* allocated alongside m_atts with the same element count */
  XML_AttrInfo *m_attInfo;
#endif
  POSITION m_position;
  STRING_POOL m_tempPool;
  STRING_POOL m_temp2Pool;
  char *m_groupConnector;
  unsigned int m_groupSize;
  XML_Char m_namespaceSeparator;
  /* non-NULL for child parsers; get_hash_secret_salt() follows this chain
     up to the root */
  XML_Parser m_parentParser;
  XML_ParsingStatus m_parsingStatus;
#ifdef XML_DTD
  XML_Bool m_isParamEntity;
  XML_Bool m_useForeignDTD;
  enum XML_ParamEntityParsing m_paramEntityParsing;
#endif
  /* hash salt; a value of 0 makes startParsing() generate a fresh one */
  unsigned long m_hash_secret_salt;
};
618
/* Allocation through the memory suite stored in the parser.  'parser' is a
   pointer to struct XML_ParserStruct; it is parenthesized so that any
   pointer expression (e.g. 'parsers + 1') can be passed. */
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
622
/* Create a parser using the default allocator and no namespace separator.
   Simply forwards to XML_ParserCreate_MM() with NULL for both the memory
   suite and the separator; returns whatever that call returns. */
XML_Parser XMLCALL
XML_ParserCreate(const XML_Char *encodingName) {
  return XML_ParserCreate_MM(encodingName, NULL, NULL);
}
627
628 XML_Parser XMLCALL
XML_ParserCreateNS(const XML_Char * encodingName,XML_Char nsSep)629 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
630 XML_Char tmp[2];
631 *tmp = nsSep;
632 return XML_ParserCreate_MM(encodingName, NULL, tmp);
633 }
634
/* Context string passed to setContext() by startParsing() when namespace
   processing is enabled.  Spelled out character by character it reads
   "xml=http://www.w3.org/XML/1998/namespace". */
static const XML_Char implicitContext[]
    = {ASCII_x,     ASCII_m,     ASCII_l,      ASCII_EQUALS, ASCII_h,
       ASCII_t,     ASCII_t,     ASCII_p,      ASCII_COLON,  ASCII_SLASH,
       ASCII_SLASH, ASCII_w,     ASCII_w,      ASCII_w,      ASCII_PERIOD,
       ASCII_w,     ASCII_3,     ASCII_PERIOD, ASCII_o,      ASCII_r,
       ASCII_g,     ASCII_SLASH, ASCII_X,      ASCII_M,      ASCII_L,
       ASCII_SLASH, ASCII_1,     ASCII_9,      ASCII_9,      ASCII_8,
       ASCII_SLASH, ASCII_n,     ASCII_a,      ASCII_m,      ASCII_e,
       ASCII_s,     ASCII_p,     ASCII_a,      ASCII_c,      ASCII_e,
       '\0'};
645
646 /* To avoid warnings about unused functions: */
647 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
648
649 # if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
650
/* Obtain entropy on Linux 3.17+ through getrandom in non-blocking mode.

   target: buffer receiving the random bytes
   count:  number of bytes wanted

   Returns 1 when all count bytes were written, 0 otherwise (a request for
   zero bytes also yields 0, as before).

   errno is inspected only after a call that actually failed.  A short
   result is continued from where it stopped, and only a failure with EINTR
   is retried; previously a stale errno could either abort after a short
   result or keep the loop spinning. */
static int
writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
  size_t bytesWrittenTotal = 0;
  const unsigned int getrandomFlags = GRND_NONBLOCK;

  for (;;) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    /* Both calls return a signed, pointer-sized count; keep it in a long
       rather than narrowing it to int. */
    const long bytesWrittenMore =
#  if defined(HAVE_GETRANDOM)
        (long)getrandom(currentTarget, bytesToWrite, getrandomFlags);
#  else
        (long)syscall(SYS_getrandom, currentTarget, bytesToWrite,
                      getrandomFlags);
#  endif

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count)
        return 1;
      continue; /* short result: ask for the remainder */
    }

    if (bytesWrittenMore < 0 && errno == EINTR)
      continue; /* interrupted before anything was delivered: retry */

    return 0;
  }
}
678
679 # endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
680
681 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
682
/* Extract entropy from /dev/urandom.

   target: buffer receiving the random bytes
   count:  number of bytes wanted

   Returns 1 when all count bytes were read, 0 when the device could not be
   opened or the read failed (a request for zero bytes also yields 0, as
   before).  The descriptor is closed on every path after a successful
   open.

   errno is inspected only after read() has actually failed.  Short reads
   are continued and only EINTR failures are retried; previously a stale
   errno value decided whether the loop went on. */
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  int success = 0; /* full count bytes written? */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  for (;;) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count) {
        success = 1;
        break;
      }
      continue; /* short read: fetch the remainder */
    }

    if (bytesWrittenMore < 0 && errno == EINTR)
      continue; /* interrupted before any data arrived: retry */

    break; /* end of file or a real error */
  }

  close(fd);
  return success;
}
710
711 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
712
713 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
714
715 #if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)
716
/* Fill target with count random bytes taken from arc4random().
   Every 32-bit value drawn supplies up to four bytes, least significant
   byte first; a final partial word is simply cut short. */
static void
writeRandomBytes_arc4random(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t filled = 0;

  while (filled < count) {
    const uint32_t word = arc4random();
    const size_t remaining = count - filled;
    const size_t chunk = (remaining < sizeof(word)) ? remaining : sizeof(word);
    size_t k;

    for (k = 0; k < chunk; k++) {
      out[filled + k] = (uint8_t)(word >> (k * 8));
    }
    filled += chunk;
  }
}
732
733 #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
734
735 #ifdef _WIN32
736
/* Obtain entropy on Windows using the rand_s() function which
 * generates cryptographically secure random numbers.  Internally it
 * uses RtlGenRandom API which is present in Windows XP and later.
 *
 * Writes count bytes to target, taking up to four bytes (least significant
 * first) from each value drawn.  Returns 1 on success and 0 as soon as
 * rand_s() reports an error.
 */
static int
writeRandomBytes_rand_s(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t filled = 0;

  while (filled < count) {
    unsigned int word = 0;
    size_t remaining;
    size_t chunk;
    size_t k;

    if (rand_s(&word) != 0)
      return 0; /* failure */

    remaining = count - filled;
    chunk = (remaining < sizeof(word)) ? remaining : sizeof(word);
    for (k = 0; k < chunk; k++) {
      out[filled + k] = (uint8_t)(word >> (k * 8));
    }
    filled += chunk;
  }
  return 1; /* success */
}
760
761 #endif /* _WIN32 */
762
763 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
764
/* Low quality, time based entropy for the fallback path.
   Windows: XOR of the two halves of the current system FILETIME.
   Elsewhere: the microsecond part of gettimeofday(). */
static unsigned long
gather_time_entropy(void) {
#  ifdef _WIN32
  FILETIME fileTime;
  GetSystemTimeAsFileTime(&fileTime); /* never fails */
  return fileTime.dwHighDateTime ^ fileTime.dwLowDateTime;
#  else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  /* Checked in debug builds only; the cast keeps NDEBUG builds free of an
     unused-variable warning. */
  assert(rc == 0);
  (void)rc;

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#  endif
}
787
788 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
789
/* Pass-through helper: returns entropy unchanged.  When the environment
   variable EXPAT_ENTROPY_DEBUG is exactly "1", the value is additionally
   reported on stderr together with label, which names its source. */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  const char *const debugSetting = getenv("EXPAT_ENTROPY_DEBUG");
  const int enabled = (debugSetting != NULL) && (strcmp(debugSetting, "1") == 0);

  if (enabled) {
    const int hexDigits = (int)sizeof(entropy) * 2;
    fprintf(stderr, "Entropy: %s --> 0x%0*lx (%lu bytes)\n", label, hexDigits,
            entropy, (unsigned long)sizeof(entropy));
  }
  return entropy;
}
799
/* Produce a salt value for the parser's hash functions.

   The provider is chosen at compile time and, within the last group, at
   run time:
     1. arc4random_buf() or arc4random() when configured; these return
        unconditionally.
     2. Otherwise rand_s() on Windows or getrandom elsewhere, then
        /dev/urandom when XML_DEV_URANDOM is defined; each is used only if
        it reports success.
     3. As a last resort, time XOR process id, multiplied by a Mersenne
        prime (unsigned arithmetic, so the product wraps around).

   Every result passes through ENTROPY_DEBUG(), which can log it.
   The parser argument is unused. */
static unsigned long
generate_hash_secret_salt(XML_Parser parser) {
  unsigned long entropy;
  (void)parser;

  /* "Failproof" high quality providers: */
#if defined(HAVE_ARC4RANDOM_BUF)
  arc4random_buf(&entropy, sizeof(entropy));
  return ENTROPY_DEBUG("arc4random_buf", entropy);
#elif defined(HAVE_ARC4RANDOM)
  writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
  return ENTROPY_DEBUG("arc4random", entropy);
#else
  /* Try high quality providers first .. */
#  ifdef _WIN32
  if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
    return ENTROPY_DEBUG("rand_s", entropy);
  }
#  elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
  if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
    return ENTROPY_DEBUG("getrandom", entropy);
  }
#  endif
#  if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
  if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
    return ENTROPY_DEBUG("/dev/urandom", entropy);
  }
#  endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
  /* .. and self-made low quality for backup: */

  /* Process ID is 0 bits entropy if attacker has local access */
  entropy = gather_time_entropy() ^ getpid();

  /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
  if (sizeof(unsigned long) == 4) {
    return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
  } else {
    return ENTROPY_DEBUG("fallback(8)",
                         entropy * (unsigned long)2305843009213693951ULL);
  }
#endif
}
842
843 static unsigned long
get_hash_secret_salt(XML_Parser parser)844 get_hash_secret_salt(XML_Parser parser) {
845 if (parser->m_parentParser != NULL)
846 return get_hash_secret_salt(parser->m_parentParser);
847 return parser->m_hash_secret_salt;
848 }
849
850 static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser)851 startParsing(XML_Parser parser) {
852 /* hash functions must be initialized before setContext() is called */
853 if (parser->m_hash_secret_salt == 0)
854 parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
855 if (parser->m_ns) {
856 /* implicit context only set for root parser, since child
857 parsers (i.e. external entity parsers) will inherit it
858 */
859 return setContext(parser, implicitContext);
860 }
861 return XML_TRUE;
862 }
863
/* Create a parser with an optional memory suite and an optional namespace
   separator.  Forwards to parserCreate() with no pre-existing DTD (NULL),
   in which case parserCreate() creates one itself.  Returns the new
   parser, or NULL if an allocation inside parserCreate() fails. */
XML_Parser XMLCALL
XML_ParserCreate_MM(const XML_Char *encodingName,
                    const XML_Memory_Handling_Suite *memsuite,
                    const XML_Char *nameSep) {
  return parserCreate(encodingName, memsuite, nameSep, NULL);
}
870
871 static XML_Parser
parserCreate(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep,DTD * dtd)872 parserCreate(const XML_Char *encodingName,
873 const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
874 DTD *dtd) {
875 XML_Parser parser;
876
877 if (memsuite) {
878 XML_Memory_Handling_Suite *mtemp;
879 parser = (XML_Parser)memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
880 if (parser != NULL) {
881 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
882 mtemp->malloc_fcn = memsuite->malloc_fcn;
883 mtemp->realloc_fcn = memsuite->realloc_fcn;
884 mtemp->free_fcn = memsuite->free_fcn;
885 }
886 } else {
887 XML_Memory_Handling_Suite *mtemp;
888 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
889 if (parser != NULL) {
890 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
891 mtemp->malloc_fcn = malloc;
892 mtemp->realloc_fcn = realloc;
893 mtemp->free_fcn = free;
894 }
895 }
896
897 if (! parser)
898 return parser;
899
900 parser->m_buffer = NULL;
901 parser->m_bufferLim = NULL;
902
903 parser->m_attsSize = INIT_ATTS_SIZE;
904 parser->m_atts
905 = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
906 if (parser->m_atts == NULL) {
907 FREE(parser, parser);
908 return NULL;
909 }
910 #ifdef XML_ATTR_INFO
911 parser->m_attInfo = (XML_AttrInfo *)MALLOC(
912 parser, parser->m_attsSize * sizeof(XML_AttrInfo));
913 if (parser->m_attInfo == NULL) {
914 FREE(parser, parser->m_atts);
915 FREE(parser, parser);
916 return NULL;
917 }
918 #endif
919 parser->m_dataBuf
920 = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
921 if (parser->m_dataBuf == NULL) {
922 FREE(parser, parser->m_atts);
923 #ifdef XML_ATTR_INFO
924 FREE(parser, parser->m_attInfo);
925 #endif
926 FREE(parser, parser);
927 return NULL;
928 }
929 parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
930
931 if (dtd)
932 parser->m_dtd = dtd;
933 else {
934 parser->m_dtd = dtdCreate(&parser->m_mem);
935 if (parser->m_dtd == NULL) {
936 FREE(parser, parser->m_dataBuf);
937 FREE(parser, parser->m_atts);
938 #ifdef XML_ATTR_INFO
939 FREE(parser, parser->m_attInfo);
940 #endif
941 FREE(parser, parser);
942 return NULL;
943 }
944 }
945
946 parser->m_freeBindingList = NULL;
947 parser->m_freeTagList = NULL;
948 parser->m_freeInternalEntities = NULL;
949
950 parser->m_groupSize = 0;
951 parser->m_groupConnector = NULL;
952
953 parser->m_unknownEncodingHandler = NULL;
954 parser->m_unknownEncodingHandlerData = NULL;
955
956 parser->m_namespaceSeparator = ASCII_EXCL;
957 parser->m_ns = XML_FALSE;
958 parser->m_ns_triplets = XML_FALSE;
959
960 parser->m_nsAtts = NULL;
961 parser->m_nsAttsVersion = 0;
962 parser->m_nsAttsPower = 0;
963
964 parser->m_protocolEncodingName = NULL;
965
966 poolInit(&parser->m_tempPool, &(parser->m_mem));
967 poolInit(&parser->m_temp2Pool, &(parser->m_mem));
968 parserInit(parser, encodingName);
969
970 if (encodingName && ! parser->m_protocolEncodingName) {
971 XML_ParserFree(parser);
972 return NULL;
973 }
974
975 if (nameSep) {
976 parser->m_ns = XML_TRUE;
977 parser->m_internalEncoding = XmlGetInternalEncodingNS();
978 parser->m_namespaceSeparator = *nameSep;
979 } else {
980 parser->m_internalEncoding = XmlGetInternalEncoding();
981 }
982
983 return parser;
984 }
985
986 static void
parserInit(XML_Parser parser,const XML_Char * encodingName)987 parserInit(XML_Parser parser, const XML_Char *encodingName) {
988 parser->m_processor = prologInitProcessor;
989 XmlPrologStateInit(&parser->m_prologState);
990 if (encodingName != NULL) {
991 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
992 }
993 parser->m_curBase = NULL;
994 XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
995 parser->m_userData = NULL;
996 parser->m_handlerArg = NULL;
997 parser->m_startElementHandler = NULL;
998 parser->m_endElementHandler = NULL;
999 parser->m_characterDataHandler = NULL;
1000 parser->m_processingInstructionHandler = NULL;
1001 parser->m_commentHandler = NULL;
1002 parser->m_startCdataSectionHandler = NULL;
1003 parser->m_endCdataSectionHandler = NULL;
1004 parser->m_defaultHandler = NULL;
1005 parser->m_startDoctypeDeclHandler = NULL;
1006 parser->m_endDoctypeDeclHandler = NULL;
1007 parser->m_unparsedEntityDeclHandler = NULL;
1008 parser->m_notationDeclHandler = NULL;
1009 parser->m_startNamespaceDeclHandler = NULL;
1010 parser->m_endNamespaceDeclHandler = NULL;
1011 parser->m_notStandaloneHandler = NULL;
1012 parser->m_externalEntityRefHandler = NULL;
1013 parser->m_externalEntityRefHandlerArg = parser;
1014 parser->m_skippedEntityHandler = NULL;
1015 parser->m_elementDeclHandler = NULL;
1016 parser->m_attlistDeclHandler = NULL;
1017 parser->m_entityDeclHandler = NULL;
1018 parser->m_xmlDeclHandler = NULL;
1019 parser->m_bufferPtr = parser->m_buffer;
1020 parser->m_bufferEnd = parser->m_buffer;
1021 parser->m_parseEndByteIndex = 0;
1022 parser->m_parseEndPtr = NULL;
1023 parser->m_declElementType = NULL;
1024 parser->m_declAttributeId = NULL;
1025 parser->m_declEntity = NULL;
1026 parser->m_doctypeName = NULL;
1027 parser->m_doctypeSysid = NULL;
1028 parser->m_doctypePubid = NULL;
1029 parser->m_declAttributeType = NULL;
1030 parser->m_declNotationName = NULL;
1031 parser->m_declNotationPublicId = NULL;
1032 parser->m_declAttributeIsCdata = XML_FALSE;
1033 parser->m_declAttributeIsId = XML_FALSE;
1034 memset(&parser->m_position, 0, sizeof(POSITION));
1035 parser->m_errorCode = XML_ERROR_NONE;
1036 parser->m_eventPtr = NULL;
1037 parser->m_eventEndPtr = NULL;
1038 parser->m_positionPtr = NULL;
1039 parser->m_openInternalEntities = NULL;
1040 parser->m_defaultExpandInternalEntities = XML_TRUE;
1041 parser->m_tagLevel = 0;
1042 parser->m_tagStack = NULL;
1043 parser->m_inheritedBindings = NULL;
1044 parser->m_nSpecifiedAtts = 0;
1045 parser->m_unknownEncodingMem = NULL;
1046 parser->m_unknownEncodingRelease = NULL;
1047 parser->m_unknownEncodingData = NULL;
1048 parser->m_parentParser = NULL;
1049 parser->m_parsingStatus.parsing = XML_INITIALIZED;
1050 #ifdef XML_DTD
1051 parser->m_isParamEntity = XML_FALSE;
1052 parser->m_useForeignDTD = XML_FALSE;
1053 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1054 #endif
1055 parser->m_hash_secret_salt = 0;
1056 }
1057
1058 /* moves list of bindings to m_freeBindingList */
1059 static void FASTCALL
moveToFreeBindingList(XML_Parser parser,BINDING * bindings)1060 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
1061 while (bindings) {
1062 BINDING *b = bindings;
1063 bindings = bindings->nextTagBinding;
1064 b->nextTagBinding = parser->m_freeBindingList;
1065 parser->m_freeBindingList = b;
1066 }
1067 }
1068
1069 XML_Bool XMLCALL
XML_ParserReset(XML_Parser parser,const XML_Char * encodingName)1070 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
1071 TAG *tStk;
1072 OPEN_INTERNAL_ENTITY *openEntityList;
1073
1074 if (parser == NULL)
1075 return XML_FALSE;
1076
1077 if (parser->m_parentParser)
1078 return XML_FALSE;
1079 /* move m_tagStack to m_freeTagList */
1080 tStk = parser->m_tagStack;
1081 while (tStk) {
1082 TAG *tag = tStk;
1083 tStk = tStk->parent;
1084 tag->parent = parser->m_freeTagList;
1085 moveToFreeBindingList(parser, tag->bindings);
1086 tag->bindings = NULL;
1087 parser->m_freeTagList = tag;
1088 }
1089 /* move m_openInternalEntities to m_freeInternalEntities */
1090 openEntityList = parser->m_openInternalEntities;
1091 while (openEntityList) {
1092 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1093 openEntityList = openEntity->next;
1094 openEntity->next = parser->m_freeInternalEntities;
1095 parser->m_freeInternalEntities = openEntity;
1096 }
1097 moveToFreeBindingList(parser, parser->m_inheritedBindings);
1098 FREE(parser, parser->m_unknownEncodingMem);
1099 if (parser->m_unknownEncodingRelease)
1100 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1101 poolClear(&parser->m_tempPool);
1102 poolClear(&parser->m_temp2Pool);
1103 FREE(parser, (void *)parser->m_protocolEncodingName);
1104 parser->m_protocolEncodingName = NULL;
1105 parserInit(parser, encodingName);
1106 dtdReset(parser->m_dtd, &parser->m_mem);
1107 return XML_TRUE;
1108 }
1109
1110 enum XML_Status XMLCALL
XML_SetEncoding(XML_Parser parser,const XML_Char * encodingName)1111 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
1112 if (parser == NULL)
1113 return XML_STATUS_ERROR;
1114 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1115 XXX There's no way for the caller to determine which of the
1116 XXX possible error cases caused the XML_STATUS_ERROR return.
1117 */
1118 if (parser->m_parsingStatus.parsing == XML_PARSING
1119 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1120 return XML_STATUS_ERROR;
1121
1122 /* Get rid of any previous encoding name */
1123 FREE(parser, (void *)parser->m_protocolEncodingName);
1124
1125 if (encodingName == NULL)
1126 /* No new encoding name */
1127 parser->m_protocolEncodingName = NULL;
1128 else {
1129 /* Copy the new encoding name into allocated memory */
1130 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1131 if (! parser->m_protocolEncodingName)
1132 return XML_STATUS_ERROR;
1133 }
1134 return XML_STATUS_OK;
1135 }
1136
1137 XML_Parser XMLCALL
XML_ExternalEntityParserCreate(XML_Parser oldParser,const XML_Char * context,const XML_Char * encodingName)1138 XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
1139 const XML_Char *encodingName) {
1140 XML_Parser parser = oldParser;
1141 DTD *newDtd = NULL;
1142 DTD *oldDtd;
1143 XML_StartElementHandler oldStartElementHandler;
1144 XML_EndElementHandler oldEndElementHandler;
1145 XML_CharacterDataHandler oldCharacterDataHandler;
1146 XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1147 XML_CommentHandler oldCommentHandler;
1148 XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1149 XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1150 XML_DefaultHandler oldDefaultHandler;
1151 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1152 XML_NotationDeclHandler oldNotationDeclHandler;
1153 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1154 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1155 XML_NotStandaloneHandler oldNotStandaloneHandler;
1156 XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1157 XML_SkippedEntityHandler oldSkippedEntityHandler;
1158 XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1159 XML_ElementDeclHandler oldElementDeclHandler;
1160 XML_AttlistDeclHandler oldAttlistDeclHandler;
1161 XML_EntityDeclHandler oldEntityDeclHandler;
1162 XML_XmlDeclHandler oldXmlDeclHandler;
1163 ELEMENT_TYPE *oldDeclElementType;
1164
1165 void *oldUserData;
1166 void *oldHandlerArg;
1167 XML_Bool oldDefaultExpandInternalEntities;
1168 XML_Parser oldExternalEntityRefHandlerArg;
1169 #ifdef XML_DTD
1170 enum XML_ParamEntityParsing oldParamEntityParsing;
1171 int oldInEntityValue;
1172 #endif
1173 XML_Bool oldns_triplets;
1174 /* Note that the new parser shares the same hash secret as the old
1175 parser, so that dtdCopy and copyEntityTable can lookup values
1176 from hash tables associated with either parser without us having
1177 to worry which hash secrets each table has.
1178 */
1179 unsigned long oldhash_secret_salt;
1180
1181 /* Validate the oldParser parameter before we pull everything out of it */
1182 if (oldParser == NULL)
1183 return NULL;
1184
1185 /* Stash the original parser contents on the stack */
1186 oldDtd = parser->m_dtd;
1187 oldStartElementHandler = parser->m_startElementHandler;
1188 oldEndElementHandler = parser->m_endElementHandler;
1189 oldCharacterDataHandler = parser->m_characterDataHandler;
1190 oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1191 oldCommentHandler = parser->m_commentHandler;
1192 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1193 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1194 oldDefaultHandler = parser->m_defaultHandler;
1195 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1196 oldNotationDeclHandler = parser->m_notationDeclHandler;
1197 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1198 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1199 oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1200 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1201 oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1202 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1203 oldElementDeclHandler = parser->m_elementDeclHandler;
1204 oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1205 oldEntityDeclHandler = parser->m_entityDeclHandler;
1206 oldXmlDeclHandler = parser->m_xmlDeclHandler;
1207 oldDeclElementType = parser->m_declElementType;
1208
1209 oldUserData = parser->m_userData;
1210 oldHandlerArg = parser->m_handlerArg;
1211 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1212 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
1213 #ifdef XML_DTD
1214 oldParamEntityParsing = parser->m_paramEntityParsing;
1215 oldInEntityValue = parser->m_prologState.inEntityValue;
1216 #endif
1217 oldns_triplets = parser->m_ns_triplets;
1218 /* Note that the new parser shares the same hash secret as the old
1219 parser, so that dtdCopy and copyEntityTable can lookup values
1220 from hash tables associated with either parser without us having
1221 to worry which hash secrets each table has.
1222 */
1223 oldhash_secret_salt = parser->m_hash_secret_salt;
1224
1225 #ifdef XML_DTD
1226 if (! context)
1227 newDtd = oldDtd;
1228 #endif /* XML_DTD */
1229
1230 /* Note that the magical uses of the pre-processor to make field
1231 access look more like C++ require that `parser' be overwritten
1232 here. This makes this function more painful to follow than it
1233 would be otherwise.
1234 */
1235 if (parser->m_ns) {
1236 XML_Char tmp[2];
1237 *tmp = parser->m_namespaceSeparator;
1238 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
1239 } else {
1240 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
1241 }
1242
1243 if (! parser)
1244 return NULL;
1245
1246 parser->m_startElementHandler = oldStartElementHandler;
1247 parser->m_endElementHandler = oldEndElementHandler;
1248 parser->m_characterDataHandler = oldCharacterDataHandler;
1249 parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1250 parser->m_commentHandler = oldCommentHandler;
1251 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1252 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1253 parser->m_defaultHandler = oldDefaultHandler;
1254 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1255 parser->m_notationDeclHandler = oldNotationDeclHandler;
1256 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1257 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1258 parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1259 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1260 parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1261 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1262 parser->m_elementDeclHandler = oldElementDeclHandler;
1263 parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1264 parser->m_entityDeclHandler = oldEntityDeclHandler;
1265 parser->m_xmlDeclHandler = oldXmlDeclHandler;
1266 parser->m_declElementType = oldDeclElementType;
1267 parser->m_userData = oldUserData;
1268 if (oldUserData == oldHandlerArg)
1269 parser->m_handlerArg = parser->m_userData;
1270 else
1271 parser->m_handlerArg = parser;
1272 if (oldExternalEntityRefHandlerArg != oldParser)
1273 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1274 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1275 parser->m_ns_triplets = oldns_triplets;
1276 parser->m_hash_secret_salt = oldhash_secret_salt;
1277 parser->m_parentParser = oldParser;
1278 #ifdef XML_DTD
1279 parser->m_paramEntityParsing = oldParamEntityParsing;
1280 parser->m_prologState.inEntityValue = oldInEntityValue;
1281 if (context) {
1282 #endif /* XML_DTD */
1283 if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
1284 || ! setContext(parser, context)) {
1285 XML_ParserFree(parser);
1286 return NULL;
1287 }
1288 parser->m_processor = externalEntityInitProcessor;
1289 #ifdef XML_DTD
1290 } else {
1291 /* The DTD instance referenced by parser->m_dtd is shared between the
1292 document's root parser and external PE parsers, therefore one does not
1293 need to call setContext. In addition, one also *must* not call
1294 setContext, because this would overwrite existing prefix->binding
1295 pointers in parser->m_dtd with ones that get destroyed with the external
1296 PE parser. This would leave those prefixes with dangling pointers.
1297 */
1298 parser->m_isParamEntity = XML_TRUE;
1299 XmlPrologStateInitExternalEntity(&parser->m_prologState);
1300 parser->m_processor = externalParEntInitProcessor;
1301 }
1302 #endif /* XML_DTD */
1303 return parser;
1304 }
1305
1306 static void FASTCALL
destroyBindings(BINDING * bindings,XML_Parser parser)1307 destroyBindings(BINDING *bindings, XML_Parser parser) {
1308 for (;;) {
1309 BINDING *b = bindings;
1310 if (! b)
1311 break;
1312 bindings = b->nextTagBinding;
1313 FREE(parser, b->uri);
1314 FREE(parser, b);
1315 }
1316 }
1317
1318 void XMLCALL
XML_ParserFree(XML_Parser parser)1319 XML_ParserFree(XML_Parser parser) {
1320 TAG *tagList;
1321 OPEN_INTERNAL_ENTITY *entityList;
1322 if (parser == NULL)
1323 return;
1324 /* free m_tagStack and m_freeTagList */
1325 tagList = parser->m_tagStack;
1326 for (;;) {
1327 TAG *p;
1328 if (tagList == NULL) {
1329 if (parser->m_freeTagList == NULL)
1330 break;
1331 tagList = parser->m_freeTagList;
1332 parser->m_freeTagList = NULL;
1333 }
1334 p = tagList;
1335 tagList = tagList->parent;
1336 FREE(parser, p->buf);
1337 destroyBindings(p->bindings, parser);
1338 FREE(parser, p);
1339 }
1340 /* free m_openInternalEntities and m_freeInternalEntities */
1341 entityList = parser->m_openInternalEntities;
1342 for (;;) {
1343 OPEN_INTERNAL_ENTITY *openEntity;
1344 if (entityList == NULL) {
1345 if (parser->m_freeInternalEntities == NULL)
1346 break;
1347 entityList = parser->m_freeInternalEntities;
1348 parser->m_freeInternalEntities = NULL;
1349 }
1350 openEntity = entityList;
1351 entityList = entityList->next;
1352 FREE(parser, openEntity);
1353 }
1354
1355 destroyBindings(parser->m_freeBindingList, parser);
1356 destroyBindings(parser->m_inheritedBindings, parser);
1357 poolDestroy(&parser->m_tempPool);
1358 poolDestroy(&parser->m_temp2Pool);
1359 FREE(parser, (void *)parser->m_protocolEncodingName);
1360 #ifdef XML_DTD
1361 /* external parameter entity parsers share the DTD structure
1362 parser->m_dtd with the root parser, so we must not destroy it
1363 */
1364 if (! parser->m_isParamEntity && parser->m_dtd)
1365 #else
1366 if (parser->m_dtd)
1367 #endif /* XML_DTD */
1368 dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser,
1369 &parser->m_mem);
1370 FREE(parser, (void *)parser->m_atts);
1371 #ifdef XML_ATTR_INFO
1372 FREE(parser, (void *)parser->m_attInfo);
1373 #endif
1374 FREE(parser, parser->m_groupConnector);
1375 FREE(parser, parser->m_buffer);
1376 FREE(parser, parser->m_dataBuf);
1377 FREE(parser, parser->m_nsAtts);
1378 FREE(parser, parser->m_unknownEncodingMem);
1379 if (parser->m_unknownEncodingRelease)
1380 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1381 FREE(parser, parser);
1382 }
1383
1384 void XMLCALL
XML_UseParserAsHandlerArg(XML_Parser parser)1385 XML_UseParserAsHandlerArg(XML_Parser parser) {
1386 if (parser != NULL)
1387 parser->m_handlerArg = parser;
1388 }
1389
1390 enum XML_Error XMLCALL
XML_UseForeignDTD(XML_Parser parser,XML_Bool useDTD)1391 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
1392 if (parser == NULL)
1393 return XML_ERROR_INVALID_ARGUMENT;
1394 #ifdef XML_DTD
1395 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1396 if (parser->m_parsingStatus.parsing == XML_PARSING
1397 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1398 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1399 parser->m_useForeignDTD = useDTD;
1400 return XML_ERROR_NONE;
1401 #else
1402 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1403 #endif
1404 }
1405
1406 void XMLCALL
XML_SetReturnNSTriplet(XML_Parser parser,int do_nst)1407 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
1408 if (parser == NULL)
1409 return;
1410 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1411 if (parser->m_parsingStatus.parsing == XML_PARSING
1412 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1413 return;
1414 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1415 }
1416
1417 void XMLCALL
XML_SetUserData(XML_Parser parser,void * p)1418 XML_SetUserData(XML_Parser parser, void *p) {
1419 if (parser == NULL)
1420 return;
1421 if (parser->m_handlerArg == parser->m_userData)
1422 parser->m_handlerArg = parser->m_userData = p;
1423 else
1424 parser->m_userData = p;
1425 }
1426
1427 enum XML_Status XMLCALL
XML_SetBase(XML_Parser parser,const XML_Char * p)1428 XML_SetBase(XML_Parser parser, const XML_Char *p) {
1429 if (parser == NULL)
1430 return XML_STATUS_ERROR;
1431 if (p) {
1432 p = poolCopyString(&parser->m_dtd->pool, p);
1433 if (! p)
1434 return XML_STATUS_ERROR;
1435 parser->m_curBase = p;
1436 } else
1437 parser->m_curBase = NULL;
1438 return XML_STATUS_OK;
1439 }
1440
1441 const XML_Char *XMLCALL
XML_GetBase(XML_Parser parser)1442 XML_GetBase(XML_Parser parser) {
1443 if (parser == NULL)
1444 return NULL;
1445 return parser->m_curBase;
1446 }
1447
1448 int XMLCALL
XML_GetSpecifiedAttributeCount(XML_Parser parser)1449 XML_GetSpecifiedAttributeCount(XML_Parser parser) {
1450 if (parser == NULL)
1451 return -1;
1452 return parser->m_nSpecifiedAtts;
1453 }
1454
1455 int XMLCALL
XML_GetIdAttributeIndex(XML_Parser parser)1456 XML_GetIdAttributeIndex(XML_Parser parser) {
1457 if (parser == NULL)
1458 return -1;
1459 return parser->m_idAttIndex;
1460 }
1461
#ifdef XML_ATTR_INFO
/* Returns the parser's attribute info array (m_attInfo), or NULL for a
   NULL parser. Only available when built with XML_ATTR_INFO.
*/
const XML_AttrInfo *XMLCALL
XML_GetAttributeInfo(XML_Parser parser) {
  return (parser != NULL) ? parser->m_attInfo : NULL;
}
#endif
1470
1471 void XMLCALL
XML_SetElementHandler(XML_Parser parser,XML_StartElementHandler start,XML_EndElementHandler end)1472 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
1473 XML_EndElementHandler end) {
1474 if (parser == NULL)
1475 return;
1476 parser->m_startElementHandler = start;
1477 parser->m_endElementHandler = end;
1478 }
1479
1480 void XMLCALL
XML_SetStartElementHandler(XML_Parser parser,XML_StartElementHandler start)1481 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
1482 if (parser != NULL)
1483 parser->m_startElementHandler = start;
1484 }
1485
1486 void XMLCALL
XML_SetEndElementHandler(XML_Parser parser,XML_EndElementHandler end)1487 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
1488 if (parser != NULL)
1489 parser->m_endElementHandler = end;
1490 }
1491
1492 void XMLCALL
XML_SetCharacterDataHandler(XML_Parser parser,XML_CharacterDataHandler handler)1493 XML_SetCharacterDataHandler(XML_Parser parser,
1494 XML_CharacterDataHandler handler) {
1495 if (parser != NULL)
1496 parser->m_characterDataHandler = handler;
1497 }
1498
1499 void XMLCALL
XML_SetProcessingInstructionHandler(XML_Parser parser,XML_ProcessingInstructionHandler handler)1500 XML_SetProcessingInstructionHandler(XML_Parser parser,
1501 XML_ProcessingInstructionHandler handler) {
1502 if (parser != NULL)
1503 parser->m_processingInstructionHandler = handler;
1504 }
1505
1506 void XMLCALL
XML_SetCommentHandler(XML_Parser parser,XML_CommentHandler handler)1507 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
1508 if (parser != NULL)
1509 parser->m_commentHandler = handler;
1510 }
1511
1512 void XMLCALL
XML_SetCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start,XML_EndCdataSectionHandler end)1513 XML_SetCdataSectionHandler(XML_Parser parser,
1514 XML_StartCdataSectionHandler start,
1515 XML_EndCdataSectionHandler end) {
1516 if (parser == NULL)
1517 return;
1518 parser->m_startCdataSectionHandler = start;
1519 parser->m_endCdataSectionHandler = end;
1520 }
1521
1522 void XMLCALL
XML_SetStartCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start)1523 XML_SetStartCdataSectionHandler(XML_Parser parser,
1524 XML_StartCdataSectionHandler start) {
1525 if (parser != NULL)
1526 parser->m_startCdataSectionHandler = start;
1527 }
1528
1529 void XMLCALL
XML_SetEndCdataSectionHandler(XML_Parser parser,XML_EndCdataSectionHandler end)1530 XML_SetEndCdataSectionHandler(XML_Parser parser,
1531 XML_EndCdataSectionHandler end) {
1532 if (parser != NULL)
1533 parser->m_endCdataSectionHandler = end;
1534 }
1535
1536 void XMLCALL
XML_SetDefaultHandler(XML_Parser parser,XML_DefaultHandler handler)1537 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
1538 if (parser == NULL)
1539 return;
1540 parser->m_defaultHandler = handler;
1541 parser->m_defaultExpandInternalEntities = XML_FALSE;
1542 }
1543
1544 void XMLCALL
XML_SetDefaultHandlerExpand(XML_Parser parser,XML_DefaultHandler handler)1545 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
1546 if (parser == NULL)
1547 return;
1548 parser->m_defaultHandler = handler;
1549 parser->m_defaultExpandInternalEntities = XML_TRUE;
1550 }
1551
1552 void XMLCALL
XML_SetDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start,XML_EndDoctypeDeclHandler end)1553 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
1554 XML_EndDoctypeDeclHandler end) {
1555 if (parser == NULL)
1556 return;
1557 parser->m_startDoctypeDeclHandler = start;
1558 parser->m_endDoctypeDeclHandler = end;
1559 }
1560
1561 void XMLCALL
XML_SetStartDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start)1562 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1563 XML_StartDoctypeDeclHandler start) {
1564 if (parser != NULL)
1565 parser->m_startDoctypeDeclHandler = start;
1566 }
1567
1568 void XMLCALL
XML_SetEndDoctypeDeclHandler(XML_Parser parser,XML_EndDoctypeDeclHandler end)1569 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
1570 if (parser != NULL)
1571 parser->m_endDoctypeDeclHandler = end;
1572 }
1573
1574 void XMLCALL
XML_SetUnparsedEntityDeclHandler(XML_Parser parser,XML_UnparsedEntityDeclHandler handler)1575 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1576 XML_UnparsedEntityDeclHandler handler) {
1577 if (parser != NULL)
1578 parser->m_unparsedEntityDeclHandler = handler;
1579 }
1580
1581 void XMLCALL
XML_SetNotationDeclHandler(XML_Parser parser,XML_NotationDeclHandler handler)1582 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
1583 if (parser != NULL)
1584 parser->m_notationDeclHandler = handler;
1585 }
1586
1587 void XMLCALL
XML_SetNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start,XML_EndNamespaceDeclHandler end)1588 XML_SetNamespaceDeclHandler(XML_Parser parser,
1589 XML_StartNamespaceDeclHandler start,
1590 XML_EndNamespaceDeclHandler end) {
1591 if (parser == NULL)
1592 return;
1593 parser->m_startNamespaceDeclHandler = start;
1594 parser->m_endNamespaceDeclHandler = end;
1595 }
1596
1597 void XMLCALL
XML_SetStartNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start)1598 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1599 XML_StartNamespaceDeclHandler start) {
1600 if (parser != NULL)
1601 parser->m_startNamespaceDeclHandler = start;
1602 }
1603
1604 void XMLCALL
XML_SetEndNamespaceDeclHandler(XML_Parser parser,XML_EndNamespaceDeclHandler end)1605 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1606 XML_EndNamespaceDeclHandler end) {
1607 if (parser != NULL)
1608 parser->m_endNamespaceDeclHandler = end;
1609 }
1610
1611 void XMLCALL
XML_SetNotStandaloneHandler(XML_Parser parser,XML_NotStandaloneHandler handler)1612 XML_SetNotStandaloneHandler(XML_Parser parser,
1613 XML_NotStandaloneHandler handler) {
1614 if (parser != NULL)
1615 parser->m_notStandaloneHandler = handler;
1616 }
1617
1618 void XMLCALL
XML_SetExternalEntityRefHandler(XML_Parser parser,XML_ExternalEntityRefHandler handler)1619 XML_SetExternalEntityRefHandler(XML_Parser parser,
1620 XML_ExternalEntityRefHandler handler) {
1621 if (parser != NULL)
1622 parser->m_externalEntityRefHandler = handler;
1623 }
1624
1625 void XMLCALL
XML_SetExternalEntityRefHandlerArg(XML_Parser parser,void * arg)1626 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
1627 if (parser == NULL)
1628 return;
1629 if (arg)
1630 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
1631 else
1632 parser->m_externalEntityRefHandlerArg = parser;
1633 }
1634
1635 void XMLCALL
XML_SetSkippedEntityHandler(XML_Parser parser,XML_SkippedEntityHandler handler)1636 XML_SetSkippedEntityHandler(XML_Parser parser,
1637 XML_SkippedEntityHandler handler) {
1638 if (parser != NULL)
1639 parser->m_skippedEntityHandler = handler;
1640 }
1641
1642 void XMLCALL
XML_SetUnknownEncodingHandler(XML_Parser parser,XML_UnknownEncodingHandler handler,void * data)1643 XML_SetUnknownEncodingHandler(XML_Parser parser,
1644 XML_UnknownEncodingHandler handler, void *data) {
1645 if (parser == NULL)
1646 return;
1647 parser->m_unknownEncodingHandler = handler;
1648 parser->m_unknownEncodingHandlerData = data;
1649 }
1650
1651 void XMLCALL
XML_SetElementDeclHandler(XML_Parser parser,XML_ElementDeclHandler eldecl)1652 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
1653 if (parser != NULL)
1654 parser->m_elementDeclHandler = eldecl;
1655 }
1656
1657 void XMLCALL
XML_SetAttlistDeclHandler(XML_Parser parser,XML_AttlistDeclHandler attdecl)1658 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
1659 if (parser != NULL)
1660 parser->m_attlistDeclHandler = attdecl;
1661 }
1662
1663 void XMLCALL
XML_SetEntityDeclHandler(XML_Parser parser,XML_EntityDeclHandler handler)1664 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
1665 if (parser != NULL)
1666 parser->m_entityDeclHandler = handler;
1667 }
1668
1669 void XMLCALL
XML_SetXmlDeclHandler(XML_Parser parser,XML_XmlDeclHandler handler)1670 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
1671 if (parser != NULL)
1672 parser->m_xmlDeclHandler = handler;
1673 }
1674
1675 int XMLCALL
XML_SetParamEntityParsing(XML_Parser parser,enum XML_ParamEntityParsing peParsing)1676 XML_SetParamEntityParsing(XML_Parser parser,
1677 enum XML_ParamEntityParsing peParsing) {
1678 if (parser == NULL)
1679 return 0;
1680 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1681 if (parser->m_parsingStatus.parsing == XML_PARSING
1682 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1683 return 0;
1684 #ifdef XML_DTD
1685 parser->m_paramEntityParsing = peParsing;
1686 return 1;
1687 #else
1688 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
1689 #endif
1690 }
1691
1692 int XMLCALL
XML_SetHashSalt(XML_Parser parser,unsigned long hash_salt)1693 XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
1694 if (parser == NULL)
1695 return 0;
1696 if (parser->m_parentParser)
1697 return XML_SetHashSalt(parser->m_parentParser, hash_salt);
1698 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1699 if (parser->m_parsingStatus.parsing == XML_PARSING
1700 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1701 return 0;
1702 parser->m_hash_secret_salt = hash_salt;
1703 return 1;
1704 }
1705
1706 enum XML_Status XMLCALL
XML_Parse(XML_Parser parser,const char * s,int len,int isFinal)1707 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
1708 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
1709 if (parser != NULL)
1710 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
1711 return XML_STATUS_ERROR;
1712 }
1713 switch (parser->m_parsingStatus.parsing) {
1714 case XML_SUSPENDED:
1715 parser->m_errorCode = XML_ERROR_SUSPENDED;
1716 return XML_STATUS_ERROR;
1717 case XML_FINISHED:
1718 parser->m_errorCode = XML_ERROR_FINISHED;
1719 return XML_STATUS_ERROR;
1720 case XML_INITIALIZED:
1721 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
1722 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1723 return XML_STATUS_ERROR;
1724 }
1725 /* fall through */
1726 default:
1727 parser->m_parsingStatus.parsing = XML_PARSING;
1728 }
1729
1730 if (len == 0) {
1731 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1732 if (! isFinal)
1733 return XML_STATUS_OK;
1734 parser->m_positionPtr = parser->m_bufferPtr;
1735 parser->m_parseEndPtr = parser->m_bufferEnd;
1736
1737 /* If data are left over from last buffer, and we now know that these
1738 data are the final chunk of input, then we have to check them again
1739 to detect errors based on that fact.
1740 */
1741 parser->m_errorCode
1742 = parser->m_processor(parser, parser->m_bufferPtr,
1743 parser->m_parseEndPtr, &parser->m_bufferPtr);
1744
1745 if (parser->m_errorCode == XML_ERROR_NONE) {
1746 switch (parser->m_parsingStatus.parsing) {
1747 case XML_SUSPENDED:
1748 /* It is hard to be certain, but it seems that this case
1749 * cannot occur. This code is cleaning up a previous parse
1750 * with no new data (since len == 0). Changing the parsing
1751 * state requires getting to execute a handler function, and
1752 * there doesn't seem to be an opportunity for that while in
1753 * this circumstance.
1754 *
1755 * Given the uncertainty, we retain the code but exclude it
1756 * from coverage tests.
1757 *
1758 * LCOV_EXCL_START
1759 */
1760 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
1761 parser->m_bufferPtr, &parser->m_position);
1762 parser->m_positionPtr = parser->m_bufferPtr;
1763 return XML_STATUS_SUSPENDED;
1764 /* LCOV_EXCL_STOP */
1765 case XML_INITIALIZED:
1766 case XML_PARSING:
1767 parser->m_parsingStatus.parsing = XML_FINISHED;
1768 /* fall through */
1769 default:
1770 return XML_STATUS_OK;
1771 }
1772 }
1773 parser->m_eventEndPtr = parser->m_eventPtr;
1774 parser->m_processor = errorProcessor;
1775 return XML_STATUS_ERROR;
1776 }
1777 #ifndef XML_CONTEXT_BYTES
1778 else if (parser->m_bufferPtr == parser->m_bufferEnd) {
1779 const char *end;
1780 int nLeftOver;
1781 enum XML_Status result;
1782 /* Detect overflow (a+b > MAX <==> b > MAX-a) */
1783 if (len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
1784 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1785 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1786 parser->m_processor = errorProcessor;
1787 return XML_STATUS_ERROR;
1788 }
1789 parser->m_parseEndByteIndex += len;
1790 parser->m_positionPtr = s;
1791 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1792
1793 parser->m_errorCode
1794 = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end);
1795
1796 if (parser->m_errorCode != XML_ERROR_NONE) {
1797 parser->m_eventEndPtr = parser->m_eventPtr;
1798 parser->m_processor = errorProcessor;
1799 return XML_STATUS_ERROR;
1800 } else {
1801 switch (parser->m_parsingStatus.parsing) {
1802 case XML_SUSPENDED:
1803 result = XML_STATUS_SUSPENDED;
1804 break;
1805 case XML_INITIALIZED:
1806 case XML_PARSING:
1807 if (isFinal) {
1808 parser->m_parsingStatus.parsing = XML_FINISHED;
1809 return XML_STATUS_OK;
1810 }
1811 /* fall through */
1812 default:
1813 result = XML_STATUS_OK;
1814 }
1815 }
1816
1817 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
1818 &parser->m_position);
1819 nLeftOver = s + len - end;
1820 if (nLeftOver) {
1821 if (parser->m_buffer == NULL
1822 || nLeftOver > parser->m_bufferLim - parser->m_buffer) {
1823 /* avoid _signed_ integer overflow */
1824 char *temp = NULL;
1825 const int bytesToAllocate = (int)((unsigned)len * 2U);
1826 if (bytesToAllocate > 0) {
1827 temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate);
1828 }
1829 if (temp == NULL) {
1830 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1831 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1832 parser->m_processor = errorProcessor;
1833 return XML_STATUS_ERROR;
1834 }
1835 parser->m_buffer = temp;
1836 parser->m_bufferLim = parser->m_buffer + bytesToAllocate;
1837 }
1838 memcpy(parser->m_buffer, end, nLeftOver);
1839 }
1840 parser->m_bufferPtr = parser->m_buffer;
1841 parser->m_bufferEnd = parser->m_buffer + nLeftOver;
1842 parser->m_positionPtr = parser->m_bufferPtr;
1843 parser->m_parseEndPtr = parser->m_bufferEnd;
1844 parser->m_eventPtr = parser->m_bufferPtr;
1845 parser->m_eventEndPtr = parser->m_bufferPtr;
1846 return result;
1847 }
1848 #endif /* not defined XML_CONTEXT_BYTES */
1849 else {
1850 void *buff = XML_GetBuffer(parser, len);
1851 if (buff == NULL)
1852 return XML_STATUS_ERROR;
1853 else {
1854 memcpy(buff, s, len);
1855 return XML_ParseBuffer(parser, len, isFinal);
1856 }
1857 }
1858 }
1859
1860 enum XML_Status XMLCALL
XML_ParseBuffer(XML_Parser parser,int len,int isFinal)1861 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
1862 const char *start;
1863 enum XML_Status result = XML_STATUS_OK;
1864
1865 if (parser == NULL)
1866 return XML_STATUS_ERROR;
1867 switch (parser->m_parsingStatus.parsing) {
1868 case XML_SUSPENDED:
1869 parser->m_errorCode = XML_ERROR_SUSPENDED;
1870 return XML_STATUS_ERROR;
1871 case XML_FINISHED:
1872 parser->m_errorCode = XML_ERROR_FINISHED;
1873 return XML_STATUS_ERROR;
1874 case XML_INITIALIZED:
1875 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
1876 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1877 return XML_STATUS_ERROR;
1878 }
1879 /* fall through */
1880 default:
1881 parser->m_parsingStatus.parsing = XML_PARSING;
1882 }
1883
1884 start = parser->m_bufferPtr;
1885 parser->m_positionPtr = start;
1886 parser->m_bufferEnd += len;
1887 parser->m_parseEndPtr = parser->m_bufferEnd;
1888 parser->m_parseEndByteIndex += len;
1889 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1890
1891 parser->m_errorCode = parser->m_processor(
1892 parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr);
1893
1894 if (parser->m_errorCode != XML_ERROR_NONE) {
1895 parser->m_eventEndPtr = parser->m_eventPtr;
1896 parser->m_processor = errorProcessor;
1897 return XML_STATUS_ERROR;
1898 } else {
1899 switch (parser->m_parsingStatus.parsing) {
1900 case XML_SUSPENDED:
1901 result = XML_STATUS_SUSPENDED;
1902 break;
1903 case XML_INITIALIZED:
1904 case XML_PARSING:
1905 if (isFinal) {
1906 parser->m_parsingStatus.parsing = XML_FINISHED;
1907 return result;
1908 }
1909 default:; /* should not happen */
1910 }
1911 }
1912
1913 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
1914 parser->m_bufferPtr, &parser->m_position);
1915 parser->m_positionPtr = parser->m_bufferPtr;
1916 return result;
1917 }
1918
1919 void *XMLCALL
XML_GetBuffer(XML_Parser parser,int len)1920 XML_GetBuffer(XML_Parser parser, int len) {
1921 if (parser == NULL)
1922 return NULL;
1923 if (len < 0) {
1924 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1925 return NULL;
1926 }
1927 switch (parser->m_parsingStatus.parsing) {
1928 case XML_SUSPENDED:
1929 parser->m_errorCode = XML_ERROR_SUSPENDED;
1930 return NULL;
1931 case XML_FINISHED:
1932 parser->m_errorCode = XML_ERROR_FINISHED;
1933 return NULL;
1934 default:;
1935 }
1936
1937 if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) {
1938 #ifdef XML_CONTEXT_BYTES
1939 int keep;
1940 #endif /* defined XML_CONTEXT_BYTES */
1941 /* Do not invoke signed arithmetic overflow: */
1942 int neededSize = (int)((unsigned)len
1943 + (unsigned)EXPAT_SAFE_PTR_DIFF(
1944 parser->m_bufferEnd, parser->m_bufferPtr));
1945 if (neededSize < 0) {
1946 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1947 return NULL;
1948 }
1949 #ifdef XML_CONTEXT_BYTES
1950 keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
1951 if (keep > XML_CONTEXT_BYTES)
1952 keep = XML_CONTEXT_BYTES;
1953 neededSize += keep;
1954 #endif /* defined XML_CONTEXT_BYTES */
1955 if (neededSize
1956 <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
1957 #ifdef XML_CONTEXT_BYTES
1958 if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
1959 int offset
1960 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
1961 - keep;
1962 /* The buffer pointers cannot be NULL here; we have at least some bytes
1963 * in the buffer */
1964 memmove(parser->m_buffer, &parser->m_buffer[offset],
1965 parser->m_bufferEnd - parser->m_bufferPtr + keep);
1966 parser->m_bufferEnd -= offset;
1967 parser->m_bufferPtr -= offset;
1968 }
1969 #else
1970 if (parser->m_buffer && parser->m_bufferPtr) {
1971 memmove(parser->m_buffer, parser->m_bufferPtr,
1972 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
1973 parser->m_bufferEnd
1974 = parser->m_buffer
1975 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
1976 parser->m_bufferPtr = parser->m_buffer;
1977 }
1978 #endif /* not defined XML_CONTEXT_BYTES */
1979 } else {
1980 char *newBuf;
1981 int bufferSize
1982 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr);
1983 if (bufferSize == 0)
1984 bufferSize = INIT_BUFFER_SIZE;
1985 do {
1986 /* Do not invoke signed arithmetic overflow: */
1987 bufferSize = (int)(2U * (unsigned)bufferSize);
1988 } while (bufferSize < neededSize && bufferSize > 0);
1989 if (bufferSize <= 0) {
1990 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1991 return NULL;
1992 }
1993 newBuf = (char *)MALLOC(parser, bufferSize);
1994 if (newBuf == 0) {
1995 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1996 return NULL;
1997 }
1998 parser->m_bufferLim = newBuf + bufferSize;
1999 #ifdef XML_CONTEXT_BYTES
2000 if (parser->m_bufferPtr) {
2001 memcpy(newBuf, &parser->m_bufferPtr[-keep],
2002 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2003 + keep);
2004 FREE(parser, parser->m_buffer);
2005 parser->m_buffer = newBuf;
2006 parser->m_bufferEnd
2007 = parser->m_buffer
2008 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2009 + keep;
2010 parser->m_bufferPtr = parser->m_buffer + keep;
2011 } else {
2012 /* This must be a brand new buffer with no data in it yet */
2013 parser->m_bufferEnd = newBuf;
2014 parser->m_bufferPtr = parser->m_buffer = newBuf;
2015 }
2016 #else
2017 if (parser->m_bufferPtr) {
2018 memcpy(newBuf, parser->m_bufferPtr,
2019 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2020 FREE(parser, parser->m_buffer);
2021 parser->m_bufferEnd
2022 = newBuf
2023 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2024 } else {
2025 /* This must be a brand new buffer with no data in it yet */
2026 parser->m_bufferEnd = newBuf;
2027 }
2028 parser->m_bufferPtr = parser->m_buffer = newBuf;
2029 #endif /* not defined XML_CONTEXT_BYTES */
2030 }
2031 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2032 parser->m_positionPtr = NULL;
2033 }
2034 return parser->m_bufferEnd;
2035 }
2036
2037 enum XML_Status XMLCALL
XML_StopParser(XML_Parser parser,XML_Bool resumable)2038 XML_StopParser(XML_Parser parser, XML_Bool resumable) {
2039 if (parser == NULL)
2040 return XML_STATUS_ERROR;
2041 switch (parser->m_parsingStatus.parsing) {
2042 case XML_SUSPENDED:
2043 if (resumable) {
2044 parser->m_errorCode = XML_ERROR_SUSPENDED;
2045 return XML_STATUS_ERROR;
2046 }
2047 parser->m_parsingStatus.parsing = XML_FINISHED;
2048 break;
2049 case XML_FINISHED:
2050 parser->m_errorCode = XML_ERROR_FINISHED;
2051 return XML_STATUS_ERROR;
2052 default:
2053 if (resumable) {
2054 #ifdef XML_DTD
2055 if (parser->m_isParamEntity) {
2056 parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2057 return XML_STATUS_ERROR;
2058 }
2059 #endif
2060 parser->m_parsingStatus.parsing = XML_SUSPENDED;
2061 } else
2062 parser->m_parsingStatus.parsing = XML_FINISHED;
2063 }
2064 return XML_STATUS_OK;
2065 }
2066
2067 enum XML_Status XMLCALL
XML_ResumeParser(XML_Parser parser)2068 XML_ResumeParser(XML_Parser parser) {
2069 enum XML_Status result = XML_STATUS_OK;
2070
2071 if (parser == NULL)
2072 return XML_STATUS_ERROR;
2073 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2074 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2075 return XML_STATUS_ERROR;
2076 }
2077 parser->m_parsingStatus.parsing = XML_PARSING;
2078
2079 parser->m_errorCode = parser->m_processor(
2080 parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2081
2082 if (parser->m_errorCode != XML_ERROR_NONE) {
2083 parser->m_eventEndPtr = parser->m_eventPtr;
2084 parser->m_processor = errorProcessor;
2085 return XML_STATUS_ERROR;
2086 } else {
2087 switch (parser->m_parsingStatus.parsing) {
2088 case XML_SUSPENDED:
2089 result = XML_STATUS_SUSPENDED;
2090 break;
2091 case XML_INITIALIZED:
2092 case XML_PARSING:
2093 if (parser->m_parsingStatus.finalBuffer) {
2094 parser->m_parsingStatus.parsing = XML_FINISHED;
2095 return result;
2096 }
2097 default:;
2098 }
2099 }
2100
2101 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2102 parser->m_bufferPtr, &parser->m_position);
2103 parser->m_positionPtr = parser->m_bufferPtr;
2104 return result;
2105 }
2106
2107 void XMLCALL
XML_GetParsingStatus(XML_Parser parser,XML_ParsingStatus * status)2108 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
2109 if (parser == NULL)
2110 return;
2111 assert(status != NULL);
2112 *status = parser->m_parsingStatus;
2113 }
2114
2115 enum XML_Error XMLCALL
XML_GetErrorCode(XML_Parser parser)2116 XML_GetErrorCode(XML_Parser parser) {
2117 if (parser == NULL)
2118 return XML_ERROR_INVALID_ARGUMENT;
2119 return parser->m_errorCode;
2120 }
2121
2122 XML_Index XMLCALL
XML_GetCurrentByteIndex(XML_Parser parser)2123 XML_GetCurrentByteIndex(XML_Parser parser) {
2124 if (parser == NULL)
2125 return -1;
2126 if (parser->m_eventPtr)
2127 return (XML_Index)(parser->m_parseEndByteIndex
2128 - (parser->m_parseEndPtr - parser->m_eventPtr));
2129 return -1;
2130 }
2131
2132 int XMLCALL
XML_GetCurrentByteCount(XML_Parser parser)2133 XML_GetCurrentByteCount(XML_Parser parser) {
2134 if (parser == NULL)
2135 return 0;
2136 if (parser->m_eventEndPtr && parser->m_eventPtr)
2137 return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2138 return 0;
2139 }
2140
2141 const char *XMLCALL
XML_GetInputContext(XML_Parser parser,int * offset,int * size)2142 XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
2143 #ifdef XML_CONTEXT_BYTES
2144 if (parser == NULL)
2145 return NULL;
2146 if (parser->m_eventPtr && parser->m_buffer) {
2147 if (offset != NULL)
2148 *offset = (int)(parser->m_eventPtr - parser->m_buffer);
2149 if (size != NULL)
2150 *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2151 return parser->m_buffer;
2152 }
2153 #else
2154 (void)parser;
2155 (void)offset;
2156 (void)size;
2157 #endif /* defined XML_CONTEXT_BYTES */
2158 return (char *)0;
2159 }
2160
2161 XML_Size XMLCALL
XML_GetCurrentLineNumber(XML_Parser parser)2162 XML_GetCurrentLineNumber(XML_Parser parser) {
2163 if (parser == NULL)
2164 return 0;
2165 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2166 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2167 parser->m_eventPtr, &parser->m_position);
2168 parser->m_positionPtr = parser->m_eventPtr;
2169 }
2170 return parser->m_position.lineNumber + 1;
2171 }
2172
2173 XML_Size XMLCALL
XML_GetCurrentColumnNumber(XML_Parser parser)2174 XML_GetCurrentColumnNumber(XML_Parser parser) {
2175 if (parser == NULL)
2176 return 0;
2177 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2178 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2179 parser->m_eventPtr, &parser->m_position);
2180 parser->m_positionPtr = parser->m_eventPtr;
2181 }
2182 return parser->m_position.columnNumber;
2183 }
2184
2185 void XMLCALL
XML_FreeContentModel(XML_Parser parser,XML_Content * model)2186 XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
2187 if (parser != NULL)
2188 FREE(parser, model);
2189 }
2190
2191 void *XMLCALL
XML_MemMalloc(XML_Parser parser,size_t size)2192 XML_MemMalloc(XML_Parser parser, size_t size) {
2193 if (parser == NULL)
2194 return NULL;
2195 return MALLOC(parser, size);
2196 }
2197
2198 void *XMLCALL
XML_MemRealloc(XML_Parser parser,void * ptr,size_t size)2199 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
2200 if (parser == NULL)
2201 return NULL;
2202 return REALLOC(parser, ptr, size);
2203 }
2204
2205 void XMLCALL
XML_MemFree(XML_Parser parser,void * ptr)2206 XML_MemFree(XML_Parser parser, void *ptr) {
2207 if (parser != NULL)
2208 FREE(parser, ptr);
2209 }
2210
2211 void XMLCALL
XML_DefaultCurrent(XML_Parser parser)2212 XML_DefaultCurrent(XML_Parser parser) {
2213 if (parser == NULL)
2214 return;
2215 if (parser->m_defaultHandler) {
2216 if (parser->m_openInternalEntities)
2217 reportDefault(parser, parser->m_internalEncoding,
2218 parser->m_openInternalEntities->internalEventPtr,
2219 parser->m_openInternalEntities->internalEventEndPtr);
2220 else
2221 reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2222 parser->m_eventEndPtr);
2223 }
2224 }
2225
2226 const XML_LChar *XMLCALL
XML_ErrorString(enum XML_Error code)2227 XML_ErrorString(enum XML_Error code) {
2228 switch (code) {
2229 case XML_ERROR_NONE:
2230 return NULL;
2231 case XML_ERROR_NO_MEMORY:
2232 return XML_L("out of memory");
2233 case XML_ERROR_SYNTAX:
2234 return XML_L("syntax error");
2235 case XML_ERROR_NO_ELEMENTS:
2236 return XML_L("no element found");
2237 case XML_ERROR_INVALID_TOKEN:
2238 return XML_L("not well-formed (invalid token)");
2239 case XML_ERROR_UNCLOSED_TOKEN:
2240 return XML_L("unclosed token");
2241 case XML_ERROR_PARTIAL_CHAR:
2242 return XML_L("partial character");
2243 case XML_ERROR_TAG_MISMATCH:
2244 return XML_L("mismatched tag");
2245 case XML_ERROR_DUPLICATE_ATTRIBUTE:
2246 return XML_L("duplicate attribute");
2247 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2248 return XML_L("junk after document element");
2249 case XML_ERROR_PARAM_ENTITY_REF:
2250 return XML_L("illegal parameter entity reference");
2251 case XML_ERROR_UNDEFINED_ENTITY:
2252 return XML_L("undefined entity");
2253 case XML_ERROR_RECURSIVE_ENTITY_REF:
2254 return XML_L("recursive entity reference");
2255 case XML_ERROR_ASYNC_ENTITY:
2256 return XML_L("asynchronous entity");
2257 case XML_ERROR_BAD_CHAR_REF:
2258 return XML_L("reference to invalid character number");
2259 case XML_ERROR_BINARY_ENTITY_REF:
2260 return XML_L("reference to binary entity");
2261 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2262 return XML_L("reference to external entity in attribute");
2263 case XML_ERROR_MISPLACED_XML_PI:
2264 return XML_L("XML or text declaration not at start of entity");
2265 case XML_ERROR_UNKNOWN_ENCODING:
2266 return XML_L("unknown encoding");
2267 case XML_ERROR_INCORRECT_ENCODING:
2268 return XML_L("encoding specified in XML declaration is incorrect");
2269 case XML_ERROR_UNCLOSED_CDATA_SECTION:
2270 return XML_L("unclosed CDATA section");
2271 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2272 return XML_L("error in processing external entity reference");
2273 case XML_ERROR_NOT_STANDALONE:
2274 return XML_L("document is not standalone");
2275 case XML_ERROR_UNEXPECTED_STATE:
2276 return XML_L("unexpected parser state - please send a bug report");
2277 case XML_ERROR_ENTITY_DECLARED_IN_PE:
2278 return XML_L("entity declared in parameter entity");
2279 case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2280 return XML_L("requested feature requires XML_DTD support in Expat");
2281 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2282 return XML_L("cannot change setting once parsing has begun");
2283 /* Added in 1.95.7. */
2284 case XML_ERROR_UNBOUND_PREFIX:
2285 return XML_L("unbound prefix");
2286 /* Added in 1.95.8. */
2287 case XML_ERROR_UNDECLARING_PREFIX:
2288 return XML_L("must not undeclare prefix");
2289 case XML_ERROR_INCOMPLETE_PE:
2290 return XML_L("incomplete markup in parameter entity");
2291 case XML_ERROR_XML_DECL:
2292 return XML_L("XML declaration not well-formed");
2293 case XML_ERROR_TEXT_DECL:
2294 return XML_L("text declaration not well-formed");
2295 case XML_ERROR_PUBLICID:
2296 return XML_L("illegal character(s) in public id");
2297 case XML_ERROR_SUSPENDED:
2298 return XML_L("parser suspended");
2299 case XML_ERROR_NOT_SUSPENDED:
2300 return XML_L("parser not suspended");
2301 case XML_ERROR_ABORTED:
2302 return XML_L("parsing aborted");
2303 case XML_ERROR_FINISHED:
2304 return XML_L("parsing finished");
2305 case XML_ERROR_SUSPEND_PE:
2306 return XML_L("cannot suspend in external parameter entity");
2307 /* Added in 2.0.0. */
2308 case XML_ERROR_RESERVED_PREFIX_XML:
2309 return XML_L(
2310 "reserved prefix (xml) must not be undeclared or bound to another namespace name");
2311 case XML_ERROR_RESERVED_PREFIX_XMLNS:
2312 return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2313 case XML_ERROR_RESERVED_NAMESPACE_URI:
2314 return XML_L(
2315 "prefix must not be bound to one of the reserved namespace names");
2316 /* Added in 2.2.5. */
2317 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2318 return XML_L("invalid argument");
2319 }
2320 return NULL;
2321 }
2322
2323 const XML_LChar *XMLCALL
XML_ExpatVersion(void)2324 XML_ExpatVersion(void) {
2325 /* V1 is used to string-ize the version number. However, it would
2326 string-ize the actual version macro *names* unless we get them
2327 substituted before being passed to V1. CPP is defined to expand
2328 a macro, then rescan for more expansions. Thus, we use V2 to expand
2329 the version macros, then CPP will expand the resulting V1() macro
2330 with the correct numerals. */
2331 /* ### I'm assuming cpp is portable in this respect... */
2332
2333 #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
2334 #define V2(a, b, c) XML_L("expat_") V1(a, b, c)
2335
2336 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2337
2338 #undef V1
2339 #undef V2
2340 }
2341
2342 XML_Expat_Version XMLCALL
XML_ExpatVersionInfo(void)2343 XML_ExpatVersionInfo(void) {
2344 XML_Expat_Version version;
2345
2346 version.major = XML_MAJOR_VERSION;
2347 version.minor = XML_MINOR_VERSION;
2348 version.micro = XML_MICRO_VERSION;
2349
2350 return version;
2351 }
2352
2353 const XML_Feature *XMLCALL
XML_GetFeatureList(void)2354 XML_GetFeatureList(void) {
2355 static const XML_Feature features[]
2356 = {{XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2357 sizeof(XML_Char)},
2358 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2359 sizeof(XML_LChar)},
2360 #ifdef XML_UNICODE
2361 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
2362 #endif
2363 #ifdef XML_UNICODE_WCHAR_T
2364 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
2365 #endif
2366 #ifdef XML_DTD
2367 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
2368 #endif
2369 #ifdef XML_CONTEXT_BYTES
2370 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2371 XML_CONTEXT_BYTES},
2372 #endif
2373 #ifdef XML_MIN_SIZE
2374 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
2375 #endif
2376 #ifdef XML_NS
2377 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2378 #endif
2379 #ifdef XML_LARGE_SIZE
2380 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2381 #endif
2382 #ifdef XML_ATTR_INFO
2383 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2384 #endif
2385 {XML_FEATURE_END, NULL, 0}};
2386
2387 return features;
2388 }
2389
2390 /* Initially tag->rawName always points into the parse buffer;
2391 for those TAG instances opened while the current parse buffer was
2392 processed, and not yet closed, we need to store tag->rawName in a more
2393 permanent location, since the parse buffer is about to be discarded.
2394 */
2395 static XML_Bool
storeRawNames(XML_Parser parser)2396 storeRawNames(XML_Parser parser) {
2397 TAG *tag = parser->m_tagStack;
2398 while (tag) {
2399 int bufSize;
2400 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2401 char *rawNameBuf = tag->buf + nameLen;
2402 /* Stop if already stored. Since m_tagStack is a stack, we can stop
2403 at the first entry that has already been copied; everything
2404 below it in the stack is already been accounted for in a
2405 previous call to this function.
2406 */
2407 if (tag->rawName == rawNameBuf)
2408 break;
2409 /* For re-use purposes we need to ensure that the
2410 size of tag->buf is a multiple of sizeof(XML_Char).
2411 */
2412 bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2413 if (bufSize > tag->bufEnd - tag->buf) {
2414 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2415 if (temp == NULL)
2416 return XML_FALSE;
2417 /* if tag->name.str points to tag->buf (only when namespace
2418 processing is off) then we have to update it
2419 */
2420 if (tag->name.str == (XML_Char *)tag->buf)
2421 tag->name.str = (XML_Char *)temp;
2422 /* if tag->name.localPart is set (when namespace processing is on)
2423 then update it as well, since it will always point into tag->buf
2424 */
2425 if (tag->name.localPart)
2426 tag->name.localPart
2427 = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf);
2428 tag->buf = temp;
2429 tag->bufEnd = temp + bufSize;
2430 rawNameBuf = temp + nameLen;
2431 }
2432 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2433 tag->rawName = rawNameBuf;
2434 tag = tag->parent;
2435 }
2436 return XML_TRUE;
2437 }
2438
2439 static enum XML_Error PTRCALL
contentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2440 contentProcessor(XML_Parser parser, const char *start, const char *end,
2441 const char **endPtr) {
2442 enum XML_Error result
2443 = doContent(parser, 0, parser->m_encoding, start, end, endPtr,
2444 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
2445 if (result == XML_ERROR_NONE) {
2446 if (! storeRawNames(parser))
2447 return XML_ERROR_NO_MEMORY;
2448 }
2449 return result;
2450 }
2451
2452 static enum XML_Error PTRCALL
externalEntityInitProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2453 externalEntityInitProcessor(XML_Parser parser, const char *start,
2454 const char *end, const char **endPtr) {
2455 enum XML_Error result = initializeEncoding(parser);
2456 if (result != XML_ERROR_NONE)
2457 return result;
2458 parser->m_processor = externalEntityInitProcessor2;
2459 return externalEntityInitProcessor2(parser, start, end, endPtr);
2460 }
2461
2462 static enum XML_Error PTRCALL
externalEntityInitProcessor2(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2463 externalEntityInitProcessor2(XML_Parser parser, const char *start,
2464 const char *end, const char **endPtr) {
2465 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2466 int tok = XmlContentTok(parser->m_encoding, start, end, &next);
2467 switch (tok) {
2468 case XML_TOK_BOM:
2469 /* If we are at the end of the buffer, this would cause the next stage,
2470 i.e. externalEntityInitProcessor3, to pass control directly to
2471 doContent (by detecting XML_TOK_NONE) without processing any xml text
2472 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2473 */
2474 if (next == end && ! parser->m_parsingStatus.finalBuffer) {
2475 *endPtr = next;
2476 return XML_ERROR_NONE;
2477 }
2478 start = next;
2479 break;
2480 case XML_TOK_PARTIAL:
2481 if (! parser->m_parsingStatus.finalBuffer) {
2482 *endPtr = start;
2483 return XML_ERROR_NONE;
2484 }
2485 parser->m_eventPtr = start;
2486 return XML_ERROR_UNCLOSED_TOKEN;
2487 case XML_TOK_PARTIAL_CHAR:
2488 if (! parser->m_parsingStatus.finalBuffer) {
2489 *endPtr = start;
2490 return XML_ERROR_NONE;
2491 }
2492 parser->m_eventPtr = start;
2493 return XML_ERROR_PARTIAL_CHAR;
2494 }
2495 parser->m_processor = externalEntityInitProcessor3;
2496 return externalEntityInitProcessor3(parser, start, end, endPtr);
2497 }
2498
2499 static enum XML_Error PTRCALL
externalEntityInitProcessor3(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2500 externalEntityInitProcessor3(XML_Parser parser, const char *start,
2501 const char *end, const char **endPtr) {
2502 int tok;
2503 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2504 parser->m_eventPtr = start;
2505 tok = XmlContentTok(parser->m_encoding, start, end, &next);
2506 parser->m_eventEndPtr = next;
2507
2508 switch (tok) {
2509 case XML_TOK_XML_DECL: {
2510 enum XML_Error result;
2511 result = processXmlDecl(parser, 1, start, next);
2512 if (result != XML_ERROR_NONE)
2513 return result;
2514 switch (parser->m_parsingStatus.parsing) {
2515 case XML_SUSPENDED:
2516 *endPtr = next;
2517 return XML_ERROR_NONE;
2518 case XML_FINISHED:
2519 return XML_ERROR_ABORTED;
2520 default:
2521 start = next;
2522 }
2523 } break;
2524 case XML_TOK_PARTIAL:
2525 if (! parser->m_parsingStatus.finalBuffer) {
2526 *endPtr = start;
2527 return XML_ERROR_NONE;
2528 }
2529 return XML_ERROR_UNCLOSED_TOKEN;
2530 case XML_TOK_PARTIAL_CHAR:
2531 if (! parser->m_parsingStatus.finalBuffer) {
2532 *endPtr = start;
2533 return XML_ERROR_NONE;
2534 }
2535 return XML_ERROR_PARTIAL_CHAR;
2536 }
2537 parser->m_processor = externalEntityContentProcessor;
2538 parser->m_tagLevel = 1;
2539 return externalEntityContentProcessor(parser, start, end, endPtr);
2540 }
2541
2542 static enum XML_Error PTRCALL
externalEntityContentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2543 externalEntityContentProcessor(XML_Parser parser, const char *start,
2544 const char *end, const char **endPtr) {
2545 enum XML_Error result
2546 = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
2547 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
2548 if (result == XML_ERROR_NONE) {
2549 if (! storeRawNames(parser))
2550 return XML_ERROR_NO_MEMORY;
2551 }
2552 return result;
2553 }
2554
2555 static enum XML_Error
doContent(XML_Parser parser,int startTagLevel,const ENCODING * enc,const char * s,const char * end,const char ** nextPtr,XML_Bool haveMore)2556 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
2557 const char *s, const char *end, const char **nextPtr,
2558 XML_Bool haveMore) {
2559 /* save one level of indirection */
2560 DTD *const dtd = parser->m_dtd;
2561
2562 const char **eventPP;
2563 const char **eventEndPP;
2564 if (enc == parser->m_encoding) {
2565 eventPP = &parser->m_eventPtr;
2566 eventEndPP = &parser->m_eventEndPtr;
2567 } else {
2568 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
2569 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
2570 }
2571 *eventPP = s;
2572
2573 for (;;) {
2574 const char *next = s; /* XmlContentTok doesn't always set the last arg */
2575 int tok = XmlContentTok(enc, s, end, &next);
2576 *eventEndPP = next;
2577 switch (tok) {
2578 case XML_TOK_TRAILING_CR:
2579 if (haveMore) {
2580 *nextPtr = s;
2581 return XML_ERROR_NONE;
2582 }
2583 *eventEndPP = end;
2584 if (parser->m_characterDataHandler) {
2585 XML_Char c = 0xA;
2586 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
2587 } else if (parser->m_defaultHandler)
2588 reportDefault(parser, enc, s, end);
2589 /* We are at the end of the final buffer, should we check for
2590 XML_SUSPENDED, XML_FINISHED?
2591 */
2592 if (startTagLevel == 0)
2593 return XML_ERROR_NO_ELEMENTS;
2594 if (parser->m_tagLevel != startTagLevel)
2595 return XML_ERROR_ASYNC_ENTITY;
2596 *nextPtr = end;
2597 return XML_ERROR_NONE;
2598 case XML_TOK_NONE:
2599 if (haveMore) {
2600 *nextPtr = s;
2601 return XML_ERROR_NONE;
2602 }
2603 if (startTagLevel > 0) {
2604 if (parser->m_tagLevel != startTagLevel)
2605 return XML_ERROR_ASYNC_ENTITY;
2606 *nextPtr = s;
2607 return XML_ERROR_NONE;
2608 }
2609 return XML_ERROR_NO_ELEMENTS;
2610 case XML_TOK_INVALID:
2611 *eventPP = next;
2612 return XML_ERROR_INVALID_TOKEN;
2613 case XML_TOK_PARTIAL:
2614 if (haveMore) {
2615 *nextPtr = s;
2616 return XML_ERROR_NONE;
2617 }
2618 return XML_ERROR_UNCLOSED_TOKEN;
2619 case XML_TOK_PARTIAL_CHAR:
2620 if (haveMore) {
2621 *nextPtr = s;
2622 return XML_ERROR_NONE;
2623 }
2624 return XML_ERROR_PARTIAL_CHAR;
2625 case XML_TOK_ENTITY_REF: {
2626 const XML_Char *name;
2627 ENTITY *entity;
2628 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
2629 enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
2630 if (ch) {
2631 if (parser->m_characterDataHandler)
2632 parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
2633 else if (parser->m_defaultHandler)
2634 reportDefault(parser, enc, s, next);
2635 break;
2636 }
2637 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
2638 next - enc->minBytesPerChar);
2639 if (! name)
2640 return XML_ERROR_NO_MEMORY;
2641 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
2642 poolDiscard(&dtd->pool);
2643 /* First, determine if a check for an existing declaration is needed;
2644 if yes, check that the entity exists, and that it is internal,
2645 otherwise call the skipped entity or default handler.
2646 */
2647 if (! dtd->hasParamEntityRefs || dtd->standalone) {
2648 if (! entity)
2649 return XML_ERROR_UNDEFINED_ENTITY;
2650 else if (! entity->is_internal)
2651 return XML_ERROR_ENTITY_DECLARED_IN_PE;
2652 } else if (! entity) {
2653 if (parser->m_skippedEntityHandler)
2654 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
2655 else if (parser->m_defaultHandler)
2656 reportDefault(parser, enc, s, next);
2657 break;
2658 }
2659 if (entity->open)
2660 return XML_ERROR_RECURSIVE_ENTITY_REF;
2661 if (entity->notation)
2662 return XML_ERROR_BINARY_ENTITY_REF;
2663 if (entity->textPtr) {
2664 enum XML_Error result;
2665 if (! parser->m_defaultExpandInternalEntities) {
2666 if (parser->m_skippedEntityHandler)
2667 parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
2668 0);
2669 else if (parser->m_defaultHandler)
2670 reportDefault(parser, enc, s, next);
2671 break;
2672 }
2673 result = processInternalEntity(parser, entity, XML_FALSE);
2674 if (result != XML_ERROR_NONE)
2675 return result;
2676 } else if (parser->m_externalEntityRefHandler) {
2677 const XML_Char *context;
2678 entity->open = XML_TRUE;
2679 context = getContext(parser);
2680 entity->open = XML_FALSE;
2681 if (! context)
2682 return XML_ERROR_NO_MEMORY;
2683 if (! parser->m_externalEntityRefHandler(
2684 parser->m_externalEntityRefHandlerArg, context, entity->base,
2685 entity->systemId, entity->publicId))
2686 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2687 poolDiscard(&parser->m_tempPool);
2688 } else if (parser->m_defaultHandler)
2689 reportDefault(parser, enc, s, next);
2690 break;
2691 }
2692 case XML_TOK_START_TAG_NO_ATTS:
2693 /* fall through */
2694 case XML_TOK_START_TAG_WITH_ATTS: {
2695 TAG *tag;
2696 enum XML_Error result;
2697 XML_Char *toPtr;
2698 if (parser->m_freeTagList) {
2699 tag = parser->m_freeTagList;
2700 parser->m_freeTagList = parser->m_freeTagList->parent;
2701 } else {
2702 tag = (TAG *)MALLOC(parser, sizeof(TAG));
2703 if (! tag)
2704 return XML_ERROR_NO_MEMORY;
2705 tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
2706 if (! tag->buf) {
2707 FREE(parser, tag);
2708 return XML_ERROR_NO_MEMORY;
2709 }
2710 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
2711 }
2712 tag->bindings = NULL;
2713 tag->parent = parser->m_tagStack;
2714 parser->m_tagStack = tag;
2715 tag->name.localPart = NULL;
2716 tag->name.prefix = NULL;
2717 tag->rawName = s + enc->minBytesPerChar;
2718 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
2719 ++parser->m_tagLevel;
2720 {
2721 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
2722 const char *fromPtr = tag->rawName;
2723 toPtr = (XML_Char *)tag->buf;
2724 for (;;) {
2725 int bufSize;
2726 int convLen;
2727 const enum XML_Convert_Result convert_res
2728 = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
2729 (ICHAR *)tag->bufEnd - 1);
2730 convLen = (int)(toPtr - (XML_Char *)tag->buf);
2731 if ((fromPtr >= rawNameEnd)
2732 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
2733 tag->name.strLen = convLen;
2734 break;
2735 }
2736 bufSize = (int)(tag->bufEnd - tag->buf) << 1;
2737 {
2738 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2739 if (temp == NULL)
2740 return XML_ERROR_NO_MEMORY;
2741 tag->buf = temp;
2742 tag->bufEnd = temp + bufSize;
2743 toPtr = (XML_Char *)temp + convLen;
2744 }
2745 }
2746 }
2747 tag->name.str = (XML_Char *)tag->buf;
2748 *toPtr = XML_T('\0');
2749 result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
2750 if (result)
2751 return result;
2752 if (parser->m_startElementHandler)
2753 parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
2754 (const XML_Char **)parser->m_atts);
2755 else if (parser->m_defaultHandler)
2756 reportDefault(parser, enc, s, next);
2757 poolClear(&parser->m_tempPool);
2758 break;
2759 }
2760 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
2761 /* fall through */
2762 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
2763 const char *rawName = s + enc->minBytesPerChar;
2764 enum XML_Error result;
2765 BINDING *bindings = NULL;
2766 XML_Bool noElmHandlers = XML_TRUE;
2767 TAG_NAME name;
2768 name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
2769 rawName + XmlNameLength(enc, rawName));
2770 if (! name.str)
2771 return XML_ERROR_NO_MEMORY;
2772 poolFinish(&parser->m_tempPool);
2773 result = storeAtts(parser, enc, s, &name, &bindings);
2774 if (result != XML_ERROR_NONE) {
2775 freeBindings(parser, bindings);
2776 return result;
2777 }
2778 poolFinish(&parser->m_tempPool);
2779 if (parser->m_startElementHandler) {
2780 parser->m_startElementHandler(parser->m_handlerArg, name.str,
2781 (const XML_Char **)parser->m_atts);
2782 noElmHandlers = XML_FALSE;
2783 }
2784 if (parser->m_endElementHandler) {
2785 if (parser->m_startElementHandler)
2786 *eventPP = *eventEndPP;
2787 parser->m_endElementHandler(parser->m_handlerArg, name.str);
2788 noElmHandlers = XML_FALSE;
2789 }
2790 if (noElmHandlers && parser->m_defaultHandler)
2791 reportDefault(parser, enc, s, next);
2792 poolClear(&parser->m_tempPool);
2793 freeBindings(parser, bindings);
2794 }
2795 if ((parser->m_tagLevel == 0)
2796 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
2797 if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
2798 parser->m_processor = epilogProcessor;
2799 else
2800 return epilogProcessor(parser, next, end, nextPtr);
2801 }
2802 break;
2803 case XML_TOK_END_TAG:
2804 if (parser->m_tagLevel == startTagLevel)
2805 return XML_ERROR_ASYNC_ENTITY;
2806 else {
2807 int len;
2808 const char *rawName;
2809 TAG *tag = parser->m_tagStack;
2810 parser->m_tagStack = tag->parent;
2811 tag->parent = parser->m_freeTagList;
2812 parser->m_freeTagList = tag;
2813 rawName = s + enc->minBytesPerChar * 2;
2814 len = XmlNameLength(enc, rawName);
2815 if (len != tag->rawNameLength
2816 || memcmp(tag->rawName, rawName, len) != 0) {
2817 *eventPP = rawName;
2818 return XML_ERROR_TAG_MISMATCH;
2819 }
2820 --parser->m_tagLevel;
2821 if (parser->m_endElementHandler) {
2822 const XML_Char *localPart;
2823 const XML_Char *prefix;
2824 XML_Char *uri;
2825 localPart = tag->name.localPart;
2826 if (parser->m_ns && localPart) {
2827 /* localPart and prefix may have been overwritten in
2828 tag->name.str, since this points to the binding->uri
2829 buffer which gets re-used; so we have to add them again
2830 */
2831 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
2832 /* don't need to check for space - already done in storeAtts() */
2833 while (*localPart)
2834 *uri++ = *localPart++;
2835 prefix = (XML_Char *)tag->name.prefix;
2836 if (parser->m_ns_triplets && prefix) {
2837 *uri++ = parser->m_namespaceSeparator;
2838 while (*prefix)
2839 *uri++ = *prefix++;
2840 }
2841 *uri = XML_T('\0');
2842 }
2843 parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
2844 } else if (parser->m_defaultHandler)
2845 reportDefault(parser, enc, s, next);
2846 while (tag->bindings) {
2847 BINDING *b = tag->bindings;
2848 if (parser->m_endNamespaceDeclHandler)
2849 parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
2850 b->prefix->name);
2851 tag->bindings = tag->bindings->nextTagBinding;
2852 b->nextTagBinding = parser->m_freeBindingList;
2853 parser->m_freeBindingList = b;
2854 b->prefix->binding = b->prevPrefixBinding;
2855 }
2856 if ((parser->m_tagLevel == 0)
2857 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
2858 if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
2859 parser->m_processor = epilogProcessor;
2860 else
2861 return epilogProcessor(parser, next, end, nextPtr);
2862 }
2863 }
2864 break;
2865 case XML_TOK_CHAR_REF: {
2866 int n = XmlCharRefNumber(enc, s);
2867 if (n < 0)
2868 return XML_ERROR_BAD_CHAR_REF;
2869 if (parser->m_characterDataHandler) {
2870 XML_Char buf[XML_ENCODE_MAX];
2871 parser->m_characterDataHandler(parser->m_handlerArg, buf,
2872 XmlEncode(n, (ICHAR *)buf));
2873 } else if (parser->m_defaultHandler)
2874 reportDefault(parser, enc, s, next);
2875 } break;
2876 case XML_TOK_XML_DECL:
2877 return XML_ERROR_MISPLACED_XML_PI;
2878 case XML_TOK_DATA_NEWLINE:
2879 if (parser->m_characterDataHandler) {
2880 XML_Char c = 0xA;
2881 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
2882 } else if (parser->m_defaultHandler)
2883 reportDefault(parser, enc, s, next);
2884 break;
2885 case XML_TOK_CDATA_SECT_OPEN: {
2886 enum XML_Error result;
2887 if (parser->m_startCdataSectionHandler)
2888 parser->m_startCdataSectionHandler(parser->m_handlerArg);
2889 /* BEGIN disabled code */
2890 /* Suppose you doing a transformation on a document that involves
2891 changing only the character data. You set up a defaultHandler
2892 and a characterDataHandler. The defaultHandler simply copies
2893 characters through. The characterDataHandler does the
2894 transformation and writes the characters out escaping them as
2895 necessary. This case will fail to work if we leave out the
2896 following two lines (because & and < inside CDATA sections will
2897 be incorrectly escaped).
2898
2899 However, now we have a start/endCdataSectionHandler, so it seems
2900 easier to let the user deal with this.
2901 */
2902 else if (0 && parser->m_characterDataHandler)
2903 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
2904 0);
2905 /* END disabled code */
2906 else if (parser->m_defaultHandler)
2907 reportDefault(parser, enc, s, next);
2908 result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore);
2909 if (result != XML_ERROR_NONE)
2910 return result;
2911 else if (! next) {
2912 parser->m_processor = cdataSectionProcessor;
2913 return result;
2914 }
2915 } break;
2916 case XML_TOK_TRAILING_RSQB:
2917 if (haveMore) {
2918 *nextPtr = s;
2919 return XML_ERROR_NONE;
2920 }
2921 if (parser->m_characterDataHandler) {
2922 if (MUST_CONVERT(enc, s)) {
2923 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
2924 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
2925 parser->m_characterDataHandler(
2926 parser->m_handlerArg, parser->m_dataBuf,
2927 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
2928 } else
2929 parser->m_characterDataHandler(
2930 parser->m_handlerArg, (XML_Char *)s,
2931 (int)((XML_Char *)end - (XML_Char *)s));
2932 } else if (parser->m_defaultHandler)
2933 reportDefault(parser, enc, s, end);
2934 /* We are at the end of the final buffer, should we check for
2935 XML_SUSPENDED, XML_FINISHED?
2936 */
2937 if (startTagLevel == 0) {
2938 *eventPP = end;
2939 return XML_ERROR_NO_ELEMENTS;
2940 }
2941 if (parser->m_tagLevel != startTagLevel) {
2942 *eventPP = end;
2943 return XML_ERROR_ASYNC_ENTITY;
2944 }
2945 *nextPtr = end;
2946 return XML_ERROR_NONE;
2947 case XML_TOK_DATA_CHARS: {
2948 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
2949 if (charDataHandler) {
2950 if (MUST_CONVERT(enc, s)) {
2951 for (;;) {
2952 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
2953 const enum XML_Convert_Result convert_res = XmlConvert(
2954 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
2955 *eventEndPP = s;
2956 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
2957 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
2958 if ((convert_res == XML_CONVERT_COMPLETED)
2959 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
2960 break;
2961 *eventPP = s;
2962 }
2963 } else
2964 charDataHandler(parser->m_handlerArg, (XML_Char *)s,
2965 (int)((XML_Char *)next - (XML_Char *)s));
2966 } else if (parser->m_defaultHandler)
2967 reportDefault(parser, enc, s, next);
2968 } break;
2969 case XML_TOK_PI:
2970 if (! reportProcessingInstruction(parser, enc, s, next))
2971 return XML_ERROR_NO_MEMORY;
2972 break;
2973 case XML_TOK_COMMENT:
2974 if (! reportComment(parser, enc, s, next))
2975 return XML_ERROR_NO_MEMORY;
2976 break;
2977 default:
2978 /* All of the tokens produced by XmlContentTok() have their own
2979 * explicit cases, so this default is not strictly necessary.
2980 * However it is a useful safety net, so we retain the code and
2981 * simply exclude it from the coverage tests.
2982 *
2983 * LCOV_EXCL_START
2984 */
2985 if (parser->m_defaultHandler)
2986 reportDefault(parser, enc, s, next);
2987 break;
2988 /* LCOV_EXCL_STOP */
2989 }
2990 *eventPP = s = next;
2991 switch (parser->m_parsingStatus.parsing) {
2992 case XML_SUSPENDED:
2993 *nextPtr = next;
2994 return XML_ERROR_NONE;
2995 case XML_FINISHED:
2996 return XML_ERROR_ABORTED;
2997 default:;
2998 }
2999 }
3000 /* not reached */
3001 }
3002
3003 /* This function does not call free() on the allocated memory, merely
3004 * moving it to the parser's m_freeBindingList where it can be freed or
3005 * reused as appropriate.
3006 */
3007 static void
freeBindings(XML_Parser parser,BINDING * bindings)3008 freeBindings(XML_Parser parser, BINDING *bindings) {
3009 while (bindings) {
3010 BINDING *b = bindings;
3011
3012 /* m_startNamespaceDeclHandler will have been called for this
3013 * binding in addBindings(), so call the end handler now.
3014 */
3015 if (parser->m_endNamespaceDeclHandler)
3016 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3017
3018 bindings = bindings->nextTagBinding;
3019 b->nextTagBinding = parser->m_freeBindingList;
3020 parser->m_freeBindingList = b;
3021 b->prefix->binding = b->prevPrefixBinding;
3022 }
3023 }
3024
3025 /* Precondition: all arguments must be non-NULL;
3026 Purpose:
3027 - normalize attributes
3028 - check attributes for well-formedness
3029 - generate namespace aware attribute names (URI, prefix)
3030 - build list of attributes for startElementHandler
3031 - default attributes
3032 - process namespace declarations (check and report them)
3033 - generate namespace aware element name (URI, prefix)
3034 */
3035 static enum XML_Error
storeAtts(XML_Parser parser,const ENCODING * enc,const char * attStr,TAG_NAME * tagNamePtr,BINDING ** bindingsPtr)3036 storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
3037 TAG_NAME *tagNamePtr, BINDING **bindingsPtr) {
3038 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
3039 ELEMENT_TYPE *elementType;
3040 int nDefaultAtts;
3041 const XML_Char **appAtts; /* the attribute list for the application */
3042 int attIndex = 0;
3043 int prefixLen;
3044 int i;
3045 int n;
3046 XML_Char *uri;
3047 int nPrefixes = 0;
3048 BINDING *binding;
3049 const XML_Char *localPart;
3050
3051 /* lookup the element type name */
3052 elementType
3053 = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
3054 if (! elementType) {
3055 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3056 if (! name)
3057 return XML_ERROR_NO_MEMORY;
3058 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
3059 sizeof(ELEMENT_TYPE));
3060 if (! elementType)
3061 return XML_ERROR_NO_MEMORY;
3062 if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
3063 return XML_ERROR_NO_MEMORY;
3064 }
3065 nDefaultAtts = elementType->nDefaultAtts;
3066
3067 /* get the attributes from the tokenizer */
3068 n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3069 if (n + nDefaultAtts > parser->m_attsSize) {
3070 int oldAttsSize = parser->m_attsSize;
3071 ATTRIBUTE *temp;
3072 #ifdef XML_ATTR_INFO
3073 XML_AttrInfo *temp2;
3074 #endif
3075 parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
3076 temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
3077 parser->m_attsSize * sizeof(ATTRIBUTE));
3078 if (temp == NULL) {
3079 parser->m_attsSize = oldAttsSize;
3080 return XML_ERROR_NO_MEMORY;
3081 }
3082 parser->m_atts = temp;
3083 #ifdef XML_ATTR_INFO
3084 temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
3085 parser->m_attsSize * sizeof(XML_AttrInfo));
3086 if (temp2 == NULL) {
3087 parser->m_attsSize = oldAttsSize;
3088 return XML_ERROR_NO_MEMORY;
3089 }
3090 parser->m_attInfo = temp2;
3091 #endif
3092 if (n > oldAttsSize)
3093 XmlGetAttributes(enc, attStr, n, parser->m_atts);
3094 }
3095
3096 appAtts = (const XML_Char **)parser->m_atts;
3097 for (i = 0; i < n; i++) {
3098 ATTRIBUTE *currAtt = &parser->m_atts[i];
3099 #ifdef XML_ATTR_INFO
3100 XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
3101 #endif
3102 /* add the name and value to the attribute list */
3103 ATTRIBUTE_ID *attId
3104 = getAttributeId(parser, enc, currAtt->name,
3105 currAtt->name + XmlNameLength(enc, currAtt->name));
3106 if (! attId)
3107 return XML_ERROR_NO_MEMORY;
3108 #ifdef XML_ATTR_INFO
3109 currAttInfo->nameStart
3110 = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
3111 currAttInfo->nameEnd
3112 = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
3113 currAttInfo->valueStart = parser->m_parseEndByteIndex
3114 - (parser->m_parseEndPtr - currAtt->valuePtr);
3115 currAttInfo->valueEnd = parser->m_parseEndByteIndex
3116 - (parser->m_parseEndPtr - currAtt->valueEnd);
3117 #endif
3118 /* Detect duplicate attributes by their QNames. This does not work when
3119 namespace processing is turned on and different prefixes for the same
3120 namespace are used. For this case we have a check further down.
3121 */
3122 if ((attId->name)[-1]) {
3123 if (enc == parser->m_encoding)
3124 parser->m_eventPtr = parser->m_atts[i].name;
3125 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3126 }
3127 (attId->name)[-1] = 1;
3128 appAtts[attIndex++] = attId->name;
3129 if (! parser->m_atts[i].normalized) {
3130 enum XML_Error result;
3131 XML_Bool isCdata = XML_TRUE;
3132
3133 /* figure out whether declared as other than CDATA */
3134 if (attId->maybeTokenized) {
3135 int j;
3136 for (j = 0; j < nDefaultAtts; j++) {
3137 if (attId == elementType->defaultAtts[j].id) {
3138 isCdata = elementType->defaultAtts[j].isCdata;
3139 break;
3140 }
3141 }
3142 }
3143
3144 /* normalize the attribute value */
3145 result = storeAttributeValue(
3146 parser, enc, isCdata, parser->m_atts[i].valuePtr,
3147 parser->m_atts[i].valueEnd, &parser->m_tempPool);
3148 if (result)
3149 return result;
3150 appAtts[attIndex] = poolStart(&parser->m_tempPool);
3151 poolFinish(&parser->m_tempPool);
3152 } else {
3153 /* the value did not need normalizing */
3154 appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
3155 parser->m_atts[i].valuePtr,
3156 parser->m_atts[i].valueEnd);
3157 if (appAtts[attIndex] == 0)
3158 return XML_ERROR_NO_MEMORY;
3159 poolFinish(&parser->m_tempPool);
3160 }
3161 /* handle prefixed attribute names */
3162 if (attId->prefix) {
3163 if (attId->xmlns) {
3164 /* deal with namespace declarations here */
3165 enum XML_Error result = addBinding(parser, attId->prefix, attId,
3166 appAtts[attIndex], bindingsPtr);
3167 if (result)
3168 return result;
3169 --attIndex;
3170 } else {
3171 /* deal with other prefixed names later */
3172 attIndex++;
3173 nPrefixes++;
3174 (attId->name)[-1] = 2;
3175 }
3176 } else
3177 attIndex++;
3178 }
3179
3180 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
3181 parser->m_nSpecifiedAtts = attIndex;
3182 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3183 for (i = 0; i < attIndex; i += 2)
3184 if (appAtts[i] == elementType->idAtt->name) {
3185 parser->m_idAttIndex = i;
3186 break;
3187 }
3188 } else
3189 parser->m_idAttIndex = -1;
3190
3191 /* do attribute defaulting */
3192 for (i = 0; i < nDefaultAtts; i++) {
3193 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
3194 if (! (da->id->name)[-1] && da->value) {
3195 if (da->id->prefix) {
3196 if (da->id->xmlns) {
3197 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3198 da->value, bindingsPtr);
3199 if (result)
3200 return result;
3201 } else {
3202 (da->id->name)[-1] = 2;
3203 nPrefixes++;
3204 appAtts[attIndex++] = da->id->name;
3205 appAtts[attIndex++] = da->value;
3206 }
3207 } else {
3208 (da->id->name)[-1] = 1;
3209 appAtts[attIndex++] = da->id->name;
3210 appAtts[attIndex++] = da->value;
3211 }
3212 }
3213 }
3214 appAtts[attIndex] = 0;
3215
3216 /* expand prefixed attribute names, check for duplicates,
3217 and clear flags that say whether attributes were specified */
3218 i = 0;
3219 if (nPrefixes) {
3220 int j; /* hash table index */
3221 unsigned long version = parser->m_nsAttsVersion;
3222 int nsAttsSize = (int)1 << parser->m_nsAttsPower;
3223 unsigned char oldNsAttsPower = parser->m_nsAttsPower;
3224 /* size of hash table must be at least 2 * (# of prefixed attributes) */
3225 if ((nPrefixes << 1)
3226 >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
3227 NS_ATT *temp;
3228 /* hash table size must also be a power of 2 and >= 8 */
3229 while (nPrefixes >> parser->m_nsAttsPower++)
3230 ;
3231 if (parser->m_nsAttsPower < 3)
3232 parser->m_nsAttsPower = 3;
3233 nsAttsSize = (int)1 << parser->m_nsAttsPower;
3234 temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
3235 nsAttsSize * sizeof(NS_ATT));
3236 if (! temp) {
3237 /* Restore actual size of memory in m_nsAtts */
3238 parser->m_nsAttsPower = oldNsAttsPower;
3239 return XML_ERROR_NO_MEMORY;
3240 }
3241 parser->m_nsAtts = temp;
3242 version = 0; /* force re-initialization of m_nsAtts hash table */
3243 }
3244 /* using a version flag saves us from initializing m_nsAtts every time */
3245 if (! version) { /* initialize version flags when version wraps around */
3246 version = INIT_ATTS_VERSION;
3247 for (j = nsAttsSize; j != 0;)
3248 parser->m_nsAtts[--j].version = version;
3249 }
3250 parser->m_nsAttsVersion = --version;
3251
3252 /* expand prefixed names and check for duplicates */
3253 for (; i < attIndex; i += 2) {
3254 const XML_Char *s = appAtts[i];
3255 if (s[-1] == 2) { /* prefixed */
3256 ATTRIBUTE_ID *id;
3257 const BINDING *b;
3258 unsigned long uriHash;
3259 struct siphash sip_state;
3260 struct sipkey sip_key;
3261
3262 copy_salt_to_sipkey(parser, &sip_key);
3263 sip24_init(&sip_state, &sip_key);
3264
3265 ((XML_Char *)s)[-1] = 0; /* clear flag */
3266 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
3267 if (! id || ! id->prefix) {
3268 /* This code is walking through the appAtts array, dealing
3269 * with (in this case) a prefixed attribute name. To be in
3270 * the array, the attribute must have already been bound, so
3271 * has to have passed through the hash table lookup once
3272 * already. That implies that an entry for it already
3273 * exists, so the lookup above will return a pointer to
3274 * already allocated memory. There is no opportunaity for
3275 * the allocator to fail, so the condition above cannot be
3276 * fulfilled.
3277 *
3278 * Since it is difficult to be certain that the above
3279 * analysis is complete, we retain the test and merely
3280 * remove the code from coverage tests.
3281 */
3282 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3283 }
3284 b = id->prefix->binding;
3285 if (! b)
3286 return XML_ERROR_UNBOUND_PREFIX;
3287
3288 for (j = 0; j < b->uriLen; j++) {
3289 const XML_Char c = b->uri[j];
3290 if (! poolAppendChar(&parser->m_tempPool, c))
3291 return XML_ERROR_NO_MEMORY;
3292 }
3293
3294 sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3295
3296 while (*s++ != XML_T(ASCII_COLON))
3297 ;
3298
3299 sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3300
3301 do { /* copies null terminator */
3302 if (! poolAppendChar(&parser->m_tempPool, *s))
3303 return XML_ERROR_NO_MEMORY;
3304 } while (*s++);
3305
3306 uriHash = (unsigned long)sip24_final(&sip_state);
3307
3308 { /* Check hash table for duplicate of expanded name (uriName).
3309 Derived from code in lookup(parser, HASH_TABLE *table, ...).
3310 */
3311 unsigned char step = 0;
3312 unsigned long mask = nsAttsSize - 1;
3313 j = uriHash & mask; /* index into hash table */
3314 while (parser->m_nsAtts[j].version == version) {
3315 /* for speed we compare stored hash values first */
3316 if (uriHash == parser->m_nsAtts[j].hash) {
3317 const XML_Char *s1 = poolStart(&parser->m_tempPool);
3318 const XML_Char *s2 = parser->m_nsAtts[j].uriName;
3319 /* s1 is null terminated, but not s2 */
3320 for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
3321 ;
3322 if (*s1 == 0)
3323 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3324 }
3325 if (! step)
3326 step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
3327 j < step ? (j += nsAttsSize - step) : (j -= step);
3328 }
3329 }
3330
3331 if (parser->m_ns_triplets) { /* append namespace separator and prefix */
3332 parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
3333 s = b->prefix->name;
3334 do {
3335 if (! poolAppendChar(&parser->m_tempPool, *s))
3336 return XML_ERROR_NO_MEMORY;
3337 } while (*s++);
3338 }
3339
3340 /* store expanded name in attribute list */
3341 s = poolStart(&parser->m_tempPool);
3342 poolFinish(&parser->m_tempPool);
3343 appAtts[i] = s;
3344
3345 /* fill empty slot with new version, uriName and hash value */
3346 parser->m_nsAtts[j].version = version;
3347 parser->m_nsAtts[j].hash = uriHash;
3348 parser->m_nsAtts[j].uriName = s;
3349
3350 if (! --nPrefixes) {
3351 i += 2;
3352 break;
3353 }
3354 } else /* not prefixed */
3355 ((XML_Char *)s)[-1] = 0; /* clear flag */
3356 }
3357 }
3358 /* clear flags for the remaining attributes */
3359 for (; i < attIndex; i += 2)
3360 ((XML_Char *)(appAtts[i]))[-1] = 0;
3361 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3362 binding->attId->name[-1] = 0;
3363
3364 if (! parser->m_ns)
3365 return XML_ERROR_NONE;
3366
3367 /* expand the element type name */
3368 if (elementType->prefix) {
3369 binding = elementType->prefix->binding;
3370 if (! binding)
3371 return XML_ERROR_UNBOUND_PREFIX;
3372 localPart = tagNamePtr->str;
3373 while (*localPart++ != XML_T(ASCII_COLON))
3374 ;
3375 } else if (dtd->defaultPrefix.binding) {
3376 binding = dtd->defaultPrefix.binding;
3377 localPart = tagNamePtr->str;
3378 } else
3379 return XML_ERROR_NONE;
3380 prefixLen = 0;
3381 if (parser->m_ns_triplets && binding->prefix->name) {
3382 for (; binding->prefix->name[prefixLen++];)
3383 ; /* prefixLen includes null terminator */
3384 }
3385 tagNamePtr->localPart = localPart;
3386 tagNamePtr->uriLen = binding->uriLen;
3387 tagNamePtr->prefix = binding->prefix->name;
3388 tagNamePtr->prefixLen = prefixLen;
3389 for (i = 0; localPart[i++];)
3390 ; /* i includes null terminator */
3391 n = i + binding->uriLen + prefixLen;
3392 if (n > binding->uriAlloc) {
3393 TAG *p;
3394 uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
3395 if (! uri)
3396 return XML_ERROR_NO_MEMORY;
3397 binding->uriAlloc = n + EXPAND_SPARE;
3398 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3399 for (p = parser->m_tagStack; p; p = p->parent)
3400 if (p->name.str == binding->uri)
3401 p->name.str = uri;
3402 FREE(parser, binding->uri);
3403 binding->uri = uri;
3404 }
3405 /* if m_namespaceSeparator != '\0' then uri includes it already */
3406 uri = binding->uri + binding->uriLen;
3407 memcpy(uri, localPart, i * sizeof(XML_Char));
3408 /* we always have a namespace separator between localPart and prefix */
3409 if (prefixLen) {
3410 uri += i - 1;
3411 *uri = parser->m_namespaceSeparator; /* replace null terminator */
3412 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3413 }
3414 tagNamePtr->str = binding->uri;
3415 return XML_ERROR_NONE;
3416 }
3417
3418 /* addBinding() overwrites the value of prefix->binding without checking.
3419 Therefore one must keep track of the old value outside of addBinding().
3420 */
3421 static enum XML_Error
addBinding(XML_Parser parser,PREFIX * prefix,const ATTRIBUTE_ID * attId,const XML_Char * uri,BINDING ** bindingsPtr)3422 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3423 const XML_Char *uri, BINDING **bindingsPtr) {
3424 static const XML_Char xmlNamespace[]
3425 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
3426 ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
3427 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o,
3428 ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M,
3429 ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9,
3430 ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m,
3431 ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3432 ASCII_e, '\0'};
3433 static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
3434 static const XML_Char xmlnsNamespace[]
3435 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
3436 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
3437 ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH,
3438 ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x,
3439 ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'};
3440 static const int xmlnsLen
3441 = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
3442
3443 XML_Bool mustBeXML = XML_FALSE;
3444 XML_Bool isXML = XML_TRUE;
3445 XML_Bool isXMLNS = XML_TRUE;
3446
3447 BINDING *b;
3448 int len;
3449
3450 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
3451 if (*uri == XML_T('\0') && prefix->name)
3452 return XML_ERROR_UNDECLARING_PREFIX;
3453
3454 if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
3455 && prefix->name[1] == XML_T(ASCII_m)
3456 && prefix->name[2] == XML_T(ASCII_l)) {
3457 /* Not allowed to bind xmlns */
3458 if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
3459 && prefix->name[5] == XML_T('\0'))
3460 return XML_ERROR_RESERVED_PREFIX_XMLNS;
3461
3462 if (prefix->name[3] == XML_T('\0'))
3463 mustBeXML = XML_TRUE;
3464 }
3465
3466 for (len = 0; uri[len]; len++) {
3467 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3468 isXML = XML_FALSE;
3469
3470 if (! mustBeXML && isXMLNS
3471 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3472 isXMLNS = XML_FALSE;
3473 }
3474 isXML = isXML && len == xmlLen;
3475 isXMLNS = isXMLNS && len == xmlnsLen;
3476
3477 if (mustBeXML != isXML)
3478 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3479 : XML_ERROR_RESERVED_NAMESPACE_URI;
3480
3481 if (isXMLNS)
3482 return XML_ERROR_RESERVED_NAMESPACE_URI;
3483
3484 if (parser->m_namespaceSeparator)
3485 len++;
3486 if (parser->m_freeBindingList) {
3487 b = parser->m_freeBindingList;
3488 if (len > b->uriAlloc) {
3489 XML_Char *temp = (XML_Char *)REALLOC(
3490 parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
3491 if (temp == NULL)
3492 return XML_ERROR_NO_MEMORY;
3493 b->uri = temp;
3494 b->uriAlloc = len + EXPAND_SPARE;
3495 }
3496 parser->m_freeBindingList = b->nextTagBinding;
3497 } else {
3498 b = (BINDING *)MALLOC(parser, sizeof(BINDING));
3499 if (! b)
3500 return XML_ERROR_NO_MEMORY;
3501 b->uri
3502 = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
3503 if (! b->uri) {
3504 FREE(parser, b);
3505 return XML_ERROR_NO_MEMORY;
3506 }
3507 b->uriAlloc = len + EXPAND_SPARE;
3508 }
3509 b->uriLen = len;
3510 memcpy(b->uri, uri, len * sizeof(XML_Char));
3511 if (parser->m_namespaceSeparator)
3512 b->uri[len - 1] = parser->m_namespaceSeparator;
3513 b->prefix = prefix;
3514 b->attId = attId;
3515 b->prevPrefixBinding = prefix->binding;
3516 /* NULL binding when default namespace undeclared */
3517 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
3518 prefix->binding = NULL;
3519 else
3520 prefix->binding = b;
3521 b->nextTagBinding = *bindingsPtr;
3522 *bindingsPtr = b;
3523 /* if attId == NULL then we are not starting a namespace scope */
3524 if (attId && parser->m_startNamespaceDeclHandler)
3525 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
3526 prefix->binding ? uri : 0);
3527 return XML_ERROR_NONE;
3528 }
3529
3530 /* The idea here is to avoid using stack for each CDATA section when
3531 the whole file is parsed with one call.
3532 */
3533 static enum XML_Error PTRCALL
cdataSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)3534 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
3535 const char **endPtr) {
3536 enum XML_Error result
3537 = doCdataSection(parser, parser->m_encoding, &start, end, endPtr,
3538 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
3539 if (result != XML_ERROR_NONE)
3540 return result;
3541 if (start) {
3542 if (parser->m_parentParser) { /* we are parsing an external entity */
3543 parser->m_processor = externalEntityContentProcessor;
3544 return externalEntityContentProcessor(parser, start, end, endPtr);
3545 } else {
3546 parser->m_processor = contentProcessor;
3547 return contentProcessor(parser, start, end, endPtr);
3548 }
3549 }
3550 return result;
3551 }
3552
3553 /* startPtr gets set to non-null if the section is closed, and to null if
3554 the section is not yet closed.
3555 */
3556 static enum XML_Error
doCdataSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore)3557 doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
3558 const char *end, const char **nextPtr, XML_Bool haveMore) {
3559 const char *s = *startPtr;
3560 const char **eventPP;
3561 const char **eventEndPP;
3562 if (enc == parser->m_encoding) {
3563 eventPP = &parser->m_eventPtr;
3564 *eventPP = s;
3565 eventEndPP = &parser->m_eventEndPtr;
3566 } else {
3567 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3568 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
3569 }
3570 *eventPP = s;
3571 *startPtr = NULL;
3572
3573 for (;;) {
3574 const char *next;
3575 int tok = XmlCdataSectionTok(enc, s, end, &next);
3576 *eventEndPP = next;
3577 switch (tok) {
3578 case XML_TOK_CDATA_SECT_CLOSE:
3579 if (parser->m_endCdataSectionHandler)
3580 parser->m_endCdataSectionHandler(parser->m_handlerArg);
3581 /* BEGIN disabled code */
3582 /* see comment under XML_TOK_CDATA_SECT_OPEN */
3583 else if (0 && parser->m_characterDataHandler)
3584 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3585 0);
3586 /* END disabled code */
3587 else if (parser->m_defaultHandler)
3588 reportDefault(parser, enc, s, next);
3589 *startPtr = next;
3590 *nextPtr = next;
3591 if (parser->m_parsingStatus.parsing == XML_FINISHED)
3592 return XML_ERROR_ABORTED;
3593 else
3594 return XML_ERROR_NONE;
3595 case XML_TOK_DATA_NEWLINE:
3596 if (parser->m_characterDataHandler) {
3597 XML_Char c = 0xA;
3598 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3599 } else if (parser->m_defaultHandler)
3600 reportDefault(parser, enc, s, next);
3601 break;
3602 case XML_TOK_DATA_CHARS: {
3603 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3604 if (charDataHandler) {
3605 if (MUST_CONVERT(enc, s)) {
3606 for (;;) {
3607 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3608 const enum XML_Convert_Result convert_res = XmlConvert(
3609 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3610 *eventEndPP = next;
3611 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3612 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3613 if ((convert_res == XML_CONVERT_COMPLETED)
3614 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3615 break;
3616 *eventPP = s;
3617 }
3618 } else
3619 charDataHandler(parser->m_handlerArg, (XML_Char *)s,
3620 (int)((XML_Char *)next - (XML_Char *)s));
3621 } else if (parser->m_defaultHandler)
3622 reportDefault(parser, enc, s, next);
3623 } break;
3624 case XML_TOK_INVALID:
3625 *eventPP = next;
3626 return XML_ERROR_INVALID_TOKEN;
3627 case XML_TOK_PARTIAL_CHAR:
3628 if (haveMore) {
3629 *nextPtr = s;
3630 return XML_ERROR_NONE;
3631 }
3632 return XML_ERROR_PARTIAL_CHAR;
3633 case XML_TOK_PARTIAL:
3634 case XML_TOK_NONE:
3635 if (haveMore) {
3636 *nextPtr = s;
3637 return XML_ERROR_NONE;
3638 }
3639 return XML_ERROR_UNCLOSED_CDATA_SECTION;
3640 default:
3641 /* Every token returned by XmlCdataSectionTok() has its own
3642 * explicit case, so this default case will never be executed.
3643 * We retain it as a safety net and exclude it from the coverage
3644 * statistics.
3645 *
3646 * LCOV_EXCL_START
3647 */
3648 *eventPP = next;
3649 return XML_ERROR_UNEXPECTED_STATE;
3650 /* LCOV_EXCL_STOP */
3651 }
3652
3653 *eventPP = s = next;
3654 switch (parser->m_parsingStatus.parsing) {
3655 case XML_SUSPENDED:
3656 *nextPtr = next;
3657 return XML_ERROR_NONE;
3658 case XML_FINISHED:
3659 return XML_ERROR_ABORTED;
3660 default:;
3661 }
3662 }
3663 /* not reached */
3664 }
3665
3666 #ifdef XML_DTD
3667
3668 /* The idea here is to avoid using stack for each IGNORE section when
3669 the whole file is parsed with one call.
3670 */
3671 static enum XML_Error PTRCALL
ignoreSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)3672 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
3673 const char **endPtr) {
3674 enum XML_Error result
3675 = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
3676 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
3677 if (result != XML_ERROR_NONE)
3678 return result;
3679 if (start) {
3680 parser->m_processor = prologProcessor;
3681 return prologProcessor(parser, start, end, endPtr);
3682 }
3683 return result;
3684 }
3685
3686 /* startPtr gets set to non-null is the section is closed, and to null
3687 if the section is not yet closed.
3688 */
3689 static enum XML_Error
doIgnoreSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore)3690 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
3691 const char *end, const char **nextPtr, XML_Bool haveMore) {
3692 const char *next;
3693 int tok;
3694 const char *s = *startPtr;
3695 const char **eventPP;
3696 const char **eventEndPP;
3697 if (enc == parser->m_encoding) {
3698 eventPP = &parser->m_eventPtr;
3699 *eventPP = s;
3700 eventEndPP = &parser->m_eventEndPtr;
3701 } else {
3702 /* It's not entirely clear, but it seems the following two lines
3703 * of code cannot be executed. The only occasions on which 'enc'
3704 * is not 'encoding' are when this function is called
3705 * from the internal entity processing, and IGNORE sections are an
3706 * error in internal entities.
3707 *
3708 * Since it really isn't clear that this is true, we keep the code
3709 * and just remove it from our coverage tests.
3710 *
3711 * LCOV_EXCL_START
3712 */
3713 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3714 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
3715 /* LCOV_EXCL_STOP */
3716 }
3717 *eventPP = s;
3718 *startPtr = NULL;
3719 tok = XmlIgnoreSectionTok(enc, s, end, &next);
3720 *eventEndPP = next;
3721 switch (tok) {
3722 case XML_TOK_IGNORE_SECT:
3723 if (parser->m_defaultHandler)
3724 reportDefault(parser, enc, s, next);
3725 *startPtr = next;
3726 *nextPtr = next;
3727 if (parser->m_parsingStatus.parsing == XML_FINISHED)
3728 return XML_ERROR_ABORTED;
3729 else
3730 return XML_ERROR_NONE;
3731 case XML_TOK_INVALID:
3732 *eventPP = next;
3733 return XML_ERROR_INVALID_TOKEN;
3734 case XML_TOK_PARTIAL_CHAR:
3735 if (haveMore) {
3736 *nextPtr = s;
3737 return XML_ERROR_NONE;
3738 }
3739 return XML_ERROR_PARTIAL_CHAR;
3740 case XML_TOK_PARTIAL:
3741 case XML_TOK_NONE:
3742 if (haveMore) {
3743 *nextPtr = s;
3744 return XML_ERROR_NONE;
3745 }
3746 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
3747 default:
3748 /* All of the tokens that XmlIgnoreSectionTok() returns have
3749 * explicit cases to handle them, so this default case is never
3750 * executed. We keep it as a safety net anyway, and remove it
3751 * from our test coverage statistics.
3752 *
3753 * LCOV_EXCL_START
3754 */
3755 *eventPP = next;
3756 return XML_ERROR_UNEXPECTED_STATE;
3757 /* LCOV_EXCL_STOP */
3758 }
3759 /* not reached */
3760 }
3761
3762 #endif /* XML_DTD */
3763
3764 static enum XML_Error
initializeEncoding(XML_Parser parser)3765 initializeEncoding(XML_Parser parser) {
3766 const char *s;
3767 #ifdef XML_UNICODE
3768 char encodingBuf[128];
3769 /* See comments abount `protoclEncodingName` in parserInit() */
3770 if (! parser->m_protocolEncodingName)
3771 s = NULL;
3772 else {
3773 int i;
3774 for (i = 0; parser->m_protocolEncodingName[i]; i++) {
3775 if (i == sizeof(encodingBuf) - 1
3776 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
3777 encodingBuf[0] = '\0';
3778 break;
3779 }
3780 encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
3781 }
3782 encodingBuf[i] = '\0';
3783 s = encodingBuf;
3784 }
3785 #else
3786 s = parser->m_protocolEncodingName;
3787 #endif
3788 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
3789 &parser->m_initEncoding, &parser->m_encoding, s))
3790 return XML_ERROR_NONE;
3791 return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
3792 }
3793
3794 static enum XML_Error
processXmlDecl(XML_Parser parser,int isGeneralTextEntity,const char * s,const char * next)3795 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
3796 const char *next) {
3797 const char *encodingName = NULL;
3798 const XML_Char *storedEncName = NULL;
3799 const ENCODING *newEncoding = NULL;
3800 const char *version = NULL;
3801 const char *versionend;
3802 const XML_Char *storedversion = NULL;
3803 int standalone = -1;
3804 if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
3805 isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
3806 &version, &versionend, &encodingName, &newEncoding, &standalone)) {
3807 if (isGeneralTextEntity)
3808 return XML_ERROR_TEXT_DECL;
3809 else
3810 return XML_ERROR_XML_DECL;
3811 }
3812 if (! isGeneralTextEntity && standalone == 1) {
3813 parser->m_dtd->standalone = XML_TRUE;
3814 #ifdef XML_DTD
3815 if (parser->m_paramEntityParsing
3816 == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
3817 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
3818 #endif /* XML_DTD */
3819 }
3820 if (parser->m_xmlDeclHandler) {
3821 if (encodingName != NULL) {
3822 storedEncName = poolStoreString(
3823 &parser->m_temp2Pool, parser->m_encoding, encodingName,
3824 encodingName + XmlNameLength(parser->m_encoding, encodingName));
3825 if (! storedEncName)
3826 return XML_ERROR_NO_MEMORY;
3827 poolFinish(&parser->m_temp2Pool);
3828 }
3829 if (version) {
3830 storedversion
3831 = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
3832 versionend - parser->m_encoding->minBytesPerChar);
3833 if (! storedversion)
3834 return XML_ERROR_NO_MEMORY;
3835 }
3836 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
3837 standalone);
3838 } else if (parser->m_defaultHandler)
3839 reportDefault(parser, parser->m_encoding, s, next);
3840 if (parser->m_protocolEncodingName == NULL) {
3841 if (newEncoding) {
3842 /* Check that the specified encoding does not conflict with what
3843 * the parser has already deduced. Do we have the same number
3844 * of bytes in the smallest representation of a character? If
3845 * this is UTF-16, is it the same endianness?
3846 */
3847 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
3848 || (newEncoding->minBytesPerChar == 2
3849 && newEncoding != parser->m_encoding)) {
3850 parser->m_eventPtr = encodingName;
3851 return XML_ERROR_INCORRECT_ENCODING;
3852 }
3853 parser->m_encoding = newEncoding;
3854 } else if (encodingName) {
3855 enum XML_Error result;
3856 if (! storedEncName) {
3857 storedEncName = poolStoreString(
3858 &parser->m_temp2Pool, parser->m_encoding, encodingName,
3859 encodingName + XmlNameLength(parser->m_encoding, encodingName));
3860 if (! storedEncName)
3861 return XML_ERROR_NO_MEMORY;
3862 }
3863 result = handleUnknownEncoding(parser, storedEncName);
3864 poolClear(&parser->m_temp2Pool);
3865 if (result == XML_ERROR_UNKNOWN_ENCODING)
3866 parser->m_eventPtr = encodingName;
3867 return result;
3868 }
3869 }
3870
3871 if (storedEncName || storedversion)
3872 poolClear(&parser->m_temp2Pool);
3873
3874 return XML_ERROR_NONE;
3875 }
3876
3877 static enum XML_Error
handleUnknownEncoding(XML_Parser parser,const XML_Char * encodingName)3878 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
3879 if (parser->m_unknownEncodingHandler) {
3880 XML_Encoding info;
3881 int i;
3882 for (i = 0; i < 256; i++)
3883 info.map[i] = -1;
3884 info.convert = NULL;
3885 info.data = NULL;
3886 info.release = NULL;
3887 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
3888 encodingName, &info)) {
3889 ENCODING *enc;
3890 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
3891 if (! parser->m_unknownEncodingMem) {
3892 if (info.release)
3893 info.release(info.data);
3894 return XML_ERROR_NO_MEMORY;
3895 }
3896 enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
3897 parser->m_unknownEncodingMem, info.map, info.convert, info.data);
3898 if (enc) {
3899 parser->m_unknownEncodingData = info.data;
3900 parser->m_unknownEncodingRelease = info.release;
3901 parser->m_encoding = enc;
3902 return XML_ERROR_NONE;
3903 }
3904 }
3905 if (info.release != NULL)
3906 info.release(info.data);
3907 }
3908 return XML_ERROR_UNKNOWN_ENCODING;
3909 }
3910
3911 static enum XML_Error PTRCALL
prologInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)3912 prologInitProcessor(XML_Parser parser, const char *s, const char *end,
3913 const char **nextPtr) {
3914 enum XML_Error result = initializeEncoding(parser);
3915 if (result != XML_ERROR_NONE)
3916 return result;
3917 parser->m_processor = prologProcessor;
3918 return prologProcessor(parser, s, end, nextPtr);
3919 }
3920
3921 #ifdef XML_DTD
3922
3923 static enum XML_Error PTRCALL
externalParEntInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)3924 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
3925 const char **nextPtr) {
3926 enum XML_Error result = initializeEncoding(parser);
3927 if (result != XML_ERROR_NONE)
3928 return result;
3929
3930 /* we know now that XML_Parse(Buffer) has been called,
3931 so we consider the external parameter entity read */
3932 parser->m_dtd->paramEntityRead = XML_TRUE;
3933
3934 if (parser->m_prologState.inEntityValue) {
3935 parser->m_processor = entityValueInitProcessor;
3936 return entityValueInitProcessor(parser, s, end, nextPtr);
3937 } else {
3938 parser->m_processor = externalParEntProcessor;
3939 return externalParEntProcessor(parser, s, end, nextPtr);
3940 }
3941 }
3942
3943 static enum XML_Error PTRCALL
entityValueInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)3944 entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
3945 const char **nextPtr) {
3946 int tok;
3947 const char *start = s;
3948 const char *next = start;
3949 parser->m_eventPtr = start;
3950
3951 for (;;) {
3952 tok = XmlPrologTok(parser->m_encoding, start, end, &next);
3953 parser->m_eventEndPtr = next;
3954 if (tok <= 0) {
3955 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
3956 *nextPtr = s;
3957 return XML_ERROR_NONE;
3958 }
3959 switch (tok) {
3960 case XML_TOK_INVALID:
3961 return XML_ERROR_INVALID_TOKEN;
3962 case XML_TOK_PARTIAL:
3963 return XML_ERROR_UNCLOSED_TOKEN;
3964 case XML_TOK_PARTIAL_CHAR:
3965 return XML_ERROR_PARTIAL_CHAR;
3966 case XML_TOK_NONE: /* start == end */
3967 default:
3968 break;
3969 }
3970 /* found end of entity value - can store it now */
3971 return storeEntityValue(parser, parser->m_encoding, s, end);
3972 } else if (tok == XML_TOK_XML_DECL) {
3973 enum XML_Error result;
3974 result = processXmlDecl(parser, 0, start, next);
3975 if (result != XML_ERROR_NONE)
3976 return result;
3977 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For
3978 * that to happen, a parameter entity parsing handler must have attempted
3979 * to suspend the parser, which fails and raises an error. The parser can
3980 * be aborted, but can't be suspended.
3981 */
3982 if (parser->m_parsingStatus.parsing == XML_FINISHED)
3983 return XML_ERROR_ABORTED;
3984 *nextPtr = next;
3985 /* stop scanning for text declaration - we found one */
3986 parser->m_processor = entityValueProcessor;
3987 return entityValueProcessor(parser, next, end, nextPtr);
3988 }
3989 /* If we are at the end of the buffer, this would cause XmlPrologTok to
3990 return XML_TOK_NONE on the next call, which would then cause the
3991 function to exit with *nextPtr set to s - that is what we want for other
3992 tokens, but not for the BOM - we would rather like to skip it;
3993 then, when this routine is entered the next time, XmlPrologTok will
3994 return XML_TOK_INVALID, since the BOM is still in the buffer
3995 */
3996 else if (tok == XML_TOK_BOM && next == end
3997 && ! parser->m_parsingStatus.finalBuffer) {
3998 *nextPtr = next;
3999 return XML_ERROR_NONE;
4000 }
4001 /* If we get this token, we have the start of what might be a
4002 normal tag, but not a declaration (i.e. it doesn't begin with
4003 "<!"). In a DTD context, that isn't legal.
4004 */
4005 else if (tok == XML_TOK_INSTANCE_START) {
4006 *nextPtr = next;
4007 return XML_ERROR_SYNTAX;
4008 }
4009 start = next;
4010 parser->m_eventPtr = start;
4011 }
4012 }
4013
4014 static enum XML_Error PTRCALL
externalParEntProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4015 externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
4016 const char **nextPtr) {
4017 const char *next = s;
4018 int tok;
4019
4020 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4021 if (tok <= 0) {
4022 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4023 *nextPtr = s;
4024 return XML_ERROR_NONE;
4025 }
4026 switch (tok) {
4027 case XML_TOK_INVALID:
4028 return XML_ERROR_INVALID_TOKEN;
4029 case XML_TOK_PARTIAL:
4030 return XML_ERROR_UNCLOSED_TOKEN;
4031 case XML_TOK_PARTIAL_CHAR:
4032 return XML_ERROR_PARTIAL_CHAR;
4033 case XML_TOK_NONE: /* start == end */
4034 default:
4035 break;
4036 }
4037 }
4038 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4039 However, when parsing an external subset, doProlog will not accept a BOM
4040 as valid, and report a syntax error, so we have to skip the BOM
4041 */
4042 else if (tok == XML_TOK_BOM) {
4043 s = next;
4044 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4045 }
4046
4047 parser->m_processor = prologProcessor;
4048 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4049 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE);
4050 }
4051
4052 static enum XML_Error PTRCALL
entityValueProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4053 entityValueProcessor(XML_Parser parser, const char *s, const char *end,
4054 const char **nextPtr) {
4055 const char *start = s;
4056 const char *next = s;
4057 const ENCODING *enc = parser->m_encoding;
4058 int tok;
4059
4060 for (;;) {
4061 tok = XmlPrologTok(enc, start, end, &next);
4062 if (tok <= 0) {
4063 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4064 *nextPtr = s;
4065 return XML_ERROR_NONE;
4066 }
4067 switch (tok) {
4068 case XML_TOK_INVALID:
4069 return XML_ERROR_INVALID_TOKEN;
4070 case XML_TOK_PARTIAL:
4071 return XML_ERROR_UNCLOSED_TOKEN;
4072 case XML_TOK_PARTIAL_CHAR:
4073 return XML_ERROR_PARTIAL_CHAR;
4074 case XML_TOK_NONE: /* start == end */
4075 default:
4076 break;
4077 }
4078 /* found end of entity value - can store it now */
4079 return storeEntityValue(parser, enc, s, end);
4080 }
4081 start = next;
4082 }
4083 }
4084
4085 #endif /* XML_DTD */
4086
4087 static enum XML_Error PTRCALL
prologProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4088 prologProcessor(XML_Parser parser, const char *s, const char *end,
4089 const char **nextPtr) {
4090 const char *next = s;
4091 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4092 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4093 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE);
4094 }
4095
4096 static enum XML_Error
doProlog(XML_Parser parser,const ENCODING * enc,const char * s,const char * end,int tok,const char * next,const char ** nextPtr,XML_Bool haveMore,XML_Bool allowClosingDoctype)4097 doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
4098 int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
4099 XML_Bool allowClosingDoctype) {
4100 #ifdef XML_DTD
4101 static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
4102 #endif /* XML_DTD */
4103 static const XML_Char atypeCDATA[]
4104 = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
4105 static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
4106 static const XML_Char atypeIDREF[]
4107 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
4108 static const XML_Char atypeIDREFS[]
4109 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
4110 static const XML_Char atypeENTITY[]
4111 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
4112 static const XML_Char atypeENTITIES[]
4113 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
4114 ASCII_I, ASCII_E, ASCII_S, '\0'};
4115 static const XML_Char atypeNMTOKEN[]
4116 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
4117 static const XML_Char atypeNMTOKENS[]
4118 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
4119 ASCII_E, ASCII_N, ASCII_S, '\0'};
4120 static const XML_Char notationPrefix[]
4121 = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
4122 ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
4123 static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
4124 static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
4125
4126 /* save one level of indirection */
4127 DTD *const dtd = parser->m_dtd;
4128
4129 const char **eventPP;
4130 const char **eventEndPP;
4131 enum XML_Content_Quant quant;
4132
4133 if (enc == parser->m_encoding) {
4134 eventPP = &parser->m_eventPtr;
4135 eventEndPP = &parser->m_eventEndPtr;
4136 } else {
4137 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4138 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4139 }
4140
4141 for (;;) {
4142 int role;
4143 XML_Bool handleDefault = XML_TRUE;
4144 *eventPP = s;
4145 *eventEndPP = next;
4146 if (tok <= 0) {
4147 if (haveMore && tok != XML_TOK_INVALID) {
4148 *nextPtr = s;
4149 return XML_ERROR_NONE;
4150 }
4151 switch (tok) {
4152 case XML_TOK_INVALID:
4153 *eventPP = next;
4154 return XML_ERROR_INVALID_TOKEN;
4155 case XML_TOK_PARTIAL:
4156 return XML_ERROR_UNCLOSED_TOKEN;
4157 case XML_TOK_PARTIAL_CHAR:
4158 return XML_ERROR_PARTIAL_CHAR;
4159 case -XML_TOK_PROLOG_S:
4160 tok = -tok;
4161 break;
4162 case XML_TOK_NONE:
4163 #ifdef XML_DTD
4164 /* for internal PE NOT referenced between declarations */
4165 if (enc != parser->m_encoding
4166 && ! parser->m_openInternalEntities->betweenDecl) {
4167 *nextPtr = s;
4168 return XML_ERROR_NONE;
4169 }
4170 /* WFC: PE Between Declarations - must check that PE contains
4171 complete markup, not only for external PEs, but also for
4172 internal PEs if the reference occurs between declarations.
4173 */
4174 if (parser->m_isParamEntity || enc != parser->m_encoding) {
4175 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
4176 == XML_ROLE_ERROR)
4177 return XML_ERROR_INCOMPLETE_PE;
4178 *nextPtr = s;
4179 return XML_ERROR_NONE;
4180 }
4181 #endif /* XML_DTD */
4182 return XML_ERROR_NO_ELEMENTS;
4183 default:
4184 tok = -tok;
4185 next = end;
4186 break;
4187 }
4188 }
4189 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
4190 switch (role) {
4191 case XML_ROLE_XML_DECL: {
4192 enum XML_Error result = processXmlDecl(parser, 0, s, next);
4193 if (result != XML_ERROR_NONE)
4194 return result;
4195 enc = parser->m_encoding;
4196 handleDefault = XML_FALSE;
4197 } break;
4198 case XML_ROLE_DOCTYPE_NAME:
4199 if (parser->m_startDoctypeDeclHandler) {
4200 parser->m_doctypeName
4201 = poolStoreString(&parser->m_tempPool, enc, s, next);
4202 if (! parser->m_doctypeName)
4203 return XML_ERROR_NO_MEMORY;
4204 poolFinish(&parser->m_tempPool);
4205 parser->m_doctypePubid = NULL;
4206 handleDefault = XML_FALSE;
4207 }
4208 parser->m_doctypeSysid = NULL; /* always initialize to NULL */
4209 break;
4210 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
4211 if (parser->m_startDoctypeDeclHandler) {
4212 parser->m_startDoctypeDeclHandler(
4213 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4214 parser->m_doctypePubid, 1);
4215 parser->m_doctypeName = NULL;
4216 poolClear(&parser->m_tempPool);
4217 handleDefault = XML_FALSE;
4218 }
4219 break;
4220 #ifdef XML_DTD
4221 case XML_ROLE_TEXT_DECL: {
4222 enum XML_Error result = processXmlDecl(parser, 1, s, next);
4223 if (result != XML_ERROR_NONE)
4224 return result;
4225 enc = parser->m_encoding;
4226 handleDefault = XML_FALSE;
4227 } break;
4228 #endif /* XML_DTD */
4229 case XML_ROLE_DOCTYPE_PUBLIC_ID:
4230 #ifdef XML_DTD
4231 parser->m_useForeignDTD = XML_FALSE;
4232 parser->m_declEntity = (ENTITY *)lookup(
4233 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4234 if (! parser->m_declEntity)
4235 return XML_ERROR_NO_MEMORY;
4236 #endif /* XML_DTD */
4237 dtd->hasParamEntityRefs = XML_TRUE;
4238 if (parser->m_startDoctypeDeclHandler) {
4239 XML_Char *pubId;
4240 if (! XmlIsPublicId(enc, s, next, eventPP))
4241 return XML_ERROR_PUBLICID;
4242 pubId = poolStoreString(&parser->m_tempPool, enc,
4243 s + enc->minBytesPerChar,
4244 next - enc->minBytesPerChar);
4245 if (! pubId)
4246 return XML_ERROR_NO_MEMORY;
4247 normalizePublicId(pubId);
4248 poolFinish(&parser->m_tempPool);
4249 parser->m_doctypePubid = pubId;
4250 handleDefault = XML_FALSE;
4251 goto alreadyChecked;
4252 }
4253 /* fall through */
4254 case XML_ROLE_ENTITY_PUBLIC_ID:
4255 if (! XmlIsPublicId(enc, s, next, eventPP))
4256 return XML_ERROR_PUBLICID;
4257 alreadyChecked:
4258 if (dtd->keepProcessing && parser->m_declEntity) {
4259 XML_Char *tem
4260 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4261 next - enc->minBytesPerChar);
4262 if (! tem)
4263 return XML_ERROR_NO_MEMORY;
4264 normalizePublicId(tem);
4265 parser->m_declEntity->publicId = tem;
4266 poolFinish(&dtd->pool);
4267 /* Don't suppress the default handler if we fell through from
4268 * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
4269 */
4270 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
4271 handleDefault = XML_FALSE;
4272 }
4273 break;
4274 case XML_ROLE_DOCTYPE_CLOSE:
4275 if (allowClosingDoctype != XML_TRUE) {
4276 /* Must not close doctype from within expanded parameter entities */
4277 return XML_ERROR_INVALID_TOKEN;
4278 }
4279
4280 if (parser->m_doctypeName) {
4281 parser->m_startDoctypeDeclHandler(
4282 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4283 parser->m_doctypePubid, 0);
4284 poolClear(&parser->m_tempPool);
4285 handleDefault = XML_FALSE;
4286 }
4287 /* parser->m_doctypeSysid will be non-NULL in the case of a previous
4288 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
4289 was not set, indicating an external subset
4290 */
4291 #ifdef XML_DTD
4292 if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
4293 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4294 dtd->hasParamEntityRefs = XML_TRUE;
4295 if (parser->m_paramEntityParsing
4296 && parser->m_externalEntityRefHandler) {
4297 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4298 externalSubsetName, sizeof(ENTITY));
4299 if (! entity) {
4300 /* The external subset name "#" will have already been
4301 * inserted into the hash table at the start of the
4302 * external entity parsing, so no allocation will happen
4303 * and lookup() cannot fail.
4304 */
4305 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4306 }
4307 if (parser->m_useForeignDTD)
4308 entity->base = parser->m_curBase;
4309 dtd->paramEntityRead = XML_FALSE;
4310 if (! parser->m_externalEntityRefHandler(
4311 parser->m_externalEntityRefHandlerArg, 0, entity->base,
4312 entity->systemId, entity->publicId))
4313 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4314 if (dtd->paramEntityRead) {
4315 if (! dtd->standalone && parser->m_notStandaloneHandler
4316 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4317 return XML_ERROR_NOT_STANDALONE;
4318 }
4319 /* if we didn't read the foreign DTD then this means that there
4320 is no external subset and we must reset dtd->hasParamEntityRefs
4321 */
4322 else if (! parser->m_doctypeSysid)
4323 dtd->hasParamEntityRefs = hadParamEntityRefs;
4324 /* end of DTD - no need to update dtd->keepProcessing */
4325 }
4326 parser->m_useForeignDTD = XML_FALSE;
4327 }
4328 #endif /* XML_DTD */
4329 if (parser->m_endDoctypeDeclHandler) {
4330 parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
4331 handleDefault = XML_FALSE;
4332 }
4333 break;
4334 case XML_ROLE_INSTANCE_START:
4335 #ifdef XML_DTD
4336 /* if there is no DOCTYPE declaration then now is the
4337 last chance to read the foreign DTD
4338 */
4339 if (parser->m_useForeignDTD) {
4340 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4341 dtd->hasParamEntityRefs = XML_TRUE;
4342 if (parser->m_paramEntityParsing
4343 && parser->m_externalEntityRefHandler) {
4344 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4345 externalSubsetName, sizeof(ENTITY));
4346 if (! entity)
4347 return XML_ERROR_NO_MEMORY;
4348 entity->base = parser->m_curBase;
4349 dtd->paramEntityRead = XML_FALSE;
4350 if (! parser->m_externalEntityRefHandler(
4351 parser->m_externalEntityRefHandlerArg, 0, entity->base,
4352 entity->systemId, entity->publicId))
4353 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4354 if (dtd->paramEntityRead) {
4355 if (! dtd->standalone && parser->m_notStandaloneHandler
4356 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4357 return XML_ERROR_NOT_STANDALONE;
4358 }
4359 /* if we didn't read the foreign DTD then this means that there
4360 is no external subset and we must reset dtd->hasParamEntityRefs
4361 */
4362 else
4363 dtd->hasParamEntityRefs = hadParamEntityRefs;
4364 /* end of DTD - no need to update dtd->keepProcessing */
4365 }
4366 }
4367 #endif /* XML_DTD */
4368 parser->m_processor = contentProcessor;
4369 return contentProcessor(parser, s, end, nextPtr);
4370 case XML_ROLE_ATTLIST_ELEMENT_NAME:
4371 parser->m_declElementType = getElementType(parser, enc, s, next);
4372 if (! parser->m_declElementType)
4373 return XML_ERROR_NO_MEMORY;
4374 goto checkAttListDeclHandler;
4375 case XML_ROLE_ATTRIBUTE_NAME:
4376 parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
4377 if (! parser->m_declAttributeId)
4378 return XML_ERROR_NO_MEMORY;
4379 parser->m_declAttributeIsCdata = XML_FALSE;
4380 parser->m_declAttributeType = NULL;
4381 parser->m_declAttributeIsId = XML_FALSE;
4382 goto checkAttListDeclHandler;
4383 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
4384 parser->m_declAttributeIsCdata = XML_TRUE;
4385 parser->m_declAttributeType = atypeCDATA;
4386 goto checkAttListDeclHandler;
4387 case XML_ROLE_ATTRIBUTE_TYPE_ID:
4388 parser->m_declAttributeIsId = XML_TRUE;
4389 parser->m_declAttributeType = atypeID;
4390 goto checkAttListDeclHandler;
4391 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
4392 parser->m_declAttributeType = atypeIDREF;
4393 goto checkAttListDeclHandler;
4394 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
4395 parser->m_declAttributeType = atypeIDREFS;
4396 goto checkAttListDeclHandler;
4397 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
4398 parser->m_declAttributeType = atypeENTITY;
4399 goto checkAttListDeclHandler;
4400 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
4401 parser->m_declAttributeType = atypeENTITIES;
4402 goto checkAttListDeclHandler;
4403 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
4404 parser->m_declAttributeType = atypeNMTOKEN;
4405 goto checkAttListDeclHandler;
4406 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
4407 parser->m_declAttributeType = atypeNMTOKENS;
4408 checkAttListDeclHandler:
4409 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
4410 handleDefault = XML_FALSE;
4411 break;
4412 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
4413 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
4414 if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
4415 const XML_Char *prefix;
4416 if (parser->m_declAttributeType) {
4417 prefix = enumValueSep;
4418 } else {
4419 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
4420 : enumValueStart);
4421 }
4422 if (! poolAppendString(&parser->m_tempPool, prefix))
4423 return XML_ERROR_NO_MEMORY;
4424 if (! poolAppend(&parser->m_tempPool, enc, s, next))
4425 return XML_ERROR_NO_MEMORY;
4426 parser->m_declAttributeType = parser->m_tempPool.start;
4427 handleDefault = XML_FALSE;
4428 }
4429 break;
4430 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
4431 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
4432 if (dtd->keepProcessing) {
4433 if (! defineAttribute(parser->m_declElementType,
4434 parser->m_declAttributeId,
4435 parser->m_declAttributeIsCdata,
4436 parser->m_declAttributeIsId, 0, parser))
4437 return XML_ERROR_NO_MEMORY;
4438 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4439 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4440 || (*parser->m_declAttributeType == XML_T(ASCII_N)
4441 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
4442 /* Enumerated or Notation type */
4443 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4444 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
4445 return XML_ERROR_NO_MEMORY;
4446 parser->m_declAttributeType = parser->m_tempPool.start;
4447 poolFinish(&parser->m_tempPool);
4448 }
4449 *eventEndPP = s;
4450 parser->m_attlistDeclHandler(
4451 parser->m_handlerArg, parser->m_declElementType->name,
4452 parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
4453 role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
4454 poolClear(&parser->m_tempPool);
4455 handleDefault = XML_FALSE;
4456 }
4457 }
4458 break;
4459 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
4460 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
4461 if (dtd->keepProcessing) {
4462 const XML_Char *attVal;
4463 enum XML_Error result = storeAttributeValue(
4464 parser, enc, parser->m_declAttributeIsCdata,
4465 s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool);
4466 if (result)
4467 return result;
4468 attVal = poolStart(&dtd->pool);
4469 poolFinish(&dtd->pool);
4470 /* ID attributes aren't allowed to have a default */
4471 if (! defineAttribute(
4472 parser->m_declElementType, parser->m_declAttributeId,
4473 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
4474 return XML_ERROR_NO_MEMORY;
4475 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4476 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4477 || (*parser->m_declAttributeType == XML_T(ASCII_N)
4478 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
4479 /* Enumerated or Notation type */
4480 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4481 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
4482 return XML_ERROR_NO_MEMORY;
4483 parser->m_declAttributeType = parser->m_tempPool.start;
4484 poolFinish(&parser->m_tempPool);
4485 }
4486 *eventEndPP = s;
4487 parser->m_attlistDeclHandler(
4488 parser->m_handlerArg, parser->m_declElementType->name,
4489 parser->m_declAttributeId->name, parser->m_declAttributeType,
4490 attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
4491 poolClear(&parser->m_tempPool);
4492 handleDefault = XML_FALSE;
4493 }
4494 }
4495 break;
4496 case XML_ROLE_ENTITY_VALUE:
4497 if (dtd->keepProcessing) {
4498 enum XML_Error result = storeEntityValue(
4499 parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
4500 if (parser->m_declEntity) {
4501 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
4502 parser->m_declEntity->textLen
4503 = (int)(poolLength(&dtd->entityValuePool));
4504 poolFinish(&dtd->entityValuePool);
4505 if (parser->m_entityDeclHandler) {
4506 *eventEndPP = s;
4507 parser->m_entityDeclHandler(
4508 parser->m_handlerArg, parser->m_declEntity->name,
4509 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
4510 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
4511 handleDefault = XML_FALSE;
4512 }
4513 } else
4514 poolDiscard(&dtd->entityValuePool);
4515 if (result != XML_ERROR_NONE)
4516 return result;
4517 }
4518 break;
4519 case XML_ROLE_DOCTYPE_SYSTEM_ID:
4520 #ifdef XML_DTD
4521 parser->m_useForeignDTD = XML_FALSE;
4522 #endif /* XML_DTD */
4523 dtd->hasParamEntityRefs = XML_TRUE;
4524 if (parser->m_startDoctypeDeclHandler) {
4525 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
4526 s + enc->minBytesPerChar,
4527 next - enc->minBytesPerChar);
4528 if (parser->m_doctypeSysid == NULL)
4529 return XML_ERROR_NO_MEMORY;
4530 poolFinish(&parser->m_tempPool);
4531 handleDefault = XML_FALSE;
4532 }
4533 #ifdef XML_DTD
4534 else
4535 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
4536 for the case where no parser->m_startDoctypeDeclHandler is set */
4537 parser->m_doctypeSysid = externalSubsetName;
4538 #endif /* XML_DTD */
4539 if (! dtd->standalone
4540 #ifdef XML_DTD
4541 && ! parser->m_paramEntityParsing
4542 #endif /* XML_DTD */
4543 && parser->m_notStandaloneHandler
4544 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4545 return XML_ERROR_NOT_STANDALONE;
4546 #ifndef XML_DTD
4547 break;
4548 #else /* XML_DTD */
4549 if (! parser->m_declEntity) {
4550 parser->m_declEntity = (ENTITY *)lookup(
4551 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4552 if (! parser->m_declEntity)
4553 return XML_ERROR_NO_MEMORY;
4554 parser->m_declEntity->publicId = NULL;
4555 }
4556 #endif /* XML_DTD */
4557 /* fall through */
4558 case XML_ROLE_ENTITY_SYSTEM_ID:
4559 if (dtd->keepProcessing && parser->m_declEntity) {
4560 parser->m_declEntity->systemId
4561 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4562 next - enc->minBytesPerChar);
4563 if (! parser->m_declEntity->systemId)
4564 return XML_ERROR_NO_MEMORY;
4565 parser->m_declEntity->base = parser->m_curBase;
4566 poolFinish(&dtd->pool);
4567 /* Don't suppress the default handler if we fell through from
4568 * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
4569 */
4570 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
4571 handleDefault = XML_FALSE;
4572 }
4573 break;
4574 case XML_ROLE_ENTITY_COMPLETE:
4575 if (dtd->keepProcessing && parser->m_declEntity
4576 && parser->m_entityDeclHandler) {
4577 *eventEndPP = s;
4578 parser->m_entityDeclHandler(
4579 parser->m_handlerArg, parser->m_declEntity->name,
4580 parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
4581 parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
4582 handleDefault = XML_FALSE;
4583 }
4584 break;
4585 case XML_ROLE_ENTITY_NOTATION_NAME:
4586 if (dtd->keepProcessing && parser->m_declEntity) {
4587 parser->m_declEntity->notation
4588 = poolStoreString(&dtd->pool, enc, s, next);
4589 if (! parser->m_declEntity->notation)
4590 return XML_ERROR_NO_MEMORY;
4591 poolFinish(&dtd->pool);
4592 if (parser->m_unparsedEntityDeclHandler) {
4593 *eventEndPP = s;
4594 parser->m_unparsedEntityDeclHandler(
4595 parser->m_handlerArg, parser->m_declEntity->name,
4596 parser->m_declEntity->base, parser->m_declEntity->systemId,
4597 parser->m_declEntity->publicId, parser->m_declEntity->notation);
4598 handleDefault = XML_FALSE;
4599 } else if (parser->m_entityDeclHandler) {
4600 *eventEndPP = s;
4601 parser->m_entityDeclHandler(
4602 parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
4603 parser->m_declEntity->base, parser->m_declEntity->systemId,
4604 parser->m_declEntity->publicId, parser->m_declEntity->notation);
4605 handleDefault = XML_FALSE;
4606 }
4607 }
4608 break;
4609 case XML_ROLE_GENERAL_ENTITY_NAME: {
4610 if (XmlPredefinedEntityName(enc, s, next)) {
4611 parser->m_declEntity = NULL;
4612 break;
4613 }
4614 if (dtd->keepProcessing) {
4615 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4616 if (! name)
4617 return XML_ERROR_NO_MEMORY;
4618 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
4619 name, sizeof(ENTITY));
4620 if (! parser->m_declEntity)
4621 return XML_ERROR_NO_MEMORY;
4622 if (parser->m_declEntity->name != name) {
4623 poolDiscard(&dtd->pool);
4624 parser->m_declEntity = NULL;
4625 } else {
4626 poolFinish(&dtd->pool);
4627 parser->m_declEntity->publicId = NULL;
4628 parser->m_declEntity->is_param = XML_FALSE;
4629 /* if we have a parent parser or are reading an internal parameter
4630 entity, then the entity declaration is not considered "internal"
4631 */
4632 parser->m_declEntity->is_internal
4633 = ! (parser->m_parentParser || parser->m_openInternalEntities);
4634 if (parser->m_entityDeclHandler)
4635 handleDefault = XML_FALSE;
4636 }
4637 } else {
4638 poolDiscard(&dtd->pool);
4639 parser->m_declEntity = NULL;
4640 }
4641 } break;
4642 case XML_ROLE_PARAM_ENTITY_NAME:
4643 #ifdef XML_DTD
4644 if (dtd->keepProcessing) {
4645 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4646 if (! name)
4647 return XML_ERROR_NO_MEMORY;
4648 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4649 name, sizeof(ENTITY));
4650 if (! parser->m_declEntity)
4651 return XML_ERROR_NO_MEMORY;
4652 if (parser->m_declEntity->name != name) {
4653 poolDiscard(&dtd->pool);
4654 parser->m_declEntity = NULL;
4655 } else {
4656 poolFinish(&dtd->pool);
4657 parser->m_declEntity->publicId = NULL;
4658 parser->m_declEntity->is_param = XML_TRUE;
4659 /* if we have a parent parser or are reading an internal parameter
4660 entity, then the entity declaration is not considered "internal"
4661 */
4662 parser->m_declEntity->is_internal
4663 = ! (parser->m_parentParser || parser->m_openInternalEntities);
4664 if (parser->m_entityDeclHandler)
4665 handleDefault = XML_FALSE;
4666 }
4667 } else {
4668 poolDiscard(&dtd->pool);
4669 parser->m_declEntity = NULL;
4670 }
4671 #else /* not XML_DTD */
4672 parser->m_declEntity = NULL;
4673 #endif /* XML_DTD */
4674 break;
4675 case XML_ROLE_NOTATION_NAME:
4676 parser->m_declNotationPublicId = NULL;
4677 parser->m_declNotationName = NULL;
4678 if (parser->m_notationDeclHandler) {
4679 parser->m_declNotationName
4680 = poolStoreString(&parser->m_tempPool, enc, s, next);
4681 if (! parser->m_declNotationName)
4682 return XML_ERROR_NO_MEMORY;
4683 poolFinish(&parser->m_tempPool);
4684 handleDefault = XML_FALSE;
4685 }
4686 break;
4687 case XML_ROLE_NOTATION_PUBLIC_ID:
4688 if (! XmlIsPublicId(enc, s, next, eventPP))
4689 return XML_ERROR_PUBLICID;
4690 if (parser
4691 ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
4692 XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
4693 s + enc->minBytesPerChar,
4694 next - enc->minBytesPerChar);
4695 if (! tem)
4696 return XML_ERROR_NO_MEMORY;
4697 normalizePublicId(tem);
4698 parser->m_declNotationPublicId = tem;
4699 poolFinish(&parser->m_tempPool);
4700 handleDefault = XML_FALSE;
4701 }
4702 break;
4703 case XML_ROLE_NOTATION_SYSTEM_ID:
4704 if (parser->m_declNotationName && parser->m_notationDeclHandler) {
4705 const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
4706 s + enc->minBytesPerChar,
4707 next - enc->minBytesPerChar);
4708 if (! systemId)
4709 return XML_ERROR_NO_MEMORY;
4710 *eventEndPP = s;
4711 parser->m_notationDeclHandler(
4712 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
4713 systemId, parser->m_declNotationPublicId);
4714 handleDefault = XML_FALSE;
4715 }
4716 poolClear(&parser->m_tempPool);
4717 break;
4718 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
4719 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
4720 *eventEndPP = s;
4721 parser->m_notationDeclHandler(
4722 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
4723 0, parser->m_declNotationPublicId);
4724 handleDefault = XML_FALSE;
4725 }
4726 poolClear(&parser->m_tempPool);
4727 break;
4728 case XML_ROLE_ERROR:
4729 switch (tok) {
4730 case XML_TOK_PARAM_ENTITY_REF:
4731 /* PE references in internal subset are
4732 not allowed within declarations. */
4733 return XML_ERROR_PARAM_ENTITY_REF;
4734 case XML_TOK_XML_DECL:
4735 return XML_ERROR_MISPLACED_XML_PI;
4736 default:
4737 return XML_ERROR_SYNTAX;
4738 }
4739 #ifdef XML_DTD
4740 case XML_ROLE_IGNORE_SECT: {
4741 enum XML_Error result;
4742 if (parser->m_defaultHandler)
4743 reportDefault(parser, enc, s, next);
4744 handleDefault = XML_FALSE;
4745 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
4746 if (result != XML_ERROR_NONE)
4747 return result;
4748 else if (! next) {
4749 parser->m_processor = ignoreSectionProcessor;
4750 return result;
4751 }
4752 } break;
4753 #endif /* XML_DTD */
4754 case XML_ROLE_GROUP_OPEN:
4755 if (parser->m_prologState.level >= parser->m_groupSize) {
4756 if (parser->m_groupSize) {
4757 {
4758 char *const new_connector = (char *)REALLOC(
4759 parser, parser->m_groupConnector, parser->m_groupSize *= 2);
4760 if (new_connector == NULL) {
4761 parser->m_groupSize /= 2;
4762 return XML_ERROR_NO_MEMORY;
4763 }
4764 parser->m_groupConnector = new_connector;
4765 }
4766
4767 if (dtd->scaffIndex) {
4768 int *const new_scaff_index = (int *)REALLOC(
4769 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
4770 if (new_scaff_index == NULL)
4771 return XML_ERROR_NO_MEMORY;
4772 dtd->scaffIndex = new_scaff_index;
4773 }
4774 } else {
4775 parser->m_groupConnector
4776 = (char *)MALLOC(parser, parser->m_groupSize = 32);
4777 if (! parser->m_groupConnector) {
4778 parser->m_groupSize = 0;
4779 return XML_ERROR_NO_MEMORY;
4780 }
4781 }
4782 }
4783 parser->m_groupConnector[parser->m_prologState.level] = 0;
4784 if (dtd->in_eldecl) {
4785 int myindex = nextScaffoldPart(parser);
4786 if (myindex < 0)
4787 return XML_ERROR_NO_MEMORY;
4788 assert(dtd->scaffIndex != NULL);
4789 dtd->scaffIndex[dtd->scaffLevel] = myindex;
4790 dtd->scaffLevel++;
4791 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
4792 if (parser->m_elementDeclHandler)
4793 handleDefault = XML_FALSE;
4794 }
4795 break;
4796 case XML_ROLE_GROUP_SEQUENCE:
4797 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
4798 return XML_ERROR_SYNTAX;
4799 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
4800 if (dtd->in_eldecl && parser->m_elementDeclHandler)
4801 handleDefault = XML_FALSE;
4802 break;
4803 case XML_ROLE_GROUP_CHOICE:
4804 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
4805 return XML_ERROR_SYNTAX;
4806 if (dtd->in_eldecl
4807 && ! parser->m_groupConnector[parser->m_prologState.level]
4808 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
4809 != XML_CTYPE_MIXED)) {
4810 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
4811 = XML_CTYPE_CHOICE;
4812 if (parser->m_elementDeclHandler)
4813 handleDefault = XML_FALSE;
4814 }
4815 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
4816 break;
4817 case XML_ROLE_PARAM_ENTITY_REF:
4818 #ifdef XML_DTD
4819 case XML_ROLE_INNER_PARAM_ENTITY_REF:
4820 dtd->hasParamEntityRefs = XML_TRUE;
4821 if (! parser->m_paramEntityParsing)
4822 dtd->keepProcessing = dtd->standalone;
4823 else {
4824 const XML_Char *name;
4825 ENTITY *entity;
4826 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4827 next - enc->minBytesPerChar);
4828 if (! name)
4829 return XML_ERROR_NO_MEMORY;
4830 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
4831 poolDiscard(&dtd->pool);
4832 /* first, determine if a check for an existing declaration is needed;
4833 if yes, check that the entity exists, and that it is internal,
4834 otherwise call the skipped entity handler
4835 */
4836 if (parser->m_prologState.documentEntity
4837 && (dtd->standalone ? ! parser->m_openInternalEntities
4838 : ! dtd->hasParamEntityRefs)) {
4839 if (! entity)
4840 return XML_ERROR_UNDEFINED_ENTITY;
4841 else if (! entity->is_internal) {
4842 /* It's hard to exhaustively search the code to be sure,
4843 * but there doesn't seem to be a way of executing the
4844 * following line. There are two cases:
4845 *
4846 * If 'standalone' is false, the DTD must have no
4847 * parameter entities or we wouldn't have passed the outer
4848 * 'if' statement. That means the only entity in the hash
4849 * table is the external subset name "#" which cannot be
4850 * given as a parameter entity name in XML syntax, so the
4851 * lookup must have returned NULL and we don't even reach
4852 * the test for an internal entity.
4853 *
4854 * If 'standalone' is true, it does not seem to be
4855 * possible to create entities taking this code path that
4856 * are not internal entities, so fail the test above.
4857 *
4858 * Because this analysis is very uncertain, the code is
4859 * being left in place and merely removed from the
4860 * coverage test statistics.
4861 */
4862 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
4863 }
4864 } else if (! entity) {
4865 dtd->keepProcessing = dtd->standalone;
4866 /* cannot report skipped entities in declarations */
4867 if ((role == XML_ROLE_PARAM_ENTITY_REF)
4868 && parser->m_skippedEntityHandler) {
4869 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
4870 handleDefault = XML_FALSE;
4871 }
4872 break;
4873 }
4874 if (entity->open)
4875 return XML_ERROR_RECURSIVE_ENTITY_REF;
4876 if (entity->textPtr) {
4877 enum XML_Error result;
4878 XML_Bool betweenDecl
4879 = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
4880 result = processInternalEntity(parser, entity, betweenDecl);
4881 if (result != XML_ERROR_NONE)
4882 return result;
4883 handleDefault = XML_FALSE;
4884 break;
4885 }
4886 if (parser->m_externalEntityRefHandler) {
4887 dtd->paramEntityRead = XML_FALSE;
4888 entity->open = XML_TRUE;
4889 if (! parser->m_externalEntityRefHandler(
4890 parser->m_externalEntityRefHandlerArg, 0, entity->base,
4891 entity->systemId, entity->publicId)) {
4892 entity->open = XML_FALSE;
4893 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4894 }
4895 entity->open = XML_FALSE;
4896 handleDefault = XML_FALSE;
4897 if (! dtd->paramEntityRead) {
4898 dtd->keepProcessing = dtd->standalone;
4899 break;
4900 }
4901 } else {
4902 dtd->keepProcessing = dtd->standalone;
4903 break;
4904 }
4905 }
4906 #endif /* XML_DTD */
4907 if (! dtd->standalone && parser->m_notStandaloneHandler
4908 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4909 return XML_ERROR_NOT_STANDALONE;
4910 break;
4911
4912 /* Element declaration stuff */
4913
4914 case XML_ROLE_ELEMENT_NAME:
4915 if (parser->m_elementDeclHandler) {
4916 parser->m_declElementType = getElementType(parser, enc, s, next);
4917 if (! parser->m_declElementType)
4918 return XML_ERROR_NO_MEMORY;
4919 dtd->scaffLevel = 0;
4920 dtd->scaffCount = 0;
4921 dtd->in_eldecl = XML_TRUE;
4922 handleDefault = XML_FALSE;
4923 }
4924 break;
4925
4926 case XML_ROLE_CONTENT_ANY:
4927 case XML_ROLE_CONTENT_EMPTY:
4928 if (dtd->in_eldecl) {
4929 if (parser->m_elementDeclHandler) {
4930 XML_Content *content
4931 = (XML_Content *)MALLOC(parser, sizeof(XML_Content));
4932 if (! content)
4933 return XML_ERROR_NO_MEMORY;
4934 content->quant = XML_CQUANT_NONE;
4935 content->name = NULL;
4936 content->numchildren = 0;
4937 content->children = NULL;
4938 content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
4939 : XML_CTYPE_EMPTY);
4940 *eventEndPP = s;
4941 parser->m_elementDeclHandler(
4942 parser->m_handlerArg, parser->m_declElementType->name, content);
4943 handleDefault = XML_FALSE;
4944 }
4945 dtd->in_eldecl = XML_FALSE;
4946 }
4947 break;
4948
4949 case XML_ROLE_CONTENT_PCDATA:
4950 if (dtd->in_eldecl) {
4951 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
4952 = XML_CTYPE_MIXED;
4953 if (parser->m_elementDeclHandler)
4954 handleDefault = XML_FALSE;
4955 }
4956 break;
4957
4958 case XML_ROLE_CONTENT_ELEMENT:
4959 quant = XML_CQUANT_NONE;
4960 goto elementContent;
4961 case XML_ROLE_CONTENT_ELEMENT_OPT:
4962 quant = XML_CQUANT_OPT;
4963 goto elementContent;
4964 case XML_ROLE_CONTENT_ELEMENT_REP:
4965 quant = XML_CQUANT_REP;
4966 goto elementContent;
4967 case XML_ROLE_CONTENT_ELEMENT_PLUS:
4968 quant = XML_CQUANT_PLUS;
4969 elementContent:
4970 if (dtd->in_eldecl) {
4971 ELEMENT_TYPE *el;
4972 const XML_Char *name;
4973 int nameLen;
4974 const char *nxt
4975 = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
4976 int myindex = nextScaffoldPart(parser);
4977 if (myindex < 0)
4978 return XML_ERROR_NO_MEMORY;
4979 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
4980 dtd->scaffold[myindex].quant = quant;
4981 el = getElementType(parser, enc, s, nxt);
4982 if (! el)
4983 return XML_ERROR_NO_MEMORY;
4984 name = el->name;
4985 dtd->scaffold[myindex].name = name;
4986 nameLen = 0;
4987 for (; name[nameLen++];)
4988 ;
4989 dtd->contentStringLen += nameLen;
4990 if (parser->m_elementDeclHandler)
4991 handleDefault = XML_FALSE;
4992 }
4993 break;
4994
4995 case XML_ROLE_GROUP_CLOSE:
4996 quant = XML_CQUANT_NONE;
4997 goto closeGroup;
4998 case XML_ROLE_GROUP_CLOSE_OPT:
4999 quant = XML_CQUANT_OPT;
5000 goto closeGroup;
5001 case XML_ROLE_GROUP_CLOSE_REP:
5002 quant = XML_CQUANT_REP;
5003 goto closeGroup;
5004 case XML_ROLE_GROUP_CLOSE_PLUS:
5005 quant = XML_CQUANT_PLUS;
5006 closeGroup:
5007 if (dtd->in_eldecl) {
5008 if (parser->m_elementDeclHandler)
5009 handleDefault = XML_FALSE;
5010 dtd->scaffLevel--;
5011 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5012 if (dtd->scaffLevel == 0) {
5013 if (! handleDefault) {
5014 XML_Content *model = build_model(parser);
5015 if (! model)
5016 return XML_ERROR_NO_MEMORY;
5017 *eventEndPP = s;
5018 parser->m_elementDeclHandler(
5019 parser->m_handlerArg, parser->m_declElementType->name, model);
5020 }
5021 dtd->in_eldecl = XML_FALSE;
5022 dtd->contentStringLen = 0;
5023 }
5024 }
5025 break;
5026 /* End element declaration stuff */
5027
5028 case XML_ROLE_PI:
5029 if (! reportProcessingInstruction(parser, enc, s, next))
5030 return XML_ERROR_NO_MEMORY;
5031 handleDefault = XML_FALSE;
5032 break;
5033 case XML_ROLE_COMMENT:
5034 if (! reportComment(parser, enc, s, next))
5035 return XML_ERROR_NO_MEMORY;
5036 handleDefault = XML_FALSE;
5037 break;
5038 case XML_ROLE_NONE:
5039 switch (tok) {
5040 case XML_TOK_BOM:
5041 handleDefault = XML_FALSE;
5042 break;
5043 }
5044 break;
5045 case XML_ROLE_DOCTYPE_NONE:
5046 if (parser->m_startDoctypeDeclHandler)
5047 handleDefault = XML_FALSE;
5048 break;
5049 case XML_ROLE_ENTITY_NONE:
5050 if (dtd->keepProcessing && parser->m_entityDeclHandler)
5051 handleDefault = XML_FALSE;
5052 break;
5053 case XML_ROLE_NOTATION_NONE:
5054 if (parser->m_notationDeclHandler)
5055 handleDefault = XML_FALSE;
5056 break;
5057 case XML_ROLE_ATTLIST_NONE:
5058 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5059 handleDefault = XML_FALSE;
5060 break;
5061 case XML_ROLE_ELEMENT_NONE:
5062 if (parser->m_elementDeclHandler)
5063 handleDefault = XML_FALSE;
5064 break;
5065 } /* end of big switch */
5066
5067 if (handleDefault && parser->m_defaultHandler)
5068 reportDefault(parser, enc, s, next);
5069
5070 switch (parser->m_parsingStatus.parsing) {
5071 case XML_SUSPENDED:
5072 *nextPtr = next;
5073 return XML_ERROR_NONE;
5074 case XML_FINISHED:
5075 return XML_ERROR_ABORTED;
5076 default:
5077 s = next;
5078 tok = XmlPrologTok(enc, s, end, &next);
5079 }
5080 }
5081 /* not reached */
5082 }
5083
5084 static enum XML_Error PTRCALL
epilogProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5085 epilogProcessor(XML_Parser parser, const char *s, const char *end,
5086 const char **nextPtr) {
5087 parser->m_processor = epilogProcessor;
5088 parser->m_eventPtr = s;
5089 for (;;) {
5090 const char *next = NULL;
5091 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5092 parser->m_eventEndPtr = next;
5093 switch (tok) {
5094 /* report partial linebreak - it might be the last token */
5095 case -XML_TOK_PROLOG_S:
5096 if (parser->m_defaultHandler) {
5097 reportDefault(parser, parser->m_encoding, s, next);
5098 if (parser->m_parsingStatus.parsing == XML_FINISHED)
5099 return XML_ERROR_ABORTED;
5100 }
5101 *nextPtr = next;
5102 return XML_ERROR_NONE;
5103 case XML_TOK_NONE:
5104 *nextPtr = s;
5105 return XML_ERROR_NONE;
5106 case XML_TOK_PROLOG_S:
5107 if (parser->m_defaultHandler)
5108 reportDefault(parser, parser->m_encoding, s, next);
5109 break;
5110 case XML_TOK_PI:
5111 if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
5112 return XML_ERROR_NO_MEMORY;
5113 break;
5114 case XML_TOK_COMMENT:
5115 if (! reportComment(parser, parser->m_encoding, s, next))
5116 return XML_ERROR_NO_MEMORY;
5117 break;
5118 case XML_TOK_INVALID:
5119 parser->m_eventPtr = next;
5120 return XML_ERROR_INVALID_TOKEN;
5121 case XML_TOK_PARTIAL:
5122 if (! parser->m_parsingStatus.finalBuffer) {
5123 *nextPtr = s;
5124 return XML_ERROR_NONE;
5125 }
5126 return XML_ERROR_UNCLOSED_TOKEN;
5127 case XML_TOK_PARTIAL_CHAR:
5128 if (! parser->m_parsingStatus.finalBuffer) {
5129 *nextPtr = s;
5130 return XML_ERROR_NONE;
5131 }
5132 return XML_ERROR_PARTIAL_CHAR;
5133 default:
5134 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5135 }
5136 parser->m_eventPtr = s = next;
5137 switch (parser->m_parsingStatus.parsing) {
5138 case XML_SUSPENDED:
5139 *nextPtr = next;
5140 return XML_ERROR_NONE;
5141 case XML_FINISHED:
5142 return XML_ERROR_ABORTED;
5143 default:;
5144 }
5145 }
5146 }
5147
5148 static enum XML_Error
processInternalEntity(XML_Parser parser,ENTITY * entity,XML_Bool betweenDecl)5149 processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
5150 const char *textStart, *textEnd;
5151 const char *next;
5152 enum XML_Error result;
5153 OPEN_INTERNAL_ENTITY *openEntity;
5154
5155 if (parser->m_freeInternalEntities) {
5156 openEntity = parser->m_freeInternalEntities;
5157 parser->m_freeInternalEntities = openEntity->next;
5158 } else {
5159 openEntity
5160 = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
5161 if (! openEntity)
5162 return XML_ERROR_NO_MEMORY;
5163 }
5164 entity->open = XML_TRUE;
5165 entity->processed = 0;
5166 openEntity->next = parser->m_openInternalEntities;
5167 parser->m_openInternalEntities = openEntity;
5168 openEntity->entity = entity;
5169 openEntity->startTagLevel = parser->m_tagLevel;
5170 openEntity->betweenDecl = betweenDecl;
5171 openEntity->internalEventPtr = NULL;
5172 openEntity->internalEventEndPtr = NULL;
5173 textStart = (char *)entity->textPtr;
5174 textEnd = (char *)(entity->textPtr + entity->textLen);
5175 /* Set a safe default value in case 'next' does not get set */
5176 next = textStart;
5177
5178 #ifdef XML_DTD
5179 if (entity->is_param) {
5180 int tok
5181 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5182 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5183 tok, next, &next, XML_FALSE, XML_FALSE);
5184 } else
5185 #endif /* XML_DTD */
5186 result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
5187 textStart, textEnd, &next, XML_FALSE);
5188
5189 if (result == XML_ERROR_NONE) {
5190 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5191 entity->processed = (int)(next - textStart);
5192 parser->m_processor = internalEntityProcessor;
5193 } else {
5194 entity->open = XML_FALSE;
5195 parser->m_openInternalEntities = openEntity->next;
5196 /* put openEntity back in list of free instances */
5197 openEntity->next = parser->m_freeInternalEntities;
5198 parser->m_freeInternalEntities = openEntity;
5199 }
5200 }
5201 return result;
5202 }
5203
5204 static enum XML_Error PTRCALL
internalEntityProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5205 internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
5206 const char **nextPtr) {
5207 ENTITY *entity;
5208 const char *textStart, *textEnd;
5209 const char *next;
5210 enum XML_Error result;
5211 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
5212 if (! openEntity)
5213 return XML_ERROR_UNEXPECTED_STATE;
5214
5215 entity = openEntity->entity;
5216 textStart = ((char *)entity->textPtr) + entity->processed;
5217 textEnd = (char *)(entity->textPtr + entity->textLen);
5218 /* Set a safe default value in case 'next' does not get set */
5219 next = textStart;
5220
5221 #ifdef XML_DTD
5222 if (entity->is_param) {
5223 int tok
5224 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5225 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5226 tok, next, &next, XML_FALSE, XML_TRUE);
5227 } else
5228 #endif /* XML_DTD */
5229 result = doContent(parser, openEntity->startTagLevel,
5230 parser->m_internalEncoding, textStart, textEnd, &next,
5231 XML_FALSE);
5232
5233 if (result != XML_ERROR_NONE)
5234 return result;
5235 else if (textEnd != next
5236 && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5237 entity->processed = (int)(next - (char *)entity->textPtr);
5238 return result;
5239 } else {
5240 entity->open = XML_FALSE;
5241 parser->m_openInternalEntities = openEntity->next;
5242 /* put openEntity back in list of free instances */
5243 openEntity->next = parser->m_freeInternalEntities;
5244 parser->m_freeInternalEntities = openEntity;
5245 }
5246
5247 #ifdef XML_DTD
5248 if (entity->is_param) {
5249 int tok;
5250 parser->m_processor = prologProcessor;
5251 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5252 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5253 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE);
5254 } else
5255 #endif /* XML_DTD */
5256 {
5257 parser->m_processor = contentProcessor;
5258 /* see externalEntityContentProcessor vs contentProcessor */
5259 return doContent(parser, parser->m_parentParser ? 1 : 0, parser->m_encoding,
5260 s, end, nextPtr,
5261 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
5262 }
5263 }
5264
5265 static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5266 errorProcessor(XML_Parser parser, const char *s, const char *end,
5267 const char **nextPtr) {
5268 UNUSED_P(s);
5269 UNUSED_P(end);
5270 UNUSED_P(nextPtr);
5271 return parser->m_errorCode;
5272 }
5273
5274 static enum XML_Error
storeAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool)5275 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5276 const char *ptr, const char *end, STRING_POOL *pool) {
5277 enum XML_Error result
5278 = appendAttributeValue(parser, enc, isCdata, ptr, end, pool);
5279 if (result)
5280 return result;
5281 if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
5282 poolChop(pool);
5283 if (! poolAppendChar(pool, XML_T('\0')))
5284 return XML_ERROR_NO_MEMORY;
5285 return XML_ERROR_NONE;
5286 }
5287
5288 static enum XML_Error
appendAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool)5289 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5290 const char *ptr, const char *end, STRING_POOL *pool) {
5291 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
5292 for (;;) {
5293 const char *next;
5294 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
5295 switch (tok) {
5296 case XML_TOK_NONE:
5297 return XML_ERROR_NONE;
5298 case XML_TOK_INVALID:
5299 if (enc == parser->m_encoding)
5300 parser->m_eventPtr = next;
5301 return XML_ERROR_INVALID_TOKEN;
5302 case XML_TOK_PARTIAL:
5303 if (enc == parser->m_encoding)
5304 parser->m_eventPtr = ptr;
5305 return XML_ERROR_INVALID_TOKEN;
5306 case XML_TOK_CHAR_REF: {
5307 XML_Char buf[XML_ENCODE_MAX];
5308 int i;
5309 int n = XmlCharRefNumber(enc, ptr);
5310 if (n < 0) {
5311 if (enc == parser->m_encoding)
5312 parser->m_eventPtr = ptr;
5313 return XML_ERROR_BAD_CHAR_REF;
5314 }
5315 if (! isCdata && n == 0x20 /* space */
5316 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5317 break;
5318 n = XmlEncode(n, (ICHAR *)buf);
5319 /* The XmlEncode() functions can never return 0 here. That
5320 * error return happens if the code point passed in is either
5321 * negative or greater than or equal to 0x110000. The
5322 * XmlCharRefNumber() functions will all return a number
5323 * strictly less than 0x110000 or a negative value if an error
5324 * occurred. The negative value is intercepted above, so
5325 * XmlEncode() is never passed a value it might return an
5326 * error for.
5327 */
5328 for (i = 0; i < n; i++) {
5329 if (! poolAppendChar(pool, buf[i]))
5330 return XML_ERROR_NO_MEMORY;
5331 }
5332 } break;
5333 case XML_TOK_DATA_CHARS:
5334 if (! poolAppend(pool, enc, ptr, next))
5335 return XML_ERROR_NO_MEMORY;
5336 break;
5337 case XML_TOK_TRAILING_CR:
5338 next = ptr + enc->minBytesPerChar;
5339 /* fall through */
5340 case XML_TOK_ATTRIBUTE_VALUE_S:
5341 case XML_TOK_DATA_NEWLINE:
5342 if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5343 break;
5344 if (! poolAppendChar(pool, 0x20))
5345 return XML_ERROR_NO_MEMORY;
5346 break;
5347 case XML_TOK_ENTITY_REF: {
5348 const XML_Char *name;
5349 ENTITY *entity;
5350 char checkEntityDecl;
5351 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
5352 enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
5353 if (ch) {
5354 if (! poolAppendChar(pool, ch))
5355 return XML_ERROR_NO_MEMORY;
5356 break;
5357 }
5358 name = poolStoreString(&parser->m_temp2Pool, enc,
5359 ptr + enc->minBytesPerChar,
5360 next - enc->minBytesPerChar);
5361 if (! name)
5362 return XML_ERROR_NO_MEMORY;
5363 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
5364 poolDiscard(&parser->m_temp2Pool);
5365 /* First, determine if a check for an existing declaration is needed;
5366 if yes, check that the entity exists, and that it is internal.
5367 */
5368 if (pool == &dtd->pool) /* are we called from prolog? */
5369 checkEntityDecl =
5370 #ifdef XML_DTD
5371 parser->m_prologState.documentEntity &&
5372 #endif /* XML_DTD */
5373 (dtd->standalone ? ! parser->m_openInternalEntities
5374 : ! dtd->hasParamEntityRefs);
5375 else /* if (pool == &parser->m_tempPool): we are called from content */
5376 checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
5377 if (checkEntityDecl) {
5378 if (! entity)
5379 return XML_ERROR_UNDEFINED_ENTITY;
5380 else if (! entity->is_internal)
5381 return XML_ERROR_ENTITY_DECLARED_IN_PE;
5382 } else if (! entity) {
5383 /* Cannot report skipped entity here - see comments on
5384 parser->m_skippedEntityHandler.
5385 if (parser->m_skippedEntityHandler)
5386 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
5387 */
5388 /* Cannot call the default handler because this would be
5389 out of sync with the call to the startElementHandler.
5390 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
5391 reportDefault(parser, enc, ptr, next);
5392 */
5393 break;
5394 }
5395 if (entity->open) {
5396 if (enc == parser->m_encoding) {
5397 /* It does not appear that this line can be executed.
5398 *
5399 * The "if (entity->open)" check catches recursive entity
5400 * definitions. In order to be called with an open
5401 * entity, it must have gone through this code before and
5402 * been through the recursive call to
5403 * appendAttributeValue() some lines below. That call
5404 * sets the local encoding ("enc") to the parser's
5405 * internal encoding (internal_utf8 or internal_utf16),
5406 * which can never be the same as the principle encoding.
5407 * It doesn't appear there is another code path that gets
5408 * here with entity->open being TRUE.
5409 *
5410 * Since it is not certain that this logic is watertight,
5411 * we keep the line and merely exclude it from coverage
5412 * tests.
5413 */
5414 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
5415 }
5416 return XML_ERROR_RECURSIVE_ENTITY_REF;
5417 }
5418 if (entity->notation) {
5419 if (enc == parser->m_encoding)
5420 parser->m_eventPtr = ptr;
5421 return XML_ERROR_BINARY_ENTITY_REF;
5422 }
5423 if (! entity->textPtr) {
5424 if (enc == parser->m_encoding)
5425 parser->m_eventPtr = ptr;
5426 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
5427 } else {
5428 enum XML_Error result;
5429 const XML_Char *textEnd = entity->textPtr + entity->textLen;
5430 entity->open = XML_TRUE;
5431 result = appendAttributeValue(parser, parser->m_internalEncoding,
5432 isCdata, (char *)entity->textPtr,
5433 (char *)textEnd, pool);
5434 entity->open = XML_FALSE;
5435 if (result)
5436 return result;
5437 }
5438 } break;
5439 default:
5440 /* The only token returned by XmlAttributeValueTok() that does
5441 * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
5442 * Getting that would require an entity name to contain an
5443 * incomplete XML character (e.g. \xE2\x82); however previous
5444 * tokenisers will have already recognised and rejected such
5445 * names before XmlAttributeValueTok() gets a look-in. This
5446 * default case should be retained as a safety net, but the code
5447 * excluded from coverage tests.
5448 *
5449 * LCOV_EXCL_START
5450 */
5451 if (enc == parser->m_encoding)
5452 parser->m_eventPtr = ptr;
5453 return XML_ERROR_UNEXPECTED_STATE;
5454 /* LCOV_EXCL_STOP */
5455 }
5456 ptr = next;
5457 }
5458 /* not reached */
5459 }
5460
5461 static enum XML_Error
storeEntityValue(XML_Parser parser,const ENCODING * enc,const char * entityTextPtr,const char * entityTextEnd)5462 storeEntityValue(XML_Parser parser, const ENCODING *enc,
5463 const char *entityTextPtr, const char *entityTextEnd) {
5464 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
5465 STRING_POOL *pool = &(dtd->entityValuePool);
5466 enum XML_Error result = XML_ERROR_NONE;
5467 #ifdef XML_DTD
5468 int oldInEntityValue = parser->m_prologState.inEntityValue;
5469 parser->m_prologState.inEntityValue = 1;
5470 #endif /* XML_DTD */
5471 /* never return Null for the value argument in EntityDeclHandler,
5472 since this would indicate an external entity; therefore we
5473 have to make sure that entityValuePool.start is not null */
5474 if (! pool->blocks) {
5475 if (! poolGrow(pool))
5476 return XML_ERROR_NO_MEMORY;
5477 }
5478
5479 for (;;) {
5480 const char *next;
5481 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
5482 switch (tok) {
5483 case XML_TOK_PARAM_ENTITY_REF:
5484 #ifdef XML_DTD
5485 if (parser->m_isParamEntity || enc != parser->m_encoding) {
5486 const XML_Char *name;
5487 ENTITY *entity;
5488 name = poolStoreString(&parser->m_tempPool, enc,
5489 entityTextPtr + enc->minBytesPerChar,
5490 next - enc->minBytesPerChar);
5491 if (! name) {
5492 result = XML_ERROR_NO_MEMORY;
5493 goto endEntityValue;
5494 }
5495 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5496 poolDiscard(&parser->m_tempPool);
5497 if (! entity) {
5498 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
5499 /* cannot report skipped entity here - see comments on
5500 parser->m_skippedEntityHandler
5501 if (parser->m_skippedEntityHandler)
5502 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
5503 */
5504 dtd->keepProcessing = dtd->standalone;
5505 goto endEntityValue;
5506 }
5507 if (entity->open) {
5508 if (enc == parser->m_encoding)
5509 parser->m_eventPtr = entityTextPtr;
5510 result = XML_ERROR_RECURSIVE_ENTITY_REF;
5511 goto endEntityValue;
5512 }
5513 if (entity->systemId) {
5514 if (parser->m_externalEntityRefHandler) {
5515 dtd->paramEntityRead = XML_FALSE;
5516 entity->open = XML_TRUE;
5517 if (! parser->m_externalEntityRefHandler(
5518 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5519 entity->systemId, entity->publicId)) {
5520 entity->open = XML_FALSE;
5521 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5522 goto endEntityValue;
5523 }
5524 entity->open = XML_FALSE;
5525 if (! dtd->paramEntityRead)
5526 dtd->keepProcessing = dtd->standalone;
5527 } else
5528 dtd->keepProcessing = dtd->standalone;
5529 } else {
5530 entity->open = XML_TRUE;
5531 result = storeEntityValue(
5532 parser, parser->m_internalEncoding, (char *)entity->textPtr,
5533 (char *)(entity->textPtr + entity->textLen));
5534 entity->open = XML_FALSE;
5535 if (result)
5536 goto endEntityValue;
5537 }
5538 break;
5539 }
5540 #endif /* XML_DTD */
5541 /* In the internal subset, PE references are not legal
5542 within markup declarations, e.g entity values in this case. */
5543 parser->m_eventPtr = entityTextPtr;
5544 result = XML_ERROR_PARAM_ENTITY_REF;
5545 goto endEntityValue;
5546 case XML_TOK_NONE:
5547 result = XML_ERROR_NONE;
5548 goto endEntityValue;
5549 case XML_TOK_ENTITY_REF:
5550 case XML_TOK_DATA_CHARS:
5551 if (! poolAppend(pool, enc, entityTextPtr, next)) {
5552 result = XML_ERROR_NO_MEMORY;
5553 goto endEntityValue;
5554 }
5555 break;
5556 case XML_TOK_TRAILING_CR:
5557 next = entityTextPtr + enc->minBytesPerChar;
5558 /* fall through */
5559 case XML_TOK_DATA_NEWLINE:
5560 if (pool->end == pool->ptr && ! poolGrow(pool)) {
5561 result = XML_ERROR_NO_MEMORY;
5562 goto endEntityValue;
5563 }
5564 *(pool->ptr)++ = 0xA;
5565 break;
5566 case XML_TOK_CHAR_REF: {
5567 XML_Char buf[XML_ENCODE_MAX];
5568 int i;
5569 int n = XmlCharRefNumber(enc, entityTextPtr);
5570 if (n < 0) {
5571 if (enc == parser->m_encoding)
5572 parser->m_eventPtr = entityTextPtr;
5573 result = XML_ERROR_BAD_CHAR_REF;
5574 goto endEntityValue;
5575 }
5576 n = XmlEncode(n, (ICHAR *)buf);
5577 /* The XmlEncode() functions can never return 0 here. That
5578 * error return happens if the code point passed in is either
5579 * negative or greater than or equal to 0x110000. The
5580 * XmlCharRefNumber() functions will all return a number
5581 * strictly less than 0x110000 or a negative value if an error
5582 * occurred. The negative value is intercepted above, so
5583 * XmlEncode() is never passed a value it might return an
5584 * error for.
5585 */
5586 for (i = 0; i < n; i++) {
5587 if (pool->end == pool->ptr && ! poolGrow(pool)) {
5588 result = XML_ERROR_NO_MEMORY;
5589 goto endEntityValue;
5590 }
5591 *(pool->ptr)++ = buf[i];
5592 }
5593 } break;
5594 case XML_TOK_PARTIAL:
5595 if (enc == parser->m_encoding)
5596 parser->m_eventPtr = entityTextPtr;
5597 result = XML_ERROR_INVALID_TOKEN;
5598 goto endEntityValue;
5599 case XML_TOK_INVALID:
5600 if (enc == parser->m_encoding)
5601 parser->m_eventPtr = next;
5602 result = XML_ERROR_INVALID_TOKEN;
5603 goto endEntityValue;
5604 default:
5605 /* This default case should be unnecessary -- all the tokens
5606 * that XmlEntityValueTok() can return have their own explicit
5607 * cases -- but should be retained for safety. We do however
5608 * exclude it from the coverage statistics.
5609 *
5610 * LCOV_EXCL_START
5611 */
5612 if (enc == parser->m_encoding)
5613 parser->m_eventPtr = entityTextPtr;
5614 result = XML_ERROR_UNEXPECTED_STATE;
5615 goto endEntityValue;
5616 /* LCOV_EXCL_STOP */
5617 }
5618 entityTextPtr = next;
5619 }
5620 endEntityValue:
5621 #ifdef XML_DTD
5622 parser->m_prologState.inEntityValue = oldInEntityValue;
5623 #endif /* XML_DTD */
5624 return result;
5625 }
5626
5627 static void FASTCALL
normalizeLines(XML_Char * s)5628 normalizeLines(XML_Char *s) {
5629 XML_Char *p;
5630 for (;; s++) {
5631 if (*s == XML_T('\0'))
5632 return;
5633 if (*s == 0xD)
5634 break;
5635 }
5636 p = s;
5637 do {
5638 if (*s == 0xD) {
5639 *p++ = 0xA;
5640 if (*++s == 0xA)
5641 s++;
5642 } else
5643 *p++ = *s++;
5644 } while (*s);
5645 *p = XML_T('\0');
5646 }
5647
5648 static int
reportProcessingInstruction(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)5649 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
5650 const char *start, const char *end) {
5651 const XML_Char *target;
5652 XML_Char *data;
5653 const char *tem;
5654 if (! parser->m_processingInstructionHandler) {
5655 if (parser->m_defaultHandler)
5656 reportDefault(parser, enc, start, end);
5657 return 1;
5658 }
5659 start += enc->minBytesPerChar * 2;
5660 tem = start + XmlNameLength(enc, start);
5661 target = poolStoreString(&parser->m_tempPool, enc, start, tem);
5662 if (! target)
5663 return 0;
5664 poolFinish(&parser->m_tempPool);
5665 data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
5666 end - enc->minBytesPerChar * 2);
5667 if (! data)
5668 return 0;
5669 normalizeLines(data);
5670 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
5671 poolClear(&parser->m_tempPool);
5672 return 1;
5673 }
5674
5675 static int
reportComment(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)5676 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
5677 const char *end) {
5678 XML_Char *data;
5679 if (! parser->m_commentHandler) {
5680 if (parser->m_defaultHandler)
5681 reportDefault(parser, enc, start, end);
5682 return 1;
5683 }
5684 data = poolStoreString(&parser->m_tempPool, enc,
5685 start + enc->minBytesPerChar * 4,
5686 end - enc->minBytesPerChar * 3);
5687 if (! data)
5688 return 0;
5689 normalizeLines(data);
5690 parser->m_commentHandler(parser->m_handlerArg, data);
5691 poolClear(&parser->m_tempPool);
5692 return 1;
5693 }
5694
5695 static void
reportDefault(XML_Parser parser,const ENCODING * enc,const char * s,const char * end)5696 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
5697 const char *end) {
5698 if (MUST_CONVERT(enc, s)) {
5699 enum XML_Convert_Result convert_res;
5700 const char **eventPP;
5701 const char **eventEndPP;
5702 if (enc == parser->m_encoding) {
5703 eventPP = &parser->m_eventPtr;
5704 eventEndPP = &parser->m_eventEndPtr;
5705 } else {
5706 /* To get here, two things must be true; the parser must be
5707 * using a character encoding that is not the same as the
5708 * encoding passed in, and the encoding passed in must need
5709 * conversion to the internal format (UTF-8 unless XML_UNICODE
5710 * is defined). The only occasions on which the encoding passed
5711 * in is not the same as the parser's encoding are when it is
5712 * the internal encoding (e.g. a previously defined parameter
5713 * entity, already converted to internal format). This by
5714 * definition doesn't need conversion, so the whole branch never
5715 * gets executed.
5716 *
5717 * For safety's sake we don't delete these lines and merely
5718 * exclude them from coverage statistics.
5719 *
5720 * LCOV_EXCL_START
5721 */
5722 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
5723 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
5724 /* LCOV_EXCL_STOP */
5725 }
5726 do {
5727 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
5728 convert_res
5729 = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
5730 *eventEndPP = s;
5731 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
5732 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
5733 *eventPP = s;
5734 } while ((convert_res != XML_CONVERT_COMPLETED)
5735 && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
5736 } else
5737 parser->m_defaultHandler(parser->m_handlerArg, (XML_Char *)s,
5738 (int)((XML_Char *)end - (XML_Char *)s));
5739 }
5740
5741 static int
defineAttribute(ELEMENT_TYPE * type,ATTRIBUTE_ID * attId,XML_Bool isCdata,XML_Bool isId,const XML_Char * value,XML_Parser parser)5742 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
5743 XML_Bool isId, const XML_Char *value, XML_Parser parser) {
5744 DEFAULT_ATTRIBUTE *att;
5745 if (value || isId) {
5746 /* The handling of default attributes gets messed up if we have
5747 a default which duplicates a non-default. */
5748 int i;
5749 for (i = 0; i < type->nDefaultAtts; i++)
5750 if (attId == type->defaultAtts[i].id)
5751 return 1;
5752 if (isId && ! type->idAtt && ! attId->xmlns)
5753 type->idAtt = attId;
5754 }
5755 if (type->nDefaultAtts == type->allocDefaultAtts) {
5756 if (type->allocDefaultAtts == 0) {
5757 type->allocDefaultAtts = 8;
5758 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(
5759 parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
5760 if (! type->defaultAtts) {
5761 type->allocDefaultAtts = 0;
5762 return 0;
5763 }
5764 } else {
5765 DEFAULT_ATTRIBUTE *temp;
5766 int count = type->allocDefaultAtts * 2;
5767 temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts,
5768 (count * sizeof(DEFAULT_ATTRIBUTE)));
5769 if (temp == NULL)
5770 return 0;
5771 type->allocDefaultAtts = count;
5772 type->defaultAtts = temp;
5773 }
5774 }
5775 att = type->defaultAtts + type->nDefaultAtts;
5776 att->id = attId;
5777 att->value = value;
5778 att->isCdata = isCdata;
5779 if (! isCdata)
5780 attId->maybeTokenized = XML_TRUE;
5781 type->nDefaultAtts += 1;
5782 return 1;
5783 }
5784
5785 static int
setElementTypePrefix(XML_Parser parser,ELEMENT_TYPE * elementType)5786 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
5787 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
5788 const XML_Char *name;
5789 for (name = elementType->name; *name; name++) {
5790 if (*name == XML_T(ASCII_COLON)) {
5791 PREFIX *prefix;
5792 const XML_Char *s;
5793 for (s = elementType->name; s != name; s++) {
5794 if (! poolAppendChar(&dtd->pool, *s))
5795 return 0;
5796 }
5797 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
5798 return 0;
5799 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
5800 sizeof(PREFIX));
5801 if (! prefix)
5802 return 0;
5803 if (prefix->name == poolStart(&dtd->pool))
5804 poolFinish(&dtd->pool);
5805 else
5806 poolDiscard(&dtd->pool);
5807 elementType->prefix = prefix;
5808 break;
5809 }
5810 }
5811 return 1;
5812 }
5813
5814 static ATTRIBUTE_ID *
getAttributeId(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)5815 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
5816 const char *end) {
5817 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
5818 ATTRIBUTE_ID *id;
5819 const XML_Char *name;
5820 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
5821 return NULL;
5822 name = poolStoreString(&dtd->pool, enc, start, end);
5823 if (! name)
5824 return NULL;
5825 /* skip quotation mark - its storage will be re-used (like in name[-1]) */
5826 ++name;
5827 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
5828 sizeof(ATTRIBUTE_ID));
5829 if (! id)
5830 return NULL;
5831 if (id->name != name)
5832 poolDiscard(&dtd->pool);
5833 else {
5834 poolFinish(&dtd->pool);
5835 if (! parser->m_ns)
5836 ;
5837 else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
5838 && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
5839 && name[4] == XML_T(ASCII_s)
5840 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
5841 if (name[5] == XML_T('\0'))
5842 id->prefix = &dtd->defaultPrefix;
5843 else
5844 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
5845 sizeof(PREFIX));
5846 id->xmlns = XML_TRUE;
5847 } else {
5848 int i;
5849 for (i = 0; name[i]; i++) {
5850 /* attributes without prefix are *not* in the default namespace */
5851 if (name[i] == XML_T(ASCII_COLON)) {
5852 int j;
5853 for (j = 0; j < i; j++) {
5854 if (! poolAppendChar(&dtd->pool, name[j]))
5855 return NULL;
5856 }
5857 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
5858 return NULL;
5859 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
5860 poolStart(&dtd->pool), sizeof(PREFIX));
5861 if (! id->prefix)
5862 return NULL;
5863 if (id->prefix->name == poolStart(&dtd->pool))
5864 poolFinish(&dtd->pool);
5865 else
5866 poolDiscard(&dtd->pool);
5867 break;
5868 }
5869 }
5870 }
5871 }
5872 return id;
5873 }
5874
5875 #define CONTEXT_SEP XML_T(ASCII_FF)
5876
5877 static const XML_Char *
getContext(XML_Parser parser)5878 getContext(XML_Parser parser) {
5879 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
5880 HASH_TABLE_ITER iter;
5881 XML_Bool needSep = XML_FALSE;
5882
5883 if (dtd->defaultPrefix.binding) {
5884 int i;
5885 int len;
5886 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
5887 return NULL;
5888 len = dtd->defaultPrefix.binding->uriLen;
5889 if (parser->m_namespaceSeparator)
5890 len--;
5891 for (i = 0; i < len; i++) {
5892 if (! poolAppendChar(&parser->m_tempPool,
5893 dtd->defaultPrefix.binding->uri[i])) {
5894 /* Because of memory caching, I don't believe this line can be
5895 * executed.
5896 *
5897 * This is part of a loop copying the default prefix binding
5898 * URI into the parser's temporary string pool. Previously,
5899 * that URI was copied into the same string pool, with a
5900 * terminating NUL character, as part of setContext(). When
5901 * the pool was cleared, that leaves a block definitely big
5902 * enough to hold the URI on the free block list of the pool.
5903 * The URI copy in getContext() therefore cannot run out of
5904 * memory.
5905 *
5906 * If the pool is used between the setContext() and
5907 * getContext() calls, the worst it can do is leave a bigger
5908 * block on the front of the free list. Given that this is
5909 * all somewhat inobvious and program logic can be changed, we
5910 * don't delete the line but we do exclude it from the test
5911 * coverage statistics.
5912 */
5913 return NULL; /* LCOV_EXCL_LINE */
5914 }
5915 }
5916 needSep = XML_TRUE;
5917 }
5918
5919 hashTableIterInit(&iter, &(dtd->prefixes));
5920 for (;;) {
5921 int i;
5922 int len;
5923 const XML_Char *s;
5924 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
5925 if (! prefix)
5926 break;
5927 if (! prefix->binding) {
5928 /* This test appears to be (justifiable) paranoia. There does
5929 * not seem to be a way of injecting a prefix without a binding
5930 * that doesn't get errored long before this function is called.
5931 * The test should remain for safety's sake, so we instead
5932 * exclude the following line from the coverage statistics.
5933 */
5934 continue; /* LCOV_EXCL_LINE */
5935 }
5936 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
5937 return NULL;
5938 for (s = prefix->name; *s; s++)
5939 if (! poolAppendChar(&parser->m_tempPool, *s))
5940 return NULL;
5941 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
5942 return NULL;
5943 len = prefix->binding->uriLen;
5944 if (parser->m_namespaceSeparator)
5945 len--;
5946 for (i = 0; i < len; i++)
5947 if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
5948 return NULL;
5949 needSep = XML_TRUE;
5950 }
5951
5952 hashTableIterInit(&iter, &(dtd->generalEntities));
5953 for (;;) {
5954 const XML_Char *s;
5955 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
5956 if (! e)
5957 break;
5958 if (! e->open)
5959 continue;
5960 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
5961 return NULL;
5962 for (s = e->name; *s; s++)
5963 if (! poolAppendChar(&parser->m_tempPool, *s))
5964 return 0;
5965 needSep = XML_TRUE;
5966 }
5967
5968 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5969 return NULL;
5970 return parser->m_tempPool.start;
5971 }
5972
5973 static XML_Bool
setContext(XML_Parser parser,const XML_Char * context)5974 setContext(XML_Parser parser, const XML_Char *context) {
5975 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
5976 const XML_Char *s = context;
5977
5978 while (*context != XML_T('\0')) {
5979 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
5980 ENTITY *e;
5981 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5982 return XML_FALSE;
5983 e = (ENTITY *)lookup(parser, &dtd->generalEntities,
5984 poolStart(&parser->m_tempPool), 0);
5985 if (e)
5986 e->open = XML_TRUE;
5987 if (*s != XML_T('\0'))
5988 s++;
5989 context = s;
5990 poolDiscard(&parser->m_tempPool);
5991 } else if (*s == XML_T(ASCII_EQUALS)) {
5992 PREFIX *prefix;
5993 if (poolLength(&parser->m_tempPool) == 0)
5994 prefix = &dtd->defaultPrefix;
5995 else {
5996 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5997 return XML_FALSE;
5998 prefix
5999 = (PREFIX *)lookup(parser, &dtd->prefixes,
6000 poolStart(&parser->m_tempPool), sizeof(PREFIX));
6001 if (! prefix)
6002 return XML_FALSE;
6003 if (prefix->name == poolStart(&parser->m_tempPool)) {
6004 prefix->name = poolCopyString(&dtd->pool, prefix->name);
6005 if (! prefix->name)
6006 return XML_FALSE;
6007 }
6008 poolDiscard(&parser->m_tempPool);
6009 }
6010 for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
6011 context++)
6012 if (! poolAppendChar(&parser->m_tempPool, *context))
6013 return XML_FALSE;
6014 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6015 return XML_FALSE;
6016 if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
6017 &parser->m_inheritedBindings)
6018 != XML_ERROR_NONE)
6019 return XML_FALSE;
6020 poolDiscard(&parser->m_tempPool);
6021 if (*context != XML_T('\0'))
6022 ++context;
6023 s = context;
6024 } else {
6025 if (! poolAppendChar(&parser->m_tempPool, *s))
6026 return XML_FALSE;
6027 s++;
6028 }
6029 }
6030 return XML_TRUE;
6031 }
6032
6033 static void FASTCALL
normalizePublicId(XML_Char * publicId)6034 normalizePublicId(XML_Char *publicId) {
6035 XML_Char *p = publicId;
6036 XML_Char *s;
6037 for (s = publicId; *s; s++) {
6038 switch (*s) {
6039 case 0x20:
6040 case 0xD:
6041 case 0xA:
6042 if (p != publicId && p[-1] != 0x20)
6043 *p++ = 0x20;
6044 break;
6045 default:
6046 *p++ = *s;
6047 }
6048 }
6049 if (p != publicId && p[-1] == 0x20)
6050 --p;
6051 *p = XML_T('\0');
6052 }
6053
6054 static DTD *
dtdCreate(const XML_Memory_Handling_Suite * ms)6055 dtdCreate(const XML_Memory_Handling_Suite *ms) {
6056 DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD));
6057 if (p == NULL)
6058 return p;
6059 poolInit(&(p->pool), ms);
6060 poolInit(&(p->entityValuePool), ms);
6061 hashTableInit(&(p->generalEntities), ms);
6062 hashTableInit(&(p->elementTypes), ms);
6063 hashTableInit(&(p->attributeIds), ms);
6064 hashTableInit(&(p->prefixes), ms);
6065 #ifdef XML_DTD
6066 p->paramEntityRead = XML_FALSE;
6067 hashTableInit(&(p->paramEntities), ms);
6068 #endif /* XML_DTD */
6069 p->defaultPrefix.name = NULL;
6070 p->defaultPrefix.binding = NULL;
6071
6072 p->in_eldecl = XML_FALSE;
6073 p->scaffIndex = NULL;
6074 p->scaffold = NULL;
6075 p->scaffLevel = 0;
6076 p->scaffSize = 0;
6077 p->scaffCount = 0;
6078 p->contentStringLen = 0;
6079
6080 p->keepProcessing = XML_TRUE;
6081 p->hasParamEntityRefs = XML_FALSE;
6082 p->standalone = XML_FALSE;
6083 return p;
6084 }
6085
6086 static void
dtdReset(DTD * p,const XML_Memory_Handling_Suite * ms)6087 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) {
6088 HASH_TABLE_ITER iter;
6089 hashTableIterInit(&iter, &(p->elementTypes));
6090 for (;;) {
6091 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6092 if (! e)
6093 break;
6094 if (e->allocDefaultAtts != 0)
6095 ms->free_fcn(e->defaultAtts);
6096 }
6097 hashTableClear(&(p->generalEntities));
6098 #ifdef XML_DTD
6099 p->paramEntityRead = XML_FALSE;
6100 hashTableClear(&(p->paramEntities));
6101 #endif /* XML_DTD */
6102 hashTableClear(&(p->elementTypes));
6103 hashTableClear(&(p->attributeIds));
6104 hashTableClear(&(p->prefixes));
6105 poolClear(&(p->pool));
6106 poolClear(&(p->entityValuePool));
6107 p->defaultPrefix.name = NULL;
6108 p->defaultPrefix.binding = NULL;
6109
6110 p->in_eldecl = XML_FALSE;
6111
6112 ms->free_fcn(p->scaffIndex);
6113 p->scaffIndex = NULL;
6114 ms->free_fcn(p->scaffold);
6115 p->scaffold = NULL;
6116
6117 p->scaffLevel = 0;
6118 p->scaffSize = 0;
6119 p->scaffCount = 0;
6120 p->contentStringLen = 0;
6121
6122 p->keepProcessing = XML_TRUE;
6123 p->hasParamEntityRefs = XML_FALSE;
6124 p->standalone = XML_FALSE;
6125 }
6126
6127 static void
dtdDestroy(DTD * p,XML_Bool isDocEntity,const XML_Memory_Handling_Suite * ms)6128 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) {
6129 HASH_TABLE_ITER iter;
6130 hashTableIterInit(&iter, &(p->elementTypes));
6131 for (;;) {
6132 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6133 if (! e)
6134 break;
6135 if (e->allocDefaultAtts != 0)
6136 ms->free_fcn(e->defaultAtts);
6137 }
6138 hashTableDestroy(&(p->generalEntities));
6139 #ifdef XML_DTD
6140 hashTableDestroy(&(p->paramEntities));
6141 #endif /* XML_DTD */
6142 hashTableDestroy(&(p->elementTypes));
6143 hashTableDestroy(&(p->attributeIds));
6144 hashTableDestroy(&(p->prefixes));
6145 poolDestroy(&(p->pool));
6146 poolDestroy(&(p->entityValuePool));
6147 if (isDocEntity) {
6148 ms->free_fcn(p->scaffIndex);
6149 ms->free_fcn(p->scaffold);
6150 }
6151 ms->free_fcn(p);
6152 }
6153
6154 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
6155 The new DTD has already been initialized.
6156 */
6157 static int
dtdCopy(XML_Parser oldParser,DTD * newDtd,const DTD * oldDtd,const XML_Memory_Handling_Suite * ms)6158 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
6159 const XML_Memory_Handling_Suite *ms) {
6160 HASH_TABLE_ITER iter;
6161
6162 /* Copy the prefix table. */
6163
6164 hashTableIterInit(&iter, &(oldDtd->prefixes));
6165 for (;;) {
6166 const XML_Char *name;
6167 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
6168 if (! oldP)
6169 break;
6170 name = poolCopyString(&(newDtd->pool), oldP->name);
6171 if (! name)
6172 return 0;
6173 if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
6174 return 0;
6175 }
6176
6177 hashTableIterInit(&iter, &(oldDtd->attributeIds));
6178
6179 /* Copy the attribute id table. */
6180
6181 for (;;) {
6182 ATTRIBUTE_ID *newA;
6183 const XML_Char *name;
6184 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
6185
6186 if (! oldA)
6187 break;
6188 /* Remember to allocate the scratch byte before the name. */
6189 if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
6190 return 0;
6191 name = poolCopyString(&(newDtd->pool), oldA->name);
6192 if (! name)
6193 return 0;
6194 ++name;
6195 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
6196 sizeof(ATTRIBUTE_ID));
6197 if (! newA)
6198 return 0;
6199 newA->maybeTokenized = oldA->maybeTokenized;
6200 if (oldA->prefix) {
6201 newA->xmlns = oldA->xmlns;
6202 if (oldA->prefix == &oldDtd->defaultPrefix)
6203 newA->prefix = &newDtd->defaultPrefix;
6204 else
6205 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
6206 oldA->prefix->name, 0);
6207 }
6208 }
6209
6210 /* Copy the element type table. */
6211
6212 hashTableIterInit(&iter, &(oldDtd->elementTypes));
6213
6214 for (;;) {
6215 int i;
6216 ELEMENT_TYPE *newE;
6217 const XML_Char *name;
6218 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6219 if (! oldE)
6220 break;
6221 name = poolCopyString(&(newDtd->pool), oldE->name);
6222 if (! name)
6223 return 0;
6224 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
6225 sizeof(ELEMENT_TYPE));
6226 if (! newE)
6227 return 0;
6228 if (oldE->nDefaultAtts) {
6229 newE->defaultAtts = (DEFAULT_ATTRIBUTE *)ms->malloc_fcn(
6230 oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6231 if (! newE->defaultAtts) {
6232 return 0;
6233 }
6234 }
6235 if (oldE->idAtt)
6236 newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
6237 oldE->idAtt->name, 0);
6238 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
6239 if (oldE->prefix)
6240 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
6241 oldE->prefix->name, 0);
6242 for (i = 0; i < newE->nDefaultAtts; i++) {
6243 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
6244 oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
6245 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
6246 if (oldE->defaultAtts[i].value) {
6247 newE->defaultAtts[i].value
6248 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
6249 if (! newE->defaultAtts[i].value)
6250 return 0;
6251 } else
6252 newE->defaultAtts[i].value = NULL;
6253 }
6254 }
6255
6256 /* Copy the entity tables. */
6257 if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
6258 &(oldDtd->generalEntities)))
6259 return 0;
6260
6261 #ifdef XML_DTD
6262 if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
6263 &(oldDtd->paramEntities)))
6264 return 0;
6265 newDtd->paramEntityRead = oldDtd->paramEntityRead;
6266 #endif /* XML_DTD */
6267
6268 newDtd->keepProcessing = oldDtd->keepProcessing;
6269 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
6270 newDtd->standalone = oldDtd->standalone;
6271
6272 /* Don't want deep copying for scaffolding */
6273 newDtd->in_eldecl = oldDtd->in_eldecl;
6274 newDtd->scaffold = oldDtd->scaffold;
6275 newDtd->contentStringLen = oldDtd->contentStringLen;
6276 newDtd->scaffSize = oldDtd->scaffSize;
6277 newDtd->scaffLevel = oldDtd->scaffLevel;
6278 newDtd->scaffIndex = oldDtd->scaffIndex;
6279
6280 return 1;
6281 } /* End dtdCopy */
6282
6283 static int
copyEntityTable(XML_Parser oldParser,HASH_TABLE * newTable,STRING_POOL * newPool,const HASH_TABLE * oldTable)6284 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
6285 STRING_POOL *newPool, const HASH_TABLE *oldTable) {
6286 HASH_TABLE_ITER iter;
6287 const XML_Char *cachedOldBase = NULL;
6288 const XML_Char *cachedNewBase = NULL;
6289
6290 hashTableIterInit(&iter, oldTable);
6291
6292 for (;;) {
6293 ENTITY *newE;
6294 const XML_Char *name;
6295 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
6296 if (! oldE)
6297 break;
6298 name = poolCopyString(newPool, oldE->name);
6299 if (! name)
6300 return 0;
6301 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
6302 if (! newE)
6303 return 0;
6304 if (oldE->systemId) {
6305 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
6306 if (! tem)
6307 return 0;
6308 newE->systemId = tem;
6309 if (oldE->base) {
6310 if (oldE->base == cachedOldBase)
6311 newE->base = cachedNewBase;
6312 else {
6313 cachedOldBase = oldE->base;
6314 tem = poolCopyString(newPool, cachedOldBase);
6315 if (! tem)
6316 return 0;
6317 cachedNewBase = newE->base = tem;
6318 }
6319 }
6320 if (oldE->publicId) {
6321 tem = poolCopyString(newPool, oldE->publicId);
6322 if (! tem)
6323 return 0;
6324 newE->publicId = tem;
6325 }
6326 } else {
6327 const XML_Char *tem
6328 = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
6329 if (! tem)
6330 return 0;
6331 newE->textPtr = tem;
6332 newE->textLen = oldE->textLen;
6333 }
6334 if (oldE->notation) {
6335 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
6336 if (! tem)
6337 return 0;
6338 newE->notation = tem;
6339 }
6340 newE->is_param = oldE->is_param;
6341 newE->is_internal = oldE->is_internal;
6342 }
6343 return 1;
6344 }
6345
/* Base-2 logarithm of the slot count a hash table is given on its first
   insertion: lookup() sizes a fresh table to (1 << INIT_POWER) slots. */
#define INIT_POWER 6
6347
6348 static XML_Bool FASTCALL
keyeq(KEY s1,KEY s2)6349 keyeq(KEY s1, KEY s2) {
6350 for (; *s1 == *s2; s1++, s2++)
6351 if (*s1 == 0)
6352 return XML_TRUE;
6353 return XML_FALSE;
6354 }
6355
6356 static size_t
keylen(KEY s)6357 keylen(KEY s) {
6358 size_t len = 0;
6359 for (; *s; s++, len++)
6360 ;
6361 return len;
6362 }
6363
6364 static void
copy_salt_to_sipkey(XML_Parser parser,struct sipkey * key)6365 copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
6366 key->k[0] = 0;
6367 key->k[1] = get_hash_secret_salt(parser);
6368 }
6369
6370 static unsigned long FASTCALL
hash(XML_Parser parser,KEY s)6371 hash(XML_Parser parser, KEY s) {
6372 struct siphash state;
6373 struct sipkey key;
6374 (void)sip24_valid;
6375 copy_salt_to_sipkey(parser, &key);
6376 sip24_init(&state, &key);
6377 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
6378 return (unsigned long)sip24_final(&state);
6379 }
6380
6381 static NAMED *
lookup(XML_Parser parser,HASH_TABLE * table,KEY name,size_t createSize)6382 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
6383 size_t i;
6384 if (table->size == 0) {
6385 size_t tsize;
6386 if (! createSize)
6387 return NULL;
6388 table->power = INIT_POWER;
6389 /* table->size is a power of 2 */
6390 table->size = (size_t)1 << INIT_POWER;
6391 tsize = table->size * sizeof(NAMED *);
6392 table->v = (NAMED **)table->mem->malloc_fcn(tsize);
6393 if (! table->v) {
6394 table->size = 0;
6395 return NULL;
6396 }
6397 memset(table->v, 0, tsize);
6398 i = hash(parser, name) & ((unsigned long)table->size - 1);
6399 } else {
6400 unsigned long h = hash(parser, name);
6401 unsigned long mask = (unsigned long)table->size - 1;
6402 unsigned char step = 0;
6403 i = h & mask;
6404 while (table->v[i]) {
6405 if (keyeq(name, table->v[i]->name))
6406 return table->v[i];
6407 if (! step)
6408 step = PROBE_STEP(h, mask, table->power);
6409 i < step ? (i += table->size - step) : (i -= step);
6410 }
6411 if (! createSize)
6412 return NULL;
6413
6414 /* check for overflow (table is half full) */
6415 if (table->used >> (table->power - 1)) {
6416 unsigned char newPower = table->power + 1;
6417 size_t newSize = (size_t)1 << newPower;
6418 unsigned long newMask = (unsigned long)newSize - 1;
6419 size_t tsize = newSize * sizeof(NAMED *);
6420 NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize);
6421 if (! newV)
6422 return NULL;
6423 memset(newV, 0, tsize);
6424 for (i = 0; i < table->size; i++)
6425 if (table->v[i]) {
6426 unsigned long newHash = hash(parser, table->v[i]->name);
6427 size_t j = newHash & newMask;
6428 step = 0;
6429 while (newV[j]) {
6430 if (! step)
6431 step = PROBE_STEP(newHash, newMask, newPower);
6432 j < step ? (j += newSize - step) : (j -= step);
6433 }
6434 newV[j] = table->v[i];
6435 }
6436 table->mem->free_fcn(table->v);
6437 table->v = newV;
6438 table->power = newPower;
6439 table->size = newSize;
6440 i = h & newMask;
6441 step = 0;
6442 while (table->v[i]) {
6443 if (! step)
6444 step = PROBE_STEP(h, newMask, newPower);
6445 i < step ? (i += newSize - step) : (i -= step);
6446 }
6447 }
6448 }
6449 table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize);
6450 if (! table->v[i])
6451 return NULL;
6452 memset(table->v[i], 0, createSize);
6453 table->v[i]->name = name;
6454 (table->used)++;
6455 return table->v[i];
6456 }
6457
6458 static void FASTCALL
hashTableClear(HASH_TABLE * table)6459 hashTableClear(HASH_TABLE *table) {
6460 size_t i;
6461 for (i = 0; i < table->size; i++) {
6462 table->mem->free_fcn(table->v[i]);
6463 table->v[i] = NULL;
6464 }
6465 table->used = 0;
6466 }
6467
6468 static void FASTCALL
hashTableDestroy(HASH_TABLE * table)6469 hashTableDestroy(HASH_TABLE *table) {
6470 size_t i;
6471 for (i = 0; i < table->size; i++)
6472 table->mem->free_fcn(table->v[i]);
6473 table->mem->free_fcn(table->v);
6474 }
6475
6476 static void FASTCALL
hashTableInit(HASH_TABLE * p,const XML_Memory_Handling_Suite * ms)6477 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) {
6478 p->power = 0;
6479 p->size = 0;
6480 p->used = 0;
6481 p->v = NULL;
6482 p->mem = ms;
6483 }
6484
6485 static void FASTCALL
hashTableIterInit(HASH_TABLE_ITER * iter,const HASH_TABLE * table)6486 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
6487 iter->p = table->v;
6488 iter->end = iter->p + table->size;
6489 }
6490
6491 static NAMED *FASTCALL
hashTableIterNext(HASH_TABLE_ITER * iter)6492 hashTableIterNext(HASH_TABLE_ITER *iter) {
6493 while (iter->p != iter->end) {
6494 NAMED *tem = *(iter->p)++;
6495 if (tem)
6496 return tem;
6497 }
6498 return NULL;
6499 }
6500
6501 static void FASTCALL
poolInit(STRING_POOL * pool,const XML_Memory_Handling_Suite * ms)6502 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) {
6503 pool->blocks = NULL;
6504 pool->freeBlocks = NULL;
6505 pool->start = NULL;
6506 pool->ptr = NULL;
6507 pool->end = NULL;
6508 pool->mem = ms;
6509 }
6510
6511 static void FASTCALL
poolClear(STRING_POOL * pool)6512 poolClear(STRING_POOL *pool) {
6513 if (! pool->freeBlocks)
6514 pool->freeBlocks = pool->blocks;
6515 else {
6516 BLOCK *p = pool->blocks;
6517 while (p) {
6518 BLOCK *tem = p->next;
6519 p->next = pool->freeBlocks;
6520 pool->freeBlocks = p;
6521 p = tem;
6522 }
6523 }
6524 pool->blocks = NULL;
6525 pool->start = NULL;
6526 pool->ptr = NULL;
6527 pool->end = NULL;
6528 }
6529
6530 static void FASTCALL
poolDestroy(STRING_POOL * pool)6531 poolDestroy(STRING_POOL *pool) {
6532 BLOCK *p = pool->blocks;
6533 while (p) {
6534 BLOCK *tem = p->next;
6535 pool->mem->free_fcn(p);
6536 p = tem;
6537 }
6538 p = pool->freeBlocks;
6539 while (p) {
6540 BLOCK *tem = p->next;
6541 pool->mem->free_fcn(p);
6542 p = tem;
6543 }
6544 }
6545
6546 static XML_Char *
poolAppend(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)6547 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
6548 const char *end) {
6549 if (! pool->ptr && ! poolGrow(pool))
6550 return NULL;
6551 for (;;) {
6552 const enum XML_Convert_Result convert_res = XmlConvert(
6553 enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
6554 if ((convert_res == XML_CONVERT_COMPLETED)
6555 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
6556 break;
6557 if (! poolGrow(pool))
6558 return NULL;
6559 }
6560 return pool->start;
6561 }
6562
6563 static const XML_Char *FASTCALL
poolCopyString(STRING_POOL * pool,const XML_Char * s)6564 poolCopyString(STRING_POOL *pool, const XML_Char *s) {
6565 do {
6566 if (! poolAppendChar(pool, *s))
6567 return NULL;
6568 } while (*s++);
6569 s = pool->start;
6570 poolFinish(pool);
6571 return s;
6572 }
6573
6574 static const XML_Char *
poolCopyStringN(STRING_POOL * pool,const XML_Char * s,int n)6575 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
6576 if (! pool->ptr && ! poolGrow(pool)) {
6577 /* The following line is unreachable given the current usage of
6578 * poolCopyStringN(). Currently it is called from exactly one
6579 * place to copy the text of a simple general entity. By that
6580 * point, the name of the entity is already stored in the pool, so
6581 * pool->ptr cannot be NULL.
6582 *
6583 * If poolCopyStringN() is used elsewhere as it well might be,
6584 * this line may well become executable again. Regardless, this
6585 * sort of check shouldn't be removed lightly, so we just exclude
6586 * it from the coverage statistics.
6587 */
6588 return NULL; /* LCOV_EXCL_LINE */
6589 }
6590 for (; n > 0; --n, s++) {
6591 if (! poolAppendChar(pool, *s))
6592 return NULL;
6593 }
6594 s = pool->start;
6595 poolFinish(pool);
6596 return s;
6597 }
6598
6599 static const XML_Char *FASTCALL
poolAppendString(STRING_POOL * pool,const XML_Char * s)6600 poolAppendString(STRING_POOL *pool, const XML_Char *s) {
6601 while (*s) {
6602 if (! poolAppendChar(pool, *s))
6603 return NULL;
6604 s++;
6605 }
6606 return pool->start;
6607 }
6608
6609 static XML_Char *
poolStoreString(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)6610 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
6611 const char *end) {
6612 if (! poolAppend(pool, enc, ptr, end))
6613 return NULL;
6614 if (pool->ptr == pool->end && ! poolGrow(pool))
6615 return NULL;
6616 *(pool->ptr)++ = 0;
6617 return pool->start;
6618 }
6619
6620 static size_t
poolBytesToAllocateFor(int blockSize)6621 poolBytesToAllocateFor(int blockSize) {
6622 /* Unprotected math would be:
6623 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
6624 **
6625 ** Detect overflow, avoiding _signed_ overflow undefined behavior
6626 ** For a + b * c we check b * c in isolation first, so that addition of a
6627 ** on top has no chance of making us accept a small non-negative number
6628 */
6629 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
6630
6631 if (blockSize <= 0)
6632 return 0;
6633
6634 if (blockSize > (int)(INT_MAX / stretch))
6635 return 0;
6636
6637 {
6638 const int stretchedBlockSize = blockSize * (int)stretch;
6639 const int bytesToAllocate
6640 = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
6641 if (bytesToAllocate < 0)
6642 return 0;
6643
6644 return (size_t)bytesToAllocate;
6645 }
6646 }
6647
/* Make more room for the string currently being built in pool.
 *
 * Strategies, tried in this order:
 *   1. If a free block exists and the pool has no current area, adopt
 *      the first free block as the current block.
 *   2. If a free block exists and it is larger than the current area,
 *      move it to the front of the in-use list and copy the current
 *      area into it.
 *   3. If the current string starts at the beginning of the newest
 *      in-use block, reallocate that block to twice its size.
 *   4. Otherwise allocate a new block (at least INIT_BLOCK_SIZE
 *      characters, else twice the current area) and copy the
 *      in-progress string into it.
 *
 * On success pool->start, pool->ptr and pool->end describe the new area
 * and XML_TRUE is returned.  XML_FALSE is returned when an allocation
 * fails or a size computation would overflow.
 */
static XML_Bool FASTCALL
poolGrow(STRING_POOL *pool) {
  if (pool->freeBlocks) {
    if (pool->start == 0) {
      /* NOTE(review): pool->blocks is overwritten here; presumably it is
         always NULL whenever pool->start is NULL -- confirm. */
      pool->blocks = pool->freeBlocks;
      pool->freeBlocks = pool->freeBlocks->next;
      pool->blocks->next = NULL;
      pool->start = pool->blocks->s;
      pool->end = pool->start + pool->blocks->size;
      pool->ptr = pool->start;
      return XML_TRUE;
    }
    if (pool->end - pool->start < pool->freeBlocks->size) {
      /* Reuse the first free block: unlink it, push it onto the in-use
         list, then move the current area across and rebase the
         pointers (ptr first, while start still refers to the old area). */
      BLOCK *tem = pool->freeBlocks->next;
      pool->freeBlocks->next = pool->blocks;
      pool->blocks = pool->freeBlocks;
      pool->freeBlocks = tem;
      memcpy(pool->blocks->s, pool->start,
             (pool->end - pool->start) * sizeof(XML_Char));
      pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
      pool->start = pool->blocks->s;
      pool->end = pool->start + pool->blocks->size;
      return XML_TRUE;
    }
  }
  if (pool->blocks && pool->start == pool->blocks->s) {
    BLOCK *temp;
    /* Doubling is done in unsigned arithmetic so that an overflow shows
       up as a negative int instead of being undefined behavior. */
    int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
    size_t bytesToAllocate;

    /* NOTE: Needs to be calculated prior to calling `realloc`
       to avoid dangling pointers: */
    const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;

    if (blockSize < 0) {
      /* This condition traps a situation where more than
       * INT_MAX/2 bytes have already been allocated. This isn't
       * readily testable, since it is unlikely that an average
       * machine will have that much memory, so we exclude it from the
       * coverage statistics.
       */
      return XML_FALSE; /* LCOV_EXCL_LINE */
    }

    bytesToAllocate = poolBytesToAllocateFor(blockSize);
    if (bytesToAllocate == 0)
      return XML_FALSE;

    temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks,
                                           (unsigned)bytesToAllocate);
    if (temp == NULL)
      return XML_FALSE;
    /* The block may have moved: rebuild all pointers from the new base. */
    pool->blocks = temp;
    pool->blocks->size = blockSize;
    pool->ptr = pool->blocks->s + offsetInsideBlock;
    pool->start = pool->blocks->s;
    pool->end = pool->start + blockSize;
  } else {
    BLOCK *tem;
    int blockSize = (int)(pool->end - pool->start);
    size_t bytesToAllocate;

    if (blockSize < 0) {
      /* This condition traps a situation where either more than
       * INT_MAX bytes have already been allocated (which is prevented
       * by various pieces of program logic, not least this one, never
       * mind the unlikelihood of actually having that much memory) or
       * the pool control fields have been corrupted (which could
       * conceivably happen in an extremely buggy user handler
       * function).  Either way it isn't readily testable, so we
       * exclude it from the coverage statistics.
       */
      return XML_FALSE; /* LCOV_EXCL_LINE */
    }

    if (blockSize < INIT_BLOCK_SIZE)
      blockSize = INIT_BLOCK_SIZE;
    else {
      /* Detect overflow, avoiding _signed_ overflow undefined behavior */
      if ((int)((unsigned)blockSize * 2U) < 0) {
        return XML_FALSE;
      }
      blockSize *= 2;
    }

    bytesToAllocate = poolBytesToAllocateFor(blockSize);
    if (bytesToAllocate == 0)
      return XML_FALSE;

    tem = (BLOCK *)pool->mem->malloc_fcn(bytesToAllocate);
    if (! tem)
      return XML_FALSE;
    tem->size = blockSize;
    tem->next = pool->blocks;
    pool->blocks = tem;
    /* Carry the in-progress string (if any) over to the new block. */
    if (pool->ptr != pool->start)
      memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
    pool->ptr = tem->s + (pool->ptr - pool->start);
    pool->start = tem->s;
    pool->end = tem->s + blockSize;
  }
  return XML_TRUE;
}
6751
6752 static int FASTCALL
nextScaffoldPart(XML_Parser parser)6753 nextScaffoldPart(XML_Parser parser) {
6754 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6755 CONTENT_SCAFFOLD *me;
6756 int next;
6757
6758 if (! dtd->scaffIndex) {
6759 dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
6760 if (! dtd->scaffIndex)
6761 return -1;
6762 dtd->scaffIndex[0] = 0;
6763 }
6764
6765 if (dtd->scaffCount >= dtd->scaffSize) {
6766 CONTENT_SCAFFOLD *temp;
6767 if (dtd->scaffold) {
6768 temp = (CONTENT_SCAFFOLD *)REALLOC(
6769 parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
6770 if (temp == NULL)
6771 return -1;
6772 dtd->scaffSize *= 2;
6773 } else {
6774 temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
6775 * sizeof(CONTENT_SCAFFOLD));
6776 if (temp == NULL)
6777 return -1;
6778 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
6779 }
6780 dtd->scaffold = temp;
6781 }
6782 next = dtd->scaffCount++;
6783 me = &dtd->scaffold[next];
6784 if (dtd->scaffLevel) {
6785 CONTENT_SCAFFOLD *parent
6786 = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
6787 if (parent->lastchild) {
6788 dtd->scaffold[parent->lastchild].nextsib = next;
6789 }
6790 if (! parent->childcnt)
6791 parent->firstchild = next;
6792 parent->lastchild = next;
6793 parent->childcnt++;
6794 }
6795 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
6796 return next;
6797 }
6798
6799 static void
build_node(XML_Parser parser,int src_node,XML_Content * dest,XML_Content ** contpos,XML_Char ** strpos)6800 build_node(XML_Parser parser, int src_node, XML_Content *dest,
6801 XML_Content **contpos, XML_Char **strpos) {
6802 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6803 dest->type = dtd->scaffold[src_node].type;
6804 dest->quant = dtd->scaffold[src_node].quant;
6805 if (dest->type == XML_CTYPE_NAME) {
6806 const XML_Char *src;
6807 dest->name = *strpos;
6808 src = dtd->scaffold[src_node].name;
6809 for (;;) {
6810 *(*strpos)++ = *src;
6811 if (! *src)
6812 break;
6813 src++;
6814 }
6815 dest->numchildren = 0;
6816 dest->children = NULL;
6817 } else {
6818 unsigned int i;
6819 int cn;
6820 dest->numchildren = dtd->scaffold[src_node].childcnt;
6821 dest->children = *contpos;
6822 *contpos += dest->numchildren;
6823 for (i = 0, cn = dtd->scaffold[src_node].firstchild; i < dest->numchildren;
6824 i++, cn = dtd->scaffold[cn].nextsib) {
6825 build_node(parser, cn, &(dest->children[i]), contpos, strpos);
6826 }
6827 dest->name = NULL;
6828 }
6829 }
6830
6831 static XML_Content *
build_model(XML_Parser parser)6832 build_model(XML_Parser parser) {
6833 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6834 XML_Content *ret;
6835 XML_Content *cpos;
6836 XML_Char *str;
6837 int allocsize = (dtd->scaffCount * sizeof(XML_Content)
6838 + (dtd->contentStringLen * sizeof(XML_Char)));
6839
6840 ret = (XML_Content *)MALLOC(parser, allocsize);
6841 if (! ret)
6842 return NULL;
6843
6844 str = (XML_Char *)(&ret[dtd->scaffCount]);
6845 cpos = &ret[1];
6846
6847 build_node(parser, 0, ret, &cpos, &str);
6848 return ret;
6849 }
6850
6851 static ELEMENT_TYPE *
getElementType(XML_Parser parser,const ENCODING * enc,const char * ptr,const char * end)6852 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
6853 const char *end) {
6854 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6855 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
6856 ELEMENT_TYPE *ret;
6857
6858 if (! name)
6859 return NULL;
6860 ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
6861 sizeof(ELEMENT_TYPE));
6862 if (! ret)
6863 return NULL;
6864 if (ret->name != name)
6865 poolDiscard(&dtd->pool);
6866 else {
6867 poolFinish(&dtd->pool);
6868 if (! setElementTypePrefix(parser, ret))
6869 return NULL;
6870 }
6871 return ret;
6872 }
6873
6874 static XML_Char *
copyString(const XML_Char * s,const XML_Memory_Handling_Suite * memsuite)6875 copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
6876 int charsRequired = 0;
6877 XML_Char *result;
6878
6879 /* First determine how long the string is */
6880 while (s[charsRequired] != 0) {
6881 charsRequired++;
6882 }
6883 /* Include the terminator */
6884 charsRequired++;
6885
6886 /* Now allocate space for the copy */
6887 result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
6888 if (result == NULL)
6889 return NULL;
6890 /* Copy the original into place */
6891 memcpy(result, s, charsRequired * sizeof(XML_Char));
6892 return result;
6893 }
6894