1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their corresponding
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * daniel@veillard.com
31 */
32
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37
38 #define IN_LIBXML
39 #include "libxml.h"
40
41 #if defined(_WIN32)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
46
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <ctype.h>
53 #include <stdlib.h>
54 #include <libxml/parser.h>
55 #include <libxml/xmlmemory.h>
56 #include <libxml/tree.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #include <libxml/SAX2.h>
65 #ifdef LIBXML_CATALOG_ENABLED
66 #include <libxml/catalog.h>
67 #endif
68
69 #include "private/buf.h"
70 #include "private/dict.h"
71 #include "private/entities.h"
72 #include "private/error.h"
73 #include "private/html.h"
74 #include "private/io.h"
75 #include "private/parser.h"
76
77 #define NS_INDEX_EMPTY INT_MAX
78 #define NS_INDEX_XML (INT_MAX - 1)
79 #define URI_HASH_EMPTY 0xD943A04E
80 #define URI_HASH_XML 0xF0451F02
81
/*
 * Record kept for a start tag. The code that fills and reads this struct
 * is outside this part of the file, so the field notes below are
 * assumptions based on the field names only.
 */
struct _xmlStartTag {
    const xmlChar *prefix; /* presumably the element's prefix - TODO confirm */
    const xmlChar *URI;    /* presumably the element's namespace URI - TODO confirm */
    int line;              /* presumably the line of the start tag - TODO confirm */
    int nsNr;              /* presumably a namespace count for the tag - TODO confirm */
};
88
/*
 * Extra per-namespace data kept by the parser. The users of this struct
 * are outside this part of the file; field notes are assumptions based on
 * the field names.
 */
typedef struct {
    void *saxData;            /* presumably opaque data for SAX users - TODO confirm */
    unsigned prefixHashValue; /* presumably hash of the prefix - TODO confirm */
    unsigned uriHashValue;    /* presumably hash of the namespace URI - TODO confirm */
    unsigned elementId;       /* presumably id of the declaring element - TODO confirm */
    int oldIndex;             /* presumably index of a previous binding - TODO confirm */
} xmlParserNsExtra;
96
/*
 * One bucket of the namespace hash table (see the "hash" member of
 * struct _xmlParserNsData).
 */
typedef struct {
    unsigned hashValue; /* presumably the hash of the stored key - TODO confirm */
    int index;          /* presumably an index into a namespace array - TODO confirm */
} xmlParserNsBucket;
101
/*
 * Namespace bookkeeping of a parser context: an array of xmlParserNsExtra
 * records plus a hash table of xmlParserNsBucket entries. The code
 * maintaining it is outside this part of the file; notes on the scalar
 * fields are assumptions based on their names.
 */
struct _xmlParserNsData {
    xmlParserNsExtra *extra;  /* array of extra per-namespace records */

    unsigned hashSize;        /* presumably number of buckets in "hash" - TODO confirm */
    unsigned hashElems;       /* presumably number of used buckets - TODO confirm */
    xmlParserNsBucket *hash;  /* hash table buckets */

    unsigned elementId;       /* presumably id of the current element - TODO confirm */
    int defaultNsIndex;       /* presumably index of the default namespace - TODO confirm */
    int minNsIndex;           /* presumably lowest usable namespace index - TODO confirm */
};
113
/*
 * One bucket of an attribute hash table; it only stores an index.
 * NOTE(review): presumably an index into the attribute array - confirm
 * against the code using it.
 */
struct _xmlAttrHashBucket {
    int index;
};
117
118 static int
119 xmlParseElementStart(xmlParserCtxtPtr ctxt);
120
121 static void
122 xmlParseElementEnd(xmlParserCtxtPtr ctxt);
123
124 static xmlEntityPtr
125 xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
126
127 static const xmlChar *
128 xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
129
130 /************************************************************************
131 * *
132 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
133 * *
134 ************************************************************************/
135
136 #define XML_PARSER_BIG_ENTITY 1000
137 #define XML_PARSER_LOT_ENTITY 5000
138
139 /*
140 * Constants for protection against abusive entity expansion
141 * ("billion laughs").
142 */
143
144 /*
145 * A certain amount of entity expansion which is always allowed.
146 */
147 #define XML_PARSER_ALLOWED_EXPANSION 1000000
148
149 /*
150 * Fixed cost for each entity reference. This crudely models processing time
151 * as well to protect, for example, against exponential expansion of empty
152 * or very short entities.
153 */
154 #define XML_ENT_FIXED_COST 20
155
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
const unsigned int xmlParserMaxDepth = 256;
165
166
167
168 #define XML_PARSER_BIG_BUFFER_SIZE 300
169 #define XML_PARSER_BUFFER_SIZE 100
170 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
171
172 /**
173 * XML_PARSER_CHUNK_SIZE
174 *
 * When calling GROW that's the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the input's available characters
 * are followed by 0, which should be provided by the I/O level.
180 */
181 #define XML_PARSER_CHUNK_SIZE 100
182
/**
 * xmlParserVersion:
 *
 * Constant string describing the internal version of the library. It is
 * the concatenation of the LIBXML_VERSION_STRING and LIBXML_VERSION_EXTRA
 * string literals.
 */
const char *const
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
190
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specs.
 */

static const char* const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
200
201
202 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
203 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
204 const xmlChar **str);
205
206 static void
207 xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
208
209 static int
210 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
211
212 /************************************************************************
213 * *
214 * Some factorized error routines *
215 * *
216 ************************************************************************/
217
/**
 * xmlErrMemory:
 * @ctxt:  an XML parser context
 *
 * Report a memory allocation failure by forwarding to xmlCtxtErrMemory.
 */
static void
xmlErrMemory(xmlParserCtxtPtr ctxt) {
    xmlCtxtErrMemory(ctxt);
}
222
223 /**
224 * xmlErrAttributeDup:
225 * @ctxt: an XML parser context
226 * @prefix: the attribute prefix
227 * @localname: the attribute localname
228 *
229 * Handle a redefinition of attribute error
230 */
231 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)232 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
233 const xmlChar * localname)
234 {
235 if (prefix == NULL)
236 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
237 XML_ERR_FATAL, localname, NULL, NULL, 0,
238 "Attribute %s redefined\n", localname);
239 else
240 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241 XML_ERR_FATAL, prefix, localname, NULL, 0,
242 "Attribute %s:%s redefined\n", prefix, localname);
243 }
244
/**
 * xmlFatalErrMsg:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints.
 *
 * @msg is handed to xmlCtxtErr as the argument of a "%s" format, so it
 * is not itself used as the format string.
 */
static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
               const char *msg)
{
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
               NULL, NULL, NULL, 0, "%s", msg);
}
260
/**
 * xmlWarningMsg:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the message, used as the format string for @str1 and @str2
 * @str1:  extra data
 * @str2:  extra data
 *
 * Handle a warning: reports @error in the XML_FROM_PARSER domain with
 * level XML_ERR_WARNING.
 */
void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
              const char *msg, const xmlChar *str1, const xmlChar *str2)
{
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
               str1, str2, NULL, 0, msg, str1, str2);
}
278
/**
 * xmlValidityError:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message, used as the format string for @str1 and @str2
 * @str1:  extra data
 * @str2:  extra data
 *
 * Handle a validity error: marks the document as invalid by clearing
 * ctxt->valid, then reports @error in the XML_FROM_DTD domain with
 * level XML_ERR_ERROR.
 */
static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                 const char *msg, const xmlChar *str1, const xmlChar *str2)
{
    ctxt->valid = 0;

    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
               str1, str2, NULL, 0, msg, str1, str2);
}
297
/**
 * xmlFatalErrMsgInt:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message, used as the format string for @val
 * @val:  an integer value
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints.
 * @val is passed both as the integer detail of the error and as the
 * format argument.
 */
static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, int val)
{
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
               NULL, NULL, NULL, val, msg, val);
}
314
/**
 * xmlFatalErrMsgStrIntStr:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message, used as the format string
 * @str1:  a string info
 * @val:  an integer value
 * @str2:  a string info
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints.
 * The format arguments are passed in the order @str1, @val, @str2.
 */
static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                        const char *msg, const xmlChar *str1, int val,
                        const xmlChar *str2)
{
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
               str1, str2, NULL, val, msg, str1, val, str2);
}
334
/**
 * xmlFatalErrMsgStr:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message, used as the format string for @val
 * @val:  a string value
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints.
 */
static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, const xmlChar * val)
{
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
               val, NULL, NULL, 0, msg, val);
}
351
/**
 * xmlErrMsgStr:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message, used as the format string for @val
 * @val:  a string value
 *
 * Handle a non fatal parser error (reported with level XML_ERR_ERROR).
 */
static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
             const char *msg, const xmlChar * val)
{
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
               val, NULL, NULL, 0, msg, val);
}
368
/**
 * xmlNsErr:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the message, used as the format string for the info strings
 * @info1:  extra information string
 * @info2:  extra information string
 * @info3:  extra information string
 *
 * Handle a namespace error: clears ctxt->nsWellFormed, then reports
 * @error in the XML_FROM_NAMESPACE domain with level XML_ERR_ERROR.
 */
static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
         const char *msg,
         const xmlChar * info1, const xmlChar * info2,
         const xmlChar * info3)
{
    ctxt->nsWellFormed = 0;

    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
               info1, info2, info3, 0, msg, info1, info2, info3);
}
390
/**
 * xmlNsWarn:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the message, used as the format string for the info strings
 * @info1:  extra information string
 * @info2:  extra information string
 * @info3:  extra information string
 *
 * Handle a namespace warning: reports @error in the XML_FROM_NAMESPACE
 * domain with level XML_ERR_WARNING. Unlike xmlNsErr, ctxt->nsWellFormed
 * is left untouched.
 */
static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
          const char *msg,
          const xmlChar * info1, const xmlChar * info2,
          const xmlChar * info3)
{
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
               info1, info2, info3, 0, msg, info1, info2, info3);
}
410
/*
 * Add val to *dst with saturation: if the sum would not fit into an
 * unsigned long, *dst is clamped to ULONG_MAX instead of wrapping around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest value that can still be added without overflow. */
    unsigned long headroom = ULONG_MAX - *dst;

    *dst = (val > headroom) ? ULONG_MAX : *dst + val;
}
418
/*
 * Add a size_t value to *dst with saturation at ULONG_MAX.
 *
 * val is taken as size_t so that, on platforms where size_t is wider than
 * unsigned long (e.g. 64-bit Windows), a large value is not silently
 * truncated by the implicit argument conversion before the saturation
 * check runs. The comparison is done in the wider of the two types.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val; /* fits: val <= ULONG_MAX - *dst */
}
426
427 /**
428 * xmlParserEntityCheck:
429 * @ctxt: parser context
430 * @extra: sum of unexpanded entity sizes
431 *
432 * Check for non-linear entity expansion behaviour.
433 *
434 * In some cases like xmlExpandEntityInAttValue, this function is called
435 * for each, possibly nested entity and its unexpanded content length.
436 *
437 * In other cases like xmlParseReference, it's only called for each
438 * top-level entity with its unexpanded content length plus the sum of
439 * the unexpanded content lengths (plus fixed cost) of all nested
440 * entities.
441 *
442 * Summing the unexpanded lengths also adds the length of the reference.
443 * This is by design. Taking the length of the entity name into account
444 * discourages attacks that try to waste CPU time with abusively long
445 * entity names. See test/recurse/lol6.xml for example. Each call also
446 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
447 * short entities.
448 *
449 * Returns 1 on error, 0 on success.
450 */
451 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,unsigned long extra)452 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
453 {
454 unsigned long consumed;
455 unsigned long *expandedSize;
456 xmlParserInputPtr input = ctxt->input;
457 xmlEntityPtr entity = input->entity;
458
459 if ((entity) && (entity->flags & XML_ENT_CHECKED))
460 return(0);
461
462 /*
463 * Compute total consumed bytes so far, including input streams of
464 * external entities.
465 */
466 consumed = input->consumed;
467 xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
468 xmlSaturatedAdd(&consumed, ctxt->sizeentities);
469
470 if (entity)
471 expandedSize = &entity->expandedSize;
472 else
473 expandedSize = &ctxt->sizeentcopy;
474
475 /*
476 * Add extra cost and some fixed cost.
477 */
478 xmlSaturatedAdd(expandedSize, extra);
479 xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
480
481 /*
482 * It's important to always use saturation arithmetic when tracking
483 * entity sizes to make the size checks reliable. If "sizeentcopy"
484 * overflows, we have to abort.
485 */
486 if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
487 ((*expandedSize >= ULONG_MAX) ||
488 (*expandedSize / ctxt->maxAmpl > consumed))) {
489 xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
490 "Maximum entity amplification factor exceeded, see "
491 "xmlCtxtSetMaxAmplification.\n");
492 xmlHaltParser(ctxt);
493 return(1);
494 }
495
496 return(0);
497 }
498
499 /************************************************************************
500 * *
501 * Library wide options *
502 * *
503 ************************************************************************/
504
505 /**
506 * xmlHasFeature:
507 * @feature: the feature to be examined
508 *
509 * Examines if the library has been compiled with a given feature.
510 *
511 * Returns a non-zero value if the feature exist, otherwise zero.
512 * Returns zero (0) if the feature does not exist or an unknown
513 * unknown feature is requested, non-zero otherwise.
514 */
515 int
xmlHasFeature(xmlFeature feature)516 xmlHasFeature(xmlFeature feature)
517 {
518 switch (feature) {
519 case XML_WITH_THREAD:
520 #ifdef LIBXML_THREAD_ENABLED
521 return(1);
522 #else
523 return(0);
524 #endif
525 case XML_WITH_TREE:
526 #ifdef LIBXML_TREE_ENABLED
527 return(1);
528 #else
529 return(0);
530 #endif
531 case XML_WITH_OUTPUT:
532 #ifdef LIBXML_OUTPUT_ENABLED
533 return(1);
534 #else
535 return(0);
536 #endif
537 case XML_WITH_PUSH:
538 #ifdef LIBXML_PUSH_ENABLED
539 return(1);
540 #else
541 return(0);
542 #endif
543 case XML_WITH_READER:
544 #ifdef LIBXML_READER_ENABLED
545 return(1);
546 #else
547 return(0);
548 #endif
549 case XML_WITH_PATTERN:
550 #ifdef LIBXML_PATTERN_ENABLED
551 return(1);
552 #else
553 return(0);
554 #endif
555 case XML_WITH_WRITER:
556 #ifdef LIBXML_WRITER_ENABLED
557 return(1);
558 #else
559 return(0);
560 #endif
561 case XML_WITH_SAX1:
562 #ifdef LIBXML_SAX1_ENABLED
563 return(1);
564 #else
565 return(0);
566 #endif
567 case XML_WITH_HTTP:
568 #ifdef LIBXML_HTTP_ENABLED
569 return(1);
570 #else
571 return(0);
572 #endif
573 case XML_WITH_VALID:
574 #ifdef LIBXML_VALID_ENABLED
575 return(1);
576 #else
577 return(0);
578 #endif
579 case XML_WITH_HTML:
580 #ifdef LIBXML_HTML_ENABLED
581 return(1);
582 #else
583 return(0);
584 #endif
585 case XML_WITH_LEGACY:
586 #ifdef LIBXML_LEGACY_ENABLED
587 return(1);
588 #else
589 return(0);
590 #endif
591 case XML_WITH_C14N:
592 #ifdef LIBXML_C14N_ENABLED
593 return(1);
594 #else
595 return(0);
596 #endif
597 case XML_WITH_CATALOG:
598 #ifdef LIBXML_CATALOG_ENABLED
599 return(1);
600 #else
601 return(0);
602 #endif
603 case XML_WITH_XPATH:
604 #ifdef LIBXML_XPATH_ENABLED
605 return(1);
606 #else
607 return(0);
608 #endif
609 case XML_WITH_XPTR:
610 #ifdef LIBXML_XPTR_ENABLED
611 return(1);
612 #else
613 return(0);
614 #endif
615 case XML_WITH_XINCLUDE:
616 #ifdef LIBXML_XINCLUDE_ENABLED
617 return(1);
618 #else
619 return(0);
620 #endif
621 case XML_WITH_ICONV:
622 #ifdef LIBXML_ICONV_ENABLED
623 return(1);
624 #else
625 return(0);
626 #endif
627 case XML_WITH_ISO8859X:
628 #ifdef LIBXML_ISO8859X_ENABLED
629 return(1);
630 #else
631 return(0);
632 #endif
633 case XML_WITH_UNICODE:
634 #ifdef LIBXML_UNICODE_ENABLED
635 return(1);
636 #else
637 return(0);
638 #endif
639 case XML_WITH_REGEXP:
640 #ifdef LIBXML_REGEXP_ENABLED
641 return(1);
642 #else
643 return(0);
644 #endif
645 case XML_WITH_AUTOMATA:
646 #ifdef LIBXML_AUTOMATA_ENABLED
647 return(1);
648 #else
649 return(0);
650 #endif
651 case XML_WITH_EXPR:
652 #ifdef LIBXML_EXPR_ENABLED
653 return(1);
654 #else
655 return(0);
656 #endif
657 case XML_WITH_SCHEMAS:
658 #ifdef LIBXML_SCHEMAS_ENABLED
659 return(1);
660 #else
661 return(0);
662 #endif
663 case XML_WITH_SCHEMATRON:
664 #ifdef LIBXML_SCHEMATRON_ENABLED
665 return(1);
666 #else
667 return(0);
668 #endif
669 case XML_WITH_MODULES:
670 #ifdef LIBXML_MODULES_ENABLED
671 return(1);
672 #else
673 return(0);
674 #endif
675 case XML_WITH_DEBUG:
676 #ifdef LIBXML_DEBUG_ENABLED
677 return(1);
678 #else
679 return(0);
680 #endif
681 case XML_WITH_DEBUG_MEM:
682 return(0);
683 case XML_WITH_ZLIB:
684 #ifdef LIBXML_ZLIB_ENABLED
685 return(1);
686 #else
687 return(0);
688 #endif
689 case XML_WITH_LZMA:
690 #ifdef LIBXML_LZMA_ENABLED
691 return(1);
692 #else
693 return(0);
694 #endif
695 case XML_WITH_ICU:
696 #ifdef LIBXML_ICU_ENABLED
697 return(1);
698 #else
699 return(0);
700 #endif
701 default:
702 break;
703 }
704 return(0);
705 }
706
707 /************************************************************************
708 * *
709 * Simple string buffer *
710 * *
711 ************************************************************************/
712
/*
 * Growable byte buffer used to assemble strings. Errors are not returned
 * by the add functions but recorded in "code" and reported when the
 * buffer is finished or cleaned up.
 */
typedef struct {
    xmlChar *mem;         /* allocated storage, NULL until the first grow */
    unsigned size;        /* number of bytes stored in mem */
    unsigned cap; /* size < cap */
    unsigned max; /* size <= max */
    xmlParserErrors code; /* XML_ERR_OK or the recorded error */
} xmlSBuf;
720
721 static void
xmlSBufInit(xmlSBuf * buf,unsigned max)722 xmlSBufInit(xmlSBuf *buf, unsigned max) {
723 buf->mem = NULL;
724 buf->size = 0;
725 buf->cap = 0;
726 buf->max = max;
727 buf->code = XML_ERR_OK;
728 }
729
730 static int
xmlSBufGrow(xmlSBuf * buf,unsigned len)731 xmlSBufGrow(xmlSBuf *buf, unsigned len) {
732 xmlChar *mem;
733 unsigned cap;
734
735 if (len >= UINT_MAX / 2 - buf->size) {
736 if (buf->code == XML_ERR_OK)
737 buf->code = XML_ERR_RESOURCE_LIMIT;
738 return(-1);
739 }
740
741 cap = (buf->size + len) * 2;
742 if (cap < 240)
743 cap = 240;
744
745 mem = xmlRealloc(buf->mem, cap);
746 if (mem == NULL) {
747 buf->code = XML_ERR_NO_MEMORY;
748 return(-1);
749 }
750
751 buf->mem = mem;
752 buf->cap = cap;
753
754 return(0);
755 }
756
757 static void
xmlSBufAddString(xmlSBuf * buf,const xmlChar * str,unsigned len)758 xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
759 if (buf->max - buf->size < len) {
760 if (buf->code == XML_ERR_OK)
761 buf->code = XML_ERR_RESOURCE_LIMIT;
762 return;
763 }
764
765 if (buf->cap - buf->size <= len) {
766 if (xmlSBufGrow(buf, len) < 0)
767 return;
768 }
769
770 if (len > 0)
771 memcpy(buf->mem + buf->size, str, len);
772 buf->size += len;
773 }
774
/*
 * Same as xmlSBufAddString, but for a plain char string: appends len
 * bytes starting at str to buf.
 */
static void
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
    xmlSBufAddString(buf, (const xmlChar *) str, len);
}
779
780 static void
xmlSBufAddChar(xmlSBuf * buf,int c)781 xmlSBufAddChar(xmlSBuf *buf, int c) {
782 xmlChar *end;
783
784 if (buf->max - buf->size < 4) {
785 if (buf->code == XML_ERR_OK)
786 buf->code = XML_ERR_RESOURCE_LIMIT;
787 return;
788 }
789
790 if (buf->cap - buf->size <= 4) {
791 if (xmlSBufGrow(buf, 4) < 0)
792 return;
793 }
794
795 end = buf->mem + buf->size;
796
797 if (c < 0x80) {
798 *end = (xmlChar) c;
799 buf->size += 1;
800 } else {
801 buf->size += xmlCopyCharMultiByte(end, c);
802 }
803 }
804
/*
 * Append the three bytes EF BF BD, the UTF-8 encoding of the replacement
 * character U+FFFD, to buf.
 */
static void
xmlSBufAddReplChar(xmlSBuf *buf) {
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
}
809
810 static void
xmlSBufReportError(xmlSBuf * buf,xmlParserCtxtPtr ctxt,const char * errMsg)811 xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
812 if (buf->code == XML_ERR_NO_MEMORY)
813 xmlCtxtErrMemory(ctxt);
814 else
815 xmlFatalErr(ctxt, buf->code, errMsg);
816 }
817
818 static xmlChar *
xmlSBufFinish(xmlSBuf * buf,int * sizeOut,xmlParserCtxtPtr ctxt,const char * errMsg)819 xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
820 const char *errMsg) {
821 if (buf->mem == NULL) {
822 buf->mem = xmlMalloc(1);
823 if (buf->mem == NULL) {
824 buf->code = XML_ERR_NO_MEMORY;
825 } else {
826 buf->mem[0] = 0;
827 }
828 } else {
829 buf->mem[buf->size] = 0;
830 }
831
832 if (buf->code == XML_ERR_OK) {
833 if (sizeOut != NULL)
834 *sizeOut = buf->size;
835 return(buf->mem);
836 }
837
838 xmlSBufReportError(buf, ctxt, errMsg);
839
840 xmlFree(buf->mem);
841
842 if (sizeOut != NULL)
843 *sizeOut = 0;
844 return(NULL);
845 }
846
/*
 * Discard buf: report the recorded error, if any, on ctxt using errMsg,
 * then free the storage.
 */
static void
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
    if (buf->code != XML_ERR_OK)
        xmlSBufReportError(buf, ctxt, errMsg);

    xmlFree(buf->mem);
}
854
/*
 * Validate the UTF-8 multi-byte sequence starting at str and compute its
 * length in bytes.
 *
 * Overlong encodings, surrogates, values above 0x10FFFF and missing
 * continuation bytes are encoding errors. Only the first encoding error
 * of an input is reported (tracked with the XML_INPUT_ENCODING_ERROR
 * flag of the input).
 *
 * The sequences encoding U+FFFE and U+FFFF are well-formed UTF-8 but
 * invalid Chars: for these, XML_ERR_INVALID_CHAR is raised with errMsg
 * and the length 3 is still returned.
 *
 * Up to four bytes are read from str without any length check.
 * NOTE(review): presumably callers guarantee a zero-terminated buffer so
 * that a truncated sequence fails the continuation byte tests before
 * anything past the terminator is read - confirm.
 * NOTE(review): a first byte below 0xC2, which includes ASCII, ends up as
 * an encoding error here; presumably callers only pass bytes >= 0x80.
 *
 * Returns 2, 3 or 4 for a valid sequence, 0 on encoding error.
 */
static int
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
                    const char *errMsg) {
    int c = str[0];
    int c1 = str[1];

    /* Every multi-byte sequence needs at least one continuation byte. */
    if ((c1 & 0xC0) != 0x80)
        goto encoding_error;

    if (c < 0xE0) {
        /* 2-byte sequence */
        if (c < 0xC2)
            goto encoding_error;

        return(2);
    } else {
        int c2 = str[2];

        if ((c2 & 0xC0) != 0x80)
            goto encoding_error;

        if (c < 0xF0) {
            /* 3-byte sequence */
            if (c == 0xE0) {
                /* overlong */
                if (c1 < 0xA0)
                    goto encoding_error;
            } else if (c == 0xED) {
                /* surrogate */
                if (c1 >= 0xA0)
                    goto encoding_error;
            } else if (c == 0xEF) {
                /* U+FFFE and U+FFFF are invalid Chars */
                if ((c1 == 0xBF) && (c2 >= 0xBE))
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
            }

            return(3);
        } else {
            /* 4-byte sequence */
            if ((str[3] & 0xC0) != 0x80)
                goto encoding_error;
            if (c == 0xF0) {
                /* overlong */
                if (c1 < 0x90)
                    goto encoding_error;
            } else if (c >= 0xF4) {
                /* greater than 0x10FFFF */
                if ((c > 0xF4) || (c1 >= 0x90))
                    goto encoding_error;
            }

            return(4);
        }
    }

encoding_error:
    /* Only report the first error */
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
    }

    return(0);
}
920
921 /************************************************************************
922 * *
923 * SAX2 defaulted attributes handling *
924 * *
925 ************************************************************************/
926
927 /**
928 * xmlCtxtInitializeLate:
929 * @ctxt: an XML parser context
930 *
931 * Final initialization of the parser context before starting to parse.
932 *
933 * This accounts for users modifying struct members of parser context
934 * directly.
935 */
936 static void
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt)937 xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
938 xmlSAXHandlerPtr sax;
939
940 /* Avoid unused variable warning if features are disabled. */
941 (void) sax;
942
943 /*
944 * Changing the SAX struct directly is still widespread practice
945 * in internal and external code.
946 */
947 if (ctxt == NULL) return;
948 sax = ctxt->sax;
949 #ifdef LIBXML_SAX1_ENABLED
950 /*
951 * Only enable SAX2 if there SAX2 element handlers, except when there
952 * are no element handlers at all.
953 */
954 if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
955 (sax) &&
956 (sax->initialized == XML_SAX2_MAGIC) &&
957 ((sax->startElementNs != NULL) ||
958 (sax->endElementNs != NULL) ||
959 ((sax->startElement == NULL) && (sax->endElement == NULL))))
960 ctxt->sax2 = 1;
961 #else
962 ctxt->sax2 = 1;
963 #endif /* LIBXML_SAX1_ENABLED */
964
965 /*
966 * Some users replace the dictionary directly in the context struct.
967 * We really need an API function to do that cleanly.
968 */
969 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
970 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
971 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
972 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
973 (ctxt->str_xml_ns == NULL)) {
974 xmlErrMemory(ctxt);
975 }
976 }
977
/*
 * A single defaulted attribute, as filled in by xmlAddDefAttrs.
 */
typedef struct {
    xmlHashedString prefix;   /* attribute prefix, prefix.name is NULL if none */
    xmlHashedString name;     /* attribute local name */
    xmlHashedString value;    /* default value, interned in the dictionary */
    const xmlChar *valueEnd;  /* end of the value: value.name + length */
    int external;             /* PARSER_EXTERNAL(ctxt) at declaration time */
    int expandedSize;         /* length of name + prefix + value */
} xmlDefAttr;
986
/*
 * The defaulted attributes of one element: a header followed by a
 * variable number of xmlDefAttr records allocated in the same block.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;	/* number of defaulted attributes on that element */
    int maxAttrs;       /* the size of the array */
#if __STDC_VERSION__ >= 199901L
    /* Using a C99 flexible array member avoids UBSan errors. */
    xmlDefAttr attrs[]; /* array of localname/prefix/values/external */
#else
    /* Fallback for compilers without flexible array members. */
    xmlDefAttr attrs[1];
#endif
};
999
1000 /**
1001 * xmlAttrNormalizeSpace:
1002 * @src: the source string
1003 * @dst: the target string
1004 *
1005 * Normalize the space in non CDATA attribute values:
1006 * If the attribute type is not CDATA, then the XML processor MUST further
1007 * process the normalized attribute value by discarding any leading and
1008 * trailing space (#x20) characters, and by replacing sequences of space
1009 * (#x20) characters by a single space (#x20) character.
1010 * Note that the size of dst need to be at least src, and if one doesn't need
1011 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1012 * passing src as dst is just fine.
1013 *
1014 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1015 * is needed.
1016 */
1017 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1018 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1019 {
1020 if ((src == NULL) || (dst == NULL))
1021 return(NULL);
1022
1023 while (*src == 0x20) src++;
1024 while (*src != 0) {
1025 if (*src == 0x20) {
1026 while (*src == 0x20) src++;
1027 if (*src != 0)
1028 *dst++ = 0x20;
1029 } else {
1030 *dst++ = *src++;
1031 }
1032 }
1033 *dst = 0;
1034 if (dst == src)
1035 return(NULL);
1036 return(dst);
1037 }
1038
/**
 * xmlAddDefAttrs:
 * @ctxt:  an XML parser context
 * @fullname:  the element fullname
 * @fullattr:  the attribute fullname
 * @value:  the attribute value
 *
 * Add a defaulted attribute for an element.
 *
 * The defaults of an element are stored in ctxt->attsDefault, keyed by
 * the local name and prefix of the element. Nothing is added if
 * ctxt->attsSpecial already has an entry for this element/attribute
 * pair. Allocation failures are reported with xmlErrMemory.
 */
static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
               const xmlChar *fullname,
               const xmlChar *fullattr,
               const xmlChar *value) {
    xmlDefAttrsPtr defaults;
    xmlDefAttr *attr;
    int len, expandedSize;
    xmlHashedString name;
    xmlHashedString prefix;
    xmlHashedString hvalue;
    const xmlChar *localname;

    /*
     * Allows to detect attribute redefinitions
     */
    if (ctxt->attsSpecial != NULL) {
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
            return;
    }

    /* The table of defaults is created on first use. */
    if (ctxt->attsDefault == NULL) {
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
        if (ctxt->attsDefault == NULL)
            goto mem_error;
    }

    /*
     * Split the element name into prefix:localname. The strings found
     * are within the DTD and thus not associated to namespace names.
     * If xmlSplitQName3 returns NULL, the full name is used as local
     * name and there is no prefix.
     */
    localname = xmlSplitQName3(fullname, &len);
    if (localname == NULL) {
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
        prefix.name = NULL;
    } else {
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
        prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
        if (prefix.name == NULL)
            goto mem_error;
    }
    if (name.name == NULL)
        goto mem_error;

    /*
     * Make sure there is some storage: allocate room for 4 attributes
     * for a new element, or double the array once it is full.
     */
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
    if ((defaults == NULL) ||
        (defaults->nbAttrs >= defaults->maxAttrs)) {
        xmlDefAttrsPtr temp;
        int newSize;

        newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4;
        temp = xmlRealloc(defaults,
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
        if (temp == NULL)
            goto mem_error;
        /* A fresh block has no attributes yet. */
        if (defaults == NULL)
            temp->nbAttrs = 0;
        temp->maxAttrs = newSize;
        defaults = temp;
        /* The block may have moved, so store the new pointer in the table. */
        if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
                                defaults, NULL) < 0) {
            xmlFree(defaults);
            goto mem_error;
        }
    }

    /*
     * Split the attribute name into prefix:localname. The strings found
     * are within the DTD and thus not associated to namespace names.
     */
    localname = xmlSplitQName3(fullattr, &len);
    if (localname == NULL) {
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
        prefix.name = NULL;
    } else {
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
        prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
        if (prefix.name == NULL)
            goto mem_error;
    }
    if (name.name == NULL)
        goto mem_error;

    /* intern the string and precompute the end */
    len = strlen((const char *) value);
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
    if (hvalue.name == NULL)
        goto mem_error;

    /* The recorded size is the length of local name, prefix and value. */
    expandedSize = strlen((const char *) name.name);
    if (prefix.name != NULL)
        expandedSize += strlen((const char *) prefix.name);
    expandedSize += len;

    /* Append the new record to the array of the element. */
    attr = &defaults->attrs[defaults->nbAttrs++];
    attr->name = name;
    attr->prefix = prefix;
    attr->value = hvalue;
    attr->valueEnd = hvalue.name + len;
    attr->external = PARSER_EXTERNAL(ctxt);
    attr->expandedSize = expandedSize;

    return;

mem_error:
    xmlErrMemory(ctxt);
    return;
}
1159
1160 /**
1161 * xmlAddSpecialAttr:
1162 * @ctxt: an XML parser context
1163 * @fullname: the element fullname
1164 * @fullattr: the attribute fullname
1165 * @type: the attribute type
1166 *
1167 * Register this attribute type
1168 */
1169 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1170 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1171 const xmlChar *fullname,
1172 const xmlChar *fullattr,
1173 int type)
1174 {
1175 if (ctxt->attsSpecial == NULL) {
1176 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1177 if (ctxt->attsSpecial == NULL)
1178 goto mem_error;
1179 }
1180
1181 if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1182 (void *) (ptrdiff_t) type) < 0)
1183 goto mem_error;
1184 return;
1185
1186 mem_error:
1187 xmlErrMemory(ctxt);
1188 return;
1189 }
1190
1191 /**
1192 * xmlCleanSpecialAttrCallback:
1193 *
1194 * Removes CDATA attributes from the special attribute table
1195 */
1196 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1197 xmlCleanSpecialAttrCallback(void *payload, void *data,
1198 const xmlChar *fullname, const xmlChar *fullattr,
1199 const xmlChar *unused ATTRIBUTE_UNUSED) {
1200 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1201
1202 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1203 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1204 }
1205 }
1206
1207 /**
1208 * xmlCleanSpecialAttr:
1209 * @ctxt: an XML parser context
1210 *
1211 * Trim the list of attributes defined to remove all those of type
1212 * CDATA as they are not special. This call should be done when finishing
1213 * to parse the DTD and before starting to parse the document root.
1214 */
1215 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1216 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1217 {
1218 if (ctxt->attsSpecial == NULL)
1219 return;
1220
1221 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1222
1223 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1224 xmlHashFree(ctxt->attsSpecial, NULL);
1225 ctxt->attsSpecial = NULL;
1226 }
1227 return;
1228 }
1229
/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * DEPRECATED: Internal function, do not use.
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The current REC references the successors of RFC 1766, currently 5646
 *
 * http://www.rfc-editor.org/rfc/rfc5646.txt
 * langtag       = language
 *                 ["-" script]
 *                 ["-" region]
 *                 *("-" variant)
 *                 *("-" extension)
 *                 ["-" privateuse]
 * language      = 2*3ALPHA            ; shortest ISO 639 code
 *                 ["-" extlang]       ; sometimes followed by
 *                                     ; extended language subtags
 *               / 4ALPHA              ; or reserved for future use
 *               / 5*8ALPHA            ; or registered language subtag
 *
 * extlang       = 3ALPHA              ; selected ISO 639 codes
 *                 *2("-" 3ALPHA)      ; permanently reserved
 *
 * script        = 4ALPHA              ; ISO 15924 code
 *
 * region        = 2ALPHA              ; ISO 3166-1 code
 *               / 3DIGIT              ; UN M.49 code
 *
 * variant       = 5*8alphanum         ; registered variants
 *               / (DIGIT 3alphanum)
 *
 * extension     = singleton 1*("-" (2*8alphanum))
 *
 *                                     ; Single alphanumerics
 *                                     ; "x" reserved for private use
 * singleton     = DIGIT               ; 0 - 9
 *               / %x41-57             ; A - W
 *               / %x59-5A             ; Y - Z
 *               / %x61-77             ; a - w
 *               / %x79-7A             ; y - z
 *
 * It sounds right to still allow irregular i-xxx IANA and user codes too.
 * The parser below doesn't try to cope with extension or privateuse;
 * that could be added but that's not interoperable anyway.
 *
 * Implementation notes (as implemented below):
 *  - subtags are scanned as runs of ASCII letters; @cur marks the start
 *    of the current subtag and @nxt the first byte after it, so
 *    (nxt - cur) is the subtag length;
 *  - a subtag starting with a digit is only accepted as a 3 digit
 *    region code;
 *  - once a variant has been recognized, anything following a '-' is
 *    accepted without further checking.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang, *nxt;

    if (cur == NULL)
        return (0);
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
        ((cur[0] == 'I') && (cur[1] == '-')) ||
        ((cur[0] == 'x') && (cur[1] == '-')) ||
        ((cur[0] == 'X') && (cur[1] == '-'))) {
        /*
         * Still allow IANA code and user code which were coming
         * from the previous version of the XML-1.0 specification
         * it's deprecated but we should not fail
         */
        cur += 2;
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
            cur++;
        /* valid only if the letters run up to the end of the string */
        return(cur[0] == 0);
    }
    /* measure the primary language subtag */
    nxt = cur;
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
        nxt++;
    if (nxt - cur >= 4) {
        /*
         * Reserved: 4 to 8 letters are accepted only when they make up
         * the whole string
         */
        if ((nxt - cur > 8) || (nxt[0] != 0))
            return(0);
        return(1);
    }
    if (nxt - cur < 2)
        return(0);
    /* we got an ISO 639 code */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have extlang or script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
        nxt++;
    /* the subtag length tells which component was read */
    if (nxt - cur == 4)
        goto script;
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 3)
        return(0);
    /* we parsed an extlang */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
        nxt++;
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 4)
        return(0);
    /* we parsed a script */
script:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
        nxt++;

    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 2)
        return(0);
    /* we parsed a region */
region:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can just have a variant */
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
        nxt++;

    if ((nxt - cur < 5) || (nxt - cur > 8))
        return(0);

    /* we parsed a variant */
variant:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);
    /* extensions and private use subtags not checked */
    return (1);

region_m49:
    /*
     * nxt[0] is already known to be a digit; require two more digits,
     * then continue as if an alphabetic region had been read
     */
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
        nxt += 3;
        goto region;
    }
    return(0);
}
1425
1426 /************************************************************************
1427 * *
1428 * Parser stacks related functions and macros *
1429 * *
1430 ************************************************************************/
1431
1432 static xmlChar *
1433 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1434
1435 /**
1436 * xmlParserNsCreate:
1437 *
1438 * Create a new namespace database.
1439 *
1440 * Returns the new obejct.
1441 */
1442 xmlParserNsData *
xmlParserNsCreate(void)1443 xmlParserNsCreate(void) {
1444 xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1445
1446 if (nsdb == NULL)
1447 return(NULL);
1448 memset(nsdb, 0, sizeof(*nsdb));
1449 nsdb->defaultNsIndex = INT_MAX;
1450
1451 return(nsdb);
1452 }
1453
1454 /**
1455 * xmlParserNsFree:
1456 * @nsdb: namespace database
1457 *
1458 * Free a namespace database.
1459 */
1460 void
xmlParserNsFree(xmlParserNsData * nsdb)1461 xmlParserNsFree(xmlParserNsData *nsdb) {
1462 if (nsdb == NULL)
1463 return;
1464
1465 xmlFree(nsdb->extra);
1466 xmlFree(nsdb->hash);
1467 xmlFree(nsdb);
1468 }
1469
1470 /**
1471 * xmlParserNsReset:
1472 * @nsdb: namespace database
1473 *
1474 * Reset a namespace database.
1475 */
1476 static void
xmlParserNsReset(xmlParserNsData * nsdb)1477 xmlParserNsReset(xmlParserNsData *nsdb) {
1478 if (nsdb == NULL)
1479 return;
1480
1481 nsdb->hashElems = 0;
1482 nsdb->elementId = 0;
1483 nsdb->defaultNsIndex = INT_MAX;
1484
1485 if (nsdb->hash)
1486 memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1487 }
1488
1489 /**
1490 * xmlParserStartElement:
1491 * @nsdb: namespace database
1492 *
1493 * Signal that a new element has started.
1494 *
1495 * Returns 0 on success, -1 if the element counter overflowed.
1496 */
1497 static int
xmlParserNsStartElement(xmlParserNsData * nsdb)1498 xmlParserNsStartElement(xmlParserNsData *nsdb) {
1499 if (nsdb->elementId == UINT_MAX)
1500 return(-1);
1501 nsdb->elementId++;
1502
1503 return(0);
1504 }
1505
1506 /**
1507 * xmlParserNsLookup:
1508 * @ctxt: parser context
1509 * @prefix: namespace prefix
1510 * @bucketPtr: optional bucket (return value)
1511 *
1512 * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1513 * be set to the matching bucket, or the first empty bucket if no match
1514 * was found.
1515 *
1516 * Returns the namespace index on success, INT_MAX if no namespace was
1517 * found.
1518 */
1519 static int
xmlParserNsLookup(xmlParserCtxtPtr ctxt,const xmlHashedString * prefix,xmlParserNsBucket ** bucketPtr)1520 xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1521 xmlParserNsBucket **bucketPtr) {
1522 xmlParserNsBucket *bucket, *tombstone;
1523 unsigned index, hashValue;
1524
1525 if (prefix->name == NULL)
1526 return(ctxt->nsdb->defaultNsIndex);
1527
1528 if (ctxt->nsdb->hashSize == 0)
1529 return(INT_MAX);
1530
1531 hashValue = prefix->hashValue;
1532 index = hashValue & (ctxt->nsdb->hashSize - 1);
1533 bucket = &ctxt->nsdb->hash[index];
1534 tombstone = NULL;
1535
1536 while (bucket->hashValue) {
1537 if (bucket->index == INT_MAX) {
1538 if (tombstone == NULL)
1539 tombstone = bucket;
1540 } else if (bucket->hashValue == hashValue) {
1541 if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1542 if (bucketPtr != NULL)
1543 *bucketPtr = bucket;
1544 return(bucket->index);
1545 }
1546 }
1547
1548 index++;
1549 bucket++;
1550 if (index == ctxt->nsdb->hashSize) {
1551 index = 0;
1552 bucket = ctxt->nsdb->hash;
1553 }
1554 }
1555
1556 if (bucketPtr != NULL)
1557 *bucketPtr = tombstone ? tombstone : bucket;
1558 return(INT_MAX);
1559 }
1560
1561 /**
1562 * xmlParserNsLookupUri:
1563 * @ctxt: parser context
1564 * @prefix: namespace prefix
1565 *
1566 * Lookup namespace URI with given prefix.
1567 *
1568 * Returns the namespace URI on success, NULL if no namespace was found.
1569 */
1570 static const xmlChar *
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt,const xmlHashedString * prefix)1571 xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1572 const xmlChar *ret;
1573 int nsIndex;
1574
1575 if (prefix->name == ctxt->str_xml)
1576 return(ctxt->str_xml_ns);
1577
1578 /*
1579 * minNsIndex is used when building an entity tree. We must
1580 * ignore namespaces declared outside the entity.
1581 */
1582 nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1583 if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1584 return(NULL);
1585
1586 ret = ctxt->nsTab[nsIndex * 2 + 1];
1587 if (ret[0] == 0)
1588 ret = NULL;
1589 return(ret);
1590 }
1591
1592 /**
1593 * xmlParserNsLookupSax:
1594 * @ctxt: parser context
1595 * @prefix: namespace prefix
1596 *
1597 * Lookup extra data for the given prefix. This returns data stored
1598 * with xmlParserNsUdpateSax.
1599 *
1600 * Returns the data on success, NULL if no namespace was found.
1601 */
1602 void *
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt,const xmlChar * prefix)1603 xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1604 xmlHashedString hprefix;
1605 int nsIndex;
1606
1607 if (prefix == ctxt->str_xml)
1608 return(NULL);
1609
1610 hprefix.name = prefix;
1611 if (prefix != NULL)
1612 hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1613 else
1614 hprefix.hashValue = 0;
1615 nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1616 if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1617 return(NULL);
1618
1619 return(ctxt->nsdb->extra[nsIndex].saxData);
1620 }
1621
1622 /**
1623 * xmlParserNsUpdateSax:
1624 * @ctxt: parser context
1625 * @prefix: namespace prefix
1626 * @saxData: extra data for SAX handler
1627 *
1628 * Sets or updates extra data for the given prefix. This value will be
1629 * returned by xmlParserNsLookupSax as long as the namespace with the
1630 * given prefix is in scope.
1631 *
1632 * Returns the data on success, NULL if no namespace was found.
1633 */
1634 int
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt,const xmlChar * prefix,void * saxData)1635 xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1636 void *saxData) {
1637 xmlHashedString hprefix;
1638 int nsIndex;
1639
1640 if (prefix == ctxt->str_xml)
1641 return(-1);
1642
1643 hprefix.name = prefix;
1644 if (prefix != NULL)
1645 hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1646 else
1647 hprefix.hashValue = 0;
1648 nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1649 if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1650 return(-1);
1651
1652 ctxt->nsdb->extra[nsIndex].saxData = saxData;
1653 return(0);
1654 }
1655
1656 /**
1657 * xmlParserNsGrow:
1658 * @ctxt: parser context
1659 *
1660 * Grows the namespace tables.
1661 *
1662 * Returns 0 on success, -1 if a memory allocation failed.
1663 */
1664 static int
xmlParserNsGrow(xmlParserCtxtPtr ctxt)1665 xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1666 const xmlChar **table;
1667 xmlParserNsExtra *extra;
1668 int newSize;
1669
1670 if (ctxt->nsMax > INT_MAX / 2)
1671 goto error;
1672 newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16;
1673
1674 table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1675 if (table == NULL)
1676 goto error;
1677 ctxt->nsTab = table;
1678
1679 extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1680 if (extra == NULL)
1681 goto error;
1682 ctxt->nsdb->extra = extra;
1683
1684 ctxt->nsMax = newSize;
1685 return(0);
1686
1687 error:
1688 xmlErrMemory(ctxt);
1689 return(-1);
1690 }
1691
1692 /**
1693 * xmlParserNsPush:
1694 * @ctxt: parser context
1695 * @prefix: prefix with hash value
1696 * @uri: uri with hash value
1697 * @saxData: extra data for SAX handler
1698 * @defAttr: whether the namespace comes from a default attribute
1699 *
1700 * Push a new namespace on the table.
1701 *
1702 * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1703 * -1 if a memory allocation failed.
1704 */
1705 static int
xmlParserNsPush(xmlParserCtxtPtr ctxt,const xmlHashedString * prefix,const xmlHashedString * uri,void * saxData,int defAttr)1706 xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1707 const xmlHashedString *uri, void *saxData, int defAttr) {
1708 xmlParserNsBucket *bucket = NULL;
1709 xmlParserNsExtra *extra;
1710 const xmlChar **ns;
1711 unsigned hashValue, nsIndex, oldIndex;
1712
1713 if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1714 return(0);
1715
1716 if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1717 xmlErrMemory(ctxt);
1718 return(-1);
1719 }
1720
1721 /*
1722 * Default namespace and 'xml' namespace
1723 */
1724 if ((prefix == NULL) || (prefix->name == NULL)) {
1725 oldIndex = ctxt->nsdb->defaultNsIndex;
1726
1727 if (oldIndex != INT_MAX) {
1728 extra = &ctxt->nsdb->extra[oldIndex];
1729
1730 if (extra->elementId == ctxt->nsdb->elementId) {
1731 if (defAttr == 0)
1732 xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1733 return(0);
1734 }
1735
1736 if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1737 (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1738 return(0);
1739 }
1740
1741 ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1742 goto populate_entry;
1743 }
1744
1745 /*
1746 * Hash table lookup
1747 */
1748 oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1749 if (oldIndex != INT_MAX) {
1750 extra = &ctxt->nsdb->extra[oldIndex];
1751
1752 /*
1753 * Check for duplicate definitions on the same element.
1754 */
1755 if (extra->elementId == ctxt->nsdb->elementId) {
1756 if (defAttr == 0)
1757 xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1758 return(0);
1759 }
1760
1761 if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1762 (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1763 return(0);
1764
1765 bucket->index = ctxt->nsNr;
1766 goto populate_entry;
1767 }
1768
1769 /*
1770 * Insert new bucket
1771 */
1772
1773 hashValue = prefix->hashValue;
1774
1775 /*
1776 * Grow hash table, 50% fill factor
1777 */
1778 if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1779 xmlParserNsBucket *newHash;
1780 unsigned newSize, i, index;
1781
1782 if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1783 xmlErrMemory(ctxt);
1784 return(-1);
1785 }
1786 newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1787 newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1788 if (newHash == NULL) {
1789 xmlErrMemory(ctxt);
1790 return(-1);
1791 }
1792 memset(newHash, 0, newSize * sizeof(newHash[0]));
1793
1794 for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1795 unsigned hv = ctxt->nsdb->hash[i].hashValue;
1796 unsigned newIndex;
1797
1798 if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1799 continue;
1800 newIndex = hv & (newSize - 1);
1801
1802 while (newHash[newIndex].hashValue != 0) {
1803 newIndex++;
1804 if (newIndex == newSize)
1805 newIndex = 0;
1806 }
1807
1808 newHash[newIndex] = ctxt->nsdb->hash[i];
1809 }
1810
1811 xmlFree(ctxt->nsdb->hash);
1812 ctxt->nsdb->hash = newHash;
1813 ctxt->nsdb->hashSize = newSize;
1814
1815 /*
1816 * Relookup
1817 */
1818 index = hashValue & (newSize - 1);
1819
1820 while (newHash[index].hashValue != 0) {
1821 index++;
1822 if (index == newSize)
1823 index = 0;
1824 }
1825
1826 bucket = &newHash[index];
1827 }
1828
1829 bucket->hashValue = hashValue;
1830 bucket->index = ctxt->nsNr;
1831 ctxt->nsdb->hashElems++;
1832 oldIndex = INT_MAX;
1833
1834 populate_entry:
1835 nsIndex = ctxt->nsNr;
1836
1837 ns = &ctxt->nsTab[nsIndex * 2];
1838 ns[0] = prefix ? prefix->name : NULL;
1839 ns[1] = uri->name;
1840
1841 extra = &ctxt->nsdb->extra[nsIndex];
1842 extra->saxData = saxData;
1843 extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1844 extra->uriHashValue = uri->hashValue;
1845 extra->elementId = ctxt->nsdb->elementId;
1846 extra->oldIndex = oldIndex;
1847
1848 ctxt->nsNr++;
1849
1850 return(1);
1851 }
1852
1853 /**
1854 * xmlParserNsPop:
1855 * @ctxt: an XML parser context
1856 * @nr: the number to pop
1857 *
1858 * Pops the top @nr namespaces and restores the hash table.
1859 *
1860 * Returns the number of namespaces popped.
1861 */
1862 static int
xmlParserNsPop(xmlParserCtxtPtr ctxt,int nr)1863 xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1864 {
1865 int i;
1866
1867 /* assert(nr <= ctxt->nsNr); */
1868
1869 for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1870 const xmlChar *prefix = ctxt->nsTab[i * 2];
1871 xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1872
1873 if (prefix == NULL) {
1874 ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1875 } else {
1876 xmlHashedString hprefix;
1877 xmlParserNsBucket *bucket = NULL;
1878
1879 hprefix.name = prefix;
1880 hprefix.hashValue = extra->prefixHashValue;
1881 xmlParserNsLookup(ctxt, &hprefix, &bucket);
1882 /* assert(bucket && bucket->hashValue); */
1883 bucket->index = extra->oldIndex;
1884 }
1885 }
1886
1887 ctxt->nsNr -= nr;
1888 return(nr);
1889 }
1890
1891 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1892 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1893 const xmlChar **atts;
1894 unsigned *attallocs;
1895 int maxatts;
1896
1897 if (nr + 5 > ctxt->maxatts) {
1898 maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1899 atts = (const xmlChar **) xmlMalloc(
1900 maxatts * sizeof(const xmlChar *));
1901 if (atts == NULL) goto mem_error;
1902 attallocs = xmlRealloc(ctxt->attallocs,
1903 (maxatts / 5) * sizeof(attallocs[0]));
1904 if (attallocs == NULL) {
1905 xmlFree(atts);
1906 goto mem_error;
1907 }
1908 if (ctxt->maxatts > 0)
1909 memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1910 xmlFree(ctxt->atts);
1911 ctxt->atts = atts;
1912 ctxt->attallocs = attallocs;
1913 ctxt->maxatts = maxatts;
1914 }
1915 return(ctxt->maxatts);
1916 mem_error:
1917 xmlErrMemory(ctxt);
1918 return(-1);
1919 }
1920
1921 /**
1922 * inputPush:
1923 * @ctxt: an XML parser context
1924 * @value: the parser input
1925 *
1926 * Pushes a new parser input on top of the input stack
1927 *
1928 * Returns -1 in case of error, the index in the stack otherwise
1929 */
1930 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1931 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1932 {
1933 if ((ctxt == NULL) || (value == NULL))
1934 return(-1);
1935 if (ctxt->inputNr >= ctxt->inputMax) {
1936 size_t newSize = ctxt->inputMax * 2;
1937 xmlParserInputPtr *tmp;
1938
1939 tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1940 newSize * sizeof(*tmp));
1941 if (tmp == NULL) {
1942 xmlErrMemory(ctxt);
1943 return (-1);
1944 }
1945 ctxt->inputTab = tmp;
1946 ctxt->inputMax = newSize;
1947 }
1948 ctxt->inputTab[ctxt->inputNr] = value;
1949 ctxt->input = value;
1950 return (ctxt->inputNr++);
1951 }
1952 /**
1953 * inputPop:
1954 * @ctxt: an XML parser context
1955 *
1956 * Pops the top parser input from the input stack
1957 *
1958 * Returns the input just removed
1959 */
1960 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1961 inputPop(xmlParserCtxtPtr ctxt)
1962 {
1963 xmlParserInputPtr ret;
1964
1965 if (ctxt == NULL)
1966 return(NULL);
1967 if (ctxt->inputNr <= 0)
1968 return (NULL);
1969 ctxt->inputNr--;
1970 if (ctxt->inputNr > 0)
1971 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1972 else
1973 ctxt->input = NULL;
1974 ret = ctxt->inputTab[ctxt->inputNr];
1975 ctxt->inputTab[ctxt->inputNr] = NULL;
1976 return (ret);
1977 }
1978 /**
1979 * nodePush:
1980 * @ctxt: an XML parser context
1981 * @value: the element node
1982 *
1983 * DEPRECATED: Internal function, do not use.
1984 *
1985 * Pushes a new element node on top of the node stack
1986 *
1987 * Returns -1 in case of error, the index in the stack otherwise
1988 */
1989 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1990 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1991 {
1992 int maxDepth;
1993
1994 if (ctxt == NULL)
1995 return(0);
1996
1997 maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
1998 if (ctxt->nodeNr > maxDepth) {
1999 xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2000 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
2001 ctxt->nodeNr);
2002 xmlHaltParser(ctxt);
2003 return(-1);
2004 }
2005 if (ctxt->nodeNr >= ctxt->nodeMax) {
2006 xmlNodePtr *tmp;
2007
2008 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
2009 ctxt->nodeMax * 2 *
2010 sizeof(ctxt->nodeTab[0]));
2011 if (tmp == NULL) {
2012 xmlErrMemory(ctxt);
2013 return (-1);
2014 }
2015 ctxt->nodeTab = tmp;
2016 ctxt->nodeMax *= 2;
2017 }
2018 ctxt->nodeTab[ctxt->nodeNr] = value;
2019 ctxt->node = value;
2020 return (ctxt->nodeNr++);
2021 }
2022
2023 /**
2024 * nodePop:
2025 * @ctxt: an XML parser context
2026 *
2027 * DEPRECATED: Internal function, do not use.
2028 *
2029 * Pops the top element node from the node stack
2030 *
2031 * Returns the node just removed
2032 */
2033 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)2034 nodePop(xmlParserCtxtPtr ctxt)
2035 {
2036 xmlNodePtr ret;
2037
2038 if (ctxt == NULL) return(NULL);
2039 if (ctxt->nodeNr <= 0)
2040 return (NULL);
2041 ctxt->nodeNr--;
2042 if (ctxt->nodeNr > 0)
2043 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2044 else
2045 ctxt->node = NULL;
2046 ret = ctxt->nodeTab[ctxt->nodeNr];
2047 ctxt->nodeTab[ctxt->nodeNr] = NULL;
2048 return (ret);
2049 }
2050
2051 /**
2052 * nameNsPush:
2053 * @ctxt: an XML parser context
2054 * @value: the element name
2055 * @prefix: the element prefix
2056 * @URI: the element namespace name
2057 * @line: the current line number for error messages
2058 * @nsNr: the number of namespaces pushed on the namespace table
2059 *
2060 * Pushes a new element name/prefix/URL on top of the name stack
2061 *
2062 * Returns -1 in case of error, the index in the stack otherwise
2063 */
2064 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr)2065 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2066 const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2067 {
2068 xmlStartTag *tag;
2069
2070 if (ctxt->nameNr >= ctxt->nameMax) {
2071 const xmlChar * *tmp;
2072 xmlStartTag *tmp2;
2073 ctxt->nameMax *= 2;
2074 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2075 ctxt->nameMax *
2076 sizeof(ctxt->nameTab[0]));
2077 if (tmp == NULL) {
2078 ctxt->nameMax /= 2;
2079 goto mem_error;
2080 }
2081 ctxt->nameTab = tmp;
2082 tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
2083 ctxt->nameMax *
2084 sizeof(ctxt->pushTab[0]));
2085 if (tmp2 == NULL) {
2086 ctxt->nameMax /= 2;
2087 goto mem_error;
2088 }
2089 ctxt->pushTab = tmp2;
2090 } else if (ctxt->pushTab == NULL) {
2091 ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
2092 sizeof(ctxt->pushTab[0]));
2093 if (ctxt->pushTab == NULL)
2094 goto mem_error;
2095 }
2096 ctxt->nameTab[ctxt->nameNr] = value;
2097 ctxt->name = value;
2098 tag = &ctxt->pushTab[ctxt->nameNr];
2099 tag->prefix = prefix;
2100 tag->URI = URI;
2101 tag->line = line;
2102 tag->nsNr = nsNr;
2103 return (ctxt->nameNr++);
2104 mem_error:
2105 xmlErrMemory(ctxt);
2106 return (-1);
2107 }
2108 #ifdef LIBXML_PUSH_ENABLED
2109 /**
2110 * nameNsPop:
2111 * @ctxt: an XML parser context
2112 *
2113 * Pops the top element/prefix/URI name from the name stack
2114 *
2115 * Returns the name just removed
2116 */
2117 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)2118 nameNsPop(xmlParserCtxtPtr ctxt)
2119 {
2120 const xmlChar *ret;
2121
2122 if (ctxt->nameNr <= 0)
2123 return (NULL);
2124 ctxt->nameNr--;
2125 if (ctxt->nameNr > 0)
2126 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2127 else
2128 ctxt->name = NULL;
2129 ret = ctxt->nameTab[ctxt->nameNr];
2130 ctxt->nameTab[ctxt->nameNr] = NULL;
2131 return (ret);
2132 }
2133 #endif /* LIBXML_PUSH_ENABLED */
2134
2135 /**
2136 * namePush:
2137 * @ctxt: an XML parser context
2138 * @value: the element name
2139 *
2140 * DEPRECATED: Internal function, do not use.
2141 *
2142 * Pushes a new element name on top of the name stack
2143 *
2144 * Returns -1 in case of error, the index in the stack otherwise
2145 */
2146 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)2147 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
2148 {
2149 if (ctxt == NULL) return (-1);
2150
2151 if (ctxt->nameNr >= ctxt->nameMax) {
2152 const xmlChar * *tmp;
2153 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2154 ctxt->nameMax * 2 *
2155 sizeof(ctxt->nameTab[0]));
2156 if (tmp == NULL) {
2157 goto mem_error;
2158 }
2159 ctxt->nameTab = tmp;
2160 ctxt->nameMax *= 2;
2161 }
2162 ctxt->nameTab[ctxt->nameNr] = value;
2163 ctxt->name = value;
2164 return (ctxt->nameNr++);
2165 mem_error:
2166 xmlErrMemory(ctxt);
2167 return (-1);
2168 }
2169
2170 /**
2171 * namePop:
2172 * @ctxt: an XML parser context
2173 *
2174 * DEPRECATED: Internal function, do not use.
2175 *
2176 * Pops the top element name from the name stack
2177 *
2178 * Returns the name just removed
2179 */
2180 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)2181 namePop(xmlParserCtxtPtr ctxt)
2182 {
2183 const xmlChar *ret;
2184
2185 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2186 return (NULL);
2187 ctxt->nameNr--;
2188 if (ctxt->nameNr > 0)
2189 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2190 else
2191 ctxt->name = NULL;
2192 ret = ctxt->nameTab[ctxt->nameNr];
2193 ctxt->nameTab[ctxt->nameNr] = NULL;
2194 return (ret);
2195 }
2196
/*
 * spacePush:
 * Pushes @val on top of the ctxt->spaceTab stack and points ctxt->space
 * at the new entry. The stack capacity is doubled when it is full.
 *
 * Returns -1 if the stack could not be grown, the index in the stack
 * otherwise.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        /* tentatively double the capacity, undone on failure */
        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    ctxt->spaceNr++;
    return(slot);
}
2215
/*
 * spacePop:
 * Pops the top entry of the ctxt->spaceTab stack. ctxt->space is moved
 * to the entry below, or to the first slot when the stack becomes empty,
 * and the vacated slot is overwritten with -1.
 *
 * Returns the value just removed, or 0 if the stack is empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    ctxt->spaceNr--;
    top = ctxt->spaceNr;

    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2228
2229 /*
2230 * Macros for accessing the content. Those should be used only by the parser,
2231 * and not exported.
2232 *
2233 * Dirty macros, i.e. one often need to make assumption on the context to
2234 * use them
2235 *
2236 * CUR_PTR return the current pointer to the xmlChar to be parsed.
2237 * To be used with extreme caution since operations consuming
2238 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break
 *           when running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
2247 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2248 * strings without newlines within the parser.
2249 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2250 * defined char within the parser.
2251 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2252 *
2253 * NEXT Skip to the next character, this does the proper decoding
2254 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
2255 * NEXTL(l) Skip the current unicode character of l xmlChars long.
2256 * CUR_CHAR(l) returns the current unicode character (int), set l
2257 * to the number of xmlChars used for the encoding [0-5].
2258 * CUR_SCHAR same but operate on a string instead of the context
2259 * COPY_BUF copy the current unicode char to the target buffer, increment
2260 * the index
2261 * GROW, SHRINK handling of input buffers
2262 */
2263
/*
 * All of these expand to expressions on a variable named "ctxt" that
 * must be in scope at the point of use.
 */
/* Byte at the current input position. */
#define RAW (*ctxt->input->cur)
/* Identical expansion to RAW. */
#define CUR (*ctxt->input->cur)
/* Byte located val positions after the current one; no bounds check. */
#define NXT(val) ctxt->input->cur[(val)]
/* The current input pointer itself (not the byte it points to). */
#define CUR_PTR ctxt->input->cur
/* The base pointer of the current input. */
#define BASE_PTR ctxt->input->base
2269
/*
 * CMPn(s, c1, ..., cn): non-zero when the first n bytes at s, read as
 * unsigned char, are equal to c1..cn in order. Evaluation stops at the
 * first mismatch. Every parameter is parenthesized so that arbitrary
 * expressions can be passed; note that s is still evaluated once per
 * compared byte, so it must not have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2287
/*
 * SKIP(val): advance the current input pointer and the column counter
 * by val bytes, then call xmlParserGrow() if the new current byte is 0.
 * Line numbers are not touched. val is evaluated twice.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * SKIPL(val): advance by val bytes one at a time, bumping the line
 * counter (and resetting the column to 1) for every '\n' crossed, then
 * call xmlParserGrow() if the new current byte is 0.
 * val is parenthesized so that any expression may be passed; it is
 * re-evaluated on every loop iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)
2305
/*
 * SHRINK: unless the parser is progressive, call xmlParserShrink() once
 * more than 2 * INPUT_CHUNK bytes lie between the input base and the
 * current position and fewer than 2 * INPUT_CHUNK bytes remain before
 * the end of the input.
 *
 * NOTE(review): this expands to a bare "if" statement that already ends
 * in ';'. It must not be used as the unbraced body of an if/else.
 */
#define SHRINK \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
	(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlParserShrink(ctxt);

/*
 * GROW: unless the parser is progressive, call xmlParserGrow() when
 * fewer than INPUT_CHUNK bytes remain between the current position and
 * the end of the input.
 *
 * NOTE(review): same bare-"if" shape as SHRINK, same restriction.
 */
#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
	xmlParserGrow(ctxt);

/* Shorthand for xmlSkipBlankChars() on the ambient ctxt. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Shorthand for xmlSkipBlankCharsPE() on the ambient ctxt. */
#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

/* Shorthand for xmlNextChar() on the ambient ctxt. */
#define NEXT xmlNextChar(ctxt)
2322
/*
 * NEXT1: advance the column counter and the current input pointer by
 * one byte, then call xmlParserGrow() if the new current byte is 0.
 * The line counter is never updated.
 *
 * NOTE(review): this expands to a plain brace block rather than a
 * do { } while (0) statement, so "if (x) NEXT1; else ..." does not
 * compile without extra braces.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);						\
    }

/*
 * NEXTL(l): account for the character at the current position (new
 * line if it is '\n', otherwise one more column) and then advance the
 * current input pointer by l bytes. Does not call xmlParserGrow().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
  } while (0)
2336
/* Decode the character at the current input position; l receives the
 * value stored by xmlCurrentChar() through its second argument. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* Same as CUR_CHAR but decoding from the string s. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(b, i, v): append the character v to buffer b at index i and
 * advance i. Values below 0x80 are stored as a single byte, anything
 * else goes through xmlCopyCharMultiByte(), whose return value is added
 * to i.
 * All parameters are parenthesized (an argument such as "c & 0x7f" used
 * to be compared as "c & (0x7f < 0x80)") and the body is a single
 * do/while statement. v may be evaluated more than once.
 */
#define COPY_BUF(b, i, v) do {						\
    if ((v) < 0x80) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v));			\
  } while (0)
2344 /**
2345 * xmlSkipBlankChars:
2346 * @ctxt: the XML parser context
2347 *
2348 * DEPRECATED: Internal function, do not use.
2349 *
2350 * Skip whitespace in the input stream.
2351 *
2352 * Returns the number of space chars skipped
2353 */
2354 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2355 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2356 const xmlChar *cur;
2357 int res = 0;
2358
2359 /*
2360 * It's Okay to use CUR/NEXT here since all the blanks are on
2361 * the ASCII range.
2362 */
2363 cur = ctxt->input->cur;
2364 while (IS_BLANK_CH(*cur)) {
2365 if (*cur == '\n') {
2366 ctxt->input->line++; ctxt->input->col = 1;
2367 } else {
2368 ctxt->input->col++;
2369 }
2370 cur++;
2371 if (res < INT_MAX)
2372 res++;
2373 if (*cur == 0) {
2374 ctxt->input->cur = cur;
2375 xmlParserGrow(ctxt);
2376 cur = ctxt->input->cur;
2377 }
2378 }
2379 ctxt->input->cur = cur;
2380
2381 return(res);
2382 }
2383
2384 static void
xmlPopPE(xmlParserCtxtPtr ctxt)2385 xmlPopPE(xmlParserCtxtPtr ctxt) {
2386 unsigned long consumed;
2387 xmlEntityPtr ent;
2388
2389 ent = ctxt->input->entity;
2390
2391 ent->flags &= ~XML_ENT_EXPANDING;
2392
2393 if ((ent->flags & XML_ENT_CHECKED) == 0) {
2394 int result;
2395
2396 /*
2397 * Read the rest of the stream in case of errors. We want
2398 * to account for the whole entity size.
2399 */
2400 do {
2401 ctxt->input->cur = ctxt->input->end;
2402 xmlParserShrink(ctxt);
2403 result = xmlParserGrow(ctxt);
2404 } while (result > 0);
2405
2406 consumed = ctxt->input->consumed;
2407 xmlSaturatedAddSizeT(&consumed,
2408 ctxt->input->end - ctxt->input->base);
2409
2410 xmlSaturatedAdd(&ent->expandedSize, consumed);
2411
2412 /*
2413 * Add to sizeentities when parsing an external entity
2414 * for the first time.
2415 */
2416 if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2417 xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2418 }
2419
2420 ent->flags |= XML_ENT_CHECKED;
2421 }
2422
2423 xmlPopInput(ctxt);
2424
2425 xmlParserEntityCheck(ctxt, ent->expandedSize);
2426 }
2427
2428 /**
2429 * xmlSkipBlankCharsPE:
2430 * @ctxt: the XML parser context
2431 *
2432 * Skip whitespace in the input stream, also handling parameter
2433 * entities.
2434 *
2435 * Returns the number of space chars skipped
2436 */
2437 static int
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt)2438 xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2439 int res = 0;
2440 int inParam;
2441 int expandParam;
2442
2443 inParam = PARSER_IN_PE(ctxt);
2444 expandParam = PARSER_EXTERNAL(ctxt);
2445
2446 if (!inParam && !expandParam)
2447 return(xmlSkipBlankChars(ctxt));
2448
2449 while (PARSER_STOPPED(ctxt) == 0) {
2450 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2451 NEXT;
2452 } else if (CUR == '%') {
2453 if ((expandParam == 0) ||
2454 (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2455 break;
2456
2457 /*
2458 * Expand parameter entity. We continue to consume
2459 * whitespace at the start of the entity and possible
2460 * even consume the whole entity and pop it. We might
2461 * even pop multiple PEs in this loop.
2462 */
2463 xmlParsePEReference(ctxt);
2464
2465 inParam = PARSER_IN_PE(ctxt);
2466 expandParam = PARSER_EXTERNAL(ctxt);
2467 } else if (CUR == 0) {
2468 if (inParam == 0)
2469 break;
2470
2471 xmlPopPE(ctxt);
2472
2473 inParam = PARSER_IN_PE(ctxt);
2474 expandParam = PARSER_EXTERNAL(ctxt);
2475 } else {
2476 break;
2477 }
2478
2479 /*
2480 * Also increase the counter when entering or exiting a PERef.
2481 * The spec says: "When a parameter-entity reference is recognized
2482 * in the DTD and included, its replacement text MUST be enlarged
2483 * by the attachment of one leading and one following space (#x20)
2484 * character."
2485 */
2486 if (res < INT_MAX)
2487 res++;
2488 }
2489
2490 return(res);
2491 }
2492
2493 /************************************************************************
2494 * *
2495 * Commodity functions to handle entities *
2496 * *
2497 ************************************************************************/
2498
2499 /**
2500 * xmlPopInput:
2501 * @ctxt: an XML parser context
2502 *
2503 * xmlPopInput: the current input pointed by ctxt->input came to an end
2504 * pop it and return the next char.
2505 *
2506 * Returns the current xmlChar in the parser context
2507 */
2508 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2509 xmlPopInput(xmlParserCtxtPtr ctxt) {
2510 xmlParserInputPtr input;
2511
2512 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2513 input = inputPop(ctxt);
2514 xmlFreeInputStream(input);
2515 if (*ctxt->input->cur == 0)
2516 xmlParserGrow(ctxt);
2517 return(CUR);
2518 }
2519
2520 /**
2521 * xmlPushInput:
2522 * @ctxt: an XML parser context
2523 * @input: an XML parser input fragment (entity, XML fragment ...).
2524 *
2525 * Push an input stream onto the stack.
2526 *
2527 * Returns -1 in case of error or the index in the input stack
2528 */
2529 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2530 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2531 int maxDepth;
2532 int ret;
2533
2534 if ((ctxt == NULL) || (input == NULL))
2535 return(-1);
2536
2537 maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
2538 if (ctxt->inputNr > maxDepth) {
2539 xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
2540 "Maximum entity nesting depth exceeded");
2541 xmlHaltParser(ctxt);
2542 return(-1);
2543 }
2544 ret = inputPush(ctxt, input);
2545 GROW;
2546 return(ret);
2547 }
2548
2549 /**
2550 * xmlParseCharRef:
2551 * @ctxt: an XML parser context
2552 *
2553 * DEPRECATED: Internal function, don't use.
2554 *
2555 * Parse a numeric character reference. Always consumes '&'.
2556 *
2557 * [66] CharRef ::= '&#' [0-9]+ ';' |
2558 * '&#x' [0-9a-fA-F]+ ';'
2559 *
2560 * [ WFC: Legal Character ]
2561 * Characters referred to using character references must match the
2562 * production for Char.
2563 *
2564 * Returns the value parsed (as an int), 0 in case of error
2565 */
2566 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2567 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2568 int val = 0;
2569 int count = 0;
2570
2571 /*
2572 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2573 */
2574 if ((RAW == '&') && (NXT(1) == '#') &&
2575 (NXT(2) == 'x')) {
2576 SKIP(3);
2577 GROW;
2578 while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2579 if (count++ > 20) {
2580 count = 0;
2581 GROW;
2582 }
2583 if ((RAW >= '0') && (RAW <= '9'))
2584 val = val * 16 + (CUR - '0');
2585 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2586 val = val * 16 + (CUR - 'a') + 10;
2587 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2588 val = val * 16 + (CUR - 'A') + 10;
2589 else {
2590 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2591 val = 0;
2592 break;
2593 }
2594 if (val > 0x110000)
2595 val = 0x110000;
2596
2597 NEXT;
2598 count++;
2599 }
2600 if (RAW == ';') {
2601 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2602 ctxt->input->col++;
2603 ctxt->input->cur++;
2604 }
2605 } else if ((RAW == '&') && (NXT(1) == '#')) {
2606 SKIP(2);
2607 GROW;
2608 while (RAW != ';') { /* loop blocked by count */
2609 if (count++ > 20) {
2610 count = 0;
2611 GROW;
2612 }
2613 if ((RAW >= '0') && (RAW <= '9'))
2614 val = val * 10 + (CUR - '0');
2615 else {
2616 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2617 val = 0;
2618 break;
2619 }
2620 if (val > 0x110000)
2621 val = 0x110000;
2622
2623 NEXT;
2624 count++;
2625 }
2626 if (RAW == ';') {
2627 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2628 ctxt->input->col++;
2629 ctxt->input->cur++;
2630 }
2631 } else {
2632 if (RAW == '&')
2633 SKIP(1);
2634 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2635 }
2636
2637 /*
2638 * [ WFC: Legal Character ]
2639 * Characters referred to using character references must match the
2640 * production for Char.
2641 */
2642 if (val >= 0x110000) {
2643 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2644 "xmlParseCharRef: character reference out of bounds\n",
2645 val);
2646 } else if (IS_CHAR(val)) {
2647 return(val);
2648 } else {
2649 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2650 "xmlParseCharRef: invalid xmlChar value %d\n",
2651 val);
2652 }
2653 return(0);
2654 }
2655
2656 /**
2657 * xmlParseStringCharRef:
2658 * @ctxt: an XML parser context
2659 * @str: a pointer to an index in the string
2660 *
2661 * parse Reference declarations, variant parsing from a string rather
2662 * than an an input flow.
2663 *
2664 * [66] CharRef ::= '&#' [0-9]+ ';' |
2665 * '&#x' [0-9a-fA-F]+ ';'
2666 *
2667 * [ WFC: Legal Character ]
2668 * Characters referred to using character references must match the
2669 * production for Char.
2670 *
2671 * Returns the value parsed (as an int), 0 in case of error, str will be
2672 * updated to the current value of the index
2673 */
2674 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2675 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2676 const xmlChar *ptr;
2677 xmlChar cur;
2678 int val = 0;
2679
2680 if ((str == NULL) || (*str == NULL)) return(0);
2681 ptr = *str;
2682 cur = *ptr;
2683 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2684 ptr += 3;
2685 cur = *ptr;
2686 while (cur != ';') { /* Non input consuming loop */
2687 if ((cur >= '0') && (cur <= '9'))
2688 val = val * 16 + (cur - '0');
2689 else if ((cur >= 'a') && (cur <= 'f'))
2690 val = val * 16 + (cur - 'a') + 10;
2691 else if ((cur >= 'A') && (cur <= 'F'))
2692 val = val * 16 + (cur - 'A') + 10;
2693 else {
2694 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2695 val = 0;
2696 break;
2697 }
2698 if (val > 0x110000)
2699 val = 0x110000;
2700
2701 ptr++;
2702 cur = *ptr;
2703 }
2704 if (cur == ';')
2705 ptr++;
2706 } else if ((cur == '&') && (ptr[1] == '#')){
2707 ptr += 2;
2708 cur = *ptr;
2709 while (cur != ';') { /* Non input consuming loops */
2710 if ((cur >= '0') && (cur <= '9'))
2711 val = val * 10 + (cur - '0');
2712 else {
2713 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2714 val = 0;
2715 break;
2716 }
2717 if (val > 0x110000)
2718 val = 0x110000;
2719
2720 ptr++;
2721 cur = *ptr;
2722 }
2723 if (cur == ';')
2724 ptr++;
2725 } else {
2726 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2727 return(0);
2728 }
2729 *str = ptr;
2730
2731 /*
2732 * [ WFC: Legal Character ]
2733 * Characters referred to using character references must match the
2734 * production for Char.
2735 */
2736 if (val >= 0x110000) {
2737 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2738 "xmlParseStringCharRef: character reference out of bounds\n",
2739 val);
2740 } else if (IS_CHAR(val)) {
2741 return(val);
2742 } else {
2743 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2744 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2745 val);
2746 }
2747 return(0);
2748 }
2749
/**
 * xmlParserHandlePEReference:
 * @ctxt:  the parser context
 *
 * DEPRECATED: Internal function, do not use.
 *
 * This function only forwards to xmlParsePEReference(); the notes
 * below describe the constraints that apply to parameter entity
 * references.
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * [ WFC: No Recursion ]
 * A parsed entity must not contain a recursive
 * reference to itself, either directly or indirectly.
 *
 * [ WFC: Entity Declared ]
 * In a document without any DTD, a document with only an internal DTD
 * subset which contains no parameter entity references, or a document
 * with "standalone='yes'", ...  ... The declaration of a parameter
 * entity must precede any reference to it...
 *
 * [ VC: Entity Declared ]
 * In a document with an external subset or external parameter entities
 * with "standalone='no'", ...  ... The declaration of a parameter entity
 * must precede any reference to it...
 *
 * [ WFC: In DTD ]
 * Parameter-entity references may only appear in the DTD.
 * NOTE: misleading but this is handled.
 *
 * A PEReference may have been detected in the current input stream
 * the handling is done accordingly to
 *      http://www.w3.org/TR/REC-xml#entproc
 * i.e.
 *   - Included in literal in entity values
 *   - Included as Parameter Entity reference within DTDs
 */
void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
    xmlParsePEReference(ctxt);
}
2788
2789 /**
2790 * xmlStringLenDecodeEntities:
2791 * @ctxt: the parser context
2792 * @str: the input string
2793 * @len: the string length
2794 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2795 * @end: an end marker xmlChar, 0 if none
2796 * @end2: an end marker xmlChar, 0 if none
2797 * @end3: an end marker xmlChar, 0 if none
2798 *
2799 * DEPRECATED: Internal function, don't use.
2800 *
2801 * Returns A newly allocated string with the substitution done. The caller
2802 * must deallocate it !
2803 */
2804 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what ATTRIBUTE_UNUSED,xmlChar end,xmlChar end2,xmlChar end3)2805 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2806 int what ATTRIBUTE_UNUSED,
2807 xmlChar end, xmlChar end2, xmlChar end3) {
2808 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2809 return(NULL);
2810
2811 if ((str[len] != 0) ||
2812 (end != 0) || (end2 != 0) || (end3 != 0))
2813 return(NULL);
2814
2815 return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2816 }
2817
2818 /**
2819 * xmlStringDecodeEntities:
2820 * @ctxt: the parser context
2821 * @str: the input string
2822 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2823 * @end: an end marker xmlChar, 0 if none
2824 * @end2: an end marker xmlChar, 0 if none
2825 * @end3: an end marker xmlChar, 0 if none
2826 *
2827 * DEPRECATED: Internal function, don't use.
2828 *
2829 * Returns A newly allocated string with the substitution done. The caller
2830 * must deallocate it !
2831 */
2832 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what ATTRIBUTE_UNUSED,xmlChar end,xmlChar end2,xmlChar end3)2833 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
2834 int what ATTRIBUTE_UNUSED,
2835 xmlChar end, xmlChar end2, xmlChar end3) {
2836 if ((ctxt == NULL) || (str == NULL))
2837 return(NULL);
2838
2839 if ((end != 0) || (end2 != 0) || (end3 != 0))
2840 return(NULL);
2841
2842 return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2843 }
2844
2845 /************************************************************************
2846 * *
2847 * Commodity functions, cleanup needed ? *
2848 * *
2849 ************************************************************************/
2850
2851 /**
2852 * areBlanks:
2853 * @ctxt: an XML parser context
2854 * @str: a xmlChar *
2855 * @len: the size of @str
2856 * @blank_chars: we know the chars are blanks
2857 *
2858 * Is this a sequence of blank chars that one can ignore ?
2859 *
2860 * Returns 1 if ignorable 0 otherwise.
2861 */
2862
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2863 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2864 int blank_chars) {
2865 int i;
2866 xmlNodePtr lastChild;
2867
2868 /*
2869 * Don't spend time trying to differentiate them, the same callback is
2870 * used !
2871 */
2872 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2873 return(0);
2874
2875 /*
2876 * Check for xml:space value.
2877 */
2878 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2879 (*(ctxt->space) == -2))
2880 return(0);
2881
2882 /*
2883 * Check that the string is made of blanks
2884 */
2885 if (blank_chars == 0) {
2886 for (i = 0;i < len;i++)
2887 if (!(IS_BLANK_CH(str[i]))) return(0);
2888 }
2889
2890 /*
2891 * Look if the element is mixed content in the DTD if available
2892 */
2893 if (ctxt->node == NULL) return(0);
2894 if (ctxt->myDoc != NULL) {
2895 xmlElementPtr elemDecl = NULL;
2896 xmlDocPtr doc = ctxt->myDoc;
2897 const xmlChar *prefix = NULL;
2898
2899 if (ctxt->node->ns)
2900 prefix = ctxt->node->ns->prefix;
2901 if (doc->intSubset != NULL)
2902 elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2903 prefix);
2904 if ((elemDecl == NULL) && (doc->extSubset != NULL))
2905 elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2906 prefix);
2907 if (elemDecl != NULL) {
2908 if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2909 return(1);
2910 if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2911 (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2912 return(0);
2913 }
2914 }
2915
2916 /*
2917 * Otherwise, heuristic :-\
2918 */
2919 if ((RAW != '<') && (RAW != 0xD)) return(0);
2920 if ((ctxt->node->children == NULL) &&
2921 (RAW == '<') && (NXT(1) == '/')) return(0);
2922
2923 lastChild = xmlGetLastChild(ctxt->node);
2924 if (lastChild == NULL) {
2925 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2926 (ctxt->node->content != NULL)) return(0);
2927 } else if (xmlNodeIsText(lastChild))
2928 return(0);
2929 else if ((ctxt->node->children != NULL) &&
2930 (xmlNodeIsText(ctxt->node->children)))
2931 return(0);
2932 return(1);
2933 }
2934
2935 /************************************************************************
2936 * *
2937 * Extra stuff for namespace support *
2938 * Relates to http://www.w3.org/TR/WD-xml-names *
2939 * *
2940 ************************************************************************/
2941
2942 /**
2943 * xmlSplitQName:
2944 * @ctxt: an XML parser context
2945 * @name: an XML parser context
2946 * @prefixOut: a xmlChar **
2947 *
2948 * parse an UTF8 encoded XML qualified name string
2949 *
2950 * [NS 5] QName ::= (Prefix ':')? LocalPart
2951 *
2952 * [NS 6] Prefix ::= NCName
2953 *
2954 * [NS 7] LocalPart ::= NCName
2955 *
2956 * Returns the local part, and prefix is updated
2957 * to get the Prefix if any.
2958 */
2959
2960 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefixOut)2961 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefixOut) {
2962 xmlChar buf[XML_MAX_NAMELEN + 5];
2963 xmlChar *buffer = NULL;
2964 int len = 0;
2965 int max = XML_MAX_NAMELEN;
2966 xmlChar *ret = NULL;
2967 xmlChar *prefix;
2968 const xmlChar *cur = name;
2969 int c;
2970
2971 if (prefixOut == NULL) return(NULL);
2972 *prefixOut = NULL;
2973
2974 if (cur == NULL) return(NULL);
2975
2976 /* nasty but well=formed */
2977 if (cur[0] == ':')
2978 return(xmlStrdup(name));
2979
2980 c = *cur++;
2981 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2982 buf[len++] = c;
2983 c = *cur++;
2984 }
2985 if (len >= max) {
2986 /*
2987 * Okay someone managed to make a huge name, so he's ready to pay
2988 * for the processing speed.
2989 */
2990 max = len * 2;
2991
2992 buffer = (xmlChar *) xmlMallocAtomic(max);
2993 if (buffer == NULL) {
2994 xmlErrMemory(ctxt);
2995 return(NULL);
2996 }
2997 memcpy(buffer, buf, len);
2998 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2999 if (len + 10 > max) {
3000 xmlChar *tmp;
3001
3002 max *= 2;
3003 tmp = (xmlChar *) xmlRealloc(buffer, max);
3004 if (tmp == NULL) {
3005 xmlFree(buffer);
3006 xmlErrMemory(ctxt);
3007 return(NULL);
3008 }
3009 buffer = tmp;
3010 }
3011 buffer[len++] = c;
3012 c = *cur++;
3013 }
3014 buffer[len] = 0;
3015 }
3016
3017 if ((c == ':') && (*cur == 0)) {
3018 if (buffer != NULL)
3019 xmlFree(buffer);
3020 return(xmlStrdup(name));
3021 }
3022
3023 if (buffer == NULL) {
3024 ret = xmlStrndup(buf, len);
3025 if (ret == NULL) {
3026 xmlErrMemory(ctxt);
3027 return(NULL);
3028 }
3029 } else {
3030 ret = buffer;
3031 buffer = NULL;
3032 max = XML_MAX_NAMELEN;
3033 }
3034
3035
3036 if (c == ':') {
3037 c = *cur;
3038 prefix = ret;
3039 if (c == 0) {
3040 ret = xmlStrndup(BAD_CAST "", 0);
3041 if (ret == NULL) {
3042 xmlFree(prefix);
3043 return(NULL);
3044 }
3045 *prefixOut = prefix;
3046 return(ret);
3047 }
3048 len = 0;
3049
3050 /*
3051 * Check that the first character is proper to start
3052 * a new name
3053 */
3054 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3055 ((c >= 0x41) && (c <= 0x5A)) ||
3056 (c == '_') || (c == ':'))) {
3057 int l;
3058 int first = CUR_SCHAR(cur, l);
3059
3060 if (!IS_LETTER(first) && (first != '_')) {
3061 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3062 "Name %s is not XML Namespace compliant\n",
3063 name);
3064 }
3065 }
3066 cur++;
3067
3068 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3069 buf[len++] = c;
3070 c = *cur++;
3071 }
3072 if (len >= max) {
3073 /*
3074 * Okay someone managed to make a huge name, so he's ready to pay
3075 * for the processing speed.
3076 */
3077 max = len * 2;
3078
3079 buffer = (xmlChar *) xmlMallocAtomic(max);
3080 if (buffer == NULL) {
3081 xmlErrMemory(ctxt);
3082 xmlFree(prefix);
3083 return(NULL);
3084 }
3085 memcpy(buffer, buf, len);
3086 while (c != 0) { /* tested bigname2.xml */
3087 if (len + 10 > max) {
3088 xmlChar *tmp;
3089
3090 max *= 2;
3091 tmp = (xmlChar *) xmlRealloc(buffer, max);
3092 if (tmp == NULL) {
3093 xmlErrMemory(ctxt);
3094 xmlFree(prefix);
3095 xmlFree(buffer);
3096 return(NULL);
3097 }
3098 buffer = tmp;
3099 }
3100 buffer[len++] = c;
3101 c = *cur++;
3102 }
3103 buffer[len] = 0;
3104 }
3105
3106 if (buffer == NULL) {
3107 ret = xmlStrndup(buf, len);
3108 if (ret == NULL) {
3109 xmlFree(prefix);
3110 return(NULL);
3111 }
3112 } else {
3113 ret = buffer;
3114 }
3115
3116 *prefixOut = prefix;
3117 }
3118
3119 return(ret);
3120 }
3121
3122 /************************************************************************
3123 * *
3124 * The parser itself *
3125 * Relates to http://www.w3.org/TR/REC-xml *
3126 * *
3127 ************************************************************************/
3128
3129 /************************************************************************
3130 * *
3131 * Routines to parse Name, NCName and NmToken *
3132 * *
3133 ************************************************************************/
3134
3135 /*
3136 * The two following functions are related to the change of accepted
3137 * characters for Name and NmToken in the Revision 5 of XML-1.0
3138 * They correspond to the modified production [4] and the new production [4a]
3139 * changes in that revision. Also note that the macros used for the
3140 * productions Letter, Digit, CombiningChar and Extender are not needed
3141 * anymore.
3142 * We still keep compatibility to pre-revision5 parsing semantic if the
3143 * new XML_PARSE_OLD10 option is given to the parser.
3144 */
3145 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3146 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3147 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3148 /*
3149 * Use the new checks of production [4] [4a] amd [5] of the
3150 * Update 5 of XML-1.0
3151 */
3152 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3153 (((c >= 'a') && (c <= 'z')) ||
3154 ((c >= 'A') && (c <= 'Z')) ||
3155 (c == '_') || (c == ':') ||
3156 ((c >= 0xC0) && (c <= 0xD6)) ||
3157 ((c >= 0xD8) && (c <= 0xF6)) ||
3158 ((c >= 0xF8) && (c <= 0x2FF)) ||
3159 ((c >= 0x370) && (c <= 0x37D)) ||
3160 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3161 ((c >= 0x200C) && (c <= 0x200D)) ||
3162 ((c >= 0x2070) && (c <= 0x218F)) ||
3163 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3164 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3165 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3166 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3167 ((c >= 0x10000) && (c <= 0xEFFFF))))
3168 return(1);
3169 } else {
3170 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3171 return(1);
3172 }
3173 return(0);
3174 }
3175
3176 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3177 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3178 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3179 /*
3180 * Use the new checks of production [4] [4a] amd [5] of the
3181 * Update 5 of XML-1.0
3182 */
3183 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3184 (((c >= 'a') && (c <= 'z')) ||
3185 ((c >= 'A') && (c <= 'Z')) ||
3186 ((c >= '0') && (c <= '9')) || /* !start */
3187 (c == '_') || (c == ':') ||
3188 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3189 ((c >= 0xC0) && (c <= 0xD6)) ||
3190 ((c >= 0xD8) && (c <= 0xF6)) ||
3191 ((c >= 0xF8) && (c <= 0x2FF)) ||
3192 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3193 ((c >= 0x370) && (c <= 0x37D)) ||
3194 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3195 ((c >= 0x200C) && (c <= 0x200D)) ||
3196 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3197 ((c >= 0x2070) && (c <= 0x218F)) ||
3198 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3199 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3200 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3201 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3202 ((c >= 0x10000) && (c <= 0xEFFFF))))
3203 return(1);
3204 } else {
3205 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3206 (c == '.') || (c == '-') ||
3207 (c == '_') || (c == ':') ||
3208 (IS_COMBINING(c)) ||
3209 (IS_EXTENDER(c)))
3210 return(1);
3211 }
3212 return(0);
3213 }
3214
3215 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3216 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3217 const xmlChar *ret;
3218 int len = 0, l;
3219 int c;
3220 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3221 XML_MAX_TEXT_LENGTH :
3222 XML_MAX_NAME_LENGTH;
3223
3224 /*
3225 * Handler for more complex cases
3226 */
3227 c = CUR_CHAR(l);
3228 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3229 /*
3230 * Use the new checks of production [4] [4a] amd [5] of the
3231 * Update 5 of XML-1.0
3232 */
3233 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3234 (!(((c >= 'a') && (c <= 'z')) ||
3235 ((c >= 'A') && (c <= 'Z')) ||
3236 (c == '_') || (c == ':') ||
3237 ((c >= 0xC0) && (c <= 0xD6)) ||
3238 ((c >= 0xD8) && (c <= 0xF6)) ||
3239 ((c >= 0xF8) && (c <= 0x2FF)) ||
3240 ((c >= 0x370) && (c <= 0x37D)) ||
3241 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3242 ((c >= 0x200C) && (c <= 0x200D)) ||
3243 ((c >= 0x2070) && (c <= 0x218F)) ||
3244 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3245 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3246 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3247 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3248 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3249 return(NULL);
3250 }
3251 len += l;
3252 NEXTL(l);
3253 c = CUR_CHAR(l);
3254 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3255 (((c >= 'a') && (c <= 'z')) ||
3256 ((c >= 'A') && (c <= 'Z')) ||
3257 ((c >= '0') && (c <= '9')) || /* !start */
3258 (c == '_') || (c == ':') ||
3259 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3260 ((c >= 0xC0) && (c <= 0xD6)) ||
3261 ((c >= 0xD8) && (c <= 0xF6)) ||
3262 ((c >= 0xF8) && (c <= 0x2FF)) ||
3263 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3264 ((c >= 0x370) && (c <= 0x37D)) ||
3265 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3266 ((c >= 0x200C) && (c <= 0x200D)) ||
3267 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3268 ((c >= 0x2070) && (c <= 0x218F)) ||
3269 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3270 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3271 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3272 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3273 ((c >= 0x10000) && (c <= 0xEFFFF))
3274 )) {
3275 if (len <= INT_MAX - l)
3276 len += l;
3277 NEXTL(l);
3278 c = CUR_CHAR(l);
3279 }
3280 } else {
3281 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3282 (!IS_LETTER(c) && (c != '_') &&
3283 (c != ':'))) {
3284 return(NULL);
3285 }
3286 len += l;
3287 NEXTL(l);
3288 c = CUR_CHAR(l);
3289
3290 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3291 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3292 (c == '.') || (c == '-') ||
3293 (c == '_') || (c == ':') ||
3294 (IS_COMBINING(c)) ||
3295 (IS_EXTENDER(c)))) {
3296 if (len <= INT_MAX - l)
3297 len += l;
3298 NEXTL(l);
3299 c = CUR_CHAR(l);
3300 }
3301 }
3302 if (len > maxLength) {
3303 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3304 return(NULL);
3305 }
3306 if (ctxt->input->cur - ctxt->input->base < len) {
3307 /*
3308 * There were a couple of bugs where PERefs lead to to a change
3309 * of the buffer. Check the buffer size to avoid passing an invalid
3310 * pointer to xmlDictLookup.
3311 */
3312 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3313 "unexpected change of input buffer");
3314 return (NULL);
3315 }
3316 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3317 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3318 else
3319 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3320 if (ret == NULL)
3321 xmlErrMemory(ctxt);
3322 return(ret);
3323 }
3324
3325 /**
3326 * xmlParseName:
3327 * @ctxt: an XML parser context
3328 *
3329 * DEPRECATED: Internal function, don't use.
3330 *
3331 * parse an XML name.
3332 *
3333 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3334 * CombiningChar | Extender
3335 *
3336 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3337 *
3338 * [6] Names ::= Name (#x20 Name)*
3339 *
3340 * Returns the Name parsed or NULL
3341 */
3342
3343 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3344 xmlParseName(xmlParserCtxtPtr ctxt) {
3345 const xmlChar *in;
3346 const xmlChar *ret;
3347 size_t count = 0;
3348 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3349 XML_MAX_TEXT_LENGTH :
3350 XML_MAX_NAME_LENGTH;
3351
3352 GROW;
3353
3354 /*
3355 * Accelerator for simple ASCII names
3356 */
3357 in = ctxt->input->cur;
3358 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3359 ((*in >= 0x41) && (*in <= 0x5A)) ||
3360 (*in == '_') || (*in == ':')) {
3361 in++;
3362 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3363 ((*in >= 0x41) && (*in <= 0x5A)) ||
3364 ((*in >= 0x30) && (*in <= 0x39)) ||
3365 (*in == '_') || (*in == '-') ||
3366 (*in == ':') || (*in == '.'))
3367 in++;
3368 if ((*in > 0) && (*in < 0x80)) {
3369 count = in - ctxt->input->cur;
3370 if (count > maxLength) {
3371 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3372 return(NULL);
3373 }
3374 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3375 ctxt->input->cur = in;
3376 ctxt->input->col += count;
3377 if (ret == NULL)
3378 xmlErrMemory(ctxt);
3379 return(ret);
3380 }
3381 }
3382 /* accelerator for special cases */
3383 return(xmlParseNameComplex(ctxt));
3384 }
3385
3386 static xmlHashedString
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3387 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3388 xmlHashedString ret;
3389 int len = 0, l;
3390 int c;
3391 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3392 XML_MAX_TEXT_LENGTH :
3393 XML_MAX_NAME_LENGTH;
3394 size_t startPosition = 0;
3395
3396 ret.name = NULL;
3397 ret.hashValue = 0;
3398
3399 /*
3400 * Handler for more complex cases
3401 */
3402 startPosition = CUR_PTR - BASE_PTR;
3403 c = CUR_CHAR(l);
3404 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3405 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3406 return(ret);
3407 }
3408
3409 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3410 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3411 if (len <= INT_MAX - l)
3412 len += l;
3413 NEXTL(l);
3414 c = CUR_CHAR(l);
3415 }
3416 if (len > maxLength) {
3417 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3418 return(ret);
3419 }
3420 ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3421 if (ret.name == NULL)
3422 xmlErrMemory(ctxt);
3423 return(ret);
3424 }
3425
3426 /**
3427 * xmlParseNCName:
3428 * @ctxt: an XML parser context
3429 * @len: length of the string parsed
3430 *
3431 * parse an XML name.
3432 *
3433 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3434 * CombiningChar | Extender
3435 *
3436 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3437 *
3438 * Returns the Name parsed or NULL
3439 */
3440
3441 static xmlHashedString
xmlParseNCName(xmlParserCtxtPtr ctxt)3442 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3443 const xmlChar *in, *e;
3444 xmlHashedString ret;
3445 size_t count = 0;
3446 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3447 XML_MAX_TEXT_LENGTH :
3448 XML_MAX_NAME_LENGTH;
3449
3450 ret.name = NULL;
3451
3452 /*
3453 * Accelerator for simple ASCII names
3454 */
3455 in = ctxt->input->cur;
3456 e = ctxt->input->end;
3457 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3458 ((*in >= 0x41) && (*in <= 0x5A)) ||
3459 (*in == '_')) && (in < e)) {
3460 in++;
3461 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3462 ((*in >= 0x41) && (*in <= 0x5A)) ||
3463 ((*in >= 0x30) && (*in <= 0x39)) ||
3464 (*in == '_') || (*in == '-') ||
3465 (*in == '.')) && (in < e))
3466 in++;
3467 if (in >= e)
3468 goto complex;
3469 if ((*in > 0) && (*in < 0x80)) {
3470 count = in - ctxt->input->cur;
3471 if (count > maxLength) {
3472 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3473 return(ret);
3474 }
3475 ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3476 ctxt->input->cur = in;
3477 ctxt->input->col += count;
3478 if (ret.name == NULL) {
3479 xmlErrMemory(ctxt);
3480 }
3481 return(ret);
3482 }
3483 }
3484 complex:
3485 return(xmlParseNCNameComplex(ctxt));
3486 }
3487
3488 /**
3489 * xmlParseNameAndCompare:
3490 * @ctxt: an XML parser context
3491 *
3492 * parse an XML name and compares for match
3493 * (specialized for endtag parsing)
3494 *
3495 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3496 * and the name for mismatch
3497 */
3498
3499 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3500 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3501 register const xmlChar *cmp = other;
3502 register const xmlChar *in;
3503 const xmlChar *ret;
3504
3505 GROW;
3506
3507 in = ctxt->input->cur;
3508 while (*in != 0 && *in == *cmp) {
3509 ++in;
3510 ++cmp;
3511 }
3512 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3513 /* success */
3514 ctxt->input->col += in - ctxt->input->cur;
3515 ctxt->input->cur = in;
3516 return (const xmlChar*) 1;
3517 }
3518 /* failure (or end of input buffer), check with full function */
3519 ret = xmlParseName (ctxt);
3520 /* strings coming from the dictionary direct compare possible */
3521 if (ret == other) {
3522 return (const xmlChar*) 1;
3523 }
3524 return ret;
3525 }
3526
3527 /**
3528 * xmlParseStringName:
3529 * @ctxt: an XML parser context
3530 * @str: a pointer to the string pointer (IN/OUT)
3531 *
3532 * parse an XML name.
3533 *
3534 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3535 * CombiningChar | Extender
3536 *
3537 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3538 *
3539 * [6] Names ::= Name (#x20 Name)*
3540 *
3541 * Returns the Name parsed or NULL. The @str pointer
3542 * is updated to the current location in the string.
3543 */
3544
3545 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3546 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3547 xmlChar buf[XML_MAX_NAMELEN + 5];
3548 xmlChar *ret;
3549 const xmlChar *cur = *str;
3550 int len = 0, l;
3551 int c;
3552 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3553 XML_MAX_TEXT_LENGTH :
3554 XML_MAX_NAME_LENGTH;
3555
3556 c = CUR_SCHAR(cur, l);
3557 if (!xmlIsNameStartChar(ctxt, c)) {
3558 return(NULL);
3559 }
3560
3561 COPY_BUF(buf, len, c);
3562 cur += l;
3563 c = CUR_SCHAR(cur, l);
3564 while (xmlIsNameChar(ctxt, c)) {
3565 COPY_BUF(buf, len, c);
3566 cur += l;
3567 c = CUR_SCHAR(cur, l);
3568 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3569 /*
3570 * Okay someone managed to make a huge name, so he's ready to pay
3571 * for the processing speed.
3572 */
3573 xmlChar *buffer;
3574 int max = len * 2;
3575
3576 buffer = (xmlChar *) xmlMallocAtomic(max);
3577 if (buffer == NULL) {
3578 xmlErrMemory(ctxt);
3579 return(NULL);
3580 }
3581 memcpy(buffer, buf, len);
3582 while (xmlIsNameChar(ctxt, c)) {
3583 if (len + 10 > max) {
3584 xmlChar *tmp;
3585
3586 max *= 2;
3587 tmp = (xmlChar *) xmlRealloc(buffer, max);
3588 if (tmp == NULL) {
3589 xmlErrMemory(ctxt);
3590 xmlFree(buffer);
3591 return(NULL);
3592 }
3593 buffer = tmp;
3594 }
3595 COPY_BUF(buffer, len, c);
3596 cur += l;
3597 c = CUR_SCHAR(cur, l);
3598 if (len > maxLength) {
3599 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3600 xmlFree(buffer);
3601 return(NULL);
3602 }
3603 }
3604 buffer[len] = 0;
3605 *str = cur;
3606 return(buffer);
3607 }
3608 }
3609 if (len > maxLength) {
3610 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3611 return(NULL);
3612 }
3613 *str = cur;
3614 ret = xmlStrndup(buf, len);
3615 if (ret == NULL)
3616 xmlErrMemory(ctxt);
3617 return(ret);
3618 }
3619
3620 /**
3621 * xmlParseNmtoken:
3622 * @ctxt: an XML parser context
3623 *
3624 * DEPRECATED: Internal function, don't use.
3625 *
3626 * parse an XML Nmtoken.
3627 *
3628 * [7] Nmtoken ::= (NameChar)+
3629 *
3630 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3631 *
3632 * Returns the Nmtoken parsed or NULL
3633 */
3634
3635 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3636 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3637 xmlChar buf[XML_MAX_NAMELEN + 5];
3638 xmlChar *ret;
3639 int len = 0, l;
3640 int c;
3641 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3642 XML_MAX_TEXT_LENGTH :
3643 XML_MAX_NAME_LENGTH;
3644
3645 c = CUR_CHAR(l);
3646
3647 while (xmlIsNameChar(ctxt, c)) {
3648 COPY_BUF(buf, len, c);
3649 NEXTL(l);
3650 c = CUR_CHAR(l);
3651 if (len >= XML_MAX_NAMELEN) {
3652 /*
3653 * Okay someone managed to make a huge token, so he's ready to pay
3654 * for the processing speed.
3655 */
3656 xmlChar *buffer;
3657 int max = len * 2;
3658
3659 buffer = (xmlChar *) xmlMallocAtomic(max);
3660 if (buffer == NULL) {
3661 xmlErrMemory(ctxt);
3662 return(NULL);
3663 }
3664 memcpy(buffer, buf, len);
3665 while (xmlIsNameChar(ctxt, c)) {
3666 if (len + 10 > max) {
3667 xmlChar *tmp;
3668
3669 max *= 2;
3670 tmp = (xmlChar *) xmlRealloc(buffer, max);
3671 if (tmp == NULL) {
3672 xmlErrMemory(ctxt);
3673 xmlFree(buffer);
3674 return(NULL);
3675 }
3676 buffer = tmp;
3677 }
3678 COPY_BUF(buffer, len, c);
3679 if (len > maxLength) {
3680 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3681 xmlFree(buffer);
3682 return(NULL);
3683 }
3684 NEXTL(l);
3685 c = CUR_CHAR(l);
3686 }
3687 buffer[len] = 0;
3688 return(buffer);
3689 }
3690 }
3691 if (len == 0)
3692 return(NULL);
3693 if (len > maxLength) {
3694 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3695 return(NULL);
3696 }
3697 ret = xmlStrndup(buf, len);
3698 if (ret == NULL)
3699 xmlErrMemory(ctxt);
3700 return(ret);
3701 }
3702
/**
 * xmlExpandPEsInEntityValue:
 * @ctxt:  parser context
 * @buf:  string buffer receiving the expanded value
 * @str:  entity value (may be NULL, in which case nothing is done)
 * @length:  size of entity value in bytes
 * @depth:  nesting depth of the caller; incremented on entry
 *
 * Validate an entity value and expand parameter entities.
 *
 * The value is walked byte by byte. Unmodified runs of input are
 * appended to @buf in chunks; character references are replaced by the
 * referenced character, parameter-entity references are expanded
 * recursively, and general entity references are only checked for
 * syntax and left in place. Invalid bytes are replaced through
 * xmlSBufAddReplChar(). Errors are reported on @ctxt; most of them end
 * the expansion immediately.
 */
static void
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
                          const xmlChar *str, int length, int depth) {
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
    const xmlChar *end, *chunk;
    int c, l;

    if (str == NULL)
        return;

    /* Bound the recursion through nested parameter entities. */
    depth += 1;
    if (depth > maxDepth) {
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
                       "Maximum entity nesting depth exceeded");
        return;
    }

    end = str + length;
    /* chunk marks the start of the input not yet copied to buf. */
    chunk = str;

    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
        c = *str;

        if (c >= 0x80) {
            /* Lead byte of a multi-byte sequence. */
            l = xmlUTF8MultibyteLen(ctxt, str,
                    "invalid character in entity value\n");
            if (l == 0) {
                /*
                 * Invalid sequence: flush what precedes it, emit the
                 * replacement character and skip a single byte.
                 */
                if (chunk < str)
                    xmlSBufAddString(buf, chunk, str - chunk);
                xmlSBufAddReplChar(buf);
                str += 1;
                chunk = str;
            } else {
                /* Valid sequence: it stays part of the pending chunk. */
                str += l;
            }
        } else if (c == '&') {
            if (str[1] == '#') {
                /* Character reference: replaced by the character. */
                if (chunk < str)
                    xmlSBufAddString(buf, chunk, str - chunk);

                c = xmlParseStringCharRef(ctxt, &str);
                if (c == 0)
                    return;

                xmlSBufAddChar(buf, c);

                chunk = str;
            } else {
                xmlChar *name;

                /*
                 * General entity references are checked for
                 * syntactic validity.
                 */
                /*
                 * chunk is not moved here, so a well-formed reference
                 * is copied verbatim with the surrounding text.
                 */
                str++;
                name = xmlParseStringName(ctxt, &str);

                if ((name == NULL) || (*str++ != ';')) {
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
                            "EntityValue: '&' forbidden except for entities "
                            "references\n");
                    xmlFree(name);
                    return;
                }

                xmlFree(name);
            }
        } else if (c == '%') {
            xmlEntityPtr ent;

            /* Parameter-entity reference: flush, then expand in place. */
            if (chunk < str)
                xmlSBufAddString(buf, chunk, str - chunk);

            ent = xmlParseStringPEReference(ctxt, &str);
            if (ent == NULL)
                return;

            /* Rejected unless PARSER_EXTERNAL(ctxt) holds. */
            if (!PARSER_EXTERNAL(ctxt)) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
                return;
            }

            if (ent->content == NULL) {
                /*
                 * Note: external parsed entities will not be loaded,
                 * it is not required for a non-validating parser to
                 * complete external PEReferences coming from the
                 * internal subset
                 */
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
                    ((ctxt->replaceEntities) ||
                     (ctxt->validate))) {
                    xmlLoadEntityContent(ctxt, ent);
                } else {
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
                                  "not validating will not read content for "
                                  "PE entity %s\n", ent->name, NULL);
                }
            }

            /*
             * TODO: Skip if ent->content is still NULL.
             */

            /* A non-zero result from the entity check ends the expansion. */
            if (xmlParserEntityCheck(ctxt, ent->length))
                return;

            /* The entity is already being expanded further up: a loop. */
            if (ent->flags & XML_ENT_EXPANDING) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
                xmlHaltParser(ctxt);
                return;
            }

            /* Mark the entity for the duration of the recursive call. */
            ent->flags |= XML_ENT_EXPANDING;
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
                                      depth);
            ent->flags &= ~XML_ENT_EXPANDING;

            chunk = str;
        } else {
            /* Normal ASCII char */
            if (!IS_BYTE_CHAR(c)) {
                /* Not a legal character: report and substitute it. */
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
                        "invalid character in entity value\n");
                if (chunk < str)
                    xmlSBufAddString(buf, chunk, str - chunk);
                xmlSBufAddReplChar(buf);
                str += 1;
                chunk = str;
            } else {
                str += 1;
            }
        }
    }

    /* Flush the trailing run of unmodified input. */
    if (chunk < str)
        xmlSBufAddString(buf, chunk, str - chunk);

    return;
}
3853
3854 /**
3855 * xmlParseEntityValue:
3856 * @ctxt: an XML parser context
3857 * @orig: if non-NULL store a copy of the original entity value
3858 *
3859 * DEPRECATED: Internal function, don't use.
3860 *
3861 * parse a value for ENTITY declarations
3862 *
3863 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3864 * "'" ([^%&'] | PEReference | Reference)* "'"
3865 *
3866 * Returns the EntityValue parsed with reference substituted or NULL
3867 */
3868 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3869 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3870 unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3871 XML_MAX_HUGE_LENGTH :
3872 XML_MAX_TEXT_LENGTH;
3873 xmlSBuf buf;
3874 const xmlChar *start;
3875 int quote, length;
3876
3877 xmlSBufInit(&buf, maxLength);
3878
3879 GROW;
3880
3881 quote = CUR;
3882 if ((quote != '"') && (quote != '\'')) {
3883 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3884 return(NULL);
3885 }
3886 CUR_PTR++;
3887
3888 length = 0;
3889
3890 /*
3891 * Copy raw content of the entity into a buffer
3892 */
3893 while (1) {
3894 int c;
3895
3896 if (PARSER_STOPPED(ctxt))
3897 goto error;
3898
3899 if (CUR_PTR >= ctxt->input->end) {
3900 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3901 goto error;
3902 }
3903
3904 c = CUR;
3905
3906 if (c == 0) {
3907 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3908 "invalid character in entity value\n");
3909 goto error;
3910 }
3911 if (c == quote)
3912 break;
3913 NEXTL(1);
3914 length += 1;
3915
3916 /*
3917 * TODO: Check growth threshold
3918 */
3919 if (ctxt->input->end - CUR_PTR < 10)
3920 GROW;
3921 }
3922
3923 start = CUR_PTR - length;
3924
3925 if (orig != NULL) {
3926 *orig = xmlStrndup(start, length);
3927 if (*orig == NULL)
3928 xmlErrMemory(ctxt);
3929 }
3930
3931 xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3932
3933 NEXTL(1);
3934
3935 return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3936
3937 error:
3938 xmlSBufCleanup(&buf, ctxt, "entity length too long");
3939 return(NULL);
3940 }
3941
3942 /**
3943 * xmlCheckEntityInAttValue:
3944 * @ctxt: parser context
3945 * @pent: entity
3946 * @depth: nesting depth
3947 *
3948 * Check an entity reference in an attribute value for validity
3949 * without expanding it.
3950 */
3951 static void
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt,xmlEntityPtr pent,int depth)3952 xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3953 int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3954 const xmlChar *str;
3955 unsigned long expandedSize = pent->length;
3956 int c, flags;
3957
3958 depth += 1;
3959 if (depth > maxDepth) {
3960 xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3961 "Maximum entity nesting depth exceeded");
3962 return;
3963 }
3964
3965 if (pent->flags & XML_ENT_EXPANDING) {
3966 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3967 xmlHaltParser(ctxt);
3968 return;
3969 }
3970
3971 /*
3972 * If we're parsing a default attribute value in DTD content,
3973 * the entity might reference other entities which weren't
3974 * defined yet, so the check isn't reliable.
3975 */
3976 if (ctxt->inSubset == 0)
3977 flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3978 else
3979 flags = XML_ENT_VALIDATED;
3980
3981 str = pent->content;
3982 if (str == NULL)
3983 goto done;
3984
3985 /*
3986 * Note that entity values are already validated. We only check
3987 * for illegal less-than signs and compute the expanded size
3988 * of the entity. No special handling for multi-byte characters
3989 * is needed.
3990 */
3991 while (!PARSER_STOPPED(ctxt)) {
3992 c = *str;
3993
3994 if (c != '&') {
3995 if (c == 0)
3996 break;
3997
3998 if (c == '<')
3999 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4000 "'<' in entity '%s' is not allowed in attributes "
4001 "values\n", pent->name);
4002
4003 str += 1;
4004 } else if (str[1] == '#') {
4005 int val;
4006
4007 val = xmlParseStringCharRef(ctxt, &str);
4008 if (val == 0) {
4009 pent->content[0] = 0;
4010 break;
4011 }
4012 } else {
4013 xmlChar *name;
4014 xmlEntityPtr ent;
4015
4016 name = xmlParseStringEntityRef(ctxt, &str);
4017 if (name == NULL) {
4018 pent->content[0] = 0;
4019 break;
4020 }
4021
4022 ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4023 xmlFree(name);
4024
4025 if ((ent != NULL) &&
4026 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4027 if ((ent->flags & flags) != flags) {
4028 pent->flags |= XML_ENT_EXPANDING;
4029 xmlCheckEntityInAttValue(ctxt, ent, depth);
4030 pent->flags &= ~XML_ENT_EXPANDING;
4031 }
4032
4033 xmlSaturatedAdd(&expandedSize, ent->expandedSize);
4034 xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
4035 }
4036 }
4037 }
4038
4039 done:
4040 if (ctxt->inSubset == 0)
4041 pent->expandedSize = expandedSize;
4042
4043 pent->flags |= flags;
4044 }
4045
/**
 * xmlExpandEntityInAttValue:
 * @ctxt:  parser context
 * @buf:  string buffer receiving the expanded value
 * @str:  entity or attribute value, NUL-terminated (NULL is a no-op)
 * @pent:  entity for entity value, NULL for attribute values
 * @normalize:  whether to collapse whitespace
 * @inSpace:  whitespace state, read and updated (IN/OUT)
 * @depth:  nesting depth of the caller; incremented on entry
 * @check:  whether to check for amplification
 *
 * Expand general entity references in an entity or attribute value.
 * Perform attribute value normalization.
 *
 * Runs of bytes that need no change are accumulated (chunkSize bytes
 * ending at the current position) and appended to @buf in one call.
 * Bytes below 0x20 are replaced by a space, and with @normalize set a
 * whitespace byte following other whitespace is dropped.
 */
static void
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
                          int *inSpace, int depth, int check) {
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
    int c, chunkSize;

    if (str == NULL)
        return;

    /* Bound the recursion through nested entities. */
    depth += 1;
    if (depth > maxDepth) {
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
                       "Maximum entity nesting depth exceeded");
        return;
    }

    if (pent != NULL) {
        /* The entity is already being expanded further up: a loop. */
        if (pent->flags & XML_ENT_EXPANDING) {
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
            xmlHaltParser(ctxt);
            return;
        }

        if (check) {
            /* A non-zero result from the entity check ends the expansion. */
            if (xmlParserEntityCheck(ctxt, pent->length))
                return;
        }
    }

    /* Number of pending bytes immediately before str not yet in buf. */
    chunkSize = 0;

    /*
     * Note that entity values are already validated. No special
     * handling for multi-byte characters is needed.
     */
    while (!PARSER_STOPPED(ctxt)) {
        c = *str;

        if (c != '&') {
            if (c == 0)
                break;

            /*
             * If this function is called without an entity, it is used to
             * expand entities in an attribute content where less-than was
             * already unescaped and is allowed.
             */
            if ((pent != NULL) && (c == '<')) {
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
                        "'<' in entity '%s' is not allowed in attributes "
                        "values\n", pent->name);
                break;
            }

            if (c <= 0x20) {
                if ((normalize) && (*inSpace)) {
                    /* Skip char */
                    if (chunkSize > 0) {
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
                        chunkSize = 0;
                    }
                } else if (c < 0x20) {
                    /* Byte below 0x20: emit a space in its place. */
                    if (chunkSize > 0) {
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
                        chunkSize = 0;
                    }

                    xmlSBufAddCString(buf, " ", 1);
                } else {
                    /* A literal space is kept as part of the chunk. */
                    chunkSize += 1;
                }

                *inSpace = 1;
            } else {
                chunkSize += 1;
                *inSpace = 0;
            }

            str += 1;
        } else if (str[1] == '#') {
            int val;

            /* Character reference: flush, then append the character. */
            if (chunkSize > 0) {
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
                chunkSize = 0;
            }

            val = xmlParseStringCharRef(ctxt, &str);
            if (val == 0) {
                /* On failure the entity content is truncated to empty. */
                if (pent != NULL)
                    pent->content[0] = 0;
                break;
            }

            if (val == ' ') {
                /* A referenced space takes part in normalization. */
                if ((!normalize) || (!*inSpace))
                    xmlSBufAddCString(buf, " ", 1);
                *inSpace = 1;
            } else {
                xmlSBufAddChar(buf, val);
                *inSpace = 0;
            }
        } else {
            xmlChar *name;
            xmlEntityPtr ent;

            /* General entity reference: flush, then expand it. */
            if (chunkSize > 0) {
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
                chunkSize = 0;
            }

            name = xmlParseStringEntityRef(ctxt, &str);
            if (name == NULL) {
                /* On failure the entity content is truncated to empty. */
                if (pent != NULL)
                    pent->content[0] = 0;
                break;
            }

            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
            xmlFree(name);

            if ((ent != NULL) &&
                (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                if (ent->content == NULL) {
                    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
                            "predefined entity has no content\n");
                    break;
                }

                /* Predefined entities are copied without recursion. */
                xmlSBufAddString(buf, ent->content, ent->length);

                *inSpace = 0;
            } else if ((ent != NULL) && (ent->content != NULL)) {
                /*
                 * Mark the enclosing entity while its child is being
                 * expanded so that a reference back to it is detected
                 * by the loop check at the top of this function.
                 */
                if (pent != NULL)
                    pent->flags |= XML_ENT_EXPANDING;
                xmlExpandEntityInAttValue(ctxt, buf, ent->content, ent,
                                          normalize, inSpace, depth, check);
                if (pent != NULL)
                    pent->flags &= ~XML_ENT_EXPANDING;
            }
        }
    }

    /* Flush the trailing run of unmodified input. */
    if (chunkSize > 0)
        xmlSBufAddString(buf, str - chunkSize, chunkSize);

    return;
}
4209
4210 /**
4211 * xmlExpandEntitiesInAttValue:
4212 * @ctxt: parser context
4213 * @str: entity or attribute value
4214 * @normalize: whether to collapse whitespace
4215 *
4216 * Expand general entity references in an entity or attribute value.
4217 * Perform attribute value normalization.
4218 *
4219 * Returns the expanded attribtue value.
4220 */
4221 xmlChar *
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt,const xmlChar * str,int normalize)4222 xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
4223 int normalize) {
4224 unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4225 XML_MAX_HUGE_LENGTH :
4226 XML_MAX_TEXT_LENGTH;
4227 xmlSBuf buf;
4228 int inSpace = 1;
4229
4230 xmlSBufInit(&buf, maxLength);
4231
4232 xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
4233 ctxt->inputNr, /* check */ 0);
4234
4235 if ((normalize) && (inSpace) && (buf.size > 0))
4236 buf.size--;
4237
4238 return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4239 }
4240
4241 /**
4242 * xmlParseAttValueInternal:
4243 * @ctxt: an XML parser context
4244 * @len: attribute len result
4245 * @alloc: whether the attribute was reallocated as a new string
4246 * @normalize: if 1 then further non-CDATA normalization must be done
4247 *
4248 * parse a value for an attribute.
4249 * NOTE: if no normalization is needed, the routine will return pointers
4250 * directly from the data buffer.
4251 *
4252 * 3.3.3 Attribute-Value Normalization:
4253 * Before the value of an attribute is passed to the application or
4254 * checked for validity, the XML processor must normalize it as follows:
4255 * - a character reference is processed by appending the referenced
4256 * character to the attribute value
4257 * - an entity reference is processed by recursively processing the
4258 * replacement text of the entity
4259 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4260 * appending #x20 to the normalized value, except that only a single
4261 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4262 * parsed entity or the literal entity value of an internal parsed entity
4263 * - other characters are processed by appending them to the normalized value
4264 * If the declared value is not CDATA, then the XML processor must further
4265 * process the normalized attribute value by discarding any leading and
4266 * trailing space (#x20) characters, and by replacing sequences of space
4267 * (#x20) characters by a single space (#x20) character.
4268 * All attributes for which no declaration has been read should be treated
4269 * by a non-validating parser as if declared CDATA.
4270 *
4271 * Returns the AttValue parsed or NULL. The value has to be freed by the
4272 * caller if it was copied, this can be detected by val[*len] == 0.
4273 */
4274 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * attlen,int * alloc,int normalize,int isNamespace)4275 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
4276 int normalize, int isNamespace) {
4277 unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4278 XML_MAX_HUGE_LENGTH :
4279 XML_MAX_TEXT_LENGTH;
4280 xmlSBuf buf;
4281 xmlChar *ret;
4282 int c, l, quote, flags, chunkSize;
4283 int inSpace = 1;
4284 int replaceEntities;
4285
4286 /* Always expand namespace URIs */
4287 replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4288
4289 xmlSBufInit(&buf, maxLength);
4290
4291 GROW;
4292
4293 quote = CUR;
4294 if ((quote != '"') && (quote != '\'')) {
4295 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4296 return(NULL);
4297 }
4298 NEXTL(1);
4299
4300 if (ctxt->inSubset == 0)
4301 flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4302 else
4303 flags = XML_ENT_VALIDATED;
4304
4305 inSpace = 1;
4306 chunkSize = 0;
4307
4308 while (1) {
4309 if (PARSER_STOPPED(ctxt))
4310 goto error;
4311
4312 if (CUR_PTR >= ctxt->input->end) {
4313 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4314 "AttValue: ' expected\n");
4315 goto error;
4316 }
4317
4318 /*
4319 * TODO: Check growth threshold
4320 */
4321 if (ctxt->input->end - CUR_PTR < 10)
4322 GROW;
4323
4324 c = CUR;
4325
4326 if (c >= 0x80) {
4327 l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4328 "invalid character in attribute value\n");
4329 if (l == 0) {
4330 if (chunkSize > 0) {
4331 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4332 chunkSize = 0;
4333 }
4334 xmlSBufAddReplChar(&buf);
4335 NEXTL(1);
4336 } else {
4337 chunkSize += l;
4338 NEXTL(l);
4339 }
4340
4341 inSpace = 0;
4342 } else if (c != '&') {
4343 if (c > 0x20) {
4344 if (c == quote)
4345 break;
4346
4347 if (c == '<')
4348 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4349
4350 chunkSize += 1;
4351 inSpace = 0;
4352 } else if (!IS_BYTE_CHAR(c)) {
4353 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4354 "invalid character in attribute value\n");
4355 if (chunkSize > 0) {
4356 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4357 chunkSize = 0;
4358 }
4359 xmlSBufAddReplChar(&buf);
4360 inSpace = 0;
4361 } else {
4362 /* Whitespace */
4363 if ((normalize) && (inSpace)) {
4364 /* Skip char */
4365 if (chunkSize > 0) {
4366 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4367 chunkSize = 0;
4368 }
4369 } else if (c < 0x20) {
4370 /* Convert to space */
4371 if (chunkSize > 0) {
4372 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4373 chunkSize = 0;
4374 }
4375
4376 xmlSBufAddCString(&buf, " ", 1);
4377 } else {
4378 chunkSize += 1;
4379 }
4380
4381 inSpace = 1;
4382
4383 if ((c == 0xD) && (NXT(1) == 0xA))
4384 CUR_PTR++;
4385 }
4386
4387 NEXTL(1);
4388 } else if (NXT(1) == '#') {
4389 int val;
4390
4391 if (chunkSize > 0) {
4392 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4393 chunkSize = 0;
4394 }
4395
4396 val = xmlParseCharRef(ctxt);
4397 if (val == 0)
4398 goto error;
4399
4400 if ((val == '&') && (!replaceEntities)) {
4401 /*
4402 * The reparsing will be done in xmlStringGetNodeList()
4403 * called by the attribute() function in SAX.c
4404 */
4405 xmlSBufAddCString(&buf, "&", 5);
4406 inSpace = 0;
4407 } else if (val == ' ') {
4408 if ((!normalize) || (!inSpace))
4409 xmlSBufAddCString(&buf, " ", 1);
4410 inSpace = 1;
4411 } else {
4412 xmlSBufAddChar(&buf, val);
4413 inSpace = 0;
4414 }
4415 } else {
4416 const xmlChar *name;
4417 xmlEntityPtr ent;
4418
4419 if (chunkSize > 0) {
4420 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4421 chunkSize = 0;
4422 }
4423
4424 name = xmlParseEntityRefInternal(ctxt);
4425 if (name == NULL) {
4426 /*
4427 * Probably a literal '&' which wasn't escaped.
4428 * TODO: Handle gracefully in recovery mode.
4429 */
4430 continue;
4431 }
4432
4433 ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4434 if (ent == NULL)
4435 continue;
4436
4437 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4438 if ((ent->content[0] == '&') && (!replaceEntities))
4439 xmlSBufAddCString(&buf, "&", 5);
4440 else
4441 xmlSBufAddString(&buf, ent->content, ent->length);
4442 inSpace = 0;
4443 } else if (replaceEntities) {
4444 xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent,
4445 normalize, &inSpace, ctxt->inputNr,
4446 /* check */ 1);
4447 } else {
4448 if ((ent->flags & flags) != flags)
4449 xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4450
4451 if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4452 ent->content[0] = 0;
4453 goto error;
4454 }
4455
4456 /*
4457 * Just output the reference
4458 */
4459 xmlSBufAddCString(&buf, "&", 1);
4460 xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4461 xmlSBufAddCString(&buf, ";", 1);
4462
4463 inSpace = 0;
4464 }
4465 }
4466 }
4467
4468 if ((buf.mem == NULL) && (alloc != NULL)) {
4469 ret = (xmlChar *) CUR_PTR - chunkSize;
4470
4471 if (attlen != NULL)
4472 *attlen = chunkSize;
4473 if ((normalize) && (inSpace) && (chunkSize > 0))
4474 *attlen -= 1;
4475 *alloc = 0;
4476
4477 /* Report potential error */
4478 xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4479 } else {
4480 if (chunkSize > 0)
4481 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4482
4483 if ((normalize) && (inSpace) && (buf.size > 0))
4484 buf.size--;
4485
4486 ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4487
4488 if (ret != NULL) {
4489 if (attlen != NULL)
4490 *attlen = buf.size;
4491 if (alloc != NULL)
4492 *alloc = 1;
4493 }
4494 }
4495
4496 NEXTL(1);
4497
4498 return(ret);
4499
4500 error:
4501 xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4502 return(NULL);
4503 }
4504
4505 /**
4506 * xmlParseAttValue:
4507 * @ctxt: an XML parser context
4508 *
4509 * DEPRECATED: Internal function, don't use.
4510 *
4511 * parse a value for an attribute
4512 * Note: the parser won't do substitution of entities here, this
4513 * will be handled later in xmlStringGetNodeList
4514 *
4515 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4516 * "'" ([^<&'] | Reference)* "'"
4517 *
4518 * 3.3.3 Attribute-Value Normalization:
4519 * Before the value of an attribute is passed to the application or
4520 * checked for validity, the XML processor must normalize it as follows:
4521 * - a character reference is processed by appending the referenced
4522 * character to the attribute value
4523 * - an entity reference is processed by recursively processing the
4524 * replacement text of the entity
4525 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4526 * appending #x20 to the normalized value, except that only a single
4527 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4528 * parsed entity or the literal entity value of an internal parsed entity
4529 * - other characters are processed by appending them to the normalized value
4530 * If the declared value is not CDATA, then the XML processor must further
4531 * process the normalized attribute value by discarding any leading and
4532 * trailing space (#x20) characters, and by replacing sequences of space
4533 * (#x20) characters by a single space (#x20) character.
4534 * All attributes for which no declaration has been read should be treated
4535 * by a non-validating parser as if declared CDATA.
4536 *
4537 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4538 */
4539
4540
4541 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4542 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4543 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4544 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4545 }
4546
4547 /**
4548 * xmlParseSystemLiteral:
4549 * @ctxt: an XML parser context
4550 *
4551 * DEPRECATED: Internal function, don't use.
4552 *
4553 * parse an XML Literal
4554 *
4555 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4556 *
4557 * Returns the SystemLiteral parsed or NULL
4558 */
4559
4560 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4561 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4562 xmlChar *buf = NULL;
4563 int len = 0;
4564 int size = XML_PARSER_BUFFER_SIZE;
4565 int cur, l;
4566 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4567 XML_MAX_TEXT_LENGTH :
4568 XML_MAX_NAME_LENGTH;
4569 xmlChar stop;
4570
4571 if (RAW == '"') {
4572 NEXT;
4573 stop = '"';
4574 } else if (RAW == '\'') {
4575 NEXT;
4576 stop = '\'';
4577 } else {
4578 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4579 return(NULL);
4580 }
4581
4582 buf = (xmlChar *) xmlMallocAtomic(size);
4583 if (buf == NULL) {
4584 xmlErrMemory(ctxt);
4585 return(NULL);
4586 }
4587 cur = CUR_CHAR(l);
4588 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4589 if (len + 5 >= size) {
4590 xmlChar *tmp;
4591
4592 size *= 2;
4593 tmp = (xmlChar *) xmlRealloc(buf, size);
4594 if (tmp == NULL) {
4595 xmlFree(buf);
4596 xmlErrMemory(ctxt);
4597 return(NULL);
4598 }
4599 buf = tmp;
4600 }
4601 COPY_BUF(buf, len, cur);
4602 if (len > maxLength) {
4603 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4604 xmlFree(buf);
4605 return(NULL);
4606 }
4607 NEXTL(l);
4608 cur = CUR_CHAR(l);
4609 }
4610 buf[len] = 0;
4611 if (!IS_CHAR(cur)) {
4612 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4613 } else {
4614 NEXT;
4615 }
4616 return(buf);
4617 }
4618
4619 /**
4620 * xmlParsePubidLiteral:
4621 * @ctxt: an XML parser context
4622 *
4623 * DEPRECATED: Internal function, don't use.
4624 *
4625 * parse an XML public literal
4626 *
4627 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4628 *
4629 * Returns the PubidLiteral parsed or NULL.
4630 */
4631
4632 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4633 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4634 xmlChar *buf = NULL;
4635 int len = 0;
4636 int size = XML_PARSER_BUFFER_SIZE;
4637 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4638 XML_MAX_TEXT_LENGTH :
4639 XML_MAX_NAME_LENGTH;
4640 xmlChar cur;
4641 xmlChar stop;
4642
4643 if (RAW == '"') {
4644 NEXT;
4645 stop = '"';
4646 } else if (RAW == '\'') {
4647 NEXT;
4648 stop = '\'';
4649 } else {
4650 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4651 return(NULL);
4652 }
4653 buf = (xmlChar *) xmlMallocAtomic(size);
4654 if (buf == NULL) {
4655 xmlErrMemory(ctxt);
4656 return(NULL);
4657 }
4658 cur = CUR;
4659 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4660 (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4661 if (len + 1 >= size) {
4662 xmlChar *tmp;
4663
4664 size *= 2;
4665 tmp = (xmlChar *) xmlRealloc(buf, size);
4666 if (tmp == NULL) {
4667 xmlErrMemory(ctxt);
4668 xmlFree(buf);
4669 return(NULL);
4670 }
4671 buf = tmp;
4672 }
4673 buf[len++] = cur;
4674 if (len > maxLength) {
4675 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4676 xmlFree(buf);
4677 return(NULL);
4678 }
4679 NEXT;
4680 cur = CUR;
4681 }
4682 buf[len] = 0;
4683 if (cur != stop) {
4684 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4685 } else {
4686 NEXTL(1);
4687 }
4688 return(buf);
4689 }
4690
4691 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4692
/*
 * Lookup table driving the inner loop of the fast character data scanner.
 * A non-zero entry means the byte can be skipped over as plain text; the
 * entry then holds the byte value itself. Zero entries are: every control
 * byte except 0x09 (so CR and LF are handled separately), '&', '<', ']'
 * and every non-ASCII byte (0x80 and above).
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
               0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
               0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
               0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
               0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
               0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
               0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x90 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4730
4731 /**
4732 * xmlParseCharDataInternal:
4733 * @ctxt: an XML parser context
4734 * @partial: buffer may contain partial UTF-8 sequences
4735 *
4736 * Parse character data. Always makes progress if the first char isn't
4737 * '<' or '&'.
4738 *
4739 * The right angle bracket (>) may be represented using the string ">",
4740 * and must, for compatibility, be escaped using ">" or a character
4741 * reference when it appears in the string "]]>" in content, when that
4742 * string is not marking the end of a CDATA section.
4743 *
4744 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4745 */
4746 static void
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt,int partial)4747 xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4748 const xmlChar *in;
4749 int nbchar = 0;
4750 int line = ctxt->input->line;
4751 int col = ctxt->input->col;
4752 int ccol;
4753
4754 GROW;
4755 /*
4756 * Accelerated common case where input don't need to be
4757 * modified before passing it to the handler.
4758 */
4759 in = ctxt->input->cur;
4760 do {
4761 get_more_space:
4762 while (*in == 0x20) { in++; ctxt->input->col++; }
4763 if (*in == 0xA) {
4764 do {
4765 ctxt->input->line++; ctxt->input->col = 1;
4766 in++;
4767 } while (*in == 0xA);
4768 goto get_more_space;
4769 }
4770 if (*in == '<') {
4771 nbchar = in - ctxt->input->cur;
4772 if (nbchar > 0) {
4773 const xmlChar *tmp = ctxt->input->cur;
4774 ctxt->input->cur = in;
4775
4776 if ((ctxt->sax != NULL) &&
4777 (ctxt->disableSAX == 0) &&
4778 (ctxt->sax->ignorableWhitespace !=
4779 ctxt->sax->characters)) {
4780 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4781 if (ctxt->sax->ignorableWhitespace != NULL)
4782 ctxt->sax->ignorableWhitespace(ctxt->userData,
4783 tmp, nbchar);
4784 } else {
4785 if (ctxt->sax->characters != NULL)
4786 ctxt->sax->characters(ctxt->userData,
4787 tmp, nbchar);
4788 if (*ctxt->space == -1)
4789 *ctxt->space = -2;
4790 }
4791 } else if ((ctxt->sax != NULL) &&
4792 (ctxt->disableSAX == 0) &&
4793 (ctxt->sax->characters != NULL)) {
4794 ctxt->sax->characters(ctxt->userData,
4795 tmp, nbchar);
4796 }
4797 }
4798 return;
4799 }
4800
4801 get_more:
4802 ccol = ctxt->input->col;
4803 while (test_char_data[*in]) {
4804 in++;
4805 ccol++;
4806 }
4807 ctxt->input->col = ccol;
4808 if (*in == 0xA) {
4809 do {
4810 ctxt->input->line++; ctxt->input->col = 1;
4811 in++;
4812 } while (*in == 0xA);
4813 goto get_more;
4814 }
4815 if (*in == ']') {
4816 if ((in[1] == ']') && (in[2] == '>')) {
4817 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4818 ctxt->input->cur = in + 1;
4819 return;
4820 }
4821 in++;
4822 ctxt->input->col++;
4823 goto get_more;
4824 }
4825 nbchar = in - ctxt->input->cur;
4826 if (nbchar > 0) {
4827 if ((ctxt->sax != NULL) &&
4828 (ctxt->disableSAX == 0) &&
4829 (ctxt->sax->ignorableWhitespace !=
4830 ctxt->sax->characters) &&
4831 (IS_BLANK_CH(*ctxt->input->cur))) {
4832 const xmlChar *tmp = ctxt->input->cur;
4833 ctxt->input->cur = in;
4834
4835 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4836 if (ctxt->sax->ignorableWhitespace != NULL)
4837 ctxt->sax->ignorableWhitespace(ctxt->userData,
4838 tmp, nbchar);
4839 } else {
4840 if (ctxt->sax->characters != NULL)
4841 ctxt->sax->characters(ctxt->userData,
4842 tmp, nbchar);
4843 if (*ctxt->space == -1)
4844 *ctxt->space = -2;
4845 }
4846 line = ctxt->input->line;
4847 col = ctxt->input->col;
4848 } else if ((ctxt->sax != NULL) &&
4849 (ctxt->disableSAX == 0)) {
4850 if (ctxt->sax->characters != NULL)
4851 ctxt->sax->characters(ctxt->userData,
4852 ctxt->input->cur, nbchar);
4853 line = ctxt->input->line;
4854 col = ctxt->input->col;
4855 }
4856 }
4857 ctxt->input->cur = in;
4858 if (*in == 0xD) {
4859 in++;
4860 if (*in == 0xA) {
4861 ctxt->input->cur = in;
4862 in++;
4863 ctxt->input->line++; ctxt->input->col = 1;
4864 continue; /* while */
4865 }
4866 in--;
4867 }
4868 if (*in == '<') {
4869 return;
4870 }
4871 if (*in == '&') {
4872 return;
4873 }
4874 SHRINK;
4875 GROW;
4876 in = ctxt->input->cur;
4877 } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4878 (*in == 0x09) || (*in == 0x0a));
4879 ctxt->input->line = line;
4880 ctxt->input->col = col;
4881 xmlParseCharDataComplex(ctxt, partial);
4882 }
4883
4884 /**
4885 * xmlParseCharDataComplex:
4886 * @ctxt: an XML parser context
4887 * @cdata: int indicating whether we are within a CDATA section
4888 *
4889 * Always makes progress if the first char isn't '<' or '&'.
4890 *
4891 * parse a CharData section.this is the fallback function
4892 * of xmlParseCharData() when the parsing requires handling
4893 * of non-ASCII characters.
4894 */
4895 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int partial)4896 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4897 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4898 int nbchar = 0;
4899 int cur, l;
4900
4901 cur = CUR_CHAR(l);
4902 while ((cur != '<') && /* checked */
4903 (cur != '&') &&
4904 (IS_CHAR(cur))) {
4905 if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4906 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4907 }
4908 COPY_BUF(buf, nbchar, cur);
4909 /* move current position before possible calling of ctxt->sax->characters */
4910 NEXTL(l);
4911 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4912 buf[nbchar] = 0;
4913
4914 /*
4915 * OK the segment is to be consumed as chars.
4916 */
4917 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4918 if (areBlanks(ctxt, buf, nbchar, 0)) {
4919 if (ctxt->sax->ignorableWhitespace != NULL)
4920 ctxt->sax->ignorableWhitespace(ctxt->userData,
4921 buf, nbchar);
4922 } else {
4923 if (ctxt->sax->characters != NULL)
4924 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4925 if ((ctxt->sax->characters !=
4926 ctxt->sax->ignorableWhitespace) &&
4927 (*ctxt->space == -1))
4928 *ctxt->space = -2;
4929 }
4930 }
4931 nbchar = 0;
4932 SHRINK;
4933 }
4934 cur = CUR_CHAR(l);
4935 }
4936 if (nbchar != 0) {
4937 buf[nbchar] = 0;
4938 /*
4939 * OK the segment is to be consumed as chars.
4940 */
4941 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4942 if (areBlanks(ctxt, buf, nbchar, 0)) {
4943 if (ctxt->sax->ignorableWhitespace != NULL)
4944 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4945 } else {
4946 if (ctxt->sax->characters != NULL)
4947 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4948 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4949 (*ctxt->space == -1))
4950 *ctxt->space = -2;
4951 }
4952 }
4953 }
4954 /*
4955 * cur == 0 can mean
4956 *
4957 * - End of buffer.
4958 * - An actual 0 character.
4959 * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4960 */
4961 if (ctxt->input->cur < ctxt->input->end) {
4962 if ((cur == 0) && (CUR != 0)) {
4963 if (partial == 0) {
4964 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4965 "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4966 NEXTL(1);
4967 }
4968 } else if ((cur != '<') && (cur != '&')) {
4969 /* Generate the error and skip the offending character */
4970 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4971 "PCDATA invalid Char value %d\n", cur);
4972 NEXTL(l);
4973 }
4974 }
4975 }
4976
4977 /**
4978 * xmlParseCharData:
4979 * @ctxt: an XML parser context
4980 * @cdata: unused
4981 *
4982 * DEPRECATED: Internal function, don't use.
4983 */
4984 void
xmlParseCharData(xmlParserCtxtPtr ctxt,ATTRIBUTE_UNUSED int cdata)4985 xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4986 xmlParseCharDataInternal(ctxt, 0);
4987 }
4988
4989 /**
4990 * xmlParseExternalID:
4991 * @ctxt: an XML parser context
4992 * @publicID: a xmlChar** receiving PubidLiteral
4993 * @strict: indicate whether we should restrict parsing to only
4994 * production [75], see NOTE below
4995 *
4996 * DEPRECATED: Internal function, don't use.
4997 *
4998 * Parse an External ID or a Public ID
4999 *
5000 * NOTE: Productions [75] and [83] interact badly since [75] can generate
5001 * 'PUBLIC' S PubidLiteral S SystemLiteral
5002 *
5003 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
5004 * | 'PUBLIC' S PubidLiteral S SystemLiteral
5005 *
5006 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
5007 *
5008 * Returns the function returns SystemLiteral and in the second
5009 * case publicID receives PubidLiteral, is strict is off
5010 * it is possible to return NULL and have publicID set.
5011 */
5012
5013 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)5014 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
5015 xmlChar *URI = NULL;
5016
5017 *publicID = NULL;
5018 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
5019 SKIP(6);
5020 if (SKIP_BLANKS == 0) {
5021 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5022 "Space required after 'SYSTEM'\n");
5023 }
5024 URI = xmlParseSystemLiteral(ctxt);
5025 if (URI == NULL) {
5026 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5027 }
5028 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
5029 SKIP(6);
5030 if (SKIP_BLANKS == 0) {
5031 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5032 "Space required after 'PUBLIC'\n");
5033 }
5034 *publicID = xmlParsePubidLiteral(ctxt);
5035 if (*publicID == NULL) {
5036 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
5037 }
5038 if (strict) {
5039 /*
5040 * We don't handle [83] so "S SystemLiteral" is required.
5041 */
5042 if (SKIP_BLANKS == 0) {
5043 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5044 "Space required after the Public Identifier\n");
5045 }
5046 } else {
5047 /*
5048 * We handle [83] so we return immediately, if
5049 * "S SystemLiteral" is not detected. We skip blanks if no
5050 * system literal was found, but this is harmless since we must
5051 * be at the end of a NotationDecl.
5052 */
5053 if (SKIP_BLANKS == 0) return(NULL);
5054 if ((CUR != '\'') && (CUR != '"')) return(NULL);
5055 }
5056 URI = xmlParseSystemLiteral(ctxt);
5057 if (URI == NULL) {
5058 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5059 }
5060 }
5061 return(URI);
5062 }
5063
5064 /**
5065 * xmlParseCommentComplex:
5066 * @ctxt: an XML parser context
5067 * @buf: the already parsed part of the buffer
5068 * @len: number of bytes in the buffer
5069 * @size: allocated size of the buffer
5070 *
5071 * Skip an XML (SGML) comment <!-- .... -->
5072 * The spec says that "For compatibility, the string "--" (double-hyphen)
5073 * must not occur within comments. "
5074 * This is the slow routine in case the accelerator for ascii didn't work
5075 *
5076 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5077 */
5078 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)5079 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
5080 size_t len, size_t size) {
5081 int q, ql;
5082 int r, rl;
5083 int cur, l;
5084 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5085 XML_MAX_HUGE_LENGTH :
5086 XML_MAX_TEXT_LENGTH;
5087
5088 if (buf == NULL) {
5089 len = 0;
5090 size = XML_PARSER_BUFFER_SIZE;
5091 buf = (xmlChar *) xmlMallocAtomic(size);
5092 if (buf == NULL) {
5093 xmlErrMemory(ctxt);
5094 return;
5095 }
5096 }
5097 q = CUR_CHAR(ql);
5098 if (q == 0)
5099 goto not_terminated;
5100 if (!IS_CHAR(q)) {
5101 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5102 "xmlParseComment: invalid xmlChar value %d\n",
5103 q);
5104 xmlFree (buf);
5105 return;
5106 }
5107 NEXTL(ql);
5108 r = CUR_CHAR(rl);
5109 if (r == 0)
5110 goto not_terminated;
5111 if (!IS_CHAR(r)) {
5112 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5113 "xmlParseComment: invalid xmlChar value %d\n",
5114 r);
5115 xmlFree (buf);
5116 return;
5117 }
5118 NEXTL(rl);
5119 cur = CUR_CHAR(l);
5120 if (cur == 0)
5121 goto not_terminated;
5122 while (IS_CHAR(cur) && /* checked */
5123 ((cur != '>') ||
5124 (r != '-') || (q != '-'))) {
5125 if ((r == '-') && (q == '-')) {
5126 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
5127 }
5128 if (len + 5 >= size) {
5129 xmlChar *new_buf;
5130 size_t new_size;
5131
5132 new_size = size * 2;
5133 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
5134 if (new_buf == NULL) {
5135 xmlFree (buf);
5136 xmlErrMemory(ctxt);
5137 return;
5138 }
5139 buf = new_buf;
5140 size = new_size;
5141 }
5142 COPY_BUF(buf, len, q);
5143 if (len > maxLength) {
5144 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5145 "Comment too big found", NULL);
5146 xmlFree (buf);
5147 return;
5148 }
5149
5150 q = r;
5151 ql = rl;
5152 r = cur;
5153 rl = l;
5154
5155 NEXTL(l);
5156 cur = CUR_CHAR(l);
5157
5158 }
5159 buf[len] = 0;
5160 if (cur == 0) {
5161 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5162 "Comment not terminated \n<!--%.50s\n", buf);
5163 } else if (!IS_CHAR(cur)) {
5164 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5165 "xmlParseComment: invalid xmlChar value %d\n",
5166 cur);
5167 } else {
5168 NEXT;
5169 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5170 (!ctxt->disableSAX))
5171 ctxt->sax->comment(ctxt->userData, buf);
5172 }
5173 xmlFree(buf);
5174 return;
5175 not_terminated:
5176 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5177 "Comment not terminated\n", NULL);
5178 xmlFree(buf);
5179 return;
5180 }
5181
5182 /**
5183 * xmlParseComment:
5184 * @ctxt: an XML parser context
5185 *
5186 * DEPRECATED: Internal function, don't use.
5187 *
5188 * Parse an XML (SGML) comment. Always consumes '<!'.
5189 *
5190 * The spec says that "For compatibility, the string "--" (double-hyphen)
5191 * must not occur within comments. "
5192 *
5193 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5194 */
5195 void
xmlParseComment(xmlParserCtxtPtr ctxt)5196 xmlParseComment(xmlParserCtxtPtr ctxt) {
5197 xmlChar *buf = NULL;
5198 size_t size = XML_PARSER_BUFFER_SIZE;
5199 size_t len = 0;
5200 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5201 XML_MAX_HUGE_LENGTH :
5202 XML_MAX_TEXT_LENGTH;
5203 const xmlChar *in;
5204 size_t nbchar = 0;
5205 int ccol;
5206
5207 /*
5208 * Check that there is a comment right here.
5209 */
5210 if ((RAW != '<') || (NXT(1) != '!'))
5211 return;
5212 SKIP(2);
5213 if ((RAW != '-') || (NXT(1) != '-'))
5214 return;
5215 SKIP(2);
5216 GROW;
5217
5218 /*
5219 * Accelerated common case where input don't need to be
5220 * modified before passing it to the handler.
5221 */
5222 in = ctxt->input->cur;
5223 do {
5224 if (*in == 0xA) {
5225 do {
5226 ctxt->input->line++; ctxt->input->col = 1;
5227 in++;
5228 } while (*in == 0xA);
5229 }
5230 get_more:
5231 ccol = ctxt->input->col;
5232 while (((*in > '-') && (*in <= 0x7F)) ||
5233 ((*in >= 0x20) && (*in < '-')) ||
5234 (*in == 0x09)) {
5235 in++;
5236 ccol++;
5237 }
5238 ctxt->input->col = ccol;
5239 if (*in == 0xA) {
5240 do {
5241 ctxt->input->line++; ctxt->input->col = 1;
5242 in++;
5243 } while (*in == 0xA);
5244 goto get_more;
5245 }
5246 nbchar = in - ctxt->input->cur;
5247 /*
5248 * save current set of data
5249 */
5250 if (nbchar > 0) {
5251 if (buf == NULL) {
5252 if ((*in == '-') && (in[1] == '-'))
5253 size = nbchar + 1;
5254 else
5255 size = XML_PARSER_BUFFER_SIZE + nbchar;
5256 buf = (xmlChar *) xmlMallocAtomic(size);
5257 if (buf == NULL) {
5258 xmlErrMemory(ctxt);
5259 return;
5260 }
5261 len = 0;
5262 } else if (len + nbchar + 1 >= size) {
5263 xmlChar *new_buf;
5264 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5265 new_buf = (xmlChar *) xmlRealloc(buf, size);
5266 if (new_buf == NULL) {
5267 xmlFree (buf);
5268 xmlErrMemory(ctxt);
5269 return;
5270 }
5271 buf = new_buf;
5272 }
5273 memcpy(&buf[len], ctxt->input->cur, nbchar);
5274 len += nbchar;
5275 buf[len] = 0;
5276 }
5277 if (len > maxLength) {
5278 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5279 "Comment too big found", NULL);
5280 xmlFree (buf);
5281 return;
5282 }
5283 ctxt->input->cur = in;
5284 if (*in == 0xA) {
5285 in++;
5286 ctxt->input->line++; ctxt->input->col = 1;
5287 }
5288 if (*in == 0xD) {
5289 in++;
5290 if (*in == 0xA) {
5291 ctxt->input->cur = in;
5292 in++;
5293 ctxt->input->line++; ctxt->input->col = 1;
5294 goto get_more;
5295 }
5296 in--;
5297 }
5298 SHRINK;
5299 GROW;
5300 in = ctxt->input->cur;
5301 if (*in == '-') {
5302 if (in[1] == '-') {
5303 if (in[2] == '>') {
5304 SKIP(3);
5305 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5306 (!ctxt->disableSAX)) {
5307 if (buf != NULL)
5308 ctxt->sax->comment(ctxt->userData, buf);
5309 else
5310 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5311 }
5312 if (buf != NULL)
5313 xmlFree(buf);
5314 return;
5315 }
5316 if (buf != NULL) {
5317 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5318 "Double hyphen within comment: "
5319 "<!--%.50s\n",
5320 buf);
5321 } else
5322 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5323 "Double hyphen within comment\n", NULL);
5324 in++;
5325 ctxt->input->col++;
5326 }
5327 in++;
5328 ctxt->input->col++;
5329 goto get_more;
5330 }
5331 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5332 xmlParseCommentComplex(ctxt, buf, len, size);
5333 return;
5334 }
5335
5336
5337 /**
5338 * xmlParsePITarget:
5339 * @ctxt: an XML parser context
5340 *
5341 * DEPRECATED: Internal function, don't use.
5342 *
5343 * parse the name of a PI
5344 *
5345 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5346 *
5347 * Returns the PITarget name or NULL
5348 */
5349
5350 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)5351 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5352 const xmlChar *name;
5353
5354 name = xmlParseName(ctxt);
5355 if ((name != NULL) &&
5356 ((name[0] == 'x') || (name[0] == 'X')) &&
5357 ((name[1] == 'm') || (name[1] == 'M')) &&
5358 ((name[2] == 'l') || (name[2] == 'L'))) {
5359 int i;
5360 if ((name[0] == 'x') && (name[1] == 'm') &&
5361 (name[2] == 'l') && (name[3] == 0)) {
5362 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5363 "XML declaration allowed only at the start of the document\n");
5364 return(name);
5365 } else if (name[3] == 0) {
5366 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5367 return(name);
5368 }
5369 for (i = 0;;i++) {
5370 if (xmlW3CPIs[i] == NULL) break;
5371 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5372 return(name);
5373 }
5374 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5375 "xmlParsePITarget: invalid name prefix 'xml'\n",
5376 NULL, NULL);
5377 }
5378 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5379 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5380 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5381 }
5382 return(name);
5383 }
5384
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * A malformed value produces an XML_WAR_CATALOG_PI warning, except when
 * the '=' after "catalog" is missing, in which case the PI is silently
 * ignored.
 */
static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *uri = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* Expect: S? "catalog" S? '=' S? quoted-value S? */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* Find the matching closing quote. */
    for (start = p; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    uri = xmlStrndup(start, p - start);
    p++;

    /* Only blanks may follow the value. */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (uri != NULL) {
        /*
         * Unfortunately, the catalog API doesn't report OOM errors.
         * xmlGetLastError isn't very helpful since we don't know
         * where the last error came from. We'd have to reset it
         * before this call and restore it afterwards.
         */
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
5452
5453 /**
5454 * xmlParsePI:
5455 * @ctxt: an XML parser context
5456 *
5457 * DEPRECATED: Internal function, don't use.
5458 *
5459 * parse an XML Processing Instruction.
5460 *
5461 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5462 *
5463 * The processing is transferred to SAX once parsed.
5464 */
5465
5466 void
xmlParsePI(xmlParserCtxtPtr ctxt)5467 xmlParsePI(xmlParserCtxtPtr ctxt) {
5468 xmlChar *buf = NULL;
5469 size_t len = 0;
5470 size_t size = XML_PARSER_BUFFER_SIZE;
5471 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5472 XML_MAX_HUGE_LENGTH :
5473 XML_MAX_TEXT_LENGTH;
5474 int cur, l;
5475 const xmlChar *target;
5476
5477 if ((RAW == '<') && (NXT(1) == '?')) {
5478 /*
5479 * this is a Processing Instruction.
5480 */
5481 SKIP(2);
5482
5483 /*
5484 * Parse the target name and check for special support like
5485 * namespace.
5486 */
5487 target = xmlParsePITarget(ctxt);
5488 if (target != NULL) {
5489 if ((RAW == '?') && (NXT(1) == '>')) {
5490 SKIP(2);
5491
5492 /*
5493 * SAX: PI detected.
5494 */
5495 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5496 (ctxt->sax->processingInstruction != NULL))
5497 ctxt->sax->processingInstruction(ctxt->userData,
5498 target, NULL);
5499 return;
5500 }
5501 buf = (xmlChar *) xmlMallocAtomic(size);
5502 if (buf == NULL) {
5503 xmlErrMemory(ctxt);
5504 return;
5505 }
5506 if (SKIP_BLANKS == 0) {
5507 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5508 "ParsePI: PI %s space expected\n", target);
5509 }
5510 cur = CUR_CHAR(l);
5511 while (IS_CHAR(cur) && /* checked */
5512 ((cur != '?') || (NXT(1) != '>'))) {
5513 if (len + 5 >= size) {
5514 xmlChar *tmp;
5515 size_t new_size = size * 2;
5516 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5517 if (tmp == NULL) {
5518 xmlErrMemory(ctxt);
5519 xmlFree(buf);
5520 return;
5521 }
5522 buf = tmp;
5523 size = new_size;
5524 }
5525 COPY_BUF(buf, len, cur);
5526 if (len > maxLength) {
5527 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5528 "PI %s too big found", target);
5529 xmlFree(buf);
5530 return;
5531 }
5532 NEXTL(l);
5533 cur = CUR_CHAR(l);
5534 }
5535 buf[len] = 0;
5536 if (cur != '?') {
5537 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5538 "ParsePI: PI %s never end ...\n", target);
5539 } else {
5540 SKIP(2);
5541
5542 #ifdef LIBXML_CATALOG_ENABLED
5543 if ((ctxt->inSubset == 0) &&
5544 (xmlStrEqual(target, XML_CATALOG_PI))) {
5545 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5546 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5547 (allow == XML_CATA_ALLOW_ALL))
5548 xmlParseCatalogPI(ctxt, buf);
5549 }
5550 #endif
5551
5552
5553 /*
5554 * SAX: PI detected.
5555 */
5556 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5557 (ctxt->sax->processingInstruction != NULL))
5558 ctxt->sax->processingInstruction(ctxt->userData,
5559 target, buf);
5560 }
5561 xmlFree(buf);
5562 } else {
5563 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5564 }
5565 }
5566 }
5567
5568 /**
5569 * xmlParseNotationDecl:
5570 * @ctxt: an XML parser context
5571 *
5572 * DEPRECATED: Internal function, don't use.
5573 *
5574 * Parse a notation declaration. Always consumes '<!'.
5575 *
5576 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5577 *
5578 * Hence there is actually 3 choices:
5579 * 'PUBLIC' S PubidLiteral
5580 * 'PUBLIC' S PubidLiteral S SystemLiteral
5581 * and 'SYSTEM' S SystemLiteral
5582 *
5583 * See the NOTE on xmlParseExternalID().
5584 */
5585
5586 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5587 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5588 const xmlChar *name;
5589 xmlChar *Pubid;
5590 xmlChar *Systemid;
5591
5592 if ((CUR != '<') || (NXT(1) != '!'))
5593 return;
5594 SKIP(2);
5595
5596 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5597 int inputid = ctxt->input->id;
5598 SKIP(8);
5599 if (SKIP_BLANKS_PE == 0) {
5600 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5601 "Space required after '<!NOTATION'\n");
5602 return;
5603 }
5604
5605 name = xmlParseName(ctxt);
5606 if (name == NULL) {
5607 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5608 return;
5609 }
5610 if (xmlStrchr(name, ':') != NULL) {
5611 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5612 "colons are forbidden from notation names '%s'\n",
5613 name, NULL, NULL);
5614 }
5615 if (SKIP_BLANKS_PE == 0) {
5616 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5617 "Space required after the NOTATION name'\n");
5618 return;
5619 }
5620
5621 /*
5622 * Parse the IDs.
5623 */
5624 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5625 SKIP_BLANKS_PE;
5626
5627 if (RAW == '>') {
5628 if (inputid != ctxt->input->id) {
5629 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5630 "Notation declaration doesn't start and stop"
5631 " in the same entity\n");
5632 }
5633 NEXT;
5634 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5635 (ctxt->sax->notationDecl != NULL))
5636 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5637 } else {
5638 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5639 }
5640 if (Systemid != NULL) xmlFree(Systemid);
5641 if (Pubid != NULL) xmlFree(Pubid);
5642 }
5643 }
5644
5645 /**
5646 * xmlParseEntityDecl:
5647 * @ctxt: an XML parser context
5648 *
5649 * DEPRECATED: Internal function, don't use.
5650 *
5651 * Parse an entity declaration. Always consumes '<!'.
5652 *
5653 * [70] EntityDecl ::= GEDecl | PEDecl
5654 *
5655 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5656 *
5657 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5658 *
5659 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5660 *
5661 * [74] PEDef ::= EntityValue | ExternalID
5662 *
5663 * [76] NDataDecl ::= S 'NDATA' S Name
5664 *
5665 * [ VC: Notation Declared ]
5666 * The Name must match the declared name of a notation.
5667 */
5668
5669 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5670 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5671 const xmlChar *name = NULL;
5672 xmlChar *value = NULL;
5673 xmlChar *URI = NULL, *literal = NULL;
5674 const xmlChar *ndata = NULL;
5675 int isParameter = 0;
5676 xmlChar *orig = NULL;
5677
5678 if ((CUR != '<') || (NXT(1) != '!'))
5679 return;
5680 SKIP(2);
5681
5682 /* GROW; done in the caller */
5683 if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5684 int inputid = ctxt->input->id;
5685 SKIP(6);
5686 if (SKIP_BLANKS_PE == 0) {
5687 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5688 "Space required after '<!ENTITY'\n");
5689 }
5690
5691 if (RAW == '%') {
5692 NEXT;
5693 if (SKIP_BLANKS_PE == 0) {
5694 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5695 "Space required after '%%'\n");
5696 }
5697 isParameter = 1;
5698 }
5699
5700 name = xmlParseName(ctxt);
5701 if (name == NULL) {
5702 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5703 "xmlParseEntityDecl: no name\n");
5704 return;
5705 }
5706 if (xmlStrchr(name, ':') != NULL) {
5707 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5708 "colons are forbidden from entities names '%s'\n",
5709 name, NULL, NULL);
5710 }
5711 if (SKIP_BLANKS_PE == 0) {
5712 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5713 "Space required after the entity name\n");
5714 }
5715
5716 /*
5717 * handle the various case of definitions...
5718 */
5719 if (isParameter) {
5720 if ((RAW == '"') || (RAW == '\'')) {
5721 value = xmlParseEntityValue(ctxt, &orig);
5722 if (value) {
5723 if ((ctxt->sax != NULL) &&
5724 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5725 ctxt->sax->entityDecl(ctxt->userData, name,
5726 XML_INTERNAL_PARAMETER_ENTITY,
5727 NULL, NULL, value);
5728 }
5729 } else {
5730 URI = xmlParseExternalID(ctxt, &literal, 1);
5731 if ((URI == NULL) && (literal == NULL)) {
5732 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5733 }
5734 if (URI) {
5735 if (xmlStrchr(URI, '#')) {
5736 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5737 } else {
5738 if ((ctxt->sax != NULL) &&
5739 (!ctxt->disableSAX) &&
5740 (ctxt->sax->entityDecl != NULL))
5741 ctxt->sax->entityDecl(ctxt->userData, name,
5742 XML_EXTERNAL_PARAMETER_ENTITY,
5743 literal, URI, NULL);
5744 }
5745 }
5746 }
5747 } else {
5748 if ((RAW == '"') || (RAW == '\'')) {
5749 value = xmlParseEntityValue(ctxt, &orig);
5750 if ((ctxt->sax != NULL) &&
5751 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5752 ctxt->sax->entityDecl(ctxt->userData, name,
5753 XML_INTERNAL_GENERAL_ENTITY,
5754 NULL, NULL, value);
5755 /*
5756 * For expat compatibility in SAX mode.
5757 */
5758 if ((ctxt->myDoc == NULL) ||
5759 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5760 if (ctxt->myDoc == NULL) {
5761 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5762 if (ctxt->myDoc == NULL) {
5763 xmlErrMemory(ctxt);
5764 goto done;
5765 }
5766 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5767 }
5768 if (ctxt->myDoc->intSubset == NULL) {
5769 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5770 BAD_CAST "fake", NULL, NULL);
5771 if (ctxt->myDoc->intSubset == NULL) {
5772 xmlErrMemory(ctxt);
5773 goto done;
5774 }
5775 }
5776
5777 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5778 NULL, NULL, value);
5779 }
5780 } else {
5781 URI = xmlParseExternalID(ctxt, &literal, 1);
5782 if ((URI == NULL) && (literal == NULL)) {
5783 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5784 }
5785 if (URI) {
5786 if (xmlStrchr(URI, '#')) {
5787 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5788 }
5789 }
5790 if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5791 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5792 "Space required before 'NDATA'\n");
5793 }
5794 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5795 SKIP(5);
5796 if (SKIP_BLANKS_PE == 0) {
5797 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5798 "Space required after 'NDATA'\n");
5799 }
5800 ndata = xmlParseName(ctxt);
5801 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5802 (ctxt->sax->unparsedEntityDecl != NULL))
5803 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5804 literal, URI, ndata);
5805 } else {
5806 if ((ctxt->sax != NULL) &&
5807 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5808 ctxt->sax->entityDecl(ctxt->userData, name,
5809 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5810 literal, URI, NULL);
5811 /*
5812 * For expat compatibility in SAX mode.
5813 * assuming the entity replacement was asked for
5814 */
5815 if ((ctxt->replaceEntities != 0) &&
5816 ((ctxt->myDoc == NULL) ||
5817 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5818 if (ctxt->myDoc == NULL) {
5819 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5820 if (ctxt->myDoc == NULL) {
5821 xmlErrMemory(ctxt);
5822 goto done;
5823 }
5824 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5825 }
5826
5827 if (ctxt->myDoc->intSubset == NULL) {
5828 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5829 BAD_CAST "fake", NULL, NULL);
5830 if (ctxt->myDoc->intSubset == NULL) {
5831 xmlErrMemory(ctxt);
5832 goto done;
5833 }
5834 }
5835 xmlSAX2EntityDecl(ctxt, name,
5836 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5837 literal, URI, NULL);
5838 }
5839 }
5840 }
5841 }
5842 SKIP_BLANKS_PE;
5843 if (RAW != '>') {
5844 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5845 "xmlParseEntityDecl: entity %s not terminated\n", name);
5846 xmlHaltParser(ctxt);
5847 } else {
5848 if (inputid != ctxt->input->id) {
5849 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5850 "Entity declaration doesn't start and stop in"
5851 " the same entity\n");
5852 }
5853 NEXT;
5854 }
5855 if (orig != NULL) {
5856 /*
5857 * Ugly mechanism to save the raw entity value.
5858 */
5859 xmlEntityPtr cur = NULL;
5860
5861 if (isParameter) {
5862 if ((ctxt->sax != NULL) &&
5863 (ctxt->sax->getParameterEntity != NULL))
5864 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5865 } else {
5866 if ((ctxt->sax != NULL) &&
5867 (ctxt->sax->getEntity != NULL))
5868 cur = ctxt->sax->getEntity(ctxt->userData, name);
5869 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5870 cur = xmlSAX2GetEntity(ctxt, name);
5871 }
5872 }
5873 if ((cur != NULL) && (cur->orig == NULL)) {
5874 cur->orig = orig;
5875 orig = NULL;
5876 }
5877 }
5878
5879 done:
5880 if (value != NULL) xmlFree(value);
5881 if (URI != NULL) xmlFree(URI);
5882 if (literal != NULL) xmlFree(literal);
5883 if (orig != NULL) xmlFree(orig);
5884 }
5885 }
5886
5887 /**
5888 * xmlParseDefaultDecl:
5889 * @ctxt: an XML parser context
5890 * @value: Receive a possible fixed default value for the attribute
5891 *
5892 * DEPRECATED: Internal function, don't use.
5893 *
5894 * Parse an attribute default declaration
5895 *
5896 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5897 *
5898 * [ VC: Required Attribute ]
5899 * if the default declaration is the keyword #REQUIRED, then the
5900 * attribute must be specified for all elements of the type in the
5901 * attribute-list declaration.
5902 *
5903 * [ VC: Attribute Default Legal ]
5904 * The declared default value must meet the lexical constraints of
5905 * the declared attribute type c.f. xmlValidateAttributeDecl()
5906 *
5907 * [ VC: Fixed Attribute Default ]
5908 * if an attribute has a default value declared with the #FIXED
5909 * keyword, instances of that attribute must match the default value.
5910 *
5911 * [ WFC: No < in Attribute Values ]
5912 * handled in xmlParseAttValue()
5913 *
5914 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5915 * or XML_ATTRIBUTE_FIXED.
5916 */
5917
5918 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5919 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5920 int val;
5921 xmlChar *ret;
5922
5923 *value = NULL;
5924 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5925 SKIP(9);
5926 return(XML_ATTRIBUTE_REQUIRED);
5927 }
5928 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5929 SKIP(8);
5930 return(XML_ATTRIBUTE_IMPLIED);
5931 }
5932 val = XML_ATTRIBUTE_NONE;
5933 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5934 SKIP(6);
5935 val = XML_ATTRIBUTE_FIXED;
5936 if (SKIP_BLANKS_PE == 0) {
5937 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5938 "Space required after '#FIXED'\n");
5939 }
5940 }
5941 ret = xmlParseAttValue(ctxt);
5942 if (ret == NULL) {
5943 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5944 "Attribute default value declaration error\n");
5945 } else
5946 *value = ret;
5947 return(val);
5948 }
5949
5950 /**
5951 * xmlParseNotationType:
5952 * @ctxt: an XML parser context
5953 *
5954 * DEPRECATED: Internal function, don't use.
5955 *
5956 * parse an Notation attribute type.
5957 *
5958 * Note: the leading 'NOTATION' S part has already being parsed...
5959 *
5960 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5961 *
5962 * [ VC: Notation Attributes ]
5963 * Values of this type must match one of the notation names included
5964 * in the declaration; all notation names in the declaration must be declared.
5965 *
5966 * Returns: the notation attribute tree built while parsing
5967 */
5968
5969 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5970 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5971 const xmlChar *name;
5972 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5973
5974 if (RAW != '(') {
5975 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5976 return(NULL);
5977 }
5978 do {
5979 NEXT;
5980 SKIP_BLANKS_PE;
5981 name = xmlParseName(ctxt);
5982 if (name == NULL) {
5983 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5984 "Name expected in NOTATION declaration\n");
5985 xmlFreeEnumeration(ret);
5986 return(NULL);
5987 }
5988 tmp = ret;
5989 while (tmp != NULL) {
5990 if (xmlStrEqual(name, tmp->name)) {
5991 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5992 "standalone: attribute notation value token %s duplicated\n",
5993 name, NULL);
5994 if (!xmlDictOwns(ctxt->dict, name))
5995 xmlFree((xmlChar *) name);
5996 break;
5997 }
5998 tmp = tmp->next;
5999 }
6000 if (tmp == NULL) {
6001 cur = xmlCreateEnumeration(name);
6002 if (cur == NULL) {
6003 xmlErrMemory(ctxt);
6004 xmlFreeEnumeration(ret);
6005 return(NULL);
6006 }
6007 if (last == NULL) ret = last = cur;
6008 else {
6009 last->next = cur;
6010 last = cur;
6011 }
6012 }
6013 SKIP_BLANKS_PE;
6014 } while (RAW == '|');
6015 if (RAW != ')') {
6016 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
6017 xmlFreeEnumeration(ret);
6018 return(NULL);
6019 }
6020 NEXT;
6021 return(ret);
6022 }
6023
6024 /**
6025 * xmlParseEnumerationType:
6026 * @ctxt: an XML parser context
6027 *
6028 * DEPRECATED: Internal function, don't use.
6029 *
6030 * parse an Enumeration attribute type.
6031 *
6032 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
6033 *
6034 * [ VC: Enumeration ]
6035 * Values of this type must match one of the Nmtoken tokens in
6036 * the declaration
6037 *
6038 * Returns: the enumeration attribute tree built while parsing
6039 */
6040
6041 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)6042 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
6043 xmlChar *name;
6044 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6045
6046 if (RAW != '(') {
6047 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
6048 return(NULL);
6049 }
6050 do {
6051 NEXT;
6052 SKIP_BLANKS_PE;
6053 name = xmlParseNmtoken(ctxt);
6054 if (name == NULL) {
6055 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
6056 return(ret);
6057 }
6058 tmp = ret;
6059 while (tmp != NULL) {
6060 if (xmlStrEqual(name, tmp->name)) {
6061 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6062 "standalone: attribute enumeration value token %s duplicated\n",
6063 name, NULL);
6064 if (!xmlDictOwns(ctxt->dict, name))
6065 xmlFree(name);
6066 break;
6067 }
6068 tmp = tmp->next;
6069 }
6070 if (tmp == NULL) {
6071 cur = xmlCreateEnumeration(name);
6072 if (!xmlDictOwns(ctxt->dict, name))
6073 xmlFree(name);
6074 if (cur == NULL) {
6075 xmlErrMemory(ctxt);
6076 xmlFreeEnumeration(ret);
6077 return(NULL);
6078 }
6079 if (last == NULL) ret = last = cur;
6080 else {
6081 last->next = cur;
6082 last = cur;
6083 }
6084 }
6085 SKIP_BLANKS_PE;
6086 } while (RAW == '|');
6087 if (RAW != ')') {
6088 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
6089 return(ret);
6090 }
6091 NEXT;
6092 return(ret);
6093 }
6094
6095 /**
6096 * xmlParseEnumeratedType:
6097 * @ctxt: an XML parser context
6098 * @tree: the enumeration tree built while parsing
6099 *
6100 * DEPRECATED: Internal function, don't use.
6101 *
6102 * parse an Enumerated attribute type.
6103 *
6104 * [57] EnumeratedType ::= NotationType | Enumeration
6105 *
6106 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6107 *
6108 *
6109 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
6110 */
6111
6112 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)6113 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6114 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
6115 SKIP(8);
6116 if (SKIP_BLANKS_PE == 0) {
6117 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6118 "Space required after 'NOTATION'\n");
6119 return(0);
6120 }
6121 *tree = xmlParseNotationType(ctxt);
6122 if (*tree == NULL) return(0);
6123 return(XML_ATTRIBUTE_NOTATION);
6124 }
6125 *tree = xmlParseEnumerationType(ctxt);
6126 if (*tree == NULL) return(0);
6127 return(XML_ATTRIBUTE_ENUMERATION);
6128 }
6129
6130 /**
6131 * xmlParseAttributeType:
6132 * @ctxt: an XML parser context
6133 * @tree: the enumeration tree built while parsing
6134 *
6135 * DEPRECATED: Internal function, don't use.
6136 *
6137 * parse the Attribute list def for an element
6138 *
6139 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6140 *
6141 * [55] StringType ::= 'CDATA'
6142 *
6143 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6144 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6145 *
6146 * Validity constraints for attribute values syntax are checked in
6147 * xmlValidateAttributeValue()
6148 *
6149 * [ VC: ID ]
6150 * Values of type ID must match the Name production. A name must not
6151 * appear more than once in an XML document as a value of this type;
6152 * i.e., ID values must uniquely identify the elements which bear them.
6153 *
6154 * [ VC: One ID per Element Type ]
6155 * No element type may have more than one ID attribute specified.
6156 *
6157 * [ VC: ID Attribute Default ]
6158 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6159 *
6160 * [ VC: IDREF ]
6161 * Values of type IDREF must match the Name production, and values
6162 * of type IDREFS must match Names; each IDREF Name must match the value
6163 * of an ID attribute on some element in the XML document; i.e. IDREF
6164 * values must match the value of some ID attribute.
6165 *
6166 * [ VC: Entity Name ]
6167 * Values of type ENTITY must match the Name production, values
6168 * of type ENTITIES must match Names; each Entity Name must match the
6169 * name of an unparsed entity declared in the DTD.
6170 *
6171 * [ VC: Name Token ]
6172 * Values of type NMTOKEN must match the Nmtoken production; values
6173 * of type NMTOKENS must match Nmtokens.
6174 *
6175 * Returns the attribute type
6176 */
6177 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)6178 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6179 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6180 SKIP(5);
6181 return(XML_ATTRIBUTE_CDATA);
6182 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6183 SKIP(6);
6184 return(XML_ATTRIBUTE_IDREFS);
6185 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6186 SKIP(5);
6187 return(XML_ATTRIBUTE_IDREF);
6188 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6189 SKIP(2);
6190 return(XML_ATTRIBUTE_ID);
6191 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6192 SKIP(6);
6193 return(XML_ATTRIBUTE_ENTITY);
6194 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6195 SKIP(8);
6196 return(XML_ATTRIBUTE_ENTITIES);
6197 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6198 SKIP(8);
6199 return(XML_ATTRIBUTE_NMTOKENS);
6200 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6201 SKIP(7);
6202 return(XML_ATTRIBUTE_NMTOKEN);
6203 }
6204 return(xmlParseEnumeratedType(ctxt, tree));
6205 }
6206
6207 /**
6208 * xmlParseAttributeListDecl:
6209 * @ctxt: an XML parser context
6210 *
6211 * DEPRECATED: Internal function, don't use.
6212 *
6213 * Parse an attribute list declaration for an element. Always consumes '<!'.
6214 *
6215 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6216 *
6217 * [53] AttDef ::= S Name S AttType S DefaultDecl
6218 *
6219 */
6220 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)6221 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6222 const xmlChar *elemName;
6223 const xmlChar *attrName;
6224 xmlEnumerationPtr tree;
6225
6226 if ((CUR != '<') || (NXT(1) != '!'))
6227 return;
6228 SKIP(2);
6229
6230 if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6231 int inputid = ctxt->input->id;
6232
6233 SKIP(7);
6234 if (SKIP_BLANKS_PE == 0) {
6235 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6236 "Space required after '<!ATTLIST'\n");
6237 }
6238 elemName = xmlParseName(ctxt);
6239 if (elemName == NULL) {
6240 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6241 "ATTLIST: no name for Element\n");
6242 return;
6243 }
6244 SKIP_BLANKS_PE;
6245 GROW;
6246 while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
6247 int type;
6248 int def;
6249 xmlChar *defaultValue = NULL;
6250
6251 GROW;
6252 tree = NULL;
6253 attrName = xmlParseName(ctxt);
6254 if (attrName == NULL) {
6255 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6256 "ATTLIST: no name for Attribute\n");
6257 break;
6258 }
6259 GROW;
6260 if (SKIP_BLANKS_PE == 0) {
6261 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6262 "Space required after the attribute name\n");
6263 break;
6264 }
6265
6266 type = xmlParseAttributeType(ctxt, &tree);
6267 if (type <= 0) {
6268 break;
6269 }
6270
6271 GROW;
6272 if (SKIP_BLANKS_PE == 0) {
6273 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6274 "Space required after the attribute type\n");
6275 if (tree != NULL)
6276 xmlFreeEnumeration(tree);
6277 break;
6278 }
6279
6280 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6281 if (def <= 0) {
6282 if (defaultValue != NULL)
6283 xmlFree(defaultValue);
6284 if (tree != NULL)
6285 xmlFreeEnumeration(tree);
6286 break;
6287 }
6288 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6289 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6290
6291 GROW;
6292 if (RAW != '>') {
6293 if (SKIP_BLANKS_PE == 0) {
6294 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6295 "Space required after the attribute default value\n");
6296 if (defaultValue != NULL)
6297 xmlFree(defaultValue);
6298 if (tree != NULL)
6299 xmlFreeEnumeration(tree);
6300 break;
6301 }
6302 }
6303 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6304 (ctxt->sax->attributeDecl != NULL))
6305 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6306 type, def, defaultValue, tree);
6307 else if (tree != NULL)
6308 xmlFreeEnumeration(tree);
6309
6310 if ((ctxt->sax2) && (defaultValue != NULL) &&
6311 (def != XML_ATTRIBUTE_IMPLIED) &&
6312 (def != XML_ATTRIBUTE_REQUIRED)) {
6313 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6314 }
6315 if (ctxt->sax2) {
6316 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6317 }
6318 if (defaultValue != NULL)
6319 xmlFree(defaultValue);
6320 GROW;
6321 }
6322 if (RAW == '>') {
6323 if (inputid != ctxt->input->id) {
6324 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6325 "Attribute list declaration doesn't start and"
6326 " stop in the same entity\n");
6327 }
6328 NEXT;
6329 }
6330 }
6331 }
6332
6333 /**
6334 * xmlParseElementMixedContentDecl:
6335 * @ctxt: an XML parser context
6336 * @inputchk: the input used for the current entity, needed for boundary checks
6337 *
6338 * DEPRECATED: Internal function, don't use.
6339 *
6340 * parse the declaration for a Mixed Element content
6341 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6342 *
6343 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6344 * '(' S? '#PCDATA' S? ')'
6345 *
6346 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6347 *
6348 * [ VC: No Duplicate Types ]
6349 * The same name must not appear more than once in a single
6350 * mixed-content declaration.
6351 *
6352 * returns: the list of the xmlElementContentPtr describing the element choices
6353 */
6354 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6355 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6356 xmlElementContentPtr ret = NULL, cur = NULL, n;
6357 const xmlChar *elem = NULL;
6358
6359 GROW;
6360 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6361 SKIP(7);
6362 SKIP_BLANKS_PE;
6363 if (RAW == ')') {
6364 if (ctxt->input->id != inputchk) {
6365 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6366 "Element content declaration doesn't start and"
6367 " stop in the same entity\n");
6368 }
6369 NEXT;
6370 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6371 if (ret == NULL)
6372 goto mem_error;
6373 if (RAW == '*') {
6374 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6375 NEXT;
6376 }
6377 return(ret);
6378 }
6379 if ((RAW == '(') || (RAW == '|')) {
6380 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6381 if (ret == NULL)
6382 goto mem_error;
6383 }
6384 while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6385 NEXT;
6386 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6387 if (n == NULL)
6388 goto mem_error;
6389 if (elem == NULL) {
6390 n->c1 = cur;
6391 if (cur != NULL)
6392 cur->parent = n;
6393 ret = cur = n;
6394 } else {
6395 cur->c2 = n;
6396 n->parent = cur;
6397 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6398 if (n->c1 == NULL)
6399 goto mem_error;
6400 n->c1->parent = n;
6401 cur = n;
6402 }
6403 SKIP_BLANKS_PE;
6404 elem = xmlParseName(ctxt);
6405 if (elem == NULL) {
6406 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6407 "xmlParseElementMixedContentDecl : Name expected\n");
6408 xmlFreeDocElementContent(ctxt->myDoc, ret);
6409 return(NULL);
6410 }
6411 SKIP_BLANKS_PE;
6412 GROW;
6413 }
6414 if ((RAW == ')') && (NXT(1) == '*')) {
6415 if (elem != NULL) {
6416 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6417 XML_ELEMENT_CONTENT_ELEMENT);
6418 if (cur->c2 == NULL)
6419 goto mem_error;
6420 cur->c2->parent = cur;
6421 }
6422 if (ret != NULL)
6423 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6424 if (ctxt->input->id != inputchk) {
6425 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6426 "Element content declaration doesn't start and"
6427 " stop in the same entity\n");
6428 }
6429 SKIP(2);
6430 } else {
6431 xmlFreeDocElementContent(ctxt->myDoc, ret);
6432 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6433 return(NULL);
6434 }
6435
6436 } else {
6437 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6438 }
6439 return(ret);
6440
6441 mem_error:
6442 xmlErrMemory(ctxt);
6443 xmlFreeDocElementContent(ctxt->myDoc, ret);
6444 return(NULL);
6445 }
6446
6447 /**
6448 * xmlParseElementChildrenContentDeclPriv:
6449 * @ctxt: an XML parser context
6450 * @inputchk: the input used for the current entity, needed for boundary checks
6451 * @depth: the level of recursion
6452 *
6453 * parse the declaration for a Mixed Element content
6454 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6455 *
6456 *
6457 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6458 *
6459 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6460 *
6461 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6462 *
6463 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6464 *
6465 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6466 * TODO Parameter-entity replacement text must be properly nested
6467 * with parenthesized groups. That is to say, if either of the
6468 * opening or closing parentheses in a choice, seq, or Mixed
6469 * construct is contained in the replacement text for a parameter
6470 * entity, both must be contained in the same replacement text. For
6471 * interoperability, if a parameter-entity reference appears in a
6472 * choice, seq, or Mixed construct, its replacement text should not
6473 * be empty, and neither the first nor last non-blank character of
6474 * the replacement text should be a connector (| or ,).
6475 *
6476 * Returns the tree of xmlElementContentPtr describing the element
6477 * hierarchy.
6478 */
6479 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6480 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6481 int depth) {
6482 int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6483 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6484 const xmlChar *elem;
6485 xmlChar type = 0;
6486
6487 if (depth > maxDepth) {
6488 xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6489 "xmlParseElementChildrenContentDecl : depth %d too deep, "
6490 "use XML_PARSE_HUGE\n", depth);
6491 return(NULL);
6492 }
6493 SKIP_BLANKS_PE;
6494 GROW;
6495 if (RAW == '(') {
6496 int inputid = ctxt->input->id;
6497
6498 /* Recurse on first child */
6499 NEXT;
6500 SKIP_BLANKS_PE;
6501 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6502 depth + 1);
6503 if (cur == NULL)
6504 return(NULL);
6505 SKIP_BLANKS_PE;
6506 GROW;
6507 } else {
6508 elem = xmlParseName(ctxt);
6509 if (elem == NULL) {
6510 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6511 return(NULL);
6512 }
6513 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6514 if (cur == NULL) {
6515 xmlErrMemory(ctxt);
6516 return(NULL);
6517 }
6518 GROW;
6519 if (RAW == '?') {
6520 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6521 NEXT;
6522 } else if (RAW == '*') {
6523 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6524 NEXT;
6525 } else if (RAW == '+') {
6526 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6527 NEXT;
6528 } else {
6529 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6530 }
6531 GROW;
6532 }
6533 SKIP_BLANKS_PE;
6534 while ((RAW != ')') && (PARSER_STOPPED(ctxt) == 0)) {
6535 /*
6536 * Each loop we parse one separator and one element.
6537 */
6538 if (RAW == ',') {
6539 if (type == 0) type = CUR;
6540
6541 /*
6542 * Detect "Name | Name , Name" error
6543 */
6544 else if (type != CUR) {
6545 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6546 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6547 type);
6548 if ((last != NULL) && (last != ret))
6549 xmlFreeDocElementContent(ctxt->myDoc, last);
6550 if (ret != NULL)
6551 xmlFreeDocElementContent(ctxt->myDoc, ret);
6552 return(NULL);
6553 }
6554 NEXT;
6555
6556 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6557 if (op == NULL) {
6558 xmlErrMemory(ctxt);
6559 if ((last != NULL) && (last != ret))
6560 xmlFreeDocElementContent(ctxt->myDoc, last);
6561 xmlFreeDocElementContent(ctxt->myDoc, ret);
6562 return(NULL);
6563 }
6564 if (last == NULL) {
6565 op->c1 = ret;
6566 if (ret != NULL)
6567 ret->parent = op;
6568 ret = cur = op;
6569 } else {
6570 cur->c2 = op;
6571 if (op != NULL)
6572 op->parent = cur;
6573 op->c1 = last;
6574 if (last != NULL)
6575 last->parent = op;
6576 cur =op;
6577 last = NULL;
6578 }
6579 } else if (RAW == '|') {
6580 if (type == 0) type = CUR;
6581
6582 /*
6583 * Detect "Name , Name | Name" error
6584 */
6585 else if (type != CUR) {
6586 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6587 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6588 type);
6589 if ((last != NULL) && (last != ret))
6590 xmlFreeDocElementContent(ctxt->myDoc, last);
6591 if (ret != NULL)
6592 xmlFreeDocElementContent(ctxt->myDoc, ret);
6593 return(NULL);
6594 }
6595 NEXT;
6596
6597 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6598 if (op == NULL) {
6599 xmlErrMemory(ctxt);
6600 if ((last != NULL) && (last != ret))
6601 xmlFreeDocElementContent(ctxt->myDoc, last);
6602 if (ret != NULL)
6603 xmlFreeDocElementContent(ctxt->myDoc, ret);
6604 return(NULL);
6605 }
6606 if (last == NULL) {
6607 op->c1 = ret;
6608 if (ret != NULL)
6609 ret->parent = op;
6610 ret = cur = op;
6611 } else {
6612 cur->c2 = op;
6613 if (op != NULL)
6614 op->parent = cur;
6615 op->c1 = last;
6616 if (last != NULL)
6617 last->parent = op;
6618 cur =op;
6619 last = NULL;
6620 }
6621 } else {
6622 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6623 if ((last != NULL) && (last != ret))
6624 xmlFreeDocElementContent(ctxt->myDoc, last);
6625 if (ret != NULL)
6626 xmlFreeDocElementContent(ctxt->myDoc, ret);
6627 return(NULL);
6628 }
6629 GROW;
6630 SKIP_BLANKS_PE;
6631 GROW;
6632 if (RAW == '(') {
6633 int inputid = ctxt->input->id;
6634 /* Recurse on second child */
6635 NEXT;
6636 SKIP_BLANKS_PE;
6637 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6638 depth + 1);
6639 if (last == NULL) {
6640 if (ret != NULL)
6641 xmlFreeDocElementContent(ctxt->myDoc, ret);
6642 return(NULL);
6643 }
6644 SKIP_BLANKS_PE;
6645 } else {
6646 elem = xmlParseName(ctxt);
6647 if (elem == NULL) {
6648 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6649 if (ret != NULL)
6650 xmlFreeDocElementContent(ctxt->myDoc, ret);
6651 return(NULL);
6652 }
6653 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6654 if (last == NULL) {
6655 xmlErrMemory(ctxt);
6656 if (ret != NULL)
6657 xmlFreeDocElementContent(ctxt->myDoc, ret);
6658 return(NULL);
6659 }
6660 if (RAW == '?') {
6661 last->ocur = XML_ELEMENT_CONTENT_OPT;
6662 NEXT;
6663 } else if (RAW == '*') {
6664 last->ocur = XML_ELEMENT_CONTENT_MULT;
6665 NEXT;
6666 } else if (RAW == '+') {
6667 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6668 NEXT;
6669 } else {
6670 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6671 }
6672 }
6673 SKIP_BLANKS_PE;
6674 GROW;
6675 }
6676 if ((cur != NULL) && (last != NULL)) {
6677 cur->c2 = last;
6678 if (last != NULL)
6679 last->parent = cur;
6680 }
6681 if (ctxt->input->id != inputchk) {
6682 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6683 "Element content declaration doesn't start and stop in"
6684 " the same entity\n");
6685 }
6686 NEXT;
6687 if (RAW == '?') {
6688 if (ret != NULL) {
6689 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6690 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6691 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6692 else
6693 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6694 }
6695 NEXT;
6696 } else if (RAW == '*') {
6697 if (ret != NULL) {
6698 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6699 cur = ret;
6700 /*
6701 * Some normalization:
6702 * (a | b* | c?)* == (a | b | c)*
6703 */
6704 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6705 if ((cur->c1 != NULL) &&
6706 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6707 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6708 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6709 if ((cur->c2 != NULL) &&
6710 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6711 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6712 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6713 cur = cur->c2;
6714 }
6715 }
6716 NEXT;
6717 } else if (RAW == '+') {
6718 if (ret != NULL) {
6719 int found = 0;
6720
6721 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6722 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6723 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6724 else
6725 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6726 /*
6727 * Some normalization:
6728 * (a | b*)+ == (a | b)*
6729 * (a | b?)+ == (a | b)*
6730 */
6731 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6732 if ((cur->c1 != NULL) &&
6733 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6734 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6735 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6736 found = 1;
6737 }
6738 if ((cur->c2 != NULL) &&
6739 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6740 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6741 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6742 found = 1;
6743 }
6744 cur = cur->c2;
6745 }
6746 if (found)
6747 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6748 }
6749 NEXT;
6750 }
6751 return(ret);
6752 }
6753
6754 /**
6755 * xmlParseElementChildrenContentDecl:
6756 * @ctxt: an XML parser context
6757 * @inputchk: the input used for the current entity, needed for boundary checks
6758 *
6759 * DEPRECATED: Internal function, don't use.
6760 *
6761 * parse the declaration for a Mixed Element content
6762 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6763 *
6764 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6765 *
6766 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6767 *
6768 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6769 *
6770 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6771 *
6772 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6773 * TODO Parameter-entity replacement text must be properly nested
6774 * with parenthesized groups. That is to say, if either of the
6775 * opening or closing parentheses in a choice, seq, or Mixed
6776 * construct is contained in the replacement text for a parameter
6777 * entity, both must be contained in the same replacement text. For
6778 * interoperability, if a parameter-entity reference appears in a
6779 * choice, seq, or Mixed construct, its replacement text should not
6780 * be empty, and neither the first nor last non-blank character of
6781 * the replacement text should be a connector (| or ,).
6782 *
6783 * Returns the tree of xmlElementContentPtr describing the element
6784 * hierarchy.
6785 */
6786 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6787 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6788 /* stub left for API/ABI compat */
6789 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6790 }
6791
6792 /**
6793 * xmlParseElementContentDecl:
6794 * @ctxt: an XML parser context
6795 * @name: the name of the element being defined.
6796 * @result: the Element Content pointer will be stored here if any
6797 *
6798 * DEPRECATED: Internal function, don't use.
6799 *
6800 * parse the declaration for an Element content either Mixed or Children,
6801 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6802 *
6803 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6804 *
6805 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6806 */
6807
6808 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6809 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6810 xmlElementContentPtr *result) {
6811
6812 xmlElementContentPtr tree = NULL;
6813 int inputid = ctxt->input->id;
6814 int res;
6815
6816 *result = NULL;
6817
6818 if (RAW != '(') {
6819 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6820 "xmlParseElementContentDecl : %s '(' expected\n", name);
6821 return(-1);
6822 }
6823 NEXT;
6824 GROW;
6825 SKIP_BLANKS_PE;
6826 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6827 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6828 res = XML_ELEMENT_TYPE_MIXED;
6829 } else {
6830 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6831 res = XML_ELEMENT_TYPE_ELEMENT;
6832 }
6833 SKIP_BLANKS_PE;
6834 *result = tree;
6835 return(res);
6836 }
6837
6838 /**
6839 * xmlParseElementDecl:
6840 * @ctxt: an XML parser context
6841 *
6842 * DEPRECATED: Internal function, don't use.
6843 *
6844 * Parse an element declaration. Always consumes '<!'.
6845 *
6846 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6847 *
6848 * [ VC: Unique Element Type Declaration ]
6849 * No element type may be declared more than once
6850 *
6851 * Returns the type of the element, or -1 in case of error
6852 */
6853 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6854 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6855 const xmlChar *name;
6856 int ret = -1;
6857 xmlElementContentPtr content = NULL;
6858
6859 if ((CUR != '<') || (NXT(1) != '!'))
6860 return(ret);
6861 SKIP(2);
6862
6863 /* GROW; done in the caller */
6864 if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6865 int inputid = ctxt->input->id;
6866
6867 SKIP(7);
6868 if (SKIP_BLANKS_PE == 0) {
6869 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6870 "Space required after 'ELEMENT'\n");
6871 return(-1);
6872 }
6873 name = xmlParseName(ctxt);
6874 if (name == NULL) {
6875 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6876 "xmlParseElementDecl: no name for Element\n");
6877 return(-1);
6878 }
6879 if (SKIP_BLANKS_PE == 0) {
6880 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6881 "Space required after the element name\n");
6882 }
6883 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6884 SKIP(5);
6885 /*
6886 * Element must always be empty.
6887 */
6888 ret = XML_ELEMENT_TYPE_EMPTY;
6889 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6890 (NXT(2) == 'Y')) {
6891 SKIP(3);
6892 /*
6893 * Element is a generic container.
6894 */
6895 ret = XML_ELEMENT_TYPE_ANY;
6896 } else if (RAW == '(') {
6897 ret = xmlParseElementContentDecl(ctxt, name, &content);
6898 } else {
6899 /*
6900 * [ WFC: PEs in Internal Subset ] error handling.
6901 */
6902 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6903 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6904 return(-1);
6905 }
6906
6907 SKIP_BLANKS_PE;
6908
6909 if (RAW != '>') {
6910 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6911 if (content != NULL) {
6912 xmlFreeDocElementContent(ctxt->myDoc, content);
6913 }
6914 } else {
6915 if (inputid != ctxt->input->id) {
6916 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6917 "Element declaration doesn't start and stop in"
6918 " the same entity\n");
6919 }
6920
6921 NEXT;
6922 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6923 (ctxt->sax->elementDecl != NULL)) {
6924 if (content != NULL)
6925 content->parent = NULL;
6926 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6927 content);
6928 if ((content != NULL) && (content->parent == NULL)) {
6929 /*
6930 * this is a trick: if xmlAddElementDecl is called,
6931 * instead of copying the full tree it is plugged directly
6932 * if called from the parser. Avoid duplicating the
6933 * interfaces or change the API/ABI
6934 */
6935 xmlFreeDocElementContent(ctxt->myDoc, content);
6936 }
6937 } else if (content != NULL) {
6938 xmlFreeDocElementContent(ctxt->myDoc, content);
6939 }
6940 }
6941 }
6942 return(ret);
6943 }
6944
6945 /**
6946 * xmlParseConditionalSections
6947 * @ctxt: an XML parser context
6948 *
6949 * Parse a conditional section. Always consumes '<!['.
6950 *
6951 * [61] conditionalSect ::= includeSect | ignoreSect
6952 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6953 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6954 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6955 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6956 */
6957
6958 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6959 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6960 int *inputIds = NULL;
6961 size_t inputIdsSize = 0;
6962 size_t depth = 0;
6963
6964 while (PARSER_STOPPED(ctxt) == 0) {
6965 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6966 int id = ctxt->input->id;
6967
6968 SKIP(3);
6969 SKIP_BLANKS_PE;
6970
6971 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6972 SKIP(7);
6973 SKIP_BLANKS_PE;
6974 if (RAW != '[') {
6975 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6976 xmlHaltParser(ctxt);
6977 goto error;
6978 }
6979 if (ctxt->input->id != id) {
6980 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6981 "All markup of the conditional section is"
6982 " not in the same entity\n");
6983 }
6984 NEXT;
6985
6986 if (inputIdsSize <= depth) {
6987 int *tmp;
6988
6989 inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6990 tmp = (int *) xmlRealloc(inputIds,
6991 inputIdsSize * sizeof(int));
6992 if (tmp == NULL) {
6993 xmlErrMemory(ctxt);
6994 goto error;
6995 }
6996 inputIds = tmp;
6997 }
6998 inputIds[depth] = id;
6999 depth++;
7000 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
7001 size_t ignoreDepth = 0;
7002
7003 SKIP(6);
7004 SKIP_BLANKS_PE;
7005 if (RAW != '[') {
7006 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7007 xmlHaltParser(ctxt);
7008 goto error;
7009 }
7010 if (ctxt->input->id != id) {
7011 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7012 "All markup of the conditional section is"
7013 " not in the same entity\n");
7014 }
7015 NEXT;
7016
7017 while (PARSER_STOPPED(ctxt) == 0) {
7018 if (RAW == 0) {
7019 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
7020 goto error;
7021 }
7022 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7023 SKIP(3);
7024 ignoreDepth++;
7025 /* Check for integer overflow */
7026 if (ignoreDepth == 0) {
7027 xmlErrMemory(ctxt);
7028 goto error;
7029 }
7030 } else if ((RAW == ']') && (NXT(1) == ']') &&
7031 (NXT(2) == '>')) {
7032 SKIP(3);
7033 if (ignoreDepth == 0)
7034 break;
7035 ignoreDepth--;
7036 } else {
7037 NEXT;
7038 }
7039 }
7040
7041 if (ctxt->input->id != id) {
7042 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7043 "All markup of the conditional section is"
7044 " not in the same entity\n");
7045 }
7046 } else {
7047 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
7048 xmlHaltParser(ctxt);
7049 goto error;
7050 }
7051 } else if ((depth > 0) &&
7052 (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
7053 depth--;
7054 if (ctxt->input->id != inputIds[depth]) {
7055 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7056 "All markup of the conditional section is not"
7057 " in the same entity\n");
7058 }
7059 SKIP(3);
7060 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7061 xmlParseMarkupDecl(ctxt);
7062 } else {
7063 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7064 xmlHaltParser(ctxt);
7065 goto error;
7066 }
7067
7068 if (depth == 0)
7069 break;
7070
7071 SKIP_BLANKS_PE;
7072 SHRINK;
7073 GROW;
7074 }
7075
7076 error:
7077 xmlFree(inputIds);
7078 }
7079
7080 /**
7081 * xmlParseMarkupDecl:
7082 * @ctxt: an XML parser context
7083 *
7084 * DEPRECATED: Internal function, don't use.
7085 *
7086 * Parse markup declarations. Always consumes '<!' or '<?'.
7087 *
7088 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
7089 * NotationDecl | PI | Comment
7090 *
7091 * [ VC: Proper Declaration/PE Nesting ]
7092 * Parameter-entity replacement text must be properly nested with
7093 * markup declarations. That is to say, if either the first character
7094 * or the last character of a markup declaration (markupdecl above) is
7095 * contained in the replacement text for a parameter-entity reference,
7096 * both must be contained in the same replacement text.
7097 *
7098 * [ WFC: PEs in Internal Subset ]
7099 * In the internal DTD subset, parameter-entity references can occur
7100 * only where markup declarations can occur, not within markup declarations.
7101 * (This does not apply to references that occur in external parameter
7102 * entities or to the external subset.)
7103 */
7104 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)7105 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
7106 GROW;
7107 if (CUR == '<') {
7108 if (NXT(1) == '!') {
7109 switch (NXT(2)) {
7110 case 'E':
7111 if (NXT(3) == 'L')
7112 xmlParseElementDecl(ctxt);
7113 else if (NXT(3) == 'N')
7114 xmlParseEntityDecl(ctxt);
7115 else
7116 SKIP(2);
7117 break;
7118 case 'A':
7119 xmlParseAttributeListDecl(ctxt);
7120 break;
7121 case 'N':
7122 xmlParseNotationDecl(ctxt);
7123 break;
7124 case '-':
7125 xmlParseComment(ctxt);
7126 break;
7127 default:
7128 /* there is an error but it will be detected later */
7129 SKIP(2);
7130 break;
7131 }
7132 } else if (NXT(1) == '?') {
7133 xmlParsePI(ctxt);
7134 }
7135 }
7136 }
7137
7138 /**
7139 * xmlParseTextDecl:
7140 * @ctxt: an XML parser context
7141 *
7142 * DEPRECATED: Internal function, don't use.
7143 *
7144 * parse an XML declaration header for external entities
7145 *
7146 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7147 */
7148
7149 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)7150 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7151 xmlChar *version;
7152
7153 /*
7154 * We know that '<?xml' is here.
7155 */
7156 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7157 SKIP(5);
7158 } else {
7159 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7160 return;
7161 }
7162
7163 if (SKIP_BLANKS == 0) {
7164 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7165 "Space needed after '<?xml'\n");
7166 }
7167
7168 /*
7169 * We may have the VersionInfo here.
7170 */
7171 version = xmlParseVersionInfo(ctxt);
7172 if (version == NULL) {
7173 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7174 if (version == NULL) {
7175 xmlErrMemory(ctxt);
7176 return;
7177 }
7178 } else {
7179 if (SKIP_BLANKS == 0) {
7180 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7181 "Space needed here\n");
7182 }
7183 }
7184 ctxt->input->version = version;
7185
7186 /*
7187 * We must have the encoding declaration
7188 */
7189 xmlParseEncodingDecl(ctxt);
7190
7191 SKIP_BLANKS;
7192 if ((RAW == '?') && (NXT(1) == '>')) {
7193 SKIP(2);
7194 } else if (RAW == '>') {
7195 /* Deprecated old WD ... */
7196 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7197 NEXT;
7198 } else {
7199 int c;
7200
7201 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7202 while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7203 NEXT;
7204 if (c == '>')
7205 break;
7206 }
7207 }
7208 }
7209
7210 /**
7211 * xmlParseExternalSubset:
7212 * @ctxt: an XML parser context
7213 * @ExternalID: the external identifier
7214 * @SystemID: the system identifier (or URL)
7215 *
7216 * parse Markup declarations from an external subset
7217 *
7218 * [30] extSubset ::= textDecl? extSubsetDecl
7219 *
7220 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7221 */
7222 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)7223 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7224 const xmlChar *SystemID) {
7225 int oldInputNr;
7226
7227 xmlCtxtInitializeLate(ctxt);
7228
7229 xmlDetectEncoding(ctxt);
7230
7231 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7232 xmlParseTextDecl(ctxt);
7233 }
7234 if (ctxt->myDoc == NULL) {
7235 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7236 if (ctxt->myDoc == NULL) {
7237 xmlErrMemory(ctxt);
7238 return;
7239 }
7240 ctxt->myDoc->properties = XML_DOC_INTERNAL;
7241 }
7242 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL) &&
7243 (xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID) == NULL)) {
7244 xmlErrMemory(ctxt);
7245 }
7246
7247 ctxt->inSubset = 2;
7248 oldInputNr = ctxt->inputNr;
7249
7250 SKIP_BLANKS_PE;
7251 while (((RAW != 0) || (ctxt->inputNr > oldInputNr)) &&
7252 (!PARSER_STOPPED(ctxt))) {
7253 GROW;
7254 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7255 xmlParseConditionalSections(ctxt);
7256 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7257 xmlParseMarkupDecl(ctxt);
7258 } else {
7259 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7260 xmlHaltParser(ctxt);
7261 return;
7262 }
7263 SKIP_BLANKS_PE;
7264 SHRINK;
7265 }
7266
7267 while (ctxt->inputNr > oldInputNr)
7268 xmlPopPE(ctxt);
7269
7270 if (RAW != 0) {
7271 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7272 }
7273 }
7274
7275 /**
7276 * xmlParseReference:
7277 * @ctxt: an XML parser context
7278 *
7279 * DEPRECATED: Internal function, don't use.
7280 *
7281 * parse and handle entity references in content, depending on the SAX
7282 * interface, this may end-up in a call to character() if this is a
7283 * CharRef, a predefined entity, if there is no reference() callback.
7284 * or if the parser was asked to switch to that mode.
7285 *
7286 * Always consumes '&'.
7287 *
7288 * [67] Reference ::= EntityRef | CharRef
7289 */
7290 void
xmlParseReference(xmlParserCtxtPtr ctxt)7291 xmlParseReference(xmlParserCtxtPtr ctxt) {
7292 xmlEntityPtr ent = NULL;
7293 const xmlChar *name;
7294 xmlChar *val;
7295
7296 if (RAW != '&')
7297 return;
7298
7299 /*
7300 * Simple case of a CharRef
7301 */
7302 if (NXT(1) == '#') {
7303 int i = 0;
7304 xmlChar out[16];
7305 int value = xmlParseCharRef(ctxt);
7306
7307 if (value == 0)
7308 return;
7309
7310 /*
7311 * Just encode the value in UTF-8
7312 */
7313 COPY_BUF(out, i, value);
7314 out[i] = 0;
7315 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7316 (!ctxt->disableSAX))
7317 ctxt->sax->characters(ctxt->userData, out, i);
7318 return;
7319 }
7320
7321 /*
7322 * We are seeing an entity reference
7323 */
7324 name = xmlParseEntityRefInternal(ctxt);
7325 if (name == NULL)
7326 return;
7327 ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7328 if (ent == NULL) {
7329 /*
7330 * Create a reference for undeclared entities.
7331 */
7332 if ((ctxt->replaceEntities == 0) &&
7333 (ctxt->sax != NULL) &&
7334 (ctxt->disableSAX == 0) &&
7335 (ctxt->sax->reference != NULL)) {
7336 ctxt->sax->reference(ctxt->userData, name);
7337 }
7338 return;
7339 }
7340 if (!ctxt->wellFormed)
7341 return;
7342
7343 /* special case of predefined entities */
7344 if ((ent->name == NULL) ||
7345 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7346 val = ent->content;
7347 if (val == NULL) return;
7348 /*
7349 * inline the entity.
7350 */
7351 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7352 (!ctxt->disableSAX))
7353 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7354 return;
7355 }
7356
7357 /*
7358 * The first reference to the entity trigger a parsing phase
7359 * where the ent->children is filled with the result from
7360 * the parsing.
7361 * Note: external parsed entities will not be loaded, it is not
7362 * required for a non-validating parser, unless the parsing option
7363 * of validating, or substituting entities were given. Doing so is
7364 * far more secure as the parser will only process data coming from
7365 * the document entity by default.
7366 *
7367 * FIXME: This doesn't work correctly since entities can be
7368 * expanded with different namespace declarations in scope.
7369 * For example:
7370 *
7371 * <!DOCTYPE doc [
7372 * <!ENTITY ent "<ns:elem/>">
7373 * ]>
7374 * <doc>
7375 * <decl1 xmlns:ns="urn:ns1">
7376 * &ent;
7377 * </decl1>
7378 * <decl2 xmlns:ns="urn:ns2">
7379 * &ent;
7380 * </decl2>
7381 * </doc>
7382 *
7383 * Proposed fix:
7384 *
7385 * - Ignore current namespace declarations when parsing the
7386 * entity. If a prefix can't be resolved, don't report an error
7387 * but mark it as unresolved.
7388 * - Try to resolve these prefixes when expanding the entity.
7389 * This will require a specialized version of xmlStaticCopyNode
7390 * which can also make use of the namespace hash table to avoid
7391 * quadratic behavior.
7392 *
7393 * Alternatively, we could simply reparse the entity on each
7394 * expansion like we already do with custom SAX callbacks.
7395 * External entity content should be cached in this case.
7396 */
7397 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7398 (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7399 ((ctxt->replaceEntities) ||
7400 (ctxt->validate)))) {
7401 if ((ent->flags & XML_ENT_PARSED) == 0) {
7402 xmlCtxtParseEntity(ctxt, ent);
7403 } else if (ent->children == NULL) {
7404 /*
7405 * Probably running in SAX mode and the callbacks don't
7406 * build the entity content. Parse the entity again.
7407 *
7408 * This will also be triggered in normal tree builder mode
7409 * if an entity happens to be empty, causing unnecessary
7410 * reloads. It's hard to come up with a reliable check in
7411 * which mode we're running.
7412 */
7413 xmlCtxtParseEntity(ctxt, ent);
7414 }
7415 }
7416
7417 /*
7418 * We also check for amplification if entities aren't substituted.
7419 * They might be expanded later.
7420 */
7421 if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7422 return;
7423
7424 if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7425 return;
7426
7427 if (ctxt->replaceEntities == 0) {
7428 /*
7429 * Create a reference
7430 */
7431 if (ctxt->sax->reference != NULL)
7432 ctxt->sax->reference(ctxt->userData, ent->name);
7433 } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7434 xmlNodePtr copy, cur;
7435
7436 /*
7437 * Seems we are generating the DOM content, copy the tree
7438 */
7439 cur = ent->children;
7440
7441 /*
7442 * Handle first text node with SAX to coalesce text efficiently
7443 */
7444 if ((cur->type == XML_TEXT_NODE) ||
7445 (cur->type == XML_CDATA_SECTION_NODE)) {
7446 int len = xmlStrlen(cur->content);
7447
7448 if ((cur->type == XML_TEXT_NODE) ||
7449 (ctxt->sax->cdataBlock == NULL)) {
7450 if (ctxt->sax->characters != NULL)
7451 ctxt->sax->characters(ctxt, cur->content, len);
7452 } else {
7453 if (ctxt->sax->cdataBlock != NULL)
7454 ctxt->sax->cdataBlock(ctxt, cur->content, len);
7455 }
7456
7457 cur = cur->next;
7458 }
7459
7460 while (cur != NULL) {
7461 xmlNodePtr last;
7462
7463 /*
7464 * Handle last text node with SAX to coalesce text efficiently
7465 */
7466 if ((cur->next == NULL) &&
7467 ((cur->type == XML_TEXT_NODE) ||
7468 (cur->type == XML_CDATA_SECTION_NODE))) {
7469 int len = xmlStrlen(cur->content);
7470
7471 if ((cur->type == XML_TEXT_NODE) ||
7472 (ctxt->sax->cdataBlock == NULL)) {
7473 if (ctxt->sax->characters != NULL)
7474 ctxt->sax->characters(ctxt, cur->content, len);
7475 } else {
7476 if (ctxt->sax->cdataBlock != NULL)
7477 ctxt->sax->cdataBlock(ctxt, cur->content, len);
7478 }
7479
7480 break;
7481 }
7482
7483 /*
7484 * Reset coalesce buffer stats only for non-text nodes.
7485 */
7486 ctxt->nodemem = 0;
7487 ctxt->nodelen = 0;
7488
7489 copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7490
7491 if (copy == NULL) {
7492 xmlErrMemory(ctxt);
7493 break;
7494 }
7495
7496 if (ctxt->parseMode == XML_PARSE_READER) {
7497 /* Needed for reader */
7498 copy->extra = cur->extra;
7499 /* Maybe needed for reader */
7500 copy->_private = cur->_private;
7501 }
7502
7503 copy->parent = ctxt->node;
7504 last = ctxt->node->last;
7505 if (last == NULL) {
7506 ctxt->node->children = copy;
7507 } else {
7508 last->next = copy;
7509 copy->prev = last;
7510 }
7511 ctxt->node->last = copy;
7512
7513 cur = cur->next;
7514 }
7515 }
7516 }
7517
7518 static void
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt,const xmlChar * name)7519 xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7520 /*
7521 * [ WFC: Entity Declared ]
7522 * In a document without any DTD, a document with only an
7523 * internal DTD subset which contains no parameter entity
7524 * references, or a document with "standalone='yes'", the
7525 * Name given in the entity reference must match that in an
7526 * entity declaration, except that well-formed documents
7527 * need not declare any of the following entities: amp, lt,
7528 * gt, apos, quot.
7529 * The declaration of a parameter entity must precede any
7530 * reference to it.
7531 * Similarly, the declaration of a general entity must
7532 * precede any reference to it which appears in a default
7533 * value in an attribute-list declaration. Note that if
7534 * entities are declared in the external subset or in
7535 * external parameter entities, a non-validating processor
7536 * is not obligated to read and process their declarations;
7537 * for such documents, the rule that an entity must be
7538 * declared is a well-formedness constraint only if
7539 * standalone='yes'.
7540 */
7541 if ((ctxt->standalone == 1) ||
7542 ((ctxt->hasExternalSubset == 0) &&
7543 (ctxt->hasPErefs == 0))) {
7544 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7545 "Entity '%s' not defined\n", name);
7546 } else if (ctxt->validate) {
7547 /*
7548 * [ VC: Entity Declared ]
7549 * In a document with an external subset or external
7550 * parameter entities with "standalone='no'", ...
7551 * ... The declaration of a parameter entity must
7552 * precede any reference to it...
7553 */
7554 xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7555 "Entity '%s' not defined\n", name, NULL);
7556 } else if ((ctxt->loadsubset) ||
7557 ((ctxt->replaceEntities) &&
7558 ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7559 /*
7560 * Also raise a non-fatal error
7561 *
7562 * - if the external subset is loaded and all entity declarations
7563 * should be available, or
7564 * - entity substition was requested without restricting
7565 * external entity access.
7566 */
7567 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7568 "Entity '%s' not defined\n", name);
7569 } else {
7570 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7571 "Entity '%s' not defined\n", name, NULL);
7572 }
7573
7574 ctxt->valid = 0;
7575 }
7576
7577 static xmlEntityPtr
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt,const xmlChar * name,int inAttr)7578 xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7579 xmlEntityPtr ent;
7580
7581 /*
7582 * Predefined entities override any extra definition
7583 */
7584 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7585 ent = xmlGetPredefinedEntity(name);
7586 if (ent != NULL)
7587 return(ent);
7588 }
7589
7590 /*
7591 * Ask first SAX for entity resolution, otherwise try the
7592 * entities which may have stored in the parser context.
7593 */
7594 if (ctxt->sax != NULL) {
7595 if (ctxt->sax->getEntity != NULL)
7596 ent = ctxt->sax->getEntity(ctxt->userData, name);
7597 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7598 (ctxt->options & XML_PARSE_OLDSAX))
7599 ent = xmlGetPredefinedEntity(name);
7600 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7601 (ctxt->userData==ctxt)) {
7602 ent = xmlSAX2GetEntity(ctxt, name);
7603 }
7604 }
7605
7606 if (ent == NULL) {
7607 xmlHandleUndeclaredEntity(ctxt, name);
7608 }
7609
7610 /*
7611 * [ WFC: Parsed Entity ]
7612 * An entity reference must not contain the name of an
7613 * unparsed entity
7614 */
7615 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7616 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7617 "Entity reference to unparsed entity %s\n", name);
7618 ent = NULL;
7619 }
7620
7621 /*
7622 * [ WFC: No External Entity References ]
7623 * Attribute values cannot contain direct or indirect
7624 * entity references to external entities.
7625 */
7626 else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7627 if (inAttr) {
7628 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7629 "Attribute references external entity '%s'\n", name);
7630 ent = NULL;
7631 }
7632 }
7633
7634 return(ent);
7635 }
7636
7637 /**
7638 * xmlParseEntityRefInternal:
7639 * @ctxt: an XML parser context
7640 * @inAttr: whether we are in an attribute value
7641 *
7642 * Parse an entity reference. Always consumes '&'.
7643 *
7644 * [68] EntityRef ::= '&' Name ';'
7645 *
7646 * Returns the name, or NULL in case of error.
7647 */
7648 static const xmlChar *
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt)7649 xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7650 const xmlChar *name;
7651
7652 GROW;
7653
7654 if (RAW != '&')
7655 return(NULL);
7656 NEXT;
7657 name = xmlParseName(ctxt);
7658 if (name == NULL) {
7659 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7660 "xmlParseEntityRef: no name\n");
7661 return(NULL);
7662 }
7663 if (RAW != ';') {
7664 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7665 return(NULL);
7666 }
7667 NEXT;
7668
7669 return(name);
7670 }
7671
7672 /**
7673 * xmlParseEntityRef:
7674 * @ctxt: an XML parser context
7675 *
7676 * DEPRECATED: Internal function, don't use.
7677 *
7678 * Returns the xmlEntityPtr if found, or NULL otherwise.
7679 */
7680 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7681 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7682 const xmlChar *name;
7683
7684 if (ctxt == NULL)
7685 return(NULL);
7686
7687 name = xmlParseEntityRefInternal(ctxt);
7688 if (name == NULL)
7689 return(NULL);
7690
7691 return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7692 }
7693
7694 /**
7695 * xmlParseStringEntityRef:
7696 * @ctxt: an XML parser context
7697 * @str: a pointer to an index in the string
7698 *
7699 * parse ENTITY references declarations, but this version parses it from
7700 * a string value.
7701 *
7702 * [68] EntityRef ::= '&' Name ';'
7703 *
7704 * [ WFC: Entity Declared ]
7705 * In a document without any DTD, a document with only an internal DTD
7706 * subset which contains no parameter entity references, or a document
7707 * with "standalone='yes'", the Name given in the entity reference
7708 * must match that in an entity declaration, except that well-formed
7709 * documents need not declare any of the following entities: amp, lt,
7710 * gt, apos, quot. The declaration of a parameter entity must precede
7711 * any reference to it. Similarly, the declaration of a general entity
7712 * must precede any reference to it which appears in a default value in an
7713 * attribute-list declaration. Note that if entities are declared in the
7714 * external subset or in external parameter entities, a non-validating
7715 * processor is not obligated to read and process their declarations;
7716 * for such documents, the rule that an entity must be declared is a
7717 * well-formedness constraint only if standalone='yes'.
7718 *
7719 * [ WFC: Parsed Entity ]
7720 * An entity reference must not contain the name of an unparsed entity
7721 *
7722 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7723 * is updated to the current location in the string.
7724 */
7725 static xmlChar *
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7726 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7727 xmlChar *name;
7728 const xmlChar *ptr;
7729 xmlChar cur;
7730
7731 if ((str == NULL) || (*str == NULL))
7732 return(NULL);
7733 ptr = *str;
7734 cur = *ptr;
7735 if (cur != '&')
7736 return(NULL);
7737
7738 ptr++;
7739 name = xmlParseStringName(ctxt, &ptr);
7740 if (name == NULL) {
7741 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7742 "xmlParseStringEntityRef: no name\n");
7743 *str = ptr;
7744 return(NULL);
7745 }
7746 if (*ptr != ';') {
7747 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7748 xmlFree(name);
7749 *str = ptr;
7750 return(NULL);
7751 }
7752 ptr++;
7753
7754 *str = ptr;
7755 return(name);
7756 }
7757
7758 /**
7759 * xmlParsePEReference:
7760 * @ctxt: an XML parser context
7761 *
7762 * DEPRECATED: Internal function, don't use.
7763 *
7764 * Parse a parameter entity reference. Always consumes '%'.
7765 *
7766 * The entity content is handled directly by pushing it's content as
7767 * a new input stream.
7768 *
7769 * [69] PEReference ::= '%' Name ';'
7770 *
7771 * [ WFC: No Recursion ]
7772 * A parsed entity must not contain a recursive
7773 * reference to itself, either directly or indirectly.
7774 *
7775 * [ WFC: Entity Declared ]
7776 * In a document without any DTD, a document with only an internal DTD
7777 * subset which contains no parameter entity references, or a document
7778 * with "standalone='yes'", ... ... The declaration of a parameter
7779 * entity must precede any reference to it...
7780 *
7781 * [ VC: Entity Declared ]
7782 * In a document with an external subset or external parameter entities
7783 * with "standalone='no'", ... ... The declaration of a parameter entity
7784 * must precede any reference to it...
7785 *
7786 * [ WFC: In DTD ]
7787 * Parameter-entity references may only appear in the DTD.
7788 * NOTE: misleading but this is handled.
7789 */
7790 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7791 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7792 {
7793 const xmlChar *name;
7794 xmlEntityPtr entity = NULL;
7795 xmlParserInputPtr input;
7796
7797 if (RAW != '%')
7798 return;
7799 NEXT;
7800 name = xmlParseName(ctxt);
7801 if (name == NULL) {
7802 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7803 return;
7804 }
7805 if (RAW != ';') {
7806 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7807 return;
7808 }
7809
7810 NEXT;
7811
7812 /* Must be set before xmlHandleUndeclaredEntity */
7813 ctxt->hasPErefs = 1;
7814
7815 /*
7816 * Request the entity from SAX
7817 */
7818 if ((ctxt->sax != NULL) &&
7819 (ctxt->sax->getParameterEntity != NULL))
7820 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7821
7822 if (entity == NULL) {
7823 xmlHandleUndeclaredEntity(ctxt, name);
7824 } else {
7825 /*
7826 * Internal checking in case the entity quest barfed
7827 */
7828 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7829 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7830 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7831 "Internal: %%%s; is not a parameter entity\n",
7832 name, NULL);
7833 } else {
7834 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7835 ((ctxt->options & XML_PARSE_NO_XXE) ||
7836 ((ctxt->loadsubset == 0) &&
7837 (ctxt->replaceEntities == 0) &&
7838 (ctxt->validate == 0))))
7839 return;
7840
7841 if (entity->flags & XML_ENT_EXPANDING) {
7842 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7843 xmlHaltParser(ctxt);
7844 return;
7845 }
7846
7847 if (ctxt->input_id >= INT_MAX) {
7848 xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
7849 "Input ID overflow\n");
7850 return;
7851 }
7852
7853 input = xmlNewEntityInputStream(ctxt, entity);
7854 if (xmlPushInput(ctxt, input) < 0) {
7855 xmlFreeInputStream(input);
7856 return;
7857 }
7858
7859 input->id = ++ctxt->input_id;
7860
7861 entity->flags |= XML_ENT_EXPANDING;
7862
7863 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7864 xmlDetectEncoding(ctxt);
7865
7866 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7867 (IS_BLANK_CH(NXT(5)))) {
7868 xmlParseTextDecl(ctxt);
7869 }
7870 }
7871 }
7872 }
7873 }
7874
7875 /**
7876 * xmlLoadEntityContent:
7877 * @ctxt: an XML parser context
7878 * @entity: an unloaded system entity
7879 *
7880 * Load the content of an entity.
7881 *
7882 * Returns 0 in case of success and -1 in case of failure
7883 */
7884 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)7885 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7886 xmlParserInputPtr oldinput, input = NULL;
7887 xmlParserInputPtr *oldinputTab;
7888 const xmlChar *oldencoding;
7889 xmlChar *content = NULL;
7890 xmlResourceType rtype;
7891 size_t length, i;
7892 int oldinputNr, oldinputMax;
7893 int ret = -1;
7894 int res;
7895
7896 if ((ctxt == NULL) || (entity == NULL) ||
7897 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7898 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7899 (entity->content != NULL)) {
7900 xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7901 "xmlLoadEntityContent parameter error");
7902 return(-1);
7903 }
7904
7905 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7906 rtype = XML_RESOURCE_PARAMETER_ENTITY;
7907 else
7908 rtype = XML_RESOURCE_GENERAL_ENTITY;
7909
7910 input = xmlLoadResource(ctxt, (char *) entity->URI,
7911 (char *) entity->ExternalID, rtype);
7912 if (input == NULL)
7913 return(-1);
7914
7915 oldinput = ctxt->input;
7916 oldinputNr = ctxt->inputNr;
7917 oldinputMax = ctxt->inputMax;
7918 oldinputTab = ctxt->inputTab;
7919 oldencoding = ctxt->encoding;
7920
7921 ctxt->input = NULL;
7922 ctxt->inputNr = 0;
7923 ctxt->inputMax = 1;
7924 ctxt->encoding = NULL;
7925 ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7926 if (ctxt->inputTab == NULL) {
7927 xmlErrMemory(ctxt);
7928 xmlFreeInputStream(input);
7929 goto error;
7930 }
7931
7932 xmlBufResetInput(input->buf->buffer, input);
7933
7934 inputPush(ctxt, input);
7935
7936 xmlDetectEncoding(ctxt);
7937
7938 /*
7939 * Parse a possible text declaration first
7940 */
7941 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7942 xmlParseTextDecl(ctxt);
7943 /*
7944 * An XML-1.0 document can't reference an entity not XML-1.0
7945 */
7946 if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7947 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7948 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7949 "Version mismatch between document and entity\n");
7950 }
7951 }
7952
7953 length = input->cur - input->base;
7954 xmlBufShrink(input->buf->buffer, length);
7955 xmlSaturatedAdd(&ctxt->sizeentities, length);
7956
7957 while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7958 ;
7959
7960 xmlBufResetInput(input->buf->buffer, input);
7961
7962 if (res < 0) {
7963 xmlCtxtErrIO(ctxt, input->buf->error, NULL);
7964 goto error;
7965 }
7966
7967 length = xmlBufUse(input->buf->buffer);
7968 content = xmlBufDetach(input->buf->buffer);
7969
7970 if (length > INT_MAX) {
7971 xmlErrMemory(ctxt);
7972 goto error;
7973 }
7974
7975 for (i = 0; i < length; ) {
7976 int clen = length - i;
7977 int c = xmlGetUTF8Char(content + i, &clen);
7978
7979 if ((c < 0) || (!IS_CHAR(c))) {
7980 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7981 "xmlLoadEntityContent: invalid char value %d\n",
7982 content[i]);
7983 goto error;
7984 }
7985 i += clen;
7986 }
7987
7988 xmlSaturatedAdd(&ctxt->sizeentities, length);
7989 entity->content = content;
7990 entity->length = length;
7991 content = NULL;
7992 ret = 0;
7993
7994 error:
7995 while (ctxt->inputNr > 0)
7996 xmlFreeInputStream(inputPop(ctxt));
7997 xmlFree(ctxt->inputTab);
7998 xmlFree((xmlChar *) ctxt->encoding);
7999
8000 ctxt->input = oldinput;
8001 ctxt->inputNr = oldinputNr;
8002 ctxt->inputMax = oldinputMax;
8003 ctxt->inputTab = oldinputTab;
8004 ctxt->encoding = oldencoding;
8005
8006 xmlFree(content);
8007
8008 return(ret);
8009 }
8010
8011 /**
8012 * xmlParseStringPEReference:
8013 * @ctxt: an XML parser context
8014 * @str: a pointer to an index in the string
8015 *
8016 * parse PEReference declarations
8017 *
8018 * [69] PEReference ::= '%' Name ';'
8019 *
8020 * [ WFC: No Recursion ]
8021 * A parsed entity must not contain a recursive
8022 * reference to itself, either directly or indirectly.
8023 *
8024 * [ WFC: Entity Declared ]
8025 * In a document without any DTD, a document with only an internal DTD
8026 * subset which contains no parameter entity references, or a document
8027 * with "standalone='yes'", ... ... The declaration of a parameter
8028 * entity must precede any reference to it...
8029 *
8030 * [ VC: Entity Declared ]
8031 * In a document with an external subset or external parameter entities
8032 * with "standalone='no'", ... ... The declaration of a parameter entity
8033 * must precede any reference to it...
8034 *
8035 * [ WFC: In DTD ]
8036 * Parameter-entity references may only appear in the DTD.
8037 * NOTE: misleading but this is handled.
8038 *
8039 * Returns the string of the entity content.
8040 * str is updated to the current value of the index
8041 */
8042 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8043 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8044 const xmlChar *ptr;
8045 xmlChar cur;
8046 xmlChar *name;
8047 xmlEntityPtr entity = NULL;
8048
8049 if ((str == NULL) || (*str == NULL)) return(NULL);
8050 ptr = *str;
8051 cur = *ptr;
8052 if (cur != '%')
8053 return(NULL);
8054 ptr++;
8055 name = xmlParseStringName(ctxt, &ptr);
8056 if (name == NULL) {
8057 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8058 "xmlParseStringPEReference: no name\n");
8059 *str = ptr;
8060 return(NULL);
8061 }
8062 cur = *ptr;
8063 if (cur != ';') {
8064 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8065 xmlFree(name);
8066 *str = ptr;
8067 return(NULL);
8068 }
8069 ptr++;
8070
8071 /* Must be set before xmlHandleUndeclaredEntity */
8072 ctxt->hasPErefs = 1;
8073
8074 /*
8075 * Request the entity from SAX
8076 */
8077 if ((ctxt->sax != NULL) &&
8078 (ctxt->sax->getParameterEntity != NULL))
8079 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8080
8081 if (entity == NULL) {
8082 xmlHandleUndeclaredEntity(ctxt, name);
8083 } else {
8084 /*
8085 * Internal checking in case the entity quest barfed
8086 */
8087 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8088 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8089 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8090 "%%%s; is not a parameter entity\n",
8091 name, NULL);
8092 }
8093 }
8094
8095 xmlFree(name);
8096 *str = ptr;
8097 return(entity);
8098 }
8099
8100 /**
8101 * xmlParseDocTypeDecl:
8102 * @ctxt: an XML parser context
8103 *
8104 * DEPRECATED: Internal function, don't use.
8105 *
8106 * parse a DOCTYPE declaration
8107 *
8108 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8109 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8110 *
8111 * [ VC: Root Element Type ]
8112 * The Name in the document type declaration must match the element
8113 * type of the root element.
8114 */
8115
8116 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8117 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8118 const xmlChar *name = NULL;
8119 xmlChar *ExternalID = NULL;
8120 xmlChar *URI = NULL;
8121
8122 /*
8123 * We know that '<!DOCTYPE' has been detected.
8124 */
8125 SKIP(9);
8126
8127 SKIP_BLANKS;
8128
8129 /*
8130 * Parse the DOCTYPE name.
8131 */
8132 name = xmlParseName(ctxt);
8133 if (name == NULL) {
8134 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8135 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8136 }
8137 ctxt->intSubName = name;
8138
8139 SKIP_BLANKS;
8140
8141 /*
8142 * Check for SystemID and ExternalID
8143 */
8144 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8145
8146 if ((URI != NULL) || (ExternalID != NULL)) {
8147 ctxt->hasExternalSubset = 1;
8148 }
8149 ctxt->extSubURI = URI;
8150 ctxt->extSubSystem = ExternalID;
8151
8152 SKIP_BLANKS;
8153
8154 /*
8155 * Create and update the internal subset.
8156 */
8157 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8158 (!ctxt->disableSAX))
8159 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8160
8161 /*
8162 * Is there any internal subset declarations ?
8163 * they are handled separately in xmlParseInternalSubset()
8164 */
8165 if (RAW == '[')
8166 return;
8167
8168 /*
8169 * We should be at the end of the DOCTYPE declaration.
8170 */
8171 if (RAW != '>') {
8172 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8173 }
8174 NEXT;
8175 }
8176
8177 /**
8178 * xmlParseInternalSubset:
8179 * @ctxt: an XML parser context
8180 *
8181 * parse the internal subset declaration
8182 *
8183 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8184 */
8185
8186 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8187 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8188 /*
8189 * Is there any DTD definition ?
8190 */
8191 if (RAW == '[') {
8192 int oldInputNr = ctxt->inputNr;
8193
8194 NEXT;
8195 /*
8196 * Parse the succession of Markup declarations and
8197 * PEReferences.
8198 * Subsequence (markupdecl | PEReference | S)*
8199 */
8200 SKIP_BLANKS;
8201 while (((RAW != ']') || (ctxt->inputNr > oldInputNr)) &&
8202 (PARSER_STOPPED(ctxt) == 0)) {
8203
8204 /*
8205 * Conditional sections are allowed from external entities included
8206 * by PE References in the internal subset.
8207 */
8208 if ((PARSER_EXTERNAL(ctxt)) &&
8209 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8210 xmlParseConditionalSections(ctxt);
8211 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8212 xmlParseMarkupDecl(ctxt);
8213 } else if (RAW == '%') {
8214 xmlParsePEReference(ctxt);
8215 } else {
8216 xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8217 break;
8218 }
8219 SKIP_BLANKS_PE;
8220 SHRINK;
8221 GROW;
8222 }
8223
8224 while (ctxt->inputNr > oldInputNr)
8225 xmlPopPE(ctxt);
8226
8227 if (RAW == ']') {
8228 NEXT;
8229 SKIP_BLANKS;
8230 }
8231 }
8232
8233 /*
8234 * We should be at the end of the DOCTYPE declaration.
8235 */
8236 if ((ctxt->wellFormed) && (RAW != '>')) {
8237 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8238 return;
8239 }
8240 NEXT;
8241 }
8242
8243 #ifdef LIBXML_SAX1_ENABLED
8244 /**
8245 * xmlParseAttribute:
8246 * @ctxt: an XML parser context
8247 * @value: a xmlChar ** used to store the value of the attribute
8248 *
8249 * DEPRECATED: Internal function, don't use.
8250 *
8251 * parse an attribute
8252 *
8253 * [41] Attribute ::= Name Eq AttValue
8254 *
8255 * [ WFC: No External Entity References ]
8256 * Attribute values cannot contain direct or indirect entity references
8257 * to external entities.
8258 *
8259 * [ WFC: No < in Attribute Values ]
8260 * The replacement text of any entity referred to directly or indirectly in
8261 * an attribute value (other than "<") must not contain a <.
8262 *
8263 * [ VC: Attribute Value Type ]
8264 * The attribute must have been declared; the value must be of the type
8265 * declared for it.
8266 *
8267 * [25] Eq ::= S? '=' S?
8268 *
8269 * With namespace:
8270 *
8271 * [NS 11] Attribute ::= QName Eq AttValue
8272 *
8273 * Also the case QName == xmlns:??? is handled independently as a namespace
8274 * definition.
8275 *
8276 * Returns the attribute name, and the value in *value.
8277 */
8278
8279 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8280 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8281 const xmlChar *name;
8282 xmlChar *val;
8283
8284 *value = NULL;
8285 GROW;
8286 name = xmlParseName(ctxt);
8287 if (name == NULL) {
8288 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8289 "error parsing attribute name\n");
8290 return(NULL);
8291 }
8292
8293 /*
8294 * read the value
8295 */
8296 SKIP_BLANKS;
8297 if (RAW == '=') {
8298 NEXT;
8299 SKIP_BLANKS;
8300 val = xmlParseAttValue(ctxt);
8301 } else {
8302 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8303 "Specification mandates value for attribute %s\n", name);
8304 return(name);
8305 }
8306
8307 /*
8308 * Check that xml:lang conforms to the specification
8309 * No more registered as an error, just generate a warning now
8310 * since this was deprecated in XML second edition
8311 */
8312 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8313 if (!xmlCheckLanguageID(val)) {
8314 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8315 "Malformed value for xml:lang : %s\n",
8316 val, NULL);
8317 }
8318 }
8319
8320 /*
8321 * Check that xml:space conforms to the specification
8322 */
8323 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8324 if (xmlStrEqual(val, BAD_CAST "default"))
8325 *(ctxt->space) = 0;
8326 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8327 *(ctxt->space) = 1;
8328 else {
8329 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8330 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8331 val, NULL);
8332 }
8333 }
8334
8335 *value = val;
8336 return(name);
8337 }
8338
8339 /**
8340 * xmlParseStartTag:
8341 * @ctxt: an XML parser context
8342 *
8343 * DEPRECATED: Internal function, don't use.
8344 *
8345 * Parse a start tag. Always consumes '<'.
8346 *
8347 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8348 *
8349 * [ WFC: Unique Att Spec ]
8350 * No attribute name may appear more than once in the same start-tag or
8351 * empty-element tag.
8352 *
8353 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8354 *
8355 * [ WFC: Unique Att Spec ]
8356 * No attribute name may appear more than once in the same start-tag or
8357 * empty-element tag.
8358 *
8359 * With namespace:
8360 *
8361 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8362 *
8363 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8364 *
8365 * Returns the element name parsed
8366 */
8367
8368 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8369 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8370 const xmlChar *name;
8371 const xmlChar *attname;
8372 xmlChar *attvalue;
8373 const xmlChar **atts = ctxt->atts;
8374 int nbatts = 0;
8375 int maxatts = ctxt->maxatts;
8376 int i;
8377
8378 if (RAW != '<') return(NULL);
8379 NEXT1;
8380
8381 name = xmlParseName(ctxt);
8382 if (name == NULL) {
8383 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8384 "xmlParseStartTag: invalid element name\n");
8385 return(NULL);
8386 }
8387
8388 /*
8389 * Now parse the attributes, it ends up with the ending
8390 *
8391 * (S Attribute)* S?
8392 */
8393 SKIP_BLANKS;
8394 GROW;
8395
8396 while (((RAW != '>') &&
8397 ((RAW != '/') || (NXT(1) != '>')) &&
8398 (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8399 attname = xmlParseAttribute(ctxt, &attvalue);
8400 if (attname == NULL)
8401 break;
8402 if (attvalue != NULL) {
8403 /*
8404 * [ WFC: Unique Att Spec ]
8405 * No attribute name may appear more than once in the same
8406 * start-tag or empty-element tag.
8407 */
8408 for (i = 0; i < nbatts;i += 2) {
8409 if (xmlStrEqual(atts[i], attname)) {
8410 xmlErrAttributeDup(ctxt, NULL, attname);
8411 xmlFree(attvalue);
8412 goto failed;
8413 }
8414 }
8415 /*
8416 * Add the pair to atts
8417 */
8418 if (atts == NULL) {
8419 maxatts = 22; /* allow for 10 attrs by default */
8420 atts = (const xmlChar **)
8421 xmlMalloc(maxatts * sizeof(xmlChar *));
8422 if (atts == NULL) {
8423 xmlErrMemory(ctxt);
8424 if (attvalue != NULL)
8425 xmlFree(attvalue);
8426 goto failed;
8427 }
8428 ctxt->atts = atts;
8429 ctxt->maxatts = maxatts;
8430 } else if (nbatts + 4 > maxatts) {
8431 const xmlChar **n;
8432
8433 maxatts *= 2;
8434 n = (const xmlChar **) xmlRealloc((void *) atts,
8435 maxatts * sizeof(const xmlChar *));
8436 if (n == NULL) {
8437 xmlErrMemory(ctxt);
8438 if (attvalue != NULL)
8439 xmlFree(attvalue);
8440 goto failed;
8441 }
8442 atts = n;
8443 ctxt->atts = atts;
8444 ctxt->maxatts = maxatts;
8445 }
8446 atts[nbatts++] = attname;
8447 atts[nbatts++] = attvalue;
8448 atts[nbatts] = NULL;
8449 atts[nbatts + 1] = NULL;
8450 } else {
8451 if (attvalue != NULL)
8452 xmlFree(attvalue);
8453 }
8454
8455 failed:
8456
8457 GROW
8458 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8459 break;
8460 if (SKIP_BLANKS == 0) {
8461 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8462 "attributes construct error\n");
8463 }
8464 SHRINK;
8465 GROW;
8466 }
8467
8468 /*
8469 * SAX: Start of Element !
8470 */
8471 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8472 (!ctxt->disableSAX)) {
8473 if (nbatts > 0)
8474 ctxt->sax->startElement(ctxt->userData, name, atts);
8475 else
8476 ctxt->sax->startElement(ctxt->userData, name, NULL);
8477 }
8478
8479 if (atts != NULL) {
8480 /* Free only the content strings */
8481 for (i = 1;i < nbatts;i+=2)
8482 if (atts[i] != NULL)
8483 xmlFree((xmlChar *) atts[i]);
8484 }
8485 return(name);
8486 }
8487
8488 /**
8489 * xmlParseEndTag1:
8490 * @ctxt: an XML parser context
8491 * @line: line of the start tag
8492 * @nsNr: number of namespaces on the start tag
8493 *
8494 * Parse an end tag. Always consumes '</'.
8495 *
8496 * [42] ETag ::= '</' Name S? '>'
8497 *
8498 * With namespace
8499 *
8500 * [NS 9] ETag ::= '</' QName S? '>'
8501 */
8502
8503 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8504 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8505 const xmlChar *name;
8506
8507 GROW;
8508 if ((RAW != '<') || (NXT(1) != '/')) {
8509 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8510 "xmlParseEndTag: '</' not found\n");
8511 return;
8512 }
8513 SKIP(2);
8514
8515 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8516
8517 /*
8518 * We should definitely be at the ending "S? '>'" part
8519 */
8520 GROW;
8521 SKIP_BLANKS;
8522 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8523 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8524 } else
8525 NEXT1;
8526
8527 /*
8528 * [ WFC: Element Type Match ]
8529 * The Name in an element's end-tag must match the element type in the
8530 * start-tag.
8531 *
8532 */
8533 if (name != (xmlChar*)1) {
8534 if (name == NULL) name = BAD_CAST "unparsable";
8535 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8536 "Opening and ending tag mismatch: %s line %d and %s\n",
8537 ctxt->name, line, name);
8538 }
8539
8540 /*
8541 * SAX: End of Tag
8542 */
8543 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8544 (!ctxt->disableSAX))
8545 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8546
8547 namePop(ctxt);
8548 spacePop(ctxt);
8549 return;
8550 }
8551
8552 /**
8553 * xmlParseEndTag:
8554 * @ctxt: an XML parser context
8555 *
8556 * DEPRECATED: Internal function, don't use.
8557 *
8558 * parse an end of tag
8559 *
8560 * [42] ETag ::= '</' Name S? '>'
8561 *
8562 * With namespace
8563 *
8564 * [NS 9] ETag ::= '</' QName S? '>'
8565 */
8566
8567 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8568 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8569 xmlParseEndTag1(ctxt, 0);
8570 }
8571 #endif /* LIBXML_SAX1_ENABLED */
8572
8573 /************************************************************************
8574 * *
8575 * SAX 2 specific operations *
8576 * *
8577 ************************************************************************/
8578
8579 /**
8580 * xmlParseQNameHashed:
8581 * @ctxt: an XML parser context
8582 * @prefix: pointer to store the prefix part
8583 *
8584 * parse an XML Namespace QName
8585 *
8586 * [6] QName ::= (Prefix ':')? LocalPart
8587 * [7] Prefix ::= NCName
8588 * [8] LocalPart ::= NCName
8589 *
8590 * Returns the Name parsed or NULL
8591 */
8592
8593 static xmlHashedString
xmlParseQNameHashed(xmlParserCtxtPtr ctxt,xmlHashedString * prefix)8594 xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8595 xmlHashedString l, p;
8596 int start, isNCName = 0;
8597
8598 l.name = NULL;
8599 p.name = NULL;
8600
8601 GROW;
8602 start = CUR_PTR - BASE_PTR;
8603
8604 l = xmlParseNCName(ctxt);
8605 if (l.name != NULL) {
8606 isNCName = 1;
8607 if (CUR == ':') {
8608 NEXT;
8609 p = l;
8610 l = xmlParseNCName(ctxt);
8611 }
8612 }
8613 if ((l.name == NULL) || (CUR == ':')) {
8614 xmlChar *tmp;
8615
8616 l.name = NULL;
8617 p.name = NULL;
8618 if ((isNCName == 0) && (CUR != ':'))
8619 return(l);
8620 tmp = xmlParseNmtoken(ctxt);
8621 if (tmp != NULL)
8622 xmlFree(tmp);
8623 l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8624 CUR_PTR - (BASE_PTR + start));
8625 if (l.name == NULL) {
8626 xmlErrMemory(ctxt);
8627 return(l);
8628 }
8629 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8630 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8631 }
8632
8633 *prefix = p;
8634 return(l);
8635 }
8636
8637 /**
8638 * xmlParseQName:
8639 * @ctxt: an XML parser context
8640 * @prefix: pointer to store the prefix part
8641 *
8642 * parse an XML Namespace QName
8643 *
8644 * [6] QName ::= (Prefix ':')? LocalPart
8645 * [7] Prefix ::= NCName
8646 * [8] LocalPart ::= NCName
8647 *
8648 * Returns the Name parsed or NULL
8649 */
8650
8651 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8652 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8653 xmlHashedString n, p;
8654
8655 n = xmlParseQNameHashed(ctxt, &p);
8656 if (n.name == NULL)
8657 return(NULL);
8658 *prefix = p.name;
8659 return(n.name);
8660 }
8661
8662 /**
8663 * xmlParseQNameAndCompare:
8664 * @ctxt: an XML parser context
8665 * @name: the localname
8666 * @prefix: the prefix, if any.
8667 *
8668 * parse an XML name and compares for match
8669 * (specialized for endtag parsing)
8670 *
8671 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8672 * and the name for mismatch
8673 */
8674
8675 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8676 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8677 xmlChar const *prefix) {
8678 const xmlChar *cmp;
8679 const xmlChar *in;
8680 const xmlChar *ret;
8681 const xmlChar *prefix2;
8682
8683 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8684
8685 GROW;
8686 in = ctxt->input->cur;
8687
8688 cmp = prefix;
8689 while (*in != 0 && *in == *cmp) {
8690 ++in;
8691 ++cmp;
8692 }
8693 if ((*cmp == 0) && (*in == ':')) {
8694 in++;
8695 cmp = name;
8696 while (*in != 0 && *in == *cmp) {
8697 ++in;
8698 ++cmp;
8699 }
8700 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8701 /* success */
8702 ctxt->input->col += in - ctxt->input->cur;
8703 ctxt->input->cur = in;
8704 return((const xmlChar*) 1);
8705 }
8706 }
8707 /*
8708 * all strings coms from the dictionary, equality can be done directly
8709 */
8710 ret = xmlParseQName (ctxt, &prefix2);
8711 if (ret == NULL)
8712 return(NULL);
8713 if ((ret == name) && (prefix == prefix2))
8714 return((const xmlChar*) 1);
8715 return ret;
8716 }
8717
8718 /**
8719 * xmlParseAttribute2:
8720 * @ctxt: an XML parser context
8721 * @pref: the element prefix
8722 * @elem: the element name
8723 * @prefix: a xmlChar ** used to store the value of the attribute prefix
8724 * @value: a xmlChar ** used to store the value of the attribute
8725 * @len: an int * to save the length of the attribute
8726 * @alloc: an int * to indicate if the attribute was allocated
8727 *
8728 * parse an attribute in the new SAX2 framework.
8729 *
8730 * Returns the attribute name, and the value in *value, .
8731 */
8732
8733 static xmlHashedString
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,xmlHashedString * hprefix,xmlChar ** value,int * len,int * alloc)8734 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8735 const xmlChar * pref, const xmlChar * elem,
8736 xmlHashedString * hprefix, xmlChar ** value,
8737 int *len, int *alloc)
8738 {
8739 xmlHashedString hname;
8740 const xmlChar *prefix, *name;
8741 xmlChar *val = NULL, *internal_val = NULL;
8742 int normalize = 0;
8743 int isNamespace;
8744
8745 *value = NULL;
8746 GROW;
8747 hname = xmlParseQNameHashed(ctxt, hprefix);
8748 if (hname.name == NULL) {
8749 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8750 "error parsing attribute name\n");
8751 return(hname);
8752 }
8753 name = hname.name;
8754 if (hprefix->name != NULL)
8755 prefix = hprefix->name;
8756 else
8757 prefix = NULL;
8758
8759 /*
8760 * get the type if needed
8761 */
8762 if (ctxt->attsSpecial != NULL) {
8763 int type;
8764
8765 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
8766 pref, elem,
8767 prefix, name);
8768 if (type != 0)
8769 normalize = 1;
8770 }
8771
8772 /*
8773 * read the value
8774 */
8775 SKIP_BLANKS;
8776 if (RAW == '=') {
8777 NEXT;
8778 SKIP_BLANKS;
8779 isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8780 (prefix == ctxt->str_xmlns));
8781 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize,
8782 isNamespace);
8783 if (val == NULL)
8784 goto error;
8785 } else {
8786 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8787 "Specification mandates value for attribute %s\n",
8788 name);
8789 goto error;
8790 }
8791
8792 if (prefix == ctxt->str_xml) {
8793 /*
8794 * Check that xml:lang conforms to the specification
8795 * No more registered as an error, just generate a warning now
8796 * since this was deprecated in XML second edition
8797 */
8798 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8799 internal_val = xmlStrndup(val, *len);
8800 if (internal_val == NULL)
8801 goto mem_error;
8802 if (!xmlCheckLanguageID(internal_val)) {
8803 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8804 "Malformed value for xml:lang : %s\n",
8805 internal_val, NULL);
8806 }
8807 }
8808
8809 /*
8810 * Check that xml:space conforms to the specification
8811 */
8812 if (xmlStrEqual(name, BAD_CAST "space")) {
8813 internal_val = xmlStrndup(val, *len);
8814 if (internal_val == NULL)
8815 goto mem_error;
8816 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8817 *(ctxt->space) = 0;
8818 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8819 *(ctxt->space) = 1;
8820 else {
8821 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8822 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8823 internal_val, NULL);
8824 }
8825 }
8826 if (internal_val) {
8827 xmlFree(internal_val);
8828 }
8829 }
8830
8831 *value = val;
8832 return (hname);
8833
8834 mem_error:
8835 xmlErrMemory(ctxt);
8836 error:
8837 if ((val != NULL) && (*alloc != 0))
8838 xmlFree(val);
8839 return(hname);
8840 }
8841
8842 /**
8843 * xmlAttrHashInsert:
8844 * @ctxt: parser context
8845 * @size: size of the hash table
8846 * @name: attribute name
8847 * @uri: namespace uri
8848 * @hashValue: combined hash value of name and uri
8849 * @aindex: attribute index (this is a multiple of 5)
8850 *
8851 * Inserts a new attribute into the hash table.
8852 *
8853 * Returns INT_MAX if no existing attribute was found, the attribute
8854 * index if an attribute was found, -1 if a memory allocation failed.
8855 */
8856 static int
xmlAttrHashInsert(xmlParserCtxtPtr ctxt,unsigned size,const xmlChar * name,const xmlChar * uri,unsigned hashValue,int aindex)8857 xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8858 const xmlChar *uri, unsigned hashValue, int aindex) {
8859 xmlAttrHashBucket *table = ctxt->attrHash;
8860 xmlAttrHashBucket *bucket;
8861 unsigned hindex;
8862
8863 hindex = hashValue & (size - 1);
8864 bucket = &table[hindex];
8865
8866 while (bucket->index >= 0) {
8867 const xmlChar **atts = &ctxt->atts[bucket->index];
8868
8869 if (name == atts[0]) {
8870 int nsIndex = (int) (ptrdiff_t) atts[2];
8871
8872 if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8873 (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8874 (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8875 return(bucket->index);
8876 }
8877
8878 hindex++;
8879 bucket++;
8880 if (hindex >= size) {
8881 hindex = 0;
8882 bucket = table;
8883 }
8884 }
8885
8886 bucket->index = aindex;
8887
8888 return(INT_MAX);
8889 }
8890
8891 /**
8892 * xmlParseStartTag2:
8893 * @ctxt: an XML parser context
8894 *
8895 * Parse a start tag. Always consumes '<'.
8896 *
8897 * This routine is called when running SAX2 parsing
8898 *
8899 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8900 *
8901 * [ WFC: Unique Att Spec ]
8902 * No attribute name may appear more than once in the same start-tag or
8903 * empty-element tag.
8904 *
8905 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8906 *
8907 * [ WFC: Unique Att Spec ]
8908 * No attribute name may appear more than once in the same start-tag or
8909 * empty-element tag.
8910 *
8911 * With namespace:
8912 *
8913 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8914 *
8915 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8916 *
8917 * Returns the element name parsed
8918 */
8919
8920 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * nbNsPtr)8921 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8922 const xmlChar **URI, int *nbNsPtr) {
8923 xmlHashedString hlocalname;
8924 xmlHashedString hprefix;
8925 xmlHashedString hattname;
8926 xmlHashedString haprefix;
8927 const xmlChar *localname;
8928 const xmlChar *prefix;
8929 const xmlChar *attname;
8930 const xmlChar *aprefix;
8931 const xmlChar *uri;
8932 xmlChar *attvalue = NULL;
8933 const xmlChar **atts = ctxt->atts;
8934 unsigned attrHashSize = 0;
8935 int maxatts = ctxt->maxatts;
8936 int nratts, nbatts, nbdef;
8937 int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8938 int alloc = 0;
8939
8940 if (RAW != '<') return(NULL);
8941 NEXT1;
8942
8943 nbatts = 0;
8944 nratts = 0;
8945 nbdef = 0;
8946 nbNs = 0;
8947 nbTotalDef = 0;
8948 attval = 0;
8949
8950 if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8951 xmlErrMemory(ctxt);
8952 return(NULL);
8953 }
8954
8955 hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8956 if (hlocalname.name == NULL) {
8957 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8958 "StartTag: invalid element name\n");
8959 return(NULL);
8960 }
8961 localname = hlocalname.name;
8962 prefix = hprefix.name;
8963
8964 /*
8965 * Now parse the attributes, it ends up with the ending
8966 *
8967 * (S Attribute)* S?
8968 */
8969 SKIP_BLANKS;
8970 GROW;
8971
8972 /*
8973 * The ctxt->atts array will be ultimately passed to the SAX callback
8974 * containing five xmlChar pointers for each attribute:
8975 *
8976 * [0] attribute name
8977 * [1] attribute prefix
8978 * [2] namespace URI
8979 * [3] attribute value
8980 * [4] end of attribute value
8981 *
8982 * To save memory, we reuse this array temporarily and store integers
8983 * in these pointer variables.
8984 *
8985 * [0] attribute name
8986 * [1] attribute prefix
8987 * [2] hash value of attribute prefix, and later namespace index
8988 * [3] for non-allocated values: ptrdiff_t offset into input buffer
8989 * [4] for non-allocated values: ptrdiff_t offset into input buffer
8990 *
8991 * The ctxt->attallocs array contains an additional unsigned int for
8992 * each attribute, containing the hash value of the attribute name
8993 * and the alloc flag in bit 31.
8994 */
8995
8996 while (((RAW != '>') &&
8997 ((RAW != '/') || (NXT(1) != '>')) &&
8998 (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8999 int len = -1;
9000
9001 hattname = xmlParseAttribute2(ctxt, prefix, localname,
9002 &haprefix, &attvalue, &len,
9003 &alloc);
9004 if (hattname.name == NULL)
9005 break;
9006 if (attvalue == NULL)
9007 goto next_attr;
9008 attname = hattname.name;
9009 aprefix = haprefix.name;
9010 if (len < 0) len = xmlStrlen(attvalue);
9011
9012 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9013 xmlHashedString huri;
9014 xmlURIPtr parsedUri;
9015
9016 huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9017 uri = huri.name;
9018 if (uri == NULL) {
9019 xmlErrMemory(ctxt);
9020 goto next_attr;
9021 }
9022 if (*uri != 0) {
9023 if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9024 xmlErrMemory(ctxt);
9025 goto next_attr;
9026 }
9027 if (parsedUri == NULL) {
9028 xmlNsErr(ctxt, XML_WAR_NS_URI,
9029 "xmlns: '%s' is not a valid URI\n",
9030 uri, NULL, NULL);
9031 } else {
9032 if (parsedUri->scheme == NULL) {
9033 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9034 "xmlns: URI %s is not absolute\n",
9035 uri, NULL, NULL);
9036 }
9037 xmlFreeURI(parsedUri);
9038 }
9039 if (uri == ctxt->str_xml_ns) {
9040 if (attname != ctxt->str_xml) {
9041 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9042 "xml namespace URI cannot be the default namespace\n",
9043 NULL, NULL, NULL);
9044 }
9045 goto next_attr;
9046 }
9047 if ((len == 29) &&
9048 (xmlStrEqual(uri,
9049 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9050 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9051 "reuse of the xmlns namespace name is forbidden\n",
9052 NULL, NULL, NULL);
9053 goto next_attr;
9054 }
9055 }
9056
9057 if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
9058 nbNs++;
9059 } else if (aprefix == ctxt->str_xmlns) {
9060 xmlHashedString huri;
9061 xmlURIPtr parsedUri;
9062
9063 huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9064 uri = huri.name;
9065 if (uri == NULL) {
9066 xmlErrMemory(ctxt);
9067 goto next_attr;
9068 }
9069
9070 if (attname == ctxt->str_xml) {
9071 if (uri != ctxt->str_xml_ns) {
9072 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9073 "xml namespace prefix mapped to wrong URI\n",
9074 NULL, NULL, NULL);
9075 }
9076 /*
9077 * Do not keep a namespace definition node
9078 */
9079 goto next_attr;
9080 }
9081 if (uri == ctxt->str_xml_ns) {
9082 if (attname != ctxt->str_xml) {
9083 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9084 "xml namespace URI mapped to wrong prefix\n",
9085 NULL, NULL, NULL);
9086 }
9087 goto next_attr;
9088 }
9089 if (attname == ctxt->str_xmlns) {
9090 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9091 "redefinition of the xmlns prefix is forbidden\n",
9092 NULL, NULL, NULL);
9093 goto next_attr;
9094 }
9095 if ((len == 29) &&
9096 (xmlStrEqual(uri,
9097 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9098 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9099 "reuse of the xmlns namespace name is forbidden\n",
9100 NULL, NULL, NULL);
9101 goto next_attr;
9102 }
9103 if ((uri == NULL) || (uri[0] == 0)) {
9104 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9105 "xmlns:%s: Empty XML namespace is not allowed\n",
9106 attname, NULL, NULL);
9107 goto next_attr;
9108 } else {
9109 if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9110 xmlErrMemory(ctxt);
9111 goto next_attr;
9112 }
9113 if (parsedUri == NULL) {
9114 xmlNsErr(ctxt, XML_WAR_NS_URI,
9115 "xmlns:%s: '%s' is not a valid URI\n",
9116 attname, uri, NULL);
9117 } else {
9118 if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
9119 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9120 "xmlns:%s: URI %s is not absolute\n",
9121 attname, uri, NULL);
9122 }
9123 xmlFreeURI(parsedUri);
9124 }
9125 }
9126
9127 if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
9128 nbNs++;
9129 } else {
9130 /*
9131 * Populate attributes array, see above for repurposing
9132 * of xmlChar pointers.
9133 */
9134 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9135 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9136 goto next_attr;
9137 }
9138 maxatts = ctxt->maxatts;
9139 atts = ctxt->atts;
9140 }
9141 ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9142 ((unsigned) alloc << 31);
9143 atts[nbatts++] = attname;
9144 atts[nbatts++] = aprefix;
9145 atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue;
9146 if (alloc) {
9147 atts[nbatts++] = attvalue;
9148 attvalue += len;
9149 atts[nbatts++] = attvalue;
9150 } else {
9151 /*
9152 * attvalue points into the input buffer which can be
9153 * reallocated. Store differences to input->base instead.
9154 * The pointers will be reconstructed later.
9155 */
9156 atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9157 attvalue += len;
9158 atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9159 }
9160 /*
9161 * tag if some deallocation is needed
9162 */
9163 if (alloc != 0) attval = 1;
9164 attvalue = NULL; /* moved into atts */
9165 }
9166
9167 next_attr:
9168 if ((attvalue != NULL) && (alloc != 0)) {
9169 xmlFree(attvalue);
9170 attvalue = NULL;
9171 }
9172
9173 GROW
9174 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9175 break;
9176 if (SKIP_BLANKS == 0) {
9177 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9178 "attributes construct error\n");
9179 break;
9180 }
9181 GROW;
9182 }
9183
9184 /*
9185 * Namespaces from default attributes
9186 */
9187 if (ctxt->attsDefault != NULL) {
9188 xmlDefAttrsPtr defaults;
9189
9190 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9191 if (defaults != NULL) {
9192 for (i = 0; i < defaults->nbAttrs; i++) {
9193 xmlDefAttr *attr = &defaults->attrs[i];
9194
9195 attname = attr->name.name;
9196 aprefix = attr->prefix.name;
9197
9198 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9199 xmlParserEntityCheck(ctxt, attr->expandedSize);
9200
9201 if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9202 nbNs++;
9203 } else if (aprefix == ctxt->str_xmlns) {
9204 xmlParserEntityCheck(ctxt, attr->expandedSize);
9205
9206 if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9207 NULL, 1) > 0)
9208 nbNs++;
9209 } else {
9210 nbTotalDef += 1;
9211 }
9212 }
9213 }
9214 }
9215
9216 /*
9217 * Resolve attribute namespaces
9218 */
9219 for (i = 0; i < nbatts; i += 5) {
9220 attname = atts[i];
9221 aprefix = atts[i+1];
9222
9223 /*
9224 * The default namespace does not apply to attribute names.
9225 */
9226 if (aprefix == NULL) {
9227 nsIndex = NS_INDEX_EMPTY;
9228 } else if (aprefix == ctxt->str_xml) {
9229 nsIndex = NS_INDEX_XML;
9230 } else {
9231 haprefix.name = aprefix;
9232 haprefix.hashValue = (size_t) atts[i+2];
9233 nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9234
9235 if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9236 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9237 "Namespace prefix %s for %s on %s is not defined\n",
9238 aprefix, attname, localname);
9239 nsIndex = NS_INDEX_EMPTY;
9240 }
9241 }
9242
9243 atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex;
9244 }
9245
9246 /*
9247 * Maximum number of attributes including default attributes.
9248 */
9249 maxAtts = nratts + nbTotalDef;
9250
9251 /*
9252 * Verify that attribute names are unique.
9253 */
9254 if (maxAtts > 1) {
9255 attrHashSize = 4;
9256 while (attrHashSize / 2 < (unsigned) maxAtts)
9257 attrHashSize *= 2;
9258
9259 if (attrHashSize > ctxt->attrHashMax) {
9260 xmlAttrHashBucket *tmp;
9261
9262 tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9263 if (tmp == NULL) {
9264 xmlErrMemory(ctxt);
9265 goto done;
9266 }
9267
9268 ctxt->attrHash = tmp;
9269 ctxt->attrHashMax = attrHashSize;
9270 }
9271
9272 memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9273
9274 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9275 const xmlChar *nsuri;
9276 unsigned hashValue, nameHashValue, uriHashValue;
9277 int res;
9278
9279 attname = atts[i];
9280 aprefix = atts[i+1];
9281 nsIndex = (ptrdiff_t) atts[i+2];
9282 /* Hash values always have bit 31 set, see dict.c */
9283 nameHashValue = ctxt->attallocs[j] | 0x80000000;
9284
9285 if (nsIndex == NS_INDEX_EMPTY) {
9286 /*
9287 * Prefix with empty namespace means an undeclared
9288 * prefix which was already reported above.
9289 */
9290 if (aprefix != NULL)
9291 continue;
9292 nsuri = NULL;
9293 uriHashValue = URI_HASH_EMPTY;
9294 } else if (nsIndex == NS_INDEX_XML) {
9295 nsuri = ctxt->str_xml_ns;
9296 uriHashValue = URI_HASH_XML;
9297 } else {
9298 nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9299 uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9300 }
9301
9302 hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9303 res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9304 hashValue, i);
9305 if (res < 0)
9306 continue;
9307
9308 /*
9309 * [ WFC: Unique Att Spec ]
9310 * No attribute name may appear more than once in the same
9311 * start-tag or empty-element tag.
9312 * As extended by the Namespace in XML REC.
9313 */
9314 if (res < INT_MAX) {
9315 if (aprefix == atts[res+1]) {
9316 xmlErrAttributeDup(ctxt, aprefix, attname);
9317 } else {
9318 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9319 "Namespaced Attribute %s in '%s' redefined\n",
9320 attname, nsuri, NULL);
9321 }
9322 }
9323 }
9324 }
9325
9326 /*
9327 * Default attributes
9328 */
9329 if (ctxt->attsDefault != NULL) {
9330 xmlDefAttrsPtr defaults;
9331
9332 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9333 if (defaults != NULL) {
9334 for (i = 0; i < defaults->nbAttrs; i++) {
9335 xmlDefAttr *attr = &defaults->attrs[i];
9336 const xmlChar *nsuri;
9337 unsigned hashValue, uriHashValue;
9338 int res;
9339
9340 attname = attr->name.name;
9341 aprefix = attr->prefix.name;
9342
9343 if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9344 continue;
9345 if (aprefix == ctxt->str_xmlns)
9346 continue;
9347
9348 if (aprefix == NULL) {
9349 nsIndex = NS_INDEX_EMPTY;
9350 nsuri = NULL;
9351 uriHashValue = URI_HASH_EMPTY;
9352 } if (aprefix == ctxt->str_xml) {
9353 nsIndex = NS_INDEX_XML;
9354 nsuri = ctxt->str_xml_ns;
9355 uriHashValue = URI_HASH_XML;
9356 } else if (aprefix != NULL) {
9357 nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9358 if ((nsIndex == INT_MAX) ||
9359 (nsIndex < ctxt->nsdb->minNsIndex)) {
9360 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9361 "Namespace prefix %s for %s on %s is not "
9362 "defined\n",
9363 aprefix, attname, localname);
9364 nsIndex = NS_INDEX_EMPTY;
9365 nsuri = NULL;
9366 uriHashValue = URI_HASH_EMPTY;
9367 } else {
9368 nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9369 uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9370 }
9371 }
9372
9373 /*
9374 * Check whether the attribute exists
9375 */
9376 if (maxAtts > 1) {
9377 hashValue = xmlDictCombineHash(attr->name.hashValue,
9378 uriHashValue);
9379 res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9380 hashValue, nbatts);
9381 if (res < 0)
9382 continue;
9383 if (res < INT_MAX) {
9384 if (aprefix == atts[res+1])
9385 continue;
9386 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9387 "Namespaced Attribute %s in '%s' redefined\n",
9388 attname, nsuri, NULL);
9389 }
9390 }
9391
9392 xmlParserEntityCheck(ctxt, attr->expandedSize);
9393
9394 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9395 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9396 localname = NULL;
9397 goto done;
9398 }
9399 maxatts = ctxt->maxatts;
9400 atts = ctxt->atts;
9401 }
9402
9403 atts[nbatts++] = attname;
9404 atts[nbatts++] = aprefix;
9405 atts[nbatts++] = (const xmlChar *) (ptrdiff_t) nsIndex;
9406 atts[nbatts++] = attr->value.name;
9407 atts[nbatts++] = attr->valueEnd;
9408 if ((ctxt->standalone == 1) && (attr->external != 0)) {
9409 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9410 "standalone: attribute %s on %s defaulted "
9411 "from external subset\n",
9412 attname, localname);
9413 }
9414 nbdef++;
9415 }
9416 }
9417 }
9418
9419 /*
9420 * Reconstruct attribute pointers
9421 */
9422 for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9423 /* namespace URI */
9424 nsIndex = (ptrdiff_t) atts[i+2];
9425 if (nsIndex == INT_MAX)
9426 atts[i+2] = NULL;
9427 else if (nsIndex == INT_MAX - 1)
9428 atts[i+2] = ctxt->str_xml_ns;
9429 else
9430 atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9431
9432 if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9433 atts[i+3] = BASE_PTR + (ptrdiff_t) atts[i+3]; /* value */
9434 atts[i+4] = BASE_PTR + (ptrdiff_t) atts[i+4]; /* valuend */
9435 }
9436 }
9437
9438 uri = xmlParserNsLookupUri(ctxt, &hprefix);
9439 if ((prefix != NULL) && (uri == NULL)) {
9440 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9441 "Namespace prefix %s on %s is not defined\n",
9442 prefix, localname, NULL);
9443 }
9444 *pref = prefix;
9445 *URI = uri;
9446
9447 /*
9448 * SAX callback
9449 */
9450 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9451 (!ctxt->disableSAX)) {
9452 if (nbNs > 0)
9453 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9454 nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9455 nbatts / 5, nbdef, atts);
9456 else
9457 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9458 0, NULL, nbatts / 5, nbdef, atts);
9459 }
9460
9461 done:
9462 /*
9463 * Free allocated attribute values
9464 */
9465 if (attval != 0) {
9466 for (i = 0, j = 0; j < nratts; i += 5, j++)
9467 if (ctxt->attallocs[j] & 0x80000000)
9468 xmlFree((xmlChar *) atts[i+3]);
9469 }
9470
9471 *nbNsPtr = nbNs;
9472 return(localname);
9473 }
9474
9475 /**
9476 * xmlParseEndTag2:
9477 * @ctxt: an XML parser context
9478 * @line: line of the start tag
9479 * @nsNr: number of namespaces on the start tag
9480 *
9481 * Parse an end tag. Always consumes '</'.
9482 *
9483 * [42] ETag ::= '</' Name S? '>'
9484 *
9485 * With namespace
9486 *
9487 * [NS 9] ETag ::= '</' QName S? '>'
9488 */
9489
9490 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlStartTag * tag)9491 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9492 const xmlChar *name;
9493
9494 GROW;
9495 if ((RAW != '<') || (NXT(1) != '/')) {
9496 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9497 return;
9498 }
9499 SKIP(2);
9500
9501 if (tag->prefix == NULL)
9502 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9503 else
9504 name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9505
9506 /*
9507 * We should definitely be at the ending "S? '>'" part
9508 */
9509 GROW;
9510 SKIP_BLANKS;
9511 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9512 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9513 } else
9514 NEXT1;
9515
9516 /*
9517 * [ WFC: Element Type Match ]
9518 * The Name in an element's end-tag must match the element type in the
9519 * start-tag.
9520 *
9521 */
9522 if (name != (xmlChar*)1) {
9523 if (name == NULL) name = BAD_CAST "unparsable";
9524 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9525 "Opening and ending tag mismatch: %s line %d and %s\n",
9526 ctxt->name, tag->line, name);
9527 }
9528
9529 /*
9530 * SAX: End of Tag
9531 */
9532 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9533 (!ctxt->disableSAX))
9534 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9535 tag->URI);
9536
9537 spacePop(ctxt);
9538 if (tag->nsNr != 0)
9539 xmlParserNsPop(ctxt, tag->nsNr);
9540 }
9541
9542 /**
9543 * xmlParseCDSect:
9544 * @ctxt: an XML parser context
9545 *
9546 * DEPRECATED: Internal function, don't use.
9547 *
9548 * Parse escaped pure raw content. Always consumes '<!['.
9549 *
9550 * [18] CDSect ::= CDStart CData CDEnd
9551 *
9552 * [19] CDStart ::= '<![CDATA['
9553 *
9554 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9555 *
9556 * [21] CDEnd ::= ']]>'
9557 */
9558 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9559 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9560 xmlChar *buf = NULL;
9561 int len = 0;
9562 int size = XML_PARSER_BUFFER_SIZE;
9563 int r, rl;
9564 int s, sl;
9565 int cur, l;
9566 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9567 XML_MAX_HUGE_LENGTH :
9568 XML_MAX_TEXT_LENGTH;
9569
9570 if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9571 return;
9572 SKIP(3);
9573
9574 if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9575 return;
9576 SKIP(6);
9577
9578 r = CUR_CHAR(rl);
9579 if (!IS_CHAR(r)) {
9580 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9581 goto out;
9582 }
9583 NEXTL(rl);
9584 s = CUR_CHAR(sl);
9585 if (!IS_CHAR(s)) {
9586 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9587 goto out;
9588 }
9589 NEXTL(sl);
9590 cur = CUR_CHAR(l);
9591 buf = (xmlChar *) xmlMallocAtomic(size);
9592 if (buf == NULL) {
9593 xmlErrMemory(ctxt);
9594 goto out;
9595 }
9596 while (IS_CHAR(cur) &&
9597 ((r != ']') || (s != ']') || (cur != '>'))) {
9598 if (len + 5 >= size) {
9599 xmlChar *tmp;
9600
9601 tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9602 if (tmp == NULL) {
9603 xmlErrMemory(ctxt);
9604 goto out;
9605 }
9606 buf = tmp;
9607 size *= 2;
9608 }
9609 COPY_BUF(buf, len, r);
9610 if (len > maxLength) {
9611 xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9612 "CData section too big found\n");
9613 goto out;
9614 }
9615 r = s;
9616 rl = sl;
9617 s = cur;
9618 sl = l;
9619 NEXTL(l);
9620 cur = CUR_CHAR(l);
9621 }
9622 buf[len] = 0;
9623 if (cur != '>') {
9624 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9625 "CData section not finished\n%.50s\n", buf);
9626 goto out;
9627 }
9628 NEXTL(l);
9629
9630 /*
9631 * OK the buffer is to be consumed as cdata.
9632 */
9633 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9634 if (ctxt->sax->cdataBlock != NULL)
9635 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9636 else if (ctxt->sax->characters != NULL)
9637 ctxt->sax->characters(ctxt->userData, buf, len);
9638 }
9639
9640 out:
9641 xmlFree(buf);
9642 }
9643
9644 /**
9645 * xmlParseContentInternal:
9646 * @ctxt: an XML parser context
9647 *
9648 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9649 * unexpected EOF to the caller.
9650 */
9651
9652 static void
xmlParseContentInternal(xmlParserCtxtPtr ctxt)9653 xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9654 int oldNameNr = ctxt->nameNr;
9655 int oldSpaceNr = ctxt->spaceNr;
9656 int oldNodeNr = ctxt->nodeNr;
9657
9658 GROW;
9659 while ((ctxt->input->cur < ctxt->input->end) &&
9660 (PARSER_STOPPED(ctxt) == 0)) {
9661 const xmlChar *cur = ctxt->input->cur;
9662
9663 /*
9664 * First case : a Processing Instruction.
9665 */
9666 if ((*cur == '<') && (cur[1] == '?')) {
9667 xmlParsePI(ctxt);
9668 }
9669
9670 /*
9671 * Second case : a CDSection
9672 */
9673 /* 2.6.0 test was *cur not RAW */
9674 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9675 xmlParseCDSect(ctxt);
9676 }
9677
9678 /*
9679 * Third case : a comment
9680 */
9681 else if ((*cur == '<') && (NXT(1) == '!') &&
9682 (NXT(2) == '-') && (NXT(3) == '-')) {
9683 xmlParseComment(ctxt);
9684 }
9685
9686 /*
9687 * Fourth case : a sub-element.
9688 */
9689 else if (*cur == '<') {
9690 if (NXT(1) == '/') {
9691 if (ctxt->nameNr <= oldNameNr)
9692 break;
9693 xmlParseElementEnd(ctxt);
9694 } else {
9695 xmlParseElementStart(ctxt);
9696 }
9697 }
9698
9699 /*
9700 * Fifth case : a reference. If if has not been resolved,
9701 * parsing returns it's Name, create the node
9702 */
9703
9704 else if (*cur == '&') {
9705 xmlParseReference(ctxt);
9706 }
9707
9708 /*
9709 * Last case, text. Note that References are handled directly.
9710 */
9711 else {
9712 xmlParseCharDataInternal(ctxt, 0);
9713 }
9714
9715 SHRINK;
9716 GROW;
9717 }
9718
9719 if ((ctxt->nameNr > oldNameNr) &&
9720 (ctxt->input->cur >= ctxt->input->end) &&
9721 (ctxt->wellFormed)) {
9722 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9723 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9724 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9725 "Premature end of data in tag %s line %d\n",
9726 name, line, NULL);
9727 }
9728
9729 /*
9730 * Clean up in error case
9731 */
9732
9733 while (ctxt->nodeNr > oldNodeNr)
9734 nodePop(ctxt);
9735
9736 while (ctxt->nameNr > oldNameNr) {
9737 xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9738
9739 if (tag->nsNr != 0)
9740 xmlParserNsPop(ctxt, tag->nsNr);
9741
9742 namePop(ctxt);
9743 }
9744
9745 while (ctxt->spaceNr > oldSpaceNr)
9746 spacePop(ctxt);
9747 }
9748
9749 /**
9750 * xmlParseContent:
9751 * @ctxt: an XML parser context
9752 *
9753 * Parse XML element content. This is useful if you're only interested
9754 * in custom SAX callbacks. If you want a node list, use
9755 * xmlParseInNodeContext.
9756 */
9757 void
xmlParseContent(xmlParserCtxtPtr ctxt)9758 xmlParseContent(xmlParserCtxtPtr ctxt) {
9759 if ((ctxt == NULL) || (ctxt->input == NULL))
9760 return;
9761
9762 xmlCtxtInitializeLate(ctxt);
9763
9764 xmlParseContentInternal(ctxt);
9765
9766 if (ctxt->input->cur < ctxt->input->end)
9767 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
9768 }
9769
9770 /**
9771 * xmlParseElement:
9772 * @ctxt: an XML parser context
9773 *
9774 * DEPRECATED: Internal function, don't use.
9775 *
9776 * parse an XML element
9777 *
9778 * [39] element ::= EmptyElemTag | STag content ETag
9779 *
9780 * [ WFC: Element Type Match ]
9781 * The Name in an element's end-tag must match the element type in the
9782 * start-tag.
9783 *
9784 */
9785
9786 void
xmlParseElement(xmlParserCtxtPtr ctxt)9787 xmlParseElement(xmlParserCtxtPtr ctxt) {
9788 if (xmlParseElementStart(ctxt) != 0)
9789 return;
9790
9791 xmlParseContentInternal(ctxt);
9792
9793 if (ctxt->input->cur >= ctxt->input->end) {
9794 if (ctxt->wellFormed) {
9795 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9796 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9797 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9798 "Premature end of data in tag %s line %d\n",
9799 name, line, NULL);
9800 }
9801 return;
9802 }
9803
9804 xmlParseElementEnd(ctxt);
9805 }
9806
9807 /**
9808 * xmlParseElementStart:
9809 * @ctxt: an XML parser context
9810 *
9811 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9812 * opening tag was parsed, 1 if an empty element was parsed.
9813 *
9814 * Always consumes '<'.
9815 */
9816 static int
xmlParseElementStart(xmlParserCtxtPtr ctxt)9817 xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9818 int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9819 const xmlChar *name;
9820 const xmlChar *prefix = NULL;
9821 const xmlChar *URI = NULL;
9822 xmlParserNodeInfo node_info;
9823 int line;
9824 xmlNodePtr cur;
9825 int nbNs = 0;
9826
9827 if (ctxt->nameNr > maxDepth) {
9828 xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9829 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9830 ctxt->nameNr);
9831 xmlHaltParser(ctxt);
9832 return(-1);
9833 }
9834
9835 /* Capture start position */
9836 if (ctxt->record_info) {
9837 node_info.begin_pos = ctxt->input->consumed +
9838 (CUR_PTR - ctxt->input->base);
9839 node_info.begin_line = ctxt->input->line;
9840 }
9841
9842 if (ctxt->spaceNr == 0)
9843 spacePush(ctxt, -1);
9844 else if (*ctxt->space == -2)
9845 spacePush(ctxt, -1);
9846 else
9847 spacePush(ctxt, *ctxt->space);
9848
9849 line = ctxt->input->line;
9850 #ifdef LIBXML_SAX1_ENABLED
9851 if (ctxt->sax2)
9852 #endif /* LIBXML_SAX1_ENABLED */
9853 name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9854 #ifdef LIBXML_SAX1_ENABLED
9855 else
9856 name = xmlParseStartTag(ctxt);
9857 #endif /* LIBXML_SAX1_ENABLED */
9858 if (name == NULL) {
9859 spacePop(ctxt);
9860 return(-1);
9861 }
9862 nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9863 cur = ctxt->node;
9864
9865 #ifdef LIBXML_VALID_ENABLED
9866 /*
9867 * [ VC: Root Element Type ]
9868 * The Name in the document type declaration must match the element
9869 * type of the root element.
9870 */
9871 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9872 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9873 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9874 #endif /* LIBXML_VALID_ENABLED */
9875
9876 /*
9877 * Check for an Empty Element.
9878 */
9879 if ((RAW == '/') && (NXT(1) == '>')) {
9880 SKIP(2);
9881 if (ctxt->sax2) {
9882 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9883 (!ctxt->disableSAX))
9884 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9885 #ifdef LIBXML_SAX1_ENABLED
9886 } else {
9887 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9888 (!ctxt->disableSAX))
9889 ctxt->sax->endElement(ctxt->userData, name);
9890 #endif /* LIBXML_SAX1_ENABLED */
9891 }
9892 namePop(ctxt);
9893 spacePop(ctxt);
9894 if (nbNs > 0)
9895 xmlParserNsPop(ctxt, nbNs);
9896 if (cur != NULL && ctxt->record_info) {
9897 node_info.node = cur;
9898 node_info.end_pos = ctxt->input->consumed +
9899 (CUR_PTR - ctxt->input->base);
9900 node_info.end_line = ctxt->input->line;
9901 xmlParserAddNodeInfo(ctxt, &node_info);
9902 }
9903 return(1);
9904 }
9905 if (RAW == '>') {
9906 NEXT1;
9907 if (cur != NULL && ctxt->record_info) {
9908 node_info.node = cur;
9909 node_info.end_pos = 0;
9910 node_info.end_line = 0;
9911 xmlParserAddNodeInfo(ctxt, &node_info);
9912 }
9913 } else {
9914 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9915 "Couldn't find end of Start Tag %s line %d\n",
9916 name, line, NULL);
9917
9918 /*
9919 * end of parsing of this node.
9920 */
9921 nodePop(ctxt);
9922 namePop(ctxt);
9923 spacePop(ctxt);
9924 if (nbNs > 0)
9925 xmlParserNsPop(ctxt, nbNs);
9926 return(-1);
9927 }
9928
9929 return(0);
9930 }
9931
9932 /**
9933 * xmlParseElementEnd:
9934 * @ctxt: an XML parser context
9935 *
9936 * Parse the end of an XML element. Always consumes '</'.
9937 */
9938 static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt)9939 xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9940 xmlNodePtr cur = ctxt->node;
9941
9942 if (ctxt->nameNr <= 0) {
9943 if ((RAW == '<') && (NXT(1) == '/'))
9944 SKIP(2);
9945 return;
9946 }
9947
9948 /*
9949 * parse the end of tag: '</' should be here.
9950 */
9951 if (ctxt->sax2) {
9952 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9953 namePop(ctxt);
9954 }
9955 #ifdef LIBXML_SAX1_ENABLED
9956 else
9957 xmlParseEndTag1(ctxt, 0);
9958 #endif /* LIBXML_SAX1_ENABLED */
9959
9960 /*
9961 * Capture end position
9962 */
9963 if (cur != NULL && ctxt->record_info) {
9964 xmlParserNodeInfoPtr node_info;
9965
9966 node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
9967 if (node_info != NULL) {
9968 node_info->end_pos = ctxt->input->consumed +
9969 (CUR_PTR - ctxt->input->base);
9970 node_info->end_line = ctxt->input->line;
9971 }
9972 }
9973 }
9974
9975 /**
9976 * xmlParseVersionNum:
9977 * @ctxt: an XML parser context
9978 *
9979 * DEPRECATED: Internal function, don't use.
9980 *
9981 * parse the XML version value.
9982 *
9983 * [26] VersionNum ::= '1.' [0-9]+
9984 *
9985 * In practice allow [0-9].[0-9]+ at that level
9986 *
9987 * Returns the string giving the XML version number, or NULL
9988 */
9989 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)9990 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9991 xmlChar *buf = NULL;
9992 int len = 0;
9993 int size = 10;
9994 xmlChar cur;
9995
9996 buf = (xmlChar *) xmlMallocAtomic(size);
9997 if (buf == NULL) {
9998 xmlErrMemory(ctxt);
9999 return(NULL);
10000 }
10001 cur = CUR;
10002 if (!((cur >= '0') && (cur <= '9'))) {
10003 xmlFree(buf);
10004 return(NULL);
10005 }
10006 buf[len++] = cur;
10007 NEXT;
10008 cur=CUR;
10009 if (cur != '.') {
10010 xmlFree(buf);
10011 return(NULL);
10012 }
10013 buf[len++] = cur;
10014 NEXT;
10015 cur=CUR;
10016 while ((cur >= '0') && (cur <= '9')) {
10017 if (len + 1 >= size) {
10018 xmlChar *tmp;
10019
10020 size *= 2;
10021 tmp = (xmlChar *) xmlRealloc(buf, size);
10022 if (tmp == NULL) {
10023 xmlFree(buf);
10024 xmlErrMemory(ctxt);
10025 return(NULL);
10026 }
10027 buf = tmp;
10028 }
10029 buf[len++] = cur;
10030 NEXT;
10031 cur=CUR;
10032 }
10033 buf[len] = 0;
10034 return(buf);
10035 }
10036
10037 /**
10038 * xmlParseVersionInfo:
10039 * @ctxt: an XML parser context
10040 *
10041 * DEPRECATED: Internal function, don't use.
10042 *
10043 * parse the XML version.
10044 *
10045 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10046 *
10047 * [25] Eq ::= S? '=' S?
10048 *
10049 * Returns the version string, e.g. "1.0"
10050 */
10051
10052 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10053 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10054 xmlChar *version = NULL;
10055
10056 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10057 SKIP(7);
10058 SKIP_BLANKS;
10059 if (RAW != '=') {
10060 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10061 return(NULL);
10062 }
10063 NEXT;
10064 SKIP_BLANKS;
10065 if (RAW == '"') {
10066 NEXT;
10067 version = xmlParseVersionNum(ctxt);
10068 if (RAW != '"') {
10069 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10070 } else
10071 NEXT;
10072 } else if (RAW == '\''){
10073 NEXT;
10074 version = xmlParseVersionNum(ctxt);
10075 if (RAW != '\'') {
10076 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10077 } else
10078 NEXT;
10079 } else {
10080 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10081 }
10082 }
10083 return(version);
10084 }
10085
10086 /**
10087 * xmlParseEncName:
10088 * @ctxt: an XML parser context
10089 *
10090 * DEPRECATED: Internal function, don't use.
10091 *
10092 * parse the XML encoding name
10093 *
10094 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10095 *
10096 * Returns the encoding name value or NULL
10097 */
10098 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10099 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10100 xmlChar *buf = NULL;
10101 int len = 0;
10102 int size = 10;
10103 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10104 XML_MAX_TEXT_LENGTH :
10105 XML_MAX_NAME_LENGTH;
10106 xmlChar cur;
10107
10108 cur = CUR;
10109 if (((cur >= 'a') && (cur <= 'z')) ||
10110 ((cur >= 'A') && (cur <= 'Z'))) {
10111 buf = (xmlChar *) xmlMallocAtomic(size);
10112 if (buf == NULL) {
10113 xmlErrMemory(ctxt);
10114 return(NULL);
10115 }
10116
10117 buf[len++] = cur;
10118 NEXT;
10119 cur = CUR;
10120 while (((cur >= 'a') && (cur <= 'z')) ||
10121 ((cur >= 'A') && (cur <= 'Z')) ||
10122 ((cur >= '0') && (cur <= '9')) ||
10123 (cur == '.') || (cur == '_') ||
10124 (cur == '-')) {
10125 if (len + 1 >= size) {
10126 xmlChar *tmp;
10127
10128 size *= 2;
10129 tmp = (xmlChar *) xmlRealloc(buf, size);
10130 if (tmp == NULL) {
10131 xmlErrMemory(ctxt);
10132 xmlFree(buf);
10133 return(NULL);
10134 }
10135 buf = tmp;
10136 }
10137 buf[len++] = cur;
10138 if (len > maxLength) {
10139 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10140 xmlFree(buf);
10141 return(NULL);
10142 }
10143 NEXT;
10144 cur = CUR;
10145 }
10146 buf[len] = 0;
10147 } else {
10148 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10149 }
10150 return(buf);
10151 }
10152
10153 /**
10154 * xmlParseEncodingDecl:
10155 * @ctxt: an XML parser context
10156 *
10157 * DEPRECATED: Internal function, don't use.
10158 *
10159 * parse the XML encoding declaration
10160 *
10161 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10162 *
10163 * this setups the conversion filters.
10164 *
10165 * Returns the encoding value or NULL
10166 */
10167
10168 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10169 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10170 xmlChar *encoding = NULL;
10171
10172 SKIP_BLANKS;
10173 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10174 return(NULL);
10175
10176 SKIP(8);
10177 SKIP_BLANKS;
10178 if (RAW != '=') {
10179 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10180 return(NULL);
10181 }
10182 NEXT;
10183 SKIP_BLANKS;
10184 if (RAW == '"') {
10185 NEXT;
10186 encoding = xmlParseEncName(ctxt);
10187 if (RAW != '"') {
10188 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10189 xmlFree((xmlChar *) encoding);
10190 return(NULL);
10191 } else
10192 NEXT;
10193 } else if (RAW == '\''){
10194 NEXT;
10195 encoding = xmlParseEncName(ctxt);
10196 if (RAW != '\'') {
10197 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10198 xmlFree((xmlChar *) encoding);
10199 return(NULL);
10200 } else
10201 NEXT;
10202 } else {
10203 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10204 }
10205
10206 if (encoding == NULL)
10207 return(NULL);
10208
10209 xmlSetDeclaredEncoding(ctxt, encoding);
10210
10211 return(ctxt->encoding);
10212 }
10213
10214 /**
10215 * xmlParseSDDecl:
10216 * @ctxt: an XML parser context
10217 *
10218 * DEPRECATED: Internal function, don't use.
10219 *
10220 * parse the XML standalone declaration
10221 *
10222 * [32] SDDecl ::= S 'standalone' Eq
10223 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10224 *
10225 * [ VC: Standalone Document Declaration ]
10226 * TODO The standalone document declaration must have the value "no"
10227 * if any external markup declarations contain declarations of:
10228 * - attributes with default values, if elements to which these
10229 * attributes apply appear in the document without specifications
10230 * of values for these attributes, or
10231 * - entities (other than amp, lt, gt, apos, quot), if references
10232 * to those entities appear in the document, or
10233 * - attributes with values subject to normalization, where the
10234 * attribute appears in the document with a value which will change
10235 * as a result of normalization, or
10236 * - element types with element content, if white space occurs directly
10237 * within any instance of those types.
10238 *
10239 * Returns:
10240 * 1 if standalone="yes"
10241 * 0 if standalone="no"
10242 * -2 if standalone attribute is missing or invalid
10243 * (A standalone value of -2 means that the XML declaration was found,
10244 * but no value was specified for the standalone attribute).
10245 */
10246
10247 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10248 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10249 int standalone = -2;
10250
10251 SKIP_BLANKS;
10252 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10253 SKIP(10);
10254 SKIP_BLANKS;
10255 if (RAW != '=') {
10256 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10257 return(standalone);
10258 }
10259 NEXT;
10260 SKIP_BLANKS;
10261 if (RAW == '\''){
10262 NEXT;
10263 if ((RAW == 'n') && (NXT(1) == 'o')) {
10264 standalone = 0;
10265 SKIP(2);
10266 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10267 (NXT(2) == 's')) {
10268 standalone = 1;
10269 SKIP(3);
10270 } else {
10271 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10272 }
10273 if (RAW != '\'') {
10274 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10275 } else
10276 NEXT;
10277 } else if (RAW == '"'){
10278 NEXT;
10279 if ((RAW == 'n') && (NXT(1) == 'o')) {
10280 standalone = 0;
10281 SKIP(2);
10282 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10283 (NXT(2) == 's')) {
10284 standalone = 1;
10285 SKIP(3);
10286 } else {
10287 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10288 }
10289 if (RAW != '"') {
10290 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10291 } else
10292 NEXT;
10293 } else {
10294 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10295 }
10296 }
10297 return(standalone);
10298 }
10299
10300 /**
10301 * xmlParseXMLDecl:
10302 * @ctxt: an XML parser context
10303 *
10304 * DEPRECATED: Internal function, don't use.
10305 *
10306 * parse an XML declaration header
10307 *
10308 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10309 */
10310
10311 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10312 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10313 xmlChar *version;
10314
10315 /*
10316 * This value for standalone indicates that the document has an
10317 * XML declaration but it does not have a standalone attribute.
10318 * It will be overwritten later if a standalone attribute is found.
10319 */
10320
10321 ctxt->standalone = -2;
10322
10323 /*
10324 * We know that '<?xml' is here.
10325 */
10326 SKIP(5);
10327
10328 if (!IS_BLANK_CH(RAW)) {
10329 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10330 "Blank needed after '<?xml'\n");
10331 }
10332 SKIP_BLANKS;
10333
10334 /*
10335 * We must have the VersionInfo here.
10336 */
10337 version = xmlParseVersionInfo(ctxt);
10338 if (version == NULL) {
10339 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10340 } else {
10341 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10342 /*
10343 * Changed here for XML-1.0 5th edition
10344 */
10345 if (ctxt->options & XML_PARSE_OLD10) {
10346 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10347 "Unsupported version '%s'\n",
10348 version);
10349 } else {
10350 if ((version[0] == '1') && ((version[1] == '.'))) {
10351 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10352 "Unsupported version '%s'\n",
10353 version, NULL);
10354 } else {
10355 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10356 "Unsupported version '%s'\n",
10357 version);
10358 }
10359 }
10360 }
10361 if (ctxt->version != NULL)
10362 xmlFree((void *) ctxt->version);
10363 ctxt->version = version;
10364 }
10365
10366 /*
10367 * We may have the encoding declaration
10368 */
10369 if (!IS_BLANK_CH(RAW)) {
10370 if ((RAW == '?') && (NXT(1) == '>')) {
10371 SKIP(2);
10372 return;
10373 }
10374 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10375 }
10376 xmlParseEncodingDecl(ctxt);
10377
10378 /*
10379 * We may have the standalone status.
10380 */
10381 if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10382 if ((RAW == '?') && (NXT(1) == '>')) {
10383 SKIP(2);
10384 return;
10385 }
10386 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10387 }
10388
10389 /*
10390 * We can grow the input buffer freely at that point
10391 */
10392 GROW;
10393
10394 SKIP_BLANKS;
10395 ctxt->standalone = xmlParseSDDecl(ctxt);
10396
10397 SKIP_BLANKS;
10398 if ((RAW == '?') && (NXT(1) == '>')) {
10399 SKIP(2);
10400 } else if (RAW == '>') {
10401 /* Deprecated old WD ... */
10402 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10403 NEXT;
10404 } else {
10405 int c;
10406
10407 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10408 while ((PARSER_STOPPED(ctxt) == 0) &&
10409 ((c = CUR) != 0)) {
10410 NEXT;
10411 if (c == '>')
10412 break;
10413 }
10414 }
10415 }
10416
10417 /**
10418 * xmlParseMisc:
10419 * @ctxt: an XML parser context
10420 *
10421 * DEPRECATED: Internal function, don't use.
10422 *
10423 * parse an XML Misc* optional field.
10424 *
10425 * [27] Misc ::= Comment | PI | S
10426 */
10427
10428 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10429 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10430 while (PARSER_STOPPED(ctxt) == 0) {
10431 SKIP_BLANKS;
10432 GROW;
10433 if ((RAW == '<') && (NXT(1) == '?')) {
10434 xmlParsePI(ctxt);
10435 } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10436 xmlParseComment(ctxt);
10437 } else {
10438 break;
10439 }
10440 }
10441 }
10442
10443 static void
xmlFinishDocument(xmlParserCtxtPtr ctxt)10444 xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10445 xmlDocPtr doc;
10446
10447 /*
10448 * SAX: end of the document processing.
10449 */
10450 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10451 ctxt->sax->endDocument(ctxt->userData);
10452
10453 doc = ctxt->myDoc;
10454 if (doc != NULL) {
10455 if (ctxt->wellFormed) {
10456 doc->properties |= XML_DOC_WELLFORMED;
10457 if (ctxt->valid)
10458 doc->properties |= XML_DOC_DTDVALID;
10459 if (ctxt->nsWellFormed)
10460 doc->properties |= XML_DOC_NSVALID;
10461 }
10462
10463 if (ctxt->options & XML_PARSE_OLD10)
10464 doc->properties |= XML_DOC_OLD10;
10465
10466 /*
10467 * Remove locally kept entity definitions if the tree was not built
10468 */
10469 if (xmlStrEqual(doc->version, SAX_COMPAT_MODE)) {
10470 xmlFreeDoc(doc);
10471 ctxt->myDoc = NULL;
10472 }
10473 }
10474 }
10475
10476 /**
10477 * xmlParseDocument:
10478 * @ctxt: an XML parser context
10479 *
10480 * Parse an XML document and invoke the SAX handlers. This is useful
10481 * if you're only interested in custom SAX callbacks. If you want a
10482 * document tree, use xmlCtxtParseDocument.
10483 *
10484 * Returns 0, -1 in case of error.
10485 */
10486
10487 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10488 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10489 if ((ctxt == NULL) || (ctxt->input == NULL))
10490 return(-1);
10491
10492 GROW;
10493
10494 /*
10495 * SAX: detecting the level.
10496 */
10497 xmlCtxtInitializeLate(ctxt);
10498
10499 /*
10500 * Document locator is unused. Only for backward compatibility.
10501 */
10502 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10503 xmlSAXLocator copy = xmlDefaultSAXLocator;
10504 ctxt->sax->setDocumentLocator(ctxt->userData, ©);
10505 }
10506
10507 xmlDetectEncoding(ctxt);
10508
10509 if (CUR == 0) {
10510 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10511 return(-1);
10512 }
10513
10514 GROW;
10515 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10516
10517 /*
10518 * Note that we will switch encoding on the fly.
10519 */
10520 xmlParseXMLDecl(ctxt);
10521 SKIP_BLANKS;
10522 } else {
10523 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10524 if (ctxt->version == NULL) {
10525 xmlErrMemory(ctxt);
10526 return(-1);
10527 }
10528 }
10529 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10530 ctxt->sax->startDocument(ctxt->userData);
10531 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10532 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10533 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10534 }
10535
10536 /*
10537 * The Misc part of the Prolog
10538 */
10539 xmlParseMisc(ctxt);
10540
10541 /*
10542 * Then possibly doc type declaration(s) and more Misc
10543 * (doctypedecl Misc*)?
10544 */
10545 GROW;
10546 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10547
10548 ctxt->inSubset = 1;
10549 xmlParseDocTypeDecl(ctxt);
10550 if (RAW == '[') {
10551 xmlParseInternalSubset(ctxt);
10552 }
10553
10554 /*
10555 * Create and update the external subset.
10556 */
10557 ctxt->inSubset = 2;
10558 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10559 (!ctxt->disableSAX))
10560 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10561 ctxt->extSubSystem, ctxt->extSubURI);
10562 ctxt->inSubset = 0;
10563
10564 xmlCleanSpecialAttr(ctxt);
10565
10566 xmlParseMisc(ctxt);
10567 }
10568
10569 /*
10570 * Time to start parsing the tree itself
10571 */
10572 GROW;
10573 if (RAW != '<') {
10574 if (ctxt->wellFormed)
10575 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10576 "Start tag expected, '<' not found\n");
10577 } else {
10578 xmlParseElement(ctxt);
10579
10580 /*
10581 * The Misc part at the end
10582 */
10583 xmlParseMisc(ctxt);
10584
10585 if (ctxt->input->cur < ctxt->input->end) {
10586 if (ctxt->wellFormed)
10587 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10588 } else if ((ctxt->input->buf != NULL) &&
10589 (ctxt->input->buf->encoder != NULL) &&
10590 (ctxt->input->buf->error == 0) &&
10591 (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
10592 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
10593 "Truncated multi-byte sequence at EOF\n");
10594 }
10595 }
10596
10597 ctxt->instate = XML_PARSER_EOF;
10598 xmlFinishDocument(ctxt);
10599
10600 if (! ctxt->wellFormed) {
10601 ctxt->valid = 0;
10602 return(-1);
10603 }
10604
10605 return(0);
10606 }
10607
10608 /**
10609 * xmlParseExtParsedEnt:
10610 * @ctxt: an XML parser context
10611 *
10612 * parse a general parsed entity
10613 * An external general parsed entity is well-formed if it matches the
10614 * production labeled extParsedEnt.
10615 *
10616 * [78] extParsedEnt ::= TextDecl? content
10617 *
10618 * Returns 0, -1 in case of error. the parser context is augmented
10619 * as a result of the parsing.
10620 */
10621
10622 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10623 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10624 if ((ctxt == NULL) || (ctxt->input == NULL))
10625 return(-1);
10626
10627 xmlCtxtInitializeLate(ctxt);
10628
10629 /*
10630 * Document locator is unused. Only for backward compatibility.
10631 */
10632 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10633 xmlSAXLocator copy = xmlDefaultSAXLocator;
10634 ctxt->sax->setDocumentLocator(ctxt->userData, ©);
10635 }
10636
10637 xmlDetectEncoding(ctxt);
10638
10639 if (CUR == 0) {
10640 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10641 }
10642
10643 /*
10644 * Check for the XMLDecl in the Prolog.
10645 */
10646 GROW;
10647 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10648
10649 /*
10650 * Note that we will switch encoding on the fly.
10651 */
10652 xmlParseXMLDecl(ctxt);
10653 SKIP_BLANKS;
10654 } else {
10655 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10656 }
10657 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10658 ctxt->sax->startDocument(ctxt->userData);
10659
10660 /*
10661 * Doing validity checking on chunk doesn't make sense
10662 */
10663 ctxt->options &= ~XML_PARSE_DTDVALID;
10664 ctxt->validate = 0;
10665 ctxt->depth = 0;
10666
10667 xmlParseContentInternal(ctxt);
10668
10669 if (ctxt->input->cur < ctxt->input->end)
10670 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10671
10672 /*
10673 * SAX: end of the document processing.
10674 */
10675 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10676 ctxt->sax->endDocument(ctxt->userData);
10677
10678 if (! ctxt->wellFormed) return(-1);
10679 return(0);
10680 }
10681
10682 #ifdef LIBXML_PUSH_ENABLED
10683 /************************************************************************
10684 * *
10685 * Progressive parsing interfaces *
10686 * *
10687 ************************************************************************/
10688
10689 /**
10690 * xmlParseLookupChar:
10691 * @ctxt: an XML parser context
10692 * @c: character
10693 *
10694 * Check whether the input buffer contains a character.
10695 */
10696 static int
xmlParseLookupChar(xmlParserCtxtPtr ctxt,int c)10697 xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10698 const xmlChar *cur;
10699
10700 if (ctxt->checkIndex == 0) {
10701 cur = ctxt->input->cur + 1;
10702 } else {
10703 cur = ctxt->input->cur + ctxt->checkIndex;
10704 }
10705
10706 if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10707 size_t index = ctxt->input->end - ctxt->input->cur;
10708
10709 if (index > LONG_MAX) {
10710 ctxt->checkIndex = 0;
10711 return(1);
10712 }
10713 ctxt->checkIndex = index;
10714 return(0);
10715 } else {
10716 ctxt->checkIndex = 0;
10717 return(1);
10718 }
10719 }
10720
10721 /**
10722 * xmlParseLookupString:
10723 * @ctxt: an XML parser context
10724 * @startDelta: delta to apply at the start
10725 * @str: string
10726 * @strLen: length of string
10727 *
10728 * Check whether the input buffer contains a string.
10729 */
10730 static const xmlChar *
xmlParseLookupString(xmlParserCtxtPtr ctxt,size_t startDelta,const char * str,size_t strLen)10731 xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10732 const char *str, size_t strLen) {
10733 const xmlChar *cur, *term;
10734
10735 if (ctxt->checkIndex == 0) {
10736 cur = ctxt->input->cur + startDelta;
10737 } else {
10738 cur = ctxt->input->cur + ctxt->checkIndex;
10739 }
10740
10741 term = BAD_CAST strstr((const char *) cur, str);
10742 if (term == NULL) {
10743 const xmlChar *end = ctxt->input->end;
10744 size_t index;
10745
10746 /* Rescan (strLen - 1) characters. */
10747 if ((size_t) (end - cur) < strLen)
10748 end = cur;
10749 else
10750 end -= strLen - 1;
10751 index = end - ctxt->input->cur;
10752 if (index > LONG_MAX) {
10753 ctxt->checkIndex = 0;
10754 return(ctxt->input->end - strLen);
10755 }
10756 ctxt->checkIndex = index;
10757 } else {
10758 ctxt->checkIndex = 0;
10759 }
10760
10761 return(term);
10762 }
10763
10764 /**
10765 * xmlParseLookupCharData:
10766 * @ctxt: an XML parser context
10767 *
10768 * Check whether the input buffer contains terminated char data.
10769 */
10770 static int
xmlParseLookupCharData(xmlParserCtxtPtr ctxt)10771 xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10772 const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10773 const xmlChar *end = ctxt->input->end;
10774 size_t index;
10775
10776 while (cur < end) {
10777 if ((*cur == '<') || (*cur == '&')) {
10778 ctxt->checkIndex = 0;
10779 return(1);
10780 }
10781 cur++;
10782 }
10783
10784 index = cur - ctxt->input->cur;
10785 if (index > LONG_MAX) {
10786 ctxt->checkIndex = 0;
10787 return(1);
10788 }
10789 ctxt->checkIndex = index;
10790 return(0);
10791 }
10792
10793 /**
10794 * xmlParseLookupGt:
10795 * @ctxt: an XML parser context
10796 *
10797 * Check whether there's enough data in the input buffer to finish parsing
10798 * a start tag. This has to take quotes into account.
10799 */
10800 static int
xmlParseLookupGt(xmlParserCtxtPtr ctxt)10801 xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10802 const xmlChar *cur;
10803 const xmlChar *end = ctxt->input->end;
10804 int state = ctxt->endCheckState;
10805 size_t index;
10806
10807 if (ctxt->checkIndex == 0)
10808 cur = ctxt->input->cur + 1;
10809 else
10810 cur = ctxt->input->cur + ctxt->checkIndex;
10811
10812 while (cur < end) {
10813 if (state) {
10814 if (*cur == state)
10815 state = 0;
10816 } else if (*cur == '\'' || *cur == '"') {
10817 state = *cur;
10818 } else if (*cur == '>') {
10819 ctxt->checkIndex = 0;
10820 ctxt->endCheckState = 0;
10821 return(1);
10822 }
10823 cur++;
10824 }
10825
10826 index = cur - ctxt->input->cur;
10827 if (index > LONG_MAX) {
10828 ctxt->checkIndex = 0;
10829 ctxt->endCheckState = 0;
10830 return(1);
10831 }
10832 ctxt->checkIndex = index;
10833 ctxt->endCheckState = state;
10834 return(0);
10835 }
10836
10837 /**
10838 * xmlParseLookupInternalSubset:
10839 * @ctxt: an XML parser context
10840 *
10841 * Check whether there's enough data in the input buffer to finish parsing
10842 * the internal subset.
10843 */
10844 static int
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt)10845 xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10846 /*
10847 * Sorry, but progressive parsing of the internal subset is not
10848 * supported. We first check that the full content of the internal
10849 * subset is available and parsing is launched only at that point.
10850 * Internal subset ends with "']' S? '>'" in an unescaped section and
10851 * not in a ']]>' sequence which are conditional sections.
10852 */
10853 const xmlChar *cur, *start;
10854 const xmlChar *end = ctxt->input->end;
10855 int state = ctxt->endCheckState;
10856 size_t index;
10857
10858 if (ctxt->checkIndex == 0) {
10859 cur = ctxt->input->cur + 1;
10860 } else {
10861 cur = ctxt->input->cur + ctxt->checkIndex;
10862 }
10863 start = cur;
10864
10865 while (cur < end) {
10866 if (state == '-') {
10867 if ((*cur == '-') &&
10868 (cur[1] == '-') &&
10869 (cur[2] == '>')) {
10870 state = 0;
10871 cur += 3;
10872 start = cur;
10873 continue;
10874 }
10875 }
10876 else if (state == ']') {
10877 if (*cur == '>') {
10878 ctxt->checkIndex = 0;
10879 ctxt->endCheckState = 0;
10880 return(1);
10881 }
10882 if (IS_BLANK_CH(*cur)) {
10883 state = ' ';
10884 } else if (*cur != ']') {
10885 state = 0;
10886 start = cur;
10887 continue;
10888 }
10889 }
10890 else if (state == ' ') {
10891 if (*cur == '>') {
10892 ctxt->checkIndex = 0;
10893 ctxt->endCheckState = 0;
10894 return(1);
10895 }
10896 if (!IS_BLANK_CH(*cur)) {
10897 state = 0;
10898 start = cur;
10899 continue;
10900 }
10901 }
10902 else if (state != 0) {
10903 if (*cur == state) {
10904 state = 0;
10905 start = cur + 1;
10906 }
10907 }
10908 else if (*cur == '<') {
10909 if ((cur[1] == '!') &&
10910 (cur[2] == '-') &&
10911 (cur[3] == '-')) {
10912 state = '-';
10913 cur += 4;
10914 /* Don't treat <!--> as comment */
10915 start = cur;
10916 continue;
10917 }
10918 }
10919 else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10920 state = *cur;
10921 }
10922
10923 cur++;
10924 }
10925
10926 /*
10927 * Rescan the three last characters to detect "<!--" and "-->"
10928 * split across chunks.
10929 */
10930 if ((state == 0) || (state == '-')) {
10931 if (cur - start < 3)
10932 cur = start;
10933 else
10934 cur -= 3;
10935 }
10936 index = cur - ctxt->input->cur;
10937 if (index > LONG_MAX) {
10938 ctxt->checkIndex = 0;
10939 ctxt->endCheckState = 0;
10940 return(1);
10941 }
10942 ctxt->checkIndex = index;
10943 ctxt->endCheckState = state;
10944 return(0);
10945 }
10946
10947 /**
10948 * xmlCheckCdataPush:
10949 * @cur: pointer to the block of characters
10950 * @len: length of the block in bytes
10951 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
10952 *
10953 * Check that the block of characters is okay as SCdata content [20]
10954 *
10955 * Returns the number of bytes to pass if okay, a negative index where an
10956 * UTF-8 error occurred otherwise
10957 */
10958 static int
xmlCheckCdataPush(const xmlChar * utf,int len,int complete)10959 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
10960 int ix;
10961 unsigned char c;
10962 int codepoint;
10963
10964 if ((utf == NULL) || (len <= 0))
10965 return(0);
10966
10967 for (ix = 0; ix < len;) { /* string is 0-terminated */
10968 c = utf[ix];
10969 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10970 if (c >= 0x20)
10971 ix++;
10972 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10973 ix++;
10974 else
10975 return(-ix);
10976 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10977 if (ix + 2 > len) return(complete ? -ix : ix);
10978 if ((utf[ix+1] & 0xc0 ) != 0x80)
10979 return(-ix);
10980 codepoint = (utf[ix] & 0x1f) << 6;
10981 codepoint |= utf[ix+1] & 0x3f;
10982 if (!xmlIsCharQ(codepoint))
10983 return(-ix);
10984 ix += 2;
10985 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10986 if (ix + 3 > len) return(complete ? -ix : ix);
10987 if (((utf[ix+1] & 0xc0) != 0x80) ||
10988 ((utf[ix+2] & 0xc0) != 0x80))
10989 return(-ix);
10990 codepoint = (utf[ix] & 0xf) << 12;
10991 codepoint |= (utf[ix+1] & 0x3f) << 6;
10992 codepoint |= utf[ix+2] & 0x3f;
10993 if (!xmlIsCharQ(codepoint))
10994 return(-ix);
10995 ix += 3;
10996 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10997 if (ix + 4 > len) return(complete ? -ix : ix);
10998 if (((utf[ix+1] & 0xc0) != 0x80) ||
10999 ((utf[ix+2] & 0xc0) != 0x80) ||
11000 ((utf[ix+3] & 0xc0) != 0x80))
11001 return(-ix);
11002 codepoint = (utf[ix] & 0x7) << 18;
11003 codepoint |= (utf[ix+1] & 0x3f) << 12;
11004 codepoint |= (utf[ix+2] & 0x3f) << 6;
11005 codepoint |= utf[ix+3] & 0x3f;
11006 if (!xmlIsCharQ(codepoint))
11007 return(-ix);
11008 ix += 4;
11009 } else /* unknown encoding */
11010 return(-ix);
11011 }
11012 return(ix);
11013 }
11014
11015 /**
11016 * xmlParseTryOrFinish:
11017 * @ctxt: an XML parser context
11018 * @terminate: last chunk indicator
11019 *
11020 * Try to progress on parsing
11021 *
11022 * Returns zero if no parsing was possible
11023 */
11024 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11025 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11026 int ret = 0;
11027 size_t avail;
11028 xmlChar cur, next;
11029
11030 if (ctxt->input == NULL)
11031 return(0);
11032
11033 if ((ctxt->input != NULL) &&
11034 (ctxt->input->cur - ctxt->input->base > 4096)) {
11035 xmlParserShrink(ctxt);
11036 }
11037
11038 while (ctxt->disableSAX == 0) {
11039 avail = ctxt->input->end - ctxt->input->cur;
11040 if (avail < 1)
11041 goto done;
11042 switch (ctxt->instate) {
11043 case XML_PARSER_EOF:
11044 /*
11045 * Document parsing is done !
11046 */
11047 goto done;
11048 case XML_PARSER_START:
11049 /*
11050 * Very first chars read from the document flow.
11051 */
11052 if ((!terminate) && (avail < 4))
11053 goto done;
11054
11055 /*
11056 * We need more bytes to detect EBCDIC code pages.
11057 * See xmlDetectEBCDIC.
11058 */
11059 if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
11060 (!terminate) && (avail < 200))
11061 goto done;
11062
11063 xmlDetectEncoding(ctxt);
11064 ctxt->instate = XML_PARSER_XML_DECL;
11065 break;
11066
11067 case XML_PARSER_XML_DECL:
11068 if ((!terminate) && (avail < 2))
11069 goto done;
11070 cur = ctxt->input->cur[0];
11071 next = ctxt->input->cur[1];
11072 if ((cur == '<') && (next == '?')) {
11073 /* PI or XML decl */
11074 if ((!terminate) &&
11075 (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11076 goto done;
11077 if ((ctxt->input->cur[2] == 'x') &&
11078 (ctxt->input->cur[3] == 'm') &&
11079 (ctxt->input->cur[4] == 'l') &&
11080 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11081 ret += 5;
11082 xmlParseXMLDecl(ctxt);
11083 } else {
11084 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11085 if (ctxt->version == NULL) {
11086 xmlErrMemory(ctxt);
11087 break;
11088 }
11089 }
11090 } else {
11091 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11092 if (ctxt->version == NULL) {
11093 xmlErrMemory(ctxt);
11094 break;
11095 }
11096 }
11097 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
11098 xmlSAXLocator copy = xmlDefaultSAXLocator;
11099 ctxt->sax->setDocumentLocator(ctxt->userData, ©);
11100 }
11101 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11102 (!ctxt->disableSAX))
11103 ctxt->sax->startDocument(ctxt->userData);
11104 ctxt->instate = XML_PARSER_MISC;
11105 break;
11106 case XML_PARSER_START_TAG: {
11107 const xmlChar *name;
11108 const xmlChar *prefix = NULL;
11109 const xmlChar *URI = NULL;
11110 int line = ctxt->input->line;
11111 int nbNs = 0;
11112
11113 if ((!terminate) && (avail < 2))
11114 goto done;
11115 cur = ctxt->input->cur[0];
11116 if (cur != '<') {
11117 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11118 "Start tag expected, '<' not found");
11119 ctxt->instate = XML_PARSER_EOF;
11120 xmlFinishDocument(ctxt);
11121 goto done;
11122 }
11123 if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11124 goto done;
11125 if (ctxt->spaceNr == 0)
11126 spacePush(ctxt, -1);
11127 else if (*ctxt->space == -2)
11128 spacePush(ctxt, -1);
11129 else
11130 spacePush(ctxt, *ctxt->space);
11131 #ifdef LIBXML_SAX1_ENABLED
11132 if (ctxt->sax2)
11133 #endif /* LIBXML_SAX1_ENABLED */
11134 name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
11135 #ifdef LIBXML_SAX1_ENABLED
11136 else
11137 name = xmlParseStartTag(ctxt);
11138 #endif /* LIBXML_SAX1_ENABLED */
11139 if (name == NULL) {
11140 spacePop(ctxt);
11141 ctxt->instate = XML_PARSER_EOF;
11142 xmlFinishDocument(ctxt);
11143 goto done;
11144 }
11145 #ifdef LIBXML_VALID_ENABLED
11146 /*
11147 * [ VC: Root Element Type ]
11148 * The Name in the document type declaration must match
11149 * the element type of the root element.
11150 */
11151 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11152 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11153 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11154 #endif /* LIBXML_VALID_ENABLED */
11155
11156 /*
11157 * Check for an Empty Element.
11158 */
11159 if ((RAW == '/') && (NXT(1) == '>')) {
11160 SKIP(2);
11161
11162 if (ctxt->sax2) {
11163 if ((ctxt->sax != NULL) &&
11164 (ctxt->sax->endElementNs != NULL) &&
11165 (!ctxt->disableSAX))
11166 ctxt->sax->endElementNs(ctxt->userData, name,
11167 prefix, URI);
11168 if (nbNs > 0)
11169 xmlParserNsPop(ctxt, nbNs);
11170 #ifdef LIBXML_SAX1_ENABLED
11171 } else {
11172 if ((ctxt->sax != NULL) &&
11173 (ctxt->sax->endElement != NULL) &&
11174 (!ctxt->disableSAX))
11175 ctxt->sax->endElement(ctxt->userData, name);
11176 #endif /* LIBXML_SAX1_ENABLED */
11177 }
11178 spacePop(ctxt);
11179 } else if (RAW == '>') {
11180 NEXT;
11181 nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11182 } else {
11183 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11184 "Couldn't find end of Start Tag %s\n",
11185 name);
11186 nodePop(ctxt);
11187 spacePop(ctxt);
11188 if (nbNs > 0)
11189 xmlParserNsPop(ctxt, nbNs);
11190 }
11191
11192 if (ctxt->nameNr == 0)
11193 ctxt->instate = XML_PARSER_EPILOG;
11194 else
11195 ctxt->instate = XML_PARSER_CONTENT;
11196 break;
11197 }
11198 case XML_PARSER_CONTENT: {
11199 cur = ctxt->input->cur[0];
11200
11201 if (cur == '<') {
11202 if ((!terminate) && (avail < 2))
11203 goto done;
11204 next = ctxt->input->cur[1];
11205
11206 if (next == '/') {
11207 ctxt->instate = XML_PARSER_END_TAG;
11208 break;
11209 } else if (next == '?') {
11210 if ((!terminate) &&
11211 (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11212 goto done;
11213 xmlParsePI(ctxt);
11214 ctxt->instate = XML_PARSER_CONTENT;
11215 break;
11216 } else if (next == '!') {
11217 if ((!terminate) && (avail < 3))
11218 goto done;
11219 next = ctxt->input->cur[2];
11220
11221 if (next == '-') {
11222 if ((!terminate) && (avail < 4))
11223 goto done;
11224 if (ctxt->input->cur[3] == '-') {
11225 if ((!terminate) &&
11226 (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11227 goto done;
11228 xmlParseComment(ctxt);
11229 ctxt->instate = XML_PARSER_CONTENT;
11230 break;
11231 }
11232 } else if (next == '[') {
11233 if ((!terminate) && (avail < 9))
11234 goto done;
11235 if ((ctxt->input->cur[2] == '[') &&
11236 (ctxt->input->cur[3] == 'C') &&
11237 (ctxt->input->cur[4] == 'D') &&
11238 (ctxt->input->cur[5] == 'A') &&
11239 (ctxt->input->cur[6] == 'T') &&
11240 (ctxt->input->cur[7] == 'A') &&
11241 (ctxt->input->cur[8] == '[')) {
11242 SKIP(9);
11243 ctxt->instate = XML_PARSER_CDATA_SECTION;
11244 break;
11245 }
11246 }
11247 }
11248 } else if (cur == '&') {
11249 if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11250 goto done;
11251 xmlParseReference(ctxt);
11252 break;
11253 } else {
11254 /* TODO Avoid the extra copy, handle directly !!! */
11255 /*
11256 * Goal of the following test is:
11257 * - minimize calls to the SAX 'character' callback
11258 * when they are mergeable
11259 * - handle an problem for isBlank when we only parse
11260 * a sequence of blank chars and the next one is
11261 * not available to check against '<' presence.
11262 * - tries to homogenize the differences in SAX
11263 * callbacks between the push and pull versions
11264 * of the parser.
11265 */
11266 if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11267 if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11268 goto done;
11269 }
11270 ctxt->checkIndex = 0;
11271 xmlParseCharDataInternal(ctxt, !terminate);
11272 break;
11273 }
11274
11275 ctxt->instate = XML_PARSER_START_TAG;
11276 break;
11277 }
11278 case XML_PARSER_END_TAG:
11279 if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11280 goto done;
11281 if (ctxt->sax2) {
11282 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11283 nameNsPop(ctxt);
11284 }
11285 #ifdef LIBXML_SAX1_ENABLED
11286 else
11287 xmlParseEndTag1(ctxt, 0);
11288 #endif /* LIBXML_SAX1_ENABLED */
11289 if (ctxt->nameNr == 0) {
11290 ctxt->instate = XML_PARSER_EPILOG;
11291 } else {
11292 ctxt->instate = XML_PARSER_CONTENT;
11293 }
11294 break;
11295 case XML_PARSER_CDATA_SECTION: {
11296 /*
11297 * The Push mode need to have the SAX callback for
11298 * cdataBlock merge back contiguous callbacks.
11299 */
11300 const xmlChar *term;
11301
11302 if (terminate) {
11303 /*
11304 * Don't call xmlParseLookupString. If 'terminate'
11305 * is set, checkIndex is invalid.
11306 */
11307 term = BAD_CAST strstr((const char *) ctxt->input->cur,
11308 "]]>");
11309 } else {
11310 term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11311 }
11312
11313 if (term == NULL) {
11314 int tmp, size;
11315
11316 if (terminate) {
11317 /* Unfinished CDATA section */
11318 size = ctxt->input->end - ctxt->input->cur;
11319 } else {
11320 if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11321 goto done;
11322 ctxt->checkIndex = 0;
11323 /* XXX: Why don't we pass the full buffer? */
11324 size = XML_PARSER_BIG_BUFFER_SIZE;
11325 }
11326 tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11327 if (tmp <= 0) {
11328 tmp = -tmp;
11329 ctxt->input->cur += tmp;
11330 goto encoding_error;
11331 }
11332 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11333 if (ctxt->sax->cdataBlock != NULL)
11334 ctxt->sax->cdataBlock(ctxt->userData,
11335 ctxt->input->cur, tmp);
11336 else if (ctxt->sax->characters != NULL)
11337 ctxt->sax->characters(ctxt->userData,
11338 ctxt->input->cur, tmp);
11339 }
11340 SKIPL(tmp);
11341 } else {
11342 int base = term - CUR_PTR;
11343 int tmp;
11344
11345 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11346 if ((tmp < 0) || (tmp != base)) {
11347 tmp = -tmp;
11348 ctxt->input->cur += tmp;
11349 goto encoding_error;
11350 }
11351 if ((ctxt->sax != NULL) && (base == 0) &&
11352 (ctxt->sax->cdataBlock != NULL) &&
11353 (!ctxt->disableSAX)) {
11354 /*
11355 * Special case to provide identical behaviour
                         * between pull and push parsers on empty CDATA
11357 * sections
11358 */
11359 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11360 (!strncmp((const char *)&ctxt->input->cur[-9],
11361 "<![CDATA[", 9)))
11362 ctxt->sax->cdataBlock(ctxt->userData,
11363 BAD_CAST "", 0);
11364 } else if ((ctxt->sax != NULL) && (base > 0) &&
11365 (!ctxt->disableSAX)) {
11366 if (ctxt->sax->cdataBlock != NULL)
11367 ctxt->sax->cdataBlock(ctxt->userData,
11368 ctxt->input->cur, base);
11369 else if (ctxt->sax->characters != NULL)
11370 ctxt->sax->characters(ctxt->userData,
11371 ctxt->input->cur, base);
11372 }
11373 SKIPL(base + 3);
11374 ctxt->instate = XML_PARSER_CONTENT;
11375 }
11376 break;
11377 }
11378 case XML_PARSER_MISC:
11379 case XML_PARSER_PROLOG:
11380 case XML_PARSER_EPILOG:
11381 SKIP_BLANKS;
11382 avail = ctxt->input->end - ctxt->input->cur;
11383 if (avail < 1)
11384 goto done;
11385 if (ctxt->input->cur[0] == '<') {
11386 if ((!terminate) && (avail < 2))
11387 goto done;
11388 next = ctxt->input->cur[1];
11389 if (next == '?') {
11390 if ((!terminate) &&
11391 (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11392 goto done;
11393 xmlParsePI(ctxt);
11394 break;
11395 } else if (next == '!') {
11396 if ((!terminate) && (avail < 3))
11397 goto done;
11398
11399 if (ctxt->input->cur[2] == '-') {
11400 if ((!terminate) && (avail < 4))
11401 goto done;
11402 if (ctxt->input->cur[3] == '-') {
11403 if ((!terminate) &&
11404 (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11405 goto done;
11406 xmlParseComment(ctxt);
11407 break;
11408 }
11409 } else if (ctxt->instate == XML_PARSER_MISC) {
11410 if ((!terminate) && (avail < 9))
11411 goto done;
11412 if ((ctxt->input->cur[2] == 'D') &&
11413 (ctxt->input->cur[3] == 'O') &&
11414 (ctxt->input->cur[4] == 'C') &&
11415 (ctxt->input->cur[5] == 'T') &&
11416 (ctxt->input->cur[6] == 'Y') &&
11417 (ctxt->input->cur[7] == 'P') &&
11418 (ctxt->input->cur[8] == 'E')) {
11419 if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11420 goto done;
11421 ctxt->inSubset = 1;
11422 xmlParseDocTypeDecl(ctxt);
11423 if (RAW == '[') {
11424 ctxt->instate = XML_PARSER_DTD;
11425 } else {
11426 /*
11427 * Create and update the external subset.
11428 */
11429 ctxt->inSubset = 2;
11430 if ((ctxt->sax != NULL) &&
11431 (!ctxt->disableSAX) &&
11432 (ctxt->sax->externalSubset != NULL))
11433 ctxt->sax->externalSubset(
11434 ctxt->userData,
11435 ctxt->intSubName,
11436 ctxt->extSubSystem,
11437 ctxt->extSubURI);
11438 ctxt->inSubset = 0;
11439 xmlCleanSpecialAttr(ctxt);
11440 ctxt->instate = XML_PARSER_PROLOG;
11441 }
11442 break;
11443 }
11444 }
11445 }
11446 }
11447
11448 if (ctxt->instate == XML_PARSER_EPILOG) {
11449 if (ctxt->errNo == XML_ERR_OK)
11450 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11451 ctxt->instate = XML_PARSER_EOF;
11452 xmlFinishDocument(ctxt);
11453 } else {
11454 ctxt->instate = XML_PARSER_START_TAG;
11455 }
11456 break;
11457 case XML_PARSER_DTD: {
11458 if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11459 goto done;
11460 xmlParseInternalSubset(ctxt);
11461 ctxt->inSubset = 2;
11462 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11463 (ctxt->sax->externalSubset != NULL))
11464 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11465 ctxt->extSubSystem, ctxt->extSubURI);
11466 ctxt->inSubset = 0;
11467 xmlCleanSpecialAttr(ctxt);
11468 ctxt->instate = XML_PARSER_PROLOG;
11469 break;
11470 }
11471 default:
11472 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11473 "PP: internal error\n");
11474 ctxt->instate = XML_PARSER_EOF;
11475 break;
11476 }
11477 }
11478 done:
11479 return(ret);
11480 encoding_error:
11481 /* Only report the first error */
11482 if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
11483 xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
11484 ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
11485 }
11486 return(0);
11487 }
11488
11489 /**
11490 * xmlParseChunk:
11491 * @ctxt: an XML parser context
11492 * @chunk: chunk of memory
11493 * @size: size of chunk in bytes
11494 * @terminate: last chunk indicator
11495 *
11496 * Parse a chunk of memory in push parser mode.
11497 *
11498 * Assumes that the parser context was initialized with
11499 * xmlCreatePushParserCtxt.
11500 *
11501 * The last chunk, which will often be empty, must be marked with
11502 * the @terminate flag. With the default SAX callbacks, the resulting
11503 * document will be available in ctxt->myDoc. This pointer will not
11504 * be freed by the library.
11505 *
11506 * If the document isn't well-formed, ctxt->myDoc is set to NULL.
11507 * The push parser doesn't support recovery mode.
11508 *
11509 * Returns an xmlParserErrors code (0 on success).
11510 */
11511 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)11512 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11513 int terminate) {
11514 size_t curBase;
11515 size_t maxLength;
11516 int end_in_lf = 0;
11517
11518 if ((ctxt == NULL) || (size < 0))
11519 return(XML_ERR_ARGUMENT);
11520 if (ctxt->disableSAX != 0)
11521 return(ctxt->errNo);
11522 if (ctxt->input == NULL)
11523 return(XML_ERR_INTERNAL_ERROR);
11524
11525 ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11526 if (ctxt->instate == XML_PARSER_START)
11527 xmlCtxtInitializeLate(ctxt);
11528 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11529 (chunk[size - 1] == '\r')) {
11530 end_in_lf = 1;
11531 size--;
11532 }
11533
11534 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11535 (ctxt->input->buf != NULL)) {
11536 size_t pos = ctxt->input->cur - ctxt->input->base;
11537 int res;
11538
11539 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11540 xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11541 if (res < 0) {
11542 xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11543 xmlHaltParser(ctxt);
11544 return(ctxt->errNo);
11545 }
11546 }
11547
11548 xmlParseTryOrFinish(ctxt, terminate);
11549
11550 curBase = ctxt->input->cur - ctxt->input->base;
11551 maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11552 XML_MAX_HUGE_LENGTH :
11553 XML_MAX_LOOKUP_LIMIT;
11554 if (curBase > maxLength) {
11555 xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11556 "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11557 xmlHaltParser(ctxt);
11558 }
11559
11560 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11561 return(ctxt->errNo);
11562
11563 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11564 (ctxt->input->buf != NULL)) {
11565 size_t pos = ctxt->input->cur - ctxt->input->base;
11566 int res;
11567
11568 res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11569 xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11570 if (res < 0) {
11571 xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11572 xmlHaltParser(ctxt);
11573 return(ctxt->errNo);
11574 }
11575 }
11576 if (terminate) {
11577 /*
11578 * Check for termination
11579 */
11580 if ((ctxt->instate != XML_PARSER_EOF) &&
11581 (ctxt->instate != XML_PARSER_EPILOG)) {
11582 if (ctxt->nameNr > 0) {
11583 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11584 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11585 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11586 "Premature end of data in tag %s line %d\n",
11587 name, line, NULL);
11588 } else if (ctxt->instate == XML_PARSER_START) {
11589 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11590 } else {
11591 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11592 "Start tag expected, '<' not found\n");
11593 }
11594 } else if ((ctxt->input->buf != NULL) &&
11595 (ctxt->input->buf->encoder != NULL) &&
11596 (ctxt->input->buf->error == 0) &&
11597 (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
11598 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
11599 "Truncated multi-byte sequence at EOF\n");
11600 }
11601 if (ctxt->instate != XML_PARSER_EOF) {
11602 ctxt->instate = XML_PARSER_EOF;
11603 xmlFinishDocument(ctxt);
11604 }
11605 }
11606 if (ctxt->wellFormed == 0)
11607 return((xmlParserErrors) ctxt->errNo);
11608 else
11609 return(0);
11610 }
11611
11612 /************************************************************************
11613 * *
11614 * I/O front end functions to the parser *
11615 * *
11616 ************************************************************************/
11617
11618 /**
11619 * xmlCreatePushParserCtxt:
11620 * @sax: a SAX handler (optional)
11621 * @user_data: user data for SAX callbacks (optional)
11622 * @chunk: initial chunk (optional, deprecated)
11623 * @size: size of initial chunk in bytes
11624 * @filename: file name or URI (optional)
11625 *
11626 * Create a parser context for using the XML parser in push mode.
11627 * See xmlParseChunk.
11628 *
11629 * Passing an initial chunk is useless and deprecated.
11630 *
11631 * @filename is used as base URI to fetch external entities and for
11632 * error reports.
11633 *
11634 * Returns the new parser context or NULL if a memory allocation
11635 * failed.
11636 */
11637
11638 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)11639 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11640 const char *chunk, int size, const char *filename) {
11641 xmlParserCtxtPtr ctxt;
11642 xmlParserInputPtr input;
11643
11644 ctxt = xmlNewSAXParserCtxt(sax, user_data);
11645 if (ctxt == NULL)
11646 return(NULL);
11647
11648 ctxt->options &= ~XML_PARSE_NODICT;
11649 ctxt->dictNames = 1;
11650
11651 input = xmlInputCreatePush(filename, chunk, size);
11652 if (input == NULL) {
11653 xmlFreeParserCtxt(ctxt);
11654 return(NULL);
11655 }
11656 inputPush(ctxt, input);
11657
11658 return(ctxt);
11659 }
11660 #endif /* LIBXML_PUSH_ENABLED */
11661
11662 /**
11663 * xmlStopParser:
11664 * @ctxt: an XML parser context
11665 *
11666 * Blocks further parser processing
11667 */
11668 void
xmlStopParser(xmlParserCtxtPtr ctxt)11669 xmlStopParser(xmlParserCtxtPtr ctxt) {
11670 if (ctxt == NULL)
11671 return;
11672 xmlHaltParser(ctxt);
11673 if (ctxt->errNo != XML_ERR_NO_MEMORY)
11674 ctxt->errNo = XML_ERR_USER_STOP;
11675 }
11676
11677 /**
11678 * xmlCreateIOParserCtxt:
11679 * @sax: a SAX handler (optional)
11680 * @user_data: user data for SAX callbacks (optional)
11681 * @ioread: an I/O read function
11682 * @ioclose: an I/O close function (optional)
11683 * @ioctx: an I/O handler
11684 * @enc: the charset encoding if known (deprecated)
11685 *
11686 * Create a parser context for using the XML parser with an existing
11687 * I/O stream
11688 *
11689 * Returns the new parser context or NULL
11690 */
11691 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)11692 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11693 xmlInputReadCallback ioread,
11694 xmlInputCloseCallback ioclose,
11695 void *ioctx, xmlCharEncoding enc) {
11696 xmlParserCtxtPtr ctxt;
11697 xmlParserInputPtr input;
11698 const char *encoding;
11699
11700 ctxt = xmlNewSAXParserCtxt(sax, user_data);
11701 if (ctxt == NULL)
11702 return(NULL);
11703
11704 encoding = xmlGetCharEncodingName(enc);
11705 input = xmlNewInputIO(ctxt, NULL, ioread, ioclose, ioctx, encoding, 0);
11706 if (input == NULL) {
11707 xmlFreeParserCtxt(ctxt);
11708 return (NULL);
11709 }
11710 inputPush(ctxt, input);
11711
11712 return(ctxt);
11713 }
11714
11715 #ifdef LIBXML_VALID_ENABLED
11716 /************************************************************************
11717 * *
11718 * Front ends when parsing a DTD *
11719 * *
11720 ************************************************************************/
11721
11722 /**
11723 * xmlIOParseDTD:
11724 * @sax: the SAX handler block or NULL
11725 * @input: an Input Buffer
11726 * @enc: the charset encoding if known
11727 *
11728 * Load and parse a DTD
11729 *
11730 * Returns the resulting xmlDtdPtr or NULL in case of error.
11731 * @input will be freed by the function in any case.
11732 */
11733
11734 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)11735 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11736 xmlCharEncoding enc) {
11737 xmlDtdPtr ret = NULL;
11738 xmlParserCtxtPtr ctxt;
11739 xmlParserInputPtr pinput = NULL;
11740
11741 if (input == NULL)
11742 return(NULL);
11743
11744 ctxt = xmlNewSAXParserCtxt(sax, NULL);
11745 if (ctxt == NULL) {
11746 xmlFreeParserInputBuffer(input);
11747 return(NULL);
11748 }
11749
11750 /*
11751 * generate a parser input from the I/O handler
11752 */
11753
11754 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11755 if (pinput == NULL) {
11756 xmlFreeParserInputBuffer(input);
11757 xmlFreeParserCtxt(ctxt);
11758 return(NULL);
11759 }
11760
11761 /*
11762 * plug some encoding conversion routines here.
11763 */
11764 if (xmlPushInput(ctxt, pinput) < 0) {
11765 xmlFreeParserCtxt(ctxt);
11766 return(NULL);
11767 }
11768 if (enc != XML_CHAR_ENCODING_NONE) {
11769 xmlSwitchEncoding(ctxt, enc);
11770 }
11771
11772 /*
11773 * let's parse that entity knowing it's an external subset.
11774 */
11775 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11776 if (ctxt->myDoc == NULL) {
11777 xmlErrMemory(ctxt);
11778 return(NULL);
11779 }
11780 ctxt->myDoc->properties = XML_DOC_INTERNAL;
11781 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11782 BAD_CAST "none", BAD_CAST "none");
11783
11784 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11785
11786 if (ctxt->myDoc != NULL) {
11787 if (ctxt->wellFormed) {
11788 ret = ctxt->myDoc->extSubset;
11789 ctxt->myDoc->extSubset = NULL;
11790 if (ret != NULL) {
11791 xmlNodePtr tmp;
11792
11793 ret->doc = NULL;
11794 tmp = ret->children;
11795 while (tmp != NULL) {
11796 tmp->doc = NULL;
11797 tmp = tmp->next;
11798 }
11799 }
11800 } else {
11801 ret = NULL;
11802 }
11803 xmlFreeDoc(ctxt->myDoc);
11804 ctxt->myDoc = NULL;
11805 }
11806 xmlFreeParserCtxt(ctxt);
11807
11808 return(ret);
11809 }
11810
11811 /**
11812 * xmlSAXParseDTD:
11813 * @sax: the SAX handler block
11814 * @ExternalID: a NAME* containing the External ID of the DTD
11815 * @SystemID: a NAME* containing the URL to the DTD
11816 *
11817 * DEPRECATED: Don't use.
11818 *
11819 * Load and parse an external subset.
11820 *
11821 * Returns the resulting xmlDtdPtr or NULL in case of error.
11822 */
11823
11824 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)11825 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11826 const xmlChar *SystemID) {
11827 xmlDtdPtr ret = NULL;
11828 xmlParserCtxtPtr ctxt;
11829 xmlParserInputPtr input = NULL;
11830 xmlChar* systemIdCanonic;
11831
11832 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11833
11834 ctxt = xmlNewSAXParserCtxt(sax, NULL);
11835 if (ctxt == NULL) {
11836 return(NULL);
11837 }
11838
11839 /*
11840 * Canonicalise the system ID
11841 */
11842 systemIdCanonic = xmlCanonicPath(SystemID);
11843 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11844 xmlFreeParserCtxt(ctxt);
11845 return(NULL);
11846 }
11847
11848 /*
11849 * Ask the Entity resolver to load the damn thing
11850 */
11851
11852 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11853 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11854 systemIdCanonic);
11855 if (input == NULL) {
11856 xmlFreeParserCtxt(ctxt);
11857 if (systemIdCanonic != NULL)
11858 xmlFree(systemIdCanonic);
11859 return(NULL);
11860 }
11861
11862 /*
11863 * plug some encoding conversion routines here.
11864 */
11865 if (xmlPushInput(ctxt, input) < 0) {
11866 xmlFreeParserCtxt(ctxt);
11867 if (systemIdCanonic != NULL)
11868 xmlFree(systemIdCanonic);
11869 return(NULL);
11870 }
11871
11872 xmlDetectEncoding(ctxt);
11873
11874 if (input->filename == NULL)
11875 input->filename = (char *) systemIdCanonic;
11876 else
11877 xmlFree(systemIdCanonic);
11878
11879 /*
11880 * let's parse that entity knowing it's an external subset.
11881 */
11882 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11883 if (ctxt->myDoc == NULL) {
11884 xmlErrMemory(ctxt);
11885 xmlFreeParserCtxt(ctxt);
11886 return(NULL);
11887 }
11888 ctxt->myDoc->properties = XML_DOC_INTERNAL;
11889 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11890 ExternalID, SystemID);
11891 if (ctxt->myDoc->extSubset == NULL) {
11892 xmlFreeDoc(ctxt->myDoc);
11893 xmlFreeParserCtxt(ctxt);
11894 return(NULL);
11895 }
11896 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11897
11898 if (ctxt->myDoc != NULL) {
11899 if (ctxt->wellFormed) {
11900 ret = ctxt->myDoc->extSubset;
11901 ctxt->myDoc->extSubset = NULL;
11902 if (ret != NULL) {
11903 xmlNodePtr tmp;
11904
11905 ret->doc = NULL;
11906 tmp = ret->children;
11907 while (tmp != NULL) {
11908 tmp->doc = NULL;
11909 tmp = tmp->next;
11910 }
11911 }
11912 } else {
11913 ret = NULL;
11914 }
11915 xmlFreeDoc(ctxt->myDoc);
11916 ctxt->myDoc = NULL;
11917 }
11918 xmlFreeParserCtxt(ctxt);
11919
11920 return(ret);
11921 }
11922
11923
11924 /**
11925 * xmlParseDTD:
11926 * @ExternalID: a NAME* containing the External ID of the DTD
11927 * @SystemID: a NAME* containing the URL to the DTD
11928 *
11929 * Load and parse an external subset.
11930 *
11931 * Returns the resulting xmlDtdPtr or NULL in case of error.
11932 */
11933
11934 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)11935 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11936 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11937 }
11938 #endif /* LIBXML_VALID_ENABLED */
11939
11940 /************************************************************************
11941 * *
11942 * Front ends when parsing an Entity *
11943 * *
11944 ************************************************************************/
11945
11946 static xmlNodePtr
xmlCtxtParseContent(xmlParserCtxtPtr ctxt,xmlParserInputPtr input,int hasTextDecl,int buildTree)11947 xmlCtxtParseContent(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11948 int hasTextDecl, int buildTree) {
11949 xmlNodePtr root = NULL;
11950 xmlNodePtr list = NULL;
11951 xmlChar *rootName = BAD_CAST "#root";
11952 int result;
11953
11954 if (buildTree) {
11955 root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11956 if (root == NULL) {
11957 xmlErrMemory(ctxt);
11958 goto error;
11959 }
11960 }
11961
11962 if (xmlPushInput(ctxt, input) < 0)
11963 goto error;
11964
11965 nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11966 spacePush(ctxt, -1);
11967
11968 if (buildTree)
11969 nodePush(ctxt, root);
11970
11971 if (hasTextDecl) {
11972 xmlDetectEncoding(ctxt);
11973
11974 /*
11975 * Parse a possible text declaration first
11976 */
11977 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11978 (IS_BLANK_CH(NXT(5)))) {
11979 xmlParseTextDecl(ctxt);
11980 /*
11981 * An XML-1.0 document can't reference an entity not XML-1.0
11982 */
11983 if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11984 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11985 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11986 "Version mismatch between document and "
11987 "entity\n");
11988 }
11989 }
11990 }
11991
11992 xmlParseContentInternal(ctxt);
11993
11994 if (ctxt->input->cur < ctxt->input->end)
11995 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11996
11997 if ((ctxt->wellFormed) ||
11998 ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
11999 if (root != NULL) {
12000 xmlNodePtr cur;
12001
12002 /*
12003 * Return the newly created nodeset after unlinking it from
12004 * its pseudo parent.
12005 */
12006 cur = root->children;
12007 list = cur;
12008 while (cur != NULL) {
12009 cur->parent = NULL;
12010 cur = cur->next;
12011 }
12012 root->children = NULL;
12013 root->last = NULL;
12014 }
12015 }
12016
12017 /*
12018 * Read the rest of the stream in case of errors. We want
12019 * to account for the whole entity size.
12020 */
12021 do {
12022 ctxt->input->cur = ctxt->input->end;
12023 xmlParserShrink(ctxt);
12024 result = xmlParserGrow(ctxt);
12025 } while (result > 0);
12026
12027 if (buildTree)
12028 nodePop(ctxt);
12029
12030 namePop(ctxt);
12031 spacePop(ctxt);
12032
12033 /* xmlPopInput would free the stream */
12034 inputPop(ctxt);
12035
12036 error:
12037 xmlFreeNode(root);
12038
12039 return(list);
12040 }
12041
12042 static void
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt,xmlEntityPtr ent)12043 xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
12044 xmlParserInputPtr input;
12045 xmlNodePtr list;
12046 unsigned long consumed;
12047 int isExternal;
12048 int buildTree;
12049 int oldMinNsIndex;
12050 int oldNodelen, oldNodemem;
12051
12052 isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
12053 buildTree = (ctxt->node != NULL);
12054
12055 /*
12056 * Recursion check
12057 */
12058 if (ent->flags & XML_ENT_EXPANDING) {
12059 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
12060 xmlHaltParser(ctxt);
12061 goto error;
12062 }
12063
12064 /*
12065 * Load entity
12066 */
12067 input = xmlNewEntityInputStream(ctxt, ent);
12068 if (input == NULL)
12069 goto error;
12070
12071 /*
12072 * When building a tree, we need to limit the scope of namespace
12073 * declarations, so that entities don't reference xmlNs structs
12074 * from the parent of a reference.
12075 */
12076 oldMinNsIndex = ctxt->nsdb->minNsIndex;
12077 if (buildTree)
12078 ctxt->nsdb->minNsIndex = ctxt->nsNr;
12079
12080 oldNodelen = ctxt->nodelen;
12081 oldNodemem = ctxt->nodemem;
12082 ctxt->nodelen = 0;
12083 ctxt->nodemem = 0;
12084
12085 /*
12086 * Parse content
12087 *
12088 * This initiates a recursive call chain:
12089 *
12090 * - xmlCtxtParseContent
12091 * - xmlParseContentInternal
12092 * - xmlParseReference
12093 * - xmlCtxtParseEntity
12094 *
12095 * The nesting depth is limited by the maximum number of inputs,
12096 * see xmlPushInput.
12097 *
12098 * It's possible to make this non-recursive (minNsIndex must be
12099 * stored in the input struct) at the expense of code readability.
12100 */
12101
12102 ent->flags |= XML_ENT_EXPANDING;
12103
12104 list = xmlCtxtParseContent(ctxt, input, isExternal, buildTree);
12105
12106 ent->flags &= ~XML_ENT_EXPANDING;
12107
12108 ctxt->nsdb->minNsIndex = oldMinNsIndex;
12109 ctxt->nodelen = oldNodelen;
12110 ctxt->nodemem = oldNodemem;
12111
12112 /*
12113 * Entity size accounting
12114 */
12115 consumed = input->consumed;
12116 xmlSaturatedAddSizeT(&consumed, input->end - input->base);
12117
12118 if ((ent->flags & XML_ENT_CHECKED) == 0)
12119 xmlSaturatedAdd(&ent->expandedSize, consumed);
12120
12121 if ((ent->flags & XML_ENT_PARSED) == 0) {
12122 if (isExternal)
12123 xmlSaturatedAdd(&ctxt->sizeentities, consumed);
12124
12125 ent->children = list;
12126
12127 while (list != NULL) {
12128 list->parent = (xmlNodePtr) ent;
12129 if (list->next == NULL)
12130 ent->last = list;
12131 list = list->next;
12132 }
12133 } else {
12134 xmlFreeNodeList(list);
12135 }
12136
12137 xmlFreeInputStream(input);
12138
12139 error:
12140 ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
12141 }
12142
12143 /**
12144 * xmlParseCtxtExternalEntity:
12145 * @ctxt: the existing parsing context
12146 * @URL: the URL for the entity to load
12147 * @ID: the System ID for the entity to load
12148 * @listOut: the return value for the set of parsed nodes
12149 *
12150 * Parse an external general entity within an existing parsing context
12151 * An external general parsed entity is well-formed if it matches the
12152 * production labeled extParsedEnt.
12153 *
12154 * [78] extParsedEnt ::= TextDecl? content
12155 *
12156 * Returns 0 if the entity is well formed, -1 in case of args problem and
12157 * the parser error code otherwise
12158 */
12159
12160 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * listOut)12161 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt, const xmlChar *URL,
12162 const xmlChar *ID, xmlNodePtr *listOut) {
12163 xmlParserInputPtr input;
12164 xmlNodePtr list;
12165
12166 if (listOut != NULL)
12167 *listOut = NULL;
12168
12169 if (ctxt == NULL)
12170 return(XML_ERR_ARGUMENT);
12171
12172 input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12173 XML_RESOURCE_GENERAL_ENTITY);
12174 if (input == NULL)
12175 return(ctxt->errNo);
12176
12177 xmlCtxtInitializeLate(ctxt);
12178
12179 list = xmlCtxtParseContent(ctxt, input, /* hasTextDecl */ 1, 1);
12180 if (listOut != NULL)
12181 *listOut = list;
12182 else
12183 xmlFreeNodeList(list);
12184
12185 xmlFreeInputStream(input);
12186 return(ctxt->errNo);
12187 }
12188
12189 #ifdef LIBXML_SAX1_ENABLED
12190 /**
12191 * xmlParseExternalEntity:
12192 * @doc: the document the chunk pertains to
12193 * @sax: the SAX handler block (possibly NULL)
12194 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12195 * @depth: Used for loop detection, use 0
12196 * @URL: the URL for the entity to load
12197 * @ID: the System ID for the entity to load
12198 * @list: the return value for the set of parsed nodes
12199 *
12200 * DEPRECATED: Use xmlParseCtxtExternalEntity.
12201 *
12202 * Parse an external general entity
12203 * An external general parsed entity is well-formed if it matches the
12204 * production labeled extParsedEnt.
12205 *
12206 * [78] extParsedEnt ::= TextDecl? content
12207 *
12208 * Returns 0 if the entity is well formed, -1 in case of args problem and
12209 * the parser error code otherwise
12210 */
12211
12212 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)12213 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12214 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
12215 xmlParserCtxtPtr ctxt;
12216 int ret;
12217
12218 if (list != NULL)
12219 *list = NULL;
12220
12221 if (doc == NULL)
12222 return(XML_ERR_ARGUMENT);
12223
12224 ctxt = xmlNewSAXParserCtxt(sax, user_data);
12225 if (ctxt == NULL)
12226 return(XML_ERR_NO_MEMORY);
12227
12228 ctxt->depth = depth;
12229 ctxt->myDoc = doc;
12230 ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
12231
12232 xmlFreeParserCtxt(ctxt);
12233 return(ret);
12234 }
12235
12236 /**
12237 * xmlParseBalancedChunkMemory:
12238 * @doc: the document the chunk pertains to (must not be NULL)
12239 * @sax: the SAX handler block (possibly NULL)
12240 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12241 * @depth: Used for loop detection, use 0
12242 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12243 * @lst: the return value for the set of parsed nodes
12244 *
12245 * Parse a well-balanced chunk of an XML document
12246 * called by the parser
12247 * The allowed sequence for the Well Balanced Chunk is the one defined by
12248 * the content production in the XML grammar:
12249 *
12250 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12251 *
12252 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12253 * the parser error code otherwise
12254 */
12255
12256 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)12257 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12258 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12259 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12260 depth, string, lst, 0 );
12261 }
12262 #endif /* LIBXML_SAX1_ENABLED */
12263
12264 /**
12265 * xmlParseInNodeContext:
12266 * @node: the context node
12267 * @data: the input string
12268 * @datalen: the input string length in bytes
12269 * @options: a combination of xmlParserOption
12270 * @lst: the return value for the set of parsed nodes
12271 *
12272 * Parse a well-balanced chunk of an XML document
12273 * within the context (DTD, namespaces, etc ...) of the given node.
12274 *
12275 * The allowed sequence for the data is a Well Balanced Chunk defined by
12276 * the content production in the XML grammar:
12277 *
12278 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12279 *
12280 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12281 * error code otherwise
12282 */
12283 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)12284 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12285 int options, xmlNodePtr *lst) {
12286 xmlParserCtxtPtr ctxt;
12287 xmlDocPtr doc = NULL;
12288 xmlNodePtr fake, cur;
12289 int nsnr = 0;
12290
12291 xmlParserErrors ret = XML_ERR_OK;
12292
12293 /*
12294 * check all input parameters, grab the document
12295 */
12296 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12297 return(XML_ERR_ARGUMENT);
12298 switch (node->type) {
12299 case XML_ELEMENT_NODE:
12300 case XML_ATTRIBUTE_NODE:
12301 case XML_TEXT_NODE:
12302 case XML_CDATA_SECTION_NODE:
12303 case XML_ENTITY_REF_NODE:
12304 case XML_PI_NODE:
12305 case XML_COMMENT_NODE:
12306 case XML_DOCUMENT_NODE:
12307 case XML_HTML_DOCUMENT_NODE:
12308 break;
12309 default:
12310 return(XML_ERR_INTERNAL_ERROR);
12311
12312 }
12313 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12314 (node->type != XML_DOCUMENT_NODE) &&
12315 (node->type != XML_HTML_DOCUMENT_NODE))
12316 node = node->parent;
12317 if (node == NULL)
12318 return(XML_ERR_INTERNAL_ERROR);
12319 if (node->type == XML_ELEMENT_NODE)
12320 doc = node->doc;
12321 else
12322 doc = (xmlDocPtr) node;
12323 if (doc == NULL)
12324 return(XML_ERR_INTERNAL_ERROR);
12325
12326 /*
12327 * allocate a context and set-up everything not related to the
12328 * node position in the tree
12329 */
12330 if (doc->type == XML_DOCUMENT_NODE)
12331 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12332 #ifdef LIBXML_HTML_ENABLED
12333 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
12334 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12335 /*
12336 * When parsing in context, it makes no sense to add implied
12337 * elements like html/body/etc...
12338 */
12339 options |= HTML_PARSE_NOIMPLIED;
12340 }
12341 #endif
12342 else
12343 return(XML_ERR_INTERNAL_ERROR);
12344
12345 if (ctxt == NULL)
12346 return(XML_ERR_NO_MEMORY);
12347
12348 /*
12349 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12350 * We need a dictionary for xmlCtxtInitializeLate, so if there's no doc dict
12351 * we must wait until the last moment to free the original one.
12352 */
12353 if (doc->dict != NULL) {
12354 if (ctxt->dict != NULL)
12355 xmlDictFree(ctxt->dict);
12356 ctxt->dict = doc->dict;
12357 } else {
12358 options |= XML_PARSE_NODICT;
12359 ctxt->dictNames = 0;
12360 }
12361
12362 if (doc->encoding != NULL)
12363 xmlSwitchEncodingName(ctxt, (const char *) doc->encoding);
12364
12365 xmlCtxtUseOptions(ctxt, options);
12366 xmlCtxtInitializeLate(ctxt);
12367 ctxt->myDoc = doc;
12368 /* parsing in context, i.e. as within existing content */
12369 ctxt->input_id = 2;
12370
12371 /*
12372 * TODO: Use xmlCtxtParseContent
12373 */
12374
12375 fake = xmlNewDocComment(node->doc, NULL);
12376 if (fake == NULL) {
12377 xmlFreeParserCtxt(ctxt);
12378 return(XML_ERR_NO_MEMORY);
12379 }
12380 xmlAddChild(node, fake);
12381
12382 if (node->type == XML_ELEMENT_NODE)
12383 nodePush(ctxt, node);
12384
12385 if ((ctxt->html == 0) && (node->type == XML_ELEMENT_NODE)) {
12386 /*
12387 * initialize the SAX2 namespaces stack
12388 */
12389 cur = node;
12390 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12391 xmlNsPtr ns = cur->nsDef;
12392 xmlHashedString hprefix, huri;
12393
12394 while (ns != NULL) {
12395 hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12396 huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12397 if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12398 nsnr++;
12399 ns = ns->next;
12400 }
12401 cur = cur->parent;
12402 }
12403 }
12404
12405 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12406 /*
12407 * ID/IDREF registration will be done in xmlValidateElement below
12408 */
12409 ctxt->loadsubset |= XML_SKIP_IDS;
12410 }
12411
12412 #ifdef LIBXML_HTML_ENABLED
12413 if (doc->type == XML_HTML_DOCUMENT_NODE)
12414 __htmlParseContent(ctxt);
12415 else
12416 #endif
12417 xmlParseContentInternal(ctxt);
12418
12419 if (ctxt->input->cur < ctxt->input->end)
12420 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12421
12422 xmlParserNsPop(ctxt, nsnr);
12423
12424 if ((ctxt->wellFormed) ||
12425 ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
12426 ret = XML_ERR_OK;
12427 } else {
12428 ret = (xmlParserErrors) ctxt->errNo;
12429 }
12430
12431 /*
12432 * Return the newly created nodeset after unlinking it from
12433 * the pseudo sibling.
12434 */
12435
12436 cur = fake->next;
12437 fake->next = NULL;
12438 node->last = fake;
12439
12440 if (cur != NULL) {
12441 cur->prev = NULL;
12442 }
12443
12444 *lst = cur;
12445
12446 while (cur != NULL) {
12447 cur->parent = NULL;
12448 cur = cur->next;
12449 }
12450
12451 xmlUnlinkNode(fake);
12452 xmlFreeNode(fake);
12453
12454
12455 if (ret != XML_ERR_OK) {
12456 xmlFreeNodeList(*lst);
12457 *lst = NULL;
12458 }
12459
12460 if (doc->dict != NULL)
12461 ctxt->dict = NULL;
12462 xmlFreeParserCtxt(ctxt);
12463
12464 return(ret);
12465 }
12466
12467 #ifdef LIBXML_SAX1_ENABLED
12468 /**
12469 * xmlParseBalancedChunkMemoryRecover:
12470 * @doc: the document the chunk pertains to (must not be NULL)
12471 * @sax: the SAX handler block (possibly NULL)
12472 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12473 * @depth: Used for loop detection, use 0
12474 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12475 * @listOut: the return value for the set of parsed nodes
12476 * @recover: return nodes even if the data is broken (use 0)
12477 *
12478 * Parse a well-balanced chunk of an XML document
12479 *
12480 * The allowed sequence for the Well Balanced Chunk is the one defined by
12481 * the content production in the XML grammar:
12482 *
12483 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12484 *
12485 * Returns 0 if the chunk is well balanced, or thehe parser error code
12486 * otherwise.
12487 *
12488 * In case recover is set to 1, the nodelist will not be empty even if
12489 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12490 * some extent.
12491 */
12492 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * listOut,int recover)12493 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12494 void *user_data, int depth, const xmlChar *string, xmlNodePtr *listOut,
12495 int recover) {
12496 xmlParserCtxtPtr ctxt;
12497 xmlParserInputPtr input;
12498 xmlNodePtr list;
12499 int ret;
12500
12501 if (listOut != NULL)
12502 *listOut = NULL;
12503
12504 if (string == NULL)
12505 return(XML_ERR_ARGUMENT);
12506
12507 ctxt = xmlNewSAXParserCtxt(sax, user_data);
12508 if (ctxt == NULL)
12509 return(XML_ERR_NO_MEMORY);
12510
12511 xmlCtxtInitializeLate(ctxt);
12512
12513 ctxt->depth = depth;
12514 ctxt->myDoc = doc;
12515 if (recover) {
12516 ctxt->options |= XML_PARSE_RECOVER;
12517 ctxt->recovery = 1;
12518 }
12519
12520 input = xmlNewStringInputStream(ctxt, string);
12521 if (input == NULL)
12522 return(ctxt->errNo);
12523
12524 list = xmlCtxtParseContent(ctxt, input, /* hasTextDecl */ 0, 1);
12525 if (listOut != NULL)
12526 *listOut = list;
12527 else
12528 xmlFreeNodeList(list);
12529
12530 ret = ctxt->errNo;
12531
12532 xmlFreeInputStream(input);
12533 xmlFreeParserCtxt(ctxt);
12534 return(ret);
12535 }
12536
12537 /**
12538 * xmlSAXParseEntity:
12539 * @sax: the SAX handler block
12540 * @filename: the filename
12541 *
12542 * DEPRECATED: Don't use.
12543 *
12544 * parse an XML external entity out of context and build a tree.
12545 * It use the given SAX function block to handle the parsing callback.
12546 * If sax is NULL, fallback to the default DOM tree building routines.
12547 *
12548 * [78] extParsedEnt ::= TextDecl? content
12549 *
12550 * This correspond to a "Well Balanced" chunk
12551 *
12552 * Returns the resulting document tree
12553 */
12554
12555 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)12556 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12557 xmlDocPtr ret;
12558 xmlParserCtxtPtr ctxt;
12559
12560 ctxt = xmlCreateFileParserCtxt(filename);
12561 if (ctxt == NULL) {
12562 return(NULL);
12563 }
12564 if (sax != NULL) {
12565 if (sax->initialized == XML_SAX2_MAGIC) {
12566 *ctxt->sax = *sax;
12567 } else {
12568 memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12569 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12570 }
12571 ctxt->userData = NULL;
12572 }
12573
12574 xmlParseExtParsedEnt(ctxt);
12575
12576 if (ctxt->wellFormed) {
12577 ret = ctxt->myDoc;
12578 } else {
12579 ret = NULL;
12580 xmlFreeDoc(ctxt->myDoc);
12581 }
12582
12583 xmlFreeParserCtxt(ctxt);
12584
12585 return(ret);
12586 }
12587
12588 /**
12589 * xmlParseEntity:
12590 * @filename: the filename
12591 *
12592 * parse an XML external entity out of context and build a tree.
12593 *
12594 * [78] extParsedEnt ::= TextDecl? content
12595 *
12596 * This correspond to a "Well Balanced" chunk
12597 *
12598 * Returns the resulting document tree
12599 */
12600
12601 xmlDocPtr
xmlParseEntity(const char * filename)12602 xmlParseEntity(const char *filename) {
12603 return(xmlSAXParseEntity(NULL, filename));
12604 }
12605 #endif /* LIBXML_SAX1_ENABLED */
12606
12607 /**
12608 * xmlCreateEntityParserCtxt:
12609 * @URL: the entity URL
12610 * @ID: the entity PUBLIC ID
12611 * @base: a possible base for the target URI
12612 *
12613 * DEPRECATED: Don't use.
12614 *
12615 * Create a parser context for an external entity
12616 * Automatic support for ZLIB/Compress compressed document is provided
12617 * by default if found at compile-time.
12618 *
12619 * Returns the new parser context or NULL
12620 */
12621 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)12622 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12623 const xmlChar *base) {
12624 xmlParserCtxtPtr ctxt;
12625 xmlParserInputPtr input;
12626 xmlChar *uri = NULL;
12627
12628 ctxt = xmlNewParserCtxt();
12629 if (ctxt == NULL)
12630 return(NULL);
12631
12632 if (base != NULL) {
12633 if (xmlBuildURISafe(URL, base, &uri) < 0)
12634 goto error;
12635 if (uri != NULL)
12636 URL = uri;
12637 }
12638
12639 input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12640 XML_RESOURCE_UNKNOWN);
12641 if (input == NULL)
12642 goto error;
12643
12644 if (inputPush(ctxt, input) < 0)
12645 goto error;
12646
12647 xmlFree(uri);
12648 return(ctxt);
12649
12650 error:
12651 xmlFree(uri);
12652 xmlFreeParserCtxt(ctxt);
12653 return(NULL);
12654 }
12655
12656 /************************************************************************
12657 * *
12658 * Front ends when parsing from a file *
12659 * *
12660 ************************************************************************/
12661
12662 /**
12663 * xmlCreateURLParserCtxt:
12664 * @filename: the filename or URL
12665 * @options: a combination of xmlParserOption
12666 *
12667 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12668 *
12669 * Create a parser context for a file or URL content.
12670 * Automatic support for ZLIB/Compress compressed document is provided
12671 * by default if found at compile-time and for file accesses
12672 *
12673 * Returns the new parser context or NULL
12674 */
12675 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)12676 xmlCreateURLParserCtxt(const char *filename, int options)
12677 {
12678 xmlParserCtxtPtr ctxt;
12679 xmlParserInputPtr input;
12680
12681 ctxt = xmlNewParserCtxt();
12682 if (ctxt == NULL)
12683 return(NULL);
12684
12685 xmlCtxtUseOptions(ctxt, options);
12686 ctxt->linenumbers = 1;
12687
12688 input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12689 if (input == NULL) {
12690 xmlFreeParserCtxt(ctxt);
12691 return(NULL);
12692 }
12693 inputPush(ctxt, input);
12694
12695 return(ctxt);
12696 }
12697
12698 /**
12699 * xmlCreateFileParserCtxt:
12700 * @filename: the filename
12701 *
12702 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12703 *
12704 * Create a parser context for a file content.
12705 * Automatic support for ZLIB/Compress compressed document is provided
12706 * by default if found at compile-time.
12707 *
12708 * Returns the new parser context or NULL
12709 */
12710 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)12711 xmlCreateFileParserCtxt(const char *filename)
12712 {
12713 return(xmlCreateURLParserCtxt(filename, 0));
12714 }
12715
12716 #ifdef LIBXML_SAX1_ENABLED
12717 /**
12718 * xmlSAXParseFileWithData:
12719 * @sax: the SAX handler block
12720 * @filename: the filename
12721 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12722 * documents
12723 * @data: the userdata
12724 *
12725 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12726 *
12727 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12728 * compressed document is provided by default if found at compile-time.
12729 * It use the given SAX function block to handle the parsing callback.
12730 * If sax is NULL, fallback to the default DOM tree building routines.
12731 *
12732 * User data (void *) is stored within the parser context in the
12733 * context's _private member, so it is available nearly everywhere in libxml
12734 *
12735 * Returns the resulting document tree
12736 */
12737
12738 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)12739 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12740 int recovery, void *data) {
12741 xmlDocPtr ret;
12742 xmlParserCtxtPtr ctxt;
12743 xmlParserInputPtr input;
12744
12745 ctxt = xmlNewSAXParserCtxt(sax, NULL);
12746 if (ctxt == NULL)
12747 return(NULL);
12748
12749 if (data != NULL)
12750 ctxt->_private = data;
12751
12752 if (recovery) {
12753 ctxt->options |= XML_PARSE_RECOVER;
12754 ctxt->recovery = 1;
12755 }
12756
12757 input = xmlNewInputURL(ctxt, filename, NULL, NULL, 0);
12758
12759 ret = xmlCtxtParseDocument(ctxt, input);
12760
12761 xmlFreeParserCtxt(ctxt);
12762 return(ret);
12763 }
12764
12765 /**
12766 * xmlSAXParseFile:
12767 * @sax: the SAX handler block
12768 * @filename: the filename
12769 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12770 * documents
12771 *
12772 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12773 *
12774 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12775 * compressed document is provided by default if found at compile-time.
12776 * It use the given SAX function block to handle the parsing callback.
12777 * If sax is NULL, fallback to the default DOM tree building routines.
12778 *
12779 * Returns the resulting document tree
12780 */
12781
12782 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)12783 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12784 int recovery) {
12785 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12786 }
12787
12788 /**
12789 * xmlRecoverDoc:
12790 * @cur: a pointer to an array of xmlChar
12791 *
12792 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
12793 *
12794 * parse an XML in-memory document and build a tree.
12795 * In the case the document is not Well Formed, a attempt to build a
12796 * tree is tried anyway
12797 *
12798 * Returns the resulting document tree or NULL in case of failure
12799 */
12800
12801 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)12802 xmlRecoverDoc(const xmlChar *cur) {
12803 return(xmlSAXParseDoc(NULL, cur, 1));
12804 }
12805
12806 /**
12807 * xmlParseFile:
12808 * @filename: the filename
12809 *
12810 * DEPRECATED: Use xmlReadFile.
12811 *
12812 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12813 * compressed document is provided by default if found at compile-time.
12814 *
12815 * Returns the resulting document tree if the file was wellformed,
12816 * NULL otherwise.
12817 */
12818
12819 xmlDocPtr
xmlParseFile(const char * filename)12820 xmlParseFile(const char *filename) {
12821 return(xmlSAXParseFile(NULL, filename, 0));
12822 }
12823
12824 /**
12825 * xmlRecoverFile:
12826 * @filename: the filename
12827 *
12828 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
12829 *
12830 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12831 * compressed document is provided by default if found at compile-time.
12832 * In the case the document is not Well Formed, it attempts to build
12833 * a tree anyway
12834 *
12835 * Returns the resulting document tree or NULL in case of failure
12836 */
12837
12838 xmlDocPtr
xmlRecoverFile(const char * filename)12839 xmlRecoverFile(const char *filename) {
12840 return(xmlSAXParseFile(NULL, filename, 1));
12841 }
12842
12843
12844 /**
12845 * xmlSetupParserForBuffer:
12846 * @ctxt: an XML parser context
12847 * @buffer: a xmlChar * buffer
12848 * @filename: a file name
12849 *
12850 * DEPRECATED: Don't use.
12851 *
12852 * Setup the parser context to parse a new buffer; Clears any prior
12853 * contents from the parser context. The buffer parameter must not be
12854 * NULL, but the filename parameter can be
12855 */
12856 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)12857 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12858 const char* filename)
12859 {
12860 xmlParserInputPtr input;
12861
12862 if ((ctxt == NULL) || (buffer == NULL))
12863 return;
12864
12865 xmlClearParserCtxt(ctxt);
12866
12867 input = xmlNewInputString(ctxt, filename, (const char *) buffer, NULL, 0);
12868 if (input == NULL)
12869 return;
12870 inputPush(ctxt, input);
12871 }
12872
12873 /**
12874 * xmlSAXUserParseFile:
12875 * @sax: a SAX handler
12876 * @user_data: The user data returned on SAX callbacks
12877 * @filename: a file name
12878 *
12879 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12880 *
12881 * parse an XML file and call the given SAX handler routines.
12882 * Automatic support for ZLIB/Compress compressed document is provided
12883 *
12884 * Returns 0 in case of success or a error number otherwise
12885 */
12886 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)12887 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12888 const char *filename) {
12889 int ret = 0;
12890 xmlParserCtxtPtr ctxt;
12891
12892 ctxt = xmlCreateFileParserCtxt(filename);
12893 if (ctxt == NULL) return -1;
12894 if (sax != NULL) {
12895 if (sax->initialized == XML_SAX2_MAGIC) {
12896 *ctxt->sax = *sax;
12897 } else {
12898 memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12899 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12900 }
12901 ctxt->userData = user_data;
12902 }
12903
12904 xmlParseDocument(ctxt);
12905
12906 if (ctxt->wellFormed)
12907 ret = 0;
12908 else {
12909 if (ctxt->errNo != 0)
12910 ret = ctxt->errNo;
12911 else
12912 ret = -1;
12913 }
12914 if (ctxt->myDoc != NULL) {
12915 xmlFreeDoc(ctxt->myDoc);
12916 ctxt->myDoc = NULL;
12917 }
12918 xmlFreeParserCtxt(ctxt);
12919
12920 return ret;
12921 }
12922 #endif /* LIBXML_SAX1_ENABLED */
12923
12924 /************************************************************************
12925 * *
12926 * Front ends when parsing from memory *
12927 * *
12928 ************************************************************************/
12929
12930 /**
12931 * xmlCreateMemoryParserCtxt:
12932 * @buffer: a pointer to a char array
12933 * @size: the size of the array
12934 *
12935 * Create a parser context for an XML in-memory document. The input buffer
12936 * must not contain a terminating null byte.
12937 *
12938 * Returns the new parser context or NULL
12939 */
12940 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)12941 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12942 xmlParserCtxtPtr ctxt;
12943 xmlParserInputPtr input;
12944
12945 if (size < 0)
12946 return(NULL);
12947
12948 ctxt = xmlNewParserCtxt();
12949 if (ctxt == NULL)
12950 return(NULL);
12951
12952 input = xmlNewInputMemory(ctxt, NULL, buffer, size, NULL, 0);
12953 if (input == NULL) {
12954 xmlFreeParserCtxt(ctxt);
12955 return(NULL);
12956 }
12957 inputPush(ctxt, input);
12958
12959 return(ctxt);
12960 }
12961
12962 #ifdef LIBXML_SAX1_ENABLED
12963 /**
12964 * xmlSAXParseMemoryWithData:
12965 * @sax: the SAX handler block
12966 * @buffer: an pointer to a char array
12967 * @size: the size of the array
12968 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12969 * documents
12970 * @data: the userdata
12971 *
12972 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
12973 *
12974 * parse an XML in-memory block and use the given SAX function block
12975 * to handle the parsing callback. If sax is NULL, fallback to the default
12976 * DOM tree building routines.
12977 *
12978 * User data (void *) is stored within the parser context in the
12979 * context's _private member, so it is available nearly everywhere in libxml
12980 *
12981 * Returns the resulting document tree
12982 */
12983
12984 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)12985 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12986 int size, int recovery, void *data) {
12987 xmlDocPtr ret;
12988 xmlParserCtxtPtr ctxt;
12989 xmlParserInputPtr input;
12990
12991 if (size < 0)
12992 return(NULL);
12993
12994 ctxt = xmlNewSAXParserCtxt(sax, NULL);
12995 if (ctxt == NULL)
12996 return(NULL);
12997
12998 if (data != NULL)
12999 ctxt->_private=data;
13000
13001 if (recovery) {
13002 ctxt->options |= XML_PARSE_RECOVER;
13003 ctxt->recovery = 1;
13004 }
13005
13006 input = xmlNewInputMemory(ctxt, NULL, buffer, size, NULL,
13007 XML_INPUT_BUF_STATIC);
13008
13009 ret = xmlCtxtParseDocument(ctxt, input);
13010
13011 xmlFreeParserCtxt(ctxt);
13012 return(ret);
13013 }
13014
13015 /**
13016 * xmlSAXParseMemory:
13017 * @sax: the SAX handler block
13018 * @buffer: an pointer to a char array
13019 * @size: the size of the array
13020 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13021 * documents
13022 *
13023 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13024 *
13025 * parse an XML in-memory block and use the given SAX function block
13026 * to handle the parsing callback. If sax is NULL, fallback to the default
13027 * DOM tree building routines.
13028 *
13029 * Returns the resulting document tree
13030 */
13031 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)13032 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13033 int size, int recovery) {
13034 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13035 }
13036
13037 /**
13038 * xmlParseMemory:
13039 * @buffer: an pointer to a char array
13040 * @size: the size of the array
13041 *
13042 * DEPRECATED: Use xmlReadMemory.
13043 *
13044 * parse an XML in-memory block and build a tree.
13045 *
13046 * Returns the resulting document tree
13047 */
13048
xmlParseMemory(const char * buffer,int size)13049 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13050 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13051 }
13052
13053 /**
13054 * xmlRecoverMemory:
13055 * @buffer: an pointer to a char array
13056 * @size: the size of the array
13057 *
13058 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
13059 *
13060 * parse an XML in-memory block and build a tree.
13061 * In the case the document is not Well Formed, an attempt to
13062 * build a tree is tried anyway
13063 *
13064 * Returns the resulting document tree or NULL in case of error
13065 */
13066
xmlRecoverMemory(const char * buffer,int size)13067 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13068 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13069 }
13070
13071 /**
13072 * xmlSAXUserParseMemory:
13073 * @sax: a SAX handler
13074 * @user_data: The user data returned on SAX callbacks
13075 * @buffer: an in-memory XML document input
13076 * @size: the length of the XML document in bytes
13077 *
13078 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13079 *
13080 * parse an XML in-memory buffer and call the given SAX handler routines.
13081 *
13082 * Returns 0 in case of success or a error number otherwise
13083 */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)13084 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13085 const char *buffer, int size) {
13086 int ret = 0;
13087 xmlParserCtxtPtr ctxt;
13088
13089 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13090 if (ctxt == NULL) return -1;
13091 if (sax != NULL) {
13092 if (sax->initialized == XML_SAX2_MAGIC) {
13093 *ctxt->sax = *sax;
13094 } else {
13095 memset(ctxt->sax, 0, sizeof(*ctxt->sax));
13096 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
13097 }
13098 ctxt->userData = user_data;
13099 }
13100
13101 xmlParseDocument(ctxt);
13102
13103 if (ctxt->wellFormed)
13104 ret = 0;
13105 else {
13106 if (ctxt->errNo != 0)
13107 ret = ctxt->errNo;
13108 else
13109 ret = -1;
13110 }
13111 if (ctxt->myDoc != NULL) {
13112 xmlFreeDoc(ctxt->myDoc);
13113 ctxt->myDoc = NULL;
13114 }
13115 xmlFreeParserCtxt(ctxt);
13116
13117 return ret;
13118 }
13119 #endif /* LIBXML_SAX1_ENABLED */
13120
13121 /**
13122 * xmlCreateDocParserCtxt:
13123 * @str: a pointer to an array of xmlChar
13124 *
13125 * Creates a parser context for an XML in-memory document.
13126 *
13127 * Returns the new parser context or NULL
13128 */
13129 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * str)13130 xmlCreateDocParserCtxt(const xmlChar *str) {
13131 xmlParserCtxtPtr ctxt;
13132 xmlParserInputPtr input;
13133
13134 ctxt = xmlNewParserCtxt();
13135 if (ctxt == NULL)
13136 return(NULL);
13137
13138 input = xmlNewInputString(ctxt, NULL, (const char *) str, NULL, 0);
13139 if (input == NULL) {
13140 xmlFreeParserCtxt(ctxt);
13141 return(NULL);
13142 }
13143 inputPush(ctxt, input);
13144
13145 return(ctxt);
13146 }
13147
13148 #ifdef LIBXML_SAX1_ENABLED
13149 /**
13150 * xmlSAXParseDoc:
13151 * @sax: the SAX handler block
13152 * @cur: a pointer to an array of xmlChar
13153 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13154 * documents
13155 *
13156 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
13157 *
13158 * parse an XML in-memory document and build a tree.
13159 * It use the given SAX function block to handle the parsing callback.
13160 * If sax is NULL, fallback to the default DOM tree building routines.
13161 *
13162 * Returns the resulting document tree
13163 */
13164
13165 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)13166 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13167 xmlDocPtr ret;
13168 xmlParserCtxtPtr ctxt;
13169 xmlSAXHandlerPtr oldsax = NULL;
13170
13171 if (cur == NULL) return(NULL);
13172
13173
13174 ctxt = xmlCreateDocParserCtxt(cur);
13175 if (ctxt == NULL) return(NULL);
13176 if (sax != NULL) {
13177 oldsax = ctxt->sax;
13178 ctxt->sax = sax;
13179 ctxt->userData = NULL;
13180 }
13181
13182 xmlParseDocument(ctxt);
13183 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13184 else {
13185 ret = NULL;
13186 xmlFreeDoc(ctxt->myDoc);
13187 ctxt->myDoc = NULL;
13188 }
13189 if (sax != NULL)
13190 ctxt->sax = oldsax;
13191 xmlFreeParserCtxt(ctxt);
13192
13193 return(ret);
13194 }
13195
13196 /**
13197 * xmlParseDoc:
13198 * @cur: a pointer to an array of xmlChar
13199 *
13200 * DEPRECATED: Use xmlReadDoc.
13201 *
13202 * parse an XML in-memory document and build a tree.
13203 *
13204 * Returns the resulting document tree
13205 */
13206
13207 xmlDocPtr
xmlParseDoc(const xmlChar * cur)13208 xmlParseDoc(const xmlChar *cur) {
13209 return(xmlSAXParseDoc(NULL, cur, 0));
13210 }
13211 #endif /* LIBXML_SAX1_ENABLED */
13212
13213 /************************************************************************
13214 * *
13215 * New set (2.6.0) of simpler and more flexible APIs *
13216 * *
13217 ************************************************************************/
13218
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string unless it is owned by the dictionary. The macro relies
 * on a variable named "dict" being visible where it is expanded; a NULL
 * "dict" means the string is always freed. A NULL @str is ignored.
 *
 * The expansion is a complete if statement including its terminating
 * semicolon.
 */
#define DICT_FREE(str)                                          \
    if (((str) != NULL) &&                                      \
        ((dict == NULL) ||                                      \
         (!xmlDictOwns(dict, (const xmlChar *)(str)))))         \
        xmlFree((char *)(str));
13230
13231 /**
13232 * xmlCtxtReset:
13233 * @ctxt: an XML parser context
13234 *
13235 * Reset a parser context
13236 */
13237 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)13238 xmlCtxtReset(xmlParserCtxtPtr ctxt)
13239 {
13240 xmlParserInputPtr input;
13241 xmlDictPtr dict;
13242
13243 if (ctxt == NULL)
13244 return;
13245
13246 dict = ctxt->dict;
13247
13248 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13249 xmlFreeInputStream(input);
13250 }
13251 ctxt->inputNr = 0;
13252 ctxt->input = NULL;
13253
13254 ctxt->spaceNr = 0;
13255 if (ctxt->spaceTab != NULL) {
13256 ctxt->spaceTab[0] = -1;
13257 ctxt->space = &ctxt->spaceTab[0];
13258 } else {
13259 ctxt->space = NULL;
13260 }
13261
13262
13263 ctxt->nodeNr = 0;
13264 ctxt->node = NULL;
13265
13266 ctxt->nameNr = 0;
13267 ctxt->name = NULL;
13268
13269 ctxt->nsNr = 0;
13270 xmlParserNsReset(ctxt->nsdb);
13271
13272 DICT_FREE(ctxt->version);
13273 ctxt->version = NULL;
13274 DICT_FREE(ctxt->encoding);
13275 ctxt->encoding = NULL;
13276 DICT_FREE(ctxt->extSubURI);
13277 ctxt->extSubURI = NULL;
13278 DICT_FREE(ctxt->extSubSystem);
13279 ctxt->extSubSystem = NULL;
13280 if (ctxt->myDoc != NULL)
13281 xmlFreeDoc(ctxt->myDoc);
13282 ctxt->myDoc = NULL;
13283
13284 ctxt->standalone = -1;
13285 ctxt->hasExternalSubset = 0;
13286 ctxt->hasPErefs = 0;
13287 ctxt->html = 0;
13288 ctxt->instate = XML_PARSER_START;
13289
13290 ctxt->wellFormed = 1;
13291 ctxt->nsWellFormed = 1;
13292 ctxt->disableSAX = 0;
13293 ctxt->valid = 1;
13294 #if 0
13295 ctxt->vctxt.userData = ctxt;
13296 ctxt->vctxt.error = xmlParserValidityError;
13297 ctxt->vctxt.warning = xmlParserValidityWarning;
13298 #endif
13299 ctxt->record_info = 0;
13300 ctxt->checkIndex = 0;
13301 ctxt->endCheckState = 0;
13302 ctxt->inSubset = 0;
13303 ctxt->errNo = XML_ERR_OK;
13304 ctxt->depth = 0;
13305 ctxt->catalogs = NULL;
13306 ctxt->sizeentities = 0;
13307 ctxt->sizeentcopy = 0;
13308 xmlInitNodeInfoSeq(&ctxt->node_seq);
13309
13310 if (ctxt->attsDefault != NULL) {
13311 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13312 ctxt->attsDefault = NULL;
13313 }
13314 if (ctxt->attsSpecial != NULL) {
13315 xmlHashFree(ctxt->attsSpecial, NULL);
13316 ctxt->attsSpecial = NULL;
13317 }
13318
13319 #ifdef LIBXML_CATALOG_ENABLED
13320 if (ctxt->catalogs != NULL)
13321 xmlCatalogFreeLocal(ctxt->catalogs);
13322 #endif
13323 ctxt->nbErrors = 0;
13324 ctxt->nbWarnings = 0;
13325 if (ctxt->lastError.code != XML_ERR_OK)
13326 xmlResetError(&ctxt->lastError);
13327 }
13328
13329 /**
13330 * xmlCtxtResetPush:
13331 * @ctxt: an XML parser context
13332 * @chunk: a pointer to an array of chars
13333 * @size: number of chars in the array
13334 * @filename: an optional file name or URI
13335 * @encoding: the document encoding, or NULL
13336 *
13337 * Reset a push parser context
13338 *
13339 * Returns 0 in case of success and 1 in case of error
13340 */
13341 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)13342 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13343 int size, const char *filename, const char *encoding)
13344 {
13345 xmlParserInputPtr input;
13346
13347 if (ctxt == NULL)
13348 return(1);
13349
13350 xmlCtxtReset(ctxt);
13351
13352 input = xmlInputCreatePush(filename, chunk, size);
13353 if (input == NULL)
13354 return(1);
13355
13356 inputPush(ctxt, input);
13357
13358 if (encoding != NULL)
13359 xmlSwitchEncodingName(ctxt, encoding);
13360
13361 return(0);
13362 }
13363
13364 static int
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt,int options,int keepMask)13365 xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13366 {
13367 int allMask;
13368
13369 if (ctxt == NULL)
13370 return(-1);
13371
13372 /*
13373 * XInclude options aren't handled by the parser.
13374 *
13375 * XML_PARSE_XINCLUDE
13376 * XML_PARSE_NOXINCNODE
13377 * XML_PARSE_NOBASEFIX
13378 */
13379 allMask = XML_PARSE_RECOVER |
13380 XML_PARSE_NOENT |
13381 XML_PARSE_DTDLOAD |
13382 XML_PARSE_DTDATTR |
13383 XML_PARSE_DTDVALID |
13384 XML_PARSE_NOERROR |
13385 XML_PARSE_NOWARNING |
13386 XML_PARSE_PEDANTIC |
13387 XML_PARSE_NOBLANKS |
13388 #ifdef LIBXML_SAX1_ENABLED
13389 XML_PARSE_SAX1 |
13390 #endif
13391 XML_PARSE_NONET |
13392 XML_PARSE_NODICT |
13393 XML_PARSE_NSCLEAN |
13394 XML_PARSE_NOCDATA |
13395 XML_PARSE_COMPACT |
13396 XML_PARSE_OLD10 |
13397 XML_PARSE_HUGE |
13398 XML_PARSE_OLDSAX |
13399 XML_PARSE_IGNORE_ENC |
13400 XML_PARSE_BIG_LINES |
13401 XML_PARSE_NO_XXE;
13402
13403 ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13404
13405 /*
13406 * For some options, struct members are historically the source
13407 * of truth. The values are initalized from global variables and
13408 * old code could also modify them directly. Several older API
13409 * functions that don't take an options argument rely on these
13410 * deprecated mechanisms.
13411 *
13412 * Once public access to struct members and the globals are
13413 * disabled, we can use the options bitmask as source of
13414 * truth, making all these struct members obsolete.
13415 *
13416 * The XML_DETECT_IDS flags is misnamed. It simply enables
13417 * loading of the external subset.
13418 */
13419 ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13420 ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13421 ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13422 ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13423 ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13424 ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13425 ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13426 ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13427
13428 /*
13429 * Changing SAX callbacks is a bad idea. This should be fixed.
13430 */
13431 if (options & XML_PARSE_NOBLANKS) {
13432 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13433 }
13434 if (options & XML_PARSE_NOCDATA) {
13435 ctxt->sax->cdataBlock = NULL;
13436 }
13437 if (options & XML_PARSE_HUGE) {
13438 if (ctxt->dict != NULL)
13439 xmlDictSetLimit(ctxt->dict, 0);
13440 }
13441
13442 ctxt->linenumbers = 1;
13443
13444 return(options & ~allMask);
13445 }
13446
13447 /**
13448 * xmlCtxtSetOptions:
13449 * @ctxt: an XML parser context
13450 * @options: a bitmask of xmlParserOption values
13451 *
13452 * Applies the options to the parser context. Unset options are
13453 * cleared.
13454 *
13455 * Available since 2.13.0. With older versions, you can use
13456 * xmlCtxtUseOptions.
13457 *
13458 * XML_PARSE_RECOVER
13459 *
13460 * Enable "recovery" mode which allows non-wellformed documents.
13461 * How this mode behaves exactly is unspecified and may change
13462 * without further notice. Use of this feature is DISCOURAGED.
13463 *
13464 * XML_PARSE_NOENT
13465 *
13466 * Despite the confusing name, this option enables substitution
13467 * of entities. The resulting tree won't contain any entity
13468 * reference nodes.
13469 *
13470 * This option also enables loading of external entities (both
13471 * general and parameter entities) which is dangerous. If you
13472 * process untrusted data, it's recommended to set the
13473 * XML_PARSE_NO_XXE option to disable loading of external
13474 * entities.
13475 *
13476 * XML_PARSE_DTDLOAD
13477 *
13478 * Enables loading of an external DTD and the loading and
13479 * substitution of external parameter entities. Has no effect
13480 * if XML_PARSE_NO_XXE is set.
13481 *
13482 * XML_PARSE_DTDATTR
13483 *
13484 * Adds default attributes from the DTD to the result document.
13485 *
13486 * Implies XML_PARSE_DTDLOAD, but loading of external content
13487 * can be disabled with XML_PARSE_NO_XXE.
13488 *
13489 * XML_PARSE_DTDVALID
13490 *
13491 * This option enables DTD validation which requires loading
13492 * external DTDs and external entities (both general and
13493 * parameter entities) unless XML_PARSE_NO_XXE was set.
13494 *
13495 * XML_PARSE_NO_XXE
13496 *
13497 * Disables loading of external DTDs or entities.
13498 *
13499 * XML_PARSE_NOERROR
13500 *
13501 * Disable error and warning reports to the error handlers.
13502 * Errors are still accessible with xmlCtxtGetLastError.
13503 *
13504 * XML_PARSE_NOWARNING
13505 *
13506 * Disable warning reports.
13507 *
13508 * XML_PARSE_PEDANTIC
13509 *
13510 * Enable some pedantic warnings.
13511 *
13512 * XML_PARSE_NOBLANKS
13513 *
13514 * Remove some text nodes containing only whitespace from the
13515 * result document. Which nodes are removed depends on DTD
13516 * element declarations or a conservative heuristic. The
13517 * reindenting feature of the serialization code relies on this
13518 * option to be set when parsing. Use of this option is
13519 * DISCOURAGED.
13520 *
13521 * XML_PARSE_SAX1
13522 *
13523 * Always invoke the deprecated SAX1 startElement and endElement
13524 * handlers. This option is DEPRECATED.
13525 *
13526 * XML_PARSE_NONET
13527 *
13528 * Disable network access with the builtin HTTP client.
13529 *
13530 * XML_PARSE_NODICT
13531 *
13532 * Create a document without interned strings, making all
13533 * strings separate memory allocations.
13534 *
13535 * XML_PARSE_NSCLEAN
13536 *
13537 * Remove redundant namespace declarations from the result
13538 * document.
13539 *
13540 * XML_PARSE_NOCDATA
13541 *
13542 * Output normal text nodes instead of CDATA nodes.
13543 *
13544 * XML_PARSE_COMPACT
13545 *
13546 * Store small strings directly in the node struct to save
13547 * memory.
13548 *
13549 * XML_PARSE_OLD10
13550 *
13551 * Use old Name productions from before XML 1.0 Fifth Edition.
13552 * This option is DEPRECATED.
13553 *
13554 * XML_PARSE_HUGE
13555 *
13556 * Relax some internal limits.
13557 *
13558 * Maximum size of text nodes, tags, comments, processing instructions,
13559 * CDATA sections, entity values
13560 *
13561 * normal: 10M
13562 * huge: 1B
13563 *
13564 * Maximum size of names, system literals, pubid literals
13565 *
13566 * normal: 50K
13567 * huge: 10M
13568 *
13569 * Maximum nesting depth of elements
13570 *
13571 * normal: 256
13572 * huge: 2048
13573 *
13574 * Maximum nesting depth of entities
13575 *
13576 * normal: 20
13577 * huge: 40
13578 *
13579 * XML_PARSE_OLDSAX
13580 *
13581 * Enable an unspecified legacy mode for SAX parsers. This
13582 * option is DEPRECATED.
13583 *
13584 * XML_PARSE_IGNORE_ENC
13585 *
13586 * Ignore the encoding in the XML declaration. This option is
13587 * mostly unneeded these days. The only effect is to enforce
13588 * UTF-8 decoding of ASCII-like data.
13589 *
13590 * XML_PARSE_BIG_LINES
13591 *
13592 * Enable reporting of line numbers larger than 65535.
13593 *
13594 * XML_PARSE_NO_UNZIP
13595 *
13596 * Disables input decompression. Setting this option is recommended
13597 * to avoid zip bombs.
13598 *
13599 * Available since 2.14.0.
13600 *
13601 * Returns 0 in case of success, the set of unknown or unimplemented options
13602 * in case of error.
13603 */
13604 int
xmlCtxtSetOptions(xmlParserCtxtPtr ctxt,int options)13605 xmlCtxtSetOptions(xmlParserCtxtPtr ctxt, int options)
13606 {
13607 return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13608 }
13609
13610 /**
13611 * xmlCtxtUseOptions:
13612 * @ctxt: an XML parser context
13613 * @options: a combination of xmlParserOption
13614 *
13615 * DEPRECATED: Use xmlCtxtSetOptions.
13616 *
13617 * Applies the options to the parser context. The following options
13618 * are never cleared and can only be enabled:
13619 *
13620 * XML_PARSE_NOERROR
13621 * XML_PARSE_NOWARNING
13622 * XML_PARSE_NONET
13623 * XML_PARSE_NSCLEAN
13624 * XML_PARSE_NOCDATA
13625 * XML_PARSE_COMPACT
13626 * XML_PARSE_OLD10
13627 * XML_PARSE_HUGE
13628 * XML_PARSE_OLDSAX
13629 * XML_PARSE_IGNORE_ENC
13630 * XML_PARSE_BIG_LINES
13631 *
13632 * Returns 0 in case of success, the set of unknown or unimplemented options
13633 * in case of error.
13634 */
13635 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)13636 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13637 {
13638 int keepMask;
13639
13640 /*
13641 * For historic reasons, some options can only be enabled.
13642 */
13643 keepMask = XML_PARSE_NOERROR |
13644 XML_PARSE_NOWARNING |
13645 XML_PARSE_NONET |
13646 XML_PARSE_NSCLEAN |
13647 XML_PARSE_NOCDATA |
13648 XML_PARSE_COMPACT |
13649 XML_PARSE_OLD10 |
13650 XML_PARSE_HUGE |
13651 XML_PARSE_OLDSAX |
13652 XML_PARSE_IGNORE_ENC |
13653 XML_PARSE_BIG_LINES;
13654
13655 return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13656 }
13657
13658 /**
13659 * xmlCtxtSetMaxAmplification:
13660 * @ctxt: an XML parser context
13661 * @maxAmpl: maximum amplification factor
13662 *
13663 * To protect against exponential entity expansion ("billion laughs"), the
13664 * size of serialized output is (roughly) limited to the input size
13665 * multiplied by this factor. The default value is 5.
13666 *
13667 * When working with documents making heavy use of entity expansion, it can
13668 * be necessary to increase the value. For security reasons, this should only
13669 * be considered when processing trusted input.
13670 */
13671 void
xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt,unsigned maxAmpl)13672 xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl)
13673 {
13674 ctxt->maxAmpl = maxAmpl;
13675 }
13676
13677 /**
13678 * xmlCtxtParseDocument:
13679 * @ctxt: an XML parser context
13680 * @input: parser input
13681 *
13682 * Parse an XML document and return the resulting document tree.
13683 * Takes ownership of the input object.
13684 *
13685 * Available since 2.13.0.
13686 *
13687 * Returns the resulting document tree or NULL
13688 */
13689 xmlDocPtr
xmlCtxtParseDocument(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)13690 xmlCtxtParseDocument(xmlParserCtxtPtr ctxt, xmlParserInputPtr input)
13691 {
13692 xmlDocPtr ret = NULL;
13693
13694 if ((ctxt == NULL) || (input == NULL))
13695 return(NULL);
13696
13697 /* assert(ctxt->inputNr == 0); */
13698 while (ctxt->inputNr > 0)
13699 xmlFreeInputStream(inputPop(ctxt));
13700
13701 if (inputPush(ctxt, input) < 0) {
13702 xmlFreeInputStream(input);
13703 return(NULL);
13704 }
13705
13706 xmlParseDocument(ctxt);
13707
13708 if ((ctxt->wellFormed) ||
13709 ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
13710 ret = ctxt->myDoc;
13711 } else {
13712 if (ctxt->errNo == XML_ERR_OK)
13713 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, "unknown error\n");
13714
13715 ret = NULL;
13716 xmlFreeDoc(ctxt->myDoc);
13717 }
13718 ctxt->myDoc = NULL;
13719
13720 /* assert(ctxt->inputNr == 1); */
13721 while (ctxt->inputNr > 0)
13722 xmlFreeInputStream(inputPop(ctxt));
13723
13724 return(ret);
13725 }
13726
13727 /**
13728 * xmlReadDoc:
13729 * @cur: a pointer to a zero terminated string
13730 * @URL: base URL (optional)
13731 * @encoding: the document encoding (optional)
13732 * @options: a combination of xmlParserOption
13733 *
13734 * Convenience function to parse an XML document from a
13735 * zero-terminated string.
13736 *
13737 * See xmlCtxtReadDoc for details.
13738 *
13739 * Returns the resulting document tree
13740 */
13741 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)13742 xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13743 int options)
13744 {
13745 xmlParserCtxtPtr ctxt;
13746 xmlParserInputPtr input;
13747 xmlDocPtr doc;
13748
13749 ctxt = xmlNewParserCtxt();
13750 if (ctxt == NULL)
13751 return(NULL);
13752
13753 xmlCtxtUseOptions(ctxt, options);
13754
13755 input = xmlNewInputString(ctxt, URL, (const char *) cur, encoding,
13756 XML_INPUT_BUF_STATIC);
13757
13758 doc = xmlCtxtParseDocument(ctxt, input);
13759
13760 xmlFreeParserCtxt(ctxt);
13761 return(doc);
13762 }
13763
13764 /**
13765 * xmlReadFile:
13766 * @filename: a file or URL
13767 * @encoding: the document encoding (optional)
13768 * @options: a combination of xmlParserOption
13769 *
13770 * Convenience function to parse an XML file from the filesystem,
13771 * the network or a global user-define resource loader.
13772 *
13773 * See xmlCtxtReadFile for details.
13774 *
13775 * Returns the resulting document tree
13776 */
13777 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)13778 xmlReadFile(const char *filename, const char *encoding, int options)
13779 {
13780 xmlParserCtxtPtr ctxt;
13781 xmlParserInputPtr input;
13782 xmlDocPtr doc;
13783
13784 ctxt = xmlNewParserCtxt();
13785 if (ctxt == NULL)
13786 return(NULL);
13787
13788 xmlCtxtUseOptions(ctxt, options);
13789
13790 input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0);
13791
13792 doc = xmlCtxtParseDocument(ctxt, input);
13793
13794 xmlFreeParserCtxt(ctxt);
13795 return(doc);
13796 }
13797
13798 /**
13799 * xmlReadMemory:
13800 * @buffer: a pointer to a char array
13801 * @size: the size of the array
13802 * @url: base URL (optional)
13803 * @encoding: the document encoding (optional)
13804 * @options: a combination of xmlParserOption
13805 *
13806 * Parse an XML in-memory document and build a tree. The input buffer must
13807 * not contain a terminating null byte.
13808 *
13809 * See xmlCtxtReadMemory for details.
13810 *
13811 * Returns the resulting document tree
13812 */
13813 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * url,const char * encoding,int options)13814 xmlReadMemory(const char *buffer, int size, const char *url,
13815 const char *encoding, int options)
13816 {
13817 xmlParserCtxtPtr ctxt;
13818 xmlParserInputPtr input;
13819 xmlDocPtr doc;
13820
13821 if (size < 0)
13822 return(NULL);
13823
13824 ctxt = xmlNewParserCtxt();
13825 if (ctxt == NULL)
13826 return(NULL);
13827
13828 xmlCtxtUseOptions(ctxt, options);
13829
13830 input = xmlNewInputMemory(ctxt, url, buffer, size, encoding,
13831 XML_INPUT_BUF_STATIC);
13832
13833 doc = xmlCtxtParseDocument(ctxt, input);
13834
13835 xmlFreeParserCtxt(ctxt);
13836 return(doc);
13837 }
13838
13839 /**
13840 * xmlReadFd:
13841 * @fd: an open file descriptor
13842 * @URL: base URL (optional)
13843 * @encoding: the document encoding (optional)
13844 * @options: a combination of xmlParserOption
13845 *
13846 * Parse an XML from a file descriptor and build a tree.
13847 *
13848 * See xmlCtxtReadFd for details.
13849 *
13850 * NOTE that the file descriptor will not be closed when the
13851 * context is freed or reset.
13852 *
13853 * Returns the resulting document tree
13854 */
13855 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)13856 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13857 {
13858 xmlParserCtxtPtr ctxt;
13859 xmlParserInputPtr input;
13860 xmlDocPtr doc;
13861
13862 ctxt = xmlNewParserCtxt();
13863 if (ctxt == NULL)
13864 return(NULL);
13865
13866 xmlCtxtUseOptions(ctxt, options);
13867
13868 input = xmlNewInputFd(ctxt, URL, fd, encoding, 0);
13869
13870 doc = xmlCtxtParseDocument(ctxt, input);
13871
13872 xmlFreeParserCtxt(ctxt);
13873 return(doc);
13874 }
13875
13876 /**
13877 * xmlReadIO:
13878 * @ioread: an I/O read function
13879 * @ioclose: an I/O close function (optional)
13880 * @ioctx: an I/O handler
13881 * @URL: base URL (optional)
13882 * @encoding: the document encoding (optional)
13883 * @options: a combination of xmlParserOption
13884 *
13885 * Parse an XML document from I/O functions and context and build a tree.
13886 *
13887 * See xmlCtxtReadIO for details.
13888 *
13889 * Returns the resulting document tree
13890 */
13891 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)13892 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13893 void *ioctx, const char *URL, const char *encoding, int options)
13894 {
13895 xmlParserCtxtPtr ctxt;
13896 xmlParserInputPtr input;
13897 xmlDocPtr doc;
13898
13899 ctxt = xmlNewParserCtxt();
13900 if (ctxt == NULL)
13901 return(NULL);
13902
13903 xmlCtxtUseOptions(ctxt, options);
13904
13905 input = xmlNewInputIO(ctxt, URL, ioread, ioclose, ioctx, encoding, 0);
13906
13907 doc = xmlCtxtParseDocument(ctxt, input);
13908
13909 xmlFreeParserCtxt(ctxt);
13910 return(doc);
13911 }
13912
13913 /**
13914 * xmlCtxtReadDoc:
13915 * @ctxt: an XML parser context
13916 * @str: a pointer to a zero terminated string
13917 * @URL: base URL (optional)
13918 * @encoding: the document encoding (optional)
13919 * @options: a combination of xmlParserOption
13920 *
13921 * Parse an XML in-memory document and build a tree.
13922 *
13923 * @URL is used as base to resolve external entities and for error
13924 * reporting.
13925 *
13926 * See xmlCtxtUseOptions for details.
13927 *
13928 * Returns the resulting document tree
13929 */
13930 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * str,const char * URL,const char * encoding,int options)13931 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str,
13932 const char *URL, const char *encoding, int options)
13933 {
13934 xmlParserInputPtr input;
13935
13936 if (ctxt == NULL)
13937 return(NULL);
13938
13939 xmlCtxtReset(ctxt);
13940 xmlCtxtUseOptions(ctxt, options);
13941
13942 input = xmlNewInputString(ctxt, URL, (const char *) str, encoding,
13943 XML_INPUT_BUF_STATIC);
13944
13945 return(xmlCtxtParseDocument(ctxt, input));
13946 }
13947
13948 /**
13949 * xmlCtxtReadFile:
13950 * @ctxt: an XML parser context
13951 * @filename: a file or URL
13952 * @encoding: the document encoding (optional)
13953 * @options: a combination of xmlParserOption
13954 *
13955 * Parse an XML file from the filesystem, the network or a user-defined
13956 * resource loader.
13957 *
13958 * Returns the resulting document tree
13959 */
13960 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)13961 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13962 const char *encoding, int options)
13963 {
13964 xmlParserInputPtr input;
13965
13966 if (ctxt == NULL)
13967 return(NULL);
13968
13969 xmlCtxtReset(ctxt);
13970 xmlCtxtUseOptions(ctxt, options);
13971
13972 input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0);
13973
13974 return(xmlCtxtParseDocument(ctxt, input));
13975 }
13976
13977 /**
13978 * xmlCtxtReadMemory:
13979 * @ctxt: an XML parser context
13980 * @buffer: a pointer to a char array
13981 * @size: the size of the array
13982 * @URL: base URL (optional)
13983 * @encoding: the document encoding (optional)
13984 * @options: a combination of xmlParserOption
13985 *
13986 * Parse an XML in-memory document and build a tree. The input buffer must
13987 * not contain a terminating null byte.
13988 *
13989 * @URL is used as base to resolve external entities and for error
13990 * reporting.
13991 *
13992 * See xmlCtxtUseOptions for details.
13993 *
13994 * Returns the resulting document tree
13995 */
13996 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)13997 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
13998 const char *URL, const char *encoding, int options)
13999 {
14000 xmlParserInputPtr input;
14001
14002 if ((ctxt == NULL) || (size < 0))
14003 return(NULL);
14004
14005 xmlCtxtReset(ctxt);
14006 xmlCtxtUseOptions(ctxt, options);
14007
14008 input = xmlNewInputMemory(ctxt, URL, buffer, size, encoding,
14009 XML_INPUT_BUF_STATIC);
14010
14011 return(xmlCtxtParseDocument(ctxt, input));
14012 }
14013
14014 /**
14015 * xmlCtxtReadFd:
14016 * @ctxt: an XML parser context
14017 * @fd: an open file descriptor
14018 * @URL: base URL (optional)
14019 * @encoding: the document encoding (optional)
14020 * @options: a combination of xmlParserOption
14021 *
14022 * Parse an XML document from a file descriptor and build a tree.
14023 *
14024 * NOTE that the file descriptor will not be closed when the
14025 * context is freed or reset.
14026 *
14027 * @URL is used as base to resolve external entities and for error
14028 * reporting.
14029 *
14030 * See xmlCtxtUseOptions for details.
14031 *
14032 * Returns the resulting document tree
14033 */
14034 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)14035 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14036 const char *URL, const char *encoding, int options)
14037 {
14038 xmlParserInputPtr input;
14039
14040 if (ctxt == NULL)
14041 return(NULL);
14042
14043 xmlCtxtReset(ctxt);
14044 xmlCtxtUseOptions(ctxt, options);
14045
14046 input = xmlNewInputFd(ctxt, URL, fd, encoding, 0);
14047
14048 return(xmlCtxtParseDocument(ctxt, input));
14049 }
14050
14051 /**
14052 * xmlCtxtReadIO:
14053 * @ctxt: an XML parser context
14054 * @ioread: an I/O read function
14055 * @ioclose: an I/O close function
14056 * @ioctx: an I/O handler
14057 * @URL: the base URL to use for the document
14058 * @encoding: the document encoding, or NULL
14059 * @options: a combination of xmlParserOption
14060 *
14061 * parse an XML document from I/O functions and source and build a tree.
14062 * This reuses the existing @ctxt parser context
14063 *
14064 * @URL is used as base to resolve external entities and for error
14065 * reporting.
14066 *
14067 * See xmlCtxtUseOptions for details.
14068 *
14069 * Returns the resulting document tree
14070 */
14071 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)14072 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14073 xmlInputCloseCallback ioclose, void *ioctx,
14074 const char *URL,
14075 const char *encoding, int options)
14076 {
14077 xmlParserInputPtr input;
14078
14079 if (ctxt == NULL)
14080 return(NULL);
14081
14082 xmlCtxtReset(ctxt);
14083 xmlCtxtUseOptions(ctxt, options);
14084
14085 input = xmlNewInputIO(ctxt, URL, ioread, ioclose, ioctx, encoding, 0);
14086
14087 return(xmlCtxtParseDocument(ctxt, input));
14088 }
14089
14090