• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3  *            implemented on top of the SAX interfaces
4  *
5  * References:
6  *   The XML specification:
7  *     http://www.w3.org/TR/REC-xml
8  *   Original 1.0 version:
9  *     http://www.w3.org/TR/1998/REC-xml-19980210
10  *   XML second edition working draft
11  *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12  *
 * This is a big file: the parser core is around 7000 lines, followed by
 * the progressive parser top routines, then the various high level APIs
 * to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file's size.
 * As much as possible the functions are associated with their respective
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c.
 * The DOM tree is built by the default SAX callbacks in the module
 * SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
27  *
28  * See Copyright for the status of this software.
29  *
30  * daniel@veillard.com
31  */
32 
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37 
38 #define IN_LIBXML
39 #include "libxml.h"
40 
41 #if defined(_WIN32)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
46 
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <ctype.h>
53 #include <stdlib.h>
54 #include <libxml/parser.h>
55 #include <libxml/xmlmemory.h>
56 #include <libxml/tree.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #include <libxml/SAX2.h>
65 #ifdef LIBXML_CATALOG_ENABLED
66 #include <libxml/catalog.h>
67 #endif
68 
69 #include "private/buf.h"
70 #include "private/dict.h"
71 #include "private/entities.h"
72 #include "private/error.h"
73 #include "private/html.h"
74 #include "private/io.h"
75 #include "private/parser.h"
76 
77 #define NS_INDEX_EMPTY  INT_MAX
78 #define NS_INDEX_XML    (INT_MAX - 1)
79 #define URI_HASH_EMPTY  0xD943A04E
80 #define URI_HASH_XML    0xF0451F02
81 
/*
 * Data recorded for one element start tag.
 * NOTE(review): the code using this struct is outside this part of the
 * file; the field descriptions below are inferred from the names and
 * should be confirmed against the users.
 */
struct _xmlStartTag {
    const xmlChar *prefix; /* presumably the element's namespace prefix */
    const xmlChar *URI;    /* presumably the element's namespace URI */
    int line;              /* presumably the line of the start tag */
    int nsNr;              /* presumably a namespace count for the tag */
};
88 
/*
 * Extra per-namespace bookkeeping, referenced through the "extra"
 * member of struct _xmlParserNsData.
 * NOTE(review): field meanings are inferred from the names only; the
 * code that fills them is outside this part of the file.
 */
typedef struct {
    void *saxData;            /* opaque pointer, presumably owned by the SAX user */
    unsigned prefixHashValue; /* presumably the hash of the prefix */
    unsigned uriHashValue;    /* presumably the hash of the namespace URI */
    unsigned elementId;       /* presumably the id of the declaring element */
    int oldIndex;             /* presumably the index of a shadowed binding */
} xmlParserNsExtra;
96 
/*
 * One slot of the hash table referenced by the "hash" member of
 * struct _xmlParserNsData.
 */
typedef struct {
    unsigned hashValue; /* presumably the hash of the stored key — confirm */
    int index;          /* presumably an index into the namespace arrays — confirm */
} xmlParserNsBucket;
101 
/*
 * Namespace bookkeeping of the parser: an array of xmlParserNsExtra
 * records plus a hash table of xmlParserNsBucket slots.
 * NOTE(review): the functions maintaining this struct are outside this
 * part of the file; per-field notes are inferred from names and types.
 */
struct _xmlParserNsData {
    xmlParserNsExtra *extra;  /* array of extra per-namespace records */

    unsigned hashSize;        /* presumably the number of slots in "hash" */
    unsigned hashElems;       /* presumably the number of used slots */
    xmlParserNsBucket *hash;  /* hash table slots */

    unsigned elementId;       /* presumably a running element counter */
    int defaultNsIndex;       /* presumably the index of the default namespace */
    int minNsIndex;           /* TODO confirm meaning against the users */
};
113 
/*
 * One slot of an attribute hash table.
 * NOTE(review): the table itself is outside this part of the file.
 */
struct _xmlAttrHashBucket {
    int index; /* presumably an index into the attribute array — confirm */
};
117 
118 static int
119 xmlParseElementStart(xmlParserCtxtPtr ctxt);
120 
121 static void
122 xmlParseElementEnd(xmlParserCtxtPtr ctxt);
123 
124 static xmlEntityPtr
125 xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
126 
127 static const xmlChar *
128 xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
129 
130 /************************************************************************
131  *									*
132  *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
133  *									*
134  ************************************************************************/
135 
136 #define XML_PARSER_BIG_ENTITY 1000
137 #define XML_PARSER_LOT_ENTITY 5000
138 
139 /*
140  * Constants for protection against abusive entity expansion
141  * ("billion laughs").
142  */
143 
144 /*
145  * A certain amount of entity expansion which is always allowed.
146  */
147 #define XML_PARSER_ALLOWED_EXPANSION 1000000
148 
149 /*
150  * Fixed cost for each entity reference. This crudely models processing time
151  * as well to protect, for example, against exponential expansion of empty
152  * or very short entities.
153  */
154 #define XML_ENT_FIXED_COST 20
155 
156 /**
157  * xmlParserMaxDepth:
158  *
159  * arbitrary depth limit for the XML documents that we allow to
160  * process. This is not a limitation of the parser but a safety
161  * boundary feature. It can be disabled with the XML_PARSE_HUGE
162  * parser option.
163  */
164 const unsigned int xmlParserMaxDepth = 256;
165 
166 
167 
168 #define XML_PARSER_BIG_BUFFER_SIZE 300
169 #define XML_PARSER_BUFFER_SIZE 100
170 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
171 
172 /**
173  * XML_PARSER_CHUNK_SIZE
174  *
 * When calling GROW, this is the minimal amount of data the parser
 * expects to have received. It is not a hard limit but an optimization
 * when reading strings like Names. It is not strictly needed as long as
 * the input's available characters are followed by a 0, which should be
 * provided by the I/O level.
 */
181 #define XML_PARSER_CHUNK_SIZE 100
182 
183 /**
184  * xmlParserVersion:
185  *
186  * Constant string describing the internal version of the library
187  */
188 const char *const
189 xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
190 
191 /*
192  * List of XML prefixed PI allowed by W3C specs
193  */
194 
195 static const char* const xmlW3CPIs[] = {
196     "xml-stylesheet",
197     "xml-model",
198     NULL
199 };
200 
201 
202 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
203 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
204                                               const xmlChar **str);
205 
206 static void
207 xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
208 
209 static int
210 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
211 
212 /************************************************************************
213  *									*
214  *		Some factorized error routines				*
215  *									*
216  ************************************************************************/
217 
218 static void
xmlErrMemory(xmlParserCtxtPtr ctxt)219 xmlErrMemory(xmlParserCtxtPtr ctxt) {
220     xmlCtxtErrMemory(ctxt);
221 }
222 
223 /**
224  * xmlErrAttributeDup:
225  * @ctxt:  an XML parser context
226  * @prefix:  the attribute prefix
227  * @localname:  the attribute localname
228  *
229  * Handle a redefinition of attribute error
230  */
231 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)232 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
233                    const xmlChar * localname)
234 {
235     if (prefix == NULL)
236         xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
237                    XML_ERR_FATAL, localname, NULL, NULL, 0,
238                    "Attribute %s redefined\n", localname);
239     else
240         xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241                    XML_ERR_FATAL, prefix, localname, NULL, 0,
242                    "Attribute %s:%s redefined\n", prefix, localname);
243 }
244 
245 /**
246  * xmlFatalErrMsg:
247  * @ctxt:  an XML parser context
248  * @error:  the error number
249  * @msg:  the error message
250  *
251  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
252  */
253 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)254 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
255                const char *msg)
256 {
257     xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
258                NULL, NULL, NULL, 0, "%s", msg);
259 }
260 
261 /**
262  * xmlWarningMsg:
263  * @ctxt:  an XML parser context
264  * @error:  the error number
265  * @msg:  the error message
266  * @str1:  extra data
267  * @str2:  extra data
268  *
269  * Handle a warning.
270  */
271 void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)272 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
273               const char *msg, const xmlChar *str1, const xmlChar *str2)
274 {
275     xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
276                str1, str2, NULL, 0, msg, str1, str2);
277 }
278 
279 /**
280  * xmlValidityError:
281  * @ctxt:  an XML parser context
282  * @error:  the error number
283  * @msg:  the error message
284  * @str1:  extra data
285  *
286  * Handle a validity error.
287  */
288 static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)289 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
290               const char *msg, const xmlChar *str1, const xmlChar *str2)
291 {
292     ctxt->valid = 0;
293 
294     xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
295                str1, str2, NULL, 0, msg, str1, str2);
296 }
297 
298 /**
299  * xmlFatalErrMsgInt:
300  * @ctxt:  an XML parser context
301  * @error:  the error number
302  * @msg:  the error message
303  * @val:  an integer value
304  *
305  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
306  */
307 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)308 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
309                   const char *msg, int val)
310 {
311     xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
312                NULL, NULL, NULL, val, msg, val);
313 }
314 
315 /**
316  * xmlFatalErrMsgStrIntStr:
317  * @ctxt:  an XML parser context
318  * @error:  the error number
319  * @msg:  the error message
320  * @str1:  an string info
321  * @val:  an integer value
322  * @str2:  an string info
323  *
324  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
325  */
326 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)327 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
328                   const char *msg, const xmlChar *str1, int val,
329 		  const xmlChar *str2)
330 {
331     xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
332                str1, str2, NULL, val, msg, str1, val, str2);
333 }
334 
335 /**
336  * xmlFatalErrMsgStr:
337  * @ctxt:  an XML parser context
338  * @error:  the error number
339  * @msg:  the error message
340  * @val:  a string value
341  *
342  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
343  */
344 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)345 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
346                   const char *msg, const xmlChar * val)
347 {
348     xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
349                val, NULL, NULL, 0, msg, val);
350 }
351 
352 /**
353  * xmlErrMsgStr:
354  * @ctxt:  an XML parser context
355  * @error:  the error number
356  * @msg:  the error message
357  * @val:  a string value
358  *
359  * Handle a non fatal parser error
360  */
361 static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)362 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
363                   const char *msg, const xmlChar * val)
364 {
365     xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
366                val, NULL, NULL, 0, msg, val);
367 }
368 
369 /**
370  * xmlNsErr:
371  * @ctxt:  an XML parser context
372  * @error:  the error number
373  * @msg:  the message
374  * @info1:  extra information string
375  * @info2:  extra information string
376  *
377  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378  */
379 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)380 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381          const char *msg,
382          const xmlChar * info1, const xmlChar * info2,
383          const xmlChar * info3)
384 {
385     ctxt->nsWellFormed = 0;
386 
387     xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
388                info1, info2, info3, 0, msg, info1, info2, info3);
389 }
390 
391 /**
392  * xmlNsWarn
393  * @ctxt:  an XML parser context
394  * @error:  the error number
395  * @msg:  the message
396  * @info1:  extra information string
397  * @info2:  extra information string
398  *
399  * Handle a namespace warning error
400  */
401 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)402 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403          const char *msg,
404          const xmlChar * info1, const xmlChar * info2,
405          const xmlChar * info3)
406 {
407     xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
408                info1, info2, info3, 0, msg, info1, info2, info3);
409 }
410 
/**
 * xmlSaturatedAdd:
 * @dst:  accumulator, updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around on overflow.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* How much can still be added before the accumulator overflows. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : (*dst + val);
}
418 
/**
 * xmlSaturatedAddSizeT:
 * @dst:  accumulator, updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around on overflow. Despite the name, both operands are
 * unsigned long here.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, unsigned long val) {
    if (*dst <= ULONG_MAX - val)
        *dst += val;
    else
        *dst = ULONG_MAX;
}
426 
427 /**
428  * xmlParserEntityCheck:
429  * @ctxt:  parser context
430  * @extra:  sum of unexpanded entity sizes
431  *
432  * Check for non-linear entity expansion behaviour.
433  *
434  * In some cases like xmlExpandEntityInAttValue, this function is called
435  * for each, possibly nested entity and its unexpanded content length.
436  *
437  * In other cases like xmlParseReference, it's only called for each
438  * top-level entity with its unexpanded content length plus the sum of
439  * the unexpanded content lengths (plus fixed cost) of all nested
440  * entities.
441  *
442  * Summing the unexpanded lengths also adds the length of the reference.
443  * This is by design. Taking the length of the entity name into account
444  * discourages attacks that try to waste CPU time with abusively long
445  * entity names. See test/recurse/lol6.xml for example. Each call also
446  * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
447  * short entities.
448  *
449  * Returns 1 on error, 0 on success.
450  */
451 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,unsigned long extra)452 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
453 {
454     unsigned long consumed;
455     unsigned long *expandedSize;
456     xmlParserInputPtr input = ctxt->input;
457     xmlEntityPtr entity = input->entity;
458 
459     if ((entity) && (entity->flags & XML_ENT_CHECKED))
460         return(0);
461 
462     /*
463      * Compute total consumed bytes so far, including input streams of
464      * external entities.
465      */
466     consumed = input->consumed;
467     xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
468     xmlSaturatedAdd(&consumed, ctxt->sizeentities);
469 
470     if (entity)
471         expandedSize = &entity->expandedSize;
472     else
473         expandedSize = &ctxt->sizeentcopy;
474 
475     /*
476      * Add extra cost and some fixed cost.
477      */
478     xmlSaturatedAdd(expandedSize, extra);
479     xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
480 
481     /*
482      * It's important to always use saturation arithmetic when tracking
483      * entity sizes to make the size checks reliable. If "sizeentcopy"
484      * overflows, we have to abort.
485      */
486     if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
487         ((*expandedSize >= ULONG_MAX) ||
488          (*expandedSize / ctxt->maxAmpl > consumed))) {
489         xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
490                        "Maximum entity amplification factor exceeded, see "
491                        "xmlCtxtSetMaxAmplification.\n");
492         xmlHaltParser(ctxt);
493         return(1);
494     }
495 
496     return(0);
497 }
498 
499 /************************************************************************
500  *									*
501  *		Library wide options					*
502  *									*
503  ************************************************************************/
504 
505 /**
506   * xmlHasFeature:
507   * @feature: the feature to be examined
508   *
509   * Examines if the library has been compiled with a given feature.
510   *
511   * Returns a non-zero value if the feature exist, otherwise zero.
512   * Returns zero (0) if the feature does not exist or an unknown
513   * unknown feature is requested, non-zero otherwise.
514   */
515 int
xmlHasFeature(xmlFeature feature)516 xmlHasFeature(xmlFeature feature)
517 {
518     switch (feature) {
519 	case XML_WITH_THREAD:
520 #ifdef LIBXML_THREAD_ENABLED
521 	    return(1);
522 #else
523 	    return(0);
524 #endif
525         case XML_WITH_TREE:
526 #ifdef LIBXML_TREE_ENABLED
527             return(1);
528 #else
529             return(0);
530 #endif
531         case XML_WITH_OUTPUT:
532 #ifdef LIBXML_OUTPUT_ENABLED
533             return(1);
534 #else
535             return(0);
536 #endif
537         case XML_WITH_PUSH:
538 #ifdef LIBXML_PUSH_ENABLED
539             return(1);
540 #else
541             return(0);
542 #endif
543         case XML_WITH_READER:
544 #ifdef LIBXML_READER_ENABLED
545             return(1);
546 #else
547             return(0);
548 #endif
549         case XML_WITH_PATTERN:
550 #ifdef LIBXML_PATTERN_ENABLED
551             return(1);
552 #else
553             return(0);
554 #endif
555         case XML_WITH_WRITER:
556 #ifdef LIBXML_WRITER_ENABLED
557             return(1);
558 #else
559             return(0);
560 #endif
561         case XML_WITH_SAX1:
562 #ifdef LIBXML_SAX1_ENABLED
563             return(1);
564 #else
565             return(0);
566 #endif
567         case XML_WITH_HTTP:
568 #ifdef LIBXML_HTTP_ENABLED
569             return(1);
570 #else
571             return(0);
572 #endif
573         case XML_WITH_VALID:
574 #ifdef LIBXML_VALID_ENABLED
575             return(1);
576 #else
577             return(0);
578 #endif
579         case XML_WITH_HTML:
580 #ifdef LIBXML_HTML_ENABLED
581             return(1);
582 #else
583             return(0);
584 #endif
585         case XML_WITH_LEGACY:
586 #ifdef LIBXML_LEGACY_ENABLED
587             return(1);
588 #else
589             return(0);
590 #endif
591         case XML_WITH_C14N:
592 #ifdef LIBXML_C14N_ENABLED
593             return(1);
594 #else
595             return(0);
596 #endif
597         case XML_WITH_CATALOG:
598 #ifdef LIBXML_CATALOG_ENABLED
599             return(1);
600 #else
601             return(0);
602 #endif
603         case XML_WITH_XPATH:
604 #ifdef LIBXML_XPATH_ENABLED
605             return(1);
606 #else
607             return(0);
608 #endif
609         case XML_WITH_XPTR:
610 #ifdef LIBXML_XPTR_ENABLED
611             return(1);
612 #else
613             return(0);
614 #endif
615         case XML_WITH_XINCLUDE:
616 #ifdef LIBXML_XINCLUDE_ENABLED
617             return(1);
618 #else
619             return(0);
620 #endif
621         case XML_WITH_ICONV:
622 #ifdef LIBXML_ICONV_ENABLED
623             return(1);
624 #else
625             return(0);
626 #endif
627         case XML_WITH_ISO8859X:
628 #ifdef LIBXML_ISO8859X_ENABLED
629             return(1);
630 #else
631             return(0);
632 #endif
633         case XML_WITH_UNICODE:
634 #ifdef LIBXML_UNICODE_ENABLED
635             return(1);
636 #else
637             return(0);
638 #endif
639         case XML_WITH_REGEXP:
640 #ifdef LIBXML_REGEXP_ENABLED
641             return(1);
642 #else
643             return(0);
644 #endif
645         case XML_WITH_AUTOMATA:
646 #ifdef LIBXML_AUTOMATA_ENABLED
647             return(1);
648 #else
649             return(0);
650 #endif
651         case XML_WITH_EXPR:
652 #ifdef LIBXML_EXPR_ENABLED
653             return(1);
654 #else
655             return(0);
656 #endif
657         case XML_WITH_SCHEMAS:
658 #ifdef LIBXML_SCHEMAS_ENABLED
659             return(1);
660 #else
661             return(0);
662 #endif
663         case XML_WITH_SCHEMATRON:
664 #ifdef LIBXML_SCHEMATRON_ENABLED
665             return(1);
666 #else
667             return(0);
668 #endif
669         case XML_WITH_MODULES:
670 #ifdef LIBXML_MODULES_ENABLED
671             return(1);
672 #else
673             return(0);
674 #endif
675         case XML_WITH_DEBUG:
676 #ifdef LIBXML_DEBUG_ENABLED
677             return(1);
678 #else
679             return(0);
680 #endif
681         case XML_WITH_DEBUG_MEM:
682             return(0);
683         case XML_WITH_ZLIB:
684 #ifdef LIBXML_ZLIB_ENABLED
685             return(1);
686 #else
687             return(0);
688 #endif
689         case XML_WITH_LZMA:
690 #ifdef LIBXML_LZMA_ENABLED
691             return(1);
692 #else
693             return(0);
694 #endif
695         case XML_WITH_ICU:
696 #ifdef LIBXML_ICU_ENABLED
697             return(1);
698 #else
699             return(0);
700 #endif
701         default:
702 	    break;
703      }
704      return(0);
705 }
706 
707 /************************************************************************
708  *									*
709  *			Simple string buffer				*
710  *									*
711  ************************************************************************/
712 
/*
 * Growable byte buffer with a hard size limit and a sticky error code.
 * The append helpers record a failure in "code" and return; the error
 * is reported when the buffer is finished or cleaned up.
 */
typedef struct {
    xmlChar *mem;          /* content, NULL until the first growth */
    unsigned size;         /* number of bytes currently stored */
    unsigned cap; /* size < cap */
    unsigned max; /* size <= max */
    xmlParserErrors code;  /* XML_ERR_OK or the recorded failure */
} xmlSBuf;
720 
721 static void
xmlSBufInit(xmlSBuf * buf,unsigned max)722 xmlSBufInit(xmlSBuf *buf, unsigned max) {
723     buf->mem = NULL;
724     buf->size = 0;
725     buf->cap = 0;
726     buf->max = max;
727     buf->code = XML_ERR_OK;
728 }
729 
730 static int
xmlSBufGrow(xmlSBuf * buf,unsigned len)731 xmlSBufGrow(xmlSBuf *buf, unsigned len) {
732     xmlChar *mem;
733     unsigned cap;
734 
735     if (len >= UINT_MAX / 2 - buf->size) {
736         if (buf->code == XML_ERR_OK)
737             buf->code = XML_ERR_RESOURCE_LIMIT;
738         return(-1);
739     }
740 
741     cap = (buf->size + len) * 2;
742     if (cap < 240)
743         cap = 240;
744 
745     mem = xmlRealloc(buf->mem, cap);
746     if (mem == NULL) {
747         buf->code = XML_ERR_NO_MEMORY;
748         return(-1);
749     }
750 
751     buf->mem = mem;
752     buf->cap = cap;
753 
754     return(0);
755 }
756 
757 static void
xmlSBufAddString(xmlSBuf * buf,const xmlChar * str,unsigned len)758 xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
759     if (buf->max - buf->size < len) {
760         if (buf->code == XML_ERR_OK)
761             buf->code = XML_ERR_RESOURCE_LIMIT;
762         return;
763     }
764 
765     if (buf->cap - buf->size <= len) {
766         if (xmlSBufGrow(buf, len) < 0)
767             return;
768     }
769 
770     if (len > 0)
771         memcpy(buf->mem + buf->size, str, len);
772     buf->size += len;
773 }
774 
775 static void
xmlSBufAddCString(xmlSBuf * buf,const char * str,unsigned len)776 xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
777     xmlSBufAddString(buf, (const xmlChar *) str, len);
778 }
779 
780 static void
xmlSBufAddChar(xmlSBuf * buf,int c)781 xmlSBufAddChar(xmlSBuf *buf, int c) {
782     xmlChar *end;
783 
784     if (buf->max - buf->size < 4) {
785         if (buf->code == XML_ERR_OK)
786             buf->code = XML_ERR_RESOURCE_LIMIT;
787         return;
788     }
789 
790     if (buf->cap - buf->size <= 4) {
791         if (xmlSBufGrow(buf, 4) < 0)
792             return;
793     }
794 
795     end = buf->mem + buf->size;
796 
797     if (c < 0x80) {
798         *end = (xmlChar) c;
799         buf->size += 1;
800     } else {
801         buf->size += xmlCopyCharMultiByte(end, c);
802     }
803 }
804 
805 static void
xmlSBufAddReplChar(xmlSBuf * buf)806 xmlSBufAddReplChar(xmlSBuf *buf) {
807     xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
808 }
809 
810 static void
xmlSBufReportError(xmlSBuf * buf,xmlParserCtxtPtr ctxt,const char * errMsg)811 xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
812     if (buf->code == XML_ERR_NO_MEMORY)
813         xmlCtxtErrMemory(ctxt);
814     else
815         xmlFatalErr(ctxt, buf->code, errMsg);
816 }
817 
818 static xmlChar *
xmlSBufFinish(xmlSBuf * buf,int * sizeOut,xmlParserCtxtPtr ctxt,const char * errMsg)819 xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
820               const char *errMsg) {
821     if (buf->mem == NULL) {
822         buf->mem = xmlMalloc(1);
823         if (buf->mem == NULL) {
824             buf->code = XML_ERR_NO_MEMORY;
825         } else {
826             buf->mem[0] = 0;
827         }
828     } else {
829         buf->mem[buf->size] = 0;
830     }
831 
832     if (buf->code == XML_ERR_OK) {
833         if (sizeOut != NULL)
834             *sizeOut = buf->size;
835         return(buf->mem);
836     }
837 
838     xmlSBufReportError(buf, ctxt, errMsg);
839 
840     xmlFree(buf->mem);
841 
842     if (sizeOut != NULL)
843         *sizeOut = 0;
844     return(NULL);
845 }
846 
847 static void
xmlSBufCleanup(xmlSBuf * buf,xmlParserCtxtPtr ctxt,const char * errMsg)848 xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
849     if (buf->code != XML_ERR_OK)
850         xmlSBufReportError(buf, ctxt, errMsg);
851 
852     xmlFree(buf->mem);
853 }
854 
855 static int
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt,const xmlChar * str,const char * errMsg)856 xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
857                     const char *errMsg) {
858     int c = str[0];
859     int c1 = str[1];
860 
861     if ((c1 & 0xC0) != 0x80)
862         goto encoding_error;
863 
864     if (c < 0xE0) {
865         /* 2-byte sequence */
866         if (c < 0xC2)
867             goto encoding_error;
868 
869         return(2);
870     } else {
871         int c2 = str[2];
872 
873         if ((c2 & 0xC0) != 0x80)
874             goto encoding_error;
875 
876         if (c < 0xF0) {
877             /* 3-byte sequence */
878             if (c == 0xE0) {
879                 /* overlong */
880                 if (c1 < 0xA0)
881                     goto encoding_error;
882             } else if (c == 0xED) {
883                 /* surrogate */
884                 if (c1 >= 0xA0)
885                     goto encoding_error;
886             } else if (c == 0xEF) {
887                 /* U+FFFE and U+FFFF are invalid Chars */
888                 if ((c1 == 0xBF) && (c2 >= 0xBE))
889                     xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
890             }
891 
892             return(3);
893         } else {
894             /* 4-byte sequence */
895             if ((str[3] & 0xC0) != 0x80)
896                 goto encoding_error;
897             if (c == 0xF0) {
898                 /* overlong */
899                 if (c1 < 0x90)
900                     goto encoding_error;
901             } else if (c >= 0xF4) {
902                 /* greater than 0x10FFFF */
903                 if ((c > 0xF4) || (c1 >= 0x90))
904                     goto encoding_error;
905             }
906 
907             return(4);
908         }
909     }
910 
911 encoding_error:
912     /* Only report the first error */
913     if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
914         xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
915         ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
916     }
917 
918     return(0);
919 }
920 
921 /************************************************************************
922  *									*
923  *		SAX2 defaulted attributes handling			*
924  *									*
925  ************************************************************************/
926 
927 /**
928  * xmlCtxtInitializeLate:
929  * @ctxt:  an XML parser context
930  *
931  * Final initialization of the parser context before starting to parse.
932  *
933  * This accounts for users modifying struct members of parser context
934  * directly.
935  */
936 static void
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt)937 xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
938     xmlSAXHandlerPtr sax;
939 
940     /* Avoid unused variable warning if features are disabled. */
941     (void) sax;
942 
943     /*
944      * Changing the SAX struct directly is still widespread practice
945      * in internal and external code.
946      */
947     if (ctxt == NULL) return;
948     sax = ctxt->sax;
949 #ifdef LIBXML_SAX1_ENABLED
950     /*
951      * Only enable SAX2 if there SAX2 element handlers, except when there
952      * are no element handlers at all.
953      */
954     if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
955         (sax) &&
956         (sax->initialized == XML_SAX2_MAGIC) &&
957         ((sax->startElementNs != NULL) ||
958          (sax->endElementNs != NULL) ||
959          ((sax->startElement == NULL) && (sax->endElement == NULL))))
960         ctxt->sax2 = 1;
961 #else
962     ctxt->sax2 = 1;
963 #endif /* LIBXML_SAX1_ENABLED */
964 
965     /*
966      * Some users replace the dictionary directly in the context struct.
967      * We really need an API function to do that cleanly.
968      */
969     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
970     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
971     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
972     if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
973 		(ctxt->str_xml_ns == NULL)) {
974         xmlErrMemory(ctxt);
975     }
976 }
977 
/*
 * One defaulted attribute, stored in the "attrs" array of
 * struct _xmlDefAttrs.
 * NOTE(review): the code filling these fields lies beyond this part of
 * the file; the per-field notes are inferred from names and types.
 */
typedef struct {
    xmlHashedString prefix;   /* attribute prefix */
    xmlHashedString name;     /* attribute local name */
    xmlHashedString value;    /* default value */
    const xmlChar *valueEnd;  /* presumably the end of the value string */
    int external;             /* presumably set for external declarations */
    int expandedSize;         /* presumably an entity expansion cost — confirm */
} xmlDefAttr;
986 
987 typedef struct _xmlDefAttrs xmlDefAttrs;
988 typedef xmlDefAttrs *xmlDefAttrsPtr;
989 struct _xmlDefAttrs {
990     int nbAttrs;	/* number of defaulted attributes on that element */
991     int maxAttrs;       /* the size of the array */
992 #if __STDC_VERSION__ >= 199901L
993     /* Using a C99 flexible array member avoids UBSan errors. */
994     xmlDefAttr attrs[]; /* array of localname/prefix/values/external */
995 #else
996     xmlDefAttr attrs[1];
997 #endif
998 };
999 
/**
 * xmlAttrNormalizeSpace:
 * @src: the source string
 * @dst: the target buffer
 *
 * Apply the extra normalization required for attribute values whose
 * type is not CDATA: leading and trailing space (#x20) characters are
 * discarded and every run of spaces is replaced by a single space.
 *
 * @dst must be able to hold at least as many bytes as @src, terminator
 * included. Passing the same buffer as @src and @dst is fine as long as
 * that buffer may be modified (it must not come from a dictionary or be
 * read-only).
 *
 * Returns NULL if @src or @dst is NULL, or if the write position ends up
 *         equal to the read position (an in-place call that removed
 *         nothing). Otherwise returns a pointer to the terminating 0
 *         that was written to @dst.
 */
static xmlChar *
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
{
    const xmlChar *in = src;
    xmlChar *out = dst;

    if ((in == NULL) || (out == NULL))
        return(NULL);

    /* Leading spaces are never copied. */
    while (*in == 0x20)
        in++;

    while (*in != 0) {
        if (*in != 0x20) {
            *out++ = *in++;
            continue;
        }

        /* Swallow the whole run of spaces ... */
        do {
            in++;
        } while (*in == 0x20);

        /* ... and keep a single one unless the run was trailing. */
        if (*in != 0)
            *out++ = 0x20;
    }
    *out = 0;

    return((out == in) ? NULL : out);
}
1038 
1039 /**
1040  * xmlAddDefAttrs:
1041  * @ctxt:  an XML parser context
1042  * @fullname:  the element fullname
1043  * @fullattr:  the attribute fullname
1044  * @value:  the attribute value
1045  *
1046  * Add a defaulted attribute for an element
1047  */
1048 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1049 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1050                const xmlChar *fullname,
1051                const xmlChar *fullattr,
1052                const xmlChar *value) {
1053     xmlDefAttrsPtr defaults;
1054     xmlDefAttr *attr;
1055     int len, expandedSize;
1056     xmlHashedString name;
1057     xmlHashedString prefix;
1058     xmlHashedString hvalue;
1059     const xmlChar *localname;
1060 
1061     /*
1062      * Allows to detect attribute redefinitions
1063      */
1064     if (ctxt->attsSpecial != NULL) {
1065         if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1066 	    return;
1067     }
1068 
1069     if (ctxt->attsDefault == NULL) {
1070         ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1071 	if (ctxt->attsDefault == NULL)
1072 	    goto mem_error;
1073     }
1074 
1075     /*
1076      * split the element name into prefix:localname , the string found
1077      * are within the DTD and then not associated to namespace names.
1078      */
1079     localname = xmlSplitQName3(fullname, &len);
1080     if (localname == NULL) {
1081         name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1082 	prefix.name = NULL;
1083     } else {
1084         name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1085 	prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1086         if (prefix.name == NULL)
1087             goto mem_error;
1088     }
1089     if (name.name == NULL)
1090         goto mem_error;
1091 
1092     /*
1093      * make sure there is some storage
1094      */
1095     defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1096     if ((defaults == NULL) ||
1097         (defaults->nbAttrs >= defaults->maxAttrs)) {
1098         xmlDefAttrsPtr temp;
1099         int newSize;
1100 
1101         newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4;
1102         temp = xmlRealloc(defaults,
1103                           sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1104 	if (temp == NULL)
1105 	    goto mem_error;
1106         if (defaults == NULL)
1107             temp->nbAttrs = 0;
1108 	temp->maxAttrs = newSize;
1109         defaults = temp;
1110 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1111 	                        defaults, NULL) < 0) {
1112 	    xmlFree(defaults);
1113 	    goto mem_error;
1114 	}
1115     }
1116 
1117     /*
1118      * Split the attribute name into prefix:localname , the string found
1119      * are within the DTD and hen not associated to namespace names.
1120      */
1121     localname = xmlSplitQName3(fullattr, &len);
1122     if (localname == NULL) {
1123         name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1124 	prefix.name = NULL;
1125     } else {
1126         name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1127 	prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1128         if (prefix.name == NULL)
1129             goto mem_error;
1130     }
1131     if (name.name == NULL)
1132         goto mem_error;
1133 
1134     /* intern the string and precompute the end */
1135     len = strlen((const char *) value);
1136     hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1137     if (hvalue.name == NULL)
1138         goto mem_error;
1139 
1140     expandedSize = strlen((const char *) name.name);
1141     if (prefix.name != NULL)
1142         expandedSize += strlen((const char *) prefix.name);
1143     expandedSize += len;
1144 
1145     attr = &defaults->attrs[defaults->nbAttrs++];
1146     attr->name = name;
1147     attr->prefix = prefix;
1148     attr->value = hvalue;
1149     attr->valueEnd = hvalue.name + len;
1150     attr->external = PARSER_EXTERNAL(ctxt);
1151     attr->expandedSize = expandedSize;
1152 
1153     return;
1154 
1155 mem_error:
1156     xmlErrMemory(ctxt);
1157     return;
1158 }
1159 
1160 /**
1161  * xmlAddSpecialAttr:
1162  * @ctxt:  an XML parser context
1163  * @fullname:  the element fullname
1164  * @fullattr:  the attribute fullname
1165  * @type:  the attribute type
1166  *
1167  * Register this attribute type
1168  */
1169 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1170 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1171 		  const xmlChar *fullname,
1172 		  const xmlChar *fullattr,
1173 		  int type)
1174 {
1175     if (ctxt->attsSpecial == NULL) {
1176         ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1177 	if (ctxt->attsSpecial == NULL)
1178 	    goto mem_error;
1179     }
1180 
1181     if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1182                     (void *) (ptrdiff_t) type) < 0)
1183         goto mem_error;
1184     return;
1185 
1186 mem_error:
1187     xmlErrMemory(ctxt);
1188     return;
1189 }
1190 
1191 /**
1192  * xmlCleanSpecialAttrCallback:
1193  *
1194  * Removes CDATA attributes from the special attribute table
1195  */
1196 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1197 xmlCleanSpecialAttrCallback(void *payload, void *data,
1198                             const xmlChar *fullname, const xmlChar *fullattr,
1199                             const xmlChar *unused ATTRIBUTE_UNUSED) {
1200     xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1201 
1202     if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1203         xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1204     }
1205 }
1206 
1207 /**
1208  * xmlCleanSpecialAttr:
1209  * @ctxt:  an XML parser context
1210  *
1211  * Trim the list of attributes defined to remove all those of type
1212  * CDATA as they are not special. This call should be done when finishing
1213  * to parse the DTD and before starting to parse the document root.
1214  */
1215 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1216 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1217 {
1218     if (ctxt->attsSpecial == NULL)
1219         return;
1220 
1221     xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1222 
1223     if (xmlHashSize(ctxt->attsSpecial) == 0) {
1224         xmlHashFree(ctxt->attsSpecial, NULL);
1225         ctxt->attsSpecial = NULL;
1226     }
1227     return;
1228 }
1229 
/*
 * Skip a run of ASCII letters.
 *
 * Returns a pointer to the first character at or after @cur that is not
 * in [A-Za-z].
 */
static const xmlChar *
xmlLangSkipAlpha(const xmlChar *cur) {
    while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
           ((cur[0] >= 'a') && (cur[0] <= 'z')))
        cur++;
    return(cur);
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * DEPRECATED: Internal function, do not use.
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The current REC references the successors of RFC 1766, currently 5646
 *
 * http://www.rfc-editor.org/rfc/rfc5646.txt
 * langtag       = language
 *                 ["-" script]
 *                 ["-" region]
 *                 *("-" variant)
 *                 *("-" extension)
 *                 ["-" privateuse]
 * language      = 2*3ALPHA            ; shortest ISO 639 code
 *                 ["-" extlang]       ; sometimes followed by
 *                                     ; extended language subtags
 *               / 4ALPHA              ; or reserved for future use
 *               / 5*8ALPHA            ; or registered language subtag
 *
 * extlang       = 3ALPHA              ; selected ISO 639 codes
 *                 *2("-" 3ALPHA)      ; permanently reserved
 *
 * script        = 4ALPHA              ; ISO 15924 code
 *
 * region        = 2ALPHA              ; ISO 3166-1 code
 *               / 3DIGIT              ; UN M.49 code
 *
 * variant       = 5*8alphanum         ; registered variants
 *               / (DIGIT 3alphanum)
 *
 * extension     = singleton 1*("-" (2*8alphanum))
 *
 *                                     ; Single alphanumerics
 *                                     ; "x" reserved for private use
 * singleton     = DIGIT               ; 0 - 9
 *               / %x41-57             ; A - W
 *               / %x59-5A             ; Y - Z
 *               / %x61-77             ; a - w
 *               / %x79-7A             ; y - z
 *
 * It sounds right to still allow irregular i-xxx IANA and user codes too.
 * The parser below doesn't try to cope with extension or privateuse
 * that could be added but that's not interoperable anyway. Only one
 * extlang is accepted, only purely alphabetic variants of 5 to 8 letters
 * are recognized, and whatever follows the first variant is not checked.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    /* Kind of the subtag that was accepted last. */
    enum {
        LANG_SEEN_LANGUAGE,
        LANG_SEEN_EXTLANG,
        LANG_SEEN_SCRIPT,
        LANG_SEEN_REGION
    };
    const xmlChar *cur = lang, *nxt;
    ptrdiff_t len;
    int seen;

    if (cur == NULL)
        return (0);

    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * Still allow IANA code and user code which were coming
         * from the previous version of the XML-1.0 specification
         * it's deprecated but we should not fail.
         * Productions [36] and [37] require at least one letter after
         * the dash, so a bare "i-" or "x-" is rejected.
         */
        cur += 2;
        nxt = xmlLangSkipAlpha(cur);
        return((nxt > cur) && (nxt[0] == 0));
    }

    /*
     * Primary language subtag.
     */
    nxt = xmlLangSkipAlpha(cur);
    len = nxt - cur;
    if (len >= 4) {
        /*
         * Reserved (4 letters) or registered (5 to 8 letters) language:
         * accepted only when it is the whole value.
         */
        return((len <= 8) && (nxt[0] == 0));
    }
    if (len < 2)
        return(0);

    /*
     * We got an ISO 639 code. What may follow, in order and each at most
     * once: extlang, script, region, variant.
     */
    seen = LANG_SEEN_LANGUAGE;
    for (;;) {
        /* The previous subtag must end the value or be followed by '-'. */
        if (nxt[0] == 0)
            return(1);
        if (nxt[0] != '-')
            return(0);
        nxt++;
        cur = nxt;

        /*
         * A leading digit is only tried as a UN M.49 region, and only as
         * long as no region has been seen yet.
         */
        if ((seen != LANG_SEEN_REGION) &&
            (nxt[0] >= '0') && (nxt[0] <= '9')) {
            if ((nxt[1] < '0') || (nxt[1] > '9') ||
                (nxt[2] < '0') || (nxt[2] > '9'))
                return(0);
            nxt += 3;
            seen = LANG_SEEN_REGION;
            continue;
        }

        nxt = xmlLangSkipAlpha(cur);
        len = nxt - cur;

        if ((len >= 5) && (len <= 8)) {
            /*
             * We parsed a variant; extensions and private use subtags
             * after it are not checked.
             */
            return((nxt[0] == 0) || (nxt[0] == '-'));
        }

        if ((len == 3) && (seen == LANG_SEEN_LANGUAGE))
            seen = LANG_SEEN_EXTLANG;
        else if ((len == 4) && ((seen == LANG_SEEN_LANGUAGE) ||
                                (seen == LANG_SEEN_EXTLANG)))
            seen = LANG_SEEN_SCRIPT;
        else if ((len == 2) && (seen != LANG_SEEN_REGION))
            seen = LANG_SEEN_REGION;
        else
            return(0);
    }
}
1425 
1426 /************************************************************************
1427  *									*
1428  *		Parser stacks related functions and macros		*
1429  *									*
1430  ************************************************************************/
1431 
/* Forward declaration of a file-local (static) function. */
static xmlChar *
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1434 
1435 /**
1436  * xmlParserNsCreate:
1437  *
1438  * Create a new namespace database.
1439  *
1440  * Returns the new obejct.
1441  */
1442 xmlParserNsData *
xmlParserNsCreate(void)1443 xmlParserNsCreate(void) {
1444     xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1445 
1446     if (nsdb == NULL)
1447         return(NULL);
1448     memset(nsdb, 0, sizeof(*nsdb));
1449     nsdb->defaultNsIndex = INT_MAX;
1450 
1451     return(nsdb);
1452 }
1453 
1454 /**
1455  * xmlParserNsFree:
1456  * @nsdb: namespace database
1457  *
1458  * Free a namespace database.
1459  */
1460 void
xmlParserNsFree(xmlParserNsData * nsdb)1461 xmlParserNsFree(xmlParserNsData *nsdb) {
1462     if (nsdb == NULL)
1463         return;
1464 
1465     xmlFree(nsdb->extra);
1466     xmlFree(nsdb->hash);
1467     xmlFree(nsdb);
1468 }
1469 
1470 /**
1471  * xmlParserNsReset:
1472  * @nsdb: namespace database
1473  *
1474  * Reset a namespace database.
1475  */
1476 static void
xmlParserNsReset(xmlParserNsData * nsdb)1477 xmlParserNsReset(xmlParserNsData *nsdb) {
1478     if (nsdb == NULL)
1479         return;
1480 
1481     nsdb->hashElems = 0;
1482     nsdb->elementId = 0;
1483     nsdb->defaultNsIndex = INT_MAX;
1484 
1485     if (nsdb->hash)
1486         memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1487 }
1488 
1489 /**
1490  * xmlParserStartElement:
1491  * @nsdb: namespace database
1492  *
1493  * Signal that a new element has started.
1494  *
1495  * Returns 0 on success, -1 if the element counter overflowed.
1496  */
1497 static int
xmlParserNsStartElement(xmlParserNsData * nsdb)1498 xmlParserNsStartElement(xmlParserNsData *nsdb) {
1499     if (nsdb->elementId == UINT_MAX)
1500         return(-1);
1501     nsdb->elementId++;
1502 
1503     return(0);
1504 }
1505 
1506 /**
1507  * xmlParserNsLookup:
1508  * @ctxt: parser context
1509  * @prefix: namespace prefix
1510  * @bucketPtr: optional bucket (return value)
1511  *
1512  * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1513  * be set to the matching bucket, or the first empty bucket if no match
1514  * was found.
1515  *
1516  * Returns the namespace index on success, INT_MAX if no namespace was
1517  * found.
1518  */
1519 static int
xmlParserNsLookup(xmlParserCtxtPtr ctxt,const xmlHashedString * prefix,xmlParserNsBucket ** bucketPtr)1520 xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1521                   xmlParserNsBucket **bucketPtr) {
1522     xmlParserNsBucket *bucket, *tombstone;
1523     unsigned index, hashValue;
1524 
1525     if (prefix->name == NULL)
1526         return(ctxt->nsdb->defaultNsIndex);
1527 
1528     if (ctxt->nsdb->hashSize == 0)
1529         return(INT_MAX);
1530 
1531     hashValue = prefix->hashValue;
1532     index = hashValue & (ctxt->nsdb->hashSize - 1);
1533     bucket = &ctxt->nsdb->hash[index];
1534     tombstone = NULL;
1535 
1536     while (bucket->hashValue) {
1537         if (bucket->index == INT_MAX) {
1538             if (tombstone == NULL)
1539                 tombstone = bucket;
1540         } else if (bucket->hashValue == hashValue) {
1541             if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1542                 if (bucketPtr != NULL)
1543                     *bucketPtr = bucket;
1544                 return(bucket->index);
1545             }
1546         }
1547 
1548         index++;
1549         bucket++;
1550         if (index == ctxt->nsdb->hashSize) {
1551             index = 0;
1552             bucket = ctxt->nsdb->hash;
1553         }
1554     }
1555 
1556     if (bucketPtr != NULL)
1557         *bucketPtr = tombstone ? tombstone : bucket;
1558     return(INT_MAX);
1559 }
1560 
1561 /**
1562  * xmlParserNsLookupUri:
1563  * @ctxt: parser context
1564  * @prefix: namespace prefix
1565  *
1566  * Lookup namespace URI with given prefix.
1567  *
1568  * Returns the namespace URI on success, NULL if no namespace was found.
1569  */
1570 static const xmlChar *
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt,const xmlHashedString * prefix)1571 xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1572     const xmlChar *ret;
1573     int nsIndex;
1574 
1575     if (prefix->name == ctxt->str_xml)
1576         return(ctxt->str_xml_ns);
1577 
1578     /*
1579      * minNsIndex is used when building an entity tree. We must
1580      * ignore namespaces declared outside the entity.
1581      */
1582     nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1583     if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1584         return(NULL);
1585 
1586     ret = ctxt->nsTab[nsIndex * 2 + 1];
1587     if (ret[0] == 0)
1588         ret = NULL;
1589     return(ret);
1590 }
1591 
1592 /**
1593  * xmlParserNsLookupSax:
1594  * @ctxt: parser context
1595  * @prefix: namespace prefix
1596  *
1597  * Lookup extra data for the given prefix. This returns data stored
1598  * with xmlParserNsUdpateSax.
1599  *
1600  * Returns the data on success, NULL if no namespace was found.
1601  */
1602 void *
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt,const xmlChar * prefix)1603 xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1604     xmlHashedString hprefix;
1605     int nsIndex;
1606 
1607     if (prefix == ctxt->str_xml)
1608         return(NULL);
1609 
1610     hprefix.name = prefix;
1611     if (prefix != NULL)
1612         hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1613     else
1614         hprefix.hashValue = 0;
1615     nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1616     if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1617         return(NULL);
1618 
1619     return(ctxt->nsdb->extra[nsIndex].saxData);
1620 }
1621 
1622 /**
1623  * xmlParserNsUpdateSax:
1624  * @ctxt: parser context
1625  * @prefix: namespace prefix
1626  * @saxData: extra data for SAX handler
1627  *
1628  * Sets or updates extra data for the given prefix. This value will be
1629  * returned by xmlParserNsLookupSax as long as the namespace with the
1630  * given prefix is in scope.
1631  *
1632  * Returns the data on success, NULL if no namespace was found.
1633  */
1634 int
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt,const xmlChar * prefix,void * saxData)1635 xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1636                      void *saxData) {
1637     xmlHashedString hprefix;
1638     int nsIndex;
1639 
1640     if (prefix == ctxt->str_xml)
1641         return(-1);
1642 
1643     hprefix.name = prefix;
1644     if (prefix != NULL)
1645         hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1646     else
1647         hprefix.hashValue = 0;
1648     nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1649     if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1650         return(-1);
1651 
1652     ctxt->nsdb->extra[nsIndex].saxData = saxData;
1653     return(0);
1654 }
1655 
1656 /**
1657  * xmlParserNsGrow:
1658  * @ctxt: parser context
1659  *
1660  * Grows the namespace tables.
1661  *
1662  * Returns 0 on success, -1 if a memory allocation failed.
1663  */
1664 static int
xmlParserNsGrow(xmlParserCtxtPtr ctxt)1665 xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1666     const xmlChar **table;
1667     xmlParserNsExtra *extra;
1668     int newSize;
1669 
1670     if (ctxt->nsMax > INT_MAX / 2)
1671         goto error;
1672     newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16;
1673 
1674     table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1675     if (table == NULL)
1676         goto error;
1677     ctxt->nsTab = table;
1678 
1679     extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1680     if (extra == NULL)
1681         goto error;
1682     ctxt->nsdb->extra = extra;
1683 
1684     ctxt->nsMax = newSize;
1685     return(0);
1686 
1687 error:
1688     xmlErrMemory(ctxt);
1689     return(-1);
1690 }
1691 
1692 /**
1693  * xmlParserNsPush:
1694  * @ctxt: parser context
1695  * @prefix: prefix with hash value
1696  * @uri: uri with hash value
1697  * @saxData: extra data for SAX handler
1698  * @defAttr: whether the namespace comes from a default attribute
1699  *
1700  * Push a new namespace on the table.
1701  *
1702  * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1703  * -1 if a memory allocation failed.
1704  */
1705 static int
xmlParserNsPush(xmlParserCtxtPtr ctxt,const xmlHashedString * prefix,const xmlHashedString * uri,void * saxData,int defAttr)1706 xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1707                 const xmlHashedString *uri, void *saxData, int defAttr) {
1708     xmlParserNsBucket *bucket = NULL;
1709     xmlParserNsExtra *extra;
1710     const xmlChar **ns;
1711     unsigned hashValue, nsIndex, oldIndex;
1712 
1713     if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1714         return(0);
1715 
1716     if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1717         xmlErrMemory(ctxt);
1718         return(-1);
1719     }
1720 
1721     /*
1722      * Default namespace and 'xml' namespace
1723      */
1724     if ((prefix == NULL) || (prefix->name == NULL)) {
1725         oldIndex = ctxt->nsdb->defaultNsIndex;
1726 
1727         if (oldIndex != INT_MAX) {
1728             extra = &ctxt->nsdb->extra[oldIndex];
1729 
1730             if (extra->elementId == ctxt->nsdb->elementId) {
1731                 if (defAttr == 0)
1732                     xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1733                 return(0);
1734             }
1735 
1736             if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1737                 (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1738                 return(0);
1739         }
1740 
1741         ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1742         goto populate_entry;
1743     }
1744 
1745     /*
1746      * Hash table lookup
1747      */
1748     oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1749     if (oldIndex != INT_MAX) {
1750         extra = &ctxt->nsdb->extra[oldIndex];
1751 
1752         /*
1753          * Check for duplicate definitions on the same element.
1754          */
1755         if (extra->elementId == ctxt->nsdb->elementId) {
1756             if (defAttr == 0)
1757                 xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1758             return(0);
1759         }
1760 
1761         if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1762             (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1763             return(0);
1764 
1765         bucket->index = ctxt->nsNr;
1766         goto populate_entry;
1767     }
1768 
1769     /*
1770      * Insert new bucket
1771      */
1772 
1773     hashValue = prefix->hashValue;
1774 
1775     /*
1776      * Grow hash table, 50% fill factor
1777      */
1778     if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1779         xmlParserNsBucket *newHash;
1780         unsigned newSize, i, index;
1781 
1782         if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1783             xmlErrMemory(ctxt);
1784             return(-1);
1785         }
1786         newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1787         newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1788         if (newHash == NULL) {
1789             xmlErrMemory(ctxt);
1790             return(-1);
1791         }
1792         memset(newHash, 0, newSize * sizeof(newHash[0]));
1793 
1794         for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1795             unsigned hv = ctxt->nsdb->hash[i].hashValue;
1796             unsigned newIndex;
1797 
1798             if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1799                 continue;
1800             newIndex = hv & (newSize - 1);
1801 
1802             while (newHash[newIndex].hashValue != 0) {
1803                 newIndex++;
1804                 if (newIndex == newSize)
1805                     newIndex = 0;
1806             }
1807 
1808             newHash[newIndex] = ctxt->nsdb->hash[i];
1809         }
1810 
1811         xmlFree(ctxt->nsdb->hash);
1812         ctxt->nsdb->hash = newHash;
1813         ctxt->nsdb->hashSize = newSize;
1814 
1815         /*
1816          * Relookup
1817          */
1818         index = hashValue & (newSize - 1);
1819 
1820         while (newHash[index].hashValue != 0) {
1821             index++;
1822             if (index == newSize)
1823                 index = 0;
1824         }
1825 
1826         bucket = &newHash[index];
1827     }
1828 
1829     bucket->hashValue = hashValue;
1830     bucket->index = ctxt->nsNr;
1831     ctxt->nsdb->hashElems++;
1832     oldIndex = INT_MAX;
1833 
1834 populate_entry:
1835     nsIndex = ctxt->nsNr;
1836 
1837     ns = &ctxt->nsTab[nsIndex * 2];
1838     ns[0] = prefix ? prefix->name : NULL;
1839     ns[1] = uri->name;
1840 
1841     extra = &ctxt->nsdb->extra[nsIndex];
1842     extra->saxData = saxData;
1843     extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1844     extra->uriHashValue = uri->hashValue;
1845     extra->elementId = ctxt->nsdb->elementId;
1846     extra->oldIndex = oldIndex;
1847 
1848     ctxt->nsNr++;
1849 
1850     return(1);
1851 }
1852 
1853 /**
1854  * xmlParserNsPop:
1855  * @ctxt: an XML parser context
1856  * @nr:  the number to pop
1857  *
1858  * Pops the top @nr namespaces and restores the hash table.
1859  *
1860  * Returns the number of namespaces popped.
1861  */
1862 static int
xmlParserNsPop(xmlParserCtxtPtr ctxt,int nr)1863 xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1864 {
1865     int i;
1866 
1867     /* assert(nr <= ctxt->nsNr); */
1868 
1869     for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1870         const xmlChar *prefix = ctxt->nsTab[i * 2];
1871         xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1872 
1873         if (prefix == NULL) {
1874             ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1875         } else {
1876             xmlHashedString hprefix;
1877             xmlParserNsBucket *bucket = NULL;
1878 
1879             hprefix.name = prefix;
1880             hprefix.hashValue = extra->prefixHashValue;
1881             xmlParserNsLookup(ctxt, &hprefix, &bucket);
1882             /* assert(bucket && bucket->hashValue); */
1883             bucket->index = extra->oldIndex;
1884         }
1885     }
1886 
1887     ctxt->nsNr -= nr;
1888     return(nr);
1889 }
1890 
1891 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1892 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1893     const xmlChar **atts;
1894     unsigned *attallocs;
1895     int maxatts;
1896 
1897     if (nr + 5 > ctxt->maxatts) {
1898 	maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1899 	atts = (const xmlChar **) xmlMalloc(
1900 				     maxatts * sizeof(const xmlChar *));
1901 	if (atts == NULL) goto mem_error;
1902 	attallocs = xmlRealloc(ctxt->attallocs,
1903                                (maxatts / 5) * sizeof(attallocs[0]));
1904 	if (attallocs == NULL) {
1905             xmlFree(atts);
1906             goto mem_error;
1907         }
1908         if (ctxt->maxatts > 0)
1909             memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1910         xmlFree(ctxt->atts);
1911 	ctxt->atts = atts;
1912 	ctxt->attallocs = attallocs;
1913 	ctxt->maxatts = maxatts;
1914     }
1915     return(ctxt->maxatts);
1916 mem_error:
1917     xmlErrMemory(ctxt);
1918     return(-1);
1919 }
1920 
1921 /**
1922  * inputPush:
1923  * @ctxt:  an XML parser context
1924  * @value:  the parser input
1925  *
1926  * Pushes a new parser input on top of the input stack
1927  *
1928  * Returns -1 in case of error, the index in the stack otherwise
1929  */
1930 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1931 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1932 {
1933     if ((ctxt == NULL) || (value == NULL))
1934         return(-1);
1935     if (ctxt->inputNr >= ctxt->inputMax) {
1936         size_t newSize = ctxt->inputMax * 2;
1937         xmlParserInputPtr *tmp;
1938 
1939         tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1940                                                newSize * sizeof(*tmp));
1941         if (tmp == NULL) {
1942             xmlErrMemory(ctxt);
1943             return (-1);
1944         }
1945         ctxt->inputTab = tmp;
1946         ctxt->inputMax = newSize;
1947     }
1948     ctxt->inputTab[ctxt->inputNr] = value;
1949     ctxt->input = value;
1950     return (ctxt->inputNr++);
1951 }
1952 /**
1953  * inputPop:
1954  * @ctxt: an XML parser context
1955  *
1956  * Pops the top parser input from the input stack
1957  *
1958  * Returns the input just removed
1959  */
1960 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1961 inputPop(xmlParserCtxtPtr ctxt)
1962 {
1963     xmlParserInputPtr ret;
1964 
1965     if (ctxt == NULL)
1966         return(NULL);
1967     if (ctxt->inputNr <= 0)
1968         return (NULL);
1969     ctxt->inputNr--;
1970     if (ctxt->inputNr > 0)
1971         ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1972     else
1973         ctxt->input = NULL;
1974     ret = ctxt->inputTab[ctxt->inputNr];
1975     ctxt->inputTab[ctxt->inputNr] = NULL;
1976     return (ret);
1977 }
1978 /**
1979  * nodePush:
1980  * @ctxt:  an XML parser context
1981  * @value:  the element node
1982  *
1983  * DEPRECATED: Internal function, do not use.
1984  *
1985  * Pushes a new element node on top of the node stack
1986  *
1987  * Returns -1 in case of error, the index in the stack otherwise
1988  */
1989 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1990 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1991 {
1992     int maxDepth;
1993 
1994     if (ctxt == NULL)
1995         return(0);
1996 
1997     maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
1998     if (ctxt->nodeNr > maxDepth) {
1999         xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2000                 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
2001                 ctxt->nodeNr);
2002         xmlHaltParser(ctxt);
2003         return(-1);
2004     }
2005     if (ctxt->nodeNr >= ctxt->nodeMax) {
2006         xmlNodePtr *tmp;
2007 
2008 	tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
2009                                       ctxt->nodeMax * 2 *
2010                                       sizeof(ctxt->nodeTab[0]));
2011         if (tmp == NULL) {
2012             xmlErrMemory(ctxt);
2013             return (-1);
2014         }
2015         ctxt->nodeTab = tmp;
2016 	ctxt->nodeMax *= 2;
2017     }
2018     ctxt->nodeTab[ctxt->nodeNr] = value;
2019     ctxt->node = value;
2020     return (ctxt->nodeNr++);
2021 }
2022 
2023 /**
2024  * nodePop:
2025  * @ctxt: an XML parser context
2026  *
2027  * DEPRECATED: Internal function, do not use.
2028  *
2029  * Pops the top element node from the node stack
2030  *
2031  * Returns the node just removed
2032  */
2033 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)2034 nodePop(xmlParserCtxtPtr ctxt)
2035 {
2036     xmlNodePtr ret;
2037 
2038     if (ctxt == NULL) return(NULL);
2039     if (ctxt->nodeNr <= 0)
2040         return (NULL);
2041     ctxt->nodeNr--;
2042     if (ctxt->nodeNr > 0)
2043         ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2044     else
2045         ctxt->node = NULL;
2046     ret = ctxt->nodeTab[ctxt->nodeNr];
2047     ctxt->nodeTab[ctxt->nodeNr] = NULL;
2048     return (ret);
2049 }
2050 
2051 /**
2052  * nameNsPush:
2053  * @ctxt:  an XML parser context
2054  * @value:  the element name
2055  * @prefix:  the element prefix
2056  * @URI:  the element namespace name
2057  * @line:  the current line number for error messages
2058  * @nsNr:  the number of namespaces pushed on the namespace table
2059  *
2060  * Pushes a new element name/prefix/URL on top of the name stack
2061  *
2062  * Returns -1 in case of error, the index in the stack otherwise
2063  */
2064 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr)2065 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2066            const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2067 {
2068     xmlStartTag *tag;
2069 
2070     if (ctxt->nameNr >= ctxt->nameMax) {
2071         const xmlChar * *tmp;
2072         xmlStartTag *tmp2;
2073         ctxt->nameMax *= 2;
2074         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2075                                     ctxt->nameMax *
2076                                     sizeof(ctxt->nameTab[0]));
2077         if (tmp == NULL) {
2078 	    ctxt->nameMax /= 2;
2079 	    goto mem_error;
2080         }
2081 	ctxt->nameTab = tmp;
2082         tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
2083                                     ctxt->nameMax *
2084                                     sizeof(ctxt->pushTab[0]));
2085         if (tmp2 == NULL) {
2086 	    ctxt->nameMax /= 2;
2087 	    goto mem_error;
2088         }
2089 	ctxt->pushTab = tmp2;
2090     } else if (ctxt->pushTab == NULL) {
2091         ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
2092                                             sizeof(ctxt->pushTab[0]));
2093         if (ctxt->pushTab == NULL)
2094             goto mem_error;
2095     }
2096     ctxt->nameTab[ctxt->nameNr] = value;
2097     ctxt->name = value;
2098     tag = &ctxt->pushTab[ctxt->nameNr];
2099     tag->prefix = prefix;
2100     tag->URI = URI;
2101     tag->line = line;
2102     tag->nsNr = nsNr;
2103     return (ctxt->nameNr++);
2104 mem_error:
2105     xmlErrMemory(ctxt);
2106     return (-1);
2107 }
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the topmost element name from the name stack. The entry
 * below it, if any, becomes the current name; otherwise the current
 * name is reset to NULL. The vacated slot is cleared.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *top;
    int idx;

    if (ctxt->nameNr <= 0)
        return(NULL);

    idx = --ctxt->nameNr;
    top = ctxt->nameTab[idx];
    ctxt->nameTab[idx] = NULL;
    ctxt->name = (idx > 0) ? ctxt->nameTab[idx - 1] : NULL;

    return(top);
}
#endif /* LIBXML_PUSH_ENABLED */
2134 
2135 /**
2136  * namePush:
2137  * @ctxt:  an XML parser context
2138  * @value:  the element name
2139  *
2140  * DEPRECATED: Internal function, do not use.
2141  *
2142  * Pushes a new element name on top of the name stack
2143  *
2144  * Returns -1 in case of error, the index in the stack otherwise
2145  */
2146 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)2147 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
2148 {
2149     if (ctxt == NULL) return (-1);
2150 
2151     if (ctxt->nameNr >= ctxt->nameMax) {
2152         const xmlChar * *tmp;
2153         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2154                                     ctxt->nameMax * 2 *
2155                                     sizeof(ctxt->nameTab[0]));
2156         if (tmp == NULL) {
2157 	    goto mem_error;
2158         }
2159 	ctxt->nameTab = tmp;
2160         ctxt->nameMax *= 2;
2161     }
2162     ctxt->nameTab[ctxt->nameNr] = value;
2163     ctxt->name = value;
2164     return (ctxt->nameNr++);
2165 mem_error:
2166     xmlErrMemory(ctxt);
2167     return (-1);
2168 }
2169 
2170 /**
2171  * namePop:
2172  * @ctxt: an XML parser context
2173  *
2174  * DEPRECATED: Internal function, do not use.
2175  *
2176  * Pops the top element name from the name stack
2177  *
2178  * Returns the name just removed
2179  */
2180 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)2181 namePop(xmlParserCtxtPtr ctxt)
2182 {
2183     const xmlChar *ret;
2184 
2185     if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2186         return (NULL);
2187     ctxt->nameNr--;
2188     if (ctxt->nameNr > 0)
2189         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2190     else
2191         ctxt->name = NULL;
2192     ret = ctxt->nameTab[ctxt->nameNr];
2193     ctxt->nameTab[ctxt->nameNr] = NULL;
2194     return (ret);
2195 }
2196 
/*
 * spacePush:
 * Pushes @val on top of the space stack and points ctxt->space at the
 * new top entry. The table is doubled when it is full.
 *
 * Returns -1 on allocation failure, the index in the stack otherwise.
 */
static int
spacePush(xmlParserCtxtPtr ctxt, int val)
{
    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax *
                                   sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            /* Undo the doubling, the old table is still in place. */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
    *ctxt->space = val;
    return(ctxt->spaceNr++);
}
2215 
/*
 * spacePop:
 * Removes the top entry of the space stack. ctxt->space is moved to the
 * entry below, or to the first slot of the table when the stack becomes
 * empty. The vacated slot is overwritten with -1.
 *
 * Returns the value removed, or 0 if the stack was already empty.
 */
static int
spacePop(xmlParserCtxtPtr ctxt)
{
    int idx;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    idx = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(idx > 0) ? idx - 1 : 0];
    popped = ctxt->spaceTab[idx];
    ctxt->spaceTab[idx] = -1;

    return(popped);
}
2228 
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR returns the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operates on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
2263 
/* Current byte, byte at an offset, and raw pointers of the current input. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are c1 ... cn.
 * Bytes are compared left to right and the comparison stops at the
 * first mismatch. Every parameter is parenthesized so that arbitrary
 * expressions can be passed safely.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/* Advance by val bytes on the same line, refill when a 0 byte is reached. */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/* Advance by val bytes while keeping line and column counters up to date. */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * SHRINK, GROW and NEXT1 keep their original statement shape (bare
 * "if" / bare block) because their call sites are not all visible here.
 */
#define SHRINK \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
	(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);						\
    }

/* Step over one character that is l bytes long, tracking line/column. */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * Append code point v to buffer b at index i and advance i. Values
 * below 0x80 are stored as a single byte, everything else goes through
 * xmlCopyCharMultiByte. i must be a plain lvalue.
 */
#define COPY_BUF(b, i, v)						\
    if ((v) < 0x80) (b)[i++] = (v);					\
    else i += xmlCopyCharMultiByte(&(b)[i],(v))
2343 
2344 /**
2345  * xmlSkipBlankChars:
2346  * @ctxt:  the XML parser context
2347  *
2348  * DEPRECATED: Internal function, do not use.
2349  *
2350  * Skip whitespace in the input stream.
2351  *
2352  * Returns the number of space chars skipped
2353  */
2354 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2355 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2356     const xmlChar *cur;
2357     int res = 0;
2358 
2359     /*
2360      * It's Okay to use CUR/NEXT here since all the blanks are on
2361      * the ASCII range.
2362      */
2363     cur = ctxt->input->cur;
2364     while (IS_BLANK_CH(*cur)) {
2365         if (*cur == '\n') {
2366             ctxt->input->line++; ctxt->input->col = 1;
2367         } else {
2368             ctxt->input->col++;
2369         }
2370         cur++;
2371         if (res < INT_MAX)
2372             res++;
2373         if (*cur == 0) {
2374             ctxt->input->cur = cur;
2375             xmlParserGrow(ctxt);
2376             cur = ctxt->input->cur;
2377         }
2378     }
2379     ctxt->input->cur = cur;
2380 
2381     return(res);
2382 }
2383 
2384 static void
xmlPopPE(xmlParserCtxtPtr ctxt)2385 xmlPopPE(xmlParserCtxtPtr ctxt) {
2386     unsigned long consumed;
2387     xmlEntityPtr ent;
2388 
2389     ent = ctxt->input->entity;
2390 
2391     ent->flags &= ~XML_ENT_EXPANDING;
2392 
2393     if ((ent->flags & XML_ENT_CHECKED) == 0) {
2394         int result;
2395 
2396         /*
2397          * Read the rest of the stream in case of errors. We want
2398          * to account for the whole entity size.
2399          */
2400         do {
2401             ctxt->input->cur = ctxt->input->end;
2402             xmlParserShrink(ctxt);
2403             result = xmlParserGrow(ctxt);
2404         } while (result > 0);
2405 
2406         consumed = ctxt->input->consumed;
2407         xmlSaturatedAddSizeT(&consumed,
2408                              ctxt->input->end - ctxt->input->base);
2409 
2410         xmlSaturatedAdd(&ent->expandedSize, consumed);
2411 
2412         /*
2413          * Add to sizeentities when parsing an external entity
2414          * for the first time.
2415          */
2416         if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2417             xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2418         }
2419 
2420         ent->flags |= XML_ENT_CHECKED;
2421     }
2422 
2423     xmlPopInput(ctxt);
2424 
2425     xmlParserEntityCheck(ctxt, ent->expandedSize);
2426 }
2427 
2428 /**
2429  * xmlSkipBlankCharsPE:
2430  * @ctxt:  the XML parser context
2431  *
2432  * Skip whitespace in the input stream, also handling parameter
2433  * entities.
2434  *
2435  * Returns the number of space chars skipped
2436  */
2437 static int
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt)2438 xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2439     int res = 0;
2440     int inParam;
2441     int expandParam;
2442 
2443     inParam = PARSER_IN_PE(ctxt);
2444     expandParam = PARSER_EXTERNAL(ctxt);
2445 
2446     if (!inParam && !expandParam)
2447         return(xmlSkipBlankChars(ctxt));
2448 
2449     while (PARSER_STOPPED(ctxt) == 0) {
2450         if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2451             NEXT;
2452         } else if (CUR == '%') {
2453             if ((expandParam == 0) ||
2454                 (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2455                 break;
2456 
2457             /*
2458              * Expand parameter entity. We continue to consume
2459              * whitespace at the start of the entity and possible
2460              * even consume the whole entity and pop it. We might
2461              * even pop multiple PEs in this loop.
2462              */
2463             xmlParsePEReference(ctxt);
2464 
2465             inParam = PARSER_IN_PE(ctxt);
2466             expandParam = PARSER_EXTERNAL(ctxt);
2467         } else if (CUR == 0) {
2468             if (inParam == 0)
2469                 break;
2470 
2471             xmlPopPE(ctxt);
2472 
2473             inParam = PARSER_IN_PE(ctxt);
2474             expandParam = PARSER_EXTERNAL(ctxt);
2475         } else {
2476             break;
2477         }
2478 
2479         /*
2480          * Also increase the counter when entering or exiting a PERef.
2481          * The spec says: "When a parameter-entity reference is recognized
2482          * in the DTD and included, its replacement text MUST be enlarged
2483          * by the attachment of one leading and one following space (#x20)
2484          * character."
2485          */
2486         if (res < INT_MAX)
2487             res++;
2488     }
2489 
2490     return(res);
2491 }
2492 
2493 /************************************************************************
2494  *									*
2495  *		Commodity functions to handle entities			*
2496  *									*
2497  ************************************************************************/
2498 
2499 /**
2500  * xmlPopInput:
2501  * @ctxt:  an XML parser context
2502  *
2503  * xmlPopInput: the current input pointed by ctxt->input came to an end
2504  *          pop it and return the next char.
2505  *
2506  * Returns the current xmlChar in the parser context
2507  */
2508 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2509 xmlPopInput(xmlParserCtxtPtr ctxt) {
2510     xmlParserInputPtr input;
2511 
2512     if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2513     input = inputPop(ctxt);
2514     xmlFreeInputStream(input);
2515     if (*ctxt->input->cur == 0)
2516         xmlParserGrow(ctxt);
2517     return(CUR);
2518 }
2519 
2520 /**
2521  * xmlPushInput:
2522  * @ctxt:  an XML parser context
2523  * @input:  an XML parser input fragment (entity, XML fragment ...).
2524  *
2525  * Push an input stream onto the stack.
2526  *
2527  * Returns -1 in case of error or the index in the input stack
2528  */
2529 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2530 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2531     int maxDepth;
2532     int ret;
2533 
2534     if ((ctxt == NULL) || (input == NULL))
2535         return(-1);
2536 
2537     maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
2538     if (ctxt->inputNr > maxDepth) {
2539         xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
2540                        "Maximum entity nesting depth exceeded");
2541         xmlHaltParser(ctxt);
2542 	return(-1);
2543     }
2544     ret = inputPush(ctxt, input);
2545     GROW;
2546     return(ret);
2547 }
2548 
2549 /**
2550  * xmlParseCharRef:
2551  * @ctxt:  an XML parser context
2552  *
2553  * DEPRECATED: Internal function, don't use.
2554  *
2555  * Parse a numeric character reference. Always consumes '&'.
2556  *
2557  * [66] CharRef ::= '&#' [0-9]+ ';' |
2558  *                  '&#x' [0-9a-fA-F]+ ';'
2559  *
2560  * [ WFC: Legal Character ]
2561  * Characters referred to using character references must match the
2562  * production for Char.
2563  *
2564  * Returns the value parsed (as an int), 0 in case of error
2565  */
2566 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2567 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2568     int val = 0;
2569     int count = 0;
2570 
2571     /*
2572      * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2573      */
2574     if ((RAW == '&') && (NXT(1) == '#') &&
2575         (NXT(2) == 'x')) {
2576 	SKIP(3);
2577 	GROW;
2578 	while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2579 	    if (count++ > 20) {
2580 		count = 0;
2581 		GROW;
2582 	    }
2583 	    if ((RAW >= '0') && (RAW <= '9'))
2584 	        val = val * 16 + (CUR - '0');
2585 	    else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2586 	        val = val * 16 + (CUR - 'a') + 10;
2587 	    else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2588 	        val = val * 16 + (CUR - 'A') + 10;
2589 	    else {
2590 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2591 		val = 0;
2592 		break;
2593 	    }
2594 	    if (val > 0x110000)
2595 	        val = 0x110000;
2596 
2597 	    NEXT;
2598 	    count++;
2599 	}
2600 	if (RAW == ';') {
2601 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2602 	    ctxt->input->col++;
2603 	    ctxt->input->cur++;
2604 	}
2605     } else if  ((RAW == '&') && (NXT(1) == '#')) {
2606 	SKIP(2);
2607 	GROW;
2608 	while (RAW != ';') { /* loop blocked by count */
2609 	    if (count++ > 20) {
2610 		count = 0;
2611 		GROW;
2612 	    }
2613 	    if ((RAW >= '0') && (RAW <= '9'))
2614 	        val = val * 10 + (CUR - '0');
2615 	    else {
2616 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2617 		val = 0;
2618 		break;
2619 	    }
2620 	    if (val > 0x110000)
2621 	        val = 0x110000;
2622 
2623 	    NEXT;
2624 	    count++;
2625 	}
2626 	if (RAW == ';') {
2627 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2628 	    ctxt->input->col++;
2629 	    ctxt->input->cur++;
2630 	}
2631     } else {
2632         if (RAW == '&')
2633             SKIP(1);
2634         xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2635     }
2636 
2637     /*
2638      * [ WFC: Legal Character ]
2639      * Characters referred to using character references must match the
2640      * production for Char.
2641      */
2642     if (val >= 0x110000) {
2643         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2644                 "xmlParseCharRef: character reference out of bounds\n",
2645 	        val);
2646     } else if (IS_CHAR(val)) {
2647         return(val);
2648     } else {
2649         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2650                           "xmlParseCharRef: invalid xmlChar value %d\n",
2651 	                  val);
2652     }
2653     return(0);
2654 }
2655 
2656 /**
2657  * xmlParseStringCharRef:
2658  * @ctxt:  an XML parser context
2659  * @str:  a pointer to an index in the string
2660  *
2661  * parse Reference declarations, variant parsing from a string rather
2662  * than an an input flow.
2663  *
2664  * [66] CharRef ::= '&#' [0-9]+ ';' |
2665  *                  '&#x' [0-9a-fA-F]+ ';'
2666  *
2667  * [ WFC: Legal Character ]
2668  * Characters referred to using character references must match the
2669  * production for Char.
2670  *
2671  * Returns the value parsed (as an int), 0 in case of error, str will be
2672  *         updated to the current value of the index
2673  */
2674 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2675 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2676     const xmlChar *ptr;
2677     xmlChar cur;
2678     int val = 0;
2679 
2680     if ((str == NULL) || (*str == NULL)) return(0);
2681     ptr = *str;
2682     cur = *ptr;
2683     if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2684 	ptr += 3;
2685 	cur = *ptr;
2686 	while (cur != ';') { /* Non input consuming loop */
2687 	    if ((cur >= '0') && (cur <= '9'))
2688 	        val = val * 16 + (cur - '0');
2689 	    else if ((cur >= 'a') && (cur <= 'f'))
2690 	        val = val * 16 + (cur - 'a') + 10;
2691 	    else if ((cur >= 'A') && (cur <= 'F'))
2692 	        val = val * 16 + (cur - 'A') + 10;
2693 	    else {
2694 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2695 		val = 0;
2696 		break;
2697 	    }
2698 	    if (val > 0x110000)
2699 	        val = 0x110000;
2700 
2701 	    ptr++;
2702 	    cur = *ptr;
2703 	}
2704 	if (cur == ';')
2705 	    ptr++;
2706     } else if  ((cur == '&') && (ptr[1] == '#')){
2707 	ptr += 2;
2708 	cur = *ptr;
2709 	while (cur != ';') { /* Non input consuming loops */
2710 	    if ((cur >= '0') && (cur <= '9'))
2711 	        val = val * 10 + (cur - '0');
2712 	    else {
2713 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2714 		val = 0;
2715 		break;
2716 	    }
2717 	    if (val > 0x110000)
2718 	        val = 0x110000;
2719 
2720 	    ptr++;
2721 	    cur = *ptr;
2722 	}
2723 	if (cur == ';')
2724 	    ptr++;
2725     } else {
2726 	xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2727 	return(0);
2728     }
2729     *str = ptr;
2730 
2731     /*
2732      * [ WFC: Legal Character ]
2733      * Characters referred to using character references must match the
2734      * production for Char.
2735      */
2736     if (val >= 0x110000) {
2737         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2738                 "xmlParseStringCharRef: character reference out of bounds\n",
2739                 val);
2740     } else if (IS_CHAR(val)) {
2741         return(val);
2742     } else {
2743         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2744 			  "xmlParseStringCharRef: invalid xmlChar value %d\n",
2745 			  val);
2746     }
2747     return(0);
2748 }
2749 
2750 /**
2751  * xmlParserHandlePEReference:
2752  * @ctxt:  the parser context
2753  *
2754  * DEPRECATED: Internal function, do not use.
2755  *
2756  * [69] PEReference ::= '%' Name ';'
2757  *
2758  * [ WFC: No Recursion ]
2759  * A parsed entity must not contain a recursive
2760  * reference to itself, either directly or indirectly.
2761  *
2762  * [ WFC: Entity Declared ]
2763  * In a document without any DTD, a document with only an internal DTD
2764  * subset which contains no parameter entity references, or a document
2765  * with "standalone='yes'", ...  ... The declaration of a parameter
2766  * entity must precede any reference to it...
2767  *
2768  * [ VC: Entity Declared ]
2769  * In a document with an external subset or external parameter entities
2770  * with "standalone='no'", ...  ... The declaration of a parameter entity
2771  * must precede any reference to it...
2772  *
2773  * [ WFC: In DTD ]
2774  * Parameter-entity references may only appear in the DTD.
2775  * NOTE: misleading but this is handled.
2776  *
2777  * A PEReference may have been detected in the current input stream
2778  * the handling is done accordingly to
2779  *      http://www.w3.org/TR/REC-xml#entproc
2780  * i.e.
2781  *   - Included in literal in entity values
2782  *   - Included as Parameter Entity reference within DTDs
2783  */
2784 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2785 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2786     xmlParsePEReference(ctxt);
2787 }
2788 
2789 /**
2790  * xmlStringLenDecodeEntities:
2791  * @ctxt:  the parser context
2792  * @str:  the input string
2793  * @len: the string length
2794  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2795  * @end:  an end marker xmlChar, 0 if none
2796  * @end2:  an end marker xmlChar, 0 if none
2797  * @end3:  an end marker xmlChar, 0 if none
2798  *
2799  * DEPRECATED: Internal function, don't use.
2800  *
2801  * Returns A newly allocated string with the substitution done. The caller
2802  *      must deallocate it !
2803  */
2804 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what ATTRIBUTE_UNUSED,xmlChar end,xmlChar end2,xmlChar end3)2805 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2806                            int what ATTRIBUTE_UNUSED,
2807                            xmlChar end, xmlChar end2, xmlChar end3) {
2808     if ((ctxt == NULL) || (str == NULL) || (len < 0))
2809         return(NULL);
2810 
2811     if ((str[len] != 0) ||
2812         (end != 0) || (end2 != 0) || (end3 != 0))
2813         return(NULL);
2814 
2815     return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2816 }
2817 
2818 /**
2819  * xmlStringDecodeEntities:
2820  * @ctxt:  the parser context
2821  * @str:  the input string
2822  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2823  * @end:  an end marker xmlChar, 0 if none
2824  * @end2:  an end marker xmlChar, 0 if none
2825  * @end3:  an end marker xmlChar, 0 if none
2826  *
2827  * DEPRECATED: Internal function, don't use.
2828  *
2829  * Returns A newly allocated string with the substitution done. The caller
2830  *      must deallocate it !
2831  */
2832 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what ATTRIBUTE_UNUSED,xmlChar end,xmlChar end2,xmlChar end3)2833 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
2834                         int what ATTRIBUTE_UNUSED,
2835 		        xmlChar end, xmlChar  end2, xmlChar end3) {
2836     if ((ctxt == NULL) || (str == NULL))
2837         return(NULL);
2838 
2839     if ((end != 0) || (end2 != 0) || (end3 != 0))
2840         return(NULL);
2841 
2842     return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2843 }
2844 
2845 /************************************************************************
2846  *									*
2847  *		Commodity functions, cleanup needed ?			*
2848  *									*
2849  ************************************************************************/
2850 
2851 /**
2852  * areBlanks:
2853  * @ctxt:  an XML parser context
2854  * @str:  a xmlChar *
2855  * @len:  the size of @str
2856  * @blank_chars: we know the chars are blanks
2857  *
2858  * Is this a sequence of blank chars that one can ignore ?
2859  *
2860  * Returns 1 if ignorable 0 otherwise.
2861  */
2862 
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
                     int blank_chars) {
    xmlNodePtr parent;
    xmlNodePtr last;
    int pos;
    int next;

    /*
     * Both SAX callbacks point at the same function: telling the two
     * kinds of text apart would change nothing, so don't bother.
     */
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
        return(0);

    /*
     * xml:space check: without a space stack, or with a current value
     * of 1 or -2, the text is never reported as ignorable.
     */
    if (ctxt->space == NULL)
        return(0);
    if ((*(ctxt->space) == 1) || (*(ctxt->space) == -2))
        return(0);

    /*
     * Unless the caller vouches for it (blank_chars != 0), verify that
     * every byte of the string is a blank.
     */
    if (blank_chars == 0) {
        pos = 0;
        while (pos < len) {
            if (!(IS_BLANK_CH(str[pos])))
                return(0);
            pos++;
        }
    }

    parent = ctxt->node;
    if (parent == NULL)
        return(0);

    /*
     * If a DTD declares the current element, its content type decides:
     * element-only content => ignorable, ANY or mixed => significant.
     * The internal subset is consulted first, then the external one.
     */
    if (ctxt->myDoc != NULL) {
        xmlDocPtr doc = ctxt->myDoc;
        const xmlChar *prefix = NULL;
        xmlElementPtr decl = NULL;

        if (parent->ns)
            prefix = parent->ns->prefix;
        if (doc->intSubset != NULL)
            decl = xmlHashLookup2(doc->intSubset->elements, parent->name,
                                  prefix);
        if ((decl == NULL) && (doc->extSubset != NULL))
            decl = xmlHashLookup2(doc->extSubset->elements, parent->name,
                                  prefix);
        if (decl != NULL) {
            switch (decl->etype) {
                case XML_ELEMENT_TYPE_ELEMENT:
                    return(1);
                case XML_ELEMENT_TYPE_ANY:
                case XML_ELEMENT_TYPE_MIXED:
                    return(0);
                default:
                    break;
            }
        }
    }

    /*
     * No usable declaration: fall back to a heuristic based on the
     * next input byte and on the children already attached.
     */
    next = RAW;
    if ((next != '<') && (next != 0xD))
        return(0);
    if ((parent->children == NULL) && (next == '<') && (NXT(1) == '/'))
        return(0);

    last = xmlGetLastChild(parent);
    if (last == NULL) {
        if ((parent->type != XML_ELEMENT_NODE) &&
            (parent->content != NULL))
            return(0);
        return(1);
    }
    if (xmlNodeIsText(last))
        return(0);
    if ((parent->children != NULL) && (xmlNodeIsText(parent->children)))
        return(0);
    return(1);
}
2934 
2935 /************************************************************************
2936  *									*
2937  *		Extra stuff for namespace support			*
2938  *	Relates to http://www.w3.org/TR/WD-xml-names			*
2939  *									*
2940  ************************************************************************/
2941 
2942 /**
2943  * xmlSplitQName:
2944  * @ctxt:  an XML parser context
2945  * @name:  an XML parser context
2946  * @prefixOut:  a xmlChar **
2947  *
2948  * parse an UTF8 encoded XML qualified name string
2949  *
2950  * [NS 5] QName ::= (Prefix ':')? LocalPart
2951  *
2952  * [NS 6] Prefix ::= NCName
2953  *
2954  * [NS 7] LocalPart ::= NCName
2955  *
2956  * Returns the local part, and prefix is updated
2957  *   to get the Prefix if any.
2958  */
2959 
2960 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefixOut)2961 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefixOut) {
2962     xmlChar buf[XML_MAX_NAMELEN + 5];
2963     xmlChar *buffer = NULL;
2964     int len = 0;
2965     int max = XML_MAX_NAMELEN;
2966     xmlChar *ret = NULL;
2967     xmlChar *prefix;
2968     const xmlChar *cur = name;
2969     int c;
2970 
2971     if (prefixOut == NULL) return(NULL);
2972     *prefixOut = NULL;
2973 
2974     if (cur == NULL) return(NULL);
2975 
2976     /* nasty but well=formed */
2977     if (cur[0] == ':')
2978 	return(xmlStrdup(name));
2979 
2980     c = *cur++;
2981     while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2982 	buf[len++] = c;
2983 	c = *cur++;
2984     }
2985     if (len >= max) {
2986 	/*
2987 	 * Okay someone managed to make a huge name, so he's ready to pay
2988 	 * for the processing speed.
2989 	 */
2990 	max = len * 2;
2991 
2992 	buffer = (xmlChar *) xmlMallocAtomic(max);
2993 	if (buffer == NULL) {
2994 	    xmlErrMemory(ctxt);
2995 	    return(NULL);
2996 	}
2997 	memcpy(buffer, buf, len);
2998 	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2999 	    if (len + 10 > max) {
3000 	        xmlChar *tmp;
3001 
3002 		max *= 2;
3003 		tmp = (xmlChar *) xmlRealloc(buffer, max);
3004 		if (tmp == NULL) {
3005 		    xmlFree(buffer);
3006 		    xmlErrMemory(ctxt);
3007 		    return(NULL);
3008 		}
3009 		buffer = tmp;
3010 	    }
3011 	    buffer[len++] = c;
3012 	    c = *cur++;
3013 	}
3014 	buffer[len] = 0;
3015     }
3016 
3017     if ((c == ':') && (*cur == 0)) {
3018         if (buffer != NULL)
3019 	    xmlFree(buffer);
3020 	return(xmlStrdup(name));
3021     }
3022 
3023     if (buffer == NULL) {
3024 	ret = xmlStrndup(buf, len);
3025         if (ret == NULL) {
3026 	    xmlErrMemory(ctxt);
3027 	    return(NULL);
3028         }
3029     } else {
3030 	ret = buffer;
3031 	buffer = NULL;
3032 	max = XML_MAX_NAMELEN;
3033     }
3034 
3035 
3036     if (c == ':') {
3037 	c = *cur;
3038         prefix = ret;
3039 	if (c == 0) {
3040 	    ret = xmlStrndup(BAD_CAST "", 0);
3041             if (ret == NULL) {
3042                 xmlFree(prefix);
3043                 return(NULL);
3044             }
3045             *prefixOut = prefix;
3046             return(ret);
3047 	}
3048 	len = 0;
3049 
3050 	/*
3051 	 * Check that the first character is proper to start
3052 	 * a new name
3053 	 */
3054 	if (!(((c >= 0x61) && (c <= 0x7A)) ||
3055 	      ((c >= 0x41) && (c <= 0x5A)) ||
3056 	      (c == '_') || (c == ':'))) {
3057 	    int l;
3058 	    int first = CUR_SCHAR(cur, l);
3059 
3060 	    if (!IS_LETTER(first) && (first != '_')) {
3061 		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3062 			    "Name %s is not XML Namespace compliant\n",
3063 				  name);
3064 	    }
3065 	}
3066 	cur++;
3067 
3068 	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3069 	    buf[len++] = c;
3070 	    c = *cur++;
3071 	}
3072 	if (len >= max) {
3073 	    /*
3074 	     * Okay someone managed to make a huge name, so he's ready to pay
3075 	     * for the processing speed.
3076 	     */
3077 	    max = len * 2;
3078 
3079 	    buffer = (xmlChar *) xmlMallocAtomic(max);
3080 	    if (buffer == NULL) {
3081 	        xmlErrMemory(ctxt);
3082                 xmlFree(prefix);
3083 		return(NULL);
3084 	    }
3085 	    memcpy(buffer, buf, len);
3086 	    while (c != 0) { /* tested bigname2.xml */
3087 		if (len + 10 > max) {
3088 		    xmlChar *tmp;
3089 
3090 		    max *= 2;
3091 		    tmp = (xmlChar *) xmlRealloc(buffer, max);
3092 		    if (tmp == NULL) {
3093 			xmlErrMemory(ctxt);
3094                         xmlFree(prefix);
3095 			xmlFree(buffer);
3096 			return(NULL);
3097 		    }
3098 		    buffer = tmp;
3099 		}
3100 		buffer[len++] = c;
3101 		c = *cur++;
3102 	    }
3103 	    buffer[len] = 0;
3104 	}
3105 
3106 	if (buffer == NULL) {
3107 	    ret = xmlStrndup(buf, len);
3108             if (ret == NULL) {
3109                 xmlFree(prefix);
3110                 return(NULL);
3111             }
3112 	} else {
3113 	    ret = buffer;
3114 	}
3115 
3116         *prefixOut = prefix;
3117     }
3118 
3119     return(ret);
3120 }
3121 
3122 /************************************************************************
3123  *									*
3124  *			The parser itself				*
3125  *	Relates to http://www.w3.org/TR/REC-xml				*
3126  *									*
3127  ************************************************************************/
3128 
3129 /************************************************************************
3130  *									*
3131  *	Routines to parse Name, NCName and NmToken			*
3132  *									*
3133  ************************************************************************/
3134 
3135 /*
3136  * The two following functions are related to the change of accepted
3137  * characters for Name and NmToken in the Revision 5 of XML-1.0
3138  * They correspond to the modified production [4] and the new production [4a]
3139  * changes in that revision. Also note that the macros used for the
3140  * productions Letter, Digit, CombiningChar and Extender are not needed
3141  * anymore.
3142  * We still keep compatibility to pre-revision5 parsing semantic if the
3143  * new XML_PARSE_OLD10 option is given to the parser.
3144  */
3145 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3146 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3147     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3148         /*
3149 	 * Use the new checks of production [4] [4a] amd [5] of the
3150 	 * Update 5 of XML-1.0
3151 	 */
3152 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3153 	    (((c >= 'a') && (c <= 'z')) ||
3154 	     ((c >= 'A') && (c <= 'Z')) ||
3155 	     (c == '_') || (c == ':') ||
3156 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3157 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3158 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3159 	     ((c >= 0x370) && (c <= 0x37D)) ||
3160 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3161 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3162 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3163 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3164 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3165 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3166 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3167 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3168 	    return(1);
3169     } else {
3170         if (IS_LETTER(c) || (c == '_') || (c == ':'))
3171 	    return(1);
3172     }
3173     return(0);
3174 }
3175 
3176 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3177 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3178     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3179         /*
3180 	 * Use the new checks of production [4] [4a] amd [5] of the
3181 	 * Update 5 of XML-1.0
3182 	 */
3183 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3184 	    (((c >= 'a') && (c <= 'z')) ||
3185 	     ((c >= 'A') && (c <= 'Z')) ||
3186 	     ((c >= '0') && (c <= '9')) || /* !start */
3187 	     (c == '_') || (c == ':') ||
3188 	     (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3189 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3190 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3191 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3192 	     ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3193 	     ((c >= 0x370) && (c <= 0x37D)) ||
3194 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3195 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3196 	     ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3197 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3198 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3199 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3200 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3201 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3202 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3203 	     return(1);
3204     } else {
3205         if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3206             (c == '.') || (c == '-') ||
3207 	    (c == '_') || (c == ':') ||
3208 	    (IS_COMBINING(c)) ||
3209 	    (IS_EXTENDER(c)))
3210 	    return(1);
3211     }
3212     return(0);
3213 }
3214 
3215 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3216 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3217     const xmlChar *ret;
3218     int len = 0, l;
3219     int c;
3220     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3221                     XML_MAX_TEXT_LENGTH :
3222                     XML_MAX_NAME_LENGTH;
3223 
3224     /*
3225      * Handler for more complex cases
3226      */
3227     c = CUR_CHAR(l);
3228     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3229         /*
3230 	 * Use the new checks of production [4] [4a] amd [5] of the
3231 	 * Update 5 of XML-1.0
3232 	 */
3233 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3234 	    (!(((c >= 'a') && (c <= 'z')) ||
3235 	       ((c >= 'A') && (c <= 'Z')) ||
3236 	       (c == '_') || (c == ':') ||
3237 	       ((c >= 0xC0) && (c <= 0xD6)) ||
3238 	       ((c >= 0xD8) && (c <= 0xF6)) ||
3239 	       ((c >= 0xF8) && (c <= 0x2FF)) ||
3240 	       ((c >= 0x370) && (c <= 0x37D)) ||
3241 	       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3242 	       ((c >= 0x200C) && (c <= 0x200D)) ||
3243 	       ((c >= 0x2070) && (c <= 0x218F)) ||
3244 	       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3245 	       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3246 	       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3247 	       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3248 	       ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3249 	    return(NULL);
3250 	}
3251 	len += l;
3252 	NEXTL(l);
3253 	c = CUR_CHAR(l);
3254 	while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3255 	       (((c >= 'a') && (c <= 'z')) ||
3256 	        ((c >= 'A') && (c <= 'Z')) ||
3257 	        ((c >= '0') && (c <= '9')) || /* !start */
3258 	        (c == '_') || (c == ':') ||
3259 	        (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3260 	        ((c >= 0xC0) && (c <= 0xD6)) ||
3261 	        ((c >= 0xD8) && (c <= 0xF6)) ||
3262 	        ((c >= 0xF8) && (c <= 0x2FF)) ||
3263 	        ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3264 	        ((c >= 0x370) && (c <= 0x37D)) ||
3265 	        ((c >= 0x37F) && (c <= 0x1FFF)) ||
3266 	        ((c >= 0x200C) && (c <= 0x200D)) ||
3267 	        ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3268 	        ((c >= 0x2070) && (c <= 0x218F)) ||
3269 	        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3270 	        ((c >= 0x3001) && (c <= 0xD7FF)) ||
3271 	        ((c >= 0xF900) && (c <= 0xFDCF)) ||
3272 	        ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3273 	        ((c >= 0x10000) && (c <= 0xEFFFF))
3274 		)) {
3275             if (len <= INT_MAX - l)
3276 	        len += l;
3277 	    NEXTL(l);
3278 	    c = CUR_CHAR(l);
3279 	}
3280     } else {
3281 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3282 	    (!IS_LETTER(c) && (c != '_') &&
3283 	     (c != ':'))) {
3284 	    return(NULL);
3285 	}
3286 	len += l;
3287 	NEXTL(l);
3288 	c = CUR_CHAR(l);
3289 
3290 	while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3291 	       ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3292 		(c == '.') || (c == '-') ||
3293 		(c == '_') || (c == ':') ||
3294 		(IS_COMBINING(c)) ||
3295 		(IS_EXTENDER(c)))) {
3296             if (len <= INT_MAX - l)
3297 	        len += l;
3298 	    NEXTL(l);
3299 	    c = CUR_CHAR(l);
3300 	}
3301     }
3302     if (len > maxLength) {
3303         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3304         return(NULL);
3305     }
3306     if (ctxt->input->cur - ctxt->input->base < len) {
3307         /*
3308          * There were a couple of bugs where PERefs lead to to a change
3309          * of the buffer. Check the buffer size to avoid passing an invalid
3310          * pointer to xmlDictLookup.
3311          */
3312         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3313                     "unexpected change of input buffer");
3314         return (NULL);
3315     }
3316     if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3317         ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3318     else
3319         ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3320     if (ret == NULL)
3321         xmlErrMemory(ctxt);
3322     return(ret);
3323 }
3324 
3325 /**
3326  * xmlParseName:
3327  * @ctxt:  an XML parser context
3328  *
3329  * DEPRECATED: Internal function, don't use.
3330  *
3331  * parse an XML name.
3332  *
3333  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3334  *                  CombiningChar | Extender
3335  *
3336  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3337  *
3338  * [6] Names ::= Name (#x20 Name)*
3339  *
3340  * Returns the Name parsed or NULL
3341  */
3342 
3343 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3344 xmlParseName(xmlParserCtxtPtr ctxt) {
3345     const xmlChar *in;
3346     const xmlChar *ret;
3347     size_t count = 0;
3348     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3349                        XML_MAX_TEXT_LENGTH :
3350                        XML_MAX_NAME_LENGTH;
3351 
3352     GROW;
3353 
3354     /*
3355      * Accelerator for simple ASCII names
3356      */
3357     in = ctxt->input->cur;
3358     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3359 	((*in >= 0x41) && (*in <= 0x5A)) ||
3360 	(*in == '_') || (*in == ':')) {
3361 	in++;
3362 	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3363 	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3364 	       ((*in >= 0x30) && (*in <= 0x39)) ||
3365 	       (*in == '_') || (*in == '-') ||
3366 	       (*in == ':') || (*in == '.'))
3367 	    in++;
3368 	if ((*in > 0) && (*in < 0x80)) {
3369 	    count = in - ctxt->input->cur;
3370             if (count > maxLength) {
3371                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3372                 return(NULL);
3373             }
3374 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3375 	    ctxt->input->cur = in;
3376 	    ctxt->input->col += count;
3377 	    if (ret == NULL)
3378 	        xmlErrMemory(ctxt);
3379 	    return(ret);
3380 	}
3381     }
3382     /* accelerator for special cases */
3383     return(xmlParseNameComplex(ctxt));
3384 }
3385 
3386 static xmlHashedString
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3387 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3388     xmlHashedString ret;
3389     int len = 0, l;
3390     int c;
3391     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3392                     XML_MAX_TEXT_LENGTH :
3393                     XML_MAX_NAME_LENGTH;
3394     size_t startPosition = 0;
3395 
3396     ret.name = NULL;
3397     ret.hashValue = 0;
3398 
3399     /*
3400      * Handler for more complex cases
3401      */
3402     startPosition = CUR_PTR - BASE_PTR;
3403     c = CUR_CHAR(l);
3404     if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3405 	(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3406 	return(ret);
3407     }
3408 
3409     while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3410 	   (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3411         if (len <= INT_MAX - l)
3412 	    len += l;
3413 	NEXTL(l);
3414 	c = CUR_CHAR(l);
3415     }
3416     if (len > maxLength) {
3417         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3418         return(ret);
3419     }
3420     ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3421     if (ret.name == NULL)
3422         xmlErrMemory(ctxt);
3423     return(ret);
3424 }
3425 
3426 /**
3427  * xmlParseNCName:
3428  * @ctxt:  an XML parser context
3429  * @len:  length of the string parsed
3430  *
3431  * parse an XML name.
3432  *
3433  * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3434  *                      CombiningChar | Extender
3435  *
3436  * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3437  *
3438  * Returns the Name parsed or NULL
3439  */
3440 
3441 static xmlHashedString
xmlParseNCName(xmlParserCtxtPtr ctxt)3442 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3443     const xmlChar *in, *e;
3444     xmlHashedString ret;
3445     size_t count = 0;
3446     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3447                        XML_MAX_TEXT_LENGTH :
3448                        XML_MAX_NAME_LENGTH;
3449 
3450     ret.name = NULL;
3451 
3452     /*
3453      * Accelerator for simple ASCII names
3454      */
3455     in = ctxt->input->cur;
3456     e = ctxt->input->end;
3457     if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3458 	 ((*in >= 0x41) && (*in <= 0x5A)) ||
3459 	 (*in == '_')) && (in < e)) {
3460 	in++;
3461 	while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3462 	        ((*in >= 0x41) && (*in <= 0x5A)) ||
3463 	        ((*in >= 0x30) && (*in <= 0x39)) ||
3464 	        (*in == '_') || (*in == '-') ||
3465 	        (*in == '.')) && (in < e))
3466 	    in++;
3467 	if (in >= e)
3468 	    goto complex;
3469 	if ((*in > 0) && (*in < 0x80)) {
3470 	    count = in - ctxt->input->cur;
3471             if (count > maxLength) {
3472                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3473                 return(ret);
3474             }
3475 	    ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3476 	    ctxt->input->cur = in;
3477 	    ctxt->input->col += count;
3478 	    if (ret.name == NULL) {
3479 	        xmlErrMemory(ctxt);
3480 	    }
3481 	    return(ret);
3482 	}
3483     }
3484 complex:
3485     return(xmlParseNCNameComplex(ctxt));
3486 }
3487 
3488 /**
3489  * xmlParseNameAndCompare:
3490  * @ctxt:  an XML parser context
3491  *
3492  * parse an XML name and compares for match
3493  * (specialized for endtag parsing)
3494  *
3495  * Returns NULL for an illegal name, (xmlChar*) 1 for success
3496  * and the name for mismatch
3497  */
3498 
3499 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3500 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3501     register const xmlChar *cmp = other;
3502     register const xmlChar *in;
3503     const xmlChar *ret;
3504 
3505     GROW;
3506 
3507     in = ctxt->input->cur;
3508     while (*in != 0 && *in == *cmp) {
3509 	++in;
3510 	++cmp;
3511     }
3512     if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3513 	/* success */
3514 	ctxt->input->col += in - ctxt->input->cur;
3515 	ctxt->input->cur = in;
3516 	return (const xmlChar*) 1;
3517     }
3518     /* failure (or end of input buffer), check with full function */
3519     ret = xmlParseName (ctxt);
3520     /* strings coming from the dictionary direct compare possible */
3521     if (ret == other) {
3522 	return (const xmlChar*) 1;
3523     }
3524     return ret;
3525 }
3526 
3527 /**
3528  * xmlParseStringName:
3529  * @ctxt:  an XML parser context
3530  * @str:  a pointer to the string pointer (IN/OUT)
3531  *
3532  * parse an XML name.
3533  *
3534  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3535  *                  CombiningChar | Extender
3536  *
3537  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3538  *
3539  * [6] Names ::= Name (#x20 Name)*
3540  *
3541  * Returns the Name parsed or NULL. The @str pointer
3542  * is updated to the current location in the string.
3543  */
3544 
3545 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3546 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3547     xmlChar buf[XML_MAX_NAMELEN + 5];
3548     xmlChar *ret;
3549     const xmlChar *cur = *str;
3550     int len = 0, l;
3551     int c;
3552     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3553                     XML_MAX_TEXT_LENGTH :
3554                     XML_MAX_NAME_LENGTH;
3555 
3556     c = CUR_SCHAR(cur, l);
3557     if (!xmlIsNameStartChar(ctxt, c)) {
3558 	return(NULL);
3559     }
3560 
3561     COPY_BUF(buf, len, c);
3562     cur += l;
3563     c = CUR_SCHAR(cur, l);
3564     while (xmlIsNameChar(ctxt, c)) {
3565 	COPY_BUF(buf, len, c);
3566 	cur += l;
3567 	c = CUR_SCHAR(cur, l);
3568 	if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3569 	    /*
3570 	     * Okay someone managed to make a huge name, so he's ready to pay
3571 	     * for the processing speed.
3572 	     */
3573 	    xmlChar *buffer;
3574 	    int max = len * 2;
3575 
3576 	    buffer = (xmlChar *) xmlMallocAtomic(max);
3577 	    if (buffer == NULL) {
3578 	        xmlErrMemory(ctxt);
3579 		return(NULL);
3580 	    }
3581 	    memcpy(buffer, buf, len);
3582 	    while (xmlIsNameChar(ctxt, c)) {
3583 		if (len + 10 > max) {
3584 		    xmlChar *tmp;
3585 
3586 		    max *= 2;
3587 		    tmp = (xmlChar *) xmlRealloc(buffer, max);
3588 		    if (tmp == NULL) {
3589 			xmlErrMemory(ctxt);
3590 			xmlFree(buffer);
3591 			return(NULL);
3592 		    }
3593 		    buffer = tmp;
3594 		}
3595 		COPY_BUF(buffer, len, c);
3596 		cur += l;
3597 		c = CUR_SCHAR(cur, l);
3598                 if (len > maxLength) {
3599                     xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3600                     xmlFree(buffer);
3601                     return(NULL);
3602                 }
3603 	    }
3604 	    buffer[len] = 0;
3605 	    *str = cur;
3606 	    return(buffer);
3607 	}
3608     }
3609     if (len > maxLength) {
3610         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3611         return(NULL);
3612     }
3613     *str = cur;
3614     ret = xmlStrndup(buf, len);
3615     if (ret == NULL)
3616         xmlErrMemory(ctxt);
3617     return(ret);
3618 }
3619 
3620 /**
3621  * xmlParseNmtoken:
3622  * @ctxt:  an XML parser context
3623  *
3624  * DEPRECATED: Internal function, don't use.
3625  *
3626  * parse an XML Nmtoken.
3627  *
3628  * [7] Nmtoken ::= (NameChar)+
3629  *
3630  * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3631  *
3632  * Returns the Nmtoken parsed or NULL
3633  */
3634 
3635 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3636 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3637     xmlChar buf[XML_MAX_NAMELEN + 5];
3638     xmlChar *ret;
3639     int len = 0, l;
3640     int c;
3641     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3642                     XML_MAX_TEXT_LENGTH :
3643                     XML_MAX_NAME_LENGTH;
3644 
3645     c = CUR_CHAR(l);
3646 
3647     while (xmlIsNameChar(ctxt, c)) {
3648 	COPY_BUF(buf, len, c);
3649 	NEXTL(l);
3650 	c = CUR_CHAR(l);
3651 	if (len >= XML_MAX_NAMELEN) {
3652 	    /*
3653 	     * Okay someone managed to make a huge token, so he's ready to pay
3654 	     * for the processing speed.
3655 	     */
3656 	    xmlChar *buffer;
3657 	    int max = len * 2;
3658 
3659 	    buffer = (xmlChar *) xmlMallocAtomic(max);
3660 	    if (buffer == NULL) {
3661 	        xmlErrMemory(ctxt);
3662 		return(NULL);
3663 	    }
3664 	    memcpy(buffer, buf, len);
3665 	    while (xmlIsNameChar(ctxt, c)) {
3666 		if (len + 10 > max) {
3667 		    xmlChar *tmp;
3668 
3669 		    max *= 2;
3670 		    tmp = (xmlChar *) xmlRealloc(buffer, max);
3671 		    if (tmp == NULL) {
3672 			xmlErrMemory(ctxt);
3673 			xmlFree(buffer);
3674 			return(NULL);
3675 		    }
3676 		    buffer = tmp;
3677 		}
3678 		COPY_BUF(buffer, len, c);
3679                 if (len > maxLength) {
3680                     xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3681                     xmlFree(buffer);
3682                     return(NULL);
3683                 }
3684 		NEXTL(l);
3685 		c = CUR_CHAR(l);
3686 	    }
3687 	    buffer[len] = 0;
3688 	    return(buffer);
3689 	}
3690     }
3691     if (len == 0)
3692         return(NULL);
3693     if (len > maxLength) {
3694         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3695         return(NULL);
3696     }
3697     ret = xmlStrndup(buf, len);
3698     if (ret == NULL)
3699         xmlErrMemory(ctxt);
3700     return(ret);
3701 }
3702 
3703 /**
3704  * xmlExpandPEsInEntityValue:
3705  * @ctxt:  parser context
3706  * @buf:  string buffer
3707  * @str:  entity value
3708  * @length:  size of entity value
3709  * @depth:  nesting depth
3710  *
3711  * Validate an entity value and expand parameter entities.
3712  */
3713 static void
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt,xmlSBuf * buf,const xmlChar * str,int length,int depth)3714 xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3715                           const xmlChar *str, int length, int depth) {
3716     int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3717     const xmlChar *end, *chunk;
3718     int c, l;
3719 
3720     if (str == NULL)
3721         return;
3722 
3723     depth += 1;
3724     if (depth > maxDepth) {
3725 	xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3726                        "Maximum entity nesting depth exceeded");
3727 	return;
3728     }
3729 
3730     end = str + length;
3731     chunk = str;
3732 
3733     while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3734         c = *str;
3735 
3736         if (c >= 0x80) {
3737             l = xmlUTF8MultibyteLen(ctxt, str,
3738                     "invalid character in entity value\n");
3739             if (l == 0) {
3740                 if (chunk < str)
3741                     xmlSBufAddString(buf, chunk, str - chunk);
3742                 xmlSBufAddReplChar(buf);
3743                 str += 1;
3744                 chunk = str;
3745             } else {
3746                 str += l;
3747             }
3748         } else if (c == '&') {
3749             if (str[1] == '#') {
3750                 if (chunk < str)
3751                     xmlSBufAddString(buf, chunk, str - chunk);
3752 
3753                 c = xmlParseStringCharRef(ctxt, &str);
3754                 if (c == 0)
3755                     return;
3756 
3757                 xmlSBufAddChar(buf, c);
3758 
3759                 chunk = str;
3760             } else {
3761                 xmlChar *name;
3762 
3763                 /*
3764                  * General entity references are checked for
3765                  * syntactic validity.
3766                  */
3767                 str++;
3768                 name = xmlParseStringName(ctxt, &str);
3769 
3770                 if ((name == NULL) || (*str++ != ';')) {
3771                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3772                             "EntityValue: '&' forbidden except for entities "
3773                             "references\n");
3774                     xmlFree(name);
3775                     return;
3776                 }
3777 
3778                 xmlFree(name);
3779             }
3780         } else if (c == '%') {
3781             xmlEntityPtr ent;
3782 
3783             if (chunk < str)
3784                 xmlSBufAddString(buf, chunk, str - chunk);
3785 
3786             ent = xmlParseStringPEReference(ctxt, &str);
3787             if (ent == NULL)
3788                 return;
3789 
3790             if (!PARSER_EXTERNAL(ctxt)) {
3791                 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3792                 return;
3793             }
3794 
3795             if (ent->content == NULL) {
3796                 /*
3797                  * Note: external parsed entities will not be loaded,
3798                  * it is not required for a non-validating parser to
3799                  * complete external PEReferences coming from the
3800                  * internal subset
3801                  */
3802                 if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3803                     ((ctxt->replaceEntities) ||
3804                      (ctxt->validate))) {
3805                     xmlLoadEntityContent(ctxt, ent);
3806                 } else {
3807                     xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3808                                   "not validating will not read content for "
3809                                   "PE entity %s\n", ent->name, NULL);
3810                 }
3811             }
3812 
3813             /*
3814              * TODO: Skip if ent->content is still NULL.
3815              */
3816 
3817             if (xmlParserEntityCheck(ctxt, ent->length))
3818                 return;
3819 
3820             if (ent->flags & XML_ENT_EXPANDING) {
3821                 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3822                 xmlHaltParser(ctxt);
3823                 return;
3824             }
3825 
3826             ent->flags |= XML_ENT_EXPANDING;
3827             xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3828                                       depth);
3829             ent->flags &= ~XML_ENT_EXPANDING;
3830 
3831             chunk = str;
3832         } else {
3833             /* Normal ASCII char */
3834             if (!IS_BYTE_CHAR(c)) {
3835                 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3836                         "invalid character in entity value\n");
3837                 if (chunk < str)
3838                     xmlSBufAddString(buf, chunk, str - chunk);
3839                 xmlSBufAddReplChar(buf);
3840                 str += 1;
3841                 chunk = str;
3842             } else {
3843                 str += 1;
3844             }
3845         }
3846     }
3847 
3848     if (chunk < str)
3849         xmlSBufAddString(buf, chunk, str - chunk);
3850 
3851     return;
3852 }
3853 
3854 /**
3855  * xmlParseEntityValue:
3856  * @ctxt:  an XML parser context
3857  * @orig:  if non-NULL store a copy of the original entity value
3858  *
3859  * DEPRECATED: Internal function, don't use.
3860  *
3861  * parse a value for ENTITY declarations
3862  *
3863  * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3864  *	               "'" ([^%&'] | PEReference | Reference)* "'"
3865  *
3866  * Returns the EntityValue parsed with reference substituted or NULL
3867  */
3868 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3869 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3870     unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3871                          XML_MAX_HUGE_LENGTH :
3872                          XML_MAX_TEXT_LENGTH;
3873     xmlSBuf buf;
3874     const xmlChar *start;
3875     int quote, length;
3876 
3877     xmlSBufInit(&buf, maxLength);
3878 
3879     GROW;
3880 
3881     quote = CUR;
3882     if ((quote != '"') && (quote != '\'')) {
3883 	xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3884 	return(NULL);
3885     }
3886     CUR_PTR++;
3887 
3888     length = 0;
3889 
3890     /*
3891      * Copy raw content of the entity into a buffer
3892      */
3893     while (1) {
3894         int c;
3895 
3896         if (PARSER_STOPPED(ctxt))
3897             goto error;
3898 
3899         if (CUR_PTR >= ctxt->input->end) {
3900             xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3901             goto error;
3902         }
3903 
3904         c = CUR;
3905 
3906         if (c == 0) {
3907             xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3908                     "invalid character in entity value\n");
3909             goto error;
3910         }
3911         if (c == quote)
3912             break;
3913         NEXTL(1);
3914         length += 1;
3915 
3916         /*
3917          * TODO: Check growth threshold
3918          */
3919         if (ctxt->input->end - CUR_PTR < 10)
3920             GROW;
3921     }
3922 
3923     start = CUR_PTR - length;
3924 
3925     if (orig != NULL) {
3926         *orig = xmlStrndup(start, length);
3927         if (*orig == NULL)
3928             xmlErrMemory(ctxt);
3929     }
3930 
3931     xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3932 
3933     NEXTL(1);
3934 
3935     return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3936 
3937 error:
3938     xmlSBufCleanup(&buf, ctxt, "entity length too long");
3939     return(NULL);
3940 }
3941 
3942 /**
3943  * xmlCheckEntityInAttValue:
3944  * @ctxt:  parser context
3945  * @pent:  entity
3946  * @depth:  nesting depth
3947  *
3948  * Check an entity reference in an attribute value for validity
3949  * without expanding it.
3950  */
3951 static void
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt,xmlEntityPtr pent,int depth)3952 xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3953     int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3954     const xmlChar *str;
3955     unsigned long expandedSize = pent->length;
3956     int c, flags;
3957 
3958     depth += 1;
3959     if (depth > maxDepth) {
3960 	xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3961                        "Maximum entity nesting depth exceeded");
3962 	return;
3963     }
3964 
3965     if (pent->flags & XML_ENT_EXPANDING) {
3966         xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3967         xmlHaltParser(ctxt);
3968         return;
3969     }
3970 
3971     /*
3972      * If we're parsing a default attribute value in DTD content,
3973      * the entity might reference other entities which weren't
3974      * defined yet, so the check isn't reliable.
3975      */
3976     if (ctxt->inSubset == 0)
3977         flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3978     else
3979         flags = XML_ENT_VALIDATED;
3980 
3981     str = pent->content;
3982     if (str == NULL)
3983         goto done;
3984 
3985     /*
3986      * Note that entity values are already validated. We only check
3987      * for illegal less-than signs and compute the expanded size
3988      * of the entity. No special handling for multi-byte characters
3989      * is needed.
3990      */
3991     while (!PARSER_STOPPED(ctxt)) {
3992         c = *str;
3993 
3994 	if (c != '&') {
3995             if (c == 0)
3996                 break;
3997 
3998             if (c == '<')
3999                 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4000                         "'<' in entity '%s' is not allowed in attributes "
4001                         "values\n", pent->name);
4002 
4003             str += 1;
4004         } else if (str[1] == '#') {
4005             int val;
4006 
4007 	    val = xmlParseStringCharRef(ctxt, &str);
4008 	    if (val == 0) {
4009                 pent->content[0] = 0;
4010                 break;
4011             }
4012 	} else {
4013             xmlChar *name;
4014             xmlEntityPtr ent;
4015 
4016 	    name = xmlParseStringEntityRef(ctxt, &str);
4017 	    if (name == NULL) {
4018                 pent->content[0] = 0;
4019                 break;
4020             }
4021 
4022             ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4023             xmlFree(name);
4024 
4025             if ((ent != NULL) &&
4026                 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4027                 if ((ent->flags & flags) != flags) {
4028                     pent->flags |= XML_ENT_EXPANDING;
4029                     xmlCheckEntityInAttValue(ctxt, ent, depth);
4030                     pent->flags &= ~XML_ENT_EXPANDING;
4031                 }
4032 
4033                 xmlSaturatedAdd(&expandedSize, ent->expandedSize);
4034                 xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
4035             }
4036         }
4037     }
4038 
4039 done:
4040     if (ctxt->inSubset == 0)
4041         pent->expandedSize = expandedSize;
4042 
4043     pent->flags |= flags;
4044 }
4045 
4046 /**
4047  * xmlExpandEntityInAttValue:
4048  * @ctxt:  parser context
4049  * @buf:  string buffer
4050  * @str:  entity or attribute value
4051  * @pent:  entity for entity value, NULL for attribute values
4052  * @normalize:  whether to collapse whitespace
4053  * @inSpace:  whitespace state
4054  * @depth:  nesting depth
4055  * @check:  whether to check for amplification
4056  *
4057  * Expand general entity references in an entity or attribute value.
4058  * Perform attribute value normalization.
4059  */
4060 static void
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt,xmlSBuf * buf,const xmlChar * str,xmlEntityPtr pent,int normalize,int * inSpace,int depth,int check)4061 xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
4062                           const xmlChar *str, xmlEntityPtr pent, int normalize,
4063                           int *inSpace, int depth, int check) {
4064     int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4065     int c, chunkSize;
4066 
4067     if (str == NULL)
4068         return;
4069 
4070     depth += 1;
4071     if (depth > maxDepth) {
4072 	xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4073                        "Maximum entity nesting depth exceeded");
4074 	return;
4075     }
4076 
4077     if (pent != NULL) {
4078         if (pent->flags & XML_ENT_EXPANDING) {
4079             xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4080             xmlHaltParser(ctxt);
4081             return;
4082         }
4083 
4084         if (check) {
4085             if (xmlParserEntityCheck(ctxt, pent->length))
4086                 return;
4087         }
4088     }
4089 
4090     chunkSize = 0;
4091 
4092     /*
4093      * Note that entity values are already validated. No special
4094      * handling for multi-byte characters is needed.
4095      */
4096     while (!PARSER_STOPPED(ctxt)) {
4097         c = *str;
4098 
4099 	if (c != '&') {
4100             if (c == 0)
4101                 break;
4102 
4103             /*
4104              * If this function is called without an entity, it is used to
4105              * expand entities in an attribute content where less-than was
4106              * already unscaped and is allowed.
4107              */
4108             if ((pent != NULL) && (c == '<')) {
4109                 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4110                         "'<' in entity '%s' is not allowed in attributes "
4111                         "values\n", pent->name);
4112                 break;
4113             }
4114 
4115             if (c <= 0x20) {
4116                 if ((normalize) && (*inSpace)) {
4117                     /* Skip char */
4118                     if (chunkSize > 0) {
4119                         xmlSBufAddString(buf, str - chunkSize, chunkSize);
4120                         chunkSize = 0;
4121                     }
4122                 } else if (c < 0x20) {
4123                     if (chunkSize > 0) {
4124                         xmlSBufAddString(buf, str - chunkSize, chunkSize);
4125                         chunkSize = 0;
4126                     }
4127 
4128                     xmlSBufAddCString(buf, " ", 1);
4129                 } else {
4130                     chunkSize += 1;
4131                 }
4132 
4133                 *inSpace = 1;
4134             } else {
4135                 chunkSize += 1;
4136                 *inSpace = 0;
4137             }
4138 
4139             str += 1;
4140         } else if (str[1] == '#') {
4141             int val;
4142 
4143             if (chunkSize > 0) {
4144                 xmlSBufAddString(buf, str - chunkSize, chunkSize);
4145                 chunkSize = 0;
4146             }
4147 
4148 	    val = xmlParseStringCharRef(ctxt, &str);
4149 	    if (val == 0) {
4150                 if (pent != NULL)
4151                     pent->content[0] = 0;
4152                 break;
4153             }
4154 
4155             if (val == ' ') {
4156                 if ((!normalize) || (!*inSpace))
4157                     xmlSBufAddCString(buf, " ", 1);
4158                 *inSpace = 1;
4159             } else {
4160                 xmlSBufAddChar(buf, val);
4161                 *inSpace = 0;
4162             }
4163 	} else {
4164             xmlChar *name;
4165             xmlEntityPtr ent;
4166 
4167             if (chunkSize > 0) {
4168                 xmlSBufAddString(buf, str - chunkSize, chunkSize);
4169                 chunkSize = 0;
4170             }
4171 
4172 	    name = xmlParseStringEntityRef(ctxt, &str);
4173             if (name == NULL) {
4174                 if (pent != NULL)
4175                     pent->content[0] = 0;
4176                 break;
4177             }
4178 
4179             ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4180             xmlFree(name);
4181 
4182 	    if ((ent != NULL) &&
4183 		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4184 		if (ent->content == NULL) {
4185 		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
4186 			    "predefined entity has no content\n");
4187                     break;
4188                 }
4189 
4190                 xmlSBufAddString(buf, ent->content, ent->length);
4191 
4192                 *inSpace = 0;
4193 	    } else if ((ent != NULL) && (ent->content != NULL)) {
4194                 if (pent != NULL)
4195                     pent->flags |= XML_ENT_EXPANDING;
4196 		xmlExpandEntityInAttValue(ctxt, buf, ent->content, ent,
4197                                           normalize, inSpace, depth, check);
4198                 if (pent != NULL)
4199                     pent->flags &= ~XML_ENT_EXPANDING;
4200 	    }
4201         }
4202     }
4203 
4204     if (chunkSize > 0)
4205         xmlSBufAddString(buf, str - chunkSize, chunkSize);
4206 
4207     return;
4208 }
4209 
4210 /**
4211  * xmlExpandEntitiesInAttValue:
4212  * @ctxt:  parser context
4213  * @str:  entity or attribute value
4214  * @normalize:  whether to collapse whitespace
4215  *
4216  * Expand general entity references in an entity or attribute value.
4217  * Perform attribute value normalization.
4218  *
4219  * Returns the expanded attribtue value.
4220  */
4221 xmlChar *
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt,const xmlChar * str,int normalize)4222 xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
4223                             int normalize) {
4224     unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4225                          XML_MAX_HUGE_LENGTH :
4226                          XML_MAX_TEXT_LENGTH;
4227     xmlSBuf buf;
4228     int inSpace = 1;
4229 
4230     xmlSBufInit(&buf, maxLength);
4231 
4232     xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
4233                               ctxt->inputNr, /* check */ 0);
4234 
4235     if ((normalize) && (inSpace) && (buf.size > 0))
4236         buf.size--;
4237 
4238     return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4239 }
4240 
4241 /**
4242  * xmlParseAttValueInternal:
4243  * @ctxt:  an XML parser context
4244  * @len:  attribute len result
4245  * @alloc:  whether the attribute was reallocated as a new string
4246  * @normalize:  if 1 then further non-CDATA normalization must be done
4247  *
4248  * parse a value for an attribute.
4249  * NOTE: if no normalization is needed, the routine will return pointers
4250  *       directly from the data buffer.
4251  *
4252  * 3.3.3 Attribute-Value Normalization:
4253  * Before the value of an attribute is passed to the application or
4254  * checked for validity, the XML processor must normalize it as follows:
4255  * - a character reference is processed by appending the referenced
4256  *   character to the attribute value
4257  * - an entity reference is processed by recursively processing the
4258  *   replacement text of the entity
4259  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4260  *   appending #x20 to the normalized value, except that only a single
4261  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4262  *   parsed entity or the literal entity value of an internal parsed entity
4263  * - other characters are processed by appending them to the normalized value
4264  * If the declared value is not CDATA, then the XML processor must further
4265  * process the normalized attribute value by discarding any leading and
4266  * trailing space (#x20) characters, and by replacing sequences of space
4267  * (#x20) characters by a single space (#x20) character.
4268  * All attributes for which no declaration has been read should be treated
4269  * by a non-validating parser as if declared CDATA.
4270  *
4271  * Returns the AttValue parsed or NULL. The value has to be freed by the
4272  *     caller if it was copied, this can be detected by val[*len] == 0.
4273  */
4274 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * attlen,int * alloc,int normalize,int isNamespace)4275 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
4276                          int normalize, int isNamespace) {
4277     unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4278                          XML_MAX_HUGE_LENGTH :
4279                          XML_MAX_TEXT_LENGTH;
4280     xmlSBuf buf;
4281     xmlChar *ret;
4282     int c, l, quote, flags, chunkSize;
4283     int inSpace = 1;
4284     int replaceEntities;
4285 
4286     /* Always expand namespace URIs */
4287     replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4288 
4289     xmlSBufInit(&buf, maxLength);
4290 
4291     GROW;
4292 
4293     quote = CUR;
4294     if ((quote != '"') && (quote != '\'')) {
4295 	xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4296 	return(NULL);
4297     }
4298     NEXTL(1);
4299 
4300     if (ctxt->inSubset == 0)
4301         flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4302     else
4303         flags = XML_ENT_VALIDATED;
4304 
4305     inSpace = 1;
4306     chunkSize = 0;
4307 
4308     while (1) {
4309         if (PARSER_STOPPED(ctxt))
4310             goto error;
4311 
4312         if (CUR_PTR >= ctxt->input->end) {
4313             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4314                            "AttValue: ' expected\n");
4315             goto error;
4316         }
4317 
4318         /*
4319          * TODO: Check growth threshold
4320          */
4321         if (ctxt->input->end - CUR_PTR < 10)
4322             GROW;
4323 
4324         c = CUR;
4325 
4326         if (c >= 0x80) {
4327             l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4328                     "invalid character in attribute value\n");
4329             if (l == 0) {
4330                 if (chunkSize > 0) {
4331                     xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4332                     chunkSize = 0;
4333                 }
4334                 xmlSBufAddReplChar(&buf);
4335                 NEXTL(1);
4336             } else {
4337                 chunkSize += l;
4338                 NEXTL(l);
4339             }
4340 
4341             inSpace = 0;
4342         } else if (c != '&') {
4343             if (c > 0x20) {
4344                 if (c == quote)
4345                     break;
4346 
4347                 if (c == '<')
4348                     xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4349 
4350                 chunkSize += 1;
4351                 inSpace = 0;
4352             } else if (!IS_BYTE_CHAR(c)) {
4353                 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4354                         "invalid character in attribute value\n");
4355                 if (chunkSize > 0) {
4356                     xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4357                     chunkSize = 0;
4358                 }
4359                 xmlSBufAddReplChar(&buf);
4360                 inSpace = 0;
4361             } else {
4362                 /* Whitespace */
4363                 if ((normalize) && (inSpace)) {
4364                     /* Skip char */
4365                     if (chunkSize > 0) {
4366                         xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4367                         chunkSize = 0;
4368                     }
4369                 } else if (c < 0x20) {
4370                     /* Convert to space */
4371                     if (chunkSize > 0) {
4372                         xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4373                         chunkSize = 0;
4374                     }
4375 
4376                     xmlSBufAddCString(&buf, " ", 1);
4377                 } else {
4378                     chunkSize += 1;
4379                 }
4380 
4381                 inSpace = 1;
4382 
4383                 if ((c == 0xD) && (NXT(1) == 0xA))
4384                     CUR_PTR++;
4385             }
4386 
4387             NEXTL(1);
4388         } else if (NXT(1) == '#') {
4389             int val;
4390 
4391             if (chunkSize > 0) {
4392                 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4393                 chunkSize = 0;
4394             }
4395 
4396             val = xmlParseCharRef(ctxt);
4397             if (val == 0)
4398                 goto error;
4399 
4400             if ((val == '&') && (!replaceEntities)) {
4401                 /*
4402                  * The reparsing will be done in xmlStringGetNodeList()
4403                  * called by the attribute() function in SAX.c
4404                  */
4405                 xmlSBufAddCString(&buf, "&#38;", 5);
4406                 inSpace = 0;
4407             } else if (val == ' ') {
4408                 if ((!normalize) || (!inSpace))
4409                     xmlSBufAddCString(&buf, " ", 1);
4410                 inSpace = 1;
4411             } else {
4412                 xmlSBufAddChar(&buf, val);
4413                 inSpace = 0;
4414             }
4415         } else {
4416             const xmlChar *name;
4417             xmlEntityPtr ent;
4418 
4419             if (chunkSize > 0) {
4420                 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4421                 chunkSize = 0;
4422             }
4423 
4424             name = xmlParseEntityRefInternal(ctxt);
4425             if (name == NULL) {
4426                 /*
4427                  * Probably a literal '&' which wasn't escaped.
4428                  * TODO: Handle gracefully in recovery mode.
4429                  */
4430                 continue;
4431             }
4432 
4433             ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4434             if (ent == NULL)
4435                 continue;
4436 
4437             if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4438                 if ((ent->content[0] == '&') && (!replaceEntities))
4439                     xmlSBufAddCString(&buf, "&#38;", 5);
4440                 else
4441                     xmlSBufAddString(&buf, ent->content, ent->length);
4442                 inSpace = 0;
4443             } else if (replaceEntities) {
4444                 xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent,
4445                                           normalize, &inSpace, ctxt->inputNr,
4446                                           /* check */ 1);
4447             } else {
4448                 if ((ent->flags & flags) != flags)
4449                     xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4450 
4451                 if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4452                     ent->content[0] = 0;
4453                     goto error;
4454                 }
4455 
4456                 /*
4457                  * Just output the reference
4458                  */
4459                 xmlSBufAddCString(&buf, "&", 1);
4460                 xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4461                 xmlSBufAddCString(&buf, ";", 1);
4462 
4463                 inSpace = 0;
4464             }
4465 	}
4466     }
4467 
4468     if ((buf.mem == NULL) && (alloc != NULL)) {
4469         ret = (xmlChar *) CUR_PTR - chunkSize;
4470 
4471         if (attlen != NULL)
4472             *attlen = chunkSize;
4473         if ((normalize) && (inSpace) && (chunkSize > 0))
4474             *attlen -= 1;
4475         *alloc = 0;
4476 
4477         /* Report potential error */
4478         xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4479     } else {
4480         if (chunkSize > 0)
4481             xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4482 
4483         if ((normalize) && (inSpace) && (buf.size > 0))
4484             buf.size--;
4485 
4486         ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4487 
4488         if (ret != NULL) {
4489             if (attlen != NULL)
4490                 *attlen = buf.size;
4491             if (alloc != NULL)
4492                 *alloc = 1;
4493         }
4494     }
4495 
4496     NEXTL(1);
4497 
4498     return(ret);
4499 
4500 error:
4501     xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4502     return(NULL);
4503 }
4504 
4505 /**
4506  * xmlParseAttValue:
4507  * @ctxt:  an XML parser context
4508  *
4509  * DEPRECATED: Internal function, don't use.
4510  *
4511  * parse a value for an attribute
4512  * Note: the parser won't do substitution of entities here, this
4513  * will be handled later in xmlStringGetNodeList
4514  *
4515  * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4516  *                   "'" ([^<&'] | Reference)* "'"
4517  *
4518  * 3.3.3 Attribute-Value Normalization:
4519  * Before the value of an attribute is passed to the application or
4520  * checked for validity, the XML processor must normalize it as follows:
4521  * - a character reference is processed by appending the referenced
4522  *   character to the attribute value
4523  * - an entity reference is processed by recursively processing the
4524  *   replacement text of the entity
4525  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4526  *   appending #x20 to the normalized value, except that only a single
4527  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4528  *   parsed entity or the literal entity value of an internal parsed entity
4529  * - other characters are processed by appending them to the normalized value
4530  * If the declared value is not CDATA, then the XML processor must further
4531  * process the normalized attribute value by discarding any leading and
4532  * trailing space (#x20) characters, and by replacing sequences of space
4533  * (#x20) characters by a single space (#x20) character.
4534  * All attributes for which no declaration has been read should be treated
4535  * by a non-validating parser as if declared CDATA.
4536  *
4537  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4538  */
4539 
4540 
4541 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4542 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4543     if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4544     return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4545 }
4546 
4547 /**
4548  * xmlParseSystemLiteral:
4549  * @ctxt:  an XML parser context
4550  *
4551  * DEPRECATED: Internal function, don't use.
4552  *
4553  * parse an XML Literal
4554  *
4555  * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4556  *
4557  * Returns the SystemLiteral parsed or NULL
4558  */
4559 
4560 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4561 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4562     xmlChar *buf = NULL;
4563     int len = 0;
4564     int size = XML_PARSER_BUFFER_SIZE;
4565     int cur, l;
4566     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4567                     XML_MAX_TEXT_LENGTH :
4568                     XML_MAX_NAME_LENGTH;
4569     xmlChar stop;
4570 
4571     if (RAW == '"') {
4572         NEXT;
4573 	stop = '"';
4574     } else if (RAW == '\'') {
4575         NEXT;
4576 	stop = '\'';
4577     } else {
4578 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4579 	return(NULL);
4580     }
4581 
4582     buf = (xmlChar *) xmlMallocAtomic(size);
4583     if (buf == NULL) {
4584         xmlErrMemory(ctxt);
4585 	return(NULL);
4586     }
4587     cur = CUR_CHAR(l);
4588     while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4589 	if (len + 5 >= size) {
4590 	    xmlChar *tmp;
4591 
4592 	    size *= 2;
4593 	    tmp = (xmlChar *) xmlRealloc(buf, size);
4594 	    if (tmp == NULL) {
4595 	        xmlFree(buf);
4596 		xmlErrMemory(ctxt);
4597 		return(NULL);
4598 	    }
4599 	    buf = tmp;
4600 	}
4601 	COPY_BUF(buf, len, cur);
4602         if (len > maxLength) {
4603             xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4604             xmlFree(buf);
4605             return(NULL);
4606         }
4607 	NEXTL(l);
4608 	cur = CUR_CHAR(l);
4609     }
4610     buf[len] = 0;
4611     if (!IS_CHAR(cur)) {
4612 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4613     } else {
4614 	NEXT;
4615     }
4616     return(buf);
4617 }
4618 
4619 /**
4620  * xmlParsePubidLiteral:
4621  * @ctxt:  an XML parser context
4622  *
4623  * DEPRECATED: Internal function, don't use.
4624  *
4625  * parse an XML public literal
4626  *
4627  * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4628  *
4629  * Returns the PubidLiteral parsed or NULL.
4630  */
4631 
4632 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4633 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4634     xmlChar *buf = NULL;
4635     int len = 0;
4636     int size = XML_PARSER_BUFFER_SIZE;
4637     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4638                     XML_MAX_TEXT_LENGTH :
4639                     XML_MAX_NAME_LENGTH;
4640     xmlChar cur;
4641     xmlChar stop;
4642 
4643     if (RAW == '"') {
4644         NEXT;
4645 	stop = '"';
4646     } else if (RAW == '\'') {
4647         NEXT;
4648 	stop = '\'';
4649     } else {
4650 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4651 	return(NULL);
4652     }
4653     buf = (xmlChar *) xmlMallocAtomic(size);
4654     if (buf == NULL) {
4655 	xmlErrMemory(ctxt);
4656 	return(NULL);
4657     }
4658     cur = CUR;
4659     while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4660            (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4661 	if (len + 1 >= size) {
4662 	    xmlChar *tmp;
4663 
4664 	    size *= 2;
4665 	    tmp = (xmlChar *) xmlRealloc(buf, size);
4666 	    if (tmp == NULL) {
4667 		xmlErrMemory(ctxt);
4668 		xmlFree(buf);
4669 		return(NULL);
4670 	    }
4671 	    buf = tmp;
4672 	}
4673 	buf[len++] = cur;
4674         if (len > maxLength) {
4675             xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4676             xmlFree(buf);
4677             return(NULL);
4678         }
4679 	NEXT;
4680 	cur = CUR;
4681     }
4682     buf[len] = 0;
4683     if (cur != stop) {
4684 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4685     } else {
4686 	NEXTL(1);
4687     }
4688     return(buf);
4689 }
4690 
4691 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4692 
/*
 * Lookup table used for the test in the inner loop of the char data
 * parsing. A non-zero entry lets the scan continue over that byte, a
 * zero entry stops it. The zero entries are: control bytes other than
 * 0x09 (which includes LF and CR), '&', '<', ']' and every byte from
 * 0x80 upwards.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x08 */ 0, 0x09, 0, 0, 0, 0, 0, 0, /* tab only, no LF/CR */
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x18 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0, 0x27, /* & */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0, 0x3D, 0x3E, 0x3F, /* < */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0, 0x5E, 0x5F, /* ] */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, /* non-ascii */
    /* 0x88 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x98 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xA8 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xB8 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC8 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xD8 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xE8 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xF8 */ 0, 0, 0, 0, 0, 0, 0, 0
};
4730 
4731 /**
4732  * xmlParseCharDataInternal:
4733  * @ctxt:  an XML parser context
4734  * @partial:  buffer may contain partial UTF-8 sequences
4735  *
4736  * Parse character data. Always makes progress if the first char isn't
4737  * '<' or '&'.
4738  *
4739  * The right angle bracket (>) may be represented using the string "&gt;",
4740  * and must, for compatibility, be escaped using "&gt;" or a character
4741  * reference when it appears in the string "]]>" in content, when that
4742  * string is not marking the end of a CDATA section.
4743  *
4744  * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4745  */
static void
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
    /* Scan pointer; it runs ahead of ctxt->input->cur while a run is built. */
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Position snapshot. It is refreshed after each SAX delivery in the
     * second delivery path below and written back to the input right
     * before falling back to xmlParseCharDataComplex().
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    GROW;
    /*
     * Accelerated common case where the input doesn't need to be
     * modified before passing it to the handler.
     */
    in = ctxt->input->cur;
    do {
get_more_space:
        /* Skip a leading run made only of spaces (0x20) and line feeds. */
        while (*in == 0x20) { in++; ctxt->input->col++; }
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more_space;
        }
        if (*in == '<') {
            /*
             * The whole run up to the markup consists of spaces and line
             * feeds: deliver it (if non-empty) and return.
             */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                const xmlChar *tmp = ctxt->input->cur;
                /* Advance the input before invoking any callback. */
                ctxt->input->cur = in;

                if ((ctxt->sax != NULL) &&
                    (ctxt->disableSAX == 0) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters)) {
                    /*
                     * The handler distinguishes ignorable whitespace from
                     * character data: let areBlanks() pick the callback.
                     */
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                   tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        /* Flip the top of the space stack from -1 to -2. */
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                } else if ((ctxt->sax != NULL) &&
                           (ctxt->disableSAX == 0) &&
                           (ctxt->sax->characters != NULL)) {
                    ctxt->sax->characters(ctxt->userData,
                                          tmp, nbchar);
                }
            }
            return;
        }

get_more:
        /*
         * Consume bytes for which the test_char_data lookup table is
         * non-zero, counting columns in a local before storing them back.
         */
        ccol = ctxt->input->col;
        while (test_char_data[*in]) {
            in++;
            ccol++;
        }
        ctxt->input->col = ccol;
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more;
        }
        if (*in == ']') {
            /* "]]>" is not allowed in character data: report and stop. */
            if ((in[1] == ']') && (in[2] == '>')) {
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                ctxt->input->cur = in + 1;
                return;
            }
            /* A lone ']' is ordinary data. */
            in++;
            ctxt->input->col++;
            goto get_more;
        }
        /* Deliver the run [ctxt->input->cur, in) collected so far. */
        nbchar = in - ctxt->input->cur;
        if (nbchar > 0) {
            if ((ctxt->sax != NULL) &&
                (ctxt->disableSAX == 0) &&
                (ctxt->sax->ignorableWhitespace !=
                 ctxt->sax->characters) &&
                (IS_BLANK_CH(*ctxt->input->cur))) {
                /*
                 * The run starts with a blank and the handler separates
                 * ignorable whitespace: ask areBlanks() which callback
                 * applies.
                 */
                const xmlChar *tmp = ctxt->input->cur;
                ctxt->input->cur = in;

                if (areBlanks(ctxt, tmp, nbchar, 0)) {
                    if (ctxt->sax->ignorableWhitespace != NULL)
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                } else {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    if (*ctxt->space == -1)
                        *ctxt->space = -2;
                }
                line = ctxt->input->line;
                col = ctxt->input->col;
            } else if ((ctxt->sax != NULL) &&
                       (ctxt->disableSAX == 0)) {
                if (ctxt->sax->characters != NULL)
                    ctxt->sax->characters(ctxt->userData,
                                          ctxt->input->cur, nbchar);
                line = ctxt->input->line;
                col = ctxt->input->col;
            }
        }
        ctxt->input->cur = in;
        if (*in == 0xD) {
            in++;
            if (*in == 0xA) {
                /*
                 * CR LF: move the input onto the LF so the CR is dropped
                 * and the LF becomes the first byte of the next run.
                 */
                ctxt->input->cur = in;
                in++;
                ctxt->input->line++; ctxt->input->col = 1;
                continue; /* while */
            }
            /* Lone CR: step back; the loop condition below rejects it. */
            in--;
        }
        /* Markup or a reference starts here: the caller handles it. */
        if (*in == '<') {
            return;
        }
        if (*in == '&') {
            return;
        }
        SHRINK;
        GROW;
        /* Reload the scan pointer from the input after SHRINK/GROW. */
        in = ctxt->input->cur;
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
             (*in == 0x09) || (*in == 0x0a));
    /*
     * The next byte is outside the range handled above: restore the
     * saved position and let the generic routine continue; `partial`
     * is only forwarded to it.
     */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, partial);
}
4883 
4884 /**
4885  * xmlParseCharDataComplex:
4886  * @ctxt:  an XML parser context
4887  * @cdata:  int indicating whether we are within a CDATA section
4888  *
4889  * Always makes progress if the first char isn't '<' or '&'.
4890  *
4891  * parse a CharData section.this is the fallback function
4892  * of xmlParseCharData() when the parsing requires handling
4893  * of non-ASCII characters.
4894  */
4895 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int partial)4896 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4897     xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4898     int nbchar = 0;
4899     int cur, l;
4900 
4901     cur = CUR_CHAR(l);
4902     while ((cur != '<') && /* checked */
4903            (cur != '&') &&
4904 	   (IS_CHAR(cur))) {
4905 	if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4906 	    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4907 	}
4908 	COPY_BUF(buf, nbchar, cur);
4909 	/* move current position before possible calling of ctxt->sax->characters */
4910 	NEXTL(l);
4911 	if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4912 	    buf[nbchar] = 0;
4913 
4914 	    /*
4915 	     * OK the segment is to be consumed as chars.
4916 	     */
4917 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4918 		if (areBlanks(ctxt, buf, nbchar, 0)) {
4919 		    if (ctxt->sax->ignorableWhitespace != NULL)
4920 			ctxt->sax->ignorableWhitespace(ctxt->userData,
4921 			                               buf, nbchar);
4922 		} else {
4923 		    if (ctxt->sax->characters != NULL)
4924 			ctxt->sax->characters(ctxt->userData, buf, nbchar);
4925 		    if ((ctxt->sax->characters !=
4926 		         ctxt->sax->ignorableWhitespace) &&
4927 			(*ctxt->space == -1))
4928 			*ctxt->space = -2;
4929 		}
4930 	    }
4931 	    nbchar = 0;
4932             SHRINK;
4933 	}
4934 	cur = CUR_CHAR(l);
4935     }
4936     if (nbchar != 0) {
4937         buf[nbchar] = 0;
4938 	/*
4939 	 * OK the segment is to be consumed as chars.
4940 	 */
4941 	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4942 	    if (areBlanks(ctxt, buf, nbchar, 0)) {
4943 		if (ctxt->sax->ignorableWhitespace != NULL)
4944 		    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4945 	    } else {
4946 		if (ctxt->sax->characters != NULL)
4947 		    ctxt->sax->characters(ctxt->userData, buf, nbchar);
4948 		if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4949 		    (*ctxt->space == -1))
4950 		    *ctxt->space = -2;
4951 	    }
4952 	}
4953     }
4954     /*
4955      * cur == 0 can mean
4956      *
4957      * - End of buffer.
4958      * - An actual 0 character.
4959      * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4960      */
4961     if (ctxt->input->cur < ctxt->input->end) {
4962         if ((cur == 0) && (CUR != 0)) {
4963             if (partial == 0) {
4964                 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4965                         "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4966                 NEXTL(1);
4967             }
4968         } else if ((cur != '<') && (cur != '&')) {
4969             /* Generate the error and skip the offending character */
4970             xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4971                               "PCDATA invalid Char value %d\n", cur);
4972             NEXTL(l);
4973         }
4974     }
4975 }
4976 
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  unused
 *
 * DEPRECATED: Internal function, don't use.
 *
 * Thin public wrapper: forwards to xmlParseCharDataInternal() with
 * its "partial" argument set to 0.
 */
void
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
    xmlParseCharDataInternal(ctxt, 0);
}
4988 
4989 /**
4990  * xmlParseExternalID:
4991  * @ctxt:  an XML parser context
4992  * @publicID:  a xmlChar** receiving PubidLiteral
4993  * @strict: indicate whether we should restrict parsing to only
4994  *          production [75], see NOTE below
4995  *
4996  * DEPRECATED: Internal function, don't use.
4997  *
4998  * Parse an External ID or a Public ID
4999  *
5000  * NOTE: Productions [75] and [83] interact badly since [75] can generate
5001  *       'PUBLIC' S PubidLiteral S SystemLiteral
5002  *
5003  * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
5004  *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
5005  *
5006  * [83] PublicID ::= 'PUBLIC' S PubidLiteral
5007  *
5008  * Returns the function returns SystemLiteral and in the second
5009  *                case publicID receives PubidLiteral, is strict is off
5010  *                it is possible to return NULL and have publicID set.
5011  */
5012 
5013 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)5014 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
5015     xmlChar *URI = NULL;
5016 
5017     *publicID = NULL;
5018     if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
5019         SKIP(6);
5020 	if (SKIP_BLANKS == 0) {
5021 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5022 	                   "Space required after 'SYSTEM'\n");
5023 	}
5024 	URI = xmlParseSystemLiteral(ctxt);
5025 	if (URI == NULL) {
5026 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5027         }
5028     } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
5029         SKIP(6);
5030 	if (SKIP_BLANKS == 0) {
5031 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5032 		    "Space required after 'PUBLIC'\n");
5033 	}
5034 	*publicID = xmlParsePubidLiteral(ctxt);
5035 	if (*publicID == NULL) {
5036 	    xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
5037 	}
5038 	if (strict) {
5039 	    /*
5040 	     * We don't handle [83] so "S SystemLiteral" is required.
5041 	     */
5042 	    if (SKIP_BLANKS == 0) {
5043 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5044 			"Space required after the Public Identifier\n");
5045 	    }
5046 	} else {
5047 	    /*
5048 	     * We handle [83] so we return immediately, if
5049 	     * "S SystemLiteral" is not detected. We skip blanks if no
5050              * system literal was found, but this is harmless since we must
5051              * be at the end of a NotationDecl.
5052 	     */
5053 	    if (SKIP_BLANKS == 0) return(NULL);
5054 	    if ((CUR != '\'') && (CUR != '"')) return(NULL);
5055 	}
5056 	URI = xmlParseSystemLiteral(ctxt);
5057 	if (URI == NULL) {
5058 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5059         }
5060     }
5061     return(URI);
5062 }
5063 
5064 /**
5065  * xmlParseCommentComplex:
5066  * @ctxt:  an XML parser context
5067  * @buf:  the already parsed part of the buffer
5068  * @len:  number of bytes in the buffer
5069  * @size:  allocated size of the buffer
5070  *
5071  * Skip an XML (SGML) comment <!-- .... -->
5072  *  The spec says that "For compatibility, the string "--" (double-hyphen)
5073  *  must not occur within comments. "
5074  * This is the slow routine in case the accelerator for ascii didn't work
5075  *
5076  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5077  */
5078 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)5079 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
5080                        size_t len, size_t size) {
5081     int q, ql;
5082     int r, rl;
5083     int cur, l;
5084     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5085                        XML_MAX_HUGE_LENGTH :
5086                        XML_MAX_TEXT_LENGTH;
5087 
5088     if (buf == NULL) {
5089         len = 0;
5090 	size = XML_PARSER_BUFFER_SIZE;
5091 	buf = (xmlChar *) xmlMallocAtomic(size);
5092 	if (buf == NULL) {
5093 	    xmlErrMemory(ctxt);
5094 	    return;
5095 	}
5096     }
5097     q = CUR_CHAR(ql);
5098     if (q == 0)
5099         goto not_terminated;
5100     if (!IS_CHAR(q)) {
5101         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5102                           "xmlParseComment: invalid xmlChar value %d\n",
5103 	                  q);
5104 	xmlFree (buf);
5105 	return;
5106     }
5107     NEXTL(ql);
5108     r = CUR_CHAR(rl);
5109     if (r == 0)
5110         goto not_terminated;
5111     if (!IS_CHAR(r)) {
5112         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5113                           "xmlParseComment: invalid xmlChar value %d\n",
5114 	                  r);
5115 	xmlFree (buf);
5116 	return;
5117     }
5118     NEXTL(rl);
5119     cur = CUR_CHAR(l);
5120     if (cur == 0)
5121         goto not_terminated;
5122     while (IS_CHAR(cur) && /* checked */
5123            ((cur != '>') ||
5124 	    (r != '-') || (q != '-'))) {
5125 	if ((r == '-') && (q == '-')) {
5126 	    xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
5127 	}
5128 	if (len + 5 >= size) {
5129 	    xmlChar *new_buf;
5130             size_t new_size;
5131 
5132 	    new_size = size * 2;
5133 	    new_buf = (xmlChar *) xmlRealloc(buf, new_size);
5134 	    if (new_buf == NULL) {
5135 		xmlFree (buf);
5136 		xmlErrMemory(ctxt);
5137 		return;
5138 	    }
5139 	    buf = new_buf;
5140             size = new_size;
5141 	}
5142 	COPY_BUF(buf, len, q);
5143         if (len > maxLength) {
5144             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5145                          "Comment too big found", NULL);
5146             xmlFree (buf);
5147             return;
5148         }
5149 
5150 	q = r;
5151 	ql = rl;
5152 	r = cur;
5153 	rl = l;
5154 
5155 	NEXTL(l);
5156 	cur = CUR_CHAR(l);
5157 
5158     }
5159     buf[len] = 0;
5160     if (cur == 0) {
5161 	xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5162 	                     "Comment not terminated \n<!--%.50s\n", buf);
5163     } else if (!IS_CHAR(cur)) {
5164         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5165                           "xmlParseComment: invalid xmlChar value %d\n",
5166 	                  cur);
5167     } else {
5168         NEXT;
5169 	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5170 	    (!ctxt->disableSAX))
5171 	    ctxt->sax->comment(ctxt->userData, buf);
5172     }
5173     xmlFree(buf);
5174     return;
5175 not_terminated:
5176     xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5177 			 "Comment not terminated\n", NULL);
5178     xmlFree(buf);
5179     return;
5180 }
5181 
/**
 * xmlParseComment:
 * @ctxt:  an XML parser context
 *
 * DEPRECATED: Internal function, don't use.
 *
 * Parse an XML (SGML) comment. Returns without consuming anything
 * unless the input starts with '<!'; once that is seen, '<!' is
 * always consumed, even if '--' does not follow.
 *
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
 *  must not occur within comments. "
 *
 * The content is reported through the SAX comment callback. When the
 * accelerated loop meets a byte it does not handle, the work is handed
 * over to xmlParseCommentComplex() together with the buffer built so far.
 *
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
 */
void
xmlParseComment(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    size_t size = XML_PARSER_BUFFER_SIZE;
    size_t len = 0;
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
                       XML_MAX_HUGE_LENGTH :
                       XML_MAX_TEXT_LENGTH;
    /* Scan pointer; it runs ahead of ctxt->input->cur. */
    const xmlChar *in;
    size_t nbchar = 0;
    int ccol;

    /*
     * Check that there is a comment right here.
     */
    if ((RAW != '<') || (NXT(1) != '!'))
        return;
    SKIP(2);
    if ((RAW != '-') || (NXT(1) != '-'))
        return;
    SKIP(2);
    GROW;

    /*
     * Accelerated common case where the input doesn't need to be
     * modified before passing it to the handler.
     */
    in = ctxt->input->cur;
    do {
	if (*in == 0xA) {
	    do {
		ctxt->input->line++; ctxt->input->col = 1;
		in++;
	    } while (*in == 0xA);
	}
get_more:
        /*
         * Consume ASCII bytes from 0x20 to 0x7F except '-', plus tab,
         * counting columns in a local.
         */
        ccol = ctxt->input->col;
	while (((*in > '-') && (*in <= 0x7F)) ||
	       ((*in >= 0x20) && (*in < '-')) ||
	       (*in == 0x09)) {
		    in++;
		    ccol++;
	}
	ctxt->input->col = ccol;
	if (*in == 0xA) {
	    do {
		ctxt->input->line++; ctxt->input->col = 1;
		in++;
	    } while (*in == 0xA);
	    goto get_more;
	}
	nbchar = in - ctxt->input->cur;
	/*
	 * save current set of data
	 */
	if (nbchar > 0) {
            if (buf == NULL) {
                /*
                 * First chunk. When "--" follows immediately, allocate
                 * exactly what this chunk needs (presumably because the
                 * comment is expected to end here); otherwise leave
                 * headroom for more data.
                 */
                if ((*in == '-') && (in[1] == '-'))
                    size = nbchar + 1;
                else
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
                buf = (xmlChar *) xmlMallocAtomic(size);
                if (buf == NULL) {
                    xmlErrMemory(ctxt);
                    return;
                }
                len = 0;
            } else if (len + nbchar + 1 >= size) {
                /* Grow so that the chunk and the terminator fit. */
                xmlChar *new_buf;
                size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
                new_buf = (xmlChar *) xmlRealloc(buf, size);
                if (new_buf == NULL) {
                    xmlFree (buf);
                    xmlErrMemory(ctxt);
                    return;
                }
                buf = new_buf;
            }
            memcpy(&buf[len], ctxt->input->cur, nbchar);
            len += nbchar;
            buf[len] = 0;
	}
        if (len > maxLength) {
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
                         "Comment too big found", NULL);
            xmlFree (buf);
            return;
        }
	ctxt->input->cur = in;
	/*
	 * NOTE(review): the scan above only stops on a byte other than
	 * 0xA, so this branch looks unreachable - confirm before removing.
	 */
	if (*in == 0xA) {
	    in++;
	    ctxt->input->line++; ctxt->input->col = 1;
	}
	if (*in == 0xD) {
	    in++;
	    if (*in == 0xA) {
		/*
		 * CR LF: move the input onto the LF so the CR is not
		 * copied and the LF starts the next chunk.
		 */
		ctxt->input->cur = in;
		in++;
		ctxt->input->line++; ctxt->input->col = 1;
		goto get_more;
	    }
	    /* Lone CR: step back; the loop condition below rejects it. */
	    in--;
	}
	SHRINK;
	GROW;
	/* Reload the scan pointer from the input after SHRINK/GROW. */
	in = ctxt->input->cur;
	if (*in == '-') {
	    if (in[1] == '-') {
	        if (in[2] == '>') {
		    /* End of comment: consume "-->" and report it. */
		    SKIP(3);
		    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
		        (!ctxt->disableSAX)) {
			if (buf != NULL)
			    ctxt->sax->comment(ctxt->userData, buf);
			else
			    ctxt->sax->comment(ctxt->userData, BAD_CAST "");
		    }
		    if (buf != NULL)
		        xmlFree(buf);
		    return;
		}
		/*
		 * "--" not followed by '>': report it and keep scanning.
		 * Only `in` moves past the hyphens, so they are copied
		 * with the next chunk.
		 */
		if (buf != NULL) {
		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
		                      "Double hyphen within comment: "
                                      "<!--%.50s\n",
				      buf);
		} else
		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
		                      "Double hyphen within comment\n", NULL);
		in++;
		ctxt->input->col++;
	    }
	    /* Step over the (remaining) single hyphen. */
	    in++;
	    ctxt->input->col++;
	    goto get_more;
	}
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
    /* A byte outside the accelerated range: continue in the slow routine. */
    xmlParseCommentComplex(ctxt, buf, len, size);
    return;
}
5335 
5336 
5337 /**
5338  * xmlParsePITarget:
5339  * @ctxt:  an XML parser context
5340  *
5341  * DEPRECATED: Internal function, don't use.
5342  *
5343  * parse the name of a PI
5344  *
5345  * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5346  *
5347  * Returns the PITarget name or NULL
5348  */
5349 
5350 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)5351 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5352     const xmlChar *name;
5353 
5354     name = xmlParseName(ctxt);
5355     if ((name != NULL) &&
5356         ((name[0] == 'x') || (name[0] == 'X')) &&
5357         ((name[1] == 'm') || (name[1] == 'M')) &&
5358         ((name[2] == 'l') || (name[2] == 'L'))) {
5359 	int i;
5360 	if ((name[0] == 'x') && (name[1] == 'm') &&
5361 	    (name[2] == 'l') && (name[3] == 0)) {
5362 	    xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5363 		 "XML declaration allowed only at the start of the document\n");
5364 	    return(name);
5365 	} else if (name[3] == 0) {
5366 	    xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5367 	    return(name);
5368 	}
5369 	for (i = 0;;i++) {
5370 	    if (xmlW3CPIs[i] == NULL) break;
5371 	    if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5372 	        return(name);
5373 	}
5374 	xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5375 		      "xmlParsePITarget: invalid name prefix 'xml'\n",
5376 		      NULL, NULL);
5377     }
5378     if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5379 	xmlNsErr(ctxt, XML_NS_ERR_COLON,
5380 		 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5381     }
5382     return(name);
5383 }
5384 
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * A malformed value raises an XML_WAR_CATALOG_PI warning, except that a
 * "catalog" keyword not followed by '=' is ignored silently. A failure
 * to duplicate the URL is reported as a memory error.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL;
    const xmlChar *tmp, *base, *end;
    xmlChar marker;

    tmp = catalog;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
        goto error;
    tmp += 7;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != '=') {
        /* No value at all: ignored without a warning. */
        return;
    }
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;

    /* The value must be quoted with ' or ". */
    marker = *tmp;
    if ((marker != '\'') && (marker != '"'))
        goto error;
    tmp++;
    base = tmp;
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
    if (*tmp == 0)
        goto error;
    end = tmp;

    /* Only blanks may follow the closing quote. */
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != 0)
        goto error;

    /*
     * The syntax is known to be valid, so a NULL result can only be an
     * allocation failure: report it instead of dropping the PI silently.
     */
    URL = xmlStrndup(base, end - base);
    if (URL == NULL) {
        xmlErrMemory(ctxt);
        return;
    }

    /*
     * Unfortunately, the catalog API doesn't report OOM errors.
     * xmlGetLastError isn't very helpful since we don't know
     * where the last error came from. We'd have to reset it
     * before this call and restore it afterwards.
     */
    ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
    xmlFree(URL);
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
}
#endif
5452 
5453 /**
5454  * xmlParsePI:
5455  * @ctxt:  an XML parser context
5456  *
5457  * DEPRECATED: Internal function, don't use.
5458  *
5459  * parse an XML Processing Instruction.
5460  *
5461  * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5462  *
5463  * The processing is transferred to SAX once parsed.
5464  */
5465 
5466 void
xmlParsePI(xmlParserCtxtPtr ctxt)5467 xmlParsePI(xmlParserCtxtPtr ctxt) {
5468     xmlChar *buf = NULL;
5469     size_t len = 0;
5470     size_t size = XML_PARSER_BUFFER_SIZE;
5471     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5472                        XML_MAX_HUGE_LENGTH :
5473                        XML_MAX_TEXT_LENGTH;
5474     int cur, l;
5475     const xmlChar *target;
5476 
5477     if ((RAW == '<') && (NXT(1) == '?')) {
5478 	/*
5479 	 * this is a Processing Instruction.
5480 	 */
5481 	SKIP(2);
5482 
5483 	/*
5484 	 * Parse the target name and check for special support like
5485 	 * namespace.
5486 	 */
5487         target = xmlParsePITarget(ctxt);
5488 	if (target != NULL) {
5489 	    if ((RAW == '?') && (NXT(1) == '>')) {
5490 		SKIP(2);
5491 
5492 		/*
5493 		 * SAX: PI detected.
5494 		 */
5495 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5496 		    (ctxt->sax->processingInstruction != NULL))
5497 		    ctxt->sax->processingInstruction(ctxt->userData,
5498 		                                     target, NULL);
5499 		return;
5500 	    }
5501 	    buf = (xmlChar *) xmlMallocAtomic(size);
5502 	    if (buf == NULL) {
5503 		xmlErrMemory(ctxt);
5504 		return;
5505 	    }
5506 	    if (SKIP_BLANKS == 0) {
5507 		xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5508 			  "ParsePI: PI %s space expected\n", target);
5509 	    }
5510 	    cur = CUR_CHAR(l);
5511 	    while (IS_CHAR(cur) && /* checked */
5512 		   ((cur != '?') || (NXT(1) != '>'))) {
5513 		if (len + 5 >= size) {
5514 		    xmlChar *tmp;
5515                     size_t new_size = size * 2;
5516 		    tmp = (xmlChar *) xmlRealloc(buf, new_size);
5517 		    if (tmp == NULL) {
5518 			xmlErrMemory(ctxt);
5519 			xmlFree(buf);
5520 			return;
5521 		    }
5522 		    buf = tmp;
5523                     size = new_size;
5524 		}
5525 		COPY_BUF(buf, len, cur);
5526                 if (len > maxLength) {
5527                     xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5528                                       "PI %s too big found", target);
5529                     xmlFree(buf);
5530                     return;
5531                 }
5532 		NEXTL(l);
5533 		cur = CUR_CHAR(l);
5534 	    }
5535 	    buf[len] = 0;
5536 	    if (cur != '?') {
5537 		xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5538 		      "ParsePI: PI %s never end ...\n", target);
5539 	    } else {
5540 		SKIP(2);
5541 
5542 #ifdef LIBXML_CATALOG_ENABLED
5543 		if ((ctxt->inSubset == 0) &&
5544 		    (xmlStrEqual(target, XML_CATALOG_PI))) {
5545 		    xmlCatalogAllow allow = xmlCatalogGetDefaults();
5546 		    if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5547 			(allow == XML_CATA_ALLOW_ALL))
5548 			xmlParseCatalogPI(ctxt, buf);
5549 		}
5550 #endif
5551 
5552 
5553 		/*
5554 		 * SAX: PI detected.
5555 		 */
5556 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5557 		    (ctxt->sax->processingInstruction != NULL))
5558 		    ctxt->sax->processingInstruction(ctxt->userData,
5559 		                                     target, buf);
5560 	    }
5561 	    xmlFree(buf);
5562 	} else {
5563 	    xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5564 	}
5565     }
5566 }
5567 
5568 /**
5569  * xmlParseNotationDecl:
5570  * @ctxt:  an XML parser context
5571  *
5572  * DEPRECATED: Internal function, don't use.
5573  *
5574  * Parse a notation declaration. Always consumes '<!'.
5575  *
5576  * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5577  *
5578  * Hence there is actually 3 choices:
5579  *     'PUBLIC' S PubidLiteral
5580  *     'PUBLIC' S PubidLiteral S SystemLiteral
5581  * and 'SYSTEM' S SystemLiteral
5582  *
5583  * See the NOTE on xmlParseExternalID().
5584  */
5585 
5586 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5587 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5588     const xmlChar *name;
5589     xmlChar *Pubid;
5590     xmlChar *Systemid;
5591 
5592     if ((CUR != '<') || (NXT(1) != '!'))
5593         return;
5594     SKIP(2);
5595 
5596     if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5597 	int inputid = ctxt->input->id;
5598 	SKIP(8);
5599 	if (SKIP_BLANKS_PE == 0) {
5600 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5601 			   "Space required after '<!NOTATION'\n");
5602 	    return;
5603 	}
5604 
5605         name = xmlParseName(ctxt);
5606 	if (name == NULL) {
5607 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5608 	    return;
5609 	}
5610 	if (xmlStrchr(name, ':') != NULL) {
5611 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5612 		     "colons are forbidden from notation names '%s'\n",
5613 		     name, NULL, NULL);
5614 	}
5615 	if (SKIP_BLANKS_PE == 0) {
5616 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5617 		     "Space required after the NOTATION name'\n");
5618 	    return;
5619 	}
5620 
5621 	/*
5622 	 * Parse the IDs.
5623 	 */
5624 	Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5625 	SKIP_BLANKS_PE;
5626 
5627 	if (RAW == '>') {
5628 	    if (inputid != ctxt->input->id) {
5629 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5630 	                       "Notation declaration doesn't start and stop"
5631                                " in the same entity\n");
5632 	    }
5633 	    NEXT;
5634 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5635 		(ctxt->sax->notationDecl != NULL))
5636 		ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5637 	} else {
5638 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5639 	}
5640 	if (Systemid != NULL) xmlFree(Systemid);
5641 	if (Pubid != NULL) xmlFree(Pubid);
5642     }
5643 }
5644 
5645 /**
5646  * xmlParseEntityDecl:
5647  * @ctxt:  an XML parser context
5648  *
5649  * DEPRECATED: Internal function, don't use.
5650  *
5651  * Parse an entity declaration. Always consumes '<!'.
5652  *
5653  * [70] EntityDecl ::= GEDecl | PEDecl
5654  *
5655  * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5656  *
5657  * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5658  *
5659  * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5660  *
5661  * [74] PEDef ::= EntityValue | ExternalID
5662  *
5663  * [76] NDataDecl ::= S 'NDATA' S Name
5664  *
5665  * [ VC: Notation Declared ]
5666  * The Name must match the declared name of a notation.
5667  */
5668 
5669 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5670 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5671     const xmlChar *name = NULL;
5672     xmlChar *value = NULL;
5673     xmlChar *URI = NULL, *literal = NULL;
5674     const xmlChar *ndata = NULL;
5675     int isParameter = 0;
5676     xmlChar *orig = NULL;
5677 
5678     if ((CUR != '<') || (NXT(1) != '!'))
5679         return;
5680     SKIP(2);
5681 
5682     /* GROW; done in the caller */
5683     if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5684 	int inputid = ctxt->input->id;
5685 	SKIP(6);
5686 	if (SKIP_BLANKS_PE == 0) {
5687 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5688 			   "Space required after '<!ENTITY'\n");
5689 	}
5690 
5691 	if (RAW == '%') {
5692 	    NEXT;
5693 	    if (SKIP_BLANKS_PE == 0) {
5694 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5695 			       "Space required after '%%'\n");
5696 	    }
5697 	    isParameter = 1;
5698 	}
5699 
5700         name = xmlParseName(ctxt);
5701 	if (name == NULL) {
5702 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5703 	                   "xmlParseEntityDecl: no name\n");
5704             return;
5705 	}
5706 	if (xmlStrchr(name, ':') != NULL) {
5707 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5708 		     "colons are forbidden from entities names '%s'\n",
5709 		     name, NULL, NULL);
5710 	}
5711 	if (SKIP_BLANKS_PE == 0) {
5712 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5713 			   "Space required after the entity name\n");
5714 	}
5715 
5716 	/*
5717 	 * handle the various case of definitions...
5718 	 */
5719 	if (isParameter) {
5720 	    if ((RAW == '"') || (RAW == '\'')) {
5721 	        value = xmlParseEntityValue(ctxt, &orig);
5722 		if (value) {
5723 		    if ((ctxt->sax != NULL) &&
5724 			(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5725 			ctxt->sax->entityDecl(ctxt->userData, name,
5726 		                    XML_INTERNAL_PARAMETER_ENTITY,
5727 				    NULL, NULL, value);
5728 		}
5729 	    } else {
5730 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5731 		if ((URI == NULL) && (literal == NULL)) {
5732 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5733 		}
5734 		if (URI) {
5735                     if (xmlStrchr(URI, '#')) {
5736                         xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5737                     } else {
5738                         if ((ctxt->sax != NULL) &&
5739                             (!ctxt->disableSAX) &&
5740                             (ctxt->sax->entityDecl != NULL))
5741                             ctxt->sax->entityDecl(ctxt->userData, name,
5742                                         XML_EXTERNAL_PARAMETER_ENTITY,
5743                                         literal, URI, NULL);
5744                     }
5745 		}
5746 	    }
5747 	} else {
5748 	    if ((RAW == '"') || (RAW == '\'')) {
5749 	        value = xmlParseEntityValue(ctxt, &orig);
5750 		if ((ctxt->sax != NULL) &&
5751 		    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5752 		    ctxt->sax->entityDecl(ctxt->userData, name,
5753 				XML_INTERNAL_GENERAL_ENTITY,
5754 				NULL, NULL, value);
5755 		/*
5756 		 * For expat compatibility in SAX mode.
5757 		 */
5758 		if ((ctxt->myDoc == NULL) ||
5759 		    (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5760 		    if (ctxt->myDoc == NULL) {
5761 			ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5762 			if (ctxt->myDoc == NULL) {
5763 			    xmlErrMemory(ctxt);
5764 			    goto done;
5765 			}
5766 			ctxt->myDoc->properties = XML_DOC_INTERNAL;
5767 		    }
5768 		    if (ctxt->myDoc->intSubset == NULL) {
5769 			ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5770 					    BAD_CAST "fake", NULL, NULL);
5771                         if (ctxt->myDoc->intSubset == NULL) {
5772                             xmlErrMemory(ctxt);
5773                             goto done;
5774                         }
5775                     }
5776 
5777 		    xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5778 			              NULL, NULL, value);
5779 		}
5780 	    } else {
5781 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5782 		if ((URI == NULL) && (literal == NULL)) {
5783 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5784 		}
5785 		if (URI) {
5786                     if (xmlStrchr(URI, '#')) {
5787                         xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5788                     }
5789 		}
5790 		if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5791 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5792 				   "Space required before 'NDATA'\n");
5793 		}
5794 		if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5795 		    SKIP(5);
5796 		    if (SKIP_BLANKS_PE == 0) {
5797 			xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5798 				       "Space required after 'NDATA'\n");
5799 		    }
5800 		    ndata = xmlParseName(ctxt);
5801 		    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5802 		        (ctxt->sax->unparsedEntityDecl != NULL))
5803 			ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5804 				    literal, URI, ndata);
5805 		} else {
5806 		    if ((ctxt->sax != NULL) &&
5807 		        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5808 			ctxt->sax->entityDecl(ctxt->userData, name,
5809 				    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5810 				    literal, URI, NULL);
5811 		    /*
5812 		     * For expat compatibility in SAX mode.
5813 		     * assuming the entity replacement was asked for
5814 		     */
5815 		    if ((ctxt->replaceEntities != 0) &&
5816 			((ctxt->myDoc == NULL) ||
5817 			(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5818 			if (ctxt->myDoc == NULL) {
5819 			    ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5820 			    if (ctxt->myDoc == NULL) {
5821 			        xmlErrMemory(ctxt);
5822 				goto done;
5823 			    }
5824 			    ctxt->myDoc->properties = XML_DOC_INTERNAL;
5825 			}
5826 
5827 			if (ctxt->myDoc->intSubset == NULL) {
5828 			    ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5829 						BAD_CAST "fake", NULL, NULL);
5830                             if (ctxt->myDoc->intSubset == NULL) {
5831                                 xmlErrMemory(ctxt);
5832                                 goto done;
5833                             }
5834                         }
5835 			xmlSAX2EntityDecl(ctxt, name,
5836 				          XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5837 				          literal, URI, NULL);
5838 		    }
5839 		}
5840 	    }
5841 	}
5842 	SKIP_BLANKS_PE;
5843 	if (RAW != '>') {
5844 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5845 	            "xmlParseEntityDecl: entity %s not terminated\n", name);
5846 	    xmlHaltParser(ctxt);
5847 	} else {
5848 	    if (inputid != ctxt->input->id) {
5849 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5850 	                       "Entity declaration doesn't start and stop in"
5851                                " the same entity\n");
5852 	    }
5853 	    NEXT;
5854 	}
5855 	if (orig != NULL) {
5856 	    /*
5857 	     * Ugly mechanism to save the raw entity value.
5858 	     */
5859 	    xmlEntityPtr cur = NULL;
5860 
5861 	    if (isParameter) {
5862 	        if ((ctxt->sax != NULL) &&
5863 		    (ctxt->sax->getParameterEntity != NULL))
5864 		    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5865 	    } else {
5866 	        if ((ctxt->sax != NULL) &&
5867 		    (ctxt->sax->getEntity != NULL))
5868 		    cur = ctxt->sax->getEntity(ctxt->userData, name);
5869 		if ((cur == NULL) && (ctxt->userData==ctxt)) {
5870 		    cur = xmlSAX2GetEntity(ctxt, name);
5871 		}
5872 	    }
5873             if ((cur != NULL) && (cur->orig == NULL)) {
5874 		cur->orig = orig;
5875                 orig = NULL;
5876 	    }
5877 	}
5878 
5879 done:
5880 	if (value != NULL) xmlFree(value);
5881 	if (URI != NULL) xmlFree(URI);
5882 	if (literal != NULL) xmlFree(literal);
5883         if (orig != NULL) xmlFree(orig);
5884     }
5885 }
5886 
5887 /**
5888  * xmlParseDefaultDecl:
5889  * @ctxt:  an XML parser context
5890  * @value:  Receive a possible fixed default value for the attribute
5891  *
5892  * DEPRECATED: Internal function, don't use.
5893  *
5894  * Parse an attribute default declaration
5895  *
5896  * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5897  *
5898  * [ VC: Required Attribute ]
5899  * if the default declaration is the keyword #REQUIRED, then the
5900  * attribute must be specified for all elements of the type in the
5901  * attribute-list declaration.
5902  *
5903  * [ VC: Attribute Default Legal ]
5904  * The declared default value must meet the lexical constraints of
5905  * the declared attribute type c.f. xmlValidateAttributeDecl()
5906  *
5907  * [ VC: Fixed Attribute Default ]
5908  * if an attribute has a default value declared with the #FIXED
5909  * keyword, instances of that attribute must match the default value.
5910  *
5911  * [ WFC: No < in Attribute Values ]
5912  * handled in xmlParseAttValue()
5913  *
5914  * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5915  *          or XML_ATTRIBUTE_FIXED.
5916  */
5917 
5918 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5919 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5920     int val;
5921     xmlChar *ret;
5922 
5923     *value = NULL;
5924     if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5925 	SKIP(9);
5926 	return(XML_ATTRIBUTE_REQUIRED);
5927     }
5928     if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5929 	SKIP(8);
5930 	return(XML_ATTRIBUTE_IMPLIED);
5931     }
5932     val = XML_ATTRIBUTE_NONE;
5933     if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5934 	SKIP(6);
5935 	val = XML_ATTRIBUTE_FIXED;
5936 	if (SKIP_BLANKS_PE == 0) {
5937 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5938 			   "Space required after '#FIXED'\n");
5939 	}
5940     }
5941     ret = xmlParseAttValue(ctxt);
5942     if (ret == NULL) {
5943 	xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5944 		       "Attribute default value declaration error\n");
5945     } else
5946         *value = ret;
5947     return(val);
5948 }
5949 
5950 /**
5951  * xmlParseNotationType:
5952  * @ctxt:  an XML parser context
5953  *
5954  * DEPRECATED: Internal function, don't use.
5955  *
5956  * parse an Notation attribute type.
5957  *
5958  * Note: the leading 'NOTATION' S part has already being parsed...
5959  *
5960  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5961  *
5962  * [ VC: Notation Attributes ]
5963  * Values of this type must match one of the notation names included
5964  * in the declaration; all notation names in the declaration must be declared.
5965  *
5966  * Returns: the notation attribute tree built while parsing
5967  */
5968 
5969 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5970 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5971     const xmlChar *name;
5972     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5973 
5974     if (RAW != '(') {
5975 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5976 	return(NULL);
5977     }
5978     do {
5979         NEXT;
5980 	SKIP_BLANKS_PE;
5981         name = xmlParseName(ctxt);
5982 	if (name == NULL) {
5983 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5984 			   "Name expected in NOTATION declaration\n");
5985             xmlFreeEnumeration(ret);
5986 	    return(NULL);
5987 	}
5988 	tmp = ret;
5989 	while (tmp != NULL) {
5990 	    if (xmlStrEqual(name, tmp->name)) {
5991 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5992 	  "standalone: attribute notation value token %s duplicated\n",
5993 				 name, NULL);
5994 		if (!xmlDictOwns(ctxt->dict, name))
5995 		    xmlFree((xmlChar *) name);
5996 		break;
5997 	    }
5998 	    tmp = tmp->next;
5999 	}
6000 	if (tmp == NULL) {
6001 	    cur = xmlCreateEnumeration(name);
6002 	    if (cur == NULL) {
6003                 xmlErrMemory(ctxt);
6004                 xmlFreeEnumeration(ret);
6005                 return(NULL);
6006             }
6007 	    if (last == NULL) ret = last = cur;
6008 	    else {
6009 		last->next = cur;
6010 		last = cur;
6011 	    }
6012 	}
6013 	SKIP_BLANKS_PE;
6014     } while (RAW == '|');
6015     if (RAW != ')') {
6016 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
6017         xmlFreeEnumeration(ret);
6018 	return(NULL);
6019     }
6020     NEXT;
6021     return(ret);
6022 }
6023 
6024 /**
6025  * xmlParseEnumerationType:
6026  * @ctxt:  an XML parser context
6027  *
6028  * DEPRECATED: Internal function, don't use.
6029  *
6030  * parse an Enumeration attribute type.
6031  *
6032  * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
6033  *
6034  * [ VC: Enumeration ]
6035  * Values of this type must match one of the Nmtoken tokens in
6036  * the declaration
6037  *
6038  * Returns: the enumeration attribute tree built while parsing
6039  */
6040 
6041 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)6042 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
6043     xmlChar *name;
6044     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6045 
6046     if (RAW != '(') {
6047 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
6048 	return(NULL);
6049     }
6050     do {
6051         NEXT;
6052 	SKIP_BLANKS_PE;
6053         name = xmlParseNmtoken(ctxt);
6054 	if (name == NULL) {
6055 	    xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
6056 	    return(ret);
6057 	}
6058 	tmp = ret;
6059 	while (tmp != NULL) {
6060 	    if (xmlStrEqual(name, tmp->name)) {
6061 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6062 	  "standalone: attribute enumeration value token %s duplicated\n",
6063 				 name, NULL);
6064 		if (!xmlDictOwns(ctxt->dict, name))
6065 		    xmlFree(name);
6066 		break;
6067 	    }
6068 	    tmp = tmp->next;
6069 	}
6070 	if (tmp == NULL) {
6071 	    cur = xmlCreateEnumeration(name);
6072 	    if (!xmlDictOwns(ctxt->dict, name))
6073 		xmlFree(name);
6074 	    if (cur == NULL) {
6075                 xmlErrMemory(ctxt);
6076                 xmlFreeEnumeration(ret);
6077                 return(NULL);
6078             }
6079 	    if (last == NULL) ret = last = cur;
6080 	    else {
6081 		last->next = cur;
6082 		last = cur;
6083 	    }
6084 	}
6085 	SKIP_BLANKS_PE;
6086     } while (RAW == '|');
6087     if (RAW != ')') {
6088 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
6089 	return(ret);
6090     }
6091     NEXT;
6092     return(ret);
6093 }
6094 
6095 /**
6096  * xmlParseEnumeratedType:
6097  * @ctxt:  an XML parser context
6098  * @tree:  the enumeration tree built while parsing
6099  *
6100  * DEPRECATED: Internal function, don't use.
6101  *
6102  * parse an Enumerated attribute type.
6103  *
6104  * [57] EnumeratedType ::= NotationType | Enumeration
6105  *
6106  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6107  *
6108  *
6109  * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
6110  */
6111 
6112 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)6113 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6114     if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
6115 	SKIP(8);
6116 	if (SKIP_BLANKS_PE == 0) {
6117 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6118 			   "Space required after 'NOTATION'\n");
6119 	    return(0);
6120 	}
6121 	*tree = xmlParseNotationType(ctxt);
6122 	if (*tree == NULL) return(0);
6123 	return(XML_ATTRIBUTE_NOTATION);
6124     }
6125     *tree = xmlParseEnumerationType(ctxt);
6126     if (*tree == NULL) return(0);
6127     return(XML_ATTRIBUTE_ENUMERATION);
6128 }
6129 
6130 /**
6131  * xmlParseAttributeType:
6132  * @ctxt:  an XML parser context
6133  * @tree:  the enumeration tree built while parsing
6134  *
6135  * DEPRECATED: Internal function, don't use.
6136  *
6137  * parse the Attribute list def for an element
6138  *
6139  * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6140  *
6141  * [55] StringType ::= 'CDATA'
6142  *
6143  * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6144  *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6145  *
6146  * Validity constraints for attribute values syntax are checked in
6147  * xmlValidateAttributeValue()
6148  *
6149  * [ VC: ID ]
6150  * Values of type ID must match the Name production. A name must not
6151  * appear more than once in an XML document as a value of this type;
6152  * i.e., ID values must uniquely identify the elements which bear them.
6153  *
6154  * [ VC: One ID per Element Type ]
6155  * No element type may have more than one ID attribute specified.
6156  *
6157  * [ VC: ID Attribute Default ]
6158  * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6159  *
6160  * [ VC: IDREF ]
6161  * Values of type IDREF must match the Name production, and values
6162  * of type IDREFS must match Names; each IDREF Name must match the value
6163  * of an ID attribute on some element in the XML document; i.e. IDREF
6164  * values must match the value of some ID attribute.
6165  *
6166  * [ VC: Entity Name ]
6167  * Values of type ENTITY must match the Name production, values
6168  * of type ENTITIES must match Names; each Entity Name must match the
6169  * name of an unparsed entity declared in the DTD.
6170  *
6171  * [ VC: Name Token ]
6172  * Values of type NMTOKEN must match the Nmtoken production; values
6173  * of type NMTOKENS must match Nmtokens.
6174  *
6175  * Returns the attribute type
6176  */
6177 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)6178 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6179     if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6180 	SKIP(5);
6181 	return(XML_ATTRIBUTE_CDATA);
6182      } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6183 	SKIP(6);
6184 	return(XML_ATTRIBUTE_IDREFS);
6185      } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6186 	SKIP(5);
6187 	return(XML_ATTRIBUTE_IDREF);
6188      } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6189         SKIP(2);
6190 	return(XML_ATTRIBUTE_ID);
6191      } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6192 	SKIP(6);
6193 	return(XML_ATTRIBUTE_ENTITY);
6194      } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6195 	SKIP(8);
6196 	return(XML_ATTRIBUTE_ENTITIES);
6197      } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6198 	SKIP(8);
6199 	return(XML_ATTRIBUTE_NMTOKENS);
6200      } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6201 	SKIP(7);
6202 	return(XML_ATTRIBUTE_NMTOKEN);
6203      }
6204      return(xmlParseEnumeratedType(ctxt, tree));
6205 }
6206 
6207 /**
6208  * xmlParseAttributeListDecl:
6209  * @ctxt:  an XML parser context
6210  *
6211  * DEPRECATED: Internal function, don't use.
6212  *
6213  * Parse an attribute list declaration for an element. Always consumes '<!'.
6214  *
6215  * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6216  *
6217  * [53] AttDef ::= S Name S AttType S DefaultDecl
6218  *
6219  */
6220 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)6221 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6222     const xmlChar *elemName;
6223     const xmlChar *attrName;
6224     xmlEnumerationPtr tree;
6225 
6226     if ((CUR != '<') || (NXT(1) != '!'))
6227         return;
6228     SKIP(2);
6229 
6230     if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6231 	int inputid = ctxt->input->id;
6232 
6233 	SKIP(7);
6234 	if (SKIP_BLANKS_PE == 0) {
6235 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6236 		                 "Space required after '<!ATTLIST'\n");
6237 	}
6238         elemName = xmlParseName(ctxt);
6239 	if (elemName == NULL) {
6240 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6241 			   "ATTLIST: no name for Element\n");
6242 	    return;
6243 	}
6244 	SKIP_BLANKS_PE;
6245 	GROW;
6246 	while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
6247 	    int type;
6248 	    int def;
6249 	    xmlChar *defaultValue = NULL;
6250 
6251 	    GROW;
6252             tree = NULL;
6253 	    attrName = xmlParseName(ctxt);
6254 	    if (attrName == NULL) {
6255 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6256 			       "ATTLIST: no name for Attribute\n");
6257 		break;
6258 	    }
6259 	    GROW;
6260 	    if (SKIP_BLANKS_PE == 0) {
6261 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6262 		        "Space required after the attribute name\n");
6263 		break;
6264 	    }
6265 
6266 	    type = xmlParseAttributeType(ctxt, &tree);
6267 	    if (type <= 0) {
6268 	        break;
6269 	    }
6270 
6271 	    GROW;
6272 	    if (SKIP_BLANKS_PE == 0) {
6273 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6274 			       "Space required after the attribute type\n");
6275 	        if (tree != NULL)
6276 		    xmlFreeEnumeration(tree);
6277 		break;
6278 	    }
6279 
6280 	    def = xmlParseDefaultDecl(ctxt, &defaultValue);
6281 	    if (def <= 0) {
6282                 if (defaultValue != NULL)
6283 		    xmlFree(defaultValue);
6284 	        if (tree != NULL)
6285 		    xmlFreeEnumeration(tree);
6286 	        break;
6287 	    }
6288 	    if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6289 	        xmlAttrNormalizeSpace(defaultValue, defaultValue);
6290 
6291 	    GROW;
6292             if (RAW != '>') {
6293 		if (SKIP_BLANKS_PE == 0) {
6294 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6295 			"Space required after the attribute default value\n");
6296 		    if (defaultValue != NULL)
6297 			xmlFree(defaultValue);
6298 		    if (tree != NULL)
6299 			xmlFreeEnumeration(tree);
6300 		    break;
6301 		}
6302 	    }
6303 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6304 		(ctxt->sax->attributeDecl != NULL))
6305 		ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6306 	                        type, def, defaultValue, tree);
6307 	    else if (tree != NULL)
6308 		xmlFreeEnumeration(tree);
6309 
6310 	    if ((ctxt->sax2) && (defaultValue != NULL) &&
6311 	        (def != XML_ATTRIBUTE_IMPLIED) &&
6312 		(def != XML_ATTRIBUTE_REQUIRED)) {
6313 		xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6314 	    }
6315 	    if (ctxt->sax2) {
6316 		xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6317 	    }
6318 	    if (defaultValue != NULL)
6319 	        xmlFree(defaultValue);
6320 	    GROW;
6321 	}
6322 	if (RAW == '>') {
6323 	    if (inputid != ctxt->input->id) {
6324 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6325                                "Attribute list declaration doesn't start and"
6326                                " stop in the same entity\n");
6327 	    }
6328 	    NEXT;
6329 	}
6330     }
6331 }
6332 
6333 /**
6334  * xmlParseElementMixedContentDecl:
6335  * @ctxt:  an XML parser context
6336  * @inputchk:  the input used for the current entity, needed for boundary checks
6337  *
6338  * DEPRECATED: Internal function, don't use.
6339  *
6340  * parse the declaration for a Mixed Element content
6341  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6342  *
6343  * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6344  *                '(' S? '#PCDATA' S? ')'
6345  *
6346  * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6347  *
6348  * [ VC: No Duplicate Types ]
6349  * The same name must not appear more than once in a single
6350  * mixed-content declaration.
6351  *
6352  * returns: the list of the xmlElementContentPtr describing the element choices
6353  */
6354 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6355 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6356     xmlElementContentPtr ret = NULL, cur = NULL, n;
6357     const xmlChar *elem = NULL;
6358 
6359     GROW;
6360     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6361 	SKIP(7);
6362 	SKIP_BLANKS_PE;
6363 	if (RAW == ')') {
6364 	    if (ctxt->input->id != inputchk) {
6365 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6366                                "Element content declaration doesn't start and"
6367                                " stop in the same entity\n");
6368 	    }
6369 	    NEXT;
6370 	    ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6371 	    if (ret == NULL)
6372                 goto mem_error;
6373 	    if (RAW == '*') {
6374 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6375 		NEXT;
6376 	    }
6377 	    return(ret);
6378 	}
6379 	if ((RAW == '(') || (RAW == '|')) {
6380 	    ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6381 	    if (ret == NULL)
6382                 goto mem_error;
6383 	}
6384 	while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6385 	    NEXT;
6386             n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6387             if (n == NULL)
6388                 goto mem_error;
6389 	    if (elem == NULL) {
6390 		n->c1 = cur;
6391 		if (cur != NULL)
6392 		    cur->parent = n;
6393 		ret = cur = n;
6394 	    } else {
6395 	        cur->c2 = n;
6396 		n->parent = cur;
6397 		n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6398                 if (n->c1 == NULL)
6399                     goto mem_error;
6400 		n->c1->parent = n;
6401 		cur = n;
6402 	    }
6403 	    SKIP_BLANKS_PE;
6404 	    elem = xmlParseName(ctxt);
6405 	    if (elem == NULL) {
6406 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6407 			"xmlParseElementMixedContentDecl : Name expected\n");
6408 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6409 		return(NULL);
6410 	    }
6411 	    SKIP_BLANKS_PE;
6412 	    GROW;
6413 	}
6414 	if ((RAW == ')') && (NXT(1) == '*')) {
6415 	    if (elem != NULL) {
6416 		cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6417 		                               XML_ELEMENT_CONTENT_ELEMENT);
6418 		if (cur->c2 == NULL)
6419                     goto mem_error;
6420 		cur->c2->parent = cur;
6421             }
6422             if (ret != NULL)
6423                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6424 	    if (ctxt->input->id != inputchk) {
6425 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6426                                "Element content declaration doesn't start and"
6427                                " stop in the same entity\n");
6428 	    }
6429 	    SKIP(2);
6430 	} else {
6431 	    xmlFreeDocElementContent(ctxt->myDoc, ret);
6432 	    xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6433 	    return(NULL);
6434 	}
6435 
6436     } else {
6437 	xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6438     }
6439     return(ret);
6440 
6441 mem_error:
6442     xmlErrMemory(ctxt);
6443     xmlFreeDocElementContent(ctxt->myDoc, ret);
6444     return(NULL);
6445 }
6446 
6447 /**
6448  * xmlParseElementChildrenContentDeclPriv:
6449  * @ctxt:  an XML parser context
6450  * @inputchk:  the input used for the current entity, needed for boundary checks
6451  * @depth: the level of recursion
6452  *
6453  * parse the declaration for a Mixed Element content
6454  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6455  *
6456  *
6457  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6458  *
6459  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6460  *
6461  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6462  *
6463  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6464  *
6465  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6466  * TODO Parameter-entity replacement text must be properly nested
6467  *	with parenthesized groups. That is to say, if either of the
6468  *	opening or closing parentheses in a choice, seq, or Mixed
6469  *	construct is contained in the replacement text for a parameter
6470  *	entity, both must be contained in the same replacement text. For
6471  *	interoperability, if a parameter-entity reference appears in a
6472  *	choice, seq, or Mixed construct, its replacement text should not
6473  *	be empty, and neither the first nor last non-blank character of
6474  *	the replacement text should be a connector (| or ,).
6475  *
6476  * Returns the tree of xmlElementContentPtr describing the element
6477  *          hierarchy.
6478  */
6479 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6480 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6481                                        int depth) {
6482     int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6483     xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6484     const xmlChar *elem;
6485     xmlChar type = 0;
6486 
6487     if (depth > maxDepth) {
6488         xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6489                 "xmlParseElementChildrenContentDecl : depth %d too deep, "
6490                 "use XML_PARSE_HUGE\n", depth);
6491 	return(NULL);
6492     }
6493     SKIP_BLANKS_PE;
6494     GROW;
6495     if (RAW == '(') {
6496 	int inputid = ctxt->input->id;
6497 
6498         /* Recurse on first child */
6499 	NEXT;
6500 	SKIP_BLANKS_PE;
6501         cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6502                                                            depth + 1);
6503         if (cur == NULL)
6504             return(NULL);
6505 	SKIP_BLANKS_PE;
6506 	GROW;
6507     } else {
6508 	elem = xmlParseName(ctxt);
6509 	if (elem == NULL) {
6510 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6511 	    return(NULL);
6512 	}
6513         cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6514 	if (cur == NULL) {
6515 	    xmlErrMemory(ctxt);
6516 	    return(NULL);
6517 	}
6518 	GROW;
6519 	if (RAW == '?') {
6520 	    cur->ocur = XML_ELEMENT_CONTENT_OPT;
6521 	    NEXT;
6522 	} else if (RAW == '*') {
6523 	    cur->ocur = XML_ELEMENT_CONTENT_MULT;
6524 	    NEXT;
6525 	} else if (RAW == '+') {
6526 	    cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6527 	    NEXT;
6528 	} else {
6529 	    cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6530 	}
6531 	GROW;
6532     }
6533     SKIP_BLANKS_PE;
6534     while ((RAW != ')') && (PARSER_STOPPED(ctxt) == 0)) {
6535         /*
6536 	 * Each loop we parse one separator and one element.
6537 	 */
6538         if (RAW == ',') {
6539 	    if (type == 0) type = CUR;
6540 
6541 	    /*
6542 	     * Detect "Name | Name , Name" error
6543 	     */
6544 	    else if (type != CUR) {
6545 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6546 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6547 		                  type);
6548 		if ((last != NULL) && (last != ret))
6549 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6550 		if (ret != NULL)
6551 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6552 		return(NULL);
6553 	    }
6554 	    NEXT;
6555 
6556 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6557 	    if (op == NULL) {
6558                 xmlErrMemory(ctxt);
6559 		if ((last != NULL) && (last != ret))
6560 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6561 	        xmlFreeDocElementContent(ctxt->myDoc, ret);
6562 		return(NULL);
6563 	    }
6564 	    if (last == NULL) {
6565 		op->c1 = ret;
6566 		if (ret != NULL)
6567 		    ret->parent = op;
6568 		ret = cur = op;
6569 	    } else {
6570 	        cur->c2 = op;
6571 		if (op != NULL)
6572 		    op->parent = cur;
6573 		op->c1 = last;
6574 		if (last != NULL)
6575 		    last->parent = op;
6576 		cur =op;
6577 		last = NULL;
6578 	    }
6579 	} else if (RAW == '|') {
6580 	    if (type == 0) type = CUR;
6581 
6582 	    /*
6583 	     * Detect "Name , Name | Name" error
6584 	     */
6585 	    else if (type != CUR) {
6586 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6587 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6588 				  type);
6589 		if ((last != NULL) && (last != ret))
6590 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6591 		if (ret != NULL)
6592 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6593 		return(NULL);
6594 	    }
6595 	    NEXT;
6596 
6597 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6598 	    if (op == NULL) {
6599                 xmlErrMemory(ctxt);
6600 		if ((last != NULL) && (last != ret))
6601 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6602 		if (ret != NULL)
6603 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6604 		return(NULL);
6605 	    }
6606 	    if (last == NULL) {
6607 		op->c1 = ret;
6608 		if (ret != NULL)
6609 		    ret->parent = op;
6610 		ret = cur = op;
6611 	    } else {
6612 	        cur->c2 = op;
6613 		if (op != NULL)
6614 		    op->parent = cur;
6615 		op->c1 = last;
6616 		if (last != NULL)
6617 		    last->parent = op;
6618 		cur =op;
6619 		last = NULL;
6620 	    }
6621 	} else {
6622 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6623 	    if ((last != NULL) && (last != ret))
6624 	        xmlFreeDocElementContent(ctxt->myDoc, last);
6625 	    if (ret != NULL)
6626 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6627 	    return(NULL);
6628 	}
6629 	GROW;
6630 	SKIP_BLANKS_PE;
6631 	GROW;
6632 	if (RAW == '(') {
6633 	    int inputid = ctxt->input->id;
6634 	    /* Recurse on second child */
6635 	    NEXT;
6636 	    SKIP_BLANKS_PE;
6637 	    last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6638                                                           depth + 1);
6639             if (last == NULL) {
6640 		if (ret != NULL)
6641 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6642 		return(NULL);
6643             }
6644 	    SKIP_BLANKS_PE;
6645 	} else {
6646 	    elem = xmlParseName(ctxt);
6647 	    if (elem == NULL) {
6648 		xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6649 		if (ret != NULL)
6650 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6651 		return(NULL);
6652 	    }
6653 	    last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6654 	    if (last == NULL) {
6655                 xmlErrMemory(ctxt);
6656 		if (ret != NULL)
6657 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6658 		return(NULL);
6659 	    }
6660 	    if (RAW == '?') {
6661 		last->ocur = XML_ELEMENT_CONTENT_OPT;
6662 		NEXT;
6663 	    } else if (RAW == '*') {
6664 		last->ocur = XML_ELEMENT_CONTENT_MULT;
6665 		NEXT;
6666 	    } else if (RAW == '+') {
6667 		last->ocur = XML_ELEMENT_CONTENT_PLUS;
6668 		NEXT;
6669 	    } else {
6670 		last->ocur = XML_ELEMENT_CONTENT_ONCE;
6671 	    }
6672 	}
6673 	SKIP_BLANKS_PE;
6674 	GROW;
6675     }
6676     if ((cur != NULL) && (last != NULL)) {
6677         cur->c2 = last;
6678 	if (last != NULL)
6679 	    last->parent = cur;
6680     }
6681     if (ctxt->input->id != inputchk) {
6682 	xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6683                        "Element content declaration doesn't start and stop in"
6684                        " the same entity\n");
6685     }
6686     NEXT;
6687     if (RAW == '?') {
6688 	if (ret != NULL) {
6689 	    if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6690 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6691 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6692 	    else
6693 	        ret->ocur = XML_ELEMENT_CONTENT_OPT;
6694 	}
6695 	NEXT;
6696     } else if (RAW == '*') {
6697 	if (ret != NULL) {
6698 	    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6699 	    cur = ret;
6700 	    /*
6701 	     * Some normalization:
6702 	     * (a | b* | c?)* == (a | b | c)*
6703 	     */
6704 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6705 		if ((cur->c1 != NULL) &&
6706 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6707 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6708 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6709 		if ((cur->c2 != NULL) &&
6710 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6711 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6712 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6713 		cur = cur->c2;
6714 	    }
6715 	}
6716 	NEXT;
6717     } else if (RAW == '+') {
6718 	if (ret != NULL) {
6719 	    int found = 0;
6720 
6721 	    if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6722 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6723 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6724 	    else
6725 	        ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6726 	    /*
6727 	     * Some normalization:
6728 	     * (a | b*)+ == (a | b)*
6729 	     * (a | b?)+ == (a | b)*
6730 	     */
6731 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6732 		if ((cur->c1 != NULL) &&
6733 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6734 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6735 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6736 		    found = 1;
6737 		}
6738 		if ((cur->c2 != NULL) &&
6739 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6740 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6741 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6742 		    found = 1;
6743 		}
6744 		cur = cur->c2;
6745 	    }
6746 	    if (found)
6747 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6748 	}
6749 	NEXT;
6750     }
6751     return(ret);
6752 }
6753 
6754 /**
6755  * xmlParseElementChildrenContentDecl:
6756  * @ctxt:  an XML parser context
6757  * @inputchk:  the input used for the current entity, needed for boundary checks
6758  *
6759  * DEPRECATED: Internal function, don't use.
6760  *
6761  * parse the declaration for a Mixed Element content
6762  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6763  *
6764  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6765  *
6766  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6767  *
6768  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6769  *
6770  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6771  *
6772  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6773  * TODO Parameter-entity replacement text must be properly nested
6774  *	with parenthesized groups. That is to say, if either of the
6775  *	opening or closing parentheses in a choice, seq, or Mixed
6776  *	construct is contained in the replacement text for a parameter
6777  *	entity, both must be contained in the same replacement text. For
6778  *	interoperability, if a parameter-entity reference appears in a
6779  *	choice, seq, or Mixed construct, its replacement text should not
6780  *	be empty, and neither the first nor last non-blank character of
6781  *	the replacement text should be a connector (| or ,).
6782  *
6783  * Returns the tree of xmlElementContentPtr describing the element
6784  *          hierarchy.
6785  */
6786 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6787 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6788     /* stub left for API/ABI compat */
6789     return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6790 }
6791 
6792 /**
6793  * xmlParseElementContentDecl:
6794  * @ctxt:  an XML parser context
6795  * @name:  the name of the element being defined.
6796  * @result:  the Element Content pointer will be stored here if any
6797  *
6798  * DEPRECATED: Internal function, don't use.
6799  *
6800  * parse the declaration for an Element content either Mixed or Children,
6801  * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6802  *
6803  * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6804  *
6805  * returns: the type of element content XML_ELEMENT_TYPE_xxx
6806  */
6807 
6808 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6809 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6810                            xmlElementContentPtr *result) {
6811 
6812     xmlElementContentPtr tree = NULL;
6813     int inputid = ctxt->input->id;
6814     int res;
6815 
6816     *result = NULL;
6817 
6818     if (RAW != '(') {
6819 	xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6820 		"xmlParseElementContentDecl : %s '(' expected\n", name);
6821 	return(-1);
6822     }
6823     NEXT;
6824     GROW;
6825     SKIP_BLANKS_PE;
6826     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6827         tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6828 	res = XML_ELEMENT_TYPE_MIXED;
6829     } else {
6830         tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6831 	res = XML_ELEMENT_TYPE_ELEMENT;
6832     }
6833     SKIP_BLANKS_PE;
6834     *result = tree;
6835     return(res);
6836 }
6837 
6838 /**
6839  * xmlParseElementDecl:
6840  * @ctxt:  an XML parser context
6841  *
6842  * DEPRECATED: Internal function, don't use.
6843  *
6844  * Parse an element declaration. Always consumes '<!'.
6845  *
6846  * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6847  *
6848  * [ VC: Unique Element Type Declaration ]
6849  * No element type may be declared more than once
6850  *
6851  * Returns the type of the element, or -1 in case of error
6852  */
6853 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6854 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6855     const xmlChar *name;
6856     int ret = -1;
6857     xmlElementContentPtr content  = NULL;
6858 
6859     if ((CUR != '<') || (NXT(1) != '!'))
6860         return(ret);
6861     SKIP(2);
6862 
6863     /* GROW; done in the caller */
6864     if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6865 	int inputid = ctxt->input->id;
6866 
6867 	SKIP(7);
6868 	if (SKIP_BLANKS_PE == 0) {
6869 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6870 		           "Space required after 'ELEMENT'\n");
6871 	    return(-1);
6872 	}
6873         name = xmlParseName(ctxt);
6874 	if (name == NULL) {
6875 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6876 			   "xmlParseElementDecl: no name for Element\n");
6877 	    return(-1);
6878 	}
6879 	if (SKIP_BLANKS_PE == 0) {
6880 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6881 			   "Space required after the element name\n");
6882 	}
6883 	if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6884 	    SKIP(5);
6885 	    /*
6886 	     * Element must always be empty.
6887 	     */
6888 	    ret = XML_ELEMENT_TYPE_EMPTY;
6889 	} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6890 	           (NXT(2) == 'Y')) {
6891 	    SKIP(3);
6892 	    /*
6893 	     * Element is a generic container.
6894 	     */
6895 	    ret = XML_ELEMENT_TYPE_ANY;
6896 	} else if (RAW == '(') {
6897 	    ret = xmlParseElementContentDecl(ctxt, name, &content);
6898 	} else {
6899 	    /*
6900 	     * [ WFC: PEs in Internal Subset ] error handling.
6901 	     */
6902             xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6903                   "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6904 	    return(-1);
6905 	}
6906 
6907 	SKIP_BLANKS_PE;
6908 
6909 	if (RAW != '>') {
6910 	    xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6911 	    if (content != NULL) {
6912 		xmlFreeDocElementContent(ctxt->myDoc, content);
6913 	    }
6914 	} else {
6915 	    if (inputid != ctxt->input->id) {
6916 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6917                                "Element declaration doesn't start and stop in"
6918                                " the same entity\n");
6919 	    }
6920 
6921 	    NEXT;
6922 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6923 		(ctxt->sax->elementDecl != NULL)) {
6924 		if (content != NULL)
6925 		    content->parent = NULL;
6926 	        ctxt->sax->elementDecl(ctxt->userData, name, ret,
6927 		                       content);
6928 		if ((content != NULL) && (content->parent == NULL)) {
6929 		    /*
6930 		     * this is a trick: if xmlAddElementDecl is called,
6931 		     * instead of copying the full tree it is plugged directly
6932 		     * if called from the parser. Avoid duplicating the
6933 		     * interfaces or change the API/ABI
6934 		     */
6935 		    xmlFreeDocElementContent(ctxt->myDoc, content);
6936 		}
6937 	    } else if (content != NULL) {
6938 		xmlFreeDocElementContent(ctxt->myDoc, content);
6939 	    }
6940 	}
6941     }
6942     return(ret);
6943 }
6944 
6945 /**
6946  * xmlParseConditionalSections
6947  * @ctxt:  an XML parser context
6948  *
6949  * Parse a conditional section. Always consumes '<!['.
6950  *
6951  * [61] conditionalSect ::= includeSect | ignoreSect
6952  * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6953  * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6954  * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6955  * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6956  */
6957 
6958 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6959 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6960     int *inputIds = NULL;
6961     size_t inputIdsSize = 0;
6962     size_t depth = 0;
6963 
6964     while (PARSER_STOPPED(ctxt) == 0) {
6965         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6966             int id = ctxt->input->id;
6967 
6968             SKIP(3);
6969             SKIP_BLANKS_PE;
6970 
6971             if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6972                 SKIP(7);
6973                 SKIP_BLANKS_PE;
6974                 if (RAW != '[') {
6975                     xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6976                     xmlHaltParser(ctxt);
6977                     goto error;
6978                 }
6979                 if (ctxt->input->id != id) {
6980                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6981                                    "All markup of the conditional section is"
6982                                    " not in the same entity\n");
6983                 }
6984                 NEXT;
6985 
6986                 if (inputIdsSize <= depth) {
6987                     int *tmp;
6988 
6989                     inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6990                     tmp = (int *) xmlRealloc(inputIds,
6991                             inputIdsSize * sizeof(int));
6992                     if (tmp == NULL) {
6993                         xmlErrMemory(ctxt);
6994                         goto error;
6995                     }
6996                     inputIds = tmp;
6997                 }
6998                 inputIds[depth] = id;
6999                 depth++;
7000             } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
7001                 size_t ignoreDepth = 0;
7002 
7003                 SKIP(6);
7004                 SKIP_BLANKS_PE;
7005                 if (RAW != '[') {
7006                     xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7007                     xmlHaltParser(ctxt);
7008                     goto error;
7009                 }
7010                 if (ctxt->input->id != id) {
7011                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7012                                    "All markup of the conditional section is"
7013                                    " not in the same entity\n");
7014                 }
7015                 NEXT;
7016 
7017                 while (PARSER_STOPPED(ctxt) == 0) {
7018                     if (RAW == 0) {
7019                         xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
7020                         goto error;
7021                     }
7022                     if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7023                         SKIP(3);
7024                         ignoreDepth++;
7025                         /* Check for integer overflow */
7026                         if (ignoreDepth == 0) {
7027                             xmlErrMemory(ctxt);
7028                             goto error;
7029                         }
7030                     } else if ((RAW == ']') && (NXT(1) == ']') &&
7031                                (NXT(2) == '>')) {
7032                         SKIP(3);
7033                         if (ignoreDepth == 0)
7034                             break;
7035                         ignoreDepth--;
7036                     } else {
7037                         NEXT;
7038                     }
7039                 }
7040 
7041                 if (ctxt->input->id != id) {
7042                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7043                                    "All markup of the conditional section is"
7044                                    " not in the same entity\n");
7045                 }
7046             } else {
7047                 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
7048                 xmlHaltParser(ctxt);
7049                 goto error;
7050             }
7051         } else if ((depth > 0) &&
7052                    (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
7053             depth--;
7054             if (ctxt->input->id != inputIds[depth]) {
7055                 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7056                                "All markup of the conditional section is not"
7057                                " in the same entity\n");
7058             }
7059             SKIP(3);
7060         } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7061             xmlParseMarkupDecl(ctxt);
7062         } else {
7063             xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7064             xmlHaltParser(ctxt);
7065             goto error;
7066         }
7067 
7068         if (depth == 0)
7069             break;
7070 
7071         SKIP_BLANKS_PE;
7072         SHRINK;
7073         GROW;
7074     }
7075 
7076 error:
7077     xmlFree(inputIds);
7078 }
7079 
7080 /**
7081  * xmlParseMarkupDecl:
7082  * @ctxt:  an XML parser context
7083  *
7084  * DEPRECATED: Internal function, don't use.
7085  *
7086  * Parse markup declarations. Always consumes '<!' or '<?'.
7087  *
7088  * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
7089  *                     NotationDecl | PI | Comment
7090  *
7091  * [ VC: Proper Declaration/PE Nesting ]
7092  * Parameter-entity replacement text must be properly nested with
7093  * markup declarations. That is to say, if either the first character
7094  * or the last character of a markup declaration (markupdecl above) is
7095  * contained in the replacement text for a parameter-entity reference,
7096  * both must be contained in the same replacement text.
7097  *
7098  * [ WFC: PEs in Internal Subset ]
7099  * In the internal DTD subset, parameter-entity references can occur
7100  * only where markup declarations can occur, not within markup declarations.
7101  * (This does not apply to references that occur in external parameter
7102  * entities or to the external subset.)
7103  */
7104 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)7105 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
7106     GROW;
7107     if (CUR == '<') {
7108         if (NXT(1) == '!') {
7109 	    switch (NXT(2)) {
7110 	        case 'E':
7111 		    if (NXT(3) == 'L')
7112 			xmlParseElementDecl(ctxt);
7113 		    else if (NXT(3) == 'N')
7114 			xmlParseEntityDecl(ctxt);
7115                     else
7116                         SKIP(2);
7117 		    break;
7118 	        case 'A':
7119 		    xmlParseAttributeListDecl(ctxt);
7120 		    break;
7121 	        case 'N':
7122 		    xmlParseNotationDecl(ctxt);
7123 		    break;
7124 	        case '-':
7125 		    xmlParseComment(ctxt);
7126 		    break;
7127 		default:
7128 		    /* there is an error but it will be detected later */
7129                     SKIP(2);
7130 		    break;
7131 	    }
7132 	} else if (NXT(1) == '?') {
7133 	    xmlParsePI(ctxt);
7134 	}
7135     }
7136 }
7137 
7138 /**
7139  * xmlParseTextDecl:
7140  * @ctxt:  an XML parser context
7141  *
7142  * DEPRECATED: Internal function, don't use.
7143  *
7144  * parse an XML declaration header for external entities
7145  *
7146  * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7147  */
7148 
7149 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)7150 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7151     xmlChar *version;
7152 
7153     /*
7154      * We know that '<?xml' is here.
7155      */
7156     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7157 	SKIP(5);
7158     } else {
7159 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7160 	return;
7161     }
7162 
7163     if (SKIP_BLANKS == 0) {
7164 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7165 		       "Space needed after '<?xml'\n");
7166     }
7167 
7168     /*
7169      * We may have the VersionInfo here.
7170      */
7171     version = xmlParseVersionInfo(ctxt);
7172     if (version == NULL) {
7173 	version = xmlCharStrdup(XML_DEFAULT_VERSION);
7174         if (version == NULL) {
7175             xmlErrMemory(ctxt);
7176             return;
7177         }
7178     } else {
7179 	if (SKIP_BLANKS == 0) {
7180 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7181 		           "Space needed here\n");
7182 	}
7183     }
7184     ctxt->input->version = version;
7185 
7186     /*
7187      * We must have the encoding declaration
7188      */
7189     xmlParseEncodingDecl(ctxt);
7190 
7191     SKIP_BLANKS;
7192     if ((RAW == '?') && (NXT(1) == '>')) {
7193         SKIP(2);
7194     } else if (RAW == '>') {
7195         /* Deprecated old WD ... */
7196 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7197 	NEXT;
7198     } else {
7199         int c;
7200 
7201 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7202         while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7203             NEXT;
7204             if (c == '>')
7205                 break;
7206         }
7207     }
7208 }
7209 
7210 /**
7211  * xmlParseExternalSubset:
7212  * @ctxt:  an XML parser context
7213  * @ExternalID: the external identifier
7214  * @SystemID: the system identifier (or URL)
7215  *
7216  * parse Markup declarations from an external subset
7217  *
7218  * [30] extSubset ::= textDecl? extSubsetDecl
7219  *
7220  * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7221  */
7222 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)7223 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7224                        const xmlChar *SystemID) {
7225     int oldInputNr;
7226 
7227     xmlCtxtInitializeLate(ctxt);
7228 
7229     xmlDetectEncoding(ctxt);
7230 
7231     if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7232 	xmlParseTextDecl(ctxt);
7233     }
7234     if (ctxt->myDoc == NULL) {
7235         ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7236 	if (ctxt->myDoc == NULL) {
7237 	    xmlErrMemory(ctxt);
7238 	    return;
7239 	}
7240 	ctxt->myDoc->properties = XML_DOC_INTERNAL;
7241     }
7242     if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL) &&
7243         (xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID) == NULL)) {
7244         xmlErrMemory(ctxt);
7245     }
7246 
7247     ctxt->inSubset = 2;
7248     oldInputNr = ctxt->inputNr;
7249 
7250     SKIP_BLANKS_PE;
7251     while (((RAW != 0) || (ctxt->inputNr > oldInputNr)) &&
7252            (!PARSER_STOPPED(ctxt))) {
7253 	GROW;
7254         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7255             xmlParseConditionalSections(ctxt);
7256         } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7257             xmlParseMarkupDecl(ctxt);
7258         } else {
7259             xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7260             xmlHaltParser(ctxt);
7261             return;
7262         }
7263         SKIP_BLANKS_PE;
7264         SHRINK;
7265     }
7266 
7267     while (ctxt->inputNr > oldInputNr)
7268         xmlPopPE(ctxt);
7269 
7270     if (RAW != 0) {
7271 	xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7272     }
7273 }
7274 
7275 /**
7276  * xmlParseReference:
7277  * @ctxt:  an XML parser context
7278  *
7279  * DEPRECATED: Internal function, don't use.
7280  *
7281  * parse and handle entity references in content, depending on the SAX
7282  * interface, this may end-up in a call to character() if this is a
7283  * CharRef, a predefined entity, if there is no reference() callback.
7284  * or if the parser was asked to switch to that mode.
7285  *
7286  * Always consumes '&'.
7287  *
7288  * [67] Reference ::= EntityRef | CharRef
7289  */
7290 void
xmlParseReference(xmlParserCtxtPtr ctxt)7291 xmlParseReference(xmlParserCtxtPtr ctxt) {
7292     xmlEntityPtr ent = NULL;
7293     const xmlChar *name;
7294     xmlChar *val;
7295 
7296     if (RAW != '&')
7297         return;
7298 
7299     /*
7300      * Simple case of a CharRef
7301      */
7302     if (NXT(1) == '#') {
7303 	int i = 0;
7304 	xmlChar out[16];
7305 	int value = xmlParseCharRef(ctxt);
7306 
7307 	if (value == 0)
7308 	    return;
7309 
7310         /*
7311          * Just encode the value in UTF-8
7312          */
7313         COPY_BUF(out, i, value);
7314         out[i] = 0;
7315         if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7316             (!ctxt->disableSAX))
7317             ctxt->sax->characters(ctxt->userData, out, i);
7318 	return;
7319     }
7320 
7321     /*
7322      * We are seeing an entity reference
7323      */
7324     name = xmlParseEntityRefInternal(ctxt);
7325     if (name == NULL)
7326         return;
7327     ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7328     if (ent == NULL) {
7329         /*
7330          * Create a reference for undeclared entities.
7331          */
7332         if ((ctxt->replaceEntities == 0) &&
7333             (ctxt->sax != NULL) &&
7334             (ctxt->disableSAX == 0) &&
7335             (ctxt->sax->reference != NULL)) {
7336             ctxt->sax->reference(ctxt->userData, name);
7337         }
7338         return;
7339     }
7340     if (!ctxt->wellFormed)
7341 	return;
7342 
7343     /* special case of predefined entities */
7344     if ((ent->name == NULL) ||
7345         (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7346 	val = ent->content;
7347 	if (val == NULL) return;
7348 	/*
7349 	 * inline the entity.
7350 	 */
7351 	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7352 	    (!ctxt->disableSAX))
7353 	    ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7354 	return;
7355     }
7356 
7357     /*
7358      * The first reference to the entity trigger a parsing phase
7359      * where the ent->children is filled with the result from
7360      * the parsing.
7361      * Note: external parsed entities will not be loaded, it is not
7362      * required for a non-validating parser, unless the parsing option
7363      * of validating, or substituting entities were given. Doing so is
7364      * far more secure as the parser will only process data coming from
7365      * the document entity by default.
7366      *
7367      * FIXME: This doesn't work correctly since entities can be
7368      * expanded with different namespace declarations in scope.
7369      * For example:
7370      *
7371      * <!DOCTYPE doc [
7372      *   <!ENTITY ent "<ns:elem/>">
7373      * ]>
7374      * <doc>
7375      *   <decl1 xmlns:ns="urn:ns1">
7376      *     &ent;
7377      *   </decl1>
7378      *   <decl2 xmlns:ns="urn:ns2">
7379      *     &ent;
7380      *   </decl2>
7381      * </doc>
7382      *
7383      * Proposed fix:
7384      *
7385      * - Ignore current namespace declarations when parsing the
7386      *   entity. If a prefix can't be resolved, don't report an error
7387      *   but mark it as unresolved.
7388      * - Try to resolve these prefixes when expanding the entity.
7389      *   This will require a specialized version of xmlStaticCopyNode
7390      *   which can also make use of the namespace hash table to avoid
7391      *   quadratic behavior.
7392      *
7393      * Alternatively, we could simply reparse the entity on each
7394      * expansion like we already do with custom SAX callbacks.
7395      * External entity content should be cached in this case.
7396      */
7397     if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7398         (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7399          ((ctxt->replaceEntities) ||
7400           (ctxt->validate)))) {
7401         if ((ent->flags & XML_ENT_PARSED) == 0) {
7402             xmlCtxtParseEntity(ctxt, ent);
7403         } else if (ent->children == NULL) {
7404             /*
7405              * Probably running in SAX mode and the callbacks don't
7406              * build the entity content. Parse the entity again.
7407              *
7408              * This will also be triggered in normal tree builder mode
7409              * if an entity happens to be empty, causing unnecessary
7410              * reloads. It's hard to come up with a reliable check in
7411              * which mode we're running.
7412              */
7413             xmlCtxtParseEntity(ctxt, ent);
7414         }
7415     }
7416 
7417     /*
7418      * We also check for amplification if entities aren't substituted.
7419      * They might be expanded later.
7420      */
7421     if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7422         return;
7423 
7424     if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7425         return;
7426 
7427     if (ctxt->replaceEntities == 0) {
7428 	/*
7429 	 * Create a reference
7430 	 */
7431         if (ctxt->sax->reference != NULL)
7432 	    ctxt->sax->reference(ctxt->userData, ent->name);
7433     } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7434         xmlNodePtr copy, cur;
7435 
7436         /*
7437          * Seems we are generating the DOM content, copy the tree
7438 	 */
7439         cur = ent->children;
7440 
7441         /*
7442          * Handle first text node with SAX to coalesce text efficiently
7443          */
7444         if ((cur->type == XML_TEXT_NODE) ||
7445             (cur->type == XML_CDATA_SECTION_NODE)) {
7446             int len = xmlStrlen(cur->content);
7447 
7448             if ((cur->type == XML_TEXT_NODE) ||
7449                 (ctxt->sax->cdataBlock == NULL)) {
7450                 if (ctxt->sax->characters != NULL)
7451                     ctxt->sax->characters(ctxt, cur->content, len);
7452             } else {
7453                 if (ctxt->sax->cdataBlock != NULL)
7454                     ctxt->sax->cdataBlock(ctxt, cur->content, len);
7455             }
7456 
7457             cur = cur->next;
7458         }
7459 
7460         while (cur != NULL) {
7461             xmlNodePtr last;
7462 
7463             /*
7464              * Handle last text node with SAX to coalesce text efficiently
7465              */
7466             if ((cur->next == NULL) &&
7467                 ((cur->type == XML_TEXT_NODE) ||
7468                  (cur->type == XML_CDATA_SECTION_NODE))) {
7469                 int len = xmlStrlen(cur->content);
7470 
7471                 if ((cur->type == XML_TEXT_NODE) ||
7472                     (ctxt->sax->cdataBlock == NULL)) {
7473                     if (ctxt->sax->characters != NULL)
7474                         ctxt->sax->characters(ctxt, cur->content, len);
7475                 } else {
7476                     if (ctxt->sax->cdataBlock != NULL)
7477                         ctxt->sax->cdataBlock(ctxt, cur->content, len);
7478                 }
7479 
7480                 break;
7481             }
7482 
7483             /*
7484              * Reset coalesce buffer stats only for non-text nodes.
7485              */
7486             ctxt->nodemem = 0;
7487             ctxt->nodelen = 0;
7488 
7489             copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7490 
7491             if (copy == NULL) {
7492                 xmlErrMemory(ctxt);
7493                 break;
7494             }
7495 
7496             if (ctxt->parseMode == XML_PARSE_READER) {
7497                 /* Needed for reader */
7498                 copy->extra = cur->extra;
7499                 /* Maybe needed for reader */
7500                 copy->_private = cur->_private;
7501             }
7502 
7503             copy->parent = ctxt->node;
7504             last = ctxt->node->last;
7505             if (last == NULL) {
7506                 ctxt->node->children = copy;
7507             } else {
7508                 last->next = copy;
7509                 copy->prev = last;
7510             }
7511             ctxt->node->last = copy;
7512 
7513             cur = cur->next;
7514         }
7515     }
7516 }
7517 
7518 static void
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt,const xmlChar * name)7519 xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7520     /*
7521      * [ WFC: Entity Declared ]
7522      * In a document without any DTD, a document with only an
7523      * internal DTD subset which contains no parameter entity
7524      * references, or a document with "standalone='yes'", the
7525      * Name given in the entity reference must match that in an
7526      * entity declaration, except that well-formed documents
7527      * need not declare any of the following entities: amp, lt,
7528      * gt, apos, quot.
7529      * The declaration of a parameter entity must precede any
7530      * reference to it.
7531      * Similarly, the declaration of a general entity must
7532      * precede any reference to it which appears in a default
7533      * value in an attribute-list declaration. Note that if
7534      * entities are declared in the external subset or in
7535      * external parameter entities, a non-validating processor
7536      * is not obligated to read and process their declarations;
7537      * for such documents, the rule that an entity must be
7538      * declared is a well-formedness constraint only if
7539      * standalone='yes'.
7540      */
7541     if ((ctxt->standalone == 1) ||
7542         ((ctxt->hasExternalSubset == 0) &&
7543          (ctxt->hasPErefs == 0))) {
7544         xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7545                           "Entity '%s' not defined\n", name);
7546     } else if (ctxt->validate) {
7547         /*
7548          * [ VC: Entity Declared ]
7549          * In a document with an external subset or external
7550          * parameter entities with "standalone='no'", ...
7551          * ... The declaration of a parameter entity must
7552          * precede any reference to it...
7553          */
7554         xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7555                          "Entity '%s' not defined\n", name, NULL);
7556     } else if ((ctxt->loadsubset) ||
7557                ((ctxt->replaceEntities) &&
7558                 ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7559         /*
7560          * Also raise a non-fatal error
7561          *
7562          * - if the external subset is loaded and all entity declarations
7563          *   should be available, or
7564          * - entity substition was requested without restricting
7565          *   external entity access.
7566          */
7567         xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7568                      "Entity '%s' not defined\n", name);
7569     } else {
7570         xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7571                       "Entity '%s' not defined\n", name, NULL);
7572     }
7573 
7574     ctxt->valid = 0;
7575 }
7576 
7577 static xmlEntityPtr
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt,const xmlChar * name,int inAttr)7578 xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7579     xmlEntityPtr ent;
7580 
7581     /*
7582      * Predefined entities override any extra definition
7583      */
7584     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7585         ent = xmlGetPredefinedEntity(name);
7586         if (ent != NULL)
7587             return(ent);
7588     }
7589 
7590     /*
7591      * Ask first SAX for entity resolution, otherwise try the
7592      * entities which may have stored in the parser context.
7593      */
7594     if (ctxt->sax != NULL) {
7595 	if (ctxt->sax->getEntity != NULL)
7596 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7597 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7598 	    (ctxt->options & XML_PARSE_OLDSAX))
7599 	    ent = xmlGetPredefinedEntity(name);
7600 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7601 	    (ctxt->userData==ctxt)) {
7602 	    ent = xmlSAX2GetEntity(ctxt, name);
7603 	}
7604     }
7605 
7606     if (ent == NULL) {
7607         xmlHandleUndeclaredEntity(ctxt, name);
7608     }
7609 
7610     /*
7611      * [ WFC: Parsed Entity ]
7612      * An entity reference must not contain the name of an
7613      * unparsed entity
7614      */
7615     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7616 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7617 		 "Entity reference to unparsed entity %s\n", name);
7618         ent = NULL;
7619     }
7620 
7621     /*
7622      * [ WFC: No External Entity References ]
7623      * Attribute values cannot contain direct or indirect
7624      * entity references to external entities.
7625      */
7626     else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7627         if (inAttr) {
7628             xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7629                  "Attribute references external entity '%s'\n", name);
7630             ent = NULL;
7631         }
7632     }
7633 
7634     return(ent);
7635 }
7636 
7637 /**
7638  * xmlParseEntityRefInternal:
7639  * @ctxt:  an XML parser context
7640  * @inAttr:  whether we are in an attribute value
7641  *
7642  * Parse an entity reference. Always consumes '&'.
7643  *
7644  * [68] EntityRef ::= '&' Name ';'
7645  *
7646  * Returns the name, or NULL in case of error.
7647  */
7648 static const xmlChar *
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt)7649 xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7650     const xmlChar *name;
7651 
7652     GROW;
7653 
7654     if (RAW != '&')
7655         return(NULL);
7656     NEXT;
7657     name = xmlParseName(ctxt);
7658     if (name == NULL) {
7659 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7660 		       "xmlParseEntityRef: no name\n");
7661         return(NULL);
7662     }
7663     if (RAW != ';') {
7664 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7665 	return(NULL);
7666     }
7667     NEXT;
7668 
7669     return(name);
7670 }
7671 
7672 /**
7673  * xmlParseEntityRef:
7674  * @ctxt:  an XML parser context
7675  *
7676  * DEPRECATED: Internal function, don't use.
7677  *
7678  * Returns the xmlEntityPtr if found, or NULL otherwise.
7679  */
7680 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7681 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7682     const xmlChar *name;
7683 
7684     if (ctxt == NULL)
7685         return(NULL);
7686 
7687     name = xmlParseEntityRefInternal(ctxt);
7688     if (name == NULL)
7689         return(NULL);
7690 
7691     return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7692 }
7693 
7694 /**
7695  * xmlParseStringEntityRef:
7696  * @ctxt:  an XML parser context
7697  * @str:  a pointer to an index in the string
7698  *
7699  * parse ENTITY references declarations, but this version parses it from
7700  * a string value.
7701  *
7702  * [68] EntityRef ::= '&' Name ';'
7703  *
7704  * [ WFC: Entity Declared ]
7705  * In a document without any DTD, a document with only an internal DTD
7706  * subset which contains no parameter entity references, or a document
7707  * with "standalone='yes'", the Name given in the entity reference
7708  * must match that in an entity declaration, except that well-formed
7709  * documents need not declare any of the following entities: amp, lt,
7710  * gt, apos, quot.  The declaration of a parameter entity must precede
7711  * any reference to it.  Similarly, the declaration of a general entity
7712  * must precede any reference to it which appears in a default value in an
7713  * attribute-list declaration. Note that if entities are declared in the
7714  * external subset or in external parameter entities, a non-validating
7715  * processor is not obligated to read and process their declarations;
7716  * for such documents, the rule that an entity must be declared is a
7717  * well-formedness constraint only if standalone='yes'.
7718  *
7719  * [ WFC: Parsed Entity ]
7720  * An entity reference must not contain the name of an unparsed entity
7721  *
7722  * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7723  * is updated to the current location in the string.
7724  */
7725 static xmlChar *
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7726 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7727     xmlChar *name;
7728     const xmlChar *ptr;
7729     xmlChar cur;
7730 
7731     if ((str == NULL) || (*str == NULL))
7732         return(NULL);
7733     ptr = *str;
7734     cur = *ptr;
7735     if (cur != '&')
7736 	return(NULL);
7737 
7738     ptr++;
7739     name = xmlParseStringName(ctxt, &ptr);
7740     if (name == NULL) {
7741 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7742 		       "xmlParseStringEntityRef: no name\n");
7743 	*str = ptr;
7744 	return(NULL);
7745     }
7746     if (*ptr != ';') {
7747 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7748         xmlFree(name);
7749 	*str = ptr;
7750 	return(NULL);
7751     }
7752     ptr++;
7753 
7754     *str = ptr;
7755     return(name);
7756 }
7757 
7758 /**
7759  * xmlParsePEReference:
7760  * @ctxt:  an XML parser context
7761  *
7762  * DEPRECATED: Internal function, don't use.
7763  *
7764  * Parse a parameter entity reference. Always consumes '%'.
7765  *
7766  * The entity content is handled directly by pushing it's content as
7767  * a new input stream.
7768  *
7769  * [69] PEReference ::= '%' Name ';'
7770  *
7771  * [ WFC: No Recursion ]
7772  * A parsed entity must not contain a recursive
7773  * reference to itself, either directly or indirectly.
7774  *
7775  * [ WFC: Entity Declared ]
7776  * In a document without any DTD, a document with only an internal DTD
7777  * subset which contains no parameter entity references, or a document
7778  * with "standalone='yes'", ...  ... The declaration of a parameter
7779  * entity must precede any reference to it...
7780  *
7781  * [ VC: Entity Declared ]
7782  * In a document with an external subset or external parameter entities
7783  * with "standalone='no'", ...  ... The declaration of a parameter entity
7784  * must precede any reference to it...
7785  *
7786  * [ WFC: In DTD ]
7787  * Parameter-entity references may only appear in the DTD.
7788  * NOTE: misleading but this is handled.
7789  */
7790 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7791 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7792 {
7793     const xmlChar *name;
7794     xmlEntityPtr entity = NULL;
7795     xmlParserInputPtr input;
7796 
7797     if (RAW != '%')
7798         return;
7799     NEXT;
7800     name = xmlParseName(ctxt);
7801     if (name == NULL) {
7802 	xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7803 	return;
7804     }
7805     if (RAW != ';') {
7806 	xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7807         return;
7808     }
7809 
7810     NEXT;
7811 
7812     /* Must be set before xmlHandleUndeclaredEntity */
7813     ctxt->hasPErefs = 1;
7814 
7815     /*
7816      * Request the entity from SAX
7817      */
7818     if ((ctxt->sax != NULL) &&
7819 	(ctxt->sax->getParameterEntity != NULL))
7820 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7821 
7822     if (entity == NULL) {
7823         xmlHandleUndeclaredEntity(ctxt, name);
7824     } else {
7825 	/*
7826 	 * Internal checking in case the entity quest barfed
7827 	 */
7828 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7829 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7830 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7831 		  "Internal: %%%s; is not a parameter entity\n",
7832 			  name, NULL);
7833 	} else {
7834 	    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7835                 ((ctxt->options & XML_PARSE_NO_XXE) ||
7836 		 ((ctxt->loadsubset == 0) &&
7837 		  (ctxt->replaceEntities == 0) &&
7838 		  (ctxt->validate == 0))))
7839 		return;
7840 
7841             if (entity->flags & XML_ENT_EXPANDING) {
7842                 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7843                 xmlHaltParser(ctxt);
7844                 return;
7845             }
7846 
7847             if (ctxt->input_id >= INT_MAX) {
7848                 xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
7849                             "Input ID overflow\n");
7850                 return;
7851             }
7852 
7853 	    input = xmlNewEntityInputStream(ctxt, entity);
7854 	    if (xmlPushInput(ctxt, input) < 0) {
7855                 xmlFreeInputStream(input);
7856 		return;
7857             }
7858 
7859             input->id = ++ctxt->input_id;
7860 
7861             entity->flags |= XML_ENT_EXPANDING;
7862 
7863 	    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7864                 xmlDetectEncoding(ctxt);
7865 
7866                 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7867                     (IS_BLANK_CH(NXT(5)))) {
7868                     xmlParseTextDecl(ctxt);
7869                 }
7870             }
7871 	}
7872     }
7873 }
7874 
7875 /**
7876  * xmlLoadEntityContent:
7877  * @ctxt:  an XML parser context
7878  * @entity: an unloaded system entity
7879  *
7880  * Load the content of an entity.
7881  *
7882  * Returns 0 in case of success and -1 in case of failure
7883  */
7884 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)7885 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7886     xmlParserInputPtr oldinput, input = NULL;
7887     xmlParserInputPtr *oldinputTab;
7888     const xmlChar *oldencoding;
7889     xmlChar *content = NULL;
7890     xmlResourceType rtype;
7891     size_t length, i;
7892     int oldinputNr, oldinputMax;
7893     int ret = -1;
7894     int res;
7895 
7896     if ((ctxt == NULL) || (entity == NULL) ||
7897         ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7898 	 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7899 	(entity->content != NULL)) {
7900 	xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7901 	            "xmlLoadEntityContent parameter error");
7902         return(-1);
7903     }
7904 
7905     if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7906         rtype = XML_RESOURCE_PARAMETER_ENTITY;
7907     else
7908         rtype = XML_RESOURCE_GENERAL_ENTITY;
7909 
7910     input = xmlLoadResource(ctxt, (char *) entity->URI,
7911                             (char *) entity->ExternalID, rtype);
7912     if (input == NULL)
7913         return(-1);
7914 
7915     oldinput = ctxt->input;
7916     oldinputNr = ctxt->inputNr;
7917     oldinputMax = ctxt->inputMax;
7918     oldinputTab = ctxt->inputTab;
7919     oldencoding = ctxt->encoding;
7920 
7921     ctxt->input = NULL;
7922     ctxt->inputNr = 0;
7923     ctxt->inputMax = 1;
7924     ctxt->encoding = NULL;
7925     ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7926     if (ctxt->inputTab == NULL) {
7927         xmlErrMemory(ctxt);
7928         xmlFreeInputStream(input);
7929         goto error;
7930     }
7931 
7932     xmlBufResetInput(input->buf->buffer, input);
7933 
7934     inputPush(ctxt, input);
7935 
7936     xmlDetectEncoding(ctxt);
7937 
7938     /*
7939      * Parse a possible text declaration first
7940      */
7941     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7942 	xmlParseTextDecl(ctxt);
7943         /*
7944          * An XML-1.0 document can't reference an entity not XML-1.0
7945          */
7946         if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7947             (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7948             xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7949                            "Version mismatch between document and entity\n");
7950         }
7951     }
7952 
7953     length = input->cur - input->base;
7954     xmlBufShrink(input->buf->buffer, length);
7955     xmlSaturatedAdd(&ctxt->sizeentities, length);
7956 
7957     while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7958         ;
7959 
7960     xmlBufResetInput(input->buf->buffer, input);
7961 
7962     if (res < 0) {
7963         xmlCtxtErrIO(ctxt, input->buf->error, NULL);
7964         goto error;
7965     }
7966 
7967     length = xmlBufUse(input->buf->buffer);
7968     content = xmlBufDetach(input->buf->buffer);
7969 
7970     if (length > INT_MAX) {
7971         xmlErrMemory(ctxt);
7972         goto error;
7973     }
7974 
7975     for (i = 0; i < length; ) {
7976         int clen = length - i;
7977         int c = xmlGetUTF8Char(content + i, &clen);
7978 
7979         if ((c < 0) || (!IS_CHAR(c))) {
7980             xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7981                               "xmlLoadEntityContent: invalid char value %d\n",
7982                               content[i]);
7983             goto error;
7984         }
7985         i += clen;
7986     }
7987 
7988     xmlSaturatedAdd(&ctxt->sizeentities, length);
7989     entity->content = content;
7990     entity->length = length;
7991     content = NULL;
7992     ret = 0;
7993 
7994 error:
7995     while (ctxt->inputNr > 0)
7996         xmlFreeInputStream(inputPop(ctxt));
7997     xmlFree(ctxt->inputTab);
7998     xmlFree((xmlChar *) ctxt->encoding);
7999 
8000     ctxt->input = oldinput;
8001     ctxt->inputNr = oldinputNr;
8002     ctxt->inputMax = oldinputMax;
8003     ctxt->inputTab = oldinputTab;
8004     ctxt->encoding = oldencoding;
8005 
8006     xmlFree(content);
8007 
8008     return(ret);
8009 }
8010 
8011 /**
8012  * xmlParseStringPEReference:
8013  * @ctxt:  an XML parser context
8014  * @str:  a pointer to an index in the string
8015  *
8016  * parse PEReference declarations
8017  *
8018  * [69] PEReference ::= '%' Name ';'
8019  *
8020  * [ WFC: No Recursion ]
8021  * A parsed entity must not contain a recursive
8022  * reference to itself, either directly or indirectly.
8023  *
8024  * [ WFC: Entity Declared ]
8025  * In a document without any DTD, a document with only an internal DTD
8026  * subset which contains no parameter entity references, or a document
8027  * with "standalone='yes'", ...  ... The declaration of a parameter
8028  * entity must precede any reference to it...
8029  *
8030  * [ VC: Entity Declared ]
8031  * In a document with an external subset or external parameter entities
8032  * with "standalone='no'", ...  ... The declaration of a parameter entity
8033  * must precede any reference to it...
8034  *
8035  * [ WFC: In DTD ]
8036  * Parameter-entity references may only appear in the DTD.
8037  * NOTE: misleading but this is handled.
8038  *
8039  * Returns the string of the entity content.
8040  *         str is updated to the current value of the index
8041  */
8042 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8043 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8044     const xmlChar *ptr;
8045     xmlChar cur;
8046     xmlChar *name;
8047     xmlEntityPtr entity = NULL;
8048 
8049     if ((str == NULL) || (*str == NULL)) return(NULL);
8050     ptr = *str;
8051     cur = *ptr;
8052     if (cur != '%')
8053         return(NULL);
8054     ptr++;
8055     name = xmlParseStringName(ctxt, &ptr);
8056     if (name == NULL) {
8057 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8058 		       "xmlParseStringPEReference: no name\n");
8059 	*str = ptr;
8060 	return(NULL);
8061     }
8062     cur = *ptr;
8063     if (cur != ';') {
8064 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8065 	xmlFree(name);
8066 	*str = ptr;
8067 	return(NULL);
8068     }
8069     ptr++;
8070 
8071     /* Must be set before xmlHandleUndeclaredEntity */
8072     ctxt->hasPErefs = 1;
8073 
8074     /*
8075      * Request the entity from SAX
8076      */
8077     if ((ctxt->sax != NULL) &&
8078 	(ctxt->sax->getParameterEntity != NULL))
8079 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8080 
8081     if (entity == NULL) {
8082         xmlHandleUndeclaredEntity(ctxt, name);
8083     } else {
8084 	/*
8085 	 * Internal checking in case the entity quest barfed
8086 	 */
8087 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8088 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8089 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8090 			  "%%%s; is not a parameter entity\n",
8091 			  name, NULL);
8092 	}
8093     }
8094 
8095     xmlFree(name);
8096     *str = ptr;
8097     return(entity);
8098 }
8099 
8100 /**
8101  * xmlParseDocTypeDecl:
8102  * @ctxt:  an XML parser context
8103  *
8104  * DEPRECATED: Internal function, don't use.
8105  *
8106  * parse a DOCTYPE declaration
8107  *
8108  * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8109  *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8110  *
8111  * [ VC: Root Element Type ]
8112  * The Name in the document type declaration must match the element
8113  * type of the root element.
8114  */
8115 
8116 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8117 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8118     const xmlChar *name = NULL;
8119     xmlChar *ExternalID = NULL;
8120     xmlChar *URI = NULL;
8121 
8122     /*
8123      * We know that '<!DOCTYPE' has been detected.
8124      */
8125     SKIP(9);
8126 
8127     SKIP_BLANKS;
8128 
8129     /*
8130      * Parse the DOCTYPE name.
8131      */
8132     name = xmlParseName(ctxt);
8133     if (name == NULL) {
8134 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8135 		       "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8136     }
8137     ctxt->intSubName = name;
8138 
8139     SKIP_BLANKS;
8140 
8141     /*
8142      * Check for SystemID and ExternalID
8143      */
8144     URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8145 
8146     if ((URI != NULL) || (ExternalID != NULL)) {
8147         ctxt->hasExternalSubset = 1;
8148     }
8149     ctxt->extSubURI = URI;
8150     ctxt->extSubSystem = ExternalID;
8151 
8152     SKIP_BLANKS;
8153 
8154     /*
8155      * Create and update the internal subset.
8156      */
8157     if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8158 	(!ctxt->disableSAX))
8159 	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8160 
8161     /*
8162      * Is there any internal subset declarations ?
8163      * they are handled separately in xmlParseInternalSubset()
8164      */
8165     if (RAW == '[')
8166 	return;
8167 
8168     /*
8169      * We should be at the end of the DOCTYPE declaration.
8170      */
8171     if (RAW != '>') {
8172 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8173     }
8174     NEXT;
8175 }
8176 
8177 /**
8178  * xmlParseInternalSubset:
8179  * @ctxt:  an XML parser context
8180  *
8181  * parse the internal subset declaration
8182  *
8183  * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8184  */
8185 
8186 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8187 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8188     /*
8189      * Is there any DTD definition ?
8190      */
8191     if (RAW == '[') {
8192         int oldInputNr = ctxt->inputNr;
8193 
8194         NEXT;
8195 	/*
8196 	 * Parse the succession of Markup declarations and
8197 	 * PEReferences.
8198 	 * Subsequence (markupdecl | PEReference | S)*
8199 	 */
8200 	SKIP_BLANKS;
8201 	while (((RAW != ']') || (ctxt->inputNr > oldInputNr)) &&
8202                (PARSER_STOPPED(ctxt) == 0)) {
8203 
8204             /*
8205              * Conditional sections are allowed from external entities included
8206              * by PE References in the internal subset.
8207              */
8208             if ((PARSER_EXTERNAL(ctxt)) &&
8209                 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8210                 xmlParseConditionalSections(ctxt);
8211             } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8212 	        xmlParseMarkupDecl(ctxt);
8213             } else if (RAW == '%') {
8214 	        xmlParsePEReference(ctxt);
8215             } else {
8216 		xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8217                 break;
8218             }
8219 	    SKIP_BLANKS_PE;
8220             SHRINK;
8221             GROW;
8222 	}
8223 
8224         while (ctxt->inputNr > oldInputNr)
8225             xmlPopPE(ctxt);
8226 
8227 	if (RAW == ']') {
8228 	    NEXT;
8229 	    SKIP_BLANKS;
8230 	}
8231     }
8232 
8233     /*
8234      * We should be at the end of the DOCTYPE declaration.
8235      */
8236     if ((ctxt->wellFormed) && (RAW != '>')) {
8237 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8238 	return;
8239     }
8240     NEXT;
8241 }
8242 
8243 #ifdef LIBXML_SAX1_ENABLED
8244 /**
8245  * xmlParseAttribute:
8246  * @ctxt:  an XML parser context
8247  * @value:  a xmlChar ** used to store the value of the attribute
8248  *
8249  * DEPRECATED: Internal function, don't use.
8250  *
8251  * parse an attribute
8252  *
8253  * [41] Attribute ::= Name Eq AttValue
8254  *
8255  * [ WFC: No External Entity References ]
8256  * Attribute values cannot contain direct or indirect entity references
8257  * to external entities.
8258  *
8259  * [ WFC: No < in Attribute Values ]
8260  * The replacement text of any entity referred to directly or indirectly in
8261  * an attribute value (other than "&lt;") must not contain a <.
8262  *
8263  * [ VC: Attribute Value Type ]
8264  * The attribute must have been declared; the value must be of the type
8265  * declared for it.
8266  *
8267  * [25] Eq ::= S? '=' S?
8268  *
8269  * With namespace:
8270  *
8271  * [NS 11] Attribute ::= QName Eq AttValue
8272  *
8273  * Also the case QName == xmlns:??? is handled independently as a namespace
8274  * definition.
8275  *
8276  * Returns the attribute name, and the value in *value.
8277  */
8278 
8279 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8280 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8281     const xmlChar *name;
8282     xmlChar *val;
8283 
8284     *value = NULL;
8285     GROW;
8286     name = xmlParseName(ctxt);
8287     if (name == NULL) {
8288 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8289 	               "error parsing attribute name\n");
8290         return(NULL);
8291     }
8292 
8293     /*
8294      * read the value
8295      */
8296     SKIP_BLANKS;
8297     if (RAW == '=') {
8298         NEXT;
8299 	SKIP_BLANKS;
8300 	val = xmlParseAttValue(ctxt);
8301     } else {
8302 	xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8303 	       "Specification mandates value for attribute %s\n", name);
8304 	return(name);
8305     }
8306 
8307     /*
8308      * Check that xml:lang conforms to the specification
8309      * No more registered as an error, just generate a warning now
8310      * since this was deprecated in XML second edition
8311      */
8312     if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8313 	if (!xmlCheckLanguageID(val)) {
8314 	    xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8315 		          "Malformed value for xml:lang : %s\n",
8316 			  val, NULL);
8317 	}
8318     }
8319 
8320     /*
8321      * Check that xml:space conforms to the specification
8322      */
8323     if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8324 	if (xmlStrEqual(val, BAD_CAST "default"))
8325 	    *(ctxt->space) = 0;
8326 	else if (xmlStrEqual(val, BAD_CAST "preserve"))
8327 	    *(ctxt->space) = 1;
8328 	else {
8329 		xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8330 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8331                                  val, NULL);
8332 	}
8333     }
8334 
8335     *value = val;
8336     return(name);
8337 }
8338 
8339 /**
8340  * xmlParseStartTag:
8341  * @ctxt:  an XML parser context
8342  *
8343  * DEPRECATED: Internal function, don't use.
8344  *
8345  * Parse a start tag. Always consumes '<'.
8346  *
8347  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8348  *
8349  * [ WFC: Unique Att Spec ]
8350  * No attribute name may appear more than once in the same start-tag or
8351  * empty-element tag.
8352  *
8353  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8354  *
8355  * [ WFC: Unique Att Spec ]
8356  * No attribute name may appear more than once in the same start-tag or
8357  * empty-element tag.
8358  *
8359  * With namespace:
8360  *
8361  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8362  *
8363  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8364  *
8365  * Returns the element name parsed
8366  */
8367 
8368 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8369 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8370     const xmlChar *name;
8371     const xmlChar *attname;
8372     xmlChar *attvalue;
8373     const xmlChar **atts = ctxt->atts;
8374     int nbatts = 0;
8375     int maxatts = ctxt->maxatts;
8376     int i;
8377 
8378     if (RAW != '<') return(NULL);
8379     NEXT1;
8380 
8381     name = xmlParseName(ctxt);
8382     if (name == NULL) {
8383 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8384 	     "xmlParseStartTag: invalid element name\n");
8385         return(NULL);
8386     }
8387 
8388     /*
8389      * Now parse the attributes, it ends up with the ending
8390      *
8391      * (S Attribute)* S?
8392      */
8393     SKIP_BLANKS;
8394     GROW;
8395 
8396     while (((RAW != '>') &&
8397 	   ((RAW != '/') || (NXT(1) != '>')) &&
8398 	   (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8399 	attname = xmlParseAttribute(ctxt, &attvalue);
8400         if (attname == NULL)
8401 	    break;
8402         if (attvalue != NULL) {
8403 	    /*
8404 	     * [ WFC: Unique Att Spec ]
8405 	     * No attribute name may appear more than once in the same
8406 	     * start-tag or empty-element tag.
8407 	     */
8408 	    for (i = 0; i < nbatts;i += 2) {
8409 	        if (xmlStrEqual(atts[i], attname)) {
8410 		    xmlErrAttributeDup(ctxt, NULL, attname);
8411 		    xmlFree(attvalue);
8412 		    goto failed;
8413 		}
8414 	    }
8415 	    /*
8416 	     * Add the pair to atts
8417 	     */
8418 	    if (atts == NULL) {
8419 	        maxatts = 22; /* allow for 10 attrs by default */
8420 	        atts = (const xmlChar **)
8421 		       xmlMalloc(maxatts * sizeof(xmlChar *));
8422 		if (atts == NULL) {
8423 		    xmlErrMemory(ctxt);
8424 		    if (attvalue != NULL)
8425 			xmlFree(attvalue);
8426 		    goto failed;
8427 		}
8428 		ctxt->atts = atts;
8429 		ctxt->maxatts = maxatts;
8430 	    } else if (nbatts + 4 > maxatts) {
8431 	        const xmlChar **n;
8432 
8433 	        maxatts *= 2;
8434 	        n = (const xmlChar **) xmlRealloc((void *) atts,
8435 					     maxatts * sizeof(const xmlChar *));
8436 		if (n == NULL) {
8437 		    xmlErrMemory(ctxt);
8438 		    if (attvalue != NULL)
8439 			xmlFree(attvalue);
8440 		    goto failed;
8441 		}
8442 		atts = n;
8443 		ctxt->atts = atts;
8444 		ctxt->maxatts = maxatts;
8445 	    }
8446 	    atts[nbatts++] = attname;
8447 	    atts[nbatts++] = attvalue;
8448 	    atts[nbatts] = NULL;
8449 	    atts[nbatts + 1] = NULL;
8450 	} else {
8451 	    if (attvalue != NULL)
8452 		xmlFree(attvalue);
8453 	}
8454 
8455 failed:
8456 
8457 	GROW
8458 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8459 	    break;
8460 	if (SKIP_BLANKS == 0) {
8461 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8462 			   "attributes construct error\n");
8463 	}
8464 	SHRINK;
8465         GROW;
8466     }
8467 
8468     /*
8469      * SAX: Start of Element !
8470      */
8471     if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8472 	(!ctxt->disableSAX)) {
8473 	if (nbatts > 0)
8474 	    ctxt->sax->startElement(ctxt->userData, name, atts);
8475 	else
8476 	    ctxt->sax->startElement(ctxt->userData, name, NULL);
8477     }
8478 
8479     if (atts != NULL) {
8480         /* Free only the content strings */
8481         for (i = 1;i < nbatts;i+=2)
8482 	    if (atts[i] != NULL)
8483 	       xmlFree((xmlChar *) atts[i]);
8484     }
8485     return(name);
8486 }
8487 
8488 /**
8489  * xmlParseEndTag1:
8490  * @ctxt:  an XML parser context
8491  * @line:  line of the start tag
8492  * @nsNr:  number of namespaces on the start tag
8493  *
8494  * Parse an end tag. Always consumes '</'.
8495  *
8496  * [42] ETag ::= '</' Name S? '>'
8497  *
8498  * With namespace
8499  *
8500  * [NS 9] ETag ::= '</' QName S? '>'
8501  */
8502 
8503 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8504 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8505     const xmlChar *name;
8506 
8507     GROW;
8508     if ((RAW != '<') || (NXT(1) != '/')) {
8509 	xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8510 		       "xmlParseEndTag: '</' not found\n");
8511 	return;
8512     }
8513     SKIP(2);
8514 
8515     name = xmlParseNameAndCompare(ctxt,ctxt->name);
8516 
8517     /*
8518      * We should definitely be at the ending "S? '>'" part
8519      */
8520     GROW;
8521     SKIP_BLANKS;
8522     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8523 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8524     } else
8525 	NEXT1;
8526 
8527     /*
8528      * [ WFC: Element Type Match ]
8529      * The Name in an element's end-tag must match the element type in the
8530      * start-tag.
8531      *
8532      */
8533     if (name != (xmlChar*)1) {
8534         if (name == NULL) name = BAD_CAST "unparsable";
8535         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8536 		     "Opening and ending tag mismatch: %s line %d and %s\n",
8537 		                ctxt->name, line, name);
8538     }
8539 
8540     /*
8541      * SAX: End of Tag
8542      */
8543     if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8544 	(!ctxt->disableSAX))
8545         ctxt->sax->endElement(ctxt->userData, ctxt->name);
8546 
8547     namePop(ctxt);
8548     spacePop(ctxt);
8549     return;
8550 }
8551 
8552 /**
8553  * xmlParseEndTag:
8554  * @ctxt:  an XML parser context
8555  *
8556  * DEPRECATED: Internal function, don't use.
8557  *
8558  * parse an end of tag
8559  *
8560  * [42] ETag ::= '</' Name S? '>'
8561  *
8562  * With namespace
8563  *
8564  * [NS 9] ETag ::= '</' QName S? '>'
8565  */
8566 
8567 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8568 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8569     xmlParseEndTag1(ctxt, 0);
8570 }
8571 #endif /* LIBXML_SAX1_ENABLED */
8572 
8573 /************************************************************************
8574  *									*
8575  *		      SAX 2 specific operations				*
8576  *									*
8577  ************************************************************************/
8578 
8579 /**
8580  * xmlParseQNameHashed:
8581  * @ctxt:  an XML parser context
8582  * @prefix:  pointer to store the prefix part
8583  *
8584  * parse an XML Namespace QName
8585  *
8586  * [6]  QName  ::= (Prefix ':')? LocalPart
8587  * [7]  Prefix  ::= NCName
8588  * [8]  LocalPart  ::= NCName
8589  *
8590  * Returns the Name parsed or NULL
8591  */
8592 
8593 static xmlHashedString
xmlParseQNameHashed(xmlParserCtxtPtr ctxt,xmlHashedString * prefix)8594 xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8595     xmlHashedString l, p;
8596     int start, isNCName = 0;
8597 
8598     l.name = NULL;
8599     p.name = NULL;
8600 
8601     GROW;
8602     start = CUR_PTR - BASE_PTR;
8603 
8604     l = xmlParseNCName(ctxt);
8605     if (l.name != NULL) {
8606         isNCName = 1;
8607         if (CUR == ':') {
8608             NEXT;
8609             p = l;
8610             l = xmlParseNCName(ctxt);
8611         }
8612     }
8613     if ((l.name == NULL) || (CUR == ':')) {
8614         xmlChar *tmp;
8615 
8616         l.name = NULL;
8617         p.name = NULL;
8618         if ((isNCName == 0) && (CUR != ':'))
8619             return(l);
8620         tmp = xmlParseNmtoken(ctxt);
8621         if (tmp != NULL)
8622             xmlFree(tmp);
8623         l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8624                                 CUR_PTR - (BASE_PTR + start));
8625         if (l.name == NULL) {
8626             xmlErrMemory(ctxt);
8627             return(l);
8628         }
8629         xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8630                  "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8631     }
8632 
8633     *prefix = p;
8634     return(l);
8635 }
8636 
8637 /**
8638  * xmlParseQName:
8639  * @ctxt:  an XML parser context
8640  * @prefix:  pointer to store the prefix part
8641  *
8642  * parse an XML Namespace QName
8643  *
8644  * [6]  QName  ::= (Prefix ':')? LocalPart
8645  * [7]  Prefix  ::= NCName
8646  * [8]  LocalPart  ::= NCName
8647  *
8648  * Returns the Name parsed or NULL
8649  */
8650 
8651 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8652 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8653     xmlHashedString n, p;
8654 
8655     n = xmlParseQNameHashed(ctxt, &p);
8656     if (n.name == NULL)
8657         return(NULL);
8658     *prefix = p.name;
8659     return(n.name);
8660 }
8661 
8662 /**
8663  * xmlParseQNameAndCompare:
8664  * @ctxt:  an XML parser context
8665  * @name:  the localname
8666  * @prefix:  the prefix, if any.
8667  *
8668  * parse an XML name and compares for match
8669  * (specialized for endtag parsing)
8670  *
8671  * Returns NULL for an illegal name, (xmlChar*) 1 for success
8672  * and the name for mismatch
8673  */
8674 
8675 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8676 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8677                         xmlChar const *prefix) {
8678     const xmlChar *cmp;
8679     const xmlChar *in;
8680     const xmlChar *ret;
8681     const xmlChar *prefix2;
8682 
8683     if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8684 
8685     GROW;
8686     in = ctxt->input->cur;
8687 
8688     cmp = prefix;
8689     while (*in != 0 && *in == *cmp) {
8690 	++in;
8691 	++cmp;
8692     }
8693     if ((*cmp == 0) && (*in == ':')) {
8694         in++;
8695 	cmp = name;
8696 	while (*in != 0 && *in == *cmp) {
8697 	    ++in;
8698 	    ++cmp;
8699 	}
8700 	if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8701 	    /* success */
8702             ctxt->input->col += in - ctxt->input->cur;
8703 	    ctxt->input->cur = in;
8704 	    return((const xmlChar*) 1);
8705 	}
8706     }
8707     /*
8708      * all strings coms from the dictionary, equality can be done directly
8709      */
8710     ret = xmlParseQName (ctxt, &prefix2);
8711     if (ret == NULL)
8712         return(NULL);
8713     if ((ret == name) && (prefix == prefix2))
8714 	return((const xmlChar*) 1);
8715     return ret;
8716 }
8717 
8718 /**
8719  * xmlParseAttribute2:
8720  * @ctxt:  an XML parser context
8721  * @pref:  the element prefix
8722  * @elem:  the element name
8723  * @prefix:  a xmlChar ** used to store the value of the attribute prefix
8724  * @value:  a xmlChar ** used to store the value of the attribute
8725  * @len:  an int * to save the length of the attribute
8726  * @alloc:  an int * to indicate if the attribute was allocated
8727  *
8728  * parse an attribute in the new SAX2 framework.
8729  *
8730  * Returns the attribute name, and the value in *value, .
8731  */
8732 
8733 static xmlHashedString
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,xmlHashedString * hprefix,xmlChar ** value,int * len,int * alloc)8734 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8735                    const xmlChar * pref, const xmlChar * elem,
8736                    xmlHashedString * hprefix, xmlChar ** value,
8737                    int *len, int *alloc)
8738 {
8739     xmlHashedString hname;
8740     const xmlChar *prefix, *name;
8741     xmlChar *val = NULL, *internal_val = NULL;
8742     int normalize = 0;
8743     int isNamespace;
8744 
8745     *value = NULL;
8746     GROW;
8747     hname = xmlParseQNameHashed(ctxt, hprefix);
8748     if (hname.name == NULL) {
8749         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8750                        "error parsing attribute name\n");
8751         return(hname);
8752     }
8753     name = hname.name;
8754     if (hprefix->name != NULL)
8755         prefix = hprefix->name;
8756     else
8757         prefix = NULL;
8758 
8759     /*
8760      * get the type if needed
8761      */
8762     if (ctxt->attsSpecial != NULL) {
8763         int type;
8764 
8765         type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
8766                                                  pref, elem,
8767                                                  prefix, name);
8768         if (type != 0)
8769             normalize = 1;
8770     }
8771 
8772     /*
8773      * read the value
8774      */
8775     SKIP_BLANKS;
8776     if (RAW == '=') {
8777         NEXT;
8778         SKIP_BLANKS;
8779         isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8780                        (prefix == ctxt->str_xmlns));
8781         val = xmlParseAttValueInternal(ctxt, len, alloc, normalize,
8782                                        isNamespace);
8783         if (val == NULL)
8784             goto error;
8785     } else {
8786         xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8787                           "Specification mandates value for attribute %s\n",
8788                           name);
8789         goto error;
8790     }
8791 
8792     if (prefix == ctxt->str_xml) {
8793         /*
8794          * Check that xml:lang conforms to the specification
8795          * No more registered as an error, just generate a warning now
8796          * since this was deprecated in XML second edition
8797          */
8798         if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8799             internal_val = xmlStrndup(val, *len);
8800             if (internal_val == NULL)
8801                 goto mem_error;
8802             if (!xmlCheckLanguageID(internal_val)) {
8803                 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8804                               "Malformed value for xml:lang : %s\n",
8805                               internal_val, NULL);
8806             }
8807         }
8808 
8809         /*
8810          * Check that xml:space conforms to the specification
8811          */
8812         if (xmlStrEqual(name, BAD_CAST "space")) {
8813             internal_val = xmlStrndup(val, *len);
8814             if (internal_val == NULL)
8815                 goto mem_error;
8816             if (xmlStrEqual(internal_val, BAD_CAST "default"))
8817                 *(ctxt->space) = 0;
8818             else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8819                 *(ctxt->space) = 1;
8820             else {
8821                 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8822                               "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8823                               internal_val, NULL);
8824             }
8825         }
8826         if (internal_val) {
8827             xmlFree(internal_val);
8828         }
8829     }
8830 
8831     *value = val;
8832     return (hname);
8833 
8834 mem_error:
8835     xmlErrMemory(ctxt);
8836 error:
8837     if ((val != NULL) && (*alloc != 0))
8838         xmlFree(val);
8839     return(hname);
8840 }
8841 
8842 /**
8843  * xmlAttrHashInsert:
8844  * @ctxt: parser context
8845  * @size: size of the hash table
8846  * @name: attribute name
8847  * @uri: namespace uri
8848  * @hashValue: combined hash value of name and uri
8849  * @aindex: attribute index (this is a multiple of 5)
8850  *
8851  * Inserts a new attribute into the hash table.
8852  *
8853  * Returns INT_MAX if no existing attribute was found, the attribute
8854  * index if an attribute was found, -1 if a memory allocation failed.
8855  */
8856 static int
xmlAttrHashInsert(xmlParserCtxtPtr ctxt,unsigned size,const xmlChar * name,const xmlChar * uri,unsigned hashValue,int aindex)8857 xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8858                   const xmlChar *uri, unsigned hashValue, int aindex) {
8859     xmlAttrHashBucket *table = ctxt->attrHash;
8860     xmlAttrHashBucket *bucket;
8861     unsigned hindex;
8862 
8863     hindex = hashValue & (size - 1);
8864     bucket = &table[hindex];
8865 
8866     while (bucket->index >= 0) {
8867         const xmlChar **atts = &ctxt->atts[bucket->index];
8868 
8869         if (name == atts[0]) {
8870             int nsIndex = (int) (ptrdiff_t) atts[2];
8871 
8872             if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8873                 (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8874                 (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8875                 return(bucket->index);
8876         }
8877 
8878         hindex++;
8879         bucket++;
8880         if (hindex >= size) {
8881             hindex = 0;
8882             bucket = table;
8883         }
8884     }
8885 
8886     bucket->index = aindex;
8887 
8888     return(INT_MAX);
8889 }
8890 
8891 /**
8892  * xmlParseStartTag2:
8893  * @ctxt:  an XML parser context
8894  *
8895  * Parse a start tag. Always consumes '<'.
8896  *
8897  * This routine is called when running SAX2 parsing
8898  *
8899  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8900  *
8901  * [ WFC: Unique Att Spec ]
8902  * No attribute name may appear more than once in the same start-tag or
8903  * empty-element tag.
8904  *
8905  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8906  *
8907  * [ WFC: Unique Att Spec ]
8908  * No attribute name may appear more than once in the same start-tag or
8909  * empty-element tag.
8910  *
8911  * With namespace:
8912  *
8913  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8914  *
8915  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8916  *
8917  * Returns the element name parsed
8918  */
8919 
8920 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * nbNsPtr)8921 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8922                   const xmlChar **URI, int *nbNsPtr) {
8923     xmlHashedString hlocalname;
8924     xmlHashedString hprefix;
8925     xmlHashedString hattname;
8926     xmlHashedString haprefix;
8927     const xmlChar *localname;
8928     const xmlChar *prefix;
8929     const xmlChar *attname;
8930     const xmlChar *aprefix;
8931     const xmlChar *uri;
8932     xmlChar *attvalue = NULL;
8933     const xmlChar **atts = ctxt->atts;
8934     unsigned attrHashSize = 0;
8935     int maxatts = ctxt->maxatts;
8936     int nratts, nbatts, nbdef;
8937     int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8938     int alloc = 0;
8939 
8940     if (RAW != '<') return(NULL);
8941     NEXT1;
8942 
8943     nbatts = 0;
8944     nratts = 0;
8945     nbdef = 0;
8946     nbNs = 0;
8947     nbTotalDef = 0;
8948     attval = 0;
8949 
8950     if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8951         xmlErrMemory(ctxt);
8952         return(NULL);
8953     }
8954 
8955     hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8956     if (hlocalname.name == NULL) {
8957 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8958 		       "StartTag: invalid element name\n");
8959         return(NULL);
8960     }
8961     localname = hlocalname.name;
8962     prefix = hprefix.name;
8963 
8964     /*
8965      * Now parse the attributes, it ends up with the ending
8966      *
8967      * (S Attribute)* S?
8968      */
8969     SKIP_BLANKS;
8970     GROW;
8971 
8972     /*
8973      * The ctxt->atts array will be ultimately passed to the SAX callback
8974      * containing five xmlChar pointers for each attribute:
8975      *
8976      * [0] attribute name
8977      * [1] attribute prefix
8978      * [2] namespace URI
8979      * [3] attribute value
8980      * [4] end of attribute value
8981      *
8982      * To save memory, we reuse this array temporarily and store integers
8983      * in these pointer variables.
8984      *
8985      * [0] attribute name
8986      * [1] attribute prefix
8987      * [2] hash value of attribute prefix, and later namespace index
8988      * [3] for non-allocated values: ptrdiff_t offset into input buffer
8989      * [4] for non-allocated values: ptrdiff_t offset into input buffer
8990      *
8991      * The ctxt->attallocs array contains an additional unsigned int for
8992      * each attribute, containing the hash value of the attribute name
8993      * and the alloc flag in bit 31.
8994      */
8995 
8996     while (((RAW != '>') &&
8997 	   ((RAW != '/') || (NXT(1) != '>')) &&
8998 	   (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8999 	int len = -1;
9000 
9001 	hattname = xmlParseAttribute2(ctxt, prefix, localname,
9002                                           &haprefix, &attvalue, &len,
9003                                           &alloc);
9004         if (hattname.name == NULL)
9005 	    break;
9006         if (attvalue == NULL)
9007             goto next_attr;
9008         attname = hattname.name;
9009         aprefix = haprefix.name;
9010 	if (len < 0) len = xmlStrlen(attvalue);
9011 
9012         if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9013             xmlHashedString huri;
9014             xmlURIPtr parsedUri;
9015 
9016             huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9017             uri = huri.name;
9018             if (uri == NULL) {
9019                 xmlErrMemory(ctxt);
9020                 goto next_attr;
9021             }
9022             if (*uri != 0) {
9023                 if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9024                     xmlErrMemory(ctxt);
9025                     goto next_attr;
9026                 }
9027                 if (parsedUri == NULL) {
9028                     xmlNsErr(ctxt, XML_WAR_NS_URI,
9029                              "xmlns: '%s' is not a valid URI\n",
9030                                        uri, NULL, NULL);
9031                 } else {
9032                     if (parsedUri->scheme == NULL) {
9033                         xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9034                                   "xmlns: URI %s is not absolute\n",
9035                                   uri, NULL, NULL);
9036                     }
9037                     xmlFreeURI(parsedUri);
9038                 }
9039                 if (uri == ctxt->str_xml_ns) {
9040                     if (attname != ctxt->str_xml) {
9041                         xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9042                      "xml namespace URI cannot be the default namespace\n",
9043                                  NULL, NULL, NULL);
9044                     }
9045                     goto next_attr;
9046                 }
9047                 if ((len == 29) &&
9048                     (xmlStrEqual(uri,
9049                              BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9050                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9051                          "reuse of the xmlns namespace name is forbidden\n",
9052                              NULL, NULL, NULL);
9053                     goto next_attr;
9054                 }
9055             }
9056 
9057             if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
9058                 nbNs++;
9059         } else if (aprefix == ctxt->str_xmlns) {
9060             xmlHashedString huri;
9061             xmlURIPtr parsedUri;
9062 
9063             huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9064             uri = huri.name;
9065             if (uri == NULL) {
9066                 xmlErrMemory(ctxt);
9067                 goto next_attr;
9068             }
9069 
9070             if (attname == ctxt->str_xml) {
9071                 if (uri != ctxt->str_xml_ns) {
9072                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9073                              "xml namespace prefix mapped to wrong URI\n",
9074                              NULL, NULL, NULL);
9075                 }
9076                 /*
9077                  * Do not keep a namespace definition node
9078                  */
9079                 goto next_attr;
9080             }
9081             if (uri == ctxt->str_xml_ns) {
9082                 if (attname != ctxt->str_xml) {
9083                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9084                              "xml namespace URI mapped to wrong prefix\n",
9085                              NULL, NULL, NULL);
9086                 }
9087                 goto next_attr;
9088             }
9089             if (attname == ctxt->str_xmlns) {
9090                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9091                          "redefinition of the xmlns prefix is forbidden\n",
9092                          NULL, NULL, NULL);
9093                 goto next_attr;
9094             }
9095             if ((len == 29) &&
9096                 (xmlStrEqual(uri,
9097                              BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9098                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9099                          "reuse of the xmlns namespace name is forbidden\n",
9100                          NULL, NULL, NULL);
9101                 goto next_attr;
9102             }
9103             if ((uri == NULL) || (uri[0] == 0)) {
9104                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9105                          "xmlns:%s: Empty XML namespace is not allowed\n",
9106                               attname, NULL, NULL);
9107                 goto next_attr;
9108             } else {
9109                 if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9110                     xmlErrMemory(ctxt);
9111                     goto next_attr;
9112                 }
9113                 if (parsedUri == NULL) {
9114                     xmlNsErr(ctxt, XML_WAR_NS_URI,
9115                          "xmlns:%s: '%s' is not a valid URI\n",
9116                                        attname, uri, NULL);
9117                 } else {
9118                     if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
9119                         xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9120                                   "xmlns:%s: URI %s is not absolute\n",
9121                                   attname, uri, NULL);
9122                     }
9123                     xmlFreeURI(parsedUri);
9124                 }
9125             }
9126 
9127             if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
9128                 nbNs++;
9129         } else {
9130             /*
9131              * Populate attributes array, see above for repurposing
9132              * of xmlChar pointers.
9133              */
9134             if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9135                 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9136                     goto next_attr;
9137                 }
9138                 maxatts = ctxt->maxatts;
9139                 atts = ctxt->atts;
9140             }
9141             ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9142                                         ((unsigned) alloc << 31);
9143             atts[nbatts++] = attname;
9144             atts[nbatts++] = aprefix;
9145             atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue;
9146             if (alloc) {
9147                 atts[nbatts++] = attvalue;
9148                 attvalue += len;
9149                 atts[nbatts++] = attvalue;
9150             } else {
9151                 /*
9152                  * attvalue points into the input buffer which can be
9153                  * reallocated. Store differences to input->base instead.
9154                  * The pointers will be reconstructed later.
9155                  */
9156                 atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9157                 attvalue += len;
9158                 atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9159             }
9160             /*
9161              * tag if some deallocation is needed
9162              */
9163             if (alloc != 0) attval = 1;
9164             attvalue = NULL; /* moved into atts */
9165         }
9166 
9167 next_attr:
9168         if ((attvalue != NULL) && (alloc != 0)) {
9169             xmlFree(attvalue);
9170             attvalue = NULL;
9171         }
9172 
9173 	GROW
9174 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9175 	    break;
9176 	if (SKIP_BLANKS == 0) {
9177 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9178 			   "attributes construct error\n");
9179 	    break;
9180 	}
9181         GROW;
9182     }
9183 
9184     /*
9185      * Namespaces from default attributes
9186      */
9187     if (ctxt->attsDefault != NULL) {
9188         xmlDefAttrsPtr defaults;
9189 
9190 	defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9191 	if (defaults != NULL) {
9192 	    for (i = 0; i < defaults->nbAttrs; i++) {
9193                 xmlDefAttr *attr = &defaults->attrs[i];
9194 
9195 	        attname = attr->name.name;
9196 		aprefix = attr->prefix.name;
9197 
9198 		if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9199                     xmlParserEntityCheck(ctxt, attr->expandedSize);
9200 
9201                     if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9202                         nbNs++;
9203 		} else if (aprefix == ctxt->str_xmlns) {
9204                     xmlParserEntityCheck(ctxt, attr->expandedSize);
9205 
9206                     if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9207                                       NULL, 1) > 0)
9208                         nbNs++;
9209 		} else {
9210                     nbTotalDef += 1;
9211                 }
9212 	    }
9213 	}
9214     }
9215 
9216     /*
9217      * Resolve attribute namespaces
9218      */
9219     for (i = 0; i < nbatts; i += 5) {
9220         attname = atts[i];
9221         aprefix = atts[i+1];
9222 
9223         /*
9224 	* The default namespace does not apply to attribute names.
9225 	*/
9226 	if (aprefix == NULL) {
9227             nsIndex = NS_INDEX_EMPTY;
9228         } else if (aprefix == ctxt->str_xml) {
9229             nsIndex = NS_INDEX_XML;
9230         } else {
9231             haprefix.name = aprefix;
9232             haprefix.hashValue = (size_t) atts[i+2];
9233             nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9234 
9235 	    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9236                 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9237 		    "Namespace prefix %s for %s on %s is not defined\n",
9238 		    aprefix, attname, localname);
9239                 nsIndex = NS_INDEX_EMPTY;
9240             }
9241         }
9242 
9243         atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex;
9244     }
9245 
9246     /*
9247      * Maximum number of attributes including default attributes.
9248      */
9249     maxAtts = nratts + nbTotalDef;
9250 
9251     /*
9252      * Verify that attribute names are unique.
9253      */
9254     if (maxAtts > 1) {
9255         attrHashSize = 4;
9256         while (attrHashSize / 2 < (unsigned) maxAtts)
9257             attrHashSize *= 2;
9258 
9259         if (attrHashSize > ctxt->attrHashMax) {
9260             xmlAttrHashBucket *tmp;
9261 
9262             tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9263             if (tmp == NULL) {
9264                 xmlErrMemory(ctxt);
9265                 goto done;
9266             }
9267 
9268             ctxt->attrHash = tmp;
9269             ctxt->attrHashMax = attrHashSize;
9270         }
9271 
9272         memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9273 
9274         for (i = 0, j = 0; j < nratts; i += 5, j++) {
9275             const xmlChar *nsuri;
9276             unsigned hashValue, nameHashValue, uriHashValue;
9277             int res;
9278 
9279             attname = atts[i];
9280             aprefix = atts[i+1];
9281             nsIndex = (ptrdiff_t) atts[i+2];
9282             /* Hash values always have bit 31 set, see dict.c */
9283             nameHashValue = ctxt->attallocs[j] | 0x80000000;
9284 
9285             if (nsIndex == NS_INDEX_EMPTY) {
9286                 /*
9287                  * Prefix with empty namespace means an undeclared
9288                  * prefix which was already reported above.
9289                  */
9290                 if (aprefix != NULL)
9291                     continue;
9292                 nsuri = NULL;
9293                 uriHashValue = URI_HASH_EMPTY;
9294             } else if (nsIndex == NS_INDEX_XML) {
9295                 nsuri = ctxt->str_xml_ns;
9296                 uriHashValue = URI_HASH_XML;
9297             } else {
9298                 nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9299                 uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9300             }
9301 
9302             hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9303             res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9304                                     hashValue, i);
9305             if (res < 0)
9306                 continue;
9307 
9308             /*
9309              * [ WFC: Unique Att Spec ]
9310              * No attribute name may appear more than once in the same
9311              * start-tag or empty-element tag.
9312              * As extended by the Namespace in XML REC.
9313              */
9314             if (res < INT_MAX) {
9315                 if (aprefix == atts[res+1]) {
9316                     xmlErrAttributeDup(ctxt, aprefix, attname);
9317                 } else {
9318                     xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9319                              "Namespaced Attribute %s in '%s' redefined\n",
9320                              attname, nsuri, NULL);
9321                 }
9322             }
9323         }
9324     }
9325 
9326     /*
9327      * Default attributes
9328      */
9329     if (ctxt->attsDefault != NULL) {
9330         xmlDefAttrsPtr defaults;
9331 
9332 	defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9333 	if (defaults != NULL) {
9334 	    for (i = 0; i < defaults->nbAttrs; i++) {
9335                 xmlDefAttr *attr = &defaults->attrs[i];
9336                 const xmlChar *nsuri;
9337                 unsigned hashValue, uriHashValue;
9338                 int res;
9339 
9340 	        attname = attr->name.name;
9341 		aprefix = attr->prefix.name;
9342 
9343 		if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9344                     continue;
9345 		if (aprefix == ctxt->str_xmlns)
9346                     continue;
9347 
9348                 if (aprefix == NULL) {
9349                     nsIndex = NS_INDEX_EMPTY;
9350                     nsuri = NULL;
9351                     uriHashValue = URI_HASH_EMPTY;
9352                 } if (aprefix == ctxt->str_xml) {
9353                     nsIndex = NS_INDEX_XML;
9354                     nsuri = ctxt->str_xml_ns;
9355                     uriHashValue = URI_HASH_XML;
9356                 } else if (aprefix != NULL) {
9357                     nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9358                     if ((nsIndex == INT_MAX) ||
9359                         (nsIndex < ctxt->nsdb->minNsIndex)) {
9360                         xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9361                                  "Namespace prefix %s for %s on %s is not "
9362                                  "defined\n",
9363                                  aprefix, attname, localname);
9364                         nsIndex = NS_INDEX_EMPTY;
9365                         nsuri = NULL;
9366                         uriHashValue = URI_HASH_EMPTY;
9367                     } else {
9368                         nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9369                         uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9370                     }
9371                 }
9372 
9373                 /*
9374                  * Check whether the attribute exists
9375                  */
9376                 if (maxAtts > 1) {
9377                     hashValue = xmlDictCombineHash(attr->name.hashValue,
9378                                                    uriHashValue);
9379                     res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9380                                             hashValue, nbatts);
9381                     if (res < 0)
9382                         continue;
9383                     if (res < INT_MAX) {
9384                         if (aprefix == atts[res+1])
9385                             continue;
9386                         xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9387                                  "Namespaced Attribute %s in '%s' redefined\n",
9388                                  attname, nsuri, NULL);
9389                     }
9390                 }
9391 
9392                 xmlParserEntityCheck(ctxt, attr->expandedSize);
9393 
9394                 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9395                     if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9396                         localname = NULL;
9397                         goto done;
9398                     }
9399                     maxatts = ctxt->maxatts;
9400                     atts = ctxt->atts;
9401                 }
9402 
9403                 atts[nbatts++] = attname;
9404                 atts[nbatts++] = aprefix;
9405                 atts[nbatts++] = (const xmlChar *) (ptrdiff_t) nsIndex;
9406                 atts[nbatts++] = attr->value.name;
9407                 atts[nbatts++] = attr->valueEnd;
9408                 if ((ctxt->standalone == 1) && (attr->external != 0)) {
9409                     xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9410                             "standalone: attribute %s on %s defaulted "
9411                             "from external subset\n",
9412                             attname, localname);
9413                 }
9414                 nbdef++;
9415 	    }
9416 	}
9417     }
9418 
9419     /*
9420      * Reconstruct attribute pointers
9421      */
9422     for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9423         /* namespace URI */
9424         nsIndex = (ptrdiff_t) atts[i+2];
9425         if (nsIndex == INT_MAX)
9426             atts[i+2] = NULL;
9427         else if (nsIndex == INT_MAX - 1)
9428             atts[i+2] = ctxt->str_xml_ns;
9429         else
9430             atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9431 
9432         if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9433             atts[i+3] = BASE_PTR + (ptrdiff_t) atts[i+3];  /* value */
9434             atts[i+4] = BASE_PTR + (ptrdiff_t) atts[i+4];  /* valuend */
9435         }
9436     }
9437 
9438     uri = xmlParserNsLookupUri(ctxt, &hprefix);
9439     if ((prefix != NULL) && (uri == NULL)) {
9440 	xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9441 	         "Namespace prefix %s on %s is not defined\n",
9442 		 prefix, localname, NULL);
9443     }
9444     *pref = prefix;
9445     *URI = uri;
9446 
9447     /*
9448      * SAX callback
9449      */
9450     if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9451 	(!ctxt->disableSAX)) {
9452 	if (nbNs > 0)
9453 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9454                           nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9455 			  nbatts / 5, nbdef, atts);
9456 	else
9457 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9458                           0, NULL, nbatts / 5, nbdef, atts);
9459     }
9460 
9461 done:
9462     /*
9463      * Free allocated attribute values
9464      */
9465     if (attval != 0) {
9466 	for (i = 0, j = 0; j < nratts; i += 5, j++)
9467 	    if (ctxt->attallocs[j] & 0x80000000)
9468 	        xmlFree((xmlChar *) atts[i+3]);
9469     }
9470 
9471     *nbNsPtr = nbNs;
9472     return(localname);
9473 }
9474 
9475 /**
9476  * xmlParseEndTag2:
9477  * @ctxt:  an XML parser context
9478  * @line:  line of the start tag
9479  * @nsNr:  number of namespaces on the start tag
9480  *
9481  * Parse an end tag. Always consumes '</'.
9482  *
9483  * [42] ETag ::= '</' Name S? '>'
9484  *
9485  * With namespace
9486  *
9487  * [NS 9] ETag ::= '</' QName S? '>'
9488  */
9489 
9490 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlStartTag * tag)9491 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9492     const xmlChar *name;
9493 
9494     GROW;
9495     if ((RAW != '<') || (NXT(1) != '/')) {
9496 	xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9497 	return;
9498     }
9499     SKIP(2);
9500 
9501     if (tag->prefix == NULL)
9502         name = xmlParseNameAndCompare(ctxt, ctxt->name);
9503     else
9504         name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9505 
9506     /*
9507      * We should definitely be at the ending "S? '>'" part
9508      */
9509     GROW;
9510     SKIP_BLANKS;
9511     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9512 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9513     } else
9514 	NEXT1;
9515 
9516     /*
9517      * [ WFC: Element Type Match ]
9518      * The Name in an element's end-tag must match the element type in the
9519      * start-tag.
9520      *
9521      */
9522     if (name != (xmlChar*)1) {
9523         if (name == NULL) name = BAD_CAST "unparsable";
9524         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9525 		     "Opening and ending tag mismatch: %s line %d and %s\n",
9526 		                ctxt->name, tag->line, name);
9527     }
9528 
9529     /*
9530      * SAX: End of Tag
9531      */
9532     if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9533 	(!ctxt->disableSAX))
9534 	ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9535                                 tag->URI);
9536 
9537     spacePop(ctxt);
9538     if (tag->nsNr != 0)
9539 	xmlParserNsPop(ctxt, tag->nsNr);
9540 }
9541 
9542 /**
9543  * xmlParseCDSect:
9544  * @ctxt:  an XML parser context
9545  *
9546  * DEPRECATED: Internal function, don't use.
9547  *
9548  * Parse escaped pure raw content. Always consumes '<!['.
9549  *
9550  * [18] CDSect ::= CDStart CData CDEnd
9551  *
9552  * [19] CDStart ::= '<![CDATA['
9553  *
9554  * [20] Data ::= (Char* - (Char* ']]>' Char*))
9555  *
9556  * [21] CDEnd ::= ']]>'
9557  */
9558 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9559 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9560     xmlChar *buf = NULL;
9561     int len = 0;
9562     int size = XML_PARSER_BUFFER_SIZE;
9563     int r, rl;
9564     int	s, sl;
9565     int cur, l;
9566     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9567                     XML_MAX_HUGE_LENGTH :
9568                     XML_MAX_TEXT_LENGTH;
9569 
9570     if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9571         return;
9572     SKIP(3);
9573 
9574     if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9575         return;
9576     SKIP(6);
9577 
9578     r = CUR_CHAR(rl);
9579     if (!IS_CHAR(r)) {
9580 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9581         goto out;
9582     }
9583     NEXTL(rl);
9584     s = CUR_CHAR(sl);
9585     if (!IS_CHAR(s)) {
9586 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9587         goto out;
9588     }
9589     NEXTL(sl);
9590     cur = CUR_CHAR(l);
9591     buf = (xmlChar *) xmlMallocAtomic(size);
9592     if (buf == NULL) {
9593 	xmlErrMemory(ctxt);
9594         goto out;
9595     }
9596     while (IS_CHAR(cur) &&
9597            ((r != ']') || (s != ']') || (cur != '>'))) {
9598 	if (len + 5 >= size) {
9599 	    xmlChar *tmp;
9600 
9601 	    tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9602 	    if (tmp == NULL) {
9603 		xmlErrMemory(ctxt);
9604                 goto out;
9605 	    }
9606 	    buf = tmp;
9607 	    size *= 2;
9608 	}
9609 	COPY_BUF(buf, len, r);
9610         if (len > maxLength) {
9611             xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9612                            "CData section too big found\n");
9613             goto out;
9614         }
9615 	r = s;
9616 	rl = sl;
9617 	s = cur;
9618 	sl = l;
9619 	NEXTL(l);
9620 	cur = CUR_CHAR(l);
9621     }
9622     buf[len] = 0;
9623     if (cur != '>') {
9624 	xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9625 	                     "CData section not finished\n%.50s\n", buf);
9626         goto out;
9627     }
9628     NEXTL(l);
9629 
9630     /*
9631      * OK the buffer is to be consumed as cdata.
9632      */
9633     if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9634 	if (ctxt->sax->cdataBlock != NULL)
9635 	    ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9636 	else if (ctxt->sax->characters != NULL)
9637 	    ctxt->sax->characters(ctxt->userData, buf, len);
9638     }
9639 
9640 out:
9641     xmlFree(buf);
9642 }
9643 
9644 /**
9645  * xmlParseContentInternal:
9646  * @ctxt:  an XML parser context
9647  *
9648  * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9649  * unexpected EOF to the caller.
9650  */
9651 
9652 static void
xmlParseContentInternal(xmlParserCtxtPtr ctxt)9653 xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9654     int oldNameNr = ctxt->nameNr;
9655     int oldSpaceNr = ctxt->spaceNr;
9656     int oldNodeNr = ctxt->nodeNr;
9657 
9658     GROW;
9659     while ((ctxt->input->cur < ctxt->input->end) &&
9660 	   (PARSER_STOPPED(ctxt) == 0)) {
9661 	const xmlChar *cur = ctxt->input->cur;
9662 
9663 	/*
9664 	 * First case : a Processing Instruction.
9665 	 */
9666 	if ((*cur == '<') && (cur[1] == '?')) {
9667 	    xmlParsePI(ctxt);
9668 	}
9669 
9670 	/*
9671 	 * Second case : a CDSection
9672 	 */
9673 	/* 2.6.0 test was *cur not RAW */
9674 	else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9675 	    xmlParseCDSect(ctxt);
9676 	}
9677 
9678 	/*
9679 	 * Third case :  a comment
9680 	 */
9681 	else if ((*cur == '<') && (NXT(1) == '!') &&
9682 		 (NXT(2) == '-') && (NXT(3) == '-')) {
9683 	    xmlParseComment(ctxt);
9684 	}
9685 
9686 	/*
9687 	 * Fourth case :  a sub-element.
9688 	 */
9689 	else if (*cur == '<') {
9690             if (NXT(1) == '/') {
9691                 if (ctxt->nameNr <= oldNameNr)
9692                     break;
9693 	        xmlParseElementEnd(ctxt);
9694             } else {
9695 	        xmlParseElementStart(ctxt);
9696             }
9697 	}
9698 
9699 	/*
9700 	 * Fifth case : a reference. If if has not been resolved,
9701 	 *    parsing returns it's Name, create the node
9702 	 */
9703 
9704 	else if (*cur == '&') {
9705 	    xmlParseReference(ctxt);
9706 	}
9707 
9708 	/*
9709 	 * Last case, text. Note that References are handled directly.
9710 	 */
9711 	else {
9712 	    xmlParseCharDataInternal(ctxt, 0);
9713 	}
9714 
9715 	SHRINK;
9716 	GROW;
9717     }
9718 
9719     if ((ctxt->nameNr > oldNameNr) &&
9720         (ctxt->input->cur >= ctxt->input->end) &&
9721         (ctxt->wellFormed)) {
9722         const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9723         int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9724         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9725                 "Premature end of data in tag %s line %d\n",
9726                 name, line, NULL);
9727     }
9728 
9729     /*
9730      * Clean up in error case
9731      */
9732 
9733     while (ctxt->nodeNr > oldNodeNr)
9734         nodePop(ctxt);
9735 
9736     while (ctxt->nameNr > oldNameNr) {
9737         xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9738 
9739         if (tag->nsNr != 0)
9740             xmlParserNsPop(ctxt, tag->nsNr);
9741 
9742         namePop(ctxt);
9743     }
9744 
9745     while (ctxt->spaceNr > oldSpaceNr)
9746         spacePop(ctxt);
9747 }
9748 
9749 /**
9750  * xmlParseContent:
9751  * @ctxt:  an XML parser context
9752  *
9753  * Parse XML element content. This is useful if you're only interested
9754  * in custom SAX callbacks. If you want a node list, use
9755  * xmlParseInNodeContext.
9756  */
9757 void
xmlParseContent(xmlParserCtxtPtr ctxt)9758 xmlParseContent(xmlParserCtxtPtr ctxt) {
9759     if ((ctxt == NULL) || (ctxt->input == NULL))
9760         return;
9761 
9762     xmlCtxtInitializeLate(ctxt);
9763 
9764     xmlParseContentInternal(ctxt);
9765 
9766     if (ctxt->input->cur < ctxt->input->end)
9767 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
9768 }
9769 
9770 /**
9771  * xmlParseElement:
9772  * @ctxt:  an XML parser context
9773  *
9774  * DEPRECATED: Internal function, don't use.
9775  *
9776  * parse an XML element
9777  *
9778  * [39] element ::= EmptyElemTag | STag content ETag
9779  *
9780  * [ WFC: Element Type Match ]
9781  * The Name in an element's end-tag must match the element type in the
9782  * start-tag.
9783  *
9784  */
9785 
9786 void
xmlParseElement(xmlParserCtxtPtr ctxt)9787 xmlParseElement(xmlParserCtxtPtr ctxt) {
9788     if (xmlParseElementStart(ctxt) != 0)
9789         return;
9790 
9791     xmlParseContentInternal(ctxt);
9792 
9793     if (ctxt->input->cur >= ctxt->input->end) {
9794         if (ctxt->wellFormed) {
9795             const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9796             int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9797             xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9798                     "Premature end of data in tag %s line %d\n",
9799                     name, line, NULL);
9800         }
9801         return;
9802     }
9803 
9804     xmlParseElementEnd(ctxt);
9805 }
9806 
9807 /**
9808  * xmlParseElementStart:
9809  * @ctxt:  an XML parser context
9810  *
9811  * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9812  * opening tag was parsed, 1 if an empty element was parsed.
9813  *
9814  * Always consumes '<'.
9815  */
9816 static int
xmlParseElementStart(xmlParserCtxtPtr ctxt)9817 xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9818     int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9819     const xmlChar *name;
9820     const xmlChar *prefix = NULL;
9821     const xmlChar *URI = NULL;
9822     xmlParserNodeInfo node_info;
9823     int line;
9824     xmlNodePtr cur;
9825     int nbNs = 0;
9826 
9827     if (ctxt->nameNr > maxDepth) {
9828         xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9829                 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9830                 ctxt->nameNr);
9831 	xmlHaltParser(ctxt);
9832 	return(-1);
9833     }
9834 
9835     /* Capture start position */
9836     if (ctxt->record_info) {
9837         node_info.begin_pos = ctxt->input->consumed +
9838                           (CUR_PTR - ctxt->input->base);
9839 	node_info.begin_line = ctxt->input->line;
9840     }
9841 
9842     if (ctxt->spaceNr == 0)
9843 	spacePush(ctxt, -1);
9844     else if (*ctxt->space == -2)
9845 	spacePush(ctxt, -1);
9846     else
9847 	spacePush(ctxt, *ctxt->space);
9848 
9849     line = ctxt->input->line;
9850 #ifdef LIBXML_SAX1_ENABLED
9851     if (ctxt->sax2)
9852 #endif /* LIBXML_SAX1_ENABLED */
9853         name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9854 #ifdef LIBXML_SAX1_ENABLED
9855     else
9856 	name = xmlParseStartTag(ctxt);
9857 #endif /* LIBXML_SAX1_ENABLED */
9858     if (name == NULL) {
9859 	spacePop(ctxt);
9860         return(-1);
9861     }
9862     nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9863     cur = ctxt->node;
9864 
9865 #ifdef LIBXML_VALID_ENABLED
9866     /*
9867      * [ VC: Root Element Type ]
9868      * The Name in the document type declaration must match the element
9869      * type of the root element.
9870      */
9871     if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9872         ctxt->node && (ctxt->node == ctxt->myDoc->children))
9873         ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9874 #endif /* LIBXML_VALID_ENABLED */
9875 
9876     /*
9877      * Check for an Empty Element.
9878      */
9879     if ((RAW == '/') && (NXT(1) == '>')) {
9880         SKIP(2);
9881 	if (ctxt->sax2) {
9882 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9883 		(!ctxt->disableSAX))
9884 		ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9885 #ifdef LIBXML_SAX1_ENABLED
9886 	} else {
9887 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9888 		(!ctxt->disableSAX))
9889 		ctxt->sax->endElement(ctxt->userData, name);
9890 #endif /* LIBXML_SAX1_ENABLED */
9891 	}
9892 	namePop(ctxt);
9893 	spacePop(ctxt);
9894 	if (nbNs > 0)
9895 	    xmlParserNsPop(ctxt, nbNs);
9896 	if (cur != NULL && ctxt->record_info) {
9897             node_info.node = cur;
9898             node_info.end_pos = ctxt->input->consumed +
9899                                 (CUR_PTR - ctxt->input->base);
9900             node_info.end_line = ctxt->input->line;
9901             xmlParserAddNodeInfo(ctxt, &node_info);
9902 	}
9903 	return(1);
9904     }
9905     if (RAW == '>') {
9906         NEXT1;
9907         if (cur != NULL && ctxt->record_info) {
9908             node_info.node = cur;
9909             node_info.end_pos = 0;
9910             node_info.end_line = 0;
9911             xmlParserAddNodeInfo(ctxt, &node_info);
9912         }
9913     } else {
9914         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9915 		     "Couldn't find end of Start Tag %s line %d\n",
9916 		                name, line, NULL);
9917 
9918 	/*
9919 	 * end of parsing of this node.
9920 	 */
9921 	nodePop(ctxt);
9922 	namePop(ctxt);
9923 	spacePop(ctxt);
9924 	if (nbNs > 0)
9925 	    xmlParserNsPop(ctxt, nbNs);
9926 	return(-1);
9927     }
9928 
9929     return(0);
9930 }
9931 
9932 /**
9933  * xmlParseElementEnd:
9934  * @ctxt:  an XML parser context
9935  *
9936  * Parse the end of an XML element. Always consumes '</'.
9937  */
9938 static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt)9939 xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9940     xmlNodePtr cur = ctxt->node;
9941 
9942     if (ctxt->nameNr <= 0) {
9943         if ((RAW == '<') && (NXT(1) == '/'))
9944             SKIP(2);
9945         return;
9946     }
9947 
9948     /*
9949      * parse the end of tag: '</' should be here.
9950      */
9951     if (ctxt->sax2) {
9952 	xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9953 	namePop(ctxt);
9954     }
9955 #ifdef LIBXML_SAX1_ENABLED
9956     else
9957 	xmlParseEndTag1(ctxt, 0);
9958 #endif /* LIBXML_SAX1_ENABLED */
9959 
9960     /*
9961      * Capture end position
9962      */
9963     if (cur != NULL && ctxt->record_info) {
9964         xmlParserNodeInfoPtr node_info;
9965 
9966         node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
9967         if (node_info != NULL) {
9968             node_info->end_pos = ctxt->input->consumed +
9969                                  (CUR_PTR - ctxt->input->base);
9970             node_info->end_line = ctxt->input->line;
9971         }
9972     }
9973 }
9974 
9975 /**
9976  * xmlParseVersionNum:
9977  * @ctxt:  an XML parser context
9978  *
9979  * DEPRECATED: Internal function, don't use.
9980  *
9981  * parse the XML version value.
9982  *
9983  * [26] VersionNum ::= '1.' [0-9]+
9984  *
9985  * In practice allow [0-9].[0-9]+ at that level
9986  *
9987  * Returns the string giving the XML version number, or NULL
9988  */
9989 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)9990 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9991     xmlChar *buf = NULL;
9992     int len = 0;
9993     int size = 10;
9994     xmlChar cur;
9995 
9996     buf = (xmlChar *) xmlMallocAtomic(size);
9997     if (buf == NULL) {
9998 	xmlErrMemory(ctxt);
9999 	return(NULL);
10000     }
10001     cur = CUR;
10002     if (!((cur >= '0') && (cur <= '9'))) {
10003 	xmlFree(buf);
10004 	return(NULL);
10005     }
10006     buf[len++] = cur;
10007     NEXT;
10008     cur=CUR;
10009     if (cur != '.') {
10010 	xmlFree(buf);
10011 	return(NULL);
10012     }
10013     buf[len++] = cur;
10014     NEXT;
10015     cur=CUR;
10016     while ((cur >= '0') && (cur <= '9')) {
10017 	if (len + 1 >= size) {
10018 	    xmlChar *tmp;
10019 
10020 	    size *= 2;
10021 	    tmp = (xmlChar *) xmlRealloc(buf, size);
10022 	    if (tmp == NULL) {
10023 	        xmlFree(buf);
10024 		xmlErrMemory(ctxt);
10025 		return(NULL);
10026 	    }
10027 	    buf = tmp;
10028 	}
10029 	buf[len++] = cur;
10030 	NEXT;
10031 	cur=CUR;
10032     }
10033     buf[len] = 0;
10034     return(buf);
10035 }
10036 
10037 /**
10038  * xmlParseVersionInfo:
10039  * @ctxt:  an XML parser context
10040  *
10041  * DEPRECATED: Internal function, don't use.
10042  *
10043  * parse the XML version.
10044  *
10045  * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10046  *
10047  * [25] Eq ::= S? '=' S?
10048  *
10049  * Returns the version string, e.g. "1.0"
10050  */
10051 
10052 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10053 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10054     xmlChar *version = NULL;
10055 
10056     if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10057 	SKIP(7);
10058 	SKIP_BLANKS;
10059 	if (RAW != '=') {
10060 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10061 	    return(NULL);
10062         }
10063 	NEXT;
10064 	SKIP_BLANKS;
10065 	if (RAW == '"') {
10066 	    NEXT;
10067 	    version = xmlParseVersionNum(ctxt);
10068 	    if (RAW != '"') {
10069 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10070 	    } else
10071 	        NEXT;
10072 	} else if (RAW == '\''){
10073 	    NEXT;
10074 	    version = xmlParseVersionNum(ctxt);
10075 	    if (RAW != '\'') {
10076 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10077 	    } else
10078 	        NEXT;
10079 	} else {
10080 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10081 	}
10082     }
10083     return(version);
10084 }
10085 
10086 /**
10087  * xmlParseEncName:
10088  * @ctxt:  an XML parser context
10089  *
10090  * DEPRECATED: Internal function, don't use.
10091  *
10092  * parse the XML encoding name
10093  *
10094  * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10095  *
10096  * Returns the encoding name value or NULL
10097  */
10098 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10099 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10100     xmlChar *buf = NULL;
10101     int len = 0;
10102     int size = 10;
10103     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10104                     XML_MAX_TEXT_LENGTH :
10105                     XML_MAX_NAME_LENGTH;
10106     xmlChar cur;
10107 
10108     cur = CUR;
10109     if (((cur >= 'a') && (cur <= 'z')) ||
10110         ((cur >= 'A') && (cur <= 'Z'))) {
10111 	buf = (xmlChar *) xmlMallocAtomic(size);
10112 	if (buf == NULL) {
10113 	    xmlErrMemory(ctxt);
10114 	    return(NULL);
10115 	}
10116 
10117 	buf[len++] = cur;
10118 	NEXT;
10119 	cur = CUR;
10120 	while (((cur >= 'a') && (cur <= 'z')) ||
10121 	       ((cur >= 'A') && (cur <= 'Z')) ||
10122 	       ((cur >= '0') && (cur <= '9')) ||
10123 	       (cur == '.') || (cur == '_') ||
10124 	       (cur == '-')) {
10125 	    if (len + 1 >= size) {
10126 	        xmlChar *tmp;
10127 
10128 		size *= 2;
10129 		tmp = (xmlChar *) xmlRealloc(buf, size);
10130 		if (tmp == NULL) {
10131 		    xmlErrMemory(ctxt);
10132 		    xmlFree(buf);
10133 		    return(NULL);
10134 		}
10135 		buf = tmp;
10136 	    }
10137 	    buf[len++] = cur;
10138             if (len > maxLength) {
10139                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10140                 xmlFree(buf);
10141                 return(NULL);
10142             }
10143 	    NEXT;
10144 	    cur = CUR;
10145         }
10146 	buf[len] = 0;
10147     } else {
10148 	xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10149     }
10150     return(buf);
10151 }
10152 
10153 /**
10154  * xmlParseEncodingDecl:
10155  * @ctxt:  an XML parser context
10156  *
10157  * DEPRECATED: Internal function, don't use.
10158  *
10159  * parse the XML encoding declaration
10160  *
10161  * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10162  *
10163  * this setups the conversion filters.
10164  *
10165  * Returns the encoding value or NULL
10166  */
10167 
10168 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10169 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10170     xmlChar *encoding = NULL;
10171 
10172     SKIP_BLANKS;
10173     if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10174         return(NULL);
10175 
10176     SKIP(8);
10177     SKIP_BLANKS;
10178     if (RAW != '=') {
10179         xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10180         return(NULL);
10181     }
10182     NEXT;
10183     SKIP_BLANKS;
10184     if (RAW == '"') {
10185         NEXT;
10186         encoding = xmlParseEncName(ctxt);
10187         if (RAW != '"') {
10188             xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10189             xmlFree((xmlChar *) encoding);
10190             return(NULL);
10191         } else
10192             NEXT;
10193     } else if (RAW == '\''){
10194         NEXT;
10195         encoding = xmlParseEncName(ctxt);
10196         if (RAW != '\'') {
10197             xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10198             xmlFree((xmlChar *) encoding);
10199             return(NULL);
10200         } else
10201             NEXT;
10202     } else {
10203         xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10204     }
10205 
10206     if (encoding == NULL)
10207         return(NULL);
10208 
10209     xmlSetDeclaredEncoding(ctxt, encoding);
10210 
10211     return(ctxt->encoding);
10212 }
10213 
10214 /**
10215  * xmlParseSDDecl:
10216  * @ctxt:  an XML parser context
10217  *
10218  * DEPRECATED: Internal function, don't use.
10219  *
10220  * parse the XML standalone declaration
10221  *
10222  * [32] SDDecl ::= S 'standalone' Eq
10223  *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10224  *
10225  * [ VC: Standalone Document Declaration ]
10226  * TODO The standalone document declaration must have the value "no"
10227  * if any external markup declarations contain declarations of:
10228  *  - attributes with default values, if elements to which these
10229  *    attributes apply appear in the document without specifications
10230  *    of values for these attributes, or
10231  *  - entities (other than amp, lt, gt, apos, quot), if references
10232  *    to those entities appear in the document, or
10233  *  - attributes with values subject to normalization, where the
10234  *    attribute appears in the document with a value which will change
10235  *    as a result of normalization, or
10236  *  - element types with element content, if white space occurs directly
10237  *    within any instance of those types.
10238  *
10239  * Returns:
10240  *   1 if standalone="yes"
10241  *   0 if standalone="no"
10242  *  -2 if standalone attribute is missing or invalid
10243  *	  (A standalone value of -2 means that the XML declaration was found,
10244  *	   but no value was specified for the standalone attribute).
10245  */
10246 
10247 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10248 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10249     int standalone = -2;
10250 
10251     SKIP_BLANKS;
10252     if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10253 	SKIP(10);
10254         SKIP_BLANKS;
10255 	if (RAW != '=') {
10256 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10257 	    return(standalone);
10258         }
10259 	NEXT;
10260 	SKIP_BLANKS;
10261         if (RAW == '\''){
10262 	    NEXT;
10263 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10264 	        standalone = 0;
10265                 SKIP(2);
10266 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10267 	               (NXT(2) == 's')) {
10268 	        standalone = 1;
10269 		SKIP(3);
10270             } else {
10271 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10272 	    }
10273 	    if (RAW != '\'') {
10274 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10275 	    } else
10276 	        NEXT;
10277 	} else if (RAW == '"'){
10278 	    NEXT;
10279 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10280 	        standalone = 0;
10281 		SKIP(2);
10282 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10283 	               (NXT(2) == 's')) {
10284 	        standalone = 1;
10285                 SKIP(3);
10286             } else {
10287 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10288 	    }
10289 	    if (RAW != '"') {
10290 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10291 	    } else
10292 	        NEXT;
10293 	} else {
10294 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10295         }
10296     }
10297     return(standalone);
10298 }
10299 
10300 /**
10301  * xmlParseXMLDecl:
10302  * @ctxt:  an XML parser context
10303  *
10304  * DEPRECATED: Internal function, don't use.
10305  *
10306  * parse an XML declaration header
10307  *
10308  * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10309  */
10310 
10311 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10312 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10313     xmlChar *version;
10314 
10315     /*
10316      * This value for standalone indicates that the document has an
10317      * XML declaration but it does not have a standalone attribute.
10318      * It will be overwritten later if a standalone attribute is found.
10319      */
10320 
10321     ctxt->standalone = -2;
10322 
10323     /*
10324      * We know that '<?xml' is here.
10325      */
10326     SKIP(5);
10327 
10328     if (!IS_BLANK_CH(RAW)) {
10329 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10330 	               "Blank needed after '<?xml'\n");
10331     }
10332     SKIP_BLANKS;
10333 
10334     /*
10335      * We must have the VersionInfo here.
10336      */
10337     version = xmlParseVersionInfo(ctxt);
10338     if (version == NULL) {
10339 	xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10340     } else {
10341 	if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10342 	    /*
10343 	     * Changed here for XML-1.0 5th edition
10344 	     */
10345 	    if (ctxt->options & XML_PARSE_OLD10) {
10346 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10347 			          "Unsupported version '%s'\n",
10348 			          version);
10349 	    } else {
10350 	        if ((version[0] == '1') && ((version[1] == '.'))) {
10351 		    xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10352 		                  "Unsupported version '%s'\n",
10353 				  version, NULL);
10354 		} else {
10355 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10356 				      "Unsupported version '%s'\n",
10357 				      version);
10358 		}
10359 	    }
10360 	}
10361 	if (ctxt->version != NULL)
10362 	    xmlFree((void *) ctxt->version);
10363 	ctxt->version = version;
10364     }
10365 
10366     /*
10367      * We may have the encoding declaration
10368      */
10369     if (!IS_BLANK_CH(RAW)) {
10370         if ((RAW == '?') && (NXT(1) == '>')) {
10371 	    SKIP(2);
10372 	    return;
10373 	}
10374 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10375     }
10376     xmlParseEncodingDecl(ctxt);
10377 
10378     /*
10379      * We may have the standalone status.
10380      */
10381     if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10382         if ((RAW == '?') && (NXT(1) == '>')) {
10383 	    SKIP(2);
10384 	    return;
10385 	}
10386 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10387     }
10388 
10389     /*
10390      * We can grow the input buffer freely at that point
10391      */
10392     GROW;
10393 
10394     SKIP_BLANKS;
10395     ctxt->standalone = xmlParseSDDecl(ctxt);
10396 
10397     SKIP_BLANKS;
10398     if ((RAW == '?') && (NXT(1) == '>')) {
10399         SKIP(2);
10400     } else if (RAW == '>') {
10401         /* Deprecated old WD ... */
10402 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10403 	NEXT;
10404     } else {
10405         int c;
10406 
10407 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10408         while ((PARSER_STOPPED(ctxt) == 0) &&
10409                ((c = CUR) != 0)) {
10410             NEXT;
10411             if (c == '>')
10412                 break;
10413         }
10414     }
10415 }
10416 
10417 /**
10418  * xmlParseMisc:
10419  * @ctxt:  an XML parser context
10420  *
10421  * DEPRECATED: Internal function, don't use.
10422  *
10423  * parse an XML Misc* optional field.
10424  *
10425  * [27] Misc ::= Comment | PI |  S
10426  */
10427 
10428 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10429 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10430     while (PARSER_STOPPED(ctxt) == 0) {
10431         SKIP_BLANKS;
10432         GROW;
10433         if ((RAW == '<') && (NXT(1) == '?')) {
10434 	    xmlParsePI(ctxt);
10435         } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10436 	    xmlParseComment(ctxt);
10437         } else {
10438             break;
10439         }
10440     }
10441 }
10442 
10443 static void
xmlFinishDocument(xmlParserCtxtPtr ctxt)10444 xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10445     xmlDocPtr doc;
10446 
10447     /*
10448      * SAX: end of the document processing.
10449      */
10450     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10451         ctxt->sax->endDocument(ctxt->userData);
10452 
10453     doc = ctxt->myDoc;
10454     if (doc != NULL) {
10455         if (ctxt->wellFormed) {
10456             doc->properties |= XML_DOC_WELLFORMED;
10457             if (ctxt->valid)
10458                 doc->properties |= XML_DOC_DTDVALID;
10459             if (ctxt->nsWellFormed)
10460                 doc->properties |= XML_DOC_NSVALID;
10461         }
10462 
10463         if (ctxt->options & XML_PARSE_OLD10)
10464             doc->properties |= XML_DOC_OLD10;
10465 
10466         /*
10467          * Remove locally kept entity definitions if the tree was not built
10468          */
10469 	if (xmlStrEqual(doc->version, SAX_COMPAT_MODE)) {
10470             xmlFreeDoc(doc);
10471             ctxt->myDoc = NULL;
10472         }
10473     }
10474 }
10475 
10476 /**
10477  * xmlParseDocument:
10478  * @ctxt:  an XML parser context
10479  *
10480  * Parse an XML document and invoke the SAX handlers. This is useful
10481  * if you're only interested in custom SAX callbacks. If you want a
10482  * document tree, use xmlCtxtParseDocument.
10483  *
10484  * Returns 0, -1 in case of error.
10485  */
10486 
10487 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10488 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10489     if ((ctxt == NULL) || (ctxt->input == NULL))
10490         return(-1);
10491 
10492     GROW;
10493 
10494     /*
10495      * SAX: detecting the level.
10496      */
10497     xmlCtxtInitializeLate(ctxt);
10498 
10499     /*
10500      * Document locator is unused. Only for backward compatibility.
10501      */
10502     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10503         xmlSAXLocator copy = xmlDefaultSAXLocator;
10504         ctxt->sax->setDocumentLocator(ctxt->userData, &copy);
10505     }
10506 
10507     xmlDetectEncoding(ctxt);
10508 
10509     if (CUR == 0) {
10510 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10511 	return(-1);
10512     }
10513 
10514     GROW;
10515     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10516 
10517 	/*
10518 	 * Note that we will switch encoding on the fly.
10519 	 */
10520 	xmlParseXMLDecl(ctxt);
10521 	SKIP_BLANKS;
10522     } else {
10523 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10524         if (ctxt->version == NULL) {
10525             xmlErrMemory(ctxt);
10526             return(-1);
10527         }
10528     }
10529     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10530         ctxt->sax->startDocument(ctxt->userData);
10531     if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10532         (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10533 	ctxt->myDoc->compression = ctxt->input->buf->compressed;
10534     }
10535 
10536     /*
10537      * The Misc part of the Prolog
10538      */
10539     xmlParseMisc(ctxt);
10540 
10541     /*
10542      * Then possibly doc type declaration(s) and more Misc
10543      * (doctypedecl Misc*)?
10544      */
10545     GROW;
10546     if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10547 
10548 	ctxt->inSubset = 1;
10549 	xmlParseDocTypeDecl(ctxt);
10550 	if (RAW == '[') {
10551 	    xmlParseInternalSubset(ctxt);
10552 	}
10553 
10554 	/*
10555 	 * Create and update the external subset.
10556 	 */
10557 	ctxt->inSubset = 2;
10558 	if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10559 	    (!ctxt->disableSAX))
10560 	    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10561 	                              ctxt->extSubSystem, ctxt->extSubURI);
10562 	ctxt->inSubset = 0;
10563 
10564         xmlCleanSpecialAttr(ctxt);
10565 
10566 	xmlParseMisc(ctxt);
10567     }
10568 
10569     /*
10570      * Time to start parsing the tree itself
10571      */
10572     GROW;
10573     if (RAW != '<') {
10574         if (ctxt->wellFormed)
10575             xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10576                            "Start tag expected, '<' not found\n");
10577     } else {
10578 	xmlParseElement(ctxt);
10579 
10580 	/*
10581 	 * The Misc part at the end
10582 	 */
10583 	xmlParseMisc(ctxt);
10584 
10585         if (ctxt->input->cur < ctxt->input->end) {
10586             if (ctxt->wellFormed)
10587 	        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10588         } else if ((ctxt->input->buf != NULL) &&
10589                    (ctxt->input->buf->encoder != NULL) &&
10590                    (ctxt->input->buf->error == 0) &&
10591                    (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
10592             xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
10593                            "Truncated multi-byte sequence at EOF\n");
10594         }
10595     }
10596 
10597     ctxt->instate = XML_PARSER_EOF;
10598     xmlFinishDocument(ctxt);
10599 
10600     if (! ctxt->wellFormed) {
10601 	ctxt->valid = 0;
10602 	return(-1);
10603     }
10604 
10605     return(0);
10606 }
10607 
10608 /**
10609  * xmlParseExtParsedEnt:
10610  * @ctxt:  an XML parser context
10611  *
10612  * parse a general parsed entity
10613  * An external general parsed entity is well-formed if it matches the
10614  * production labeled extParsedEnt.
10615  *
10616  * [78] extParsedEnt ::= TextDecl? content
10617  *
10618  * Returns 0, -1 in case of error. the parser context is augmented
10619  *                as a result of the parsing.
10620  */
10621 
10622 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10623 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10624     if ((ctxt == NULL) || (ctxt->input == NULL))
10625         return(-1);
10626 
10627     xmlCtxtInitializeLate(ctxt);
10628 
10629     /*
10630      * Document locator is unused. Only for backward compatibility.
10631      */
10632     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10633         xmlSAXLocator copy = xmlDefaultSAXLocator;
10634         ctxt->sax->setDocumentLocator(ctxt->userData, &copy);
10635     }
10636 
10637     xmlDetectEncoding(ctxt);
10638 
10639     if (CUR == 0) {
10640 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10641     }
10642 
10643     /*
10644      * Check for the XMLDecl in the Prolog.
10645      */
10646     GROW;
10647     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10648 
10649 	/*
10650 	 * Note that we will switch encoding on the fly.
10651 	 */
10652 	xmlParseXMLDecl(ctxt);
10653 	SKIP_BLANKS;
10654     } else {
10655 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10656     }
10657     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10658         ctxt->sax->startDocument(ctxt->userData);
10659 
10660     /*
10661      * Doing validity checking on chunk doesn't make sense
10662      */
10663     ctxt->options &= ~XML_PARSE_DTDVALID;
10664     ctxt->validate = 0;
10665     ctxt->depth = 0;
10666 
10667     xmlParseContentInternal(ctxt);
10668 
10669     if (ctxt->input->cur < ctxt->input->end)
10670 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10671 
10672     /*
10673      * SAX: end of the document processing.
10674      */
10675     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10676         ctxt->sax->endDocument(ctxt->userData);
10677 
10678     if (! ctxt->wellFormed) return(-1);
10679     return(0);
10680 }
10681 
10682 #ifdef LIBXML_PUSH_ENABLED
10683 /************************************************************************
10684  *									*
10685  *		Progressive parsing interfaces				*
10686  *									*
10687  ************************************************************************/
10688 
10689 /**
10690  * xmlParseLookupChar:
10691  * @ctxt:  an XML parser context
10692  * @c:  character
10693  *
10694  * Check whether the input buffer contains a character.
10695  */
10696 static int
xmlParseLookupChar(xmlParserCtxtPtr ctxt,int c)10697 xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10698     const xmlChar *cur;
10699 
10700     if (ctxt->checkIndex == 0) {
10701         cur = ctxt->input->cur + 1;
10702     } else {
10703         cur = ctxt->input->cur + ctxt->checkIndex;
10704     }
10705 
10706     if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10707         size_t index = ctxt->input->end - ctxt->input->cur;
10708 
10709         if (index > LONG_MAX) {
10710             ctxt->checkIndex = 0;
10711             return(1);
10712         }
10713         ctxt->checkIndex = index;
10714         return(0);
10715     } else {
10716         ctxt->checkIndex = 0;
10717         return(1);
10718     }
10719 }
10720 
10721 /**
10722  * xmlParseLookupString:
10723  * @ctxt:  an XML parser context
10724  * @startDelta: delta to apply at the start
10725  * @str:  string
10726  * @strLen:  length of string
10727  *
10728  * Check whether the input buffer contains a string.
10729  */
10730 static const xmlChar *
xmlParseLookupString(xmlParserCtxtPtr ctxt,size_t startDelta,const char * str,size_t strLen)10731 xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10732                      const char *str, size_t strLen) {
10733     const xmlChar *cur, *term;
10734 
10735     if (ctxt->checkIndex == 0) {
10736         cur = ctxt->input->cur + startDelta;
10737     } else {
10738         cur = ctxt->input->cur + ctxt->checkIndex;
10739     }
10740 
10741     term = BAD_CAST strstr((const char *) cur, str);
10742     if (term == NULL) {
10743         const xmlChar *end = ctxt->input->end;
10744         size_t index;
10745 
10746         /* Rescan (strLen - 1) characters. */
10747         if ((size_t) (end - cur) < strLen)
10748             end = cur;
10749         else
10750             end -= strLen - 1;
10751         index = end - ctxt->input->cur;
10752         if (index > LONG_MAX) {
10753             ctxt->checkIndex = 0;
10754             return(ctxt->input->end - strLen);
10755         }
10756         ctxt->checkIndex = index;
10757     } else {
10758         ctxt->checkIndex = 0;
10759     }
10760 
10761     return(term);
10762 }
10763 
10764 /**
10765  * xmlParseLookupCharData:
10766  * @ctxt:  an XML parser context
10767  *
10768  * Check whether the input buffer contains terminated char data.
10769  */
10770 static int
xmlParseLookupCharData(xmlParserCtxtPtr ctxt)10771 xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10772     const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10773     const xmlChar *end = ctxt->input->end;
10774     size_t index;
10775 
10776     while (cur < end) {
10777         if ((*cur == '<') || (*cur == '&')) {
10778             ctxt->checkIndex = 0;
10779             return(1);
10780         }
10781         cur++;
10782     }
10783 
10784     index = cur - ctxt->input->cur;
10785     if (index > LONG_MAX) {
10786         ctxt->checkIndex = 0;
10787         return(1);
10788     }
10789     ctxt->checkIndex = index;
10790     return(0);
10791 }
10792 
10793 /**
10794  * xmlParseLookupGt:
10795  * @ctxt:  an XML parser context
10796  *
10797  * Check whether there's enough data in the input buffer to finish parsing
10798  * a start tag. This has to take quotes into account.
10799  */
10800 static int
xmlParseLookupGt(xmlParserCtxtPtr ctxt)10801 xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10802     const xmlChar *cur;
10803     const xmlChar *end = ctxt->input->end;
10804     int state = ctxt->endCheckState;
10805     size_t index;
10806 
10807     if (ctxt->checkIndex == 0)
10808         cur = ctxt->input->cur + 1;
10809     else
10810         cur = ctxt->input->cur + ctxt->checkIndex;
10811 
10812     while (cur < end) {
10813         if (state) {
10814             if (*cur == state)
10815                 state = 0;
10816         } else if (*cur == '\'' || *cur == '"') {
10817             state = *cur;
10818         } else if (*cur == '>') {
10819             ctxt->checkIndex = 0;
10820             ctxt->endCheckState = 0;
10821             return(1);
10822         }
10823         cur++;
10824     }
10825 
10826     index = cur - ctxt->input->cur;
10827     if (index > LONG_MAX) {
10828         ctxt->checkIndex = 0;
10829         ctxt->endCheckState = 0;
10830         return(1);
10831     }
10832     ctxt->checkIndex = index;
10833     ctxt->endCheckState = state;
10834     return(0);
10835 }
10836 
10837 /**
10838  * xmlParseLookupInternalSubset:
10839  * @ctxt:  an XML parser context
10840  *
10841  * Check whether there's enough data in the input buffer to finish parsing
10842  * the internal subset.
10843  */
10844 static int
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt)10845 xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10846     /*
10847      * Sorry, but progressive parsing of the internal subset is not
10848      * supported. We first check that the full content of the internal
10849      * subset is available and parsing is launched only at that point.
10850      * Internal subset ends with "']' S? '>'" in an unescaped section and
10851      * not in a ']]>' sequence which are conditional sections.
10852      */
10853     const xmlChar *cur, *start;
10854     const xmlChar *end = ctxt->input->end;
10855     int state = ctxt->endCheckState;
10856     size_t index;
10857 
10858     if (ctxt->checkIndex == 0) {
10859         cur = ctxt->input->cur + 1;
10860     } else {
10861         cur = ctxt->input->cur + ctxt->checkIndex;
10862     }
10863     start = cur;
10864 
10865     while (cur < end) {
10866         if (state == '-') {
10867             if ((*cur == '-') &&
10868                 (cur[1] == '-') &&
10869                 (cur[2] == '>')) {
10870                 state = 0;
10871                 cur += 3;
10872                 start = cur;
10873                 continue;
10874             }
10875         }
10876         else if (state == ']') {
10877             if (*cur == '>') {
10878                 ctxt->checkIndex = 0;
10879                 ctxt->endCheckState = 0;
10880                 return(1);
10881             }
10882             if (IS_BLANK_CH(*cur)) {
10883                 state = ' ';
10884             } else if (*cur != ']') {
10885                 state = 0;
10886                 start = cur;
10887                 continue;
10888             }
10889         }
10890         else if (state == ' ') {
10891             if (*cur == '>') {
10892                 ctxt->checkIndex = 0;
10893                 ctxt->endCheckState = 0;
10894                 return(1);
10895             }
10896             if (!IS_BLANK_CH(*cur)) {
10897                 state = 0;
10898                 start = cur;
10899                 continue;
10900             }
10901         }
10902         else if (state != 0) {
10903             if (*cur == state) {
10904                 state = 0;
10905                 start = cur + 1;
10906             }
10907         }
10908         else if (*cur == '<') {
10909             if ((cur[1] == '!') &&
10910                 (cur[2] == '-') &&
10911                 (cur[3] == '-')) {
10912                 state = '-';
10913                 cur += 4;
10914                 /* Don't treat <!--> as comment */
10915                 start = cur;
10916                 continue;
10917             }
10918         }
10919         else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10920             state = *cur;
10921         }
10922 
10923         cur++;
10924     }
10925 
10926     /*
10927      * Rescan the three last characters to detect "<!--" and "-->"
10928      * split across chunks.
10929      */
10930     if ((state == 0) || (state == '-')) {
10931         if (cur - start < 3)
10932             cur = start;
10933         else
10934             cur -= 3;
10935     }
10936     index = cur - ctxt->input->cur;
10937     if (index > LONG_MAX) {
10938         ctxt->checkIndex = 0;
10939         ctxt->endCheckState = 0;
10940         return(1);
10941     }
10942     ctxt->checkIndex = index;
10943     ctxt->endCheckState = state;
10944     return(0);
10945 }
10946 
10947 /**
10948  * xmlCheckCdataPush:
10949  * @cur: pointer to the block of characters
10950  * @len: length of the block in bytes
10951  * @complete: 1 if complete CDATA block is passed in, 0 if partial block
10952  *
10953  * Check that the block of characters is okay as SCdata content [20]
10954  *
10955  * Returns the number of bytes to pass if okay, a negative index where an
10956  *         UTF-8 error occurred otherwise
10957  */
10958 static int
xmlCheckCdataPush(const xmlChar * utf,int len,int complete)10959 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
10960     int ix;
10961     unsigned char c;
10962     int codepoint;
10963 
10964     if ((utf == NULL) || (len <= 0))
10965         return(0);
10966 
10967     for (ix = 0; ix < len;) {      /* string is 0-terminated */
10968         c = utf[ix];
10969         if ((c & 0x80) == 0x00) {	/* 1-byte code, starts with 10 */
10970 	    if (c >= 0x20)
10971 		ix++;
10972 	    else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10973 	        ix++;
10974 	    else
10975 	        return(-ix);
10976 	} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10977 	    if (ix + 2 > len) return(complete ? -ix : ix);
10978 	    if ((utf[ix+1] & 0xc0 ) != 0x80)
10979 	        return(-ix);
10980 	    codepoint = (utf[ix] & 0x1f) << 6;
10981 	    codepoint |= utf[ix+1] & 0x3f;
10982 	    if (!xmlIsCharQ(codepoint))
10983 	        return(-ix);
10984 	    ix += 2;
10985 	} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10986 	    if (ix + 3 > len) return(complete ? -ix : ix);
10987 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
10988 	        ((utf[ix+2] & 0xc0) != 0x80))
10989 		    return(-ix);
10990 	    codepoint = (utf[ix] & 0xf) << 12;
10991 	    codepoint |= (utf[ix+1] & 0x3f) << 6;
10992 	    codepoint |= utf[ix+2] & 0x3f;
10993 	    if (!xmlIsCharQ(codepoint))
10994 	        return(-ix);
10995 	    ix += 3;
10996 	} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10997 	    if (ix + 4 > len) return(complete ? -ix : ix);
10998 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
10999 	        ((utf[ix+2] & 0xc0) != 0x80) ||
11000 		((utf[ix+3] & 0xc0) != 0x80))
11001 		    return(-ix);
11002 	    codepoint = (utf[ix] & 0x7) << 18;
11003 	    codepoint |= (utf[ix+1] & 0x3f) << 12;
11004 	    codepoint |= (utf[ix+2] & 0x3f) << 6;
11005 	    codepoint |= utf[ix+3] & 0x3f;
11006 	    if (!xmlIsCharQ(codepoint))
11007 	        return(-ix);
11008 	    ix += 4;
11009 	} else				/* unknown encoding */
11010 	    return(-ix);
11011       }
11012       return(ix);
11013 }
11014 
11015 /**
11016  * xmlParseTryOrFinish:
11017  * @ctxt:  an XML parser context
11018  * @terminate:  last chunk indicator
11019  *
11020  * Try to progress on parsing
11021  *
11022  * Returns zero if no parsing was possible
11023  */
11024 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11025 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11026     int ret = 0;
11027     size_t avail;
11028     xmlChar cur, next;
11029 
11030     if (ctxt->input == NULL)
11031         return(0);
11032 
11033     if ((ctxt->input != NULL) &&
11034         (ctxt->input->cur - ctxt->input->base > 4096)) {
11035         xmlParserShrink(ctxt);
11036     }
11037 
11038     while (ctxt->disableSAX == 0) {
11039         avail = ctxt->input->end - ctxt->input->cur;
11040         if (avail < 1)
11041 	    goto done;
11042         switch (ctxt->instate) {
11043             case XML_PARSER_EOF:
11044 	        /*
11045 		 * Document parsing is done !
11046 		 */
11047 	        goto done;
11048             case XML_PARSER_START:
11049                 /*
11050                  * Very first chars read from the document flow.
11051                  */
11052                 if ((!terminate) && (avail < 4))
11053                     goto done;
11054 
11055                 /*
11056                  * We need more bytes to detect EBCDIC code pages.
11057                  * See xmlDetectEBCDIC.
11058                  */
11059                 if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
11060                     (!terminate) && (avail < 200))
11061                     goto done;
11062 
11063                 xmlDetectEncoding(ctxt);
11064                 ctxt->instate = XML_PARSER_XML_DECL;
11065 		break;
11066 
11067             case XML_PARSER_XML_DECL:
11068 		if ((!terminate) && (avail < 2))
11069 		    goto done;
11070 		cur = ctxt->input->cur[0];
11071 		next = ctxt->input->cur[1];
11072 	        if ((cur == '<') && (next == '?')) {
11073 		    /* PI or XML decl */
11074 		    if ((!terminate) &&
11075                         (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11076 			goto done;
11077 		    if ((ctxt->input->cur[2] == 'x') &&
11078 			(ctxt->input->cur[3] == 'm') &&
11079 			(ctxt->input->cur[4] == 'l') &&
11080 			(IS_BLANK_CH(ctxt->input->cur[5]))) {
11081 			ret += 5;
11082 			xmlParseXMLDecl(ctxt);
11083 		    } else {
11084 			ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11085                         if (ctxt->version == NULL) {
11086                             xmlErrMemory(ctxt);
11087                             break;
11088                         }
11089 		    }
11090 		} else {
11091 		    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11092 		    if (ctxt->version == NULL) {
11093 		        xmlErrMemory(ctxt);
11094 			break;
11095 		    }
11096 		}
11097                 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
11098                     xmlSAXLocator copy = xmlDefaultSAXLocator;
11099                     ctxt->sax->setDocumentLocator(ctxt->userData, &copy);
11100                 }
11101                 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11102                     (!ctxt->disableSAX))
11103                     ctxt->sax->startDocument(ctxt->userData);
11104                 ctxt->instate = XML_PARSER_MISC;
11105 		break;
11106             case XML_PARSER_START_TAG: {
11107 	        const xmlChar *name;
11108 		const xmlChar *prefix = NULL;
11109 		const xmlChar *URI = NULL;
11110                 int line = ctxt->input->line;
11111 		int nbNs = 0;
11112 
11113 		if ((!terminate) && (avail < 2))
11114 		    goto done;
11115 		cur = ctxt->input->cur[0];
11116 	        if (cur != '<') {
11117 		    xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11118                                    "Start tag expected, '<' not found");
11119                     ctxt->instate = XML_PARSER_EOF;
11120                     xmlFinishDocument(ctxt);
11121 		    goto done;
11122 		}
11123 		if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11124                     goto done;
11125 		if (ctxt->spaceNr == 0)
11126 		    spacePush(ctxt, -1);
11127 		else if (*ctxt->space == -2)
11128 		    spacePush(ctxt, -1);
11129 		else
11130 		    spacePush(ctxt, *ctxt->space);
11131 #ifdef LIBXML_SAX1_ENABLED
11132 		if (ctxt->sax2)
11133 #endif /* LIBXML_SAX1_ENABLED */
11134 		    name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
11135 #ifdef LIBXML_SAX1_ENABLED
11136 		else
11137 		    name = xmlParseStartTag(ctxt);
11138 #endif /* LIBXML_SAX1_ENABLED */
11139 		if (name == NULL) {
11140 		    spacePop(ctxt);
11141                     ctxt->instate = XML_PARSER_EOF;
11142                     xmlFinishDocument(ctxt);
11143 		    goto done;
11144 		}
11145 #ifdef LIBXML_VALID_ENABLED
11146 		/*
11147 		 * [ VC: Root Element Type ]
11148 		 * The Name in the document type declaration must match
11149 		 * the element type of the root element.
11150 		 */
11151 		if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11152 		    ctxt->node && (ctxt->node == ctxt->myDoc->children))
11153 		    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11154 #endif /* LIBXML_VALID_ENABLED */
11155 
11156 		/*
11157 		 * Check for an Empty Element.
11158 		 */
11159 		if ((RAW == '/') && (NXT(1) == '>')) {
11160 		    SKIP(2);
11161 
11162 		    if (ctxt->sax2) {
11163 			if ((ctxt->sax != NULL) &&
11164 			    (ctxt->sax->endElementNs != NULL) &&
11165 			    (!ctxt->disableSAX))
11166 			    ctxt->sax->endElementNs(ctxt->userData, name,
11167 			                            prefix, URI);
11168 			if (nbNs > 0)
11169 			    xmlParserNsPop(ctxt, nbNs);
11170 #ifdef LIBXML_SAX1_ENABLED
11171 		    } else {
11172 			if ((ctxt->sax != NULL) &&
11173 			    (ctxt->sax->endElement != NULL) &&
11174 			    (!ctxt->disableSAX))
11175 			    ctxt->sax->endElement(ctxt->userData, name);
11176 #endif /* LIBXML_SAX1_ENABLED */
11177 		    }
11178 		    spacePop(ctxt);
11179 		} else if (RAW == '>') {
11180 		    NEXT;
11181                     nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11182 		} else {
11183 		    xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11184 					 "Couldn't find end of Start Tag %s\n",
11185 					 name);
11186 		    nodePop(ctxt);
11187 		    spacePop(ctxt);
11188                     if (nbNs > 0)
11189                         xmlParserNsPop(ctxt, nbNs);
11190 		}
11191 
11192                 if (ctxt->nameNr == 0)
11193                     ctxt->instate = XML_PARSER_EPILOG;
11194                 else
11195                     ctxt->instate = XML_PARSER_CONTENT;
11196                 break;
11197 	    }
11198             case XML_PARSER_CONTENT: {
11199 		cur = ctxt->input->cur[0];
11200 
11201 		if (cur == '<') {
11202                     if ((!terminate) && (avail < 2))
11203                         goto done;
11204 		    next = ctxt->input->cur[1];
11205 
11206                     if (next == '/') {
11207                         ctxt->instate = XML_PARSER_END_TAG;
11208                         break;
11209                     } else if (next == '?') {
11210                         if ((!terminate) &&
11211                             (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11212                             goto done;
11213                         xmlParsePI(ctxt);
11214                         ctxt->instate = XML_PARSER_CONTENT;
11215                         break;
11216                     } else if (next == '!') {
11217                         if ((!terminate) && (avail < 3))
11218                             goto done;
11219                         next = ctxt->input->cur[2];
11220 
11221                         if (next == '-') {
11222                             if ((!terminate) && (avail < 4))
11223                                 goto done;
11224                             if (ctxt->input->cur[3] == '-') {
11225                                 if ((!terminate) &&
11226                                     (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11227                                     goto done;
11228                                 xmlParseComment(ctxt);
11229                                 ctxt->instate = XML_PARSER_CONTENT;
11230                                 break;
11231                             }
11232                         } else if (next == '[') {
11233                             if ((!terminate) && (avail < 9))
11234                                 goto done;
11235                             if ((ctxt->input->cur[2] == '[') &&
11236                                 (ctxt->input->cur[3] == 'C') &&
11237                                 (ctxt->input->cur[4] == 'D') &&
11238                                 (ctxt->input->cur[5] == 'A') &&
11239                                 (ctxt->input->cur[6] == 'T') &&
11240                                 (ctxt->input->cur[7] == 'A') &&
11241                                 (ctxt->input->cur[8] == '[')) {
11242                                 SKIP(9);
11243                                 ctxt->instate = XML_PARSER_CDATA_SECTION;
11244                                 break;
11245                             }
11246                         }
11247                     }
11248 		} else if (cur == '&') {
11249 		    if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11250 			goto done;
11251 		    xmlParseReference(ctxt);
11252                     break;
11253 		} else {
11254 		    /* TODO Avoid the extra copy, handle directly !!! */
11255 		    /*
11256 		     * Goal of the following test is:
11257 		     *  - minimize calls to the SAX 'character' callback
11258 		     *    when they are mergeable
11259 		     *  - handle an problem for isBlank when we only parse
11260 		     *    a sequence of blank chars and the next one is
11261 		     *    not available to check against '<' presence.
11262 		     *  - tries to homogenize the differences in SAX
11263 		     *    callbacks between the push and pull versions
11264 		     *    of the parser.
11265 		     */
11266 		    if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11267 			if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11268 			    goto done;
11269                     }
11270                     ctxt->checkIndex = 0;
11271 		    xmlParseCharDataInternal(ctxt, !terminate);
11272                     break;
11273 		}
11274 
11275                 ctxt->instate = XML_PARSER_START_TAG;
11276 		break;
11277 	    }
11278             case XML_PARSER_END_TAG:
11279 		if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11280 		    goto done;
11281 		if (ctxt->sax2) {
11282 	            xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11283 		    nameNsPop(ctxt);
11284 		}
11285 #ifdef LIBXML_SAX1_ENABLED
11286 		  else
11287 		    xmlParseEndTag1(ctxt, 0);
11288 #endif /* LIBXML_SAX1_ENABLED */
11289 		if (ctxt->nameNr == 0) {
11290 		    ctxt->instate = XML_PARSER_EPILOG;
11291 		} else {
11292 		    ctxt->instate = XML_PARSER_CONTENT;
11293 		}
11294 		break;
11295             case XML_PARSER_CDATA_SECTION: {
11296 	        /*
11297 		 * The Push mode need to have the SAX callback for
11298 		 * cdataBlock merge back contiguous callbacks.
11299 		 */
11300 		const xmlChar *term;
11301 
11302                 if (terminate) {
11303                     /*
11304                      * Don't call xmlParseLookupString. If 'terminate'
11305                      * is set, checkIndex is invalid.
11306                      */
11307                     term = BAD_CAST strstr((const char *) ctxt->input->cur,
11308                                            "]]>");
11309                 } else {
11310 		    term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11311                 }
11312 
11313 		if (term == NULL) {
11314 		    int tmp, size;
11315 
11316                     if (terminate) {
11317                         /* Unfinished CDATA section */
11318                         size = ctxt->input->end - ctxt->input->cur;
11319                     } else {
11320                         if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11321                             goto done;
11322                         ctxt->checkIndex = 0;
11323                         /* XXX: Why don't we pass the full buffer? */
11324                         size = XML_PARSER_BIG_BUFFER_SIZE;
11325                     }
11326                     tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11327                     if (tmp <= 0) {
11328                         tmp = -tmp;
11329                         ctxt->input->cur += tmp;
11330                         goto encoding_error;
11331                     }
11332                     if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11333                         if (ctxt->sax->cdataBlock != NULL)
11334                             ctxt->sax->cdataBlock(ctxt->userData,
11335                                                   ctxt->input->cur, tmp);
11336                         else if (ctxt->sax->characters != NULL)
11337                             ctxt->sax->characters(ctxt->userData,
11338                                                   ctxt->input->cur, tmp);
11339                     }
11340                     SKIPL(tmp);
11341 		} else {
11342                     int base = term - CUR_PTR;
11343 		    int tmp;
11344 
11345 		    tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11346 		    if ((tmp < 0) || (tmp != base)) {
11347 			tmp = -tmp;
11348 			ctxt->input->cur += tmp;
11349 			goto encoding_error;
11350 		    }
11351 		    if ((ctxt->sax != NULL) && (base == 0) &&
11352 		        (ctxt->sax->cdataBlock != NULL) &&
11353 		        (!ctxt->disableSAX)) {
11354 			/*
11355 			 * Special case to provide identical behaviour
11356 			 * between pull and push parsers on enpty CDATA
11357 			 * sections
11358 			 */
11359 			 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11360 			     (!strncmp((const char *)&ctxt->input->cur[-9],
11361 			               "<![CDATA[", 9)))
11362 			     ctxt->sax->cdataBlock(ctxt->userData,
11363 			                           BAD_CAST "", 0);
11364 		    } else if ((ctxt->sax != NULL) && (base > 0) &&
11365 			(!ctxt->disableSAX)) {
11366 			if (ctxt->sax->cdataBlock != NULL)
11367 			    ctxt->sax->cdataBlock(ctxt->userData,
11368 						  ctxt->input->cur, base);
11369 			else if (ctxt->sax->characters != NULL)
11370 			    ctxt->sax->characters(ctxt->userData,
11371 						  ctxt->input->cur, base);
11372 		    }
11373 		    SKIPL(base + 3);
11374 		    ctxt->instate = XML_PARSER_CONTENT;
11375 		}
11376 		break;
11377 	    }
11378             case XML_PARSER_MISC:
11379             case XML_PARSER_PROLOG:
11380             case XML_PARSER_EPILOG:
11381 		SKIP_BLANKS;
11382                 avail = ctxt->input->end - ctxt->input->cur;
11383 		if (avail < 1)
11384 		    goto done;
11385 		if (ctxt->input->cur[0] == '<') {
11386                     if ((!terminate) && (avail < 2))
11387                         goto done;
11388                     next = ctxt->input->cur[1];
11389                     if (next == '?') {
11390                         if ((!terminate) &&
11391                             (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11392                             goto done;
11393                         xmlParsePI(ctxt);
11394                         break;
11395                     } else if (next == '!') {
11396                         if ((!terminate) && (avail < 3))
11397                             goto done;
11398 
11399                         if (ctxt->input->cur[2] == '-') {
11400                             if ((!terminate) && (avail < 4))
11401                                 goto done;
11402                             if (ctxt->input->cur[3] == '-') {
11403                                 if ((!terminate) &&
11404                                     (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11405                                     goto done;
11406                                 xmlParseComment(ctxt);
11407                                 break;
11408                             }
11409                         } else if (ctxt->instate == XML_PARSER_MISC) {
11410                             if ((!terminate) && (avail < 9))
11411                                 goto done;
11412                             if ((ctxt->input->cur[2] == 'D') &&
11413                                 (ctxt->input->cur[3] == 'O') &&
11414                                 (ctxt->input->cur[4] == 'C') &&
11415                                 (ctxt->input->cur[5] == 'T') &&
11416                                 (ctxt->input->cur[6] == 'Y') &&
11417                                 (ctxt->input->cur[7] == 'P') &&
11418                                 (ctxt->input->cur[8] == 'E')) {
11419                                 if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11420                                     goto done;
11421                                 ctxt->inSubset = 1;
11422                                 xmlParseDocTypeDecl(ctxt);
11423                                 if (RAW == '[') {
11424                                     ctxt->instate = XML_PARSER_DTD;
11425                                 } else {
11426                                     /*
11427                                      * Create and update the external subset.
11428                                      */
11429                                     ctxt->inSubset = 2;
11430                                     if ((ctxt->sax != NULL) &&
11431                                         (!ctxt->disableSAX) &&
11432                                         (ctxt->sax->externalSubset != NULL))
11433                                         ctxt->sax->externalSubset(
11434                                                 ctxt->userData,
11435                                                 ctxt->intSubName,
11436                                                 ctxt->extSubSystem,
11437                                                 ctxt->extSubURI);
11438                                     ctxt->inSubset = 0;
11439                                     xmlCleanSpecialAttr(ctxt);
11440                                     ctxt->instate = XML_PARSER_PROLOG;
11441                                 }
11442                                 break;
11443                             }
11444                         }
11445                     }
11446                 }
11447 
11448                 if (ctxt->instate == XML_PARSER_EPILOG) {
11449                     if (ctxt->errNo == XML_ERR_OK)
11450                         xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11451 		    ctxt->instate = XML_PARSER_EOF;
11452                     xmlFinishDocument(ctxt);
11453                 } else {
11454 		    ctxt->instate = XML_PARSER_START_TAG;
11455 		}
11456 		break;
11457             case XML_PARSER_DTD: {
11458                 if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11459                     goto done;
11460 		xmlParseInternalSubset(ctxt);
11461 		ctxt->inSubset = 2;
11462 		if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11463 		    (ctxt->sax->externalSubset != NULL))
11464 		    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11465 			    ctxt->extSubSystem, ctxt->extSubURI);
11466 		ctxt->inSubset = 0;
11467 		xmlCleanSpecialAttr(ctxt);
11468 		ctxt->instate = XML_PARSER_PROLOG;
11469                 break;
11470 	    }
11471             default:
11472                 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11473 			"PP: internal error\n");
11474 		ctxt->instate = XML_PARSER_EOF;
11475 		break;
11476 	}
11477     }
11478 done:
11479     return(ret);
11480 encoding_error:
11481     /* Only report the first error */
11482     if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
11483         xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
11484         ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
11485     }
11486     return(0);
11487 }
11488 
11489 /**
11490  * xmlParseChunk:
11491  * @ctxt:  an XML parser context
11492  * @chunk:  chunk of memory
11493  * @size:  size of chunk in bytes
11494  * @terminate:  last chunk indicator
11495  *
11496  * Parse a chunk of memory in push parser mode.
11497  *
11498  * Assumes that the parser context was initialized with
11499  * xmlCreatePushParserCtxt.
11500  *
11501  * The last chunk, which will often be empty, must be marked with
11502  * the @terminate flag. With the default SAX callbacks, the resulting
11503  * document will be available in ctxt->myDoc. This pointer will not
11504  * be freed by the library.
11505  *
11506  * If the document isn't well-formed, ctxt->myDoc is set to NULL.
11507  * The push parser doesn't support recovery mode.
11508  *
11509  * Returns an xmlParserErrors code (0 on success).
11510  */
11511 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)11512 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11513               int terminate) {
11514     size_t curBase;
11515     size_t maxLength;
11516     int end_in_lf = 0;
11517 
11518     if ((ctxt == NULL) || (size < 0))
11519         return(XML_ERR_ARGUMENT);
11520     if (ctxt->disableSAX != 0)
11521         return(ctxt->errNo);
11522     if (ctxt->input == NULL)
11523         return(XML_ERR_INTERNAL_ERROR);
11524 
11525     ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11526     if (ctxt->instate == XML_PARSER_START)
11527         xmlCtxtInitializeLate(ctxt);
11528     if ((size > 0) && (chunk != NULL) && (!terminate) &&
11529         (chunk[size - 1] == '\r')) {
11530 	end_in_lf = 1;
11531 	size--;
11532     }
11533 
11534     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11535         (ctxt->input->buf != NULL))  {
11536 	size_t pos = ctxt->input->cur - ctxt->input->base;
11537 	int res;
11538 
11539 	res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11540         xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11541 	if (res < 0) {
11542             xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11543 	    xmlHaltParser(ctxt);
11544 	    return(ctxt->errNo);
11545 	}
11546     }
11547 
11548     xmlParseTryOrFinish(ctxt, terminate);
11549 
11550     curBase = ctxt->input->cur - ctxt->input->base;
11551     maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11552                 XML_MAX_HUGE_LENGTH :
11553                 XML_MAX_LOOKUP_LIMIT;
11554     if (curBase > maxLength) {
11555         xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11556                     "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11557         xmlHaltParser(ctxt);
11558     }
11559 
11560     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11561         return(ctxt->errNo);
11562 
11563     if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11564         (ctxt->input->buf != NULL)) {
11565 	size_t pos = ctxt->input->cur - ctxt->input->base;
11566         int res;
11567 
11568 	res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11569 	xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11570         if (res < 0) {
11571             xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11572             xmlHaltParser(ctxt);
11573             return(ctxt->errNo);
11574         }
11575     }
11576     if (terminate) {
11577 	/*
11578 	 * Check for termination
11579 	 */
11580         if ((ctxt->instate != XML_PARSER_EOF) &&
11581             (ctxt->instate != XML_PARSER_EPILOG)) {
11582             if (ctxt->nameNr > 0) {
11583                 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11584                 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11585                 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11586                         "Premature end of data in tag %s line %d\n",
11587                         name, line, NULL);
11588             } else if (ctxt->instate == XML_PARSER_START) {
11589                 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11590             } else {
11591                 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11592                                "Start tag expected, '<' not found\n");
11593             }
11594         } else if ((ctxt->input->buf != NULL) &&
11595                    (ctxt->input->buf->encoder != NULL) &&
11596                    (ctxt->input->buf->error == 0) &&
11597                    (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
11598             xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
11599                            "Truncated multi-byte sequence at EOF\n");
11600         }
11601 	if (ctxt->instate != XML_PARSER_EOF) {
11602             ctxt->instate = XML_PARSER_EOF;
11603             xmlFinishDocument(ctxt);
11604 	}
11605     }
11606     if (ctxt->wellFormed == 0)
11607 	return((xmlParserErrors) ctxt->errNo);
11608     else
11609         return(0);
11610 }
11611 
11612 /************************************************************************
11613  *									*
11614  *		I/O front end functions to the parser			*
11615  *									*
11616  ************************************************************************/
11617 
11618 /**
11619  * xmlCreatePushParserCtxt:
11620  * @sax:  a SAX handler (optional)
11621  * @user_data:  user data for SAX callbacks (optional)
11622  * @chunk:  initial chunk (optional, deprecated)
11623  * @size:  size of initial chunk in bytes
11624  * @filename:  file name or URI (optional)
11625  *
11626  * Create a parser context for using the XML parser in push mode.
11627  * See xmlParseChunk.
11628  *
11629  * Passing an initial chunk is useless and deprecated.
11630  *
11631  * @filename is used as base URI to fetch external entities and for
11632  * error reports.
11633  *
11634  * Returns the new parser context or NULL if a memory allocation
11635  * failed.
11636  */
11637 
11638 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)11639 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11640                         const char *chunk, int size, const char *filename) {
11641     xmlParserCtxtPtr ctxt;
11642     xmlParserInputPtr input;
11643 
11644     ctxt = xmlNewSAXParserCtxt(sax, user_data);
11645     if (ctxt == NULL)
11646 	return(NULL);
11647 
11648     ctxt->options &= ~XML_PARSE_NODICT;
11649     ctxt->dictNames = 1;
11650 
11651     input = xmlInputCreatePush(filename, chunk, size);
11652     if (input == NULL) {
11653 	xmlFreeParserCtxt(ctxt);
11654 	return(NULL);
11655     }
11656     inputPush(ctxt, input);
11657 
11658     return(ctxt);
11659 }
11660 #endif /* LIBXML_PUSH_ENABLED */
11661 
11662 /**
11663  * xmlStopParser:
11664  * @ctxt:  an XML parser context
11665  *
11666  * Blocks further parser processing
11667  */
11668 void
xmlStopParser(xmlParserCtxtPtr ctxt)11669 xmlStopParser(xmlParserCtxtPtr ctxt) {
11670     if (ctxt == NULL)
11671         return;
11672     xmlHaltParser(ctxt);
11673     if (ctxt->errNo != XML_ERR_NO_MEMORY)
11674         ctxt->errNo = XML_ERR_USER_STOP;
11675 }
11676 
11677 /**
11678  * xmlCreateIOParserCtxt:
11679  * @sax:  a SAX handler (optional)
11680  * @user_data:  user data for SAX callbacks (optional)
11681  * @ioread:  an I/O read function
11682  * @ioclose:  an I/O close function (optional)
11683  * @ioctx:  an I/O handler
11684  * @enc:  the charset encoding if known (deprecated)
11685  *
11686  * Create a parser context for using the XML parser with an existing
11687  * I/O stream
11688  *
11689  * Returns the new parser context or NULL
11690  */
11691 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)11692 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11693                       xmlInputReadCallback ioread,
11694                       xmlInputCloseCallback ioclose,
11695                       void *ioctx, xmlCharEncoding enc) {
11696     xmlParserCtxtPtr ctxt;
11697     xmlParserInputPtr input;
11698     const char *encoding;
11699 
11700     ctxt = xmlNewSAXParserCtxt(sax, user_data);
11701     if (ctxt == NULL)
11702 	return(NULL);
11703 
11704     encoding = xmlGetCharEncodingName(enc);
11705     input = xmlNewInputIO(ctxt, NULL, ioread, ioclose, ioctx, encoding, 0);
11706     if (input == NULL) {
11707 	xmlFreeParserCtxt(ctxt);
11708         return (NULL);
11709     }
11710     inputPush(ctxt, input);
11711 
11712     return(ctxt);
11713 }
11714 
11715 #ifdef LIBXML_VALID_ENABLED
11716 /************************************************************************
11717  *									*
11718  *		Front ends when parsing a DTD				*
11719  *									*
11720  ************************************************************************/
11721 
11722 /**
11723  * xmlIOParseDTD:
11724  * @sax:  the SAX handler block or NULL
11725  * @input:  an Input Buffer
11726  * @enc:  the charset encoding if known
11727  *
11728  * Load and parse a DTD
11729  *
11730  * Returns the resulting xmlDtdPtr or NULL in case of error.
11731  * @input will be freed by the function in any case.
11732  */
11733 
11734 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)11735 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11736 	      xmlCharEncoding enc) {
11737     xmlDtdPtr ret = NULL;
11738     xmlParserCtxtPtr ctxt;
11739     xmlParserInputPtr pinput = NULL;
11740 
11741     if (input == NULL)
11742 	return(NULL);
11743 
11744     ctxt = xmlNewSAXParserCtxt(sax, NULL);
11745     if (ctxt == NULL) {
11746         xmlFreeParserInputBuffer(input);
11747 	return(NULL);
11748     }
11749 
11750     /*
11751      * generate a parser input from the I/O handler
11752      */
11753 
11754     pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11755     if (pinput == NULL) {
11756         xmlFreeParserInputBuffer(input);
11757 	xmlFreeParserCtxt(ctxt);
11758 	return(NULL);
11759     }
11760 
11761     /*
11762      * plug some encoding conversion routines here.
11763      */
11764     if (xmlPushInput(ctxt, pinput) < 0) {
11765 	xmlFreeParserCtxt(ctxt);
11766 	return(NULL);
11767     }
11768     if (enc != XML_CHAR_ENCODING_NONE) {
11769         xmlSwitchEncoding(ctxt, enc);
11770     }
11771 
11772     /*
11773      * let's parse that entity knowing it's an external subset.
11774      */
11775     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11776     if (ctxt->myDoc == NULL) {
11777 	xmlErrMemory(ctxt);
11778 	return(NULL);
11779     }
11780     ctxt->myDoc->properties = XML_DOC_INTERNAL;
11781     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11782 	                               BAD_CAST "none", BAD_CAST "none");
11783 
11784     xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11785 
11786     if (ctxt->myDoc != NULL) {
11787 	if (ctxt->wellFormed) {
11788 	    ret = ctxt->myDoc->extSubset;
11789 	    ctxt->myDoc->extSubset = NULL;
11790 	    if (ret != NULL) {
11791 		xmlNodePtr tmp;
11792 
11793 		ret->doc = NULL;
11794 		tmp = ret->children;
11795 		while (tmp != NULL) {
11796 		    tmp->doc = NULL;
11797 		    tmp = tmp->next;
11798 		}
11799 	    }
11800 	} else {
11801 	    ret = NULL;
11802 	}
11803         xmlFreeDoc(ctxt->myDoc);
11804         ctxt->myDoc = NULL;
11805     }
11806     xmlFreeParserCtxt(ctxt);
11807 
11808     return(ret);
11809 }
11810 
11811 /**
11812  * xmlSAXParseDTD:
11813  * @sax:  the SAX handler block
11814  * @ExternalID:  a NAME* containing the External ID of the DTD
11815  * @SystemID:  a NAME* containing the URL to the DTD
11816  *
11817  * DEPRECATED: Don't use.
11818  *
11819  * Load and parse an external subset.
11820  *
11821  * Returns the resulting xmlDtdPtr or NULL in case of error.
11822  */
11823 
11824 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)11825 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11826                           const xmlChar *SystemID) {
11827     xmlDtdPtr ret = NULL;
11828     xmlParserCtxtPtr ctxt;
11829     xmlParserInputPtr input = NULL;
11830     xmlChar* systemIdCanonic;
11831 
11832     if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11833 
11834     ctxt = xmlNewSAXParserCtxt(sax, NULL);
11835     if (ctxt == NULL) {
11836 	return(NULL);
11837     }
11838 
11839     /*
11840      * Canonicalise the system ID
11841      */
11842     systemIdCanonic = xmlCanonicPath(SystemID);
11843     if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11844 	xmlFreeParserCtxt(ctxt);
11845 	return(NULL);
11846     }
11847 
11848     /*
11849      * Ask the Entity resolver to load the damn thing
11850      */
11851 
11852     if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11853 	input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11854 	                                 systemIdCanonic);
11855     if (input == NULL) {
11856 	xmlFreeParserCtxt(ctxt);
11857 	if (systemIdCanonic != NULL)
11858 	    xmlFree(systemIdCanonic);
11859 	return(NULL);
11860     }
11861 
11862     /*
11863      * plug some encoding conversion routines here.
11864      */
11865     if (xmlPushInput(ctxt, input) < 0) {
11866 	xmlFreeParserCtxt(ctxt);
11867 	if (systemIdCanonic != NULL)
11868 	    xmlFree(systemIdCanonic);
11869 	return(NULL);
11870     }
11871 
11872     xmlDetectEncoding(ctxt);
11873 
11874     if (input->filename == NULL)
11875 	input->filename = (char *) systemIdCanonic;
11876     else
11877 	xmlFree(systemIdCanonic);
11878 
11879     /*
11880      * let's parse that entity knowing it's an external subset.
11881      */
11882     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11883     if (ctxt->myDoc == NULL) {
11884 	xmlErrMemory(ctxt);
11885 	xmlFreeParserCtxt(ctxt);
11886 	return(NULL);
11887     }
11888     ctxt->myDoc->properties = XML_DOC_INTERNAL;
11889     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11890 	                               ExternalID, SystemID);
11891     if (ctxt->myDoc->extSubset == NULL) {
11892         xmlFreeDoc(ctxt->myDoc);
11893         xmlFreeParserCtxt(ctxt);
11894         return(NULL);
11895     }
11896     xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11897 
11898     if (ctxt->myDoc != NULL) {
11899 	if (ctxt->wellFormed) {
11900 	    ret = ctxt->myDoc->extSubset;
11901 	    ctxt->myDoc->extSubset = NULL;
11902 	    if (ret != NULL) {
11903 		xmlNodePtr tmp;
11904 
11905 		ret->doc = NULL;
11906 		tmp = ret->children;
11907 		while (tmp != NULL) {
11908 		    tmp->doc = NULL;
11909 		    tmp = tmp->next;
11910 		}
11911 	    }
11912 	} else {
11913 	    ret = NULL;
11914 	}
11915         xmlFreeDoc(ctxt->myDoc);
11916         ctxt->myDoc = NULL;
11917     }
11918     xmlFreeParserCtxt(ctxt);
11919 
11920     return(ret);
11921 }
11922 
11923 
11924 /**
11925  * xmlParseDTD:
11926  * @ExternalID:  a NAME* containing the External ID of the DTD
11927  * @SystemID:  a NAME* containing the URL to the DTD
11928  *
11929  * Load and parse an external subset.
11930  *
11931  * Returns the resulting xmlDtdPtr or NULL in case of error.
11932  */
11933 
11934 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)11935 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11936     return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11937 }
11938 #endif /* LIBXML_VALID_ENABLED */
11939 
11940 /************************************************************************
11941  *									*
11942  *		Front ends when parsing an Entity			*
11943  *									*
11944  ************************************************************************/
11945 
11946 static xmlNodePtr
xmlCtxtParseContent(xmlParserCtxtPtr ctxt,xmlParserInputPtr input,int hasTextDecl,int buildTree)11947 xmlCtxtParseContent(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11948                     int hasTextDecl, int buildTree) {
11949     xmlNodePtr root = NULL;
11950     xmlNodePtr list = NULL;
11951     xmlChar *rootName = BAD_CAST "#root";
11952     int result;
11953 
11954     if (buildTree) {
11955         root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11956         if (root == NULL) {
11957             xmlErrMemory(ctxt);
11958             goto error;
11959         }
11960     }
11961 
11962     if (xmlPushInput(ctxt, input) < 0)
11963         goto error;
11964 
11965     nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11966     spacePush(ctxt, -1);
11967 
11968     if (buildTree)
11969         nodePush(ctxt, root);
11970 
11971     if (hasTextDecl) {
11972         xmlDetectEncoding(ctxt);
11973 
11974         /*
11975          * Parse a possible text declaration first
11976          */
11977         if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11978             (IS_BLANK_CH(NXT(5)))) {
11979             xmlParseTextDecl(ctxt);
11980             /*
11981              * An XML-1.0 document can't reference an entity not XML-1.0
11982              */
11983             if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11984                 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11985                 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11986                                "Version mismatch between document and "
11987                                "entity\n");
11988             }
11989         }
11990     }
11991 
11992     xmlParseContentInternal(ctxt);
11993 
11994     if (ctxt->input->cur < ctxt->input->end)
11995 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11996 
11997     if ((ctxt->wellFormed) ||
11998         ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
11999         if (root != NULL) {
12000             xmlNodePtr cur;
12001 
12002             /*
12003              * Return the newly created nodeset after unlinking it from
12004              * its pseudo parent.
12005              */
12006             cur = root->children;
12007             list = cur;
12008             while (cur != NULL) {
12009                 cur->parent = NULL;
12010                 cur = cur->next;
12011             }
12012             root->children = NULL;
12013             root->last = NULL;
12014         }
12015     }
12016 
12017     /*
12018      * Read the rest of the stream in case of errors. We want
12019      * to account for the whole entity size.
12020      */
12021     do {
12022         ctxt->input->cur = ctxt->input->end;
12023         xmlParserShrink(ctxt);
12024         result = xmlParserGrow(ctxt);
12025     } while (result > 0);
12026 
12027     if (buildTree)
12028         nodePop(ctxt);
12029 
12030     namePop(ctxt);
12031     spacePop(ctxt);
12032 
12033     /* xmlPopInput would free the stream */
12034     inputPop(ctxt);
12035 
12036 error:
12037     xmlFreeNode(root);
12038 
12039     return(list);
12040 }
12041 
12042 static void
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt,xmlEntityPtr ent)12043 xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
12044     xmlParserInputPtr input;
12045     xmlNodePtr list;
12046     unsigned long consumed;
12047     int isExternal;
12048     int buildTree;
12049     int oldMinNsIndex;
12050     int oldNodelen, oldNodemem;
12051 
12052     isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
12053     buildTree = (ctxt->node != NULL);
12054 
12055     /*
12056      * Recursion check
12057      */
12058     if (ent->flags & XML_ENT_EXPANDING) {
12059         xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
12060         xmlHaltParser(ctxt);
12061         goto error;
12062     }
12063 
12064     /*
12065      * Load entity
12066      */
12067     input = xmlNewEntityInputStream(ctxt, ent);
12068     if (input == NULL)
12069         goto error;
12070 
12071     /*
12072      * When building a tree, we need to limit the scope of namespace
12073      * declarations, so that entities don't reference xmlNs structs
12074      * from the parent of a reference.
12075      */
12076     oldMinNsIndex = ctxt->nsdb->minNsIndex;
12077     if (buildTree)
12078         ctxt->nsdb->minNsIndex = ctxt->nsNr;
12079 
12080     oldNodelen = ctxt->nodelen;
12081     oldNodemem = ctxt->nodemem;
12082     ctxt->nodelen = 0;
12083     ctxt->nodemem = 0;
12084 
12085     /*
12086      * Parse content
12087      *
12088      * This initiates a recursive call chain:
12089      *
12090      * - xmlCtxtParseContent
12091      * - xmlParseContentInternal
12092      * - xmlParseReference
12093      * - xmlCtxtParseEntity
12094      *
12095      * The nesting depth is limited by the maximum number of inputs,
12096      * see xmlPushInput.
12097      *
12098      * It's possible to make this non-recursive (minNsIndex must be
12099      * stored in the input struct) at the expense of code readability.
12100      */
12101 
12102     ent->flags |= XML_ENT_EXPANDING;
12103 
12104     list = xmlCtxtParseContent(ctxt, input, isExternal, buildTree);
12105 
12106     ent->flags &= ~XML_ENT_EXPANDING;
12107 
12108     ctxt->nsdb->minNsIndex = oldMinNsIndex;
12109     ctxt->nodelen = oldNodelen;
12110     ctxt->nodemem = oldNodemem;
12111 
12112     /*
12113      * Entity size accounting
12114      */
12115     consumed = input->consumed;
12116     xmlSaturatedAddSizeT(&consumed, input->end - input->base);
12117 
12118     if ((ent->flags & XML_ENT_CHECKED) == 0)
12119         xmlSaturatedAdd(&ent->expandedSize, consumed);
12120 
12121     if ((ent->flags & XML_ENT_PARSED) == 0) {
12122         if (isExternal)
12123             xmlSaturatedAdd(&ctxt->sizeentities, consumed);
12124 
12125         ent->children = list;
12126 
12127         while (list != NULL) {
12128             list->parent = (xmlNodePtr) ent;
12129             if (list->next == NULL)
12130                 ent->last = list;
12131             list = list->next;
12132         }
12133     } else {
12134         xmlFreeNodeList(list);
12135     }
12136 
12137     xmlFreeInputStream(input);
12138 
12139 error:
12140     ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
12141 }
12142 
12143 /**
12144  * xmlParseCtxtExternalEntity:
12145  * @ctxt:  the existing parsing context
12146  * @URL:  the URL for the entity to load
12147  * @ID:  the System ID for the entity to load
12148  * @listOut:  the return value for the set of parsed nodes
12149  *
12150  * Parse an external general entity within an existing parsing context
12151  * An external general parsed entity is well-formed if it matches the
12152  * production labeled extParsedEnt.
12153  *
12154  * [78] extParsedEnt ::= TextDecl? content
12155  *
12156  * Returns 0 if the entity is well formed, -1 in case of args problem and
12157  *    the parser error code otherwise
12158  */
12159 
12160 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * listOut)12161 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt, const xmlChar *URL,
12162                            const xmlChar *ID, xmlNodePtr *listOut) {
12163     xmlParserInputPtr input;
12164     xmlNodePtr list;
12165 
12166     if (listOut != NULL)
12167         *listOut = NULL;
12168 
12169     if (ctxt == NULL)
12170         return(XML_ERR_ARGUMENT);
12171 
12172     input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12173                             XML_RESOURCE_GENERAL_ENTITY);
12174     if (input == NULL)
12175         return(ctxt->errNo);
12176 
12177     xmlCtxtInitializeLate(ctxt);
12178 
12179     list = xmlCtxtParseContent(ctxt, input, /* hasTextDecl */ 1, 1);
12180     if (listOut != NULL)
12181         *listOut = list;
12182     else
12183         xmlFreeNodeList(list);
12184 
12185     xmlFreeInputStream(input);
12186     return(ctxt->errNo);
12187 }
12188 
12189 #ifdef LIBXML_SAX1_ENABLED
12190 /**
12191  * xmlParseExternalEntity:
12192  * @doc:  the document the chunk pertains to
12193  * @sax:  the SAX handler block (possibly NULL)
12194  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12195  * @depth:  Used for loop detection, use 0
12196  * @URL:  the URL for the entity to load
12197  * @ID:  the System ID for the entity to load
12198  * @list:  the return value for the set of parsed nodes
12199  *
12200  * DEPRECATED: Use xmlParseCtxtExternalEntity.
12201  *
12202  * Parse an external general entity
12203  * An external general parsed entity is well-formed if it matches the
12204  * production labeled extParsedEnt.
12205  *
12206  * [78] extParsedEnt ::= TextDecl? content
12207  *
12208  * Returns 0 if the entity is well formed, -1 in case of args problem and
12209  *    the parser error code otherwise
12210  */
12211 
12212 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)12213 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12214 	  int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
12215     xmlParserCtxtPtr ctxt;
12216     int ret;
12217 
12218     if (list != NULL)
12219         *list = NULL;
12220 
12221     if (doc == NULL)
12222         return(XML_ERR_ARGUMENT);
12223 
12224     ctxt = xmlNewSAXParserCtxt(sax, user_data);
12225     if (ctxt == NULL)
12226         return(XML_ERR_NO_MEMORY);
12227 
12228     ctxt->depth = depth;
12229     ctxt->myDoc = doc;
12230     ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
12231 
12232     xmlFreeParserCtxt(ctxt);
12233     return(ret);
12234 }
12235 
12236 /**
12237  * xmlParseBalancedChunkMemory:
12238  * @doc:  the document the chunk pertains to (must not be NULL)
12239  * @sax:  the SAX handler block (possibly NULL)
12240  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12241  * @depth:  Used for loop detection, use 0
12242  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12243  * @lst:  the return value for the set of parsed nodes
12244  *
12245  * Parse a well-balanced chunk of an XML document
12246  * called by the parser
12247  * The allowed sequence for the Well Balanced Chunk is the one defined by
12248  * the content production in the XML grammar:
12249  *
12250  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12251  *
12252  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12253  *    the parser error code otherwise
12254  */
12255 
12256 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)12257 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12258      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12259     return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12260                                                 depth, string, lst, 0 );
12261 }
12262 #endif /* LIBXML_SAX1_ENABLED */
12263 
12264 /**
12265  * xmlParseInNodeContext:
12266  * @node:  the context node
12267  * @data:  the input string
12268  * @datalen:  the input string length in bytes
12269  * @options:  a combination of xmlParserOption
12270  * @lst:  the return value for the set of parsed nodes
12271  *
12272  * Parse a well-balanced chunk of an XML document
12273  * within the context (DTD, namespaces, etc ...) of the given node.
12274  *
12275  * The allowed sequence for the data is a Well Balanced Chunk defined by
12276  * the content production in the XML grammar:
12277  *
12278  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12279  *
12280  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12281  * error code otherwise
12282  */
12283 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)12284 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12285                       int options, xmlNodePtr *lst) {
12286     xmlParserCtxtPtr ctxt;
12287     xmlDocPtr doc = NULL;
12288     xmlNodePtr fake, cur;
12289     int nsnr = 0;
12290 
12291     xmlParserErrors ret = XML_ERR_OK;
12292 
12293     /*
12294      * check all input parameters, grab the document
12295      */
12296     if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12297         return(XML_ERR_ARGUMENT);
12298     switch (node->type) {
12299         case XML_ELEMENT_NODE:
12300         case XML_ATTRIBUTE_NODE:
12301         case XML_TEXT_NODE:
12302         case XML_CDATA_SECTION_NODE:
12303         case XML_ENTITY_REF_NODE:
12304         case XML_PI_NODE:
12305         case XML_COMMENT_NODE:
12306         case XML_DOCUMENT_NODE:
12307         case XML_HTML_DOCUMENT_NODE:
12308 	    break;
12309 	default:
12310 	    return(XML_ERR_INTERNAL_ERROR);
12311 
12312     }
12313     while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12314            (node->type != XML_DOCUMENT_NODE) &&
12315 	   (node->type != XML_HTML_DOCUMENT_NODE))
12316 	node = node->parent;
12317     if (node == NULL)
12318 	return(XML_ERR_INTERNAL_ERROR);
12319     if (node->type == XML_ELEMENT_NODE)
12320 	doc = node->doc;
12321     else
12322         doc = (xmlDocPtr) node;
12323     if (doc == NULL)
12324 	return(XML_ERR_INTERNAL_ERROR);
12325 
12326     /*
12327      * allocate a context and set-up everything not related to the
12328      * node position in the tree
12329      */
12330     if (doc->type == XML_DOCUMENT_NODE)
12331 	ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12332 #ifdef LIBXML_HTML_ENABLED
12333     else if (doc->type == XML_HTML_DOCUMENT_NODE) {
12334 	ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12335         /*
12336          * When parsing in context, it makes no sense to add implied
12337          * elements like html/body/etc...
12338          */
12339         options |= HTML_PARSE_NOIMPLIED;
12340     }
12341 #endif
12342     else
12343         return(XML_ERR_INTERNAL_ERROR);
12344 
12345     if (ctxt == NULL)
12346         return(XML_ERR_NO_MEMORY);
12347 
12348     /*
12349      * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12350      * We need a dictionary for xmlCtxtInitializeLate, so if there's no doc dict
12351      * we must wait until the last moment to free the original one.
12352      */
12353     if (doc->dict != NULL) {
12354         if (ctxt->dict != NULL)
12355 	    xmlDictFree(ctxt->dict);
12356 	ctxt->dict = doc->dict;
12357     } else {
12358         options |= XML_PARSE_NODICT;
12359         ctxt->dictNames = 0;
12360     }
12361 
12362     if (doc->encoding != NULL)
12363         xmlSwitchEncodingName(ctxt, (const char *) doc->encoding);
12364 
12365     xmlCtxtUseOptions(ctxt, options);
12366     xmlCtxtInitializeLate(ctxt);
12367     ctxt->myDoc = doc;
12368     /* parsing in context, i.e. as within existing content */
12369     ctxt->input_id = 2;
12370 
12371     /*
12372      * TODO: Use xmlCtxtParseContent
12373      */
12374 
12375     fake = xmlNewDocComment(node->doc, NULL);
12376     if (fake == NULL) {
12377         xmlFreeParserCtxt(ctxt);
12378 	return(XML_ERR_NO_MEMORY);
12379     }
12380     xmlAddChild(node, fake);
12381 
12382     if (node->type == XML_ELEMENT_NODE)
12383 	nodePush(ctxt, node);
12384 
12385     if ((ctxt->html == 0) && (node->type == XML_ELEMENT_NODE)) {
12386 	/*
12387 	 * initialize the SAX2 namespaces stack
12388 	 */
12389 	cur = node;
12390 	while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12391 	    xmlNsPtr ns = cur->nsDef;
12392             xmlHashedString hprefix, huri;
12393 
12394 	    while (ns != NULL) {
12395                 hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12396                 huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12397                 if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12398                     nsnr++;
12399 		ns = ns->next;
12400 	    }
12401 	    cur = cur->parent;
12402 	}
12403     }
12404 
12405     if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12406 	/*
12407 	 * ID/IDREF registration will be done in xmlValidateElement below
12408 	 */
12409 	ctxt->loadsubset |= XML_SKIP_IDS;
12410     }
12411 
12412 #ifdef LIBXML_HTML_ENABLED
12413     if (doc->type == XML_HTML_DOCUMENT_NODE)
12414         __htmlParseContent(ctxt);
12415     else
12416 #endif
12417 	xmlParseContentInternal(ctxt);
12418 
12419     if (ctxt->input->cur < ctxt->input->end)
12420 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12421 
12422     xmlParserNsPop(ctxt, nsnr);
12423 
12424     if ((ctxt->wellFormed) ||
12425         ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
12426         ret = XML_ERR_OK;
12427     } else {
12428 	ret = (xmlParserErrors) ctxt->errNo;
12429     }
12430 
12431     /*
12432      * Return the newly created nodeset after unlinking it from
12433      * the pseudo sibling.
12434      */
12435 
12436     cur = fake->next;
12437     fake->next = NULL;
12438     node->last = fake;
12439 
12440     if (cur != NULL) {
12441 	cur->prev = NULL;
12442     }
12443 
12444     *lst = cur;
12445 
12446     while (cur != NULL) {
12447 	cur->parent = NULL;
12448 	cur = cur->next;
12449     }
12450 
12451     xmlUnlinkNode(fake);
12452     xmlFreeNode(fake);
12453 
12454 
12455     if (ret != XML_ERR_OK) {
12456         xmlFreeNodeList(*lst);
12457 	*lst = NULL;
12458     }
12459 
12460     if (doc->dict != NULL)
12461         ctxt->dict = NULL;
12462     xmlFreeParserCtxt(ctxt);
12463 
12464     return(ret);
12465 }
12466 
12467 #ifdef LIBXML_SAX1_ENABLED
12468 /**
12469  * xmlParseBalancedChunkMemoryRecover:
12470  * @doc:  the document the chunk pertains to (must not be NULL)
12471  * @sax:  the SAX handler block (possibly NULL)
12472  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12473  * @depth:  Used for loop detection, use 0
12474  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12475  * @listOut:  the return value for the set of parsed nodes
12476  * @recover: return nodes even if the data is broken (use 0)
12477  *
12478  * Parse a well-balanced chunk of an XML document
12479  *
12480  * The allowed sequence for the Well Balanced Chunk is the one defined by
12481  * the content production in the XML grammar:
12482  *
12483  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12484  *
12485  * Returns 0 if the chunk is well balanced, or thehe parser error code
12486  * otherwise.
12487  *
12488  * In case recover is set to 1, the nodelist will not be empty even if
12489  * the parsed chunk is not well balanced, assuming the parsing succeeded to
12490  * some extent.
12491  */
12492 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * listOut,int recover)12493 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12494      void *user_data, int depth, const xmlChar *string, xmlNodePtr *listOut,
12495      int recover) {
12496     xmlParserCtxtPtr ctxt;
12497     xmlParserInputPtr input;
12498     xmlNodePtr list;
12499     int ret;
12500 
12501     if (listOut != NULL)
12502         *listOut = NULL;
12503 
12504     if (string == NULL)
12505         return(XML_ERR_ARGUMENT);
12506 
12507     ctxt = xmlNewSAXParserCtxt(sax, user_data);
12508     if (ctxt == NULL)
12509         return(XML_ERR_NO_MEMORY);
12510 
12511     xmlCtxtInitializeLate(ctxt);
12512 
12513     ctxt->depth = depth;
12514     ctxt->myDoc = doc;
12515     if (recover) {
12516         ctxt->options |= XML_PARSE_RECOVER;
12517         ctxt->recovery = 1;
12518     }
12519 
12520     input = xmlNewStringInputStream(ctxt, string);
12521     if (input == NULL)
12522         return(ctxt->errNo);
12523 
12524     list = xmlCtxtParseContent(ctxt, input, /* hasTextDecl */ 0, 1);
12525     if (listOut != NULL)
12526         *listOut = list;
12527     else
12528         xmlFreeNodeList(list);
12529 
12530     ret = ctxt->errNo;
12531 
12532     xmlFreeInputStream(input);
12533     xmlFreeParserCtxt(ctxt);
12534     return(ret);
12535 }
12536 
12537 /**
12538  * xmlSAXParseEntity:
12539  * @sax:  the SAX handler block
12540  * @filename:  the filename
12541  *
12542  * DEPRECATED: Don't use.
12543  *
12544  * parse an XML external entity out of context and build a tree.
12545  * It use the given SAX function block to handle the parsing callback.
12546  * If sax is NULL, fallback to the default DOM tree building routines.
12547  *
12548  * [78] extParsedEnt ::= TextDecl? content
12549  *
12550  * This correspond to a "Well Balanced" chunk
12551  *
12552  * Returns the resulting document tree
12553  */
12554 
12555 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)12556 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12557     xmlDocPtr ret;
12558     xmlParserCtxtPtr ctxt;
12559 
12560     ctxt = xmlCreateFileParserCtxt(filename);
12561     if (ctxt == NULL) {
12562 	return(NULL);
12563     }
12564     if (sax != NULL) {
12565         if (sax->initialized == XML_SAX2_MAGIC) {
12566             *ctxt->sax = *sax;
12567         } else {
12568             memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12569             memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12570         }
12571         ctxt->userData = NULL;
12572     }
12573 
12574     xmlParseExtParsedEnt(ctxt);
12575 
12576     if (ctxt->wellFormed) {
12577 	ret = ctxt->myDoc;
12578     } else {
12579         ret = NULL;
12580         xmlFreeDoc(ctxt->myDoc);
12581     }
12582 
12583     xmlFreeParserCtxt(ctxt);
12584 
12585     return(ret);
12586 }
12587 
12588 /**
12589  * xmlParseEntity:
12590  * @filename:  the filename
12591  *
12592  * parse an XML external entity out of context and build a tree.
12593  *
12594  * [78] extParsedEnt ::= TextDecl? content
12595  *
12596  * This correspond to a "Well Balanced" chunk
12597  *
12598  * Returns the resulting document tree
12599  */
12600 
12601 xmlDocPtr
xmlParseEntity(const char * filename)12602 xmlParseEntity(const char *filename) {
12603     return(xmlSAXParseEntity(NULL, filename));
12604 }
12605 #endif /* LIBXML_SAX1_ENABLED */
12606 
12607 /**
12608  * xmlCreateEntityParserCtxt:
12609  * @URL:  the entity URL
12610  * @ID:  the entity PUBLIC ID
12611  * @base:  a possible base for the target URI
12612  *
12613  * DEPRECATED: Don't use.
12614  *
12615  * Create a parser context for an external entity
12616  * Automatic support for ZLIB/Compress compressed document is provided
12617  * by default if found at compile-time.
12618  *
12619  * Returns the new parser context or NULL
12620  */
12621 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)12622 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12623 	                  const xmlChar *base) {
12624     xmlParserCtxtPtr ctxt;
12625     xmlParserInputPtr input;
12626     xmlChar *uri = NULL;
12627 
12628     ctxt = xmlNewParserCtxt();
12629     if (ctxt == NULL)
12630 	return(NULL);
12631 
12632     if (base != NULL) {
12633         if (xmlBuildURISafe(URL, base, &uri) < 0)
12634             goto error;
12635         if (uri != NULL)
12636             URL = uri;
12637     }
12638 
12639     input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12640                             XML_RESOURCE_UNKNOWN);
12641     if (input == NULL)
12642         goto error;
12643 
12644     if (inputPush(ctxt, input) < 0)
12645         goto error;
12646 
12647     xmlFree(uri);
12648     return(ctxt);
12649 
12650 error:
12651     xmlFree(uri);
12652     xmlFreeParserCtxt(ctxt);
12653     return(NULL);
12654 }
12655 
12656 /************************************************************************
12657  *									*
12658  *		Front ends when parsing from a file			*
12659  *									*
12660  ************************************************************************/
12661 
12662 /**
12663  * xmlCreateURLParserCtxt:
12664  * @filename:  the filename or URL
12665  * @options:  a combination of xmlParserOption
12666  *
12667  * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12668  *
12669  * Create a parser context for a file or URL content.
12670  * Automatic support for ZLIB/Compress compressed document is provided
12671  * by default if found at compile-time and for file accesses
12672  *
12673  * Returns the new parser context or NULL
12674  */
12675 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)12676 xmlCreateURLParserCtxt(const char *filename, int options)
12677 {
12678     xmlParserCtxtPtr ctxt;
12679     xmlParserInputPtr input;
12680 
12681     ctxt = xmlNewParserCtxt();
12682     if (ctxt == NULL)
12683 	return(NULL);
12684 
12685     xmlCtxtUseOptions(ctxt, options);
12686     ctxt->linenumbers = 1;
12687 
12688     input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12689     if (input == NULL) {
12690 	xmlFreeParserCtxt(ctxt);
12691 	return(NULL);
12692     }
12693     inputPush(ctxt, input);
12694 
12695     return(ctxt);
12696 }
12697 
12698 /**
12699  * xmlCreateFileParserCtxt:
12700  * @filename:  the filename
12701  *
12702  * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12703  *
12704  * Create a parser context for a file content.
12705  * Automatic support for ZLIB/Compress compressed document is provided
12706  * by default if found at compile-time.
12707  *
12708  * Returns the new parser context or NULL
12709  */
12710 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)12711 xmlCreateFileParserCtxt(const char *filename)
12712 {
12713     return(xmlCreateURLParserCtxt(filename, 0));
12714 }
12715 
12716 #ifdef LIBXML_SAX1_ENABLED
12717 /**
12718  * xmlSAXParseFileWithData:
12719  * @sax:  the SAX handler block
12720  * @filename:  the filename
12721  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12722  *             documents
12723  * @data:  the userdata
12724  *
12725  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12726  *
12727  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12728  * compressed document is provided by default if found at compile-time.
12729  * It use the given SAX function block to handle the parsing callback.
12730  * If sax is NULL, fallback to the default DOM tree building routines.
12731  *
12732  * User data (void *) is stored within the parser context in the
12733  * context's _private member, so it is available nearly everywhere in libxml
12734  *
12735  * Returns the resulting document tree
12736  */
12737 
12738 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)12739 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12740                         int recovery, void *data) {
12741     xmlDocPtr ret;
12742     xmlParserCtxtPtr ctxt;
12743     xmlParserInputPtr input;
12744 
12745     ctxt = xmlNewSAXParserCtxt(sax, NULL);
12746     if (ctxt == NULL)
12747 	return(NULL);
12748 
12749     if (data != NULL)
12750 	ctxt->_private = data;
12751 
12752     if (recovery) {
12753         ctxt->options |= XML_PARSE_RECOVER;
12754         ctxt->recovery = 1;
12755     }
12756 
12757     input = xmlNewInputURL(ctxt, filename, NULL, NULL, 0);
12758 
12759     ret = xmlCtxtParseDocument(ctxt, input);
12760 
12761     xmlFreeParserCtxt(ctxt);
12762     return(ret);
12763 }
12764 
12765 /**
12766  * xmlSAXParseFile:
12767  * @sax:  the SAX handler block
12768  * @filename:  the filename
12769  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12770  *             documents
12771  *
12772  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12773  *
12774  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12775  * compressed document is provided by default if found at compile-time.
12776  * It use the given SAX function block to handle the parsing callback.
12777  * If sax is NULL, fallback to the default DOM tree building routines.
12778  *
12779  * Returns the resulting document tree
12780  */
12781 
12782 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)12783 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12784                           int recovery) {
12785     return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12786 }
12787 
12788 /**
12789  * xmlRecoverDoc:
12790  * @cur:  a pointer to an array of xmlChar
12791  *
12792  * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
12793  *
12794  * parse an XML in-memory document and build a tree.
12795  * In the case the document is not Well Formed, a attempt to build a
12796  * tree is tried anyway
12797  *
12798  * Returns the resulting document tree or NULL in case of failure
12799  */
12800 
12801 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)12802 xmlRecoverDoc(const xmlChar *cur) {
12803     return(xmlSAXParseDoc(NULL, cur, 1));
12804 }
12805 
12806 /**
12807  * xmlParseFile:
12808  * @filename:  the filename
12809  *
12810  * DEPRECATED: Use xmlReadFile.
12811  *
12812  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12813  * compressed document is provided by default if found at compile-time.
12814  *
12815  * Returns the resulting document tree if the file was wellformed,
12816  * NULL otherwise.
12817  */
12818 
12819 xmlDocPtr
xmlParseFile(const char * filename)12820 xmlParseFile(const char *filename) {
12821     return(xmlSAXParseFile(NULL, filename, 0));
12822 }
12823 
12824 /**
12825  * xmlRecoverFile:
12826  * @filename:  the filename
12827  *
12828  * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
12829  *
12830  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12831  * compressed document is provided by default if found at compile-time.
12832  * In the case the document is not Well Formed, it attempts to build
12833  * a tree anyway
12834  *
12835  * Returns the resulting document tree or NULL in case of failure
12836  */
12837 
12838 xmlDocPtr
xmlRecoverFile(const char * filename)12839 xmlRecoverFile(const char *filename) {
12840     return(xmlSAXParseFile(NULL, filename, 1));
12841 }
12842 
12843 
12844 /**
12845  * xmlSetupParserForBuffer:
12846  * @ctxt:  an XML parser context
12847  * @buffer:  a xmlChar * buffer
12848  * @filename:  a file name
12849  *
12850  * DEPRECATED: Don't use.
12851  *
12852  * Setup the parser context to parse a new buffer; Clears any prior
12853  * contents from the parser context. The buffer parameter must not be
12854  * NULL, but the filename parameter can be
12855  */
12856 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)12857 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12858                              const char* filename)
12859 {
12860     xmlParserInputPtr input;
12861 
12862     if ((ctxt == NULL) || (buffer == NULL))
12863         return;
12864 
12865     xmlClearParserCtxt(ctxt);
12866 
12867     input = xmlNewInputString(ctxt, filename, (const char *) buffer, NULL, 0);
12868     if (input == NULL)
12869         return;
12870     inputPush(ctxt, input);
12871 }
12872 
12873 /**
12874  * xmlSAXUserParseFile:
12875  * @sax:  a SAX handler
12876  * @user_data:  The user data returned on SAX callbacks
12877  * @filename:  a file name
12878  *
12879  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12880  *
12881  * parse an XML file and call the given SAX handler routines.
12882  * Automatic support for ZLIB/Compress compressed document is provided
12883  *
12884  * Returns 0 in case of success or a error number otherwise
12885  */
12886 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)12887 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12888                     const char *filename) {
12889     int ret = 0;
12890     xmlParserCtxtPtr ctxt;
12891 
12892     ctxt = xmlCreateFileParserCtxt(filename);
12893     if (ctxt == NULL) return -1;
12894     if (sax != NULL) {
12895         if (sax->initialized == XML_SAX2_MAGIC) {
12896             *ctxt->sax = *sax;
12897         } else {
12898             memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12899             memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12900         }
12901 	ctxt->userData = user_data;
12902     }
12903 
12904     xmlParseDocument(ctxt);
12905 
12906     if (ctxt->wellFormed)
12907 	ret = 0;
12908     else {
12909         if (ctxt->errNo != 0)
12910 	    ret = ctxt->errNo;
12911 	else
12912 	    ret = -1;
12913     }
12914     if (ctxt->myDoc != NULL) {
12915         xmlFreeDoc(ctxt->myDoc);
12916 	ctxt->myDoc = NULL;
12917     }
12918     xmlFreeParserCtxt(ctxt);
12919 
12920     return ret;
12921 }
12922 #endif /* LIBXML_SAX1_ENABLED */
12923 
12924 /************************************************************************
12925  *									*
12926  *		Front ends when parsing from memory			*
12927  *									*
12928  ************************************************************************/
12929 
12930 /**
12931  * xmlCreateMemoryParserCtxt:
12932  * @buffer:  a pointer to a char array
12933  * @size:  the size of the array
12934  *
12935  * Create a parser context for an XML in-memory document. The input buffer
12936  * must not contain a terminating null byte.
12937  *
12938  * Returns the new parser context or NULL
12939  */
12940 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)12941 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12942     xmlParserCtxtPtr ctxt;
12943     xmlParserInputPtr input;
12944 
12945     if (size < 0)
12946 	return(NULL);
12947 
12948     ctxt = xmlNewParserCtxt();
12949     if (ctxt == NULL)
12950 	return(NULL);
12951 
12952     input = xmlNewInputMemory(ctxt, NULL, buffer, size, NULL, 0);
12953     if (input == NULL) {
12954 	xmlFreeParserCtxt(ctxt);
12955 	return(NULL);
12956     }
12957     inputPush(ctxt, input);
12958 
12959     return(ctxt);
12960 }
12961 
12962 #ifdef LIBXML_SAX1_ENABLED
12963 /**
12964  * xmlSAXParseMemoryWithData:
12965  * @sax:  the SAX handler block
12966  * @buffer:  an pointer to a char array
12967  * @size:  the size of the array
12968  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12969  *             documents
12970  * @data:  the userdata
12971  *
12972  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
12973  *
12974  * parse an XML in-memory block and use the given SAX function block
12975  * to handle the parsing callback. If sax is NULL, fallback to the default
12976  * DOM tree building routines.
12977  *
12978  * User data (void *) is stored within the parser context in the
12979  * context's _private member, so it is available nearly everywhere in libxml
12980  *
12981  * Returns the resulting document tree
12982  */
12983 
12984 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)12985 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12986                           int size, int recovery, void *data) {
12987     xmlDocPtr ret;
12988     xmlParserCtxtPtr ctxt;
12989     xmlParserInputPtr input;
12990 
12991     if (size < 0)
12992         return(NULL);
12993 
12994     ctxt = xmlNewSAXParserCtxt(sax, NULL);
12995     if (ctxt == NULL)
12996         return(NULL);
12997 
12998     if (data != NULL)
12999 	ctxt->_private=data;
13000 
13001     if (recovery) {
13002         ctxt->options |= XML_PARSE_RECOVER;
13003         ctxt->recovery = 1;
13004     }
13005 
13006     input = xmlNewInputMemory(ctxt, NULL, buffer, size, NULL,
13007                               XML_INPUT_BUF_STATIC);
13008 
13009     ret = xmlCtxtParseDocument(ctxt, input);
13010 
13011     xmlFreeParserCtxt(ctxt);
13012     return(ret);
13013 }
13014 
13015 /**
13016  * xmlSAXParseMemory:
13017  * @sax:  the SAX handler block
13018  * @buffer:  an pointer to a char array
13019  * @size:  the size of the array
13020  * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
13021  *             documents
13022  *
13023  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13024  *
13025  * parse an XML in-memory block and use the given SAX function block
13026  * to handle the parsing callback. If sax is NULL, fallback to the default
13027  * DOM tree building routines.
13028  *
13029  * Returns the resulting document tree
13030  */
13031 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)13032 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13033 	          int size, int recovery) {
13034     return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13035 }
13036 
13037 /**
13038  * xmlParseMemory:
13039  * @buffer:  an pointer to a char array
13040  * @size:  the size of the array
13041  *
13042  * DEPRECATED: Use xmlReadMemory.
13043  *
13044  * parse an XML in-memory block and build a tree.
13045  *
13046  * Returns the resulting document tree
13047  */
13048 
xmlParseMemory(const char * buffer,int size)13049 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13050    return(xmlSAXParseMemory(NULL, buffer, size, 0));
13051 }
13052 
13053 /**
13054  * xmlRecoverMemory:
13055  * @buffer:  an pointer to a char array
13056  * @size:  the size of the array
13057  *
13058  * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
13059  *
13060  * parse an XML in-memory block and build a tree.
13061  * In the case the document is not Well Formed, an attempt to
13062  * build a tree is tried anyway
13063  *
13064  * Returns the resulting document tree or NULL in case of error
13065  */
13066 
xmlRecoverMemory(const char * buffer,int size)13067 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13068    return(xmlSAXParseMemory(NULL, buffer, size, 1));
13069 }
13070 
13071 /**
13072  * xmlSAXUserParseMemory:
13073  * @sax:  a SAX handler
13074  * @user_data:  The user data returned on SAX callbacks
13075  * @buffer:  an in-memory XML document input
13076  * @size:  the length of the XML document in bytes
13077  *
13078  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13079  *
13080  * parse an XML in-memory buffer and call the given SAX handler routines.
13081  *
13082  * Returns 0 in case of success or a error number otherwise
13083  */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)13084 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13085 			  const char *buffer, int size) {
13086     int ret = 0;
13087     xmlParserCtxtPtr ctxt;
13088 
13089     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13090     if (ctxt == NULL) return -1;
13091     if (sax != NULL) {
13092         if (sax->initialized == XML_SAX2_MAGIC) {
13093             *ctxt->sax = *sax;
13094         } else {
13095             memset(ctxt->sax, 0, sizeof(*ctxt->sax));
13096             memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
13097         }
13098 	ctxt->userData = user_data;
13099     }
13100 
13101     xmlParseDocument(ctxt);
13102 
13103     if (ctxt->wellFormed)
13104 	ret = 0;
13105     else {
13106         if (ctxt->errNo != 0)
13107 	    ret = ctxt->errNo;
13108 	else
13109 	    ret = -1;
13110     }
13111     if (ctxt->myDoc != NULL) {
13112         xmlFreeDoc(ctxt->myDoc);
13113 	ctxt->myDoc = NULL;
13114     }
13115     xmlFreeParserCtxt(ctxt);
13116 
13117     return ret;
13118 }
13119 #endif /* LIBXML_SAX1_ENABLED */
13120 
13121 /**
13122  * xmlCreateDocParserCtxt:
13123  * @str:  a pointer to an array of xmlChar
13124  *
13125  * Creates a parser context for an XML in-memory document.
13126  *
13127  * Returns the new parser context or NULL
13128  */
13129 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * str)13130 xmlCreateDocParserCtxt(const xmlChar *str) {
13131     xmlParserCtxtPtr ctxt;
13132     xmlParserInputPtr input;
13133 
13134     ctxt = xmlNewParserCtxt();
13135     if (ctxt == NULL)
13136 	return(NULL);
13137 
13138     input = xmlNewInputString(ctxt, NULL, (const char *) str, NULL, 0);
13139     if (input == NULL) {
13140 	xmlFreeParserCtxt(ctxt);
13141 	return(NULL);
13142     }
13143     inputPush(ctxt, input);
13144 
13145     return(ctxt);
13146 }
13147 
13148 #ifdef LIBXML_SAX1_ENABLED
13149 /**
13150  * xmlSAXParseDoc:
13151  * @sax:  the SAX handler block
13152  * @cur:  a pointer to an array of xmlChar
13153  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13154  *             documents
13155  *
13156  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
13157  *
13158  * parse an XML in-memory document and build a tree.
13159  * It use the given SAX function block to handle the parsing callback.
13160  * If sax is NULL, fallback to the default DOM tree building routines.
13161  *
13162  * Returns the resulting document tree
13163  */
13164 
13165 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)13166 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13167     xmlDocPtr ret;
13168     xmlParserCtxtPtr ctxt;
13169     xmlSAXHandlerPtr oldsax = NULL;
13170 
13171     if (cur == NULL) return(NULL);
13172 
13173 
13174     ctxt = xmlCreateDocParserCtxt(cur);
13175     if (ctxt == NULL) return(NULL);
13176     if (sax != NULL) {
13177         oldsax = ctxt->sax;
13178         ctxt->sax = sax;
13179         ctxt->userData = NULL;
13180     }
13181 
13182     xmlParseDocument(ctxt);
13183     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13184     else {
13185        ret = NULL;
13186        xmlFreeDoc(ctxt->myDoc);
13187        ctxt->myDoc = NULL;
13188     }
13189     if (sax != NULL)
13190 	ctxt->sax = oldsax;
13191     xmlFreeParserCtxt(ctxt);
13192 
13193     return(ret);
13194 }
13195 
13196 /**
13197  * xmlParseDoc:
13198  * @cur:  a pointer to an array of xmlChar
13199  *
13200  * DEPRECATED: Use xmlReadDoc.
13201  *
13202  * parse an XML in-memory document and build a tree.
13203  *
13204  * Returns the resulting document tree
13205  */
13206 
13207 xmlDocPtr
xmlParseDoc(const xmlChar * cur)13208 xmlParseDoc(const xmlChar *cur) {
13209     return(xmlSAXParseDoc(NULL, cur, 0));
13210 }
13211 #endif /* LIBXML_SAX1_ENABLED */
13212 
13213 /************************************************************************
13214  *									*
13215  *	New set (2.6.0) of simpler and more flexible APIs		*
13216  *									*
13217  ************************************************************************/
13218 
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is used; when it is NULL every non-NULL string is freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro
 * behaves as a single statement and can be used safely inside
 * unbraced if/else bodies. Callers supply the trailing semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
13230 
13231 /**
13232  * xmlCtxtReset:
13233  * @ctxt: an XML parser context
13234  *
13235  * Reset a parser context
13236  */
13237 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)13238 xmlCtxtReset(xmlParserCtxtPtr ctxt)
13239 {
13240     xmlParserInputPtr input;
13241     xmlDictPtr dict;
13242 
13243     if (ctxt == NULL)
13244         return;
13245 
13246     dict = ctxt->dict;
13247 
13248     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13249         xmlFreeInputStream(input);
13250     }
13251     ctxt->inputNr = 0;
13252     ctxt->input = NULL;
13253 
13254     ctxt->spaceNr = 0;
13255     if (ctxt->spaceTab != NULL) {
13256 	ctxt->spaceTab[0] = -1;
13257 	ctxt->space = &ctxt->spaceTab[0];
13258     } else {
13259         ctxt->space = NULL;
13260     }
13261 
13262 
13263     ctxt->nodeNr = 0;
13264     ctxt->node = NULL;
13265 
13266     ctxt->nameNr = 0;
13267     ctxt->name = NULL;
13268 
13269     ctxt->nsNr = 0;
13270     xmlParserNsReset(ctxt->nsdb);
13271 
13272     DICT_FREE(ctxt->version);
13273     ctxt->version = NULL;
13274     DICT_FREE(ctxt->encoding);
13275     ctxt->encoding = NULL;
13276     DICT_FREE(ctxt->extSubURI);
13277     ctxt->extSubURI = NULL;
13278     DICT_FREE(ctxt->extSubSystem);
13279     ctxt->extSubSystem = NULL;
13280     if (ctxt->myDoc != NULL)
13281         xmlFreeDoc(ctxt->myDoc);
13282     ctxt->myDoc = NULL;
13283 
13284     ctxt->standalone = -1;
13285     ctxt->hasExternalSubset = 0;
13286     ctxt->hasPErefs = 0;
13287     ctxt->html = 0;
13288     ctxt->instate = XML_PARSER_START;
13289 
13290     ctxt->wellFormed = 1;
13291     ctxt->nsWellFormed = 1;
13292     ctxt->disableSAX = 0;
13293     ctxt->valid = 1;
13294 #if 0
13295     ctxt->vctxt.userData = ctxt;
13296     ctxt->vctxt.error = xmlParserValidityError;
13297     ctxt->vctxt.warning = xmlParserValidityWarning;
13298 #endif
13299     ctxt->record_info = 0;
13300     ctxt->checkIndex = 0;
13301     ctxt->endCheckState = 0;
13302     ctxt->inSubset = 0;
13303     ctxt->errNo = XML_ERR_OK;
13304     ctxt->depth = 0;
13305     ctxt->catalogs = NULL;
13306     ctxt->sizeentities = 0;
13307     ctxt->sizeentcopy = 0;
13308     xmlInitNodeInfoSeq(&ctxt->node_seq);
13309 
13310     if (ctxt->attsDefault != NULL) {
13311         xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13312         ctxt->attsDefault = NULL;
13313     }
13314     if (ctxt->attsSpecial != NULL) {
13315         xmlHashFree(ctxt->attsSpecial, NULL);
13316         ctxt->attsSpecial = NULL;
13317     }
13318 
13319 #ifdef LIBXML_CATALOG_ENABLED
13320     if (ctxt->catalogs != NULL)
13321 	xmlCatalogFreeLocal(ctxt->catalogs);
13322 #endif
13323     ctxt->nbErrors = 0;
13324     ctxt->nbWarnings = 0;
13325     if (ctxt->lastError.code != XML_ERR_OK)
13326         xmlResetError(&ctxt->lastError);
13327 }
13328 
13329 /**
13330  * xmlCtxtResetPush:
13331  * @ctxt: an XML parser context
13332  * @chunk:  a pointer to an array of chars
13333  * @size:  number of chars in the array
13334  * @filename:  an optional file name or URI
13335  * @encoding:  the document encoding, or NULL
13336  *
13337  * Reset a push parser context
13338  *
13339  * Returns 0 in case of success and 1 in case of error
13340  */
13341 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)13342 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13343                  int size, const char *filename, const char *encoding)
13344 {
13345     xmlParserInputPtr input;
13346 
13347     if (ctxt == NULL)
13348         return(1);
13349 
13350     xmlCtxtReset(ctxt);
13351 
13352     input = xmlInputCreatePush(filename, chunk, size);
13353     if (input == NULL)
13354         return(1);
13355 
13356     inputPush(ctxt, input);
13357 
13358     if (encoding != NULL)
13359         xmlSwitchEncodingName(ctxt, encoding);
13360 
13361     return(0);
13362 }
13363 
13364 static int
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt,int options,int keepMask)13365 xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13366 {
13367     int allMask;
13368 
13369     if (ctxt == NULL)
13370         return(-1);
13371 
13372     /*
13373      * XInclude options aren't handled by the parser.
13374      *
13375      * XML_PARSE_XINCLUDE
13376      * XML_PARSE_NOXINCNODE
13377      * XML_PARSE_NOBASEFIX
13378      */
13379     allMask = XML_PARSE_RECOVER |
13380               XML_PARSE_NOENT |
13381               XML_PARSE_DTDLOAD |
13382               XML_PARSE_DTDATTR |
13383               XML_PARSE_DTDVALID |
13384               XML_PARSE_NOERROR |
13385               XML_PARSE_NOWARNING |
13386               XML_PARSE_PEDANTIC |
13387               XML_PARSE_NOBLANKS |
13388 #ifdef LIBXML_SAX1_ENABLED
13389               XML_PARSE_SAX1 |
13390 #endif
13391               XML_PARSE_NONET |
13392               XML_PARSE_NODICT |
13393               XML_PARSE_NSCLEAN |
13394               XML_PARSE_NOCDATA |
13395               XML_PARSE_COMPACT |
13396               XML_PARSE_OLD10 |
13397               XML_PARSE_HUGE |
13398               XML_PARSE_OLDSAX |
13399               XML_PARSE_IGNORE_ENC |
13400               XML_PARSE_BIG_LINES |
13401               XML_PARSE_NO_XXE;
13402 
13403     ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13404 
13405     /*
13406      * For some options, struct members are historically the source
13407      * of truth. The values are initalized from global variables and
13408      * old code could also modify them directly. Several older API
13409      * functions that don't take an options argument rely on these
13410      * deprecated mechanisms.
13411      *
13412      * Once public access to struct members and the globals are
13413      * disabled, we can use the options bitmask as source of
13414      * truth, making all these struct members obsolete.
13415      *
13416      * The XML_DETECT_IDS flags is misnamed. It simply enables
13417      * loading of the external subset.
13418      */
13419     ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13420     ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13421     ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13422     ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13423     ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13424     ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13425     ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13426     ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13427 
13428     /*
13429      * Changing SAX callbacks is a bad idea. This should be fixed.
13430      */
13431     if (options & XML_PARSE_NOBLANKS) {
13432         ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13433     }
13434     if (options & XML_PARSE_NOCDATA) {
13435         ctxt->sax->cdataBlock = NULL;
13436     }
13437     if (options & XML_PARSE_HUGE) {
13438         if (ctxt->dict != NULL)
13439             xmlDictSetLimit(ctxt->dict, 0);
13440     }
13441 
13442     ctxt->linenumbers = 1;
13443 
13444     return(options & ~allMask);
13445 }
13446 
13447 /**
13448  * xmlCtxtSetOptions:
13449  * @ctxt: an XML parser context
13450  * @options:  a bitmask of xmlParserOption values
13451  *
13452  * Applies the options to the parser context. Unset options are
13453  * cleared.
13454  *
13455  * Available since 2.13.0. With older versions, you can use
13456  * xmlCtxtUseOptions.
13457  *
13458  * XML_PARSE_RECOVER
13459  *
13460  * Enable "recovery" mode which allows non-wellformed documents.
13461  * How this mode behaves exactly is unspecified and may change
13462  * without further notice. Use of this feature is DISCOURAGED.
13463  *
13464  * XML_PARSE_NOENT
13465  *
13466  * Despite the confusing name, this option enables substitution
13467  * of entities. The resulting tree won't contain any entity
13468  * reference nodes.
13469  *
13470  * This option also enables loading of external entities (both
13471  * general and parameter entities) which is dangerous. If you
13472  * process untrusted data, it's recommended to set the
13473  * XML_PARSE_NO_XXE option to disable loading of external
13474  * entities.
13475  *
13476  * XML_PARSE_DTDLOAD
13477  *
13478  * Enables loading of an external DTD and the loading and
13479  * substitution of external parameter entities. Has no effect
13480  * if XML_PARSE_NO_XXE is set.
13481  *
13482  * XML_PARSE_DTDATTR
13483  *
13484  * Adds default attributes from the DTD to the result document.
13485  *
13486  * Implies XML_PARSE_DTDLOAD, but loading of external content
13487  * can be disabled with XML_PARSE_NO_XXE.
13488  *
13489  * XML_PARSE_DTDVALID
13490  *
 * This option enables DTD validation, which requires loading
 * external DTDs and external entities (both general and
 * parameter entities) unless XML_PARSE_NO_XXE is set.
13494  *
13495  * XML_PARSE_NO_XXE
13496  *
13497  * Disables loading of external DTDs or entities.
13498  *
13499  * XML_PARSE_NOERROR
13500  *
13501  * Disable error and warning reports to the error handlers.
13502  * Errors are still accessible with xmlCtxtGetLastError.
13503  *
13504  * XML_PARSE_NOWARNING
13505  *
13506  * Disable warning reports.
13507  *
13508  * XML_PARSE_PEDANTIC
13509  *
13510  * Enable some pedantic warnings.
13511  *
13512  * XML_PARSE_NOBLANKS
13513  *
13514  * Remove some text nodes containing only whitespace from the
13515  * result document. Which nodes are removed depends on DTD
13516  * element declarations or a conservative heuristic. The
13517  * reindenting feature of the serialization code relies on this
13518  * option to be set when parsing. Use of this option is
13519  * DISCOURAGED.
13520  *
13521  * XML_PARSE_SAX1
13522  *
13523  * Always invoke the deprecated SAX1 startElement and endElement
13524  * handlers. This option is DEPRECATED.
13525  *
13526  * XML_PARSE_NONET
13527  *
13528  * Disable network access with the builtin HTTP client.
13529  *
13530  * XML_PARSE_NODICT
13531  *
13532  * Create a document without interned strings, making all
13533  * strings separate memory allocations.
13534  *
13535  * XML_PARSE_NSCLEAN
13536  *
13537  * Remove redundant namespace declarations from the result
13538  * document.
13539  *
13540  * XML_PARSE_NOCDATA
13541  *
13542  * Output normal text nodes instead of CDATA nodes.
13543  *
13544  * XML_PARSE_COMPACT
13545  *
13546  * Store small strings directly in the node struct to save
13547  * memory.
13548  *
13549  * XML_PARSE_OLD10
13550  *
13551  * Use old Name productions from before XML 1.0 Fifth Edition.
 * This option is DEPRECATED.
13553  *
13554  * XML_PARSE_HUGE
13555  *
13556  * Relax some internal limits.
13557  *
13558  * Maximum size of text nodes, tags, comments, processing instructions,
13559  * CDATA sections, entity values
13560  *
13561  * normal: 10M
13562  * huge:    1B
13563  *
13564  * Maximum size of names, system literals, pubid literals
13565  *
13566  * normal: 50K
13567  * huge:   10M
13568  *
13569  * Maximum nesting depth of elements
13570  *
13571  * normal:  256
13572  * huge:   2048
13573  *
13574  * Maximum nesting depth of entities
13575  *
13576  * normal: 20
13577  * huge:   40
13578  *
13579  * XML_PARSE_OLDSAX
13580  *
13581  * Enable an unspecified legacy mode for SAX parsers. This
13582  * option is DEPRECATED.
13583  *
13584  * XML_PARSE_IGNORE_ENC
13585  *
13586  * Ignore the encoding in the XML declaration. This option is
13587  * mostly unneeded these days. The only effect is to enforce
13588  * UTF-8 decoding of ASCII-like data.
13589  *
13590  * XML_PARSE_BIG_LINES
13591  *
13592  * Enable reporting of line numbers larger than 65535.
13593  *
13594  * XML_PARSE_NO_UNZIP
13595  *
13596  * Disables input decompression. Setting this option is recommended
13597  * to avoid zip bombs.
13598  *
13599  * Available since 2.14.0.
13600  *
13601  * Returns 0 in case of success, the set of unknown or unimplemented options
13602  *         in case of error.
13603  */
13604 int
xmlCtxtSetOptions(xmlParserCtxtPtr ctxt,int options)13605 xmlCtxtSetOptions(xmlParserCtxtPtr ctxt, int options)
13606 {
13607     return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13608 }
13609 
13610 /**
13611  * xmlCtxtUseOptions:
13612  * @ctxt: an XML parser context
13613  * @options:  a combination of xmlParserOption
13614  *
13615  * DEPRECATED: Use xmlCtxtSetOptions.
13616  *
13617  * Applies the options to the parser context. The following options
13618  * are never cleared and can only be enabled:
13619  *
13620  * XML_PARSE_NOERROR
13621  * XML_PARSE_NOWARNING
13622  * XML_PARSE_NONET
13623  * XML_PARSE_NSCLEAN
13624  * XML_PARSE_NOCDATA
13625  * XML_PARSE_COMPACT
13626  * XML_PARSE_OLD10
13627  * XML_PARSE_HUGE
13628  * XML_PARSE_OLDSAX
13629  * XML_PARSE_IGNORE_ENC
13630  * XML_PARSE_BIG_LINES
13631  *
13632  * Returns 0 in case of success, the set of unknown or unimplemented options
13633  *         in case of error.
13634  */
13635 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)13636 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13637 {
13638     int keepMask;
13639 
13640     /*
13641      * For historic reasons, some options can only be enabled.
13642      */
13643     keepMask = XML_PARSE_NOERROR |
13644                XML_PARSE_NOWARNING |
13645                XML_PARSE_NONET |
13646                XML_PARSE_NSCLEAN |
13647                XML_PARSE_NOCDATA |
13648                XML_PARSE_COMPACT |
13649                XML_PARSE_OLD10 |
13650                XML_PARSE_HUGE |
13651                XML_PARSE_OLDSAX |
13652                XML_PARSE_IGNORE_ENC |
13653                XML_PARSE_BIG_LINES;
13654 
13655     return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13656 }
13657 
13658 /**
13659  * xmlCtxtSetMaxAmplification:
13660  * @ctxt: an XML parser context
13661  * @maxAmpl:  maximum amplification factor
13662  *
13663  * To protect against exponential entity expansion ("billion laughs"), the
13664  * size of serialized output is (roughly) limited to the input size
13665  * multiplied by this factor. The default value is 5.
13666  *
13667  * When working with documents making heavy use of entity expansion, it can
13668  * be necessary to increase the value. For security reasons, this should only
13669  * be considered when processing trusted input.
13670  */
13671 void
xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt,unsigned maxAmpl)13672 xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl)
13673 {
13674     ctxt->maxAmpl = maxAmpl;
13675 }
13676 
13677 /**
13678  * xmlCtxtParseDocument:
13679  * @ctxt:  an XML parser context
13680  * @input:  parser input
13681  *
13682  * Parse an XML document and return the resulting document tree.
13683  * Takes ownership of the input object.
13684  *
13685  * Available since 2.13.0.
13686  *
13687  * Returns the resulting document tree or NULL
13688  */
13689 xmlDocPtr
xmlCtxtParseDocument(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)13690 xmlCtxtParseDocument(xmlParserCtxtPtr ctxt, xmlParserInputPtr input)
13691 {
13692     xmlDocPtr ret = NULL;
13693 
13694     if ((ctxt == NULL) || (input == NULL))
13695         return(NULL);
13696 
13697     /* assert(ctxt->inputNr == 0); */
13698     while (ctxt->inputNr > 0)
13699         xmlFreeInputStream(inputPop(ctxt));
13700 
13701     if (inputPush(ctxt, input) < 0) {
13702         xmlFreeInputStream(input);
13703         return(NULL);
13704     }
13705 
13706     xmlParseDocument(ctxt);
13707 
13708     if ((ctxt->wellFormed) ||
13709         ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
13710         ret = ctxt->myDoc;
13711     } else {
13712         if (ctxt->errNo == XML_ERR_OK)
13713             xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, "unknown error\n");
13714 
13715         ret = NULL;
13716 	xmlFreeDoc(ctxt->myDoc);
13717     }
13718     ctxt->myDoc = NULL;
13719 
13720     /* assert(ctxt->inputNr == 1); */
13721     while (ctxt->inputNr > 0)
13722         xmlFreeInputStream(inputPop(ctxt));
13723 
13724     return(ret);
13725 }
13726 
13727 /**
13728  * xmlReadDoc:
13729  * @cur:  a pointer to a zero terminated string
13730  * @URL:  base URL (optional)
13731  * @encoding:  the document encoding (optional)
13732  * @options:  a combination of xmlParserOption
13733  *
13734  * Convenience function to parse an XML document from a
13735  * zero-terminated string.
13736  *
13737  * See xmlCtxtReadDoc for details.
13738  *
13739  * Returns the resulting document tree
13740  */
13741 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)13742 xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13743            int options)
13744 {
13745     xmlParserCtxtPtr ctxt;
13746     xmlParserInputPtr input;
13747     xmlDocPtr doc;
13748 
13749     ctxt = xmlNewParserCtxt();
13750     if (ctxt == NULL)
13751         return(NULL);
13752 
13753     xmlCtxtUseOptions(ctxt, options);
13754 
13755     input = xmlNewInputString(ctxt, URL, (const char *) cur, encoding,
13756                               XML_INPUT_BUF_STATIC);
13757 
13758     doc = xmlCtxtParseDocument(ctxt, input);
13759 
13760     xmlFreeParserCtxt(ctxt);
13761     return(doc);
13762 }
13763 
13764 /**
13765  * xmlReadFile:
13766  * @filename:  a file or URL
13767  * @encoding:  the document encoding (optional)
13768  * @options:  a combination of xmlParserOption
13769  *
13770  * Convenience function to parse an XML file from the filesystem,
13771  * the network or a global user-define resource loader.
13772  *
13773  * See xmlCtxtReadFile for details.
13774  *
13775  * Returns the resulting document tree
13776  */
13777 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)13778 xmlReadFile(const char *filename, const char *encoding, int options)
13779 {
13780     xmlParserCtxtPtr ctxt;
13781     xmlParserInputPtr input;
13782     xmlDocPtr doc;
13783 
13784     ctxt = xmlNewParserCtxt();
13785     if (ctxt == NULL)
13786         return(NULL);
13787 
13788     xmlCtxtUseOptions(ctxt, options);
13789 
13790     input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0);
13791 
13792     doc = xmlCtxtParseDocument(ctxt, input);
13793 
13794     xmlFreeParserCtxt(ctxt);
13795     return(doc);
13796 }
13797 
13798 /**
13799  * xmlReadMemory:
13800  * @buffer:  a pointer to a char array
13801  * @size:  the size of the array
13802  * @url:  base URL (optional)
13803  * @encoding:  the document encoding (optional)
13804  * @options:  a combination of xmlParserOption
13805  *
13806  * Parse an XML in-memory document and build a tree. The input buffer must
13807  * not contain a terminating null byte.
13808  *
13809  * See xmlCtxtReadMemory for details.
13810  *
13811  * Returns the resulting document tree
13812  */
13813 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * url,const char * encoding,int options)13814 xmlReadMemory(const char *buffer, int size, const char *url,
13815               const char *encoding, int options)
13816 {
13817     xmlParserCtxtPtr ctxt;
13818     xmlParserInputPtr input;
13819     xmlDocPtr doc;
13820 
13821     if (size < 0)
13822 	return(NULL);
13823 
13824     ctxt = xmlNewParserCtxt();
13825     if (ctxt == NULL)
13826         return(NULL);
13827 
13828     xmlCtxtUseOptions(ctxt, options);
13829 
13830     input = xmlNewInputMemory(ctxt, url, buffer, size, encoding,
13831                               XML_INPUT_BUF_STATIC);
13832 
13833     doc = xmlCtxtParseDocument(ctxt, input);
13834 
13835     xmlFreeParserCtxt(ctxt);
13836     return(doc);
13837 }
13838 
13839 /**
13840  * xmlReadFd:
13841  * @fd:  an open file descriptor
13842  * @URL:  base URL (optional)
13843  * @encoding:  the document encoding (optional)
13844  * @options:  a combination of xmlParserOption
13845  *
13846  * Parse an XML from a file descriptor and build a tree.
13847  *
13848  * See xmlCtxtReadFd for details.
13849  *
13850  * NOTE that the file descriptor will not be closed when the
13851  * context is freed or reset.
13852  *
13853  * Returns the resulting document tree
13854  */
13855 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)13856 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13857 {
13858     xmlParserCtxtPtr ctxt;
13859     xmlParserInputPtr input;
13860     xmlDocPtr doc;
13861 
13862     ctxt = xmlNewParserCtxt();
13863     if (ctxt == NULL)
13864         return(NULL);
13865 
13866     xmlCtxtUseOptions(ctxt, options);
13867 
13868     input = xmlNewInputFd(ctxt, URL, fd, encoding, 0);
13869 
13870     doc = xmlCtxtParseDocument(ctxt, input);
13871 
13872     xmlFreeParserCtxt(ctxt);
13873     return(doc);
13874 }
13875 
13876 /**
13877  * xmlReadIO:
13878  * @ioread:  an I/O read function
13879  * @ioclose:  an I/O close function (optional)
13880  * @ioctx:  an I/O handler
13881  * @URL:  base URL (optional)
13882  * @encoding:  the document encoding (optional)
13883  * @options:  a combination of xmlParserOption
13884  *
13885  * Parse an XML document from I/O functions and context and build a tree.
13886  *
13887  * See xmlCtxtReadIO for details.
13888  *
13889  * Returns the resulting document tree
13890  */
13891 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)13892 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13893           void *ioctx, const char *URL, const char *encoding, int options)
13894 {
13895     xmlParserCtxtPtr ctxt;
13896     xmlParserInputPtr input;
13897     xmlDocPtr doc;
13898 
13899     ctxt = xmlNewParserCtxt();
13900     if (ctxt == NULL)
13901         return(NULL);
13902 
13903     xmlCtxtUseOptions(ctxt, options);
13904 
13905     input = xmlNewInputIO(ctxt, URL, ioread, ioclose, ioctx, encoding, 0);
13906 
13907     doc = xmlCtxtParseDocument(ctxt, input);
13908 
13909     xmlFreeParserCtxt(ctxt);
13910     return(doc);
13911 }
13912 
13913 /**
13914  * xmlCtxtReadDoc:
13915  * @ctxt:  an XML parser context
13916  * @str:  a pointer to a zero terminated string
13917  * @URL:  base URL (optional)
13918  * @encoding:  the document encoding (optional)
13919  * @options:  a combination of xmlParserOption
13920  *
13921  * Parse an XML in-memory document and build a tree.
13922  *
13923  * @URL is used as base to resolve external entities and for error
13924  * reporting.
13925  *
13926  * See xmlCtxtUseOptions for details.
13927  *
13928  * Returns the resulting document tree
13929  */
13930 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * str,const char * URL,const char * encoding,int options)13931 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str,
13932                const char *URL, const char *encoding, int options)
13933 {
13934     xmlParserInputPtr input;
13935 
13936     if (ctxt == NULL)
13937         return(NULL);
13938 
13939     xmlCtxtReset(ctxt);
13940     xmlCtxtUseOptions(ctxt, options);
13941 
13942     input = xmlNewInputString(ctxt, URL, (const char *) str, encoding,
13943                               XML_INPUT_BUF_STATIC);
13944 
13945     return(xmlCtxtParseDocument(ctxt, input));
13946 }
13947 
13948 /**
13949  * xmlCtxtReadFile:
13950  * @ctxt:  an XML parser context
13951  * @filename:  a file or URL
13952  * @encoding:  the document encoding (optional)
13953  * @options:  a combination of xmlParserOption
13954  *
13955  * Parse an XML file from the filesystem, the network or a user-defined
13956  * resource loader.
13957  *
13958  * Returns the resulting document tree
13959  */
13960 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)13961 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13962                 const char *encoding, int options)
13963 {
13964     xmlParserInputPtr input;
13965 
13966     if (ctxt == NULL)
13967         return(NULL);
13968 
13969     xmlCtxtReset(ctxt);
13970     xmlCtxtUseOptions(ctxt, options);
13971 
13972     input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0);
13973 
13974     return(xmlCtxtParseDocument(ctxt, input));
13975 }
13976 
13977 /**
13978  * xmlCtxtReadMemory:
13979  * @ctxt:  an XML parser context
13980  * @buffer:  a pointer to a char array
13981  * @size:  the size of the array
13982  * @URL:  base URL (optional)
13983  * @encoding:  the document encoding (optional)
13984  * @options:  a combination of xmlParserOption
13985  *
13986  * Parse an XML in-memory document and build a tree. The input buffer must
13987  * not contain a terminating null byte.
13988  *
13989  * @URL is used as base to resolve external entities and for error
13990  * reporting.
13991  *
13992  * See xmlCtxtUseOptions for details.
13993  *
13994  * Returns the resulting document tree
13995  */
13996 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)13997 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
13998                   const char *URL, const char *encoding, int options)
13999 {
14000     xmlParserInputPtr input;
14001 
14002     if ((ctxt == NULL) || (size < 0))
14003         return(NULL);
14004 
14005     xmlCtxtReset(ctxt);
14006     xmlCtxtUseOptions(ctxt, options);
14007 
14008     input = xmlNewInputMemory(ctxt, URL, buffer, size, encoding,
14009                               XML_INPUT_BUF_STATIC);
14010 
14011     return(xmlCtxtParseDocument(ctxt, input));
14012 }
14013 
14014 /**
14015  * xmlCtxtReadFd:
14016  * @ctxt:  an XML parser context
14017  * @fd:  an open file descriptor
14018  * @URL:  base URL (optional)
14019  * @encoding:  the document encoding (optional)
14020  * @options:  a combination of xmlParserOption
14021  *
14022  * Parse an XML document from a file descriptor and build a tree.
14023  *
14024  * NOTE that the file descriptor will not be closed when the
14025  * context is freed or reset.
14026  *
14027  * @URL is used as base to resolve external entities and for error
14028  * reporting.
14029  *
14030  * See xmlCtxtUseOptions for details.
14031  *
14032  * Returns the resulting document tree
14033  */
14034 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)14035 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14036               const char *URL, const char *encoding, int options)
14037 {
14038     xmlParserInputPtr input;
14039 
14040     if (ctxt == NULL)
14041         return(NULL);
14042 
14043     xmlCtxtReset(ctxt);
14044     xmlCtxtUseOptions(ctxt, options);
14045 
14046     input = xmlNewInputFd(ctxt, URL, fd, encoding, 0);
14047 
14048     return(xmlCtxtParseDocument(ctxt, input));
14049 }
14050 
14051 /**
14052  * xmlCtxtReadIO:
14053  * @ctxt:  an XML parser context
14054  * @ioread:  an I/O read function
14055  * @ioclose:  an I/O close function
14056  * @ioctx:  an I/O handler
14057  * @URL:  the base URL to use for the document
14058  * @encoding:  the document encoding, or NULL
14059  * @options:  a combination of xmlParserOption
14060  *
14061  * parse an XML document from I/O functions and source and build a tree.
14062  * This reuses the existing @ctxt parser context
14063  *
14064  * @URL is used as base to resolve external entities and for error
14065  * reporting.
14066  *
14067  * See xmlCtxtUseOptions for details.
14068  *
14069  * Returns the resulting document tree
14070  */
14071 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)14072 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14073               xmlInputCloseCallback ioclose, void *ioctx,
14074 	      const char *URL,
14075               const char *encoding, int options)
14076 {
14077     xmlParserInputPtr input;
14078 
14079     if (ctxt == NULL)
14080         return(NULL);
14081 
14082     xmlCtxtReset(ctxt);
14083     xmlCtxtUseOptions(ctxt, options);
14084 
14085     input = xmlNewInputIO(ctxt, URL, ioread, ioclose, ioctx, encoding, 0);
14086 
14087     return(xmlCtxtParseDocument(ctxt, input));
14088 }
14089 
14090