• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3  *            implemented on top of the SAX interfaces
4  *
5  * References:
6  *   The XML specification:
7  *     http://www.w3.org/TR/REC-xml
8  *   Original 1.0 version:
9  *     http://www.w3.org/TR/1998/REC-xml-19980210
10  *   XML second edition working draft
11  *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12  *
13  * Okay this is a big file, the parser core is around 7000 lines, then it
14  * is followed by the progressive parser top routines, then the various
15  * high level APIs to call the parser and a few miscellaneous functions.
16  * A number of helper functions and deprecated ones have been moved to
17  * parserInternals.c to reduce this file size.
18  * As much as possible the functions are associated with their relative
19  * production in the XML specification. A few productions defining the
20  * different ranges of character are actually implemented either in
21  * parserInternals.h or parserInternals.c
22  * The DOM tree build is realized from the default SAX callbacks in
23  * the module SAX.c.
24  * The routines doing the validation checks are in valid.c and called either
25  * from the SAX callbacks or as standalone functions using a preparsed
26  * document.
27  *
28  * See Copyright for the status of this software.
29  *
30  * daniel@veillard.com
31  */
32 
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37 
38 #define IN_LIBXML
39 #include "libxml.h"
40 
41 #if defined(_WIN32)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
46 
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <ctype.h>
53 #include <stdlib.h>
54 #include <libxml/parser.h>
55 #include <libxml/xmlmemory.h>
56 #include <libxml/tree.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #include <libxml/SAX2.h>
65 #ifdef LIBXML_CATALOG_ENABLED
66 #include <libxml/catalog.h>
67 #endif
68 
69 #include "private/buf.h"
70 #include "private/dict.h"
71 #include "private/entities.h"
72 #include "private/error.h"
73 #include "private/html.h"
74 #include "private/io.h"
75 #include "private/parser.h"
76 
77 #define NS_INDEX_EMPTY  INT_MAX
78 #define NS_INDEX_XML    (INT_MAX - 1)
79 #define URI_HASH_EMPTY  0xD943A04E
80 #define URI_HASH_XML    0xF0451F02
81 
82 struct _xmlStartTag {
83     const xmlChar *prefix;
84     const xmlChar *URI;
85     int line;
86     int nsNr;
87 };
88 
/*
 * Extra per-namespace bookkeeping.
 * NOTE(review): field meanings are inferred from their names only; the
 * code using them is outside this part of the file.
 */
typedef struct {
    void *saxData;                  /* opaque pointer, owner not visible here */
    unsigned int prefixHashValue;   /* presumably the hash of the prefix */
    unsigned int uriHashValue;      /* presumably the hash of the URI */
    unsigned int elementId;         /* presumably the declaring element id */
    int oldIndex;                   /* presumably a previous binding index */
} xmlParserNsExtra;
96 
/*
 * One slot of the namespace hash table: a hash value paired with an
 * index. NOTE(review): what the index refers to is not visible here.
 */
typedef struct {
    unsigned int hashValue;
    int index;
} xmlParserNsBucket;
101 
102 struct _xmlParserNsData {
103     xmlParserNsExtra *extra;
104 
105     unsigned hashSize;
106     unsigned hashElems;
107     xmlParserNsBucket *hash;
108 
109     unsigned elementId;
110     int defaultNsIndex;
111 };
112 
/*
 * One slot of the attribute hash table: a hash value paired with an
 * index. NOTE(review): what the index refers to is not visible here.
 */
struct _xmlAttrHashBucket {
    unsigned int hashValue;
    int index;
};
117 
118 static xmlParserCtxtPtr
119 xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
120         const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
121         xmlParserCtxtPtr pctx);
122 
123 static int
124 xmlParseElementStart(xmlParserCtxtPtr ctxt);
125 
126 static void
127 xmlParseElementEnd(xmlParserCtxtPtr ctxt);
128 
129 /************************************************************************
130  *									*
131  *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
132  *									*
133  ************************************************************************/
134 
135 #define XML_PARSER_BIG_ENTITY 1000
136 #define XML_PARSER_LOT_ENTITY 5000
137 
138 /*
139  * Constants for protection against abusive entity expansion
140  * ("billion laughs").
141  */
142 
143 /*
144  * A certain amount of entity expansion which is always allowed.
145  */
146 #define XML_PARSER_ALLOWED_EXPANSION 1000000
147 
148 /*
149  * Fixed cost for each entity reference. This crudely models processing time
150  * as well to protect, for example, against exponential expansion of empty
151  * or very short entities.
152  */
153 #define XML_ENT_FIXED_COST 20
154 
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256U;
164 
165 
166 
167 #define XML_PARSER_BIG_BUFFER_SIZE 300
168 #define XML_PARSER_BUFFER_SIZE 100
169 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
170 
171 /**
172  * XML_PARSER_CHUNK_SIZE
173  *
174  * When calling GROW that's the minimal amount of data
175  * the parser expected to have received. It is not a hard
176  * limit but an optimization when reading strings like Names
177  * It is not strictly needed as long as the input's available characters
178  * are followed by 0, which should be provided by the I/O level
179  */
180 #define XML_PARSER_CHUNK_SIZE 100
181 
182 /**
183  * xmlParserVersion:
184  *
185  * Constant string describing the internal version of the library
186  */
187 const char *const
188 xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
189 
/*
 * NULL-terminated list of the XML-prefixed processing instruction
 * targets allowed by W3C specs.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
199 
200 
201 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
202 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
203                                               const xmlChar **str);
204 
205 static xmlParserErrors
206 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
207 	              xmlSAXHandlerPtr sax,
208 		      void *user_data, int depth, const xmlChar *URL,
209 		      const xmlChar *ID, xmlNodePtr *list);
210 
211 static int
212 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
213                           const char *encoding);
214 #ifdef LIBXML_LEGACY_ENABLED
215 static void
216 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
217                       xmlNodePtr lastNode);
218 #endif /* LIBXML_LEGACY_ENABLED */
219 
220 static xmlParserErrors
221 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
222 		      const xmlChar *string, void *user_data, xmlNodePtr *lst);
223 
224 static int
225 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
226 
227 /************************************************************************
228  *									*
229  *		Some factorized error routines				*
230  *									*
231  ************************************************************************/
232 
233 /**
234  * xmlErrAttributeDup:
235  * @ctxt:  an XML parser context
236  * @prefix:  the attribute prefix
237  * @localname:  the attribute localname
238  *
239  * Handle a redefinition of attribute error
240  */
241 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)242 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
243                    const xmlChar * localname)
244 {
245     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
246         (ctxt->instate == XML_PARSER_EOF))
247 	return;
248     if (ctxt != NULL)
249 	ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
250 
251     if (prefix == NULL)
252         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
253                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
254                         (const char *) localname, NULL, NULL, 0, 0,
255                         "Attribute %s redefined\n", localname);
256     else
257         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
258                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
259                         (const char *) prefix, (const char *) localname,
260                         NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
261                         localname);
262     if (ctxt != NULL) {
263 	ctxt->wellFormed = 0;
264 	if (ctxt->recovery == 0)
265 	    ctxt->disableSAX = 1;
266     }
267 }
268 
269 /**
270  * xmlFatalErrMsg:
271  * @ctxt:  an XML parser context
272  * @error:  the error number
273  * @msg:  the error message
274  *
275  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
276  */
277 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)278 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
279                const char *msg)
280 {
281     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
282         (ctxt->instate == XML_PARSER_EOF))
283 	return;
284     if (ctxt != NULL)
285 	ctxt->errNo = error;
286     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
287                     XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
288     if (ctxt != NULL) {
289 	ctxt->wellFormed = 0;
290 	if (ctxt->recovery == 0)
291 	    ctxt->disableSAX = 1;
292     }
293 }
294 
295 /**
296  * xmlWarningMsg:
297  * @ctxt:  an XML parser context
298  * @error:  the error number
299  * @msg:  the error message
300  * @str1:  extra data
301  * @str2:  extra data
302  *
303  * Handle a warning.
304  */
305 void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)306 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
307               const char *msg, const xmlChar *str1, const xmlChar *str2)
308 {
309     xmlStructuredErrorFunc schannel = NULL;
310 
311     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
312         (ctxt->instate == XML_PARSER_EOF))
313 	return;
314     if ((ctxt != NULL) && (ctxt->sax != NULL) &&
315         (ctxt->sax->initialized == XML_SAX2_MAGIC))
316         schannel = ctxt->sax->serror;
317     if (ctxt != NULL) {
318         __xmlRaiseError(schannel,
319                     (ctxt->sax) ? ctxt->sax->warning : NULL,
320                     ctxt->userData,
321                     ctxt, NULL, XML_FROM_PARSER, error,
322                     XML_ERR_WARNING, NULL, 0,
323 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
324 		    msg, (const char *) str1, (const char *) str2);
325     } else {
326         __xmlRaiseError(schannel, NULL, NULL,
327                     ctxt, NULL, XML_FROM_PARSER, error,
328                     XML_ERR_WARNING, NULL, 0,
329 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
330 		    msg, (const char *) str1, (const char *) str2);
331     }
332 }
333 
334 /**
335  * xmlValidityError:
336  * @ctxt:  an XML parser context
337  * @error:  the error number
338  * @msg:  the error message
339  * @str1:  extra data
340  *
341  * Handle a validity error.
342  */
343 static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)344 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
345               const char *msg, const xmlChar *str1, const xmlChar *str2)
346 {
347     xmlStructuredErrorFunc schannel = NULL;
348 
349     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
350         (ctxt->instate == XML_PARSER_EOF))
351 	return;
352     if (ctxt != NULL) {
353 	ctxt->errNo = error;
354 	if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
355 	    schannel = ctxt->sax->serror;
356     }
357     if (ctxt != NULL) {
358         __xmlRaiseError(schannel,
359                     ctxt->vctxt.error, ctxt->vctxt.userData,
360                     ctxt, NULL, XML_FROM_DTD, error,
361                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
362 		    (const char *) str2, NULL, 0, 0,
363 		    msg, (const char *) str1, (const char *) str2);
364 	ctxt->valid = 0;
365     } else {
366         __xmlRaiseError(schannel, NULL, NULL,
367                     ctxt, NULL, XML_FROM_DTD, error,
368                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
369 		    (const char *) str2, NULL, 0, 0,
370 		    msg, (const char *) str1, (const char *) str2);
371     }
372 }
373 
374 /**
375  * xmlFatalErrMsgInt:
376  * @ctxt:  an XML parser context
377  * @error:  the error number
378  * @msg:  the error message
379  * @val:  an integer value
380  *
381  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
382  */
383 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)384 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
385                   const char *msg, int val)
386 {
387     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
388         (ctxt->instate == XML_PARSER_EOF))
389 	return;
390     if (ctxt != NULL)
391 	ctxt->errNo = error;
392     __xmlRaiseError(NULL, NULL, NULL,
393                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
394                     NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
395     if (ctxt != NULL) {
396 	ctxt->wellFormed = 0;
397 	if (ctxt->recovery == 0)
398 	    ctxt->disableSAX = 1;
399     }
400 }
401 
402 /**
403  * xmlFatalErrMsgStrIntStr:
404  * @ctxt:  an XML parser context
405  * @error:  the error number
406  * @msg:  the error message
407  * @str1:  an string info
408  * @val:  an integer value
409  * @str2:  an string info
410  *
411  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
412  */
413 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)414 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
415                   const char *msg, const xmlChar *str1, int val,
416 		  const xmlChar *str2)
417 {
418     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
419         (ctxt->instate == XML_PARSER_EOF))
420 	return;
421     if (ctxt != NULL)
422 	ctxt->errNo = error;
423     __xmlRaiseError(NULL, NULL, NULL,
424                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
425                     NULL, 0, (const char *) str1, (const char *) str2,
426 		    NULL, val, 0, msg, str1, val, str2);
427     if (ctxt != NULL) {
428 	ctxt->wellFormed = 0;
429 	if (ctxt->recovery == 0)
430 	    ctxt->disableSAX = 1;
431     }
432 }
433 
434 /**
435  * xmlFatalErrMsgStr:
436  * @ctxt:  an XML parser context
437  * @error:  the error number
438  * @msg:  the error message
439  * @val:  a string value
440  *
441  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
442  */
443 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)444 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
445                   const char *msg, const xmlChar * val)
446 {
447     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
448         (ctxt->instate == XML_PARSER_EOF))
449 	return;
450     if (ctxt != NULL)
451 	ctxt->errNo = error;
452     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
453                     XML_FROM_PARSER, error, XML_ERR_FATAL,
454                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
455                     val);
456     if (ctxt != NULL) {
457 	ctxt->wellFormed = 0;
458 	if (ctxt->recovery == 0)
459 	    ctxt->disableSAX = 1;
460     }
461 }
462 
463 /**
464  * xmlErrMsgStr:
465  * @ctxt:  an XML parser context
466  * @error:  the error number
467  * @msg:  the error message
468  * @val:  a string value
469  *
470  * Handle a non fatal parser error
471  */
472 static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)473 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
474                   const char *msg, const xmlChar * val)
475 {
476     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
477         (ctxt->instate == XML_PARSER_EOF))
478 	return;
479     if (ctxt != NULL)
480 	ctxt->errNo = error;
481     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
482                     XML_FROM_PARSER, error, XML_ERR_ERROR,
483                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
484                     val);
485 }
486 
487 /**
488  * xmlNsErr:
489  * @ctxt:  an XML parser context
490  * @error:  the error number
491  * @msg:  the message
492  * @info1:  extra information string
493  * @info2:  extra information string
494  *
495  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
496  */
497 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)498 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
499          const char *msg,
500          const xmlChar * info1, const xmlChar * info2,
501          const xmlChar * info3)
502 {
503     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
504         (ctxt->instate == XML_PARSER_EOF))
505 	return;
506     if (ctxt != NULL)
507 	ctxt->errNo = error;
508     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
509                     XML_ERR_ERROR, NULL, 0, (const char *) info1,
510                     (const char *) info2, (const char *) info3, 0, 0, msg,
511                     info1, info2, info3);
512     if (ctxt != NULL)
513 	ctxt->nsWellFormed = 0;
514 }
515 
516 /**
517  * xmlNsWarn
518  * @ctxt:  an XML parser context
519  * @error:  the error number
520  * @msg:  the message
521  * @info1:  extra information string
522  * @info2:  extra information string
523  *
524  * Handle a namespace warning error
525  */
526 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)527 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
528          const char *msg,
529          const xmlChar * info1, const xmlChar * info2,
530          const xmlChar * info3)
531 {
532     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
533         (ctxt->instate == XML_PARSER_EOF))
534 	return;
535     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
536                     XML_ERR_WARNING, NULL, 0, (const char *) info1,
537                     (const char *) info2, (const char *) info3, 0, 0, msg,
538                     info1, info2, info3);
539 }
540 
/*
 * xmlSaturatedAdd:
 * @dst:  accumulator updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without overflowing. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : (*dst + val);
}
548 
/*
 * xmlSaturatedAddSizeT:
 * @dst:  accumulator updated in place
 * @val:  amount to add, given as a size_t
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around.
 *
 * @val is a size_t rather than an unsigned long so that, on platforms
 * where size_t is wider than unsigned long, a large value reaches the
 * overflow check intact instead of being truncated by the implicit
 * conversion at the call site.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val;  /* fits: val <= ULONG_MAX - *dst */
}
556 
557 /**
558  * xmlParserEntityCheck:
559  * @ctxt:  parser context
560  * @extra:  sum of unexpanded entity sizes
561  *
562  * Check for non-linear entity expansion behaviour.
563  *
564  * In some cases like xmlStringDecodeEntities, this function is called
565  * for each, possibly nested entity and its unexpanded content length.
566  *
567  * In other cases like xmlParseReference, it's only called for each
568  * top-level entity with its unexpanded content length plus the sum of
569  * the unexpanded content lengths (plus fixed cost) of all nested
570  * entities.
571  *
572  * Summing the unexpanded lengths also adds the length of the reference.
573  * This is by design. Taking the length of the entity name into account
574  * discourages attacks that try to waste CPU time with abusively long
575  * entity names. See test/recurse/lol6.xml for example. Each call also
576  * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
577  * short entities.
578  *
579  * Returns 1 on error, 0 on success.
580  */
581 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,unsigned long extra)582 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
583 {
584     unsigned long consumed;
585     xmlParserInputPtr input = ctxt->input;
586     xmlEntityPtr entity = input->entity;
587 
588     /*
589      * Compute total consumed bytes so far, including input streams of
590      * external entities.
591      */
592     consumed = input->parentConsumed;
593     if ((entity == NULL) ||
594         ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
595          ((entity->flags & XML_ENT_PARSED) == 0))) {
596         xmlSaturatedAdd(&consumed, input->consumed);
597         xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
598     }
599     xmlSaturatedAdd(&consumed, ctxt->sizeentities);
600 
601     /*
602      * Add extra cost and some fixed cost.
603      */
604     xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
605     xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
606 
607     /*
608      * It's important to always use saturation arithmetic when tracking
609      * entity sizes to make the size checks reliable. If "sizeentcopy"
610      * overflows, we have to abort.
611      */
612     if ((ctxt->sizeentcopy > XML_PARSER_ALLOWED_EXPANSION) &&
613         ((ctxt->sizeentcopy >= ULONG_MAX) ||
614          (ctxt->sizeentcopy / ctxt->maxAmpl > consumed))) {
615         xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
616                        "Maximum entity amplification factor exceeded, see "
617                        "xmlCtxtSetMaxAmplification.\n");
618         xmlHaltParser(ctxt);
619         return(1);
620     }
621 
622     return(0);
623 }
624 
625 /************************************************************************
626  *									*
627  *		Library wide options					*
628  *									*
629  ************************************************************************/
630 
631 /**
632   * xmlHasFeature:
633   * @feature: the feature to be examined
634   *
635   * Examines if the library has been compiled with a given feature.
636   *
637   * Returns a non-zero value if the feature exist, otherwise zero.
638   * Returns zero (0) if the feature does not exist or an unknown
639   * unknown feature is requested, non-zero otherwise.
640   */
641 int
xmlHasFeature(xmlFeature feature)642 xmlHasFeature(xmlFeature feature)
643 {
644     switch (feature) {
645 	case XML_WITH_THREAD:
646 #ifdef LIBXML_THREAD_ENABLED
647 	    return(1);
648 #else
649 	    return(0);
650 #endif
651         case XML_WITH_TREE:
652 #ifdef LIBXML_TREE_ENABLED
653             return(1);
654 #else
655             return(0);
656 #endif
657         case XML_WITH_OUTPUT:
658 #ifdef LIBXML_OUTPUT_ENABLED
659             return(1);
660 #else
661             return(0);
662 #endif
663         case XML_WITH_PUSH:
664 #ifdef LIBXML_PUSH_ENABLED
665             return(1);
666 #else
667             return(0);
668 #endif
669         case XML_WITH_READER:
670 #ifdef LIBXML_READER_ENABLED
671             return(1);
672 #else
673             return(0);
674 #endif
675         case XML_WITH_PATTERN:
676 #ifdef LIBXML_PATTERN_ENABLED
677             return(1);
678 #else
679             return(0);
680 #endif
681         case XML_WITH_WRITER:
682 #ifdef LIBXML_WRITER_ENABLED
683             return(1);
684 #else
685             return(0);
686 #endif
687         case XML_WITH_SAX1:
688 #ifdef LIBXML_SAX1_ENABLED
689             return(1);
690 #else
691             return(0);
692 #endif
693         case XML_WITH_FTP:
694 #ifdef LIBXML_FTP_ENABLED
695             return(1);
696 #else
697             return(0);
698 #endif
699         case XML_WITH_HTTP:
700 #ifdef LIBXML_HTTP_ENABLED
701             return(1);
702 #else
703             return(0);
704 #endif
705         case XML_WITH_VALID:
706 #ifdef LIBXML_VALID_ENABLED
707             return(1);
708 #else
709             return(0);
710 #endif
711         case XML_WITH_HTML:
712 #ifdef LIBXML_HTML_ENABLED
713             return(1);
714 #else
715             return(0);
716 #endif
717         case XML_WITH_LEGACY:
718 #ifdef LIBXML_LEGACY_ENABLED
719             return(1);
720 #else
721             return(0);
722 #endif
723         case XML_WITH_C14N:
724 #ifdef LIBXML_C14N_ENABLED
725             return(1);
726 #else
727             return(0);
728 #endif
729         case XML_WITH_CATALOG:
730 #ifdef LIBXML_CATALOG_ENABLED
731             return(1);
732 #else
733             return(0);
734 #endif
735         case XML_WITH_XPATH:
736 #ifdef LIBXML_XPATH_ENABLED
737             return(1);
738 #else
739             return(0);
740 #endif
741         case XML_WITH_XPTR:
742 #ifdef LIBXML_XPTR_ENABLED
743             return(1);
744 #else
745             return(0);
746 #endif
747         case XML_WITH_XINCLUDE:
748 #ifdef LIBXML_XINCLUDE_ENABLED
749             return(1);
750 #else
751             return(0);
752 #endif
753         case XML_WITH_ICONV:
754 #ifdef LIBXML_ICONV_ENABLED
755             return(1);
756 #else
757             return(0);
758 #endif
759         case XML_WITH_ISO8859X:
760 #ifdef LIBXML_ISO8859X_ENABLED
761             return(1);
762 #else
763             return(0);
764 #endif
765         case XML_WITH_UNICODE:
766 #ifdef LIBXML_UNICODE_ENABLED
767             return(1);
768 #else
769             return(0);
770 #endif
771         case XML_WITH_REGEXP:
772 #ifdef LIBXML_REGEXP_ENABLED
773             return(1);
774 #else
775             return(0);
776 #endif
777         case XML_WITH_AUTOMATA:
778 #ifdef LIBXML_AUTOMATA_ENABLED
779             return(1);
780 #else
781             return(0);
782 #endif
783         case XML_WITH_EXPR:
784 #ifdef LIBXML_EXPR_ENABLED
785             return(1);
786 #else
787             return(0);
788 #endif
789         case XML_WITH_SCHEMAS:
790 #ifdef LIBXML_SCHEMAS_ENABLED
791             return(1);
792 #else
793             return(0);
794 #endif
795         case XML_WITH_SCHEMATRON:
796 #ifdef LIBXML_SCHEMATRON_ENABLED
797             return(1);
798 #else
799             return(0);
800 #endif
801         case XML_WITH_MODULES:
802 #ifdef LIBXML_MODULES_ENABLED
803             return(1);
804 #else
805             return(0);
806 #endif
807         case XML_WITH_DEBUG:
808 #ifdef LIBXML_DEBUG_ENABLED
809             return(1);
810 #else
811             return(0);
812 #endif
813         case XML_WITH_DEBUG_MEM:
814 #ifdef DEBUG_MEMORY_LOCATION
815             return(1);
816 #else
817             return(0);
818 #endif
819         case XML_WITH_DEBUG_RUN:
820             return(0);
821         case XML_WITH_ZLIB:
822 #ifdef LIBXML_ZLIB_ENABLED
823             return(1);
824 #else
825             return(0);
826 #endif
827         case XML_WITH_LZMA:
828 #ifdef LIBXML_LZMA_ENABLED
829             return(1);
830 #else
831             return(0);
832 #endif
833         case XML_WITH_ICU:
834 #ifdef LIBXML_ICU_ENABLED
835             return(1);
836 #else
837             return(0);
838 #endif
839         default:
840 	    break;
841      }
842      return(0);
843 }
844 
845 /************************************************************************
846  *									*
847  *		SAX2 defaulted attributes handling			*
848  *									*
849  ************************************************************************/
850 
851 /**
852  * xmlDetectSAX2:
853  * @ctxt:  an XML parser context
854  *
855  * Do the SAX2 detection and specific initialization
856  */
857 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)858 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
859     xmlSAXHandlerPtr sax;
860 
861     /* Avoid unused variable warning if features are disabled. */
862     (void) sax;
863 
864     if (ctxt == NULL) return;
865     sax = ctxt->sax;
866 #ifdef LIBXML_SAX1_ENABLED
867     if ((sax) && (sax->initialized == XML_SAX2_MAGIC))
868         ctxt->sax2 = 1;
869 #else
870     ctxt->sax2 = 1;
871 #endif /* LIBXML_SAX1_ENABLED */
872 
873     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
874     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
875     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
876     if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
877 		(ctxt->str_xml_ns == NULL)) {
878         xmlErrMemory(ctxt, NULL);
879     }
880 }
881 
882 typedef struct {
883     xmlHashedString prefix;
884     xmlHashedString name;
885     xmlHashedString value;
886     const xmlChar *valueEnd;
887     int external;
888     int expandedSize;
889 } xmlDefAttr;
890 
891 typedef struct _xmlDefAttrs xmlDefAttrs;
892 typedef xmlDefAttrs *xmlDefAttrsPtr;
893 struct _xmlDefAttrs {
894     int nbAttrs;	/* number of defaulted attributes on that element */
895     int maxAttrs;       /* the size of the array */
896 #if __STDC_VERSION__ >= 199901L
897     /* Using a C99 flexible array member avoids UBSan errors. */
898     xmlDefAttr attrs[]; /* array of localname/prefix/values/external */
899 #else
900     xmlDefAttr attrs[1];
901 #endif
902 };
903 
904 /**
905  * xmlAttrNormalizeSpace:
906  * @src: the source string
907  * @dst: the target string
908  *
909  * Normalize the space in non CDATA attribute values:
910  * If the attribute type is not CDATA, then the XML processor MUST further
911  * process the normalized attribute value by discarding any leading and
912  * trailing space (#x20) characters, and by replacing sequences of space
913  * (#x20) characters by a single space (#x20) character.
914  * Note that the size of dst need to be at least src, and if one doesn't need
915  * to preserve dst (and it doesn't come from a dictionary or read-only) then
916  * passing src as dst is just fine.
917  *
918  * Returns a pointer to the normalized value (dst) or NULL if no conversion
919  *         is needed.
920  */
921 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)922 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
923 {
924     if ((src == NULL) || (dst == NULL))
925         return(NULL);
926 
927     while (*src == 0x20) src++;
928     while (*src != 0) {
929 	if (*src == 0x20) {
930 	    while (*src == 0x20) src++;
931 	    if (*src != 0)
932 		*dst++ = 0x20;
933 	} else {
934 	    *dst++ = *src++;
935 	}
936     }
937     *dst = 0;
938     if (dst == src)
939        return(NULL);
940     return(dst);
941 }
942 
943 /**
944  * xmlAttrNormalizeSpace2:
945  * @src: the source string
946  *
947  * Normalize the space in non CDATA attribute values, a slightly more complex
948  * front end to avoid allocation problems when running on attribute values
949  * coming from the input.
950  *
951  * Returns a pointer to the normalized value (dst) or NULL if no conversion
952  *         is needed.
953  */
954 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)955 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
956 {
957     int i;
958     int remove_head = 0;
959     int need_realloc = 0;
960     const xmlChar *cur;
961 
962     if ((ctxt == NULL) || (src == NULL) || (len == NULL))
963         return(NULL);
964     i = *len;
965     if (i <= 0)
966         return(NULL);
967 
968     cur = src;
969     while (*cur == 0x20) {
970         cur++;
971 	remove_head++;
972     }
973     while (*cur != 0) {
974 	if (*cur == 0x20) {
975 	    cur++;
976 	    if ((*cur == 0x20) || (*cur == 0)) {
977 	        need_realloc = 1;
978 		break;
979 	    }
980 	} else
981 	    cur++;
982     }
983     if (need_realloc) {
984         xmlChar *ret;
985 
986 	ret = xmlStrndup(src + remove_head, i - remove_head + 1);
987 	if (ret == NULL) {
988 	    xmlErrMemory(ctxt, NULL);
989 	    return(NULL);
990 	}
991 	xmlAttrNormalizeSpace(ret, ret);
992 	*len = strlen((const char *)ret);
993         return(ret);
994     } else if (remove_head) {
995         *len -= remove_head;
996         memmove(src, src + remove_head, 1 + *len);
997 	return(src);
998     }
999     return(NULL);
1000 }
1001 
1002 /**
1003  * xmlAddDefAttrs:
1004  * @ctxt:  an XML parser context
1005  * @fullname:  the element fullname
1006  * @fullattr:  the attribute fullname
1007  * @value:  the attribute value
1008  *
1009  * Add a defaulted attribute for an element
1010  */
1011 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1012 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1013                const xmlChar *fullname,
1014                const xmlChar *fullattr,
1015                const xmlChar *value) {
1016     xmlDefAttrsPtr defaults;
1017     xmlDefAttr *attr;
1018     int len, expandedSize;
1019     xmlHashedString name;
1020     xmlHashedString prefix;
1021     xmlHashedString hvalue;
1022     const xmlChar *localname;
1023 
1024     /*
1025      * Allows to detect attribute redefinitions
1026      */
1027     if (ctxt->attsSpecial != NULL) {
1028         if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1029 	    return;
1030     }
1031 
1032     if (ctxt->attsDefault == NULL) {
1033         ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1034 	if (ctxt->attsDefault == NULL)
1035 	    goto mem_error;
1036     }
1037 
1038     /*
1039      * split the element name into prefix:localname , the string found
1040      * are within the DTD and then not associated to namespace names.
1041      */
1042     localname = xmlSplitQName3(fullname, &len);
1043     if (localname == NULL) {
1044         name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1045 	prefix.name = NULL;
1046     } else {
1047         name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1048 	prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1049         if (prefix.name == NULL)
1050             goto mem_error;
1051     }
1052     if (name.name == NULL)
1053         goto mem_error;
1054 
1055     /*
1056      * make sure there is some storage
1057      */
1058     defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1059     if ((defaults == NULL) ||
1060         (defaults->nbAttrs >= defaults->maxAttrs)) {
1061         xmlDefAttrsPtr temp;
1062         int newSize;
1063 
1064         newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4;
1065         temp = xmlRealloc(defaults,
1066                           sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1067 	if (temp == NULL)
1068 	    goto mem_error;
1069         if (defaults == NULL)
1070             temp->nbAttrs = 0;
1071 	temp->maxAttrs = newSize;
1072         defaults = temp;
1073 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1074 	                        defaults, NULL) < 0) {
1075 	    xmlFree(defaults);
1076 	    goto mem_error;
1077 	}
1078     }
1079 
1080     /*
1081      * Split the attribute name into prefix:localname , the string found
1082      * are within the DTD and hen not associated to namespace names.
1083      */
1084     localname = xmlSplitQName3(fullattr, &len);
1085     if (localname == NULL) {
1086         name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1087 	prefix.name = NULL;
1088     } else {
1089         name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1090 	prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1091         if (prefix.name == NULL)
1092             goto mem_error;
1093     }
1094     if (name.name == NULL)
1095         goto mem_error;
1096 
1097     /* intern the string and precompute the end */
1098     len = strlen((const char *) value);
1099     hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1100     if (hvalue.name == NULL)
1101         goto mem_error;
1102 
1103     expandedSize = strlen((const char *) name.name);
1104     if (prefix.name != NULL)
1105         expandedSize += strlen((const char *) prefix.name);
1106     expandedSize += len;
1107 
1108     attr = &defaults->attrs[defaults->nbAttrs++];
1109     attr->name = name;
1110     attr->prefix = prefix;
1111     attr->value = hvalue;
1112     attr->valueEnd = hvalue.name + len;
1113     attr->external = ctxt->external;
1114     attr->expandedSize = expandedSize;
1115 
1116     return;
1117 
1118 mem_error:
1119     xmlErrMemory(ctxt, NULL);
1120     return;
1121 }
1122 
1123 /**
1124  * xmlAddSpecialAttr:
1125  * @ctxt:  an XML parser context
1126  * @fullname:  the element fullname
1127  * @fullattr:  the attribute fullname
1128  * @type:  the attribute type
1129  *
1130  * Register this attribute type
1131  */
1132 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1133 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1134 		  const xmlChar *fullname,
1135 		  const xmlChar *fullattr,
1136 		  int type)
1137 {
1138     if (ctxt->attsSpecial == NULL) {
1139         ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1140 	if (ctxt->attsSpecial == NULL)
1141 	    goto mem_error;
1142     }
1143 
1144     if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1145         return;
1146 
1147     xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1148                      (void *) (ptrdiff_t) type);
1149     return;
1150 
1151 mem_error:
1152     xmlErrMemory(ctxt, NULL);
1153     return;
1154 }
1155 
1156 /**
1157  * xmlCleanSpecialAttrCallback:
1158  *
1159  * Removes CDATA attributes from the special attribute table
1160  */
1161 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1162 xmlCleanSpecialAttrCallback(void *payload, void *data,
1163                             const xmlChar *fullname, const xmlChar *fullattr,
1164                             const xmlChar *unused ATTRIBUTE_UNUSED) {
1165     xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1166 
1167     if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1168         xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1169     }
1170 }
1171 
1172 /**
1173  * xmlCleanSpecialAttr:
1174  * @ctxt:  an XML parser context
1175  *
1176  * Trim the list of attributes defined to remove all those of type
1177  * CDATA as they are not special. This call should be done when finishing
1178  * to parse the DTD and before starting to parse the document root.
1179  */
1180 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1181 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1182 {
1183     if (ctxt->attsSpecial == NULL)
1184         return;
1185 
1186     xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1187 
1188     if (xmlHashSize(ctxt->attsSpecial) == 0) {
1189         xmlHashFree(ctxt->attsSpecial, NULL);
1190         ctxt->attsSpecial = NULL;
1191     }
1192     return;
1193 }
1194 
1195 /**
1196  * xmlCheckLanguageID:
1197  * @lang:  pointer to the string value
1198  *
1199  * DEPRECATED: Internal function, do not use.
1200  *
1201  * Checks that the value conforms to the LanguageID production:
1202  *
1203  * NOTE: this is somewhat deprecated, those productions were removed from
1204  *       the XML Second edition.
1205  *
1206  * [33] LanguageID ::= Langcode ('-' Subcode)*
1207  * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1208  * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1209  * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1210  * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1211  * [38] Subcode ::= ([a-z] | [A-Z])+
1212  *
1213  * The current REC reference the successors of RFC 1766, currently 5646
1214  *
1215  * http://www.rfc-editor.org/rfc/rfc5646.txt
1216  * langtag       = language
1217  *                 ["-" script]
1218  *                 ["-" region]
1219  *                 *("-" variant)
1220  *                 *("-" extension)
1221  *                 ["-" privateuse]
1222  * language      = 2*3ALPHA            ; shortest ISO 639 code
1223  *                 ["-" extlang]       ; sometimes followed by
1224  *                                     ; extended language subtags
1225  *               / 4ALPHA              ; or reserved for future use
1226  *               / 5*8ALPHA            ; or registered language subtag
1227  *
1228  * extlang       = 3ALPHA              ; selected ISO 639 codes
1229  *                 *2("-" 3ALPHA)      ; permanently reserved
1230  *
1231  * script        = 4ALPHA              ; ISO 15924 code
1232  *
1233  * region        = 2ALPHA              ; ISO 3166-1 code
1234  *               / 3DIGIT              ; UN M.49 code
1235  *
1236  * variant       = 5*8alphanum         ; registered variants
1237  *               / (DIGIT 3alphanum)
1238  *
1239  * extension     = singleton 1*("-" (2*8alphanum))
1240  *
1241  *                                     ; Single alphanumerics
1242  *                                     ; "x" reserved for private use
1243  * singleton     = DIGIT               ; 0 - 9
1244  *               / %x41-57             ; A - W
1245  *               / %x59-5A             ; Y - Z
1246  *               / %x61-77             ; a - w
1247  *               / %x79-7A             ; y - z
1248  *
1249  * it sounds right to still allow Irregular i-xxx IANA and user codes too
1250  * The parser below doesn't try to cope with extension or privateuse
1251  * that could be added but that's not interoperable anyway
1252  *
1253  * Returns 1 if correct 0 otherwise
1254  **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *start = lang;    /* first char of the current subtag */
    const xmlChar *end;             /* one past its last letter */
    ptrdiff_t span;

    if (start == NULL)
        return (0);

    /*
     * Still allow IANA code and user code which were coming
     * from the previous version of the XML-1.0 specification
     * it's deprecated but we should not fail
     */
    if (((start[0] == 'i') || (start[0] == 'I') ||
         (start[0] == 'x') || (start[0] == 'X')) &&
        (start[1] == '-')) {
        for (start += 2;
             ((start[0] >= 'A') && (start[0] <= 'Z')) ||
             ((start[0] >= 'a') && (start[0] <= 'z'));
             start++)
            ;
        return(start[0] == 0);
    }

    /* primary language subtag */
    for (end = start;
         ((end[0] >= 'A') && (end[0] <= 'Z')) ||
         ((end[0] >= 'a') && (end[0] <= 'z'));
         end++)
        ;
    span = end - start;
    if (span >= 4) {
        /* reserved or registered subtag: 4 to 8 letters, nothing after */
        return((span <= 8) && (end[0] == 0));
    }
    if (span < 2)
        return(0);
    /* we got an ISO 639 code */
    if (end[0] == 0)
        return(1);
    if (end[0] != '-')
        return(0);

    /* now we can have extlang or script or region or variant */
    start = ++end;
    if ((end[0] >= '0') && (end[0] <= '9'))
        goto region_m49;

    for (;
         ((end[0] >= 'A') && (end[0] <= 'Z')) ||
         ((end[0] >= 'a') && (end[0] <= 'z'));
         end++)
        ;
    switch (end - start) {
        case 2:
            goto region;
        case 3:
            break;
        case 4:
            goto script;
        case 5:
        case 6:
        case 7:
        case 8:
            goto variant;
        default:
            return(0);
    }
    /* we parsed an extlang */
    if (end[0] == 0)
        return(1);
    if (end[0] != '-')
        return(0);

    /* now we can have script or region or variant */
    start = ++end;
    if ((end[0] >= '0') && (end[0] <= '9'))
        goto region_m49;

    for (;
         ((end[0] >= 'A') && (end[0] <= 'Z')) ||
         ((end[0] >= 'a') && (end[0] <= 'z'));
         end++)
        ;
    switch (end - start) {
        case 2:
            goto region;
        case 4:
            break;
        case 5:
        case 6:
        case 7:
        case 8:
            goto variant;
        default:
            return(0);
    }
    /* we parsed a script */
script:
    if (end[0] == 0)
        return(1);
    if (end[0] != '-')
        return(0);

    /* now we can have region or variant */
    start = ++end;
    if ((end[0] >= '0') && (end[0] <= '9'))
        goto region_m49;

    for (;
         ((end[0] >= 'A') && (end[0] <= 'Z')) ||
         ((end[0] >= 'a') && (end[0] <= 'z'));
         end++)
        ;
    span = end - start;
    if ((span >= 5) && (span <= 8))
        goto variant;
    if (span != 2)
        return(0);
    /* we parsed a region */
region:
    if (end[0] == 0)
        return(1);
    if (end[0] != '-')
        return(0);

    /* now we can just have a variant */
    start = ++end;
    for (;
         ((end[0] >= 'A') && (end[0] <= 'Z')) ||
         ((end[0] >= 'a') && (end[0] <= 'z'));
         end++)
        ;
    span = end - start;
    if ((span < 5) || (span > 8))
        return(0);

    /* we parsed a variant */
variant:
    /* extensions and private use subtags not checked */
    return((end[0] == 0) || (end[0] == '-'));

region_m49:
    /* UN M.49 region: exactly three digits, the first one already seen */
    if (((end[1] >= '0') && (end[1] <= '9')) &&
        ((end[2] >= '0') && (end[2] <= '9'))) {
        end += 3;
        goto region;
    }
    return(0);
}
1390 
1391 /************************************************************************
1392  *									*
1393  *		Parser stacks related functions and macros		*
1394  *									*
1395  ************************************************************************/
1396 
1397 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1398                                             const xmlChar ** str);
1399 
1400 /**
1401  * xmlParserNsCreate:
1402  *
1403  * Create a new namespace database.
1404  *
1405  * Returns the new obejct.
1406  */
1407 xmlParserNsData *
xmlParserNsCreate(void)1408 xmlParserNsCreate(void) {
1409     xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1410 
1411     if (nsdb == NULL)
1412         return(NULL);
1413     memset(nsdb, 0, sizeof(*nsdb));
1414     nsdb->defaultNsIndex = INT_MAX;
1415 
1416     return(nsdb);
1417 }
1418 
1419 /**
1420  * xmlParserNsFree:
1421  * @nsdb: namespace database
1422  *
1423  * Free a namespace database.
1424  */
1425 void
xmlParserNsFree(xmlParserNsData * nsdb)1426 xmlParserNsFree(xmlParserNsData *nsdb) {
1427     if (nsdb == NULL)
1428         return;
1429 
1430     xmlFree(nsdb->extra);
1431     xmlFree(nsdb->hash);
1432     xmlFree(nsdb);
1433 }
1434 
1435 /**
1436  * xmlParserNsReset:
1437  * @nsdb: namespace database
1438  *
1439  * Reset a namespace database.
1440  */
1441 static void
xmlParserNsReset(xmlParserNsData * nsdb)1442 xmlParserNsReset(xmlParserNsData *nsdb) {
1443     if (nsdb == NULL)
1444         return;
1445 
1446     nsdb->hashElems = 0;
1447     nsdb->elementId = 0;
1448     nsdb->defaultNsIndex = INT_MAX;
1449 
1450     if (nsdb->hash)
1451         memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1452 }
1453 
1454 /**
1455  * xmlParserStartElement:
1456  * @nsdb: namespace database
1457  *
1458  * Signal that a new element has started.
1459  *
1460  * Returns 0 on success, -1 if the element counter overflowed.
1461  */
1462 static int
xmlParserNsStartElement(xmlParserNsData * nsdb)1463 xmlParserNsStartElement(xmlParserNsData *nsdb) {
1464     if (nsdb->elementId == UINT_MAX)
1465         return(-1);
1466     nsdb->elementId++;
1467 
1468     return(0);
1469 }
1470 
1471 /**
1472  * xmlParserNsLookup:
1473  * @ctxt: parser context
1474  * @prefix: namespace prefix
1475  * @bucketPtr: optional bucket (return value)
1476  *
1477  * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1478  * be set to the matching bucket, or the first empty bucket if no match
1479  * was found.
1480  *
1481  * Returns the namespace index on success, INT_MAX if no namespace was
1482  * found.
1483  */
1484 static int
xmlParserNsLookup(xmlParserCtxtPtr ctxt,const xmlHashedString * prefix,xmlParserNsBucket ** bucketPtr)1485 xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1486                   xmlParserNsBucket **bucketPtr) {
1487     xmlParserNsBucket *bucket;
1488     unsigned index, hashValue;
1489 
1490     if (prefix->name == NULL)
1491         return(ctxt->nsdb->defaultNsIndex);
1492 
1493     if (ctxt->nsdb->hashSize == 0)
1494         return(INT_MAX);
1495 
1496     hashValue = prefix->hashValue;
1497     index = hashValue & (ctxt->nsdb->hashSize - 1);
1498     bucket = &ctxt->nsdb->hash[index];
1499 
1500     while (bucket->hashValue) {
1501         if ((bucket->hashValue == hashValue) &&
1502             (bucket->index != INT_MAX)) {
1503             if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1504                 if (bucketPtr != NULL)
1505                     *bucketPtr = bucket;
1506                 return(bucket->index);
1507             }
1508         }
1509 
1510         index++;
1511         bucket++;
1512         if (index == ctxt->nsdb->hashSize) {
1513             index = 0;
1514             bucket = ctxt->nsdb->hash;
1515         }
1516     }
1517 
1518     if (bucketPtr != NULL)
1519         *bucketPtr = bucket;
1520     return(INT_MAX);
1521 }
1522 
1523 /**
1524  * xmlParserNsLookupUri:
1525  * @ctxt: parser context
1526  * @prefix: namespace prefix
1527  *
1528  * Lookup namespace URI with given prefix.
1529  *
1530  * Returns the namespace URI on success, NULL if no namespace was found.
1531  */
1532 static const xmlChar *
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt,const xmlHashedString * prefix)1533 xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1534     const xmlChar *ret;
1535     int nsIndex;
1536 
1537     if (prefix->name == ctxt->str_xml)
1538         return(ctxt->str_xml_ns);
1539 
1540     nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1541     if (nsIndex == INT_MAX)
1542         return(NULL);
1543 
1544     ret = ctxt->nsTab[nsIndex * 2 + 1];
1545     if (ret[0] == 0)
1546         ret = NULL;
1547     return(ret);
1548 }
1549 
1550 /**
1551  * xmlParserNsLookupSax:
1552  * @ctxt: parser context
1553  * @prefix: namespace prefix
1554  *
1555  * Lookup extra data for the given prefix. This returns data stored
1556  * with xmlParserNsUdpateSax.
1557  *
1558  * Returns the data on success, NULL if no namespace was found.
1559  */
1560 void *
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt,const xmlChar * prefix)1561 xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1562     xmlHashedString hprefix;
1563     int nsIndex;
1564 
1565     if (prefix == ctxt->str_xml)
1566         return(NULL);
1567 
1568     hprefix.name = prefix;
1569     if (prefix != NULL)
1570         hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1571     else
1572         hprefix.hashValue = 0;
1573     nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1574     if (nsIndex == INT_MAX)
1575         return(NULL);
1576 
1577     return(ctxt->nsdb->extra[nsIndex].saxData);
1578 }
1579 
1580 /**
1581  * xmlParserNsUpdateSax:
1582  * @ctxt: parser context
1583  * @prefix: namespace prefix
1584  * @saxData: extra data for SAX handler
1585  *
1586  * Sets or updates extra data for the given prefix. This value will be
1587  * returned by xmlParserNsLookupSax as long as the namespace with the
1588  * given prefix is in scope.
1589  *
1590  * Returns the data on success, NULL if no namespace was found.
1591  */
1592 int
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt,const xmlChar * prefix,void * saxData)1593 xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1594                      void *saxData) {
1595     xmlHashedString hprefix;
1596     int nsIndex;
1597 
1598     if (prefix == ctxt->str_xml)
1599         return(-1);
1600 
1601     hprefix.name = prefix;
1602     if (prefix != NULL)
1603         hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1604     else
1605         hprefix.hashValue = 0;
1606     nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1607     if (nsIndex == INT_MAX)
1608         return(-1);
1609 
1610     ctxt->nsdb->extra[nsIndex].saxData = saxData;
1611     return(0);
1612 }
1613 
1614 /**
1615  * xmlParserNsGrow:
1616  * @ctxt: parser context
1617  *
1618  * Grows the namespace tables.
1619  *
1620  * Returns 0 on success, -1 if a memory allocation failed.
1621  */
1622 static int
xmlParserNsGrow(xmlParserCtxtPtr ctxt)1623 xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1624     const xmlChar **table;
1625     xmlParserNsExtra *extra;
1626     int newSize;
1627 
1628     if (ctxt->nsMax > INT_MAX / 2)
1629         goto error;
1630     newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16;
1631 
1632     table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1633     if (table == NULL)
1634         goto error;
1635     ctxt->nsTab = table;
1636 
1637     extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1638     if (extra == NULL)
1639         goto error;
1640     ctxt->nsdb->extra = extra;
1641 
1642     ctxt->nsMax = newSize;
1643     return(0);
1644 
1645 error:
1646     xmlErrMemory(ctxt, NULL);
1647     return(-1);
1648 }
1649 
/**
 * xmlParserNsPush:
 * @ctxt: parser context
 * @prefix: prefix with hash value, NULL or a NULL name for the default
 *          namespace
 * @uri: uri with hash value
 * @saxData: extra data for SAX handler
 * @defAttr: whether the namespace comes from a default attribute
 *
 * Push a new namespace on the table. The new entry is appended at index
 * ctxt->nsNr of ctxt->nsTab and ctxt->nsdb->extra; the index of the
 * binding it shadows (or INT_MAX) is saved in the entry so that
 * xmlParserNsPop can restore it.
 *
 * The namespace is ignored when the prefix is 'xml', when it comes from
 * a default attribute and the prefix is already bound, when the prefix
 * was already declared on the current element (an error is raised), or
 * when XML_PARSE_NSCLEAN is set and the prefix is already bound to the
 * same URI.
 *
 * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
 * -1 if a memory allocation failed.
 */
static int
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
                const xmlHashedString *uri, void *saxData, int defAttr) {
    xmlParserNsBucket *bucket = NULL;
    xmlParserNsExtra *extra;
    const xmlChar **ns;
    unsigned hashValue, nsIndex, oldIndex;

    /* the 'xml' prefix is never stored in the table */
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
        return(0);

    /* make room for one more entry in nsTab and in the extra array */
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
        xmlErrMemory(ctxt, NULL);
        return(-1);
    }

    /*
     * Default namespace and 'xml' namespace
     */
    if ((prefix == NULL) || (prefix->name == NULL)) {
        /* the default namespace is tracked by index, not in the hash */
        oldIndex = ctxt->nsdb->defaultNsIndex;

        if (oldIndex != INT_MAX) {
            /* a defaulted attribute never overrides an existing binding */
            if (defAttr != 0)
                return(0);

            extra = &ctxt->nsdb->extra[oldIndex];

            /* same element id: xmlns was declared twice on this element */
            if (extra->elementId == ctxt->nsdb->elementId) {
                xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
                return(0);
            }

            /* redundant redeclaration, URIs are compared by pointer */
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
                return(0);
        }

        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
        goto populate_entry;
    }

    /*
     * Hash table lookup
     */
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
    if (oldIndex != INT_MAX) {
        extra = &ctxt->nsdb->extra[oldIndex];

        /* a defaulted attribute never overrides an existing binding */
        if (defAttr != 0)
            return(0);

        /*
         * Check for duplicate definitions on the same element.
         */
        if (extra->elementId == ctxt->nsdb->elementId) {
            xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
            return(0);
        }

        /* redundant redeclaration, URIs are compared by pointer */
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
            return(0);

        /* reuse the bucket, it now points to the shadowing entry */
        bucket->index = ctxt->nsNr;
        goto populate_entry;
    }

    /*
     * Insert new bucket
     */

    hashValue = prefix->hashValue;

    /*
     * Grow hash table, 50% fill factor
     */
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
        xmlParserNsBucket *newHash;
        unsigned newSize, i, index;

        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }
        /* 16 buckets at first, then doubled; indexes are masked below */
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
        if (newHash == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }
        memset(newHash, 0, newSize * sizeof(newHash[0]));

        /* reinsert every used bucket of the old table with linear probing */
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
            unsigned newIndex;

            /* a zero hash value marks an empty bucket */
            if (hv == 0)
                continue;
            newIndex = hv & (newSize - 1);

            while (newHash[newIndex].hashValue != 0) {
                newIndex++;
                if (newIndex == newSize)
                    newIndex = 0;
            }

            newHash[newIndex] = ctxt->nsdb->hash[i];
        }

        xmlFree(ctxt->nsdb->hash);
        ctxt->nsdb->hash = newHash;
        ctxt->nsdb->hashSize = newSize;

        /*
         * Relookup
         */
        /* the bucket found before growing belonged to the freed table */
        index = hashValue & (newSize - 1);

        while (newHash[index].hashValue != 0) {
            index++;
            if (index == newSize)
                index = 0;
        }

        bucket = &newHash[index];
    }

    bucket->hashValue = hashValue;
    bucket->index = ctxt->nsNr;
    ctxt->nsdb->hashElems++;
    /* first binding of this prefix: nothing to restore on pop */
    oldIndex = INT_MAX;

populate_entry:
    nsIndex = ctxt->nsNr;

    /* nsTab holds a prefix/URI pair per namespace */
    ns = &ctxt->nsTab[nsIndex * 2];
    ns[0] = prefix ? prefix->name : NULL;
    ns[1] = uri->name;

    extra = &ctxt->nsdb->extra[nsIndex];
    extra->saxData = saxData;
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
    extra->uriHashValue = uri->hashValue;
    extra->elementId = ctxt->nsdb->elementId;
    extra->oldIndex = oldIndex;

    ctxt->nsNr++;

    return(1);
}
1814 
1815 /**
1816  * xmlParserNsPop:
1817  * @ctxt: an XML parser context
1818  * @nr:  the number to pop
1819  *
1820  * Pops the top @nr namespaces and restores the hash table.
1821  *
1822  * Returns the number of namespaces popped.
1823  */
1824 static int
xmlParserNsPop(xmlParserCtxtPtr ctxt,int nr)1825 xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1826 {
1827     int i;
1828 
1829     /* assert(nr <= ctxt->nsNr); */
1830 
1831     for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1832         const xmlChar *prefix = ctxt->nsTab[i * 2];
1833         xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1834 
1835         if (prefix == NULL) {
1836             ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1837         } else {
1838             xmlHashedString hprefix;
1839             xmlParserNsBucket *bucket = NULL;
1840 
1841             hprefix.name = prefix;
1842             hprefix.hashValue = extra->prefixHashValue;
1843             xmlParserNsLookup(ctxt, &hprefix, &bucket);
1844             /* assert(bucket && bucket->hashValue); */
1845             bucket->index = extra->oldIndex;
1846         }
1847     }
1848 
1849     ctxt->nsNr -= nr;
1850     return(nr);
1851 }
1852 
1853 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1854 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1855     const xmlChar **atts;
1856     unsigned *attallocs;
1857     int maxatts;
1858 
1859     if (nr + 5 > ctxt->maxatts) {
1860 	maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1861 	atts = (const xmlChar **) xmlMalloc(
1862 				     maxatts * sizeof(const xmlChar *));
1863 	if (atts == NULL) goto mem_error;
1864 	attallocs = xmlRealloc(ctxt->attallocs,
1865                                (maxatts / 5) * sizeof(attallocs[0]));
1866 	if (attallocs == NULL) {
1867             xmlFree(atts);
1868             goto mem_error;
1869         }
1870         if (ctxt->maxatts > 0)
1871             memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1872         xmlFree(ctxt->atts);
1873 	ctxt->atts = atts;
1874 	ctxt->attallocs = attallocs;
1875 	ctxt->maxatts = maxatts;
1876     }
1877     return(ctxt->maxatts);
1878 mem_error:
1879     xmlErrMemory(ctxt, NULL);
1880     return(-1);
1881 }
1882 
1883 /**
1884  * inputPush:
1885  * @ctxt:  an XML parser context
1886  * @value:  the parser input
1887  *
1888  * Pushes a new parser input on top of the input stack
1889  *
1890  * Returns -1 in case of error, the index in the stack otherwise
1891  */
1892 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1893 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1894 {
1895     if ((ctxt == NULL) || (value == NULL))
1896         return(-1);
1897     if (ctxt->inputNr >= ctxt->inputMax) {
1898         size_t newSize = ctxt->inputMax * 2;
1899         xmlParserInputPtr *tmp;
1900 
1901         tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1902                                                newSize * sizeof(*tmp));
1903         if (tmp == NULL) {
1904             xmlErrMemory(ctxt, NULL);
1905             return (-1);
1906         }
1907         ctxt->inputTab = tmp;
1908         ctxt->inputMax = newSize;
1909     }
1910     ctxt->inputTab[ctxt->inputNr] = value;
1911     ctxt->input = value;
1912     return (ctxt->inputNr++);
1913 }
1914 /**
1915  * inputPop:
1916  * @ctxt: an XML parser context
1917  *
1918  * Pops the top parser input from the input stack
1919  *
1920  * Returns the input just removed
1921  */
1922 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1923 inputPop(xmlParserCtxtPtr ctxt)
1924 {
1925     xmlParserInputPtr ret;
1926 
1927     if (ctxt == NULL)
1928         return(NULL);
1929     if (ctxt->inputNr <= 0)
1930         return (NULL);
1931     ctxt->inputNr--;
1932     if (ctxt->inputNr > 0)
1933         ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1934     else
1935         ctxt->input = NULL;
1936     ret = ctxt->inputTab[ctxt->inputNr];
1937     ctxt->inputTab[ctxt->inputNr] = NULL;
1938     return (ret);
1939 }
1940 /**
1941  * nodePush:
1942  * @ctxt:  an XML parser context
1943  * @value:  the element node
1944  *
1945  * DEPRECATED: Internal function, do not use.
1946  *
1947  * Pushes a new element node on top of the node stack
1948  *
1949  * Returns -1 in case of error, the index in the stack otherwise
1950  */
1951 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1952 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1953 {
1954     if (ctxt == NULL) return(0);
1955     if (ctxt->nodeNr >= ctxt->nodeMax) {
1956         xmlNodePtr *tmp;
1957 
1958 	tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1959                                       ctxt->nodeMax * 2 *
1960                                       sizeof(ctxt->nodeTab[0]));
1961         if (tmp == NULL) {
1962             xmlErrMemory(ctxt, NULL);
1963             return (-1);
1964         }
1965         ctxt->nodeTab = tmp;
1966 	ctxt->nodeMax *= 2;
1967     }
1968     if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1969         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1970 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1971 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1972 			  xmlParserMaxDepth);
1973 	xmlHaltParser(ctxt);
1974 	return(-1);
1975     }
1976     ctxt->nodeTab[ctxt->nodeNr] = value;
1977     ctxt->node = value;
1978     return (ctxt->nodeNr++);
1979 }
1980 
1981 /**
1982  * nodePop:
1983  * @ctxt: an XML parser context
1984  *
1985  * DEPRECATED: Internal function, do not use.
1986  *
1987  * Pops the top element node from the node stack
1988  *
1989  * Returns the node just removed
1990  */
1991 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1992 nodePop(xmlParserCtxtPtr ctxt)
1993 {
1994     xmlNodePtr ret;
1995 
1996     if (ctxt == NULL) return(NULL);
1997     if (ctxt->nodeNr <= 0)
1998         return (NULL);
1999     ctxt->nodeNr--;
2000     if (ctxt->nodeNr > 0)
2001         ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2002     else
2003         ctxt->node = NULL;
2004     ret = ctxt->nodeTab[ctxt->nodeNr];
2005     ctxt->nodeTab[ctxt->nodeNr] = NULL;
2006     return (ret);
2007 }
2008 
2009 /**
2010  * nameNsPush:
2011  * @ctxt:  an XML parser context
2012  * @value:  the element name
2013  * @prefix:  the element prefix
2014  * @URI:  the element namespace name
2015  * @line:  the current line number for error messages
2016  * @nsNr:  the number of namespaces pushed on the namespace table
2017  *
2018  * Pushes a new element name/prefix/URL on top of the name stack
2019  *
2020  * Returns -1 in case of error, the index in the stack otherwise
2021  */
2022 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr)2023 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2024            const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2025 {
2026     xmlStartTag *tag;
2027 
2028     if (ctxt->nameNr >= ctxt->nameMax) {
2029         const xmlChar * *tmp;
2030         xmlStartTag *tmp2;
2031         ctxt->nameMax *= 2;
2032         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2033                                     ctxt->nameMax *
2034                                     sizeof(ctxt->nameTab[0]));
2035         if (tmp == NULL) {
2036 	    ctxt->nameMax /= 2;
2037 	    goto mem_error;
2038         }
2039 	ctxt->nameTab = tmp;
2040         tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
2041                                     ctxt->nameMax *
2042                                     sizeof(ctxt->pushTab[0]));
2043         if (tmp2 == NULL) {
2044 	    ctxt->nameMax /= 2;
2045 	    goto mem_error;
2046         }
2047 	ctxt->pushTab = tmp2;
2048     } else if (ctxt->pushTab == NULL) {
2049         ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
2050                                             sizeof(ctxt->pushTab[0]));
2051         if (ctxt->pushTab == NULL)
2052             goto mem_error;
2053     }
2054     ctxt->nameTab[ctxt->nameNr] = value;
2055     ctxt->name = value;
2056     tag = &ctxt->pushTab[ctxt->nameNr];
2057     tag->prefix = prefix;
2058     tag->URI = URI;
2059     tag->line = line;
2060     tag->nsNr = nsNr;
2061     return (ctxt->nameNr++);
2062 mem_error:
2063     xmlErrMemory(ctxt, NULL);
2064     return (-1);
2065 }
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the topmost element name from the name stack. The entry below
 * it, if any, becomes the current name (ctxt->name); otherwise ctxt->name
 * is set to NULL. The vacated name slot is cleared.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2092 
2093 /**
2094  * namePush:
2095  * @ctxt:  an XML parser context
2096  * @value:  the element name
2097  *
2098  * DEPRECATED: Internal function, do not use.
2099  *
2100  * Pushes a new element name on top of the name stack
2101  *
2102  * Returns -1 in case of error, the index in the stack otherwise
2103  */
2104 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)2105 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
2106 {
2107     if (ctxt == NULL) return (-1);
2108 
2109     if (ctxt->nameNr >= ctxt->nameMax) {
2110         const xmlChar * *tmp;
2111         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2112                                     ctxt->nameMax * 2 *
2113                                     sizeof(ctxt->nameTab[0]));
2114         if (tmp == NULL) {
2115 	    goto mem_error;
2116         }
2117 	ctxt->nameTab = tmp;
2118         ctxt->nameMax *= 2;
2119     }
2120     ctxt->nameTab[ctxt->nameNr] = value;
2121     ctxt->name = value;
2122     return (ctxt->nameNr++);
2123 mem_error:
2124     xmlErrMemory(ctxt, NULL);
2125     return (-1);
2126 }
2127 
2128 /**
2129  * namePop:
2130  * @ctxt: an XML parser context
2131  *
2132  * DEPRECATED: Internal function, do not use.
2133  *
2134  * Pops the top element name from the name stack
2135  *
2136  * Returns the name just removed
2137  */
2138 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)2139 namePop(xmlParserCtxtPtr ctxt)
2140 {
2141     const xmlChar *ret;
2142 
2143     if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2144         return (NULL);
2145     ctxt->nameNr--;
2146     if (ctxt->nameNr > 0)
2147         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2148     else
2149         ctxt->name = NULL;
2150     ret = ctxt->nameTab[ctxt->nameNr];
2151     ctxt->nameTab[ctxt->nameNr] = NULL;
2152     return (ret);
2153 }
2154 
/*
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push on the space stack
 *
 * Pushes @val on top of the space stack and points ctxt->space at the
 * new top entry. The table is doubled when full. The new capacity is
 * committed only after the reallocation succeeded, so ctxt->spaceMax
 * never exceeds the real allocation, not even while the memory error is
 * being reported.
 *
 * Returns -1 in case of allocation failure, the index in the stack
 * otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int newMax = ctxt->spaceMax * 2;
        int *tmp;

        tmp = (int *) xmlRealloc(ctxt->spaceTab,
                                 newMax * sizeof(ctxt->spaceTab[0]));
        if (tmp == NULL) {
            /* spaceTab and spaceMax are untouched and still consistent. */
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }
        ctxt->spaceTab = tmp;
        ctxt->spaceMax = newMax;
    }
    ctxt->spaceTab[ctxt->spaceNr] = val;
    /* Re-derive the pointer: the table may have moved during realloc. */
    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
    return(ctxt->spaceNr++);
}
2173 
/*
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Removes the top entry of the space stack. ctxt->space is moved to the
 * entry below it, or to slot 0 when the stack becomes empty. The vacated
 * slot is overwritten with -1.
 *
 * Returns the value just removed, or 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2186 
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named ctxt (the parser
 * context) to be in scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR returns the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   BASE_PTR returns the base pointer of the current input buffer.
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   CMPn(s, c1, ..., cn) true if the first n bytes at s are c1 ... cn.
 *           The arguments may be evaluated more than once.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   SKIPL(n) Skip n xmlChar one at a time, updating the line and column
 *           counters when a newline is crossed.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index. The arguments are evaluated more than once.
 *   GROW, SHRINK  handling of input buffers
 *
 * Note: SHRINK, GROW, NEXT1 and COPY_BUF are not wrapped in
 * do { } while (0); do not use them as the unbraced body of an if/else.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/* Every macro argument is parenthesized so that expressions such as
 * CMP4(p + 1, ...) index the intended bytes. */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/* Don't shrink push parser buffer. */
#define SHRINK \
    if (((ctxt->progressive == 0) || (ctxt->inputNr > 1)) && \
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
	(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlParserShrink(ctxt);

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)	\
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);						\
    }

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

#define COPY_BUF(b, i, v)						\
    if ((v) < 0x80) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2298 
2299 /**
2300  * xmlSkipBlankChars:
2301  * @ctxt:  the XML parser context
2302  *
2303  * DEPRECATED: Internal function, do not use.
2304  *
2305  * skip all blanks character found at that point in the input streams.
2306  * It pops up finished entities in the process if allowable at that point.
2307  *
2308  * Returns the number of space chars skipped
2309  */
2310 
2311 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2312 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2313     int res = 0;
2314 
2315     /*
2316      * It's Okay to use CUR/NEXT here since all the blanks are on
2317      * the ASCII range.
2318      */
2319     if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2320         (ctxt->instate == XML_PARSER_START)) {
2321 	const xmlChar *cur;
2322 	/*
2323 	 * if we are in the document content, go really fast
2324 	 */
2325 	cur = ctxt->input->cur;
2326 	while (IS_BLANK_CH(*cur)) {
2327 	    if (*cur == '\n') {
2328 		ctxt->input->line++; ctxt->input->col = 1;
2329 	    } else {
2330 		ctxt->input->col++;
2331 	    }
2332 	    cur++;
2333 	    if (res < INT_MAX)
2334 		res++;
2335 	    if (*cur == 0) {
2336 		ctxt->input->cur = cur;
2337 		xmlParserGrow(ctxt);
2338 		cur = ctxt->input->cur;
2339 	    }
2340 	}
2341 	ctxt->input->cur = cur;
2342     } else {
2343         int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2344 
2345 	while (ctxt->instate != XML_PARSER_EOF) {
2346             if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2347 		NEXT;
2348 	    } else if (CUR == '%') {
2349                 /*
2350                  * Need to handle support of entities branching here
2351                  */
2352 	        if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2353                     break;
2354 	        xmlParsePEReference(ctxt);
2355             } else if (CUR == 0) {
2356                 unsigned long consumed;
2357                 xmlEntityPtr ent;
2358 
2359                 if (ctxt->inputNr <= 1)
2360                     break;
2361 
2362                 consumed = ctxt->input->consumed;
2363                 xmlSaturatedAddSizeT(&consumed,
2364                                      ctxt->input->cur - ctxt->input->base);
2365 
2366                 /*
2367                  * Add to sizeentities when parsing an external entity
2368                  * for the first time.
2369                  */
2370                 ent = ctxt->input->entity;
2371                 if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2372                     ((ent->flags & XML_ENT_PARSED) == 0)) {
2373                     ent->flags |= XML_ENT_PARSED;
2374 
2375                     xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2376                 }
2377 
2378                 xmlParserEntityCheck(ctxt, consumed);
2379 
2380                 xmlPopInput(ctxt);
2381             } else {
2382                 break;
2383             }
2384 
2385             /*
2386              * Also increase the counter when entering or exiting a PERef.
2387              * The spec says: "When a parameter-entity reference is recognized
2388              * in the DTD and included, its replacement text MUST be enlarged
2389              * by the attachment of one leading and one following space (#x20)
2390              * character."
2391              */
2392 	    if (res < INT_MAX)
2393 		res++;
2394         }
2395     }
2396     return(res);
2397 }
2398 
2399 /************************************************************************
2400  *									*
2401  *		Commodity functions to handle entities			*
2402  *									*
2403  ************************************************************************/
2404 
2405 /**
2406  * xmlPopInput:
2407  * @ctxt:  an XML parser context
2408  *
2409  * xmlPopInput: the current input pointed by ctxt->input came to an end
2410  *          pop it and return the next char.
2411  *
2412  * Returns the current xmlChar in the parser context
2413  */
2414 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2415 xmlPopInput(xmlParserCtxtPtr ctxt) {
2416     xmlParserInputPtr input;
2417 
2418     if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2419     if (xmlParserDebugEntities)
2420 	xmlGenericError(xmlGenericErrorContext,
2421 		"Popping input %d\n", ctxt->inputNr);
2422     if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2423         (ctxt->instate != XML_PARSER_EOF))
2424         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2425                     "Unfinished entity outside the DTD");
2426     input = inputPop(ctxt);
2427     if (input->entity != NULL)
2428         input->entity->flags &= ~XML_ENT_EXPANDING;
2429     xmlFreeInputStream(input);
2430     if (*ctxt->input->cur == 0)
2431         xmlParserGrow(ctxt);
2432     return(CUR);
2433 }
2434 
2435 /**
2436  * xmlPushInput:
2437  * @ctxt:  an XML parser context
2438  * @input:  an XML parser input fragment (entity, XML fragment ...).
2439  *
2440  * xmlPushInput: switch to a new input stream which is stacked on top
2441  *               of the previous one(s).
2442  * Returns -1 in case of error or the index in the input stack
2443  */
2444 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2445 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2446     int ret;
2447     if (input == NULL) return(-1);
2448 
2449     if (xmlParserDebugEntities) {
2450 	if ((ctxt->input != NULL) && (ctxt->input->filename))
2451 	    xmlGenericError(xmlGenericErrorContext,
2452 		    "%s(%d): ", ctxt->input->filename,
2453 		    ctxt->input->line);
2454 	xmlGenericError(xmlGenericErrorContext,
2455 		"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2456     }
2457     if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2458         (ctxt->inputNr > 100)) {
2459         xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2460         while (ctxt->inputNr > 1)
2461             xmlFreeInputStream(inputPop(ctxt));
2462 	return(-1);
2463     }
2464     ret = inputPush(ctxt, input);
2465     if (ctxt->instate == XML_PARSER_EOF)
2466         return(-1);
2467     GROW;
2468     return(ret);
2469 }
2470 
2471 /**
2472  * xmlParseCharRef:
2473  * @ctxt:  an XML parser context
2474  *
2475  * DEPRECATED: Internal function, don't use.
2476  *
2477  * Parse a numeric character reference. Always consumes '&'.
2478  *
2479  * [66] CharRef ::= '&#' [0-9]+ ';' |
2480  *                  '&#x' [0-9a-fA-F]+ ';'
2481  *
2482  * [ WFC: Legal Character ]
2483  * Characters referred to using character references must match the
2484  * production for Char.
2485  *
2486  * Returns the value parsed (as an int), 0 in case of error
2487  */
2488 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2489 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2490     int val = 0;
2491     int count = 0;
2492 
2493     /*
2494      * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2495      */
2496     if ((RAW == '&') && (NXT(1) == '#') &&
2497         (NXT(2) == 'x')) {
2498 	SKIP(3);
2499 	GROW;
2500 	while (RAW != ';') { /* loop blocked by count */
2501 	    if (count++ > 20) {
2502 		count = 0;
2503 		GROW;
2504                 if (ctxt->instate == XML_PARSER_EOF)
2505                     return(0);
2506 	    }
2507 	    if ((RAW >= '0') && (RAW <= '9'))
2508 	        val = val * 16 + (CUR - '0');
2509 	    else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2510 	        val = val * 16 + (CUR - 'a') + 10;
2511 	    else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2512 	        val = val * 16 + (CUR - 'A') + 10;
2513 	    else {
2514 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2515 		val = 0;
2516 		break;
2517 	    }
2518 	    if (val > 0x110000)
2519 	        val = 0x110000;
2520 
2521 	    NEXT;
2522 	    count++;
2523 	}
2524 	if (RAW == ';') {
2525 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2526 	    ctxt->input->col++;
2527 	    ctxt->input->cur++;
2528 	}
2529     } else if  ((RAW == '&') && (NXT(1) == '#')) {
2530 	SKIP(2);
2531 	GROW;
2532 	while (RAW != ';') { /* loop blocked by count */
2533 	    if (count++ > 20) {
2534 		count = 0;
2535 		GROW;
2536                 if (ctxt->instate == XML_PARSER_EOF)
2537                     return(0);
2538 	    }
2539 	    if ((RAW >= '0') && (RAW <= '9'))
2540 	        val = val * 10 + (CUR - '0');
2541 	    else {
2542 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2543 		val = 0;
2544 		break;
2545 	    }
2546 	    if (val > 0x110000)
2547 	        val = 0x110000;
2548 
2549 	    NEXT;
2550 	    count++;
2551 	}
2552 	if (RAW == ';') {
2553 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2554 	    ctxt->input->col++;
2555 	    ctxt->input->cur++;
2556 	}
2557     } else {
2558         if (RAW == '&')
2559             SKIP(1);
2560         xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2561     }
2562 
2563     /*
2564      * [ WFC: Legal Character ]
2565      * Characters referred to using character references must match the
2566      * production for Char.
2567      */
2568     if (val >= 0x110000) {
2569         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2570                 "xmlParseCharRef: character reference out of bounds\n",
2571 	        val);
2572     } else if (IS_CHAR(val)) {
2573         return(val);
2574     } else {
2575         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2576                           "xmlParseCharRef: invalid xmlChar value %d\n",
2577 	                  val);
2578     }
2579     return(0);
2580 }
2581 
2582 /**
2583  * xmlParseStringCharRef:
2584  * @ctxt:  an XML parser context
2585  * @str:  a pointer to an index in the string
2586  *
2587  * parse Reference declarations, variant parsing from a string rather
2588  * than an an input flow.
2589  *
2590  * [66] CharRef ::= '&#' [0-9]+ ';' |
2591  *                  '&#x' [0-9a-fA-F]+ ';'
2592  *
2593  * [ WFC: Legal Character ]
2594  * Characters referred to using character references must match the
2595  * production for Char.
2596  *
2597  * Returns the value parsed (as an int), 0 in case of error, str will be
2598  *         updated to the current value of the index
2599  */
2600 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2601 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2602     const xmlChar *ptr;
2603     xmlChar cur;
2604     int val = 0;
2605 
2606     if ((str == NULL) || (*str == NULL)) return(0);
2607     ptr = *str;
2608     cur = *ptr;
2609     if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2610 	ptr += 3;
2611 	cur = *ptr;
2612 	while (cur != ';') { /* Non input consuming loop */
2613 	    if ((cur >= '0') && (cur <= '9'))
2614 	        val = val * 16 + (cur - '0');
2615 	    else if ((cur >= 'a') && (cur <= 'f'))
2616 	        val = val * 16 + (cur - 'a') + 10;
2617 	    else if ((cur >= 'A') && (cur <= 'F'))
2618 	        val = val * 16 + (cur - 'A') + 10;
2619 	    else {
2620 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2621 		val = 0;
2622 		break;
2623 	    }
2624 	    if (val > 0x110000)
2625 	        val = 0x110000;
2626 
2627 	    ptr++;
2628 	    cur = *ptr;
2629 	}
2630 	if (cur == ';')
2631 	    ptr++;
2632     } else if  ((cur == '&') && (ptr[1] == '#')){
2633 	ptr += 2;
2634 	cur = *ptr;
2635 	while (cur != ';') { /* Non input consuming loops */
2636 	    if ((cur >= '0') && (cur <= '9'))
2637 	        val = val * 10 + (cur - '0');
2638 	    else {
2639 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2640 		val = 0;
2641 		break;
2642 	    }
2643 	    if (val > 0x110000)
2644 	        val = 0x110000;
2645 
2646 	    ptr++;
2647 	    cur = *ptr;
2648 	}
2649 	if (cur == ';')
2650 	    ptr++;
2651     } else {
2652 	xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2653 	return(0);
2654     }
2655     *str = ptr;
2656 
2657     /*
2658      * [ WFC: Legal Character ]
2659      * Characters referred to using character references must match the
2660      * production for Char.
2661      */
2662     if (val >= 0x110000) {
2663         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2664                 "xmlParseStringCharRef: character reference out of bounds\n",
2665                 val);
2666     } else if (IS_CHAR(val)) {
2667         return(val);
2668     } else {
2669         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2670 			  "xmlParseStringCharRef: invalid xmlChar value %d\n",
2671 			  val);
2672     }
2673     return(0);
2674 }
2675 
2676 /**
2677  * xmlParserHandlePEReference:
2678  * @ctxt:  the parser context
2679  *
2680  * DEPRECATED: Internal function, do not use.
2681  *
2682  * [69] PEReference ::= '%' Name ';'
2683  *
2684  * [ WFC: No Recursion ]
2685  * A parsed entity must not contain a recursive
2686  * reference to itself, either directly or indirectly.
2687  *
2688  * [ WFC: Entity Declared ]
2689  * In a document without any DTD, a document with only an internal DTD
2690  * subset which contains no parameter entity references, or a document
2691  * with "standalone='yes'", ...  ... The declaration of a parameter
2692  * entity must precede any reference to it...
2693  *
2694  * [ VC: Entity Declared ]
2695  * In a document with an external subset or external parameter entities
2696  * with "standalone='no'", ...  ... The declaration of a parameter entity
2697  * must precede any reference to it...
2698  *
2699  * [ WFC: In DTD ]
2700  * Parameter-entity references may only appear in the DTD.
2701  * NOTE: misleading but this is handled.
2702  *
2703  * A PEReference may have been detected in the current input stream
2704  * the handling is done accordingly to
2705  *      http://www.w3.org/TR/REC-xml#entproc
2706  * i.e.
2707  *   - Included in literal in entity values
2708  *   - Included as Parameter Entity reference within DTDs
2709  */
2710 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2711 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2712     switch(ctxt->instate) {
2713 	case XML_PARSER_CDATA_SECTION:
2714 	    return;
2715         case XML_PARSER_COMMENT:
2716 	    return;
2717 	case XML_PARSER_START_TAG:
2718 	    return;
2719 	case XML_PARSER_END_TAG:
2720 	    return;
2721         case XML_PARSER_EOF:
2722 	    xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2723 	    return;
2724         case XML_PARSER_PROLOG:
2725 	case XML_PARSER_START:
2726 	case XML_PARSER_XML_DECL:
2727 	case XML_PARSER_MISC:
2728 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2729 	    return;
2730 	case XML_PARSER_ENTITY_DECL:
2731         case XML_PARSER_CONTENT:
2732         case XML_PARSER_ATTRIBUTE_VALUE:
2733         case XML_PARSER_PI:
2734 	case XML_PARSER_SYSTEM_LITERAL:
2735 	case XML_PARSER_PUBLIC_LITERAL:
2736 	    /* we just ignore it there */
2737 	    return;
2738         case XML_PARSER_EPILOG:
2739 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2740 	    return;
2741 	case XML_PARSER_ENTITY_VALUE:
2742 	    /*
2743 	     * NOTE: in the case of entity values, we don't do the
2744 	     *       substitution here since we need the literal
2745 	     *       entity value to be able to save the internal
2746 	     *       subset of the document.
2747 	     *       This will be handled by xmlStringDecodeEntities
2748 	     */
2749 	    return;
2750         case XML_PARSER_DTD:
2751 	    /*
2752 	     * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2753 	     * In the internal DTD subset, parameter-entity references
2754 	     * can occur only where markup declarations can occur, not
2755 	     * within markup declarations.
2756 	     * In that case this is handled in xmlParseMarkupDecl
2757 	     */
2758 	    if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2759 		return;
2760 	    if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2761 		return;
2762             break;
2763         case XML_PARSER_IGNORE:
2764             return;
2765     }
2766 
2767     xmlParsePEReference(ctxt);
2768 }
2769 
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t and is updated on success.
 * mem_error: is a label the enclosing function must provide; control
 * jumps there when the new size wraps around or the reallocation fails,
 * in which case buffer and buffer##_size are left unchanged.
 * n is parenthesized so that any expression can be passed safely.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2784 
2785 /**
2786  * xmlStringDecodeEntitiesInt:
2787  * @ctxt:  the parser context
2788  * @str:  the input string
2789  * @len: the string length
2790  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2791  * @end:  an end marker xmlChar, 0 if none
2792  * @end2:  an end marker xmlChar, 0 if none
2793  * @end3:  an end marker xmlChar, 0 if none
2794  * @check:  whether to perform entity checks
2795  */
2796 static xmlChar *
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what,xmlChar end,xmlChar end2,xmlChar end3,int check)2797 xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2798 		           int what, xmlChar end, xmlChar  end2, xmlChar end3,
2799                            int check) {
2800     xmlChar *buffer = NULL;
2801     size_t buffer_size = 0;
2802     size_t nbchars = 0;
2803 
2804     xmlChar *current = NULL;
2805     xmlChar *rep = NULL;
2806     const xmlChar *last;
2807     xmlEntityPtr ent;
2808     int c,l;
2809 
2810     if (str == NULL)
2811         return(NULL);
2812     last = str + len;
2813 
2814     if (((ctxt->depth > 40) &&
2815          ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2816 	(ctxt->depth > 100)) {
2817 	xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2818                        "Maximum entity nesting depth exceeded");
2819 	return(NULL);
2820     }
2821 
2822     /*
2823      * allocate a translation buffer.
2824      */
2825     buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2826     buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2827     if (buffer == NULL) goto mem_error;
2828 
2829     /*
2830      * OK loop until we reach one of the ending char or a size limit.
2831      * we are operating on already parsed values.
2832      */
2833     if (str < last)
2834 	c = CUR_SCHAR(str, l);
2835     else
2836         c = 0;
2837     while ((c != 0) && (c != end) && /* non input consuming loop */
2838            (c != end2) && (c != end3) &&
2839            (ctxt->instate != XML_PARSER_EOF)) {
2840 
2841 	if (c == 0) break;
2842         if ((c == '&') && (str[1] == '#')) {
2843 	    int val = xmlParseStringCharRef(ctxt, &str);
2844 	    if (val == 0)
2845                 goto int_error;
2846 	    COPY_BUF(buffer, nbchars, val);
2847 	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2848 	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2849 	    }
2850 	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2851 	    if (xmlParserDebugEntities)
2852 		xmlGenericError(xmlGenericErrorContext,
2853 			"String decoding Entity Reference: %.30s\n",
2854 			str);
2855 	    ent = xmlParseStringEntityRef(ctxt, &str);
2856 	    if ((ent != NULL) &&
2857 		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2858 		if (ent->content != NULL) {
2859 		    COPY_BUF(buffer, nbchars, ent->content[0]);
2860 		    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2861 			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2862 		    }
2863 		} else {
2864 		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2865 			    "predefined entity has no content\n");
2866                     goto int_error;
2867 		}
2868 	    } else if ((ent != NULL) && (ent->content != NULL)) {
2869 	        if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2870                     goto int_error;
2871 
2872                 if (ent->flags & XML_ENT_EXPANDING) {
2873 	            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2874                     xmlHaltParser(ctxt);
2875                     ent->content[0] = 0;
2876                     goto int_error;
2877                 }
2878 
2879                 ent->flags |= XML_ENT_EXPANDING;
2880 		ctxt->depth++;
2881 		rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2882                         ent->length, what, 0, 0, 0, check);
2883 		ctxt->depth--;
2884                 ent->flags &= ~XML_ENT_EXPANDING;
2885 
2886 		if (rep == NULL) {
2887                     ent->content[0] = 0;
2888                     goto int_error;
2889                 }
2890 
2891                 current = rep;
2892                 while (*current != 0) { /* non input consuming loop */
2893                     buffer[nbchars++] = *current++;
2894                     if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2895                         growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2896                     }
2897                 }
2898                 xmlFree(rep);
2899                 rep = NULL;
2900 	    } else if (ent != NULL) {
2901 		int i = xmlStrlen(ent->name);
2902 		const xmlChar *cur = ent->name;
2903 
2904 		buffer[nbchars++] = '&';
2905 		if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2906 		    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2907 		}
2908 		for (;i > 0;i--)
2909 		    buffer[nbchars++] = *cur++;
2910 		buffer[nbchars++] = ';';
2911 	    }
2912 	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2913 	    if (xmlParserDebugEntities)
2914 		xmlGenericError(xmlGenericErrorContext,
2915 			"String decoding PE Reference: %.30s\n", str);
2916 	    ent = xmlParseStringPEReference(ctxt, &str);
2917 	    if (ent != NULL) {
2918                 if (ent->content == NULL) {
2919 		    /*
2920 		     * Note: external parsed entities will not be loaded,
2921 		     * it is not required for a non-validating parser to
2922 		     * complete external PEReferences coming from the
2923 		     * internal subset
2924 		     */
2925 		    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2926 			((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2927 			(ctxt->validate != 0)) {
2928 			xmlLoadEntityContent(ctxt, ent);
2929 		    } else {
2930 			xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2931 		  "not validating will not read content for PE entity %s\n",
2932 		                      ent->name, NULL);
2933 		    }
2934 		}
2935 
2936 	        if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2937                     goto int_error;
2938 
2939                 if (ent->flags & XML_ENT_EXPANDING) {
2940 	            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2941                     xmlHaltParser(ctxt);
2942                     if (ent->content != NULL)
2943                         ent->content[0] = 0;
2944                     goto int_error;
2945                 }
2946 
2947                 ent->flags |= XML_ENT_EXPANDING;
2948 		ctxt->depth++;
2949 		rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2950                         ent->length, what, 0, 0, 0, check);
2951 		ctxt->depth--;
2952                 ent->flags &= ~XML_ENT_EXPANDING;
2953 
2954 		if (rep == NULL) {
2955                     if (ent->content != NULL)
2956                         ent->content[0] = 0;
2957                     goto int_error;
2958                 }
2959                 current = rep;
2960                 while (*current != 0) { /* non input consuming loop */
2961                     buffer[nbchars++] = *current++;
2962                     if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2963                         growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2964                     }
2965                 }
2966                 xmlFree(rep);
2967                 rep = NULL;
2968 	    }
2969 	} else {
2970 	    COPY_BUF(buffer, nbchars, c);
2971 	    str += l;
2972 	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2973 	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2974 	    }
2975 	}
2976 	if (str < last)
2977 	    c = CUR_SCHAR(str, l);
2978 	else
2979 	    c = 0;
2980     }
2981     buffer[nbchars] = 0;
2982     return(buffer);
2983 
2984 mem_error:
2985     xmlErrMemory(ctxt, NULL);
2986 int_error:
2987     if (rep != NULL)
2988         xmlFree(rep);
2989     if (buffer != NULL)
2990         xmlFree(buffer);
2991     return(NULL);
2992 }
2993 
2994 /**
2995  * xmlStringLenDecodeEntities:
2996  * @ctxt:  the parser context
2997  * @str:  the input string
2998  * @len: the string length
2999  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
3000  * @end:  an end marker xmlChar, 0 if none
3001  * @end2:  an end marker xmlChar, 0 if none
3002  * @end3:  an end marker xmlChar, 0 if none
3003  *
3004  * DEPRECATED: Internal function, don't use.
3005  *
3006  * Takes a entity string content and process to do the adequate substitutions.
3007  *
3008  * [67] Reference ::= EntityRef | CharRef
3009  *
3010  * [69] PEReference ::= '%' Name ';'
3011  *
3012  * Returns A newly allocated string with the substitution done. The caller
3013  *      must deallocate it !
3014  */
3015 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what,xmlChar end,xmlChar end2,xmlChar end3)3016 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
3017                            int what, xmlChar end, xmlChar  end2,
3018                            xmlChar end3) {
3019     if ((ctxt == NULL) || (str == NULL) || (len < 0))
3020         return(NULL);
3021     return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
3022                                       end, end2, end3, 0));
3023 }
3024 
3025 /**
3026  * xmlStringDecodeEntities:
3027  * @ctxt:  the parser context
3028  * @str:  the input string
3029  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
3030  * @end:  an end marker xmlChar, 0 if none
3031  * @end2:  an end marker xmlChar, 0 if none
3032  * @end3:  an end marker xmlChar, 0 if none
3033  *
3034  * DEPRECATED: Internal function, don't use.
3035  *
3036  * Takes a entity string content and process to do the adequate substitutions.
3037  *
3038  * [67] Reference ::= EntityRef | CharRef
3039  *
3040  * [69] PEReference ::= '%' Name ';'
3041  *
3042  * Returns A newly allocated string with the substitution done. The caller
3043  *      must deallocate it !
3044  */
3045 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)3046 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
3047 		        xmlChar end, xmlChar  end2, xmlChar end3) {
3048     if ((ctxt == NULL) || (str == NULL)) return(NULL);
3049     return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
3050                                       end, end2, end3, 0));
3051 }
3052 
3053 /************************************************************************
3054  *									*
3055  *		Commodity functions, cleanup needed ?			*
3056  *									*
3057  ************************************************************************/
3058 
3059 /**
3060  * areBlanks:
3061  * @ctxt:  an XML parser context
3062  * @str:  a xmlChar *
3063  * @len:  the size of @str
3064  * @blank_chars: we know the chars are blanks
3065  *
3066  * Is this a sequence of blank chars that one can ignore ?
3067  *
3068  * Returns 1 if ignorable 0 otherwise.
3069  */
3070 
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)3071 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
3072                      int blank_chars) {
3073     int i, ret;
3074     xmlNodePtr lastChild;
3075 
3076     /*
3077      * Don't spend time trying to differentiate them, the same callback is
3078      * used !
3079      */
3080     if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
3081 	return(0);
3082 
3083     /*
3084      * Check for xml:space value.
3085      */
3086     if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
3087         (*(ctxt->space) == -2))
3088 	return(0);
3089 
3090     /*
3091      * Check that the string is made of blanks
3092      */
3093     if (blank_chars == 0) {
3094 	for (i = 0;i < len;i++)
3095 	    if (!(IS_BLANK_CH(str[i]))) return(0);
3096     }
3097 
3098     /*
3099      * Look if the element is mixed content in the DTD if available
3100      */
3101     if (ctxt->node == NULL) return(0);
3102     if (ctxt->myDoc != NULL) {
3103 	ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
3104         if (ret == 0) return(1);
3105         if (ret == 1) return(0);
3106     }
3107 
3108     /*
3109      * Otherwise, heuristic :-\
3110      */
3111     if ((RAW != '<') && (RAW != 0xD)) return(0);
3112     if ((ctxt->node->children == NULL) &&
3113 	(RAW == '<') && (NXT(1) == '/')) return(0);
3114 
3115     lastChild = xmlGetLastChild(ctxt->node);
3116     if (lastChild == NULL) {
3117         if ((ctxt->node->type != XML_ELEMENT_NODE) &&
3118             (ctxt->node->content != NULL)) return(0);
3119     } else if (xmlNodeIsText(lastChild))
3120         return(0);
3121     else if ((ctxt->node->children != NULL) &&
3122              (xmlNodeIsText(ctxt->node->children)))
3123         return(0);
3124     return(1);
3125 }
3126 
3127 /************************************************************************
3128  *									*
3129  *		Extra stuff for namespace support			*
3130  *	Relates to http://www.w3.org/TR/WD-xml-names			*
3131  *									*
3132  ************************************************************************/
3133 
3134 /**
3135  * xmlSplitQName:
3136  * @ctxt:  an XML parser context
3137  * @name:  an XML parser context
3138  * @prefix:  a xmlChar **
3139  *
3140  * parse an UTF8 encoded XML qualified name string
3141  *
3142  * [NS 5] QName ::= (Prefix ':')? LocalPart
3143  *
3144  * [NS 6] Prefix ::= NCName
3145  *
3146  * [NS 7] LocalPart ::= NCName
3147  *
3148  * Returns the local part, and prefix is updated
3149  *   to get the Prefix if any.
3150  */
3151 
3152 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefix)3153 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3154     xmlChar buf[XML_MAX_NAMELEN + 5];
3155     xmlChar *buffer = NULL;
3156     int len = 0;
3157     int max = XML_MAX_NAMELEN;
3158     xmlChar *ret = NULL;
3159     const xmlChar *cur = name;
3160     int c;
3161 
3162     if (prefix == NULL) return(NULL);
3163     *prefix = NULL;
3164 
3165     if (cur == NULL) return(NULL);
3166 
3167 #ifndef XML_XML_NAMESPACE
3168     /* xml: prefix is not really a namespace */
3169     if ((cur[0] == 'x') && (cur[1] == 'm') &&
3170         (cur[2] == 'l') && (cur[3] == ':'))
3171 	return(xmlStrdup(name));
3172 #endif
3173 
3174     /* nasty but well=formed */
3175     if (cur[0] == ':')
3176 	return(xmlStrdup(name));
3177 
3178     c = *cur++;
3179     while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3180 	buf[len++] = c;
3181 	c = *cur++;
3182     }
3183     if (len >= max) {
3184 	/*
3185 	 * Okay someone managed to make a huge name, so he's ready to pay
3186 	 * for the processing speed.
3187 	 */
3188 	max = len * 2;
3189 
3190 	buffer = (xmlChar *) xmlMallocAtomic(max);
3191 	if (buffer == NULL) {
3192 	    xmlErrMemory(ctxt, NULL);
3193 	    return(NULL);
3194 	}
3195 	memcpy(buffer, buf, len);
3196 	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3197 	    if (len + 10 > max) {
3198 	        xmlChar *tmp;
3199 
3200 		max *= 2;
3201 		tmp = (xmlChar *) xmlRealloc(buffer, max);
3202 		if (tmp == NULL) {
3203 		    xmlFree(buffer);
3204 		    xmlErrMemory(ctxt, NULL);
3205 		    return(NULL);
3206 		}
3207 		buffer = tmp;
3208 	    }
3209 	    buffer[len++] = c;
3210 	    c = *cur++;
3211 	}
3212 	buffer[len] = 0;
3213     }
3214 
3215     if ((c == ':') && (*cur == 0)) {
3216         if (buffer != NULL)
3217 	    xmlFree(buffer);
3218 	*prefix = NULL;
3219 	return(xmlStrdup(name));
3220     }
3221 
3222     if (buffer == NULL)
3223 	ret = xmlStrndup(buf, len);
3224     else {
3225 	ret = buffer;
3226 	buffer = NULL;
3227 	max = XML_MAX_NAMELEN;
3228     }
3229 
3230 
3231     if (c == ':') {
3232 	c = *cur;
3233         *prefix = ret;
3234 	if (c == 0) {
3235 	    return(xmlStrndup(BAD_CAST "", 0));
3236 	}
3237 	len = 0;
3238 
3239 	/*
3240 	 * Check that the first character is proper to start
3241 	 * a new name
3242 	 */
3243 	if (!(((c >= 0x61) && (c <= 0x7A)) ||
3244 	      ((c >= 0x41) && (c <= 0x5A)) ||
3245 	      (c == '_') || (c == ':'))) {
3246 	    int l;
3247 	    int first = CUR_SCHAR(cur, l);
3248 
3249 	    if (!IS_LETTER(first) && (first != '_')) {
3250 		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3251 			    "Name %s is not XML Namespace compliant\n",
3252 				  name);
3253 	    }
3254 	}
3255 	cur++;
3256 
3257 	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3258 	    buf[len++] = c;
3259 	    c = *cur++;
3260 	}
3261 	if (len >= max) {
3262 	    /*
3263 	     * Okay someone managed to make a huge name, so he's ready to pay
3264 	     * for the processing speed.
3265 	     */
3266 	    max = len * 2;
3267 
3268 	    buffer = (xmlChar *) xmlMallocAtomic(max);
3269 	    if (buffer == NULL) {
3270 	        xmlErrMemory(ctxt, NULL);
3271 		return(NULL);
3272 	    }
3273 	    memcpy(buffer, buf, len);
3274 	    while (c != 0) { /* tested bigname2.xml */
3275 		if (len + 10 > max) {
3276 		    xmlChar *tmp;
3277 
3278 		    max *= 2;
3279 		    tmp = (xmlChar *) xmlRealloc(buffer, max);
3280 		    if (tmp == NULL) {
3281 			xmlErrMemory(ctxt, NULL);
3282 			xmlFree(buffer);
3283 			return(NULL);
3284 		    }
3285 		    buffer = tmp;
3286 		}
3287 		buffer[len++] = c;
3288 		c = *cur++;
3289 	    }
3290 	    buffer[len] = 0;
3291 	}
3292 
3293 	if (buffer == NULL)
3294 	    ret = xmlStrndup(buf, len);
3295 	else {
3296 	    ret = buffer;
3297 	}
3298     }
3299 
3300     return(ret);
3301 }
3302 
3303 /************************************************************************
3304  *									*
3305  *			The parser itself				*
3306  *	Relates to http://www.w3.org/TR/REC-xml				*
3307  *									*
3308  ************************************************************************/
3309 
3310 /************************************************************************
3311  *									*
3312  *	Routines to parse Name, NCName and NmToken			*
3313  *									*
3314  ************************************************************************/
3315 
3316 /*
3317  * The two following functions are related to the change of accepted
3318  * characters for Name and NmToken in the Revision 5 of XML-1.0
3319  * They correspond to the modified production [4] and the new production [4a]
3320  * changes in that revision. Also note that the macros used for the
3321  * productions Letter, Digit, CombiningChar and Extender are not needed
3322  * anymore.
3323  * We still keep compatibility to pre-revision5 parsing semantic if the
3324  * new XML_PARSE_OLD10 option is given to the parser.
3325  */
3326 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3327 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3328     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3329         /*
3330 	 * Use the new checks of production [4] [4a] amd [5] of the
3331 	 * Update 5 of XML-1.0
3332 	 */
3333 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3334 	    (((c >= 'a') && (c <= 'z')) ||
3335 	     ((c >= 'A') && (c <= 'Z')) ||
3336 	     (c == '_') || (c == ':') ||
3337 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3338 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3339 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3340 	     ((c >= 0x370) && (c <= 0x37D)) ||
3341 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3342 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3343 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3344 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3345 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3346 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3347 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3348 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3349 	    return(1);
3350     } else {
3351         if (IS_LETTER(c) || (c == '_') || (c == ':'))
3352 	    return(1);
3353     }
3354     return(0);
3355 }
3356 
3357 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3358 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3359     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3360         /*
3361 	 * Use the new checks of production [4] [4a] amd [5] of the
3362 	 * Update 5 of XML-1.0
3363 	 */
3364 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3365 	    (((c >= 'a') && (c <= 'z')) ||
3366 	     ((c >= 'A') && (c <= 'Z')) ||
3367 	     ((c >= '0') && (c <= '9')) || /* !start */
3368 	     (c == '_') || (c == ':') ||
3369 	     (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3370 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3371 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3372 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3373 	     ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3374 	     ((c >= 0x370) && (c <= 0x37D)) ||
3375 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3376 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3377 	     ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3378 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3379 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3380 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3381 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3382 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3383 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3384 	     return(1);
3385     } else {
3386         if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3387             (c == '.') || (c == '-') ||
3388 	    (c == '_') || (c == ':') ||
3389 	    (IS_COMBINING(c)) ||
3390 	    (IS_EXTENDER(c)))
3391 	    return(1);
3392     }
3393     return(0);
3394 }
3395 
3396 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3397                                           int *len, int *alloc, int normalize);
3398 
3399 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3400 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3401     int len = 0, l;
3402     int c;
3403     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3404                     XML_MAX_TEXT_LENGTH :
3405                     XML_MAX_NAME_LENGTH;
3406 
3407     /*
3408      * Handler for more complex cases
3409      */
3410     c = CUR_CHAR(l);
3411     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3412         /*
3413 	 * Use the new checks of production [4] [4a] amd [5] of the
3414 	 * Update 5 of XML-1.0
3415 	 */
3416 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3417 	    (!(((c >= 'a') && (c <= 'z')) ||
3418 	       ((c >= 'A') && (c <= 'Z')) ||
3419 	       (c == '_') || (c == ':') ||
3420 	       ((c >= 0xC0) && (c <= 0xD6)) ||
3421 	       ((c >= 0xD8) && (c <= 0xF6)) ||
3422 	       ((c >= 0xF8) && (c <= 0x2FF)) ||
3423 	       ((c >= 0x370) && (c <= 0x37D)) ||
3424 	       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3425 	       ((c >= 0x200C) && (c <= 0x200D)) ||
3426 	       ((c >= 0x2070) && (c <= 0x218F)) ||
3427 	       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3428 	       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3429 	       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3430 	       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3431 	       ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3432 	    return(NULL);
3433 	}
3434 	len += l;
3435 	NEXTL(l);
3436 	c = CUR_CHAR(l);
3437 	while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3438 	       (((c >= 'a') && (c <= 'z')) ||
3439 	        ((c >= 'A') && (c <= 'Z')) ||
3440 	        ((c >= '0') && (c <= '9')) || /* !start */
3441 	        (c == '_') || (c == ':') ||
3442 	        (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3443 	        ((c >= 0xC0) && (c <= 0xD6)) ||
3444 	        ((c >= 0xD8) && (c <= 0xF6)) ||
3445 	        ((c >= 0xF8) && (c <= 0x2FF)) ||
3446 	        ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3447 	        ((c >= 0x370) && (c <= 0x37D)) ||
3448 	        ((c >= 0x37F) && (c <= 0x1FFF)) ||
3449 	        ((c >= 0x200C) && (c <= 0x200D)) ||
3450 	        ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3451 	        ((c >= 0x2070) && (c <= 0x218F)) ||
3452 	        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3453 	        ((c >= 0x3001) && (c <= 0xD7FF)) ||
3454 	        ((c >= 0xF900) && (c <= 0xFDCF)) ||
3455 	        ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3456 	        ((c >= 0x10000) && (c <= 0xEFFFF))
3457 		)) {
3458             if (len <= INT_MAX - l)
3459 	        len += l;
3460 	    NEXTL(l);
3461 	    c = CUR_CHAR(l);
3462 	}
3463     } else {
3464 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3465 	    (!IS_LETTER(c) && (c != '_') &&
3466 	     (c != ':'))) {
3467 	    return(NULL);
3468 	}
3469 	len += l;
3470 	NEXTL(l);
3471 	c = CUR_CHAR(l);
3472 
3473 	while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3474 	       ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3475 		(c == '.') || (c == '-') ||
3476 		(c == '_') || (c == ':') ||
3477 		(IS_COMBINING(c)) ||
3478 		(IS_EXTENDER(c)))) {
3479             if (len <= INT_MAX - l)
3480 	        len += l;
3481 	    NEXTL(l);
3482 	    c = CUR_CHAR(l);
3483 	}
3484     }
3485     if (ctxt->instate == XML_PARSER_EOF)
3486         return(NULL);
3487     if (len > maxLength) {
3488         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3489         return(NULL);
3490     }
3491     if (ctxt->input->cur - ctxt->input->base < len) {
3492         /*
3493          * There were a couple of bugs where PERefs lead to to a change
3494          * of the buffer. Check the buffer size to avoid passing an invalid
3495          * pointer to xmlDictLookup.
3496          */
3497         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3498                     "unexpected change of input buffer");
3499         return (NULL);
3500     }
3501     if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3502         return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3503     return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3504 }
3505 
3506 /**
3507  * xmlParseName:
3508  * @ctxt:  an XML parser context
3509  *
3510  * DEPRECATED: Internal function, don't use.
3511  *
3512  * parse an XML name.
3513  *
3514  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3515  *                  CombiningChar | Extender
3516  *
3517  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3518  *
3519  * [6] Names ::= Name (#x20 Name)*
3520  *
3521  * Returns the Name parsed or NULL
3522  */
3523 
3524 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3525 xmlParseName(xmlParserCtxtPtr ctxt) {
3526     const xmlChar *in;
3527     const xmlChar *ret;
3528     size_t count = 0;
3529     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3530                        XML_MAX_TEXT_LENGTH :
3531                        XML_MAX_NAME_LENGTH;
3532 
3533     GROW;
3534     if (ctxt->instate == XML_PARSER_EOF)
3535         return(NULL);
3536 
3537     /*
3538      * Accelerator for simple ASCII names
3539      */
3540     in = ctxt->input->cur;
3541     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3542 	((*in >= 0x41) && (*in <= 0x5A)) ||
3543 	(*in == '_') || (*in == ':')) {
3544 	in++;
3545 	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3546 	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3547 	       ((*in >= 0x30) && (*in <= 0x39)) ||
3548 	       (*in == '_') || (*in == '-') ||
3549 	       (*in == ':') || (*in == '.'))
3550 	    in++;
3551 	if ((*in > 0) && (*in < 0x80)) {
3552 	    count = in - ctxt->input->cur;
3553             if (count > maxLength) {
3554                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3555                 return(NULL);
3556             }
3557 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3558 	    ctxt->input->cur = in;
3559 	    ctxt->input->col += count;
3560 	    if (ret == NULL)
3561 	        xmlErrMemory(ctxt, NULL);
3562 	    return(ret);
3563 	}
3564     }
3565     /* accelerator for special cases */
3566     return(xmlParseNameComplex(ctxt));
3567 }
3568 
3569 static xmlHashedString
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3570 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3571     xmlHashedString ret;
3572     int len = 0, l;
3573     int c;
3574     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3575                     XML_MAX_TEXT_LENGTH :
3576                     XML_MAX_NAME_LENGTH;
3577     size_t startPosition = 0;
3578 
3579     ret.name = NULL;
3580     ret.hashValue = 0;
3581 
3582     /*
3583      * Handler for more complex cases
3584      */
3585     startPosition = CUR_PTR - BASE_PTR;
3586     c = CUR_CHAR(l);
3587     if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3588 	(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3589 	return(ret);
3590     }
3591 
3592     while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3593 	   (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3594         if (len <= INT_MAX - l)
3595 	    len += l;
3596 	NEXTL(l);
3597 	c = CUR_CHAR(l);
3598     }
3599     if (ctxt->instate == XML_PARSER_EOF)
3600         return(ret);
3601     if (len > maxLength) {
3602         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3603         return(ret);
3604     }
3605     ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3606     return(ret);
3607 }
3608 
3609 /**
3610  * xmlParseNCName:
3611  * @ctxt:  an XML parser context
3612  * @len:  length of the string parsed
3613  *
3614  * parse an XML name.
3615  *
3616  * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3617  *                      CombiningChar | Extender
3618  *
3619  * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3620  *
3621  * Returns the Name parsed or NULL
3622  */
3623 
3624 static xmlHashedString
xmlParseNCName(xmlParserCtxtPtr ctxt)3625 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3626     const xmlChar *in, *e;
3627     xmlHashedString ret;
3628     size_t count = 0;
3629     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3630                        XML_MAX_TEXT_LENGTH :
3631                        XML_MAX_NAME_LENGTH;
3632 
3633     ret.name = NULL;
3634 
3635     /*
3636      * Accelerator for simple ASCII names
3637      */
3638     in = ctxt->input->cur;
3639     e = ctxt->input->end;
3640     if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3641 	 ((*in >= 0x41) && (*in <= 0x5A)) ||
3642 	 (*in == '_')) && (in < e)) {
3643 	in++;
3644 	while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3645 	        ((*in >= 0x41) && (*in <= 0x5A)) ||
3646 	        ((*in >= 0x30) && (*in <= 0x39)) ||
3647 	        (*in == '_') || (*in == '-') ||
3648 	        (*in == '.')) && (in < e))
3649 	    in++;
3650 	if (in >= e)
3651 	    goto complex;
3652 	if ((*in > 0) && (*in < 0x80)) {
3653 	    count = in - ctxt->input->cur;
3654             if (count > maxLength) {
3655                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3656                 return(ret);
3657             }
3658 	    ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3659 	    ctxt->input->cur = in;
3660 	    ctxt->input->col += count;
3661 	    if (ret.name == NULL) {
3662 	        xmlErrMemory(ctxt, NULL);
3663 	    }
3664 	    return(ret);
3665 	}
3666     }
3667 complex:
3668     return(xmlParseNCNameComplex(ctxt));
3669 }
3670 
3671 /**
3672  * xmlParseNameAndCompare:
3673  * @ctxt:  an XML parser context
3674  *
3675  * parse an XML name and compares for match
3676  * (specialized for endtag parsing)
3677  *
3678  * Returns NULL for an illegal name, (xmlChar*) 1 for success
3679  * and the name for mismatch
3680  */
3681 
3682 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3683 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3684     register const xmlChar *cmp = other;
3685     register const xmlChar *in;
3686     const xmlChar *ret;
3687 
3688     GROW;
3689     if (ctxt->instate == XML_PARSER_EOF)
3690         return(NULL);
3691 
3692     in = ctxt->input->cur;
3693     while (*in != 0 && *in == *cmp) {
3694 	++in;
3695 	++cmp;
3696     }
3697     if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3698 	/* success */
3699 	ctxt->input->col += in - ctxt->input->cur;
3700 	ctxt->input->cur = in;
3701 	return (const xmlChar*) 1;
3702     }
3703     /* failure (or end of input buffer), check with full function */
3704     ret = xmlParseName (ctxt);
3705     /* strings coming from the dictionary direct compare possible */
3706     if (ret == other) {
3707 	return (const xmlChar*) 1;
3708     }
3709     return ret;
3710 }
3711 
3712 /**
3713  * xmlParseStringName:
3714  * @ctxt:  an XML parser context
3715  * @str:  a pointer to the string pointer (IN/OUT)
3716  *
3717  * parse an XML name.
3718  *
3719  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3720  *                  CombiningChar | Extender
3721  *
3722  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3723  *
3724  * [6] Names ::= Name (#x20 Name)*
3725  *
3726  * Returns the Name parsed or NULL. The @str pointer
3727  * is updated to the current location in the string.
3728  */
3729 
3730 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3731 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3732     xmlChar buf[XML_MAX_NAMELEN + 5];
3733     const xmlChar *cur = *str;
3734     int len = 0, l;
3735     int c;
3736     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3737                     XML_MAX_TEXT_LENGTH :
3738                     XML_MAX_NAME_LENGTH;
3739 
3740     c = CUR_SCHAR(cur, l);
3741     if (!xmlIsNameStartChar(ctxt, c)) {
3742 	return(NULL);
3743     }
3744 
3745     COPY_BUF(buf, len, c);
3746     cur += l;
3747     c = CUR_SCHAR(cur, l);
3748     while (xmlIsNameChar(ctxt, c)) {
3749 	COPY_BUF(buf, len, c);
3750 	cur += l;
3751 	c = CUR_SCHAR(cur, l);
3752 	if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3753 	    /*
3754 	     * Okay someone managed to make a huge name, so he's ready to pay
3755 	     * for the processing speed.
3756 	     */
3757 	    xmlChar *buffer;
3758 	    int max = len * 2;
3759 
3760 	    buffer = (xmlChar *) xmlMallocAtomic(max);
3761 	    if (buffer == NULL) {
3762 	        xmlErrMemory(ctxt, NULL);
3763 		return(NULL);
3764 	    }
3765 	    memcpy(buffer, buf, len);
3766 	    while (xmlIsNameChar(ctxt, c)) {
3767 		if (len + 10 > max) {
3768 		    xmlChar *tmp;
3769 
3770 		    max *= 2;
3771 		    tmp = (xmlChar *) xmlRealloc(buffer, max);
3772 		    if (tmp == NULL) {
3773 			xmlErrMemory(ctxt, NULL);
3774 			xmlFree(buffer);
3775 			return(NULL);
3776 		    }
3777 		    buffer = tmp;
3778 		}
3779 		COPY_BUF(buffer, len, c);
3780 		cur += l;
3781 		c = CUR_SCHAR(cur, l);
3782                 if (len > maxLength) {
3783                     xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3784                     xmlFree(buffer);
3785                     return(NULL);
3786                 }
3787 	    }
3788 	    buffer[len] = 0;
3789 	    *str = cur;
3790 	    return(buffer);
3791 	}
3792     }
3793     if (len > maxLength) {
3794         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3795         return(NULL);
3796     }
3797     *str = cur;
3798     return(xmlStrndup(buf, len));
3799 }
3800 
3801 /**
3802  * xmlParseNmtoken:
3803  * @ctxt:  an XML parser context
3804  *
3805  * DEPRECATED: Internal function, don't use.
3806  *
3807  * parse an XML Nmtoken.
3808  *
3809  * [7] Nmtoken ::= (NameChar)+
3810  *
3811  * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3812  *
3813  * Returns the Nmtoken parsed or NULL
3814  */
3815 
3816 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3817 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3818     xmlChar buf[XML_MAX_NAMELEN + 5];
3819     int len = 0, l;
3820     int c;
3821     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3822                     XML_MAX_TEXT_LENGTH :
3823                     XML_MAX_NAME_LENGTH;
3824 
3825     c = CUR_CHAR(l);
3826 
3827     while (xmlIsNameChar(ctxt, c)) {
3828 	COPY_BUF(buf, len, c);
3829 	NEXTL(l);
3830 	c = CUR_CHAR(l);
3831 	if (len >= XML_MAX_NAMELEN) {
3832 	    /*
3833 	     * Okay someone managed to make a huge token, so he's ready to pay
3834 	     * for the processing speed.
3835 	     */
3836 	    xmlChar *buffer;
3837 	    int max = len * 2;
3838 
3839 	    buffer = (xmlChar *) xmlMallocAtomic(max);
3840 	    if (buffer == NULL) {
3841 	        xmlErrMemory(ctxt, NULL);
3842 		return(NULL);
3843 	    }
3844 	    memcpy(buffer, buf, len);
3845 	    while (xmlIsNameChar(ctxt, c)) {
3846 		if (len + 10 > max) {
3847 		    xmlChar *tmp;
3848 
3849 		    max *= 2;
3850 		    tmp = (xmlChar *) xmlRealloc(buffer, max);
3851 		    if (tmp == NULL) {
3852 			xmlErrMemory(ctxt, NULL);
3853 			xmlFree(buffer);
3854 			return(NULL);
3855 		    }
3856 		    buffer = tmp;
3857 		}
3858 		COPY_BUF(buffer, len, c);
3859                 if (len > maxLength) {
3860                     xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3861                     xmlFree(buffer);
3862                     return(NULL);
3863                 }
3864 		NEXTL(l);
3865 		c = CUR_CHAR(l);
3866 	    }
3867 	    buffer[len] = 0;
3868             if (ctxt->instate == XML_PARSER_EOF) {
3869                 xmlFree(buffer);
3870                 return(NULL);
3871             }
3872 	    return(buffer);
3873 	}
3874     }
3875     if (ctxt->instate == XML_PARSER_EOF)
3876         return(NULL);
3877     if (len == 0)
3878         return(NULL);
3879     if (len > maxLength) {
3880         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3881         return(NULL);
3882     }
3883     return(xmlStrndup(buf, len));
3884 }
3885 
3886 /**
3887  * xmlParseEntityValue:
3888  * @ctxt:  an XML parser context
3889  * @orig:  if non-NULL store a copy of the original entity value
3890  *
3891  * DEPRECATED: Internal function, don't use.
3892  *
3893  * parse a value for ENTITY declarations
3894  *
3895  * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3896  *	               "'" ([^%&'] | PEReference | Reference)* "'"
3897  *
3898  * Returns the EntityValue parsed with reference substituted or NULL
3899  */
3900 
3901 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3902 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3903     xmlChar *buf = NULL;
3904     int len = 0;
3905     int size = XML_PARSER_BUFFER_SIZE;
3906     int c, l;
3907     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3908                     XML_MAX_HUGE_LENGTH :
3909                     XML_MAX_TEXT_LENGTH;
3910     xmlChar stop;
3911     xmlChar *ret = NULL;
3912     const xmlChar *cur = NULL;
3913     xmlParserInputPtr input;
3914 
3915     if (RAW == '"') stop = '"';
3916     else if (RAW == '\'') stop = '\'';
3917     else {
3918 	xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3919 	return(NULL);
3920     }
3921     buf = (xmlChar *) xmlMallocAtomic(size);
3922     if (buf == NULL) {
3923 	xmlErrMemory(ctxt, NULL);
3924 	return(NULL);
3925     }
3926 
3927     /*
3928      * The content of the entity definition is copied in a buffer.
3929      */
3930 
3931     ctxt->instate = XML_PARSER_ENTITY_VALUE;
3932     input = ctxt->input;
3933     GROW;
3934     if (ctxt->instate == XML_PARSER_EOF)
3935         goto error;
3936     NEXT;
3937     c = CUR_CHAR(l);
3938     /*
3939      * NOTE: 4.4.5 Included in Literal
3940      * When a parameter entity reference appears in a literal entity
3941      * value, ... a single or double quote character in the replacement
3942      * text is always treated as a normal data character and will not
3943      * terminate the literal.
3944      * In practice it means we stop the loop only when back at parsing
3945      * the initial entity and the quote is found
3946      */
3947     while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3948 	    (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3949 	if (len + 5 >= size) {
3950 	    xmlChar *tmp;
3951 
3952 	    size *= 2;
3953 	    tmp = (xmlChar *) xmlRealloc(buf, size);
3954 	    if (tmp == NULL) {
3955 		xmlErrMemory(ctxt, NULL);
3956                 goto error;
3957 	    }
3958 	    buf = tmp;
3959 	}
3960 	COPY_BUF(buf, len, c);
3961 	NEXTL(l);
3962 
3963 	GROW;
3964 	c = CUR_CHAR(l);
3965 	if (c == 0) {
3966 	    GROW;
3967 	    c = CUR_CHAR(l);
3968 	}
3969 
3970         if (len > maxLength) {
3971             xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3972                            "entity value too long\n");
3973             goto error;
3974         }
3975     }
3976     buf[len] = 0;
3977     if (ctxt->instate == XML_PARSER_EOF)
3978         goto error;
3979     if (c != stop) {
3980         xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3981         goto error;
3982     }
3983     NEXT;
3984 
3985     /*
3986      * Raise problem w.r.t. '&' and '%' being used in non-entities
3987      * reference constructs. Note Charref will be handled in
3988      * xmlStringDecodeEntities()
3989      */
3990     cur = buf;
3991     while (*cur != 0) { /* non input consuming */
3992 	if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3993 	    xmlChar *name;
3994 	    xmlChar tmp = *cur;
3995             int nameOk = 0;
3996 
3997 	    cur++;
3998 	    name = xmlParseStringName(ctxt, &cur);
3999             if (name != NULL) {
4000                 nameOk = 1;
4001                 xmlFree(name);
4002             }
4003             if ((nameOk == 0) || (*cur != ';')) {
4004 		xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
4005 	    "EntityValue: '%c' forbidden except for entities references\n",
4006 	                          tmp);
4007                 goto error;
4008 	    }
4009 	    if ((tmp == '%') && (ctxt->inSubset == 1) &&
4010 		(ctxt->inputNr == 1)) {
4011 		xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
4012                 goto error;
4013 	    }
4014 	    if (*cur == 0)
4015 	        break;
4016 	}
4017 	cur++;
4018     }
4019 
4020     /*
4021      * Then PEReference entities are substituted.
4022      *
4023      * NOTE: 4.4.7 Bypassed
4024      * When a general entity reference appears in the EntityValue in
4025      * an entity declaration, it is bypassed and left as is.
4026      * so XML_SUBSTITUTE_REF is not set here.
4027      */
4028     ++ctxt->depth;
4029     ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
4030                                      0, 0, 0, /* check */ 1);
4031     --ctxt->depth;
4032 
4033     if (orig != NULL) {
4034         *orig = buf;
4035         buf = NULL;
4036     }
4037 
4038 error:
4039     if (buf != NULL)
4040         xmlFree(buf);
4041     return(ret);
4042 }
4043 
4044 /**
4045  * xmlParseAttValueComplex:
4046  * @ctxt:  an XML parser context
4047  * @attlen:  if non-NULL, receives the length in bytes of the returned value
4048  * @normalize:  whether to apply the inner normalization
4049  *
4050  * parse a value for an attribute, this is the fallback function
4051  * of xmlParseAttValue() when the attribute parsing requires handling
4052  * of non-ASCII characters, or normalization compaction.
 *
 * The value is accumulated in a heap buffer. Character references and
 * predefined entities are resolved inline. Other entity references are
 * either expanded (when ctxt->replaceEntities is set) or copied back
 * as "&name;". Literal white space is replaced by #x20; with @normalize
 * set, leading white space is dropped, runs are collapsed and trailing
 * spaces are trimmed.
 *
 * A '<' in the value or a missing closing quote is reported as a fatal
 * error, but the buffer collected so far is still returned.
4053  *
4054  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4055  */
4056 static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt,int * attlen,int normalize)4057 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
4058     xmlChar limit = 0;
4059     xmlChar *buf = NULL;
4060     xmlChar *rep = NULL;
4061     size_t len = 0;
4062     size_t buf_size = 0;
4063     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4064                        XML_MAX_HUGE_LENGTH :
4065                        XML_MAX_TEXT_LENGTH;
4066     int c, l, in_space = 0;
4067     xmlChar *current = NULL;
4068     xmlEntityPtr ent;
4069 
    /* The opening quote decides which character closes the value. */
4070     if (NXT(0) == '"') {
4071 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4072 	limit = '"';
4073         NEXT;
4074     } else if (NXT(0) == '\'') {
4075 	limit = '\'';
4076 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4077         NEXT;
4078     } else {
4079 	xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4080 	return(NULL);
4081     }
4082 
4083     /*
4084      * allocate a translation buffer.
4085      */
4086     buf_size = XML_PARSER_BUFFER_SIZE;
4087     buf = (xmlChar *) xmlMallocAtomic(buf_size);
4088     if (buf == NULL) goto mem_error;
4089 
4090     /*
4091      * OK loop until we reach one of the ending char or a size limit.
4092      */
4093     c = CUR_CHAR(l);
4094     while (((NXT(0) != limit) && /* checked */
4095             (IS_CHAR(c)) && (c != '<')) &&
4096             (ctxt->instate != XML_PARSER_EOF)) {
4097 	if (c == '&') {
	    /* A reference always ends a pending run of white space. */
4098 	    in_space = 0;
4099 	    if (NXT(1) == '#') {
		/* Character reference. */
4100 		int val = xmlParseCharRef(ctxt);
4101 
4102 		if (val == '&') {
4103 		    if (ctxt->replaceEntities) {
4104 			if (len + 10 > buf_size) {
4105 			    growBuffer(buf, 10);
4106 			}
4107 			buf[len++] = '&';
4108 		    } else {
4109 			/*
4110 			 * The reparsing will be done in xmlStringGetNodeList()
4111 			 * called by the attribute() function in SAX.c
4112 			 */
4113 			if (len + 10 > buf_size) {
4114 			    growBuffer(buf, 10);
4115 			}
4116 			buf[len++] = '&';
4117 			buf[len++] = '#';
4118 			buf[len++] = '3';
4119 			buf[len++] = '8';
4120 			buf[len++] = ';';
4121 		    }
4122 		} else if (val != 0) {
4123 		    if (len + 10 > buf_size) {
4124 			growBuffer(buf, 10);
4125 		    }
4126 		    len += xmlCopyChar(0, &buf[len], val);
4127 		}
4128 	    } else {
		/* General entity reference. */
4129 		ent = xmlParseEntityRef(ctxt);
4130 		if ((ent != NULL) &&
4131 		    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
		    /*
		     * Predefined entity: emit its single character, except
		     * that '&' is written as "&#38;" when entities are not
		     * being replaced.
		     */
4132 		    if (len + 10 > buf_size) {
4133 			growBuffer(buf, 10);
4134 		    }
4135 		    if ((ctxt->replaceEntities == 0) &&
4136 		        (ent->content[0] == '&')) {
4137 			buf[len++] = '&';
4138 			buf[len++] = '#';
4139 			buf[len++] = '3';
4140 			buf[len++] = '8';
4141 			buf[len++] = ';';
4142 		    } else {
4143 			buf[len++] = ent->content[0];
4144 		    }
4145 		} else if ((ent != NULL) &&
4146 		           (ctxt->replaceEntities != 0)) {
		    /*
		     * Entity replacement requested: decode the entity
		     * content and append it, turning CR, LF and TAB
		     * into #x20.
		     */
4147 		    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4148                         if (xmlParserEntityCheck(ctxt, ent->length))
4149                             goto error;
4150 
4151 			++ctxt->depth;
4152 			rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4153                                 ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4154                                 /* check */ 1);
4155 			--ctxt->depth;
4156 			if (rep != NULL) {
4157 			    current = rep;
4158 			    while (*current != 0) { /* non input consuming */
4159                                 if ((*current == 0xD) || (*current == 0xA) ||
4160                                     (*current == 0x9)) {
4161                                     buf[len++] = 0x20;
4162                                     current++;
4163                                 } else
4164                                     buf[len++] = *current++;
4165 				if (len + 10 > buf_size) {
4166 				    growBuffer(buf, 10);
4167 				}
4168 			    }
4169 			    xmlFree(rep);
4170 			    rep = NULL;
4171 			}
4172 		    } else {
4173 			if (len + 10 > buf_size) {
4174 			    growBuffer(buf, 10);
4175 			}
4176 			if (ent->content != NULL)
4177 			    buf[len++] = ent->content[0];
4178 		    }
4179 		} else if (ent != NULL) {
		    /* No replacement: the reference is copied back as text. */
4180 		    int i = xmlStrlen(ent->name);
4181 		    const xmlChar *cur = ent->name;
4182 
4183 		    /*
4184                      * We also check for recursion and amplification
4185                      * when entities are not substituted. They're
4186                      * often expanded later.
4187 		     */
4188 		    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4189 			(ent->content != NULL)) {
4190                         if ((ent->flags & XML_ENT_CHECKED) == 0) {
4191                             unsigned long oldCopy = ctxt->sizeentcopy;
4192 
4193                             ctxt->sizeentcopy = ent->length;
4194 
4195                             ++ctxt->depth;
4196                             rep = xmlStringDecodeEntitiesInt(ctxt,
4197                                     ent->content, ent->length,
4198                                     XML_SUBSTITUTE_REF, 0, 0, 0,
4199                                     /* check */ 1);
4200                             --ctxt->depth;
4201 
4202                             /*
4203                              * If we're parsing DTD content, the entity
4204                              * might reference other entities which
4205                              * weren't defined yet, so the check isn't
4206                              * reliable.
4207                              */
4208                             if (ctxt->inSubset == 0) {
4209                                 ent->flags |= XML_ENT_CHECKED;
4210                                 ent->expandedSize = ctxt->sizeentcopy;
4211                             }
4212 
                            /*
                             * The decoded text is only needed for the
                             * check; if decoding failed the entity
                             * content is emptied.
                             */
4213                             if (rep != NULL) {
4214                                 xmlFree(rep);
4215                                 rep = NULL;
4216                             } else {
4217                                 ent->content[0] = 0;
4218                             }
4219 
4220                             if (xmlParserEntityCheck(ctxt, oldCopy))
4221                                 goto error;
4222                         } else {
4223                             if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4224                                 goto error;
4225                         }
4226 		    }
4227 
4228 		    /*
4229 		     * Just output the reference
4230 		     */
4231 		    buf[len++] = '&';
4232 		    while (len + i + 10 > buf_size) {
4233 			growBuffer(buf, i + 10);
4234 		    }
4235 		    for (;i > 0;i--)
4236 			buf[len++] = *cur++;
4237 		    buf[len++] = ';';
4238 		}
4239 	    }
4240 	} else {
	    /*
	     * Literal character: white space becomes #x20. With normalize
	     * set, leading white space is skipped and runs are collapsed.
	     */
4241 	    if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4242 	        if ((len != 0) || (!normalize)) {
4243 		    if ((!normalize) || (!in_space)) {
4244 			COPY_BUF(buf, len, 0x20);
4245 			while (len + 10 > buf_size) {
4246 			    growBuffer(buf, 10);
4247 			}
4248 		    }
4249 		    in_space = 1;
4250 		}
4251 	    } else {
4252 	        in_space = 0;
4253 		COPY_BUF(buf, len, c);
4254 		if (len + 10 > buf_size) {
4255 		    growBuffer(buf, 10);
4256 		}
4257 	    }
4258 	    NEXTL(l);
4259 	}
4260 	GROW;
4261 	c = CUR_CHAR(l);
        /*
         * NOTE(review): this path reports the length error and then jumps
         * to mem_error, which additionally calls xmlErrMemory() - confirm
         * that the extra memory error report is intended.
         */
4262         if (len > maxLength) {
4263             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4264                            "AttValue length too long\n");
4265             goto mem_error;
4266         }
4267     }
4268     if (ctxt->instate == XML_PARSER_EOF)
4269         goto error;
4270 
    /* Trim trailing spaces when the value ended inside a white space run. */
4271     if ((in_space) && (normalize)) {
4272         while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4273     }
4274     buf[len] = 0;
    /* The errors below are reported, but buf is still returned. */
4275     if (RAW == '<') {
4276 	xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4277     } else if (RAW != limit) {
4278 	if ((c != 0) && (!IS_CHAR(c))) {
4279 	    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4280 			   "invalid character in attribute value\n");
4281 	} else {
4282 	    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4283 			   "AttValue: ' expected\n");
4284         }
4285     } else
4286 	NEXT;
4287 
    /* len is a size_t and is converted to int on assignment. */
4288     if (attlen != NULL) *attlen = len;
4289     return(buf);
4290 
4291 mem_error:
4292     xmlErrMemory(ctxt, NULL);
4293 error:
4294     if (buf != NULL)
4295         xmlFree(buf);
4296     if (rep != NULL)
4297         xmlFree(rep);
4298     return(NULL);
4299 }
4300 
4301 /**
4302  * xmlParseAttValue:
4303  * @ctxt:  an XML parser context
4304  *
4305  * DEPRECATED: Internal function, don't use.
4306  *
4307  * parse a value for an attribute
4308  * Note: the parser won't do substitution of entities here, this
4309  * will be handled later in xmlStringGetNodeList
4310  *
4311  * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4312  *                   "'" ([^<&'] | Reference)* "'"
4313  *
4314  * 3.3.3 Attribute-Value Normalization:
4315  * Before the value of an attribute is passed to the application or
4316  * checked for validity, the XML processor must normalize it as follows:
4317  * - a character reference is processed by appending the referenced
4318  *   character to the attribute value
4319  * - an entity reference is processed by recursively processing the
4320  *   replacement text of the entity
4321  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4322  *   appending #x20 to the normalized value, except that only a single
4323  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4324  *   parsed entity or the literal entity value of an internal parsed entity
4325  * - other characters are processed by appending them to the normalized value
4326  * If the declared value is not CDATA, then the XML processor must further
4327  * process the normalized attribute value by discarding any leading and
4328  * trailing space (#x20) characters, and by replacing sequences of space
4329  * (#x20) characters by a single space (#x20) character.
4330  * All attributes for which no declaration has been read should be treated
4331  * by a non-validating parser as if declared CDATA.
4332  *
4333  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4334  */
4335 
4336 
4337 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4338 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4339     if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4340     return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4341 }
4342 
4343 /**
4344  * xmlParseSystemLiteral:
4345  * @ctxt:  an XML parser context
4346  *
4347  * DEPRECATED: Internal function, don't use.
4348  *
4349  * parse an XML Literal
4350  *
4351  * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4352  *
4353  * Returns the SystemLiteral parsed or NULL
4354  */
4355 
4356 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4357 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4358     xmlChar *buf = NULL;
4359     int len = 0;
4360     int size = XML_PARSER_BUFFER_SIZE;
4361     int cur, l;
4362     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4363                     XML_MAX_TEXT_LENGTH :
4364                     XML_MAX_NAME_LENGTH;
4365     xmlChar stop;
4366     int state = ctxt->instate;
4367 
4368     if (RAW == '"') {
4369         NEXT;
4370 	stop = '"';
4371     } else if (RAW == '\'') {
4372         NEXT;
4373 	stop = '\'';
4374     } else {
4375 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4376 	return(NULL);
4377     }
4378 
4379     buf = (xmlChar *) xmlMallocAtomic(size);
4380     if (buf == NULL) {
4381         xmlErrMemory(ctxt, NULL);
4382 	return(NULL);
4383     }
4384     ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4385     cur = CUR_CHAR(l);
4386     while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4387 	if (len + 5 >= size) {
4388 	    xmlChar *tmp;
4389 
4390 	    size *= 2;
4391 	    tmp = (xmlChar *) xmlRealloc(buf, size);
4392 	    if (tmp == NULL) {
4393 	        xmlFree(buf);
4394 		xmlErrMemory(ctxt, NULL);
4395 		ctxt->instate = (xmlParserInputState) state;
4396 		return(NULL);
4397 	    }
4398 	    buf = tmp;
4399 	}
4400 	COPY_BUF(buf, len, cur);
4401         if (len > maxLength) {
4402             xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4403             xmlFree(buf);
4404             ctxt->instate = (xmlParserInputState) state;
4405             return(NULL);
4406         }
4407 	NEXTL(l);
4408 	cur = CUR_CHAR(l);
4409     }
4410     buf[len] = 0;
4411     if (ctxt->instate == XML_PARSER_EOF) {
4412         xmlFree(buf);
4413         return(NULL);
4414     }
4415     ctxt->instate = (xmlParserInputState) state;
4416     if (!IS_CHAR(cur)) {
4417 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4418     } else {
4419 	NEXT;
4420     }
4421     return(buf);
4422 }
4423 
4424 /**
4425  * xmlParsePubidLiteral:
4426  * @ctxt:  an XML parser context
4427  *
4428  * DEPRECATED: Internal function, don't use.
4429  *
4430  * parse an XML public literal
4431  *
4432  * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4433  *
4434  * Returns the PubidLiteral parsed or NULL.
4435  */
4436 
4437 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4438 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4439     xmlChar *buf = NULL;
4440     int len = 0;
4441     int size = XML_PARSER_BUFFER_SIZE;
4442     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4443                     XML_MAX_TEXT_LENGTH :
4444                     XML_MAX_NAME_LENGTH;
4445     xmlChar cur;
4446     xmlChar stop;
4447     xmlParserInputState oldstate = ctxt->instate;
4448 
4449     if (RAW == '"') {
4450         NEXT;
4451 	stop = '"';
4452     } else if (RAW == '\'') {
4453         NEXT;
4454 	stop = '\'';
4455     } else {
4456 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4457 	return(NULL);
4458     }
4459     buf = (xmlChar *) xmlMallocAtomic(size);
4460     if (buf == NULL) {
4461 	xmlErrMemory(ctxt, NULL);
4462 	return(NULL);
4463     }
4464     ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4465     cur = CUR;
4466     while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4467 	if (len + 1 >= size) {
4468 	    xmlChar *tmp;
4469 
4470 	    size *= 2;
4471 	    tmp = (xmlChar *) xmlRealloc(buf, size);
4472 	    if (tmp == NULL) {
4473 		xmlErrMemory(ctxt, NULL);
4474 		xmlFree(buf);
4475 		return(NULL);
4476 	    }
4477 	    buf = tmp;
4478 	}
4479 	buf[len++] = cur;
4480         if (len > maxLength) {
4481             xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4482             xmlFree(buf);
4483             return(NULL);
4484         }
4485 	NEXT;
4486 	cur = CUR;
4487     }
4488     buf[len] = 0;
4489     if (ctxt->instate == XML_PARSER_EOF) {
4490         xmlFree(buf);
4491         return(NULL);
4492     }
4493     if (cur != stop) {
4494 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4495     } else {
4496 	NEXTL(1);
4497     }
4498     ctxt->instate = oldstate;
4499     return(buf);
4500 }
4501 
4502 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4503 
4504 /*
4505  * used for the test in the inner loop of the char data testing
 *
 * Lookup table indexed by input byte. A non-zero entry (the byte value
 * itself) means the byte can be consumed directly by the fast scanning
 * loop of xmlParseCharDataInternal(). The entry is zero for control
 * bytes other than 0x09 (which includes 0x0A and 0x0D), for '&', '<'
 * and ']', and for every byte >= 0x80; the scanning loop stops on those.
4506  */
4507 static const unsigned char test_char_data[256] = {
4508     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4509     0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4510     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4511     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4512     0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4513     0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4514     0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4515     0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4516     0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4517     0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4518     0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4519     0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4520     0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4521     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4522     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4523     0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4524     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4525     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4526     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4527     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4528     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4529     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4530     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4531     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4532     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4533     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4534     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4535     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4536     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4537     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4538     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4539     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4540 };
4541 
4542 /**
4543  * xmlParseCharDataInternal:
4544  * @ctxt:  an XML parser context
4545  * @partial:  buffer may contain partial UTF-8 sequences
4546  *
4547  * Parse character data. Always makes progress if the first char isn't
4548  * '<' or '&'.
4549  *
4550  * The right angle bracket (>) may be represented using the string "&gt;",
4551  * and must, for compatibility, be escaped using "&gt;" or a character
4552  * reference when it appears in the string "]]>" in content, when that
4553  * string is not marking the end of a CDATA section.
4554  *
4555  * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * This is the fast path: runs of plain ASCII text are handed to the SAX
 * callbacks straight from the input buffer, without copying. When a byte
 * is met that this path does not handle, the line and column recorded
 * after the last delivery are restored and the work is passed on to
 * xmlParseCharDataComplex() together with @partial.
4556  */
4557 static void
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt,int partial)4558 xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4559     const xmlChar *in;
4560     int nbchar = 0;
    /* Position to restore before falling back to the complex parser. */
4561     int line = ctxt->input->line;
4562     int col = ctxt->input->col;
4563     int ccol;
4564 
4565     GROW;
4566     /*
4567      * Accelerated common case where input don't need to be
4568      * modified before passing it to the handler.
4569      */
4570     in = ctxt->input->cur;
4571     do {
        /* Step 1: skip a leading run of spaces and line feeds. */
4572 get_more_space:
4573         while (*in == 0x20) { in++; ctxt->input->col++; }
4574         if (*in == 0xA) {
4575             do {
4576                 ctxt->input->line++; ctxt->input->col = 1;
4577                 in++;
4578             } while (*in == 0xA);
4579             goto get_more_space;
4580         }
        /*
         * The run consisted only of spaces/line feeds and is followed by
         * markup: deliver it and return.
         */
4581         if (*in == '<') {
4582             nbchar = in - ctxt->input->cur;
4583             if (nbchar > 0) {
4584                 const xmlChar *tmp = ctxt->input->cur;
4585                 ctxt->input->cur = in;
4586 
4587                 if ((ctxt->sax != NULL) &&
4588                     (ctxt->disableSAX == 0) &&
4589                     (ctxt->sax->ignorableWhitespace !=
4590                      ctxt->sax->characters)) {
4591                     if (areBlanks(ctxt, tmp, nbchar, 1)) {
4592                         if (ctxt->sax->ignorableWhitespace != NULL)
4593                             ctxt->sax->ignorableWhitespace(ctxt->userData,
4594                                                    tmp, nbchar);
4595                     } else {
4596                         if (ctxt->sax->characters != NULL)
4597                             ctxt->sax->characters(ctxt->userData,
4598                                                   tmp, nbchar);
4599                         if (*ctxt->space == -1)
4600                             *ctxt->space = -2;
4601                     }
4602                 } else if ((ctxt->sax != NULL) &&
4603                            (ctxt->disableSAX == 0) &&
4604                            (ctxt->sax->characters != NULL)) {
4605                     ctxt->sax->characters(ctxt->userData,
4606                                           tmp, nbchar);
4607                 }
4608             }
4609             return;
4610         }
4611 
        /* Step 2: consume every byte the lookup table marks as plain. */
4612 get_more:
4613         ccol = ctxt->input->col;
4614         while (test_char_data[*in]) {
4615             in++;
4616             ccol++;
4617         }
4618         ctxt->input->col = ccol;
4619         if (*in == 0xA) {
4620             do {
4621                 ctxt->input->line++; ctxt->input->col = 1;
4622                 in++;
4623             } while (*in == 0xA);
4624             goto get_more;
4625         }
        /* A ']' is plain text unless it starts the forbidden "]]>". */
4626         if (*in == ']') {
4627             if ((in[1] == ']') && (in[2] == '>')) {
4628                 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4629                 if (ctxt->instate != XML_PARSER_EOF)
4630                     ctxt->input->cur = in + 1;
4631                 return;
4632             }
4633             in++;
4634             ctxt->input->col++;
4635             goto get_more;
4636         }
        /* Step 3: deliver the bytes scanned so far, if any. */
4637         nbchar = in - ctxt->input->cur;
4638         if (nbchar > 0) {
4639             if ((ctxt->sax != NULL) &&
4640                 (ctxt->disableSAX == 0) &&
4641                 (ctxt->sax->ignorableWhitespace !=
4642                  ctxt->sax->characters) &&
4643                 (IS_BLANK_CH(*ctxt->input->cur))) {
4644                 const xmlChar *tmp = ctxt->input->cur;
4645                 ctxt->input->cur = in;
4646 
4647                 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4648                     if (ctxt->sax->ignorableWhitespace != NULL)
4649                         ctxt->sax->ignorableWhitespace(ctxt->userData,
4650                                                        tmp, nbchar);
4651                 } else {
4652                     if (ctxt->sax->characters != NULL)
4653                         ctxt->sax->characters(ctxt->userData,
4654                                               tmp, nbchar);
4655                     if (*ctxt->space == -1)
4656                         *ctxt->space = -2;
4657                 }
4658                 line = ctxt->input->line;
4659                 col = ctxt->input->col;
4660             } else if ((ctxt->sax != NULL) &&
4661                        (ctxt->disableSAX == 0)) {
4662                 if (ctxt->sax->characters != NULL)
4663                     ctxt->sax->characters(ctxt->userData,
4664                                           ctxt->input->cur, nbchar);
4665                 line = ctxt->input->line;
4666                 col = ctxt->input->col;
4667             }
4668             if (ctxt->instate == XML_PARSER_EOF)
4669                 return;
4670         }
4671         ctxt->input->cur = in;
        /*
         * Step 4: for a CR immediately followed by LF, the input position
         * is moved onto the LF (dropping the CR) and scanning resumes
         * after it. A lone CR is left where it is.
         */
4672         if (*in == 0xD) {
4673             in++;
4674             if (*in == 0xA) {
4675                 ctxt->input->cur = in;
4676                 in++;
4677                 ctxt->input->line++; ctxt->input->col = 1;
4678                 continue; /* while */
4679             }
4680             in--;
4681         }
        /* Markup or a reference ends the character data. */
4682         if (*in == '<') {
4683             return;
4684         }
4685         if (*in == '&') {
4686             return;
4687         }
4688         SHRINK;
4689         GROW;
4690         if (ctxt->instate == XML_PARSER_EOF)
4691             return;
4692         in = ctxt->input->cur;
    /* Stay on the fast path only while the next byte is one it handles. */
4693     } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4694              (*in == 0x09) || (*in == 0x0a));
    /* Restore the position recorded after the last delivery. */
4695     ctxt->input->line = line;
4696     ctxt->input->col = col;
4697     xmlParseCharDataComplex(ctxt, partial);
4698 }
4699 
4700 /**
4701  * xmlParseCharDataComplex:
4702  * @ctxt:  an XML parser context
4703  * @partial:  buffer may contain partial UTF-8 sequences
4704  *
4705  * Always makes progress if the first char isn't '<' or '&'.
4706  *
4707  * Parse a CharData section. This is the fallback function
4708  * of xmlParseCharData() when the parsing requires handling
4709  * of non-ASCII characters.
4710  */
4711 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int partial)4712 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4713     xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4714     int nbchar = 0;
4715     int cur, l;
4716 
4717     cur = CUR_CHAR(l);
4718     while ((cur != '<') && /* checked */
4719            (cur != '&') &&
4720 	   (IS_CHAR(cur))) {
4721 	if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4722 	    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4723 	}
4724 	COPY_BUF(buf, nbchar, cur);
4725 	/* move current position before possible calling of ctxt->sax->characters */
4726 	NEXTL(l);
4727 	if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4728 	    buf[nbchar] = 0;
4729 
4730 	    /*
4731 	     * OK the segment is to be consumed as chars.
4732 	     */
4733 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4734 		if (areBlanks(ctxt, buf, nbchar, 0)) {
4735 		    if (ctxt->sax->ignorableWhitespace != NULL)
4736 			ctxt->sax->ignorableWhitespace(ctxt->userData,
4737 			                               buf, nbchar);
4738 		} else {
4739 		    if (ctxt->sax->characters != NULL)
4740 			ctxt->sax->characters(ctxt->userData, buf, nbchar);
4741 		    if ((ctxt->sax->characters !=
4742 		         ctxt->sax->ignorableWhitespace) &&
4743 			(*ctxt->space == -1))
4744 			*ctxt->space = -2;
4745 		}
4746 	    }
4747 	    nbchar = 0;
4748             /* something really bad happened in the SAX callback */
4749             if (ctxt->instate != XML_PARSER_CONTENT)
4750                 return;
4751             SHRINK;
4752 	}
4753 	cur = CUR_CHAR(l);
4754     }
4755     if (ctxt->instate == XML_PARSER_EOF)
4756         return;
4757     if (nbchar != 0) {
4758         buf[nbchar] = 0;
4759 	/*
4760 	 * OK the segment is to be consumed as chars.
4761 	 */
4762 	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4763 	    if (areBlanks(ctxt, buf, nbchar, 0)) {
4764 		if (ctxt->sax->ignorableWhitespace != NULL)
4765 		    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4766 	    } else {
4767 		if (ctxt->sax->characters != NULL)
4768 		    ctxt->sax->characters(ctxt->userData, buf, nbchar);
4769 		if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4770 		    (*ctxt->space == -1))
4771 		    *ctxt->space = -2;
4772 	    }
4773 	}
4774     }
4775     /*
4776      * cur == 0 can mean
4777      *
4778      * - XML_PARSER_EOF or memory error. This is checked above.
4779      * - An actual 0 character.
4780      * - End of buffer.
4781      * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4782      */
4783     if (ctxt->input->cur < ctxt->input->end) {
4784         if ((cur == 0) && (CUR != 0)) {
4785             if (partial == 0) {
4786                 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4787                         "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4788                 NEXTL(1);
4789             }
4790         } else if ((cur != '<') && (cur != '&')) {
4791             /* Generate the error and skip the offending character */
4792             xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4793                               "PCDATA invalid Char value %d\n", cur);
4794             NEXTL(l);
4795         }
4796     }
4797 }
4798 
4799 /**
4800  * xmlParseCharData:
4801  * @ctxt:  an XML parser context
4802  * @cdata:  unused
4803  *
4804  * DEPRECATED: Internal function, don't use.
4805  */
4806 void
xmlParseCharData(xmlParserCtxtPtr ctxt,ATTRIBUTE_UNUSED int cdata)4807 xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4808     xmlParseCharDataInternal(ctxt, 0);
4809 }
4810 
4811 /**
4812  * xmlParseExternalID:
4813  * @ctxt:  an XML parser context
4814  * @publicID:  a xmlChar** receiving PubidLiteral
4815  * @strict: indicate whether we should restrict parsing to only
4816  *          production [75], see NOTE below
4817  *
4818  * DEPRECATED: Internal function, don't use.
4819  *
4820  * Parse an External ID or a Public ID
4821  *
4822  * NOTE: Productions [75] and [83] interact badly since [75] can generate
4823  *       'PUBLIC' S PubidLiteral S SystemLiteral
4824  *
4825  * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4826  *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4827  *
4828  * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4829  *
4830  * Returns the function returns SystemLiteral and in the second
4831  *                case publicID receives PubidLiteral, is strict is off
4832  *                it is possible to return NULL and have publicID set.
4833  */
4834 
4835 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4836 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4837     xmlChar *URI = NULL;
4838 
4839     *publicID = NULL;
4840     if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4841         SKIP(6);
4842 	if (SKIP_BLANKS == 0) {
4843 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4844 	                   "Space required after 'SYSTEM'\n");
4845 	}
4846 	URI = xmlParseSystemLiteral(ctxt);
4847 	if (URI == NULL) {
4848 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4849         }
4850     } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4851         SKIP(6);
4852 	if (SKIP_BLANKS == 0) {
4853 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4854 		    "Space required after 'PUBLIC'\n");
4855 	}
4856 	*publicID = xmlParsePubidLiteral(ctxt);
4857 	if (*publicID == NULL) {
4858 	    xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4859 	}
4860 	if (strict) {
4861 	    /*
4862 	     * We don't handle [83] so "S SystemLiteral" is required.
4863 	     */
4864 	    if (SKIP_BLANKS == 0) {
4865 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4866 			"Space required after the Public Identifier\n");
4867 	    }
4868 	} else {
4869 	    /*
4870 	     * We handle [83] so we return immediately, if
4871 	     * "S SystemLiteral" is not detected. We skip blanks if no
4872              * system literal was found, but this is harmless since we must
4873              * be at the end of a NotationDecl.
4874 	     */
4875 	    if (SKIP_BLANKS == 0) return(NULL);
4876 	    if ((CUR != '\'') && (CUR != '"')) return(NULL);
4877 	}
4878 	URI = xmlParseSystemLiteral(ctxt);
4879 	if (URI == NULL) {
4880 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4881         }
4882     }
4883     return(URI);
4884 }
4885 
4886 /**
4887  * xmlParseCommentComplex:
4888  * @ctxt:  an XML parser context
4889  * @buf:  the already parsed part of the buffer
4890  * @len:  number of bytes in the buffer
4891  * @size:  allocated size of the buffer
4892  *
4893  * Skip an XML (SGML) comment <!-- .... -->
4894  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4895  *  must not occur within comments. "
4896  * This is the slow routine in case the accelerator for ascii didn't work
4897  *
4898  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4899  */
4900 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)4901 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4902                        size_t len, size_t size) {
4903     int q, ql;
4904     int r, rl;
4905     int cur, l;
4906     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4907                        XML_MAX_HUGE_LENGTH :
4908                        XML_MAX_TEXT_LENGTH;
4909     int inputid;
4910 
4911     inputid = ctxt->input->id;
4912 
4913     if (buf == NULL) {
4914         len = 0;
4915 	size = XML_PARSER_BUFFER_SIZE;
4916 	buf = (xmlChar *) xmlMallocAtomic(size);
4917 	if (buf == NULL) {
4918 	    xmlErrMemory(ctxt, NULL);
4919 	    return;
4920 	}
4921     }
4922     q = CUR_CHAR(ql);
4923     if (q == 0)
4924         goto not_terminated;
4925     if (!IS_CHAR(q)) {
4926         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4927                           "xmlParseComment: invalid xmlChar value %d\n",
4928 	                  q);
4929 	xmlFree (buf);
4930 	return;
4931     }
4932     NEXTL(ql);
4933     r = CUR_CHAR(rl);
4934     if (r == 0)
4935         goto not_terminated;
4936     if (!IS_CHAR(r)) {
4937         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4938                           "xmlParseComment: invalid xmlChar value %d\n",
4939 	                  r);
4940 	xmlFree (buf);
4941 	return;
4942     }
4943     NEXTL(rl);
4944     cur = CUR_CHAR(l);
4945     if (cur == 0)
4946         goto not_terminated;
4947     while (IS_CHAR(cur) && /* checked */
4948            ((cur != '>') ||
4949 	    (r != '-') || (q != '-'))) {
4950 	if ((r == '-') && (q == '-')) {
4951 	    xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4952 	}
4953 	if (len + 5 >= size) {
4954 	    xmlChar *new_buf;
4955             size_t new_size;
4956 
4957 	    new_size = size * 2;
4958 	    new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4959 	    if (new_buf == NULL) {
4960 		xmlFree (buf);
4961 		xmlErrMemory(ctxt, NULL);
4962 		return;
4963 	    }
4964 	    buf = new_buf;
4965             size = new_size;
4966 	}
4967 	COPY_BUF(buf, len, q);
4968         if (len > maxLength) {
4969             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4970                          "Comment too big found", NULL);
4971             xmlFree (buf);
4972             return;
4973         }
4974 
4975 	q = r;
4976 	ql = rl;
4977 	r = cur;
4978 	rl = l;
4979 
4980 	NEXTL(l);
4981 	cur = CUR_CHAR(l);
4982 
4983     }
4984     buf[len] = 0;
4985     if (ctxt->instate == XML_PARSER_EOF) {
4986         xmlFree(buf);
4987         return;
4988     }
4989     if (cur == 0) {
4990 	xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4991 	                     "Comment not terminated \n<!--%.50s\n", buf);
4992     } else if (!IS_CHAR(cur)) {
4993         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4994                           "xmlParseComment: invalid xmlChar value %d\n",
4995 	                  cur);
4996     } else {
4997 	if (inputid != ctxt->input->id) {
4998 	    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4999 		           "Comment doesn't start and stop in the same"
5000                            " entity\n");
5001 	}
5002         NEXT;
5003 	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5004 	    (!ctxt->disableSAX))
5005 	    ctxt->sax->comment(ctxt->userData, buf);
5006     }
5007     xmlFree(buf);
5008     return;
5009 not_terminated:
5010     xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5011 			 "Comment not terminated\n", NULL);
5012     xmlFree(buf);
5013     return;
5014 }
5015 
5016 /**
5017  * xmlParseComment:
5018  * @ctxt:  an XML parser context
5019  *
5020  * DEPRECATED: Internal function, don't use.
5021  *
5022  * Parse an XML (SGML) comment. Always consumes '<!'.
5023  *
5024  *  The spec says that "For compatibility, the string "--" (double-hyphen)
5025  *  must not occur within comments. "
5026  *
5027  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5028  */
5029 void
xmlParseComment(xmlParserCtxtPtr ctxt)5030 xmlParseComment(xmlParserCtxtPtr ctxt) {
5031     xmlChar *buf = NULL;
5032     size_t size = XML_PARSER_BUFFER_SIZE;
5033     size_t len = 0;
5034     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5035                        XML_MAX_HUGE_LENGTH :
5036                        XML_MAX_TEXT_LENGTH;
5037     xmlParserInputState state;
5038     const xmlChar *in;
5039     size_t nbchar = 0;
5040     int ccol;
5041     int inputid;
5042 
5043     /*
5044      * Check that there is a comment right here.
5045      */
5046     if ((RAW != '<') || (NXT(1) != '!'))
5047         return;
5048     SKIP(2);
5049     if ((RAW != '-') || (NXT(1) != '-'))
5050         return;
5051     state = ctxt->instate;
5052     ctxt->instate = XML_PARSER_COMMENT;
5053     inputid = ctxt->input->id;
5054     SKIP(2);
5055     GROW;
5056 
5057     /*
5058      * Accelerated common case where input don't need to be
5059      * modified before passing it to the handler.
5060      */
5061     in = ctxt->input->cur;
5062     do {
5063 	if (*in == 0xA) {
5064 	    do {
5065 		ctxt->input->line++; ctxt->input->col = 1;
5066 		in++;
5067 	    } while (*in == 0xA);
5068 	}
5069 get_more:
5070         ccol = ctxt->input->col;
5071 	while (((*in > '-') && (*in <= 0x7F)) ||
5072 	       ((*in >= 0x20) && (*in < '-')) ||
5073 	       (*in == 0x09)) {
5074 		    in++;
5075 		    ccol++;
5076 	}
5077 	ctxt->input->col = ccol;
5078 	if (*in == 0xA) {
5079 	    do {
5080 		ctxt->input->line++; ctxt->input->col = 1;
5081 		in++;
5082 	    } while (*in == 0xA);
5083 	    goto get_more;
5084 	}
5085 	nbchar = in - ctxt->input->cur;
5086 	/*
5087 	 * save current set of data
5088 	 */
5089 	if (nbchar > 0) {
5090             if (buf == NULL) {
5091                 if ((*in == '-') && (in[1] == '-'))
5092                     size = nbchar + 1;
5093                 else
5094                     size = XML_PARSER_BUFFER_SIZE + nbchar;
5095                 buf = (xmlChar *) xmlMallocAtomic(size);
5096                 if (buf == NULL) {
5097                     xmlErrMemory(ctxt, NULL);
5098                     ctxt->instate = state;
5099                     return;
5100                 }
5101                 len = 0;
5102             } else if (len + nbchar + 1 >= size) {
5103                 xmlChar *new_buf;
5104                 size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5105                 new_buf = (xmlChar *) xmlRealloc(buf, size);
5106                 if (new_buf == NULL) {
5107                     xmlFree (buf);
5108                     xmlErrMemory(ctxt, NULL);
5109                     ctxt->instate = state;
5110                     return;
5111                 }
5112                 buf = new_buf;
5113             }
5114             memcpy(&buf[len], ctxt->input->cur, nbchar);
5115             len += nbchar;
5116             buf[len] = 0;
5117 	}
5118         if (len > maxLength) {
5119             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5120                          "Comment too big found", NULL);
5121             xmlFree (buf);
5122             return;
5123         }
5124 	ctxt->input->cur = in;
5125 	if (*in == 0xA) {
5126 	    in++;
5127 	    ctxt->input->line++; ctxt->input->col = 1;
5128 	}
5129 	if (*in == 0xD) {
5130 	    in++;
5131 	    if (*in == 0xA) {
5132 		ctxt->input->cur = in;
5133 		in++;
5134 		ctxt->input->line++; ctxt->input->col = 1;
5135 		goto get_more;
5136 	    }
5137 	    in--;
5138 	}
5139 	SHRINK;
5140 	GROW;
5141         if (ctxt->instate == XML_PARSER_EOF) {
5142             xmlFree(buf);
5143             return;
5144         }
5145 	in = ctxt->input->cur;
5146 	if (*in == '-') {
5147 	    if (in[1] == '-') {
5148 	        if (in[2] == '>') {
5149 		    if (ctxt->input->id != inputid) {
5150 			xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5151 			               "comment doesn't start and stop in the"
5152                                        " same entity\n");
5153 		    }
5154 		    SKIP(3);
5155 		    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5156 		        (!ctxt->disableSAX)) {
5157 			if (buf != NULL)
5158 			    ctxt->sax->comment(ctxt->userData, buf);
5159 			else
5160 			    ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5161 		    }
5162 		    if (buf != NULL)
5163 		        xmlFree(buf);
5164 		    if (ctxt->instate != XML_PARSER_EOF)
5165 			ctxt->instate = state;
5166 		    return;
5167 		}
5168 		if (buf != NULL) {
5169 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5170 		                      "Double hyphen within comment: "
5171                                       "<!--%.50s\n",
5172 				      buf);
5173 		} else
5174 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5175 		                      "Double hyphen within comment\n", NULL);
5176                 if (ctxt->instate == XML_PARSER_EOF) {
5177                     xmlFree(buf);
5178                     return;
5179                 }
5180 		in++;
5181 		ctxt->input->col++;
5182 	    }
5183 	    in++;
5184 	    ctxt->input->col++;
5185 	    goto get_more;
5186 	}
5187     } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5188     xmlParseCommentComplex(ctxt, buf, len, size);
5189     ctxt->instate = state;
5190     return;
5191 }
5192 
5193 
5194 /**
5195  * xmlParsePITarget:
5196  * @ctxt:  an XML parser context
5197  *
5198  * DEPRECATED: Internal function, don't use.
5199  *
5200  * parse the name of a PI
5201  *
5202  * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5203  *
5204  * Returns the PITarget name or NULL
5205  */
5206 
5207 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)5208 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5209     const xmlChar *name;
5210 
5211     name = xmlParseName(ctxt);
5212     if ((name != NULL) &&
5213         ((name[0] == 'x') || (name[0] == 'X')) &&
5214         ((name[1] == 'm') || (name[1] == 'M')) &&
5215         ((name[2] == 'l') || (name[2] == 'L'))) {
5216 	int i;
5217 	if ((name[0] == 'x') && (name[1] == 'm') &&
5218 	    (name[2] == 'l') && (name[3] == 0)) {
5219 	    xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5220 		 "XML declaration allowed only at the start of the document\n");
5221 	    return(name);
5222 	} else if (name[3] == 0) {
5223 	    xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5224 	    return(name);
5225 	}
5226 	for (i = 0;;i++) {
5227 	    if (xmlW3CPIs[i] == NULL) break;
5228 	    if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5229 	        return(name);
5230 	}
5231 	xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5232 		      "xmlParsePITarget: invalid name prefix 'xml'\n",
5233 		      NULL, NULL);
5234     }
5235     if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5236 	xmlNsErr(ctxt, XML_NS_ERR_COLON,
5237 		 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5238     }
5239     return(name);
5240 }
5241 
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must consist of the pseudo-attribute name "catalog", an
 * '=' sign and a quoted URL, with optional blanks around each part and
 * nothing after the closing quote. Any other content is reported with
 * an XML_WAR_CATALOG_PI warning and the catalog list is left untouched.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *tmp, *base;
    xmlChar marker;

    tmp = catalog;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
        goto error;
    tmp += 7;
    while (IS_BLANK_CH(*tmp)) tmp++;
    /*
     * A missing '=' is a syntax error like any other: report it
     * instead of returning silently.
     */
    if (*tmp != '=')
        goto error;
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    /* the opening quote decides which quote closes the value */
    marker = *tmp;
    if ((marker != '\'') && (marker != '"'))
        goto error;
    tmp++;
    base = tmp;
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
    if (*tmp == 0)
        goto error;
    URL = xmlStrndup(base, tmp - base);
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    /* only blanks may follow the closing quote */
    if (*tmp != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5303 
5304 /**
5305  * xmlParsePI:
5306  * @ctxt:  an XML parser context
5307  *
5308  * DEPRECATED: Internal function, don't use.
5309  *
5310  * parse an XML Processing Instruction.
5311  *
5312  * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5313  *
5314  * The processing is transferred to SAX once parsed.
5315  */
5316 
5317 void
xmlParsePI(xmlParserCtxtPtr ctxt)5318 xmlParsePI(xmlParserCtxtPtr ctxt) {
5319     xmlChar *buf = NULL;
5320     size_t len = 0;
5321     size_t size = XML_PARSER_BUFFER_SIZE;
5322     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5323                        XML_MAX_HUGE_LENGTH :
5324                        XML_MAX_TEXT_LENGTH;
5325     int cur, l;
5326     const xmlChar *target;
5327     xmlParserInputState state;
5328 
5329     if ((RAW == '<') && (NXT(1) == '?')) {
5330 	int inputid = ctxt->input->id;
5331 	state = ctxt->instate;
5332         ctxt->instate = XML_PARSER_PI;
5333 	/*
5334 	 * this is a Processing Instruction.
5335 	 */
5336 	SKIP(2);
5337 
5338 	/*
5339 	 * Parse the target name and check for special support like
5340 	 * namespace.
5341 	 */
5342         target = xmlParsePITarget(ctxt);
5343 	if (target != NULL) {
5344 	    if ((RAW == '?') && (NXT(1) == '>')) {
5345 		if (inputid != ctxt->input->id) {
5346 		    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5347 	                           "PI declaration doesn't start and stop in"
5348                                    " the same entity\n");
5349 		}
5350 		SKIP(2);
5351 
5352 		/*
5353 		 * SAX: PI detected.
5354 		 */
5355 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5356 		    (ctxt->sax->processingInstruction != NULL))
5357 		    ctxt->sax->processingInstruction(ctxt->userData,
5358 		                                     target, NULL);
5359 		if (ctxt->instate != XML_PARSER_EOF)
5360 		    ctxt->instate = state;
5361 		return;
5362 	    }
5363 	    buf = (xmlChar *) xmlMallocAtomic(size);
5364 	    if (buf == NULL) {
5365 		xmlErrMemory(ctxt, NULL);
5366 		ctxt->instate = state;
5367 		return;
5368 	    }
5369 	    if (SKIP_BLANKS == 0) {
5370 		xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5371 			  "ParsePI: PI %s space expected\n", target);
5372 	    }
5373 	    cur = CUR_CHAR(l);
5374 	    while (IS_CHAR(cur) && /* checked */
5375 		   ((cur != '?') || (NXT(1) != '>'))) {
5376 		if (len + 5 >= size) {
5377 		    xmlChar *tmp;
5378                     size_t new_size = size * 2;
5379 		    tmp = (xmlChar *) xmlRealloc(buf, new_size);
5380 		    if (tmp == NULL) {
5381 			xmlErrMemory(ctxt, NULL);
5382 			xmlFree(buf);
5383 			ctxt->instate = state;
5384 			return;
5385 		    }
5386 		    buf = tmp;
5387                     size = new_size;
5388 		}
5389 		COPY_BUF(buf, len, cur);
5390                 if (len > maxLength) {
5391                     xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5392                                       "PI %s too big found", target);
5393                     xmlFree(buf);
5394                     ctxt->instate = state;
5395                     return;
5396                 }
5397 		NEXTL(l);
5398 		cur = CUR_CHAR(l);
5399 	    }
5400 	    buf[len] = 0;
5401             if (ctxt->instate == XML_PARSER_EOF) {
5402                 xmlFree(buf);
5403                 return;
5404             }
5405 	    if (cur != '?') {
5406 		xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5407 		      "ParsePI: PI %s never end ...\n", target);
5408 	    } else {
5409 		if (inputid != ctxt->input->id) {
5410 		    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5411 	                           "PI declaration doesn't start and stop in"
5412                                    " the same entity\n");
5413 		}
5414 		SKIP(2);
5415 
5416 #ifdef LIBXML_CATALOG_ENABLED
5417 		if (((state == XML_PARSER_MISC) ||
5418 	             (state == XML_PARSER_START)) &&
5419 		    (xmlStrEqual(target, XML_CATALOG_PI))) {
5420 		    xmlCatalogAllow allow = xmlCatalogGetDefaults();
5421 		    if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5422 			(allow == XML_CATA_ALLOW_ALL))
5423 			xmlParseCatalogPI(ctxt, buf);
5424 		}
5425 #endif
5426 
5427 
5428 		/*
5429 		 * SAX: PI detected.
5430 		 */
5431 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5432 		    (ctxt->sax->processingInstruction != NULL))
5433 		    ctxt->sax->processingInstruction(ctxt->userData,
5434 		                                     target, buf);
5435 	    }
5436 	    xmlFree(buf);
5437 	} else {
5438 	    xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5439 	}
5440 	if (ctxt->instate != XML_PARSER_EOF)
5441 	    ctxt->instate = state;
5442     }
5443 }
5444 
5445 /**
5446  * xmlParseNotationDecl:
5447  * @ctxt:  an XML parser context
5448  *
5449  * DEPRECATED: Internal function, don't use.
5450  *
5451  * Parse a notation declaration. Always consumes '<!'.
5452  *
5453  * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5454  *
5455  * Hence there is actually 3 choices:
5456  *     'PUBLIC' S PubidLiteral
5457  *     'PUBLIC' S PubidLiteral S SystemLiteral
5458  * and 'SYSTEM' S SystemLiteral
5459  *
5460  * See the NOTE on xmlParseExternalID().
5461  */
5462 
5463 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5464 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5465     const xmlChar *name;
5466     xmlChar *Pubid;
5467     xmlChar *Systemid;
5468 
5469     if ((CUR != '<') || (NXT(1) != '!'))
5470         return;
5471     SKIP(2);
5472 
5473     if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5474 	int inputid = ctxt->input->id;
5475 	SKIP(8);
5476 	if (SKIP_BLANKS == 0) {
5477 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5478 			   "Space required after '<!NOTATION'\n");
5479 	    return;
5480 	}
5481 
5482         name = xmlParseName(ctxt);
5483 	if (name == NULL) {
5484 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5485 	    return;
5486 	}
5487 	if (xmlStrchr(name, ':') != NULL) {
5488 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5489 		     "colons are forbidden from notation names '%s'\n",
5490 		     name, NULL, NULL);
5491 	}
5492 	if (SKIP_BLANKS == 0) {
5493 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5494 		     "Space required after the NOTATION name'\n");
5495 	    return;
5496 	}
5497 
5498 	/*
5499 	 * Parse the IDs.
5500 	 */
5501 	Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5502 	SKIP_BLANKS;
5503 
5504 	if (RAW == '>') {
5505 	    if (inputid != ctxt->input->id) {
5506 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5507 	                       "Notation declaration doesn't start and stop"
5508                                " in the same entity\n");
5509 	    }
5510 	    NEXT;
5511 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5512 		(ctxt->sax->notationDecl != NULL))
5513 		ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5514 	} else {
5515 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5516 	}
5517 	if (Systemid != NULL) xmlFree(Systemid);
5518 	if (Pubid != NULL) xmlFree(Pubid);
5519     }
5520 }
5521 
5522 /**
5523  * xmlParseEntityDecl:
5524  * @ctxt:  an XML parser context
5525  *
5526  * DEPRECATED: Internal function, don't use.
5527  *
5528  * Parse an entity declaration. Always consumes '<!'.
5529  *
5530  * [70] EntityDecl ::= GEDecl | PEDecl
5531  *
5532  * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5533  *
5534  * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5535  *
5536  * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5537  *
5538  * [74] PEDef ::= EntityValue | ExternalID
5539  *
5540  * [76] NDataDecl ::= S 'NDATA' S Name
5541  *
5542  * [ VC: Notation Declared ]
5543  * The Name must match the declared name of a notation.
5544  */
5545 
5546 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5547 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5548     const xmlChar *name = NULL;
5549     xmlChar *value = NULL;
5550     xmlChar *URI = NULL, *literal = NULL;
5551     const xmlChar *ndata = NULL;
5552     int isParameter = 0;
5553     xmlChar *orig = NULL;
5554 
5555     if ((CUR != '<') || (NXT(1) != '!'))
5556         return;
5557     SKIP(2);
5558 
5559     /* GROW; done in the caller */
5560     if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5561 	int inputid = ctxt->input->id;
5562 	SKIP(6);
5563 	if (SKIP_BLANKS == 0) {
5564 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5565 			   "Space required after '<!ENTITY'\n");
5566 	}
5567 
5568 	if (RAW == '%') {
5569 	    NEXT;
5570 	    if (SKIP_BLANKS == 0) {
5571 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5572 			       "Space required after '%%'\n");
5573 	    }
5574 	    isParameter = 1;
5575 	}
5576 
5577         name = xmlParseName(ctxt);
5578 	if (name == NULL) {
5579 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5580 	                   "xmlParseEntityDecl: no name\n");
5581             return;
5582 	}
5583 	if (xmlStrchr(name, ':') != NULL) {
5584 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5585 		     "colons are forbidden from entities names '%s'\n",
5586 		     name, NULL, NULL);
5587 	}
5588 	if (SKIP_BLANKS == 0) {
5589 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5590 			   "Space required after the entity name\n");
5591 	}
5592 
5593 	ctxt->instate = XML_PARSER_ENTITY_DECL;
5594 	/*
5595 	 * handle the various case of definitions...
5596 	 */
5597 	if (isParameter) {
5598 	    if ((RAW == '"') || (RAW == '\'')) {
5599 	        value = xmlParseEntityValue(ctxt, &orig);
5600 		if (value) {
5601 		    if ((ctxt->sax != NULL) &&
5602 			(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5603 			ctxt->sax->entityDecl(ctxt->userData, name,
5604 		                    XML_INTERNAL_PARAMETER_ENTITY,
5605 				    NULL, NULL, value);
5606 		}
5607 	    } else {
5608 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5609 		if ((URI == NULL) && (literal == NULL)) {
5610 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5611 		}
5612 		if (URI) {
5613 		    xmlURIPtr uri;
5614 
5615 		    uri = xmlParseURI((const char *) URI);
5616 		    if (uri == NULL) {
5617 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5618 				     "Invalid URI: %s\n", URI);
5619 			/*
5620 			 * This really ought to be a well formedness error
5621 			 * but the XML Core WG decided otherwise c.f. issue
5622 			 * E26 of the XML erratas.
5623 			 */
5624 		    } else {
5625 			if (uri->fragment != NULL) {
5626 			    /*
5627 			     * Okay this is foolish to block those but not
5628 			     * invalid URIs.
5629 			     */
5630 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5631 			} else {
5632 			    if ((ctxt->sax != NULL) &&
5633 				(!ctxt->disableSAX) &&
5634 				(ctxt->sax->entityDecl != NULL))
5635 				ctxt->sax->entityDecl(ctxt->userData, name,
5636 					    XML_EXTERNAL_PARAMETER_ENTITY,
5637 					    literal, URI, NULL);
5638 			}
5639 			xmlFreeURI(uri);
5640 		    }
5641 		}
5642 	    }
5643 	} else {
5644 	    if ((RAW == '"') || (RAW == '\'')) {
5645 	        value = xmlParseEntityValue(ctxt, &orig);
5646 		if ((ctxt->sax != NULL) &&
5647 		    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5648 		    ctxt->sax->entityDecl(ctxt->userData, name,
5649 				XML_INTERNAL_GENERAL_ENTITY,
5650 				NULL, NULL, value);
5651 		/*
5652 		 * For expat compatibility in SAX mode.
5653 		 */
5654 		if ((ctxt->myDoc == NULL) ||
5655 		    (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5656 		    if (ctxt->myDoc == NULL) {
5657 			ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5658 			if (ctxt->myDoc == NULL) {
5659 			    xmlErrMemory(ctxt, "New Doc failed");
5660 			    goto done;
5661 			}
5662 			ctxt->myDoc->properties = XML_DOC_INTERNAL;
5663 		    }
5664 		    if (ctxt->myDoc->intSubset == NULL)
5665 			ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5666 					    BAD_CAST "fake", NULL, NULL);
5667 
5668 		    xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5669 			              NULL, NULL, value);
5670 		}
5671 	    } else {
5672 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5673 		if ((URI == NULL) && (literal == NULL)) {
5674 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5675 		}
5676 		if (URI) {
5677 		    xmlURIPtr uri;
5678 
5679 		    uri = xmlParseURI((const char *)URI);
5680 		    if (uri == NULL) {
5681 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5682 				     "Invalid URI: %s\n", URI);
5683 			/*
5684 			 * This really ought to be a well formedness error
5685 			 * but the XML Core WG decided otherwise c.f. issue
5686 			 * E26 of the XML erratas.
5687 			 */
5688 		    } else {
5689 			if (uri->fragment != NULL) {
5690 			    /*
5691 			     * Okay this is foolish to block those but not
5692 			     * invalid URIs.
5693 			     */
5694 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5695 			}
5696 			xmlFreeURI(uri);
5697 		    }
5698 		}
5699 		if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5700 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5701 				   "Space required before 'NDATA'\n");
5702 		}
5703 		if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5704 		    SKIP(5);
5705 		    if (SKIP_BLANKS == 0) {
5706 			xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5707 				       "Space required after 'NDATA'\n");
5708 		    }
5709 		    ndata = xmlParseName(ctxt);
5710 		    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5711 		        (ctxt->sax->unparsedEntityDecl != NULL))
5712 			ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5713 				    literal, URI, ndata);
5714 		} else {
5715 		    if ((ctxt->sax != NULL) &&
5716 		        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5717 			ctxt->sax->entityDecl(ctxt->userData, name,
5718 				    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5719 				    literal, URI, NULL);
5720 		    /*
5721 		     * For expat compatibility in SAX mode.
5722 		     * assuming the entity replacement was asked for
5723 		     */
5724 		    if ((ctxt->replaceEntities != 0) &&
5725 			((ctxt->myDoc == NULL) ||
5726 			(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5727 			if (ctxt->myDoc == NULL) {
5728 			    ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5729 			    if (ctxt->myDoc == NULL) {
5730 			        xmlErrMemory(ctxt, "New Doc failed");
5731 				goto done;
5732 			    }
5733 			    ctxt->myDoc->properties = XML_DOC_INTERNAL;
5734 			}
5735 
5736 			if (ctxt->myDoc->intSubset == NULL)
5737 			    ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5738 						BAD_CAST "fake", NULL, NULL);
5739 			xmlSAX2EntityDecl(ctxt, name,
5740 				          XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5741 				          literal, URI, NULL);
5742 		    }
5743 		}
5744 	    }
5745 	}
5746 	if (ctxt->instate == XML_PARSER_EOF)
5747 	    goto done;
5748 	SKIP_BLANKS;
5749 	if (RAW != '>') {
5750 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5751 	            "xmlParseEntityDecl: entity %s not terminated\n", name);
5752 	    xmlHaltParser(ctxt);
5753 	} else {
5754 	    if (inputid != ctxt->input->id) {
5755 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5756 	                       "Entity declaration doesn't start and stop in"
5757                                " the same entity\n");
5758 	    }
5759 	    NEXT;
5760 	}
5761 	if (orig != NULL) {
5762 	    /*
5763 	     * Ugly mechanism to save the raw entity value.
5764 	     */
5765 	    xmlEntityPtr cur = NULL;
5766 
5767 	    if (isParameter) {
5768 	        if ((ctxt->sax != NULL) &&
5769 		    (ctxt->sax->getParameterEntity != NULL))
5770 		    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5771 	    } else {
5772 	        if ((ctxt->sax != NULL) &&
5773 		    (ctxt->sax->getEntity != NULL))
5774 		    cur = ctxt->sax->getEntity(ctxt->userData, name);
5775 		if ((cur == NULL) && (ctxt->userData==ctxt)) {
5776 		    cur = xmlSAX2GetEntity(ctxt, name);
5777 		}
5778 	    }
5779             if ((cur != NULL) && (cur->orig == NULL)) {
5780 		cur->orig = orig;
5781                 orig = NULL;
5782 	    }
5783 	}
5784 
5785 done:
5786 	if (value != NULL) xmlFree(value);
5787 	if (URI != NULL) xmlFree(URI);
5788 	if (literal != NULL) xmlFree(literal);
5789         if (orig != NULL) xmlFree(orig);
5790     }
5791 }
5792 
5793 /**
5794  * xmlParseDefaultDecl:
5795  * @ctxt:  an XML parser context
5796  * @value:  Receive a possible fixed default value for the attribute
5797  *
5798  * DEPRECATED: Internal function, don't use.
5799  *
5800  * Parse an attribute default declaration
5801  *
5802  * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5803  *
5804  * [ VC: Required Attribute ]
5805  * if the default declaration is the keyword #REQUIRED, then the
5806  * attribute must be specified for all elements of the type in the
5807  * attribute-list declaration.
5808  *
5809  * [ VC: Attribute Default Legal ]
5810  * The declared default value must meet the lexical constraints of
5811  * the declared attribute type c.f. xmlValidateAttributeDecl()
5812  *
5813  * [ VC: Fixed Attribute Default ]
5814  * if an attribute has a default value declared with the #FIXED
5815  * keyword, instances of that attribute must match the default value.
5816  *
5817  * [ WFC: No < in Attribute Values ]
5818  * handled in xmlParseAttValue()
5819  *
5820  * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5821  *          or XML_ATTRIBUTE_FIXED.
5822  */
5823 
5824 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5825 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5826     int val;
5827     xmlChar *ret;
5828 
5829     *value = NULL;
5830     if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5831 	SKIP(9);
5832 	return(XML_ATTRIBUTE_REQUIRED);
5833     }
5834     if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5835 	SKIP(8);
5836 	return(XML_ATTRIBUTE_IMPLIED);
5837     }
5838     val = XML_ATTRIBUTE_NONE;
5839     if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5840 	SKIP(6);
5841 	val = XML_ATTRIBUTE_FIXED;
5842 	if (SKIP_BLANKS == 0) {
5843 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5844 			   "Space required after '#FIXED'\n");
5845 	}
5846     }
5847     ret = xmlParseAttValue(ctxt);
5848     ctxt->instate = XML_PARSER_DTD;
5849     if (ret == NULL) {
5850 	xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5851 		       "Attribute default value declaration error\n");
5852     } else
5853         *value = ret;
5854     return(val);
5855 }
5856 
5857 /**
5858  * xmlParseNotationType:
5859  * @ctxt:  an XML parser context
5860  *
5861  * DEPRECATED: Internal function, don't use.
5862  *
5863  * parse an Notation attribute type.
5864  *
5865  * Note: the leading 'NOTATION' S part has already being parsed...
5866  *
5867  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5868  *
5869  * [ VC: Notation Attributes ]
5870  * Values of this type must match one of the notation names included
5871  * in the declaration; all notation names in the declaration must be declared.
5872  *
5873  * Returns: the notation attribute tree built while parsing
5874  */
5875 
5876 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5877 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5878     const xmlChar *name;
5879     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5880 
5881     if (RAW != '(') {
5882 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5883 	return(NULL);
5884     }
5885     do {
5886         NEXT;
5887 	SKIP_BLANKS;
5888         name = xmlParseName(ctxt);
5889 	if (name == NULL) {
5890 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5891 			   "Name expected in NOTATION declaration\n");
5892             xmlFreeEnumeration(ret);
5893 	    return(NULL);
5894 	}
5895 	tmp = ret;
5896 	while (tmp != NULL) {
5897 	    if (xmlStrEqual(name, tmp->name)) {
5898 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5899 	  "standalone: attribute notation value token %s duplicated\n",
5900 				 name, NULL);
5901 		if (!xmlDictOwns(ctxt->dict, name))
5902 		    xmlFree((xmlChar *) name);
5903 		break;
5904 	    }
5905 	    tmp = tmp->next;
5906 	}
5907 	if (tmp == NULL) {
5908 	    cur = xmlCreateEnumeration(name);
5909 	    if (cur == NULL) {
5910                 xmlFreeEnumeration(ret);
5911                 return(NULL);
5912             }
5913 	    if (last == NULL) ret = last = cur;
5914 	    else {
5915 		last->next = cur;
5916 		last = cur;
5917 	    }
5918 	}
5919 	SKIP_BLANKS;
5920     } while (RAW == '|');
5921     if (RAW != ')') {
5922 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5923         xmlFreeEnumeration(ret);
5924 	return(NULL);
5925     }
5926     NEXT;
5927     return(ret);
5928 }
5929 
5930 /**
5931  * xmlParseEnumerationType:
5932  * @ctxt:  an XML parser context
5933  *
5934  * DEPRECATED: Internal function, don't use.
5935  *
5936  * parse an Enumeration attribute type.
5937  *
5938  * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5939  *
5940  * [ VC: Enumeration ]
5941  * Values of this type must match one of the Nmtoken tokens in
5942  * the declaration
5943  *
5944  * Returns: the enumeration attribute tree built while parsing
5945  */
5946 
5947 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5948 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5949     xmlChar *name;
5950     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5951 
5952     if (RAW != '(') {
5953 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5954 	return(NULL);
5955     }
5956     do {
5957         NEXT;
5958 	SKIP_BLANKS;
5959         name = xmlParseNmtoken(ctxt);
5960 	if (name == NULL) {
5961 	    xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5962 	    return(ret);
5963 	}
5964 	tmp = ret;
5965 	while (tmp != NULL) {
5966 	    if (xmlStrEqual(name, tmp->name)) {
5967 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5968 	  "standalone: attribute enumeration value token %s duplicated\n",
5969 				 name, NULL);
5970 		if (!xmlDictOwns(ctxt->dict, name))
5971 		    xmlFree(name);
5972 		break;
5973 	    }
5974 	    tmp = tmp->next;
5975 	}
5976 	if (tmp == NULL) {
5977 	    cur = xmlCreateEnumeration(name);
5978 	    if (!xmlDictOwns(ctxt->dict, name))
5979 		xmlFree(name);
5980 	    if (cur == NULL) {
5981                 xmlFreeEnumeration(ret);
5982                 return(NULL);
5983             }
5984 	    if (last == NULL) ret = last = cur;
5985 	    else {
5986 		last->next = cur;
5987 		last = cur;
5988 	    }
5989 	}
5990 	SKIP_BLANKS;
5991     } while (RAW == '|');
5992     if (RAW != ')') {
5993 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5994 	return(ret);
5995     }
5996     NEXT;
5997     return(ret);
5998 }
5999 
6000 /**
6001  * xmlParseEnumeratedType:
6002  * @ctxt:  an XML parser context
6003  * @tree:  the enumeration tree built while parsing
6004  *
6005  * DEPRECATED: Internal function, don't use.
6006  *
6007  * parse an Enumerated attribute type.
6008  *
6009  * [57] EnumeratedType ::= NotationType | Enumeration
6010  *
6011  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6012  *
6013  *
6014  * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
6015  */
6016 
6017 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)6018 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6019     if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
6020 	SKIP(8);
6021 	if (SKIP_BLANKS == 0) {
6022 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6023 			   "Space required after 'NOTATION'\n");
6024 	    return(0);
6025 	}
6026 	*tree = xmlParseNotationType(ctxt);
6027 	if (*tree == NULL) return(0);
6028 	return(XML_ATTRIBUTE_NOTATION);
6029     }
6030     *tree = xmlParseEnumerationType(ctxt);
6031     if (*tree == NULL) return(0);
6032     return(XML_ATTRIBUTE_ENUMERATION);
6033 }
6034 
6035 /**
6036  * xmlParseAttributeType:
6037  * @ctxt:  an XML parser context
6038  * @tree:  the enumeration tree built while parsing
6039  *
6040  * DEPRECATED: Internal function, don't use.
6041  *
6042  * parse the Attribute list def for an element
6043  *
6044  * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6045  *
6046  * [55] StringType ::= 'CDATA'
6047  *
6048  * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6049  *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6050  *
6051  * Validity constraints for attribute values syntax are checked in
6052  * xmlValidateAttributeValue()
6053  *
6054  * [ VC: ID ]
6055  * Values of type ID must match the Name production. A name must not
6056  * appear more than once in an XML document as a value of this type;
6057  * i.e., ID values must uniquely identify the elements which bear them.
6058  *
6059  * [ VC: One ID per Element Type ]
6060  * No element type may have more than one ID attribute specified.
6061  *
6062  * [ VC: ID Attribute Default ]
6063  * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6064  *
6065  * [ VC: IDREF ]
6066  * Values of type IDREF must match the Name production, and values
6067  * of type IDREFS must match Names; each IDREF Name must match the value
6068  * of an ID attribute on some element in the XML document; i.e. IDREF
6069  * values must match the value of some ID attribute.
6070  *
6071  * [ VC: Entity Name ]
6072  * Values of type ENTITY must match the Name production, values
6073  * of type ENTITIES must match Names; each Entity Name must match the
6074  * name of an unparsed entity declared in the DTD.
6075  *
6076  * [ VC: Name Token ]
6077  * Values of type NMTOKEN must match the Nmtoken production; values
6078  * of type NMTOKENS must match Nmtokens.
6079  *
6080  * Returns the attribute type
6081  */
6082 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)6083 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6084     if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6085 	SKIP(5);
6086 	return(XML_ATTRIBUTE_CDATA);
6087      } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6088 	SKIP(6);
6089 	return(XML_ATTRIBUTE_IDREFS);
6090      } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6091 	SKIP(5);
6092 	return(XML_ATTRIBUTE_IDREF);
6093      } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6094         SKIP(2);
6095 	return(XML_ATTRIBUTE_ID);
6096      } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6097 	SKIP(6);
6098 	return(XML_ATTRIBUTE_ENTITY);
6099      } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6100 	SKIP(8);
6101 	return(XML_ATTRIBUTE_ENTITIES);
6102      } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6103 	SKIP(8);
6104 	return(XML_ATTRIBUTE_NMTOKENS);
6105      } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6106 	SKIP(7);
6107 	return(XML_ATTRIBUTE_NMTOKEN);
6108      }
6109      return(xmlParseEnumeratedType(ctxt, tree));
6110 }
6111 
6112 /**
6113  * xmlParseAttributeListDecl:
6114  * @ctxt:  an XML parser context
6115  *
6116  * DEPRECATED: Internal function, don't use.
6117  *
6118  * Parse an attribute list declaration for an element. Always consumes '<!'.
6119  *
6120  * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6121  *
6122  * [53] AttDef ::= S Name S AttType S DefaultDecl
6123  *
6124  */
6125 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)6126 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6127     const xmlChar *elemName;
6128     const xmlChar *attrName;
6129     xmlEnumerationPtr tree;
6130 
6131     if ((CUR != '<') || (NXT(1) != '!'))
6132         return;
6133     SKIP(2);
6134 
6135     if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6136 	int inputid = ctxt->input->id;
6137 
6138 	SKIP(7);
6139 	if (SKIP_BLANKS == 0) {
6140 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6141 		                 "Space required after '<!ATTLIST'\n");
6142 	}
6143         elemName = xmlParseName(ctxt);
6144 	if (elemName == NULL) {
6145 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6146 			   "ATTLIST: no name for Element\n");
6147 	    return;
6148 	}
6149 	SKIP_BLANKS;
6150 	GROW;
6151 	while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6152 	    int type;
6153 	    int def;
6154 	    xmlChar *defaultValue = NULL;
6155 
6156 	    GROW;
6157             tree = NULL;
6158 	    attrName = xmlParseName(ctxt);
6159 	    if (attrName == NULL) {
6160 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6161 			       "ATTLIST: no name for Attribute\n");
6162 		break;
6163 	    }
6164 	    GROW;
6165 	    if (SKIP_BLANKS == 0) {
6166 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6167 		        "Space required after the attribute name\n");
6168 		break;
6169 	    }
6170 
6171 	    type = xmlParseAttributeType(ctxt, &tree);
6172 	    if (type <= 0) {
6173 	        break;
6174 	    }
6175 
6176 	    GROW;
6177 	    if (SKIP_BLANKS == 0) {
6178 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6179 			       "Space required after the attribute type\n");
6180 	        if (tree != NULL)
6181 		    xmlFreeEnumeration(tree);
6182 		break;
6183 	    }
6184 
6185 	    def = xmlParseDefaultDecl(ctxt, &defaultValue);
6186 	    if (def <= 0) {
6187                 if (defaultValue != NULL)
6188 		    xmlFree(defaultValue);
6189 	        if (tree != NULL)
6190 		    xmlFreeEnumeration(tree);
6191 	        break;
6192 	    }
6193 	    if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6194 	        xmlAttrNormalizeSpace(defaultValue, defaultValue);
6195 
6196 	    GROW;
6197             if (RAW != '>') {
6198 		if (SKIP_BLANKS == 0) {
6199 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6200 			"Space required after the attribute default value\n");
6201 		    if (defaultValue != NULL)
6202 			xmlFree(defaultValue);
6203 		    if (tree != NULL)
6204 			xmlFreeEnumeration(tree);
6205 		    break;
6206 		}
6207 	    }
6208 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6209 		(ctxt->sax->attributeDecl != NULL))
6210 		ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6211 	                        type, def, defaultValue, tree);
6212 	    else if (tree != NULL)
6213 		xmlFreeEnumeration(tree);
6214 
6215 	    if ((ctxt->sax2) && (defaultValue != NULL) &&
6216 	        (def != XML_ATTRIBUTE_IMPLIED) &&
6217 		(def != XML_ATTRIBUTE_REQUIRED)) {
6218 		xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6219 	    }
6220 	    if (ctxt->sax2) {
6221 		xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6222 	    }
6223 	    if (defaultValue != NULL)
6224 	        xmlFree(defaultValue);
6225 	    GROW;
6226 	}
6227 	if (RAW == '>') {
6228 	    if (inputid != ctxt->input->id) {
6229 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6230                                "Attribute list declaration doesn't start and"
6231                                " stop in the same entity\n");
6232 	    }
6233 	    NEXT;
6234 	}
6235     }
6236 }
6237 
6238 /**
6239  * xmlParseElementMixedContentDecl:
6240  * @ctxt:  an XML parser context
6241  * @inputchk:  the input used for the current entity, needed for boundary checks
6242  *
6243  * DEPRECATED: Internal function, don't use.
6244  *
6245  * parse the declaration for a Mixed Element content
6246  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6247  *
6248  * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6249  *                '(' S? '#PCDATA' S? ')'
6250  *
6251  * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6252  *
6253  * [ VC: No Duplicate Types ]
6254  * The same name must not appear more than once in a single
6255  * mixed-content declaration.
6256  *
6257  * returns: the list of the xmlElementContentPtr describing the element choices
6258  */
6259 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6260 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6261     xmlElementContentPtr ret = NULL, cur = NULL, n;
6262     const xmlChar *elem = NULL;
6263 
6264     GROW;
6265     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6266 	SKIP(7);
6267 	SKIP_BLANKS;
6268 	if (RAW == ')') {
6269 	    if (ctxt->input->id != inputchk) {
6270 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6271                                "Element content declaration doesn't start and"
6272                                " stop in the same entity\n");
6273 	    }
6274 	    NEXT;
6275 	    ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6276 	    if (ret == NULL)
6277 	        return(NULL);
6278 	    if (RAW == '*') {
6279 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6280 		NEXT;
6281 	    }
6282 	    return(ret);
6283 	}
6284 	if ((RAW == '(') || (RAW == '|')) {
6285 	    ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6286 	    if (ret == NULL) return(NULL);
6287 	}
6288 	while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6289 	    NEXT;
6290 	    if (elem == NULL) {
6291 	        ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6292 		if (ret == NULL) {
6293 		    xmlFreeDocElementContent(ctxt->myDoc, cur);
6294                     return(NULL);
6295                 }
6296 		ret->c1 = cur;
6297 		if (cur != NULL)
6298 		    cur->parent = ret;
6299 		cur = ret;
6300 	    } else {
6301 	        n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6302 		if (n == NULL) {
6303 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6304                     return(NULL);
6305                 }
6306 		n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6307 		if (n->c1 != NULL)
6308 		    n->c1->parent = n;
6309 	        cur->c2 = n;
6310 		if (n != NULL)
6311 		    n->parent = cur;
6312 		cur = n;
6313 	    }
6314 	    SKIP_BLANKS;
6315 	    elem = xmlParseName(ctxt);
6316 	    if (elem == NULL) {
6317 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6318 			"xmlParseElementMixedContentDecl : Name expected\n");
6319 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6320 		return(NULL);
6321 	    }
6322 	    SKIP_BLANKS;
6323 	    GROW;
6324 	}
6325 	if ((RAW == ')') && (NXT(1) == '*')) {
6326 	    if (elem != NULL) {
6327 		cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6328 		                               XML_ELEMENT_CONTENT_ELEMENT);
6329 		if (cur->c2 != NULL)
6330 		    cur->c2->parent = cur;
6331             }
6332             if (ret != NULL)
6333                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6334 	    if (ctxt->input->id != inputchk) {
6335 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6336                                "Element content declaration doesn't start and"
6337                                " stop in the same entity\n");
6338 	    }
6339 	    SKIP(2);
6340 	} else {
6341 	    xmlFreeDocElementContent(ctxt->myDoc, ret);
6342 	    xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6343 	    return(NULL);
6344 	}
6345 
6346     } else {
6347 	xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6348     }
6349     return(ret);
6350 }
6351 
6352 /**
6353  * xmlParseElementChildrenContentDeclPriv:
6354  * @ctxt:  an XML parser context
6355  * @inputchk:  the input used for the current entity, needed for boundary checks
6356  * @depth: the level of recursion
6357  *
6358  * parse the declaration for a Mixed Element content
6359  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6360  *
6361  *
6362  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6363  *
6364  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6365  *
6366  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6367  *
6368  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6369  *
6370  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6371  * TODO Parameter-entity replacement text must be properly nested
6372  *	with parenthesized groups. That is to say, if either of the
6373  *	opening or closing parentheses in a choice, seq, or Mixed
6374  *	construct is contained in the replacement text for a parameter
6375  *	entity, both must be contained in the same replacement text. For
6376  *	interoperability, if a parameter-entity reference appears in a
6377  *	choice, seq, or Mixed construct, its replacement text should not
6378  *	be empty, and neither the first nor last non-blank character of
6379  *	the replacement text should be a connector (| or ,).
6380  *
6381  * Returns the tree of xmlElementContentPtr describing the element
6382  *          hierarchy.
6383  */
6384 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6385 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6386                                        int depth) {
6387     xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6388     const xmlChar *elem;
6389     xmlChar type = 0;
6390 
6391     if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6392         (depth >  2048)) {
6393         xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6394 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6395                           depth);
6396 	return(NULL);
6397     }
6398     SKIP_BLANKS;
6399     GROW;
6400     if (RAW == '(') {
6401 	int inputid = ctxt->input->id;
6402 
6403         /* Recurse on first child */
6404 	NEXT;
6405 	SKIP_BLANKS;
6406         cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6407                                                            depth + 1);
6408         if (cur == NULL)
6409             return(NULL);
6410 	SKIP_BLANKS;
6411 	GROW;
6412     } else {
6413 	elem = xmlParseName(ctxt);
6414 	if (elem == NULL) {
6415 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6416 	    return(NULL);
6417 	}
6418         cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6419 	if (cur == NULL) {
6420 	    xmlErrMemory(ctxt, NULL);
6421 	    return(NULL);
6422 	}
6423 	GROW;
6424 	if (RAW == '?') {
6425 	    cur->ocur = XML_ELEMENT_CONTENT_OPT;
6426 	    NEXT;
6427 	} else if (RAW == '*') {
6428 	    cur->ocur = XML_ELEMENT_CONTENT_MULT;
6429 	    NEXT;
6430 	} else if (RAW == '+') {
6431 	    cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6432 	    NEXT;
6433 	} else {
6434 	    cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6435 	}
6436 	GROW;
6437     }
6438     SKIP_BLANKS;
6439     while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6440         /*
6441 	 * Each loop we parse one separator and one element.
6442 	 */
6443         if (RAW == ',') {
6444 	    if (type == 0) type = CUR;
6445 
6446 	    /*
6447 	     * Detect "Name | Name , Name" error
6448 	     */
6449 	    else if (type != CUR) {
6450 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6451 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6452 		                  type);
6453 		if ((last != NULL) && (last != ret))
6454 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6455 		if (ret != NULL)
6456 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6457 		return(NULL);
6458 	    }
6459 	    NEXT;
6460 
6461 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6462 	    if (op == NULL) {
6463 		if ((last != NULL) && (last != ret))
6464 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6465 	        xmlFreeDocElementContent(ctxt->myDoc, ret);
6466 		return(NULL);
6467 	    }
6468 	    if (last == NULL) {
6469 		op->c1 = ret;
6470 		if (ret != NULL)
6471 		    ret->parent = op;
6472 		ret = cur = op;
6473 	    } else {
6474 	        cur->c2 = op;
6475 		if (op != NULL)
6476 		    op->parent = cur;
6477 		op->c1 = last;
6478 		if (last != NULL)
6479 		    last->parent = op;
6480 		cur =op;
6481 		last = NULL;
6482 	    }
6483 	} else if (RAW == '|') {
6484 	    if (type == 0) type = CUR;
6485 
6486 	    /*
6487 	     * Detect "Name , Name | Name" error
6488 	     */
6489 	    else if (type != CUR) {
6490 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6491 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6492 				  type);
6493 		if ((last != NULL) && (last != ret))
6494 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6495 		if (ret != NULL)
6496 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6497 		return(NULL);
6498 	    }
6499 	    NEXT;
6500 
6501 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6502 	    if (op == NULL) {
6503 		if ((last != NULL) && (last != ret))
6504 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6505 		if (ret != NULL)
6506 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6507 		return(NULL);
6508 	    }
6509 	    if (last == NULL) {
6510 		op->c1 = ret;
6511 		if (ret != NULL)
6512 		    ret->parent = op;
6513 		ret = cur = op;
6514 	    } else {
6515 	        cur->c2 = op;
6516 		if (op != NULL)
6517 		    op->parent = cur;
6518 		op->c1 = last;
6519 		if (last != NULL)
6520 		    last->parent = op;
6521 		cur =op;
6522 		last = NULL;
6523 	    }
6524 	} else {
6525 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6526 	    if ((last != NULL) && (last != ret))
6527 	        xmlFreeDocElementContent(ctxt->myDoc, last);
6528 	    if (ret != NULL)
6529 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6530 	    return(NULL);
6531 	}
6532 	GROW;
6533 	SKIP_BLANKS;
6534 	GROW;
6535 	if (RAW == '(') {
6536 	    int inputid = ctxt->input->id;
6537 	    /* Recurse on second child */
6538 	    NEXT;
6539 	    SKIP_BLANKS;
6540 	    last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6541                                                           depth + 1);
6542             if (last == NULL) {
6543 		if (ret != NULL)
6544 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6545 		return(NULL);
6546             }
6547 	    SKIP_BLANKS;
6548 	} else {
6549 	    elem = xmlParseName(ctxt);
6550 	    if (elem == NULL) {
6551 		xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6552 		if (ret != NULL)
6553 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6554 		return(NULL);
6555 	    }
6556 	    last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6557 	    if (last == NULL) {
6558 		if (ret != NULL)
6559 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6560 		return(NULL);
6561 	    }
6562 	    if (RAW == '?') {
6563 		last->ocur = XML_ELEMENT_CONTENT_OPT;
6564 		NEXT;
6565 	    } else if (RAW == '*') {
6566 		last->ocur = XML_ELEMENT_CONTENT_MULT;
6567 		NEXT;
6568 	    } else if (RAW == '+') {
6569 		last->ocur = XML_ELEMENT_CONTENT_PLUS;
6570 		NEXT;
6571 	    } else {
6572 		last->ocur = XML_ELEMENT_CONTENT_ONCE;
6573 	    }
6574 	}
6575 	SKIP_BLANKS;
6576 	GROW;
6577     }
6578     if ((cur != NULL) && (last != NULL)) {
6579         cur->c2 = last;
6580 	if (last != NULL)
6581 	    last->parent = cur;
6582     }
6583     if (ctxt->input->id != inputchk) {
6584 	xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6585                        "Element content declaration doesn't start and stop in"
6586                        " the same entity\n");
6587     }
6588     NEXT;
6589     if (RAW == '?') {
6590 	if (ret != NULL) {
6591 	    if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6592 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6593 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6594 	    else
6595 	        ret->ocur = XML_ELEMENT_CONTENT_OPT;
6596 	}
6597 	NEXT;
6598     } else if (RAW == '*') {
6599 	if (ret != NULL) {
6600 	    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6601 	    cur = ret;
6602 	    /*
6603 	     * Some normalization:
6604 	     * (a | b* | c?)* == (a | b | c)*
6605 	     */
6606 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6607 		if ((cur->c1 != NULL) &&
6608 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6609 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6610 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6611 		if ((cur->c2 != NULL) &&
6612 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6613 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6614 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6615 		cur = cur->c2;
6616 	    }
6617 	}
6618 	NEXT;
6619     } else if (RAW == '+') {
6620 	if (ret != NULL) {
6621 	    int found = 0;
6622 
6623 	    if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6624 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6625 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6626 	    else
6627 	        ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6628 	    /*
6629 	     * Some normalization:
6630 	     * (a | b*)+ == (a | b)*
6631 	     * (a | b?)+ == (a | b)*
6632 	     */
6633 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6634 		if ((cur->c1 != NULL) &&
6635 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6636 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6637 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6638 		    found = 1;
6639 		}
6640 		if ((cur->c2 != NULL) &&
6641 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6642 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6643 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6644 		    found = 1;
6645 		}
6646 		cur = cur->c2;
6647 	    }
6648 	    if (found)
6649 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6650 	}
6651 	NEXT;
6652     }
6653     return(ret);
6654 }
6655 
6656 /**
6657  * xmlParseElementChildrenContentDecl:
6658  * @ctxt:  an XML parser context
6659  * @inputchk:  the input used for the current entity, needed for boundary checks
6660  *
6661  * DEPRECATED: Internal function, don't use.
6662  *
6663  * parse the declaration for a Mixed Element content
6664  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6665  *
6666  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6667  *
6668  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6669  *
6670  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6671  *
6672  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6673  *
6674  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6675  * TODO Parameter-entity replacement text must be properly nested
6676  *	with parenthesized groups. That is to say, if either of the
6677  *	opening or closing parentheses in a choice, seq, or Mixed
6678  *	construct is contained in the replacement text for a parameter
6679  *	entity, both must be contained in the same replacement text. For
6680  *	interoperability, if a parameter-entity reference appears in a
6681  *	choice, seq, or Mixed construct, its replacement text should not
6682  *	be empty, and neither the first nor last non-blank character of
6683  *	the replacement text should be a connector (| or ,).
6684  *
6685  * Returns the tree of xmlElementContentPtr describing the element
6686  *          hierarchy.
6687  */
6688 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6689 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6690     /* stub left for API/ABI compat */
6691     return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6692 }
6693 
6694 /**
6695  * xmlParseElementContentDecl:
6696  * @ctxt:  an XML parser context
6697  * @name:  the name of the element being defined.
6698  * @result:  the Element Content pointer will be stored here if any
6699  *
6700  * DEPRECATED: Internal function, don't use.
6701  *
6702  * parse the declaration for an Element content either Mixed or Children,
6703  * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6704  *
6705  * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6706  *
6707  * returns: the type of element content XML_ELEMENT_TYPE_xxx
6708  */
6709 
6710 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6711 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6712                            xmlElementContentPtr *result) {
6713 
6714     xmlElementContentPtr tree = NULL;
6715     int inputid = ctxt->input->id;
6716     int res;
6717 
6718     *result = NULL;
6719 
6720     if (RAW != '(') {
6721 	xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6722 		"xmlParseElementContentDecl : %s '(' expected\n", name);
6723 	return(-1);
6724     }
6725     NEXT;
6726     GROW;
6727     if (ctxt->instate == XML_PARSER_EOF)
6728         return(-1);
6729     SKIP_BLANKS;
6730     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6731         tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6732 	res = XML_ELEMENT_TYPE_MIXED;
6733     } else {
6734         tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6735 	res = XML_ELEMENT_TYPE_ELEMENT;
6736     }
6737     SKIP_BLANKS;
6738     *result = tree;
6739     return(res);
6740 }
6741 
6742 /**
6743  * xmlParseElementDecl:
6744  * @ctxt:  an XML parser context
6745  *
6746  * DEPRECATED: Internal function, don't use.
6747  *
6748  * Parse an element declaration. Always consumes '<!'.
6749  *
6750  * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6751  *
6752  * [ VC: Unique Element Type Declaration ]
6753  * No element type may be declared more than once
6754  *
6755  * Returns the type of the element, or -1 in case of error
6756  */
6757 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6758 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6759     const xmlChar *name;
6760     int ret = -1;
6761     xmlElementContentPtr content  = NULL;
6762 
6763     if ((CUR != '<') || (NXT(1) != '!'))
6764         return(ret);
6765     SKIP(2);
6766 
6767     /* GROW; done in the caller */
6768     if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6769 	int inputid = ctxt->input->id;
6770 
6771 	SKIP(7);
6772 	if (SKIP_BLANKS == 0) {
6773 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6774 		           "Space required after 'ELEMENT'\n");
6775 	    return(-1);
6776 	}
6777         name = xmlParseName(ctxt);
6778 	if (name == NULL) {
6779 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6780 			   "xmlParseElementDecl: no name for Element\n");
6781 	    return(-1);
6782 	}
6783 	if (SKIP_BLANKS == 0) {
6784 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6785 			   "Space required after the element name\n");
6786 	}
6787 	if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6788 	    SKIP(5);
6789 	    /*
6790 	     * Element must always be empty.
6791 	     */
6792 	    ret = XML_ELEMENT_TYPE_EMPTY;
6793 	} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6794 	           (NXT(2) == 'Y')) {
6795 	    SKIP(3);
6796 	    /*
6797 	     * Element is a generic container.
6798 	     */
6799 	    ret = XML_ELEMENT_TYPE_ANY;
6800 	} else if (RAW == '(') {
6801 	    ret = xmlParseElementContentDecl(ctxt, name, &content);
6802 	} else {
6803 	    /*
6804 	     * [ WFC: PEs in Internal Subset ] error handling.
6805 	     */
6806 	    if ((RAW == '%') && (ctxt->external == 0) &&
6807 	        (ctxt->inputNr == 1)) {
6808 		xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6809 	  "PEReference: forbidden within markup decl in internal subset\n");
6810 	    } else {
6811 		xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6812 		      "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6813             }
6814 	    return(-1);
6815 	}
6816 
6817 	SKIP_BLANKS;
6818 
6819 	if (RAW != '>') {
6820 	    xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6821 	    if (content != NULL) {
6822 		xmlFreeDocElementContent(ctxt->myDoc, content);
6823 	    }
6824 	} else {
6825 	    if (inputid != ctxt->input->id) {
6826 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6827                                "Element declaration doesn't start and stop in"
6828                                " the same entity\n");
6829 	    }
6830 
6831 	    NEXT;
6832 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6833 		(ctxt->sax->elementDecl != NULL)) {
6834 		if (content != NULL)
6835 		    content->parent = NULL;
6836 	        ctxt->sax->elementDecl(ctxt->userData, name, ret,
6837 		                       content);
6838 		if ((content != NULL) && (content->parent == NULL)) {
6839 		    /*
6840 		     * this is a trick: if xmlAddElementDecl is called,
6841 		     * instead of copying the full tree it is plugged directly
6842 		     * if called from the parser. Avoid duplicating the
6843 		     * interfaces or change the API/ABI
6844 		     */
6845 		    xmlFreeDocElementContent(ctxt->myDoc, content);
6846 		}
6847 	    } else if (content != NULL) {
6848 		xmlFreeDocElementContent(ctxt->myDoc, content);
6849 	    }
6850 	}
6851     }
6852     return(ret);
6853 }
6854 
6855 /**
6856  * xmlParseConditionalSections
6857  * @ctxt:  an XML parser context
6858  *
6859  * Parse a conditional section. Always consumes '<!['.
6860  *
6861  * [61] conditionalSect ::= includeSect | ignoreSect
6862  * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6863  * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6864  * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6865  * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6866  */
6867 
6868 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6869 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6870     int *inputIds = NULL;
6871     size_t inputIdsSize = 0;
6872     size_t depth = 0;
6873 
6874     while (ctxt->instate != XML_PARSER_EOF) {
6875         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6876             int id = ctxt->input->id;
6877 
6878             SKIP(3);
6879             SKIP_BLANKS;
6880 
6881             if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6882                 SKIP(7);
6883                 SKIP_BLANKS;
6884                 if (RAW != '[') {
6885                     xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6886                     xmlHaltParser(ctxt);
6887                     goto error;
6888                 }
6889                 if (ctxt->input->id != id) {
6890                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6891                                    "All markup of the conditional section is"
6892                                    " not in the same entity\n");
6893                 }
6894                 NEXT;
6895 
6896                 if (inputIdsSize <= depth) {
6897                     int *tmp;
6898 
6899                     inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6900                     tmp = (int *) xmlRealloc(inputIds,
6901                             inputIdsSize * sizeof(int));
6902                     if (tmp == NULL) {
6903                         xmlErrMemory(ctxt, NULL);
6904                         goto error;
6905                     }
6906                     inputIds = tmp;
6907                 }
6908                 inputIds[depth] = id;
6909                 depth++;
6910             } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6911                 size_t ignoreDepth = 0;
6912 
6913                 SKIP(6);
6914                 SKIP_BLANKS;
6915                 if (RAW != '[') {
6916                     xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6917                     xmlHaltParser(ctxt);
6918                     goto error;
6919                 }
6920                 if (ctxt->input->id != id) {
6921                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6922                                    "All markup of the conditional section is"
6923                                    " not in the same entity\n");
6924                 }
6925                 NEXT;
6926 
6927                 while (RAW != 0) {
6928                     if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6929                         SKIP(3);
6930                         ignoreDepth++;
6931                         /* Check for integer overflow */
6932                         if (ignoreDepth == 0) {
6933                             xmlErrMemory(ctxt, NULL);
6934                             goto error;
6935                         }
6936                     } else if ((RAW == ']') && (NXT(1) == ']') &&
6937                                (NXT(2) == '>')) {
6938                         if (ignoreDepth == 0)
6939                             break;
6940                         SKIP(3);
6941                         ignoreDepth--;
6942                     } else {
6943                         NEXT;
6944                     }
6945                 }
6946 
6947 		if (RAW == 0) {
6948 		    xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6949                     goto error;
6950 		}
6951                 if (ctxt->input->id != id) {
6952                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6953                                    "All markup of the conditional section is"
6954                                    " not in the same entity\n");
6955                 }
6956                 SKIP(3);
6957             } else {
6958                 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6959                 xmlHaltParser(ctxt);
6960                 goto error;
6961             }
6962         } else if ((depth > 0) &&
6963                    (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6964             depth--;
6965             if (ctxt->input->id != inputIds[depth]) {
6966                 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6967                                "All markup of the conditional section is not"
6968                                " in the same entity\n");
6969             }
6970             SKIP(3);
6971         } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6972             xmlParseMarkupDecl(ctxt);
6973         } else {
6974             xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6975             xmlHaltParser(ctxt);
6976             goto error;
6977         }
6978 
6979         if (depth == 0)
6980             break;
6981 
6982         SKIP_BLANKS;
6983         SHRINK;
6984         GROW;
6985     }
6986 
6987 error:
6988     xmlFree(inputIds);
6989 }
6990 
6991 /**
6992  * xmlParseMarkupDecl:
6993  * @ctxt:  an XML parser context
6994  *
6995  * DEPRECATED: Internal function, don't use.
6996  *
6997  * Parse markup declarations. Always consumes '<!' or '<?'.
6998  *
6999  * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
7000  *                     NotationDecl | PI | Comment
7001  *
7002  * [ VC: Proper Declaration/PE Nesting ]
7003  * Parameter-entity replacement text must be properly nested with
7004  * markup declarations. That is to say, if either the first character
7005  * or the last character of a markup declaration (markupdecl above) is
7006  * contained in the replacement text for a parameter-entity reference,
7007  * both must be contained in the same replacement text.
7008  *
7009  * [ WFC: PEs in Internal Subset ]
7010  * In the internal DTD subset, parameter-entity references can occur
7011  * only where markup declarations can occur, not within markup declarations.
7012  * (This does not apply to references that occur in external parameter
7013  * entities or to the external subset.)
7014  */
7015 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)7016 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
7017     GROW;
7018     if (CUR == '<') {
7019         if (NXT(1) == '!') {
7020 	    switch (NXT(2)) {
7021 	        case 'E':
7022 		    if (NXT(3) == 'L')
7023 			xmlParseElementDecl(ctxt);
7024 		    else if (NXT(3) == 'N')
7025 			xmlParseEntityDecl(ctxt);
7026                     else
7027                         SKIP(2);
7028 		    break;
7029 	        case 'A':
7030 		    xmlParseAttributeListDecl(ctxt);
7031 		    break;
7032 	        case 'N':
7033 		    xmlParseNotationDecl(ctxt);
7034 		    break;
7035 	        case '-':
7036 		    xmlParseComment(ctxt);
7037 		    break;
7038 		default:
7039 		    /* there is an error but it will be detected later */
7040                     SKIP(2);
7041 		    break;
7042 	    }
7043 	} else if (NXT(1) == '?') {
7044 	    xmlParsePI(ctxt);
7045 	}
7046     }
7047 
7048     /*
7049      * detect requirement to exit there and act accordingly
7050      * and avoid having instate overridden later on
7051      */
7052     if (ctxt->instate == XML_PARSER_EOF)
7053         return;
7054 
7055     ctxt->instate = XML_PARSER_DTD;
7056 }
7057 
7058 /**
7059  * xmlParseTextDecl:
7060  * @ctxt:  an XML parser context
7061  *
7062  * DEPRECATED: Internal function, don't use.
7063  *
7064  * parse an XML declaration header for external entities
7065  *
7066  * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7067  */
7068 
7069 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)7070 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7071     xmlChar *version;
7072     int oldstate;
7073 
7074     /*
7075      * We know that '<?xml' is here.
7076      */
7077     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7078 	SKIP(5);
7079     } else {
7080 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7081 	return;
7082     }
7083 
7084     /* Avoid expansion of parameter entities when skipping blanks. */
7085     oldstate = ctxt->instate;
7086     ctxt->instate = XML_PARSER_START;
7087 
7088     if (SKIP_BLANKS == 0) {
7089 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7090 		       "Space needed after '<?xml'\n");
7091     }
7092 
7093     /*
7094      * We may have the VersionInfo here.
7095      */
7096     version = xmlParseVersionInfo(ctxt);
7097     if (version == NULL)
7098 	version = xmlCharStrdup(XML_DEFAULT_VERSION);
7099     else {
7100 	if (SKIP_BLANKS == 0) {
7101 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7102 		           "Space needed here\n");
7103 	}
7104     }
7105     ctxt->input->version = version;
7106 
7107     /*
7108      * We must have the encoding declaration
7109      */
7110     xmlParseEncodingDecl(ctxt);
7111     if (ctxt->instate == XML_PARSER_EOF)
7112         return;
7113     if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7114 	/*
7115 	 * The XML REC instructs us to stop parsing right here
7116 	 */
7117         ctxt->instate = oldstate;
7118         return;
7119     }
7120 
7121     SKIP_BLANKS;
7122     if ((RAW == '?') && (NXT(1) == '>')) {
7123         SKIP(2);
7124     } else if (RAW == '>') {
7125         /* Deprecated old WD ... */
7126 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7127 	NEXT;
7128     } else {
7129         int c;
7130 
7131 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7132         while ((c = CUR) != 0) {
7133             NEXT;
7134             if (c == '>')
7135                 break;
7136         }
7137         if (ctxt->instate == XML_PARSER_EOF)
7138             return;
7139     }
7140 
7141     ctxt->instate = oldstate;
7142 }
7143 
7144 /**
7145  * xmlParseExternalSubset:
7146  * @ctxt:  an XML parser context
7147  * @ExternalID: the external identifier
7148  * @SystemID: the system identifier (or URL)
7149  *
7150  * parse Markup declarations from an external subset
7151  *
7152  * [30] extSubset ::= textDecl? extSubsetDecl
7153  *
7154  * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7155  */
7156 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)7157 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7158                        const xmlChar *SystemID) {
7159     xmlDetectSAX2(ctxt);
7160 
7161     xmlDetectEncoding(ctxt);
7162 
7163     if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7164 	xmlParseTextDecl(ctxt);
7165 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7166 	    /*
7167 	     * The XML REC instructs us to stop parsing right here
7168 	     */
7169 	    xmlHaltParser(ctxt);
7170 	    return;
7171 	}
7172     }
7173     if (ctxt->myDoc == NULL) {
7174         ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7175 	if (ctxt->myDoc == NULL) {
7176 	    xmlErrMemory(ctxt, "New Doc failed");
7177 	    return;
7178 	}
7179 	ctxt->myDoc->properties = XML_DOC_INTERNAL;
7180     }
7181     if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7182         xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7183 
7184     ctxt->instate = XML_PARSER_DTD;
7185     ctxt->external = 1;
7186     SKIP_BLANKS;
7187     while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7188 	GROW;
7189         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7190             xmlParseConditionalSections(ctxt);
7191         } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7192             xmlParseMarkupDecl(ctxt);
7193         } else {
7194             xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7195             xmlHaltParser(ctxt);
7196             return;
7197         }
7198         SKIP_BLANKS;
7199         SHRINK;
7200     }
7201 
7202     if (RAW != 0) {
7203 	xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7204     }
7205 
7206 }
7207 
7208 /**
7209  * xmlParseReference:
7210  * @ctxt:  an XML parser context
7211  *
7212  * DEPRECATED: Internal function, don't use.
7213  *
7214  * parse and handle entity references in content, depending on the SAX
7215  * interface, this may end-up in a call to character() if this is a
7216  * CharRef, a predefined entity, if there is no reference() callback.
7217  * or if the parser was asked to switch to that mode.
7218  *
7219  * Always consumes '&'.
7220  *
7221  * [67] Reference ::= EntityRef | CharRef
7222  */
7223 void
xmlParseReference(xmlParserCtxtPtr ctxt)7224 xmlParseReference(xmlParserCtxtPtr ctxt) {
7225     xmlEntityPtr ent;
7226     xmlChar *val;
7227     int was_checked;
7228     xmlNodePtr list = NULL;
7229     xmlParserErrors ret = XML_ERR_OK;
7230 
7231 
7232     if (RAW != '&')
7233         return;
7234 
7235     /*
7236      * Simple case of a CharRef
7237      */
7238     if (NXT(1) == '#') {
7239 	int i = 0;
7240 	xmlChar out[16];
7241 	int value = xmlParseCharRef(ctxt);
7242 
7243 	if (value == 0)
7244 	    return;
7245 
7246         /*
7247          * Just encode the value in UTF-8
7248          */
7249         COPY_BUF(out, i, value);
7250         out[i] = 0;
7251         if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7252             (!ctxt->disableSAX))
7253             ctxt->sax->characters(ctxt->userData, out, i);
7254 	return;
7255     }
7256 
7257     /*
7258      * We are seeing an entity reference
7259      */
7260     ent = xmlParseEntityRef(ctxt);
7261     if (ent == NULL) return;
7262     if (!ctxt->wellFormed)
7263 	return;
7264     was_checked = ent->flags & XML_ENT_PARSED;
7265 
7266     /* special case of predefined entities */
7267     if ((ent->name == NULL) ||
7268         (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7269 	val = ent->content;
7270 	if (val == NULL) return;
7271 	/*
7272 	 * inline the entity.
7273 	 */
7274 	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7275 	    (!ctxt->disableSAX))
7276 	    ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7277 	return;
7278     }
7279 
7280     /*
7281      * The first reference to the entity trigger a parsing phase
7282      * where the ent->children is filled with the result from
7283      * the parsing.
7284      * Note: external parsed entities will not be loaded, it is not
7285      * required for a non-validating parser, unless the parsing option
7286      * of validating, or substituting entities were given. Doing so is
7287      * far more secure as the parser will only process data coming from
7288      * the document entity by default.
7289      *
7290      * FIXME: This doesn't work correctly since entities can be
7291      * expanded with different namespace declarations in scope.
7292      * For example:
7293      *
7294      * <!DOCTYPE doc [
7295      *   <!ENTITY ent "<ns:elem/>">
7296      * ]>
7297      * <doc>
7298      *   <decl1 xmlns:ns="urn:ns1">
7299      *     &ent;
7300      *   </decl1>
7301      *   <decl2 xmlns:ns="urn:ns2">
7302      *     &ent;
7303      *   </decl2>
7304      * </doc>
7305      *
7306      * Proposed fix:
7307      *
7308      * - Remove the ent->owner optimization which tries to avoid the
7309      *   initial copy of the entity. Always make entities own the
7310      *   subtree.
7311      * - Ignore current namespace declarations when parsing the
7312      *   entity. If a prefix can't be resolved, don't report an error
7313      *   but mark it as unresolved.
7314      * - Try to resolve these prefixes when expanding the entity.
7315      *   This will require a specialized version of xmlStaticCopyNode
7316      *   which can also make use of the namespace hash table to avoid
7317      *   quadratic behavior.
7318      *
7319      * Alternatively, we could simply reparse the entity on each
7320      * expansion like we already do with custom SAX callbacks.
7321      * External entity content should be cached in this case.
7322      */
7323     if (((ent->flags & XML_ENT_PARSED) == 0) &&
7324         ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7325          (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7326 	unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7327 
7328 	/*
7329 	 * This is a bit hackish but this seems the best
7330 	 * way to make sure both SAX and DOM entity support
7331 	 * behaves okay.
7332 	 */
7333 	void *user_data;
7334 	if (ctxt->userData == ctxt)
7335 	    user_data = NULL;
7336 	else
7337 	    user_data = ctxt->userData;
7338 
7339         /* Avoid overflow as much as possible */
7340         ctxt->sizeentcopy = 0;
7341 
7342         if (ent->flags & XML_ENT_EXPANDING) {
7343             xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7344             xmlHaltParser(ctxt);
7345             return;
7346         }
7347 
7348         ent->flags |= XML_ENT_EXPANDING;
7349 
7350 	/*
7351 	 * Check that this entity is well formed
7352 	 * 4.3.2: An internal general parsed entity is well-formed
7353 	 * if its replacement text matches the production labeled
7354 	 * content.
7355 	 */
7356 	if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7357 	    ctxt->depth++;
7358 	    ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7359 	                                              user_data, &list);
7360 	    ctxt->depth--;
7361 
7362 	} else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7363 	    ctxt->depth++;
7364 	    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7365 	                                   user_data, ctxt->depth, ent->URI,
7366 					   ent->ExternalID, &list);
7367 	    ctxt->depth--;
7368 	} else {
7369 	    ret = XML_ERR_ENTITY_PE_INTERNAL;
7370 	    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7371 			 "invalid entity type found\n", NULL);
7372 	}
7373 
7374         ent->flags &= ~XML_ENT_EXPANDING;
7375         ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7376         ent->expandedSize = ctxt->sizeentcopy;
7377 	if (ret == XML_ERR_ENTITY_LOOP) {
7378             xmlHaltParser(ctxt);
7379 	    xmlFreeNodeList(list);
7380 	    return;
7381 	}
7382 	if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7383 	    xmlFreeNodeList(list);
7384 	    return;
7385 	}
7386 
7387 	if ((ret == XML_ERR_OK) && (list != NULL)) {
7388             ent->children = list;
7389             /*
7390              * Prune it directly in the generated document
7391              * except for single text nodes.
7392              */
7393             if ((ctxt->replaceEntities == 0) ||
7394                 (ctxt->parseMode == XML_PARSE_READER) ||
7395                 ((list->type == XML_TEXT_NODE) &&
7396                  (list->next == NULL))) {
7397                 ent->owner = 1;
7398                 while (list != NULL) {
7399                     list->parent = (xmlNodePtr) ent;
7400                     if (list->doc != ent->doc)
7401                         xmlSetTreeDoc(list, ent->doc);
7402                     if (list->next == NULL)
7403                         ent->last = list;
7404                     list = list->next;
7405                 }
7406                 list = NULL;
7407             } else {
7408                 ent->owner = 0;
7409                 while (list != NULL) {
7410                     list->parent = (xmlNodePtr) ctxt->node;
7411                     list->doc = ctxt->myDoc;
7412                     if (list->next == NULL)
7413                         ent->last = list;
7414                     list = list->next;
7415                 }
7416                 list = ent->children;
7417 #ifdef LIBXML_LEGACY_ENABLED
7418                 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7419                     xmlAddEntityReference(ent, list, NULL);
7420 #endif /* LIBXML_LEGACY_ENABLED */
7421             }
7422 	} else if ((ret != XML_ERR_OK) &&
7423 		   (ret != XML_WAR_UNDECLARED_ENTITY)) {
7424 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7425 		     "Entity '%s' failed to parse\n", ent->name);
7426             if (ent->content != NULL)
7427                 ent->content[0] = 0;
7428 	} else if (list != NULL) {
7429 	    xmlFreeNodeList(list);
7430 	    list = NULL;
7431 	}
7432 
7433         /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7434         was_checked = 0;
7435     }
7436 
7437     /*
7438      * Now that the entity content has been gathered
7439      * provide it to the application, this can take different forms based
7440      * on the parsing modes.
7441      */
7442     if (ent->children == NULL) {
7443 	/*
7444 	 * Probably running in SAX mode and the callbacks don't
7445 	 * build the entity content. So unless we already went
7446 	 * though parsing for first checking go though the entity
7447 	 * content to generate callbacks associated to the entity
7448 	 */
7449 	if (was_checked != 0) {
7450 	    void *user_data;
7451 	    /*
7452 	     * This is a bit hackish but this seems the best
7453 	     * way to make sure both SAX and DOM entity support
7454 	     * behaves okay.
7455 	     */
7456 	    if (ctxt->userData == ctxt)
7457 		user_data = NULL;
7458 	    else
7459 		user_data = ctxt->userData;
7460 
7461 	    if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7462 		ctxt->depth++;
7463 		ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7464 				   ent->content, user_data, NULL);
7465 		ctxt->depth--;
7466 	    } else if (ent->etype ==
7467 		       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7468 	        unsigned long oldsizeentities = ctxt->sizeentities;
7469 
7470 		ctxt->depth++;
7471 		ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7472 			   ctxt->sax, user_data, ctxt->depth,
7473 			   ent->URI, ent->ExternalID, NULL);
7474 		ctxt->depth--;
7475 
7476                 /* Undo the change to sizeentities */
7477                 ctxt->sizeentities = oldsizeentities;
7478 	    } else {
7479 		ret = XML_ERR_ENTITY_PE_INTERNAL;
7480 		xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7481 			     "invalid entity type found\n", NULL);
7482 	    }
7483 	    if (ret == XML_ERR_ENTITY_LOOP) {
7484 		xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7485 		return;
7486 	    }
7487             if (xmlParserEntityCheck(ctxt, 0))
7488                 return;
7489 	}
7490 	if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7491 	    (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7492 	    /*
7493 	     * Entity reference callback comes second, it's somewhat
7494 	     * superfluous but a compatibility to historical behaviour
7495 	     */
7496 	    ctxt->sax->reference(ctxt->userData, ent->name);
7497 	}
7498 	return;
7499     }
7500 
7501     /*
7502      * We also check for amplification if entities aren't substituted.
7503      * They might be expanded later.
7504      */
7505     if ((was_checked != 0) &&
7506         (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7507         return;
7508 
7509     /*
7510      * If we didn't get any children for the entity being built
7511      */
7512     if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7513 	(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7514 	/*
7515 	 * Create a node.
7516 	 */
7517 	ctxt->sax->reference(ctxt->userData, ent->name);
7518 	return;
7519     }
7520 
7521     if (ctxt->replaceEntities)  {
7522 	/*
7523 	 * There is a problem on the handling of _private for entities
7524 	 * (bug 155816): Should we copy the content of the field from
7525 	 * the entity (possibly overwriting some value set by the user
7526 	 * when a copy is created), should we leave it alone, or should
7527 	 * we try to take care of different situations?  The problem
7528 	 * is exacerbated by the usage of this field by the xmlReader.
7529 	 * To fix this bug, we look at _private on the created node
7530 	 * and, if it's NULL, we copy in whatever was in the entity.
7531 	 * If it's not NULL we leave it alone.  This is somewhat of a
7532 	 * hack - maybe we should have further tests to determine
7533 	 * what to do.
7534 	 */
7535 	if (ctxt->node != NULL) {
7536 	    /*
7537 	     * Seems we are generating the DOM content, do
7538 	     * a simple tree copy for all references except the first
7539 	     * In the first occurrence list contains the replacement.
7540 	     */
7541 	    if (((list == NULL) && (ent->owner == 0)) ||
7542 		(ctxt->parseMode == XML_PARSE_READER)) {
7543 		xmlNodePtr nw = NULL, cur, firstChild = NULL;
7544 
7545 		/*
7546 		 * when operating on a reader, the entities definitions
7547 		 * are always owning the entities subtree.
7548 		if (ctxt->parseMode == XML_PARSE_READER)
7549 		    ent->owner = 1;
7550 		 */
7551 
7552 		cur = ent->children;
7553 		while (cur != NULL) {
7554 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7555 		    if (nw != NULL) {
7556 			if (nw->_private == NULL)
7557 			    nw->_private = cur->_private;
7558 			if (firstChild == NULL){
7559 			    firstChild = nw;
7560 			}
7561 			nw = xmlAddChild(ctxt->node, nw);
7562 		    }
7563 		    if (cur == ent->last) {
7564 			/*
7565 			 * needed to detect some strange empty
7566 			 * node cases in the reader tests
7567 			 */
7568 			if ((ctxt->parseMode == XML_PARSE_READER) &&
7569 			    (nw != NULL) &&
7570 			    (nw->type == XML_ELEMENT_NODE) &&
7571 			    (nw->children == NULL))
7572 			    nw->extra = 1;
7573 
7574 			break;
7575 		    }
7576 		    cur = cur->next;
7577 		}
7578 #ifdef LIBXML_LEGACY_ENABLED
7579 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7580 		  xmlAddEntityReference(ent, firstChild, nw);
7581 #endif /* LIBXML_LEGACY_ENABLED */
7582 	    } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7583 		xmlNodePtr nw = NULL, cur, next, last,
7584 			   firstChild = NULL;
7585 
7586 		/*
7587 		 * Copy the entity child list and make it the new
7588 		 * entity child list. The goal is to make sure any
7589 		 * ID or REF referenced will be the one from the
7590 		 * document content and not the entity copy.
7591 		 */
7592 		cur = ent->children;
7593 		ent->children = NULL;
7594 		last = ent->last;
7595 		ent->last = NULL;
7596 		while (cur != NULL) {
7597 		    next = cur->next;
7598 		    cur->next = NULL;
7599 		    cur->parent = NULL;
7600 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7601 		    if (nw != NULL) {
7602 			if (nw->_private == NULL)
7603 			    nw->_private = cur->_private;
7604 			if (firstChild == NULL){
7605 			    firstChild = cur;
7606 			}
7607 			xmlAddChild((xmlNodePtr) ent, nw);
7608 		    }
7609 		    xmlAddChild(ctxt->node, cur);
7610 		    if (cur == last)
7611 			break;
7612 		    cur = next;
7613 		}
7614 		if (ent->owner == 0)
7615 		    ent->owner = 1;
7616 #ifdef LIBXML_LEGACY_ENABLED
7617 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7618 		  xmlAddEntityReference(ent, firstChild, nw);
7619 #endif /* LIBXML_LEGACY_ENABLED */
7620 	    } else {
7621 		const xmlChar *nbktext;
7622 
7623 		/*
7624 		 * the name change is to avoid coalescing of the
7625 		 * node with a possible previous text one which
7626 		 * would make ent->children a dangling pointer
7627 		 */
7628 		nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7629 					-1);
7630 		if (ent->children->type == XML_TEXT_NODE)
7631 		    ent->children->name = nbktext;
7632 		if ((ent->last != ent->children) &&
7633 		    (ent->last->type == XML_TEXT_NODE))
7634 		    ent->last->name = nbktext;
7635 		xmlAddChildList(ctxt->node, ent->children);
7636 	    }
7637 
7638 	    /*
7639 	     * This is to avoid a nasty side effect, see
7640 	     * characters() in SAX.c
7641 	     */
7642 	    ctxt->nodemem = 0;
7643 	    ctxt->nodelen = 0;
7644 	    return;
7645 	}
7646     }
7647 }
7648 
7649 /**
7650  * xmlParseEntityRef:
7651  * @ctxt:  an XML parser context
7652  *
7653  * DEPRECATED: Internal function, don't use.
7654  *
7655  * Parse an entitiy reference. Always consumes '&'.
7656  *
7657  * [68] EntityRef ::= '&' Name ';'
7658  *
7659  * [ WFC: Entity Declared ]
7660  * In a document without any DTD, a document with only an internal DTD
7661  * subset which contains no parameter entity references, or a document
7662  * with "standalone='yes'", the Name given in the entity reference
7663  * must match that in an entity declaration, except that well-formed
7664  * documents need not declare any of the following entities: amp, lt,
7665  * gt, apos, quot.  The declaration of a parameter entity must precede
7666  * any reference to it.  Similarly, the declaration of a general entity
7667  * must precede any reference to it which appears in a default value in an
7668  * attribute-list declaration. Note that if entities are declared in the
7669  * external subset or in external parameter entities, a non-validating
7670  * processor is not obligated to read and process their declarations;
7671  * for such documents, the rule that an entity must be declared is a
7672  * well-formedness constraint only if standalone='yes'.
7673  *
7674  * [ WFC: Parsed Entity ]
7675  * An entity reference must not contain the name of an unparsed entity
7676  *
7677  * Returns the xmlEntityPtr if found, or NULL otherwise.
7678  */
7679 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7680 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7681     const xmlChar *name;
7682     xmlEntityPtr ent = NULL;
7683 
7684     GROW;
7685     if (ctxt->instate == XML_PARSER_EOF)
7686         return(NULL);
7687 
7688     if (RAW != '&')
7689         return(NULL);
7690     NEXT;
7691     name = xmlParseName(ctxt);
7692     if (name == NULL) {
7693 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7694 		       "xmlParseEntityRef: no name\n");
7695         return(NULL);
7696     }
7697     if (RAW != ';') {
7698 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7699 	return(NULL);
7700     }
7701     NEXT;
7702 
7703     /*
7704      * Predefined entities override any extra definition
7705      */
7706     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7707         ent = xmlGetPredefinedEntity(name);
7708         if (ent != NULL)
7709             return(ent);
7710     }
7711 
7712     /*
7713      * Ask first SAX for entity resolution, otherwise try the
7714      * entities which may have stored in the parser context.
7715      */
7716     if (ctxt->sax != NULL) {
7717 	if (ctxt->sax->getEntity != NULL)
7718 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7719 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7720 	    (ctxt->options & XML_PARSE_OLDSAX))
7721 	    ent = xmlGetPredefinedEntity(name);
7722 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7723 	    (ctxt->userData==ctxt)) {
7724 	    ent = xmlSAX2GetEntity(ctxt, name);
7725 	}
7726     }
7727     if (ctxt->instate == XML_PARSER_EOF)
7728 	return(NULL);
7729     /*
7730      * [ WFC: Entity Declared ]
7731      * In a document without any DTD, a document with only an
7732      * internal DTD subset which contains no parameter entity
7733      * references, or a document with "standalone='yes'", the
7734      * Name given in the entity reference must match that in an
7735      * entity declaration, except that well-formed documents
7736      * need not declare any of the following entities: amp, lt,
7737      * gt, apos, quot.
7738      * The declaration of a parameter entity must precede any
7739      * reference to it.
7740      * Similarly, the declaration of a general entity must
7741      * precede any reference to it which appears in a default
7742      * value in an attribute-list declaration. Note that if
7743      * entities are declared in the external subset or in
7744      * external parameter entities, a non-validating processor
7745      * is not obligated to read and process their declarations;
7746      * for such documents, the rule that an entity must be
7747      * declared is a well-formedness constraint only if
7748      * standalone='yes'.
7749      */
7750     if (ent == NULL) {
7751 	if ((ctxt->standalone == 1) ||
7752 	    ((ctxt->hasExternalSubset == 0) &&
7753 	     (ctxt->hasPErefs == 0))) {
7754 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7755 		     "Entity '%s' not defined\n", name);
7756 	} else {
7757 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7758 		     "Entity '%s' not defined\n", name);
7759 	    if ((ctxt->inSubset == 0) &&
7760 		(ctxt->sax != NULL) &&
7761                 (ctxt->disableSAX == 0) &&
7762 		(ctxt->sax->reference != NULL)) {
7763 		ctxt->sax->reference(ctxt->userData, name);
7764 	    }
7765 	}
7766 	ctxt->valid = 0;
7767     }
7768 
7769     /*
7770      * [ WFC: Parsed Entity ]
7771      * An entity reference must not contain the name of an
7772      * unparsed entity
7773      */
7774     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7775 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7776 		 "Entity reference to unparsed entity %s\n", name);
7777     }
7778 
7779     /*
7780      * [ WFC: No External Entity References ]
7781      * Attribute values cannot contain direct or indirect
7782      * entity references to external entities.
7783      */
7784     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7785 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7786 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7787 	     "Attribute references external entity '%s'\n", name);
7788     }
7789     /*
7790      * [ WFC: No < in Attribute Values ]
7791      * The replacement text of any entity referred to directly or
7792      * indirectly in an attribute value (other than "&lt;") must
7793      * not contain a <.
7794      */
7795     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7796 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7797 	if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7798             if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7799                 ent->flags |= XML_ENT_CONTAINS_LT;
7800             ent->flags |= XML_ENT_CHECKED_LT;
7801         }
7802         if (ent->flags & XML_ENT_CONTAINS_LT)
7803             xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7804                     "'<' in entity '%s' is not allowed in attributes "
7805                     "values\n", name);
7806     }
7807 
7808     /*
7809      * Internal check, no parameter entities here ...
7810      */
7811     else {
7812 	switch (ent->etype) {
7813 	    case XML_INTERNAL_PARAMETER_ENTITY:
7814 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7815 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7816 	     "Attempt to reference the parameter entity '%s'\n",
7817 			      name);
7818 	    break;
7819 	    default:
7820 	    break;
7821 	}
7822     }
7823 
7824     /*
7825      * [ WFC: No Recursion ]
7826      * A parsed entity must not contain a recursive reference
7827      * to itself, either directly or indirectly.
7828      * Done somewhere else
7829      */
7830     return(ent);
7831 }
7832 
7833 /**
7834  * xmlParseStringEntityRef:
7835  * @ctxt:  an XML parser context
7836  * @str:  a pointer to an index in the string
7837  *
7838  * parse ENTITY references declarations, but this version parses it from
7839  * a string value.
7840  *
7841  * [68] EntityRef ::= '&' Name ';'
7842  *
7843  * [ WFC: Entity Declared ]
7844  * In a document without any DTD, a document with only an internal DTD
7845  * subset which contains no parameter entity references, or a document
7846  * with "standalone='yes'", the Name given in the entity reference
7847  * must match that in an entity declaration, except that well-formed
7848  * documents need not declare any of the following entities: amp, lt,
7849  * gt, apos, quot.  The declaration of a parameter entity must precede
7850  * any reference to it.  Similarly, the declaration of a general entity
7851  * must precede any reference to it which appears in a default value in an
7852  * attribute-list declaration. Note that if entities are declared in the
7853  * external subset or in external parameter entities, a non-validating
7854  * processor is not obligated to read and process their declarations;
7855  * for such documents, the rule that an entity must be declared is a
7856  * well-formedness constraint only if standalone='yes'.
7857  *
7858  * [ WFC: Parsed Entity ]
7859  * An entity reference must not contain the name of an unparsed entity
7860  *
7861  * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7862  * is updated to the current location in the string.
7863  */
7864 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7865 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7866     xmlChar *name;
7867     const xmlChar *ptr;
7868     xmlChar cur;
7869     xmlEntityPtr ent = NULL;
7870 
7871     if ((str == NULL) || (*str == NULL))
7872         return(NULL);
7873     ptr = *str;
7874     cur = *ptr;
7875     if (cur != '&')
7876 	return(NULL);
7877 
7878     ptr++;
7879     name = xmlParseStringName(ctxt, &ptr);
7880     if (name == NULL) {
7881 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7882 		       "xmlParseStringEntityRef: no name\n");
7883 	*str = ptr;
7884 	return(NULL);
7885     }
7886     if (*ptr != ';') {
7887 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7888         xmlFree(name);
7889 	*str = ptr;
7890 	return(NULL);
7891     }
7892     ptr++;
7893 
7894 
7895     /*
7896      * Predefined entities override any extra definition
7897      */
7898     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7899         ent = xmlGetPredefinedEntity(name);
7900         if (ent != NULL) {
7901             xmlFree(name);
7902             *str = ptr;
7903             return(ent);
7904         }
7905     }
7906 
7907     /*
7908      * Ask first SAX for entity resolution, otherwise try the
7909      * entities which may have stored in the parser context.
7910      */
7911     if (ctxt->sax != NULL) {
7912 	if (ctxt->sax->getEntity != NULL)
7913 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7914 	if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7915 	    ent = xmlGetPredefinedEntity(name);
7916 	if ((ent == NULL) && (ctxt->userData==ctxt)) {
7917 	    ent = xmlSAX2GetEntity(ctxt, name);
7918 	}
7919     }
7920     if (ctxt->instate == XML_PARSER_EOF) {
7921 	xmlFree(name);
7922 	return(NULL);
7923     }
7924 
7925     /*
7926      * [ WFC: Entity Declared ]
7927      * In a document without any DTD, a document with only an
7928      * internal DTD subset which contains no parameter entity
7929      * references, or a document with "standalone='yes'", the
7930      * Name given in the entity reference must match that in an
7931      * entity declaration, except that well-formed documents
7932      * need not declare any of the following entities: amp, lt,
7933      * gt, apos, quot.
7934      * The declaration of a parameter entity must precede any
7935      * reference to it.
7936      * Similarly, the declaration of a general entity must
7937      * precede any reference to it which appears in a default
7938      * value in an attribute-list declaration. Note that if
7939      * entities are declared in the external subset or in
7940      * external parameter entities, a non-validating processor
7941      * is not obligated to read and process their declarations;
7942      * for such documents, the rule that an entity must be
7943      * declared is a well-formedness constraint only if
7944      * standalone='yes'.
7945      */
7946     if (ent == NULL) {
7947 	if ((ctxt->standalone == 1) ||
7948 	    ((ctxt->hasExternalSubset == 0) &&
7949 	     (ctxt->hasPErefs == 0))) {
7950 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7951 		     "Entity '%s' not defined\n", name);
7952 	} else {
7953 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7954 			  "Entity '%s' not defined\n",
7955 			  name);
7956 	}
7957 	/* TODO ? check regressions ctxt->valid = 0; */
7958     }
7959 
7960     /*
7961      * [ WFC: Parsed Entity ]
7962      * An entity reference must not contain the name of an
7963      * unparsed entity
7964      */
7965     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7966 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7967 		 "Entity reference to unparsed entity %s\n", name);
7968     }
7969 
7970     /*
7971      * [ WFC: No External Entity References ]
7972      * Attribute values cannot contain direct or indirect
7973      * entity references to external entities.
7974      */
7975     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7976 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7977 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7978 	 "Attribute references external entity '%s'\n", name);
7979     }
7980     /*
7981      * [ WFC: No < in Attribute Values ]
7982      * The replacement text of any entity referred to directly or
7983      * indirectly in an attribute value (other than "&lt;") must
7984      * not contain a <.
7985      */
7986     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7987 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7988 	if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7989             if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7990                 ent->flags |= XML_ENT_CONTAINS_LT;
7991             ent->flags |= XML_ENT_CHECKED_LT;
7992         }
7993         if (ent->flags & XML_ENT_CONTAINS_LT)
7994             xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7995                     "'<' in entity '%s' is not allowed in attributes "
7996                     "values\n", name);
7997     }
7998 
7999     /*
8000      * Internal check, no parameter entities here ...
8001      */
8002     else {
8003 	switch (ent->etype) {
8004 	    case XML_INTERNAL_PARAMETER_ENTITY:
8005 	    case XML_EXTERNAL_PARAMETER_ENTITY:
8006 		xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
8007 	     "Attempt to reference the parameter entity '%s'\n",
8008 				  name);
8009 	    break;
8010 	    default:
8011 	    break;
8012 	}
8013     }
8014 
8015     /*
8016      * [ WFC: No Recursion ]
8017      * A parsed entity must not contain a recursive reference
8018      * to itself, either directly or indirectly.
8019      * Done somewhere else
8020      */
8021 
8022     xmlFree(name);
8023     *str = ptr;
8024     return(ent);
8025 }
8026 
8027 /**
8028  * xmlParsePEReference:
8029  * @ctxt:  an XML parser context
8030  *
8031  * DEPRECATED: Internal function, don't use.
8032  *
8033  * Parse a parameter entity reference. Always consumes '%'.
8034  *
8035  * The entity content is handled directly by pushing it's content as
8036  * a new input stream.
8037  *
8038  * [69] PEReference ::= '%' Name ';'
8039  *
8040  * [ WFC: No Recursion ]
8041  * A parsed entity must not contain a recursive
8042  * reference to itself, either directly or indirectly.
8043  *
8044  * [ WFC: Entity Declared ]
8045  * In a document without any DTD, a document with only an internal DTD
8046  * subset which contains no parameter entity references, or a document
8047  * with "standalone='yes'", ...  ... The declaration of a parameter
8048  * entity must precede any reference to it...
8049  *
8050  * [ VC: Entity Declared ]
8051  * In a document with an external subset or external parameter entities
8052  * with "standalone='no'", ...  ... The declaration of a parameter entity
8053  * must precede any reference to it...
8054  *
8055  * [ WFC: In DTD ]
8056  * Parameter-entity references may only appear in the DTD.
8057  * NOTE: misleading but this is handled.
8058  */
8059 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)8060 xmlParsePEReference(xmlParserCtxtPtr ctxt)
8061 {
8062     const xmlChar *name;
8063     xmlEntityPtr entity = NULL;
8064     xmlParserInputPtr input;
8065 
8066     if (RAW != '%')
8067         return;
8068     NEXT;
8069     name = xmlParseName(ctxt);
8070     if (name == NULL) {
8071 	xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8072 	return;
8073     }
8074     if (xmlParserDebugEntities)
8075 	xmlGenericError(xmlGenericErrorContext,
8076 		"PEReference: %s\n", name);
8077     if (RAW != ';') {
8078 	xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8079         return;
8080     }
8081 
8082     NEXT;
8083 
8084     /*
8085      * Request the entity from SAX
8086      */
8087     if ((ctxt->sax != NULL) &&
8088 	(ctxt->sax->getParameterEntity != NULL))
8089 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8090     if (ctxt->instate == XML_PARSER_EOF)
8091 	return;
8092     if (entity == NULL) {
8093 	/*
8094 	 * [ WFC: Entity Declared ]
8095 	 * In a document without any DTD, a document with only an
8096 	 * internal DTD subset which contains no parameter entity
8097 	 * references, or a document with "standalone='yes'", ...
8098 	 * ... The declaration of a parameter entity must precede
8099 	 * any reference to it...
8100 	 */
8101 	if ((ctxt->standalone == 1) ||
8102 	    ((ctxt->hasExternalSubset == 0) &&
8103 	     (ctxt->hasPErefs == 0))) {
8104 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8105 			      "PEReference: %%%s; not found\n",
8106 			      name);
8107 	} else {
8108 	    /*
8109 	     * [ VC: Entity Declared ]
8110 	     * In a document with an external subset or external
8111 	     * parameter entities with "standalone='no'", ...
8112 	     * ... The declaration of a parameter entity must
8113 	     * precede any reference to it...
8114 	     */
8115             if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8116                 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8117                                  "PEReference: %%%s; not found\n",
8118                                  name, NULL);
8119             } else
8120                 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8121                               "PEReference: %%%s; not found\n",
8122                               name, NULL);
8123             ctxt->valid = 0;
8124 	}
8125     } else {
8126 	/*
8127 	 * Internal checking in case the entity quest barfed
8128 	 */
8129 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8130 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8131 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8132 		  "Internal: %%%s; is not a parameter entity\n",
8133 			  name, NULL);
8134 	} else {
8135             unsigned long parentConsumed;
8136             xmlEntityPtr oldEnt;
8137 
8138 	    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8139 	        ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8140 		((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8141 		((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8142 		((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8143 		(ctxt->replaceEntities == 0) &&
8144 		(ctxt->validate == 0))
8145 		return;
8146 
8147             if (entity->flags & XML_ENT_EXPANDING) {
8148                 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8149                 xmlHaltParser(ctxt);
8150                 return;
8151             }
8152 
8153             /* Must be computed from old input before pushing new input. */
8154             parentConsumed = ctxt->input->parentConsumed;
8155             oldEnt = ctxt->input->entity;
8156             if ((oldEnt == NULL) ||
8157                 ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8158                  ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8159                 xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8160                 xmlSaturatedAddSizeT(&parentConsumed,
8161                                      ctxt->input->cur - ctxt->input->base);
8162             }
8163 
8164 	    input = xmlNewEntityInputStream(ctxt, entity);
8165 	    if (xmlPushInput(ctxt, input) < 0) {
8166                 xmlFreeInputStream(input);
8167 		return;
8168             }
8169 
8170             entity->flags |= XML_ENT_EXPANDING;
8171 
8172             input->parentConsumed = parentConsumed;
8173 
8174 	    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8175                 xmlDetectEncoding(ctxt);
8176 
8177                 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8178                     (IS_BLANK_CH(NXT(5)))) {
8179                     xmlParseTextDecl(ctxt);
8180                 }
8181             }
8182 	}
8183     }
8184     ctxt->hasPErefs = 1;
8185 }
8186 
8187 /**
8188  * xmlLoadEntityContent:
8189  * @ctxt:  an XML parser context
8190  * @entity: an unloaded system entity
8191  *
8192  * Load the original content of the given system entity from the
8193  * ExternalID/SystemID given. This is to be used for Included in Literal
8194  * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8195  *
8196  * Returns 0 in case of success and -1 in case of failure
8197  */
8198 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)8199 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8200     xmlParserInputPtr oldinput, input = NULL;
8201     xmlParserInputPtr *oldinputTab;
8202     const xmlChar *oldencoding;
8203     xmlChar *content = NULL;
8204     size_t length, i;
8205     int oldinputNr, oldinputMax, oldprogressive;
8206     int ret = -1;
8207     int res;
8208 
8209     if ((ctxt == NULL) || (entity == NULL) ||
8210         ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8211 	 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8212 	(entity->content != NULL)) {
8213 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8214 	            "xmlLoadEntityContent parameter error");
8215         return(-1);
8216     }
8217 
8218     if (xmlParserDebugEntities)
8219 	xmlGenericError(xmlGenericErrorContext,
8220 		"Reading %s entity content input\n", entity->name);
8221 
8222     input = xmlLoadExternalEntity((char *) entity->URI,
8223            (char *) entity->ExternalID, ctxt);
8224     if (input == NULL) {
8225 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8226 	            "xmlLoadEntityContent input error");
8227         return(-1);
8228     }
8229 
8230     oldinput = ctxt->input;
8231     oldinputNr = ctxt->inputNr;
8232     oldinputMax = ctxt->inputMax;
8233     oldinputTab = ctxt->inputTab;
8234     oldencoding = ctxt->encoding;
8235     oldprogressive = ctxt->progressive;
8236 
8237     ctxt->input = NULL;
8238     ctxt->inputNr = 0;
8239     ctxt->inputMax = 1;
8240     ctxt->encoding = NULL;
8241     ctxt->progressive = 0;
8242     ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
8243     if (ctxt->inputTab == NULL) {
8244         xmlErrMemory(ctxt, NULL);
8245         xmlFreeInputStream(input);
8246         goto error;
8247     }
8248 
8249     xmlBufResetInput(input->buf->buffer, input);
8250 
8251     inputPush(ctxt, input);
8252 
8253     xmlDetectEncoding(ctxt);
8254 
8255     /*
8256      * Parse a possible text declaration first
8257      */
8258     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
8259 	xmlParseTextDecl(ctxt);
8260         /*
8261          * An XML-1.0 document can't reference an entity not XML-1.0
8262          */
8263         if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
8264             (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
8265             xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
8266                            "Version mismatch between document and entity\n");
8267         }
8268     }
8269 
8270     if (ctxt->instate == XML_PARSER_EOF)
8271         goto error;
8272 
8273     length = input->cur - input->base;
8274     xmlBufShrink(input->buf->buffer, length);
8275     xmlSaturatedAdd(&ctxt->sizeentities, length);
8276 
8277     while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
8278         ;
8279 
8280     xmlBufResetInput(input->buf->buffer, input);
8281 
8282     if (res < 0) {
8283         xmlFatalErr(ctxt, input->buf->error, NULL);
8284         goto error;
8285     }
8286 
8287     length = xmlBufUse(input->buf->buffer);
8288     content = xmlBufDetach(input->buf->buffer);
8289 
8290     if (length > INT_MAX) {
8291         xmlErrMemory(ctxt, NULL);
8292         goto error;
8293     }
8294 
8295     for (i = 0; i < length; ) {
8296         int clen = length - i;
8297         int c = xmlGetUTF8Char(content + i, &clen);
8298 
8299         if ((c < 0) || (!IS_CHAR(c))) {
8300             xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8301                               "xmlLoadEntityContent: invalid char value %d\n",
8302                               content[i]);
8303             goto error;
8304         }
8305         i += clen;
8306     }
8307 
8308     xmlSaturatedAdd(&ctxt->sizeentities, length);
8309     entity->content = content;
8310     entity->length = length;
8311     content = NULL;
8312     ret = 0;
8313 
8314 error:
8315     while (ctxt->inputNr > 0)
8316         xmlFreeInputStream(inputPop(ctxt));
8317     xmlFree(ctxt->inputTab);
8318     xmlFree((xmlChar *) ctxt->encoding);
8319 
8320     ctxt->input = oldinput;
8321     ctxt->inputNr = oldinputNr;
8322     ctxt->inputMax = oldinputMax;
8323     ctxt->inputTab = oldinputTab;
8324     ctxt->encoding = oldencoding;
8325     ctxt->progressive = oldprogressive;
8326 
8327     xmlFree(content);
8328 
8329     return(ret);
8330 }
8331 
8332 /**
8333  * xmlParseStringPEReference:
8334  * @ctxt:  an XML parser context
8335  * @str:  a pointer to an index in the string
8336  *
8337  * parse PEReference declarations
8338  *
8339  * [69] PEReference ::= '%' Name ';'
8340  *
8341  * [ WFC: No Recursion ]
8342  * A parsed entity must not contain a recursive
8343  * reference to itself, either directly or indirectly.
8344  *
8345  * [ WFC: Entity Declared ]
8346  * In a document without any DTD, a document with only an internal DTD
8347  * subset which contains no parameter entity references, or a document
8348  * with "standalone='yes'", ...  ... The declaration of a parameter
8349  * entity must precede any reference to it...
8350  *
8351  * [ VC: Entity Declared ]
8352  * In a document with an external subset or external parameter entities
8353  * with "standalone='no'", ...  ... The declaration of a parameter entity
8354  * must precede any reference to it...
8355  *
8356  * [ WFC: In DTD ]
8357  * Parameter-entity references may only appear in the DTD.
8358  * NOTE: misleading but this is handled.
8359  *
8360  * Returns the string of the entity content.
8361  *         str is updated to the current value of the index
8362  */
8363 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8364 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8365     const xmlChar *ptr;
8366     xmlChar cur;
8367     xmlChar *name;
8368     xmlEntityPtr entity = NULL;
8369 
8370     if ((str == NULL) || (*str == NULL)) return(NULL);
8371     ptr = *str;
8372     cur = *ptr;
8373     if (cur != '%')
8374         return(NULL);
8375     ptr++;
8376     name = xmlParseStringName(ctxt, &ptr);
8377     if (name == NULL) {
8378 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8379 		       "xmlParseStringPEReference: no name\n");
8380 	*str = ptr;
8381 	return(NULL);
8382     }
8383     cur = *ptr;
8384     if (cur != ';') {
8385 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8386 	xmlFree(name);
8387 	*str = ptr;
8388 	return(NULL);
8389     }
8390     ptr++;
8391 
8392     /*
8393      * Request the entity from SAX
8394      */
8395     if ((ctxt->sax != NULL) &&
8396 	(ctxt->sax->getParameterEntity != NULL))
8397 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8398     if (ctxt->instate == XML_PARSER_EOF) {
8399 	xmlFree(name);
8400 	*str = ptr;
8401 	return(NULL);
8402     }
8403     if (entity == NULL) {
8404 	/*
8405 	 * [ WFC: Entity Declared ]
8406 	 * In a document without any DTD, a document with only an
8407 	 * internal DTD subset which contains no parameter entity
8408 	 * references, or a document with "standalone='yes'", ...
8409 	 * ... The declaration of a parameter entity must precede
8410 	 * any reference to it...
8411 	 */
8412 	if ((ctxt->standalone == 1) ||
8413 	    ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8414 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8415 		 "PEReference: %%%s; not found\n", name);
8416 	} else {
8417 	    /*
8418 	     * [ VC: Entity Declared ]
8419 	     * In a document with an external subset or external
8420 	     * parameter entities with "standalone='no'", ...
8421 	     * ... The declaration of a parameter entity must
8422 	     * precede any reference to it...
8423 	     */
8424 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8425 			  "PEReference: %%%s; not found\n",
8426 			  name, NULL);
8427 	    ctxt->valid = 0;
8428 	}
8429     } else {
8430 	/*
8431 	 * Internal checking in case the entity quest barfed
8432 	 */
8433 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8434 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8435 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8436 			  "%%%s; is not a parameter entity\n",
8437 			  name, NULL);
8438 	}
8439     }
8440     ctxt->hasPErefs = 1;
8441     xmlFree(name);
8442     *str = ptr;
8443     return(entity);
8444 }
8445 
8446 /**
8447  * xmlParseDocTypeDecl:
8448  * @ctxt:  an XML parser context
8449  *
8450  * DEPRECATED: Internal function, don't use.
8451  *
8452  * parse a DOCTYPE declaration
8453  *
8454  * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8455  *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8456  *
8457  * [ VC: Root Element Type ]
8458  * The Name in the document type declaration must match the element
8459  * type of the root element.
8460  */
8461 
8462 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8463 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8464     const xmlChar *name = NULL;
8465     xmlChar *ExternalID = NULL;
8466     xmlChar *URI = NULL;
8467 
8468     /*
8469      * We know that '<!DOCTYPE' has been detected.
8470      */
8471     SKIP(9);
8472 
8473     SKIP_BLANKS;
8474 
8475     /*
8476      * Parse the DOCTYPE name.
8477      */
8478     name = xmlParseName(ctxt);
8479     if (name == NULL) {
8480 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8481 		       "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8482     }
8483     ctxt->intSubName = name;
8484 
8485     SKIP_BLANKS;
8486 
8487     /*
8488      * Check for SystemID and ExternalID
8489      */
8490     URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8491 
8492     if ((URI != NULL) || (ExternalID != NULL)) {
8493         ctxt->hasExternalSubset = 1;
8494     }
8495     ctxt->extSubURI = URI;
8496     ctxt->extSubSystem = ExternalID;
8497 
8498     SKIP_BLANKS;
8499 
8500     /*
8501      * Create and update the internal subset.
8502      */
8503     if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8504 	(!ctxt->disableSAX))
8505 	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8506     if (ctxt->instate == XML_PARSER_EOF)
8507 	return;
8508 
8509     /*
8510      * Is there any internal subset declarations ?
8511      * they are handled separately in xmlParseInternalSubset()
8512      */
8513     if (RAW == '[')
8514 	return;
8515 
8516     /*
8517      * We should be at the end of the DOCTYPE declaration.
8518      */
8519     if (RAW != '>') {
8520 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8521     }
8522     NEXT;
8523 }
8524 
8525 /**
8526  * xmlParseInternalSubset:
8527  * @ctxt:  an XML parser context
8528  *
8529  * parse the internal subset declaration
8530  *
8531  * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8532  */
8533 
8534 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8535 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8536     /*
8537      * Is there any DTD definition ?
8538      */
8539     if (RAW == '[') {
8540         int baseInputNr = ctxt->inputNr;
8541         ctxt->instate = XML_PARSER_DTD;
8542         NEXT;
8543 	/*
8544 	 * Parse the succession of Markup declarations and
8545 	 * PEReferences.
8546 	 * Subsequence (markupdecl | PEReference | S)*
8547 	 */
8548 	SKIP_BLANKS;
8549 	while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8550                (ctxt->instate != XML_PARSER_EOF)) {
8551 
8552             /*
8553              * Conditional sections are allowed from external entities included
8554              * by PE References in the internal subset.
8555              */
8556             if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8557                 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8558                 xmlParseConditionalSections(ctxt);
8559             } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8560 	        xmlParseMarkupDecl(ctxt);
8561             } else if (RAW == '%') {
8562 	        xmlParsePEReference(ctxt);
8563             } else {
8564 		xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8565                         "xmlParseInternalSubset: error detected in"
8566                         " Markup declaration\n");
8567                 xmlHaltParser(ctxt);
8568                 return;
8569             }
8570 	    SKIP_BLANKS;
8571             SHRINK;
8572             GROW;
8573 	}
8574 	if (RAW == ']') {
8575 	    NEXT;
8576 	    SKIP_BLANKS;
8577 	}
8578     }
8579 
8580     /*
8581      * We should be at the end of the DOCTYPE declaration.
8582      */
8583     if (RAW != '>') {
8584 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8585 	return;
8586     }
8587     NEXT;
8588 }
8589 
8590 #ifdef LIBXML_SAX1_ENABLED
8591 /**
8592  * xmlParseAttribute:
8593  * @ctxt:  an XML parser context
8594  * @value:  a xmlChar ** used to store the value of the attribute
8595  *
8596  * DEPRECATED: Internal function, don't use.
8597  *
8598  * parse an attribute
8599  *
8600  * [41] Attribute ::= Name Eq AttValue
8601  *
8602  * [ WFC: No External Entity References ]
8603  * Attribute values cannot contain direct or indirect entity references
8604  * to external entities.
8605  *
8606  * [ WFC: No < in Attribute Values ]
8607  * The replacement text of any entity referred to directly or indirectly in
8608  * an attribute value (other than "&lt;") must not contain a <.
8609  *
8610  * [ VC: Attribute Value Type ]
8611  * The attribute must have been declared; the value must be of the type
8612  * declared for it.
8613  *
8614  * [25] Eq ::= S? '=' S?
8615  *
8616  * With namespace:
8617  *
8618  * [NS 11] Attribute ::= QName Eq AttValue
8619  *
8620  * Also the case QName == xmlns:??? is handled independently as a namespace
8621  * definition.
8622  *
8623  * Returns the attribute name, and the value in *value.
8624  */
8625 
8626 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8627 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8628     const xmlChar *name;
8629     xmlChar *val;
8630 
8631     *value = NULL;
8632     GROW;
8633     name = xmlParseName(ctxt);
8634     if (name == NULL) {
8635 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8636 	               "error parsing attribute name\n");
8637         return(NULL);
8638     }
8639 
8640     /*
8641      * read the value
8642      */
8643     SKIP_BLANKS;
8644     if (RAW == '=') {
8645         NEXT;
8646 	SKIP_BLANKS;
8647 	val = xmlParseAttValue(ctxt);
8648 	ctxt->instate = XML_PARSER_CONTENT;
8649     } else {
8650 	xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8651 	       "Specification mandates value for attribute %s\n", name);
8652 	return(name);
8653     }
8654 
8655     /*
8656      * Check that xml:lang conforms to the specification
8657      * No more registered as an error, just generate a warning now
8658      * since this was deprecated in XML second edition
8659      */
8660     if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8661 	if (!xmlCheckLanguageID(val)) {
8662 	    xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8663 		          "Malformed value for xml:lang : %s\n",
8664 			  val, NULL);
8665 	}
8666     }
8667 
8668     /*
8669      * Check that xml:space conforms to the specification
8670      */
8671     if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8672 	if (xmlStrEqual(val, BAD_CAST "default"))
8673 	    *(ctxt->space) = 0;
8674 	else if (xmlStrEqual(val, BAD_CAST "preserve"))
8675 	    *(ctxt->space) = 1;
8676 	else {
8677 		xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8678 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8679                                  val, NULL);
8680 	}
8681     }
8682 
8683     *value = val;
8684     return(name);
8685 }
8686 
8687 /**
8688  * xmlParseStartTag:
8689  * @ctxt:  an XML parser context
8690  *
8691  * DEPRECATED: Internal function, don't use.
8692  *
8693  * Parse a start tag. Always consumes '<'.
8694  *
8695  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8696  *
8697  * [ WFC: Unique Att Spec ]
8698  * No attribute name may appear more than once in the same start-tag or
8699  * empty-element tag.
8700  *
8701  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8702  *
8703  * [ WFC: Unique Att Spec ]
8704  * No attribute name may appear more than once in the same start-tag or
8705  * empty-element tag.
8706  *
8707  * With namespace:
8708  *
8709  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8710  *
8711  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8712  *
8713  * Returns the element name parsed
8714  */
8715 
8716 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8717 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8718     const xmlChar *name;
8719     const xmlChar *attname;
8720     xmlChar *attvalue;
8721     const xmlChar **atts = ctxt->atts;
8722     int nbatts = 0;
8723     int maxatts = ctxt->maxatts;
8724     int i;
8725 
8726     if (RAW != '<') return(NULL);
8727     NEXT1;
8728 
8729     name = xmlParseName(ctxt);
8730     if (name == NULL) {
8731 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8732 	     "xmlParseStartTag: invalid element name\n");
8733         return(NULL);
8734     }
8735 
8736     /*
8737      * Now parse the attributes, it ends up with the ending
8738      *
8739      * (S Attribute)* S?
8740      */
8741     SKIP_BLANKS;
8742     GROW;
8743 
8744     while (((RAW != '>') &&
8745 	   ((RAW != '/') || (NXT(1) != '>')) &&
8746 	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8747 	attname = xmlParseAttribute(ctxt, &attvalue);
8748         if (attname == NULL) {
8749 	    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8750 			   "xmlParseStartTag: problem parsing attributes\n");
8751 	    break;
8752 	}
8753         if (attvalue != NULL) {
8754 	    /*
8755 	     * [ WFC: Unique Att Spec ]
8756 	     * No attribute name may appear more than once in the same
8757 	     * start-tag or empty-element tag.
8758 	     */
8759 	    for (i = 0; i < nbatts;i += 2) {
8760 	        if (xmlStrEqual(atts[i], attname)) {
8761 		    xmlErrAttributeDup(ctxt, NULL, attname);
8762 		    xmlFree(attvalue);
8763 		    goto failed;
8764 		}
8765 	    }
8766 	    /*
8767 	     * Add the pair to atts
8768 	     */
8769 	    if (atts == NULL) {
8770 	        maxatts = 22; /* allow for 10 attrs by default */
8771 	        atts = (const xmlChar **)
8772 		       xmlMalloc(maxatts * sizeof(xmlChar *));
8773 		if (atts == NULL) {
8774 		    xmlErrMemory(ctxt, NULL);
8775 		    if (attvalue != NULL)
8776 			xmlFree(attvalue);
8777 		    goto failed;
8778 		}
8779 		ctxt->atts = atts;
8780 		ctxt->maxatts = maxatts;
8781 	    } else if (nbatts + 4 > maxatts) {
8782 	        const xmlChar **n;
8783 
8784 	        maxatts *= 2;
8785 	        n = (const xmlChar **) xmlRealloc((void *) atts,
8786 					     maxatts * sizeof(const xmlChar *));
8787 		if (n == NULL) {
8788 		    xmlErrMemory(ctxt, NULL);
8789 		    if (attvalue != NULL)
8790 			xmlFree(attvalue);
8791 		    goto failed;
8792 		}
8793 		atts = n;
8794 		ctxt->atts = atts;
8795 		ctxt->maxatts = maxatts;
8796 	    }
8797 	    atts[nbatts++] = attname;
8798 	    atts[nbatts++] = attvalue;
8799 	    atts[nbatts] = NULL;
8800 	    atts[nbatts + 1] = NULL;
8801 	} else {
8802 	    if (attvalue != NULL)
8803 		xmlFree(attvalue);
8804 	}
8805 
8806 failed:
8807 
8808 	GROW
8809 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8810 	    break;
8811 	if (SKIP_BLANKS == 0) {
8812 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8813 			   "attributes construct error\n");
8814 	}
8815 	SHRINK;
8816         GROW;
8817     }
8818 
8819     /*
8820      * SAX: Start of Element !
8821      */
8822     if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8823 	(!ctxt->disableSAX)) {
8824 	if (nbatts > 0)
8825 	    ctxt->sax->startElement(ctxt->userData, name, atts);
8826 	else
8827 	    ctxt->sax->startElement(ctxt->userData, name, NULL);
8828     }
8829 
8830     if (atts != NULL) {
8831         /* Free only the content strings */
8832         for (i = 1;i < nbatts;i+=2)
8833 	    if (atts[i] != NULL)
8834 	       xmlFree((xmlChar *) atts[i]);
8835     }
8836     return(name);
8837 }
8838 
8839 /**
8840  * xmlParseEndTag1:
8841  * @ctxt:  an XML parser context
8842  * @line:  line of the start tag
8843  * @nsNr:  number of namespaces on the start tag
8844  *
8845  * Parse an end tag. Always consumes '</'.
8846  *
8847  * [42] ETag ::= '</' Name S? '>'
8848  *
8849  * With namespace
8850  *
8851  * [NS 9] ETag ::= '</' QName S? '>'
8852  */
8853 
8854 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8855 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8856     const xmlChar *name;
8857 
8858     GROW;
8859     if ((RAW != '<') || (NXT(1) != '/')) {
8860 	xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8861 		       "xmlParseEndTag: '</' not found\n");
8862 	return;
8863     }
8864     SKIP(2);
8865 
8866     name = xmlParseNameAndCompare(ctxt,ctxt->name);
8867 
8868     /*
8869      * We should definitely be at the ending "S? '>'" part
8870      */
8871     GROW;
8872     SKIP_BLANKS;
8873     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8874 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8875     } else
8876 	NEXT1;
8877 
8878     /*
8879      * [ WFC: Element Type Match ]
8880      * The Name in an element's end-tag must match the element type in the
8881      * start-tag.
8882      *
8883      */
8884     if (name != (xmlChar*)1) {
8885         if (name == NULL) name = BAD_CAST "unparsable";
8886         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8887 		     "Opening and ending tag mismatch: %s line %d and %s\n",
8888 		                ctxt->name, line, name);
8889     }
8890 
8891     /*
8892      * SAX: End of Tag
8893      */
8894     if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8895 	(!ctxt->disableSAX))
8896         ctxt->sax->endElement(ctxt->userData, ctxt->name);
8897 
8898     namePop(ctxt);
8899     spacePop(ctxt);
8900     return;
8901 }
8902 
8903 /**
8904  * xmlParseEndTag:
8905  * @ctxt:  an XML parser context
8906  *
8907  * DEPRECATED: Internal function, don't use.
8908  *
8909  * parse an end of tag
8910  *
8911  * [42] ETag ::= '</' Name S? '>'
8912  *
8913  * With namespace
8914  *
8915  * [NS 9] ETag ::= '</' QName S? '>'
8916  */
8917 
8918 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8919 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8920     xmlParseEndTag1(ctxt, 0);
8921 }
8922 #endif /* LIBXML_SAX1_ENABLED */
8923 
8924 /************************************************************************
8925  *									*
8926  *		      SAX 2 specific operations				*
8927  *									*
8928  ************************************************************************/
8929 
8930 /**
8931  * xmlParseQNameHashed:
8932  * @ctxt:  an XML parser context
8933  * @prefix:  pointer to store the prefix part
8934  *
8935  * parse an XML Namespace QName
8936  *
8937  * [6]  QName  ::= (Prefix ':')? LocalPart
8938  * [7]  Prefix  ::= NCName
8939  * [8]  LocalPart  ::= NCName
8940  *
8941  * Returns the Name parsed or NULL
8942  */
8943 
8944 static xmlHashedString
xmlParseQNameHashed(xmlParserCtxtPtr ctxt,xmlHashedString * prefix)8945 xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8946     xmlHashedString l, p;
8947     int start;
8948 
8949     l.name = NULL;
8950     p.name = NULL;
8951 
8952     GROW;
8953     if (ctxt->instate == XML_PARSER_EOF)
8954         return(l);
8955     start = CUR_PTR - BASE_PTR;
8956 
8957     l = xmlParseNCName(ctxt);
8958     if ((l.name != NULL) && (CUR == ':')) {
8959         NEXT;
8960 	p = l;
8961 	l = xmlParseNCName(ctxt);
8962     }
8963     if ((l.name == NULL) || (CUR == ':')) {
8964         xmlChar *tmp;
8965 
8966         l.name = NULL;
8967         p.name = NULL;
8968         if (ctxt->instate == XML_PARSER_EOF)
8969             return(l);
8970         if ((CUR != ':') && (CUR_PTR <= BASE_PTR + start))
8971             return(l);
8972         tmp = xmlParseNmtoken(ctxt);
8973         if (tmp != NULL)
8974             xmlFree(tmp);
8975         if (ctxt->instate == XML_PARSER_EOF)
8976             return(l);
8977         l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8978                                 CUR_PTR - (BASE_PTR + start));
8979         xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8980                  "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8981     }
8982 
8983     *prefix = p;
8984     return(l);
8985 }
8986 
8987 /**
8988  * xmlParseQName:
8989  * @ctxt:  an XML parser context
8990  * @prefix:  pointer to store the prefix part
8991  *
8992  * parse an XML Namespace QName
8993  *
8994  * [6]  QName  ::= (Prefix ':')? LocalPart
8995  * [7]  Prefix  ::= NCName
8996  * [8]  LocalPart  ::= NCName
8997  *
8998  * Returns the Name parsed or NULL
8999  */
9000 
9001 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)9002 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
9003     xmlHashedString n, p;
9004 
9005     n = xmlParseQNameHashed(ctxt, &p);
9006     if (n.name == NULL)
9007         return(NULL);
9008     *prefix = p.name;
9009     return(n.name);
9010 }
9011 
9012 /**
9013  * xmlParseQNameAndCompare:
9014  * @ctxt:  an XML parser context
9015  * @name:  the localname
9016  * @prefix:  the prefix, if any.
9017  *
9018  * parse an XML name and compares for match
9019  * (specialized for endtag parsing)
9020  *
9021  * Returns NULL for an illegal name, (xmlChar*) 1 for success
9022  * and the name for mismatch
9023  */
9024 
9025 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)9026 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
9027                         xmlChar const *prefix) {
9028     const xmlChar *cmp;
9029     const xmlChar *in;
9030     const xmlChar *ret;
9031     const xmlChar *prefix2;
9032 
9033     if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
9034 
9035     GROW;
9036     in = ctxt->input->cur;
9037 
9038     cmp = prefix;
9039     while (*in != 0 && *in == *cmp) {
9040 	++in;
9041 	++cmp;
9042     }
9043     if ((*cmp == 0) && (*in == ':')) {
9044         in++;
9045 	cmp = name;
9046 	while (*in != 0 && *in == *cmp) {
9047 	    ++in;
9048 	    ++cmp;
9049 	}
9050 	if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
9051 	    /* success */
9052             ctxt->input->col += in - ctxt->input->cur;
9053 	    ctxt->input->cur = in;
9054 	    return((const xmlChar*) 1);
9055 	}
9056     }
9057     /*
9058      * all strings coms from the dictionary, equality can be done directly
9059      */
9060     ret = xmlParseQName (ctxt, &prefix2);
9061     if (ret == NULL)
9062         return(NULL);
9063     if ((ret == name) && (prefix == prefix2))
9064 	return((const xmlChar*) 1);
9065     return ret;
9066 }
9067 
9068 /**
9069  * xmlParseAttValueInternal:
9070  * @ctxt:  an XML parser context
9071  * @len:  attribute len result
9072  * @alloc:  whether the attribute was reallocated as a new string
9073  * @normalize:  if 1 then further non-CDATA normalization must be done
9074  *
9075  * parse a value for an attribute.
9076  * NOTE: if no normalization is needed, the routine will return pointers
9077  *       directly from the data buffer.
9078  *
9079  * 3.3.3 Attribute-Value Normalization:
9080  * Before the value of an attribute is passed to the application or
9081  * checked for validity, the XML processor must normalize it as follows:
9082  * - a character reference is processed by appending the referenced
9083  *   character to the attribute value
9084  * - an entity reference is processed by recursively processing the
9085  *   replacement text of the entity
9086  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9087  *   appending #x20 to the normalized value, except that only a single
9088  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9089  *   parsed entity or the literal entity value of an internal parsed entity
9090  * - other characters are processed by appending them to the normalized value
9091  * If the declared value is not CDATA, then the XML processor must further
9092  * process the normalized attribute value by discarding any leading and
9093  * trailing space (#x20) characters, and by replacing sequences of space
9094  * (#x20) characters by a single space (#x20) character.
9095  * All attributes for which no declaration has been read should be treated
9096  * by a non-validating parser as if declared CDATA.
9097  *
9098  * Returns the AttValue parsed or NULL. The value has to be freed by the
9099  *     caller if it was copied, this can be detected by val[*len] == 0.
9100  */
9101 
9102 #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9103     const xmlChar *oldbase = ctxt->input->base;\
9104     GROW;\
9105     if (ctxt->instate == XML_PARSER_EOF)\
9106         return(NULL);\
9107     if (oldbase != ctxt->input->base) {\
9108         ptrdiff_t delta = ctxt->input->base - oldbase;\
9109         start = start + delta;\
9110         in = in + delta;\
9111     }\
9112     end = ctxt->input->end;
9113 
9114 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)9115 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9116                          int normalize)
9117 {
9118     xmlChar limit = 0;
9119     const xmlChar *in = NULL, *start, *end, *last;
9120     xmlChar *ret = NULL;
9121     int line, col;
9122     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9123                     XML_MAX_HUGE_LENGTH :
9124                     XML_MAX_TEXT_LENGTH;
9125 
9126     GROW;
9127     in = (xmlChar *) CUR_PTR;
9128     line = ctxt->input->line;
9129     col = ctxt->input->col;
9130     if (*in != '"' && *in != '\'') {
9131         xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9132         return (NULL);
9133     }
9134     ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9135 
9136     /*
9137      * try to handle in this routine the most common case where no
9138      * allocation of a new string is required and where content is
9139      * pure ASCII.
9140      */
9141     limit = *in++;
9142     col++;
9143     end = ctxt->input->end;
9144     start = in;
9145     if (in >= end) {
9146         GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9147     }
9148     if (normalize) {
9149         /*
9150 	 * Skip any leading spaces
9151 	 */
9152 	while ((in < end) && (*in != limit) &&
9153 	       ((*in == 0x20) || (*in == 0x9) ||
9154 	        (*in == 0xA) || (*in == 0xD))) {
9155 	    if (*in == 0xA) {
9156 	        line++; col = 1;
9157 	    } else {
9158 	        col++;
9159 	    }
9160 	    in++;
9161 	    start = in;
9162 	    if (in >= end) {
9163                 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9164                 if ((in - start) > maxLength) {
9165                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9166                                    "AttValue length too long\n");
9167                     return(NULL);
9168                 }
9169 	    }
9170 	}
9171 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9172 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9173 	    col++;
9174 	    if ((*in++ == 0x20) && (*in == 0x20)) break;
9175 	    if (in >= end) {
9176                 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9177                 if ((in - start) > maxLength) {
9178                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9179                                    "AttValue length too long\n");
9180                     return(NULL);
9181                 }
9182 	    }
9183 	}
9184 	last = in;
9185 	/*
9186 	 * skip the trailing blanks
9187 	 */
9188 	while ((last[-1] == 0x20) && (last > start)) last--;
9189 	while ((in < end) && (*in != limit) &&
9190 	       ((*in == 0x20) || (*in == 0x9) ||
9191 	        (*in == 0xA) || (*in == 0xD))) {
9192 	    if (*in == 0xA) {
9193 	        line++, col = 1;
9194 	    } else {
9195 	        col++;
9196 	    }
9197 	    in++;
9198 	    if (in >= end) {
9199 		const xmlChar *oldbase = ctxt->input->base;
9200 		GROW;
9201                 if (ctxt->instate == XML_PARSER_EOF)
9202                     return(NULL);
9203 		if (oldbase != ctxt->input->base) {
9204 		    ptrdiff_t delta = ctxt->input->base - oldbase;
9205 		    start = start + delta;
9206 		    in = in + delta;
9207 		    last = last + delta;
9208 		}
9209 		end = ctxt->input->end;
9210                 if ((in - start) > maxLength) {
9211                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9212                                    "AttValue length too long\n");
9213                     return(NULL);
9214                 }
9215 	    }
9216 	}
9217         if ((in - start) > maxLength) {
9218             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9219                            "AttValue length too long\n");
9220             return(NULL);
9221         }
9222 	if (*in != limit) goto need_complex;
9223     } else {
9224 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9225 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9226 	    in++;
9227 	    col++;
9228 	    if (in >= end) {
9229                 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9230                 if ((in - start) > maxLength) {
9231                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9232                                    "AttValue length too long\n");
9233                     return(NULL);
9234                 }
9235 	    }
9236 	}
9237 	last = in;
9238         if ((in - start) > maxLength) {
9239             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9240                            "AttValue length too long\n");
9241             return(NULL);
9242         }
9243 	if (*in != limit) goto need_complex;
9244     }
9245     in++;
9246     col++;
9247     if (len != NULL) {
9248         if (alloc) *alloc = 0;
9249         *len = last - start;
9250         ret = (xmlChar *) start;
9251     } else {
9252         if (alloc) *alloc = 1;
9253         ret = xmlStrndup(start, last - start);
9254     }
9255     CUR_PTR = in;
9256     ctxt->input->line = line;
9257     ctxt->input->col = col;
9258     return ret;
9259 need_complex:
9260     if (alloc) *alloc = 1;
9261     return xmlParseAttValueComplex(ctxt, len, normalize);
9262 }
9263 
9264 /**
9265  * xmlParseAttribute2:
9266  * @ctxt:  an XML parser context
9267  * @pref:  the element prefix
9268  * @elem:  the element name
9269  * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9270  * @value:  a xmlChar ** used to store the value of the attribute
9271  * @len:  an int * to save the length of the attribute
9272  * @alloc:  an int * to indicate if the attribute was allocated
9273  *
9274  * parse an attribute in the new SAX2 framework.
9275  *
9276  * Returns the attribute name, and the value in *value, .
9277  */
9278 
9279 static xmlHashedString
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,xmlHashedString * hprefix,xmlChar ** value,int * len,int * alloc)9280 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9281                    const xmlChar * pref, const xmlChar * elem,
9282                    xmlHashedString * hprefix, xmlChar ** value,
9283                    int *len, int *alloc)
9284 {
9285     xmlHashedString hname;
9286     const xmlChar *prefix, *name;
9287     xmlChar *val, *internal_val = NULL;
9288     int normalize = 0;
9289 
9290     *value = NULL;
9291     GROW;
9292     hname = xmlParseQNameHashed(ctxt, hprefix);
9293     if (hname.name == NULL) {
9294         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9295                        "error parsing attribute name\n");
9296         return(hname);
9297     }
9298     name = hname.name;
9299     if (hprefix->name != NULL)
9300         prefix = hprefix->name;
9301     else
9302         prefix = NULL;
9303 
9304     /*
9305      * get the type if needed
9306      */
9307     if (ctxt->attsSpecial != NULL) {
9308         int type;
9309 
9310         type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9311                                                  pref, elem,
9312                                                  prefix, name);
9313         if (type != 0)
9314             normalize = 1;
9315     }
9316 
9317     /*
9318      * read the value
9319      */
9320     SKIP_BLANKS;
9321     if (RAW == '=') {
9322         NEXT;
9323         SKIP_BLANKS;
9324         val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9325         if (val == NULL) {
9326             hname.name = NULL;
9327             return(hname);
9328         }
9329 	if (normalize) {
9330 	    /*
9331 	     * Sometimes a second normalisation pass for spaces is needed
9332 	     * but that only happens if charrefs or entities references
9333 	     * have been used in the attribute value, i.e. the attribute
9334 	     * value have been extracted in an allocated string already.
9335 	     */
9336 	    if (*alloc) {
9337 	        const xmlChar *val2;
9338 
9339 	        val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9340 		if ((val2 != NULL) && (val2 != val)) {
9341 		    xmlFree(val);
9342 		    val = (xmlChar *) val2;
9343 		}
9344 	    }
9345 	}
9346         ctxt->instate = XML_PARSER_CONTENT;
9347     } else {
9348         xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9349                           "Specification mandates value for attribute %s\n",
9350                           name);
9351         return(hname);
9352     }
9353 
9354     if (prefix == ctxt->str_xml) {
9355         /*
9356          * Check that xml:lang conforms to the specification
9357          * No more registered as an error, just generate a warning now
9358          * since this was deprecated in XML second edition
9359          */
9360         if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9361             internal_val = xmlStrndup(val, *len);
9362             if (!xmlCheckLanguageID(internal_val)) {
9363                 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9364                               "Malformed value for xml:lang : %s\n",
9365                               internal_val, NULL);
9366             }
9367         }
9368 
9369         /*
9370          * Check that xml:space conforms to the specification
9371          */
9372         if (xmlStrEqual(name, BAD_CAST "space")) {
9373             internal_val = xmlStrndup(val, *len);
9374             if (xmlStrEqual(internal_val, BAD_CAST "default"))
9375                 *(ctxt->space) = 0;
9376             else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9377                 *(ctxt->space) = 1;
9378             else {
9379                 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9380                               "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9381                               internal_val, NULL);
9382             }
9383         }
9384         if (internal_val) {
9385             xmlFree(internal_val);
9386         }
9387     }
9388 
9389     *value = val;
9390     return (hname);
9391 }
9392 
9393 ATTRIBUTE_NO_SANITIZE_INTEGER
9394 static unsigned
xmlCombineHash(unsigned v1,unsigned v2)9395 xmlCombineHash(unsigned v1, unsigned v2) {
9396     return(HASH_ROL(v1, 15) ^ v2);
9397 }
9398 
9399 /**
9400  * xmlAttrHashInsert:
9401  * @ctxt: parser context
9402  * @aindex: attribute index (this is a multiple of 5)
9403  * @sizePtr: size of the hash table (input/output value)
9404  * @name: attribute name
9405  * @uri: namespace uri
9406  * @hashValue: combined hash value of name and uri
9407  *
9408  * Inserts a new attribute into the hash table.
9409  *
9410  * Returns INT_MAX if no existing attribute was found, the attribute
9411  * index if an attribute was found, -1 if a memory allocation failed.
9412  */
9413 static int
xmlAttrHashInsert(xmlParserCtxtPtr ctxt,int aindex,unsigned * sizePtr,const xmlChar * name,const xmlChar * uri,unsigned hashValue)9414 xmlAttrHashInsert(xmlParserCtxtPtr ctxt, int aindex, unsigned *sizePtr,
9415                   const xmlChar *name, const xmlChar *uri,
9416                   unsigned hashValue) {
9417     xmlAttrHashBucket *table = ctxt->attrHash;
9418     xmlAttrHashBucket *bucket;
9419     unsigned hindex;
9420     unsigned size = *sizePtr;
9421 
9422     if (size > 0) {
9423         hindex = hashValue & (size - 1);
9424         bucket = &table[hindex];
9425 
9426         while (bucket->hashValue != 0) {
9427             const xmlChar **atts = &ctxt->atts[bucket->index];
9428 
9429             if (name == atts[0]) {
9430                 int nsIndex = (int) (ptrdiff_t) atts[2];
9431 
9432                 if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
9433                     (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml) :
9434                     (uri == ctxt->nsTab[nsIndex * 2 + 1]))
9435                     return(bucket->index);
9436             }
9437 
9438             hindex++;
9439             bucket++;
9440             if (hindex >= size) {
9441                 hindex = 0;
9442                 bucket = table;
9443             }
9444         }
9445     }
9446 
9447     /*
9448      * Grow hash table
9449      */
9450     if ((unsigned) aindex / 5 >= size / 2) {
9451         xmlAttrHashBucket *newTable;
9452         unsigned newSize, i, nindex;
9453 
9454         newSize = size ? size * 2 : 8;
9455 
9456         if (newSize > ctxt->attrHashMax) {
9457             newTable = xmlRealloc(table, newSize * sizeof(newTable[0]));
9458             if (newTable == NULL) {
9459                 xmlErrMemory(ctxt, NULL);
9460                 return(-1);
9461             }
9462 
9463             table = newTable;
9464             ctxt->attrHash = newTable;
9465             ctxt->attrHashMax = newSize;
9466         }
9467 
9468         memset(&table[size], 0, (newSize - size) * sizeof(table[0]));
9469 
9470         if (size > 0) {
9471             /*
9472              * We must search for the start of a probe sequence to make
9473              * in-place operation work.
9474              */
9475             hindex = 0;
9476             bucket = table;
9477             while (bucket->hashValue != 0) {
9478                 hindex++;
9479                 bucket++;
9480             }
9481 
9482             for (i = 0; i < size; i++) {
9483                 if (bucket->hashValue != 0) {
9484                     nindex = bucket->hashValue & (newSize - 1);
9485 
9486                     while (nindex != hindex) {
9487                         if (table[nindex].hashValue == 0) {
9488                             table[nindex] = *bucket;
9489                             bucket->hashValue = 0;
9490                             break;
9491                         }
9492 
9493                         nindex++;
9494                         if (nindex >= newSize)
9495                             nindex = 0;
9496                     }
9497                 }
9498 
9499                 hindex++;
9500                 bucket++;
9501                 if (hindex >= size) {
9502                     hindex = 0;
9503                     bucket = table;
9504                 }
9505             }
9506         }
9507 
9508         size = newSize;
9509         *sizePtr = newSize;
9510 
9511         /*
9512          * Relookup
9513          */
9514         hindex = hashValue & (size - 1);
9515         bucket = &table[hindex];
9516 
9517         while (bucket->hashValue != 0) {
9518             hindex++;
9519             bucket++;
9520             if (hindex >= size) {
9521                 hindex = 0;
9522                 bucket = table;
9523             }
9524         }
9525     }
9526 
9527     bucket->hashValue = hashValue;
9528     bucket->index = aindex;
9529 
9530     return(INT_MAX);
9531 }
9532 
9533 /**
9534  * xmlParseStartTag2:
9535  * @ctxt:  an XML parser context
9536  *
9537  * Parse a start tag. Always consumes '<'.
9538  *
9539  * This routine is called when running SAX2 parsing
9540  *
9541  * [40] STag ::= '<' Name (S Attribute)* S? '>'
9542  *
9543  * [ WFC: Unique Att Spec ]
9544  * No attribute name may appear more than once in the same start-tag or
9545  * empty-element tag.
9546  *
9547  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9548  *
9549  * [ WFC: Unique Att Spec ]
9550  * No attribute name may appear more than once in the same start-tag or
9551  * empty-element tag.
9552  *
9553  * With namespace:
9554  *
9555  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9556  *
9557  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9558  *
9559  * Returns the element name parsed
9560  */
9561 
9562 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * nbNsPtr)9563 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9564                   const xmlChar **URI, int *nbNsPtr) {
9565     xmlHashedString hlocalname;
9566     xmlHashedString hprefix;
9567     xmlHashedString hattname;
9568     xmlHashedString haprefix;
9569     const xmlChar *localname;
9570     const xmlChar *prefix;
9571     const xmlChar *attname;
9572     const xmlChar *aprefix;
9573     const xmlChar *uri;
9574     xmlChar *attvalue = NULL;
9575     const xmlChar **atts = ctxt->atts;
9576     unsigned attrHashSize = 0;
9577     int maxatts = ctxt->maxatts;
9578     int nratts, nbatts, nbdef, inputid;
9579     int i, j, nbNs, attval, nsIndex;
9580     int alloc = 0;
9581 
9582     if (RAW != '<') return(NULL);
9583     NEXT1;
9584 
9585     inputid = ctxt->input->id;
9586     nbatts = 0;
9587     nratts = 0;
9588     nbdef = 0;
9589     nbNs = 0;
9590     attval = 0;
9591 
9592     if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
9593         xmlErrMemory(ctxt, NULL);
9594         return(NULL);
9595     }
9596 
9597     hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
9598     if (hlocalname.name == NULL) {
9599 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9600 		       "StartTag: invalid element name\n");
9601         return(NULL);
9602     }
9603     localname = hlocalname.name;
9604     prefix = hprefix.name;
9605 
9606     /*
9607      * Now parse the attributes, it ends up with the ending
9608      *
9609      * (S Attribute)* S?
9610      */
9611     SKIP_BLANKS;
9612     GROW;
9613 
9614     /*
9615      * The ctxt->atts array will be ultimately passed to the SAX callback
9616      * containing five xmlChar pointers for each attribute:
9617      *
9618      * [0] attribute name
9619      * [1] attribute prefix
9620      * [2] namespace URI
9621      * [3] attribute value
9622      * [4] end of attribute value
9623      *
9624      * To save memory, we reuse this array temporarily and store integers
9625      * in these pointer variables.
9626      *
9627      * [0] attribute name
9628      * [1] attribute prefix
9629      * [2] hash value of attribute prefix, and later namespace index
9630      * [3] for non-allocated values: ptrdiff_t offset into input buffer
9631      * [4] for non-allocated values: ptrdiff_t offset into input buffer
9632      *
9633      * The ctxt->attallocs array contains an additional unsigned int for
9634      * each attribute, containing the hash value of the attribute name
9635      * and the alloc flag in bit 31.
9636      */
9637 
9638     while (((RAW != '>') &&
9639 	   ((RAW != '/') || (NXT(1) != '>')) &&
9640 	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9641 	int len = -1;
9642 
9643 	hattname = xmlParseAttribute2(ctxt, prefix, localname,
9644                                           &haprefix, &attvalue, &len,
9645                                           &alloc);
9646         if (hattname.name == NULL) {
9647 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9648 	         "xmlParseStartTag: problem parsing attributes\n");
9649 	    break;
9650 	}
9651         if (attvalue == NULL)
9652             goto next_attr;
9653         attname = hattname.name;
9654         aprefix = haprefix.name;
9655 	if (len < 0) len = xmlStrlen(attvalue);
9656 
9657         if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9658             xmlHashedString huri;
9659             xmlURIPtr parsedUri;
9660 
9661             huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9662             uri = huri.name;
9663             if (uri == NULL) {
9664                 xmlErrMemory(ctxt, NULL);
9665                 goto next_attr;
9666             }
9667             if (*uri != 0) {
9668                 parsedUri = xmlParseURI((const char *) uri);
9669                 if (parsedUri == NULL) {
9670                     xmlNsErr(ctxt, XML_WAR_NS_URI,
9671                              "xmlns: '%s' is not a valid URI\n",
9672                                        uri, NULL, NULL);
9673                 } else {
9674                     if (parsedUri->scheme == NULL) {
9675                         xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9676                                   "xmlns: URI %s is not absolute\n",
9677                                   uri, NULL, NULL);
9678                     }
9679                     xmlFreeURI(parsedUri);
9680                 }
9681                 if (uri == ctxt->str_xml_ns) {
9682                     if (attname != ctxt->str_xml) {
9683                         xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9684                      "xml namespace URI cannot be the default namespace\n",
9685                                  NULL, NULL, NULL);
9686                     }
9687                     goto next_attr;
9688                 }
9689                 if ((len == 29) &&
9690                     (xmlStrEqual(uri,
9691                              BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9692                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9693                          "reuse of the xmlns namespace name is forbidden\n",
9694                              NULL, NULL, NULL);
9695                     goto next_attr;
9696                 }
9697             }
9698 
9699             if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
9700                 nbNs++;
9701         } else if (aprefix == ctxt->str_xmlns) {
9702             xmlHashedString huri;
9703             xmlURIPtr parsedUri;
9704 
9705             huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9706             uri = huri.name;
9707             if (uri == NULL) {
9708                 xmlErrMemory(ctxt, NULL);
9709                 goto next_attr;
9710             }
9711 
9712             if (attname == ctxt->str_xml) {
9713                 if (uri != ctxt->str_xml_ns) {
9714                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9715                              "xml namespace prefix mapped to wrong URI\n",
9716                              NULL, NULL, NULL);
9717                 }
9718                 /*
9719                  * Do not keep a namespace definition node
9720                  */
9721                 goto next_attr;
9722             }
9723             if (uri == ctxt->str_xml_ns) {
9724                 if (attname != ctxt->str_xml) {
9725                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9726                              "xml namespace URI mapped to wrong prefix\n",
9727                              NULL, NULL, NULL);
9728                 }
9729                 goto next_attr;
9730             }
9731             if (attname == ctxt->str_xmlns) {
9732                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9733                          "redefinition of the xmlns prefix is forbidden\n",
9734                          NULL, NULL, NULL);
9735                 goto next_attr;
9736             }
9737             if ((len == 29) &&
9738                 (xmlStrEqual(uri,
9739                              BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9740                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9741                          "reuse of the xmlns namespace name is forbidden\n",
9742                          NULL, NULL, NULL);
9743                 goto next_attr;
9744             }
9745             if ((uri == NULL) || (uri[0] == 0)) {
9746                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9747                          "xmlns:%s: Empty XML namespace is not allowed\n",
9748                               attname, NULL, NULL);
9749                 goto next_attr;
9750             } else {
9751                 parsedUri = xmlParseURI((const char *) uri);
9752                 if (parsedUri == NULL) {
9753                     xmlNsErr(ctxt, XML_WAR_NS_URI,
9754                          "xmlns:%s: '%s' is not a valid URI\n",
9755                                        attname, uri, NULL);
9756                 } else {
9757                     if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
9758                         xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9759                                   "xmlns:%s: URI %s is not absolute\n",
9760                                   attname, uri, NULL);
9761                     }
9762                     xmlFreeURI(parsedUri);
9763                 }
9764             }
9765 
9766             if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
9767                 nbNs++;
9768         } else {
9769             /*
9770              * Populate attributes array, see above for repurposing
9771              * of xmlChar pointers.
9772              */
9773             if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9774                 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9775                     goto next_attr;
9776                 }
9777                 maxatts = ctxt->maxatts;
9778                 atts = ctxt->atts;
9779             }
9780             ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9781                                         ((unsigned) alloc << 31);
9782             atts[nbatts++] = attname;
9783             atts[nbatts++] = aprefix;
9784             atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue;
9785             if (alloc) {
9786                 atts[nbatts++] = attvalue;
9787                 attvalue += len;
9788                 atts[nbatts++] = attvalue;
9789             } else {
9790                 /*
9791                  * attvalue points into the input buffer which can be
9792                  * reallocated. Store differences to input->base instead.
9793                  * The pointers will be reconstructed later.
9794                  */
9795                 atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9796                 attvalue += len;
9797                 atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9798             }
9799             /*
9800              * tag if some deallocation is needed
9801              */
9802             if (alloc != 0) attval = 1;
9803             attvalue = NULL; /* moved into atts */
9804         }
9805 
9806 next_attr:
9807         if ((attvalue != NULL) && (alloc != 0)) {
9808             xmlFree(attvalue);
9809             attvalue = NULL;
9810         }
9811 
9812 	GROW
9813         if (ctxt->instate == XML_PARSER_EOF)
9814             break;
9815 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9816 	    break;
9817 	if (SKIP_BLANKS == 0) {
9818 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9819 			   "attributes construct error\n");
9820 	    break;
9821 	}
9822         GROW;
9823     }
9824 
9825     if (ctxt->input->id != inputid) {
9826         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9827                     "Unexpected change of input\n");
9828         localname = NULL;
9829         goto done;
9830     }
9831 
9832     /*
9833      * Namespaces from default attributes
9834      */
9835     if (ctxt->attsDefault != NULL) {
9836         xmlDefAttrsPtr defaults;
9837 
9838 	defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9839 	if (defaults != NULL) {
9840 	    for (i = 0; i < defaults->nbAttrs; i++) {
9841                 xmlDefAttr *attr = &defaults->attrs[i];
9842 
9843 	        attname = attr->name.name;
9844 		aprefix = attr->prefix.name;
9845 
9846 		if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9847                     xmlParserEntityCheck(ctxt, attr->expandedSize);
9848 
9849                     if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9850                         nbNs++;
9851 		} else if (aprefix == ctxt->str_xmlns) {
9852                     xmlParserEntityCheck(ctxt, attr->expandedSize);
9853 
9854                     if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9855                                       NULL, 1) > 0)
9856                         nbNs++;
9857 		}
9858 	    }
9859 	}
9860     }
9861 
9862     /*
9863      * Resolve attribute namespaces
9864      */
9865     for (i = 0; i < nbatts; i += 5) {
9866         attname = atts[i];
9867         aprefix = atts[i+1];
9868 
9869         /*
9870 	* The default namespace does not apply to attribute names.
9871 	*/
9872 	if (aprefix == NULL) {
9873             nsIndex = NS_INDEX_EMPTY;
9874         } else if (aprefix == ctxt->str_xml) {
9875             nsIndex = NS_INDEX_XML;
9876         } else {
9877             haprefix.name = aprefix;
9878             haprefix.hashValue = (size_t) atts[i+2];
9879             nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9880 	    if (nsIndex == INT_MAX) {
9881                 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9882 		    "Namespace prefix %s for %s on %s is not defined\n",
9883 		    aprefix, attname, localname);
9884                 nsIndex = NS_INDEX_EMPTY;
9885             }
9886         }
9887 
9888         atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex;
9889     }
9890 
9891     /*
9892      * Verify that attribute names are unique.
9893      */
9894     for (i = 0, j = 0; j < nratts; i += 5, j++) {
9895         const xmlChar *nsuri;
9896         unsigned hashValue, nameHashValue, uriHashValue;
9897         int res;
9898 
9899         attname = atts[i];
9900         aprefix = atts[i+1];
9901         nsIndex = (ptrdiff_t) atts[i+2];
9902         /* Hash values always have bit 31 set, see dict.c */
9903         nameHashValue = ctxt->attallocs[j] | 0x80000000;
9904 
9905         if (nsIndex == NS_INDEX_EMPTY) {
9906             nsuri = NULL;
9907             uriHashValue = URI_HASH_EMPTY;
9908         } else if (nsIndex == NS_INDEX_XML) {
9909             nsuri = ctxt->str_xml_ns;
9910             uriHashValue = URI_HASH_XML;
9911         } else {
9912             nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9913             uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9914         }
9915 
9916         hashValue = xmlCombineHash(nameHashValue, uriHashValue);
9917         res = xmlAttrHashInsert(ctxt, i, &attrHashSize, attname, nsuri,
9918                                 hashValue);
9919         if (res < 0)
9920             continue;
9921 
9922 	/*
9923 	 * [ WFC: Unique Att Spec ]
9924 	 * No attribute name may appear more than once in the same
9925 	 * start-tag or empty-element tag.
9926 	 * As extended by the Namespace in XML REC.
9927 	 */
9928         if (res < INT_MAX) {
9929             if (aprefix == atts[res+1]) {
9930                 xmlErrAttributeDup(ctxt, aprefix, attname);
9931             } else {
9932                 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9933                          "Namespaced Attribute %s in '%s' redefined\n",
9934                          attname, nsuri, NULL);
9935             }
9936 	}
9937     }
9938 
9939     /*
9940      * Default attributes
9941      */
9942     if (ctxt->attsDefault != NULL) {
9943         xmlDefAttrsPtr defaults;
9944 
9945 	defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9946 	if (defaults != NULL) {
9947 	    for (i = 0; i < defaults->nbAttrs; i++) {
9948                 xmlDefAttr *attr = &defaults->attrs[i];
9949                 const xmlChar *nsuri;
9950                 unsigned hashValue, uriHashValue;
9951                 int res;
9952 
9953 	        attname = attr->name.name;
9954 		aprefix = attr->prefix.name;
9955 
9956 		if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9957                     continue;
9958 		if (aprefix == ctxt->str_xmlns)
9959                     continue;
9960 
9961                 if (aprefix == NULL) {
9962                     nsIndex = NS_INDEX_EMPTY;
9963                     nsuri = NULL;
9964                     uriHashValue = URI_HASH_EMPTY;
9965                 } if (aprefix == ctxt->str_xml) {
9966                     nsIndex = NS_INDEX_XML;
9967                     nsuri = ctxt->str_xml_ns;
9968                     uriHashValue = URI_HASH_XML;
9969                 } else if (aprefix != NULL) {
9970                     nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9971                     if (nsIndex == INT_MAX) {
9972                         xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9973                                  "Namespace prefix %s for %s on %s is not "
9974                                  "defined\n",
9975                                  aprefix, attname, localname);
9976                         nsIndex = NS_INDEX_EMPTY;
9977                         nsuri = NULL;
9978                         uriHashValue = URI_HASH_EMPTY;
9979                     } else {
9980                         nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9981                         uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9982                     }
9983                 }
9984 
9985                 /*
9986                  * Check whether the attribute exists
9987                  */
9988                 hashValue = xmlCombineHash(attr->name.hashValue, uriHashValue);
9989                 res = xmlAttrHashInsert(ctxt, nbatts, &attrHashSize, attname,
9990                                         nsuri, hashValue);
9991                 if (res < 0)
9992                     continue;
9993                 if (res < INT_MAX) {
9994                     if (aprefix == atts[res+1])
9995                         continue;
9996                     xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9997                              "Namespaced Attribute %s in '%s' redefined\n",
9998                              attname, nsuri, NULL);
9999                 }
10000 
10001                 xmlParserEntityCheck(ctxt, attr->expandedSize);
10002 
10003                 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
10004                     if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
10005                         localname = NULL;
10006                         goto done;
10007                     }
10008                     maxatts = ctxt->maxatts;
10009                     atts = ctxt->atts;
10010                 }
10011 
10012                 atts[nbatts++] = attname;
10013                 atts[nbatts++] = aprefix;
10014                 atts[nbatts++] = (const xmlChar *) (ptrdiff_t) nsIndex;
10015                 atts[nbatts++] = attr->value.name;
10016                 atts[nbatts++] = attr->valueEnd;
10017                 if ((ctxt->standalone == 1) && (attr->external != 0)) {
10018                     xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
10019                             "standalone: attribute %s on %s defaulted "
10020                             "from external subset\n",
10021                             attname, localname);
10022                 }
10023                 nbdef++;
10024 	    }
10025 	}
10026     }
10027 
10028     /*
10029      * Reconstruct attribute pointers
10030      */
10031     for (i = 0, j = 0; i < nbatts; i += 5, j++) {
10032         /* namespace URI */
10033         nsIndex = (ptrdiff_t) atts[i+2];
10034         if (nsIndex == INT_MAX)
10035             atts[i+2] = NULL;
10036         else if (nsIndex == INT_MAX - 1)
10037             atts[i+2] = ctxt->str_xml_ns;
10038         else
10039             atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
10040 
10041         if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
10042             atts[i+3] = BASE_PTR + (ptrdiff_t) atts[i+3];  /* value */
10043             atts[i+4] = BASE_PTR + (ptrdiff_t) atts[i+4];  /* valuend */
10044         }
10045     }
10046 
10047     uri = xmlParserNsLookupUri(ctxt, &hprefix);
10048     if ((prefix != NULL) && (uri == NULL)) {
10049 	xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
10050 	         "Namespace prefix %s on %s is not defined\n",
10051 		 prefix, localname, NULL);
10052     }
10053     *pref = prefix;
10054     *URI = uri;
10055 
10056     /*
10057      * SAX callback
10058      */
10059     if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
10060 	(!ctxt->disableSAX)) {
10061 	if (nbNs > 0)
10062 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
10063                           nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
10064 			  nbatts / 5, nbdef, atts);
10065 	else
10066 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
10067                           0, NULL, nbatts / 5, nbdef, atts);
10068     }
10069 
10070 done:
10071     /*
10072      * Free allocated attribute values
10073      */
10074     if (attval != 0) {
10075 	for (i = 0, j = 0; j < nratts; i += 5, j++)
10076 	    if (ctxt->attallocs[j] & 0x80000000)
10077 	        xmlFree((xmlChar *) atts[i+3]);
10078     }
10079 
10080     *nbNsPtr = nbNs;
10081     return(localname);
10082 }
10083 
10084 /**
10085  * xmlParseEndTag2:
10086  * @ctxt:  an XML parser context
10087  * @line:  line of the start tag
10088  * @nsNr:  number of namespaces on the start tag
10089  *
10090  * Parse an end tag. Always consumes '</'.
10091  *
10092  * [42] ETag ::= '</' Name S? '>'
10093  *
10094  * With namespace
10095  *
10096  * [NS 9] ETag ::= '</' QName S? '>'
10097  */
10098 
10099 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlStartTag * tag)10100 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
10101     const xmlChar *name;
10102 
10103     GROW;
10104     if ((RAW != '<') || (NXT(1) != '/')) {
10105 	xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
10106 	return;
10107     }
10108     SKIP(2);
10109 
10110     if (tag->prefix == NULL)
10111         name = xmlParseNameAndCompare(ctxt, ctxt->name);
10112     else
10113         name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
10114 
10115     /*
10116      * We should definitely be at the ending "S? '>'" part
10117      */
10118     GROW;
10119     if (ctxt->instate == XML_PARSER_EOF)
10120         return;
10121     SKIP_BLANKS;
10122     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
10123 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
10124     } else
10125 	NEXT1;
10126 
10127     /*
10128      * [ WFC: Element Type Match ]
10129      * The Name in an element's end-tag must match the element type in the
10130      * start-tag.
10131      *
10132      */
10133     if (name != (xmlChar*)1) {
10134         if (name == NULL) name = BAD_CAST "unparsable";
10135         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
10136 		     "Opening and ending tag mismatch: %s line %d and %s\n",
10137 		                ctxt->name, tag->line, name);
10138     }
10139 
10140     /*
10141      * SAX: End of Tag
10142      */
10143     if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10144 	(!ctxt->disableSAX))
10145 	ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
10146                                 tag->URI);
10147 
10148     spacePop(ctxt);
10149     if (tag->nsNr != 0)
10150 	xmlParserNsPop(ctxt, tag->nsNr);
10151 }
10152 
10153 /**
10154  * xmlParseCDSect:
10155  * @ctxt:  an XML parser context
10156  *
10157  * DEPRECATED: Internal function, don't use.
10158  *
10159  * Parse escaped pure raw content. Always consumes '<!['.
10160  *
10161  * [18] CDSect ::= CDStart CData CDEnd
10162  *
10163  * [19] CDStart ::= '<![CDATA['
10164  *
10165  * [20] Data ::= (Char* - (Char* ']]>' Char*))
10166  *
10167  * [21] CDEnd ::= ']]>'
10168  */
10169 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)10170 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
10171     xmlChar *buf = NULL;
10172     int len = 0;
10173     int size = XML_PARSER_BUFFER_SIZE;
10174     int r, rl;
10175     int	s, sl;
10176     int cur, l;
10177     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10178                     XML_MAX_HUGE_LENGTH :
10179                     XML_MAX_TEXT_LENGTH;
10180 
10181     if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
10182         return;
10183     SKIP(3);
10184 
10185     if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
10186         return;
10187     SKIP(6);
10188 
10189     ctxt->instate = XML_PARSER_CDATA_SECTION;
10190     r = CUR_CHAR(rl);
10191     if (!IS_CHAR(r)) {
10192 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
10193         goto out;
10194     }
10195     NEXTL(rl);
10196     s = CUR_CHAR(sl);
10197     if (!IS_CHAR(s)) {
10198 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
10199         goto out;
10200     }
10201     NEXTL(sl);
10202     cur = CUR_CHAR(l);
10203     buf = (xmlChar *) xmlMallocAtomic(size);
10204     if (buf == NULL) {
10205 	xmlErrMemory(ctxt, NULL);
10206         goto out;
10207     }
10208     while (IS_CHAR(cur) &&
10209            ((r != ']') || (s != ']') || (cur != '>'))) {
10210 	if (len + 5 >= size) {
10211 	    xmlChar *tmp;
10212 
10213 	    tmp = (xmlChar *) xmlRealloc(buf, size * 2);
10214 	    if (tmp == NULL) {
10215 		xmlErrMemory(ctxt, NULL);
10216                 goto out;
10217 	    }
10218 	    buf = tmp;
10219 	    size *= 2;
10220 	}
10221 	COPY_BUF(buf, len, r);
10222         if (len > maxLength) {
10223             xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
10224                            "CData section too big found\n");
10225             goto out;
10226         }
10227 	r = s;
10228 	rl = sl;
10229 	s = cur;
10230 	sl = l;
10231 	NEXTL(l);
10232 	cur = CUR_CHAR(l);
10233     }
10234     buf[len] = 0;
10235     if (ctxt->instate == XML_PARSER_EOF) {
10236         xmlFree(buf);
10237         return;
10238     }
10239     if (cur != '>') {
10240 	xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
10241 	                     "CData section not finished\n%.50s\n", buf);
10242         goto out;
10243     }
10244     NEXTL(l);
10245 
10246     /*
10247      * OK the buffer is to be consumed as cdata.
10248      */
10249     if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10250 	if (ctxt->sax->cdataBlock != NULL)
10251 	    ctxt->sax->cdataBlock(ctxt->userData, buf, len);
10252 	else if (ctxt->sax->characters != NULL)
10253 	    ctxt->sax->characters(ctxt->userData, buf, len);
10254     }
10255 
10256 out:
10257     if (ctxt->instate != XML_PARSER_EOF)
10258         ctxt->instate = XML_PARSER_CONTENT;
10259     xmlFree(buf);
10260 }
10261 
10262 /**
10263  * xmlParseContentInternal:
10264  * @ctxt:  an XML parser context
10265  *
10266  * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
10267  * unexpected EOF to the caller.
10268  */
10269 
10270 static void
xmlParseContentInternal(xmlParserCtxtPtr ctxt)10271 xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
10272     int nameNr = ctxt->nameNr;
10273 
10274     GROW;
10275     while ((ctxt->input->cur < ctxt->input->end) &&
10276 	   (ctxt->instate != XML_PARSER_EOF)) {
10277 	const xmlChar *cur = ctxt->input->cur;
10278 
10279 	/*
10280 	 * First case : a Processing Instruction.
10281 	 */
10282 	if ((*cur == '<') && (cur[1] == '?')) {
10283 	    xmlParsePI(ctxt);
10284 	}
10285 
10286 	/*
10287 	 * Second case : a CDSection
10288 	 */
10289 	/* 2.6.0 test was *cur not RAW */
10290 	else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
10291 	    xmlParseCDSect(ctxt);
10292 	}
10293 
10294 	/*
10295 	 * Third case :  a comment
10296 	 */
10297 	else if ((*cur == '<') && (NXT(1) == '!') &&
10298 		 (NXT(2) == '-') && (NXT(3) == '-')) {
10299 	    xmlParseComment(ctxt);
10300 	    ctxt->instate = XML_PARSER_CONTENT;
10301 	}
10302 
10303 	/*
10304 	 * Fourth case :  a sub-element.
10305 	 */
10306 	else if (*cur == '<') {
10307             if (NXT(1) == '/') {
10308                 if (ctxt->nameNr <= nameNr)
10309                     break;
10310 	        xmlParseElementEnd(ctxt);
10311             } else {
10312 	        xmlParseElementStart(ctxt);
10313             }
10314 	}
10315 
10316 	/*
10317 	 * Fifth case : a reference. If if has not been resolved,
10318 	 *    parsing returns it's Name, create the node
10319 	 */
10320 
10321 	else if (*cur == '&') {
10322 	    xmlParseReference(ctxt);
10323 	}
10324 
10325 	/*
10326 	 * Last case, text. Note that References are handled directly.
10327 	 */
10328 	else {
10329 	    xmlParseCharDataInternal(ctxt, 0);
10330 	}
10331 
10332 	SHRINK;
10333 	GROW;
10334     }
10335 }
10336 
10337 /**
10338  * xmlParseContent:
10339  * @ctxt:  an XML parser context
10340  *
10341  * Parse a content sequence. Stops at EOF or '</'.
10342  *
10343  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10344  */
10345 
10346 void
xmlParseContent(xmlParserCtxtPtr ctxt)10347 xmlParseContent(xmlParserCtxtPtr ctxt) {
10348     int nameNr = ctxt->nameNr;
10349 
10350     xmlParseContentInternal(ctxt);
10351 
10352     if ((ctxt->instate != XML_PARSER_EOF) &&
10353         (ctxt->errNo == XML_ERR_OK) &&
10354         (ctxt->nameNr > nameNr)) {
10355         const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10356         int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10357         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10358                 "Premature end of data in tag %s line %d\n",
10359 		name, line, NULL);
10360     }
10361 }
10362 
10363 /**
10364  * xmlParseElement:
10365  * @ctxt:  an XML parser context
10366  *
10367  * DEPRECATED: Internal function, don't use.
10368  *
10369  * parse an XML element
10370  *
10371  * [39] element ::= EmptyElemTag | STag content ETag
10372  *
10373  * [ WFC: Element Type Match ]
10374  * The Name in an element's end-tag must match the element type in the
10375  * start-tag.
10376  *
10377  */
10378 
10379 void
xmlParseElement(xmlParserCtxtPtr ctxt)10380 xmlParseElement(xmlParserCtxtPtr ctxt) {
10381     if (xmlParseElementStart(ctxt) != 0)
10382         return;
10383 
10384     xmlParseContentInternal(ctxt);
10385     if (ctxt->instate == XML_PARSER_EOF)
10386 	return;
10387 
10388     if (ctxt->input->cur >= ctxt->input->end) {
10389         if (ctxt->errNo == XML_ERR_OK) {
10390             const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10391             int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10392             xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10393                     "Premature end of data in tag %s line %d\n",
10394                     name, line, NULL);
10395         }
10396         return;
10397     }
10398 
10399     xmlParseElementEnd(ctxt);
10400 }
10401 
10402 /**
10403  * xmlParseElementStart:
10404  * @ctxt:  an XML parser context
10405  *
10406  * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10407  * opening tag was parsed, 1 if an empty element was parsed.
10408  *
10409  * Always consumes '<'.
10410  */
10411 static int
xmlParseElementStart(xmlParserCtxtPtr ctxt)10412 xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10413     const xmlChar *name;
10414     const xmlChar *prefix = NULL;
10415     const xmlChar *URI = NULL;
10416     xmlParserNodeInfo node_info;
10417     int line;
10418     xmlNodePtr cur;
10419     int nbNs = 0;
10420 
10421     if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10422         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10423 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10424 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10425 			  xmlParserMaxDepth);
10426 	xmlHaltParser(ctxt);
10427 	return(-1);
10428     }
10429 
10430     /* Capture start position */
10431     if (ctxt->record_info) {
10432         node_info.begin_pos = ctxt->input->consumed +
10433                           (CUR_PTR - ctxt->input->base);
10434 	node_info.begin_line = ctxt->input->line;
10435     }
10436 
10437     if (ctxt->spaceNr == 0)
10438 	spacePush(ctxt, -1);
10439     else if (*ctxt->space == -2)
10440 	spacePush(ctxt, -1);
10441     else
10442 	spacePush(ctxt, *ctxt->space);
10443 
10444     line = ctxt->input->line;
10445 #ifdef LIBXML_SAX1_ENABLED
10446     if (ctxt->sax2)
10447 #endif /* LIBXML_SAX1_ENABLED */
10448         name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
10449 #ifdef LIBXML_SAX1_ENABLED
10450     else
10451 	name = xmlParseStartTag(ctxt);
10452 #endif /* LIBXML_SAX1_ENABLED */
10453     if (ctxt->instate == XML_PARSER_EOF)
10454 	return(-1);
10455     if (name == NULL) {
10456 	spacePop(ctxt);
10457         return(-1);
10458     }
10459     nameNsPush(ctxt, name, prefix, URI, line, nbNs);
10460     cur = ctxt->node;
10461 
10462 #ifdef LIBXML_VALID_ENABLED
10463     /*
10464      * [ VC: Root Element Type ]
10465      * The Name in the document type declaration must match the element
10466      * type of the root element.
10467      */
10468     if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10469         ctxt->node && (ctxt->node == ctxt->myDoc->children))
10470         ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10471 #endif /* LIBXML_VALID_ENABLED */
10472 
10473     /*
10474      * Check for an Empty Element.
10475      */
10476     if ((RAW == '/') && (NXT(1) == '>')) {
10477         SKIP(2);
10478 	if (ctxt->sax2) {
10479 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10480 		(!ctxt->disableSAX))
10481 		ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10482 #ifdef LIBXML_SAX1_ENABLED
10483 	} else {
10484 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10485 		(!ctxt->disableSAX))
10486 		ctxt->sax->endElement(ctxt->userData, name);
10487 #endif /* LIBXML_SAX1_ENABLED */
10488 	}
10489 	namePop(ctxt);
10490 	spacePop(ctxt);
10491 	if (nbNs > 0)
10492 	    xmlParserNsPop(ctxt, nbNs);
10493 	if (cur != NULL && ctxt->record_info) {
10494             node_info.node = cur;
10495             node_info.end_pos = ctxt->input->consumed +
10496                                 (CUR_PTR - ctxt->input->base);
10497             node_info.end_line = ctxt->input->line;
10498             xmlParserAddNodeInfo(ctxt, &node_info);
10499 	}
10500 	return(1);
10501     }
10502     if (RAW == '>') {
10503         NEXT1;
10504         if (cur != NULL && ctxt->record_info) {
10505             node_info.node = cur;
10506             node_info.end_pos = 0;
10507             node_info.end_line = 0;
10508             xmlParserAddNodeInfo(ctxt, &node_info);
10509         }
10510     } else {
10511         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10512 		     "Couldn't find end of Start Tag %s line %d\n",
10513 		                name, line, NULL);
10514 
10515 	/*
10516 	 * end of parsing of this node.
10517 	 */
10518 	nodePop(ctxt);
10519 	namePop(ctxt);
10520 	spacePop(ctxt);
10521 	if (nbNs > 0)
10522 	    xmlParserNsPop(ctxt, nbNs);
10523 	return(-1);
10524     }
10525 
10526     return(0);
10527 }
10528 
10529 /**
10530  * xmlParseElementEnd:
10531  * @ctxt:  an XML parser context
10532  *
10533  * Parse the end of an XML element. Always consumes '</'.
10534  */
10535 static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt)10536 xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10537     xmlNodePtr cur = ctxt->node;
10538 
10539     if (ctxt->nameNr <= 0) {
10540         if ((RAW == '<') && (NXT(1) == '/'))
10541             SKIP(2);
10542         return;
10543     }
10544 
10545     /*
10546      * parse the end of tag: '</' should be here.
10547      */
10548     if (ctxt->sax2) {
10549 	xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10550 	namePop(ctxt);
10551     }
10552 #ifdef LIBXML_SAX1_ENABLED
10553     else
10554 	xmlParseEndTag1(ctxt, 0);
10555 #endif /* LIBXML_SAX1_ENABLED */
10556 
10557     /*
10558      * Capture end position
10559      */
10560     if (cur != NULL && ctxt->record_info) {
10561         xmlParserNodeInfoPtr node_info;
10562 
10563         node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
10564         if (node_info != NULL) {
10565             node_info->end_pos = ctxt->input->consumed +
10566                                  (CUR_PTR - ctxt->input->base);
10567             node_info->end_line = ctxt->input->line;
10568         }
10569     }
10570 }
10571 
10572 /**
10573  * xmlParseVersionNum:
10574  * @ctxt:  an XML parser context
10575  *
10576  * DEPRECATED: Internal function, don't use.
10577  *
10578  * parse the XML version value.
10579  *
10580  * [26] VersionNum ::= '1.' [0-9]+
10581  *
10582  * In practice allow [0-9].[0-9]+ at that level
10583  *
10584  * Returns the string giving the XML version number, or NULL
10585  */
10586 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10587 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10588     xmlChar *buf = NULL;
10589     int len = 0;
10590     int size = 10;
10591     xmlChar cur;
10592 
10593     buf = (xmlChar *) xmlMallocAtomic(size);
10594     if (buf == NULL) {
10595 	xmlErrMemory(ctxt, NULL);
10596 	return(NULL);
10597     }
10598     cur = CUR;
10599     if (!((cur >= '0') && (cur <= '9'))) {
10600 	xmlFree(buf);
10601 	return(NULL);
10602     }
10603     buf[len++] = cur;
10604     NEXT;
10605     cur=CUR;
10606     if (cur != '.') {
10607 	xmlFree(buf);
10608 	return(NULL);
10609     }
10610     buf[len++] = cur;
10611     NEXT;
10612     cur=CUR;
10613     while ((cur >= '0') && (cur <= '9')) {
10614 	if (len + 1 >= size) {
10615 	    xmlChar *tmp;
10616 
10617 	    size *= 2;
10618 	    tmp = (xmlChar *) xmlRealloc(buf, size);
10619 	    if (tmp == NULL) {
10620 	        xmlFree(buf);
10621 		xmlErrMemory(ctxt, NULL);
10622 		return(NULL);
10623 	    }
10624 	    buf = tmp;
10625 	}
10626 	buf[len++] = cur;
10627 	NEXT;
10628 	cur=CUR;
10629     }
10630     buf[len] = 0;
10631     return(buf);
10632 }
10633 
10634 /**
10635  * xmlParseVersionInfo:
10636  * @ctxt:  an XML parser context
10637  *
10638  * DEPRECATED: Internal function, don't use.
10639  *
10640  * parse the XML version.
10641  *
10642  * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10643  *
10644  * [25] Eq ::= S? '=' S?
10645  *
10646  * Returns the version string, e.g. "1.0"
10647  */
10648 
10649 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10650 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10651     xmlChar *version = NULL;
10652 
10653     if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10654 	SKIP(7);
10655 	SKIP_BLANKS;
10656 	if (RAW != '=') {
10657 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10658 	    return(NULL);
10659         }
10660 	NEXT;
10661 	SKIP_BLANKS;
10662 	if (RAW == '"') {
10663 	    NEXT;
10664 	    version = xmlParseVersionNum(ctxt);
10665 	    if (RAW != '"') {
10666 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10667 	    } else
10668 	        NEXT;
10669 	} else if (RAW == '\''){
10670 	    NEXT;
10671 	    version = xmlParseVersionNum(ctxt);
10672 	    if (RAW != '\'') {
10673 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10674 	    } else
10675 	        NEXT;
10676 	} else {
10677 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10678 	}
10679     }
10680     return(version);
10681 }
10682 
10683 /**
10684  * xmlParseEncName:
10685  * @ctxt:  an XML parser context
10686  *
10687  * DEPRECATED: Internal function, don't use.
10688  *
10689  * parse the XML encoding name
10690  *
10691  * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10692  *
10693  * Returns the encoding name value or NULL
10694  */
10695 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10696 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10697     xmlChar *buf = NULL;
10698     int len = 0;
10699     int size = 10;
10700     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10701                     XML_MAX_TEXT_LENGTH :
10702                     XML_MAX_NAME_LENGTH;
10703     xmlChar cur;
10704 
10705     cur = CUR;
10706     if (((cur >= 'a') && (cur <= 'z')) ||
10707         ((cur >= 'A') && (cur <= 'Z'))) {
10708 	buf = (xmlChar *) xmlMallocAtomic(size);
10709 	if (buf == NULL) {
10710 	    xmlErrMemory(ctxt, NULL);
10711 	    return(NULL);
10712 	}
10713 
10714 	buf[len++] = cur;
10715 	NEXT;
10716 	cur = CUR;
10717 	while (((cur >= 'a') && (cur <= 'z')) ||
10718 	       ((cur >= 'A') && (cur <= 'Z')) ||
10719 	       ((cur >= '0') && (cur <= '9')) ||
10720 	       (cur == '.') || (cur == '_') ||
10721 	       (cur == '-')) {
10722 	    if (len + 1 >= size) {
10723 	        xmlChar *tmp;
10724 
10725 		size *= 2;
10726 		tmp = (xmlChar *) xmlRealloc(buf, size);
10727 		if (tmp == NULL) {
10728 		    xmlErrMemory(ctxt, NULL);
10729 		    xmlFree(buf);
10730 		    return(NULL);
10731 		}
10732 		buf = tmp;
10733 	    }
10734 	    buf[len++] = cur;
10735             if (len > maxLength) {
10736                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10737                 xmlFree(buf);
10738                 return(NULL);
10739             }
10740 	    NEXT;
10741 	    cur = CUR;
10742         }
10743 	buf[len] = 0;
10744     } else {
10745 	xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10746     }
10747     return(buf);
10748 }
10749 
10750 /**
10751  * xmlParseEncodingDecl:
10752  * @ctxt:  an XML parser context
10753  *
10754  * DEPRECATED: Internal function, don't use.
10755  *
10756  * parse the XML encoding declaration
10757  *
10758  * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10759  *
10760  * this setups the conversion filters.
10761  *
10762  * Returns the encoding value or NULL
10763  */
10764 
10765 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10766 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10767     xmlChar *encoding = NULL;
10768 
10769     SKIP_BLANKS;
10770     if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10771         return(NULL);
10772 
10773     SKIP(8);
10774     SKIP_BLANKS;
10775     if (RAW != '=') {
10776         xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10777         return(NULL);
10778     }
10779     NEXT;
10780     SKIP_BLANKS;
10781     if (RAW == '"') {
10782         NEXT;
10783         encoding = xmlParseEncName(ctxt);
10784         if (RAW != '"') {
10785             xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10786             xmlFree((xmlChar *) encoding);
10787             return(NULL);
10788         } else
10789             NEXT;
10790     } else if (RAW == '\''){
10791         NEXT;
10792         encoding = xmlParseEncName(ctxt);
10793         if (RAW != '\'') {
10794             xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10795             xmlFree((xmlChar *) encoding);
10796             return(NULL);
10797         } else
10798             NEXT;
10799     } else {
10800         xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10801     }
10802 
10803     if (encoding == NULL)
10804         return(NULL);
10805 
10806     xmlSetDeclaredEncoding(ctxt, encoding);
10807 
10808     return(ctxt->encoding);
10809 }
10810 
10811 /**
10812  * xmlParseSDDecl:
10813  * @ctxt:  an XML parser context
10814  *
10815  * DEPRECATED: Internal function, don't use.
10816  *
10817  * parse the XML standalone declaration
10818  *
10819  * [32] SDDecl ::= S 'standalone' Eq
10820  *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10821  *
10822  * [ VC: Standalone Document Declaration ]
10823  * TODO The standalone document declaration must have the value "no"
10824  * if any external markup declarations contain declarations of:
10825  *  - attributes with default values, if elements to which these
10826  *    attributes apply appear in the document without specifications
10827  *    of values for these attributes, or
10828  *  - entities (other than amp, lt, gt, apos, quot), if references
10829  *    to those entities appear in the document, or
10830  *  - attributes with values subject to normalization, where the
10831  *    attribute appears in the document with a value which will change
10832  *    as a result of normalization, or
10833  *  - element types with element content, if white space occurs directly
10834  *    within any instance of those types.
10835  *
10836  * Returns:
10837  *   1 if standalone="yes"
10838  *   0 if standalone="no"
10839  *  -2 if standalone attribute is missing or invalid
10840  *	  (A standalone value of -2 means that the XML declaration was found,
10841  *	   but no value was specified for the standalone attribute).
10842  */
10843 
10844 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10845 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10846     int standalone = -2;
10847 
10848     SKIP_BLANKS;
10849     if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10850 	SKIP(10);
10851         SKIP_BLANKS;
10852 	if (RAW != '=') {
10853 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10854 	    return(standalone);
10855         }
10856 	NEXT;
10857 	SKIP_BLANKS;
10858         if (RAW == '\''){
10859 	    NEXT;
10860 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10861 	        standalone = 0;
10862                 SKIP(2);
10863 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10864 	               (NXT(2) == 's')) {
10865 	        standalone = 1;
10866 		SKIP(3);
10867             } else {
10868 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10869 	    }
10870 	    if (RAW != '\'') {
10871 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10872 	    } else
10873 	        NEXT;
10874 	} else if (RAW == '"'){
10875 	    NEXT;
10876 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10877 	        standalone = 0;
10878 		SKIP(2);
10879 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10880 	               (NXT(2) == 's')) {
10881 	        standalone = 1;
10882                 SKIP(3);
10883             } else {
10884 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10885 	    }
10886 	    if (RAW != '"') {
10887 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10888 	    } else
10889 	        NEXT;
10890 	} else {
10891 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10892         }
10893     }
10894     return(standalone);
10895 }
10896 
10897 /**
10898  * xmlParseXMLDecl:
10899  * @ctxt:  an XML parser context
10900  *
10901  * DEPRECATED: Internal function, don't use.
10902  *
10903  * parse an XML declaration header
10904  *
10905  * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10906  */
10907 
10908 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10909 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10910     xmlChar *version;
10911 
10912     /*
10913      * This value for standalone indicates that the document has an
10914      * XML declaration but it does not have a standalone attribute.
10915      * It will be overwritten later if a standalone attribute is found.
10916      */
10917 
10918     ctxt->standalone = -2;
10919 
10920     /*
10921      * We know that '<?xml' is here.
10922      */
10923     SKIP(5);
10924 
10925     if (!IS_BLANK_CH(RAW)) {
10926 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10927 	               "Blank needed after '<?xml'\n");
10928     }
10929     SKIP_BLANKS;
10930 
10931     /*
10932      * We must have the VersionInfo here.
10933      */
10934     version = xmlParseVersionInfo(ctxt);
10935     if (version == NULL) {
10936 	xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10937     } else {
10938 	if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10939 	    /*
10940 	     * Changed here for XML-1.0 5th edition
10941 	     */
10942 	    if (ctxt->options & XML_PARSE_OLD10) {
10943 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10944 			          "Unsupported version '%s'\n",
10945 			          version);
10946 	    } else {
10947 	        if ((version[0] == '1') && ((version[1] == '.'))) {
10948 		    xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10949 		                  "Unsupported version '%s'\n",
10950 				  version, NULL);
10951 		} else {
10952 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10953 				      "Unsupported version '%s'\n",
10954 				      version);
10955 		}
10956 	    }
10957 	}
10958 	if (ctxt->version != NULL)
10959 	    xmlFree((void *) ctxt->version);
10960 	ctxt->version = version;
10961     }
10962 
10963     /*
10964      * We may have the encoding declaration
10965      */
10966     if (!IS_BLANK_CH(RAW)) {
10967         if ((RAW == '?') && (NXT(1) == '>')) {
10968 	    SKIP(2);
10969 	    return;
10970 	}
10971 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10972     }
10973     xmlParseEncodingDecl(ctxt);
10974     if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10975          (ctxt->instate == XML_PARSER_EOF)) {
10976 	/*
10977 	 * The XML REC instructs us to stop parsing right here
10978 	 */
10979         return;
10980     }
10981 
10982     /*
10983      * We may have the standalone status.
10984      */
10985     if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10986         if ((RAW == '?') && (NXT(1) == '>')) {
10987 	    SKIP(2);
10988 	    return;
10989 	}
10990 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10991     }
10992 
10993     /*
10994      * We can grow the input buffer freely at that point
10995      */
10996     GROW;
10997 
10998     SKIP_BLANKS;
10999     ctxt->standalone = xmlParseSDDecl(ctxt);
11000 
11001     SKIP_BLANKS;
11002     if ((RAW == '?') && (NXT(1) == '>')) {
11003         SKIP(2);
11004     } else if (RAW == '>') {
11005         /* Deprecated old WD ... */
11006 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
11007 	NEXT;
11008     } else {
11009         int c;
11010 
11011 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
11012         while ((c = CUR) != 0) {
11013             NEXT;
11014             if (c == '>')
11015                 break;
11016         }
11017     }
11018 }
11019 
11020 /**
11021  * xmlParseMisc:
11022  * @ctxt:  an XML parser context
11023  *
11024  * DEPRECATED: Internal function, don't use.
11025  *
11026  * parse an XML Misc* optional field.
11027  *
11028  * [27] Misc ::= Comment | PI |  S
11029  */
11030 
11031 void
xmlParseMisc(xmlParserCtxtPtr ctxt)11032 xmlParseMisc(xmlParserCtxtPtr ctxt) {
11033     while (ctxt->instate != XML_PARSER_EOF) {
11034         SKIP_BLANKS;
11035         GROW;
11036         if ((RAW == '<') && (NXT(1) == '?')) {
11037 	    xmlParsePI(ctxt);
11038         } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
11039 	    xmlParseComment(ctxt);
11040         } else {
11041             break;
11042         }
11043     }
11044 }
11045 
11046 /**
11047  * xmlParseDocument:
11048  * @ctxt:  an XML parser context
11049  *
11050  * parse an XML document (and build a tree if using the standard SAX
11051  * interface).
11052  *
11053  * [1] document ::= prolog element Misc*
11054  *
11055  * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
11056  *
11057  * Returns 0, -1 in case of error. the parser context is augmented
11058  *                as a result of the parsing.
11059  */
11060 
11061 int
xmlParseDocument(xmlParserCtxtPtr ctxt)11062 xmlParseDocument(xmlParserCtxtPtr ctxt) {
11063     xmlInitParser();
11064 
11065     if ((ctxt == NULL) || (ctxt->input == NULL))
11066         return(-1);
11067 
11068     GROW;
11069 
11070     /*
11071      * SAX: detecting the level.
11072      */
11073     xmlDetectSAX2(ctxt);
11074 
11075     /*
11076      * SAX: beginning of the document processing.
11077      */
11078     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11079         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11080     if (ctxt->instate == XML_PARSER_EOF)
11081 	return(-1);
11082 
11083     xmlDetectEncoding(ctxt);
11084 
11085     if (CUR == 0) {
11086 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11087 	return(-1);
11088     }
11089 
11090     GROW;
11091     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11092 
11093 	/*
11094 	 * Note that we will switch encoding on the fly.
11095 	 */
11096 	xmlParseXMLDecl(ctxt);
11097 	if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
11098 	    (ctxt->instate == XML_PARSER_EOF)) {
11099 	    /*
11100 	     * The XML REC instructs us to stop parsing right here
11101 	     */
11102 	    return(-1);
11103 	}
11104 	SKIP_BLANKS;
11105     } else {
11106 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11107     }
11108     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11109         ctxt->sax->startDocument(ctxt->userData);
11110     if (ctxt->instate == XML_PARSER_EOF)
11111 	return(-1);
11112     if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
11113         (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
11114 	ctxt->myDoc->compression = ctxt->input->buf->compressed;
11115     }
11116 
11117     /*
11118      * The Misc part of the Prolog
11119      */
11120     xmlParseMisc(ctxt);
11121 
11122     /*
11123      * Then possibly doc type declaration(s) and more Misc
11124      * (doctypedecl Misc*)?
11125      */
11126     GROW;
11127     if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
11128 
11129 	ctxt->inSubset = 1;
11130 	xmlParseDocTypeDecl(ctxt);
11131 	if (RAW == '[') {
11132 	    ctxt->instate = XML_PARSER_DTD;
11133 	    xmlParseInternalSubset(ctxt);
11134 	    if (ctxt->instate == XML_PARSER_EOF)
11135 		return(-1);
11136 	}
11137 
11138 	/*
11139 	 * Create and update the external subset.
11140 	 */
11141 	ctxt->inSubset = 2;
11142 	if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
11143 	    (!ctxt->disableSAX))
11144 	    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11145 	                              ctxt->extSubSystem, ctxt->extSubURI);
11146 	if (ctxt->instate == XML_PARSER_EOF)
11147 	    return(-1);
11148 	ctxt->inSubset = 0;
11149 
11150         xmlCleanSpecialAttr(ctxt);
11151 
11152 	ctxt->instate = XML_PARSER_PROLOG;
11153 	xmlParseMisc(ctxt);
11154     }
11155 
11156     /*
11157      * Time to start parsing the tree itself
11158      */
11159     GROW;
11160     if (RAW != '<') {
11161 	xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11162 		       "Start tag expected, '<' not found\n");
11163     } else {
11164 	ctxt->instate = XML_PARSER_CONTENT;
11165 	xmlParseElement(ctxt);
11166 	ctxt->instate = XML_PARSER_EPILOG;
11167 
11168 
11169 	/*
11170 	 * The Misc part at the end
11171 	 */
11172 	xmlParseMisc(ctxt);
11173 
11174         if (ctxt->input->cur < ctxt->input->end) {
11175             if (ctxt->errNo == XML_ERR_OK)
11176 	        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11177         } else if ((ctxt->input->buf != NULL) &&
11178                    (ctxt->input->buf->encoder != NULL) &&
11179                    (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
11180             xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
11181                            "Truncated multi-byte sequence at EOF\n");
11182         }
11183 	ctxt->instate = XML_PARSER_EOF;
11184     }
11185 
11186     /*
11187      * SAX: end of the document processing.
11188      */
11189     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11190         ctxt->sax->endDocument(ctxt->userData);
11191 
11192     /*
11193      * Remove locally kept entity definitions if the tree was not built
11194      */
11195     if ((ctxt->myDoc != NULL) &&
11196 	(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
11197 	xmlFreeDoc(ctxt->myDoc);
11198 	ctxt->myDoc = NULL;
11199     }
11200 
11201     if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
11202         ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
11203 	if (ctxt->valid)
11204 	    ctxt->myDoc->properties |= XML_DOC_DTDVALID;
11205 	if (ctxt->nsWellFormed)
11206 	    ctxt->myDoc->properties |= XML_DOC_NSVALID;
11207 	if (ctxt->options & XML_PARSE_OLD10)
11208 	    ctxt->myDoc->properties |= XML_DOC_OLD10;
11209     }
11210     if (! ctxt->wellFormed) {
11211 	ctxt->valid = 0;
11212 	return(-1);
11213     }
11214     return(0);
11215 }
11216 
11217 /**
11218  * xmlParseExtParsedEnt:
11219  * @ctxt:  an XML parser context
11220  *
11221  * parse a general parsed entity
11222  * An external general parsed entity is well-formed if it matches the
11223  * production labeled extParsedEnt.
11224  *
11225  * [78] extParsedEnt ::= TextDecl? content
11226  *
11227  * Returns 0, -1 in case of error. the parser context is augmented
11228  *                as a result of the parsing.
11229  */
11230 
11231 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)11232 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11233     if ((ctxt == NULL) || (ctxt->input == NULL))
11234         return(-1);
11235 
11236     xmlDetectSAX2(ctxt);
11237 
11238     /*
11239      * SAX: beginning of the document processing.
11240      */
11241     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11242         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11243 
11244     xmlDetectEncoding(ctxt);
11245 
11246     if (CUR == 0) {
11247 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11248     }
11249 
11250     /*
11251      * Check for the XMLDecl in the Prolog.
11252      */
11253     GROW;
11254     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11255 
11256 	/*
11257 	 * Note that we will switch encoding on the fly.
11258 	 */
11259 	xmlParseXMLDecl(ctxt);
11260 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11261 	    /*
11262 	     * The XML REC instructs us to stop parsing right here
11263 	     */
11264 	    return(-1);
11265 	}
11266 	SKIP_BLANKS;
11267     } else {
11268 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11269     }
11270     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11271         ctxt->sax->startDocument(ctxt->userData);
11272     if (ctxt->instate == XML_PARSER_EOF)
11273 	return(-1);
11274 
11275     /*
11276      * Doing validity checking on chunk doesn't make sense
11277      */
11278     ctxt->instate = XML_PARSER_CONTENT;
11279     ctxt->validate = 0;
11280     ctxt->loadsubset = 0;
11281     ctxt->depth = 0;
11282 
11283     xmlParseContent(ctxt);
11284     if (ctxt->instate == XML_PARSER_EOF)
11285 	return(-1);
11286 
11287     if ((RAW == '<') && (NXT(1) == '/')) {
11288 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11289     } else if (RAW != 0) {
11290 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11291     }
11292 
11293     /*
11294      * SAX: end of the document processing.
11295      */
11296     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11297         ctxt->sax->endDocument(ctxt->userData);
11298 
11299     if (! ctxt->wellFormed) return(-1);
11300     return(0);
11301 }
11302 
11303 #ifdef LIBXML_PUSH_ENABLED
11304 /************************************************************************
11305  *									*
11306  *		Progressive parsing interfaces				*
11307  *									*
11308  ************************************************************************/
11309 
11310 /**
11311  * xmlParseLookupChar:
11312  * @ctxt:  an XML parser context
11313  * @c:  character
11314  *
11315  * Check whether the input buffer contains a character.
11316  */
11317 static int
xmlParseLookupChar(xmlParserCtxtPtr ctxt,int c)11318 xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11319     const xmlChar *cur;
11320 
11321     if (ctxt->checkIndex == 0) {
11322         cur = ctxt->input->cur + 1;
11323     } else {
11324         cur = ctxt->input->cur + ctxt->checkIndex;
11325     }
11326 
11327     if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11328         size_t index = ctxt->input->end - ctxt->input->cur;
11329 
11330         if (index > LONG_MAX) {
11331             ctxt->checkIndex = 0;
11332             return(1);
11333         }
11334         ctxt->checkIndex = index;
11335         return(0);
11336     } else {
11337         ctxt->checkIndex = 0;
11338         return(1);
11339     }
11340 }
11341 
11342 /**
11343  * xmlParseLookupString:
11344  * @ctxt:  an XML parser context
11345  * @startDelta: delta to apply at the start
11346  * @str:  string
11347  * @strLen:  length of string
11348  *
11349  * Check whether the input buffer contains a string.
11350  */
11351 static const xmlChar *
xmlParseLookupString(xmlParserCtxtPtr ctxt,size_t startDelta,const char * str,size_t strLen)11352 xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11353                      const char *str, size_t strLen) {
11354     const xmlChar *cur, *term;
11355 
11356     if (ctxt->checkIndex == 0) {
11357         cur = ctxt->input->cur + startDelta;
11358     } else {
11359         cur = ctxt->input->cur + ctxt->checkIndex;
11360     }
11361 
11362     term = BAD_CAST strstr((const char *) cur, str);
11363     if (term == NULL) {
11364         const xmlChar *end = ctxt->input->end;
11365         size_t index;
11366 
11367         /* Rescan (strLen - 1) characters. */
11368         if ((size_t) (end - cur) < strLen)
11369             end = cur;
11370         else
11371             end -= strLen - 1;
11372         index = end - ctxt->input->cur;
11373         if (index > LONG_MAX) {
11374             ctxt->checkIndex = 0;
11375             return(ctxt->input->end - strLen);
11376         }
11377         ctxt->checkIndex = index;
11378     } else {
11379         ctxt->checkIndex = 0;
11380     }
11381 
11382     return(term);
11383 }
11384 
11385 /**
11386  * xmlParseLookupCharData:
11387  * @ctxt:  an XML parser context
11388  *
11389  * Check whether the input buffer contains terminated char data.
11390  */
11391 static int
xmlParseLookupCharData(xmlParserCtxtPtr ctxt)11392 xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11393     const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11394     const xmlChar *end = ctxt->input->end;
11395     size_t index;
11396 
11397     while (cur < end) {
11398         if ((*cur == '<') || (*cur == '&')) {
11399             ctxt->checkIndex = 0;
11400             return(1);
11401         }
11402         cur++;
11403     }
11404 
11405     index = cur - ctxt->input->cur;
11406     if (index > LONG_MAX) {
11407         ctxt->checkIndex = 0;
11408         return(1);
11409     }
11410     ctxt->checkIndex = index;
11411     return(0);
11412 }
11413 
11414 /**
11415  * xmlParseLookupGt:
11416  * @ctxt:  an XML parser context
11417  *
11418  * Check whether there's enough data in the input buffer to finish parsing
11419  * a start tag. This has to take quotes into account.
11420  */
11421 static int
xmlParseLookupGt(xmlParserCtxtPtr ctxt)11422 xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11423     const xmlChar *cur;
11424     const xmlChar *end = ctxt->input->end;
11425     int state = ctxt->endCheckState;
11426     size_t index;
11427 
11428     if (ctxt->checkIndex == 0)
11429         cur = ctxt->input->cur + 1;
11430     else
11431         cur = ctxt->input->cur + ctxt->checkIndex;
11432 
11433     while (cur < end) {
11434         if (state) {
11435             if (*cur == state)
11436                 state = 0;
11437         } else if (*cur == '\'' || *cur == '"') {
11438             state = *cur;
11439         } else if (*cur == '>') {
11440             ctxt->checkIndex = 0;
11441             ctxt->endCheckState = 0;
11442             return(1);
11443         }
11444         cur++;
11445     }
11446 
11447     index = cur - ctxt->input->cur;
11448     if (index > LONG_MAX) {
11449         ctxt->checkIndex = 0;
11450         ctxt->endCheckState = 0;
11451         return(1);
11452     }
11453     ctxt->checkIndex = index;
11454     ctxt->endCheckState = state;
11455     return(0);
11456 }
11457 
11458 /**
11459  * xmlParseLookupInternalSubset:
11460  * @ctxt:  an XML parser context
11461  *
11462  * Check whether there's enough data in the input buffer to finish parsing
11463  * the internal subset.
11464  */
11465 static int
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt)11466 xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11467     /*
11468      * Sorry, but progressive parsing of the internal subset is not
11469      * supported. We first check that the full content of the internal
11470      * subset is available and parsing is launched only at that point.
11471      * Internal subset ends with "']' S? '>'" in an unescaped section and
11472      * not in a ']]>' sequence which are conditional sections.
11473      */
11474     const xmlChar *cur, *start;
11475     const xmlChar *end = ctxt->input->end;
11476     int state = ctxt->endCheckState;
11477     size_t index;
11478 
11479     if (ctxt->checkIndex == 0) {
11480         cur = ctxt->input->cur + 1;
11481     } else {
11482         cur = ctxt->input->cur + ctxt->checkIndex;
11483     }
11484     start = cur;
11485 
11486     while (cur < end) {
11487         if (state == '-') {
11488             if ((*cur == '-') &&
11489                 (cur[1] == '-') &&
11490                 (cur[2] == '>')) {
11491                 state = 0;
11492                 cur += 3;
11493                 start = cur;
11494                 continue;
11495             }
11496         }
11497         else if (state == ']') {
11498             if (*cur == '>') {
11499                 ctxt->checkIndex = 0;
11500                 ctxt->endCheckState = 0;
11501                 return(1);
11502             }
11503             if (IS_BLANK_CH(*cur)) {
11504                 state = ' ';
11505             } else if (*cur != ']') {
11506                 state = 0;
11507                 start = cur;
11508                 continue;
11509             }
11510         }
11511         else if (state == ' ') {
11512             if (*cur == '>') {
11513                 ctxt->checkIndex = 0;
11514                 ctxt->endCheckState = 0;
11515                 return(1);
11516             }
11517             if (!IS_BLANK_CH(*cur)) {
11518                 state = 0;
11519                 start = cur;
11520                 continue;
11521             }
11522         }
11523         else if (state != 0) {
11524             if (*cur == state) {
11525                 state = 0;
11526                 start = cur + 1;
11527             }
11528         }
11529         else if (*cur == '<') {
11530             if ((cur[1] == '!') &&
11531                 (cur[2] == '-') &&
11532                 (cur[3] == '-')) {
11533                 state = '-';
11534                 cur += 4;
11535                 /* Don't treat <!--> as comment */
11536                 start = cur;
11537                 continue;
11538             }
11539         }
11540         else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11541             state = *cur;
11542         }
11543 
11544         cur++;
11545     }
11546 
11547     /*
11548      * Rescan the three last characters to detect "<!--" and "-->"
11549      * split across chunks.
11550      */
11551     if ((state == 0) || (state == '-')) {
11552         if (cur - start < 3)
11553             cur = start;
11554         else
11555             cur -= 3;
11556     }
11557     index = cur - ctxt->input->cur;
11558     if (index > LONG_MAX) {
11559         ctxt->checkIndex = 0;
11560         ctxt->endCheckState = 0;
11561         return(1);
11562     }
11563     ctxt->checkIndex = index;
11564     ctxt->endCheckState = state;
11565     return(0);
11566 }
11567 
11568 /**
11569  * xmlCheckCdataPush:
11570  * @cur: pointer to the block of characters
11571  * @len: length of the block in bytes
11572  * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11573  *
11574  * Check that the block of characters is okay as SCdata content [20]
11575  *
11576  * Returns the number of bytes to pass if okay, a negative index where an
11577  *         UTF-8 error occurred otherwise
11578  */
11579 static int
xmlCheckCdataPush(const xmlChar * utf,int len,int complete)11580 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11581     int ix;
11582     unsigned char c;
11583     int codepoint;
11584 
11585     if ((utf == NULL) || (len <= 0))
11586         return(0);
11587 
11588     for (ix = 0; ix < len;) {      /* string is 0-terminated */
11589         c = utf[ix];
11590         if ((c & 0x80) == 0x00) {	/* 1-byte code, starts with 10 */
11591 	    if (c >= 0x20)
11592 		ix++;
11593 	    else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11594 	        ix++;
11595 	    else
11596 	        return(-ix);
11597 	} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11598 	    if (ix + 2 > len) return(complete ? -ix : ix);
11599 	    if ((utf[ix+1] & 0xc0 ) != 0x80)
11600 	        return(-ix);
11601 	    codepoint = (utf[ix] & 0x1f) << 6;
11602 	    codepoint |= utf[ix+1] & 0x3f;
11603 	    if (!xmlIsCharQ(codepoint))
11604 	        return(-ix);
11605 	    ix += 2;
11606 	} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11607 	    if (ix + 3 > len) return(complete ? -ix : ix);
11608 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11609 	        ((utf[ix+2] & 0xc0) != 0x80))
11610 		    return(-ix);
11611 	    codepoint = (utf[ix] & 0xf) << 12;
11612 	    codepoint |= (utf[ix+1] & 0x3f) << 6;
11613 	    codepoint |= utf[ix+2] & 0x3f;
11614 	    if (!xmlIsCharQ(codepoint))
11615 	        return(-ix);
11616 	    ix += 3;
11617 	} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11618 	    if (ix + 4 > len) return(complete ? -ix : ix);
11619 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11620 	        ((utf[ix+2] & 0xc0) != 0x80) ||
11621 		((utf[ix+3] & 0xc0) != 0x80))
11622 		    return(-ix);
11623 	    codepoint = (utf[ix] & 0x7) << 18;
11624 	    codepoint |= (utf[ix+1] & 0x3f) << 12;
11625 	    codepoint |= (utf[ix+2] & 0x3f) << 6;
11626 	    codepoint |= utf[ix+3] & 0x3f;
11627 	    if (!xmlIsCharQ(codepoint))
11628 	        return(-ix);
11629 	    ix += 4;
11630 	} else				/* unknown encoding */
11631 	    return(-ix);
11632       }
11633       return(ix);
11634 }
11635 
11636 /**
11637  * xmlParseTryOrFinish:
11638  * @ctxt:  an XML parser context
11639  * @terminate:  last chunk indicator
11640  *
11641  * Try to progress on parsing
11642  *
11643  * Returns zero if no parsing was possible
11644  */
11645 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11646 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11647     int ret = 0;
11648     size_t avail;
11649     xmlChar cur, next;
11650 
11651     if (ctxt->input == NULL)
11652         return(0);
11653 
11654     if ((ctxt->input != NULL) &&
11655         (ctxt->input->cur - ctxt->input->base > 4096)) {
11656         xmlParserShrink(ctxt);
11657     }
11658 
11659     while (ctxt->instate != XML_PARSER_EOF) {
11660 	if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11661 	    return(0);
11662 
11663         avail = ctxt->input->end - ctxt->input->cur;
11664         if (avail < 1)
11665 	    goto done;
11666         switch (ctxt->instate) {
11667             case XML_PARSER_EOF:
11668 	        /*
11669 		 * Document parsing is done !
11670 		 */
11671 	        goto done;
11672             case XML_PARSER_START:
11673                 /*
11674                  * Very first chars read from the document flow.
11675                  */
11676                 if ((!terminate) && (avail < 4))
11677                     goto done;
11678 
11679                 /*
11680                  * We need more bytes to detect EBCDIC code pages.
11681                  * See xmlDetectEBCDIC.
11682                  */
11683                 if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
11684                     (!terminate) && (avail < 200))
11685                     goto done;
11686 
11687                 xmlDetectEncoding(ctxt);
11688                 if (ctxt->instate == XML_PARSER_EOF)
11689                     goto done;
11690                 ctxt->instate = XML_PARSER_XML_DECL;
11691 		break;
11692 
11693             case XML_PARSER_XML_DECL:
11694 		if ((!terminate) && (avail < 2))
11695 		    goto done;
11696 		cur = ctxt->input->cur[0];
11697 		next = ctxt->input->cur[1];
11698 	        if ((cur == '<') && (next == '?')) {
11699 		    /* PI or XML decl */
11700 		    if ((!terminate) &&
11701                         (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11702 			goto done;
11703 		    if ((ctxt->input->cur[2] == 'x') &&
11704 			(ctxt->input->cur[3] == 'm') &&
11705 			(ctxt->input->cur[4] == 'l') &&
11706 			(IS_BLANK_CH(ctxt->input->cur[5]))) {
11707 			ret += 5;
11708 			xmlParseXMLDecl(ctxt);
11709 			if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11710 			    /*
11711 			     * The XML REC instructs us to stop parsing right
11712 			     * here
11713 			     */
11714 			    xmlHaltParser(ctxt);
11715 			    return(0);
11716 			}
11717 		    } else {
11718 			ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11719 		    }
11720 		} else {
11721 		    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11722 		    if (ctxt->version == NULL) {
11723 		        xmlErrMemory(ctxt, NULL);
11724 			break;
11725 		    }
11726 		}
11727                 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11728                     ctxt->sax->setDocumentLocator(ctxt->userData,
11729                                                   &xmlDefaultSAXLocator);
11730                 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11731                     (!ctxt->disableSAX))
11732                     ctxt->sax->startDocument(ctxt->userData);
11733                 if (ctxt->instate == XML_PARSER_EOF)
11734                     goto done;
11735                 ctxt->instate = XML_PARSER_MISC;
11736 		break;
11737             case XML_PARSER_START_TAG: {
11738 	        const xmlChar *name;
11739 		const xmlChar *prefix = NULL;
11740 		const xmlChar *URI = NULL;
11741                 int line = ctxt->input->line;
11742 		int nbNs = 0;
11743 
11744 		if ((!terminate) && (avail < 2))
11745 		    goto done;
11746 		cur = ctxt->input->cur[0];
11747 	        if (cur != '<') {
11748 		    xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11749                                    "Start tag expected, '<' not found");
11750 		    xmlHaltParser(ctxt);
11751 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11752 			ctxt->sax->endDocument(ctxt->userData);
11753 		    goto done;
11754 		}
11755 		if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11756                     goto done;
11757 		if (ctxt->spaceNr == 0)
11758 		    spacePush(ctxt, -1);
11759 		else if (*ctxt->space == -2)
11760 		    spacePush(ctxt, -1);
11761 		else
11762 		    spacePush(ctxt, *ctxt->space);
11763 #ifdef LIBXML_SAX1_ENABLED
11764 		if (ctxt->sax2)
11765 #endif /* LIBXML_SAX1_ENABLED */
11766 		    name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
11767 #ifdef LIBXML_SAX1_ENABLED
11768 		else
11769 		    name = xmlParseStartTag(ctxt);
11770 #endif /* LIBXML_SAX1_ENABLED */
11771 		if (ctxt->instate == XML_PARSER_EOF)
11772 		    goto done;
11773 		if (name == NULL) {
11774 		    spacePop(ctxt);
11775 		    xmlHaltParser(ctxt);
11776 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11777 			ctxt->sax->endDocument(ctxt->userData);
11778 		    goto done;
11779 		}
11780 #ifdef LIBXML_VALID_ENABLED
11781 		/*
11782 		 * [ VC: Root Element Type ]
11783 		 * The Name in the document type declaration must match
11784 		 * the element type of the root element.
11785 		 */
11786 		if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11787 		    ctxt->node && (ctxt->node == ctxt->myDoc->children))
11788 		    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11789 #endif /* LIBXML_VALID_ENABLED */
11790 
11791 		/*
11792 		 * Check for an Empty Element.
11793 		 */
11794 		if ((RAW == '/') && (NXT(1) == '>')) {
11795 		    SKIP(2);
11796 
11797 		    if (ctxt->sax2) {
11798 			if ((ctxt->sax != NULL) &&
11799 			    (ctxt->sax->endElementNs != NULL) &&
11800 			    (!ctxt->disableSAX))
11801 			    ctxt->sax->endElementNs(ctxt->userData, name,
11802 			                            prefix, URI);
11803 			if (nbNs > 0)
11804 			    xmlParserNsPop(ctxt, nbNs);
11805 #ifdef LIBXML_SAX1_ENABLED
11806 		    } else {
11807 			if ((ctxt->sax != NULL) &&
11808 			    (ctxt->sax->endElement != NULL) &&
11809 			    (!ctxt->disableSAX))
11810 			    ctxt->sax->endElement(ctxt->userData, name);
11811 #endif /* LIBXML_SAX1_ENABLED */
11812 		    }
11813 		    spacePop(ctxt);
11814 		} else if (RAW == '>') {
11815 		    NEXT;
11816                     nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11817 		} else {
11818 		    xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11819 					 "Couldn't find end of Start Tag %s\n",
11820 					 name);
11821 		    nodePop(ctxt);
11822 		    spacePop(ctxt);
11823                     if (nbNs > 0)
11824                         xmlParserNsPop(ctxt, nbNs);
11825 		}
11826 
11827                 if (ctxt->instate == XML_PARSER_EOF)
11828                     goto done;
11829                 if (ctxt->nameNr == 0)
11830                     ctxt->instate = XML_PARSER_EPILOG;
11831                 else
11832                     ctxt->instate = XML_PARSER_CONTENT;
11833                 break;
11834 	    }
11835             case XML_PARSER_CONTENT: {
11836 		cur = ctxt->input->cur[0];
11837 
11838 		if (cur == '<') {
11839                     if ((!terminate) && (avail < 2))
11840                         goto done;
11841 		    next = ctxt->input->cur[1];
11842 
11843                     if (next == '/') {
11844                         ctxt->instate = XML_PARSER_END_TAG;
11845                         break;
11846                     } else if (next == '?') {
11847                         if ((!terminate) &&
11848                             (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11849                             goto done;
11850                         xmlParsePI(ctxt);
11851                         if (ctxt->instate == XML_PARSER_EOF)
11852                             goto done;
11853                         ctxt->instate = XML_PARSER_CONTENT;
11854                         break;
11855                     } else if (next == '!') {
11856                         if ((!terminate) && (avail < 3))
11857                             goto done;
11858                         next = ctxt->input->cur[2];
11859 
11860                         if (next == '-') {
11861                             if ((!terminate) && (avail < 4))
11862                                 goto done;
11863                             if (ctxt->input->cur[3] == '-') {
11864                                 if ((!terminate) &&
11865                                     (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11866                                     goto done;
11867                                 xmlParseComment(ctxt);
11868                                 if (ctxt->instate == XML_PARSER_EOF)
11869                                     goto done;
11870                                 ctxt->instate = XML_PARSER_CONTENT;
11871                                 break;
11872                             }
11873                         } else if (next == '[') {
11874                             if ((!terminate) && (avail < 9))
11875                                 goto done;
11876                             if ((ctxt->input->cur[2] == '[') &&
11877                                 (ctxt->input->cur[3] == 'C') &&
11878                                 (ctxt->input->cur[4] == 'D') &&
11879                                 (ctxt->input->cur[5] == 'A') &&
11880                                 (ctxt->input->cur[6] == 'T') &&
11881                                 (ctxt->input->cur[7] == 'A') &&
11882                                 (ctxt->input->cur[8] == '[')) {
11883                                 SKIP(9);
11884                                 ctxt->instate = XML_PARSER_CDATA_SECTION;
11885                                 break;
11886                             }
11887                         }
11888                     }
11889 		} else if (cur == '&') {
11890 		    if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11891 			goto done;
11892 		    xmlParseReference(ctxt);
11893                     break;
11894 		} else {
11895 		    /* TODO Avoid the extra copy, handle directly !!! */
11896 		    /*
11897 		     * Goal of the following test is:
11898 		     *  - minimize calls to the SAX 'character' callback
11899 		     *    when they are mergeable
11900 		     *  - handle an problem for isBlank when we only parse
11901 		     *    a sequence of blank chars and the next one is
11902 		     *    not available to check against '<' presence.
11903 		     *  - tries to homogenize the differences in SAX
11904 		     *    callbacks between the push and pull versions
11905 		     *    of the parser.
11906 		     */
11907 		    if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11908 			if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11909 			    goto done;
11910                     }
11911                     ctxt->checkIndex = 0;
11912 		    xmlParseCharDataInternal(ctxt, !terminate);
11913                     break;
11914 		}
11915 
11916                 ctxt->instate = XML_PARSER_START_TAG;
11917 		break;
11918 	    }
11919             case XML_PARSER_END_TAG:
11920 		if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11921 		    goto done;
11922 		if (ctxt->sax2) {
11923 	            xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11924 		    nameNsPop(ctxt);
11925 		}
11926 #ifdef LIBXML_SAX1_ENABLED
11927 		  else
11928 		    xmlParseEndTag1(ctxt, 0);
11929 #endif /* LIBXML_SAX1_ENABLED */
11930                 if (ctxt->instate == XML_PARSER_EOF)
11931                     goto done;
11932 		if (ctxt->nameNr == 0) {
11933 		    ctxt->instate = XML_PARSER_EPILOG;
11934 		} else {
11935 		    ctxt->instate = XML_PARSER_CONTENT;
11936 		}
11937 		break;
11938             case XML_PARSER_CDATA_SECTION: {
11939 	        /*
11940 		 * The Push mode need to have the SAX callback for
11941 		 * cdataBlock merge back contiguous callbacks.
11942 		 */
11943 		const xmlChar *term;
11944 
11945                 if (terminate) {
11946                     /*
11947                      * Don't call xmlParseLookupString. If 'terminate'
11948                      * is set, checkIndex is invalid.
11949                      */
11950                     term = BAD_CAST strstr((const char *) ctxt->input->cur,
11951                                            "]]>");
11952                 } else {
11953 		    term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11954                 }
11955 
11956 		if (term == NULL) {
11957 		    int tmp, size;
11958 
11959                     if (terminate) {
11960                         /* Unfinished CDATA section */
11961                         size = ctxt->input->end - ctxt->input->cur;
11962                     } else {
11963                         if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11964                             goto done;
11965                         ctxt->checkIndex = 0;
11966                         /* XXX: Why don't we pass the full buffer? */
11967                         size = XML_PARSER_BIG_BUFFER_SIZE;
11968                     }
11969                     tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11970                     if (tmp <= 0) {
11971                         tmp = -tmp;
11972                         ctxt->input->cur += tmp;
11973                         goto encoding_error;
11974                     }
11975                     if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11976                         if (ctxt->sax->cdataBlock != NULL)
11977                             ctxt->sax->cdataBlock(ctxt->userData,
11978                                                   ctxt->input->cur, tmp);
11979                         else if (ctxt->sax->characters != NULL)
11980                             ctxt->sax->characters(ctxt->userData,
11981                                                   ctxt->input->cur, tmp);
11982                     }
11983                     if (ctxt->instate == XML_PARSER_EOF)
11984                         goto done;
11985                     SKIPL(tmp);
11986 		} else {
11987                     int base = term - CUR_PTR;
11988 		    int tmp;
11989 
11990 		    tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11991 		    if ((tmp < 0) || (tmp != base)) {
11992 			tmp = -tmp;
11993 			ctxt->input->cur += tmp;
11994 			goto encoding_error;
11995 		    }
11996 		    if ((ctxt->sax != NULL) && (base == 0) &&
11997 		        (ctxt->sax->cdataBlock != NULL) &&
11998 		        (!ctxt->disableSAX)) {
11999 			/*
12000 			 * Special case to provide identical behaviour
12001 			 * between pull and push parsers on enpty CDATA
12002 			 * sections
12003 			 */
12004 			 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
12005 			     (!strncmp((const char *)&ctxt->input->cur[-9],
12006 			               "<![CDATA[", 9)))
12007 			     ctxt->sax->cdataBlock(ctxt->userData,
12008 			                           BAD_CAST "", 0);
12009 		    } else if ((ctxt->sax != NULL) && (base > 0) &&
12010 			(!ctxt->disableSAX)) {
12011 			if (ctxt->sax->cdataBlock != NULL)
12012 			    ctxt->sax->cdataBlock(ctxt->userData,
12013 						  ctxt->input->cur, base);
12014 			else if (ctxt->sax->characters != NULL)
12015 			    ctxt->sax->characters(ctxt->userData,
12016 						  ctxt->input->cur, base);
12017 		    }
12018 		    if (ctxt->instate == XML_PARSER_EOF)
12019 			goto done;
12020 		    SKIPL(base + 3);
12021 		    ctxt->instate = XML_PARSER_CONTENT;
12022 		}
12023 		break;
12024 	    }
12025             case XML_PARSER_MISC:
12026             case XML_PARSER_PROLOG:
12027             case XML_PARSER_EPILOG:
12028 		SKIP_BLANKS;
12029                 avail = ctxt->input->end - ctxt->input->cur;
12030 		if (avail < 1)
12031 		    goto done;
12032 		if (ctxt->input->cur[0] == '<') {
12033                     if ((!terminate) && (avail < 2))
12034                         goto done;
12035                     next = ctxt->input->cur[1];
12036                     if (next == '?') {
12037                         if ((!terminate) &&
12038                             (!xmlParseLookupString(ctxt, 2, "?>", 2)))
12039                             goto done;
12040                         xmlParsePI(ctxt);
12041                         if (ctxt->instate == XML_PARSER_EOF)
12042                             goto done;
12043                         break;
12044                     } else if (next == '!') {
12045                         if ((!terminate) && (avail < 3))
12046                             goto done;
12047 
12048                         if (ctxt->input->cur[2] == '-') {
12049                             if ((!terminate) && (avail < 4))
12050                                 goto done;
12051                             if (ctxt->input->cur[3] == '-') {
12052                                 if ((!terminate) &&
12053                                     (!xmlParseLookupString(ctxt, 4, "-->", 3)))
12054                                     goto done;
12055                                 xmlParseComment(ctxt);
12056                                 if (ctxt->instate == XML_PARSER_EOF)
12057                                     goto done;
12058                                 break;
12059                             }
12060                         } else if (ctxt->instate == XML_PARSER_MISC) {
12061                             if ((!terminate) && (avail < 9))
12062                                 goto done;
12063                             if ((ctxt->input->cur[2] == 'D') &&
12064                                 (ctxt->input->cur[3] == 'O') &&
12065                                 (ctxt->input->cur[4] == 'C') &&
12066                                 (ctxt->input->cur[5] == 'T') &&
12067                                 (ctxt->input->cur[6] == 'Y') &&
12068                                 (ctxt->input->cur[7] == 'P') &&
12069                                 (ctxt->input->cur[8] == 'E')) {
12070                                 if ((!terminate) && (!xmlParseLookupGt(ctxt)))
12071                                     goto done;
12072                                 ctxt->inSubset = 1;
12073                                 xmlParseDocTypeDecl(ctxt);
12074                                 if (ctxt->instate == XML_PARSER_EOF)
12075                                     goto done;
12076                                 if (RAW == '[') {
12077                                     ctxt->instate = XML_PARSER_DTD;
12078                                 } else {
12079                                     /*
12080                                      * Create and update the external subset.
12081                                      */
12082                                     ctxt->inSubset = 2;
12083                                     if ((ctxt->sax != NULL) &&
12084                                         (!ctxt->disableSAX) &&
12085                                         (ctxt->sax->externalSubset != NULL))
12086                                         ctxt->sax->externalSubset(
12087                                                 ctxt->userData,
12088                                                 ctxt->intSubName,
12089                                                 ctxt->extSubSystem,
12090                                                 ctxt->extSubURI);
12091                                     ctxt->inSubset = 0;
12092                                     xmlCleanSpecialAttr(ctxt);
12093                                     if (ctxt->instate == XML_PARSER_EOF)
12094                                         goto done;
12095                                     ctxt->instate = XML_PARSER_PROLOG;
12096                                 }
12097                                 break;
12098                             }
12099                         }
12100                     }
12101                 }
12102 
12103                 if (ctxt->instate == XML_PARSER_EPILOG) {
12104                     if (ctxt->errNo == XML_ERR_OK)
12105                         xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12106 		    ctxt->instate = XML_PARSER_EOF;
12107                     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12108                         ctxt->sax->endDocument(ctxt->userData);
12109                 } else {
12110 		    ctxt->instate = XML_PARSER_START_TAG;
12111 		}
12112 		break;
12113             case XML_PARSER_DTD: {
12114                 if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12115                     goto done;
12116 		xmlParseInternalSubset(ctxt);
12117 		if (ctxt->instate == XML_PARSER_EOF)
12118 		    goto done;
12119 		ctxt->inSubset = 2;
12120 		if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12121 		    (ctxt->sax->externalSubset != NULL))
12122 		    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12123 			    ctxt->extSubSystem, ctxt->extSubURI);
12124 		ctxt->inSubset = 0;
12125 		xmlCleanSpecialAttr(ctxt);
12126 		if (ctxt->instate == XML_PARSER_EOF)
12127 		    goto done;
12128 		ctxt->instate = XML_PARSER_PROLOG;
12129                 break;
12130 	    }
12131             default:
12132 		xmlGenericError(xmlGenericErrorContext,
12133 			"PP: internal error\n");
12134 		ctxt->instate = XML_PARSER_EOF;
12135 		break;
12136 	}
12137     }
12138 done:
12139     return(ret);
12140 encoding_error:
12141     if (ctxt->input->end - ctxt->input->cur < 4) {
12142 	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12143 		     "Input is not proper UTF-8, indicate encoding !\n",
12144 		     NULL, NULL);
12145     } else {
12146         char buffer[150];
12147 
12148 	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12149 			ctxt->input->cur[0], ctxt->input->cur[1],
12150 			ctxt->input->cur[2], ctxt->input->cur[3]);
12151 	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12152 		     "Input is not proper UTF-8, indicate encoding !\n%s",
12153 		     BAD_CAST buffer, NULL);
12154     }
12155     return(0);
12156 }
12157 
12158 /**
12159  * xmlParseChunk:
12160  * @ctxt:  an XML parser context
12161  * @chunk:  an char array
12162  * @size:  the size in byte of the chunk
12163  * @terminate:  last chunk indicator
12164  *
12165  * Parse a Chunk of memory
12166  *
12167  * Returns zero if no error, the xmlParserErrors otherwise.
12168  */
12169 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)12170 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12171               int terminate) {
12172     int end_in_lf = 0;
12173 
12174     if (ctxt == NULL)
12175         return(XML_ERR_INTERNAL_ERROR);
12176     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12177         return(ctxt->errNo);
12178     if (ctxt->instate == XML_PARSER_EOF)
12179         return(-1);
12180     if (ctxt->input == NULL)
12181         return(-1);
12182 
12183     ctxt->progressive = 1;
12184     if (ctxt->instate == XML_PARSER_START)
12185         xmlDetectSAX2(ctxt);
12186     if ((size > 0) && (chunk != NULL) && (!terminate) &&
12187         (chunk[size - 1] == '\r')) {
12188 	end_in_lf = 1;
12189 	size--;
12190     }
12191 
12192     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12193         (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12194 	size_t pos = ctxt->input->cur - ctxt->input->base;
12195 	int res;
12196 
12197 	res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12198         xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
12199 	if (res < 0) {
12200             xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
12201 	    xmlHaltParser(ctxt);
12202 	    return(ctxt->errNo);
12203 	}
12204     }
12205 
12206     xmlParseTryOrFinish(ctxt, terminate);
12207     if (ctxt->instate == XML_PARSER_EOF)
12208         return(ctxt->errNo);
12209 
12210     if ((ctxt->input != NULL) &&
12211          (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12212          ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12213         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12214         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12215         xmlHaltParser(ctxt);
12216     }
12217     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12218         return(ctxt->errNo);
12219 
12220     if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12221         (ctxt->input->buf != NULL)) {
12222 	size_t pos = ctxt->input->cur - ctxt->input->base;
12223         int res;
12224 
12225 	res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12226 	xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
12227         if (res < 0) {
12228             xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
12229             xmlHaltParser(ctxt);
12230             return(ctxt->errNo);
12231         }
12232     }
12233     if (terminate) {
12234 	/*
12235 	 * Check for termination
12236 	 */
12237         if ((ctxt->instate != XML_PARSER_EOF) &&
12238             (ctxt->instate != XML_PARSER_EPILOG)) {
12239             if (ctxt->nameNr > 0) {
12240                 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
12241                 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
12242                 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
12243                         "Premature end of data in tag %s line %d\n",
12244                         name, line, NULL);
12245             } else if (ctxt->instate == XML_PARSER_START) {
12246                 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
12247             } else {
12248                 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
12249                                "Start tag expected, '<' not found\n");
12250             }
12251         } else if ((ctxt->input->buf != NULL) &&
12252                    (ctxt->input->buf->encoder != NULL) &&
12253                    (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
12254             xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
12255                            "Truncated multi-byte sequence at EOF\n");
12256         }
12257 	if (ctxt->instate != XML_PARSER_EOF) {
12258 	    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12259 		ctxt->sax->endDocument(ctxt->userData);
12260 	}
12261 	ctxt->instate = XML_PARSER_EOF;
12262     }
12263     if (ctxt->wellFormed == 0)
12264 	return((xmlParserErrors) ctxt->errNo);
12265     else
12266         return(0);
12267 }
12268 
12269 /************************************************************************
12270  *									*
12271  *		I/O front end functions to the parser			*
12272  *									*
12273  ************************************************************************/
12274 
12275 /**
12276  * xmlCreatePushParserCtxt:
12277  * @sax:  a SAX handler
12278  * @user_data:  The user data returned on SAX callbacks
12279  * @chunk:  a pointer to an array of chars
12280  * @size:  number of chars in the array
12281  * @filename:  an optional file name or URI
12282  *
12283  * Create a parser context for using the XML parser in push mode.
12284  * If @buffer and @size are non-NULL, the data is used to detect
12285  * the encoding.  The remaining characters will be parsed so they
12286  * don't need to be fed in again through xmlParseChunk.
12287  * To allow content encoding detection, @size should be >= 4
12288  * The value of @filename is used for fetching external entities
12289  * and error/warning reports.
12290  *
12291  * Returns the new parser context or NULL
12292  */
12293 
12294 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)12295 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12296                         const char *chunk, int size, const char *filename) {
12297     xmlParserCtxtPtr ctxt;
12298     xmlParserInputPtr inputStream;
12299     xmlParserInputBufferPtr buf;
12300 
12301     buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
12302     if (buf == NULL) return(NULL);
12303 
12304     ctxt = xmlNewSAXParserCtxt(sax, user_data);
12305     if (ctxt == NULL) {
12306         xmlErrMemory(NULL, "creating parser: out of memory\n");
12307 	xmlFreeParserInputBuffer(buf);
12308 	return(NULL);
12309     }
12310     ctxt->dictNames = 1;
12311     if (filename == NULL) {
12312 	ctxt->directory = NULL;
12313     } else {
12314         ctxt->directory = xmlParserGetDirectory(filename);
12315     }
12316 
12317     inputStream = xmlNewInputStream(ctxt);
12318     if (inputStream == NULL) {
12319 	xmlFreeParserCtxt(ctxt);
12320 	xmlFreeParserInputBuffer(buf);
12321 	return(NULL);
12322     }
12323 
12324     if (filename == NULL)
12325 	inputStream->filename = NULL;
12326     else {
12327 	inputStream->filename = (char *)
12328 	    xmlCanonicPath((const xmlChar *) filename);
12329 	if (inputStream->filename == NULL) {
12330             xmlFreeInputStream(inputStream);
12331 	    xmlFreeParserCtxt(ctxt);
12332 	    xmlFreeParserInputBuffer(buf);
12333 	    return(NULL);
12334 	}
12335     }
12336     inputStream->buf = buf;
12337     xmlBufResetInput(inputStream->buf->buffer, inputStream);
12338     inputPush(ctxt, inputStream);
12339 
12340     if ((size != 0) && (chunk != NULL) &&
12341         (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12342 	size_t pos = ctxt->input->cur - ctxt->input->base;
12343         int res;
12344 
12345 	res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12346         xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
12347         if (res < 0) {
12348             xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
12349             xmlHaltParser(ctxt);
12350         }
12351     }
12352 
12353     return(ctxt);
12354 }
12355 #endif /* LIBXML_PUSH_ENABLED */
12356 
12357 /**
12358  * xmlStopParser:
12359  * @ctxt:  an XML parser context
12360  *
12361  * Blocks further parser processing
12362  */
12363 void
xmlStopParser(xmlParserCtxtPtr ctxt)12364 xmlStopParser(xmlParserCtxtPtr ctxt) {
12365     if (ctxt == NULL)
12366         return;
12367     xmlHaltParser(ctxt);
12368     ctxt->errNo = XML_ERR_USER_STOP;
12369 }
12370 
12371 /**
12372  * xmlCreateIOParserCtxt:
12373  * @sax:  a SAX handler
12374  * @user_data:  The user data returned on SAX callbacks
12375  * @ioread:  an I/O read function
12376  * @ioclose:  an I/O close function
12377  * @ioctx:  an I/O handler
12378  * @enc:  the charset encoding if known
12379  *
12380  * Create a parser context for using the XML parser with an existing
12381  * I/O stream
12382  *
12383  * Returns the new parser context or NULL
12384  */
12385 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)12386 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12387 	xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12388 	void *ioctx, xmlCharEncoding enc) {
12389     xmlParserCtxtPtr ctxt;
12390     xmlParserInputPtr inputStream;
12391     xmlParserInputBufferPtr buf;
12392 
12393     if (ioread == NULL) return(NULL);
12394 
12395     buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12396     if (buf == NULL) {
12397         if (ioclose != NULL)
12398             ioclose(ioctx);
12399         return (NULL);
12400     }
12401 
12402     ctxt = xmlNewSAXParserCtxt(sax, user_data);
12403     if (ctxt == NULL) {
12404 	xmlFreeParserInputBuffer(buf);
12405 	return(NULL);
12406     }
12407 
12408     inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12409     if (inputStream == NULL) {
12410 	xmlFreeParserCtxt(ctxt);
12411 	return(NULL);
12412     }
12413     inputPush(ctxt, inputStream);
12414 
12415     return(ctxt);
12416 }
12417 
12418 #ifdef LIBXML_VALID_ENABLED
12419 /************************************************************************
12420  *									*
12421  *		Front ends when parsing a DTD				*
12422  *									*
12423  ************************************************************************/
12424 
12425 /**
12426  * xmlIOParseDTD:
12427  * @sax:  the SAX handler block or NULL
12428  * @input:  an Input Buffer
12429  * @enc:  the charset encoding if known
12430  *
12431  * Load and parse a DTD
12432  *
12433  * Returns the resulting xmlDtdPtr or NULL in case of error.
12434  * @input will be freed by the function in any case.
12435  */
12436 
12437 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12438 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12439 	      xmlCharEncoding enc) {
12440     xmlDtdPtr ret = NULL;
12441     xmlParserCtxtPtr ctxt;
12442     xmlParserInputPtr pinput = NULL;
12443 
12444     if (input == NULL)
12445 	return(NULL);
12446 
12447     ctxt = xmlNewSAXParserCtxt(sax, NULL);
12448     if (ctxt == NULL) {
12449         xmlFreeParserInputBuffer(input);
12450 	return(NULL);
12451     }
12452 
12453     /* We are loading a DTD */
12454     ctxt->options |= XML_PARSE_DTDLOAD;
12455 
12456     xmlDetectSAX2(ctxt);
12457 
12458     /*
12459      * generate a parser input from the I/O handler
12460      */
12461 
12462     pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12463     if (pinput == NULL) {
12464         xmlFreeParserInputBuffer(input);
12465 	xmlFreeParserCtxt(ctxt);
12466 	return(NULL);
12467     }
12468 
12469     /*
12470      * plug some encoding conversion routines here.
12471      */
12472     if (xmlPushInput(ctxt, pinput) < 0) {
12473 	xmlFreeParserCtxt(ctxt);
12474 	return(NULL);
12475     }
12476     if (enc != XML_CHAR_ENCODING_NONE) {
12477         xmlSwitchEncoding(ctxt, enc);
12478     }
12479 
12480     /*
12481      * let's parse that entity knowing it's an external subset.
12482      */
12483     ctxt->inSubset = 2;
12484     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12485     if (ctxt->myDoc == NULL) {
12486 	xmlErrMemory(ctxt, "New Doc failed");
12487 	return(NULL);
12488     }
12489     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12490     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12491 	                               BAD_CAST "none", BAD_CAST "none");
12492 
12493     xmlDetectEncoding(ctxt);
12494 
12495     xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12496 
12497     if (ctxt->myDoc != NULL) {
12498 	if (ctxt->wellFormed) {
12499 	    ret = ctxt->myDoc->extSubset;
12500 	    ctxt->myDoc->extSubset = NULL;
12501 	    if (ret != NULL) {
12502 		xmlNodePtr tmp;
12503 
12504 		ret->doc = NULL;
12505 		tmp = ret->children;
12506 		while (tmp != NULL) {
12507 		    tmp->doc = NULL;
12508 		    tmp = tmp->next;
12509 		}
12510 	    }
12511 	} else {
12512 	    ret = NULL;
12513 	}
12514         xmlFreeDoc(ctxt->myDoc);
12515         ctxt->myDoc = NULL;
12516     }
12517     xmlFreeParserCtxt(ctxt);
12518 
12519     return(ret);
12520 }
12521 
12522 /**
12523  * xmlSAXParseDTD:
12524  * @sax:  the SAX handler block
12525  * @ExternalID:  a NAME* containing the External ID of the DTD
12526  * @SystemID:  a NAME* containing the URL to the DTD
12527  *
12528  * DEPRECATED: Don't use.
12529  *
12530  * Load and parse an external subset.
12531  *
12532  * Returns the resulting xmlDtdPtr or NULL in case of error.
12533  */
12534 
12535 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12536 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12537                           const xmlChar *SystemID) {
12538     xmlDtdPtr ret = NULL;
12539     xmlParserCtxtPtr ctxt;
12540     xmlParserInputPtr input = NULL;
12541     xmlChar* systemIdCanonic;
12542 
12543     if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12544 
12545     ctxt = xmlNewSAXParserCtxt(sax, NULL);
12546     if (ctxt == NULL) {
12547 	return(NULL);
12548     }
12549 
12550     /* We are loading a DTD */
12551     ctxt->options |= XML_PARSE_DTDLOAD;
12552 
12553     /*
12554      * Canonicalise the system ID
12555      */
12556     systemIdCanonic = xmlCanonicPath(SystemID);
12557     if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12558 	xmlFreeParserCtxt(ctxt);
12559 	return(NULL);
12560     }
12561 
12562     /*
12563      * Ask the Entity resolver to load the damn thing
12564      */
12565 
12566     if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12567 	input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12568 	                                 systemIdCanonic);
12569     if (input == NULL) {
12570 	xmlFreeParserCtxt(ctxt);
12571 	if (systemIdCanonic != NULL)
12572 	    xmlFree(systemIdCanonic);
12573 	return(NULL);
12574     }
12575 
12576     /*
12577      * plug some encoding conversion routines here.
12578      */
12579     if (xmlPushInput(ctxt, input) < 0) {
12580 	xmlFreeParserCtxt(ctxt);
12581 	if (systemIdCanonic != NULL)
12582 	    xmlFree(systemIdCanonic);
12583 	return(NULL);
12584     }
12585 
12586     xmlDetectEncoding(ctxt);
12587 
12588     if (input->filename == NULL)
12589 	input->filename = (char *) systemIdCanonic;
12590     else
12591 	xmlFree(systemIdCanonic);
12592 
12593     /*
12594      * let's parse that entity knowing it's an external subset.
12595      */
12596     ctxt->inSubset = 2;
12597     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12598     if (ctxt->myDoc == NULL) {
12599 	xmlErrMemory(ctxt, "New Doc failed");
12600 	xmlFreeParserCtxt(ctxt);
12601 	return(NULL);
12602     }
12603     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12604     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12605 	                               ExternalID, SystemID);
12606     xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12607 
12608     if (ctxt->myDoc != NULL) {
12609 	if (ctxt->wellFormed) {
12610 	    ret = ctxt->myDoc->extSubset;
12611 	    ctxt->myDoc->extSubset = NULL;
12612 	    if (ret != NULL) {
12613 		xmlNodePtr tmp;
12614 
12615 		ret->doc = NULL;
12616 		tmp = ret->children;
12617 		while (tmp != NULL) {
12618 		    tmp->doc = NULL;
12619 		    tmp = tmp->next;
12620 		}
12621 	    }
12622 	} else {
12623 	    ret = NULL;
12624 	}
12625         xmlFreeDoc(ctxt->myDoc);
12626         ctxt->myDoc = NULL;
12627     }
12628     xmlFreeParserCtxt(ctxt);
12629 
12630     return(ret);
12631 }
12632 
12633 
12634 /**
12635  * xmlParseDTD:
12636  * @ExternalID:  a NAME* containing the External ID of the DTD
12637  * @SystemID:  a NAME* containing the URL to the DTD
12638  *
12639  * Load and parse an external subset.
12640  *
12641  * Returns the resulting xmlDtdPtr or NULL in case of error.
12642  */
12643 
12644 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)12645 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12646     return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12647 }
12648 #endif /* LIBXML_VALID_ENABLED */
12649 
12650 /************************************************************************
12651  *									*
12652  *		Front ends when parsing an Entity			*
12653  *									*
12654  ************************************************************************/
12655 
12656 /**
12657  * xmlParseCtxtExternalEntity:
12658  * @ctx:  the existing parsing context
12659  * @URL:  the URL for the entity to load
12660  * @ID:  the System ID for the entity to load
12661  * @lst:  the return value for the set of parsed nodes
12662  *
12663  * Parse an external general entity within an existing parsing context
12664  * An external general parsed entity is well-formed if it matches the
12665  * production labeled extParsedEnt.
12666  *
12667  * [78] extParsedEnt ::= TextDecl? content
12668  *
12669  * Returns 0 if the entity is well formed, -1 in case of args problem and
12670  *    the parser error code otherwise
12671  */
12672 
12673 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12674 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12675 	               const xmlChar *ID, xmlNodePtr *lst) {
12676     void *userData;
12677 
12678     if (ctx == NULL) return(-1);
12679     /*
12680      * If the user provided their own SAX callbacks, then reuse the
12681      * userData callback field, otherwise the expected setup in a
12682      * DOM builder is to have userData == ctxt
12683      */
12684     if (ctx->userData == ctx)
12685         userData = NULL;
12686     else
12687         userData = ctx->userData;
12688     return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12689                                          userData, ctx->depth + 1,
12690                                          URL, ID, lst);
12691 }
12692 
12693 /**
12694  * xmlParseExternalEntityPrivate:
12695  * @doc:  the document the chunk pertains to
12696  * @oldctxt:  the previous parser context if available
12697  * @sax:  the SAX handler block (possibly NULL)
12698  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12699  * @depth:  Used for loop detection, use 0
12700  * @URL:  the URL for the entity to load
12701  * @ID:  the System ID for the entity to load
12702  * @list:  the return value for the set of parsed nodes
12703  *
12704  * Private version of xmlParseExternalEntity()
12705  *
12706  * Returns 0 if the entity is well formed, -1 in case of args problem and
12707  *    the parser error code otherwise
12708  */
12709 
12710 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)12711 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12712 	              xmlSAXHandlerPtr sax,
12713 		      void *user_data, int depth, const xmlChar *URL,
12714 		      const xmlChar *ID, xmlNodePtr *list) {
12715     xmlParserCtxtPtr ctxt;
12716     xmlDocPtr newDoc;
12717     xmlNodePtr newRoot;
12718     xmlParserErrors ret = XML_ERR_OK;
12719 
12720     if (((depth > 40) &&
12721 	((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12722 	(depth > 100)) {
12723 	xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12724                        "Maximum entity nesting depth exceeded");
12725         return(XML_ERR_ENTITY_LOOP);
12726     }
12727 
12728     if (list != NULL)
12729         *list = NULL;
12730     if ((URL == NULL) && (ID == NULL))
12731 	return(XML_ERR_INTERNAL_ERROR);
12732     if (doc == NULL)
12733 	return(XML_ERR_INTERNAL_ERROR);
12734 
12735     ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12736                                              oldctxt);
12737     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12738     if (oldctxt != NULL) {
12739         ctxt->nbErrors = oldctxt->nbErrors;
12740         ctxt->nbWarnings = oldctxt->nbWarnings;
12741     }
12742     xmlDetectSAX2(ctxt);
12743 
12744     newDoc = xmlNewDoc(BAD_CAST "1.0");
12745     if (newDoc == NULL) {
12746 	xmlFreeParserCtxt(ctxt);
12747 	return(XML_ERR_INTERNAL_ERROR);
12748     }
12749     newDoc->properties = XML_DOC_INTERNAL;
12750     if (doc) {
12751         newDoc->intSubset = doc->intSubset;
12752         newDoc->extSubset = doc->extSubset;
12753         if (doc->dict) {
12754             newDoc->dict = doc->dict;
12755             xmlDictReference(newDoc->dict);
12756         }
12757         if (doc->URL != NULL) {
12758             newDoc->URL = xmlStrdup(doc->URL);
12759         }
12760     }
12761     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12762     if (newRoot == NULL) {
12763 	if (sax != NULL)
12764 	xmlFreeParserCtxt(ctxt);
12765 	newDoc->intSubset = NULL;
12766 	newDoc->extSubset = NULL;
12767         xmlFreeDoc(newDoc);
12768 	return(XML_ERR_INTERNAL_ERROR);
12769     }
12770     xmlAddChild((xmlNodePtr) newDoc, newRoot);
12771     nodePush(ctxt, newDoc->children);
12772     if (doc == NULL) {
12773         ctxt->myDoc = newDoc;
12774     } else {
12775         ctxt->myDoc = doc;
12776         newRoot->doc = doc;
12777     }
12778 
12779     xmlDetectEncoding(ctxt);
12780 
12781     /*
12782      * Parse a possible text declaration first
12783      */
12784     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12785 	xmlParseTextDecl(ctxt);
12786         /*
12787          * An XML-1.0 document can't reference an entity not XML-1.0
12788          */
12789         if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12790             (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12791             xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12792                            "Version mismatch between document and entity\n");
12793         }
12794     }
12795 
12796     ctxt->instate = XML_PARSER_CONTENT;
12797     ctxt->depth = depth;
12798     if (oldctxt != NULL) {
12799 	ctxt->_private = oldctxt->_private;
12800 	ctxt->loadsubset = oldctxt->loadsubset;
12801 	ctxt->validate = oldctxt->validate;
12802 	ctxt->valid = oldctxt->valid;
12803 	ctxt->replaceEntities = oldctxt->replaceEntities;
12804         if (oldctxt->validate) {
12805             ctxt->vctxt.error = oldctxt->vctxt.error;
12806             ctxt->vctxt.warning = oldctxt->vctxt.warning;
12807             ctxt->vctxt.userData = oldctxt->vctxt.userData;
12808             ctxt->vctxt.flags = oldctxt->vctxt.flags;
12809         }
12810 	ctxt->external = oldctxt->external;
12811         if (ctxt->dict) xmlDictFree(ctxt->dict);
12812         ctxt->dict = oldctxt->dict;
12813         ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12814         ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12815         ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12816         ctxt->dictNames = oldctxt->dictNames;
12817         ctxt->attsDefault = oldctxt->attsDefault;
12818         ctxt->attsSpecial = oldctxt->attsSpecial;
12819         ctxt->linenumbers = oldctxt->linenumbers;
12820 	ctxt->record_info = oldctxt->record_info;
12821 	ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12822 	ctxt->node_seq.length = oldctxt->node_seq.length;
12823 	ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12824     } else {
12825 	/*
12826 	 * Doing validity checking on chunk without context
12827 	 * doesn't make sense
12828 	 */
12829 	ctxt->_private = NULL;
12830 	ctxt->validate = 0;
12831 	ctxt->external = 2;
12832 	ctxt->loadsubset = 0;
12833     }
12834 
12835     xmlParseContent(ctxt);
12836 
12837     if ((RAW == '<') && (NXT(1) == '/')) {
12838 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12839     } else if (RAW != 0) {
12840 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12841     }
12842     if (ctxt->node != newDoc->children) {
12843 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12844     }
12845 
12846     if (!ctxt->wellFormed) {
12847 	ret = (xmlParserErrors)ctxt->errNo;
12848         if (oldctxt != NULL) {
12849             oldctxt->errNo = ctxt->errNo;
12850             oldctxt->wellFormed = 0;
12851             xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12852         }
12853     } else {
12854 	if (list != NULL) {
12855 	    xmlNodePtr cur;
12856 
12857 	    /*
12858 	     * Return the newly created nodeset after unlinking it from
12859 	     * they pseudo parent.
12860 	     */
12861 	    cur = newDoc->children->children;
12862 	    *list = cur;
12863 	    while (cur != NULL) {
12864 		cur->parent = NULL;
12865 		cur = cur->next;
12866 	    }
12867             newDoc->children->children = NULL;
12868 	}
12869 	ret = XML_ERR_OK;
12870     }
12871 
12872     /*
12873      * Also record the size of the entity parsed
12874      */
12875     if (ctxt->input != NULL && oldctxt != NULL) {
12876         unsigned long consumed = ctxt->input->consumed;
12877 
12878         xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
12879 
12880         xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
12881         xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
12882 
12883         xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
12884         xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
12885     }
12886 
12887     if (oldctxt != NULL) {
12888         ctxt->dict = NULL;
12889         ctxt->attsDefault = NULL;
12890         ctxt->attsSpecial = NULL;
12891         oldctxt->nbErrors = ctxt->nbErrors;
12892         oldctxt->nbWarnings = ctxt->nbWarnings;
12893         oldctxt->validate = ctxt->validate;
12894         oldctxt->valid = ctxt->valid;
12895         oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12896         oldctxt->node_seq.length = ctxt->node_seq.length;
12897         oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12898     }
12899     ctxt->node_seq.maximum = 0;
12900     ctxt->node_seq.length = 0;
12901     ctxt->node_seq.buffer = NULL;
12902     xmlFreeParserCtxt(ctxt);
12903     newDoc->intSubset = NULL;
12904     newDoc->extSubset = NULL;
12905     xmlFreeDoc(newDoc);
12906 
12907     return(ret);
12908 }
12909 
12910 #ifdef LIBXML_SAX1_ENABLED
12911 /**
12912  * xmlParseExternalEntity:
12913  * @doc:  the document the chunk pertains to
12914  * @sax:  the SAX handler block (possibly NULL)
12915  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12916  * @depth:  Used for loop detection, use 0
12917  * @URL:  the URL for the entity to load
12918  * @ID:  the System ID for the entity to load
12919  * @lst:  the return value for the set of parsed nodes
12920  *
12921  * Parse an external general entity
12922  * An external general parsed entity is well-formed if it matches the
12923  * production labeled extParsedEnt.
12924  *
12925  * [78] extParsedEnt ::= TextDecl? content
12926  *
12927  * Returns 0 if the entity is well formed, -1 in case of args problem and
12928  *    the parser error code otherwise
12929  */
12930 
12931 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12932 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12933 	  int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12934     return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12935 		                       ID, lst));
12936 }
12937 
12938 /**
12939  * xmlParseBalancedChunkMemory:
12940  * @doc:  the document the chunk pertains to (must not be NULL)
12941  * @sax:  the SAX handler block (possibly NULL)
12942  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12943  * @depth:  Used for loop detection, use 0
12944  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12945  * @lst:  the return value for the set of parsed nodes
12946  *
12947  * Parse a well-balanced chunk of an XML document
12948  * called by the parser
12949  * The allowed sequence for the Well Balanced Chunk is the one defined by
12950  * the content production in the XML grammar:
12951  *
12952  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12953  *
12954  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12955  *    the parser error code otherwise
12956  */
12957 
12958 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)12959 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12960      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12961     return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12962                                                 depth, string, lst, 0 );
12963 }
12964 #endif /* LIBXML_SAX1_ENABLED */
12965 
12966 /**
12967  * xmlParseBalancedChunkMemoryInternal:
12968  * @oldctxt:  the existing parsing context
12969  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12970  * @user_data:  the user data field for the parser context
12971  * @lst:  the return value for the set of parsed nodes
12972  *
12973  *
12974  * Parse a well-balanced chunk of an XML document
12975  * called by the parser
12976  * The allowed sequence for the Well Balanced Chunk is the one defined by
12977  * the content production in the XML grammar:
12978  *
12979  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12980  *
12981  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12982  * error code otherwise
12983  *
12984  * In case recover is set to 1, the nodelist will not be empty even if
12985  * the parsed chunk is not well balanced.
12986  */
12987 static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,const xmlChar * string,void * user_data,xmlNodePtr * lst)12988 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12989 	const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12990     xmlParserCtxtPtr ctxt;
12991     xmlDocPtr newDoc = NULL;
12992     xmlNodePtr newRoot;
12993     xmlSAXHandlerPtr oldsax = NULL;
12994     xmlNodePtr content = NULL;
12995     xmlNodePtr last = NULL;
12996     xmlParserErrors ret = XML_ERR_OK;
12997     xmlHashedString hprefix, huri;
12998     unsigned i;
12999 
13000     if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13001         (oldctxt->depth >  100)) {
13002 	xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13003                        "Maximum entity nesting depth exceeded");
13004 	return(XML_ERR_ENTITY_LOOP);
13005     }
13006 
13007 
13008     if (lst != NULL)
13009         *lst = NULL;
13010     if (string == NULL)
13011         return(XML_ERR_INTERNAL_ERROR);
13012 
13013     ctxt = xmlCreateDocParserCtxt(string);
13014     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13015     ctxt->nbErrors = oldctxt->nbErrors;
13016     ctxt->nbWarnings = oldctxt->nbWarnings;
13017     if (user_data != NULL)
13018 	ctxt->userData = user_data;
13019     else
13020 	ctxt->userData = ctxt;
13021     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13022     ctxt->dict = oldctxt->dict;
13023     ctxt->input_id = oldctxt->input_id;
13024     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13025     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13026     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13027 
13028     /*
13029      * Propagate namespaces down the entity
13030      *
13031      * Making entities and namespaces work correctly requires additional
13032      * changes, see xmlParseReference.
13033      */
13034 
13035     /* Default namespace */
13036     hprefix.name = NULL;
13037     hprefix.hashValue = 0;
13038     huri.name = xmlParserNsLookupUri(oldctxt, &hprefix);
13039     huri.hashValue = 0;
13040     if (huri.name != NULL)
13041         xmlParserNsPush(ctxt, NULL, &huri, NULL, 0);
13042 
13043     for (i = 0; i < oldctxt->nsdb->hashSize; i++) {
13044         xmlParserNsBucket *bucket = &oldctxt->nsdb->hash[i];
13045         const xmlChar **ns;
13046         xmlParserNsExtra *extra;
13047         unsigned nsIndex;
13048 
13049         if ((bucket->hashValue != 0) &&
13050             (bucket->index != INT_MAX)) {
13051             nsIndex = bucket->index;
13052             ns = &oldctxt->nsTab[nsIndex * 2];
13053             extra = &oldctxt->nsdb->extra[nsIndex];
13054 
13055             hprefix.name = ns[0];
13056             hprefix.hashValue = bucket->hashValue;
13057             huri.name = ns[1];
13058             huri.hashValue = extra->uriHashValue;
13059             /*
13060              * Don't copy SAX data to avoid a use-after-free with XML reader.
13061              * This matches the pre-2.12 behavior.
13062              */
13063             xmlParserNsPush(ctxt, &hprefix, &huri, NULL, 0);
13064         }
13065     }
13066 
13067     oldsax = ctxt->sax;
13068     ctxt->sax = oldctxt->sax;
13069     xmlDetectSAX2(ctxt);
13070     ctxt->replaceEntities = oldctxt->replaceEntities;
13071     ctxt->options = oldctxt->options;
13072 
13073     ctxt->_private = oldctxt->_private;
13074     if (oldctxt->myDoc == NULL) {
13075 	newDoc = xmlNewDoc(BAD_CAST "1.0");
13076 	if (newDoc == NULL) {
13077             ret = XML_ERR_INTERNAL_ERROR;
13078             goto error;
13079 	}
13080 	newDoc->properties = XML_DOC_INTERNAL;
13081 	newDoc->dict = ctxt->dict;
13082 	xmlDictReference(newDoc->dict);
13083 	ctxt->myDoc = newDoc;
13084     } else {
13085 	ctxt->myDoc = oldctxt->myDoc;
13086         content = ctxt->myDoc->children;
13087 	last = ctxt->myDoc->last;
13088     }
13089     newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13090     if (newRoot == NULL) {
13091         ret = XML_ERR_INTERNAL_ERROR;
13092         goto error;
13093     }
13094     ctxt->myDoc->children = NULL;
13095     ctxt->myDoc->last = NULL;
13096     xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13097     nodePush(ctxt, ctxt->myDoc->children);
13098     ctxt->instate = XML_PARSER_CONTENT;
13099     ctxt->depth = oldctxt->depth;
13100 
13101     ctxt->validate = 0;
13102     ctxt->loadsubset = oldctxt->loadsubset;
13103     if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13104 	/*
13105 	 * ID/IDREF registration will be done in xmlValidateElement below
13106 	 */
13107 	ctxt->loadsubset |= XML_SKIP_IDS;
13108     }
13109     ctxt->dictNames = oldctxt->dictNames;
13110     ctxt->attsDefault = oldctxt->attsDefault;
13111     ctxt->attsSpecial = oldctxt->attsSpecial;
13112 
13113     xmlParseContent(ctxt);
13114     if ((RAW == '<') && (NXT(1) == '/')) {
13115 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13116     } else if (RAW != 0) {
13117 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13118     }
13119     if (ctxt->node != ctxt->myDoc->children) {
13120 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13121     }
13122 
13123     if (!ctxt->wellFormed) {
13124 	ret = (xmlParserErrors)ctxt->errNo;
13125         oldctxt->errNo = ctxt->errNo;
13126         oldctxt->wellFormed = 0;
13127         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13128     } else {
13129         ret = XML_ERR_OK;
13130     }
13131 
13132     if ((lst != NULL) && (ret == XML_ERR_OK)) {
13133 	xmlNodePtr cur;
13134 
13135 	/*
13136 	 * Return the newly created nodeset after unlinking it from
13137 	 * they pseudo parent.
13138 	 */
13139 	cur = ctxt->myDoc->children->children;
13140 	*lst = cur;
13141 	while (cur != NULL) {
13142 #ifdef LIBXML_VALID_ENABLED
13143 	    if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13144 		(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13145 		(cur->type == XML_ELEMENT_NODE)) {
13146 		oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13147 			oldctxt->myDoc, cur);
13148 	    }
13149 #endif /* LIBXML_VALID_ENABLED */
13150 	    cur->parent = NULL;
13151 	    cur = cur->next;
13152 	}
13153 	ctxt->myDoc->children->children = NULL;
13154     }
13155     if (ctxt->myDoc != NULL) {
13156 	xmlFreeNode(ctxt->myDoc->children);
13157         ctxt->myDoc->children = content;
13158         ctxt->myDoc->last = last;
13159     }
13160 
13161     /*
13162      * Also record the size of the entity parsed
13163      */
13164     if (ctxt->input != NULL && oldctxt != NULL) {
13165         unsigned long consumed = ctxt->input->consumed;
13166 
13167         xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13168 
13169         xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13170         xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13171     }
13172 
13173     oldctxt->nbErrors = ctxt->nbErrors;
13174     oldctxt->nbWarnings = ctxt->nbWarnings;
13175 
13176 error:
13177     ctxt->sax = oldsax;
13178     ctxt->dict = NULL;
13179     ctxt->attsDefault = NULL;
13180     ctxt->attsSpecial = NULL;
13181     xmlFreeParserCtxt(ctxt);
13182     if (newDoc != NULL) {
13183 	xmlFreeDoc(newDoc);
13184     }
13185 
13186     return(ret);
13187 }
13188 
13189 /**
13190  * xmlParseInNodeContext:
13191  * @node:  the context node
13192  * @data:  the input string
13193  * @datalen:  the input string length in bytes
13194  * @options:  a combination of xmlParserOption
13195  * @lst:  the return value for the set of parsed nodes
13196  *
13197  * Parse a well-balanced chunk of an XML document
13198  * within the context (DTD, namespaces, etc ...) of the given node.
13199  *
13200  * The allowed sequence for the data is a Well Balanced Chunk defined by
13201  * the content production in the XML grammar:
13202  *
13203  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13204  *
13205  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13206  * error code otherwise
13207  */
13208 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)13209 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13210                       int options, xmlNodePtr *lst) {
13211     xmlParserCtxtPtr ctxt;
13212     xmlDocPtr doc = NULL;
13213     xmlNodePtr fake, cur;
13214     int nsnr = 0;
13215 
13216     xmlParserErrors ret = XML_ERR_OK;
13217 
13218     /*
13219      * check all input parameters, grab the document
13220      */
13221     if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13222         return(XML_ERR_INTERNAL_ERROR);
13223     switch (node->type) {
13224         case XML_ELEMENT_NODE:
13225         case XML_ATTRIBUTE_NODE:
13226         case XML_TEXT_NODE:
13227         case XML_CDATA_SECTION_NODE:
13228         case XML_ENTITY_REF_NODE:
13229         case XML_PI_NODE:
13230         case XML_COMMENT_NODE:
13231         case XML_DOCUMENT_NODE:
13232         case XML_HTML_DOCUMENT_NODE:
13233 	    break;
13234 	default:
13235 	    return(XML_ERR_INTERNAL_ERROR);
13236 
13237     }
13238     while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13239            (node->type != XML_DOCUMENT_NODE) &&
13240 	   (node->type != XML_HTML_DOCUMENT_NODE))
13241 	node = node->parent;
13242     if (node == NULL)
13243 	return(XML_ERR_INTERNAL_ERROR);
13244     if (node->type == XML_ELEMENT_NODE)
13245 	doc = node->doc;
13246     else
13247         doc = (xmlDocPtr) node;
13248     if (doc == NULL)
13249 	return(XML_ERR_INTERNAL_ERROR);
13250 
13251     /*
13252      * allocate a context and set-up everything not related to the
13253      * node position in the tree
13254      */
13255     if (doc->type == XML_DOCUMENT_NODE)
13256 	ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13257 #ifdef LIBXML_HTML_ENABLED
13258     else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13259 	ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13260         /*
13261          * When parsing in context, it makes no sense to add implied
13262          * elements like html/body/etc...
13263          */
13264         options |= HTML_PARSE_NOIMPLIED;
13265     }
13266 #endif
13267     else
13268         return(XML_ERR_INTERNAL_ERROR);
13269 
13270     if (ctxt == NULL)
13271         return(XML_ERR_NO_MEMORY);
13272 
13273     /*
13274      * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13275      * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13276      * we must wait until the last moment to free the original one.
13277      */
13278     if (doc->dict != NULL) {
13279         if (ctxt->dict != NULL)
13280 	    xmlDictFree(ctxt->dict);
13281 	ctxt->dict = doc->dict;
13282     } else
13283         options |= XML_PARSE_NODICT;
13284 
13285     if (doc->encoding != NULL) {
13286         xmlCharEncodingHandlerPtr hdlr;
13287 
13288         hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13289         if (hdlr != NULL) {
13290             xmlSwitchToEncoding(ctxt, hdlr);
13291 	} else {
13292             return(XML_ERR_UNSUPPORTED_ENCODING);
13293         }
13294     }
13295 
13296     xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13297     xmlDetectSAX2(ctxt);
13298     ctxt->myDoc = doc;
13299     /* parsing in context, i.e. as within existing content */
13300     ctxt->input_id = 2;
13301     ctxt->instate = XML_PARSER_CONTENT;
13302 
13303     fake = xmlNewDocComment(node->doc, NULL);
13304     if (fake == NULL) {
13305         xmlFreeParserCtxt(ctxt);
13306 	return(XML_ERR_NO_MEMORY);
13307     }
13308     xmlAddChild(node, fake);
13309 
13310     if (node->type == XML_ELEMENT_NODE) {
13311 	nodePush(ctxt, node);
13312 	/*
13313 	 * initialize the SAX2 namespaces stack
13314 	 */
13315 	cur = node;
13316 	while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13317 	    xmlNsPtr ns = cur->nsDef;
13318             xmlHashedString hprefix, huri;
13319 
13320 	    while (ns != NULL) {
13321                 hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
13322                 huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
13323                 if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
13324                     nsnr++;
13325 		ns = ns->next;
13326 	    }
13327 	    cur = cur->parent;
13328 	}
13329     }
13330 
13331     if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13332 	/*
13333 	 * ID/IDREF registration will be done in xmlValidateElement below
13334 	 */
13335 	ctxt->loadsubset |= XML_SKIP_IDS;
13336     }
13337 
13338 #ifdef LIBXML_HTML_ENABLED
13339     if (doc->type == XML_HTML_DOCUMENT_NODE)
13340         __htmlParseContent(ctxt);
13341     else
13342 #endif
13343 	xmlParseContent(ctxt);
13344 
13345     xmlParserNsPop(ctxt, nsnr);
13346     if ((RAW == '<') && (NXT(1) == '/')) {
13347 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13348     } else if (RAW != 0) {
13349 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13350     }
13351     if ((ctxt->node != NULL) && (ctxt->node != node)) {
13352 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13353 	ctxt->wellFormed = 0;
13354     }
13355 
13356     if (!ctxt->wellFormed) {
13357         if (ctxt->errNo == 0)
13358 	    ret = XML_ERR_INTERNAL_ERROR;
13359 	else
13360 	    ret = (xmlParserErrors)ctxt->errNo;
13361     } else {
13362         ret = XML_ERR_OK;
13363     }
13364 
13365     /*
13366      * Return the newly created nodeset after unlinking it from
13367      * the pseudo sibling.
13368      */
13369 
13370     cur = fake->next;
13371     fake->next = NULL;
13372     node->last = fake;
13373 
13374     if (cur != NULL) {
13375 	cur->prev = NULL;
13376     }
13377 
13378     *lst = cur;
13379 
13380     while (cur != NULL) {
13381 	cur->parent = NULL;
13382 	cur = cur->next;
13383     }
13384 
13385     xmlUnlinkNode(fake);
13386     xmlFreeNode(fake);
13387 
13388 
13389     if (ret != XML_ERR_OK) {
13390         xmlFreeNodeList(*lst);
13391 	*lst = NULL;
13392     }
13393 
13394     if (doc->dict != NULL)
13395         ctxt->dict = NULL;
13396     xmlFreeParserCtxt(ctxt);
13397 
13398     return(ret);
13399 }
13400 
13401 #ifdef LIBXML_SAX1_ENABLED
13402 /**
13403  * xmlParseBalancedChunkMemoryRecover:
13404  * @doc:  the document the chunk pertains to (must not be NULL)
13405  * @sax:  the SAX handler block (possibly NULL)
13406  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13407  * @depth:  Used for loop detection, use 0
13408  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13409  * @lst:  the return value for the set of parsed nodes
13410  * @recover: return nodes even if the data is broken (use 0)
13411  *
13412  *
13413  * Parse a well-balanced chunk of an XML document
13414  * called by the parser
13415  * The allowed sequence for the Well Balanced Chunk is the one defined by
13416  * the content production in the XML grammar:
13417  *
13418  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13419  *
13420  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13421  *    the parser error code otherwise
13422  *
13423  * In case recover is set to 1, the nodelist will not be empty even if
13424  * the parsed chunk is not well balanced, assuming the parsing succeeded to
13425  * some extent.
13426  */
13427 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)13428 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13429      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13430      int recover) {
13431     xmlParserCtxtPtr ctxt;
13432     xmlDocPtr newDoc;
13433     xmlSAXHandlerPtr oldsax = NULL;
13434     xmlNodePtr content, newRoot;
13435     int ret = 0;
13436 
13437     if (depth > 40) {
13438 	return(XML_ERR_ENTITY_LOOP);
13439     }
13440 
13441 
13442     if (lst != NULL)
13443         *lst = NULL;
13444     if (string == NULL)
13445         return(-1);
13446 
13447     ctxt = xmlCreateDocParserCtxt(string);
13448     if (ctxt == NULL) return(-1);
13449     ctxt->userData = ctxt;
13450     if (sax != NULL) {
13451 	oldsax = ctxt->sax;
13452         ctxt->sax = sax;
13453 	if (user_data != NULL)
13454 	    ctxt->userData = user_data;
13455     }
13456     newDoc = xmlNewDoc(BAD_CAST "1.0");
13457     if (newDoc == NULL) {
13458 	xmlFreeParserCtxt(ctxt);
13459 	return(-1);
13460     }
13461     newDoc->properties = XML_DOC_INTERNAL;
13462     if ((doc != NULL) && (doc->dict != NULL)) {
13463         xmlDictFree(ctxt->dict);
13464 	ctxt->dict = doc->dict;
13465 	xmlDictReference(ctxt->dict);
13466 	ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13467 	ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13468 	ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13469 	ctxt->dictNames = 1;
13470     } else {
13471 	xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13472     }
13473     /* doc == NULL is only supported for historic reasons */
13474     if (doc != NULL) {
13475 	newDoc->intSubset = doc->intSubset;
13476 	newDoc->extSubset = doc->extSubset;
13477     }
13478     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13479     if (newRoot == NULL) {
13480 	if (sax != NULL)
13481 	    ctxt->sax = oldsax;
13482 	xmlFreeParserCtxt(ctxt);
13483 	newDoc->intSubset = NULL;
13484 	newDoc->extSubset = NULL;
13485         xmlFreeDoc(newDoc);
13486 	return(-1);
13487     }
13488     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13489     nodePush(ctxt, newRoot);
13490     /* doc == NULL is only supported for historic reasons */
13491     if (doc == NULL) {
13492 	ctxt->myDoc = newDoc;
13493     } else {
13494 	ctxt->myDoc = newDoc;
13495 	newDoc->children->doc = doc;
13496 	/* Ensure that doc has XML spec namespace */
13497 	xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13498 	newDoc->oldNs = doc->oldNs;
13499     }
13500     ctxt->instate = XML_PARSER_CONTENT;
13501     ctxt->input_id = 2;
13502     ctxt->depth = depth;
13503 
13504     /*
13505      * Doing validity checking on chunk doesn't make sense
13506      */
13507     ctxt->validate = 0;
13508     ctxt->loadsubset = 0;
13509     xmlDetectSAX2(ctxt);
13510 
13511     if ( doc != NULL ){
13512         content = doc->children;
13513         doc->children = NULL;
13514         xmlParseContent(ctxt);
13515         doc->children = content;
13516     }
13517     else {
13518         xmlParseContent(ctxt);
13519     }
13520     if ((RAW == '<') && (NXT(1) == '/')) {
13521 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13522     } else if (RAW != 0) {
13523 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13524     }
13525     if (ctxt->node != newDoc->children) {
13526 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13527     }
13528 
13529     if (!ctxt->wellFormed) {
13530         if (ctxt->errNo == 0)
13531 	    ret = 1;
13532 	else
13533 	    ret = ctxt->errNo;
13534     } else {
13535       ret = 0;
13536     }
13537 
13538     if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13539 	xmlNodePtr cur;
13540 
13541 	/*
13542 	 * Return the newly created nodeset after unlinking it from
13543 	 * they pseudo parent.
13544 	 */
13545 	cur = newDoc->children->children;
13546 	*lst = cur;
13547 	while (cur != NULL) {
13548 	    xmlSetTreeDoc(cur, doc);
13549 	    cur->parent = NULL;
13550 	    cur = cur->next;
13551 	}
13552 	newDoc->children->children = NULL;
13553     }
13554 
13555     if (sax != NULL)
13556 	ctxt->sax = oldsax;
13557     xmlFreeParserCtxt(ctxt);
13558     newDoc->intSubset = NULL;
13559     newDoc->extSubset = NULL;
13560     /* This leaks the namespace list if doc == NULL */
13561     newDoc->oldNs = NULL;
13562     xmlFreeDoc(newDoc);
13563 
13564     return(ret);
13565 }
13566 
13567 /**
13568  * xmlSAXParseEntity:
13569  * @sax:  the SAX handler block
13570  * @filename:  the filename
13571  *
13572  * DEPRECATED: Don't use.
13573  *
13574  * parse an XML external entity out of context and build a tree.
13575  * It use the given SAX function block to handle the parsing callback.
13576  * If sax is NULL, fallback to the default DOM tree building routines.
13577  *
13578  * [78] extParsedEnt ::= TextDecl? content
13579  *
13580  * This correspond to a "Well Balanced" chunk
13581  *
13582  * Returns the resulting document tree
13583  */
13584 
13585 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)13586 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13587     xmlDocPtr ret;
13588     xmlParserCtxtPtr ctxt;
13589 
13590     ctxt = xmlCreateFileParserCtxt(filename);
13591     if (ctxt == NULL) {
13592 	return(NULL);
13593     }
13594     if (sax != NULL) {
13595 	if (ctxt->sax != NULL)
13596 	    xmlFree(ctxt->sax);
13597         ctxt->sax = sax;
13598         ctxt->userData = NULL;
13599     }
13600 
13601     xmlParseExtParsedEnt(ctxt);
13602 
13603     if (ctxt->wellFormed)
13604 	ret = ctxt->myDoc;
13605     else {
13606         ret = NULL;
13607         xmlFreeDoc(ctxt->myDoc);
13608         ctxt->myDoc = NULL;
13609     }
13610     if (sax != NULL)
13611         ctxt->sax = NULL;
13612     xmlFreeParserCtxt(ctxt);
13613 
13614     return(ret);
13615 }
13616 
13617 /**
13618  * xmlParseEntity:
13619  * @filename:  the filename
13620  *
13621  * parse an XML external entity out of context and build a tree.
13622  *
13623  * [78] extParsedEnt ::= TextDecl? content
13624  *
13625  * This correspond to a "Well Balanced" chunk
13626  *
13627  * Returns the resulting document tree
13628  */
13629 
13630 xmlDocPtr
xmlParseEntity(const char * filename)13631 xmlParseEntity(const char *filename) {
13632     return(xmlSAXParseEntity(NULL, filename));
13633 }
13634 #endif /* LIBXML_SAX1_ENABLED */
13635 
13636 /**
13637  * xmlCreateEntityParserCtxtInternal:
13638  * @URL:  the entity URL
13639  * @ID:  the entity PUBLIC ID
13640  * @base:  a possible base for the target URI
13641  * @pctx:  parser context used to set options on new context
13642  *
13643  * Create a parser context for an external entity
13644  * Automatic support for ZLIB/Compress compressed document is provided
13645  * by default if found at compile-time.
13646  *
13647  * Returns the new parser context or NULL
13648  */
13649 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax,void * userData,const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)13650 xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13651         const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13652         xmlParserCtxtPtr pctx) {
13653     xmlParserCtxtPtr ctxt;
13654     xmlParserInputPtr inputStream;
13655     char *directory = NULL;
13656     xmlChar *uri;
13657 
13658     ctxt = xmlNewSAXParserCtxt(sax, userData);
13659     if (ctxt == NULL) {
13660 	return(NULL);
13661     }
13662 
13663     if (pctx != NULL) {
13664         ctxt->options = pctx->options;
13665         ctxt->_private = pctx->_private;
13666 	ctxt->input_id = pctx->input_id;
13667     }
13668 
13669     /* Don't read from stdin. */
13670     if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13671         URL = BAD_CAST "./-";
13672 
13673     uri = xmlBuildURI(URL, base);
13674 
13675     if (uri == NULL) {
13676 	inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13677 	if (inputStream == NULL) {
13678 	    xmlFreeParserCtxt(ctxt);
13679 	    return(NULL);
13680 	}
13681 
13682 	inputPush(ctxt, inputStream);
13683 
13684 	if ((ctxt->directory == NULL) && (directory == NULL))
13685 	    directory = xmlParserGetDirectory((char *)URL);
13686 	if ((ctxt->directory == NULL) && (directory != NULL))
13687 	    ctxt->directory = directory;
13688     } else {
13689 	inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13690 	if (inputStream == NULL) {
13691 	    xmlFree(uri);
13692 	    xmlFreeParserCtxt(ctxt);
13693 	    return(NULL);
13694 	}
13695 
13696 	inputPush(ctxt, inputStream);
13697 
13698 	if ((ctxt->directory == NULL) && (directory == NULL))
13699 	    directory = xmlParserGetDirectory((char *)uri);
13700 	if ((ctxt->directory == NULL) && (directory != NULL))
13701 	    ctxt->directory = directory;
13702 	xmlFree(uri);
13703     }
13704     return(ctxt);
13705 }
13706 
13707 /**
13708  * xmlCreateEntityParserCtxt:
13709  * @URL:  the entity URL
13710  * @ID:  the entity PUBLIC ID
13711  * @base:  a possible base for the target URI
13712  *
13713  * Create a parser context for an external entity
13714  * Automatic support for ZLIB/Compress compressed document is provided
13715  * by default if found at compile-time.
13716  *
13717  * Returns the new parser context or NULL
13718  */
13719 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)13720 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13721 	                  const xmlChar *base) {
13722     return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13723 
13724 }
13725 
13726 /************************************************************************
13727  *									*
13728  *		Front ends when parsing from a file			*
13729  *									*
13730  ************************************************************************/
13731 
13732 /**
13733  * xmlCreateURLParserCtxt:
13734  * @filename:  the filename or URL
13735  * @options:  a combination of xmlParserOption
13736  *
13737  * Create a parser context for a file or URL content.
13738  * Automatic support for ZLIB/Compress compressed document is provided
13739  * by default if found at compile-time and for file accesses
13740  *
13741  * Returns the new parser context or NULL
13742  */
13743 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)13744 xmlCreateURLParserCtxt(const char *filename, int options)
13745 {
13746     xmlParserCtxtPtr ctxt;
13747     xmlParserInputPtr inputStream;
13748     char *directory = NULL;
13749 
13750     ctxt = xmlNewParserCtxt();
13751     if (ctxt == NULL) {
13752 	xmlErrMemory(NULL, "cannot allocate parser context");
13753 	return(NULL);
13754     }
13755 
13756     if (options)
13757 	xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13758     ctxt->linenumbers = 1;
13759 
13760     inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13761     if (inputStream == NULL) {
13762 	xmlFreeParserCtxt(ctxt);
13763 	return(NULL);
13764     }
13765 
13766     inputPush(ctxt, inputStream);
13767     if ((ctxt->directory == NULL) && (directory == NULL))
13768         directory = xmlParserGetDirectory(filename);
13769     if ((ctxt->directory == NULL) && (directory != NULL))
13770         ctxt->directory = directory;
13771 
13772     return(ctxt);
13773 }
13774 
13775 /**
13776  * xmlCreateFileParserCtxt:
13777  * @filename:  the filename
13778  *
13779  * Create a parser context for a file content.
13780  * Automatic support for ZLIB/Compress compressed document is provided
13781  * by default if found at compile-time.
13782  *
13783  * Returns the new parser context or NULL
13784  */
13785 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)13786 xmlCreateFileParserCtxt(const char *filename)
13787 {
13788     return(xmlCreateURLParserCtxt(filename, 0));
13789 }
13790 
13791 #ifdef LIBXML_SAX1_ENABLED
13792 /**
13793  * xmlSAXParseFileWithData:
13794  * @sax:  the SAX handler block
13795  * @filename:  the filename
13796  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13797  *             documents
13798  * @data:  the userdata
13799  *
13800  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13801  *
13802  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13803  * compressed document is provided by default if found at compile-time.
13804  * It use the given SAX function block to handle the parsing callback.
13805  * If sax is NULL, fallback to the default DOM tree building routines.
13806  *
13807  * User data (void *) is stored within the parser context in the
13808  * context's _private member, so it is available nearly everywhere in libxml
13809  *
13810  * Returns the resulting document tree
13811  */
13812 
13813 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)13814 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13815                         int recovery, void *data) {
13816     xmlDocPtr ret;
13817     xmlParserCtxtPtr ctxt;
13818 
13819     xmlInitParser();
13820 
13821     ctxt = xmlCreateFileParserCtxt(filename);
13822     if (ctxt == NULL) {
13823 	return(NULL);
13824     }
13825     if (sax != NULL) {
13826 	if (ctxt->sax != NULL)
13827 	    xmlFree(ctxt->sax);
13828         ctxt->sax = sax;
13829     }
13830     xmlDetectSAX2(ctxt);
13831     if (data!=NULL) {
13832 	ctxt->_private = data;
13833     }
13834 
13835     if (ctxt->directory == NULL)
13836         ctxt->directory = xmlParserGetDirectory(filename);
13837 
13838     ctxt->recovery = recovery;
13839 
13840     xmlParseDocument(ctxt);
13841 
13842     if ((ctxt->wellFormed) || recovery) {
13843         ret = ctxt->myDoc;
13844 	if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13845 	    if (ctxt->input->buf->compressed > 0)
13846 		ret->compression = 9;
13847 	    else
13848 		ret->compression = ctxt->input->buf->compressed;
13849 	}
13850     }
13851     else {
13852        ret = NULL;
13853        xmlFreeDoc(ctxt->myDoc);
13854        ctxt->myDoc = NULL;
13855     }
13856     if (sax != NULL)
13857         ctxt->sax = NULL;
13858     xmlFreeParserCtxt(ctxt);
13859 
13860     return(ret);
13861 }
13862 
13863 /**
13864  * xmlSAXParseFile:
13865  * @sax:  the SAX handler block
13866  * @filename:  the filename
13867  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13868  *             documents
13869  *
13870  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13871  *
13872  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13873  * compressed document is provided by default if found at compile-time.
13874  * It use the given SAX function block to handle the parsing callback.
13875  * If sax is NULL, fallback to the default DOM tree building routines.
13876  *
13877  * Returns the resulting document tree
13878  */
13879 
13880 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)13881 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13882                           int recovery) {
13883     return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13884 }
13885 
13886 /**
13887  * xmlRecoverDoc:
13888  * @cur:  a pointer to an array of xmlChar
13889  *
13890  * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
13891  *
13892  * parse an XML in-memory document and build a tree.
13893  * In the case the document is not Well Formed, a attempt to build a
13894  * tree is tried anyway
13895  *
13896  * Returns the resulting document tree or NULL in case of failure
13897  */
13898 
13899 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)13900 xmlRecoverDoc(const xmlChar *cur) {
13901     return(xmlSAXParseDoc(NULL, cur, 1));
13902 }
13903 
13904 /**
13905  * xmlParseFile:
13906  * @filename:  the filename
13907  *
13908  * DEPRECATED: Use xmlReadFile.
13909  *
13910  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13911  * compressed document is provided by default if found at compile-time.
13912  *
13913  * Returns the resulting document tree if the file was wellformed,
13914  * NULL otherwise.
13915  */
13916 
13917 xmlDocPtr
xmlParseFile(const char * filename)13918 xmlParseFile(const char *filename) {
13919     return(xmlSAXParseFile(NULL, filename, 0));
13920 }
13921 
13922 /**
13923  * xmlRecoverFile:
13924  * @filename:  the filename
13925  *
13926  * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
13927  *
13928  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13929  * compressed document is provided by default if found at compile-time.
13930  * In the case the document is not Well Formed, it attempts to build
13931  * a tree anyway
13932  *
13933  * Returns the resulting document tree or NULL in case of failure
13934  */
13935 
13936 xmlDocPtr
xmlRecoverFile(const char * filename)13937 xmlRecoverFile(const char *filename) {
13938     return(xmlSAXParseFile(NULL, filename, 1));
13939 }
13940 
13941 
13942 /**
13943  * xmlSetupParserForBuffer:
13944  * @ctxt:  an XML parser context
13945  * @buffer:  a xmlChar * buffer
13946  * @filename:  a file name
13947  *
13948  * DEPRECATED: Don't use.
13949  *
13950  * Setup the parser context to parse a new buffer; Clears any prior
13951  * contents from the parser context. The buffer parameter must not be
13952  * NULL, but the filename parameter can be
13953  */
13954 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)13955 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13956                              const char* filename)
13957 {
13958     xmlParserInputPtr input;
13959 
13960     if ((ctxt == NULL) || (buffer == NULL))
13961         return;
13962 
13963     input = xmlNewInputStream(ctxt);
13964     if (input == NULL) {
13965         xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13966         xmlClearParserCtxt(ctxt);
13967         return;
13968     }
13969 
13970     xmlClearParserCtxt(ctxt);
13971     if (filename != NULL)
13972         input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13973     input->base = buffer;
13974     input->cur = buffer;
13975     input->end = &buffer[xmlStrlen(buffer)];
13976     inputPush(ctxt, input);
13977 }
13978 
13979 /**
13980  * xmlSAXUserParseFile:
13981  * @sax:  a SAX handler
13982  * @user_data:  The user data returned on SAX callbacks
13983  * @filename:  a file name
13984  *
13985  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13986  *
13987  * parse an XML file and call the given SAX handler routines.
13988  * Automatic support for ZLIB/Compress compressed document is provided
13989  *
13990  * Returns 0 in case of success or a error number otherwise
13991  */
13992 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)13993 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13994                     const char *filename) {
13995     int ret = 0;
13996     xmlParserCtxtPtr ctxt;
13997 
13998     ctxt = xmlCreateFileParserCtxt(filename);
13999     if (ctxt == NULL) return -1;
14000     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14001 	xmlFree(ctxt->sax);
14002     ctxt->sax = sax;
14003     xmlDetectSAX2(ctxt);
14004 
14005     if (user_data != NULL)
14006 	ctxt->userData = user_data;
14007 
14008     xmlParseDocument(ctxt);
14009 
14010     if (ctxt->wellFormed)
14011 	ret = 0;
14012     else {
14013         if (ctxt->errNo != 0)
14014 	    ret = ctxt->errNo;
14015 	else
14016 	    ret = -1;
14017     }
14018     if (sax != NULL)
14019 	ctxt->sax = NULL;
14020     if (ctxt->myDoc != NULL) {
14021         xmlFreeDoc(ctxt->myDoc);
14022 	ctxt->myDoc = NULL;
14023     }
14024     xmlFreeParserCtxt(ctxt);
14025 
14026     return ret;
14027 }
14028 #endif /* LIBXML_SAX1_ENABLED */
14029 
14030 /************************************************************************
14031  *									*
14032  *		Front ends when parsing from memory			*
14033  *									*
14034  ************************************************************************/
14035 
14036 /**
14037  * xmlCreateMemoryParserCtxt:
14038  * @buffer:  a pointer to a char array
14039  * @size:  the size of the array
14040  *
14041  * Create a parser context for an XML in-memory document.
14042  *
14043  * Returns the new parser context or NULL
14044  */
14045 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)14046 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14047     xmlParserCtxtPtr ctxt;
14048     xmlParserInputPtr input;
14049     xmlParserInputBufferPtr buf;
14050 
14051     if (buffer == NULL)
14052 	return(NULL);
14053     if (size <= 0)
14054 	return(NULL);
14055 
14056     ctxt = xmlNewParserCtxt();
14057     if (ctxt == NULL)
14058 	return(NULL);
14059 
14060     buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14061     if (buf == NULL) {
14062 	xmlFreeParserCtxt(ctxt);
14063 	return(NULL);
14064     }
14065 
14066     input = xmlNewInputStream(ctxt);
14067     if (input == NULL) {
14068 	xmlFreeParserInputBuffer(buf);
14069 	xmlFreeParserCtxt(ctxt);
14070 	return(NULL);
14071     }
14072 
14073     input->filename = NULL;
14074     input->buf = buf;
14075     xmlBufResetInput(input->buf->buffer, input);
14076 
14077     inputPush(ctxt, input);
14078     return(ctxt);
14079 }
14080 
14081 #ifdef LIBXML_SAX1_ENABLED
14082 /**
14083  * xmlSAXParseMemoryWithData:
14084  * @sax:  the SAX handler block
14085  * @buffer:  an pointer to a char array
14086  * @size:  the size of the array
14087  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14088  *             documents
14089  * @data:  the userdata
14090  *
14091  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14092  *
14093  * parse an XML in-memory block and use the given SAX function block
14094  * to handle the parsing callback. If sax is NULL, fallback to the default
14095  * DOM tree building routines.
14096  *
14097  * User data (void *) is stored within the parser context in the
14098  * context's _private member, so it is available nearly everywhere in libxml
14099  *
14100  * Returns the resulting document tree
14101  */
14102 
14103 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)14104 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14105 	          int size, int recovery, void *data) {
14106     xmlDocPtr ret;
14107     xmlParserCtxtPtr ctxt;
14108 
14109     xmlInitParser();
14110 
14111     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14112     if (ctxt == NULL) return(NULL);
14113     if (sax != NULL) {
14114 	if (ctxt->sax != NULL)
14115 	    xmlFree(ctxt->sax);
14116         ctxt->sax = sax;
14117     }
14118     xmlDetectSAX2(ctxt);
14119     if (data!=NULL) {
14120 	ctxt->_private=data;
14121     }
14122 
14123     ctxt->recovery = recovery;
14124 
14125     xmlParseDocument(ctxt);
14126 
14127     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14128     else {
14129        ret = NULL;
14130        xmlFreeDoc(ctxt->myDoc);
14131        ctxt->myDoc = NULL;
14132     }
14133     if (sax != NULL)
14134 	ctxt->sax = NULL;
14135     xmlFreeParserCtxt(ctxt);
14136 
14137     return(ret);
14138 }
14139 
14140 /**
14141  * xmlSAXParseMemory:
14142  * @sax:  the SAX handler block
14143  * @buffer:  an pointer to a char array
14144  * @size:  the size of the array
14145  * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14146  *             documents
14147  *
14148  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14149  *
14150  * parse an XML in-memory block and use the given SAX function block
14151  * to handle the parsing callback. If sax is NULL, fallback to the default
14152  * DOM tree building routines.
14153  *
14154  * Returns the resulting document tree
14155  */
14156 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)14157 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14158 	          int size, int recovery) {
14159     return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14160 }
14161 
14162 /**
14163  * xmlParseMemory:
14164  * @buffer:  an pointer to a char array
14165  * @size:  the size of the array
14166  *
14167  * DEPRECATED: Use xmlReadMemory.
14168  *
14169  * parse an XML in-memory block and build a tree.
14170  *
14171  * Returns the resulting document tree
14172  */
14173 
xmlParseMemory(const char * buffer,int size)14174 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14175    return(xmlSAXParseMemory(NULL, buffer, size, 0));
14176 }
14177 
14178 /**
14179  * xmlRecoverMemory:
14180  * @buffer:  an pointer to a char array
14181  * @size:  the size of the array
14182  *
14183  * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14184  *
14185  * parse an XML in-memory block and build a tree.
14186  * In the case the document is not Well Formed, an attempt to
14187  * build a tree is tried anyway
14188  *
14189  * Returns the resulting document tree or NULL in case of error
14190  */
14191 
xmlRecoverMemory(const char * buffer,int size)14192 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14193    return(xmlSAXParseMemory(NULL, buffer, size, 1));
14194 }
14195 
14196 /**
14197  * xmlSAXUserParseMemory:
14198  * @sax:  a SAX handler
14199  * @user_data:  The user data returned on SAX callbacks
14200  * @buffer:  an in-memory XML document input
14201  * @size:  the length of the XML document in bytes
14202  *
14203  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14204  *
14205  * parse an XML in-memory buffer and call the given SAX handler routines.
14206  *
14207  * Returns 0 in case of success or a error number otherwise
14208  */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)14209 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14210 			  const char *buffer, int size) {
14211     int ret = 0;
14212     xmlParserCtxtPtr ctxt;
14213 
14214     xmlInitParser();
14215 
14216     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14217     if (ctxt == NULL) return -1;
14218     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14219         xmlFree(ctxt->sax);
14220     ctxt->sax = sax;
14221     xmlDetectSAX2(ctxt);
14222 
14223     if (user_data != NULL)
14224 	ctxt->userData = user_data;
14225 
14226     xmlParseDocument(ctxt);
14227 
14228     if (ctxt->wellFormed)
14229 	ret = 0;
14230     else {
14231         if (ctxt->errNo != 0)
14232 	    ret = ctxt->errNo;
14233 	else
14234 	    ret = -1;
14235     }
14236     if (sax != NULL)
14237         ctxt->sax = NULL;
14238     if (ctxt->myDoc != NULL) {
14239         xmlFreeDoc(ctxt->myDoc);
14240 	ctxt->myDoc = NULL;
14241     }
14242     xmlFreeParserCtxt(ctxt);
14243 
14244     return ret;
14245 }
14246 #endif /* LIBXML_SAX1_ENABLED */
14247 
14248 /**
14249  * xmlCreateDocParserCtxt:
14250  * @str:  a pointer to an array of xmlChar
14251  *
14252  * Creates a parser context for an XML in-memory document.
14253  *
14254  * Returns the new parser context or NULL
14255  */
14256 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * str)14257 xmlCreateDocParserCtxt(const xmlChar *str) {
14258     xmlParserCtxtPtr ctxt;
14259     xmlParserInputPtr input;
14260     xmlParserInputBufferPtr buf;
14261 
14262     if (str == NULL)
14263 	return(NULL);
14264 
14265     ctxt = xmlNewParserCtxt();
14266     if (ctxt == NULL)
14267 	return(NULL);
14268 
14269     buf = xmlParserInputBufferCreateString(str);
14270     if (buf == NULL) {
14271 	xmlFreeParserCtxt(ctxt);
14272 	return(NULL);
14273     }
14274 
14275     input = xmlNewInputStream(ctxt);
14276     if (input == NULL) {
14277 	xmlFreeParserInputBuffer(buf);
14278 	xmlFreeParserCtxt(ctxt);
14279 	return(NULL);
14280     }
14281 
14282     input->filename = NULL;
14283     input->buf = buf;
14284     xmlBufResetInput(input->buf->buffer, input);
14285 
14286     inputPush(ctxt, input);
14287     return(ctxt);
14288 }
14289 
14290 #ifdef LIBXML_SAX1_ENABLED
14291 /**
14292  * xmlSAXParseDoc:
14293  * @sax:  the SAX handler block
14294  * @cur:  a pointer to an array of xmlChar
14295  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14296  *             documents
14297  *
14298  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14299  *
14300  * parse an XML in-memory document and build a tree.
14301  * It use the given SAX function block to handle the parsing callback.
14302  * If sax is NULL, fallback to the default DOM tree building routines.
14303  *
14304  * Returns the resulting document tree
14305  */
14306 
14307 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14308 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14309     xmlDocPtr ret;
14310     xmlParserCtxtPtr ctxt;
14311     xmlSAXHandlerPtr oldsax = NULL;
14312 
14313     if (cur == NULL) return(NULL);
14314 
14315 
14316     ctxt = xmlCreateDocParserCtxt(cur);
14317     if (ctxt == NULL) return(NULL);
14318     if (sax != NULL) {
14319         oldsax = ctxt->sax;
14320         ctxt->sax = sax;
14321         ctxt->userData = NULL;
14322     }
14323     xmlDetectSAX2(ctxt);
14324 
14325     xmlParseDocument(ctxt);
14326     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14327     else {
14328        ret = NULL;
14329        xmlFreeDoc(ctxt->myDoc);
14330        ctxt->myDoc = NULL;
14331     }
14332     if (sax != NULL)
14333 	ctxt->sax = oldsax;
14334     xmlFreeParserCtxt(ctxt);
14335 
14336     return(ret);
14337 }
14338 
14339 /**
14340  * xmlParseDoc:
14341  * @cur:  a pointer to an array of xmlChar
14342  *
14343  * DEPRECATED: Use xmlReadDoc.
14344  *
14345  * parse an XML in-memory document and build a tree.
14346  *
14347  * Returns the resulting document tree
14348  */
14349 
14350 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14351 xmlParseDoc(const xmlChar *cur) {
14352     return(xmlSAXParseDoc(NULL, cur, 0));
14353 }
14354 #endif /* LIBXML_SAX1_ENABLED */
14355 
14356 #ifdef LIBXML_LEGACY_ENABLED
14357 /************************************************************************
14358  *									*
14359  *	Specific function to keep track of entities references		*
14360  *	and used by the XSLT debugger					*
14361  *									*
14362  ************************************************************************/
14363 
14364 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14365 
14366 /**
14367  * xmlAddEntityReference:
14368  * @ent : A valid entity
14369  * @firstNode : A valid first node for children of entity
14370  * @lastNode : A valid last node of children entity
14371  *
14372  * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14373  */
14374 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14375 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14376                       xmlNodePtr lastNode)
14377 {
14378     if (xmlEntityRefFunc != NULL) {
14379         (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14380     }
14381 }
14382 
14383 
14384 /**
14385  * xmlSetEntityReferenceFunc:
14386  * @func: A valid function
14387  *
14388  * Set the function to call call back when a xml reference has been made
14389  */
14390 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14391 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14392 {
14393     xmlEntityRefFunc = func;
14394 }
14395 #endif /* LIBXML_LEGACY_ENABLED */
14396 
14397 /************************************************************************
14398  *									*
14399  *	New set (2.6.0) of simpler and more flexible APIs		*
14400  *									*
14401  ************************************************************************/
14402 
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL string is ignored. A variable named "dict"
 * must be visible where the macro is used.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * a single statement and can safely be used in an unbraced if/else.
 * Invocations must be followed by a semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14414 
14415 /**
14416  * xmlCtxtReset:
14417  * @ctxt: an XML parser context
14418  *
14419  * Reset a parser context
14420  */
14421 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)14422 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14423 {
14424     xmlParserInputPtr input;
14425     xmlDictPtr dict;
14426 
14427     if (ctxt == NULL)
14428         return;
14429 
14430     dict = ctxt->dict;
14431 
14432     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14433         xmlFreeInputStream(input);
14434     }
14435     ctxt->inputNr = 0;
14436     ctxt->input = NULL;
14437 
14438     ctxt->spaceNr = 0;
14439     if (ctxt->spaceTab != NULL) {
14440 	ctxt->spaceTab[0] = -1;
14441 	ctxt->space = &ctxt->spaceTab[0];
14442     } else {
14443         ctxt->space = NULL;
14444     }
14445 
14446 
14447     ctxt->nodeNr = 0;
14448     ctxt->node = NULL;
14449 
14450     ctxt->nameNr = 0;
14451     ctxt->name = NULL;
14452 
14453     ctxt->nsNr = 0;
14454     xmlParserNsReset(ctxt->nsdb);
14455 
14456     DICT_FREE(ctxt->version);
14457     ctxt->version = NULL;
14458     DICT_FREE(ctxt->encoding);
14459     ctxt->encoding = NULL;
14460     DICT_FREE(ctxt->directory);
14461     ctxt->directory = NULL;
14462     DICT_FREE(ctxt->extSubURI);
14463     ctxt->extSubURI = NULL;
14464     DICT_FREE(ctxt->extSubSystem);
14465     ctxt->extSubSystem = NULL;
14466     if (ctxt->myDoc != NULL)
14467         xmlFreeDoc(ctxt->myDoc);
14468     ctxt->myDoc = NULL;
14469 
14470     ctxt->standalone = -1;
14471     ctxt->hasExternalSubset = 0;
14472     ctxt->hasPErefs = 0;
14473     ctxt->html = 0;
14474     ctxt->external = 0;
14475     ctxt->instate = XML_PARSER_START;
14476     ctxt->token = 0;
14477 
14478     ctxt->wellFormed = 1;
14479     ctxt->nsWellFormed = 1;
14480     ctxt->disableSAX = 0;
14481     ctxt->valid = 1;
14482 #if 0
14483     ctxt->vctxt.userData = ctxt;
14484     ctxt->vctxt.error = xmlParserValidityError;
14485     ctxt->vctxt.warning = xmlParserValidityWarning;
14486 #endif
14487     ctxt->record_info = 0;
14488     ctxt->checkIndex = 0;
14489     ctxt->endCheckState = 0;
14490     ctxt->inSubset = 0;
14491     ctxt->errNo = XML_ERR_OK;
14492     ctxt->depth = 0;
14493     ctxt->catalogs = NULL;
14494     ctxt->sizeentities = 0;
14495     ctxt->sizeentcopy = 0;
14496     xmlInitNodeInfoSeq(&ctxt->node_seq);
14497 
14498     if (ctxt->attsDefault != NULL) {
14499         xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14500         ctxt->attsDefault = NULL;
14501     }
14502     if (ctxt->attsSpecial != NULL) {
14503         xmlHashFree(ctxt->attsSpecial, NULL);
14504         ctxt->attsSpecial = NULL;
14505     }
14506 
14507 #ifdef LIBXML_CATALOG_ENABLED
14508     if (ctxt->catalogs != NULL)
14509 	xmlCatalogFreeLocal(ctxt->catalogs);
14510 #endif
14511     ctxt->nbErrors = 0;
14512     ctxt->nbWarnings = 0;
14513     if (ctxt->lastError.code != XML_ERR_OK)
14514         xmlResetError(&ctxt->lastError);
14515 }
14516 
14517 /**
14518  * xmlCtxtResetPush:
14519  * @ctxt: an XML parser context
14520  * @chunk:  a pointer to an array of chars
14521  * @size:  number of chars in the array
14522  * @filename:  an optional file name or URI
14523  * @encoding:  the document encoding, or NULL
14524  *
14525  * Reset a push parser context
14526  *
14527  * Returns 0 in case of success and 1 in case of error
14528  */
14529 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)14530 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14531                  int size, const char *filename, const char *encoding)
14532 {
14533     xmlParserInputPtr inputStream;
14534     xmlParserInputBufferPtr buf;
14535 
14536     if (ctxt == NULL)
14537         return(1);
14538 
14539     buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
14540     if (buf == NULL)
14541         return(1);
14542 
14543     if (ctxt == NULL) {
14544         xmlFreeParserInputBuffer(buf);
14545         return(1);
14546     }
14547 
14548     xmlCtxtReset(ctxt);
14549 
14550     if (filename == NULL) {
14551         ctxt->directory = NULL;
14552     } else {
14553         ctxt->directory = xmlParserGetDirectory(filename);
14554     }
14555 
14556     inputStream = xmlNewInputStream(ctxt);
14557     if (inputStream == NULL) {
14558         xmlFreeParserInputBuffer(buf);
14559         return(1);
14560     }
14561 
14562     if (filename == NULL)
14563         inputStream->filename = NULL;
14564     else
14565         inputStream->filename = (char *)
14566             xmlCanonicPath((const xmlChar *) filename);
14567     inputStream->buf = buf;
14568     xmlBufResetInput(buf->buffer, inputStream);
14569 
14570     inputPush(ctxt, inputStream);
14571 
14572     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14573         (ctxt->input->buf != NULL)) {
14574         size_t pos = ctxt->input->cur - ctxt->input->base;
14575         int res;
14576 
14577         res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14578         xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
14579         if (res < 0) {
14580             xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
14581             xmlHaltParser(ctxt);
14582             return(1);
14583         }
14584     }
14585 
14586     if (encoding != NULL) {
14587         xmlCharEncodingHandlerPtr hdlr;
14588 
14589         hdlr = xmlFindCharEncodingHandler(encoding);
14590         if (hdlr != NULL) {
14591             xmlSwitchToEncoding(ctxt, hdlr);
14592 	} else {
14593 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14594 			      "Unsupported encoding %s\n", BAD_CAST encoding);
14595         }
14596     }
14597 
14598     return(0);
14599 }
14600 
14601 
14602 /**
14603  * xmlCtxtUseOptionsInternal:
14604  * @ctxt: an XML parser context
14605  * @options:  a combination of xmlParserOption
14606  * @encoding:  the user provided encoding to use
14607  *
14608  * Applies the options to the parser context
14609  *
14610  * Returns 0 in case of success, the set of unknown or unimplemented options
14611  *         in case of error.
14612  */
14613 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)14614 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14615 {
14616     if (ctxt == NULL)
14617         return(-1);
14618     if (encoding != NULL) {
14619         if (ctxt->encoding != NULL)
14620 	    xmlFree((xmlChar *) ctxt->encoding);
14621         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14622     }
14623     if (options & XML_PARSE_RECOVER) {
14624         ctxt->recovery = 1;
14625         options -= XML_PARSE_RECOVER;
14626 	ctxt->options |= XML_PARSE_RECOVER;
14627     } else
14628         ctxt->recovery = 0;
14629     if (options & XML_PARSE_DTDLOAD) {
14630         ctxt->loadsubset = XML_DETECT_IDS;
14631         options -= XML_PARSE_DTDLOAD;
14632 	ctxt->options |= XML_PARSE_DTDLOAD;
14633     } else
14634         ctxt->loadsubset = 0;
14635     if (options & XML_PARSE_DTDATTR) {
14636         ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14637         options -= XML_PARSE_DTDATTR;
14638 	ctxt->options |= XML_PARSE_DTDATTR;
14639     }
14640     if (options & XML_PARSE_NOENT) {
14641         ctxt->replaceEntities = 1;
14642         /* ctxt->loadsubset |= XML_DETECT_IDS; */
14643         options -= XML_PARSE_NOENT;
14644 	ctxt->options |= XML_PARSE_NOENT;
14645     } else
14646         ctxt->replaceEntities = 0;
14647     if (options & XML_PARSE_PEDANTIC) {
14648         ctxt->pedantic = 1;
14649         options -= XML_PARSE_PEDANTIC;
14650 	ctxt->options |= XML_PARSE_PEDANTIC;
14651     } else
14652         ctxt->pedantic = 0;
14653     if (options & XML_PARSE_NOBLANKS) {
14654         ctxt->keepBlanks = 0;
14655         ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14656         options -= XML_PARSE_NOBLANKS;
14657 	ctxt->options |= XML_PARSE_NOBLANKS;
14658     } else
14659         ctxt->keepBlanks = 1;
14660     if (options & XML_PARSE_DTDVALID) {
14661         ctxt->validate = 1;
14662         if (options & XML_PARSE_NOWARNING)
14663             ctxt->vctxt.warning = NULL;
14664         if (options & XML_PARSE_NOERROR)
14665             ctxt->vctxt.error = NULL;
14666         options -= XML_PARSE_DTDVALID;
14667 	ctxt->options |= XML_PARSE_DTDVALID;
14668     } else
14669         ctxt->validate = 0;
14670     if (options & XML_PARSE_NOWARNING) {
14671         ctxt->sax->warning = NULL;
14672         options -= XML_PARSE_NOWARNING;
14673     }
14674     if (options & XML_PARSE_NOERROR) {
14675         ctxt->sax->error = NULL;
14676         ctxt->sax->fatalError = NULL;
14677         options -= XML_PARSE_NOERROR;
14678     }
14679 #ifdef LIBXML_SAX1_ENABLED
14680     if (options & XML_PARSE_SAX1) {
14681         ctxt->sax->startElementNs = NULL;
14682         ctxt->sax->endElementNs = NULL;
14683         ctxt->sax->initialized = 1;
14684         options -= XML_PARSE_SAX1;
14685 	ctxt->options |= XML_PARSE_SAX1;
14686     }
14687 #endif /* LIBXML_SAX1_ENABLED */
14688     if (options & XML_PARSE_NODICT) {
14689         ctxt->dictNames = 0;
14690         options -= XML_PARSE_NODICT;
14691 	ctxt->options |= XML_PARSE_NODICT;
14692     } else {
14693         ctxt->dictNames = 1;
14694     }
14695     if (options & XML_PARSE_NOCDATA) {
14696         ctxt->sax->cdataBlock = NULL;
14697         options -= XML_PARSE_NOCDATA;
14698 	ctxt->options |= XML_PARSE_NOCDATA;
14699     }
14700     if (options & XML_PARSE_NSCLEAN) {
14701 	ctxt->options |= XML_PARSE_NSCLEAN;
14702         options -= XML_PARSE_NSCLEAN;
14703     }
14704     if (options & XML_PARSE_NONET) {
14705 	ctxt->options |= XML_PARSE_NONET;
14706         options -= XML_PARSE_NONET;
14707     }
14708     if (options & XML_PARSE_COMPACT) {
14709 	ctxt->options |= XML_PARSE_COMPACT;
14710         options -= XML_PARSE_COMPACT;
14711     }
14712     if (options & XML_PARSE_OLD10) {
14713 	ctxt->options |= XML_PARSE_OLD10;
14714         options -= XML_PARSE_OLD10;
14715     }
14716     if (options & XML_PARSE_NOBASEFIX) {
14717 	ctxt->options |= XML_PARSE_NOBASEFIX;
14718         options -= XML_PARSE_NOBASEFIX;
14719     }
14720     if (options & XML_PARSE_HUGE) {
14721 	ctxt->options |= XML_PARSE_HUGE;
14722         options -= XML_PARSE_HUGE;
14723         if (ctxt->dict != NULL)
14724             xmlDictSetLimit(ctxt->dict, 0);
14725     }
14726     if (options & XML_PARSE_OLDSAX) {
14727 	ctxt->options |= XML_PARSE_OLDSAX;
14728         options -= XML_PARSE_OLDSAX;
14729     }
14730     if (options & XML_PARSE_IGNORE_ENC) {
14731 	ctxt->options |= XML_PARSE_IGNORE_ENC;
14732         options -= XML_PARSE_IGNORE_ENC;
14733     }
14734     if (options & XML_PARSE_BIG_LINES) {
14735 	ctxt->options |= XML_PARSE_BIG_LINES;
14736         options -= XML_PARSE_BIG_LINES;
14737     }
14738     ctxt->linenumbers = 1;
14739     return (options);
14740 }
14741 
14742 /**
14743  * xmlCtxtUseOptions:
14744  * @ctxt: an XML parser context
14745  * @options:  a combination of xmlParserOption
14746  *
14747  * Applies the options to the parser context
14748  *
14749  * Returns 0 in case of success, the set of unknown or unimplemented options
14750  *         in case of error.
14751  */
14752 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)14753 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14754 {
14755    return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14756 }
14757 
14758 /**
14759  * xmlCtxtSetMaxAmplification:
14760  * @ctxt: an XML parser context
14761  * @maxAmpl:  maximum amplification factor
14762  *
14763  * To protect against exponential entity expansion ("billion laughs"), the
14764  * size of serialized output is (roughly) limited to the input size
14765  * multiplied by this factor. The default value is 5.
14766  *
14767  * When working with documents making heavy use of entity expansion, it can
14768  * be necessary to increase the value. For security reasons, this should only
14769  * be considered when processing trusted input.
14770  */
14771 void
xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt,unsigned maxAmpl)14772 xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl)
14773 {
14774     ctxt->maxAmpl = maxAmpl;
14775 }
14776 
14777 /**
14778  * xmlDoRead:
14779  * @ctxt:  an XML parser context
14780  * @URL:  the base URL to use for the document
14781  * @encoding:  the document encoding, or NULL
14782  * @options:  a combination of xmlParserOption
14783  * @reuse:  keep the context for reuse
14784  *
14785  * Common front-end for the xmlRead functions
14786  *
14787  * Returns the resulting document tree or NULL
14788  */
14789 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)14790 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14791           int options, int reuse)
14792 {
14793     xmlDocPtr ret;
14794 
14795     xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14796     if (encoding != NULL) {
14797         xmlCharEncodingHandlerPtr hdlr;
14798 
14799         /*
14800          * TODO: We should consider to set XML_PARSE_IGNORE_ENC if the
14801          * caller provided an encoding. Otherwise, we might switch to
14802          * the encoding from the XML declaration which is likely to
14803          * break things. Also see xmlSwitchInputEncoding.
14804          */
14805 	hdlr = xmlFindCharEncodingHandler(encoding);
14806 	if (hdlr != NULL)
14807 	    xmlSwitchToEncoding(ctxt, hdlr);
14808     }
14809     if ((URL != NULL) && (ctxt->input != NULL) &&
14810         (ctxt->input->filename == NULL))
14811         ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14812     xmlParseDocument(ctxt);
14813     if ((ctxt->wellFormed) || ctxt->recovery)
14814         ret = ctxt->myDoc;
14815     else {
14816         ret = NULL;
14817 	if (ctxt->myDoc != NULL) {
14818 	    xmlFreeDoc(ctxt->myDoc);
14819 	}
14820     }
14821     ctxt->myDoc = NULL;
14822     if (!reuse) {
14823 	xmlFreeParserCtxt(ctxt);
14824     }
14825 
14826     return (ret);
14827 }
14828 
14829 /**
14830  * xmlReadDoc:
14831  * @cur:  a pointer to a zero terminated string
14832  * @URL:  the base URL to use for the document
14833  * @encoding:  the document encoding, or NULL
14834  * @options:  a combination of xmlParserOption
14835  *
14836  * parse an XML in-memory document and build a tree.
14837  *
14838  * Returns the resulting document tree
14839  */
14840 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)14841 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14842 {
14843     xmlParserCtxtPtr ctxt;
14844 
14845     if (cur == NULL)
14846         return (NULL);
14847     xmlInitParser();
14848 
14849     ctxt = xmlCreateDocParserCtxt(cur);
14850     if (ctxt == NULL)
14851         return (NULL);
14852     return (xmlDoRead(ctxt, URL, encoding, options, 0));
14853 }
14854 
14855 /**
14856  * xmlReadFile:
14857  * @filename:  a file or URL
14858  * @encoding:  the document encoding, or NULL
14859  * @options:  a combination of xmlParserOption
14860  *
14861  * parse an XML file from the filesystem or the network.
14862  *
14863  * Returns the resulting document tree
14864  */
14865 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)14866 xmlReadFile(const char *filename, const char *encoding, int options)
14867 {
14868     xmlParserCtxtPtr ctxt;
14869 
14870     xmlInitParser();
14871     ctxt = xmlCreateURLParserCtxt(filename, options);
14872     if (ctxt == NULL)
14873         return (NULL);
14874     return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14875 }
14876 
14877 /**
14878  * xmlReadMemory:
14879  * @buffer:  a pointer to a char array
14880  * @size:  the size of the array
14881  * @URL:  the base URL to use for the document
14882  * @encoding:  the document encoding, or NULL
14883  * @options:  a combination of xmlParserOption
14884  *
14885  * parse an XML in-memory document and build a tree.
14886  *
14887  * Returns the resulting document tree
14888  */
14889 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)14890 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14891 {
14892     xmlParserCtxtPtr ctxt;
14893 
14894     xmlInitParser();
14895     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14896     if (ctxt == NULL)
14897         return (NULL);
14898     return (xmlDoRead(ctxt, URL, encoding, options, 0));
14899 }
14900 
14901 /**
14902  * xmlReadFd:
14903  * @fd:  an open file descriptor
14904  * @URL:  the base URL to use for the document
14905  * @encoding:  the document encoding, or NULL
14906  * @options:  a combination of xmlParserOption
14907  *
14908  * parse an XML from a file descriptor and build a tree.
14909  * NOTE that the file descriptor will not be closed when the
14910  *      reader is closed or reset.
14911  *
14912  * Returns the resulting document tree
14913  */
14914 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)14915 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14916 {
14917     xmlParserCtxtPtr ctxt;
14918     xmlParserInputBufferPtr input;
14919     xmlParserInputPtr stream;
14920 
14921     if (fd < 0)
14922         return (NULL);
14923     xmlInitParser();
14924 
14925     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14926     if (input == NULL)
14927         return (NULL);
14928     input->closecallback = NULL;
14929     ctxt = xmlNewParserCtxt();
14930     if (ctxt == NULL) {
14931         xmlFreeParserInputBuffer(input);
14932         return (NULL);
14933     }
14934     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14935     if (stream == NULL) {
14936         xmlFreeParserInputBuffer(input);
14937 	xmlFreeParserCtxt(ctxt);
14938         return (NULL);
14939     }
14940     inputPush(ctxt, stream);
14941     return (xmlDoRead(ctxt, URL, encoding, options, 0));
14942 }
14943 
14944 /**
14945  * xmlReadIO:
14946  * @ioread:  an I/O read function
14947  * @ioclose:  an I/O close function
14948  * @ioctx:  an I/O handler
14949  * @URL:  the base URL to use for the document
14950  * @encoding:  the document encoding, or NULL
14951  * @options:  a combination of xmlParserOption
14952  *
14953  * parse an XML document from I/O functions and source and build a tree.
14954  *
14955  * Returns the resulting document tree
14956  */
14957 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)14958 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14959           void *ioctx, const char *URL, const char *encoding, int options)
14960 {
14961     xmlParserCtxtPtr ctxt;
14962     xmlParserInputBufferPtr input;
14963     xmlParserInputPtr stream;
14964 
14965     if (ioread == NULL)
14966         return (NULL);
14967     xmlInitParser();
14968 
14969     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14970                                          XML_CHAR_ENCODING_NONE);
14971     if (input == NULL) {
14972         if (ioclose != NULL)
14973             ioclose(ioctx);
14974         return (NULL);
14975     }
14976     ctxt = xmlNewParserCtxt();
14977     if (ctxt == NULL) {
14978         xmlFreeParserInputBuffer(input);
14979         return (NULL);
14980     }
14981     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14982     if (stream == NULL) {
14983         xmlFreeParserInputBuffer(input);
14984 	xmlFreeParserCtxt(ctxt);
14985         return (NULL);
14986     }
14987     inputPush(ctxt, stream);
14988     return (xmlDoRead(ctxt, URL, encoding, options, 0));
14989 }
14990 
14991 /**
14992  * xmlCtxtReadDoc:
14993  * @ctxt:  an XML parser context
14994  * @str:  a pointer to a zero terminated string
14995  * @URL:  the base URL to use for the document
14996  * @encoding:  the document encoding, or NULL
14997  * @options:  a combination of xmlParserOption
14998  *
14999  * parse an XML in-memory document and build a tree.
15000  * This reuses the existing @ctxt parser context
15001  *
15002  * Returns the resulting document tree
15003  */
15004 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * str,const char * URL,const char * encoding,int options)15005 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str,
15006                const char *URL, const char *encoding, int options)
15007 {
15008     xmlParserInputBufferPtr input;
15009     xmlParserInputPtr stream;
15010 
15011     if (ctxt == NULL)
15012         return (NULL);
15013     if (str == NULL)
15014         return (NULL);
15015     xmlInitParser();
15016 
15017     xmlCtxtReset(ctxt);
15018 
15019     input = xmlParserInputBufferCreateString(str);
15020     if (input == NULL) {
15021 	return(NULL);
15022     }
15023 
15024     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15025     if (stream == NULL) {
15026 	xmlFreeParserInputBuffer(input);
15027 	return(NULL);
15028     }
15029 
15030     inputPush(ctxt, stream);
15031     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15032 }
15033 
15034 /**
15035  * xmlCtxtReadFile:
15036  * @ctxt:  an XML parser context
15037  * @filename:  a file or URL
15038  * @encoding:  the document encoding, or NULL
15039  * @options:  a combination of xmlParserOption
15040  *
15041  * parse an XML file from the filesystem or the network.
15042  * This reuses the existing @ctxt parser context
15043  *
15044  * Returns the resulting document tree
15045  */
15046 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)15047 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15048                 const char *encoding, int options)
15049 {
15050     xmlParserInputPtr stream;
15051 
15052     if (filename == NULL)
15053         return (NULL);
15054     if (ctxt == NULL)
15055         return (NULL);
15056     xmlInitParser();
15057 
15058     xmlCtxtReset(ctxt);
15059 
15060     stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15061     if (stream == NULL) {
15062         return (NULL);
15063     }
15064     inputPush(ctxt, stream);
15065     return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15066 }
15067 
15068 /**
15069  * xmlCtxtReadMemory:
15070  * @ctxt:  an XML parser context
15071  * @buffer:  a pointer to a char array
15072  * @size:  the size of the array
15073  * @URL:  the base URL to use for the document
15074  * @encoding:  the document encoding, or NULL
15075  * @options:  a combination of xmlParserOption
15076  *
15077  * parse an XML in-memory document and build a tree.
15078  * This reuses the existing @ctxt parser context
15079  *
15080  * Returns the resulting document tree
15081  */
15082 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)15083 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15084                   const char *URL, const char *encoding, int options)
15085 {
15086     xmlParserInputBufferPtr input;
15087     xmlParserInputPtr stream;
15088 
15089     if (ctxt == NULL)
15090         return (NULL);
15091     if (buffer == NULL)
15092         return (NULL);
15093     xmlInitParser();
15094 
15095     xmlCtxtReset(ctxt);
15096 
15097     input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15098     if (input == NULL) {
15099 	return(NULL);
15100     }
15101 
15102     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15103     if (stream == NULL) {
15104 	xmlFreeParserInputBuffer(input);
15105 	return(NULL);
15106     }
15107 
15108     inputPush(ctxt, stream);
15109     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15110 }
15111 
15112 /**
15113  * xmlCtxtReadFd:
15114  * @ctxt:  an XML parser context
15115  * @fd:  an open file descriptor
15116  * @URL:  the base URL to use for the document
15117  * @encoding:  the document encoding, or NULL
15118  * @options:  a combination of xmlParserOption
15119  *
15120  * parse an XML from a file descriptor and build a tree.
15121  * This reuses the existing @ctxt parser context
15122  * NOTE that the file descriptor will not be closed when the
15123  *      reader is closed or reset.
15124  *
15125  * Returns the resulting document tree
15126  */
15127 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)15128 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15129               const char *URL, const char *encoding, int options)
15130 {
15131     xmlParserInputBufferPtr input;
15132     xmlParserInputPtr stream;
15133 
15134     if (fd < 0)
15135         return (NULL);
15136     if (ctxt == NULL)
15137         return (NULL);
15138     xmlInitParser();
15139 
15140     xmlCtxtReset(ctxt);
15141 
15142 
15143     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15144     if (input == NULL)
15145         return (NULL);
15146     input->closecallback = NULL;
15147     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15148     if (stream == NULL) {
15149         xmlFreeParserInputBuffer(input);
15150         return (NULL);
15151     }
15152     inputPush(ctxt, stream);
15153     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15154 }
15155 
15156 /**
15157  * xmlCtxtReadIO:
15158  * @ctxt:  an XML parser context
15159  * @ioread:  an I/O read function
15160  * @ioclose:  an I/O close function
15161  * @ioctx:  an I/O handler
15162  * @URL:  the base URL to use for the document
15163  * @encoding:  the document encoding, or NULL
15164  * @options:  a combination of xmlParserOption
15165  *
15166  * parse an XML document from I/O functions and source and build a tree.
15167  * This reuses the existing @ctxt parser context
15168  *
15169  * Returns the resulting document tree
15170  */
15171 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15172 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15173               xmlInputCloseCallback ioclose, void *ioctx,
15174 	      const char *URL,
15175               const char *encoding, int options)
15176 {
15177     xmlParserInputBufferPtr input;
15178     xmlParserInputPtr stream;
15179 
15180     if (ioread == NULL)
15181         return (NULL);
15182     if (ctxt == NULL)
15183         return (NULL);
15184     xmlInitParser();
15185 
15186     xmlCtxtReset(ctxt);
15187 
15188     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15189                                          XML_CHAR_ENCODING_NONE);
15190     if (input == NULL) {
15191         if (ioclose != NULL)
15192             ioclose(ioctx);
15193         return (NULL);
15194     }
15195     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15196     if (stream == NULL) {
15197         xmlFreeParserInputBuffer(input);
15198         return (NULL);
15199     }
15200     inputPush(ctxt, stream);
15201     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15202 }
15203 
15204