/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32 
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37 
38 #define IN_LIBXML
39 #include "libxml.h"
40 
41 #if defined(_WIN32) && !defined (__CYGWIN__)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
46 
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <libxml/xmlmemory.h>
53 #include <libxml/threads.h>
54 #include <libxml/globals.h>
55 #include <libxml/tree.h>
56 #include <libxml/parser.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #ifdef LIBXML_CATALOG_ENABLED
65 #include <libxml/catalog.h>
66 #endif
67 #ifdef LIBXML_SCHEMAS_ENABLED
68 #include <libxml/xmlschemastypes.h>
69 #include <libxml/relaxng.h>
70 #endif
71 #ifdef HAVE_CTYPE_H
72 #include <ctype.h>
73 #endif
74 #ifdef HAVE_STDLIB_H
75 #include <stdlib.h>
76 #endif
77 #ifdef HAVE_SYS_STAT_H
78 #include <sys/stat.h>
79 #endif
80 #ifdef HAVE_FCNTL_H
81 #include <fcntl.h>
82 #endif
83 #ifdef HAVE_UNISTD_H
84 #include <unistd.h>
85 #endif
86 
87 #include "buf.h"
88 #include "enc.h"
89 
90 struct _xmlStartTag {
91     const xmlChar *prefix;
92     const xmlChar *URI;
93     int line;
94     int nsNr;
95 };
96 
97 static void
98 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
99 
100 static xmlParserCtxtPtr
101 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
102 	                  const xmlChar *base, xmlParserCtxtPtr pctx);
103 
104 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
105 
106 static int
107 xmlParseElementStart(xmlParserCtxtPtr ctxt);
108 
109 static void
110 xmlParseElementEnd(xmlParserCtxtPtr ctxt);
111 
/************************************************************************
 *									*
 *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
 *									*
 ************************************************************************/

#define XML_MAX_HUGE_LENGTH 1000000000

#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 *    replacement over the size in bytes of the input indicates that you
 *    have an exponential behaviour. A value of 10 corresponds to at least
 *    3 entity replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
130 
131 /*
132  * xmlParserEntityCheck
133  *
134  * Function to check non-linear entity expansion behaviour
135  * This is here to detect and stop exponential linear entity expansion
136  * This is not a limitation of the parser but a safety
137  * boundary feature. It can be disabled with the XML_PARSE_HUGE
138  * parser option.
139  */
140 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,size_t size,xmlEntityPtr ent,size_t replacement)141 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
142                      xmlEntityPtr ent, size_t replacement)
143 {
144     size_t consumed = 0;
145     int i;
146 
147     if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
148         return (0);
149     if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
150         return (1);
151 
152     /*
153      * This may look absurd but is needed to detect
154      * entities problems
155      */
156     if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
157 	(ent->content != NULL) && (ent->checked == 0) &&
158 	(ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
159 	unsigned long oldnbent = ctxt->nbentities, diff;
160 	xmlChar *rep;
161 
162 	ent->checked = 1;
163 
164         ++ctxt->depth;
165 	rep = xmlStringDecodeEntities(ctxt, ent->content,
166 				  XML_SUBSTITUTE_REF, 0, 0, 0);
167         --ctxt->depth;
168 	if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
169 	    ent->content[0] = 0;
170 	}
171 
172         diff = ctxt->nbentities - oldnbent + 1;
173         if (diff > INT_MAX / 2)
174             diff = INT_MAX / 2;
175 	ent->checked = diff * 2;
176 	if (rep != NULL) {
177 	    if (xmlStrchr(rep, '<'))
178 		ent->checked |= 1;
179 	    xmlFree(rep);
180 	    rep = NULL;
181 	}
182     }
183 
184     /*
185      * Prevent entity exponential check, not just replacement while
186      * parsing the DTD
187      * The check is potentially costly so do that only once in a thousand
188      */
189     if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
190         (ctxt->nbentities % 1024 == 0)) {
191 	for (i = 0;i < ctxt->inputNr;i++) {
192 	    consumed += ctxt->inputTab[i]->consumed +
193 	               (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
194 	}
195 	if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
196 	    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
197 	    ctxt->instate = XML_PARSER_EOF;
198 	    return (1);
199 	}
200 	consumed = 0;
201     }
202 
203 
204 
205     if (replacement != 0) {
206 	if (replacement < XML_MAX_TEXT_LENGTH)
207 	    return(0);
208 
209         /*
210 	 * If the volume of entity copy reaches 10 times the
211 	 * amount of parsed data and over the large text threshold
212 	 * then that's very likely to be an abuse.
213 	 */
214         if (ctxt->input != NULL) {
215 	    consumed = ctxt->input->consumed +
216 	               (ctxt->input->cur - ctxt->input->base);
217 	}
218         consumed += ctxt->sizeentities;
219 
220         if (replacement < XML_PARSER_NON_LINEAR * consumed)
221 	    return(0);
222     } else if (size != 0) {
223         /*
224          * Do the check based on the replacement size of the entity
225          */
226         if (size < XML_PARSER_BIG_ENTITY)
227 	    return(0);
228 
229         /*
230          * A limit on the amount of text data reasonably used
231          */
232         if (ctxt->input != NULL) {
233             consumed = ctxt->input->consumed +
234                 (ctxt->input->cur - ctxt->input->base);
235         }
236         consumed += ctxt->sizeentities;
237 
238         if ((size < XML_PARSER_NON_LINEAR * consumed) &&
239 	    (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
240             return (0);
241     } else if (ent != NULL) {
242         /*
243          * use the number of parsed entities in the replacement
244          */
245         size = ent->checked / 2;
246 
247         /*
248          * The amount of data parsed counting entities size only once
249          */
250         if (ctxt->input != NULL) {
251             consumed = ctxt->input->consumed +
252                 (ctxt->input->cur - ctxt->input->base);
253         }
254         consumed += ctxt->sizeentities;
255 
256         /*
257          * Check the density of entities for the amount of data
258 	 * knowing an entity reference will take at least 3 bytes
259          */
260         if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
261             return (0);
262     } else {
263         /*
264          * strange we got no data for checking
265          */
266 	if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
267 	     (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
268 	    (ctxt->nbentities <= 10000))
269 	    return (0);
270     }
271     xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
272     return (1);
273 }
274 
/**
 * xmlParserMaxDepth:
 *
 * arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
284 
285 
286 
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW that's the minimal amount of data
 * the parser expected to have received. It is not a hard
 * limit but an optimization when reading strings like Names
 * It is not strictly needed as long as inputs available characters
 * are followed by 0, which should be provided by the I/O level
 */
#define XML_PARSER_CHUNK_SIZE 100
302 
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
312 
313 
314 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
315 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
316                                               const xmlChar **str);
317 
318 static xmlParserErrors
319 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
320 	              xmlSAXHandlerPtr sax,
321 		      void *user_data, int depth, const xmlChar *URL,
322 		      const xmlChar *ID, xmlNodePtr *list);
323 
324 static int
325 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
326                           const char *encoding);
327 #ifdef LIBXML_LEGACY_ENABLED
328 static void
329 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
330                       xmlNodePtr lastNode);
331 #endif /* LIBXML_LEGACY_ENABLED */
332 
333 static xmlParserErrors
334 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
335 		      const xmlChar *string, void *user_data, xmlNodePtr *lst);
336 
337 static int
338 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
339 
340 /************************************************************************
341  *									*
342  *		Some factorized error routines				*
343  *									*
344  ************************************************************************/
345 
346 /**
347  * xmlErrAttributeDup:
348  * @ctxt:  an XML parser context
349  * @prefix:  the attribute prefix
350  * @localname:  the attribute localname
351  *
352  * Handle a redefinition of attribute error
353  */
354 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)355 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
356                    const xmlChar * localname)
357 {
358     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
359         (ctxt->instate == XML_PARSER_EOF))
360 	return;
361     if (ctxt != NULL)
362 	ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
363 
364     if (prefix == NULL)
365         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
366                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
367                         (const char *) localname, NULL, NULL, 0, 0,
368                         "Attribute %s redefined\n", localname);
369     else
370         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
371                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
372                         (const char *) prefix, (const char *) localname,
373                         NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
374                         localname);
375     if (ctxt != NULL) {
376 	ctxt->wellFormed = 0;
377 	if (ctxt->recovery == 0)
378 	    ctxt->disableSAX = 1;
379     }
380 }
381 
382 /**
383  * xmlFatalErr:
384  * @ctxt:  an XML parser context
385  * @error:  the error number
386  * @extra:  extra information string
387  *
388  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
389  */
390 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)391 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
392 {
393     const char *errmsg;
394 
395     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
396         (ctxt->instate == XML_PARSER_EOF))
397 	return;
398     switch (error) {
399         case XML_ERR_INVALID_HEX_CHARREF:
400             errmsg = "CharRef: invalid hexadecimal value";
401             break;
402         case XML_ERR_INVALID_DEC_CHARREF:
403             errmsg = "CharRef: invalid decimal value";
404             break;
405         case XML_ERR_INVALID_CHARREF:
406             errmsg = "CharRef: invalid value";
407             break;
408         case XML_ERR_INTERNAL_ERROR:
409             errmsg = "internal error";
410             break;
411         case XML_ERR_PEREF_AT_EOF:
412             errmsg = "PEReference at end of document";
413             break;
414         case XML_ERR_PEREF_IN_PROLOG:
415             errmsg = "PEReference in prolog";
416             break;
417         case XML_ERR_PEREF_IN_EPILOG:
418             errmsg = "PEReference in epilog";
419             break;
420         case XML_ERR_PEREF_NO_NAME:
421             errmsg = "PEReference: no name";
422             break;
423         case XML_ERR_PEREF_SEMICOL_MISSING:
424             errmsg = "PEReference: expecting ';'";
425             break;
426         case XML_ERR_ENTITY_LOOP:
427             errmsg = "Detected an entity reference loop";
428             break;
429         case XML_ERR_ENTITY_NOT_STARTED:
430             errmsg = "EntityValue: \" or ' expected";
431             break;
432         case XML_ERR_ENTITY_PE_INTERNAL:
433             errmsg = "PEReferences forbidden in internal subset";
434             break;
435         case XML_ERR_ENTITY_NOT_FINISHED:
436             errmsg = "EntityValue: \" or ' expected";
437             break;
438         case XML_ERR_ATTRIBUTE_NOT_STARTED:
439             errmsg = "AttValue: \" or ' expected";
440             break;
441         case XML_ERR_LT_IN_ATTRIBUTE:
442             errmsg = "Unescaped '<' not allowed in attributes values";
443             break;
444         case XML_ERR_LITERAL_NOT_STARTED:
445             errmsg = "SystemLiteral \" or ' expected";
446             break;
447         case XML_ERR_LITERAL_NOT_FINISHED:
448             errmsg = "Unfinished System or Public ID \" or ' expected";
449             break;
450         case XML_ERR_MISPLACED_CDATA_END:
451             errmsg = "Sequence ']]>' not allowed in content";
452             break;
453         case XML_ERR_URI_REQUIRED:
454             errmsg = "SYSTEM or PUBLIC, the URI is missing";
455             break;
456         case XML_ERR_PUBID_REQUIRED:
457             errmsg = "PUBLIC, the Public Identifier is missing";
458             break;
459         case XML_ERR_HYPHEN_IN_COMMENT:
460             errmsg = "Comment must not contain '--' (double-hyphen)";
461             break;
462         case XML_ERR_PI_NOT_STARTED:
463             errmsg = "xmlParsePI : no target name";
464             break;
465         case XML_ERR_RESERVED_XML_NAME:
466             errmsg = "Invalid PI name";
467             break;
468         case XML_ERR_NOTATION_NOT_STARTED:
469             errmsg = "NOTATION: Name expected here";
470             break;
471         case XML_ERR_NOTATION_NOT_FINISHED:
472             errmsg = "'>' required to close NOTATION declaration";
473             break;
474         case XML_ERR_VALUE_REQUIRED:
475             errmsg = "Entity value required";
476             break;
477         case XML_ERR_URI_FRAGMENT:
478             errmsg = "Fragment not allowed";
479             break;
480         case XML_ERR_ATTLIST_NOT_STARTED:
481             errmsg = "'(' required to start ATTLIST enumeration";
482             break;
483         case XML_ERR_NMTOKEN_REQUIRED:
484             errmsg = "NmToken expected in ATTLIST enumeration";
485             break;
486         case XML_ERR_ATTLIST_NOT_FINISHED:
487             errmsg = "')' required to finish ATTLIST enumeration";
488             break;
489         case XML_ERR_MIXED_NOT_STARTED:
490             errmsg = "MixedContentDecl : '|' or ')*' expected";
491             break;
492         case XML_ERR_PCDATA_REQUIRED:
493             errmsg = "MixedContentDecl : '#PCDATA' expected";
494             break;
495         case XML_ERR_ELEMCONTENT_NOT_STARTED:
496             errmsg = "ContentDecl : Name or '(' expected";
497             break;
498         case XML_ERR_ELEMCONTENT_NOT_FINISHED:
499             errmsg = "ContentDecl : ',' '|' or ')' expected";
500             break;
501         case XML_ERR_PEREF_IN_INT_SUBSET:
502             errmsg =
503                 "PEReference: forbidden within markup decl in internal subset";
504             break;
505         case XML_ERR_GT_REQUIRED:
506             errmsg = "expected '>'";
507             break;
508         case XML_ERR_CONDSEC_INVALID:
509             errmsg = "XML conditional section '[' expected";
510             break;
511         case XML_ERR_EXT_SUBSET_NOT_FINISHED:
512             errmsg = "Content error in the external subset";
513             break;
514         case XML_ERR_CONDSEC_INVALID_KEYWORD:
515             errmsg =
516                 "conditional section INCLUDE or IGNORE keyword expected";
517             break;
518         case XML_ERR_CONDSEC_NOT_FINISHED:
519             errmsg = "XML conditional section not closed";
520             break;
521         case XML_ERR_XMLDECL_NOT_STARTED:
522             errmsg = "Text declaration '<?xml' required";
523             break;
524         case XML_ERR_XMLDECL_NOT_FINISHED:
525             errmsg = "parsing XML declaration: '?>' expected";
526             break;
527         case XML_ERR_EXT_ENTITY_STANDALONE:
528             errmsg = "external parsed entities cannot be standalone";
529             break;
530         case XML_ERR_ENTITYREF_SEMICOL_MISSING:
531             errmsg = "EntityRef: expecting ';'";
532             break;
533         case XML_ERR_DOCTYPE_NOT_FINISHED:
534             errmsg = "DOCTYPE improperly terminated";
535             break;
536         case XML_ERR_LTSLASH_REQUIRED:
537             errmsg = "EndTag: '</' not found";
538             break;
539         case XML_ERR_EQUAL_REQUIRED:
540             errmsg = "expected '='";
541             break;
542         case XML_ERR_STRING_NOT_CLOSED:
543             errmsg = "String not closed expecting \" or '";
544             break;
545         case XML_ERR_STRING_NOT_STARTED:
546             errmsg = "String not started expecting ' or \"";
547             break;
548         case XML_ERR_ENCODING_NAME:
549             errmsg = "Invalid XML encoding name";
550             break;
551         case XML_ERR_STANDALONE_VALUE:
552             errmsg = "standalone accepts only 'yes' or 'no'";
553             break;
554         case XML_ERR_DOCUMENT_EMPTY:
555             errmsg = "Document is empty";
556             break;
557         case XML_ERR_DOCUMENT_END:
558             errmsg = "Extra content at the end of the document";
559             break;
560         case XML_ERR_NOT_WELL_BALANCED:
561             errmsg = "chunk is not well balanced";
562             break;
563         case XML_ERR_EXTRA_CONTENT:
564             errmsg = "extra content at the end of well balanced chunk";
565             break;
566         case XML_ERR_VERSION_MISSING:
567             errmsg = "Malformed declaration expecting version";
568             break;
569         case XML_ERR_NAME_TOO_LONG:
570             errmsg = "Name too long";
571             break;
572 #if 0
573         case:
574             errmsg = "";
575             break;
576 #endif
577         default:
578             errmsg = "Unregistered error message";
579     }
580     if (ctxt != NULL)
581 	ctxt->errNo = error;
582     if (info == NULL) {
583         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
584                         XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
585                         errmsg);
586     } else {
587         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
588                         XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
589                         errmsg, info);
590     }
591     if (ctxt != NULL) {
592 	ctxt->wellFormed = 0;
593 	if (ctxt->recovery == 0)
594 	    ctxt->disableSAX = 1;
595     }
596 }
597 
598 /**
599  * xmlFatalErrMsg:
600  * @ctxt:  an XML parser context
601  * @error:  the error number
602  * @msg:  the error message
603  *
604  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
605  */
606 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)607 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
608                const char *msg)
609 {
610     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
611         (ctxt->instate == XML_PARSER_EOF))
612 	return;
613     if (ctxt != NULL)
614 	ctxt->errNo = error;
615     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
616                     XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
617     if (ctxt != NULL) {
618 	ctxt->wellFormed = 0;
619 	if (ctxt->recovery == 0)
620 	    ctxt->disableSAX = 1;
621     }
622 }
623 
624 /**
625  * xmlWarningMsg:
626  * @ctxt:  an XML parser context
627  * @error:  the error number
628  * @msg:  the error message
629  * @str1:  extra data
630  * @str2:  extra data
631  *
632  * Handle a warning.
633  */
634 static void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)635 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
636               const char *msg, const xmlChar *str1, const xmlChar *str2)
637 {
638     xmlStructuredErrorFunc schannel = NULL;
639 
640     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
641         (ctxt->instate == XML_PARSER_EOF))
642 	return;
643     if ((ctxt != NULL) && (ctxt->sax != NULL) &&
644         (ctxt->sax->initialized == XML_SAX2_MAGIC))
645         schannel = ctxt->sax->serror;
646     if (ctxt != NULL) {
647         __xmlRaiseError(schannel,
648                     (ctxt->sax) ? ctxt->sax->warning : NULL,
649                     ctxt->userData,
650                     ctxt, NULL, XML_FROM_PARSER, error,
651                     XML_ERR_WARNING, NULL, 0,
652 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
653 		    msg, (const char *) str1, (const char *) str2);
654     } else {
655         __xmlRaiseError(schannel, NULL, NULL,
656                     ctxt, NULL, XML_FROM_PARSER, error,
657                     XML_ERR_WARNING, NULL, 0,
658 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
659 		    msg, (const char *) str1, (const char *) str2);
660     }
661 }
662 
663 /**
664  * xmlValidityError:
665  * @ctxt:  an XML parser context
666  * @error:  the error number
667  * @msg:  the error message
668  * @str1:  extra data
669  *
670  * Handle a validity error.
671  */
672 static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)673 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
674               const char *msg, const xmlChar *str1, const xmlChar *str2)
675 {
676     xmlStructuredErrorFunc schannel = NULL;
677 
678     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
679         (ctxt->instate == XML_PARSER_EOF))
680 	return;
681     if (ctxt != NULL) {
682 	ctxt->errNo = error;
683 	if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
684 	    schannel = ctxt->sax->serror;
685     }
686     if (ctxt != NULL) {
687         __xmlRaiseError(schannel,
688                     ctxt->vctxt.error, ctxt->vctxt.userData,
689                     ctxt, NULL, XML_FROM_DTD, error,
690                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
691 		    (const char *) str2, NULL, 0, 0,
692 		    msg, (const char *) str1, (const char *) str2);
693 	ctxt->valid = 0;
694     } else {
695         __xmlRaiseError(schannel, NULL, NULL,
696                     ctxt, NULL, XML_FROM_DTD, error,
697                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
698 		    (const char *) str2, NULL, 0, 0,
699 		    msg, (const char *) str1, (const char *) str2);
700     }
701 }
702 
703 /**
704  * xmlFatalErrMsgInt:
705  * @ctxt:  an XML parser context
706  * @error:  the error number
707  * @msg:  the error message
708  * @val:  an integer value
709  *
710  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
711  */
712 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)713 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714                   const char *msg, int val)
715 {
716     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
717         (ctxt->instate == XML_PARSER_EOF))
718 	return;
719     if (ctxt != NULL)
720 	ctxt->errNo = error;
721     __xmlRaiseError(NULL, NULL, NULL,
722                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
723                     NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
724     if (ctxt != NULL) {
725 	ctxt->wellFormed = 0;
726 	if (ctxt->recovery == 0)
727 	    ctxt->disableSAX = 1;
728     }
729 }
730 
731 /**
732  * xmlFatalErrMsgStrIntStr:
733  * @ctxt:  an XML parser context
734  * @error:  the error number
735  * @msg:  the error message
736  * @str1:  an string info
737  * @val:  an integer value
738  * @str2:  an string info
739  *
740  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
741  */
742 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)743 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
744                   const char *msg, const xmlChar *str1, int val,
745 		  const xmlChar *str2)
746 {
747     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
748         (ctxt->instate == XML_PARSER_EOF))
749 	return;
750     if (ctxt != NULL)
751 	ctxt->errNo = error;
752     __xmlRaiseError(NULL, NULL, NULL,
753                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
754                     NULL, 0, (const char *) str1, (const char *) str2,
755 		    NULL, val, 0, msg, str1, val, str2);
756     if (ctxt != NULL) {
757 	ctxt->wellFormed = 0;
758 	if (ctxt->recovery == 0)
759 	    ctxt->disableSAX = 1;
760     }
761 }
762 
763 /**
764  * xmlFatalErrMsgStr:
765  * @ctxt:  an XML parser context
766  * @error:  the error number
767  * @msg:  the error message
768  * @val:  a string value
769  *
770  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
771  */
772 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)773 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
774                   const char *msg, const xmlChar * val)
775 {
776     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
777         (ctxt->instate == XML_PARSER_EOF))
778 	return;
779     if (ctxt != NULL)
780 	ctxt->errNo = error;
781     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
782                     XML_FROM_PARSER, error, XML_ERR_FATAL,
783                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
784                     val);
785     if (ctxt != NULL) {
786 	ctxt->wellFormed = 0;
787 	if (ctxt->recovery == 0)
788 	    ctxt->disableSAX = 1;
789     }
790 }
791 
792 /**
793  * xmlErrMsgStr:
794  * @ctxt:  an XML parser context
795  * @error:  the error number
796  * @msg:  the error message
797  * @val:  a string value
798  *
799  * Handle a non fatal parser error
800  */
801 static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)802 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
803                   const char *msg, const xmlChar * val)
804 {
805     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
806         (ctxt->instate == XML_PARSER_EOF))
807 	return;
808     if (ctxt != NULL)
809 	ctxt->errNo = error;
810     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
811                     XML_FROM_PARSER, error, XML_ERR_ERROR,
812                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
813                     val);
814 }
815 
816 /**
817  * xmlNsErr:
818  * @ctxt:  an XML parser context
819  * @error:  the error number
820  * @msg:  the message
821  * @info1:  extra information string
822  * @info2:  extra information string
823  *
824  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
825  */
826 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)827 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
828          const char *msg,
829          const xmlChar * info1, const xmlChar * info2,
830          const xmlChar * info3)
831 {
832     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
833         (ctxt->instate == XML_PARSER_EOF))
834 	return;
835     if (ctxt != NULL)
836 	ctxt->errNo = error;
837     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
838                     XML_ERR_ERROR, NULL, 0, (const char *) info1,
839                     (const char *) info2, (const char *) info3, 0, 0, msg,
840                     info1, info2, info3);
841     if (ctxt != NULL)
842 	ctxt->nsWellFormed = 0;
843 }
844 
845 /**
846  * xmlNsWarn
847  * @ctxt:  an XML parser context
848  * @error:  the error number
849  * @msg:  the message
850  * @info1:  extra information string
851  * @info2:  extra information string
852  *
853  * Handle a namespace warning error
854  */
855 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)856 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
857          const char *msg,
858          const xmlChar * info1, const xmlChar * info2,
859          const xmlChar * info3)
860 {
861     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
862         (ctxt->instate == XML_PARSER_EOF))
863 	return;
864     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
865                     XML_ERR_WARNING, NULL, 0, (const char *) info1,
866                     (const char *) info2, (const char *) info3, 0, 0, msg,
867                     info1, info2, info3);
868 }
869 
870 /************************************************************************
871  *									*
872  *		Library wide options					*
873  *									*
874  ************************************************************************/
875 
876 /**
877   * xmlHasFeature:
878   * @feature: the feature to be examined
879   *
880   * Examines if the library has been compiled with a given feature.
881   *
882   * Returns a non-zero value if the feature exist, otherwise zero.
883   * Returns zero (0) if the feature does not exist or an unknown
884   * unknown feature is requested, non-zero otherwise.
885   */
886 int
xmlHasFeature(xmlFeature feature)887 xmlHasFeature(xmlFeature feature)
888 {
889     switch (feature) {
890 	case XML_WITH_THREAD:
891 #ifdef LIBXML_THREAD_ENABLED
892 	    return(1);
893 #else
894 	    return(0);
895 #endif
896         case XML_WITH_TREE:
897 #ifdef LIBXML_TREE_ENABLED
898             return(1);
899 #else
900             return(0);
901 #endif
902         case XML_WITH_OUTPUT:
903 #ifdef LIBXML_OUTPUT_ENABLED
904             return(1);
905 #else
906             return(0);
907 #endif
908         case XML_WITH_PUSH:
909 #ifdef LIBXML_PUSH_ENABLED
910             return(1);
911 #else
912             return(0);
913 #endif
914         case XML_WITH_READER:
915 #ifdef LIBXML_READER_ENABLED
916             return(1);
917 #else
918             return(0);
919 #endif
920         case XML_WITH_PATTERN:
921 #ifdef LIBXML_PATTERN_ENABLED
922             return(1);
923 #else
924             return(0);
925 #endif
926         case XML_WITH_WRITER:
927 #ifdef LIBXML_WRITER_ENABLED
928             return(1);
929 #else
930             return(0);
931 #endif
932         case XML_WITH_SAX1:
933 #ifdef LIBXML_SAX1_ENABLED
934             return(1);
935 #else
936             return(0);
937 #endif
938         case XML_WITH_FTP:
939 #ifdef LIBXML_FTP_ENABLED
940             return(1);
941 #else
942             return(0);
943 #endif
944         case XML_WITH_HTTP:
945 #ifdef LIBXML_HTTP_ENABLED
946             return(1);
947 #else
948             return(0);
949 #endif
950         case XML_WITH_VALID:
951 #ifdef LIBXML_VALID_ENABLED
952             return(1);
953 #else
954             return(0);
955 #endif
956         case XML_WITH_HTML:
957 #ifdef LIBXML_HTML_ENABLED
958             return(1);
959 #else
960             return(0);
961 #endif
962         case XML_WITH_LEGACY:
963 #ifdef LIBXML_LEGACY_ENABLED
964             return(1);
965 #else
966             return(0);
967 #endif
968         case XML_WITH_C14N:
969 #ifdef LIBXML_C14N_ENABLED
970             return(1);
971 #else
972             return(0);
973 #endif
974         case XML_WITH_CATALOG:
975 #ifdef LIBXML_CATALOG_ENABLED
976             return(1);
977 #else
978             return(0);
979 #endif
980         case XML_WITH_XPATH:
981 #ifdef LIBXML_XPATH_ENABLED
982             return(1);
983 #else
984             return(0);
985 #endif
986         case XML_WITH_XPTR:
987 #ifdef LIBXML_XPTR_ENABLED
988             return(1);
989 #else
990             return(0);
991 #endif
992         case XML_WITH_XINCLUDE:
993 #ifdef LIBXML_XINCLUDE_ENABLED
994             return(1);
995 #else
996             return(0);
997 #endif
998         case XML_WITH_ICONV:
999 #ifdef LIBXML_ICONV_ENABLED
1000             return(1);
1001 #else
1002             return(0);
1003 #endif
1004         case XML_WITH_ISO8859X:
1005 #ifdef LIBXML_ISO8859X_ENABLED
1006             return(1);
1007 #else
1008             return(0);
1009 #endif
1010         case XML_WITH_UNICODE:
1011 #ifdef LIBXML_UNICODE_ENABLED
1012             return(1);
1013 #else
1014             return(0);
1015 #endif
1016         case XML_WITH_REGEXP:
1017 #ifdef LIBXML_REGEXP_ENABLED
1018             return(1);
1019 #else
1020             return(0);
1021 #endif
1022         case XML_WITH_AUTOMATA:
1023 #ifdef LIBXML_AUTOMATA_ENABLED
1024             return(1);
1025 #else
1026             return(0);
1027 #endif
1028         case XML_WITH_EXPR:
1029 #ifdef LIBXML_EXPR_ENABLED
1030             return(1);
1031 #else
1032             return(0);
1033 #endif
1034         case XML_WITH_SCHEMAS:
1035 #ifdef LIBXML_SCHEMAS_ENABLED
1036             return(1);
1037 #else
1038             return(0);
1039 #endif
1040         case XML_WITH_SCHEMATRON:
1041 #ifdef LIBXML_SCHEMATRON_ENABLED
1042             return(1);
1043 #else
1044             return(0);
1045 #endif
1046         case XML_WITH_MODULES:
1047 #ifdef LIBXML_MODULES_ENABLED
1048             return(1);
1049 #else
1050             return(0);
1051 #endif
1052         case XML_WITH_DEBUG:
1053 #ifdef LIBXML_DEBUG_ENABLED
1054             return(1);
1055 #else
1056             return(0);
1057 #endif
1058         case XML_WITH_DEBUG_MEM:
1059 #ifdef DEBUG_MEMORY_LOCATION
1060             return(1);
1061 #else
1062             return(0);
1063 #endif
1064         case XML_WITH_DEBUG_RUN:
1065 #ifdef LIBXML_DEBUG_RUNTIME
1066             return(1);
1067 #else
1068             return(0);
1069 #endif
1070         case XML_WITH_ZLIB:
1071 #ifdef LIBXML_ZLIB_ENABLED
1072             return(1);
1073 #else
1074             return(0);
1075 #endif
1076         case XML_WITH_LZMA:
1077 #ifdef LIBXML_LZMA_ENABLED
1078             return(1);
1079 #else
1080             return(0);
1081 #endif
1082         case XML_WITH_ICU:
1083 #ifdef LIBXML_ICU_ENABLED
1084             return(1);
1085 #else
1086             return(0);
1087 #endif
1088         default:
1089 	    break;
1090      }
1091      return(0);
1092 }
1093 
1094 /************************************************************************
1095  *									*
1096  *		SAX2 defaulted attributes handling			*
1097  *									*
1098  ************************************************************************/
1099 
1100 /**
1101  * xmlDetectSAX2:
1102  * @ctxt:  an XML parser context
1103  *
1104  * Do the SAX2 detection and specific initialization
1105  */
1106 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)1107 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1108     xmlSAXHandlerPtr sax;
1109     if (ctxt == NULL) return;
1110     sax = ctxt->sax;
1111 #ifdef LIBXML_SAX1_ENABLED
1112     if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1113         ((sax->startElementNs != NULL) ||
1114          (sax->endElementNs != NULL) ||
1115          ((sax->startElement == NULL) && (sax->endElement == NULL))))
1116         ctxt->sax2 = 1;
1117 #else
1118     ctxt->sax2 = 1;
1119 #endif /* LIBXML_SAX1_ENABLED */
1120 
1121     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1122     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1123     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1124     if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1125 		(ctxt->str_xml_ns == NULL)) {
1126         xmlErrMemory(ctxt, NULL);
1127     }
1128 }
1129 
1130 typedef struct _xmlDefAttrs xmlDefAttrs;
1131 typedef xmlDefAttrs *xmlDefAttrsPtr;
1132 struct _xmlDefAttrs {
1133     int nbAttrs;	/* number of defaulted attributes on that element */
1134     int maxAttrs;       /* the size of the array */
1135 #if __STDC_VERSION__ >= 199901L
1136     /* Using a C99 flexible array member avoids UBSan errors. */
1137     const xmlChar *values[]; /* array of localname/prefix/values/external */
1138 #else
1139     const xmlChar *values[5];
1140 #endif
1141 };
1142 
1143 /**
1144  * xmlAttrNormalizeSpace:
1145  * @src: the source string
1146  * @dst: the target string
1147  *
1148  * Normalize the space in non CDATA attribute values:
1149  * If the attribute type is not CDATA, then the XML processor MUST further
1150  * process the normalized attribute value by discarding any leading and
1151  * trailing space (#x20) characters, and by replacing sequences of space
1152  * (#x20) characters by a single space (#x20) character.
1153  * Note that the size of dst need to be at least src, and if one doesn't need
1154  * to preserve dst (and it doesn't come from a dictionary or read-only) then
1155  * passing src as dst is just fine.
1156  *
1157  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1158  *         is needed.
1159  */
1160 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1161 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1162 {
1163     if ((src == NULL) || (dst == NULL))
1164         return(NULL);
1165 
1166     while (*src == 0x20) src++;
1167     while (*src != 0) {
1168 	if (*src == 0x20) {
1169 	    while (*src == 0x20) src++;
1170 	    if (*src != 0)
1171 		*dst++ = 0x20;
1172 	} else {
1173 	    *dst++ = *src++;
1174 	}
1175     }
1176     *dst = 0;
1177     if (dst == src)
1178        return(NULL);
1179     return(dst);
1180 }
1181 
1182 /**
1183  * xmlAttrNormalizeSpace2:
1184  * @src: the source string
1185  *
1186  * Normalize the space in non CDATA attribute values, a slightly more complex
1187  * front end to avoid allocation problems when running on attribute values
1188  * coming from the input.
1189  *
1190  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1191  *         is needed.
1192  */
1193 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1194 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1195 {
1196     int i;
1197     int remove_head = 0;
1198     int need_realloc = 0;
1199     const xmlChar *cur;
1200 
1201     if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1202         return(NULL);
1203     i = *len;
1204     if (i <= 0)
1205         return(NULL);
1206 
1207     cur = src;
1208     while (*cur == 0x20) {
1209         cur++;
1210 	remove_head++;
1211     }
1212     while (*cur != 0) {
1213 	if (*cur == 0x20) {
1214 	    cur++;
1215 	    if ((*cur == 0x20) || (*cur == 0)) {
1216 	        need_realloc = 1;
1217 		break;
1218 	    }
1219 	} else
1220 	    cur++;
1221     }
1222     if (need_realloc) {
1223         xmlChar *ret;
1224 
1225 	ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1226 	if (ret == NULL) {
1227 	    xmlErrMemory(ctxt, NULL);
1228 	    return(NULL);
1229 	}
1230 	xmlAttrNormalizeSpace(ret, ret);
1231 	*len = (int) strlen((const char *)ret);
1232         return(ret);
1233     } else if (remove_head) {
1234         *len -= remove_head;
1235         memmove(src, src + remove_head, 1 + *len);
1236 	return(src);
1237     }
1238     return(NULL);
1239 }
1240 
1241 /**
1242  * xmlAddDefAttrs:
1243  * @ctxt:  an XML parser context
1244  * @fullname:  the element fullname
1245  * @fullattr:  the attribute fullname
1246  * @value:  the attribute value
1247  *
1248  * Add a defaulted attribute for an element
1249  */
1250 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1251 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1252                const xmlChar *fullname,
1253                const xmlChar *fullattr,
1254                const xmlChar *value) {
1255     xmlDefAttrsPtr defaults;
1256     int len;
1257     const xmlChar *name;
1258     const xmlChar *prefix;
1259 
1260     /*
1261      * Allows to detect attribute redefinitions
1262      */
1263     if (ctxt->attsSpecial != NULL) {
1264         if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1265 	    return;
1266     }
1267 
1268     if (ctxt->attsDefault == NULL) {
1269         ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1270 	if (ctxt->attsDefault == NULL)
1271 	    goto mem_error;
1272     }
1273 
1274     /*
1275      * split the element name into prefix:localname , the string found
1276      * are within the DTD and then not associated to namespace names.
1277      */
1278     name = xmlSplitQName3(fullname, &len);
1279     if (name == NULL) {
1280         name = xmlDictLookup(ctxt->dict, fullname, -1);
1281 	prefix = NULL;
1282     } else {
1283         name = xmlDictLookup(ctxt->dict, name, -1);
1284 	prefix = xmlDictLookup(ctxt->dict, fullname, len);
1285     }
1286 
1287     /*
1288      * make sure there is some storage
1289      */
1290     defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1291     if (defaults == NULL) {
1292         defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1293 	                   (4 * 5) * sizeof(const xmlChar *));
1294 	if (defaults == NULL)
1295 	    goto mem_error;
1296 	defaults->nbAttrs = 0;
1297 	defaults->maxAttrs = 4;
1298 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1299 	                        defaults, NULL) < 0) {
1300 	    xmlFree(defaults);
1301 	    goto mem_error;
1302 	}
1303     } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1304         xmlDefAttrsPtr temp;
1305 
1306         temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1307 		       (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1308 	if (temp == NULL)
1309 	    goto mem_error;
1310 	defaults = temp;
1311 	defaults->maxAttrs *= 2;
1312 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1313 	                        defaults, NULL) < 0) {
1314 	    xmlFree(defaults);
1315 	    goto mem_error;
1316 	}
1317     }
1318 
1319     /*
1320      * Split the element name into prefix:localname , the string found
1321      * are within the DTD and hen not associated to namespace names.
1322      */
1323     name = xmlSplitQName3(fullattr, &len);
1324     if (name == NULL) {
1325         name = xmlDictLookup(ctxt->dict, fullattr, -1);
1326 	prefix = NULL;
1327     } else {
1328         name = xmlDictLookup(ctxt->dict, name, -1);
1329 	prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1330     }
1331 
1332     defaults->values[5 * defaults->nbAttrs] = name;
1333     defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1334     /* intern the string and precompute the end */
1335     len = xmlStrlen(value);
1336     value = xmlDictLookup(ctxt->dict, value, len);
1337     defaults->values[5 * defaults->nbAttrs + 2] = value;
1338     defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1339     if (ctxt->external)
1340         defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1341     else
1342         defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1343     defaults->nbAttrs++;
1344 
1345     return;
1346 
1347 mem_error:
1348     xmlErrMemory(ctxt, NULL);
1349     return;
1350 }
1351 
1352 /**
1353  * xmlAddSpecialAttr:
1354  * @ctxt:  an XML parser context
1355  * @fullname:  the element fullname
1356  * @fullattr:  the attribute fullname
1357  * @type:  the attribute type
1358  *
1359  * Register this attribute type
1360  */
1361 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1362 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1363 		  const xmlChar *fullname,
1364 		  const xmlChar *fullattr,
1365 		  int type)
1366 {
1367     if (ctxt->attsSpecial == NULL) {
1368         ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1369 	if (ctxt->attsSpecial == NULL)
1370 	    goto mem_error;
1371     }
1372 
1373     if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1374         return;
1375 
1376     xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1377                      (void *) (ptrdiff_t) type);
1378     return;
1379 
1380 mem_error:
1381     xmlErrMemory(ctxt, NULL);
1382     return;
1383 }
1384 
1385 /**
1386  * xmlCleanSpecialAttrCallback:
1387  *
1388  * Removes CDATA attributes from the special attribute table
1389  */
1390 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1391 xmlCleanSpecialAttrCallback(void *payload, void *data,
1392                             const xmlChar *fullname, const xmlChar *fullattr,
1393                             const xmlChar *unused ATTRIBUTE_UNUSED) {
1394     xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1395 
1396     if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1397         xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1398     }
1399 }
1400 
1401 /**
1402  * xmlCleanSpecialAttr:
1403  * @ctxt:  an XML parser context
1404  *
1405  * Trim the list of attributes defined to remove all those of type
1406  * CDATA as they are not special. This call should be done when finishing
1407  * to parse the DTD and before starting to parse the document root.
1408  */
1409 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1410 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1411 {
1412     if (ctxt->attsSpecial == NULL)
1413         return;
1414 
1415     xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1416 
1417     if (xmlHashSize(ctxt->attsSpecial) == 0) {
1418         xmlHashFree(ctxt->attsSpecial, NULL);
1419         ctxt->attsSpecial = NULL;
1420     }
1421     return;
1422 }
1423 
1424 /**
1425  * xmlCheckLanguageID:
1426  * @lang:  pointer to the string value
1427  *
1428  * Checks that the value conforms to the LanguageID production:
1429  *
1430  * NOTE: this is somewhat deprecated, those productions were removed from
1431  *       the XML Second edition.
1432  *
1433  * [33] LanguageID ::= Langcode ('-' Subcode)*
1434  * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1435  * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1436  * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1437  * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1438  * [38] Subcode ::= ([a-z] | [A-Z])+
1439  *
1440  * The current REC reference the successors of RFC 1766, currently 5646
1441  *
1442  * http://www.rfc-editor.org/rfc/rfc5646.txt
1443  * langtag       = language
1444  *                 ["-" script]
1445  *                 ["-" region]
1446  *                 *("-" variant)
1447  *                 *("-" extension)
1448  *                 ["-" privateuse]
1449  * language      = 2*3ALPHA            ; shortest ISO 639 code
1450  *                 ["-" extlang]       ; sometimes followed by
1451  *                                     ; extended language subtags
1452  *               / 4ALPHA              ; or reserved for future use
1453  *               / 5*8ALPHA            ; or registered language subtag
1454  *
1455  * extlang       = 3ALPHA              ; selected ISO 639 codes
1456  *                 *2("-" 3ALPHA)      ; permanently reserved
1457  *
1458  * script        = 4ALPHA              ; ISO 15924 code
1459  *
1460  * region        = 2ALPHA              ; ISO 3166-1 code
1461  *               / 3DIGIT              ; UN M.49 code
1462  *
1463  * variant       = 5*8alphanum         ; registered variants
1464  *               / (DIGIT 3alphanum)
1465  *
1466  * extension     = singleton 1*("-" (2*8alphanum))
1467  *
1468  *                                     ; Single alphanumerics
1469  *                                     ; "x" reserved for private use
1470  * singleton     = DIGIT               ; 0 - 9
1471  *               / %x41-57             ; A - W
1472  *               / %x59-5A             ; Y - Z
1473  *               / %x61-77             ; a - w
1474  *               / %x79-7A             ; y - z
1475  *
1476  * it sounds right to still allow Irregular i-xxx IANA and user codes too
1477  * The parser below doesn't try to cope with extension or privateuse
1478  * that could be added but that's not interoperable anyway
1479  *
1480  * Returns 1 if correct 0 otherwise
1481  **/
1482 int
xmlCheckLanguageID(const xmlChar * lang)1483 xmlCheckLanguageID(const xmlChar * lang)
1484 {
1485     const xmlChar *cur = lang, *nxt;
1486 
1487     if (cur == NULL)
1488         return (0);
1489     if (((cur[0] == 'i') && (cur[1] == '-')) ||
1490         ((cur[0] == 'I') && (cur[1] == '-')) ||
1491         ((cur[0] == 'x') && (cur[1] == '-')) ||
1492         ((cur[0] == 'X') && (cur[1] == '-'))) {
1493         /*
1494          * Still allow IANA code and user code which were coming
1495          * from the previous version of the XML-1.0 specification
1496          * it's deprecated but we should not fail
1497          */
1498         cur += 2;
1499         while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1500                ((cur[0] >= 'a') && (cur[0] <= 'z')))
1501             cur++;
1502         return(cur[0] == 0);
1503     }
1504     nxt = cur;
1505     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1506            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1507            nxt++;
1508     if (nxt - cur >= 4) {
1509         /*
1510          * Reserved
1511          */
1512         if ((nxt - cur > 8) || (nxt[0] != 0))
1513             return(0);
1514         return(1);
1515     }
1516     if (nxt - cur < 2)
1517         return(0);
1518     /* we got an ISO 639 code */
1519     if (nxt[0] == 0)
1520         return(1);
1521     if (nxt[0] != '-')
1522         return(0);
1523 
1524     nxt++;
1525     cur = nxt;
1526     /* now we can have extlang or script or region or variant */
1527     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1528         goto region_m49;
1529 
1530     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1531            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1532            nxt++;
1533     if (nxt - cur == 4)
1534         goto script;
1535     if (nxt - cur == 2)
1536         goto region;
1537     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1538         goto variant;
1539     if (nxt - cur != 3)
1540         return(0);
1541     /* we parsed an extlang */
1542     if (nxt[0] == 0)
1543         return(1);
1544     if (nxt[0] != '-')
1545         return(0);
1546 
1547     nxt++;
1548     cur = nxt;
1549     /* now we can have script or region or variant */
1550     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1551         goto region_m49;
1552 
1553     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1554            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1555            nxt++;
1556     if (nxt - cur == 2)
1557         goto region;
1558     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1559         goto variant;
1560     if (nxt - cur != 4)
1561         return(0);
1562     /* we parsed a script */
1563 script:
1564     if (nxt[0] == 0)
1565         return(1);
1566     if (nxt[0] != '-')
1567         return(0);
1568 
1569     nxt++;
1570     cur = nxt;
1571     /* now we can have region or variant */
1572     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1573         goto region_m49;
1574 
1575     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1576            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1577            nxt++;
1578 
1579     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1580         goto variant;
1581     if (nxt - cur != 2)
1582         return(0);
1583     /* we parsed a region */
1584 region:
1585     if (nxt[0] == 0)
1586         return(1);
1587     if (nxt[0] != '-')
1588         return(0);
1589 
1590     nxt++;
1591     cur = nxt;
1592     /* now we can just have a variant */
1593     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1594            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1595            nxt++;
1596 
1597     if ((nxt - cur < 5) || (nxt - cur > 8))
1598         return(0);
1599 
1600     /* we parsed a variant */
1601 variant:
1602     if (nxt[0] == 0)
1603         return(1);
1604     if (nxt[0] != '-')
1605         return(0);
1606     /* extensions and private use subtags not checked */
1607     return (1);
1608 
1609 region_m49:
1610     if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1611         ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1612         nxt += 3;
1613         goto region;
1614     }
1615     return(0);
1616 }
1617 
1618 /************************************************************************
1619  *									*
1620  *		Parser stacks related functions and macros		*
1621  *									*
1622  ************************************************************************/
1623 
1624 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1625                                             const xmlChar ** str);
1626 
1627 #ifdef SAX2
1628 /**
1629  * nsPush:
1630  * @ctxt:  an XML parser context
1631  * @prefix:  the namespace prefix or NULL
1632  * @URL:  the namespace name
1633  *
1634  * Pushes a new parser namespace on top of the ns stack
1635  *
1636  * Returns -1 in case of error, -2 if the namespace should be discarded
1637  *	   and the index in the stack otherwise.
1638  */
1639 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1640 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1641 {
1642     if (ctxt->options & XML_PARSE_NSCLEAN) {
1643         int i;
1644 	for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1645 	    if (ctxt->nsTab[i] == prefix) {
1646 		/* in scope */
1647 	        if (ctxt->nsTab[i + 1] == URL)
1648 		    return(-2);
1649 		/* out of scope keep it */
1650 		break;
1651 	    }
1652 	}
1653     }
1654     if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1655 	ctxt->nsMax = 10;
1656 	ctxt->nsNr = 0;
1657 	ctxt->nsTab = (const xmlChar **)
1658 	              xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1659 	if (ctxt->nsTab == NULL) {
1660 	    xmlErrMemory(ctxt, NULL);
1661 	    ctxt->nsMax = 0;
1662             return (-1);
1663 	}
1664     } else if (ctxt->nsNr >= ctxt->nsMax) {
1665         const xmlChar ** tmp;
1666         ctxt->nsMax *= 2;
1667         tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1668 				    ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1669         if (tmp == NULL) {
1670             xmlErrMemory(ctxt, NULL);
1671 	    ctxt->nsMax /= 2;
1672             return (-1);
1673         }
1674 	ctxt->nsTab = tmp;
1675     }
1676     ctxt->nsTab[ctxt->nsNr++] = prefix;
1677     ctxt->nsTab[ctxt->nsNr++] = URL;
1678     return (ctxt->nsNr);
1679 }
1680 /**
1681  * nsPop:
1682  * @ctxt: an XML parser context
1683  * @nr:  the number to pop
1684  *
1685  * Pops the top @nr parser prefix/namespace from the ns stack
1686  *
1687  * Returns the number of namespaces removed
1688  */
1689 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1690 nsPop(xmlParserCtxtPtr ctxt, int nr)
1691 {
1692     int i;
1693 
1694     if (ctxt->nsTab == NULL) return(0);
1695     if (ctxt->nsNr < nr) {
1696         xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1697         nr = ctxt->nsNr;
1698     }
1699     if (ctxt->nsNr <= 0)
1700         return (0);
1701 
1702     for (i = 0;i < nr;i++) {
1703          ctxt->nsNr--;
1704 	 ctxt->nsTab[ctxt->nsNr] = NULL;
1705     }
1706     return(nr);
1707 }
1708 #endif
1709 
1710 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1711 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1712     const xmlChar **atts;
1713     int *attallocs;
1714     int maxatts;
1715 
1716     if (ctxt->atts == NULL) {
1717 	maxatts = 55; /* allow for 10 attrs by default */
1718 	atts = (const xmlChar **)
1719 	       xmlMalloc(maxatts * sizeof(xmlChar *));
1720 	if (atts == NULL) goto mem_error;
1721 	ctxt->atts = atts;
1722 	attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1723 	if (attallocs == NULL) goto mem_error;
1724 	ctxt->attallocs = attallocs;
1725 	ctxt->maxatts = maxatts;
1726     } else if (nr + 5 > ctxt->maxatts) {
1727 	maxatts = (nr + 5) * 2;
1728 	atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1729 				     maxatts * sizeof(const xmlChar *));
1730 	if (atts == NULL) goto mem_error;
1731 	ctxt->atts = atts;
1732 	attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1733 	                             (maxatts / 5) * sizeof(int));
1734 	if (attallocs == NULL) goto mem_error;
1735 	ctxt->attallocs = attallocs;
1736 	ctxt->maxatts = maxatts;
1737     }
1738     return(ctxt->maxatts);
1739 mem_error:
1740     xmlErrMemory(ctxt, NULL);
1741     return(-1);
1742 }
1743 
1744 /**
1745  * inputPush:
1746  * @ctxt:  an XML parser context
1747  * @value:  the parser input
1748  *
1749  * Pushes a new parser input on top of the input stack
1750  *
1751  * Returns -1 in case of error, the index in the stack otherwise
1752  */
1753 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1754 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1755 {
1756     if ((ctxt == NULL) || (value == NULL))
1757         return(-1);
1758     if (ctxt->inputNr >= ctxt->inputMax) {
1759         ctxt->inputMax *= 2;
1760         ctxt->inputTab =
1761             (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1762                                              ctxt->inputMax *
1763                                              sizeof(ctxt->inputTab[0]));
1764         if (ctxt->inputTab == NULL) {
1765             xmlErrMemory(ctxt, NULL);
1766 	    xmlFreeInputStream(value);
1767 	    ctxt->inputMax /= 2;
1768 	    value = NULL;
1769             return (-1);
1770         }
1771     }
1772     ctxt->inputTab[ctxt->inputNr] = value;
1773     ctxt->input = value;
1774     return (ctxt->inputNr++);
1775 }
1776 /**
1777  * inputPop:
1778  * @ctxt: an XML parser context
1779  *
1780  * Pops the top parser input from the input stack
1781  *
1782  * Returns the input just removed
1783  */
1784 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1785 inputPop(xmlParserCtxtPtr ctxt)
1786 {
1787     xmlParserInputPtr ret;
1788 
1789     if (ctxt == NULL)
1790         return(NULL);
1791     if (ctxt->inputNr <= 0)
1792         return (NULL);
1793     ctxt->inputNr--;
1794     if (ctxt->inputNr > 0)
1795         ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1796     else
1797         ctxt->input = NULL;
1798     ret = ctxt->inputTab[ctxt->inputNr];
1799     ctxt->inputTab[ctxt->inputNr] = NULL;
1800     return (ret);
1801 }
1802 /**
1803  * nodePush:
1804  * @ctxt:  an XML parser context
1805  * @value:  the element node
1806  *
1807  * Pushes a new element node on top of the node stack
1808  *
1809  * Returns -1 in case of error, the index in the stack otherwise
1810  */
1811 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1812 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1813 {
1814     if (ctxt == NULL) return(0);
1815     if (ctxt->nodeNr >= ctxt->nodeMax) {
1816         xmlNodePtr *tmp;
1817 
1818 	tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1819                                       ctxt->nodeMax * 2 *
1820                                       sizeof(ctxt->nodeTab[0]));
1821         if (tmp == NULL) {
1822             xmlErrMemory(ctxt, NULL);
1823             return (-1);
1824         }
1825         ctxt->nodeTab = tmp;
1826 	ctxt->nodeMax *= 2;
1827     }
1828     if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1829         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1830 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1831 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1832 			  xmlParserMaxDepth);
1833 	xmlHaltParser(ctxt);
1834 	return(-1);
1835     }
1836     ctxt->nodeTab[ctxt->nodeNr] = value;
1837     ctxt->node = value;
1838     return (ctxt->nodeNr++);
1839 }
1840 
1841 /**
1842  * nodePop:
1843  * @ctxt: an XML parser context
1844  *
1845  * Pops the top element node from the node stack
1846  *
1847  * Returns the node just removed
1848  */
1849 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1850 nodePop(xmlParserCtxtPtr ctxt)
1851 {
1852     xmlNodePtr ret;
1853 
1854     if (ctxt == NULL) return(NULL);
1855     if (ctxt->nodeNr <= 0)
1856         return (NULL);
1857     ctxt->nodeNr--;
1858     if (ctxt->nodeNr > 0)
1859         ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1860     else
1861         ctxt->node = NULL;
1862     ret = ctxt->nodeTab[ctxt->nodeNr];
1863     ctxt->nodeTab[ctxt->nodeNr] = NULL;
1864     return (ret);
1865 }
1866 
1867 /**
1868  * nameNsPush:
1869  * @ctxt:  an XML parser context
1870  * @value:  the element name
1871  * @prefix:  the element prefix
1872  * @URI:  the element namespace name
1873  * @line:  the current line number for error messages
1874  * @nsNr:  the number of namespaces pushed on the namespace table
1875  *
1876  * Pushes a new element name/prefix/URL on top of the name stack
1877  *
1878  * Returns -1 in case of error, the index in the stack otherwise
1879  */
1880 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr)1881 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1882            const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1883 {
1884     xmlStartTag *tag;
1885 
1886     if (ctxt->nameNr >= ctxt->nameMax) {
1887         const xmlChar * *tmp;
1888         xmlStartTag *tmp2;
1889         ctxt->nameMax *= 2;
1890         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1891                                     ctxt->nameMax *
1892                                     sizeof(ctxt->nameTab[0]));
1893         if (tmp == NULL) {
1894 	    ctxt->nameMax /= 2;
1895 	    goto mem_error;
1896         }
1897 	ctxt->nameTab = tmp;
1898         tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1899                                     ctxt->nameMax *
1900                                     sizeof(ctxt->pushTab[0]));
1901         if (tmp2 == NULL) {
1902 	    ctxt->nameMax /= 2;
1903 	    goto mem_error;
1904         }
1905 	ctxt->pushTab = tmp2;
1906     } else if (ctxt->pushTab == NULL) {
1907         ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1908                                             sizeof(ctxt->pushTab[0]));
1909         if (ctxt->pushTab == NULL)
1910             goto mem_error;
1911     }
1912     ctxt->nameTab[ctxt->nameNr] = value;
1913     ctxt->name = value;
1914     tag = &ctxt->pushTab[ctxt->nameNr];
1915     tag->prefix = prefix;
1916     tag->URI = URI;
1917     tag->line = line;
1918     tag->nsNr = nsNr;
1919     return (ctxt->nameNr++);
1920 mem_error:
1921     xmlErrMemory(ctxt, NULL);
1922     return (-1);
1923 }
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the topmost element name from the name stack and makes the
 * entry below it, if any, the current name. Only the name table is
 * touched here; the start-tag table slot is left as it is.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;

    if (ctxt->nameNr <= 0)
        return (NULL);

    ctxt->nameNr -= 1;

    popped = ctxt->nameTab[ctxt->nameNr];
    ctxt->nameTab[ctxt->nameNr] = NULL;

    ctxt->name = (ctxt->nameNr > 0) ? ctxt->nameTab[ctxt->nameNr - 1] : NULL;

    return (popped);
}
#endif /* LIBXML_PUSH_ENABLED */
1950 
1951 /**
1952  * namePush:
1953  * @ctxt:  an XML parser context
1954  * @value:  the element name
1955  *
1956  * Pushes a new element name on top of the name stack
1957  *
1958  * Returns -1 in case of error, the index in the stack otherwise
1959  */
1960 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1961 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1962 {
1963     if (ctxt == NULL) return (-1);
1964 
1965     if (ctxt->nameNr >= ctxt->nameMax) {
1966         const xmlChar * *tmp;
1967         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1968                                     ctxt->nameMax * 2 *
1969                                     sizeof(ctxt->nameTab[0]));
1970         if (tmp == NULL) {
1971 	    goto mem_error;
1972         }
1973 	ctxt->nameTab = tmp;
1974         ctxt->nameMax *= 2;
1975     }
1976     ctxt->nameTab[ctxt->nameNr] = value;
1977     ctxt->name = value;
1978     return (ctxt->nameNr++);
1979 mem_error:
1980     xmlErrMemory(ctxt, NULL);
1981     return (-1);
1982 }
1983 /**
1984  * namePop:
1985  * @ctxt: an XML parser context
1986  *
1987  * Pops the top element name from the name stack
1988  *
1989  * Returns the name just removed
1990  */
1991 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1992 namePop(xmlParserCtxtPtr ctxt)
1993 {
1994     const xmlChar *ret;
1995 
1996     if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1997         return (NULL);
1998     ctxt->nameNr--;
1999     if (ctxt->nameNr > 0)
2000         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2001     else
2002         ctxt->name = NULL;
2003     ret = ctxt->nameTab[ctxt->nameNr];
2004     ctxt->nameTab[ctxt->nameNr] = NULL;
2005     return (ret);
2006 }
2007 
/*
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to store
 *
 * Pushes @val on top of the space stack, doubling the table when it is
 * full, and points ctxt->space at the new top entry.
 *
 * Returns -1 in case of allocation failure, the index in the stack
 *         otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* Undo the doubling: the table still has its old size. */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
    *ctxt->space = val;

    return(ctxt->spaceNr++);
}
2026 
/*
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value from the space stack, overwrites the vacated slot
 * with -1 and points ctxt->space at the new top entry (or at the first
 * slot of the table when the stack becomes empty).
 *
 * Returns the value just removed, or 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    ctxt->spaceNr -= 1;

    popped = ctxt->spaceTab[ctxt->spaceNr];
    ctxt->spaceTab[ctxt->spaceNr] = -1;

    /* ctxt->space never becomes NULL here: fall back to slot 0. */
    ctxt->space = &ctxt->spaceTab[(ctxt->spaceNr > 0) ? ctxt->spaceNr - 1 : 0];

    return(popped);
}
2039 
2040 /*
2041  * Macros for accessing the content. Those should be used only by the parser,
2042  * and not exported.
2043  *
2044  * Dirty macros, i.e. one often needs to make assumptions about the context
2045  * to use them
2046  *
2047  *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2048  *           To be used with extreme caution since operations consuming
2049  *           characters may move the input buffer to a different location !
2050  *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
2051  *           This should be used internally by the parser
2052  *           only to compare to ASCII values, otherwise it would break when
2053  *           running with UTF-8 encoding.
2054  *   RAW     same as CUR but in the input buffer, bypass any token
2055  *           extraction that may have been done
2056  *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
2057  *           only to compare against ASCII based substrings.
2058  *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2059  *           strings without newlines within the parser.
2060  *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
2061  *           ASCII defined char within the parser.
2062  * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2063  *
2064  *   NEXT    Skip to the next character, this does the proper decoding
2065  *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2066  *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2067  *   CUR_CHAR(l) returns the current unicode character (int), set l
2068  *           to the number of xmlChars used for the encoding [0-5].
2069  *   CUR_SCHAR  same but operate on a string instead of the context
2070  *   COPY_BUF  copy the current unicode char to the target buffer, increment
2071  *            the index
2072  *   GROW, SHRINK  handling of input buffers
2073  */
2074 
/*
 * Raw accessors on the current parser input. Every macro below expands
 * to an expression that uses a variable named ctxt taken from the scope
 * where the macro is used.
 *   RAW, CUR  the xmlChar at ctxt->input->cur (both expand to the same text)
 *   NXT(val)  the xmlChar val positions after ctxt->input->cur
 *   CUR_PTR   the current position pointer itself
 *   BASE_PTR  the base pointer of the current input
 */
2075 #define RAW (*ctxt->input->cur)
2076 #define CUR (*ctxt->input->cur)
2077 #define NXT(val) ctxt->input->cur[(val)]
2078 #define CUR_PTR ctxt->input->cur
2079 #define BASE_PTR ctxt->input->base
2080 
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s, read as
 * unsigned char, are equal to c1 ... cn in that order. Each macro is
 * built from the previous one plus one more comparison, and the
 * comparisons are chained with &&, so evaluation stops at the first
 * mismatch.
 * NOTE(review): s and the cN arguments are not parenthesized in the
 * expansions and s is expanded several times; pass simple expressions
 * without side effects.
 */
2081 #define CMP4( s, c1, c2, c3, c4 ) \
2082   ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
2083     ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
2084 #define CMP5( s, c1, c2, c3, c4, c5 ) \
2085   ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
2086 #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
2087   ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
2088 #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
2089   ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
2090 #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
2091   ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
2092 #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
2093   ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
2094     ((unsigned char *) s)[ 8 ] == c9 )
2095 #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
2096   ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
2097     ((unsigned char *) s)[ 9 ] == c10 )
2098 
/*
 * SKIP(val): advance both the current pointer and the column counter of
 * the current input by val. The line counter is not touched. If the
 * byte at the new position is 0, xmlParserInputGrow() is called with
 * INPUT_CHUNK.
 */
2099 #define SKIP(val) do {							\
2100     ctxt->input->cur += (val),ctxt->input->col+=(val);			\
2101     if (*ctxt->input->cur == 0)						\
2102         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
2103   } while (0)
2104 
/*
 * SKIPL(val): advance the current pointer by val positions one at a
 * time. For each position a '\n' increments the line counter and resets
 * the column to 1, any other byte increments the column. If the byte at
 * the final position is 0, xmlParserInputGrow() is called with
 * INPUT_CHUNK.
 * NOTE(review): val is not parenthesized in the loop condition and is
 * re-evaluated on every iteration.
 */
2105 #define SKIPL(val) do {							\
2106     int skipl;								\
2107     for(skipl=0; skipl<val; skipl++) {					\
2108 	if (*(ctxt->input->cur) == '\n') {				\
2109 	ctxt->input->line++; ctxt->input->col = 1;			\
2110 	} else ctxt->input->col++;					\
2111 	ctxt->input->cur++;						\
2112     }									\
2113     if (*ctxt->input->cur == 0)						\
2114         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
2115   } while (0)
2116 
/*
 * SHRINK: call xmlSHRINK(ctxt) when ctxt->progressive is 0, the current
 * position is more than 2 * INPUT_CHUNK past the input base and fewer
 * than 2 * INPUT_CHUNK bytes remain before the input end.
 * NOTE(review): this expands to a bare if statement that already ends
 * with a semicolon, so it is not safe as the body of an if that has an
 * else branch.
 */
2117 #define SHRINK if ((ctxt->progressive == 0) &&				\
2118 		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2119 		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2120 	xmlSHRINK (ctxt);
2121 
/*
 * xmlSHRINK:
 * @ctxt:  an XML parser context
 *
 * Helper behind the SHRINK macro: shrinks the current input with
 * xmlParserInputShrink() and, if the current byte is 0 afterwards,
 * asks for INPUT_CHUNK more data with xmlParserInputGrow().
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputShrink(ctxt->input);

    if (ctxt->input->cur[0] != 0)
        return;

    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
2127 
/*
 * GROW: call xmlGROW(ctxt) when ctxt->progressive is 0 and fewer than
 * INPUT_CHUNK bytes remain between the current position and the end of
 * the current input.
 * NOTE(review): this expands to a bare if statement that already ends
 * with a semicolon, so it is not safe as the body of an if that has an
 * else branch.
 */
2128 #define GROW if ((ctxt->progressive == 0) &&				\
2129 		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
2130 	xmlGROW (ctxt);
2131 
/*
 * xmlGROW:
 * @ctxt:  an XML parser context
 *
 * Helper behind the GROW macro: asks for INPUT_CHUNK more data on the
 * current input.
 *
 * Before growing, the parser is halted with an "Huge input lookup"
 * error when all of the following hold: XML_PARSE_HUGE is not set, the
 * input has a buffer whose read callback is not
 * xmlInputReadCallbackNop, and either the distance from the current
 * position to the input end or from the input base to the current
 * position exceeds XML_MAX_LOOKUP_LIMIT.
 *
 * After growing, the parser is halted with a "cur index out of bound"
 * error if the current pointer lies outside [base, end]. Otherwise, if
 * the current byte is 0, one more grow is attempted.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    ptrdiff_t remaining = ctxt->input->end - ctxt->input->cur;
    ptrdiff_t consumed = ctxt->input->cur - ctxt->input->base;

    if ((ctxt->options & XML_PARSE_HUGE) == 0) {
        int overLimit = (remaining > XML_MAX_LOOKUP_LIMIT) ||
                        (consumed > XML_MAX_LOOKUP_LIMIT);
        int hasReader = (ctxt->input->buf) &&
                        (ctxt->input->buf->readcallback !=
                         xmlInputReadCallbackNop);

        if (overLimit && hasReader) {
            xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
            xmlHaltParser(ctxt);
            return;
        }
    }

    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);

    if ((ctxt->input->cur <= ctxt->input->end) &&
        (ctxt->input->cur >= ctxt->input->base)) {
        if ((ctxt->input->cur != NULL) && (ctxt->input->cur[0] == 0))
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
        return;
    }

    /* The current pointer escaped the buffer: stop before reporting. */
    xmlHaltParser(ctxt);
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
}
2155 
/*
 * SKIP_BLANKS and NEXT are shorthands for calling xmlSkipBlankChars()
 * and xmlNextChar() on the variable named ctxt of the enclosing scope.
 */
2156 #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2157 
2158 #define NEXT xmlNextChar(ctxt)
2159 
/*
 * NEXT1: advance the column counter and the current pointer by one. The
 * line counter is not touched. If the byte at the new position is 0,
 * xmlParserInputGrow() is called with INPUT_CHUNK.
 * NOTE(review): expands to a plain { } block rather than
 * do { } while (0), so "NEXT1;" is a block followed by an empty
 * statement.
 */
2160 #define NEXT1 {								\
2161 	ctxt->input->col++;						\
2162 	ctxt->input->cur++;						\
2163 	if (*ctxt->input->cur == 0)					\
2164 	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
2165     }
2166 
/*
 * NEXTL(l): account for one character in the line/column counters (a
 * '\n' at the current position increments the line and resets the
 * column to 1, anything else increments the column), then advance the
 * current pointer by l. Unlike SKIP and NEXT1 it never calls
 * xmlParserInputGrow().
 * NOTE(review): l is not parenthesized in the expansion.
 */
2167 #define NEXTL(l) do {							\
2168     if (*(ctxt->input->cur) == '\n') {					\
2169 	ctxt->input->line++; ctxt->input->col = 1;			\
2170     } else ctxt->input->col++;						\
2171     ctxt->input->cur += l;				\
2172   } while (0)
2173 
/*
 * CUR_CHAR(l) and CUR_SCHAR(s, l) forward to xmlCurrentChar() and
 * xmlStringCurrentChar(); l must be an lvalue because its address is
 * passed to the callee.
 */
2174 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2175 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2176 
/*
 * COPY_BUF(l,b,i,v): when l is 1, store v as a single xmlChar at b[i]
 * and increment i; otherwise add the return value of
 * xmlCopyCharMultiByte(&b[i], v) to i (presumably the number of bytes
 * it wrote - confirm against its definition).
 * NOTE(review): expands to an unbraced if/else with no trailing
 * semicolon and unparenthesized arguments; it will capture a following
 * else if used as the body of another if.
 */
2177 #define COPY_BUF(l,b,i,v)						\
2178     if (l == 1) b[i++] = (xmlChar) v;					\
2179     else i += xmlCopyCharMultiByte(&b[i],v)
2180 
2181 /**
2182  * xmlSkipBlankChars:
2183  * @ctxt:  the XML parser context
2184  *
2185  * skip all blanks character found at that point in the input streams.
2186  * It pops up finished entities in the process if allowable at that point.
2187  *
2188  * Returns the number of space chars skipped
2189  */
2190 
2191 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2192 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2193     int res = 0;
2194 
2195     /*
2196      * It's Okay to use CUR/NEXT here since all the blanks are on
2197      * the ASCII range.
2198      */
2199     if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2200         (ctxt->instate == XML_PARSER_START)) {
2201 	const xmlChar *cur;
2202 	/*
2203 	 * if we are in the document content, go really fast
2204 	 */
2205 	cur = ctxt->input->cur;
2206 	while (IS_BLANK_CH(*cur)) {
2207 	    if (*cur == '\n') {
2208 		ctxt->input->line++; ctxt->input->col = 1;
2209 	    } else {
2210 		ctxt->input->col++;
2211 	    }
2212 	    cur++;
2213 	    res++;
2214 	    if (*cur == 0) {
2215 		ctxt->input->cur = cur;
2216 		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2217 		cur = ctxt->input->cur;
2218 	    }
2219 	}
2220 	ctxt->input->cur = cur;
2221     } else {
2222         int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2223 
2224 	while (1) {
2225             if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2226 		NEXT;
2227 	    } else if (CUR == '%') {
2228                 /*
2229                  * Need to handle support of entities branching here
2230                  */
2231 	        if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2232                     break;
2233 	        xmlParsePEReference(ctxt);
2234             } else if (CUR == 0) {
2235                 if (ctxt->inputNr <= 1)
2236                     break;
2237                 xmlPopInput(ctxt);
2238             } else {
2239                 break;
2240             }
2241 
2242             /*
2243              * Also increase the counter when entering or exiting a PERef.
2244              * The spec says: "When a parameter-entity reference is recognized
2245              * in the DTD and included, its replacement text MUST be enlarged
2246              * by the attachment of one leading and one following space (#x20)
2247              * character."
2248              */
2249 	    res++;
2250         }
2251     }
2252     return(res);
2253 }
2254 
2255 /************************************************************************
2256  *									*
2257  *		Commodity functions to handle entities			*
2258  *									*
2259  ************************************************************************/
2260 
2261 /**
2262  * xmlPopInput:
2263  * @ctxt:  an XML parser context
2264  *
2265  * xmlPopInput: the current input pointed by ctxt->input came to an end
2266  *          pop it and return the next char.
2267  *
2268  * Returns the current xmlChar in the parser context
2269  */
2270 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2271 xmlPopInput(xmlParserCtxtPtr ctxt) {
2272     if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2273     if (xmlParserDebugEntities)
2274 	xmlGenericError(xmlGenericErrorContext,
2275 		"Popping input %d\n", ctxt->inputNr);
2276     if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2277         (ctxt->instate != XML_PARSER_EOF))
2278         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2279                     "Unfinished entity outside the DTD");
2280     xmlFreeInputStream(inputPop(ctxt));
2281     if (*ctxt->input->cur == 0)
2282         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2283     return(CUR);
2284 }
2285 
2286 /**
2287  * xmlPushInput:
2288  * @ctxt:  an XML parser context
2289  * @input:  an XML parser input fragment (entity, XML fragment ...).
2290  *
2291  * xmlPushInput: switch to a new input stream which is stacked on top
2292  *               of the previous one(s).
2293  * Returns -1 in case of error or the index in the input stack
2294  */
2295 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2296 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2297     int ret;
2298     if (input == NULL) return(-1);
2299 
2300     if (xmlParserDebugEntities) {
2301 	if ((ctxt->input != NULL) && (ctxt->input->filename))
2302 	    xmlGenericError(xmlGenericErrorContext,
2303 		    "%s(%d): ", ctxt->input->filename,
2304 		    ctxt->input->line);
2305 	xmlGenericError(xmlGenericErrorContext,
2306 		"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2307     }
2308     if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2309         (ctxt->inputNr > 1024)) {
2310         xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2311         while (ctxt->inputNr > 1)
2312             xmlFreeInputStream(inputPop(ctxt));
2313 	return(-1);
2314     }
2315     ret = inputPush(ctxt, input);
2316     if (ctxt->instate == XML_PARSER_EOF)
2317         return(-1);
2318     GROW;
2319     return(ret);
2320 }
2321 
2322 /**
2323  * xmlParseCharRef:
2324  * @ctxt:  an XML parser context
2325  *
2326  * parse Reference declarations
2327  *
2328  * [66] CharRef ::= '&#' [0-9]+ ';' |
2329  *                  '&#x' [0-9a-fA-F]+ ';'
2330  *
2331  * [ WFC: Legal Character ]
2332  * Characters referred to using character references must match the
2333  * production for Char.
2334  *
2335  * Returns the value parsed (as an int), 0 in case of error
2336  */
2337 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2338 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2339     int val = 0;
2340     int count = 0;
2341 
2342     /*
2343      * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2344      */
2345     if ((RAW == '&') && (NXT(1) == '#') &&
2346         (NXT(2) == 'x')) {
2347 	SKIP(3);
2348 	GROW;
2349 	while (RAW != ';') { /* loop blocked by count */
2350 	    if (count++ > 20) {
2351 		count = 0;
2352 		GROW;
2353                 if (ctxt->instate == XML_PARSER_EOF)
2354                     return(0);
2355 	    }
2356 	    if ((RAW >= '0') && (RAW <= '9'))
2357 	        val = val * 16 + (CUR - '0');
2358 	    else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2359 	        val = val * 16 + (CUR - 'a') + 10;
2360 	    else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2361 	        val = val * 16 + (CUR - 'A') + 10;
2362 	    else {
2363 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2364 		val = 0;
2365 		break;
2366 	    }
2367 	    if (val > 0x110000)
2368 	        val = 0x110000;
2369 
2370 	    NEXT;
2371 	    count++;
2372 	}
2373 	if (RAW == ';') {
2374 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2375 	    ctxt->input->col++;
2376 	    ctxt->input->cur++;
2377 	}
2378     } else if  ((RAW == '&') && (NXT(1) == '#')) {
2379 	SKIP(2);
2380 	GROW;
2381 	while (RAW != ';') { /* loop blocked by count */
2382 	    if (count++ > 20) {
2383 		count = 0;
2384 		GROW;
2385                 if (ctxt->instate == XML_PARSER_EOF)
2386                     return(0);
2387 	    }
2388 	    if ((RAW >= '0') && (RAW <= '9'))
2389 	        val = val * 10 + (CUR - '0');
2390 	    else {
2391 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2392 		val = 0;
2393 		break;
2394 	    }
2395 	    if (val > 0x110000)
2396 	        val = 0x110000;
2397 
2398 	    NEXT;
2399 	    count++;
2400 	}
2401 	if (RAW == ';') {
2402 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2403 	    ctxt->input->col++;
2404 	    ctxt->input->cur++;
2405 	}
2406     } else {
2407         xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2408     }
2409 
2410     /*
2411      * [ WFC: Legal Character ]
2412      * Characters referred to using character references must match the
2413      * production for Char.
2414      */
2415     if (val >= 0x110000) {
2416         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2417                 "xmlParseCharRef: character reference out of bounds\n",
2418 	        val);
2419     } else if (IS_CHAR(val)) {
2420         return(val);
2421     } else {
2422         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2423                           "xmlParseCharRef: invalid xmlChar value %d\n",
2424 	                  val);
2425     }
2426     return(0);
2427 }
2428 
2429 /**
2430  * xmlParseStringCharRef:
2431  * @ctxt:  an XML parser context
2432  * @str:  a pointer to an index in the string
2433  *
2434  * parse Reference declarations, variant parsing from a string rather
2435  * than an an input flow.
2436  *
2437  * [66] CharRef ::= '&#' [0-9]+ ';' |
2438  *                  '&#x' [0-9a-fA-F]+ ';'
2439  *
2440  * [ WFC: Legal Character ]
2441  * Characters referred to using character references must match the
2442  * production for Char.
2443  *
2444  * Returns the value parsed (as an int), 0 in case of error, str will be
2445  *         updated to the current value of the index
2446  */
2447 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2448 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2449     const xmlChar *ptr;
2450     xmlChar cur;
2451     int val = 0;
2452 
2453     if ((str == NULL) || (*str == NULL)) return(0);
2454     ptr = *str;
2455     cur = *ptr;
2456     if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2457 	ptr += 3;
2458 	cur = *ptr;
2459 	while (cur != ';') { /* Non input consuming loop */
2460 	    if ((cur >= '0') && (cur <= '9'))
2461 	        val = val * 16 + (cur - '0');
2462 	    else if ((cur >= 'a') && (cur <= 'f'))
2463 	        val = val * 16 + (cur - 'a') + 10;
2464 	    else if ((cur >= 'A') && (cur <= 'F'))
2465 	        val = val * 16 + (cur - 'A') + 10;
2466 	    else {
2467 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2468 		val = 0;
2469 		break;
2470 	    }
2471 	    if (val > 0x110000)
2472 	        val = 0x110000;
2473 
2474 	    ptr++;
2475 	    cur = *ptr;
2476 	}
2477 	if (cur == ';')
2478 	    ptr++;
2479     } else if  ((cur == '&') && (ptr[1] == '#')){
2480 	ptr += 2;
2481 	cur = *ptr;
2482 	while (cur != ';') { /* Non input consuming loops */
2483 	    if ((cur >= '0') && (cur <= '9'))
2484 	        val = val * 10 + (cur - '0');
2485 	    else {
2486 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2487 		val = 0;
2488 		break;
2489 	    }
2490 	    if (val > 0x110000)
2491 	        val = 0x110000;
2492 
2493 	    ptr++;
2494 	    cur = *ptr;
2495 	}
2496 	if (cur == ';')
2497 	    ptr++;
2498     } else {
2499 	xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2500 	return(0);
2501     }
2502     *str = ptr;
2503 
2504     /*
2505      * [ WFC: Legal Character ]
2506      * Characters referred to using character references must match the
2507      * production for Char.
2508      */
2509     if (val >= 0x110000) {
2510         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2511                 "xmlParseStringCharRef: character reference out of bounds\n",
2512                 val);
2513     } else if (IS_CHAR(val)) {
2514         return(val);
2515     } else {
2516         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2517 			  "xmlParseStringCharRef: invalid xmlChar value %d\n",
2518 			  val);
2519     }
2520     return(0);
2521 }
2522 
2523 /**
2524  * xmlParserHandlePEReference:
2525  * @ctxt:  the parser context
2526  *
2527  * [69] PEReference ::= '%' Name ';'
2528  *
2529  * [ WFC: No Recursion ]
2530  * A parsed entity must not contain a recursive
2531  * reference to itself, either directly or indirectly.
2532  *
2533  * [ WFC: Entity Declared ]
2534  * In a document without any DTD, a document with only an internal DTD
2535  * subset which contains no parameter entity references, or a document
2536  * with "standalone='yes'", ...  ... The declaration of a parameter
2537  * entity must precede any reference to it...
2538  *
2539  * [ VC: Entity Declared ]
2540  * In a document with an external subset or external parameter entities
2541  * with "standalone='no'", ...  ... The declaration of a parameter entity
2542  * must precede any reference to it...
2543  *
2544  * [ WFC: In DTD ]
2545  * Parameter-entity references may only appear in the DTD.
2546  * NOTE: misleading but this is handled.
2547  *
2548  * A PEReference may have been detected in the current input stream
2549  * the handling is done accordingly to
2550  *      http://www.w3.org/TR/REC-xml#entproc
2551  * i.e.
2552  *   - Included in literal in entity values
2553  *   - Included as Parameter Entity reference within DTDs
2554  */
2555 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2556 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2557     switch(ctxt->instate) {
2558 	case XML_PARSER_CDATA_SECTION:
2559 	    return;
2560         case XML_PARSER_COMMENT:
2561 	    return;
2562 	case XML_PARSER_START_TAG:
2563 	    return;
2564 	case XML_PARSER_END_TAG:
2565 	    return;
2566         case XML_PARSER_EOF:
2567 	    xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2568 	    return;
2569         case XML_PARSER_PROLOG:
2570 	case XML_PARSER_START:
2571 	case XML_PARSER_MISC:
2572 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2573 	    return;
2574 	case XML_PARSER_ENTITY_DECL:
2575         case XML_PARSER_CONTENT:
2576         case XML_PARSER_ATTRIBUTE_VALUE:
2577         case XML_PARSER_PI:
2578 	case XML_PARSER_SYSTEM_LITERAL:
2579 	case XML_PARSER_PUBLIC_LITERAL:
2580 	    /* we just ignore it there */
2581 	    return;
2582         case XML_PARSER_EPILOG:
2583 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2584 	    return;
2585 	case XML_PARSER_ENTITY_VALUE:
2586 	    /*
2587 	     * NOTE: in the case of entity values, we don't do the
2588 	     *       substitution here since we need the literal
2589 	     *       entity value to be able to save the internal
2590 	     *       subset of the document.
2591 	     *       This will be handled by xmlStringDecodeEntities
2592 	     */
2593 	    return;
2594         case XML_PARSER_DTD:
2595 	    /*
2596 	     * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2597 	     * In the internal DTD subset, parameter-entity references
2598 	     * can occur only where markup declarations can occur, not
2599 	     * within markup declarations.
2600 	     * In that case this is handled in xmlParseMarkupDecl
2601 	     */
2602 	    if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2603 		return;
2604 	    if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2605 		return;
2606             break;
2607         case XML_PARSER_IGNORE:
2608             return;
2609     }
2610 
2611     xmlParsePEReference(ctxt);
2612 }
2613 
2614 /*
2615  * Macro used to grow the current buffer.
 * The buffer is reallocated with xmlRealloc() to a new size of
 * buffer##_size * 2 + n bytes; on success both buffer and
 * buffer##_size are updated. If the new size wraps around (ends up
 * smaller than the current one) or the reallocation fails, control
 * jumps to mem_error with buffer and buffer##_size left unchanged.
2616  * buffer##_size is expected to be a size_t
2617  * mem_error: is expected to handle memory allocation failures
 * NOTE(review): the expansion is a plain { } block declaring locals
 * named tmp and new_size, and n is not parenthesized; avoid passing
 * arguments that use those names.
2618  */
2619 #define growBuffer(buffer, n) {						\
2620     xmlChar *tmp;							\
2621     size_t new_size = buffer##_size * 2 + n;                            \
2622     if (new_size < buffer##_size) goto mem_error;                       \
2623     tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
2624     if (tmp == NULL) goto mem_error;					\
2625     buffer = tmp;							\
2626     buffer##_size = new_size;                                           \
2627 }
2628 
2629 /**
2630  * xmlStringLenDecodeEntities:
2631  * @ctxt:  the parser context
2632  * @str:  the input string
2633  * @len: the string length
2634  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2635  * @end:  an end marker xmlChar, 0 if none
2636  * @end2:  an end marker xmlChar, 0 if none
2637  * @end3:  an end marker xmlChar, 0 if none
2638  *
2639  * Takes a entity string content and process to do the adequate substitutions.
2640  *
2641  * [67] Reference ::= EntityRef | CharRef
2642  *
2643  * [69] PEReference ::= '%' Name ';'
2644  *
2645  * Returns A newly allocated string with the substitution done. The caller
2646  *      must deallocate it !
2647  */
2648 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what,xmlChar end,xmlChar end2,xmlChar end3)2649 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2650 		      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2651     xmlChar *buffer = NULL;
2652     size_t buffer_size = 0;
2653     size_t nbchars = 0;
2654 
2655     xmlChar *current = NULL;
2656     xmlChar *rep = NULL;
2657     const xmlChar *last;
2658     xmlEntityPtr ent;
2659     int c,l;
2660 
2661     if ((ctxt == NULL) || (str == NULL) || (len < 0))
2662 	return(NULL);
2663     last = str + len;
2664 
2665     if (((ctxt->depth > 40) &&
2666          ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2667 	(ctxt->depth > 1024)) {
2668 	xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2669 	return(NULL);
2670     }
2671 
2672     /*
2673      * allocate a translation buffer.
2674      */
2675     buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2676     buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2677     if (buffer == NULL) goto mem_error;
2678 
2679     /*
2680      * OK loop until we reach one of the ending char or a size limit.
2681      * we are operating on already parsed values.
2682      */
2683     if (str < last)
2684 	c = CUR_SCHAR(str, l);
2685     else
2686         c = 0;
2687     while ((c != 0) && (c != end) && /* non input consuming loop */
2688            (c != end2) && (c != end3) &&
2689            (ctxt->instate != XML_PARSER_EOF)) {
2690 
2691 	if (c == 0) break;
2692         if ((c == '&') && (str[1] == '#')) {
2693 	    int val = xmlParseStringCharRef(ctxt, &str);
2694 	    if (val == 0)
2695                 goto int_error;
2696 	    COPY_BUF(0,buffer,nbchars,val);
2697 	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2698 	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2699 	    }
2700 	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2701 	    if (xmlParserDebugEntities)
2702 		xmlGenericError(xmlGenericErrorContext,
2703 			"String decoding Entity Reference: %.30s\n",
2704 			str);
2705 	    ent = xmlParseStringEntityRef(ctxt, &str);
2706 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
2707 	    if (ent != NULL)
2708 	        ctxt->nbentities += ent->checked / 2;
2709 	    if ((ent != NULL) &&
2710 		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2711 		if (ent->content != NULL) {
2712 		    COPY_BUF(0,buffer,nbchars,ent->content[0]);
2713 		    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2714 			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2715 		    }
2716 		} else {
2717 		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2718 			    "predefined entity has no content\n");
2719                     goto int_error;
2720 		}
2721 	    } else if ((ent != NULL) && (ent->content != NULL)) {
2722 		ctxt->depth++;
2723 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2724 			                      0, 0, 0);
2725 		ctxt->depth--;
2726 		if (rep == NULL) {
2727                     ent->content[0] = 0;
2728                     goto int_error;
2729                 }
2730 
2731                 current = rep;
2732                 while (*current != 0) { /* non input consuming loop */
2733                     buffer[nbchars++] = *current++;
2734                     if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2735                         if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2736                             goto int_error;
2737                         growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2738                     }
2739                 }
2740                 xmlFree(rep);
2741                 rep = NULL;
2742 	    } else if (ent != NULL) {
2743 		int i = xmlStrlen(ent->name);
2744 		const xmlChar *cur = ent->name;
2745 
2746 		buffer[nbchars++] = '&';
2747 		if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2748 		    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2749 		}
2750 		for (;i > 0;i--)
2751 		    buffer[nbchars++] = *cur++;
2752 		buffer[nbchars++] = ';';
2753 	    }
2754 	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2755 	    if (xmlParserDebugEntities)
2756 		xmlGenericError(xmlGenericErrorContext,
2757 			"String decoding PE Reference: %.30s\n", str);
2758 	    ent = xmlParseStringPEReference(ctxt, &str);
2759 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
2760 	    if (ent != NULL)
2761 	        ctxt->nbentities += ent->checked / 2;
2762 	    if (ent != NULL) {
2763                 if (ent->content == NULL) {
2764 		    /*
2765 		     * Note: external parsed entities will not be loaded,
2766 		     * it is not required for a non-validating parser to
2767 		     * complete external PEReferences coming from the
2768 		     * internal subset
2769 		     */
2770 		    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2771 			((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2772 			(ctxt->validate != 0)) {
2773 			xmlLoadEntityContent(ctxt, ent);
2774 		    } else {
2775 			xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2776 		  "not validating will not read content for PE entity %s\n",
2777 		                      ent->name, NULL);
2778 		    }
2779 		}
2780 		ctxt->depth++;
2781 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2782 			                      0, 0, 0);
2783 		ctxt->depth--;
2784 		if (rep == NULL) {
2785                     if (ent->content != NULL)
2786                         ent->content[0] = 0;
2787                     goto int_error;
2788                 }
2789                 current = rep;
2790                 while (*current != 0) { /* non input consuming loop */
2791                     buffer[nbchars++] = *current++;
2792                     if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2793                         if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2794                             goto int_error;
2795                         growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2796                     }
2797                 }
2798                 xmlFree(rep);
2799                 rep = NULL;
2800 	    }
2801 	} else {
2802 	    COPY_BUF(l,buffer,nbchars,c);
2803 	    str += l;
2804 	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2805 	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2806 	    }
2807 	}
2808 	if (str < last)
2809 	    c = CUR_SCHAR(str, l);
2810 	else
2811 	    c = 0;
2812     }
2813     buffer[nbchars] = 0;
2814     return(buffer);
2815 
2816 mem_error:
2817     xmlErrMemory(ctxt, NULL);
2818 int_error:
2819     if (rep != NULL)
2820         xmlFree(rep);
2821     if (buffer != NULL)
2822         xmlFree(buffer);
2823     return(NULL);
2824 }
2825 
2826 /**
2827  * xmlStringDecodeEntities:
2828  * @ctxt:  the parser context
2829  * @str:  the input string
2830  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2831  * @end:  an end marker xmlChar, 0 if none
2832  * @end2:  an end marker xmlChar, 0 if none
2833  * @end3:  an end marker xmlChar, 0 if none
2834  *
2835  * Takes a entity string content and process to do the adequate substitutions.
2836  *
2837  * [67] Reference ::= EntityRef | CharRef
2838  *
2839  * [69] PEReference ::= '%' Name ';'
2840  *
2841  * Returns A newly allocated string with the substitution done. The caller
2842  *      must deallocate it !
2843  */
2844 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2845 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2846 		        xmlChar end, xmlChar  end2, xmlChar end3) {
2847     if ((ctxt == NULL) || (str == NULL)) return(NULL);
2848     return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2849            end, end2, end3));
2850 }
2851 
2852 /************************************************************************
2853  *									*
2854  *		Commodity functions, cleanup needed ?			*
2855  *									*
2856  ************************************************************************/
2857 
2858 /**
2859  * areBlanks:
2860  * @ctxt:  an XML parser context
2861  * @str:  a xmlChar *
2862  * @len:  the size of @str
2863  * @blank_chars: we know the chars are blanks
2864  *
2865  * Is this a sequence of blank chars that one can ignore ?
2866  *
2867  * Returns 1 if ignorable 0 otherwise.
2868  */
2869 
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2870 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2871                      int blank_chars) {
2872     int i, ret;
2873     xmlNodePtr lastChild;
2874 
2875     /*
2876      * Don't spend time trying to differentiate them, the same callback is
2877      * used !
2878      */
2879     if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2880 	return(0);
2881 
2882     /*
2883      * Check for xml:space value.
2884      */
2885     if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2886         (*(ctxt->space) == -2))
2887 	return(0);
2888 
2889     /*
2890      * Check that the string is made of blanks
2891      */
2892     if (blank_chars == 0) {
2893 	for (i = 0;i < len;i++)
2894 	    if (!(IS_BLANK_CH(str[i]))) return(0);
2895     }
2896 
2897     /*
2898      * Look if the element is mixed content in the DTD if available
2899      */
2900     if (ctxt->node == NULL) return(0);
2901     if (ctxt->myDoc != NULL) {
2902 	ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2903         if (ret == 0) return(1);
2904         if (ret == 1) return(0);
2905     }
2906 
2907     /*
2908      * Otherwise, heuristic :-\
2909      */
2910     if ((RAW != '<') && (RAW != 0xD)) return(0);
2911     if ((ctxt->node->children == NULL) &&
2912 	(RAW == '<') && (NXT(1) == '/')) return(0);
2913 
2914     lastChild = xmlGetLastChild(ctxt->node);
2915     if (lastChild == NULL) {
2916         if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2917             (ctxt->node->content != NULL)) return(0);
2918     } else if (xmlNodeIsText(lastChild))
2919         return(0);
2920     else if ((ctxt->node->children != NULL) &&
2921              (xmlNodeIsText(ctxt->node->children)))
2922         return(0);
2923     return(1);
2924 }
2925 
2926 /************************************************************************
2927  *									*
2928  *		Extra stuff for namespace support			*
2929  *	Relates to http://www.w3.org/TR/WD-xml-names			*
2930  *									*
2931  ************************************************************************/
2932 
2933 /**
2934  * xmlSplitQName:
2935  * @ctxt:  an XML parser context
2936  * @name:  an XML parser context
2937  * @prefix:  a xmlChar **
2938  *
2939  * parse an UTF8 encoded XML qualified name string
2940  *
2941  * [NS 5] QName ::= (Prefix ':')? LocalPart
2942  *
2943  * [NS 6] Prefix ::= NCName
2944  *
2945  * [NS 7] LocalPart ::= NCName
2946  *
2947  * Returns the local part, and prefix is updated
2948  *   to get the Prefix if any.
2949  */
2950 
2951 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefix)2952 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2953     xmlChar buf[XML_MAX_NAMELEN + 5];
2954     xmlChar *buffer = NULL;
2955     int len = 0;
2956     int max = XML_MAX_NAMELEN;
2957     xmlChar *ret = NULL;
2958     const xmlChar *cur = name;
2959     int c;
2960 
2961     if (prefix == NULL) return(NULL);
2962     *prefix = NULL;
2963 
2964     if (cur == NULL) return(NULL);
2965 
2966 #ifndef XML_XML_NAMESPACE
2967     /* xml: prefix is not really a namespace */
2968     if ((cur[0] == 'x') && (cur[1] == 'm') &&
2969         (cur[2] == 'l') && (cur[3] == ':'))
2970 	return(xmlStrdup(name));
2971 #endif
2972 
2973     /* nasty but well=formed */
2974     if (cur[0] == ':')
2975 	return(xmlStrdup(name));
2976 
2977     c = *cur++;
2978     while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2979 	buf[len++] = c;
2980 	c = *cur++;
2981     }
2982     if (len >= max) {
2983 	/*
2984 	 * Okay someone managed to make a huge name, so he's ready to pay
2985 	 * for the processing speed.
2986 	 */
2987 	max = len * 2;
2988 
2989 	buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2990 	if (buffer == NULL) {
2991 	    xmlErrMemory(ctxt, NULL);
2992 	    return(NULL);
2993 	}
2994 	memcpy(buffer, buf, len);
2995 	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2996 	    if (len + 10 > max) {
2997 	        xmlChar *tmp;
2998 
2999 		max *= 2;
3000 		tmp = (xmlChar *) xmlRealloc(buffer,
3001 						max * sizeof(xmlChar));
3002 		if (tmp == NULL) {
3003 		    xmlFree(buffer);
3004 		    xmlErrMemory(ctxt, NULL);
3005 		    return(NULL);
3006 		}
3007 		buffer = tmp;
3008 	    }
3009 	    buffer[len++] = c;
3010 	    c = *cur++;
3011 	}
3012 	buffer[len] = 0;
3013     }
3014 
3015     if ((c == ':') && (*cur == 0)) {
3016         if (buffer != NULL)
3017 	    xmlFree(buffer);
3018 	*prefix = NULL;
3019 	return(xmlStrdup(name));
3020     }
3021 
3022     if (buffer == NULL)
3023 	ret = xmlStrndup(buf, len);
3024     else {
3025 	ret = buffer;
3026 	buffer = NULL;
3027 	max = XML_MAX_NAMELEN;
3028     }
3029 
3030 
3031     if (c == ':') {
3032 	c = *cur;
3033         *prefix = ret;
3034 	if (c == 0) {
3035 	    return(xmlStrndup(BAD_CAST "", 0));
3036 	}
3037 	len = 0;
3038 
3039 	/*
3040 	 * Check that the first character is proper to start
3041 	 * a new name
3042 	 */
3043 	if (!(((c >= 0x61) && (c <= 0x7A)) ||
3044 	      ((c >= 0x41) && (c <= 0x5A)) ||
3045 	      (c == '_') || (c == ':'))) {
3046 	    int l;
3047 	    int first = CUR_SCHAR(cur, l);
3048 
3049 	    if (!IS_LETTER(first) && (first != '_')) {
3050 		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3051 			    "Name %s is not XML Namespace compliant\n",
3052 				  name);
3053 	    }
3054 	}
3055 	cur++;
3056 
3057 	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3058 	    buf[len++] = c;
3059 	    c = *cur++;
3060 	}
3061 	if (len >= max) {
3062 	    /*
3063 	     * Okay someone managed to make a huge name, so he's ready to pay
3064 	     * for the processing speed.
3065 	     */
3066 	    max = len * 2;
3067 
3068 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3069 	    if (buffer == NULL) {
3070 	        xmlErrMemory(ctxt, NULL);
3071 		return(NULL);
3072 	    }
3073 	    memcpy(buffer, buf, len);
3074 	    while (c != 0) { /* tested bigname2.xml */
3075 		if (len + 10 > max) {
3076 		    xmlChar *tmp;
3077 
3078 		    max *= 2;
3079 		    tmp = (xmlChar *) xmlRealloc(buffer,
3080 						    max * sizeof(xmlChar));
3081 		    if (tmp == NULL) {
3082 			xmlErrMemory(ctxt, NULL);
3083 			xmlFree(buffer);
3084 			return(NULL);
3085 		    }
3086 		    buffer = tmp;
3087 		}
3088 		buffer[len++] = c;
3089 		c = *cur++;
3090 	    }
3091 	    buffer[len] = 0;
3092 	}
3093 
3094 	if (buffer == NULL)
3095 	    ret = xmlStrndup(buf, len);
3096 	else {
3097 	    ret = buffer;
3098 	}
3099     }
3100 
3101     return(ret);
3102 }
3103 
3104 /************************************************************************
3105  *									*
3106  *			The parser itself				*
3107  *	Relates to http://www.w3.org/TR/REC-xml				*
3108  *									*
3109  ************************************************************************/
3110 
3111 /************************************************************************
3112  *									*
3113  *	Routines to parse Name, NCName and NmToken			*
3114  *									*
3115  ************************************************************************/
#ifdef DEBUG
/*
 * Call counters for the name parsing routines below; they only exist in
 * DEBUG builds and each routine increments its own counter on entry.
 */
/* fast paths and string based variants */
static unsigned long nbParseName = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseStringName = 0;
/* slow paths */
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseNCNameComplex = 0;
#endif
3124 
3125 /*
3126  * The two following functions are related to the change of accepted
3127  * characters for Name and NmToken in the Revision 5 of XML-1.0
3128  * They correspond to the modified production [4] and the new production [4a]
3129  * changes in that revision. Also note that the macros used for the
3130  * productions Letter, Digit, CombiningChar and Extender are not needed
3131  * anymore.
3132  * We still keep compatibility to pre-revision5 parsing semantic if the
3133  * new XML_PARSE_OLD10 option is given to the parser.
3134  */
3135 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3136 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3137     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3138         /*
3139 	 * Use the new checks of production [4] [4a] amd [5] of the
3140 	 * Update 5 of XML-1.0
3141 	 */
3142 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3143 	    (((c >= 'a') && (c <= 'z')) ||
3144 	     ((c >= 'A') && (c <= 'Z')) ||
3145 	     (c == '_') || (c == ':') ||
3146 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3147 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3148 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3149 	     ((c >= 0x370) && (c <= 0x37D)) ||
3150 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3151 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3152 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3153 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3154 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3155 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3156 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3157 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3158 	    return(1);
3159     } else {
3160         if (IS_LETTER(c) || (c == '_') || (c == ':'))
3161 	    return(1);
3162     }
3163     return(0);
3164 }
3165 
3166 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3167 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3168     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3169         /*
3170 	 * Use the new checks of production [4] [4a] amd [5] of the
3171 	 * Update 5 of XML-1.0
3172 	 */
3173 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3174 	    (((c >= 'a') && (c <= 'z')) ||
3175 	     ((c >= 'A') && (c <= 'Z')) ||
3176 	     ((c >= '0') && (c <= '9')) || /* !start */
3177 	     (c == '_') || (c == ':') ||
3178 	     (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3179 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3180 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3181 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3182 	     ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3183 	     ((c >= 0x370) && (c <= 0x37D)) ||
3184 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3185 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3186 	     ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3187 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3188 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3189 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3190 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3191 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3192 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3193 	     return(1);
3194     } else {
3195         if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3196             (c == '.') || (c == '-') ||
3197 	    (c == '_') || (c == ':') ||
3198 	    (IS_COMBINING(c)) ||
3199 	    (IS_EXTENDER(c)))
3200 	    return(1);
3201     }
3202     return(0);
3203 }
3204 
3205 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3206                                           int *len, int *alloc, int normalize);
3207 
3208 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3209 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3210     int len = 0, l;
3211     int c;
3212     int count = 0;
3213     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3214                     XML_MAX_TEXT_LENGTH :
3215                     XML_MAX_NAME_LENGTH;
3216 
3217 #ifdef DEBUG
3218     nbParseNameComplex++;
3219 #endif
3220 
3221     /*
3222      * Handler for more complex cases
3223      */
3224     GROW;
3225     if (ctxt->instate == XML_PARSER_EOF)
3226         return(NULL);
3227     c = CUR_CHAR(l);
3228     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3229         /*
3230 	 * Use the new checks of production [4] [4a] amd [5] of the
3231 	 * Update 5 of XML-1.0
3232 	 */
3233 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3234 	    (!(((c >= 'a') && (c <= 'z')) ||
3235 	       ((c >= 'A') && (c <= 'Z')) ||
3236 	       (c == '_') || (c == ':') ||
3237 	       ((c >= 0xC0) && (c <= 0xD6)) ||
3238 	       ((c >= 0xD8) && (c <= 0xF6)) ||
3239 	       ((c >= 0xF8) && (c <= 0x2FF)) ||
3240 	       ((c >= 0x370) && (c <= 0x37D)) ||
3241 	       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3242 	       ((c >= 0x200C) && (c <= 0x200D)) ||
3243 	       ((c >= 0x2070) && (c <= 0x218F)) ||
3244 	       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3245 	       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3246 	       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3247 	       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3248 	       ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3249 	    return(NULL);
3250 	}
3251 	len += l;
3252 	NEXTL(l);
3253 	c = CUR_CHAR(l);
3254 	while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3255 	       (((c >= 'a') && (c <= 'z')) ||
3256 	        ((c >= 'A') && (c <= 'Z')) ||
3257 	        ((c >= '0') && (c <= '9')) || /* !start */
3258 	        (c == '_') || (c == ':') ||
3259 	        (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3260 	        ((c >= 0xC0) && (c <= 0xD6)) ||
3261 	        ((c >= 0xD8) && (c <= 0xF6)) ||
3262 	        ((c >= 0xF8) && (c <= 0x2FF)) ||
3263 	        ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3264 	        ((c >= 0x370) && (c <= 0x37D)) ||
3265 	        ((c >= 0x37F) && (c <= 0x1FFF)) ||
3266 	        ((c >= 0x200C) && (c <= 0x200D)) ||
3267 	        ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3268 	        ((c >= 0x2070) && (c <= 0x218F)) ||
3269 	        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3270 	        ((c >= 0x3001) && (c <= 0xD7FF)) ||
3271 	        ((c >= 0xF900) && (c <= 0xFDCF)) ||
3272 	        ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3273 	        ((c >= 0x10000) && (c <= 0xEFFFF))
3274 		)) {
3275 	    if (count++ > XML_PARSER_CHUNK_SIZE) {
3276 		count = 0;
3277 		GROW;
3278                 if (ctxt->instate == XML_PARSER_EOF)
3279                     return(NULL);
3280 	    }
3281             if (len <= INT_MAX - l)
3282 	        len += l;
3283 	    NEXTL(l);
3284 	    c = CUR_CHAR(l);
3285 	}
3286     } else {
3287 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3288 	    (!IS_LETTER(c) && (c != '_') &&
3289 	     (c != ':'))) {
3290 	    return(NULL);
3291 	}
3292 	len += l;
3293 	NEXTL(l);
3294 	c = CUR_CHAR(l);
3295 
3296 	while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3297 	       ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3298 		(c == '.') || (c == '-') ||
3299 		(c == '_') || (c == ':') ||
3300 		(IS_COMBINING(c)) ||
3301 		(IS_EXTENDER(c)))) {
3302 	    if (count++ > XML_PARSER_CHUNK_SIZE) {
3303 		count = 0;
3304 		GROW;
3305                 if (ctxt->instate == XML_PARSER_EOF)
3306                     return(NULL);
3307 	    }
3308             if (len <= INT_MAX - l)
3309 	        len += l;
3310 	    NEXTL(l);
3311 	    c = CUR_CHAR(l);
3312 	}
3313     }
3314     if (len > maxLength) {
3315         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3316         return(NULL);
3317     }
3318     if (ctxt->input->cur - ctxt->input->base < len) {
3319         /*
3320          * There were a couple of bugs where PERefs lead to to a change
3321          * of the buffer. Check the buffer size to avoid passing an invalid
3322          * pointer to xmlDictLookup.
3323          */
3324         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3325                     "unexpected change of input buffer");
3326         return (NULL);
3327     }
3328     if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3329         return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3330     return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3331 }
3332 
3333 /**
3334  * xmlParseName:
3335  * @ctxt:  an XML parser context
3336  *
3337  * parse an XML name.
3338  *
3339  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3340  *                  CombiningChar | Extender
3341  *
3342  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3343  *
3344  * [6] Names ::= Name (#x20 Name)*
3345  *
3346  * Returns the Name parsed or NULL
3347  */
3348 
3349 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3350 xmlParseName(xmlParserCtxtPtr ctxt) {
3351     const xmlChar *in;
3352     const xmlChar *ret;
3353     size_t count = 0;
3354     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3355                        XML_MAX_TEXT_LENGTH :
3356                        XML_MAX_NAME_LENGTH;
3357 
3358     GROW;
3359 
3360 #ifdef DEBUG
3361     nbParseName++;
3362 #endif
3363 
3364     /*
3365      * Accelerator for simple ASCII names
3366      */
3367     in = ctxt->input->cur;
3368     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3369 	((*in >= 0x41) && (*in <= 0x5A)) ||
3370 	(*in == '_') || (*in == ':')) {
3371 	in++;
3372 	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3373 	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3374 	       ((*in >= 0x30) && (*in <= 0x39)) ||
3375 	       (*in == '_') || (*in == '-') ||
3376 	       (*in == ':') || (*in == '.'))
3377 	    in++;
3378 	if ((*in > 0) && (*in < 0x80)) {
3379 	    count = in - ctxt->input->cur;
3380             if (count > maxLength) {
3381                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3382                 return(NULL);
3383             }
3384 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3385 	    ctxt->input->cur = in;
3386 	    ctxt->input->col += count;
3387 	    if (ret == NULL)
3388 	        xmlErrMemory(ctxt, NULL);
3389 	    return(ret);
3390 	}
3391     }
3392     /* accelerator for special cases */
3393     return(xmlParseNameComplex(ctxt));
3394 }
3395 
3396 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3397 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3398     int len = 0, l;
3399     int c;
3400     int count = 0;
3401     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3402                     XML_MAX_TEXT_LENGTH :
3403                     XML_MAX_NAME_LENGTH;
3404     size_t startPosition = 0;
3405 
3406 #ifdef DEBUG
3407     nbParseNCNameComplex++;
3408 #endif
3409 
3410     /*
3411      * Handler for more complex cases
3412      */
3413     GROW;
3414     startPosition = CUR_PTR - BASE_PTR;
3415     c = CUR_CHAR(l);
3416     if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3417 	(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3418 	return(NULL);
3419     }
3420 
3421     while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3422 	   (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3423 	if (count++ > XML_PARSER_CHUNK_SIZE) {
3424 	    count = 0;
3425 	    GROW;
3426             if (ctxt->instate == XML_PARSER_EOF)
3427                 return(NULL);
3428 	}
3429         if (len <= INT_MAX - l)
3430 	    len += l;
3431 	NEXTL(l);
3432 	c = CUR_CHAR(l);
3433 	if (c == 0) {
3434 	    count = 0;
3435 	    /*
3436 	     * when shrinking to extend the buffer we really need to preserve
3437 	     * the part of the name we already parsed. Hence rolling back
3438 	     * by current length.
3439 	     */
3440 	    ctxt->input->cur -= l;
3441 	    GROW;
3442             if (ctxt->instate == XML_PARSER_EOF)
3443                 return(NULL);
3444 	    ctxt->input->cur += l;
3445 	    c = CUR_CHAR(l);
3446 	}
3447     }
3448     if (len > maxLength) {
3449         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3450         return(NULL);
3451     }
3452     return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3453 }
3454 
3455 /**
3456  * xmlParseNCName:
3457  * @ctxt:  an XML parser context
3458  * @len:  length of the string parsed
3459  *
3460  * parse an XML name.
3461  *
3462  * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3463  *                      CombiningChar | Extender
3464  *
3465  * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3466  *
3467  * Returns the Name parsed or NULL
3468  */
3469 
3470 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3471 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3472     const xmlChar *in, *e;
3473     const xmlChar *ret;
3474     size_t count = 0;
3475     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3476                        XML_MAX_TEXT_LENGTH :
3477                        XML_MAX_NAME_LENGTH;
3478 
3479 #ifdef DEBUG
3480     nbParseNCName++;
3481 #endif
3482 
3483     /*
3484      * Accelerator for simple ASCII names
3485      */
3486     in = ctxt->input->cur;
3487     e = ctxt->input->end;
3488     if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3489 	 ((*in >= 0x41) && (*in <= 0x5A)) ||
3490 	 (*in == '_')) && (in < e)) {
3491 	in++;
3492 	while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3493 	        ((*in >= 0x41) && (*in <= 0x5A)) ||
3494 	        ((*in >= 0x30) && (*in <= 0x39)) ||
3495 	        (*in == '_') || (*in == '-') ||
3496 	        (*in == '.')) && (in < e))
3497 	    in++;
3498 	if (in >= e)
3499 	    goto complex;
3500 	if ((*in > 0) && (*in < 0x80)) {
3501 	    count = in - ctxt->input->cur;
3502             if (count > maxLength) {
3503                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3504                 return(NULL);
3505             }
3506 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3507 	    ctxt->input->cur = in;
3508 	    ctxt->input->col += count;
3509 	    if (ret == NULL) {
3510 	        xmlErrMemory(ctxt, NULL);
3511 	    }
3512 	    return(ret);
3513 	}
3514     }
3515 complex:
3516     return(xmlParseNCNameComplex(ctxt));
3517 }
3518 
3519 /**
3520  * xmlParseNameAndCompare:
3521  * @ctxt:  an XML parser context
3522  *
3523  * parse an XML name and compares for match
3524  * (specialized for endtag parsing)
3525  *
3526  * Returns NULL for an illegal name, (xmlChar*) 1 for success
3527  * and the name for mismatch
3528  */
3529 
3530 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3531 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3532     register const xmlChar *cmp = other;
3533     register const xmlChar *in;
3534     const xmlChar *ret;
3535 
3536     GROW;
3537     if (ctxt->instate == XML_PARSER_EOF)
3538         return(NULL);
3539 
3540     in = ctxt->input->cur;
3541     while (*in != 0 && *in == *cmp) {
3542 	++in;
3543 	++cmp;
3544     }
3545     if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3546 	/* success */
3547 	ctxt->input->col += in - ctxt->input->cur;
3548 	ctxt->input->cur = in;
3549 	return (const xmlChar*) 1;
3550     }
3551     /* failure (or end of input buffer), check with full function */
3552     ret = xmlParseName (ctxt);
3553     /* strings coming from the dictionary direct compare possible */
3554     if (ret == other) {
3555 	return (const xmlChar*) 1;
3556     }
3557     return ret;
3558 }
3559 
3560 /**
3561  * xmlParseStringName:
3562  * @ctxt:  an XML parser context
3563  * @str:  a pointer to the string pointer (IN/OUT)
3564  *
3565  * parse an XML name.
3566  *
3567  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3568  *                  CombiningChar | Extender
3569  *
3570  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3571  *
3572  * [6] Names ::= Name (#x20 Name)*
3573  *
3574  * Returns the Name parsed or NULL. The @str pointer
3575  * is updated to the current location in the string.
3576  */
3577 
3578 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3579 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3580     xmlChar buf[XML_MAX_NAMELEN + 5];
3581     const xmlChar *cur = *str;
3582     int len = 0, l;
3583     int c;
3584     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3585                     XML_MAX_TEXT_LENGTH :
3586                     XML_MAX_NAME_LENGTH;
3587 
3588 #ifdef DEBUG
3589     nbParseStringName++;
3590 #endif
3591 
3592     c = CUR_SCHAR(cur, l);
3593     if (!xmlIsNameStartChar(ctxt, c)) {
3594 	return(NULL);
3595     }
3596 
3597     COPY_BUF(l,buf,len,c);
3598     cur += l;
3599     c = CUR_SCHAR(cur, l);
3600     while (xmlIsNameChar(ctxt, c)) {
3601 	COPY_BUF(l,buf,len,c);
3602 	cur += l;
3603 	c = CUR_SCHAR(cur, l);
3604 	if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3605 	    /*
3606 	     * Okay someone managed to make a huge name, so he's ready to pay
3607 	     * for the processing speed.
3608 	     */
3609 	    xmlChar *buffer;
3610 	    int max = len * 2;
3611 
3612 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3613 	    if (buffer == NULL) {
3614 	        xmlErrMemory(ctxt, NULL);
3615 		return(NULL);
3616 	    }
3617 	    memcpy(buffer, buf, len);
3618 	    while (xmlIsNameChar(ctxt, c)) {
3619 		if (len + 10 > max) {
3620 		    xmlChar *tmp;
3621 
3622 		    max *= 2;
3623 		    tmp = (xmlChar *) xmlRealloc(buffer,
3624 			                            max * sizeof(xmlChar));
3625 		    if (tmp == NULL) {
3626 			xmlErrMemory(ctxt, NULL);
3627 			xmlFree(buffer);
3628 			return(NULL);
3629 		    }
3630 		    buffer = tmp;
3631 		}
3632 		COPY_BUF(l,buffer,len,c);
3633 		cur += l;
3634 		c = CUR_SCHAR(cur, l);
3635                 if (len > maxLength) {
3636                     xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3637                     xmlFree(buffer);
3638                     return(NULL);
3639                 }
3640 	    }
3641 	    buffer[len] = 0;
3642 	    *str = cur;
3643 	    return(buffer);
3644 	}
3645     }
3646     if (len > maxLength) {
3647         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3648         return(NULL);
3649     }
3650     *str = cur;
3651     return(xmlStrndup(buf, len));
3652 }
3653 
3654 /**
3655  * xmlParseNmtoken:
3656  * @ctxt:  an XML parser context
3657  *
3658  * parse an XML Nmtoken.
3659  *
3660  * [7] Nmtoken ::= (NameChar)+
3661  *
3662  * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3663  *
3664  * Returns the Nmtoken parsed or NULL
3665  */
3666 
3667 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3668 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3669     xmlChar buf[XML_MAX_NAMELEN + 5];
3670     int len = 0, l;
3671     int c;
3672     int count = 0;
3673     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3674                     XML_MAX_TEXT_LENGTH :
3675                     XML_MAX_NAME_LENGTH;
3676 
3677 #ifdef DEBUG
3678     nbParseNmToken++;
3679 #endif
3680 
3681     GROW;
3682     if (ctxt->instate == XML_PARSER_EOF)
3683         return(NULL);
3684     c = CUR_CHAR(l);
3685 
3686     while (xmlIsNameChar(ctxt, c)) {
3687 	if (count++ > XML_PARSER_CHUNK_SIZE) {
3688 	    count = 0;
3689 	    GROW;
3690 	}
3691 	COPY_BUF(l,buf,len,c);
3692 	NEXTL(l);
3693 	c = CUR_CHAR(l);
3694 	if (c == 0) {
3695 	    count = 0;
3696 	    GROW;
3697 	    if (ctxt->instate == XML_PARSER_EOF)
3698 		return(NULL);
3699             c = CUR_CHAR(l);
3700 	}
3701 	if (len >= XML_MAX_NAMELEN) {
3702 	    /*
3703 	     * Okay someone managed to make a huge token, so he's ready to pay
3704 	     * for the processing speed.
3705 	     */
3706 	    xmlChar *buffer;
3707 	    int max = len * 2;
3708 
3709 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3710 	    if (buffer == NULL) {
3711 	        xmlErrMemory(ctxt, NULL);
3712 		return(NULL);
3713 	    }
3714 	    memcpy(buffer, buf, len);
3715 	    while (xmlIsNameChar(ctxt, c)) {
3716 		if (count++ > XML_PARSER_CHUNK_SIZE) {
3717 		    count = 0;
3718 		    GROW;
3719                     if (ctxt->instate == XML_PARSER_EOF) {
3720                         xmlFree(buffer);
3721                         return(NULL);
3722                     }
3723 		}
3724 		if (len + 10 > max) {
3725 		    xmlChar *tmp;
3726 
3727 		    max *= 2;
3728 		    tmp = (xmlChar *) xmlRealloc(buffer,
3729 			                            max * sizeof(xmlChar));
3730 		    if (tmp == NULL) {
3731 			xmlErrMemory(ctxt, NULL);
3732 			xmlFree(buffer);
3733 			return(NULL);
3734 		    }
3735 		    buffer = tmp;
3736 		}
3737 		COPY_BUF(l,buffer,len,c);
3738 		NEXTL(l);
3739 		c = CUR_CHAR(l);
3740                 if (len > maxLength) {
3741                     xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3742                     xmlFree(buffer);
3743                     return(NULL);
3744                 }
3745 	    }
3746 	    buffer[len] = 0;
3747 	    return(buffer);
3748 	}
3749     }
3750     if (len == 0)
3751         return(NULL);
3752     if (len > maxLength) {
3753         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3754         return(NULL);
3755     }
3756     return(xmlStrndup(buf, len));
3757 }
3758 
3759 /**
3760  * xmlParseEntityValue:
3761  * @ctxt:  an XML parser context
3762  * @orig:  if non-NULL store a copy of the original entity value
3763  *
3764  * parse a value for ENTITY declarations
3765  *
3766  * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3767  *	               "'" ([^%&'] | PEReference | Reference)* "'"
3768  *
3769  * Returns the EntityValue parsed with reference substituted or NULL
3770  */
3771 
3772 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3773 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3774     xmlChar *buf = NULL;
3775     int len = 0;
3776     int size = XML_PARSER_BUFFER_SIZE;
3777     int c, l;
3778     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3779                     XML_MAX_HUGE_LENGTH :
3780                     XML_MAX_TEXT_LENGTH;
3781     xmlChar stop;
3782     xmlChar *ret = NULL;
3783     const xmlChar *cur = NULL;
3784     xmlParserInputPtr input;
3785 
3786     if (RAW == '"') stop = '"';
3787     else if (RAW == '\'') stop = '\'';
3788     else {
3789 	xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3790 	return(NULL);
3791     }
3792     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3793     if (buf == NULL) {
3794 	xmlErrMemory(ctxt, NULL);
3795 	return(NULL);
3796     }
3797 
3798     /*
3799      * The content of the entity definition is copied in a buffer.
3800      */
3801 
3802     ctxt->instate = XML_PARSER_ENTITY_VALUE;
3803     input = ctxt->input;
3804     GROW;
3805     if (ctxt->instate == XML_PARSER_EOF)
3806         goto error;
3807     NEXT;
3808     c = CUR_CHAR(l);
3809     /*
3810      * NOTE: 4.4.5 Included in Literal
3811      * When a parameter entity reference appears in a literal entity
3812      * value, ... a single or double quote character in the replacement
3813      * text is always treated as a normal data character and will not
3814      * terminate the literal.
3815      * In practice it means we stop the loop only when back at parsing
3816      * the initial entity and the quote is found
3817      */
3818     while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3819 	    (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3820 	if (len + 5 >= size) {
3821 	    xmlChar *tmp;
3822 
3823 	    size *= 2;
3824 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3825 	    if (tmp == NULL) {
3826 		xmlErrMemory(ctxt, NULL);
3827                 goto error;
3828 	    }
3829 	    buf = tmp;
3830 	}
3831 	COPY_BUF(l,buf,len,c);
3832 	NEXTL(l);
3833 
3834 	GROW;
3835 	c = CUR_CHAR(l);
3836 	if (c == 0) {
3837 	    GROW;
3838 	    c = CUR_CHAR(l);
3839 	}
3840 
3841         if (len > maxLength) {
3842             xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3843                            "entity value too long\n");
3844             goto error;
3845         }
3846     }
3847     buf[len] = 0;
3848     if (ctxt->instate == XML_PARSER_EOF)
3849         goto error;
3850     if (c != stop) {
3851         xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3852         goto error;
3853     }
3854     NEXT;
3855 
3856     /*
3857      * Raise problem w.r.t. '&' and '%' being used in non-entities
3858      * reference constructs. Note Charref will be handled in
3859      * xmlStringDecodeEntities()
3860      */
3861     cur = buf;
3862     while (*cur != 0) { /* non input consuming */
3863 	if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3864 	    xmlChar *name;
3865 	    xmlChar tmp = *cur;
3866             int nameOk = 0;
3867 
3868 	    cur++;
3869 	    name = xmlParseStringName(ctxt, &cur);
3870             if (name != NULL) {
3871                 nameOk = 1;
3872                 xmlFree(name);
3873             }
3874             if ((nameOk == 0) || (*cur != ';')) {
3875 		xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3876 	    "EntityValue: '%c' forbidden except for entities references\n",
3877 	                          tmp);
3878                 goto error;
3879 	    }
3880 	    if ((tmp == '%') && (ctxt->inSubset == 1) &&
3881 		(ctxt->inputNr == 1)) {
3882 		xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3883                 goto error;
3884 	    }
3885 	    if (*cur == 0)
3886 	        break;
3887 	}
3888 	cur++;
3889     }
3890 
3891     /*
3892      * Then PEReference entities are substituted.
3893      *
3894      * NOTE: 4.4.7 Bypassed
3895      * When a general entity reference appears in the EntityValue in
3896      * an entity declaration, it is bypassed and left as is.
3897      * so XML_SUBSTITUTE_REF is not set here.
3898      */
3899     ++ctxt->depth;
3900     ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3901                                   0, 0, 0);
3902     --ctxt->depth;
3903     if (orig != NULL) {
3904         *orig = buf;
3905         buf = NULL;
3906     }
3907 
3908 error:
3909     if (buf != NULL)
3910         xmlFree(buf);
3911     return(ret);
3912 }
3913 
3914 /**
3915  * xmlParseAttValueComplex:
3916  * @ctxt:  an XML parser context
3917  * @len:   the resulting attribute len
3918  * @normalize:  whether to apply the inner normalization
3919  *
3920  * parse a value for an attribute, this is the fallback function
3921  * of xmlParseAttValue() when the attribute parsing requires handling
3922  * of non-ASCII characters, or normalization compaction.
3923  *
3924  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3925  */
3926 static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt,int * attlen,int normalize)3927 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3928     xmlChar limit = 0;
3929     xmlChar *buf = NULL;
3930     xmlChar *rep = NULL;
3931     size_t len = 0;
3932     size_t buf_size = 0;
3933     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3934                        XML_MAX_HUGE_LENGTH :
3935                        XML_MAX_TEXT_LENGTH;
3936     int c, l, in_space = 0;
3937     xmlChar *current = NULL;
3938     xmlEntityPtr ent;
3939 
3940     if (NXT(0) == '"') {
3941 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3942 	limit = '"';
3943         NEXT;
3944     } else if (NXT(0) == '\'') {
3945 	limit = '\'';
3946 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3947         NEXT;
3948     } else {
3949 	xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3950 	return(NULL);
3951     }
3952 
3953     /*
3954      * allocate a translation buffer.
3955      */
3956     buf_size = XML_PARSER_BUFFER_SIZE;
3957     buf = (xmlChar *) xmlMallocAtomic(buf_size);
3958     if (buf == NULL) goto mem_error;
3959 
3960     /*
3961      * OK loop until we reach one of the ending char or a size limit.
3962      */
3963     c = CUR_CHAR(l);
3964     while (((NXT(0) != limit) && /* checked */
3965             (IS_CHAR(c)) && (c != '<')) &&
3966             (ctxt->instate != XML_PARSER_EOF)) {
3967 	if (c == '&') {
3968 	    in_space = 0;
3969 	    if (NXT(1) == '#') {
3970 		int val = xmlParseCharRef(ctxt);
3971 
3972 		if (val == '&') {
3973 		    if (ctxt->replaceEntities) {
3974 			if (len + 10 > buf_size) {
3975 			    growBuffer(buf, 10);
3976 			}
3977 			buf[len++] = '&';
3978 		    } else {
3979 			/*
3980 			 * The reparsing will be done in xmlStringGetNodeList()
3981 			 * called by the attribute() function in SAX.c
3982 			 */
3983 			if (len + 10 > buf_size) {
3984 			    growBuffer(buf, 10);
3985 			}
3986 			buf[len++] = '&';
3987 			buf[len++] = '#';
3988 			buf[len++] = '3';
3989 			buf[len++] = '8';
3990 			buf[len++] = ';';
3991 		    }
3992 		} else if (val != 0) {
3993 		    if (len + 10 > buf_size) {
3994 			growBuffer(buf, 10);
3995 		    }
3996 		    len += xmlCopyChar(0, &buf[len], val);
3997 		}
3998 	    } else {
3999 		ent = xmlParseEntityRef(ctxt);
4000 		ctxt->nbentities++;
4001 		if (ent != NULL)
4002 		    ctxt->nbentities += ent->owner;
4003 		if ((ent != NULL) &&
4004 		    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4005 		    if (len + 10 > buf_size) {
4006 			growBuffer(buf, 10);
4007 		    }
4008 		    if ((ctxt->replaceEntities == 0) &&
4009 		        (ent->content[0] == '&')) {
4010 			buf[len++] = '&';
4011 			buf[len++] = '#';
4012 			buf[len++] = '3';
4013 			buf[len++] = '8';
4014 			buf[len++] = ';';
4015 		    } else {
4016 			buf[len++] = ent->content[0];
4017 		    }
4018 		} else if ((ent != NULL) &&
4019 		           (ctxt->replaceEntities != 0)) {
4020 		    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4021 			++ctxt->depth;
4022 			rep = xmlStringDecodeEntities(ctxt, ent->content,
4023 						      XML_SUBSTITUTE_REF,
4024 						      0, 0, 0);
4025 			--ctxt->depth;
4026 			if (rep != NULL) {
4027 			    current = rep;
4028 			    while (*current != 0) { /* non input consuming */
4029                                 if ((*current == 0xD) || (*current == 0xA) ||
4030                                     (*current == 0x9)) {
4031                                     buf[len++] = 0x20;
4032                                     current++;
4033                                 } else
4034                                     buf[len++] = *current++;
4035 				if (len + 10 > buf_size) {
4036 				    growBuffer(buf, 10);
4037 				}
4038 			    }
4039 			    xmlFree(rep);
4040 			    rep = NULL;
4041 			}
4042 		    } else {
4043 			if (len + 10 > buf_size) {
4044 			    growBuffer(buf, 10);
4045 			}
4046 			if (ent->content != NULL)
4047 			    buf[len++] = ent->content[0];
4048 		    }
4049 		} else if (ent != NULL) {
4050 		    int i = xmlStrlen(ent->name);
4051 		    const xmlChar *cur = ent->name;
4052 
4053 		    /*
4054 		     * This may look absurd but is needed to detect
4055 		     * entities problems
4056 		     */
4057 		    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4058 			(ent->content != NULL) && (ent->checked == 0)) {
4059 			unsigned long oldnbent = ctxt->nbentities, diff;
4060 
4061 			++ctxt->depth;
4062 			rep = xmlStringDecodeEntities(ctxt, ent->content,
4063 						  XML_SUBSTITUTE_REF, 0, 0, 0);
4064 			--ctxt->depth;
4065 
4066                         diff = ctxt->nbentities - oldnbent + 1;
4067                         if (diff > INT_MAX / 2)
4068                             diff = INT_MAX / 2;
4069                         ent->checked = diff * 2;
4070 			if (rep != NULL) {
4071 			    if (xmlStrchr(rep, '<'))
4072 			        ent->checked |= 1;
4073 			    xmlFree(rep);
4074 			    rep = NULL;
4075 			} else {
4076                             ent->content[0] = 0;
4077                         }
4078 		    }
4079 
4080 		    /*
4081 		     * Just output the reference
4082 		     */
4083 		    buf[len++] = '&';
4084 		    while (len + i + 10 > buf_size) {
4085 			growBuffer(buf, i + 10);
4086 		    }
4087 		    for (;i > 0;i--)
4088 			buf[len++] = *cur++;
4089 		    buf[len++] = ';';
4090 		}
4091 	    }
4092 	} else {
4093 	    if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4094 	        if ((len != 0) || (!normalize)) {
4095 		    if ((!normalize) || (!in_space)) {
4096 			COPY_BUF(l,buf,len,0x20);
4097 			while (len + 10 > buf_size) {
4098 			    growBuffer(buf, 10);
4099 			}
4100 		    }
4101 		    in_space = 1;
4102 		}
4103 	    } else {
4104 	        in_space = 0;
4105 		COPY_BUF(l,buf,len,c);
4106 		if (len + 10 > buf_size) {
4107 		    growBuffer(buf, 10);
4108 		}
4109 	    }
4110 	    NEXTL(l);
4111 	}
4112 	GROW;
4113 	c = CUR_CHAR(l);
4114         if (len > maxLength) {
4115             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4116                            "AttValue length too long\n");
4117             goto mem_error;
4118         }
4119     }
4120     if (ctxt->instate == XML_PARSER_EOF)
4121         goto error;
4122 
4123     if ((in_space) && (normalize)) {
4124         while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4125     }
4126     buf[len] = 0;
4127     if (RAW == '<') {
4128 	xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4129     } else if (RAW != limit) {
4130 	if ((c != 0) && (!IS_CHAR(c))) {
4131 	    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4132 			   "invalid character in attribute value\n");
4133 	} else {
4134 	    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4135 			   "AttValue: ' expected\n");
4136         }
4137     } else
4138 	NEXT;
4139 
4140     if (attlen != NULL) *attlen = (int) len;
4141     return(buf);
4142 
4143 mem_error:
4144     xmlErrMemory(ctxt, NULL);
4145 error:
4146     if (buf != NULL)
4147         xmlFree(buf);
4148     if (rep != NULL)
4149         xmlFree(rep);
4150     return(NULL);
4151 }
4152 
4153 /**
4154  * xmlParseAttValue:
4155  * @ctxt:  an XML parser context
4156  *
4157  * parse a value for an attribute
4158  * Note: the parser won't do substitution of entities here, this
4159  * will be handled later in xmlStringGetNodeList
4160  *
4161  * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4162  *                   "'" ([^<&'] | Reference)* "'"
4163  *
4164  * 3.3.3 Attribute-Value Normalization:
4165  * Before the value of an attribute is passed to the application or
4166  * checked for validity, the XML processor must normalize it as follows:
4167  * - a character reference is processed by appending the referenced
4168  *   character to the attribute value
4169  * - an entity reference is processed by recursively processing the
4170  *   replacement text of the entity
4171  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4172  *   appending #x20 to the normalized value, except that only a single
4173  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4174  *   parsed entity or the literal entity value of an internal parsed entity
4175  * - other characters are processed by appending them to the normalized value
4176  * If the declared value is not CDATA, then the XML processor must further
4177  * process the normalized attribute value by discarding any leading and
4178  * trailing space (#x20) characters, and by replacing sequences of space
4179  * (#x20) characters by a single space (#x20) character.
4180  * All attributes for which no declaration has been read should be treated
4181  * by a non-validating parser as if declared CDATA.
4182  *
4183  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4184  */
4185 
4186 
4187 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4188 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4189     if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4190     return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4191 }
4192 
4193 /**
4194  * xmlParseSystemLiteral:
4195  * @ctxt:  an XML parser context
4196  *
4197  * parse an XML Literal
4198  *
4199  * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4200  *
4201  * Returns the SystemLiteral parsed or NULL
4202  */
4203 
4204 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4205 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4206     xmlChar *buf = NULL;
4207     int len = 0;
4208     int size = XML_PARSER_BUFFER_SIZE;
4209     int cur, l;
4210     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4211                     XML_MAX_TEXT_LENGTH :
4212                     XML_MAX_NAME_LENGTH;
4213     xmlChar stop;
4214     int state = ctxt->instate;
4215     int count = 0;
4216 
4217     SHRINK;
4218     if (RAW == '"') {
4219         NEXT;
4220 	stop = '"';
4221     } else if (RAW == '\'') {
4222         NEXT;
4223 	stop = '\'';
4224     } else {
4225 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4226 	return(NULL);
4227     }
4228 
4229     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4230     if (buf == NULL) {
4231         xmlErrMemory(ctxt, NULL);
4232 	return(NULL);
4233     }
4234     ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4235     cur = CUR_CHAR(l);
4236     while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4237 	if (len + 5 >= size) {
4238 	    xmlChar *tmp;
4239 
4240 	    size *= 2;
4241 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4242 	    if (tmp == NULL) {
4243 	        xmlFree(buf);
4244 		xmlErrMemory(ctxt, NULL);
4245 		ctxt->instate = (xmlParserInputState) state;
4246 		return(NULL);
4247 	    }
4248 	    buf = tmp;
4249 	}
4250 	count++;
4251 	if (count > 50) {
4252 	    SHRINK;
4253 	    GROW;
4254 	    count = 0;
4255             if (ctxt->instate == XML_PARSER_EOF) {
4256 	        xmlFree(buf);
4257 		return(NULL);
4258             }
4259 	}
4260 	COPY_BUF(l,buf,len,cur);
4261 	NEXTL(l);
4262 	cur = CUR_CHAR(l);
4263 	if (cur == 0) {
4264 	    GROW;
4265 	    SHRINK;
4266 	    cur = CUR_CHAR(l);
4267 	}
4268         if (len > maxLength) {
4269             xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4270             xmlFree(buf);
4271             ctxt->instate = (xmlParserInputState) state;
4272             return(NULL);
4273         }
4274     }
4275     buf[len] = 0;
4276     ctxt->instate = (xmlParserInputState) state;
4277     if (!IS_CHAR(cur)) {
4278 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4279     } else {
4280 	NEXT;
4281     }
4282     return(buf);
4283 }
4284 
4285 /**
4286  * xmlParsePubidLiteral:
4287  * @ctxt:  an XML parser context
4288  *
4289  * parse an XML public literal
4290  *
4291  * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4292  *
4293  * Returns the PubidLiteral parsed or NULL.
4294  */
4295 
4296 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4297 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4298     xmlChar *buf = NULL;
4299     int len = 0;
4300     int size = XML_PARSER_BUFFER_SIZE;
4301     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4302                     XML_MAX_TEXT_LENGTH :
4303                     XML_MAX_NAME_LENGTH;
4304     xmlChar cur;
4305     xmlChar stop;
4306     int count = 0;
4307     xmlParserInputState oldstate = ctxt->instate;
4308 
4309     SHRINK;
4310     if (RAW == '"') {
4311         NEXT;
4312 	stop = '"';
4313     } else if (RAW == '\'') {
4314         NEXT;
4315 	stop = '\'';
4316     } else {
4317 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4318 	return(NULL);
4319     }
4320     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4321     if (buf == NULL) {
4322 	xmlErrMemory(ctxt, NULL);
4323 	return(NULL);
4324     }
4325     ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4326     cur = CUR;
4327     while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4328 	if (len + 1 >= size) {
4329 	    xmlChar *tmp;
4330 
4331 	    size *= 2;
4332 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4333 	    if (tmp == NULL) {
4334 		xmlErrMemory(ctxt, NULL);
4335 		xmlFree(buf);
4336 		return(NULL);
4337 	    }
4338 	    buf = tmp;
4339 	}
4340 	buf[len++] = cur;
4341 	count++;
4342 	if (count > 50) {
4343 	    SHRINK;
4344 	    GROW;
4345 	    count = 0;
4346             if (ctxt->instate == XML_PARSER_EOF) {
4347 		xmlFree(buf);
4348 		return(NULL);
4349             }
4350 	}
4351 	NEXT;
4352 	cur = CUR;
4353 	if (cur == 0) {
4354 	    GROW;
4355 	    SHRINK;
4356 	    cur = CUR;
4357 	}
4358         if (len > maxLength) {
4359             xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4360             xmlFree(buf);
4361             return(NULL);
4362         }
4363     }
4364     buf[len] = 0;
4365     if (cur != stop) {
4366 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4367     } else {
4368 	NEXT;
4369     }
4370     ctxt->instate = oldstate;
4371     return(buf);
4372 }
4373 
4374 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4375 
/*
 * Lookup table driving the inner loop of the char data fast path.
 * An entry is non-zero (and equal to its own index) when the byte can be
 * skipped over as plain text, and zero when the scan has to stop on it:
 * control characters other than TAB (so LF and CR stop the scan too),
 * '&', '<', ']' and every byte above 0x7F.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x1F: only TAB (0x09) is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00 */
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18 */
    /* 0x20 - 0x7F: everything but '&', '<' and ']' */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* 0x20, '&' cleared */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, /* 0x28 */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30 */
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* 0x38, '<' cleared */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40 */
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, /* 0x48 */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50 */
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* 0x58, ']' cleared */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60 */
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, /* 0x68 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70 */
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, /* 0x78 */
    /* 0x80 - 0xFF: non-ascii, never accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA8 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB8 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC8 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD8 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xE0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xE8 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xF0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  /* 0xF8 */
};
4413 
4414 /**
4415  * xmlParseCharData:
4416  * @ctxt:  an XML parser context
4417  * @cdata:  int indicating whether we are within a CDATA section
4418  *
4419  * parse a CharData section.
4420  * if we are within a CDATA section ']]>' marks an end of section.
4421  *
4422  * The right angle bracket (>) may be represented using the string "&gt;",
4423  * and must, for compatibility, be escaped using "&gt;" or a character
4424  * reference when it appears in the string "]]>" in content, when that
4425  * string is not marking the end of a CDATA section.
4426  *
4427  * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4428  */
4429 
4430 void
xmlParseCharData(xmlParserCtxtPtr ctxt,int cdata)4431 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4432     const xmlChar *in;
4433     int nbchar = 0;
4434     int line = ctxt->input->line;
4435     int col = ctxt->input->col;
4436     int ccol;
4437 
4438     SHRINK;
4439     GROW;
4440     /*
4441      * Accelerated common case where input don't need to be
4442      * modified before passing it to the handler.
4443      */
4444     if (!cdata) {
4445 	in = ctxt->input->cur;
4446 	do {
4447 get_more_space:
4448 	    while (*in == 0x20) { in++; ctxt->input->col++; }
4449 	    if (*in == 0xA) {
4450 		do {
4451 		    ctxt->input->line++; ctxt->input->col = 1;
4452 		    in++;
4453 		} while (*in == 0xA);
4454 		goto get_more_space;
4455 	    }
4456 	    if (*in == '<') {
4457 		nbchar = in - ctxt->input->cur;
4458 		if (nbchar > 0) {
4459 		    const xmlChar *tmp = ctxt->input->cur;
4460 		    ctxt->input->cur = in;
4461 
4462 		    if ((ctxt->sax != NULL) &&
4463 		        (ctxt->sax->ignorableWhitespace !=
4464 		         ctxt->sax->characters)) {
4465 			if (areBlanks(ctxt, tmp, nbchar, 1)) {
4466 			    if (ctxt->sax->ignorableWhitespace != NULL)
4467 				ctxt->sax->ignorableWhitespace(ctxt->userData,
4468 						       tmp, nbchar);
4469 			} else {
4470 			    if (ctxt->sax->characters != NULL)
4471 				ctxt->sax->characters(ctxt->userData,
4472 						      tmp, nbchar);
4473 			    if (*ctxt->space == -1)
4474 			        *ctxt->space = -2;
4475 			}
4476 		    } else if ((ctxt->sax != NULL) &&
4477 		               (ctxt->sax->characters != NULL)) {
4478 			ctxt->sax->characters(ctxt->userData,
4479 					      tmp, nbchar);
4480 		    }
4481 		}
4482 		return;
4483 	    }
4484 
4485 get_more:
4486             ccol = ctxt->input->col;
4487 	    while (test_char_data[*in]) {
4488 		in++;
4489 		ccol++;
4490 	    }
4491 	    ctxt->input->col = ccol;
4492 	    if (*in == 0xA) {
4493 		do {
4494 		    ctxt->input->line++; ctxt->input->col = 1;
4495 		    in++;
4496 		} while (*in == 0xA);
4497 		goto get_more;
4498 	    }
4499 	    if (*in == ']') {
4500 		if ((in[1] == ']') && (in[2] == '>')) {
4501 		    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4502 		    ctxt->input->cur = in + 1;
4503 		    return;
4504 		}
4505 		in++;
4506 		ctxt->input->col++;
4507 		goto get_more;
4508 	    }
4509 	    nbchar = in - ctxt->input->cur;
4510 	    if (nbchar > 0) {
4511 		if ((ctxt->sax != NULL) &&
4512 		    (ctxt->sax->ignorableWhitespace !=
4513 		     ctxt->sax->characters) &&
4514 		    (IS_BLANK_CH(*ctxt->input->cur))) {
4515 		    const xmlChar *tmp = ctxt->input->cur;
4516 		    ctxt->input->cur = in;
4517 
4518 		    if (areBlanks(ctxt, tmp, nbchar, 0)) {
4519 		        if (ctxt->sax->ignorableWhitespace != NULL)
4520 			    ctxt->sax->ignorableWhitespace(ctxt->userData,
4521 							   tmp, nbchar);
4522 		    } else {
4523 		        if (ctxt->sax->characters != NULL)
4524 			    ctxt->sax->characters(ctxt->userData,
4525 						  tmp, nbchar);
4526 			if (*ctxt->space == -1)
4527 			    *ctxt->space = -2;
4528 		    }
4529                     line = ctxt->input->line;
4530                     col = ctxt->input->col;
4531 		} else if (ctxt->sax != NULL) {
4532 		    if (ctxt->sax->characters != NULL)
4533 			ctxt->sax->characters(ctxt->userData,
4534 					      ctxt->input->cur, nbchar);
4535                     line = ctxt->input->line;
4536                     col = ctxt->input->col;
4537 		}
4538                 /* something really bad happened in the SAX callback */
4539                 if (ctxt->instate != XML_PARSER_CONTENT)
4540                     return;
4541 	    }
4542 	    ctxt->input->cur = in;
4543 	    if (*in == 0xD) {
4544 		in++;
4545 		if (*in == 0xA) {
4546 		    ctxt->input->cur = in;
4547 		    in++;
4548 		    ctxt->input->line++; ctxt->input->col = 1;
4549 		    continue; /* while */
4550 		}
4551 		in--;
4552 	    }
4553 	    if (*in == '<') {
4554 		return;
4555 	    }
4556 	    if (*in == '&') {
4557 		return;
4558 	    }
4559 	    SHRINK;
4560 	    GROW;
4561             if (ctxt->instate == XML_PARSER_EOF)
4562 		return;
4563 	    in = ctxt->input->cur;
4564 	} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
4565 	nbchar = 0;
4566     }
4567     ctxt->input->line = line;
4568     ctxt->input->col = col;
4569     xmlParseCharDataComplex(ctxt, cdata);
4570 }
4571 
4572 /**
4573  * xmlParseCharDataComplex:
4574  * @ctxt:  an XML parser context
4575  * @cdata:  int indicating whether we are within a CDATA section
4576  *
4577  * parse a CharData section.this is the fallback function
4578  * of xmlParseCharData() when the parsing requires handling
4579  * of non-ASCII characters.
4580  */
4581 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4582 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4583     xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4584     int nbchar = 0;
4585     int cur, l;
4586     int count = 0;
4587 
4588     SHRINK;
4589     GROW;
4590     cur = CUR_CHAR(l);
4591     while ((cur != '<') && /* checked */
4592            (cur != '&') &&
4593 	   (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4594 	if ((cur == ']') && (NXT(1) == ']') &&
4595 	    (NXT(2) == '>')) {
4596 	    if (cdata) break;
4597 	    else {
4598 		xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4599 	    }
4600 	}
4601 	COPY_BUF(l,buf,nbchar,cur);
4602 	/* move current position before possible calling of ctxt->sax->characters */
4603 	NEXTL(l);
4604 	cur = CUR_CHAR(l);
4605 	if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4606 	    buf[nbchar] = 0;
4607 
4608 	    /*
4609 	     * OK the segment is to be consumed as chars.
4610 	     */
4611 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4612 		if (areBlanks(ctxt, buf, nbchar, 0)) {
4613 		    if (ctxt->sax->ignorableWhitespace != NULL)
4614 			ctxt->sax->ignorableWhitespace(ctxt->userData,
4615 			                               buf, nbchar);
4616 		} else {
4617 		    if (ctxt->sax->characters != NULL)
4618 			ctxt->sax->characters(ctxt->userData, buf, nbchar);
4619 		    if ((ctxt->sax->characters !=
4620 		         ctxt->sax->ignorableWhitespace) &&
4621 			(*ctxt->space == -1))
4622 			*ctxt->space = -2;
4623 		}
4624 	    }
4625 	    nbchar = 0;
4626             /* something really bad happened in the SAX callback */
4627             if (ctxt->instate != XML_PARSER_CONTENT)
4628                 return;
4629 	}
4630 	count++;
4631 	if (count > 50) {
4632 	    SHRINK;
4633 	    GROW;
4634 	    count = 0;
4635             if (ctxt->instate == XML_PARSER_EOF)
4636 		return;
4637 	}
4638     }
4639     if (nbchar != 0) {
4640         buf[nbchar] = 0;
4641 	/*
4642 	 * OK the segment is to be consumed as chars.
4643 	 */
4644 	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4645 	    if (areBlanks(ctxt, buf, nbchar, 0)) {
4646 		if (ctxt->sax->ignorableWhitespace != NULL)
4647 		    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4648 	    } else {
4649 		if (ctxt->sax->characters != NULL)
4650 		    ctxt->sax->characters(ctxt->userData, buf, nbchar);
4651 		if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4652 		    (*ctxt->space == -1))
4653 		    *ctxt->space = -2;
4654 	    }
4655 	}
4656     }
4657     if ((cur != 0) && (!IS_CHAR(cur))) {
4658 	/* Generate the error and skip the offending character */
4659         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4660                           "PCDATA invalid Char value %d\n",
4661 	                  cur);
4662 	NEXTL(l);
4663     }
4664 }
4665 
4666 /**
4667  * xmlParseExternalID:
4668  * @ctxt:  an XML parser context
4669  * @publicID:  a xmlChar** receiving PubidLiteral
4670  * @strict: indicate whether we should restrict parsing to only
4671  *          production [75], see NOTE below
4672  *
4673  * Parse an External ID or a Public ID
4674  *
4675  * NOTE: Productions [75] and [83] interact badly since [75] can generate
4676  *       'PUBLIC' S PubidLiteral S SystemLiteral
4677  *
4678  * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4679  *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4680  *
4681  * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4682  *
4683  * Returns the function returns SystemLiteral and in the second
4684  *                case publicID receives PubidLiteral, is strict is off
4685  *                it is possible to return NULL and have publicID set.
4686  */
4687 
4688 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4689 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4690     xmlChar *URI = NULL;
4691 
4692     SHRINK;
4693 
4694     *publicID = NULL;
4695     if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4696         SKIP(6);
4697 	if (SKIP_BLANKS == 0) {
4698 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4699 	                   "Space required after 'SYSTEM'\n");
4700 	}
4701 	URI = xmlParseSystemLiteral(ctxt);
4702 	if (URI == NULL) {
4703 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4704         }
4705     } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4706         SKIP(6);
4707 	if (SKIP_BLANKS == 0) {
4708 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4709 		    "Space required after 'PUBLIC'\n");
4710 	}
4711 	*publicID = xmlParsePubidLiteral(ctxt);
4712 	if (*publicID == NULL) {
4713 	    xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4714 	}
4715 	if (strict) {
4716 	    /*
4717 	     * We don't handle [83] so "S SystemLiteral" is required.
4718 	     */
4719 	    if (SKIP_BLANKS == 0) {
4720 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4721 			"Space required after the Public Identifier\n");
4722 	    }
4723 	} else {
4724 	    /*
4725 	     * We handle [83] so we return immediately, if
4726 	     * "S SystemLiteral" is not detected. We skip blanks if no
4727              * system literal was found, but this is harmless since we must
4728              * be at the end of a NotationDecl.
4729 	     */
4730 	    if (SKIP_BLANKS == 0) return(NULL);
4731 	    if ((CUR != '\'') && (CUR != '"')) return(NULL);
4732 	}
4733 	URI = xmlParseSystemLiteral(ctxt);
4734 	if (URI == NULL) {
4735 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4736         }
4737     }
4738     return(URI);
4739 }
4740 
4741 /**
4742  * xmlParseCommentComplex:
4743  * @ctxt:  an XML parser context
4744  * @buf:  the already parsed part of the buffer
4745  * @len:  number of bytes in the buffer
4746  * @size:  allocated size of the buffer
4747  *
4748  * Skip an XML (SGML) comment <!-- .... -->
4749  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4750  *  must not occur within comments. "
4751  * This is the slow routine in case the accelerator for ascii didn't work
4752  *
4753  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4754  */
4755 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)4756 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4757                        size_t len, size_t size) {
4758     int q, ql;
4759     int r, rl;
4760     int cur, l;
4761     size_t count = 0;
4762     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4763                        XML_MAX_HUGE_LENGTH :
4764                        XML_MAX_TEXT_LENGTH;
4765     int inputid;
4766 
4767     inputid = ctxt->input->id;
4768 
4769     if (buf == NULL) {
4770         len = 0;
4771 	size = XML_PARSER_BUFFER_SIZE;
4772 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4773 	if (buf == NULL) {
4774 	    xmlErrMemory(ctxt, NULL);
4775 	    return;
4776 	}
4777     }
4778     GROW;	/* Assure there's enough input data */
4779     q = CUR_CHAR(ql);
4780     if (q == 0)
4781         goto not_terminated;
4782     if (!IS_CHAR(q)) {
4783         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4784                           "xmlParseComment: invalid xmlChar value %d\n",
4785 	                  q);
4786 	xmlFree (buf);
4787 	return;
4788     }
4789     NEXTL(ql);
4790     r = CUR_CHAR(rl);
4791     if (r == 0)
4792         goto not_terminated;
4793     if (!IS_CHAR(r)) {
4794         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4795                           "xmlParseComment: invalid xmlChar value %d\n",
4796 	                  q);
4797 	xmlFree (buf);
4798 	return;
4799     }
4800     NEXTL(rl);
4801     cur = CUR_CHAR(l);
4802     if (cur == 0)
4803         goto not_terminated;
4804     while (IS_CHAR(cur) && /* checked */
4805            ((cur != '>') ||
4806 	    (r != '-') || (q != '-'))) {
4807 	if ((r == '-') && (q == '-')) {
4808 	    xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4809 	}
4810 	if (len + 5 >= size) {
4811 	    xmlChar *new_buf;
4812             size_t new_size;
4813 
4814 	    new_size = size * 2;
4815 	    new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4816 	    if (new_buf == NULL) {
4817 		xmlFree (buf);
4818 		xmlErrMemory(ctxt, NULL);
4819 		return;
4820 	    }
4821 	    buf = new_buf;
4822             size = new_size;
4823 	}
4824 	COPY_BUF(ql,buf,len,q);
4825 	q = r;
4826 	ql = rl;
4827 	r = cur;
4828 	rl = l;
4829 
4830 	count++;
4831 	if (count > 50) {
4832 	    SHRINK;
4833 	    GROW;
4834 	    count = 0;
4835             if (ctxt->instate == XML_PARSER_EOF) {
4836 		xmlFree(buf);
4837 		return;
4838             }
4839 	}
4840 	NEXTL(l);
4841 	cur = CUR_CHAR(l);
4842 	if (cur == 0) {
4843 	    SHRINK;
4844 	    GROW;
4845 	    cur = CUR_CHAR(l);
4846 	}
4847 
4848         if (len > maxLength) {
4849             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4850                          "Comment too big found", NULL);
4851             xmlFree (buf);
4852             return;
4853         }
4854     }
4855     buf[len] = 0;
4856     if (cur == 0) {
4857 	xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4858 	                     "Comment not terminated \n<!--%.50s\n", buf);
4859     } else if (!IS_CHAR(cur)) {
4860         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4861                           "xmlParseComment: invalid xmlChar value %d\n",
4862 	                  cur);
4863     } else {
4864 	if (inputid != ctxt->input->id) {
4865 	    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4866 		           "Comment doesn't start and stop in the same"
4867                            " entity\n");
4868 	}
4869         NEXT;
4870 	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4871 	    (!ctxt->disableSAX))
4872 	    ctxt->sax->comment(ctxt->userData, buf);
4873     }
4874     xmlFree(buf);
4875     return;
4876 not_terminated:
4877     xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4878 			 "Comment not terminated\n", NULL);
4879     xmlFree(buf);
4880     return;
4881 }
4882 
4883 /**
4884  * xmlParseComment:
4885  * @ctxt:  an XML parser context
4886  *
4887  * Skip an XML (SGML) comment <!-- .... -->
4888  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4889  *  must not occur within comments. "
4890  *
4891  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4892  */
4893 void
xmlParseComment(xmlParserCtxtPtr ctxt)4894 xmlParseComment(xmlParserCtxtPtr ctxt) {
4895     xmlChar *buf = NULL;
4896     size_t size = XML_PARSER_BUFFER_SIZE;
4897     size_t len = 0;
4898     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4899                        XML_MAX_HUGE_LENGTH :
4900                        XML_MAX_TEXT_LENGTH;
4901     xmlParserInputState state;
4902     const xmlChar *in;
4903     size_t nbchar = 0;
4904     int ccol;
4905     int inputid;
4906 
4907     /*
4908      * Check that there is a comment right here.
4909      */
4910     if ((RAW != '<') || (NXT(1) != '!') ||
4911         (NXT(2) != '-') || (NXT(3) != '-')) return;
4912     state = ctxt->instate;
4913     ctxt->instate = XML_PARSER_COMMENT;
4914     inputid = ctxt->input->id;
4915     SKIP(4);
4916     SHRINK;
4917     GROW;
4918 
4919     /*
4920      * Accelerated common case where input don't need to be
4921      * modified before passing it to the handler.
4922      */
4923     in = ctxt->input->cur;
4924     do {
4925 	if (*in == 0xA) {
4926 	    do {
4927 		ctxt->input->line++; ctxt->input->col = 1;
4928 		in++;
4929 	    } while (*in == 0xA);
4930 	}
4931 get_more:
4932         ccol = ctxt->input->col;
4933 	while (((*in > '-') && (*in <= 0x7F)) ||
4934 	       ((*in >= 0x20) && (*in < '-')) ||
4935 	       (*in == 0x09)) {
4936 		    in++;
4937 		    ccol++;
4938 	}
4939 	ctxt->input->col = ccol;
4940 	if (*in == 0xA) {
4941 	    do {
4942 		ctxt->input->line++; ctxt->input->col = 1;
4943 		in++;
4944 	    } while (*in == 0xA);
4945 	    goto get_more;
4946 	}
4947 	nbchar = in - ctxt->input->cur;
4948 	/*
4949 	 * save current set of data
4950 	 */
4951 	if (nbchar > 0) {
4952 	    if ((ctxt->sax != NULL) &&
4953 		(ctxt->sax->comment != NULL)) {
4954 		if (buf == NULL) {
4955 		    if ((*in == '-') && (in[1] == '-'))
4956 		        size = nbchar + 1;
4957 		    else
4958 		        size = XML_PARSER_BUFFER_SIZE + nbchar;
4959 		    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4960 		    if (buf == NULL) {
4961 		        xmlErrMemory(ctxt, NULL);
4962 			ctxt->instate = state;
4963 			return;
4964 		    }
4965 		    len = 0;
4966 		} else if (len + nbchar + 1 >= size) {
4967 		    xmlChar *new_buf;
4968 		    size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4969 		    new_buf = (xmlChar *) xmlRealloc(buf,
4970 		                                     size * sizeof(xmlChar));
4971 		    if (new_buf == NULL) {
4972 		        xmlFree (buf);
4973 			xmlErrMemory(ctxt, NULL);
4974 			ctxt->instate = state;
4975 			return;
4976 		    }
4977 		    buf = new_buf;
4978 		}
4979 		memcpy(&buf[len], ctxt->input->cur, nbchar);
4980 		len += nbchar;
4981 		buf[len] = 0;
4982 	    }
4983 	}
4984         if (len > maxLength) {
4985             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4986                          "Comment too big found", NULL);
4987             xmlFree (buf);
4988             return;
4989         }
4990 	ctxt->input->cur = in;
4991 	if (*in == 0xA) {
4992 	    in++;
4993 	    ctxt->input->line++; ctxt->input->col = 1;
4994 	}
4995 	if (*in == 0xD) {
4996 	    in++;
4997 	    if (*in == 0xA) {
4998 		ctxt->input->cur = in;
4999 		in++;
5000 		ctxt->input->line++; ctxt->input->col = 1;
5001 		goto get_more;
5002 	    }
5003 	    in--;
5004 	}
5005 	SHRINK;
5006 	GROW;
5007         if (ctxt->instate == XML_PARSER_EOF) {
5008             xmlFree(buf);
5009             return;
5010         }
5011 	in = ctxt->input->cur;
5012 	if (*in == '-') {
5013 	    if (in[1] == '-') {
5014 	        if (in[2] == '>') {
5015 		    if (ctxt->input->id != inputid) {
5016 			xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5017 			               "comment doesn't start and stop in the"
5018                                        " same entity\n");
5019 		    }
5020 		    SKIP(3);
5021 		    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5022 		        (!ctxt->disableSAX)) {
5023 			if (buf != NULL)
5024 			    ctxt->sax->comment(ctxt->userData, buf);
5025 			else
5026 			    ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5027 		    }
5028 		    if (buf != NULL)
5029 		        xmlFree(buf);
5030 		    if (ctxt->instate != XML_PARSER_EOF)
5031 			ctxt->instate = state;
5032 		    return;
5033 		}
5034 		if (buf != NULL) {
5035 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5036 		                      "Double hyphen within comment: "
5037                                       "<!--%.50s\n",
5038 				      buf);
5039 		} else
5040 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5041 		                      "Double hyphen within comment\n", NULL);
5042                 if (ctxt->instate == XML_PARSER_EOF) {
5043                     xmlFree(buf);
5044                     return;
5045                 }
5046 		in++;
5047 		ctxt->input->col++;
5048 	    }
5049 	    in++;
5050 	    ctxt->input->col++;
5051 	    goto get_more;
5052 	}
5053     } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5054     xmlParseCommentComplex(ctxt, buf, len, size);
5055     ctxt->instate = state;
5056     return;
5057 }
5058 
5059 
5060 /**
5061  * xmlParsePITarget:
5062  * @ctxt:  an XML parser context
5063  *
5064  * parse the name of a PI
5065  *
5066  * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5067  *
5068  * Returns the PITarget name or NULL
5069  */
5070 
5071 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)5072 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5073     const xmlChar *name;
5074 
5075     name = xmlParseName(ctxt);
5076     if ((name != NULL) &&
5077         ((name[0] == 'x') || (name[0] == 'X')) &&
5078         ((name[1] == 'm') || (name[1] == 'M')) &&
5079         ((name[2] == 'l') || (name[2] == 'L'))) {
5080 	int i;
5081 	if ((name[0] == 'x') && (name[1] == 'm') &&
5082 	    (name[2] == 'l') && (name[3] == 0)) {
5083 	    xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5084 		 "XML declaration allowed only at the start of the document\n");
5085 	    return(name);
5086 	} else if (name[3] == 0) {
5087 	    xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5088 	    return(name);
5089 	}
5090 	for (i = 0;;i++) {
5091 	    if (xmlW3CPIs[i] == NULL) break;
5092 	    if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5093 	        return(name);
5094 	}
5095 	xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5096 		      "xmlParsePITarget: invalid name prefix 'xml'\n",
5097 		      NULL, NULL);
5098     }
5099     if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5100 	xmlNsErr(ctxt, XML_NS_ERR_COLON,
5101 		 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5102     }
5103     return(name);
5104 }
5105 
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction, expected
 * to be of the form catalog="URL" or catalog='URL' with optional blanks
 * around the pieces and nothing after the closing quote.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * A malformed value triggers an XML_WAR_CATALOG_PI warning, except when
 * the '=' after "catalog" is missing, which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *uri = NULL;
    xmlChar quote;

    /* Pseudo-attribute name. */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* Equal sign. */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=') {
        /* NOTE(review): no warning on this path, unlike the others. */
        return;
    }
    p++;

    /* Quoted value. */
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    uri = xmlStrndup(start, p - start);
    p++;

    /* Only blanks may follow the closing quote. */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (uri != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
5167 
5168 /**
5169  * xmlParsePI:
5170  * @ctxt:  an XML parser context
5171  *
5172  * parse an XML Processing Instruction.
5173  *
5174  * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5175  *
5176  * The processing is transferred to SAX once parsed.
5177  */
5178 
5179 void
xmlParsePI(xmlParserCtxtPtr ctxt)5180 xmlParsePI(xmlParserCtxtPtr ctxt) {
5181     xmlChar *buf = NULL;
5182     size_t len = 0;
5183     size_t size = XML_PARSER_BUFFER_SIZE;
5184     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5185                        XML_MAX_HUGE_LENGTH :
5186                        XML_MAX_TEXT_LENGTH;
5187     int cur, l;
5188     const xmlChar *target;
5189     xmlParserInputState state;
5190     int count = 0;
5191 
5192     if ((RAW == '<') && (NXT(1) == '?')) {
5193 	int inputid = ctxt->input->id;
5194 	state = ctxt->instate;
5195         ctxt->instate = XML_PARSER_PI;
5196 	/*
5197 	 * this is a Processing Instruction.
5198 	 */
5199 	SKIP(2);
5200 	SHRINK;
5201 
5202 	/*
5203 	 * Parse the target name and check for special support like
5204 	 * namespace.
5205 	 */
5206         target = xmlParsePITarget(ctxt);
5207 	if (target != NULL) {
5208 	    if ((RAW == '?') && (NXT(1) == '>')) {
5209 		if (inputid != ctxt->input->id) {
5210 		    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5211 	                           "PI declaration doesn't start and stop in"
5212                                    " the same entity\n");
5213 		}
5214 		SKIP(2);
5215 
5216 		/*
5217 		 * SAX: PI detected.
5218 		 */
5219 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5220 		    (ctxt->sax->processingInstruction != NULL))
5221 		    ctxt->sax->processingInstruction(ctxt->userData,
5222 		                                     target, NULL);
5223 		if (ctxt->instate != XML_PARSER_EOF)
5224 		    ctxt->instate = state;
5225 		return;
5226 	    }
5227 	    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5228 	    if (buf == NULL) {
5229 		xmlErrMemory(ctxt, NULL);
5230 		ctxt->instate = state;
5231 		return;
5232 	    }
5233 	    if (SKIP_BLANKS == 0) {
5234 		xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5235 			  "ParsePI: PI %s space expected\n", target);
5236 	    }
5237 	    cur = CUR_CHAR(l);
5238 	    while (IS_CHAR(cur) && /* checked */
5239 		   ((cur != '?') || (NXT(1) != '>'))) {
5240 		if (len + 5 >= size) {
5241 		    xmlChar *tmp;
5242                     size_t new_size = size * 2;
5243 		    tmp = (xmlChar *) xmlRealloc(buf, new_size);
5244 		    if (tmp == NULL) {
5245 			xmlErrMemory(ctxt, NULL);
5246 			xmlFree(buf);
5247 			ctxt->instate = state;
5248 			return;
5249 		    }
5250 		    buf = tmp;
5251                     size = new_size;
5252 		}
5253 		count++;
5254 		if (count > 50) {
5255 		    SHRINK;
5256 		    GROW;
5257                     if (ctxt->instate == XML_PARSER_EOF) {
5258                         xmlFree(buf);
5259                         return;
5260                     }
5261 		    count = 0;
5262 		}
5263 		COPY_BUF(l,buf,len,cur);
5264 		NEXTL(l);
5265 		cur = CUR_CHAR(l);
5266 		if (cur == 0) {
5267 		    SHRINK;
5268 		    GROW;
5269 		    cur = CUR_CHAR(l);
5270 		}
5271                 if (len > maxLength) {
5272                     xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5273                                       "PI %s too big found", target);
5274                     xmlFree(buf);
5275                     ctxt->instate = state;
5276                     return;
5277                 }
5278 	    }
5279 	    buf[len] = 0;
5280 	    if (cur != '?') {
5281 		xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5282 		      "ParsePI: PI %s never end ...\n", target);
5283 	    } else {
5284 		if (inputid != ctxt->input->id) {
5285 		    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5286 	                           "PI declaration doesn't start and stop in"
5287                                    " the same entity\n");
5288 		}
5289 		SKIP(2);
5290 
5291 #ifdef LIBXML_CATALOG_ENABLED
5292 		if (((state == XML_PARSER_MISC) ||
5293 	             (state == XML_PARSER_START)) &&
5294 		    (xmlStrEqual(target, XML_CATALOG_PI))) {
5295 		    xmlCatalogAllow allow = xmlCatalogGetDefaults();
5296 		    if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5297 			(allow == XML_CATA_ALLOW_ALL))
5298 			xmlParseCatalogPI(ctxt, buf);
5299 		}
5300 #endif
5301 
5302 
5303 		/*
5304 		 * SAX: PI detected.
5305 		 */
5306 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5307 		    (ctxt->sax->processingInstruction != NULL))
5308 		    ctxt->sax->processingInstruction(ctxt->userData,
5309 		                                     target, buf);
5310 	    }
5311 	    xmlFree(buf);
5312 	} else {
5313 	    xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5314 	}
5315 	if (ctxt->instate != XML_PARSER_EOF)
5316 	    ctxt->instate = state;
5317     }
5318 }
5319 
5320 /**
5321  * xmlParseNotationDecl:
5322  * @ctxt:  an XML parser context
5323  *
5324  * parse a notation declaration
5325  *
5326  * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5327  *
5328  * Hence there is actually 3 choices:
5329  *     'PUBLIC' S PubidLiteral
5330  *     'PUBLIC' S PubidLiteral S SystemLiteral
5331  * and 'SYSTEM' S SystemLiteral
5332  *
5333  * See the NOTE on xmlParseExternalID().
5334  */
5335 
5336 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5337 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5338     const xmlChar *name;
5339     xmlChar *Pubid;
5340     xmlChar *Systemid;
5341 
5342     if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5343 	int inputid = ctxt->input->id;
5344 	SHRINK;
5345 	SKIP(10);
5346 	if (SKIP_BLANKS == 0) {
5347 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5348 			   "Space required after '<!NOTATION'\n");
5349 	    return;
5350 	}
5351 
5352         name = xmlParseName(ctxt);
5353 	if (name == NULL) {
5354 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5355 	    return;
5356 	}
5357 	if (xmlStrchr(name, ':') != NULL) {
5358 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5359 		     "colons are forbidden from notation names '%s'\n",
5360 		     name, NULL, NULL);
5361 	}
5362 	if (SKIP_BLANKS == 0) {
5363 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5364 		     "Space required after the NOTATION name'\n");
5365 	    return;
5366 	}
5367 
5368 	/*
5369 	 * Parse the IDs.
5370 	 */
5371 	Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5372 	SKIP_BLANKS;
5373 
5374 	if (RAW == '>') {
5375 	    if (inputid != ctxt->input->id) {
5376 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5377 	                       "Notation declaration doesn't start and stop"
5378                                " in the same entity\n");
5379 	    }
5380 	    NEXT;
5381 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5382 		(ctxt->sax->notationDecl != NULL))
5383 		ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5384 	} else {
5385 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5386 	}
5387 	if (Systemid != NULL) xmlFree(Systemid);
5388 	if (Pubid != NULL) xmlFree(Pubid);
5389     }
5390 }
5391 
5392 /**
5393  * xmlParseEntityDecl:
5394  * @ctxt:  an XML parser context
5395  *
5396  * parse <!ENTITY declarations
5397  *
5398  * [70] EntityDecl ::= GEDecl | PEDecl
5399  *
5400  * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5401  *
5402  * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5403  *
5404  * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5405  *
5406  * [74] PEDef ::= EntityValue | ExternalID
5407  *
5408  * [76] NDataDecl ::= S 'NDATA' S Name
5409  *
5410  * [ VC: Notation Declared ]
5411  * The Name must match the declared name of a notation.
5412  */
5413 
5414 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5415 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5416     const xmlChar *name = NULL;
5417     xmlChar *value = NULL;
5418     xmlChar *URI = NULL, *literal = NULL;
5419     const xmlChar *ndata = NULL;
5420     int isParameter = 0;
5421     xmlChar *orig = NULL;
5422 
5423     /* GROW; done in the caller */
5424     if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5425 	int inputid = ctxt->input->id;
5426 	SHRINK;
5427 	SKIP(8);
5428 	if (SKIP_BLANKS == 0) {
5429 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5430 			   "Space required after '<!ENTITY'\n");
5431 	}
5432 
5433 	if (RAW == '%') {
5434 	    NEXT;
5435 	    if (SKIP_BLANKS == 0) {
5436 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5437 			       "Space required after '%%'\n");
5438 	    }
5439 	    isParameter = 1;
5440 	}
5441 
5442         name = xmlParseName(ctxt);
5443 	if (name == NULL) {
5444 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5445 	                   "xmlParseEntityDecl: no name\n");
5446             return;
5447 	}
5448 	if (xmlStrchr(name, ':') != NULL) {
5449 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5450 		     "colons are forbidden from entities names '%s'\n",
5451 		     name, NULL, NULL);
5452 	}
5453 	if (SKIP_BLANKS == 0) {
5454 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5455 			   "Space required after the entity name\n");
5456 	}
5457 
5458 	ctxt->instate = XML_PARSER_ENTITY_DECL;
5459 	/*
5460 	 * handle the various case of definitions...
5461 	 */
5462 	if (isParameter) {
5463 	    if ((RAW == '"') || (RAW == '\'')) {
5464 	        value = xmlParseEntityValue(ctxt, &orig);
5465 		if (value) {
5466 		    if ((ctxt->sax != NULL) &&
5467 			(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5468 			ctxt->sax->entityDecl(ctxt->userData, name,
5469 		                    XML_INTERNAL_PARAMETER_ENTITY,
5470 				    NULL, NULL, value);
5471 		}
5472 	    } else {
5473 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5474 		if ((URI == NULL) && (literal == NULL)) {
5475 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5476 		}
5477 		if (URI) {
5478 		    xmlURIPtr uri;
5479 
5480 		    uri = xmlParseURI((const char *) URI);
5481 		    if (uri == NULL) {
5482 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5483 				     "Invalid URI: %s\n", URI);
5484 			/*
5485 			 * This really ought to be a well formedness error
5486 			 * but the XML Core WG decided otherwise c.f. issue
5487 			 * E26 of the XML erratas.
5488 			 */
5489 		    } else {
5490 			if (uri->fragment != NULL) {
5491 			    /*
5492 			     * Okay this is foolish to block those but not
5493 			     * invalid URIs.
5494 			     */
5495 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5496 			} else {
5497 			    if ((ctxt->sax != NULL) &&
5498 				(!ctxt->disableSAX) &&
5499 				(ctxt->sax->entityDecl != NULL))
5500 				ctxt->sax->entityDecl(ctxt->userData, name,
5501 					    XML_EXTERNAL_PARAMETER_ENTITY,
5502 					    literal, URI, NULL);
5503 			}
5504 			xmlFreeURI(uri);
5505 		    }
5506 		}
5507 	    }
5508 	} else {
5509 	    if ((RAW == '"') || (RAW == '\'')) {
5510 	        value = xmlParseEntityValue(ctxt, &orig);
5511 		if ((ctxt->sax != NULL) &&
5512 		    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5513 		    ctxt->sax->entityDecl(ctxt->userData, name,
5514 				XML_INTERNAL_GENERAL_ENTITY,
5515 				NULL, NULL, value);
5516 		/*
5517 		 * For expat compatibility in SAX mode.
5518 		 */
5519 		if ((ctxt->myDoc == NULL) ||
5520 		    (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5521 		    if (ctxt->myDoc == NULL) {
5522 			ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5523 			if (ctxt->myDoc == NULL) {
5524 			    xmlErrMemory(ctxt, "New Doc failed");
5525 			    return;
5526 			}
5527 			ctxt->myDoc->properties = XML_DOC_INTERNAL;
5528 		    }
5529 		    if (ctxt->myDoc->intSubset == NULL)
5530 			ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5531 					    BAD_CAST "fake", NULL, NULL);
5532 
5533 		    xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5534 			              NULL, NULL, value);
5535 		}
5536 	    } else {
5537 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5538 		if ((URI == NULL) && (literal == NULL)) {
5539 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5540 		}
5541 		if (URI) {
5542 		    xmlURIPtr uri;
5543 
5544 		    uri = xmlParseURI((const char *)URI);
5545 		    if (uri == NULL) {
5546 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5547 				     "Invalid URI: %s\n", URI);
5548 			/*
5549 			 * This really ought to be a well formedness error
5550 			 * but the XML Core WG decided otherwise c.f. issue
5551 			 * E26 of the XML erratas.
5552 			 */
5553 		    } else {
5554 			if (uri->fragment != NULL) {
5555 			    /*
5556 			     * Okay this is foolish to block those but not
5557 			     * invalid URIs.
5558 			     */
5559 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5560 			}
5561 			xmlFreeURI(uri);
5562 		    }
5563 		}
5564 		if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5565 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5566 				   "Space required before 'NDATA'\n");
5567 		}
5568 		if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5569 		    SKIP(5);
5570 		    if (SKIP_BLANKS == 0) {
5571 			xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5572 				       "Space required after 'NDATA'\n");
5573 		    }
5574 		    ndata = xmlParseName(ctxt);
5575 		    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5576 		        (ctxt->sax->unparsedEntityDecl != NULL))
5577 			ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5578 				    literal, URI, ndata);
5579 		} else {
5580 		    if ((ctxt->sax != NULL) &&
5581 		        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5582 			ctxt->sax->entityDecl(ctxt->userData, name,
5583 				    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5584 				    literal, URI, NULL);
5585 		    /*
5586 		     * For expat compatibility in SAX mode.
5587 		     * assuming the entity replacement was asked for
5588 		     */
5589 		    if ((ctxt->replaceEntities != 0) &&
5590 			((ctxt->myDoc == NULL) ||
5591 			(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5592 			if (ctxt->myDoc == NULL) {
5593 			    ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5594 			    if (ctxt->myDoc == NULL) {
5595 			        xmlErrMemory(ctxt, "New Doc failed");
5596 				return;
5597 			    }
5598 			    ctxt->myDoc->properties = XML_DOC_INTERNAL;
5599 			}
5600 
5601 			if (ctxt->myDoc->intSubset == NULL)
5602 			    ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5603 						BAD_CAST "fake", NULL, NULL);
5604 			xmlSAX2EntityDecl(ctxt, name,
5605 				          XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5606 				          literal, URI, NULL);
5607 		    }
5608 		}
5609 	    }
5610 	}
5611 	if (ctxt->instate == XML_PARSER_EOF)
5612 	    goto done;
5613 	SKIP_BLANKS;
5614 	if (RAW != '>') {
5615 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5616 	            "xmlParseEntityDecl: entity %s not terminated\n", name);
5617 	    xmlHaltParser(ctxt);
5618 	} else {
5619 	    if (inputid != ctxt->input->id) {
5620 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5621 	                       "Entity declaration doesn't start and stop in"
5622                                " the same entity\n");
5623 	    }
5624 	    NEXT;
5625 	}
5626 	if (orig != NULL) {
5627 	    /*
5628 	     * Ugly mechanism to save the raw entity value.
5629 	     */
5630 	    xmlEntityPtr cur = NULL;
5631 
5632 	    if (isParameter) {
5633 	        if ((ctxt->sax != NULL) &&
5634 		    (ctxt->sax->getParameterEntity != NULL))
5635 		    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5636 	    } else {
5637 	        if ((ctxt->sax != NULL) &&
5638 		    (ctxt->sax->getEntity != NULL))
5639 		    cur = ctxt->sax->getEntity(ctxt->userData, name);
5640 		if ((cur == NULL) && (ctxt->userData==ctxt)) {
5641 		    cur = xmlSAX2GetEntity(ctxt, name);
5642 		}
5643 	    }
5644             if ((cur != NULL) && (cur->orig == NULL)) {
5645 		cur->orig = orig;
5646                 orig = NULL;
5647 	    }
5648 	}
5649 
5650 done:
5651 	if (value != NULL) xmlFree(value);
5652 	if (URI != NULL) xmlFree(URI);
5653 	if (literal != NULL) xmlFree(literal);
5654         if (orig != NULL) xmlFree(orig);
5655     }
5656 }
5657 
5658 /**
5659  * xmlParseDefaultDecl:
5660  * @ctxt:  an XML parser context
5661  * @value:  Receive a possible fixed default value for the attribute
5662  *
5663  * Parse an attribute default declaration
5664  *
5665  * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5666  *
5667  * [ VC: Required Attribute ]
5668  * if the default declaration is the keyword #REQUIRED, then the
5669  * attribute must be specified for all elements of the type in the
5670  * attribute-list declaration.
5671  *
5672  * [ VC: Attribute Default Legal ]
5673  * The declared default value must meet the lexical constraints of
5674  * the declared attribute type c.f. xmlValidateAttributeDecl()
5675  *
5676  * [ VC: Fixed Attribute Default ]
5677  * if an attribute has a default value declared with the #FIXED
5678  * keyword, instances of that attribute must match the default value.
5679  *
5680  * [ WFC: No < in Attribute Values ]
5681  * handled in xmlParseAttValue()
5682  *
5683  * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5684  *          or XML_ATTRIBUTE_FIXED.
5685  */
5686 
5687 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5688 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5689     int val;
5690     xmlChar *ret;
5691 
5692     *value = NULL;
5693     if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5694 	SKIP(9);
5695 	return(XML_ATTRIBUTE_REQUIRED);
5696     }
5697     if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5698 	SKIP(8);
5699 	return(XML_ATTRIBUTE_IMPLIED);
5700     }
5701     val = XML_ATTRIBUTE_NONE;
5702     if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5703 	SKIP(6);
5704 	val = XML_ATTRIBUTE_FIXED;
5705 	if (SKIP_BLANKS == 0) {
5706 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5707 			   "Space required after '#FIXED'\n");
5708 	}
5709     }
5710     ret = xmlParseAttValue(ctxt);
5711     ctxt->instate = XML_PARSER_DTD;
5712     if (ret == NULL) {
5713 	xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5714 		       "Attribute default value declaration error\n");
5715     } else
5716         *value = ret;
5717     return(val);
5718 }
5719 
5720 /**
5721  * xmlParseNotationType:
5722  * @ctxt:  an XML parser context
5723  *
5724  * parse an Notation attribute type.
5725  *
5726  * Note: the leading 'NOTATION' S part has already being parsed...
5727  *
5728  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5729  *
5730  * [ VC: Notation Attributes ]
5731  * Values of this type must match one of the notation names included
5732  * in the declaration; all notation names in the declaration must be declared.
5733  *
5734  * Returns: the notation attribute tree built while parsing
5735  */
5736 
5737 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5738 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5739     const xmlChar *name;
5740     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5741 
5742     if (RAW != '(') {
5743 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5744 	return(NULL);
5745     }
5746     SHRINK;
5747     do {
5748         NEXT;
5749 	SKIP_BLANKS;
5750         name = xmlParseName(ctxt);
5751 	if (name == NULL) {
5752 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5753 			   "Name expected in NOTATION declaration\n");
5754             xmlFreeEnumeration(ret);
5755 	    return(NULL);
5756 	}
5757 	tmp = ret;
5758 	while (tmp != NULL) {
5759 	    if (xmlStrEqual(name, tmp->name)) {
5760 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5761 	  "standalone: attribute notation value token %s duplicated\n",
5762 				 name, NULL);
5763 		if (!xmlDictOwns(ctxt->dict, name))
5764 		    xmlFree((xmlChar *) name);
5765 		break;
5766 	    }
5767 	    tmp = tmp->next;
5768 	}
5769 	if (tmp == NULL) {
5770 	    cur = xmlCreateEnumeration(name);
5771 	    if (cur == NULL) {
5772                 xmlFreeEnumeration(ret);
5773                 return(NULL);
5774             }
5775 	    if (last == NULL) ret = last = cur;
5776 	    else {
5777 		last->next = cur;
5778 		last = cur;
5779 	    }
5780 	}
5781 	SKIP_BLANKS;
5782     } while (RAW == '|');
5783     if (RAW != ')') {
5784 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5785         xmlFreeEnumeration(ret);
5786 	return(NULL);
5787     }
5788     NEXT;
5789     return(ret);
5790 }
5791 
5792 /**
5793  * xmlParseEnumerationType:
5794  * @ctxt:  an XML parser context
5795  *
5796  * parse an Enumeration attribute type.
5797  *
5798  * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5799  *
5800  * [ VC: Enumeration ]
5801  * Values of this type must match one of the Nmtoken tokens in
5802  * the declaration
5803  *
5804  * Returns: the enumeration attribute tree built while parsing
5805  */
5806 
5807 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5808 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5809     xmlChar *name;
5810     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5811 
5812     if (RAW != '(') {
5813 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5814 	return(NULL);
5815     }
5816     SHRINK;
5817     do {
5818         NEXT;
5819 	SKIP_BLANKS;
5820         name = xmlParseNmtoken(ctxt);
5821 	if (name == NULL) {
5822 	    xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5823 	    return(ret);
5824 	}
5825 	tmp = ret;
5826 	while (tmp != NULL) {
5827 	    if (xmlStrEqual(name, tmp->name)) {
5828 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5829 	  "standalone: attribute enumeration value token %s duplicated\n",
5830 				 name, NULL);
5831 		if (!xmlDictOwns(ctxt->dict, name))
5832 		    xmlFree(name);
5833 		break;
5834 	    }
5835 	    tmp = tmp->next;
5836 	}
5837 	if (tmp == NULL) {
5838 	    cur = xmlCreateEnumeration(name);
5839 	    if (!xmlDictOwns(ctxt->dict, name))
5840 		xmlFree(name);
5841 	    if (cur == NULL) {
5842                 xmlFreeEnumeration(ret);
5843                 return(NULL);
5844             }
5845 	    if (last == NULL) ret = last = cur;
5846 	    else {
5847 		last->next = cur;
5848 		last = cur;
5849 	    }
5850 	}
5851 	SKIP_BLANKS;
5852     } while (RAW == '|');
5853     if (RAW != ')') {
5854 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5855 	return(ret);
5856     }
5857     NEXT;
5858     return(ret);
5859 }
5860 
5861 /**
5862  * xmlParseEnumeratedType:
5863  * @ctxt:  an XML parser context
5864  * @tree:  the enumeration tree built while parsing
5865  *
5866  * parse an Enumerated attribute type.
5867  *
5868  * [57] EnumeratedType ::= NotationType | Enumeration
5869  *
5870  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5871  *
5872  *
5873  * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5874  */
5875 
5876 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5877 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5878     if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5879 	SKIP(8);
5880 	if (SKIP_BLANKS == 0) {
5881 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5882 			   "Space required after 'NOTATION'\n");
5883 	    return(0);
5884 	}
5885 	*tree = xmlParseNotationType(ctxt);
5886 	if (*tree == NULL) return(0);
5887 	return(XML_ATTRIBUTE_NOTATION);
5888     }
5889     *tree = xmlParseEnumerationType(ctxt);
5890     if (*tree == NULL) return(0);
5891     return(XML_ATTRIBUTE_ENUMERATION);
5892 }
5893 
5894 /**
5895  * xmlParseAttributeType:
5896  * @ctxt:  an XML parser context
5897  * @tree:  the enumeration tree built while parsing
5898  *
5899  * parse the Attribute list def for an element
5900  *
5901  * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5902  *
5903  * [55] StringType ::= 'CDATA'
5904  *
5905  * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5906  *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5907  *
5908  * Validity constraints for attribute values syntax are checked in
5909  * xmlValidateAttributeValue()
5910  *
5911  * [ VC: ID ]
5912  * Values of type ID must match the Name production. A name must not
5913  * appear more than once in an XML document as a value of this type;
5914  * i.e., ID values must uniquely identify the elements which bear them.
5915  *
5916  * [ VC: One ID per Element Type ]
5917  * No element type may have more than one ID attribute specified.
5918  *
5919  * [ VC: ID Attribute Default ]
5920  * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5921  *
5922  * [ VC: IDREF ]
5923  * Values of type IDREF must match the Name production, and values
5924  * of type IDREFS must match Names; each IDREF Name must match the value
5925  * of an ID attribute on some element in the XML document; i.e. IDREF
5926  * values must match the value of some ID attribute.
5927  *
5928  * [ VC: Entity Name ]
5929  * Values of type ENTITY must match the Name production, values
5930  * of type ENTITIES must match Names; each Entity Name must match the
5931  * name of an unparsed entity declared in the DTD.
5932  *
5933  * [ VC: Name Token ]
5934  * Values of type NMTOKEN must match the Nmtoken production; values
5935  * of type NMTOKENS must match Nmtokens.
5936  *
5937  * Returns the attribute type
5938  */
5939 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5940 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5941     SHRINK;
5942     if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5943 	SKIP(5);
5944 	return(XML_ATTRIBUTE_CDATA);
5945      } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5946 	SKIP(6);
5947 	return(XML_ATTRIBUTE_IDREFS);
5948      } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5949 	SKIP(5);
5950 	return(XML_ATTRIBUTE_IDREF);
5951      } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5952         SKIP(2);
5953 	return(XML_ATTRIBUTE_ID);
5954      } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5955 	SKIP(6);
5956 	return(XML_ATTRIBUTE_ENTITY);
5957      } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5958 	SKIP(8);
5959 	return(XML_ATTRIBUTE_ENTITIES);
5960      } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5961 	SKIP(8);
5962 	return(XML_ATTRIBUTE_NMTOKENS);
5963      } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5964 	SKIP(7);
5965 	return(XML_ATTRIBUTE_NMTOKEN);
5966      }
5967      return(xmlParseEnumeratedType(ctxt, tree));
5968 }
5969 
5970 /**
5971  * xmlParseAttributeListDecl:
5972  * @ctxt:  an XML parser context
5973  *
5974  * : parse the Attribute list def for an element
5975  *
5976  * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5977  *
5978  * [53] AttDef ::= S Name S AttType S DefaultDecl
5979  *
5980  */
5981 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)5982 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5983     const xmlChar *elemName;
5984     const xmlChar *attrName;
5985     xmlEnumerationPtr tree;
5986 
5987     if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5988 	int inputid = ctxt->input->id;
5989 
5990 	SKIP(9);
5991 	if (SKIP_BLANKS == 0) {
5992 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5993 		                 "Space required after '<!ATTLIST'\n");
5994 	}
5995         elemName = xmlParseName(ctxt);
5996 	if (elemName == NULL) {
5997 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5998 			   "ATTLIST: no name for Element\n");
5999 	    return;
6000 	}
6001 	SKIP_BLANKS;
6002 	GROW;
6003 	while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6004 	    int type;
6005 	    int def;
6006 	    xmlChar *defaultValue = NULL;
6007 
6008 	    GROW;
6009             tree = NULL;
6010 	    attrName = xmlParseName(ctxt);
6011 	    if (attrName == NULL) {
6012 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6013 			       "ATTLIST: no name for Attribute\n");
6014 		break;
6015 	    }
6016 	    GROW;
6017 	    if (SKIP_BLANKS == 0) {
6018 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6019 		        "Space required after the attribute name\n");
6020 		break;
6021 	    }
6022 
6023 	    type = xmlParseAttributeType(ctxt, &tree);
6024 	    if (type <= 0) {
6025 	        break;
6026 	    }
6027 
6028 	    GROW;
6029 	    if (SKIP_BLANKS == 0) {
6030 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6031 			       "Space required after the attribute type\n");
6032 	        if (tree != NULL)
6033 		    xmlFreeEnumeration(tree);
6034 		break;
6035 	    }
6036 
6037 	    def = xmlParseDefaultDecl(ctxt, &defaultValue);
6038 	    if (def <= 0) {
6039                 if (defaultValue != NULL)
6040 		    xmlFree(defaultValue);
6041 	        if (tree != NULL)
6042 		    xmlFreeEnumeration(tree);
6043 	        break;
6044 	    }
6045 	    if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6046 	        xmlAttrNormalizeSpace(defaultValue, defaultValue);
6047 
6048 	    GROW;
6049             if (RAW != '>') {
6050 		if (SKIP_BLANKS == 0) {
6051 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6052 			"Space required after the attribute default value\n");
6053 		    if (defaultValue != NULL)
6054 			xmlFree(defaultValue);
6055 		    if (tree != NULL)
6056 			xmlFreeEnumeration(tree);
6057 		    break;
6058 		}
6059 	    }
6060 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6061 		(ctxt->sax->attributeDecl != NULL))
6062 		ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6063 	                        type, def, defaultValue, tree);
6064 	    else if (tree != NULL)
6065 		xmlFreeEnumeration(tree);
6066 
6067 	    if ((ctxt->sax2) && (defaultValue != NULL) &&
6068 	        (def != XML_ATTRIBUTE_IMPLIED) &&
6069 		(def != XML_ATTRIBUTE_REQUIRED)) {
6070 		xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6071 	    }
6072 	    if (ctxt->sax2) {
6073 		xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6074 	    }
6075 	    if (defaultValue != NULL)
6076 	        xmlFree(defaultValue);
6077 	    GROW;
6078 	}
6079 	if (RAW == '>') {
6080 	    if (inputid != ctxt->input->id) {
6081 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6082                                "Attribute list declaration doesn't start and"
6083                                " stop in the same entity\n");
6084 	    }
6085 	    NEXT;
6086 	}
6087     }
6088 }
6089 
6090 /**
6091  * xmlParseElementMixedContentDecl:
6092  * @ctxt:  an XML parser context
6093  * @inputchk:  the input used for the current entity, needed for boundary checks
6094  *
6095  * parse the declaration for a Mixed Element content
6096  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6097  *
6098  * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6099  *                '(' S? '#PCDATA' S? ')'
6100  *
6101  * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6102  *
6103  * [ VC: No Duplicate Types ]
6104  * The same name must not appear more than once in a single
6105  * mixed-content declaration.
6106  *
6107  * returns: the list of the xmlElementContentPtr describing the element choices
6108  */
6109 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6110 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6111     xmlElementContentPtr ret = NULL, cur = NULL, n;
6112     const xmlChar *elem = NULL;
6113 
6114     GROW;
6115     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6116 	SKIP(7);
6117 	SKIP_BLANKS;
6118 	SHRINK;
6119 	if (RAW == ')') {
6120 	    if (ctxt->input->id != inputchk) {
6121 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6122                                "Element content declaration doesn't start and"
6123                                " stop in the same entity\n");
6124 	    }
6125 	    NEXT;
6126 	    ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6127 	    if (ret == NULL)
6128 	        return(NULL);
6129 	    if (RAW == '*') {
6130 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6131 		NEXT;
6132 	    }
6133 	    return(ret);
6134 	}
6135 	if ((RAW == '(') || (RAW == '|')) {
6136 	    ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6137 	    if (ret == NULL) return(NULL);
6138 	}
6139 	while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6140 	    NEXT;
6141 	    if (elem == NULL) {
6142 	        ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6143 		if (ret == NULL) {
6144 		    xmlFreeDocElementContent(ctxt->myDoc, cur);
6145                     return(NULL);
6146                 }
6147 		ret->c1 = cur;
6148 		if (cur != NULL)
6149 		    cur->parent = ret;
6150 		cur = ret;
6151 	    } else {
6152 	        n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6153 		if (n == NULL) {
6154 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6155                     return(NULL);
6156                 }
6157 		n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6158 		if (n->c1 != NULL)
6159 		    n->c1->parent = n;
6160 	        cur->c2 = n;
6161 		if (n != NULL)
6162 		    n->parent = cur;
6163 		cur = n;
6164 	    }
6165 	    SKIP_BLANKS;
6166 	    elem = xmlParseName(ctxt);
6167 	    if (elem == NULL) {
6168 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6169 			"xmlParseElementMixedContentDecl : Name expected\n");
6170 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6171 		return(NULL);
6172 	    }
6173 	    SKIP_BLANKS;
6174 	    GROW;
6175 	}
6176 	if ((RAW == ')') && (NXT(1) == '*')) {
6177 	    if (elem != NULL) {
6178 		cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6179 		                               XML_ELEMENT_CONTENT_ELEMENT);
6180 		if (cur->c2 != NULL)
6181 		    cur->c2->parent = cur;
6182             }
6183             if (ret != NULL)
6184                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6185 	    if (ctxt->input->id != inputchk) {
6186 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6187                                "Element content declaration doesn't start and"
6188                                " stop in the same entity\n");
6189 	    }
6190 	    SKIP(2);
6191 	} else {
6192 	    xmlFreeDocElementContent(ctxt->myDoc, ret);
6193 	    xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6194 	    return(NULL);
6195 	}
6196 
6197     } else {
6198 	xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6199     }
6200     return(ret);
6201 }
6202 
6203 /**
6204  * xmlParseElementChildrenContentDeclPriv:
6205  * @ctxt:  an XML parser context
6206  * @inputchk:  the input used for the current entity, needed for boundary checks
6207  * @depth: the level of recursion
6208  *
6209  * parse the declaration for a Mixed Element content
6210  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6211  *
6212  *
6213  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6214  *
6215  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6216  *
6217  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6218  *
6219  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6220  *
6221  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6222  * TODO Parameter-entity replacement text must be properly nested
6223  *	with parenthesized groups. That is to say, if either of the
6224  *	opening or closing parentheses in a choice, seq, or Mixed
6225  *	construct is contained in the replacement text for a parameter
6226  *	entity, both must be contained in the same replacement text. For
6227  *	interoperability, if a parameter-entity reference appears in a
6228  *	choice, seq, or Mixed construct, its replacement text should not
6229  *	be empty, and neither the first nor last non-blank character of
6230  *	the replacement text should be a connector (| or ,).
6231  *
6232  * Returns the tree of xmlElementContentPtr describing the element
6233  *          hierarchy.
6234  */
6235 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6236 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6237                                        int depth) {
6238     xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6239     const xmlChar *elem;
6240     xmlChar type = 0;
6241 
6242     if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6243         (depth >  2048)) {
6244         xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6245 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6246                           depth);
6247 	return(NULL);
6248     }
6249     SKIP_BLANKS;
6250     GROW;
6251     if (RAW == '(') {
6252 	int inputid = ctxt->input->id;
6253 
6254         /* Recurse on first child */
6255 	NEXT;
6256 	SKIP_BLANKS;
6257         cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6258                                                            depth + 1);
6259         if (cur == NULL)
6260             return(NULL);
6261 	SKIP_BLANKS;
6262 	GROW;
6263     } else {
6264 	elem = xmlParseName(ctxt);
6265 	if (elem == NULL) {
6266 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6267 	    return(NULL);
6268 	}
6269         cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6270 	if (cur == NULL) {
6271 	    xmlErrMemory(ctxt, NULL);
6272 	    return(NULL);
6273 	}
6274 	GROW;
6275 	if (RAW == '?') {
6276 	    cur->ocur = XML_ELEMENT_CONTENT_OPT;
6277 	    NEXT;
6278 	} else if (RAW == '*') {
6279 	    cur->ocur = XML_ELEMENT_CONTENT_MULT;
6280 	    NEXT;
6281 	} else if (RAW == '+') {
6282 	    cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6283 	    NEXT;
6284 	} else {
6285 	    cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6286 	}
6287 	GROW;
6288     }
6289     SKIP_BLANKS;
6290     SHRINK;
6291     while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6292         /*
6293 	 * Each loop we parse one separator and one element.
6294 	 */
6295         if (RAW == ',') {
6296 	    if (type == 0) type = CUR;
6297 
6298 	    /*
6299 	     * Detect "Name | Name , Name" error
6300 	     */
6301 	    else if (type != CUR) {
6302 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6303 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6304 		                  type);
6305 		if ((last != NULL) && (last != ret))
6306 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6307 		if (ret != NULL)
6308 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6309 		return(NULL);
6310 	    }
6311 	    NEXT;
6312 
6313 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6314 	    if (op == NULL) {
6315 		if ((last != NULL) && (last != ret))
6316 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6317 	        xmlFreeDocElementContent(ctxt->myDoc, ret);
6318 		return(NULL);
6319 	    }
6320 	    if (last == NULL) {
6321 		op->c1 = ret;
6322 		if (ret != NULL)
6323 		    ret->parent = op;
6324 		ret = cur = op;
6325 	    } else {
6326 	        cur->c2 = op;
6327 		if (op != NULL)
6328 		    op->parent = cur;
6329 		op->c1 = last;
6330 		if (last != NULL)
6331 		    last->parent = op;
6332 		cur =op;
6333 		last = NULL;
6334 	    }
6335 	} else if (RAW == '|') {
6336 	    if (type == 0) type = CUR;
6337 
6338 	    /*
6339 	     * Detect "Name , Name | Name" error
6340 	     */
6341 	    else if (type != CUR) {
6342 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6343 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6344 				  type);
6345 		if ((last != NULL) && (last != ret))
6346 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6347 		if (ret != NULL)
6348 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6349 		return(NULL);
6350 	    }
6351 	    NEXT;
6352 
6353 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6354 	    if (op == NULL) {
6355 		if ((last != NULL) && (last != ret))
6356 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6357 		if (ret != NULL)
6358 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6359 		return(NULL);
6360 	    }
6361 	    if (last == NULL) {
6362 		op->c1 = ret;
6363 		if (ret != NULL)
6364 		    ret->parent = op;
6365 		ret = cur = op;
6366 	    } else {
6367 	        cur->c2 = op;
6368 		if (op != NULL)
6369 		    op->parent = cur;
6370 		op->c1 = last;
6371 		if (last != NULL)
6372 		    last->parent = op;
6373 		cur =op;
6374 		last = NULL;
6375 	    }
6376 	} else {
6377 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6378 	    if ((last != NULL) && (last != ret))
6379 	        xmlFreeDocElementContent(ctxt->myDoc, last);
6380 	    if (ret != NULL)
6381 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6382 	    return(NULL);
6383 	}
6384 	GROW;
6385 	SKIP_BLANKS;
6386 	GROW;
6387 	if (RAW == '(') {
6388 	    int inputid = ctxt->input->id;
6389 	    /* Recurse on second child */
6390 	    NEXT;
6391 	    SKIP_BLANKS;
6392 	    last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6393                                                           depth + 1);
6394             if (last == NULL) {
6395 		if (ret != NULL)
6396 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6397 		return(NULL);
6398             }
6399 	    SKIP_BLANKS;
6400 	} else {
6401 	    elem = xmlParseName(ctxt);
6402 	    if (elem == NULL) {
6403 		xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6404 		if (ret != NULL)
6405 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6406 		return(NULL);
6407 	    }
6408 	    last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6409 	    if (last == NULL) {
6410 		if (ret != NULL)
6411 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6412 		return(NULL);
6413 	    }
6414 	    if (RAW == '?') {
6415 		last->ocur = XML_ELEMENT_CONTENT_OPT;
6416 		NEXT;
6417 	    } else if (RAW == '*') {
6418 		last->ocur = XML_ELEMENT_CONTENT_MULT;
6419 		NEXT;
6420 	    } else if (RAW == '+') {
6421 		last->ocur = XML_ELEMENT_CONTENT_PLUS;
6422 		NEXT;
6423 	    } else {
6424 		last->ocur = XML_ELEMENT_CONTENT_ONCE;
6425 	    }
6426 	}
6427 	SKIP_BLANKS;
6428 	GROW;
6429     }
6430     if ((cur != NULL) && (last != NULL)) {
6431         cur->c2 = last;
6432 	if (last != NULL)
6433 	    last->parent = cur;
6434     }
6435     if (ctxt->input->id != inputchk) {
6436 	xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6437                        "Element content declaration doesn't start and stop in"
6438                        " the same entity\n");
6439     }
6440     NEXT;
6441     if (RAW == '?') {
6442 	if (ret != NULL) {
6443 	    if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6444 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6445 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6446 	    else
6447 	        ret->ocur = XML_ELEMENT_CONTENT_OPT;
6448 	}
6449 	NEXT;
6450     } else if (RAW == '*') {
6451 	if (ret != NULL) {
6452 	    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6453 	    cur = ret;
6454 	    /*
6455 	     * Some normalization:
6456 	     * (a | b* | c?)* == (a | b | c)*
6457 	     */
6458 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6459 		if ((cur->c1 != NULL) &&
6460 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6461 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6462 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6463 		if ((cur->c2 != NULL) &&
6464 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6465 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6466 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6467 		cur = cur->c2;
6468 	    }
6469 	}
6470 	NEXT;
6471     } else if (RAW == '+') {
6472 	if (ret != NULL) {
6473 	    int found = 0;
6474 
6475 	    if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6476 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6477 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6478 	    else
6479 	        ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6480 	    /*
6481 	     * Some normalization:
6482 	     * (a | b*)+ == (a | b)*
6483 	     * (a | b?)+ == (a | b)*
6484 	     */
6485 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6486 		if ((cur->c1 != NULL) &&
6487 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6488 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6489 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6490 		    found = 1;
6491 		}
6492 		if ((cur->c2 != NULL) &&
6493 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6494 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6495 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6496 		    found = 1;
6497 		}
6498 		cur = cur->c2;
6499 	    }
6500 	    if (found)
6501 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6502 	}
6503 	NEXT;
6504     }
6505     return(ret);
6506 }
6507 
6508 /**
6509  * xmlParseElementChildrenContentDecl:
6510  * @ctxt:  an XML parser context
6511  * @inputchk:  the input used for the current entity, needed for boundary checks
6512  *
6513  * parse the declaration for a Mixed Element content
6514  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6515  *
6516  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6517  *
6518  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6519  *
6520  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6521  *
6522  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6523  *
6524  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6525  * TODO Parameter-entity replacement text must be properly nested
6526  *	with parenthesized groups. That is to say, if either of the
6527  *	opening or closing parentheses in a choice, seq, or Mixed
6528  *	construct is contained in the replacement text for a parameter
6529  *	entity, both must be contained in the same replacement text. For
6530  *	interoperability, if a parameter-entity reference appears in a
6531  *	choice, seq, or Mixed construct, its replacement text should not
6532  *	be empty, and neither the first nor last non-blank character of
6533  *	the replacement text should be a connector (| or ,).
6534  *
6535  * Returns the tree of xmlElementContentPtr describing the element
6536  *          hierarchy.
6537  */
6538 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6539 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6540     /* stub left for API/ABI compat */
6541     return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6542 }
6543 
6544 /**
6545  * xmlParseElementContentDecl:
6546  * @ctxt:  an XML parser context
6547  * @name:  the name of the element being defined.
6548  * @result:  the Element Content pointer will be stored here if any
6549  *
6550  * parse the declaration for an Element content either Mixed or Children,
6551  * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6552  *
6553  * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6554  *
6555  * returns: the type of element content XML_ELEMENT_TYPE_xxx
6556  */
6557 
6558 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6559 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6560                            xmlElementContentPtr *result) {
6561 
6562     xmlElementContentPtr tree = NULL;
6563     int inputid = ctxt->input->id;
6564     int res;
6565 
6566     *result = NULL;
6567 
6568     if (RAW != '(') {
6569 	xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6570 		"xmlParseElementContentDecl : %s '(' expected\n", name);
6571 	return(-1);
6572     }
6573     NEXT;
6574     GROW;
6575     if (ctxt->instate == XML_PARSER_EOF)
6576         return(-1);
6577     SKIP_BLANKS;
6578     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6579         tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6580 	res = XML_ELEMENT_TYPE_MIXED;
6581     } else {
6582         tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6583 	res = XML_ELEMENT_TYPE_ELEMENT;
6584     }
6585     SKIP_BLANKS;
6586     *result = tree;
6587     return(res);
6588 }
6589 
6590 /**
6591  * xmlParseElementDecl:
6592  * @ctxt:  an XML parser context
6593  *
6594  * parse an Element declaration.
6595  *
6596  * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6597  *
6598  * [ VC: Unique Element Type Declaration ]
6599  * No element type may be declared more than once
6600  *
6601  * Returns the type of the element, or -1 in case of error
6602  */
6603 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6604 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6605     const xmlChar *name;
6606     int ret = -1;
6607     xmlElementContentPtr content  = NULL;
6608 
6609     /* GROW; done in the caller */
6610     if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6611 	int inputid = ctxt->input->id;
6612 
6613 	SKIP(9);
6614 	if (SKIP_BLANKS == 0) {
6615 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6616 		           "Space required after 'ELEMENT'\n");
6617 	    return(-1);
6618 	}
6619         name = xmlParseName(ctxt);
6620 	if (name == NULL) {
6621 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6622 			   "xmlParseElementDecl: no name for Element\n");
6623 	    return(-1);
6624 	}
6625 	if (SKIP_BLANKS == 0) {
6626 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6627 			   "Space required after the element name\n");
6628 	}
6629 	if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6630 	    SKIP(5);
6631 	    /*
6632 	     * Element must always be empty.
6633 	     */
6634 	    ret = XML_ELEMENT_TYPE_EMPTY;
6635 	} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6636 	           (NXT(2) == 'Y')) {
6637 	    SKIP(3);
6638 	    /*
6639 	     * Element is a generic container.
6640 	     */
6641 	    ret = XML_ELEMENT_TYPE_ANY;
6642 	} else if (RAW == '(') {
6643 	    ret = xmlParseElementContentDecl(ctxt, name, &content);
6644 	} else {
6645 	    /*
6646 	     * [ WFC: PEs in Internal Subset ] error handling.
6647 	     */
6648 	    if ((RAW == '%') && (ctxt->external == 0) &&
6649 	        (ctxt->inputNr == 1)) {
6650 		xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6651 	  "PEReference: forbidden within markup decl in internal subset\n");
6652 	    } else {
6653 		xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6654 		      "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6655             }
6656 	    return(-1);
6657 	}
6658 
6659 	SKIP_BLANKS;
6660 
6661 	if (RAW != '>') {
6662 	    xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6663 	    if (content != NULL) {
6664 		xmlFreeDocElementContent(ctxt->myDoc, content);
6665 	    }
6666 	} else {
6667 	    if (inputid != ctxt->input->id) {
6668 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6669                                "Element declaration doesn't start and stop in"
6670                                " the same entity\n");
6671 	    }
6672 
6673 	    NEXT;
6674 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6675 		(ctxt->sax->elementDecl != NULL)) {
6676 		if (content != NULL)
6677 		    content->parent = NULL;
6678 	        ctxt->sax->elementDecl(ctxt->userData, name, ret,
6679 		                       content);
6680 		if ((content != NULL) && (content->parent == NULL)) {
6681 		    /*
6682 		     * this is a trick: if xmlAddElementDecl is called,
6683 		     * instead of copying the full tree it is plugged directly
6684 		     * if called from the parser. Avoid duplicating the
6685 		     * interfaces or change the API/ABI
6686 		     */
6687 		    xmlFreeDocElementContent(ctxt->myDoc, content);
6688 		}
6689 	    } else if (content != NULL) {
6690 		xmlFreeDocElementContent(ctxt->myDoc, content);
6691 	    }
6692 	}
6693     }
6694     return(ret);
6695 }
6696 
6697 /**
6698  * xmlParseConditionalSections
6699  * @ctxt:  an XML parser context
6700  *
6701  * [61] conditionalSect ::= includeSect | ignoreSect
6702  * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6703  * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6704  * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6705  * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6706  */
6707 
6708 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6709 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6710     int *inputIds = NULL;
6711     size_t inputIdsSize = 0;
6712     size_t depth = 0;
6713 
6714     while (ctxt->instate != XML_PARSER_EOF) {
6715         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6716             int id = ctxt->input->id;
6717 
6718             SKIP(3);
6719             SKIP_BLANKS;
6720 
6721             if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6722                 SKIP(7);
6723                 SKIP_BLANKS;
6724                 if (RAW != '[') {
6725                     xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6726                     xmlHaltParser(ctxt);
6727                     goto error;
6728                 }
6729                 if (ctxt->input->id != id) {
6730                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6731                                    "All markup of the conditional section is"
6732                                    " not in the same entity\n");
6733                 }
6734                 NEXT;
6735 
6736                 if (inputIdsSize <= depth) {
6737                     int *tmp;
6738 
6739                     inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6740                     tmp = (int *) xmlRealloc(inputIds,
6741                             inputIdsSize * sizeof(int));
6742                     if (tmp == NULL) {
6743                         xmlErrMemory(ctxt, NULL);
6744                         goto error;
6745                     }
6746                     inputIds = tmp;
6747                 }
6748                 inputIds[depth] = id;
6749                 depth++;
6750             } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6751                 int state;
6752                 xmlParserInputState instate;
6753                 size_t ignoreDepth = 0;
6754 
6755                 SKIP(6);
6756                 SKIP_BLANKS;
6757                 if (RAW != '[') {
6758                     xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6759                     xmlHaltParser(ctxt);
6760                     goto error;
6761                 }
6762                 if (ctxt->input->id != id) {
6763                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6764                                    "All markup of the conditional section is"
6765                                    " not in the same entity\n");
6766                 }
6767                 NEXT;
6768 
6769                 /*
6770                  * Parse up to the end of the conditional section but disable
6771                  * SAX event generating DTD building in the meantime
6772                  */
6773                 state = ctxt->disableSAX;
6774                 instate = ctxt->instate;
6775                 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6776                 ctxt->instate = XML_PARSER_IGNORE;
6777 
6778                 while (RAW != 0) {
6779                     if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6780                         SKIP(3);
6781                         ignoreDepth++;
6782                         /* Check for integer overflow */
6783                         if (ignoreDepth == 0) {
6784                             xmlErrMemory(ctxt, NULL);
6785                             goto error;
6786                         }
6787                     } else if ((RAW == ']') && (NXT(1) == ']') &&
6788                                (NXT(2) == '>')) {
6789                         if (ignoreDepth == 0)
6790                             break;
6791                         SKIP(3);
6792                         ignoreDepth--;
6793                     } else {
6794                         NEXT;
6795                     }
6796                 }
6797 
6798                 ctxt->disableSAX = state;
6799                 ctxt->instate = instate;
6800 
6801 		if (RAW == 0) {
6802 		    xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6803                     goto error;
6804 		}
6805                 if (ctxt->input->id != id) {
6806                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6807                                    "All markup of the conditional section is"
6808                                    " not in the same entity\n");
6809                 }
6810                 SKIP(3);
6811             } else {
6812                 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6813                 xmlHaltParser(ctxt);
6814                 goto error;
6815             }
6816         } else if ((depth > 0) &&
6817                    (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6818             depth--;
6819             if (ctxt->input->id != inputIds[depth]) {
6820                 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6821                                "All markup of the conditional section is not"
6822                                " in the same entity\n");
6823             }
6824             SKIP(3);
6825         } else {
6826             const xmlChar *check = CUR_PTR;
6827             unsigned int cons = ctxt->input->consumed;
6828 
6829             xmlParseMarkupDecl(ctxt);
6830 
6831             if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6832                 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6833                 xmlHaltParser(ctxt);
6834                 goto error;
6835             }
6836         }
6837 
6838         if (depth == 0)
6839             break;
6840 
6841         SKIP_BLANKS;
6842         GROW;
6843     }
6844 
6845 error:
6846     xmlFree(inputIds);
6847 }
6848 
6849 /**
6850  * xmlParseMarkupDecl:
6851  * @ctxt:  an XML parser context
6852  *
6853  * parse Markup declarations
6854  *
6855  * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6856  *                     NotationDecl | PI | Comment
6857  *
6858  * [ VC: Proper Declaration/PE Nesting ]
6859  * Parameter-entity replacement text must be properly nested with
6860  * markup declarations. That is to say, if either the first character
6861  * or the last character of a markup declaration (markupdecl above) is
6862  * contained in the replacement text for a parameter-entity reference,
6863  * both must be contained in the same replacement text.
6864  *
6865  * [ WFC: PEs in Internal Subset ]
6866  * In the internal DTD subset, parameter-entity references can occur
6867  * only where markup declarations can occur, not within markup declarations.
6868  * (This does not apply to references that occur in external parameter
6869  * entities or to the external subset.)
6870  */
6871 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6872 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6873     GROW;
6874     if (CUR == '<') {
6875         if (NXT(1) == '!') {
6876 	    switch (NXT(2)) {
6877 	        case 'E':
6878 		    if (NXT(3) == 'L')
6879 			xmlParseElementDecl(ctxt);
6880 		    else if (NXT(3) == 'N')
6881 			xmlParseEntityDecl(ctxt);
6882 		    break;
6883 	        case 'A':
6884 		    xmlParseAttributeListDecl(ctxt);
6885 		    break;
6886 	        case 'N':
6887 		    xmlParseNotationDecl(ctxt);
6888 		    break;
6889 	        case '-':
6890 		    xmlParseComment(ctxt);
6891 		    break;
6892 		default:
6893 		    /* there is an error but it will be detected later */
6894 		    break;
6895 	    }
6896 	} else if (NXT(1) == '?') {
6897 	    xmlParsePI(ctxt);
6898 	}
6899     }
6900 
6901     /*
6902      * detect requirement to exit there and act accordingly
6903      * and avoid having instate overridden later on
6904      */
6905     if (ctxt->instate == XML_PARSER_EOF)
6906         return;
6907 
6908     ctxt->instate = XML_PARSER_DTD;
6909 }
6910 
6911 /**
6912  * xmlParseTextDecl:
6913  * @ctxt:  an XML parser context
6914  *
6915  * parse an XML declaration header for external entities
6916  *
6917  * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6918  */
6919 
6920 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)6921 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6922     xmlChar *version;
6923     const xmlChar *encoding;
6924     int oldstate;
6925 
6926     /*
6927      * We know that '<?xml' is here.
6928      */
6929     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6930 	SKIP(5);
6931     } else {
6932 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6933 	return;
6934     }
6935 
6936     /* Avoid expansion of parameter entities when skipping blanks. */
6937     oldstate = ctxt->instate;
6938     ctxt->instate = XML_PARSER_START;
6939 
6940     if (SKIP_BLANKS == 0) {
6941 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6942 		       "Space needed after '<?xml'\n");
6943     }
6944 
6945     /*
6946      * We may have the VersionInfo here.
6947      */
6948     version = xmlParseVersionInfo(ctxt);
6949     if (version == NULL)
6950 	version = xmlCharStrdup(XML_DEFAULT_VERSION);
6951     else {
6952 	if (SKIP_BLANKS == 0) {
6953 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6954 		           "Space needed here\n");
6955 	}
6956     }
6957     ctxt->input->version = version;
6958 
6959     /*
6960      * We must have the encoding declaration
6961      */
6962     encoding = xmlParseEncodingDecl(ctxt);
6963     if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6964 	/*
6965 	 * The XML REC instructs us to stop parsing right here
6966 	 */
6967         ctxt->instate = oldstate;
6968         return;
6969     }
6970     if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6971 	xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6972 		       "Missing encoding in text declaration\n");
6973     }
6974 
6975     SKIP_BLANKS;
6976     if ((RAW == '?') && (NXT(1) == '>')) {
6977         SKIP(2);
6978     } else if (RAW == '>') {
6979         /* Deprecated old WD ... */
6980 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6981 	NEXT;
6982     } else {
6983 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6984 	MOVETO_ENDTAG(CUR_PTR);
6985 	NEXT;
6986     }
6987 
6988     ctxt->instate = oldstate;
6989 }
6990 
6991 /**
6992  * xmlParseExternalSubset:
6993  * @ctxt:  an XML parser context
6994  * @ExternalID: the external identifier
6995  * @SystemID: the system identifier (or URL)
6996  *
6997  * parse Markup declarations from an external subset
6998  *
6999  * [30] extSubset ::= textDecl? extSubsetDecl
7000  *
7001  * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7002  */
7003 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)7004 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7005                        const xmlChar *SystemID) {
7006     xmlDetectSAX2(ctxt);
7007     GROW;
7008 
7009     if ((ctxt->encoding == NULL) &&
7010         (ctxt->input->end - ctxt->input->cur >= 4)) {
7011         xmlChar start[4];
7012 	xmlCharEncoding enc;
7013 
7014 	start[0] = RAW;
7015 	start[1] = NXT(1);
7016 	start[2] = NXT(2);
7017 	start[3] = NXT(3);
7018 	enc = xmlDetectCharEncoding(start, 4);
7019 	if (enc != XML_CHAR_ENCODING_NONE)
7020 	    xmlSwitchEncoding(ctxt, enc);
7021     }
7022 
7023     if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7024 	xmlParseTextDecl(ctxt);
7025 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7026 	    /*
7027 	     * The XML REC instructs us to stop parsing right here
7028 	     */
7029 	    xmlHaltParser(ctxt);
7030 	    return;
7031 	}
7032     }
7033     if (ctxt->myDoc == NULL) {
7034         ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7035 	if (ctxt->myDoc == NULL) {
7036 	    xmlErrMemory(ctxt, "New Doc failed");
7037 	    return;
7038 	}
7039 	ctxt->myDoc->properties = XML_DOC_INTERNAL;
7040     }
7041     if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7042         xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7043 
7044     ctxt->instate = XML_PARSER_DTD;
7045     ctxt->external = 1;
7046     SKIP_BLANKS;
7047     while (((RAW == '<') && (NXT(1) == '?')) ||
7048            ((RAW == '<') && (NXT(1) == '!')) ||
7049 	   (RAW == '%')) {
7050 	const xmlChar *check = CUR_PTR;
7051 	unsigned int cons = ctxt->input->consumed;
7052 
7053 	GROW;
7054         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7055 	    xmlParseConditionalSections(ctxt);
7056 	} else
7057 	    xmlParseMarkupDecl(ctxt);
7058         SKIP_BLANKS;
7059 
7060 	if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7061 	    xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7062 	    break;
7063 	}
7064     }
7065 
7066     if (RAW != 0) {
7067 	xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7068     }
7069 
7070 }
7071 
7072 /**
7073  * xmlParseReference:
7074  * @ctxt:  an XML parser context
7075  *
7076  * parse and handle entity references in content, depending on the SAX
7077  * interface, this may end-up in a call to character() if this is a
7078  * CharRef, a predefined entity, if there is no reference() callback.
7079  * or if the parser was asked to switch to that mode.
7080  *
7081  * [67] Reference ::= EntityRef | CharRef
7082  */
7083 void
xmlParseReference(xmlParserCtxtPtr ctxt)7084 xmlParseReference(xmlParserCtxtPtr ctxt) {
7085     xmlEntityPtr ent;
7086     xmlChar *val;
7087     int was_checked;
7088     xmlNodePtr list = NULL;
7089     xmlParserErrors ret = XML_ERR_OK;
7090 
7091 
    /*
     * Overview of the steps below:
     *   1. character references are converted and reported right away;
     *   2. predefined entities are reported as characters;
     *   3. other entities have their content parsed when needed (see the
     *      condition guarding that phase);
     *   4. the result is delivered either through the reference() callback
     *      or by attaching nodes under ctxt->node.
     */
7092     if (RAW != '&')
7093         return;
7094 
7095     /*
7096      * Simple case of a CharRef
7097      */
7098     if (NXT(1) == '#') {
7099 	int i = 0;
7100 	xmlChar out[16];
	/*
	 * Sample the character following '&#' before xmlParseCharRef is
	 * called; it is only used further down to choose between the
	 * "#x%X" and "#%d" spellings.
	 */
7101 	int hex = NXT(2);
7102 	int value = xmlParseCharRef(ctxt);
7103 
	/*
	 * NOTE(review): presumably xmlParseCharRef returns 0 on error -
	 * confirm. A zero value is silently dropped here.
	 */
7104 	if (value == 0)
7105 	    return;
7106 	if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7107 	    /*
7108 	     * So we are using non-UTF-8 buffers
7109 	     * Check that the char fit on 8bits, if not
7110 	     * generate a CharRef.
7111 	     */
7112 	    if (value <= 0xFF) {
7113 		out[0] = value;
7114 		out[1] = 0;
7115 		if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7116 		    (!ctxt->disableSAX))
7117 		    ctxt->sax->characters(ctxt->userData, out, 1);
7118 	    } else {
7119 		if ((hex == 'x') || (hex == 'X'))
7120 		    snprintf((char *)out, sizeof(out), "#x%X", value);
7121 		else
7122 		    snprintf((char *)out, sizeof(out), "#%d", value);
7123 		if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7124 		    (!ctxt->disableSAX))
7125 		    ctxt->sax->reference(ctxt->userData, out);
7126 	    }
7127 	} else {
7128 	    /*
7129 	     * Just encode the value in UTF-8
7130 	     */
7131 	    COPY_BUF(0 ,out, i, value);
7132 	    out[i] = 0;
7133 	    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7134 		(!ctxt->disableSAX))
7135 		ctxt->sax->characters(ctxt->userData, out, i);
7136 	}
7137 	return;
7138     }
7139 
7140     /*
7141      * We are seeing an entity reference
7142      */
7143     ent = xmlParseEntityRef(ctxt);
7144     if (ent == NULL) return;
7145     if (!ctxt->wellFormed)
7146 	return;
    /*
     * Snapshot of ent->checked taken before the parsing phase below can
     * modify it. It is reset to 0 when that phase runs, so that the SAX
     * replay further down does not parse the entity a second time.
     */
7147     was_checked = ent->checked;
7148 
7149     /* special case of predefined entities */
7150     if ((ent->name == NULL) ||
7151         (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7152 	val = ent->content;
7153 	if (val == NULL) return;
7154 	/*
7155 	 * inline the entity.
7156 	 */
7157 	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7158 	    (!ctxt->disableSAX))
7159 	    ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7160 	return;
7161     }
7162 
7163     /*
7164      * The first reference to the entity trigger a parsing phase
7165      * where the ent->children is filled with the result from
7166      * the parsing.
7167      * Note: external parsed entities will not be loaded, it is not
7168      * required for a non-validating parser, unless the parsing option
7169      * of validating, or substituting entities were given. Doing so is
7170      * far more secure as the parser will only process data coming from
7171      * the document entity by default.
7172      */
7173     if (((ent->checked == 0) ||
7174          ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7175         ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7176          (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7177 	unsigned long oldnbent = ctxt->nbentities, diff;
7178 
7179 	/*
7180 	 * This is a bit hackish but this seems the best
7181 	 * way to make sure both SAX and DOM entity support
7182 	 * behaves okay.
7183 	 */
7184 	void *user_data;
7185 	if (ctxt->userData == ctxt)
7186 	    user_data = NULL;
7187 	else
7188 	    user_data = ctxt->userData;
7189 
7190 	/*
7191 	 * Check that this entity is well formed
7192 	 * 4.3.2: An internal general parsed entity is well-formed
7193 	 * if its replacement text matches the production labeled
7194 	 * content.
7195 	 */
7196 	if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7197 	    ctxt->depth++;
7198 	    ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7199 	                                              user_data, &list);
7200 	    ctxt->depth--;
7201 
7202 	} else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7203 	    ctxt->depth++;
7204 	    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7205 	                                   user_data, ctxt->depth, ent->URI,
7206 					   ent->ExternalID, &list);
7207 	    ctxt->depth--;
7208 	} else {
7209 	    ret = XML_ERR_ENTITY_PE_INTERNAL;
7210 	    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7211 			 "invalid entity type found\n", NULL);
7212 	}
7213 
7214 	/*
7215 	 * Store the number of entities needing parsing for this entity
7216 	 * content and do checkings
7217 	 */
	/*
	 * Encoding of ent->checked as written here: twice the number of
	 * entity references counted while parsing the content plus one
	 * (clamped to INT_MAX / 2 before doubling), with the low bit set
	 * when the content contains a '<' character.
	 */
7218         diff = ctxt->nbentities - oldnbent + 1;
7219         if (diff > INT_MAX / 2)
7220             diff = INT_MAX / 2;
7221         ent->checked = diff * 2;
7222 	if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7223 	    ent->checked |= 1;
7224 	if (ret == XML_ERR_ENTITY_LOOP) {
7225 	    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7226             xmlHaltParser(ctxt);
7227 	    xmlFreeNodeList(list);
7228 	    return;
7229 	}
7230 	if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7231 	    xmlFreeNodeList(list);
7232 	    return;
7233 	}
7234 
7235 	if ((ret == XML_ERR_OK) && (list != NULL)) {
7236 	    if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7237 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7238 		(ent->children == NULL)) {
7239 		ent->children = list;
7240                 /*
7241                  * Prune it directly in the generated document
7242                  * except for single text nodes.
7243                  */
7244                 if ((ctxt->replaceEntities == 0) ||
7245                     (ctxt->parseMode == XML_PARSE_READER) ||
7246                     ((list->type == XML_TEXT_NODE) &&
7247                      (list->next == NULL))) {
                    /*
                     * The entity keeps the nodes: they are reparented to
                     * the entity and list is cleared, so the code further
                     * down sees list == NULL.
                     */
7248                     ent->owner = 1;
7249                     while (list != NULL) {
7250                         list->parent = (xmlNodePtr) ent;
7251                         xmlSetTreeDoc(list, ent->doc);
7252                         if (list->next == NULL)
7253                             ent->last = list;
7254                         list = list->next;
7255                     }
7256                     list = NULL;
7257                 } else {
                    /*
                     * The nodes are reparented to the current node and
                     * list is left pointing at ent->children, so the code
                     * further down sees list != NULL.
                     */
7258                     ent->owner = 0;
7259                     while (list != NULL) {
7260                         list->parent = (xmlNodePtr) ctxt->node;
7261                         list->doc = ctxt->myDoc;
7262                         if (list->next == NULL)
7263                             ent->last = list;
7264                         list = list->next;
7265                     }
7266                     list = ent->children;
7267 #ifdef LIBXML_LEGACY_ENABLED
7268                     if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7269                         xmlAddEntityReference(ent, list, NULL);
7270 #endif /* LIBXML_LEGACY_ENABLED */
7271                 }
7272 	    } else {
7273 		xmlFreeNodeList(list);
7274 		list = NULL;
7275 	    }
7276 	} else if ((ret != XML_ERR_OK) &&
7277 		   (ret != XML_WAR_UNDECLARED_ENTITY)) {
7278 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7279 		     "Entity '%s' failed to parse\n", ent->name);
	    /* Truncate the stored content to an empty string. */
7280             if (ent->content != NULL)
7281                 ent->content[0] = 0;
7282 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
7283 	} else if (list != NULL) {
7284 	    xmlFreeNodeList(list);
7285 	    list = NULL;
7286 	}
7287 	if (ent->checked == 0)
7288 	    ent->checked = 2;
7289 
7290         /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7291         was_checked = 0;
7292     } else if (ent->checked != 1) {
	/*
	 * No parsing phase on this call: add the count stored in
	 * ent->checked (its value divided by two) to the running total.
	 */
7293 	ctxt->nbentities += ent->checked / 2;
7294     }
7295 
7296     /*
7297      * Now that the entity content has been gathered
7298      * provide it to the application, this can take different forms based
7299      * on the parsing modes.
7300      */
7301     if (ent->children == NULL) {
7302 	/*
7303 	 * Probably running in SAX mode and the callbacks don't
7304 	 * build the entity content. So unless we already went
7305 	 * though parsing for first checking go though the entity
7306 	 * content to generate callbacks associated to the entity
7307 	 */
7308 	if (was_checked != 0) {
7309 	    void *user_data;
7310 	    /*
7311 	     * This is a bit hackish but this seems the best
7312 	     * way to make sure both SAX and DOM entity support
7313 	     * behaves okay.
7314 	     */
7315 	    if (ctxt->userData == ctxt)
7316 		user_data = NULL;
7317 	    else
7318 		user_data = ctxt->userData;
7319 
7320 	    if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7321 		ctxt->depth++;
7322 		ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7323 				   ent->content, user_data, NULL);
7324 		ctxt->depth--;
7325 	    } else if (ent->etype ==
7326 		       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7327 		ctxt->depth++;
7328 		ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7329 			   ctxt->sax, user_data, ctxt->depth,
7330 			   ent->URI, ent->ExternalID, NULL);
7331 		ctxt->depth--;
7332 	    } else {
7333 		ret = XML_ERR_ENTITY_PE_INTERNAL;
7334 		xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7335 			     "invalid entity type found\n", NULL);
7336 	    }
	    /*
	     * NOTE(review): unlike the entity-loop handling in the parsing
	     * phase above, this path does not call xmlHaltParser - confirm
	     * whether that difference is intentional.
	     */
7337 	    if (ret == XML_ERR_ENTITY_LOOP) {
7338 		xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7339 		return;
7340 	    }
7341 	}
7342 	if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7343 	    (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7344 	    /*
7345 	     * Entity reference callback comes second, it's somewhat
7346 	     * superfluous but a compatibility to historical behaviour
7347 	     */
7348 	    ctxt->sax->reference(ctxt->userData, ent->name);
7349 	}
7350 	return;
7351     }
7352 
    /*
     * NOTE(review): the ent->children == NULL case returned just above, so
     * the entity does have children from here on; the wording of the next
     * comment and the "ent->children == NULL" tests below look stale.
     */
7353     /*
7354      * If we didn't get any children for the entity being built
7355      */
7356     if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7357 	(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7358 	/*
7359 	 * Create a node.
7360 	 */
7361 	ctxt->sax->reference(ctxt->userData, ent->name);
7362 	return;
7363     }
7364 
7365     if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7366 	/*
7367 	 * There is a problem on the handling of _private for entities
7368 	 * (bug 155816): Should we copy the content of the field from
7369 	 * the entity (possibly overwriting some value set by the user
7370 	 * when a copy is created), should we leave it alone, or should
7371 	 * we try to take care of different situations?  The problem
7372 	 * is exacerbated by the usage of this field by the xmlReader.
7373 	 * To fix this bug, we look at _private on the created node
7374 	 * and, if it's NULL, we copy in whatever was in the entity.
7375 	 * If it's not NULL we leave it alone.  This is somewhat of a
7376 	 * hack - maybe we should have further tests to determine
7377 	 * what to do.
7378 	 */
7379 	if ((ctxt->node != NULL) && (ent->children != NULL)) {
	    /*
	     * Three ways of attaching the content under ctxt->node follow:
	     *   - copy the entity children and add the copies;
	     *   - move the original children under ctxt->node and give
	     *     the entity fresh copies instead;
	     *   - plug the entity children in directly.
	     */
7380 	    /*
7381 	     * Seems we are generating the DOM content, do
7382 	     * a simple tree copy for all references except the first
7383 	     * In the first occurrence list contains the replacement.
7384 	     */
7385 	    if (((list == NULL) && (ent->owner == 0)) ||
7386 		(ctxt->parseMode == XML_PARSE_READER)) {
7387 		xmlNodePtr nw = NULL, cur, firstChild = NULL;
7388 
7389 		/*
7390 		 * We are copying here, make sure there is no abuse
7391 		 */
7392 		ctxt->sizeentcopy += ent->length + 5;
7393 		if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7394 		    return;
7395 
7396 		/*
7397 		 * when operating on a reader, the entities definitions
7398 		 * are always owning the entities subtree.
7399 		if (ctxt->parseMode == XML_PARSE_READER)
7400 		    ent->owner = 1;
7401 		 */
7402 
7403 		cur = ent->children;
7404 		while (cur != NULL) {
7405 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7406 		    if (nw != NULL) {
7407 			if (nw->_private == NULL)
7408 			    nw->_private = cur->_private;
7409 			if (firstChild == NULL){
7410 			    firstChild = nw;
7411 			}
7412 			nw = xmlAddChild(ctxt->node, nw);
7413 		    }
7414 		    if (cur == ent->last) {
7415 			/*
7416 			 * needed to detect some strange empty
7417 			 * node cases in the reader tests
7418 			 */
7419 			if ((ctxt->parseMode == XML_PARSE_READER) &&
7420 			    (nw != NULL) &&
7421 			    (nw->type == XML_ELEMENT_NODE) &&
7422 			    (nw->children == NULL))
7423 			    nw->extra = 1;
7424 
7425 			break;
7426 		    }
7427 		    cur = cur->next;
7428 		}
7429 #ifdef LIBXML_LEGACY_ENABLED
7430 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7431 		  xmlAddEntityReference(ent, firstChild, nw);
7432 #endif /* LIBXML_LEGACY_ENABLED */
7433 	    } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7434 		xmlNodePtr nw = NULL, cur, next, last,
7435 			   firstChild = NULL;
7436 
7437 		/*
7438 		 * We are copying here, make sure there is no abuse
7439 		 */
7440 		ctxt->sizeentcopy += ent->length + 5;
7441 		if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7442 		    return;
7443 
7444 		/*
7445 		 * Copy the entity child list and make it the new
7446 		 * entity child list. The goal is to make sure any
7447 		 * ID or REF referenced will be the one from the
7448 		 * document content and not the entity copy.
7449 		 */
		/*
		 * The entity's child list is detached first; each original
		 * node is then unlinked from its siblings before being
		 * copied, so 'next' and 'last' must be saved beforehand.
		 */
7450 		cur = ent->children;
7451 		ent->children = NULL;
7452 		last = ent->last;
7453 		ent->last = NULL;
7454 		while (cur != NULL) {
7455 		    next = cur->next;
7456 		    cur->next = NULL;
7457 		    cur->parent = NULL;
7458 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7459 		    if (nw != NULL) {
7460 			if (nw->_private == NULL)
7461 			    nw->_private = cur->_private;
			/*
			 * firstChild records the original node, not the
			 * copy: the original goes under ctxt->node while
			 * the copy becomes a child of the entity.
			 */
7462 			if (firstChild == NULL){
7463 			    firstChild = cur;
7464 			}
7465 			xmlAddChild((xmlNodePtr) ent, nw);
7466 			xmlAddChild(ctxt->node, cur);
7467 		    }
7468 		    if (cur == last)
7469 			break;
7470 		    cur = next;
7471 		}
7472 		if (ent->owner == 0)
7473 		    ent->owner = 1;
7474 #ifdef LIBXML_LEGACY_ENABLED
7475 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7476 		  xmlAddEntityReference(ent, firstChild, nw);
7477 #endif /* LIBXML_LEGACY_ENABLED */
7478 	    } else {
7479 		const xmlChar *nbktext;
7480 
7481 		/*
7482 		 * the name change is to avoid coalescing of the
7483 		 * node with a possible previous text one which
7484 		 * would make ent->children a dangling pointer
7485 		 */
7486 		nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7487 					-1);
7488 		if (ent->children->type == XML_TEXT_NODE)
7489 		    ent->children->name = nbktext;
7490 		if ((ent->last != ent->children) &&
7491 		    (ent->last->type == XML_TEXT_NODE))
7492 		    ent->last->name = nbktext;
7493 		xmlAddChildList(ctxt->node, ent->children);
7494 	    }
7495 
7496 	    /*
7497 	     * This is to avoid a nasty side effect, see
7498 	     * characters() in SAX.c
7499 	     */
7500 	    ctxt->nodemem = 0;
7501 	    ctxt->nodelen = 0;
7502 	    return;
7503 	}
7504     }
7505 }
7506 
7507 /**
7508  * xmlParseEntityRef:
7509  * @ctxt:  an XML parser context
7510  *
7511  * parse ENTITY references declarations
7512  *
7513  * [68] EntityRef ::= '&' Name ';'
7514  *
7515  * [ WFC: Entity Declared ]
7516  * In a document without any DTD, a document with only an internal DTD
7517  * subset which contains no parameter entity references, or a document
7518  * with "standalone='yes'", the Name given in the entity reference
7519  * must match that in an entity declaration, except that well-formed
7520  * documents need not declare any of the following entities: amp, lt,
7521  * gt, apos, quot.  The declaration of a parameter entity must precede
7522  * any reference to it.  Similarly, the declaration of a general entity
7523  * must precede any reference to it which appears in a default value in an
7524  * attribute-list declaration. Note that if entities are declared in the
7525  * external subset or in external parameter entities, a non-validating
7526  * processor is not obligated to read and process their declarations;
7527  * for such documents, the rule that an entity must be declared is a
7528  * well-formedness constraint only if standalone='yes'.
7529  *
7530  * [ WFC: Parsed Entity ]
7531  * An entity reference must not contain the name of an unparsed entity
7532  *
7533  * Returns the xmlEntityPtr if found, or NULL otherwise.
7534  */
7535 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7536 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7537     const xmlChar *name;
7538     xmlEntityPtr ent = NULL;
7539 
7540     GROW;
7541     if (ctxt->instate == XML_PARSER_EOF)
7542         return(NULL);
7543 
7544     if (RAW != '&')
7545         return(NULL);
7546     NEXT;
7547     name = xmlParseName(ctxt);
7548     if (name == NULL) {
7549 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7550 		       "xmlParseEntityRef: no name\n");
7551         return(NULL);
7552     }
7553     if (RAW != ';') {
7554 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7555 	return(NULL);
7556     }
7557     NEXT;
7558 
7559     /*
7560      * Predefined entities override any extra definition
7561      */
7562     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7563         ent = xmlGetPredefinedEntity(name);
7564         if (ent != NULL)
7565             return(ent);
7566     }
7567 
7568     /*
7569      * Increase the number of entity references parsed
7570      */
7571     ctxt->nbentities++;
7572 
7573     /*
7574      * Ask first SAX for entity resolution, otherwise try the
7575      * entities which may have stored in the parser context.
7576      */
7577     if (ctxt->sax != NULL) {
7578 	if (ctxt->sax->getEntity != NULL)
7579 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7580 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7581 	    (ctxt->options & XML_PARSE_OLDSAX))
7582 	    ent = xmlGetPredefinedEntity(name);
7583 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7584 	    (ctxt->userData==ctxt)) {
7585 	    ent = xmlSAX2GetEntity(ctxt, name);
7586 	}
7587     }
7588     if (ctxt->instate == XML_PARSER_EOF)
7589 	return(NULL);
7590     /*
7591      * [ WFC: Entity Declared ]
7592      * In a document without any DTD, a document with only an
7593      * internal DTD subset which contains no parameter entity
7594      * references, or a document with "standalone='yes'", the
7595      * Name given in the entity reference must match that in an
7596      * entity declaration, except that well-formed documents
7597      * need not declare any of the following entities: amp, lt,
7598      * gt, apos, quot.
7599      * The declaration of a parameter entity must precede any
7600      * reference to it.
7601      * Similarly, the declaration of a general entity must
7602      * precede any reference to it which appears in a default
7603      * value in an attribute-list declaration. Note that if
7604      * entities are declared in the external subset or in
7605      * external parameter entities, a non-validating processor
7606      * is not obligated to read and process their declarations;
7607      * for such documents, the rule that an entity must be
7608      * declared is a well-formedness constraint only if
7609      * standalone='yes'.
7610      */
7611     if (ent == NULL) {
7612 	if ((ctxt->standalone == 1) ||
7613 	    ((ctxt->hasExternalSubset == 0) &&
7614 	     (ctxt->hasPErefs == 0))) {
7615 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7616 		     "Entity '%s' not defined\n", name);
7617 	} else {
7618 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7619 		     "Entity '%s' not defined\n", name);
7620 	    if ((ctxt->inSubset == 0) &&
7621 		(ctxt->sax != NULL) &&
7622 		(ctxt->sax->reference != NULL)) {
7623 		ctxt->sax->reference(ctxt->userData, name);
7624 	    }
7625 	}
7626 	xmlParserEntityCheck(ctxt, 0, ent, 0);
7627 	ctxt->valid = 0;
7628     }
7629 
7630     /*
7631      * [ WFC: Parsed Entity ]
7632      * An entity reference must not contain the name of an
7633      * unparsed entity
7634      */
7635     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7636 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7637 		 "Entity reference to unparsed entity %s\n", name);
7638     }
7639 
7640     /*
7641      * [ WFC: No External Entity References ]
7642      * Attribute values cannot contain direct or indirect
7643      * entity references to external entities.
7644      */
7645     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7646 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7647 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7648 	     "Attribute references external entity '%s'\n", name);
7649     }
7650     /*
7651      * [ WFC: No < in Attribute Values ]
7652      * The replacement text of any entity referred to directly or
7653      * indirectly in an attribute value (other than "&lt;") must
7654      * not contain a <.
7655      */
7656     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7657 	     (ent != NULL) &&
7658 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7659 	if (((ent->checked & 1) || (ent->checked == 0)) &&
7660 	     (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7661 	    xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7662 	"'<' in entity '%s' is not allowed in attributes values\n", name);
7663         }
7664     }
7665 
7666     /*
7667      * Internal check, no parameter entities here ...
7668      */
7669     else {
7670 	switch (ent->etype) {
7671 	    case XML_INTERNAL_PARAMETER_ENTITY:
7672 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7673 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7674 	     "Attempt to reference the parameter entity '%s'\n",
7675 			      name);
7676 	    break;
7677 	    default:
7678 	    break;
7679 	}
7680     }
7681 
7682     /*
7683      * [ WFC: No Recursion ]
7684      * A parsed entity must not contain a recursive reference
7685      * to itself, either directly or indirectly.
7686      * Done somewhere else
7687      */
7688     return(ent);
7689 }
7690 
7691 /**
7692  * xmlParseStringEntityRef:
7693  * @ctxt:  an XML parser context
7694  * @str:  a pointer to an index in the string
7695  *
7696  * parse ENTITY references declarations, but this version parses it from
7697  * a string value.
7698  *
7699  * [68] EntityRef ::= '&' Name ';'
7700  *
7701  * [ WFC: Entity Declared ]
7702  * In a document without any DTD, a document with only an internal DTD
7703  * subset which contains no parameter entity references, or a document
7704  * with "standalone='yes'", the Name given in the entity reference
7705  * must match that in an entity declaration, except that well-formed
7706  * documents need not declare any of the following entities: amp, lt,
7707  * gt, apos, quot.  The declaration of a parameter entity must precede
7708  * any reference to it.  Similarly, the declaration of a general entity
7709  * must precede any reference to it which appears in a default value in an
7710  * attribute-list declaration. Note that if entities are declared in the
7711  * external subset or in external parameter entities, a non-validating
7712  * processor is not obligated to read and process their declarations;
7713  * for such documents, the rule that an entity must be declared is a
7714  * well-formedness constraint only if standalone='yes'.
7715  *
7716  * [ WFC: Parsed Entity ]
7717  * An entity reference must not contain the name of an unparsed entity
7718  *
7719  * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7720  * is updated to the current location in the string.
7721  */
7722 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7723 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7724     xmlChar *name;
7725     const xmlChar *ptr;
7726     xmlChar cur;
7727     xmlEntityPtr ent = NULL;
7728 
7729     if ((str == NULL) || (*str == NULL))
7730         return(NULL);
7731     ptr = *str;
7732     cur = *ptr;
7733     if (cur != '&')
7734 	return(NULL);
7735 
7736     ptr++;
7737     name = xmlParseStringName(ctxt, &ptr);
7738     if (name == NULL) {
7739 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7740 		       "xmlParseStringEntityRef: no name\n");
7741 	*str = ptr;
7742 	return(NULL);
7743     }
7744     if (*ptr != ';') {
7745 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7746         xmlFree(name);
7747 	*str = ptr;
7748 	return(NULL);
7749     }
7750     ptr++;
7751 
7752 
7753     /*
7754      * Predefined entities override any extra definition
7755      */
7756     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7757         ent = xmlGetPredefinedEntity(name);
7758         if (ent != NULL) {
7759             xmlFree(name);
7760             *str = ptr;
7761             return(ent);
7762         }
7763     }
7764 
7765     /*
7766      * Increase the number of entity references parsed
7767      */
7768     ctxt->nbentities++;
7769 
7770     /*
7771      * Ask first SAX for entity resolution, otherwise try the
7772      * entities which may have stored in the parser context.
7773      */
7774     if (ctxt->sax != NULL) {
7775 	if (ctxt->sax->getEntity != NULL)
7776 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7777 	if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7778 	    ent = xmlGetPredefinedEntity(name);
7779 	if ((ent == NULL) && (ctxt->userData==ctxt)) {
7780 	    ent = xmlSAX2GetEntity(ctxt, name);
7781 	}
7782     }
7783     if (ctxt->instate == XML_PARSER_EOF) {
7784 	xmlFree(name);
7785 	return(NULL);
7786     }
7787 
7788     /*
7789      * [ WFC: Entity Declared ]
7790      * In a document without any DTD, a document with only an
7791      * internal DTD subset which contains no parameter entity
7792      * references, or a document with "standalone='yes'", the
7793      * Name given in the entity reference must match that in an
7794      * entity declaration, except that well-formed documents
7795      * need not declare any of the following entities: amp, lt,
7796      * gt, apos, quot.
7797      * The declaration of a parameter entity must precede any
7798      * reference to it.
7799      * Similarly, the declaration of a general entity must
7800      * precede any reference to it which appears in a default
7801      * value in an attribute-list declaration. Note that if
7802      * entities are declared in the external subset or in
7803      * external parameter entities, a non-validating processor
7804      * is not obligated to read and process their declarations;
7805      * for such documents, the rule that an entity must be
7806      * declared is a well-formedness constraint only if
7807      * standalone='yes'.
7808      */
7809     if (ent == NULL) {
7810 	if ((ctxt->standalone == 1) ||
7811 	    ((ctxt->hasExternalSubset == 0) &&
7812 	     (ctxt->hasPErefs == 0))) {
7813 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7814 		     "Entity '%s' not defined\n", name);
7815 	} else {
7816 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7817 			  "Entity '%s' not defined\n",
7818 			  name);
7819 	}
7820 	xmlParserEntityCheck(ctxt, 0, ent, 0);
7821 	/* TODO ? check regressions ctxt->valid = 0; */
7822     }
7823 
7824     /*
7825      * [ WFC: Parsed Entity ]
7826      * An entity reference must not contain the name of an
7827      * unparsed entity
7828      */
7829     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7830 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7831 		 "Entity reference to unparsed entity %s\n", name);
7832     }
7833 
7834     /*
7835      * [ WFC: No External Entity References ]
7836      * Attribute values cannot contain direct or indirect
7837      * entity references to external entities.
7838      */
7839     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7840 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7841 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7842 	 "Attribute references external entity '%s'\n", name);
7843     }
7844     /*
7845      * [ WFC: No < in Attribute Values ]
7846      * The replacement text of any entity referred to directly or
7847      * indirectly in an attribute value (other than "&lt;") must
7848      * not contain a <.
7849      */
7850     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7851 	     (ent != NULL) && (ent->content != NULL) &&
7852 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7853 	     (xmlStrchr(ent->content, '<'))) {
7854 	xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7855      "'<' in entity '%s' is not allowed in attributes values\n",
7856 			  name);
7857     }
7858 
7859     /*
7860      * Internal check, no parameter entities here ...
7861      */
7862     else {
7863 	switch (ent->etype) {
7864 	    case XML_INTERNAL_PARAMETER_ENTITY:
7865 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7866 		xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7867 	     "Attempt to reference the parameter entity '%s'\n",
7868 				  name);
7869 	    break;
7870 	    default:
7871 	    break;
7872 	}
7873     }
7874 
7875     /*
7876      * [ WFC: No Recursion ]
7877      * A parsed entity must not contain a recursive reference
7878      * to itself, either directly or indirectly.
7879      * Done somewhere else
7880      */
7881 
7882     xmlFree(name);
7883     *str = ptr;
7884     return(ent);
7885 }
7886 
7887 /**
7888  * xmlParsePEReference:
7889  * @ctxt:  an XML parser context
7890  *
7891  * parse PEReference declarations
7892  * The entity content is handled directly by pushing it's content as
7893  * a new input stream.
7894  *
7895  * [69] PEReference ::= '%' Name ';'
7896  *
7897  * [ WFC: No Recursion ]
7898  * A parsed entity must not contain a recursive
7899  * reference to itself, either directly or indirectly.
7900  *
7901  * [ WFC: Entity Declared ]
7902  * In a document without any DTD, a document with only an internal DTD
7903  * subset which contains no parameter entity references, or a document
7904  * with "standalone='yes'", ...  ... The declaration of a parameter
7905  * entity must precede any reference to it...
7906  *
7907  * [ VC: Entity Declared ]
7908  * In a document with an external subset or external parameter entities
7909  * with "standalone='no'", ...  ... The declaration of a parameter entity
7910  * must precede any reference to it...
7911  *
7912  * [ WFC: In DTD ]
7913  * Parameter-entity references may only appear in the DTD.
7914  * NOTE: misleading but this is handled.
7915  */
7916 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7917 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7918 {
7919     const xmlChar *name;
7920     xmlEntityPtr entity = NULL;
7921     xmlParserInputPtr input;
7922 
7923     if (RAW != '%')
7924         return;
7925     NEXT;
7926     name = xmlParseName(ctxt);
7927     if (name == NULL) {
7928 	xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7929 	return;
7930     }
7931     if (xmlParserDebugEntities)
7932 	xmlGenericError(xmlGenericErrorContext,
7933 		"PEReference: %s\n", name);
7934     if (RAW != ';') {
7935 	xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7936         return;
7937     }
7938 
7939     NEXT;
7940 
7941     /*
7942      * Increase the number of entity references parsed
7943      */
7944     ctxt->nbentities++;
7945 
7946     /*
7947      * Request the entity from SAX
7948      */
7949     if ((ctxt->sax != NULL) &&
7950 	(ctxt->sax->getParameterEntity != NULL))
7951 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7952     if (ctxt->instate == XML_PARSER_EOF)
7953 	return;
7954     if (entity == NULL) {
7955 	/*
7956 	 * [ WFC: Entity Declared ]
7957 	 * In a document without any DTD, a document with only an
7958 	 * internal DTD subset which contains no parameter entity
7959 	 * references, or a document with "standalone='yes'", ...
7960 	 * ... The declaration of a parameter entity must precede
7961 	 * any reference to it...
7962 	 */
7963 	if ((ctxt->standalone == 1) ||
7964 	    ((ctxt->hasExternalSubset == 0) &&
7965 	     (ctxt->hasPErefs == 0))) {
7966 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7967 			      "PEReference: %%%s; not found\n",
7968 			      name);
7969 	} else {
7970 	    /*
7971 	     * [ VC: Entity Declared ]
7972 	     * In a document with an external subset or external
7973 	     * parameter entities with "standalone='no'", ...
7974 	     * ... The declaration of a parameter entity must
7975 	     * precede any reference to it...
7976 	     */
7977             if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7978                 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7979                                  "PEReference: %%%s; not found\n",
7980                                  name, NULL);
7981             } else
7982                 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7983                               "PEReference: %%%s; not found\n",
7984                               name, NULL);
7985             ctxt->valid = 0;
7986 	}
7987 	xmlParserEntityCheck(ctxt, 0, NULL, 0);
7988     } else {
7989 	/*
7990 	 * Internal checking in case the entity quest barfed
7991 	 */
7992 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7993 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7994 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7995 		  "Internal: %%%s; is not a parameter entity\n",
7996 			  name, NULL);
7997 	} else {
7998             xmlChar start[4];
7999             xmlCharEncoding enc;
8000 
8001 	    if (xmlParserEntityCheck(ctxt, 0, entity, 0))
8002 	        return;
8003 
8004 	    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8005 	        ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8006 		((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8007 		((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8008 		((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8009 		(ctxt->replaceEntities == 0) &&
8010 		(ctxt->validate == 0))
8011 		return;
8012 
8013 	    input = xmlNewEntityInputStream(ctxt, entity);
8014 	    if (xmlPushInput(ctxt, input) < 0) {
8015                 xmlFreeInputStream(input);
8016 		return;
8017             }
8018 
8019 	    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8020                 /*
8021                  * Get the 4 first bytes and decode the charset
8022                  * if enc != XML_CHAR_ENCODING_NONE
8023                  * plug some encoding conversion routines.
8024                  * Note that, since we may have some non-UTF8
8025                  * encoding (like UTF16, bug 135229), the 'length'
8026                  * is not known, but we can calculate based upon
8027                  * the amount of data in the buffer.
8028                  */
8029                 GROW
8030                 if (ctxt->instate == XML_PARSER_EOF)
8031                     return;
8032                 if ((ctxt->input->end - ctxt->input->cur)>=4) {
8033                     start[0] = RAW;
8034                     start[1] = NXT(1);
8035                     start[2] = NXT(2);
8036                     start[3] = NXT(3);
8037                     enc = xmlDetectCharEncoding(start, 4);
8038                     if (enc != XML_CHAR_ENCODING_NONE) {
8039                         xmlSwitchEncoding(ctxt, enc);
8040                     }
8041                 }
8042 
8043                 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8044                     (IS_BLANK_CH(NXT(5)))) {
8045                     xmlParseTextDecl(ctxt);
8046                 }
8047             }
8048 	}
8049     }
8050     ctxt->hasPErefs = 1;
8051 }
8052 
8053 /**
8054  * xmlLoadEntityContent:
8055  * @ctxt:  an XML parser context
8056  * @entity: an unloaded system entity
8057  *
8058  * Load the original content of the given system entity from the
8059  * ExternalID/SystemID given. This is to be used for Included in Literal
8060  * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8061  *
8062  * Returns 0 in case of success and -1 in case of failure
8063  */
8064 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)8065 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8066     xmlParserInputPtr input;
8067     xmlBufferPtr buf;
8068     int l, c;
8069     int count = 0;
8070 
8071     if ((ctxt == NULL) || (entity == NULL) ||
8072         ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8073 	 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8074 	(entity->content != NULL)) {
8075 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8076 	            "xmlLoadEntityContent parameter error");
8077         return(-1);
8078     }
8079 
8080     if (xmlParserDebugEntities)
8081 	xmlGenericError(xmlGenericErrorContext,
8082 		"Reading %s entity content input\n", entity->name);
8083 
8084     buf = xmlBufferCreate();
8085     if (buf == NULL) {
8086 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8087 	            "xmlLoadEntityContent parameter error");
8088         return(-1);
8089     }
8090 
8091     input = xmlNewEntityInputStream(ctxt, entity);
8092     if (input == NULL) {
8093 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8094 	            "xmlLoadEntityContent input error");
8095 	xmlBufferFree(buf);
8096         return(-1);
8097     }
8098 
8099     /*
8100      * Push the entity as the current input, read char by char
8101      * saving to the buffer until the end of the entity or an error
8102      */
8103     if (xmlPushInput(ctxt, input) < 0) {
8104         xmlBufferFree(buf);
8105 	return(-1);
8106     }
8107 
8108     GROW;
8109     c = CUR_CHAR(l);
8110     while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8111            (IS_CHAR(c))) {
8112         xmlBufferAdd(buf, ctxt->input->cur, l);
8113 	if (count++ > XML_PARSER_CHUNK_SIZE) {
8114 	    count = 0;
8115 	    GROW;
8116             if (ctxt->instate == XML_PARSER_EOF) {
8117                 xmlBufferFree(buf);
8118                 return(-1);
8119             }
8120 	}
8121 	NEXTL(l);
8122 	c = CUR_CHAR(l);
8123 	if (c == 0) {
8124 	    count = 0;
8125 	    GROW;
8126             if (ctxt->instate == XML_PARSER_EOF) {
8127                 xmlBufferFree(buf);
8128                 return(-1);
8129             }
8130 	    c = CUR_CHAR(l);
8131 	}
8132     }
8133 
8134     if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8135         xmlPopInput(ctxt);
8136     } else if (!IS_CHAR(c)) {
8137         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8138                           "xmlLoadEntityContent: invalid char value %d\n",
8139 	                  c);
8140 	xmlBufferFree(buf);
8141 	return(-1);
8142     }
8143     entity->content = buf->content;
8144     buf->content = NULL;
8145     xmlBufferFree(buf);
8146 
8147     return(0);
8148 }
8149 
8150 /**
8151  * xmlParseStringPEReference:
8152  * @ctxt:  an XML parser context
8153  * @str:  a pointer to an index in the string
8154  *
8155  * parse PEReference declarations
8156  *
8157  * [69] PEReference ::= '%' Name ';'
8158  *
8159  * [ WFC: No Recursion ]
8160  * A parsed entity must not contain a recursive
8161  * reference to itself, either directly or indirectly.
8162  *
8163  * [ WFC: Entity Declared ]
8164  * In a document without any DTD, a document with only an internal DTD
8165  * subset which contains no parameter entity references, or a document
8166  * with "standalone='yes'", ...  ... The declaration of a parameter
8167  * entity must precede any reference to it...
8168  *
8169  * [ VC: Entity Declared ]
8170  * In a document with an external subset or external parameter entities
8171  * with "standalone='no'", ...  ... The declaration of a parameter entity
8172  * must precede any reference to it...
8173  *
8174  * [ WFC: In DTD ]
8175  * Parameter-entity references may only appear in the DTD.
8176  * NOTE: misleading but this is handled.
8177  *
8178  * Returns the string of the entity content.
8179  *         str is updated to the current value of the index
8180  */
8181 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8182 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8183     const xmlChar *ptr;
8184     xmlChar cur;
8185     xmlChar *name;
8186     xmlEntityPtr entity = NULL;
8187 
8188     if ((str == NULL) || (*str == NULL)) return(NULL);
8189     ptr = *str;
8190     cur = *ptr;
8191     if (cur != '%')
8192         return(NULL);
8193     ptr++;
8194     name = xmlParseStringName(ctxt, &ptr);
8195     if (name == NULL) {
8196 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8197 		       "xmlParseStringPEReference: no name\n");
8198 	*str = ptr;
8199 	return(NULL);
8200     }
8201     cur = *ptr;
8202     if (cur != ';') {
8203 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8204 	xmlFree(name);
8205 	*str = ptr;
8206 	return(NULL);
8207     }
8208     ptr++;
8209 
8210     /*
8211      * Increase the number of entity references parsed
8212      */
8213     ctxt->nbentities++;
8214 
8215     /*
8216      * Request the entity from SAX
8217      */
8218     if ((ctxt->sax != NULL) &&
8219 	(ctxt->sax->getParameterEntity != NULL))
8220 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8221     if (ctxt->instate == XML_PARSER_EOF) {
8222 	xmlFree(name);
8223 	*str = ptr;
8224 	return(NULL);
8225     }
8226     if (entity == NULL) {
8227 	/*
8228 	 * [ WFC: Entity Declared ]
8229 	 * In a document without any DTD, a document with only an
8230 	 * internal DTD subset which contains no parameter entity
8231 	 * references, or a document with "standalone='yes'", ...
8232 	 * ... The declaration of a parameter entity must precede
8233 	 * any reference to it...
8234 	 */
8235 	if ((ctxt->standalone == 1) ||
8236 	    ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8237 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8238 		 "PEReference: %%%s; not found\n", name);
8239 	} else {
8240 	    /*
8241 	     * [ VC: Entity Declared ]
8242 	     * In a document with an external subset or external
8243 	     * parameter entities with "standalone='no'", ...
8244 	     * ... The declaration of a parameter entity must
8245 	     * precede any reference to it...
8246 	     */
8247 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8248 			  "PEReference: %%%s; not found\n",
8249 			  name, NULL);
8250 	    ctxt->valid = 0;
8251 	}
8252 	xmlParserEntityCheck(ctxt, 0, NULL, 0);
8253     } else {
8254 	/*
8255 	 * Internal checking in case the entity quest barfed
8256 	 */
8257 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8258 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8259 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8260 			  "%%%s; is not a parameter entity\n",
8261 			  name, NULL);
8262 	}
8263     }
8264     ctxt->hasPErefs = 1;
8265     xmlFree(name);
8266     *str = ptr;
8267     return(entity);
8268 }
8269 
8270 /**
8271  * xmlParseDocTypeDecl:
8272  * @ctxt:  an XML parser context
8273  *
8274  * parse a DOCTYPE declaration
8275  *
8276  * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8277  *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8278  *
8279  * [ VC: Root Element Type ]
8280  * The Name in the document type declaration must match the element
8281  * type of the root element.
8282  */
8283 
8284 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8285 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8286     const xmlChar *name = NULL;
8287     xmlChar *ExternalID = NULL;
8288     xmlChar *URI = NULL;
8289 
8290     /*
8291      * We know that '<!DOCTYPE' has been detected.
8292      */
8293     SKIP(9);
8294 
8295     SKIP_BLANKS;
8296 
8297     /*
8298      * Parse the DOCTYPE name.
8299      */
8300     name = xmlParseName(ctxt);
8301     if (name == NULL) {
8302 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8303 		       "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8304     }
8305     ctxt->intSubName = name;
8306 
8307     SKIP_BLANKS;
8308 
8309     /*
8310      * Check for SystemID and ExternalID
8311      */
8312     URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8313 
8314     if ((URI != NULL) || (ExternalID != NULL)) {
8315         ctxt->hasExternalSubset = 1;
8316     }
8317     ctxt->extSubURI = URI;
8318     ctxt->extSubSystem = ExternalID;
8319 
8320     SKIP_BLANKS;
8321 
8322     /*
8323      * Create and update the internal subset.
8324      */
8325     if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8326 	(!ctxt->disableSAX))
8327 	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8328     if (ctxt->instate == XML_PARSER_EOF)
8329 	return;
8330 
8331     /*
8332      * Is there any internal subset declarations ?
8333      * they are handled separately in xmlParseInternalSubset()
8334      */
8335     if (RAW == '[')
8336 	return;
8337 
8338     /*
8339      * We should be at the end of the DOCTYPE declaration.
8340      */
8341     if (RAW != '>') {
8342 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8343     }
8344     NEXT;
8345 }
8346 
8347 /**
8348  * xmlParseInternalSubset:
8349  * @ctxt:  an XML parser context
8350  *
8351  * parse the internal subset declaration
8352  *
8353  * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8354  */
8355 
8356 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8357 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8358     /*
8359      * Is there any DTD definition ?
8360      */
8361     if (RAW == '[') {
8362         int baseInputNr = ctxt->inputNr;
8363         ctxt->instate = XML_PARSER_DTD;
8364         NEXT;
8365 	/*
8366 	 * Parse the succession of Markup declarations and
8367 	 * PEReferences.
8368 	 * Subsequence (markupdecl | PEReference | S)*
8369 	 */
8370 	while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8371                (ctxt->instate != XML_PARSER_EOF)) {
8372 	    const xmlChar *check = CUR_PTR;
8373 	    unsigned int cons = ctxt->input->consumed;
8374 
8375 	    SKIP_BLANKS;
8376 	    xmlParseMarkupDecl(ctxt);
8377 	    xmlParsePEReference(ctxt);
8378 
8379             /*
8380              * Conditional sections are allowed from external entities included
8381              * by PE References in the internal subset.
8382              */
8383             if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8384                 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8385                 xmlParseConditionalSections(ctxt);
8386             }
8387 
8388 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8389 		xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8390 	     "xmlParseInternalSubset: error detected in Markup declaration\n");
8391                 if (ctxt->inputNr > baseInputNr)
8392                     xmlPopInput(ctxt);
8393                 else
8394 		    break;
8395 	    }
8396 	}
8397 	if (RAW == ']') {
8398 	    NEXT;
8399 	    SKIP_BLANKS;
8400 	}
8401     }
8402 
8403     /*
8404      * We should be at the end of the DOCTYPE declaration.
8405      */
8406     if (RAW != '>') {
8407 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8408 	return;
8409     }
8410     NEXT;
8411 }
8412 
8413 #ifdef LIBXML_SAX1_ENABLED
8414 /**
8415  * xmlParseAttribute:
8416  * @ctxt:  an XML parser context
8417  * @value:  a xmlChar ** used to store the value of the attribute
8418  *
8419  * parse an attribute
8420  *
8421  * [41] Attribute ::= Name Eq AttValue
8422  *
8423  * [ WFC: No External Entity References ]
8424  * Attribute values cannot contain direct or indirect entity references
8425  * to external entities.
8426  *
8427  * [ WFC: No < in Attribute Values ]
8428  * The replacement text of any entity referred to directly or indirectly in
8429  * an attribute value (other than "&lt;") must not contain a <.
8430  *
8431  * [ VC: Attribute Value Type ]
8432  * The attribute must have been declared; the value must be of the type
8433  * declared for it.
8434  *
8435  * [25] Eq ::= S? '=' S?
8436  *
8437  * With namespace:
8438  *
8439  * [NS 11] Attribute ::= QName Eq AttValue
8440  *
8441  * Also the case QName == xmlns:??? is handled independently as a namespace
8442  * definition.
8443  *
8444  * Returns the attribute name, and the value in *value.
8445  */
8446 
8447 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8448 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8449     const xmlChar *name;
8450     xmlChar *val;
8451 
8452     *value = NULL;
8453     GROW;
8454     name = xmlParseName(ctxt);
8455     if (name == NULL) {
8456 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8457 	               "error parsing attribute name\n");
8458         return(NULL);
8459     }
8460 
8461     /*
8462      * read the value
8463      */
8464     SKIP_BLANKS;
8465     if (RAW == '=') {
8466         NEXT;
8467 	SKIP_BLANKS;
8468 	val = xmlParseAttValue(ctxt);
8469 	ctxt->instate = XML_PARSER_CONTENT;
8470     } else {
8471 	xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8472 	       "Specification mandates value for attribute %s\n", name);
8473 	return(NULL);
8474     }
8475 
8476     /*
8477      * Check that xml:lang conforms to the specification
8478      * No more registered as an error, just generate a warning now
8479      * since this was deprecated in XML second edition
8480      */
8481     if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8482 	if (!xmlCheckLanguageID(val)) {
8483 	    xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8484 		          "Malformed value for xml:lang : %s\n",
8485 			  val, NULL);
8486 	}
8487     }
8488 
8489     /*
8490      * Check that xml:space conforms to the specification
8491      */
8492     if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8493 	if (xmlStrEqual(val, BAD_CAST "default"))
8494 	    *(ctxt->space) = 0;
8495 	else if (xmlStrEqual(val, BAD_CAST "preserve"))
8496 	    *(ctxt->space) = 1;
8497 	else {
8498 		xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8499 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8500                                  val, NULL);
8501 	}
8502     }
8503 
8504     *value = val;
8505     return(name);
8506 }
8507 
8508 /**
8509  * xmlParseStartTag:
8510  * @ctxt:  an XML parser context
8511  *
8512  * parse a start of tag either for rule element or
8513  * EmptyElement. In both case we don't parse the tag closing chars.
8514  *
8515  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8516  *
8517  * [ WFC: Unique Att Spec ]
8518  * No attribute name may appear more than once in the same start-tag or
8519  * empty-element tag.
8520  *
8521  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8522  *
8523  * [ WFC: Unique Att Spec ]
8524  * No attribute name may appear more than once in the same start-tag or
8525  * empty-element tag.
8526  *
8527  * With namespace:
8528  *
8529  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8530  *
8531  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8532  *
8533  * Returns the element name parsed
8534  */
8535 
8536 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8537 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8538     const xmlChar *name;
8539     const xmlChar *attname;
8540     xmlChar *attvalue;
8541     const xmlChar **atts = ctxt->atts;
8542     int nbatts = 0;
8543     int maxatts = ctxt->maxatts;
8544     int i;
8545 
8546     if (RAW != '<') return(NULL);
8547     NEXT1;
8548 
8549     name = xmlParseName(ctxt);
8550     if (name == NULL) {
8551 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8552 	     "xmlParseStartTag: invalid element name\n");
8553         return(NULL);
8554     }
8555 
8556     /*
8557      * Now parse the attributes, it ends up with the ending
8558      *
8559      * (S Attribute)* S?
8560      */
8561     SKIP_BLANKS;
8562     GROW;
8563 
8564     while (((RAW != '>') &&
8565 	   ((RAW != '/') || (NXT(1) != '>')) &&
8566 	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8567 	const xmlChar *q = CUR_PTR;
8568 	unsigned int cons = ctxt->input->consumed;
8569 
8570 	attname = xmlParseAttribute(ctxt, &attvalue);
8571         if ((attname != NULL) && (attvalue != NULL)) {
8572 	    /*
8573 	     * [ WFC: Unique Att Spec ]
8574 	     * No attribute name may appear more than once in the same
8575 	     * start-tag or empty-element tag.
8576 	     */
8577 	    for (i = 0; i < nbatts;i += 2) {
8578 	        if (xmlStrEqual(atts[i], attname)) {
8579 		    xmlErrAttributeDup(ctxt, NULL, attname);
8580 		    xmlFree(attvalue);
8581 		    goto failed;
8582 		}
8583 	    }
8584 	    /*
8585 	     * Add the pair to atts
8586 	     */
8587 	    if (atts == NULL) {
8588 	        maxatts = 22; /* allow for 10 attrs by default */
8589 	        atts = (const xmlChar **)
8590 		       xmlMalloc(maxatts * sizeof(xmlChar *));
8591 		if (atts == NULL) {
8592 		    xmlErrMemory(ctxt, NULL);
8593 		    if (attvalue != NULL)
8594 			xmlFree(attvalue);
8595 		    goto failed;
8596 		}
8597 		ctxt->atts = atts;
8598 		ctxt->maxatts = maxatts;
8599 	    } else if (nbatts + 4 > maxatts) {
8600 	        const xmlChar **n;
8601 
8602 	        maxatts *= 2;
8603 	        n = (const xmlChar **) xmlRealloc((void *) atts,
8604 					     maxatts * sizeof(const xmlChar *));
8605 		if (n == NULL) {
8606 		    xmlErrMemory(ctxt, NULL);
8607 		    if (attvalue != NULL)
8608 			xmlFree(attvalue);
8609 		    goto failed;
8610 		}
8611 		atts = n;
8612 		ctxt->atts = atts;
8613 		ctxt->maxatts = maxatts;
8614 	    }
8615 	    atts[nbatts++] = attname;
8616 	    atts[nbatts++] = attvalue;
8617 	    atts[nbatts] = NULL;
8618 	    atts[nbatts + 1] = NULL;
8619 	} else {
8620 	    if (attvalue != NULL)
8621 		xmlFree(attvalue);
8622 	}
8623 
8624 failed:
8625 
8626 	GROW
8627 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8628 	    break;
8629 	if (SKIP_BLANKS == 0) {
8630 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8631 			   "attributes construct error\n");
8632 	}
8633         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8634             (attname == NULL) && (attvalue == NULL)) {
8635 	    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8636 			   "xmlParseStartTag: problem parsing attributes\n");
8637 	    break;
8638 	}
8639 	SHRINK;
8640         GROW;
8641     }
8642 
8643     /*
8644      * SAX: Start of Element !
8645      */
8646     if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8647 	(!ctxt->disableSAX)) {
8648 	if (nbatts > 0)
8649 	    ctxt->sax->startElement(ctxt->userData, name, atts);
8650 	else
8651 	    ctxt->sax->startElement(ctxt->userData, name, NULL);
8652     }
8653 
8654     if (atts != NULL) {
8655         /* Free only the content strings */
8656         for (i = 1;i < nbatts;i+=2)
8657 	    if (atts[i] != NULL)
8658 	       xmlFree((xmlChar *) atts[i]);
8659     }
8660     return(name);
8661 }
8662 
8663 /**
8664  * xmlParseEndTag1:
8665  * @ctxt:  an XML parser context
8666  * @line:  line of the start tag
8667  * @nsNr:  number of namespaces on the start tag
8668  *
8669  * parse an end of tag
8670  *
8671  * [42] ETag ::= '</' Name S? '>'
8672  *
8673  * With namespace
8674  *
8675  * [NS 9] ETag ::= '</' QName S? '>'
8676  */
8677 
8678 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8679 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8680     const xmlChar *name;
8681 
8682     GROW;
8683     if ((RAW != '<') || (NXT(1) != '/')) {
8684 	xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8685 		       "xmlParseEndTag: '</' not found\n");
8686 	return;
8687     }
8688     SKIP(2);
8689 
8690     name = xmlParseNameAndCompare(ctxt,ctxt->name);
8691 
8692     /*
8693      * We should definitely be at the ending "S? '>'" part
8694      */
8695     GROW;
8696     SKIP_BLANKS;
8697     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8698 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8699     } else
8700 	NEXT1;
8701 
8702     /*
8703      * [ WFC: Element Type Match ]
8704      * The Name in an element's end-tag must match the element type in the
8705      * start-tag.
8706      *
8707      */
8708     if (name != (xmlChar*)1) {
8709         if (name == NULL) name = BAD_CAST "unparsable";
8710         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8711 		     "Opening and ending tag mismatch: %s line %d and %s\n",
8712 		                ctxt->name, line, name);
8713     }
8714 
8715     /*
8716      * SAX: End of Tag
8717      */
8718     if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8719 	(!ctxt->disableSAX))
8720         ctxt->sax->endElement(ctxt->userData, ctxt->name);
8721 
8722     namePop(ctxt);
8723     spacePop(ctxt);
8724     return;
8725 }
8726 
8727 /**
8728  * xmlParseEndTag:
8729  * @ctxt:  an XML parser context
8730  *
8731  * parse an end of tag
8732  *
8733  * [42] ETag ::= '</' Name S? '>'
8734  *
8735  * With namespace
8736  *
8737  * [NS 9] ETag ::= '</' QName S? '>'
8738  */
8739 
8740 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8741 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8742     xmlParseEndTag1(ctxt, 0);
8743 }
8744 #endif /* LIBXML_SAX1_ENABLED */
8745 
8746 /************************************************************************
8747  *									*
8748  *		      SAX 2 specific operations				*
8749  *									*
8750  ************************************************************************/
8751 
8752 /*
8753  * xmlGetNamespace:
8754  * @ctxt:  an XML parser context
8755  * @prefix:  the prefix to lookup
8756  *
8757  * Lookup the namespace name for the @prefix (which ca be NULL)
8758  * The prefix must come from the @ctxt->dict dictionary
8759  *
8760  * Returns the namespace name or NULL if not bound
8761  */
8762 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8763 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8764     int i;
8765 
8766     if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8767     for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8768         if (ctxt->nsTab[i] == prefix) {
8769 	    if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8770 	        return(NULL);
8771 	    return(ctxt->nsTab[i + 1]);
8772 	}
8773     return(NULL);
8774 }
8775 
8776 /**
8777  * xmlParseQName:
8778  * @ctxt:  an XML parser context
8779  * @prefix:  pointer to store the prefix part
8780  *
8781  * parse an XML Namespace QName
8782  *
8783  * [6]  QName  ::= (Prefix ':')? LocalPart
8784  * [7]  Prefix  ::= NCName
8785  * [8]  LocalPart  ::= NCName
8786  *
8787  * Returns the Name parsed or NULL
8788  */
8789 
8790 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8791 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8792     const xmlChar *l, *p;
8793 
8794     GROW;
8795 
8796     l = xmlParseNCName(ctxt);
8797     if (l == NULL) {
8798         if (CUR == ':') {
8799 	    l = xmlParseName(ctxt);
8800 	    if (l != NULL) {
8801 	        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8802 		         "Failed to parse QName '%s'\n", l, NULL, NULL);
8803 		*prefix = NULL;
8804 		return(l);
8805 	    }
8806 	}
8807         return(NULL);
8808     }
8809     if (CUR == ':') {
8810         NEXT;
8811 	p = l;
8812 	l = xmlParseNCName(ctxt);
8813 	if (l == NULL) {
8814 	    xmlChar *tmp;
8815 
8816             if (ctxt->instate == XML_PARSER_EOF)
8817                 return(NULL);
8818             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8819 	             "Failed to parse QName '%s:'\n", p, NULL, NULL);
8820 	    l = xmlParseNmtoken(ctxt);
8821 	    if (l == NULL) {
8822                 if (ctxt->instate == XML_PARSER_EOF)
8823                     return(NULL);
8824 		tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8825             } else {
8826 		tmp = xmlBuildQName(l, p, NULL, 0);
8827 		xmlFree((char *)l);
8828 	    }
8829 	    p = xmlDictLookup(ctxt->dict, tmp, -1);
8830 	    if (tmp != NULL) xmlFree(tmp);
8831 	    *prefix = NULL;
8832 	    return(p);
8833 	}
8834 	if (CUR == ':') {
8835 	    xmlChar *tmp;
8836 
8837             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8838 	             "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8839 	    NEXT;
8840 	    tmp = (xmlChar *) xmlParseName(ctxt);
8841 	    if (tmp != NULL) {
8842 	        tmp = xmlBuildQName(tmp, l, NULL, 0);
8843 		l = xmlDictLookup(ctxt->dict, tmp, -1);
8844 		if (tmp != NULL) xmlFree(tmp);
8845 		*prefix = p;
8846 		return(l);
8847 	    }
8848             if (ctxt->instate == XML_PARSER_EOF)
8849                 return(NULL);
8850 	    tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8851 	    l = xmlDictLookup(ctxt->dict, tmp, -1);
8852 	    if (tmp != NULL) xmlFree(tmp);
8853 	    *prefix = p;
8854 	    return(l);
8855 	}
8856 	*prefix = p;
8857     } else
8858         *prefix = NULL;
8859     return(l);
8860 }
8861 
8862 /**
8863  * xmlParseQNameAndCompare:
8864  * @ctxt:  an XML parser context
8865  * @name:  the localname
8866  * @prefix:  the prefix, if any.
8867  *
8868  * parse an XML name and compares for match
8869  * (specialized for endtag parsing)
8870  *
8871  * Returns NULL for an illegal name, (xmlChar*) 1 for success
8872  * and the name for mismatch
8873  */
8874 
8875 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8876 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8877                         xmlChar const *prefix) {
8878     const xmlChar *cmp;
8879     const xmlChar *in;
8880     const xmlChar *ret;
8881     const xmlChar *prefix2;
8882 
8883     if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8884 
8885     GROW;
8886     in = ctxt->input->cur;
8887 
8888     cmp = prefix;
8889     while (*in != 0 && *in == *cmp) {
8890 	++in;
8891 	++cmp;
8892     }
8893     if ((*cmp == 0) && (*in == ':')) {
8894         in++;
8895 	cmp = name;
8896 	while (*in != 0 && *in == *cmp) {
8897 	    ++in;
8898 	    ++cmp;
8899 	}
8900 	if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8901 	    /* success */
8902             ctxt->input->col += in - ctxt->input->cur;
8903 	    ctxt->input->cur = in;
8904 	    return((const xmlChar*) 1);
8905 	}
8906     }
8907     /*
8908      * all strings coms from the dictionary, equality can be done directly
8909      */
8910     ret = xmlParseQName (ctxt, &prefix2);
8911     if ((ret == name) && (prefix == prefix2))
8912 	return((const xmlChar*) 1);
8913     return ret;
8914 }
8915 
8916 /**
8917  * xmlParseAttValueInternal:
8918  * @ctxt:  an XML parser context
8919  * @len:  attribute len result
8920  * @alloc:  whether the attribute was reallocated as a new string
8921  * @normalize:  if 1 then further non-CDATA normalization must be done
8922  *
8923  * parse a value for an attribute.
8924  * NOTE: if no normalization is needed, the routine will return pointers
8925  *       directly from the data buffer.
8926  *
8927  * 3.3.3 Attribute-Value Normalization:
8928  * Before the value of an attribute is passed to the application or
8929  * checked for validity, the XML processor must normalize it as follows:
8930  * - a character reference is processed by appending the referenced
8931  *   character to the attribute value
8932  * - an entity reference is processed by recursively processing the
8933  *   replacement text of the entity
8934  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8935  *   appending #x20 to the normalized value, except that only a single
8936  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8937  *   parsed entity or the literal entity value of an internal parsed entity
8938  * - other characters are processed by appending them to the normalized value
8939  * If the declared value is not CDATA, then the XML processor must further
8940  * process the normalized attribute value by discarding any leading and
8941  * trailing space (#x20) characters, and by replacing sequences of space
8942  * (#x20) characters by a single space (#x20) character.
8943  * All attributes for which no declaration has been read should be treated
8944  * by a non-validating parser as if declared CDATA.
8945  *
8946  * Returns the AttValue parsed or NULL. The value has to be freed by the
8947  *     caller if it was copied, this can be detected by val[*len] == 0.
8948  */
8949 
8950 #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8951     const xmlChar *oldbase = ctxt->input->base;\
8952     GROW;\
8953     if (ctxt->instate == XML_PARSER_EOF)\
8954         return(NULL);\
8955     if (oldbase != ctxt->input->base) {\
8956         ptrdiff_t delta = ctxt->input->base - oldbase;\
8957         start = start + delta;\
8958         in = in + delta;\
8959     }\
8960     end = ctxt->input->end;
8961 
8962 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)8963 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8964                          int normalize)
8965 {
8966     xmlChar limit = 0;
8967     const xmlChar *in = NULL, *start, *end, *last;
8968     xmlChar *ret = NULL;
8969     int line, col;
8970     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
8971                     XML_MAX_HUGE_LENGTH :
8972                     XML_MAX_TEXT_LENGTH;
8973 
8974     GROW;
8975     in = (xmlChar *) CUR_PTR;
8976     line = ctxt->input->line;
8977     col = ctxt->input->col;
8978     if (*in != '"' && *in != '\'') {
8979         xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8980         return (NULL);
8981     }
8982     ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8983 
8984     /*
8985      * try to handle in this routine the most common case where no
8986      * allocation of a new string is required and where content is
8987      * pure ASCII.
8988      */
8989     limit = *in++;
8990     col++;
8991     end = ctxt->input->end;
8992     start = in;
8993     if (in >= end) {
8994         GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8995     }
8996     if (normalize) {
8997         /*
8998 	 * Skip any leading spaces
8999 	 */
9000 	while ((in < end) && (*in != limit) &&
9001 	       ((*in == 0x20) || (*in == 0x9) ||
9002 	        (*in == 0xA) || (*in == 0xD))) {
9003 	    if (*in == 0xA) {
9004 	        line++; col = 1;
9005 	    } else {
9006 	        col++;
9007 	    }
9008 	    in++;
9009 	    start = in;
9010 	    if (in >= end) {
9011                 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9012                 if ((in - start) > maxLength) {
9013                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9014                                    "AttValue length too long\n");
9015                     return(NULL);
9016                 }
9017 	    }
9018 	}
9019 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9020 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9021 	    col++;
9022 	    if ((*in++ == 0x20) && (*in == 0x20)) break;
9023 	    if (in >= end) {
9024                 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9025                 if ((in - start) > maxLength) {
9026                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9027                                    "AttValue length too long\n");
9028                     return(NULL);
9029                 }
9030 	    }
9031 	}
9032 	last = in;
9033 	/*
9034 	 * skip the trailing blanks
9035 	 */
9036 	while ((last[-1] == 0x20) && (last > start)) last--;
9037 	while ((in < end) && (*in != limit) &&
9038 	       ((*in == 0x20) || (*in == 0x9) ||
9039 	        (*in == 0xA) || (*in == 0xD))) {
9040 	    if (*in == 0xA) {
9041 	        line++, col = 1;
9042 	    } else {
9043 	        col++;
9044 	    }
9045 	    in++;
9046 	    if (in >= end) {
9047 		const xmlChar *oldbase = ctxt->input->base;
9048 		GROW;
9049                 if (ctxt->instate == XML_PARSER_EOF)
9050                     return(NULL);
9051 		if (oldbase != ctxt->input->base) {
9052 		    ptrdiff_t delta = ctxt->input->base - oldbase;
9053 		    start = start + delta;
9054 		    in = in + delta;
9055 		    last = last + delta;
9056 		}
9057 		end = ctxt->input->end;
9058                 if ((in - start) > maxLength) {
9059                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9060                                    "AttValue length too long\n");
9061                     return(NULL);
9062                 }
9063 	    }
9064 	}
9065         if ((in - start) > maxLength) {
9066             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9067                            "AttValue length too long\n");
9068             return(NULL);
9069         }
9070 	if (*in != limit) goto need_complex;
9071     } else {
9072 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9073 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9074 	    in++;
9075 	    col++;
9076 	    if (in >= end) {
9077                 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9078                 if ((in - start) > maxLength) {
9079                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9080                                    "AttValue length too long\n");
9081                     return(NULL);
9082                 }
9083 	    }
9084 	}
9085 	last = in;
9086         if ((in - start) > maxLength) {
9087             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9088                            "AttValue length too long\n");
9089             return(NULL);
9090         }
9091 	if (*in != limit) goto need_complex;
9092     }
9093     in++;
9094     col++;
9095     if (len != NULL) {
9096         *len = last - start;
9097         ret = (xmlChar *) start;
9098     } else {
9099         if (alloc) *alloc = 1;
9100         ret = xmlStrndup(start, last - start);
9101     }
9102     CUR_PTR = in;
9103     ctxt->input->line = line;
9104     ctxt->input->col = col;
9105     if (alloc) *alloc = 0;
9106     return ret;
9107 need_complex:
9108     if (alloc) *alloc = 1;
9109     return xmlParseAttValueComplex(ctxt, len, normalize);
9110 }
9111 
9112 /**
9113  * xmlParseAttribute2:
9114  * @ctxt:  an XML parser context
9115  * @pref:  the element prefix
9116  * @elem:  the element name
9117  * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9118  * @value:  a xmlChar ** used to store the value of the attribute
9119  * @len:  an int * to save the length of the attribute
9120  * @alloc:  an int * to indicate if the attribute was allocated
9121  *
9122  * parse an attribute in the new SAX2 framework.
9123  *
9124  * Returns the attribute name, and the value in *value, .
9125  */
9126 
9127 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)9128 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9129                    const xmlChar * pref, const xmlChar * elem,
9130                    const xmlChar ** prefix, xmlChar ** value,
9131                    int *len, int *alloc)
9132 {
9133     const xmlChar *name;
9134     xmlChar *val, *internal_val = NULL;
9135     int normalize = 0;
9136 
9137     *value = NULL;
9138     GROW;
9139     name = xmlParseQName(ctxt, prefix);
9140     if (name == NULL) {
9141         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9142                        "error parsing attribute name\n");
9143         return (NULL);
9144     }
9145 
9146     /*
9147      * get the type if needed
9148      */
9149     if (ctxt->attsSpecial != NULL) {
9150         int type;
9151 
9152         type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9153                                                  pref, elem, *prefix, name);
9154         if (type != 0)
9155             normalize = 1;
9156     }
9157 
9158     /*
9159      * read the value
9160      */
9161     SKIP_BLANKS;
9162     if (RAW == '=') {
9163         NEXT;
9164         SKIP_BLANKS;
9165         val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9166 	if (normalize) {
9167 	    /*
9168 	     * Sometimes a second normalisation pass for spaces is needed
9169 	     * but that only happens if charrefs or entities references
9170 	     * have been used in the attribute value, i.e. the attribute
9171 	     * value have been extracted in an allocated string already.
9172 	     */
9173 	    if (*alloc) {
9174 	        const xmlChar *val2;
9175 
9176 	        val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9177 		if ((val2 != NULL) && (val2 != val)) {
9178 		    xmlFree(val);
9179 		    val = (xmlChar *) val2;
9180 		}
9181 	    }
9182 	}
9183         ctxt->instate = XML_PARSER_CONTENT;
9184     } else {
9185         xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9186                           "Specification mandates value for attribute %s\n",
9187                           name);
9188         return (NULL);
9189     }
9190 
9191     if (*prefix == ctxt->str_xml) {
9192         /*
9193          * Check that xml:lang conforms to the specification
9194          * No more registered as an error, just generate a warning now
9195          * since this was deprecated in XML second edition
9196          */
9197         if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9198             internal_val = xmlStrndup(val, *len);
9199             if (!xmlCheckLanguageID(internal_val)) {
9200                 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9201                               "Malformed value for xml:lang : %s\n",
9202                               internal_val, NULL);
9203             }
9204         }
9205 
9206         /*
9207          * Check that xml:space conforms to the specification
9208          */
9209         if (xmlStrEqual(name, BAD_CAST "space")) {
9210             internal_val = xmlStrndup(val, *len);
9211             if (xmlStrEqual(internal_val, BAD_CAST "default"))
9212                 *(ctxt->space) = 0;
9213             else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9214                 *(ctxt->space) = 1;
9215             else {
9216                 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9217                               "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9218                               internal_val, NULL);
9219             }
9220         }
9221         if (internal_val) {
9222             xmlFree(internal_val);
9223         }
9224     }
9225 
9226     *value = val;
9227     return (name);
9228 }
9229 /**
9230  * xmlParseStartTag2:
9231  * @ctxt:  an XML parser context
9232  *
9233  * parse a start of tag either for rule element or
9234  * EmptyElement. In both case we don't parse the tag closing chars.
9235  * This routine is called when running SAX2 parsing
9236  *
9237  * [40] STag ::= '<' Name (S Attribute)* S? '>'
9238  *
9239  * [ WFC: Unique Att Spec ]
9240  * No attribute name may appear more than once in the same start-tag or
9241  * empty-element tag.
9242  *
9243  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9244  *
9245  * [ WFC: Unique Att Spec ]
9246  * No attribute name may appear more than once in the same start-tag or
9247  * empty-element tag.
9248  *
9249  * With namespace:
9250  *
9251  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9252  *
9253  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9254  *
9255  * Returns the element name parsed
9256  */
9257 
9258 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)9259 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9260                   const xmlChar **URI, int *tlen) {
9261     const xmlChar *localname;
9262     const xmlChar *prefix;
9263     const xmlChar *attname;
9264     const xmlChar *aprefix;
9265     const xmlChar *nsname;
9266     xmlChar *attvalue;
9267     const xmlChar **atts = ctxt->atts;
9268     int maxatts = ctxt->maxatts;
9269     int nratts, nbatts, nbdef, inputid;
9270     int i, j, nbNs, attval;
9271     unsigned long cur;
9272     int nsNr = ctxt->nsNr;
9273 
9274     if (RAW != '<') return(NULL);
9275     NEXT1;
9276 
9277     /*
9278      * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9279      *       point since the attribute values may be stored as pointers to
9280      *       the buffer and calling SHRINK would destroy them !
9281      *       The Shrinking is only possible once the full set of attribute
9282      *       callbacks have been done.
9283      */
9284     SHRINK;
9285     cur = ctxt->input->cur - ctxt->input->base;
9286     inputid = ctxt->input->id;
9287     nbatts = 0;
9288     nratts = 0;
9289     nbdef = 0;
9290     nbNs = 0;
9291     attval = 0;
9292     /* Forget any namespaces added during an earlier parse of this element. */
9293     ctxt->nsNr = nsNr;
9294 
9295     localname = xmlParseQName(ctxt, &prefix);
9296     if (localname == NULL) {
9297 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9298 		       "StartTag: invalid element name\n");
9299         return(NULL);
9300     }
9301     *tlen = ctxt->input->cur - ctxt->input->base - cur;
9302 
9303     /*
9304      * Now parse the attributes, it ends up with the ending
9305      *
9306      * (S Attribute)* S?
9307      */
9308     SKIP_BLANKS;
9309     GROW;
9310 
9311     while (((RAW != '>') &&
9312 	   ((RAW != '/') || (NXT(1) != '>')) &&
9313 	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9314 	const xmlChar *q = CUR_PTR;
9315 	unsigned int cons = ctxt->input->consumed;
9316 	int len = -1, alloc = 0;
9317 
9318 	attname = xmlParseAttribute2(ctxt, prefix, localname,
9319 	                             &aprefix, &attvalue, &len, &alloc);
9320         if ((attname == NULL) || (attvalue == NULL))
9321             goto next_attr;
9322 	if (len < 0) len = xmlStrlen(attvalue);
9323 
9324         if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9325             const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9326             xmlURIPtr uri;
9327 
9328             if (URL == NULL) {
9329                 xmlErrMemory(ctxt, "dictionary allocation failure");
9330                 if ((attvalue != NULL) && (alloc != 0))
9331                     xmlFree(attvalue);
9332                 localname = NULL;
9333                 goto done;
9334             }
9335             if (*URL != 0) {
9336                 uri = xmlParseURI((const char *) URL);
9337                 if (uri == NULL) {
9338                     xmlNsErr(ctxt, XML_WAR_NS_URI,
9339                              "xmlns: '%s' is not a valid URI\n",
9340                                        URL, NULL, NULL);
9341                 } else {
9342                     if (uri->scheme == NULL) {
9343                         xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9344                                   "xmlns: URI %s is not absolute\n",
9345                                   URL, NULL, NULL);
9346                     }
9347                     xmlFreeURI(uri);
9348                 }
9349                 if (URL == ctxt->str_xml_ns) {
9350                     if (attname != ctxt->str_xml) {
9351                         xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9352                      "xml namespace URI cannot be the default namespace\n",
9353                                  NULL, NULL, NULL);
9354                     }
9355                     goto next_attr;
9356                 }
9357                 if ((len == 29) &&
9358                     (xmlStrEqual(URL,
9359                              BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9360                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9361                          "reuse of the xmlns namespace name is forbidden\n",
9362                              NULL, NULL, NULL);
9363                     goto next_attr;
9364                 }
9365             }
9366             /*
9367              * check that it's not a defined namespace
9368              */
9369             for (j = 1;j <= nbNs;j++)
9370                 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9371                     break;
9372             if (j <= nbNs)
9373                 xmlErrAttributeDup(ctxt, NULL, attname);
9374             else
9375                 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9376 
9377         } else if (aprefix == ctxt->str_xmlns) {
9378             const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9379             xmlURIPtr uri;
9380 
9381             if (attname == ctxt->str_xml) {
9382                 if (URL != ctxt->str_xml_ns) {
9383                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9384                              "xml namespace prefix mapped to wrong URI\n",
9385                              NULL, NULL, NULL);
9386                 }
9387                 /*
9388                  * Do not keep a namespace definition node
9389                  */
9390                 goto next_attr;
9391             }
9392             if (URL == ctxt->str_xml_ns) {
9393                 if (attname != ctxt->str_xml) {
9394                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9395                              "xml namespace URI mapped to wrong prefix\n",
9396                              NULL, NULL, NULL);
9397                 }
9398                 goto next_attr;
9399             }
9400             if (attname == ctxt->str_xmlns) {
9401                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9402                          "redefinition of the xmlns prefix is forbidden\n",
9403                          NULL, NULL, NULL);
9404                 goto next_attr;
9405             }
9406             if ((len == 29) &&
9407                 (xmlStrEqual(URL,
9408                              BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9409                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9410                          "reuse of the xmlns namespace name is forbidden\n",
9411                          NULL, NULL, NULL);
9412                 goto next_attr;
9413             }
9414             if ((URL == NULL) || (URL[0] == 0)) {
9415                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9416                          "xmlns:%s: Empty XML namespace is not allowed\n",
9417                               attname, NULL, NULL);
9418                 goto next_attr;
9419             } else {
9420                 uri = xmlParseURI((const char *) URL);
9421                 if (uri == NULL) {
9422                     xmlNsErr(ctxt, XML_WAR_NS_URI,
9423                          "xmlns:%s: '%s' is not a valid URI\n",
9424                                        attname, URL, NULL);
9425                 } else {
9426                     if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9427                         xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9428                                   "xmlns:%s: URI %s is not absolute\n",
9429                                   attname, URL, NULL);
9430                     }
9431                     xmlFreeURI(uri);
9432                 }
9433             }
9434 
9435             /*
9436              * check that it's not a defined namespace
9437              */
9438             for (j = 1;j <= nbNs;j++)
9439                 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9440                     break;
9441             if (j <= nbNs)
9442                 xmlErrAttributeDup(ctxt, aprefix, attname);
9443             else
9444                 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9445 
9446         } else {
9447             /*
9448              * Add the pair to atts
9449              */
9450             if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9451                 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9452                     goto next_attr;
9453                 }
9454                 maxatts = ctxt->maxatts;
9455                 atts = ctxt->atts;
9456             }
9457             ctxt->attallocs[nratts++] = alloc;
9458             atts[nbatts++] = attname;
9459             atts[nbatts++] = aprefix;
9460             /*
9461              * The namespace URI field is used temporarily to point at the
9462              * base of the current input buffer for non-alloced attributes.
9463              * When the input buffer is reallocated, all the pointers become
9464              * invalid, but they can be reconstructed later.
9465              */
9466             if (alloc)
9467                 atts[nbatts++] = NULL;
9468             else
9469                 atts[nbatts++] = ctxt->input->base;
9470             atts[nbatts++] = attvalue;
9471             attvalue += len;
9472             atts[nbatts++] = attvalue;
9473             /*
9474              * tag if some deallocation is needed
9475              */
9476             if (alloc != 0) attval = 1;
9477             attvalue = NULL; /* moved into atts */
9478         }
9479 
9480 next_attr:
9481         if ((attvalue != NULL) && (alloc != 0)) {
9482             xmlFree(attvalue);
9483             attvalue = NULL;
9484         }
9485 
9486 	GROW
9487         if (ctxt->instate == XML_PARSER_EOF)
9488             break;
9489 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9490 	    break;
9491 	if (SKIP_BLANKS == 0) {
9492 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9493 			   "attributes construct error\n");
9494 	    break;
9495 	}
9496         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9497             (attname == NULL) && (attvalue == NULL)) {
9498 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9499 	         "xmlParseStartTag: problem parsing attributes\n");
9500 	    break;
9501 	}
9502         GROW;
9503     }
9504 
9505     if (ctxt->input->id != inputid) {
9506         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9507                     "Unexpected change of input\n");
9508         localname = NULL;
9509         goto done;
9510     }
9511 
9512     /* Reconstruct attribute value pointers. */
9513     for (i = 0, j = 0; j < nratts; i += 5, j++) {
9514         if (atts[i+2] != NULL) {
9515             /*
9516              * Arithmetic on dangling pointers is technically undefined
9517              * behavior, but well...
9518              */
9519             ptrdiff_t offset = ctxt->input->base - atts[i+2];
9520             atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9521             atts[i+3] += offset;  /* value */
9522             atts[i+4] += offset;  /* valuend */
9523         }
9524     }
9525 
9526     /*
9527      * The attributes defaulting
9528      */
9529     if (ctxt->attsDefault != NULL) {
9530         xmlDefAttrsPtr defaults;
9531 
9532 	defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9533 	if (defaults != NULL) {
9534 	    for (i = 0;i < defaults->nbAttrs;i++) {
9535 	        attname = defaults->values[5 * i];
9536 		aprefix = defaults->values[5 * i + 1];
9537 
9538                 /*
9539 		 * special work for namespaces defaulted defs
9540 		 */
9541 		if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9542 		    /*
9543 		     * check that it's not a defined namespace
9544 		     */
9545 		    for (j = 1;j <= nbNs;j++)
9546 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9547 			    break;
9548 	            if (j <= nbNs) continue;
9549 
9550 		    nsname = xmlGetNamespace(ctxt, NULL);
9551 		    if (nsname != defaults->values[5 * i + 2]) {
9552 			if (nsPush(ctxt, NULL,
9553 			           defaults->values[5 * i + 2]) > 0)
9554 			    nbNs++;
9555 		    }
9556 		} else if (aprefix == ctxt->str_xmlns) {
9557 		    /*
9558 		     * check that it's not a defined namespace
9559 		     */
9560 		    for (j = 1;j <= nbNs;j++)
9561 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9562 			    break;
9563 	            if (j <= nbNs) continue;
9564 
9565 		    nsname = xmlGetNamespace(ctxt, attname);
9566 		    if (nsname != defaults->values[2]) {
9567 			if (nsPush(ctxt, attname,
9568 			           defaults->values[5 * i + 2]) > 0)
9569 			    nbNs++;
9570 		    }
9571 		} else {
9572 		    /*
9573 		     * check that it's not a defined attribute
9574 		     */
9575 		    for (j = 0;j < nbatts;j+=5) {
9576 			if ((attname == atts[j]) && (aprefix == atts[j+1]))
9577 			    break;
9578 		    }
9579 		    if (j < nbatts) continue;
9580 
9581 		    if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9582 			if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9583                             localname = NULL;
9584                             goto done;
9585 			}
9586 			maxatts = ctxt->maxatts;
9587 			atts = ctxt->atts;
9588 		    }
9589 		    atts[nbatts++] = attname;
9590 		    atts[nbatts++] = aprefix;
9591 		    if (aprefix == NULL)
9592 			atts[nbatts++] = NULL;
9593 		    else
9594 		        atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9595 		    atts[nbatts++] = defaults->values[5 * i + 2];
9596 		    atts[nbatts++] = defaults->values[5 * i + 3];
9597 		    if ((ctxt->standalone == 1) &&
9598 		        (defaults->values[5 * i + 4] != NULL)) {
9599 			xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9600 	  "standalone: attribute %s on %s defaulted from external subset\n",
9601 	                                 attname, localname);
9602 		    }
9603 		    nbdef++;
9604 		}
9605 	    }
9606 	}
9607     }
9608 
9609     /*
9610      * The attributes checkings
9611      */
9612     for (i = 0; i < nbatts;i += 5) {
9613         /*
9614 	* The default namespace does not apply to attribute names.
9615 	*/
9616 	if (atts[i + 1] != NULL) {
9617 	    nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9618 	    if (nsname == NULL) {
9619 		xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9620 		    "Namespace prefix %s for %s on %s is not defined\n",
9621 		    atts[i + 1], atts[i], localname);
9622 	    }
9623 	    atts[i + 2] = nsname;
9624 	} else
9625 	    nsname = NULL;
9626 	/*
9627 	 * [ WFC: Unique Att Spec ]
9628 	 * No attribute name may appear more than once in the same
9629 	 * start-tag or empty-element tag.
9630 	 * As extended by the Namespace in XML REC.
9631 	 */
9632         for (j = 0; j < i;j += 5) {
9633 	    if (atts[i] == atts[j]) {
9634 	        if (atts[i+1] == atts[j+1]) {
9635 		    xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9636 		    break;
9637 		}
9638 		if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9639 		    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9640 			     "Namespaced Attribute %s in '%s' redefined\n",
9641 			     atts[i], nsname, NULL);
9642 		    break;
9643 		}
9644 	    }
9645 	}
9646     }
9647 
9648     nsname = xmlGetNamespace(ctxt, prefix);
9649     if ((prefix != NULL) && (nsname == NULL)) {
9650 	xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9651 	         "Namespace prefix %s on %s is not defined\n",
9652 		 prefix, localname, NULL);
9653     }
9654     *pref = prefix;
9655     *URI = nsname;
9656 
9657     /*
9658      * SAX: Start of Element !
9659      */
9660     if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9661 	(!ctxt->disableSAX)) {
9662 	if (nbNs > 0)
9663 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9664 			  nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9665 			  nbatts / 5, nbdef, atts);
9666 	else
9667 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9668 	                  nsname, 0, NULL, nbatts / 5, nbdef, atts);
9669     }
9670 
9671 done:
9672     /*
9673      * Free up attribute allocated strings if needed
9674      */
9675     if (attval != 0) {
9676 	for (i = 3,j = 0; j < nratts;i += 5,j++)
9677 	    if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9678 	        xmlFree((xmlChar *) atts[i]);
9679     }
9680 
9681     return(localname);
9682 }
9683 
9684 /**
9685  * xmlParseEndTag2:
9686  * @ctxt:  an XML parser context
9687  * @line:  line of the start tag
9688  * @nsNr:  number of namespaces on the start tag
9689  *
9690  * parse an end of tag
9691  *
9692  * [42] ETag ::= '</' Name S? '>'
9693  *
9694  * With namespace
9695  *
9696  * [NS 9] ETag ::= '</' QName S? '>'
9697  */
9698 
9699 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlStartTag * tag)9700 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9701     const xmlChar *name;
9702 
9703     GROW;
9704     if ((RAW != '<') || (NXT(1) != '/')) {
9705 	xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9706 	return;
9707     }
9708     SKIP(2);
9709 
9710     if (tag->prefix == NULL)
9711         name = xmlParseNameAndCompare(ctxt, ctxt->name);
9712     else
9713         name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9714 
9715     /*
9716      * We should definitely be at the ending "S? '>'" part
9717      */
9718     GROW;
9719     if (ctxt->instate == XML_PARSER_EOF)
9720         return;
9721     SKIP_BLANKS;
9722     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9723 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9724     } else
9725 	NEXT1;
9726 
9727     /*
9728      * [ WFC: Element Type Match ]
9729      * The Name in an element's end-tag must match the element type in the
9730      * start-tag.
9731      *
9732      */
9733     if (name != (xmlChar*)1) {
9734         if (name == NULL) name = BAD_CAST "unparsable";
9735         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9736 		     "Opening and ending tag mismatch: %s line %d and %s\n",
9737 		                ctxt->name, tag->line, name);
9738     }
9739 
9740     /*
9741      * SAX: End of Tag
9742      */
9743     if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9744 	(!ctxt->disableSAX))
9745 	ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9746                                 tag->URI);
9747 
9748     spacePop(ctxt);
9749     if (tag->nsNr != 0)
9750 	nsPop(ctxt, tag->nsNr);
9751 }
9752 
9753 /**
9754  * xmlParseCDSect:
9755  * @ctxt:  an XML parser context
9756  *
9757  * Parse escaped pure raw content.
9758  *
9759  * [18] CDSect ::= CDStart CData CDEnd
9760  *
9761  * [19] CDStart ::= '<![CDATA['
9762  *
9763  * [20] Data ::= (Char* - (Char* ']]>' Char*))
9764  *
9765  * [21] CDEnd ::= ']]>'
9766  */
9767 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9768 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9769     xmlChar *buf = NULL;
9770     int len = 0;
9771     int size = XML_PARSER_BUFFER_SIZE;
9772     int r, rl;
9773     int	s, sl;
9774     int cur, l;
9775     int count = 0;
9776     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9777                     XML_MAX_HUGE_LENGTH :
9778                     XML_MAX_TEXT_LENGTH;
9779 
9780     /* Check 2.6.0 was NXT(0) not RAW */
9781     if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9782 	SKIP(9);
9783     } else
9784         return;
9785 
9786     ctxt->instate = XML_PARSER_CDATA_SECTION;
9787     r = CUR_CHAR(rl);
9788     if (!IS_CHAR(r)) {
9789 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9790 	ctxt->instate = XML_PARSER_CONTENT;
9791         return;
9792     }
9793     NEXTL(rl);
9794     s = CUR_CHAR(sl);
9795     if (!IS_CHAR(s)) {
9796 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9797 	ctxt->instate = XML_PARSER_CONTENT;
9798         return;
9799     }
9800     NEXTL(sl);
9801     cur = CUR_CHAR(l);
9802     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9803     if (buf == NULL) {
9804 	xmlErrMemory(ctxt, NULL);
9805 	return;
9806     }
9807     while (IS_CHAR(cur) &&
9808            ((r != ']') || (s != ']') || (cur != '>'))) {
9809 	if (len + 5 >= size) {
9810 	    xmlChar *tmp;
9811 
9812 	    tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9813 	    if (tmp == NULL) {
9814 	        xmlFree(buf);
9815 		xmlErrMemory(ctxt, NULL);
9816 		return;
9817 	    }
9818 	    buf = tmp;
9819 	    size *= 2;
9820 	}
9821 	COPY_BUF(rl,buf,len,r);
9822 	r = s;
9823 	rl = sl;
9824 	s = cur;
9825 	sl = l;
9826 	count++;
9827 	if (count > 50) {
9828 	    SHRINK;
9829 	    GROW;
9830             if (ctxt->instate == XML_PARSER_EOF) {
9831 		xmlFree(buf);
9832 		return;
9833             }
9834 	    count = 0;
9835 	}
9836 	NEXTL(l);
9837 	cur = CUR_CHAR(l);
9838         if (len > maxLength) {
9839             xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9840                            "CData section too big found\n");
9841             xmlFree(buf);
9842             return;
9843         }
9844     }
9845     buf[len] = 0;
9846     ctxt->instate = XML_PARSER_CONTENT;
9847     if (cur != '>') {
9848 	xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9849 	                     "CData section not finished\n%.50s\n", buf);
9850 	xmlFree(buf);
9851         return;
9852     }
9853     NEXTL(l);
9854 
9855     /*
9856      * OK the buffer is to be consumed as cdata.
9857      */
9858     if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9859 	if (ctxt->sax->cdataBlock != NULL)
9860 	    ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9861 	else if (ctxt->sax->characters != NULL)
9862 	    ctxt->sax->characters(ctxt->userData, buf, len);
9863     }
9864     xmlFree(buf);
9865 }
9866 
9867 /**
9868  * xmlParseContentInternal:
9869  * @ctxt:  an XML parser context
9870  *
9871  * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9872  * unexpected EOF to the caller.
9873  */
9874 
9875 static void
xmlParseContentInternal(xmlParserCtxtPtr ctxt)9876 xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9877     int nameNr = ctxt->nameNr;
9878 
9879     GROW;
9880     while ((RAW != 0) &&
9881 	   (ctxt->instate != XML_PARSER_EOF)) {
9882 	const xmlChar *test = CUR_PTR;
9883 	unsigned int cons = ctxt->input->consumed;
9884 	const xmlChar *cur = ctxt->input->cur;
9885 
9886 	/*
9887 	 * First case : a Processing Instruction.
9888 	 */
9889 	if ((*cur == '<') && (cur[1] == '?')) {
9890 	    xmlParsePI(ctxt);
9891 	}
9892 
9893 	/*
9894 	 * Second case : a CDSection
9895 	 */
9896 	/* 2.6.0 test was *cur not RAW */
9897 	else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9898 	    xmlParseCDSect(ctxt);
9899 	}
9900 
9901 	/*
9902 	 * Third case :  a comment
9903 	 */
9904 	else if ((*cur == '<') && (NXT(1) == '!') &&
9905 		 (NXT(2) == '-') && (NXT(3) == '-')) {
9906 	    xmlParseComment(ctxt);
9907 	    ctxt->instate = XML_PARSER_CONTENT;
9908 	}
9909 
9910 	/*
9911 	 * Fourth case :  a sub-element.
9912 	 */
9913 	else if (*cur == '<') {
9914             if (NXT(1) == '/') {
9915                 if (ctxt->nameNr <= nameNr)
9916                     break;
9917 	        xmlParseElementEnd(ctxt);
9918             } else {
9919 	        xmlParseElementStart(ctxt);
9920             }
9921 	}
9922 
9923 	/*
9924 	 * Fifth case : a reference. If if has not been resolved,
9925 	 *    parsing returns it's Name, create the node
9926 	 */
9927 
9928 	else if (*cur == '&') {
9929 	    xmlParseReference(ctxt);
9930 	}
9931 
9932 	/*
9933 	 * Last case, text. Note that References are handled directly.
9934 	 */
9935 	else {
9936 	    xmlParseCharData(ctxt, 0);
9937 	}
9938 
9939 	GROW;
9940 	SHRINK;
9941 
9942 	if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9943 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9944 	                "detected an error in element content\n");
9945 	    xmlHaltParser(ctxt);
9946             break;
9947 	}
9948     }
9949 }
9950 
9951 /**
9952  * xmlParseContent:
9953  * @ctxt:  an XML parser context
9954  *
9955  * Parse a content sequence. Stops at EOF or '</'.
9956  *
9957  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9958  */
9959 
9960 void
xmlParseContent(xmlParserCtxtPtr ctxt)9961 xmlParseContent(xmlParserCtxtPtr ctxt) {
9962     int nameNr = ctxt->nameNr;
9963 
9964     xmlParseContentInternal(ctxt);
9965 
9966     if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9967         const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9968         int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9969         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9970                 "Premature end of data in tag %s line %d\n",
9971 		name, line, NULL);
9972     }
9973 }
9974 
9975 /**
9976  * xmlParseElement:
9977  * @ctxt:  an XML parser context
9978  *
9979  * parse an XML element
9980  *
9981  * [39] element ::= EmptyElemTag | STag content ETag
9982  *
9983  * [ WFC: Element Type Match ]
9984  * The Name in an element's end-tag must match the element type in the
9985  * start-tag.
9986  *
9987  */
9988 
9989 void
xmlParseElement(xmlParserCtxtPtr ctxt)9990 xmlParseElement(xmlParserCtxtPtr ctxt) {
9991     if (xmlParseElementStart(ctxt) != 0)
9992         return;
9993 
9994     xmlParseContentInternal(ctxt);
9995     if (ctxt->instate == XML_PARSER_EOF)
9996 	return;
9997 
9998     if (CUR == 0) {
9999         const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10000         int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10001         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10002                 "Premature end of data in tag %s line %d\n",
10003 		name, line, NULL);
10004         return;
10005     }
10006 
10007     xmlParseElementEnd(ctxt);
10008 }
10009 
10010 /**
10011  * xmlParseElementStart:
10012  * @ctxt:  an XML parser context
10013  *
10014  * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10015  * opening tag was parsed, 1 if an empty element was parsed.
10016  */
10017 static int
xmlParseElementStart(xmlParserCtxtPtr ctxt)10018 xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10019     const xmlChar *name;
10020     const xmlChar *prefix = NULL;
10021     const xmlChar *URI = NULL;
10022     xmlParserNodeInfo node_info;
10023     int line, tlen = 0;
10024     xmlNodePtr ret;
10025     int nsNr = ctxt->nsNr;
10026 
10027     if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10028         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10029 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10030 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10031 			  xmlParserMaxDepth);
10032 	xmlHaltParser(ctxt);
10033 	return(-1);
10034     }
10035 
10036     /* Capture start position */
10037     if (ctxt->record_info) {
10038         node_info.begin_pos = ctxt->input->consumed +
10039                           (CUR_PTR - ctxt->input->base);
10040 	node_info.begin_line = ctxt->input->line;
10041     }
10042 
10043     if (ctxt->spaceNr == 0)
10044 	spacePush(ctxt, -1);
10045     else if (*ctxt->space == -2)
10046 	spacePush(ctxt, -1);
10047     else
10048 	spacePush(ctxt, *ctxt->space);
10049 
10050     line = ctxt->input->line;
10051 #ifdef LIBXML_SAX1_ENABLED
10052     if (ctxt->sax2)
10053 #endif /* LIBXML_SAX1_ENABLED */
10054         name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10055 #ifdef LIBXML_SAX1_ENABLED
10056     else
10057 	name = xmlParseStartTag(ctxt);
10058 #endif /* LIBXML_SAX1_ENABLED */
10059     if (ctxt->instate == XML_PARSER_EOF)
10060 	return(-1);
10061     if (name == NULL) {
10062 	spacePop(ctxt);
10063         return(-1);
10064     }
10065     nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10066     ret = ctxt->node;
10067 
10068 #ifdef LIBXML_VALID_ENABLED
10069     /*
10070      * [ VC: Root Element Type ]
10071      * The Name in the document type declaration must match the element
10072      * type of the root element.
10073      */
10074     if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10075         ctxt->node && (ctxt->node == ctxt->myDoc->children))
10076         ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10077 #endif /* LIBXML_VALID_ENABLED */
10078 
10079     /*
10080      * Check for an Empty Element.
10081      */
10082     if ((RAW == '/') && (NXT(1) == '>')) {
10083         SKIP(2);
10084 	if (ctxt->sax2) {
10085 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10086 		(!ctxt->disableSAX))
10087 		ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10088 #ifdef LIBXML_SAX1_ENABLED
10089 	} else {
10090 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10091 		(!ctxt->disableSAX))
10092 		ctxt->sax->endElement(ctxt->userData, name);
10093 #endif /* LIBXML_SAX1_ENABLED */
10094 	}
10095 	namePop(ctxt);
10096 	spacePop(ctxt);
10097 	if (nsNr != ctxt->nsNr)
10098 	    nsPop(ctxt, ctxt->nsNr - nsNr);
10099 	if ( ret != NULL && ctxt->record_info ) {
10100 	   node_info.end_pos = ctxt->input->consumed +
10101 			      (CUR_PTR - ctxt->input->base);
10102 	   node_info.end_line = ctxt->input->line;
10103 	   node_info.node = ret;
10104 	   xmlParserAddNodeInfo(ctxt, &node_info);
10105 	}
10106 	return(1);
10107     }
10108     if (RAW == '>') {
10109         NEXT1;
10110     } else {
10111         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10112 		     "Couldn't find end of Start Tag %s line %d\n",
10113 		                name, line, NULL);
10114 
10115 	/*
10116 	 * end of parsing of this node.
10117 	 */
10118 	nodePop(ctxt);
10119 	namePop(ctxt);
10120 	spacePop(ctxt);
10121 	if (nsNr != ctxt->nsNr)
10122 	    nsPop(ctxt, ctxt->nsNr - nsNr);
10123 
10124 	/*
10125 	 * Capture end position and add node
10126 	 */
10127 	if ( ret != NULL && ctxt->record_info ) {
10128 	   node_info.end_pos = ctxt->input->consumed +
10129 			      (CUR_PTR - ctxt->input->base);
10130 	   node_info.end_line = ctxt->input->line;
10131 	   node_info.node = ret;
10132 	   xmlParserAddNodeInfo(ctxt, &node_info);
10133 	}
10134 	return(-1);
10135     }
10136 
10137     return(0);
10138 }
10139 
10140 /**
10141  * xmlParseElementEnd:
10142  * @ctxt:  an XML parser context
10143  *
10144  * Parse the end of an XML element.
10145  */
10146 static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt)10147 xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10148     xmlParserNodeInfo node_info;
10149     xmlNodePtr ret = ctxt->node;
10150 
10151     if (ctxt->nameNr <= 0)
10152         return;
10153 
10154     /*
10155      * parse the end of tag: '</' should be here.
10156      */
10157     if (ctxt->sax2) {
10158 	xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10159 	namePop(ctxt);
10160     }
10161 #ifdef LIBXML_SAX1_ENABLED
10162     else
10163 	xmlParseEndTag1(ctxt, 0);
10164 #endif /* LIBXML_SAX1_ENABLED */
10165 
10166     /*
10167      * Capture end position and add node
10168      */
10169     if ( ret != NULL && ctxt->record_info ) {
10170        node_info.end_pos = ctxt->input->consumed +
10171                           (CUR_PTR - ctxt->input->base);
10172        node_info.end_line = ctxt->input->line;
10173        node_info.node = ret;
10174        xmlParserAddNodeInfo(ctxt, &node_info);
10175     }
10176 }
10177 
10178 /**
10179  * xmlParseVersionNum:
10180  * @ctxt:  an XML parser context
10181  *
10182  * parse the XML version value.
10183  *
10184  * [26] VersionNum ::= '1.' [0-9]+
10185  *
10186  * In practice allow [0-9].[0-9]+ at that level
10187  *
10188  * Returns the string giving the XML version number, or NULL
10189  */
10190 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10191 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10192     xmlChar *buf = NULL;
10193     int len = 0;
10194     int size = 10;
10195     xmlChar cur;
10196 
10197     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10198     if (buf == NULL) {
10199 	xmlErrMemory(ctxt, NULL);
10200 	return(NULL);
10201     }
10202     cur = CUR;
10203     if (!((cur >= '0') && (cur <= '9'))) {
10204 	xmlFree(buf);
10205 	return(NULL);
10206     }
10207     buf[len++] = cur;
10208     NEXT;
10209     cur=CUR;
10210     if (cur != '.') {
10211 	xmlFree(buf);
10212 	return(NULL);
10213     }
10214     buf[len++] = cur;
10215     NEXT;
10216     cur=CUR;
10217     while ((cur >= '0') && (cur <= '9')) {
10218 	if (len + 1 >= size) {
10219 	    xmlChar *tmp;
10220 
10221 	    size *= 2;
10222 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10223 	    if (tmp == NULL) {
10224 	        xmlFree(buf);
10225 		xmlErrMemory(ctxt, NULL);
10226 		return(NULL);
10227 	    }
10228 	    buf = tmp;
10229 	}
10230 	buf[len++] = cur;
10231 	NEXT;
10232 	cur=CUR;
10233     }
10234     buf[len] = 0;
10235     return(buf);
10236 }
10237 
10238 /**
10239  * xmlParseVersionInfo:
10240  * @ctxt:  an XML parser context
10241  *
10242  * parse the XML version.
10243  *
10244  * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10245  *
10246  * [25] Eq ::= S? '=' S?
10247  *
10248  * Returns the version string, e.g. "1.0"
10249  */
10250 
10251 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10252 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10253     xmlChar *version = NULL;
10254 
10255     if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10256 	SKIP(7);
10257 	SKIP_BLANKS;
10258 	if (RAW != '=') {
10259 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10260 	    return(NULL);
10261         }
10262 	NEXT;
10263 	SKIP_BLANKS;
10264 	if (RAW == '"') {
10265 	    NEXT;
10266 	    version = xmlParseVersionNum(ctxt);
10267 	    if (RAW != '"') {
10268 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10269 	    } else
10270 	        NEXT;
10271 	} else if (RAW == '\''){
10272 	    NEXT;
10273 	    version = xmlParseVersionNum(ctxt);
10274 	    if (RAW != '\'') {
10275 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10276 	    } else
10277 	        NEXT;
10278 	} else {
10279 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10280 	}
10281     }
10282     return(version);
10283 }
10284 
10285 /**
10286  * xmlParseEncName:
10287  * @ctxt:  an XML parser context
10288  *
10289  * parse the XML encoding name
10290  *
10291  * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10292  *
10293  * Returns the encoding name value or NULL
10294  */
10295 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10296 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10297     xmlChar *buf = NULL;
10298     int len = 0;
10299     int size = 10;
10300     xmlChar cur;
10301 
10302     cur = CUR;
10303     if (((cur >= 'a') && (cur <= 'z')) ||
10304         ((cur >= 'A') && (cur <= 'Z'))) {
10305 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10306 	if (buf == NULL) {
10307 	    xmlErrMemory(ctxt, NULL);
10308 	    return(NULL);
10309 	}
10310 
10311 	buf[len++] = cur;
10312 	NEXT;
10313 	cur = CUR;
10314 	while (((cur >= 'a') && (cur <= 'z')) ||
10315 	       ((cur >= 'A') && (cur <= 'Z')) ||
10316 	       ((cur >= '0') && (cur <= '9')) ||
10317 	       (cur == '.') || (cur == '_') ||
10318 	       (cur == '-')) {
10319 	    if (len + 1 >= size) {
10320 	        xmlChar *tmp;
10321 
10322 		size *= 2;
10323 		tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10324 		if (tmp == NULL) {
10325 		    xmlErrMemory(ctxt, NULL);
10326 		    xmlFree(buf);
10327 		    return(NULL);
10328 		}
10329 		buf = tmp;
10330 	    }
10331 	    buf[len++] = cur;
10332 	    NEXT;
10333 	    cur = CUR;
10334 	    if (cur == 0) {
10335 	        SHRINK;
10336 		GROW;
10337 		cur = CUR;
10338 	    }
10339         }
10340 	buf[len] = 0;
10341     } else {
10342 	xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10343     }
10344     return(buf);
10345 }
10346 
10347 /**
10348  * xmlParseEncodingDecl:
10349  * @ctxt:  an XML parser context
10350  *
10351  * parse the XML encoding declaration
10352  *
10353  * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10354  *
10355  * this setups the conversion filters.
10356  *
10357  * Returns the encoding value or NULL
10358  */
10359 
10360 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10361 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10362     xmlChar *encoding = NULL;
10363 
10364     SKIP_BLANKS;
10365     if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10366 	SKIP(8);
10367 	SKIP_BLANKS;
10368 	if (RAW != '=') {
10369 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10370 	    return(NULL);
10371         }
10372 	NEXT;
10373 	SKIP_BLANKS;
10374 	if (RAW == '"') {
10375 	    NEXT;
10376 	    encoding = xmlParseEncName(ctxt);
10377 	    if (RAW != '"') {
10378 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10379 		xmlFree((xmlChar *) encoding);
10380 		return(NULL);
10381 	    } else
10382 	        NEXT;
10383 	} else if (RAW == '\''){
10384 	    NEXT;
10385 	    encoding = xmlParseEncName(ctxt);
10386 	    if (RAW != '\'') {
10387 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10388 		xmlFree((xmlChar *) encoding);
10389 		return(NULL);
10390 	    } else
10391 	        NEXT;
10392 	} else {
10393 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10394 	}
10395 
10396         /*
10397          * Non standard parsing, allowing the user to ignore encoding
10398          */
10399         if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10400 	    xmlFree((xmlChar *) encoding);
10401             return(NULL);
10402 	}
10403 
10404 	/*
10405 	 * UTF-16 encoding switch has already taken place at this stage,
10406 	 * more over the little-endian/big-endian selection is already done
10407 	 */
10408         if ((encoding != NULL) &&
10409 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10410 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10411 	    /*
10412 	     * If no encoding was passed to the parser, that we are
10413 	     * using UTF-16 and no decoder is present i.e. the
10414 	     * document is apparently UTF-8 compatible, then raise an
10415 	     * encoding mismatch fatal error
10416 	     */
10417 	    if ((ctxt->encoding == NULL) &&
10418 	        (ctxt->input->buf != NULL) &&
10419 	        (ctxt->input->buf->encoder == NULL)) {
10420 		xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10421 		  "Document labelled UTF-16 but has UTF-8 content\n");
10422 	    }
10423 	    if (ctxt->encoding != NULL)
10424 		xmlFree((xmlChar *) ctxt->encoding);
10425 	    ctxt->encoding = encoding;
10426 	}
10427 	/*
10428 	 * UTF-8 encoding is handled natively
10429 	 */
10430         else if ((encoding != NULL) &&
10431 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10432 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10433 	    if (ctxt->encoding != NULL)
10434 		xmlFree((xmlChar *) ctxt->encoding);
10435 	    ctxt->encoding = encoding;
10436 	}
10437 	else if (encoding != NULL) {
10438 	    xmlCharEncodingHandlerPtr handler;
10439 
10440 	    if (ctxt->input->encoding != NULL)
10441 		xmlFree((xmlChar *) ctxt->input->encoding);
10442 	    ctxt->input->encoding = encoding;
10443 
10444             handler = xmlFindCharEncodingHandler((const char *) encoding);
10445 	    if (handler != NULL) {
10446 		if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10447 		    /* failed to convert */
10448 		    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10449 		    return(NULL);
10450 		}
10451 	    } else {
10452 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10453 			"Unsupported encoding %s\n", encoding);
10454 		return(NULL);
10455 	    }
10456 	}
10457     }
10458     return(encoding);
10459 }
10460 
10461 /**
10462  * xmlParseSDDecl:
10463  * @ctxt:  an XML parser context
10464  *
10465  * parse the XML standalone declaration
10466  *
10467  * [32] SDDecl ::= S 'standalone' Eq
10468  *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10469  *
10470  * [ VC: Standalone Document Declaration ]
10471  * TODO The standalone document declaration must have the value "no"
10472  * if any external markup declarations contain declarations of:
10473  *  - attributes with default values, if elements to which these
10474  *    attributes apply appear in the document without specifications
10475  *    of values for these attributes, or
10476  *  - entities (other than amp, lt, gt, apos, quot), if references
10477  *    to those entities appear in the document, or
10478  *  - attributes with values subject to normalization, where the
10479  *    attribute appears in the document with a value which will change
10480  *    as a result of normalization, or
10481  *  - element types with element content, if white space occurs directly
10482  *    within any instance of those types.
10483  *
10484  * Returns:
10485  *   1 if standalone="yes"
10486  *   0 if standalone="no"
10487  *  -2 if standalone attribute is missing or invalid
10488  *	  (A standalone value of -2 means that the XML declaration was found,
10489  *	   but no value was specified for the standalone attribute).
10490  */
10491 
10492 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10493 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10494     int standalone = -2;
10495 
10496     SKIP_BLANKS;
10497     if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10498 	SKIP(10);
10499         SKIP_BLANKS;
10500 	if (RAW != '=') {
10501 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10502 	    return(standalone);
10503         }
10504 	NEXT;
10505 	SKIP_BLANKS;
10506         if (RAW == '\''){
10507 	    NEXT;
10508 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10509 	        standalone = 0;
10510                 SKIP(2);
10511 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10512 	               (NXT(2) == 's')) {
10513 	        standalone = 1;
10514 		SKIP(3);
10515             } else {
10516 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10517 	    }
10518 	    if (RAW != '\'') {
10519 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10520 	    } else
10521 	        NEXT;
10522 	} else if (RAW == '"'){
10523 	    NEXT;
10524 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10525 	        standalone = 0;
10526 		SKIP(2);
10527 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10528 	               (NXT(2) == 's')) {
10529 	        standalone = 1;
10530                 SKIP(3);
10531             } else {
10532 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10533 	    }
10534 	    if (RAW != '"') {
10535 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10536 	    } else
10537 	        NEXT;
10538 	} else {
10539 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10540         }
10541     }
10542     return(standalone);
10543 }
10544 
10545 /**
10546  * xmlParseXMLDecl:
10547  * @ctxt:  an XML parser context
10548  *
10549  * parse an XML declaration header
10550  *
10551  * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10552  */
10553 
10554 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10555 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10556     xmlChar *version;
10557 
10558     /*
10559      * This value for standalone indicates that the document has an
10560      * XML declaration but it does not have a standalone attribute.
10561      * It will be overwritten later if a standalone attribute is found.
10562      */
10563     ctxt->input->standalone = -2;
10564 
10565     /*
10566      * We know that '<?xml' is here.
10567      */
10568     SKIP(5);
10569 
10570     if (!IS_BLANK_CH(RAW)) {
10571 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10572 	               "Blank needed after '<?xml'\n");
10573     }
10574     SKIP_BLANKS;
10575 
10576     /*
10577      * We must have the VersionInfo here.
10578      */
10579     version = xmlParseVersionInfo(ctxt);
10580     if (version == NULL) {
10581 	xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10582     } else {
10583 	if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10584 	    /*
10585 	     * Changed here for XML-1.0 5th edition
10586 	     */
10587 	    if (ctxt->options & XML_PARSE_OLD10) {
10588 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10589 			          "Unsupported version '%s'\n",
10590 			          version);
10591 	    } else {
10592 	        if ((version[0] == '1') && ((version[1] == '.'))) {
10593 		    xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10594 		                  "Unsupported version '%s'\n",
10595 				  version, NULL);
10596 		} else {
10597 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10598 				      "Unsupported version '%s'\n",
10599 				      version);
10600 		}
10601 	    }
10602 	}
10603 	if (ctxt->version != NULL)
10604 	    xmlFree((void *) ctxt->version);
10605 	ctxt->version = version;
10606     }
10607 
10608     /*
10609      * We may have the encoding declaration
10610      */
10611     if (!IS_BLANK_CH(RAW)) {
10612         if ((RAW == '?') && (NXT(1) == '>')) {
10613 	    SKIP(2);
10614 	    return;
10615 	}
10616 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10617     }
10618     xmlParseEncodingDecl(ctxt);
10619     if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10620          (ctxt->instate == XML_PARSER_EOF)) {
10621 	/*
10622 	 * The XML REC instructs us to stop parsing right here
10623 	 */
10624         return;
10625     }
10626 
10627     /*
10628      * We may have the standalone status.
10629      */
10630     if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10631         if ((RAW == '?') && (NXT(1) == '>')) {
10632 	    SKIP(2);
10633 	    return;
10634 	}
10635 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10636     }
10637 
10638     /*
10639      * We can grow the input buffer freely at that point
10640      */
10641     GROW;
10642 
10643     SKIP_BLANKS;
10644     ctxt->input->standalone = xmlParseSDDecl(ctxt);
10645 
10646     SKIP_BLANKS;
10647     if ((RAW == '?') && (NXT(1) == '>')) {
10648         SKIP(2);
10649     } else if (RAW == '>') {
10650         /* Deprecated old WD ... */
10651 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10652 	NEXT;
10653     } else {
10654 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10655 	MOVETO_ENDTAG(CUR_PTR);
10656 	NEXT;
10657     }
10658 }
10659 
10660 /**
10661  * xmlParseMisc:
10662  * @ctxt:  an XML parser context
10663  *
10664  * parse an XML Misc* optional field.
10665  *
10666  * [27] Misc ::= Comment | PI |  S
10667  */
10668 
10669 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10670 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10671     while (ctxt->instate != XML_PARSER_EOF) {
10672         SKIP_BLANKS;
10673         GROW;
10674         if ((RAW == '<') && (NXT(1) == '?')) {
10675 	    xmlParsePI(ctxt);
10676         } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10677 	    xmlParseComment(ctxt);
10678         } else {
10679             break;
10680         }
10681     }
10682 }
10683 
10684 /**
10685  * xmlParseDocument:
10686  * @ctxt:  an XML parser context
10687  *
10688  * parse an XML document (and build a tree if using the standard SAX
10689  * interface).
10690  *
10691  * [1] document ::= prolog element Misc*
10692  *
10693  * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10694  *
10695  * Returns 0, -1 in case of error. the parser context is augmented
10696  *                as a result of the parsing.
10697  */
10698 
10699 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10700 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10701     xmlChar start[4];
10702     xmlCharEncoding enc;
10703 
10704     xmlInitParser();
10705 
10706     if ((ctxt == NULL) || (ctxt->input == NULL))
10707         return(-1);
10708 
10709     GROW;
10710 
10711     /*
10712      * SAX: detecting the level.
10713      */
10714     xmlDetectSAX2(ctxt);
10715 
10716     /*
10717      * SAX: beginning of the document processing.
10718      */
10719     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10720         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10721     if (ctxt->instate == XML_PARSER_EOF)
10722 	return(-1);
10723 
10724     if ((ctxt->encoding == NULL) &&
10725         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10726 	/*
10727 	 * Get the 4 first bytes and decode the charset
10728 	 * if enc != XML_CHAR_ENCODING_NONE
10729 	 * plug some encoding conversion routines.
10730 	 */
10731 	start[0] = RAW;
10732 	start[1] = NXT(1);
10733 	start[2] = NXT(2);
10734 	start[3] = NXT(3);
10735 	enc = xmlDetectCharEncoding(&start[0], 4);
10736 	if (enc != XML_CHAR_ENCODING_NONE) {
10737 	    xmlSwitchEncoding(ctxt, enc);
10738 	}
10739     }
10740 
10741 
10742     if (CUR == 0) {
10743 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10744 	return(-1);
10745     }
10746 
10747     /*
10748      * Check for the XMLDecl in the Prolog.
10749      * do not GROW here to avoid the detected encoder to decode more
10750      * than just the first line, unless the amount of data is really
10751      * too small to hold "<?xml version="1.0" encoding="foo"
10752      */
10753     if ((ctxt->input->end - ctxt->input->cur) < 35) {
10754        GROW;
10755     }
10756     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10757 
10758 	/*
10759 	 * Note that we will switch encoding on the fly.
10760 	 */
10761 	xmlParseXMLDecl(ctxt);
10762 	if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10763 	    (ctxt->instate == XML_PARSER_EOF)) {
10764 	    /*
10765 	     * The XML REC instructs us to stop parsing right here
10766 	     */
10767 	    return(-1);
10768 	}
10769 	ctxt->standalone = ctxt->input->standalone;
10770 	SKIP_BLANKS;
10771     } else {
10772 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10773     }
10774     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10775         ctxt->sax->startDocument(ctxt->userData);
10776     if (ctxt->instate == XML_PARSER_EOF)
10777 	return(-1);
10778     if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10779         (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10780 	ctxt->myDoc->compression = ctxt->input->buf->compressed;
10781     }
10782 
10783     /*
10784      * The Misc part of the Prolog
10785      */
10786     xmlParseMisc(ctxt);
10787 
10788     /*
10789      * Then possibly doc type declaration(s) and more Misc
10790      * (doctypedecl Misc*)?
10791      */
10792     GROW;
10793     if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10794 
10795 	ctxt->inSubset = 1;
10796 	xmlParseDocTypeDecl(ctxt);
10797 	if (RAW == '[') {
10798 	    ctxt->instate = XML_PARSER_DTD;
10799 	    xmlParseInternalSubset(ctxt);
10800 	    if (ctxt->instate == XML_PARSER_EOF)
10801 		return(-1);
10802 	}
10803 
10804 	/*
10805 	 * Create and update the external subset.
10806 	 */
10807 	ctxt->inSubset = 2;
10808 	if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10809 	    (!ctxt->disableSAX))
10810 	    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10811 	                              ctxt->extSubSystem, ctxt->extSubURI);
10812 	if (ctxt->instate == XML_PARSER_EOF)
10813 	    return(-1);
10814 	ctxt->inSubset = 0;
10815 
10816         xmlCleanSpecialAttr(ctxt);
10817 
10818 	ctxt->instate = XML_PARSER_PROLOG;
10819 	xmlParseMisc(ctxt);
10820     }
10821 
10822     /*
10823      * Time to start parsing the tree itself
10824      */
10825     GROW;
10826     if (RAW != '<') {
10827 	xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10828 		       "Start tag expected, '<' not found\n");
10829     } else {
10830 	ctxt->instate = XML_PARSER_CONTENT;
10831 	xmlParseElement(ctxt);
10832 	ctxt->instate = XML_PARSER_EPILOG;
10833 
10834 
10835 	/*
10836 	 * The Misc part at the end
10837 	 */
10838 	xmlParseMisc(ctxt);
10839 
10840 	if (RAW != 0) {
10841 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10842 	}
10843 	ctxt->instate = XML_PARSER_EOF;
10844     }
10845 
10846     /*
10847      * SAX: end of the document processing.
10848      */
10849     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10850         ctxt->sax->endDocument(ctxt->userData);
10851 
10852     /*
10853      * Remove locally kept entity definitions if the tree was not built
10854      */
10855     if ((ctxt->myDoc != NULL) &&
10856 	(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10857 	xmlFreeDoc(ctxt->myDoc);
10858 	ctxt->myDoc = NULL;
10859     }
10860 
10861     if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10862         ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10863 	if (ctxt->valid)
10864 	    ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10865 	if (ctxt->nsWellFormed)
10866 	    ctxt->myDoc->properties |= XML_DOC_NSVALID;
10867 	if (ctxt->options & XML_PARSE_OLD10)
10868 	    ctxt->myDoc->properties |= XML_DOC_OLD10;
10869     }
10870     if (! ctxt->wellFormed) {
10871 	ctxt->valid = 0;
10872 	return(-1);
10873     }
10874     return(0);
10875 }
10876 
10877 /**
10878  * xmlParseExtParsedEnt:
10879  * @ctxt:  an XML parser context
10880  *
10881  * parse a general parsed entity
10882  * An external general parsed entity is well-formed if it matches the
10883  * production labeled extParsedEnt.
10884  *
10885  * [78] extParsedEnt ::= TextDecl? content
10886  *
10887  * Returns 0, -1 in case of error. the parser context is augmented
10888  *                as a result of the parsing.
10889  */
10890 
10891 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10892 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10893     xmlChar start[4];
10894     xmlCharEncoding enc;
10895 
10896     if ((ctxt == NULL) || (ctxt->input == NULL))
10897         return(-1);
10898 
10899     xmlDefaultSAXHandlerInit();
10900 
10901     xmlDetectSAX2(ctxt);
10902 
10903     GROW;
10904 
10905     /*
10906      * SAX: beginning of the document processing.
10907      */
10908     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10909         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10910 
10911     /*
10912      * Get the 4 first bytes and decode the charset
10913      * if enc != XML_CHAR_ENCODING_NONE
10914      * plug some encoding conversion routines.
10915      */
10916     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10917 	start[0] = RAW;
10918 	start[1] = NXT(1);
10919 	start[2] = NXT(2);
10920 	start[3] = NXT(3);
10921 	enc = xmlDetectCharEncoding(start, 4);
10922 	if (enc != XML_CHAR_ENCODING_NONE) {
10923 	    xmlSwitchEncoding(ctxt, enc);
10924 	}
10925     }
10926 
10927 
10928     if (CUR == 0) {
10929 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10930     }
10931 
10932     /*
10933      * Check for the XMLDecl in the Prolog.
10934      */
10935     GROW;
10936     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10937 
10938 	/*
10939 	 * Note that we will switch encoding on the fly.
10940 	 */
10941 	xmlParseXMLDecl(ctxt);
10942 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10943 	    /*
10944 	     * The XML REC instructs us to stop parsing right here
10945 	     */
10946 	    return(-1);
10947 	}
10948 	SKIP_BLANKS;
10949     } else {
10950 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10951     }
10952     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10953         ctxt->sax->startDocument(ctxt->userData);
10954     if (ctxt->instate == XML_PARSER_EOF)
10955 	return(-1);
10956 
10957     /*
10958      * Doing validity checking on chunk doesn't make sense
10959      */
10960     ctxt->instate = XML_PARSER_CONTENT;
10961     ctxt->validate = 0;
10962     ctxt->loadsubset = 0;
10963     ctxt->depth = 0;
10964 
10965     xmlParseContent(ctxt);
10966     if (ctxt->instate == XML_PARSER_EOF)
10967 	return(-1);
10968 
10969     if ((RAW == '<') && (NXT(1) == '/')) {
10970 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10971     } else if (RAW != 0) {
10972 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10973     }
10974 
10975     /*
10976      * SAX: end of the document processing.
10977      */
10978     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10979         ctxt->sax->endDocument(ctxt->userData);
10980 
10981     if (! ctxt->wellFormed) return(-1);
10982     return(0);
10983 }
10984 
10985 #ifdef LIBXML_PUSH_ENABLED
10986 /************************************************************************
10987  *									*
10988  *		Progressive parsing interfaces				*
10989  *									*
10990  ************************************************************************/
10991 
10992 /**
10993  * xmlParseLookupSequence:
10994  * @ctxt:  an XML parser context
10995  * @first:  the first char to lookup
10996  * @next:  the next char to lookup or zero
10997  * @third:  the next char to lookup or zero
10998  *
10999  * Try to find if a sequence (first, next, third) or  just (first next) or
11000  * (first) is available in the input stream.
11001  * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11002  * to avoid rescanning sequences of bytes, it DOES change the state of the
11003  * parser, do not use liberally.
11004  *
11005  * Returns the index to the current parsing point if the full sequence
11006  *      is available, -1 otherwise.
11007  */
11008 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)11009 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11010                        xmlChar next, xmlChar third) {
11011     int base, len;
11012     xmlParserInputPtr in;
11013     const xmlChar *buf;
11014 
11015     in = ctxt->input;
11016     if (in == NULL) return(-1);
11017     base = in->cur - in->base;
11018     if (base < 0) return(-1);
11019     if (ctxt->checkIndex > base)
11020         base = ctxt->checkIndex;
11021     if (in->buf == NULL) {
11022 	buf = in->base;
11023 	len = in->length;
11024     } else {
11025 	buf = xmlBufContent(in->buf->buffer);
11026 	len = xmlBufUse(in->buf->buffer);
11027     }
11028     /* take into account the sequence length */
11029     if (third) len -= 2;
11030     else if (next) len --;
11031     for (;base < len;base++) {
11032         if (buf[base] == first) {
11033 	    if (third != 0) {
11034 		if ((buf[base + 1] != next) ||
11035 		    (buf[base + 2] != third)) continue;
11036 	    } else if (next != 0) {
11037 		if (buf[base + 1] != next) continue;
11038 	    }
11039 	    ctxt->checkIndex = 0;
11040 #ifdef DEBUG_PUSH
11041 	    if (next == 0)
11042 		xmlGenericError(xmlGenericErrorContext,
11043 			"PP: lookup '%c' found at %d\n",
11044 			first, base);
11045 	    else if (third == 0)
11046 		xmlGenericError(xmlGenericErrorContext,
11047 			"PP: lookup '%c%c' found at %d\n",
11048 			first, next, base);
11049 	    else
11050 		xmlGenericError(xmlGenericErrorContext,
11051 			"PP: lookup '%c%c%c' found at %d\n",
11052 			first, next, third, base);
11053 #endif
11054 	    return(base - (in->cur - in->base));
11055 	}
11056     }
11057     ctxt->checkIndex = base;
11058 #ifdef DEBUG_PUSH
11059     if (next == 0)
11060 	xmlGenericError(xmlGenericErrorContext,
11061 		"PP: lookup '%c' failed\n", first);
11062     else if (third == 0)
11063 	xmlGenericError(xmlGenericErrorContext,
11064 		"PP: lookup '%c%c' failed\n", first, next);
11065     else
11066 	xmlGenericError(xmlGenericErrorContext,
11067 		"PP: lookup '%c%c%c' failed\n", first, next, third);
11068 #endif
11069     return(-1);
11070 }
11071 
11072 /**
11073  * xmlParseGetLasts:
11074  * @ctxt:  an XML parser context
11075  * @lastlt:  pointer to store the last '<' from the input
11076  * @lastgt:  pointer to store the last '>' from the input
11077  *
11078  * Lookup the last < and > in the current chunk
11079  */
11080 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)11081 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11082                  const xmlChar **lastgt) {
11083     const xmlChar *tmp;
11084 
11085     if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11086 	xmlGenericError(xmlGenericErrorContext,
11087 		    "Internal error: xmlParseGetLasts\n");
11088 	return;
11089     }
11090     if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11091         tmp = ctxt->input->end;
11092 	tmp--;
11093 	while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11094 	if (tmp < ctxt->input->base) {
11095 	    *lastlt = NULL;
11096 	    *lastgt = NULL;
11097 	} else {
11098 	    *lastlt = tmp;
11099 	    tmp++;
11100 	    while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11101 	        if (*tmp == '\'') {
11102 		    tmp++;
11103 		    while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11104 		    if (tmp < ctxt->input->end) tmp++;
11105 		} else if (*tmp == '"') {
11106 		    tmp++;
11107 		    while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11108 		    if (tmp < ctxt->input->end) tmp++;
11109 		} else
11110 		    tmp++;
11111 	    }
11112 	    if (tmp < ctxt->input->end)
11113 	        *lastgt = tmp;
11114 	    else {
11115 	        tmp = *lastlt;
11116 		tmp--;
11117 		while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11118 		if (tmp >= ctxt->input->base)
11119 		    *lastgt = tmp;
11120 		else
11121 		    *lastgt = NULL;
11122 	    }
11123 	}
11124     } else {
11125         *lastlt = NULL;
11126 	*lastgt = NULL;
11127     }
11128 }
11129 /**
11130  * xmlCheckCdataPush:
11131  * @cur: pointer to the block of characters
11132  * @len: length of the block in bytes
11133  * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11134  *
11135  * Check that the block of characters is okay as SCdata content [20]
11136  *
11137  * Returns the number of bytes to pass if okay, a negative index where an
11138  *         UTF-8 error occurred otherwise
11139  */
11140 static int
xmlCheckCdataPush(const xmlChar * utf,int len,int complete)11141 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11142     int ix;
11143     unsigned char c;
11144     int codepoint;
11145 
11146     if ((utf == NULL) || (len <= 0))
11147         return(0);
11148 
11149     for (ix = 0; ix < len;) {      /* string is 0-terminated */
11150         c = utf[ix];
11151         if ((c & 0x80) == 0x00) {	/* 1-byte code, starts with 10 */
11152 	    if (c >= 0x20)
11153 		ix++;
11154 	    else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11155 	        ix++;
11156 	    else
11157 	        return(-ix);
11158 	} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11159 	    if (ix + 2 > len) return(complete ? -ix : ix);
11160 	    if ((utf[ix+1] & 0xc0 ) != 0x80)
11161 	        return(-ix);
11162 	    codepoint = (utf[ix] & 0x1f) << 6;
11163 	    codepoint |= utf[ix+1] & 0x3f;
11164 	    if (!xmlIsCharQ(codepoint))
11165 	        return(-ix);
11166 	    ix += 2;
11167 	} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11168 	    if (ix + 3 > len) return(complete ? -ix : ix);
11169 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11170 	        ((utf[ix+2] & 0xc0) != 0x80))
11171 		    return(-ix);
11172 	    codepoint = (utf[ix] & 0xf) << 12;
11173 	    codepoint |= (utf[ix+1] & 0x3f) << 6;
11174 	    codepoint |= utf[ix+2] & 0x3f;
11175 	    if (!xmlIsCharQ(codepoint))
11176 	        return(-ix);
11177 	    ix += 3;
11178 	} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11179 	    if (ix + 4 > len) return(complete ? -ix : ix);
11180 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11181 	        ((utf[ix+2] & 0xc0) != 0x80) ||
11182 		((utf[ix+3] & 0xc0) != 0x80))
11183 		    return(-ix);
11184 	    codepoint = (utf[ix] & 0x7) << 18;
11185 	    codepoint |= (utf[ix+1] & 0x3f) << 12;
11186 	    codepoint |= (utf[ix+2] & 0x3f) << 6;
11187 	    codepoint |= utf[ix+3] & 0x3f;
11188 	    if (!xmlIsCharQ(codepoint))
11189 	        return(-ix);
11190 	    ix += 4;
11191 	} else				/* unknown encoding */
11192 	    return(-ix);
11193       }
11194       return(ix);
11195 }
11196 
11197 /**
11198  * xmlParseTryOrFinish:
11199  * @ctxt:  an XML parser context
11200  * @terminate:  last chunk indicator
11201  *
11202  * Try to progress on parsing
11203  *
11204  * Returns zero if no parsing was possible
11205  */
11206 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11207 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11208     int ret = 0;
11209     int avail, tlen;
11210     xmlChar cur, next;
11211     const xmlChar *lastlt, *lastgt;
11212 
11213     if (ctxt->input == NULL)
11214         return(0);
11215 
11216 #ifdef DEBUG_PUSH
11217     switch (ctxt->instate) {
11218 	case XML_PARSER_EOF:
11219 	    xmlGenericError(xmlGenericErrorContext,
11220 		    "PP: try EOF\n"); break;
11221 	case XML_PARSER_START:
11222 	    xmlGenericError(xmlGenericErrorContext,
11223 		    "PP: try START\n"); break;
11224 	case XML_PARSER_MISC:
11225 	    xmlGenericError(xmlGenericErrorContext,
11226 		    "PP: try MISC\n");break;
11227 	case XML_PARSER_COMMENT:
11228 	    xmlGenericError(xmlGenericErrorContext,
11229 		    "PP: try COMMENT\n");break;
11230 	case XML_PARSER_PROLOG:
11231 	    xmlGenericError(xmlGenericErrorContext,
11232 		    "PP: try PROLOG\n");break;
11233 	case XML_PARSER_START_TAG:
11234 	    xmlGenericError(xmlGenericErrorContext,
11235 		    "PP: try START_TAG\n");break;
11236 	case XML_PARSER_CONTENT:
11237 	    xmlGenericError(xmlGenericErrorContext,
11238 		    "PP: try CONTENT\n");break;
11239 	case XML_PARSER_CDATA_SECTION:
11240 	    xmlGenericError(xmlGenericErrorContext,
11241 		    "PP: try CDATA_SECTION\n");break;
11242 	case XML_PARSER_END_TAG:
11243 	    xmlGenericError(xmlGenericErrorContext,
11244 		    "PP: try END_TAG\n");break;
11245 	case XML_PARSER_ENTITY_DECL:
11246 	    xmlGenericError(xmlGenericErrorContext,
11247 		    "PP: try ENTITY_DECL\n");break;
11248 	case XML_PARSER_ENTITY_VALUE:
11249 	    xmlGenericError(xmlGenericErrorContext,
11250 		    "PP: try ENTITY_VALUE\n");break;
11251 	case XML_PARSER_ATTRIBUTE_VALUE:
11252 	    xmlGenericError(xmlGenericErrorContext,
11253 		    "PP: try ATTRIBUTE_VALUE\n");break;
11254 	case XML_PARSER_DTD:
11255 	    xmlGenericError(xmlGenericErrorContext,
11256 		    "PP: try DTD\n");break;
11257 	case XML_PARSER_EPILOG:
11258 	    xmlGenericError(xmlGenericErrorContext,
11259 		    "PP: try EPILOG\n");break;
11260 	case XML_PARSER_PI:
11261 	    xmlGenericError(xmlGenericErrorContext,
11262 		    "PP: try PI\n");break;
11263         case XML_PARSER_IGNORE:
11264             xmlGenericError(xmlGenericErrorContext,
11265 		    "PP: try IGNORE\n");break;
11266     }
11267 #endif
11268 
11269     if ((ctxt->input != NULL) &&
11270         (ctxt->input->cur - ctxt->input->base > 4096)) {
11271 	xmlSHRINK(ctxt);
11272 	ctxt->checkIndex = 0;
11273     }
11274     xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11275 
11276     while (ctxt->instate != XML_PARSER_EOF) {
11277 	if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11278 	    return(0);
11279 
11280 	if (ctxt->input == NULL) break;
11281 	if (ctxt->input->buf == NULL)
11282 	    avail = ctxt->input->length -
11283 	            (ctxt->input->cur - ctxt->input->base);
11284 	else {
11285 	    /*
11286 	     * If we are operating on converted input, try to flush
11287 	     * remaining chars to avoid them stalling in the non-converted
11288 	     * buffer. But do not do this in document start where
11289 	     * encoding="..." may not have been read and we work on a
11290 	     * guessed encoding.
11291 	     */
11292 	    if ((ctxt->instate != XML_PARSER_START) &&
11293 	        (ctxt->input->buf->raw != NULL) &&
11294 		(xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11295                 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11296                                                  ctxt->input);
11297 		size_t current = ctxt->input->cur - ctxt->input->base;
11298 
11299 		xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11300                 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11301                                       base, current);
11302 	    }
11303 	    avail = xmlBufUse(ctxt->input->buf->buffer) -
11304 		    (ctxt->input->cur - ctxt->input->base);
11305 	}
11306         if (avail < 1)
11307 	    goto done;
11308         switch (ctxt->instate) {
11309             case XML_PARSER_EOF:
11310 	        /*
11311 		 * Document parsing is done !
11312 		 */
11313 	        goto done;
11314             case XML_PARSER_START:
11315 		if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11316 		    xmlChar start[4];
11317 		    xmlCharEncoding enc;
11318 
11319 		    /*
11320 		     * Very first chars read from the document flow.
11321 		     */
11322 		    if (avail < 4)
11323 			goto done;
11324 
11325 		    /*
11326 		     * Get the 4 first bytes and decode the charset
11327 		     * if enc != XML_CHAR_ENCODING_NONE
11328 		     * plug some encoding conversion routines,
11329 		     * else xmlSwitchEncoding will set to (default)
11330 		     * UTF8.
11331 		     */
11332 		    start[0] = RAW;
11333 		    start[1] = NXT(1);
11334 		    start[2] = NXT(2);
11335 		    start[3] = NXT(3);
11336 		    enc = xmlDetectCharEncoding(start, 4);
11337 		    xmlSwitchEncoding(ctxt, enc);
11338 		    break;
11339 		}
11340 
11341 		if (avail < 2)
11342 		    goto done;
11343 		cur = ctxt->input->cur[0];
11344 		next = ctxt->input->cur[1];
11345 		if (cur == 0) {
11346 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11347 			ctxt->sax->setDocumentLocator(ctxt->userData,
11348 						      &xmlDefaultSAXLocator);
11349 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11350 		    xmlHaltParser(ctxt);
11351 #ifdef DEBUG_PUSH
11352 		    xmlGenericError(xmlGenericErrorContext,
11353 			    "PP: entering EOF\n");
11354 #endif
11355 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11356 			ctxt->sax->endDocument(ctxt->userData);
11357 		    goto done;
11358 		}
11359 	        if ((cur == '<') && (next == '?')) {
11360 		    /* PI or XML decl */
11361 		    if (avail < 5) return(ret);
11362 		    if ((!terminate) &&
11363 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11364 			return(ret);
11365 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11366 			ctxt->sax->setDocumentLocator(ctxt->userData,
11367 						      &xmlDefaultSAXLocator);
11368 		    if ((ctxt->input->cur[2] == 'x') &&
11369 			(ctxt->input->cur[3] == 'm') &&
11370 			(ctxt->input->cur[4] == 'l') &&
11371 			(IS_BLANK_CH(ctxt->input->cur[5]))) {
11372 			ret += 5;
11373 #ifdef DEBUG_PUSH
11374 			xmlGenericError(xmlGenericErrorContext,
11375 				"PP: Parsing XML Decl\n");
11376 #endif
11377 			xmlParseXMLDecl(ctxt);
11378 			if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11379 			    /*
11380 			     * The XML REC instructs us to stop parsing right
11381 			     * here
11382 			     */
11383 			    xmlHaltParser(ctxt);
11384 			    return(0);
11385 			}
11386 			ctxt->standalone = ctxt->input->standalone;
11387 			if ((ctxt->encoding == NULL) &&
11388 			    (ctxt->input->encoding != NULL))
11389 			    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11390 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11391 			    (!ctxt->disableSAX))
11392 			    ctxt->sax->startDocument(ctxt->userData);
11393 			ctxt->instate = XML_PARSER_MISC;
11394 #ifdef DEBUG_PUSH
11395 			xmlGenericError(xmlGenericErrorContext,
11396 				"PP: entering MISC\n");
11397 #endif
11398 		    } else {
11399 			ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11400 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11401 			    (!ctxt->disableSAX))
11402 			    ctxt->sax->startDocument(ctxt->userData);
11403 			ctxt->instate = XML_PARSER_MISC;
11404 #ifdef DEBUG_PUSH
11405 			xmlGenericError(xmlGenericErrorContext,
11406 				"PP: entering MISC\n");
11407 #endif
11408 		    }
11409 		} else {
11410 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11411 			ctxt->sax->setDocumentLocator(ctxt->userData,
11412 						      &xmlDefaultSAXLocator);
11413 		    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11414 		    if (ctxt->version == NULL) {
11415 		        xmlErrMemory(ctxt, NULL);
11416 			break;
11417 		    }
11418 		    if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11419 		        (!ctxt->disableSAX))
11420 			ctxt->sax->startDocument(ctxt->userData);
11421 		    ctxt->instate = XML_PARSER_MISC;
11422 #ifdef DEBUG_PUSH
11423 		    xmlGenericError(xmlGenericErrorContext,
11424 			    "PP: entering MISC\n");
11425 #endif
11426 		}
11427 		break;
11428             case XML_PARSER_START_TAG: {
11429 	        const xmlChar *name;
11430 		const xmlChar *prefix = NULL;
11431 		const xmlChar *URI = NULL;
11432                 int line = ctxt->input->line;
11433 		int nsNr = ctxt->nsNr;
11434 
11435 		if ((avail < 2) && (ctxt->inputNr == 1))
11436 		    goto done;
11437 		cur = ctxt->input->cur[0];
11438 	        if (cur != '<') {
11439 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11440 		    xmlHaltParser(ctxt);
11441 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11442 			ctxt->sax->endDocument(ctxt->userData);
11443 		    goto done;
11444 		}
11445 		if (!terminate) {
11446 		    if (ctxt->progressive) {
11447 		        /* > can be found unescaped in attribute values */
11448 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11449 			    goto done;
11450 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11451 			goto done;
11452 		    }
11453 		}
11454 		if (ctxt->spaceNr == 0)
11455 		    spacePush(ctxt, -1);
11456 		else if (*ctxt->space == -2)
11457 		    spacePush(ctxt, -1);
11458 		else
11459 		    spacePush(ctxt, *ctxt->space);
11460 #ifdef LIBXML_SAX1_ENABLED
11461 		if (ctxt->sax2)
11462 #endif /* LIBXML_SAX1_ENABLED */
11463 		    name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11464 #ifdef LIBXML_SAX1_ENABLED
11465 		else
11466 		    name = xmlParseStartTag(ctxt);
11467 #endif /* LIBXML_SAX1_ENABLED */
11468 		if (ctxt->instate == XML_PARSER_EOF)
11469 		    goto done;
11470 		if (name == NULL) {
11471 		    spacePop(ctxt);
11472 		    xmlHaltParser(ctxt);
11473 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11474 			ctxt->sax->endDocument(ctxt->userData);
11475 		    goto done;
11476 		}
11477 #ifdef LIBXML_VALID_ENABLED
11478 		/*
11479 		 * [ VC: Root Element Type ]
11480 		 * The Name in the document type declaration must match
11481 		 * the element type of the root element.
11482 		 */
11483 		if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11484 		    ctxt->node && (ctxt->node == ctxt->myDoc->children))
11485 		    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11486 #endif /* LIBXML_VALID_ENABLED */
11487 
11488 		/*
11489 		 * Check for an Empty Element.
11490 		 */
11491 		if ((RAW == '/') && (NXT(1) == '>')) {
11492 		    SKIP(2);
11493 
11494 		    if (ctxt->sax2) {
11495 			if ((ctxt->sax != NULL) &&
11496 			    (ctxt->sax->endElementNs != NULL) &&
11497 			    (!ctxt->disableSAX))
11498 			    ctxt->sax->endElementNs(ctxt->userData, name,
11499 			                            prefix, URI);
11500 			if (ctxt->nsNr - nsNr > 0)
11501 			    nsPop(ctxt, ctxt->nsNr - nsNr);
11502 #ifdef LIBXML_SAX1_ENABLED
11503 		    } else {
11504 			if ((ctxt->sax != NULL) &&
11505 			    (ctxt->sax->endElement != NULL) &&
11506 			    (!ctxt->disableSAX))
11507 			    ctxt->sax->endElement(ctxt->userData, name);
11508 #endif /* LIBXML_SAX1_ENABLED */
11509 		    }
11510 		    if (ctxt->instate == XML_PARSER_EOF)
11511 			goto done;
11512 		    spacePop(ctxt);
11513 		    if (ctxt->nameNr == 0) {
11514 			ctxt->instate = XML_PARSER_EPILOG;
11515 		    } else {
11516 			ctxt->instate = XML_PARSER_CONTENT;
11517 		    }
11518                     ctxt->progressive = 1;
11519 		    break;
11520 		}
11521 		if (RAW == '>') {
11522 		    NEXT;
11523 		} else {
11524 		    xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11525 					 "Couldn't find end of Start Tag %s\n",
11526 					 name);
11527 		    nodePop(ctxt);
11528 		    spacePop(ctxt);
11529 		}
11530                 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11531 
11532 		ctxt->instate = XML_PARSER_CONTENT;
11533                 ctxt->progressive = 1;
11534                 break;
11535 	    }
11536             case XML_PARSER_CONTENT: {
11537 		const xmlChar *test;
11538 		unsigned int cons;
11539 		if ((avail < 2) && (ctxt->inputNr == 1))
11540 		    goto done;
11541 		cur = ctxt->input->cur[0];
11542 		next = ctxt->input->cur[1];
11543 
11544 		test = CUR_PTR;
11545 	        cons = ctxt->input->consumed;
11546 		if ((cur == '<') && (next == '/')) {
11547 		    ctxt->instate = XML_PARSER_END_TAG;
11548 		    break;
11549 	        } else if ((cur == '<') && (next == '?')) {
11550 		    if ((!terminate) &&
11551 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11552                         ctxt->progressive = XML_PARSER_PI;
11553 			goto done;
11554                     }
11555 		    xmlParsePI(ctxt);
11556 		    ctxt->instate = XML_PARSER_CONTENT;
11557                     ctxt->progressive = 1;
11558 		} else if ((cur == '<') && (next != '!')) {
11559 		    ctxt->instate = XML_PARSER_START_TAG;
11560 		    break;
11561 		} else if ((cur == '<') && (next == '!') &&
11562 		           (ctxt->input->cur[2] == '-') &&
11563 			   (ctxt->input->cur[3] == '-')) {
11564 		    int term;
11565 
11566 	            if (avail < 4)
11567 		        goto done;
11568 		    ctxt->input->cur += 4;
11569 		    term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11570 		    ctxt->input->cur -= 4;
11571 		    if ((!terminate) && (term < 0)) {
11572                         ctxt->progressive = XML_PARSER_COMMENT;
11573 			goto done;
11574                     }
11575 		    xmlParseComment(ctxt);
11576 		    ctxt->instate = XML_PARSER_CONTENT;
11577                     ctxt->progressive = 1;
11578 		} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11579 		    (ctxt->input->cur[2] == '[') &&
11580 		    (ctxt->input->cur[3] == 'C') &&
11581 		    (ctxt->input->cur[4] == 'D') &&
11582 		    (ctxt->input->cur[5] == 'A') &&
11583 		    (ctxt->input->cur[6] == 'T') &&
11584 		    (ctxt->input->cur[7] == 'A') &&
11585 		    (ctxt->input->cur[8] == '[')) {
11586 		    SKIP(9);
11587 		    ctxt->instate = XML_PARSER_CDATA_SECTION;
11588 		    break;
11589 		} else if ((cur == '<') && (next == '!') &&
11590 		           (avail < 9)) {
11591 		    goto done;
11592 		} else if (cur == '&') {
11593 		    if ((!terminate) &&
11594 		        (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11595 			goto done;
11596 		    xmlParseReference(ctxt);
11597 		} else {
11598 		    /* TODO Avoid the extra copy, handle directly !!! */
11599 		    /*
11600 		     * Goal of the following test is:
11601 		     *  - minimize calls to the SAX 'character' callback
11602 		     *    when they are mergeable
11603 		     *  - handle an problem for isBlank when we only parse
11604 		     *    a sequence of blank chars and the next one is
11605 		     *    not available to check against '<' presence.
11606 		     *  - tries to homogenize the differences in SAX
11607 		     *    callbacks between the push and pull versions
11608 		     *    of the parser.
11609 		     */
11610 		    if ((ctxt->inputNr == 1) &&
11611 		        (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11612 			if (!terminate) {
11613 			    if (ctxt->progressive) {
11614 				if ((lastlt == NULL) ||
11615 				    (ctxt->input->cur > lastlt))
11616 				    goto done;
11617 			    } else if (xmlParseLookupSequence(ctxt,
11618 			                                      '<', 0, 0) < 0) {
11619 				goto done;
11620 			    }
11621 			}
11622                     }
11623 		    ctxt->checkIndex = 0;
11624 		    xmlParseCharData(ctxt, 0);
11625 		}
11626 		if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11627 		    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11628 		                "detected an error in element content\n");
11629 		    xmlHaltParser(ctxt);
11630 		    break;
11631 		}
11632 		break;
11633 	    }
11634             case XML_PARSER_END_TAG:
11635 		if (avail < 2)
11636 		    goto done;
11637 		if (!terminate) {
11638 		    if (ctxt->progressive) {
11639 		        /* > can be found unescaped in attribute values */
11640 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11641 			    goto done;
11642 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11643 			goto done;
11644 		    }
11645 		}
11646 		if (ctxt->sax2) {
11647 	            xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11648 		    nameNsPop(ctxt);
11649 		}
11650 #ifdef LIBXML_SAX1_ENABLED
11651 		  else
11652 		    xmlParseEndTag1(ctxt, 0);
11653 #endif /* LIBXML_SAX1_ENABLED */
11654 		if (ctxt->instate == XML_PARSER_EOF) {
11655 		    /* Nothing */
11656 		} else if (ctxt->nameNr == 0) {
11657 		    ctxt->instate = XML_PARSER_EPILOG;
11658 		} else {
11659 		    ctxt->instate = XML_PARSER_CONTENT;
11660 		}
11661 		break;
11662             case XML_PARSER_CDATA_SECTION: {
11663 	        /*
11664 		 * The Push mode need to have the SAX callback for
11665 		 * cdataBlock merge back contiguous callbacks.
11666 		 */
11667 		int base;
11668 
11669 		base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11670 		if (base < 0) {
11671 		    if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11672 		        int tmp;
11673 
11674 			tmp = xmlCheckCdataPush(ctxt->input->cur,
11675 			                        XML_PARSER_BIG_BUFFER_SIZE, 0);
11676 			if (tmp < 0) {
11677 			    tmp = -tmp;
11678 			    ctxt->input->cur += tmp;
11679 			    goto encoding_error;
11680 			}
11681 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11682 			    if (ctxt->sax->cdataBlock != NULL)
11683 				ctxt->sax->cdataBlock(ctxt->userData,
11684 				                      ctxt->input->cur, tmp);
11685 			    else if (ctxt->sax->characters != NULL)
11686 				ctxt->sax->characters(ctxt->userData,
11687 				                      ctxt->input->cur, tmp);
11688 			}
11689 			if (ctxt->instate == XML_PARSER_EOF)
11690 			    goto done;
11691 			SKIPL(tmp);
11692 			ctxt->checkIndex = 0;
11693 		    }
11694 		    goto done;
11695 		} else {
11696 		    int tmp;
11697 
11698 		    tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11699 		    if ((tmp < 0) || (tmp != base)) {
11700 			tmp = -tmp;
11701 			ctxt->input->cur += tmp;
11702 			goto encoding_error;
11703 		    }
11704 		    if ((ctxt->sax != NULL) && (base == 0) &&
11705 		        (ctxt->sax->cdataBlock != NULL) &&
11706 		        (!ctxt->disableSAX)) {
11707 			/*
11708 			 * Special case to provide identical behaviour
11709 			 * between pull and push parsers on enpty CDATA
11710 			 * sections
11711 			 */
11712 			 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11713 			     (!strncmp((const char *)&ctxt->input->cur[-9],
11714 			               "<![CDATA[", 9)))
11715 			     ctxt->sax->cdataBlock(ctxt->userData,
11716 			                           BAD_CAST "", 0);
11717 		    } else if ((ctxt->sax != NULL) && (base > 0) &&
11718 			(!ctxt->disableSAX)) {
11719 			if (ctxt->sax->cdataBlock != NULL)
11720 			    ctxt->sax->cdataBlock(ctxt->userData,
11721 						  ctxt->input->cur, base);
11722 			else if (ctxt->sax->characters != NULL)
11723 			    ctxt->sax->characters(ctxt->userData,
11724 						  ctxt->input->cur, base);
11725 		    }
11726 		    if (ctxt->instate == XML_PARSER_EOF)
11727 			goto done;
11728 		    SKIPL(base + 3);
11729 		    ctxt->checkIndex = 0;
11730 		    ctxt->instate = XML_PARSER_CONTENT;
11731 #ifdef DEBUG_PUSH
11732 		    xmlGenericError(xmlGenericErrorContext,
11733 			    "PP: entering CONTENT\n");
11734 #endif
11735 		}
11736 		break;
11737 	    }
11738             case XML_PARSER_MISC:
11739 		SKIP_BLANKS;
11740 		if (ctxt->input->buf == NULL)
11741 		    avail = ctxt->input->length -
11742 		            (ctxt->input->cur - ctxt->input->base);
11743 		else
11744 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11745 		            (ctxt->input->cur - ctxt->input->base);
11746 		if (avail < 2)
11747 		    goto done;
11748 		cur = ctxt->input->cur[0];
11749 		next = ctxt->input->cur[1];
11750 	        if ((cur == '<') && (next == '?')) {
11751 		    if ((!terminate) &&
11752 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11753                         ctxt->progressive = XML_PARSER_PI;
11754 			goto done;
11755                     }
11756 #ifdef DEBUG_PUSH
11757 		    xmlGenericError(xmlGenericErrorContext,
11758 			    "PP: Parsing PI\n");
11759 #endif
11760 		    xmlParsePI(ctxt);
11761 		    if (ctxt->instate == XML_PARSER_EOF)
11762 			goto done;
11763 		    ctxt->instate = XML_PARSER_MISC;
11764                     ctxt->progressive = 1;
11765 		    ctxt->checkIndex = 0;
11766 		} else if ((cur == '<') && (next == '!') &&
11767 		    (ctxt->input->cur[2] == '-') &&
11768 		    (ctxt->input->cur[3] == '-')) {
11769 		    if ((!terminate) &&
11770 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11771                         ctxt->progressive = XML_PARSER_COMMENT;
11772 			goto done;
11773                     }
11774 #ifdef DEBUG_PUSH
11775 		    xmlGenericError(xmlGenericErrorContext,
11776 			    "PP: Parsing Comment\n");
11777 #endif
11778 		    xmlParseComment(ctxt);
11779 		    if (ctxt->instate == XML_PARSER_EOF)
11780 			goto done;
11781 		    ctxt->instate = XML_PARSER_MISC;
11782                     ctxt->progressive = 1;
11783 		    ctxt->checkIndex = 0;
11784 		} else if ((cur == '<') && (next == '!') &&
11785 		    (ctxt->input->cur[2] == 'D') &&
11786 		    (ctxt->input->cur[3] == 'O') &&
11787 		    (ctxt->input->cur[4] == 'C') &&
11788 		    (ctxt->input->cur[5] == 'T') &&
11789 		    (ctxt->input->cur[6] == 'Y') &&
11790 		    (ctxt->input->cur[7] == 'P') &&
11791 		    (ctxt->input->cur[8] == 'E')) {
11792 		    if ((!terminate) &&
11793 		        (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11794                         ctxt->progressive = XML_PARSER_DTD;
11795 			goto done;
11796                     }
11797 #ifdef DEBUG_PUSH
11798 		    xmlGenericError(xmlGenericErrorContext,
11799 			    "PP: Parsing internal subset\n");
11800 #endif
11801 		    ctxt->inSubset = 1;
11802                     ctxt->progressive = 0;
11803 		    ctxt->checkIndex = 0;
11804 		    xmlParseDocTypeDecl(ctxt);
11805 		    if (ctxt->instate == XML_PARSER_EOF)
11806 			goto done;
11807 		    if (RAW == '[') {
11808 			ctxt->instate = XML_PARSER_DTD;
11809 #ifdef DEBUG_PUSH
11810 			xmlGenericError(xmlGenericErrorContext,
11811 				"PP: entering DTD\n");
11812 #endif
11813 		    } else {
11814 			/*
11815 			 * Create and update the external subset.
11816 			 */
11817 			ctxt->inSubset = 2;
11818 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11819 			    (ctxt->sax->externalSubset != NULL))
11820 			    ctxt->sax->externalSubset(ctxt->userData,
11821 				    ctxt->intSubName, ctxt->extSubSystem,
11822 				    ctxt->extSubURI);
11823 			ctxt->inSubset = 0;
11824 			xmlCleanSpecialAttr(ctxt);
11825 			ctxt->instate = XML_PARSER_PROLOG;
11826 #ifdef DEBUG_PUSH
11827 			xmlGenericError(xmlGenericErrorContext,
11828 				"PP: entering PROLOG\n");
11829 #endif
11830 		    }
11831 		} else if ((cur == '<') && (next == '!') &&
11832 		           (avail < 9)) {
11833 		    goto done;
11834 		} else {
11835 		    ctxt->instate = XML_PARSER_START_TAG;
11836 		    ctxt->progressive = XML_PARSER_START_TAG;
11837 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11838 #ifdef DEBUG_PUSH
11839 		    xmlGenericError(xmlGenericErrorContext,
11840 			    "PP: entering START_TAG\n");
11841 #endif
11842 		}
11843 		break;
11844             case XML_PARSER_PROLOG:
11845 		SKIP_BLANKS;
11846 		if (ctxt->input->buf == NULL)
11847 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11848 		else
11849 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11850                             (ctxt->input->cur - ctxt->input->base);
11851 		if (avail < 2)
11852 		    goto done;
11853 		cur = ctxt->input->cur[0];
11854 		next = ctxt->input->cur[1];
11855 	        if ((cur == '<') && (next == '?')) {
11856 		    if ((!terminate) &&
11857 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11858                         ctxt->progressive = XML_PARSER_PI;
11859 			goto done;
11860                     }
11861 #ifdef DEBUG_PUSH
11862 		    xmlGenericError(xmlGenericErrorContext,
11863 			    "PP: Parsing PI\n");
11864 #endif
11865 		    xmlParsePI(ctxt);
11866 		    if (ctxt->instate == XML_PARSER_EOF)
11867 			goto done;
11868 		    ctxt->instate = XML_PARSER_PROLOG;
11869                     ctxt->progressive = 1;
11870 		} else if ((cur == '<') && (next == '!') &&
11871 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11872 		    if ((!terminate) &&
11873 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11874                         ctxt->progressive = XML_PARSER_COMMENT;
11875 			goto done;
11876                     }
11877 #ifdef DEBUG_PUSH
11878 		    xmlGenericError(xmlGenericErrorContext,
11879 			    "PP: Parsing Comment\n");
11880 #endif
11881 		    xmlParseComment(ctxt);
11882 		    if (ctxt->instate == XML_PARSER_EOF)
11883 			goto done;
11884 		    ctxt->instate = XML_PARSER_PROLOG;
11885                     ctxt->progressive = 1;
11886 		} else if ((cur == '<') && (next == '!') &&
11887 		           (avail < 4)) {
11888 		    goto done;
11889 		} else {
11890 		    ctxt->instate = XML_PARSER_START_TAG;
11891 		    if (ctxt->progressive == 0)
11892 			ctxt->progressive = XML_PARSER_START_TAG;
11893 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11894 #ifdef DEBUG_PUSH
11895 		    xmlGenericError(xmlGenericErrorContext,
11896 			    "PP: entering START_TAG\n");
11897 #endif
11898 		}
11899 		break;
11900             case XML_PARSER_EPILOG:
11901 		SKIP_BLANKS;
11902 		if (ctxt->input->buf == NULL)
11903 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11904 		else
11905 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11906                             (ctxt->input->cur - ctxt->input->base);
11907 		if (avail < 2)
11908 		    goto done;
11909 		cur = ctxt->input->cur[0];
11910 		next = ctxt->input->cur[1];
11911 	        if ((cur == '<') && (next == '?')) {
11912 		    if ((!terminate) &&
11913 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11914                         ctxt->progressive = XML_PARSER_PI;
11915 			goto done;
11916                     }
11917 #ifdef DEBUG_PUSH
11918 		    xmlGenericError(xmlGenericErrorContext,
11919 			    "PP: Parsing PI\n");
11920 #endif
11921 		    xmlParsePI(ctxt);
11922 		    if (ctxt->instate == XML_PARSER_EOF)
11923 			goto done;
11924 		    ctxt->instate = XML_PARSER_EPILOG;
11925                     ctxt->progressive = 1;
11926 		} else if ((cur == '<') && (next == '!') &&
11927 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11928 		    if ((!terminate) &&
11929 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11930                         ctxt->progressive = XML_PARSER_COMMENT;
11931 			goto done;
11932                     }
11933 #ifdef DEBUG_PUSH
11934 		    xmlGenericError(xmlGenericErrorContext,
11935 			    "PP: Parsing Comment\n");
11936 #endif
11937 		    xmlParseComment(ctxt);
11938 		    if (ctxt->instate == XML_PARSER_EOF)
11939 			goto done;
11940 		    ctxt->instate = XML_PARSER_EPILOG;
11941                     ctxt->progressive = 1;
11942 		} else if ((cur == '<') && (next == '!') &&
11943 		           (avail < 4)) {
11944 		    goto done;
11945 		} else {
11946 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11947 		    xmlHaltParser(ctxt);
11948 #ifdef DEBUG_PUSH
11949 		    xmlGenericError(xmlGenericErrorContext,
11950 			    "PP: entering EOF\n");
11951 #endif
11952 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11953 			ctxt->sax->endDocument(ctxt->userData);
11954 		    goto done;
11955 		}
11956 		break;
11957             case XML_PARSER_DTD: {
11958 	        /*
11959 		 * Sorry but progressive parsing of the internal subset
11960 		 * is not expected to be supported. We first check that
11961 		 * the full content of the internal subset is available and
11962 		 * the parsing is launched only at that point.
11963 		 * Internal subset ends up with "']' S? '>'" in an unescaped
11964 		 * section and not in a ']]>' sequence which are conditional
11965 		 * sections (whoever argued to keep that crap in XML deserve
11966 		 * a place in hell !).
11967 		 */
11968 		int base, i;
11969 		xmlChar *buf;
11970 	        xmlChar quote = 0;
11971                 size_t use;
11972 
11973 		base = ctxt->input->cur - ctxt->input->base;
11974 		if (base < 0) return(0);
11975 		if (ctxt->checkIndex > base)
11976 		    base = ctxt->checkIndex;
11977 		buf = xmlBufContent(ctxt->input->buf->buffer);
11978                 use = xmlBufUse(ctxt->input->buf->buffer);
11979 		for (;(unsigned int) base < use; base++) {
11980 		    if (quote != 0) {
11981 		        if (buf[base] == quote)
11982 			    quote = 0;
11983 			continue;
11984 		    }
11985 		    if ((quote == 0) && (buf[base] == '<')) {
11986 		        int found  = 0;
11987 			/* special handling of comments */
11988 		        if (((unsigned int) base + 4 < use) &&
11989 			    (buf[base + 1] == '!') &&
11990 			    (buf[base + 2] == '-') &&
11991 			    (buf[base + 3] == '-')) {
11992 			    for (;(unsigned int) base + 3 < use; base++) {
11993 				if ((buf[base] == '-') &&
11994 				    (buf[base + 1] == '-') &&
11995 				    (buf[base + 2] == '>')) {
11996 				    found = 1;
11997 				    base += 2;
11998 				    break;
11999 				}
12000 		            }
12001 			    if (!found) {
12002 #if 0
12003 			        fprintf(stderr, "unfinished comment\n");
12004 #endif
12005 			        break; /* for */
12006 		            }
12007 		            continue;
12008 			}
12009 		    }
12010 		    if (buf[base] == '"') {
12011 		        quote = '"';
12012 			continue;
12013 		    }
12014 		    if (buf[base] == '\'') {
12015 		        quote = '\'';
12016 			continue;
12017 		    }
12018 		    if (buf[base] == ']') {
12019 #if 0
12020 		        fprintf(stderr, "%c%c%c%c: ", buf[base],
12021 			        buf[base + 1], buf[base + 2], buf[base + 3]);
12022 #endif
12023 		        if ((unsigned int) base +1 >= use)
12024 			    break;
12025 			if (buf[base + 1] == ']') {
12026 			    /* conditional crap, skip both ']' ! */
12027 			    base++;
12028 			    continue;
12029 			}
12030 		        for (i = 1; (unsigned int) base + i < use; i++) {
12031 			    if (buf[base + i] == '>') {
12032 #if 0
12033 			        fprintf(stderr, "found\n");
12034 #endif
12035 			        goto found_end_int_subset;
12036 			    }
12037 			    if (!IS_BLANK_CH(buf[base + i])) {
12038 #if 0
12039 			        fprintf(stderr, "not found\n");
12040 #endif
12041 			        goto not_end_of_int_subset;
12042 			    }
12043 			}
12044 #if 0
12045 			fprintf(stderr, "end of stream\n");
12046 #endif
12047 		        break;
12048 
12049 		    }
12050 not_end_of_int_subset:
12051                     continue; /* for */
12052 		}
12053 		/*
12054 		 * We didn't found the end of the Internal subset
12055 		 */
12056                 if (quote == 0)
12057                     ctxt->checkIndex = base;
12058                 else
12059                     ctxt->checkIndex = 0;
12060 #ifdef DEBUG_PUSH
12061 		if (next == 0)
12062 		    xmlGenericError(xmlGenericErrorContext,
12063 			    "PP: lookup of int subset end filed\n");
12064 #endif
12065 	        goto done;
12066 
12067 found_end_int_subset:
12068                 ctxt->checkIndex = 0;
12069 		xmlParseInternalSubset(ctxt);
12070 		if (ctxt->instate == XML_PARSER_EOF)
12071 		    goto done;
12072 		ctxt->inSubset = 2;
12073 		if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12074 		    (ctxt->sax->externalSubset != NULL))
12075 		    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12076 			    ctxt->extSubSystem, ctxt->extSubURI);
12077 		ctxt->inSubset = 0;
12078 		xmlCleanSpecialAttr(ctxt);
12079 		if (ctxt->instate == XML_PARSER_EOF)
12080 		    goto done;
12081 		ctxt->instate = XML_PARSER_PROLOG;
12082 		ctxt->checkIndex = 0;
12083 #ifdef DEBUG_PUSH
12084 		xmlGenericError(xmlGenericErrorContext,
12085 			"PP: entering PROLOG\n");
12086 #endif
12087                 break;
12088 	    }
12089             case XML_PARSER_COMMENT:
12090 		xmlGenericError(xmlGenericErrorContext,
12091 			"PP: internal error, state == COMMENT\n");
12092 		ctxt->instate = XML_PARSER_CONTENT;
12093 #ifdef DEBUG_PUSH
12094 		xmlGenericError(xmlGenericErrorContext,
12095 			"PP: entering CONTENT\n");
12096 #endif
12097 		break;
12098             case XML_PARSER_IGNORE:
12099 		xmlGenericError(xmlGenericErrorContext,
12100 			"PP: internal error, state == IGNORE");
12101 	        ctxt->instate = XML_PARSER_DTD;
12102 #ifdef DEBUG_PUSH
12103 		xmlGenericError(xmlGenericErrorContext,
12104 			"PP: entering DTD\n");
12105 #endif
12106 	        break;
12107             case XML_PARSER_PI:
12108 		xmlGenericError(xmlGenericErrorContext,
12109 			"PP: internal error, state == PI\n");
12110 		ctxt->instate = XML_PARSER_CONTENT;
12111 #ifdef DEBUG_PUSH
12112 		xmlGenericError(xmlGenericErrorContext,
12113 			"PP: entering CONTENT\n");
12114 #endif
12115 		break;
12116             case XML_PARSER_ENTITY_DECL:
12117 		xmlGenericError(xmlGenericErrorContext,
12118 			"PP: internal error, state == ENTITY_DECL\n");
12119 		ctxt->instate = XML_PARSER_DTD;
12120 #ifdef DEBUG_PUSH
12121 		xmlGenericError(xmlGenericErrorContext,
12122 			"PP: entering DTD\n");
12123 #endif
12124 		break;
12125             case XML_PARSER_ENTITY_VALUE:
12126 		xmlGenericError(xmlGenericErrorContext,
12127 			"PP: internal error, state == ENTITY_VALUE\n");
12128 		ctxt->instate = XML_PARSER_CONTENT;
12129 #ifdef DEBUG_PUSH
12130 		xmlGenericError(xmlGenericErrorContext,
12131 			"PP: entering DTD\n");
12132 #endif
12133 		break;
12134             case XML_PARSER_ATTRIBUTE_VALUE:
12135 		xmlGenericError(xmlGenericErrorContext,
12136 			"PP: internal error, state == ATTRIBUTE_VALUE\n");
12137 		ctxt->instate = XML_PARSER_START_TAG;
12138 #ifdef DEBUG_PUSH
12139 		xmlGenericError(xmlGenericErrorContext,
12140 			"PP: entering START_TAG\n");
12141 #endif
12142 		break;
12143             case XML_PARSER_SYSTEM_LITERAL:
12144 		xmlGenericError(xmlGenericErrorContext,
12145 			"PP: internal error, state == SYSTEM_LITERAL\n");
12146 		ctxt->instate = XML_PARSER_START_TAG;
12147 #ifdef DEBUG_PUSH
12148 		xmlGenericError(xmlGenericErrorContext,
12149 			"PP: entering START_TAG\n");
12150 #endif
12151 		break;
12152             case XML_PARSER_PUBLIC_LITERAL:
12153 		xmlGenericError(xmlGenericErrorContext,
12154 			"PP: internal error, state == PUBLIC_LITERAL\n");
12155 		ctxt->instate = XML_PARSER_START_TAG;
12156 #ifdef DEBUG_PUSH
12157 		xmlGenericError(xmlGenericErrorContext,
12158 			"PP: entering START_TAG\n");
12159 #endif
12160 		break;
12161 	}
12162     }
12163 done:
12164 #ifdef DEBUG_PUSH
12165     xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12166 #endif
12167     return(ret);
12168 encoding_error:
12169     {
12170         char buffer[150];
12171 
12172 	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12173 			ctxt->input->cur[0], ctxt->input->cur[1],
12174 			ctxt->input->cur[2], ctxt->input->cur[3]);
12175 	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12176 		     "Input is not proper UTF-8, indicate encoding !\n%s",
12177 		     BAD_CAST buffer, NULL);
12178     }
12179     return(0);
12180 }
12181 
12182 /**
12183  * xmlParseCheckTransition:
12184  * @ctxt:  an XML parser context
12185  * @chunk:  a char array
12186  * @size:  the size in byte of the chunk
12187  *
12188  * Check depending on the current parser state if the chunk given must be
12189  * processed immediately or one need more data to advance on parsing.
12190  *
12191  * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12192  */
12193 static int
xmlParseCheckTransition(xmlParserCtxtPtr ctxt,const char * chunk,int size)12194 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12195     if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12196         return(-1);
12197     if (ctxt->instate == XML_PARSER_START_TAG) {
12198         if (memchr(chunk, '>', size) != NULL)
12199             return(1);
12200         return(0);
12201     }
12202     if (ctxt->progressive == XML_PARSER_COMMENT) {
12203         if (memchr(chunk, '>', size) != NULL)
12204             return(1);
12205         return(0);
12206     }
12207     if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12208         if (memchr(chunk, '>', size) != NULL)
12209             return(1);
12210         return(0);
12211     }
12212     if (ctxt->progressive == XML_PARSER_PI) {
12213         if (memchr(chunk, '>', size) != NULL)
12214             return(1);
12215         return(0);
12216     }
12217     if (ctxt->instate == XML_PARSER_END_TAG) {
12218         if (memchr(chunk, '>', size) != NULL)
12219             return(1);
12220         return(0);
12221     }
12222     if ((ctxt->progressive == XML_PARSER_DTD) ||
12223         (ctxt->instate == XML_PARSER_DTD)) {
12224         if (memchr(chunk, '>', size) != NULL)
12225             return(1);
12226         return(0);
12227     }
12228     return(1);
12229 }
12230 
12231 /**
12232  * xmlParseChunk:
12233  * @ctxt:  an XML parser context
12234  * @chunk:  an char array
12235  * @size:  the size in byte of the chunk
12236  * @terminate:  last chunk indicator
12237  *
12238  * Parse a Chunk of memory
12239  *
12240  * Returns zero if no error, the xmlParserErrors otherwise.
12241  */
12242 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)12243 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12244               int terminate) {
12245     int end_in_lf = 0;
12246     int remain = 0;
12247     size_t old_avail = 0;
12248     size_t avail = 0;
12249 
12250     if (ctxt == NULL)
12251         return(XML_ERR_INTERNAL_ERROR);
12252     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12253         return(ctxt->errNo);
12254     if (ctxt->instate == XML_PARSER_EOF)
12255         return(-1);
12256     if (ctxt->instate == XML_PARSER_START)
12257         xmlDetectSAX2(ctxt);
12258     if ((size > 0) && (chunk != NULL) && (!terminate) &&
12259         (chunk[size - 1] == '\r')) {
12260 	end_in_lf = 1;
12261 	size--;
12262     }
12263 
12264 xmldecl_done:
12265 
12266     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12267         (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12268 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12269 	size_t cur = ctxt->input->cur - ctxt->input->base;
12270 	int res;
12271 
12272         old_avail = xmlBufUse(ctxt->input->buf->buffer);
12273         /*
12274          * Specific handling if we autodetected an encoding, we should not
12275          * push more than the first line ... which depend on the encoding
12276          * And only push the rest once the final encoding was detected
12277          */
12278         if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12279             (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12280             unsigned int len = 45;
12281 
12282             if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12283                                BAD_CAST "UTF-16")) ||
12284                 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12285                                BAD_CAST "UTF16")))
12286                 len = 90;
12287             else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12288                                     BAD_CAST "UCS-4")) ||
12289                      (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12290                                     BAD_CAST "UCS4")))
12291                 len = 180;
12292 
12293             if (ctxt->input->buf->rawconsumed < len)
12294                 len -= ctxt->input->buf->rawconsumed;
12295 
12296             /*
12297              * Change size for reading the initial declaration only
12298              * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12299              * will blindly copy extra bytes from memory.
12300              */
12301             if ((unsigned int) size > len) {
12302                 remain = size - len;
12303                 size = len;
12304             } else {
12305                 remain = 0;
12306             }
12307         }
12308 	res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12309         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12310 	if (res < 0) {
12311 	    ctxt->errNo = XML_PARSER_EOF;
12312 	    xmlHaltParser(ctxt);
12313 	    return (XML_PARSER_EOF);
12314 	}
12315 #ifdef DEBUG_PUSH
12316 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12317 #endif
12318 
12319     } else if (ctxt->instate != XML_PARSER_EOF) {
12320 	if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12321 	    xmlParserInputBufferPtr in = ctxt->input->buf;
12322 	    if ((in->encoder != NULL) && (in->buffer != NULL) &&
12323 		    (in->raw != NULL)) {
12324 		int nbchars;
12325 		size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12326 		size_t current = ctxt->input->cur - ctxt->input->base;
12327 
12328 		nbchars = xmlCharEncInput(in, terminate);
12329 		xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12330 		if (nbchars < 0) {
12331 		    /* TODO 2.6.0 */
12332 		    xmlGenericError(xmlGenericErrorContext,
12333 				    "xmlParseChunk: encoder error\n");
12334                     xmlHaltParser(ctxt);
12335 		    return(XML_ERR_INVALID_ENCODING);
12336 		}
12337 	    }
12338 	}
12339     }
12340     if (remain != 0) {
12341         xmlParseTryOrFinish(ctxt, 0);
12342     } else {
12343         if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12344             avail = xmlBufUse(ctxt->input->buf->buffer);
12345         /*
12346          * Depending on the current state it may not be such
12347          * a good idea to try parsing if there is nothing in the chunk
12348          * which would be worth doing a parser state transition and we
12349          * need to wait for more data
12350          */
12351         if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12352             (old_avail == 0) || (avail == 0) ||
12353             (xmlParseCheckTransition(ctxt,
12354                        (const char *)&ctxt->input->base[old_avail],
12355                                      avail - old_avail)))
12356             xmlParseTryOrFinish(ctxt, terminate);
12357     }
12358     if (ctxt->instate == XML_PARSER_EOF)
12359         return(ctxt->errNo);
12360 
12361     if ((ctxt->input != NULL) &&
12362          (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12363          ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12364         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12365         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12366         xmlHaltParser(ctxt);
12367     }
12368     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12369         return(ctxt->errNo);
12370 
12371     if (remain != 0) {
12372         chunk += size;
12373         size = remain;
12374         remain = 0;
12375         goto xmldecl_done;
12376     }
12377     if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12378         (ctxt->input->buf != NULL)) {
12379 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12380 					 ctxt->input);
12381 	size_t current = ctxt->input->cur - ctxt->input->base;
12382 
12383 	xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12384 
12385 	xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12386 			      base, current);
12387     }
12388     if (terminate) {
12389 	/*
12390 	 * Check for termination
12391 	 */
12392 	int cur_avail = 0;
12393 
12394 	if (ctxt->input != NULL) {
12395 	    if (ctxt->input->buf == NULL)
12396 		cur_avail = ctxt->input->length -
12397 			    (ctxt->input->cur - ctxt->input->base);
12398 	    else
12399 		cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12400 			              (ctxt->input->cur - ctxt->input->base);
12401 	}
12402 
12403 	if ((ctxt->instate != XML_PARSER_EOF) &&
12404 	    (ctxt->instate != XML_PARSER_EPILOG)) {
12405 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12406 	}
12407 	if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12408 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12409 	}
12410 	if (ctxt->instate != XML_PARSER_EOF) {
12411 	    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12412 		ctxt->sax->endDocument(ctxt->userData);
12413 	}
12414 	ctxt->instate = XML_PARSER_EOF;
12415     }
12416     if (ctxt->wellFormed == 0)
12417 	return((xmlParserErrors) ctxt->errNo);
12418     else
12419         return(0);
12420 }
12421 
12422 /************************************************************************
12423  *									*
12424  *		I/O front end functions to the parser			*
12425  *									*
12426  ************************************************************************/
12427 
12428 /**
12429  * xmlCreatePushParserCtxt:
12430  * @sax:  a SAX handler
12431  * @user_data:  The user data returned on SAX callbacks
12432  * @chunk:  a pointer to an array of chars
12433  * @size:  number of chars in the array
12434  * @filename:  an optional file name or URI
12435  *
12436  * Create a parser context for using the XML parser in push mode.
12437  * If @buffer and @size are non-NULL, the data is used to detect
12438  * the encoding.  The remaining characters will be parsed so they
12439  * don't need to be fed in again through xmlParseChunk.
12440  * To allow content encoding detection, @size should be >= 4
12441  * The value of @filename is used for fetching external entities
12442  * and error/warning reports.
12443  *
12444  * Returns the new parser context or NULL
12445  */
12446 
12447 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)12448 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12449                         const char *chunk, int size, const char *filename) {
12450     xmlParserCtxtPtr ctxt;
12451     xmlParserInputPtr inputStream;
12452     xmlParserInputBufferPtr buf;
12453     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12454 
12455     /*
12456      * plug some encoding conversion routines
12457      */
12458     if ((chunk != NULL) && (size >= 4))
12459 	enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12460 
12461     buf = xmlAllocParserInputBuffer(enc);
12462     if (buf == NULL) return(NULL);
12463 
12464     ctxt = xmlNewParserCtxt();
12465     if (ctxt == NULL) {
12466         xmlErrMemory(NULL, "creating parser: out of memory\n");
12467 	xmlFreeParserInputBuffer(buf);
12468 	return(NULL);
12469     }
12470     ctxt->dictNames = 1;
12471     if (sax != NULL) {
12472 #ifdef LIBXML_SAX1_ENABLED
12473 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12474 #endif /* LIBXML_SAX1_ENABLED */
12475 	    xmlFree(ctxt->sax);
12476 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12477 	if (ctxt->sax == NULL) {
12478 	    xmlErrMemory(ctxt, NULL);
12479 	    xmlFreeParserInputBuffer(buf);
12480 	    xmlFreeParserCtxt(ctxt);
12481 	    return(NULL);
12482 	}
12483 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12484 	if (sax->initialized == XML_SAX2_MAGIC)
12485 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12486 	else
12487 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12488 	if (user_data != NULL)
12489 	    ctxt->userData = user_data;
12490     }
12491     if (filename == NULL) {
12492 	ctxt->directory = NULL;
12493     } else {
12494         ctxt->directory = xmlParserGetDirectory(filename);
12495     }
12496 
12497     inputStream = xmlNewInputStream(ctxt);
12498     if (inputStream == NULL) {
12499 	xmlFreeParserCtxt(ctxt);
12500 	xmlFreeParserInputBuffer(buf);
12501 	return(NULL);
12502     }
12503 
12504     if (filename == NULL)
12505 	inputStream->filename = NULL;
12506     else {
12507 	inputStream->filename = (char *)
12508 	    xmlCanonicPath((const xmlChar *) filename);
12509 	if (inputStream->filename == NULL) {
12510 	    xmlFreeParserCtxt(ctxt);
12511 	    xmlFreeParserInputBuffer(buf);
12512 	    return(NULL);
12513 	}
12514     }
12515     inputStream->buf = buf;
12516     xmlBufResetInput(inputStream->buf->buffer, inputStream);
12517     inputPush(ctxt, inputStream);
12518 
12519     /*
12520      * If the caller didn't provide an initial 'chunk' for determining
12521      * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12522      * that it can be automatically determined later
12523      */
12524     if ((size == 0) || (chunk == NULL)) {
12525 	ctxt->charset = XML_CHAR_ENCODING_NONE;
12526     } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12527 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12528 	size_t cur = ctxt->input->cur - ctxt->input->base;
12529 
12530 	xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12531 
12532         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12533 #ifdef DEBUG_PUSH
12534 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12535 #endif
12536     }
12537 
12538     if (enc != XML_CHAR_ENCODING_NONE) {
12539         xmlSwitchEncoding(ctxt, enc);
12540     }
12541 
12542     return(ctxt);
12543 }
12544 #endif /* LIBXML_PUSH_ENABLED */
12545 
12546 /**
12547  * xmlHaltParser:
12548  * @ctxt:  an XML parser context
12549  *
12550  * Blocks further parser processing don't override error
12551  * for internal use
12552  */
12553 static void
xmlHaltParser(xmlParserCtxtPtr ctxt)12554 xmlHaltParser(xmlParserCtxtPtr ctxt) {
12555     if (ctxt == NULL)
12556         return;
12557     ctxt->instate = XML_PARSER_EOF;
12558     ctxt->disableSAX = 1;
12559     while (ctxt->inputNr > 1)
12560         xmlFreeInputStream(inputPop(ctxt));
12561     if (ctxt->input != NULL) {
12562         /*
12563 	 * in case there was a specific allocation deallocate before
12564 	 * overriding base
12565 	 */
12566         if (ctxt->input->free != NULL) {
12567 	    ctxt->input->free((xmlChar *) ctxt->input->base);
12568 	    ctxt->input->free = NULL;
12569 	}
12570         if (ctxt->input->buf != NULL) {
12571             xmlFreeParserInputBuffer(ctxt->input->buf);
12572             ctxt->input->buf = NULL;
12573         }
12574 	ctxt->input->cur = BAD_CAST"";
12575         ctxt->input->length = 0;
12576 	ctxt->input->base = ctxt->input->cur;
12577         ctxt->input->end = ctxt->input->cur;
12578     }
12579 }
12580 
12581 /**
12582  * xmlStopParser:
12583  * @ctxt:  an XML parser context
12584  *
12585  * Blocks further parser processing
12586  */
12587 void
xmlStopParser(xmlParserCtxtPtr ctxt)12588 xmlStopParser(xmlParserCtxtPtr ctxt) {
12589     if (ctxt == NULL)
12590         return;
12591     xmlHaltParser(ctxt);
12592     ctxt->errNo = XML_ERR_USER_STOP;
12593 }
12594 
12595 /**
12596  * xmlCreateIOParserCtxt:
12597  * @sax:  a SAX handler
12598  * @user_data:  The user data returned on SAX callbacks
12599  * @ioread:  an I/O read function
12600  * @ioclose:  an I/O close function
12601  * @ioctx:  an I/O handler
12602  * @enc:  the charset encoding if known
12603  *
12604  * Create a parser context for using the XML parser with an existing
12605  * I/O stream
12606  *
12607  * Returns the new parser context or NULL
12608  */
12609 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)12610 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12611 	xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12612 	void *ioctx, xmlCharEncoding enc) {
12613     xmlParserCtxtPtr ctxt;
12614     xmlParserInputPtr inputStream;
12615     xmlParserInputBufferPtr buf;
12616 
12617     if (ioread == NULL) return(NULL);
12618 
12619     buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12620     if (buf == NULL) {
12621         if (ioclose != NULL)
12622             ioclose(ioctx);
12623         return (NULL);
12624     }
12625 
12626     ctxt = xmlNewParserCtxt();
12627     if (ctxt == NULL) {
12628 	xmlFreeParserInputBuffer(buf);
12629 	return(NULL);
12630     }
12631     if (sax != NULL) {
12632 #ifdef LIBXML_SAX1_ENABLED
12633 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12634 #endif /* LIBXML_SAX1_ENABLED */
12635 	    xmlFree(ctxt->sax);
12636 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12637 	if (ctxt->sax == NULL) {
12638 	    xmlFreeParserInputBuffer(buf);
12639 	    xmlErrMemory(ctxt, NULL);
12640 	    xmlFreeParserCtxt(ctxt);
12641 	    return(NULL);
12642 	}
12643 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12644 	if (sax->initialized == XML_SAX2_MAGIC)
12645 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12646 	else
12647 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12648 	if (user_data != NULL)
12649 	    ctxt->userData = user_data;
12650     }
12651 
12652     inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12653     if (inputStream == NULL) {
12654 	xmlFreeParserCtxt(ctxt);
12655 	return(NULL);
12656     }
12657     inputPush(ctxt, inputStream);
12658 
12659     return(ctxt);
12660 }
12661 
12662 #ifdef LIBXML_VALID_ENABLED
12663 /************************************************************************
12664  *									*
12665  *		Front ends when parsing a DTD				*
12666  *									*
12667  ************************************************************************/
12668 
12669 /**
12670  * xmlIOParseDTD:
12671  * @sax:  the SAX handler block or NULL
12672  * @input:  an Input Buffer
12673  * @enc:  the charset encoding if known
12674  *
12675  * Load and parse a DTD
12676  *
12677  * Returns the resulting xmlDtdPtr or NULL in case of error.
12678  * @input will be freed by the function in any case.
12679  */
12680 
12681 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12682 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12683 	      xmlCharEncoding enc) {
12684     xmlDtdPtr ret = NULL;
12685     xmlParserCtxtPtr ctxt;
12686     xmlParserInputPtr pinput = NULL;
12687     xmlChar start[4];
12688 
12689     if (input == NULL)
12690 	return(NULL);
12691 
12692     ctxt = xmlNewParserCtxt();
12693     if (ctxt == NULL) {
12694         xmlFreeParserInputBuffer(input);
12695 	return(NULL);
12696     }
12697 
12698     /* We are loading a DTD */
12699     ctxt->options |= XML_PARSE_DTDLOAD;
12700 
12701     /*
12702      * Set-up the SAX context
12703      */
12704     if (sax != NULL) {
12705 	if (ctxt->sax != NULL)
12706 	    xmlFree(ctxt->sax);
12707         ctxt->sax = sax;
12708         ctxt->userData = ctxt;
12709     }
12710     xmlDetectSAX2(ctxt);
12711 
12712     /*
12713      * generate a parser input from the I/O handler
12714      */
12715 
12716     pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12717     if (pinput == NULL) {
12718         if (sax != NULL) ctxt->sax = NULL;
12719         xmlFreeParserInputBuffer(input);
12720 	xmlFreeParserCtxt(ctxt);
12721 	return(NULL);
12722     }
12723 
12724     /*
12725      * plug some encoding conversion routines here.
12726      */
12727     if (xmlPushInput(ctxt, pinput) < 0) {
12728         if (sax != NULL) ctxt->sax = NULL;
12729 	xmlFreeParserCtxt(ctxt);
12730 	return(NULL);
12731     }
12732     if (enc != XML_CHAR_ENCODING_NONE) {
12733         xmlSwitchEncoding(ctxt, enc);
12734     }
12735 
12736     pinput->filename = NULL;
12737     pinput->line = 1;
12738     pinput->col = 1;
12739     pinput->base = ctxt->input->cur;
12740     pinput->cur = ctxt->input->cur;
12741     pinput->free = NULL;
12742 
12743     /*
12744      * let's parse that entity knowing it's an external subset.
12745      */
12746     ctxt->inSubset = 2;
12747     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12748     if (ctxt->myDoc == NULL) {
12749 	xmlErrMemory(ctxt, "New Doc failed");
12750 	return(NULL);
12751     }
12752     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12753     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12754 	                               BAD_CAST "none", BAD_CAST "none");
12755 
12756     if ((enc == XML_CHAR_ENCODING_NONE) &&
12757         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12758 	/*
12759 	 * Get the 4 first bytes and decode the charset
12760 	 * if enc != XML_CHAR_ENCODING_NONE
12761 	 * plug some encoding conversion routines.
12762 	 */
12763 	start[0] = RAW;
12764 	start[1] = NXT(1);
12765 	start[2] = NXT(2);
12766 	start[3] = NXT(3);
12767 	enc = xmlDetectCharEncoding(start, 4);
12768 	if (enc != XML_CHAR_ENCODING_NONE) {
12769 	    xmlSwitchEncoding(ctxt, enc);
12770 	}
12771     }
12772 
12773     xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12774 
12775     if (ctxt->myDoc != NULL) {
12776 	if (ctxt->wellFormed) {
12777 	    ret = ctxt->myDoc->extSubset;
12778 	    ctxt->myDoc->extSubset = NULL;
12779 	    if (ret != NULL) {
12780 		xmlNodePtr tmp;
12781 
12782 		ret->doc = NULL;
12783 		tmp = ret->children;
12784 		while (tmp != NULL) {
12785 		    tmp->doc = NULL;
12786 		    tmp = tmp->next;
12787 		}
12788 	    }
12789 	} else {
12790 	    ret = NULL;
12791 	}
12792         xmlFreeDoc(ctxt->myDoc);
12793         ctxt->myDoc = NULL;
12794     }
12795     if (sax != NULL) ctxt->sax = NULL;
12796     xmlFreeParserCtxt(ctxt);
12797 
12798     return(ret);
12799 }
12800 
12801 /**
12802  * xmlSAXParseDTD:
12803  * @sax:  the SAX handler block
12804  * @ExternalID:  a NAME* containing the External ID of the DTD
12805  * @SystemID:  a NAME* containing the URL to the DTD
12806  *
12807  * Load and parse an external subset.
12808  *
12809  * Returns the resulting xmlDtdPtr or NULL in case of error.
12810  */
12811 
12812 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12813 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12814                           const xmlChar *SystemID) {
12815     xmlDtdPtr ret = NULL;
12816     xmlParserCtxtPtr ctxt;
12817     xmlParserInputPtr input = NULL;
12818     xmlCharEncoding enc;
12819     xmlChar* systemIdCanonic;
12820 
12821     if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12822 
12823     ctxt = xmlNewParserCtxt();
12824     if (ctxt == NULL) {
12825 	return(NULL);
12826     }
12827 
12828     /* We are loading a DTD */
12829     ctxt->options |= XML_PARSE_DTDLOAD;
12830 
12831     /*
12832      * Set-up the SAX context
12833      */
12834     if (sax != NULL) {
12835 	if (ctxt->sax != NULL)
12836 	    xmlFree(ctxt->sax);
12837         ctxt->sax = sax;
12838         ctxt->userData = ctxt;
12839     }
12840 
12841     /*
12842      * Canonicalise the system ID
12843      */
12844     systemIdCanonic = xmlCanonicPath(SystemID);
12845     if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12846 	xmlFreeParserCtxt(ctxt);
12847 	return(NULL);
12848     }
12849 
12850     /*
12851      * Ask the Entity resolver to load the damn thing
12852      */
12853 
12854     if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12855 	input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12856 	                                 systemIdCanonic);
12857     if (input == NULL) {
12858         if (sax != NULL) ctxt->sax = NULL;
12859 	xmlFreeParserCtxt(ctxt);
12860 	if (systemIdCanonic != NULL)
12861 	    xmlFree(systemIdCanonic);
12862 	return(NULL);
12863     }
12864 
12865     /*
12866      * plug some encoding conversion routines here.
12867      */
12868     if (xmlPushInput(ctxt, input) < 0) {
12869         if (sax != NULL) ctxt->sax = NULL;
12870 	xmlFreeParserCtxt(ctxt);
12871 	if (systemIdCanonic != NULL)
12872 	    xmlFree(systemIdCanonic);
12873 	return(NULL);
12874     }
12875     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12876 	enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12877 	xmlSwitchEncoding(ctxt, enc);
12878     }
12879 
12880     if (input->filename == NULL)
12881 	input->filename = (char *) systemIdCanonic;
12882     else
12883 	xmlFree(systemIdCanonic);
12884     input->line = 1;
12885     input->col = 1;
12886     input->base = ctxt->input->cur;
12887     input->cur = ctxt->input->cur;
12888     input->free = NULL;
12889 
12890     /*
12891      * let's parse that entity knowing it's an external subset.
12892      */
12893     ctxt->inSubset = 2;
12894     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12895     if (ctxt->myDoc == NULL) {
12896 	xmlErrMemory(ctxt, "New Doc failed");
12897         if (sax != NULL) ctxt->sax = NULL;
12898 	xmlFreeParserCtxt(ctxt);
12899 	return(NULL);
12900     }
12901     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12902     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12903 	                               ExternalID, SystemID);
12904     xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12905 
12906     if (ctxt->myDoc != NULL) {
12907 	if (ctxt->wellFormed) {
12908 	    ret = ctxt->myDoc->extSubset;
12909 	    ctxt->myDoc->extSubset = NULL;
12910 	    if (ret != NULL) {
12911 		xmlNodePtr tmp;
12912 
12913 		ret->doc = NULL;
12914 		tmp = ret->children;
12915 		while (tmp != NULL) {
12916 		    tmp->doc = NULL;
12917 		    tmp = tmp->next;
12918 		}
12919 	    }
12920 	} else {
12921 	    ret = NULL;
12922 	}
12923         xmlFreeDoc(ctxt->myDoc);
12924         ctxt->myDoc = NULL;
12925     }
12926     if (sax != NULL) ctxt->sax = NULL;
12927     xmlFreeParserCtxt(ctxt);
12928 
12929     return(ret);
12930 }
12931 
12932 
12933 /**
12934  * xmlParseDTD:
12935  * @ExternalID:  a NAME* containing the External ID of the DTD
12936  * @SystemID:  a NAME* containing the URL to the DTD
12937  *
12938  * Load and parse an external subset.
12939  *
12940  * Returns the resulting xmlDtdPtr or NULL in case of error.
12941  */
12942 
12943 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)12944 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12945     return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12946 }
12947 #endif /* LIBXML_VALID_ENABLED */
12948 
12949 /************************************************************************
12950  *									*
12951  *		Front ends when parsing an Entity			*
12952  *									*
12953  ************************************************************************/
12954 
12955 /**
12956  * xmlParseCtxtExternalEntity:
12957  * @ctx:  the existing parsing context
12958  * @URL:  the URL for the entity to load
12959  * @ID:  the System ID for the entity to load
12960  * @lst:  the return value for the set of parsed nodes
12961  *
12962  * Parse an external general entity within an existing parsing context
12963  * An external general parsed entity is well-formed if it matches the
12964  * production labeled extParsedEnt.
12965  *
12966  * [78] extParsedEnt ::= TextDecl? content
12967  *
12968  * Returns 0 if the entity is well formed, -1 in case of args problem and
12969  *    the parser error code otherwise
12970  */
12971 
12972 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12973 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12974 	               const xmlChar *ID, xmlNodePtr *lst) {
12975     void *userData;
12976 
12977     if (ctx == NULL) return(-1);
12978     /*
12979      * If the user provided their own SAX callbacks, then reuse the
12980      * userData callback field, otherwise the expected setup in a
12981      * DOM builder is to have userData == ctxt
12982      */
12983     if (ctx->userData == ctx)
12984         userData = NULL;
12985     else
12986         userData = ctx->userData;
12987     return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12988                                          userData, ctx->depth + 1,
12989                                          URL, ID, lst);
12990 }
12991 
12992 /**
12993  * xmlParseExternalEntityPrivate:
12994  * @doc:  the document the chunk pertains to
12995  * @oldctxt:  the previous parser context if available
12996  * @sax:  the SAX handler block (possibly NULL)
12997  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12998  * @depth:  Used for loop detection, use 0
12999  * @URL:  the URL for the entity to load
13000  * @ID:  the System ID for the entity to load
13001  * @list:  the return value for the set of parsed nodes
13002  *
13003  * Private version of xmlParseExternalEntity()
13004  *
13005  * Returns 0 if the entity is well formed, -1 in case of args problem and
13006  *    the parser error code otherwise
13007  */
13008 
13009 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)13010 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13011 	              xmlSAXHandlerPtr sax,
13012 		      void *user_data, int depth, const xmlChar *URL,
13013 		      const xmlChar *ID, xmlNodePtr *list) {
13014     xmlParserCtxtPtr ctxt;
13015     xmlDocPtr newDoc;
13016     xmlNodePtr newRoot;
13017     xmlSAXHandlerPtr oldsax = NULL;
13018     xmlParserErrors ret = XML_ERR_OK;
13019     xmlChar start[4];
13020     xmlCharEncoding enc;
13021 
13022     if (((depth > 40) &&
13023 	((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13024 	(depth > 1024)) {
13025 	return(XML_ERR_ENTITY_LOOP);
13026     }
13027 
13028     if (list != NULL)
13029         *list = NULL;
13030     if ((URL == NULL) && (ID == NULL))
13031 	return(XML_ERR_INTERNAL_ERROR);
13032     if (doc == NULL)
13033 	return(XML_ERR_INTERNAL_ERROR);
13034 
13035 
13036     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13037     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13038     ctxt->userData = ctxt;
13039     if (sax != NULL) {
13040 	oldsax = ctxt->sax;
13041         ctxt->sax = sax;
13042 	if (user_data != NULL)
13043 	    ctxt->userData = user_data;
13044     }
13045     xmlDetectSAX2(ctxt);
13046     newDoc = xmlNewDoc(BAD_CAST "1.0");
13047     if (newDoc == NULL) {
13048 	xmlFreeParserCtxt(ctxt);
13049 	return(XML_ERR_INTERNAL_ERROR);
13050     }
13051     newDoc->properties = XML_DOC_INTERNAL;
13052     if (doc) {
13053         newDoc->intSubset = doc->intSubset;
13054         newDoc->extSubset = doc->extSubset;
13055         if (doc->dict) {
13056             newDoc->dict = doc->dict;
13057             xmlDictReference(newDoc->dict);
13058         }
13059         if (doc->URL != NULL) {
13060             newDoc->URL = xmlStrdup(doc->URL);
13061         }
13062     }
13063     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13064     if (newRoot == NULL) {
13065 	if (sax != NULL)
13066 	    ctxt->sax = oldsax;
13067 	xmlFreeParserCtxt(ctxt);
13068 	newDoc->intSubset = NULL;
13069 	newDoc->extSubset = NULL;
13070         xmlFreeDoc(newDoc);
13071 	return(XML_ERR_INTERNAL_ERROR);
13072     }
13073     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13074     nodePush(ctxt, newDoc->children);
13075     if (doc == NULL) {
13076         ctxt->myDoc = newDoc;
13077     } else {
13078         ctxt->myDoc = doc;
13079         newRoot->doc = doc;
13080     }
13081 
13082     /*
13083      * Get the 4 first bytes and decode the charset
13084      * if enc != XML_CHAR_ENCODING_NONE
13085      * plug some encoding conversion routines.
13086      */
13087     GROW;
13088     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13089 	start[0] = RAW;
13090 	start[1] = NXT(1);
13091 	start[2] = NXT(2);
13092 	start[3] = NXT(3);
13093 	enc = xmlDetectCharEncoding(start, 4);
13094 	if (enc != XML_CHAR_ENCODING_NONE) {
13095 	    xmlSwitchEncoding(ctxt, enc);
13096 	}
13097     }
13098 
13099     /*
13100      * Parse a possible text declaration first
13101      */
13102     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13103 	xmlParseTextDecl(ctxt);
13104         /*
13105          * An XML-1.0 document can't reference an entity not XML-1.0
13106          */
13107         if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13108             (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13109             xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13110                            "Version mismatch between document and entity\n");
13111         }
13112     }
13113 
13114     ctxt->instate = XML_PARSER_CONTENT;
13115     ctxt->depth = depth;
13116     if (oldctxt != NULL) {
13117 	ctxt->_private = oldctxt->_private;
13118 	ctxt->loadsubset = oldctxt->loadsubset;
13119 	ctxt->validate = oldctxt->validate;
13120 	ctxt->valid = oldctxt->valid;
13121 	ctxt->replaceEntities = oldctxt->replaceEntities;
13122         if (oldctxt->validate) {
13123             ctxt->vctxt.error = oldctxt->vctxt.error;
13124             ctxt->vctxt.warning = oldctxt->vctxt.warning;
13125             ctxt->vctxt.userData = oldctxt->vctxt.userData;
13126         }
13127 	ctxt->external = oldctxt->external;
13128         if (ctxt->dict) xmlDictFree(ctxt->dict);
13129         ctxt->dict = oldctxt->dict;
13130         ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13131         ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13132         ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13133         ctxt->dictNames = oldctxt->dictNames;
13134         ctxt->attsDefault = oldctxt->attsDefault;
13135         ctxt->attsSpecial = oldctxt->attsSpecial;
13136         ctxt->linenumbers = oldctxt->linenumbers;
13137 	ctxt->record_info = oldctxt->record_info;
13138 	ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13139 	ctxt->node_seq.length = oldctxt->node_seq.length;
13140 	ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13141     } else {
13142 	/*
13143 	 * Doing validity checking on chunk without context
13144 	 * doesn't make sense
13145 	 */
13146 	ctxt->_private = NULL;
13147 	ctxt->validate = 0;
13148 	ctxt->external = 2;
13149 	ctxt->loadsubset = 0;
13150     }
13151 
13152     xmlParseContent(ctxt);
13153 
13154     if ((RAW == '<') && (NXT(1) == '/')) {
13155 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13156     } else if (RAW != 0) {
13157 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13158     }
13159     if (ctxt->node != newDoc->children) {
13160 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13161     }
13162 
13163     if (!ctxt->wellFormed) {
13164         if (ctxt->errNo == 0)
13165 	    ret = XML_ERR_INTERNAL_ERROR;
13166 	else
13167 	    ret = (xmlParserErrors)ctxt->errNo;
13168     } else {
13169 	if (list != NULL) {
13170 	    xmlNodePtr cur;
13171 
13172 	    /*
13173 	     * Return the newly created nodeset after unlinking it from
13174 	     * they pseudo parent.
13175 	     */
13176 	    cur = newDoc->children->children;
13177 	    *list = cur;
13178 	    while (cur != NULL) {
13179 		cur->parent = NULL;
13180 		cur = cur->next;
13181 	    }
13182             newDoc->children->children = NULL;
13183 	}
13184 	ret = XML_ERR_OK;
13185     }
13186 
13187     /*
13188      * Record in the parent context the number of entities replacement
13189      * done when parsing that reference.
13190      */
13191     if (oldctxt != NULL)
13192         oldctxt->nbentities += ctxt->nbentities;
13193 
13194     /*
13195      * Also record the size of the entity parsed
13196      */
13197     if (ctxt->input != NULL && oldctxt != NULL) {
13198 	oldctxt->sizeentities += ctxt->input->consumed;
13199 	oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13200     }
13201     /*
13202      * And record the last error if any
13203      */
13204     if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13205         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13206 
13207     if (sax != NULL)
13208 	ctxt->sax = oldsax;
13209     if (oldctxt != NULL) {
13210         ctxt->dict = NULL;
13211         ctxt->attsDefault = NULL;
13212         ctxt->attsSpecial = NULL;
13213         oldctxt->validate = ctxt->validate;
13214         oldctxt->valid = ctxt->valid;
13215         oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13216         oldctxt->node_seq.length = ctxt->node_seq.length;
13217         oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13218     }
13219     ctxt->node_seq.maximum = 0;
13220     ctxt->node_seq.length = 0;
13221     ctxt->node_seq.buffer = NULL;
13222     xmlFreeParserCtxt(ctxt);
13223     newDoc->intSubset = NULL;
13224     newDoc->extSubset = NULL;
13225     xmlFreeDoc(newDoc);
13226 
13227     return(ret);
13228 }
13229 
13230 #ifdef LIBXML_SAX1_ENABLED
13231 /**
13232  * xmlParseExternalEntity:
13233  * @doc:  the document the chunk pertains to
13234  * @sax:  the SAX handler block (possibly NULL)
13235  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13236  * @depth:  Used for loop detection, use 0
13237  * @URL:  the URL for the entity to load
13238  * @ID:  the System ID for the entity to load
13239  * @lst:  the return value for the set of parsed nodes
13240  *
13241  * Parse an external general entity
13242  * An external general parsed entity is well-formed if it matches the
13243  * production labeled extParsedEnt.
13244  *
13245  * [78] extParsedEnt ::= TextDecl? content
13246  *
13247  * Returns 0 if the entity is well formed, -1 in case of args problem and
13248  *    the parser error code otherwise
13249  */
13250 
13251 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)13252 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13253 	  int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13254     return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13255 		                       ID, lst));
13256 }
13257 
13258 /**
13259  * xmlParseBalancedChunkMemory:
13260  * @doc:  the document the chunk pertains to (must not be NULL)
13261  * @sax:  the SAX handler block (possibly NULL)
13262  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13263  * @depth:  Used for loop detection, use 0
13264  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13265  * @lst:  the return value for the set of parsed nodes
13266  *
13267  * Parse a well-balanced chunk of an XML document
13268  * called by the parser
13269  * The allowed sequence for the Well Balanced Chunk is the one defined by
13270  * the content production in the XML grammar:
13271  *
13272  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13273  *
13274  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13275  *    the parser error code otherwise
13276  */
13277 
13278 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)13279 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13280      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13281     return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13282                                                 depth, string, lst, 0 );
13283 }
13284 #endif /* LIBXML_SAX1_ENABLED */
13285 
13286 /**
13287  * xmlParseBalancedChunkMemoryInternal:
13288  * @oldctxt:  the existing parsing context
13289  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13290  * @user_data:  the user data field for the parser context
13291  * @lst:  the return value for the set of parsed nodes
13292  *
13293  *
13294  * Parse a well-balanced chunk of an XML document
13295  * called by the parser
13296  * The allowed sequence for the Well Balanced Chunk is the one defined by
13297  * the content production in the XML grammar:
13298  *
13299  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13300  *
13301  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13302  * error code otherwise
13303  *
13304  * In case recover is set to 1, the nodelist will not be empty even if
13305  * the parsed chunk is not well balanced.
13306  */
13307 static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,const xmlChar * string,void * user_data,xmlNodePtr * lst)13308 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13309 	const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13310     xmlParserCtxtPtr ctxt;
13311     xmlDocPtr newDoc = NULL;
13312     xmlNodePtr newRoot;
13313     xmlSAXHandlerPtr oldsax = NULL;
13314     xmlNodePtr content = NULL;
13315     xmlNodePtr last = NULL;
13316     int size;
13317     xmlParserErrors ret = XML_ERR_OK;
13318 #ifdef SAX2
13319     int i;
13320 #endif
13321 
13322     if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13323         (oldctxt->depth >  1024)) {
13324 	return(XML_ERR_ENTITY_LOOP);
13325     }
13326 
13327 
13328     if (lst != NULL)
13329         *lst = NULL;
13330     if (string == NULL)
13331         return(XML_ERR_INTERNAL_ERROR);
13332 
13333     size = xmlStrlen(string);
13334 
13335     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13336     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13337     if (user_data != NULL)
13338 	ctxt->userData = user_data;
13339     else
13340 	ctxt->userData = ctxt;
13341     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13342     ctxt->dict = oldctxt->dict;
13343     ctxt->input_id = oldctxt->input_id + 1;
13344     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13345     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13346     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13347 
13348 #ifdef SAX2
13349     /* propagate namespaces down the entity */
13350     for (i = 0;i < oldctxt->nsNr;i += 2) {
13351         nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13352     }
13353 #endif
13354 
13355     oldsax = ctxt->sax;
13356     ctxt->sax = oldctxt->sax;
13357     xmlDetectSAX2(ctxt);
13358     ctxt->replaceEntities = oldctxt->replaceEntities;
13359     ctxt->options = oldctxt->options;
13360 
13361     ctxt->_private = oldctxt->_private;
13362     if (oldctxt->myDoc == NULL) {
13363 	newDoc = xmlNewDoc(BAD_CAST "1.0");
13364 	if (newDoc == NULL) {
13365 	    ctxt->sax = oldsax;
13366 	    ctxt->dict = NULL;
13367 	    xmlFreeParserCtxt(ctxt);
13368 	    return(XML_ERR_INTERNAL_ERROR);
13369 	}
13370 	newDoc->properties = XML_DOC_INTERNAL;
13371 	newDoc->dict = ctxt->dict;
13372 	xmlDictReference(newDoc->dict);
13373 	ctxt->myDoc = newDoc;
13374     } else {
13375 	ctxt->myDoc = oldctxt->myDoc;
13376         content = ctxt->myDoc->children;
13377 	last = ctxt->myDoc->last;
13378     }
13379     newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13380     if (newRoot == NULL) {
13381 	ctxt->sax = oldsax;
13382 	ctxt->dict = NULL;
13383 	xmlFreeParserCtxt(ctxt);
13384 	if (newDoc != NULL) {
13385 	    xmlFreeDoc(newDoc);
13386 	}
13387 	return(XML_ERR_INTERNAL_ERROR);
13388     }
13389     ctxt->myDoc->children = NULL;
13390     ctxt->myDoc->last = NULL;
13391     xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13392     nodePush(ctxt, ctxt->myDoc->children);
13393     ctxt->instate = XML_PARSER_CONTENT;
13394     ctxt->depth = oldctxt->depth + 1;
13395 
13396     ctxt->validate = 0;
13397     ctxt->loadsubset = oldctxt->loadsubset;
13398     if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13399 	/*
13400 	 * ID/IDREF registration will be done in xmlValidateElement below
13401 	 */
13402 	ctxt->loadsubset |= XML_SKIP_IDS;
13403     }
13404     ctxt->dictNames = oldctxt->dictNames;
13405     ctxt->attsDefault = oldctxt->attsDefault;
13406     ctxt->attsSpecial = oldctxt->attsSpecial;
13407 
13408     xmlParseContent(ctxt);
13409     if ((RAW == '<') && (NXT(1) == '/')) {
13410 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13411     } else if (RAW != 0) {
13412 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13413     }
13414     if (ctxt->node != ctxt->myDoc->children) {
13415 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13416     }
13417 
13418     if (!ctxt->wellFormed) {
13419         if (ctxt->errNo == 0)
13420 	    ret = XML_ERR_INTERNAL_ERROR;
13421 	else
13422 	    ret = (xmlParserErrors)ctxt->errNo;
13423     } else {
13424       ret = XML_ERR_OK;
13425     }
13426 
13427     if ((lst != NULL) && (ret == XML_ERR_OK)) {
13428 	xmlNodePtr cur;
13429 
13430 	/*
13431 	 * Return the newly created nodeset after unlinking it from
13432 	 * they pseudo parent.
13433 	 */
13434 	cur = ctxt->myDoc->children->children;
13435 	*lst = cur;
13436 	while (cur != NULL) {
13437 #ifdef LIBXML_VALID_ENABLED
13438 	    if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13439 		(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13440 		(cur->type == XML_ELEMENT_NODE)) {
13441 		oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13442 			oldctxt->myDoc, cur);
13443 	    }
13444 #endif /* LIBXML_VALID_ENABLED */
13445 	    cur->parent = NULL;
13446 	    cur = cur->next;
13447 	}
13448 	ctxt->myDoc->children->children = NULL;
13449     }
13450     if (ctxt->myDoc != NULL) {
13451 	xmlFreeNode(ctxt->myDoc->children);
13452         ctxt->myDoc->children = content;
13453         ctxt->myDoc->last = last;
13454     }
13455 
13456     /*
13457      * Record in the parent context the number of entities replacement
13458      * done when parsing that reference.
13459      */
13460     if (oldctxt != NULL)
13461         oldctxt->nbentities += ctxt->nbentities;
13462 
13463     /*
13464      * Also record the last error if any
13465      */
13466     if (ctxt->lastError.code != XML_ERR_OK)
13467         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13468 
13469     ctxt->sax = oldsax;
13470     ctxt->dict = NULL;
13471     ctxt->attsDefault = NULL;
13472     ctxt->attsSpecial = NULL;
13473     xmlFreeParserCtxt(ctxt);
13474     if (newDoc != NULL) {
13475 	xmlFreeDoc(newDoc);
13476     }
13477 
13478     return(ret);
13479 }
13480 
13481 /**
13482  * xmlParseInNodeContext:
13483  * @node:  the context node
13484  * @data:  the input string
13485  * @datalen:  the input string length in bytes
13486  * @options:  a combination of xmlParserOption
13487  * @lst:  the return value for the set of parsed nodes
13488  *
13489  * Parse a well-balanced chunk of an XML document
13490  * within the context (DTD, namespaces, etc ...) of the given node.
13491  *
13492  * The allowed sequence for the data is a Well Balanced Chunk defined by
13493  * the content production in the XML grammar:
13494  *
13495  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13496  *
13497  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13498  * error code otherwise
13499  */
13500 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)13501 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13502                       int options, xmlNodePtr *lst) {
13503 #ifdef SAX2
13504     xmlParserCtxtPtr ctxt;
13505     xmlDocPtr doc = NULL;
13506     xmlNodePtr fake, cur;
13507     int nsnr = 0;
13508 
13509     xmlParserErrors ret = XML_ERR_OK;
13510 
13511     /*
13512      * check all input parameters, grab the document
13513      */
13514     if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13515         return(XML_ERR_INTERNAL_ERROR);
13516     switch (node->type) {
13517         case XML_ELEMENT_NODE:
13518         case XML_ATTRIBUTE_NODE:
13519         case XML_TEXT_NODE:
13520         case XML_CDATA_SECTION_NODE:
13521         case XML_ENTITY_REF_NODE:
13522         case XML_PI_NODE:
13523         case XML_COMMENT_NODE:
13524         case XML_DOCUMENT_NODE:
13525         case XML_HTML_DOCUMENT_NODE:
13526 	    break;
13527 	default:
13528 	    return(XML_ERR_INTERNAL_ERROR);
13529 
13530     }
13531     while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13532            (node->type != XML_DOCUMENT_NODE) &&
13533 	   (node->type != XML_HTML_DOCUMENT_NODE))
13534 	node = node->parent;
13535     if (node == NULL)
13536 	return(XML_ERR_INTERNAL_ERROR);
13537     if (node->type == XML_ELEMENT_NODE)
13538 	doc = node->doc;
13539     else
13540         doc = (xmlDocPtr) node;
13541     if (doc == NULL)
13542 	return(XML_ERR_INTERNAL_ERROR);
13543 
13544     /*
13545      * allocate a context and set-up everything not related to the
13546      * node position in the tree
13547      */
13548     if (doc->type == XML_DOCUMENT_NODE)
13549 	ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13550 #ifdef LIBXML_HTML_ENABLED
13551     else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13552 	ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13553         /*
13554          * When parsing in context, it makes no sense to add implied
13555          * elements like html/body/etc...
13556          */
13557         options |= HTML_PARSE_NOIMPLIED;
13558     }
13559 #endif
13560     else
13561         return(XML_ERR_INTERNAL_ERROR);
13562 
13563     if (ctxt == NULL)
13564         return(XML_ERR_NO_MEMORY);
13565 
13566     /*
13567      * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13568      * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13569      * we must wait until the last moment to free the original one.
13570      */
13571     if (doc->dict != NULL) {
13572         if (ctxt->dict != NULL)
13573 	    xmlDictFree(ctxt->dict);
13574 	ctxt->dict = doc->dict;
13575     } else
13576         options |= XML_PARSE_NODICT;
13577 
13578     if (doc->encoding != NULL) {
13579         xmlCharEncodingHandlerPtr hdlr;
13580 
13581         if (ctxt->encoding != NULL)
13582 	    xmlFree((xmlChar *) ctxt->encoding);
13583         ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13584 
13585         hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13586         if (hdlr != NULL) {
13587             xmlSwitchToEncoding(ctxt, hdlr);
13588 	} else {
13589             return(XML_ERR_UNSUPPORTED_ENCODING);
13590         }
13591     }
13592 
13593     xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13594     xmlDetectSAX2(ctxt);
13595     ctxt->myDoc = doc;
13596     /* parsing in context, i.e. as within existing content */
13597     ctxt->input_id = 2;
13598     ctxt->instate = XML_PARSER_CONTENT;
13599 
13600     fake = xmlNewComment(NULL);
13601     if (fake == NULL) {
13602         xmlFreeParserCtxt(ctxt);
13603 	return(XML_ERR_NO_MEMORY);
13604     }
13605     xmlAddChild(node, fake);
13606 
13607     if (node->type == XML_ELEMENT_NODE) {
13608 	nodePush(ctxt, node);
13609 	/*
13610 	 * initialize the SAX2 namespaces stack
13611 	 */
13612 	cur = node;
13613 	while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13614 	    xmlNsPtr ns = cur->nsDef;
13615 	    const xmlChar *iprefix, *ihref;
13616 
13617 	    while (ns != NULL) {
13618 		if (ctxt->dict) {
13619 		    iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13620 		    ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13621 		} else {
13622 		    iprefix = ns->prefix;
13623 		    ihref = ns->href;
13624 		}
13625 
13626 	        if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13627 		    nsPush(ctxt, iprefix, ihref);
13628 		    nsnr++;
13629 		}
13630 		ns = ns->next;
13631 	    }
13632 	    cur = cur->parent;
13633 	}
13634     }
13635 
13636     if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13637 	/*
13638 	 * ID/IDREF registration will be done in xmlValidateElement below
13639 	 */
13640 	ctxt->loadsubset |= XML_SKIP_IDS;
13641     }
13642 
13643 #ifdef LIBXML_HTML_ENABLED
13644     if (doc->type == XML_HTML_DOCUMENT_NODE)
13645         __htmlParseContent(ctxt);
13646     else
13647 #endif
13648 	xmlParseContent(ctxt);
13649 
13650     nsPop(ctxt, nsnr);
13651     if ((RAW == '<') && (NXT(1) == '/')) {
13652 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13653     } else if (RAW != 0) {
13654 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13655     }
13656     if ((ctxt->node != NULL) && (ctxt->node != node)) {
13657 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13658 	ctxt->wellFormed = 0;
13659     }
13660 
13661     if (!ctxt->wellFormed) {
13662         if (ctxt->errNo == 0)
13663 	    ret = XML_ERR_INTERNAL_ERROR;
13664 	else
13665 	    ret = (xmlParserErrors)ctxt->errNo;
13666     } else {
13667         ret = XML_ERR_OK;
13668     }
13669 
13670     /*
13671      * Return the newly created nodeset after unlinking it from
13672      * the pseudo sibling.
13673      */
13674 
13675     cur = fake->next;
13676     fake->next = NULL;
13677     node->last = fake;
13678 
13679     if (cur != NULL) {
13680 	cur->prev = NULL;
13681     }
13682 
13683     *lst = cur;
13684 
13685     while (cur != NULL) {
13686 	cur->parent = NULL;
13687 	cur = cur->next;
13688     }
13689 
13690     xmlUnlinkNode(fake);
13691     xmlFreeNode(fake);
13692 
13693 
13694     if (ret != XML_ERR_OK) {
13695         xmlFreeNodeList(*lst);
13696 	*lst = NULL;
13697     }
13698 
13699     if (doc->dict != NULL)
13700         ctxt->dict = NULL;
13701     xmlFreeParserCtxt(ctxt);
13702 
13703     return(ret);
13704 #else /* !SAX2 */
13705     return(XML_ERR_INTERNAL_ERROR);
13706 #endif
13707 }
13708 
13709 #ifdef LIBXML_SAX1_ENABLED
13710 /**
13711  * xmlParseBalancedChunkMemoryRecover:
13712  * @doc:  the document the chunk pertains to (must not be NULL)
13713  * @sax:  the SAX handler block (possibly NULL)
13714  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13715  * @depth:  Used for loop detection, use 0
13716  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13717  * @lst:  the return value for the set of parsed nodes
13718  * @recover: return nodes even if the data is broken (use 0)
13719  *
13720  *
13721  * Parse a well-balanced chunk of an XML document
13722  * called by the parser
13723  * The allowed sequence for the Well Balanced Chunk is the one defined by
13724  * the content production in the XML grammar:
13725  *
13726  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13727  *
13728  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13729  *    the parser error code otherwise
13730  *
13731  * In case recover is set to 1, the nodelist will not be empty even if
13732  * the parsed chunk is not well balanced, assuming the parsing succeeded to
13733  * some extent.
13734  */
13735 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)13736 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13737      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13738      int recover) {
13739     xmlParserCtxtPtr ctxt;
13740     xmlDocPtr newDoc;
13741     xmlSAXHandlerPtr oldsax = NULL;
13742     xmlNodePtr content, newRoot;
13743     int size;
13744     int ret = 0;
13745 
13746     if (depth > 40) {
13747 	return(XML_ERR_ENTITY_LOOP);
13748     }
13749 
13750 
13751     if (lst != NULL)
13752         *lst = NULL;
13753     if (string == NULL)
13754         return(-1);
13755 
13756     size = xmlStrlen(string);
13757 
13758     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13759     if (ctxt == NULL) return(-1);
13760     ctxt->userData = ctxt;
13761     if (sax != NULL) {
13762 	oldsax = ctxt->sax;
13763         ctxt->sax = sax;
13764 	if (user_data != NULL)
13765 	    ctxt->userData = user_data;
13766     }
13767     newDoc = xmlNewDoc(BAD_CAST "1.0");
13768     if (newDoc == NULL) {
13769 	xmlFreeParserCtxt(ctxt);
13770 	return(-1);
13771     }
13772     newDoc->properties = XML_DOC_INTERNAL;
13773     if ((doc != NULL) && (doc->dict != NULL)) {
13774         xmlDictFree(ctxt->dict);
13775 	ctxt->dict = doc->dict;
13776 	xmlDictReference(ctxt->dict);
13777 	ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13778 	ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13779 	ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13780 	ctxt->dictNames = 1;
13781     } else {
13782 	xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13783     }
13784     /* doc == NULL is only supported for historic reasons */
13785     if (doc != NULL) {
13786 	newDoc->intSubset = doc->intSubset;
13787 	newDoc->extSubset = doc->extSubset;
13788     }
13789     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13790     if (newRoot == NULL) {
13791 	if (sax != NULL)
13792 	    ctxt->sax = oldsax;
13793 	xmlFreeParserCtxt(ctxt);
13794 	newDoc->intSubset = NULL;
13795 	newDoc->extSubset = NULL;
13796         xmlFreeDoc(newDoc);
13797 	return(-1);
13798     }
13799     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13800     nodePush(ctxt, newRoot);
13801     /* doc == NULL is only supported for historic reasons */
13802     if (doc == NULL) {
13803 	ctxt->myDoc = newDoc;
13804     } else {
13805 	ctxt->myDoc = newDoc;
13806 	newDoc->children->doc = doc;
13807 	/* Ensure that doc has XML spec namespace */
13808 	xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13809 	newDoc->oldNs = doc->oldNs;
13810     }
13811     ctxt->instate = XML_PARSER_CONTENT;
13812     ctxt->input_id = 2;
13813     ctxt->depth = depth;
13814 
13815     /*
13816      * Doing validity checking on chunk doesn't make sense
13817      */
13818     ctxt->validate = 0;
13819     ctxt->loadsubset = 0;
13820     xmlDetectSAX2(ctxt);
13821 
13822     if ( doc != NULL ){
13823         content = doc->children;
13824         doc->children = NULL;
13825         xmlParseContent(ctxt);
13826         doc->children = content;
13827     }
13828     else {
13829         xmlParseContent(ctxt);
13830     }
13831     if ((RAW == '<') && (NXT(1) == '/')) {
13832 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13833     } else if (RAW != 0) {
13834 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13835     }
13836     if (ctxt->node != newDoc->children) {
13837 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13838     }
13839 
13840     if (!ctxt->wellFormed) {
13841         if (ctxt->errNo == 0)
13842 	    ret = 1;
13843 	else
13844 	    ret = ctxt->errNo;
13845     } else {
13846       ret = 0;
13847     }
13848 
13849     if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13850 	xmlNodePtr cur;
13851 
13852 	/*
13853 	 * Return the newly created nodeset after unlinking it from
13854 	 * they pseudo parent.
13855 	 */
13856 	cur = newDoc->children->children;
13857 	*lst = cur;
13858 	while (cur != NULL) {
13859 	    xmlSetTreeDoc(cur, doc);
13860 	    cur->parent = NULL;
13861 	    cur = cur->next;
13862 	}
13863 	newDoc->children->children = NULL;
13864     }
13865 
13866     if (sax != NULL)
13867 	ctxt->sax = oldsax;
13868     xmlFreeParserCtxt(ctxt);
13869     newDoc->intSubset = NULL;
13870     newDoc->extSubset = NULL;
13871     /* This leaks the namespace list if doc == NULL */
13872     newDoc->oldNs = NULL;
13873     xmlFreeDoc(newDoc);
13874 
13875     return(ret);
13876 }
13877 
13878 /**
13879  * xmlSAXParseEntity:
13880  * @sax:  the SAX handler block
13881  * @filename:  the filename
13882  *
13883  * parse an XML external entity out of context and build a tree.
13884  * It use the given SAX function block to handle the parsing callback.
13885  * If sax is NULL, fallback to the default DOM tree building routines.
13886  *
13887  * [78] extParsedEnt ::= TextDecl? content
13888  *
13889  * This correspond to a "Well Balanced" chunk
13890  *
13891  * Returns the resulting document tree
13892  */
13893 
13894 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)13895 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13896     xmlDocPtr ret;
13897     xmlParserCtxtPtr ctxt;
13898 
13899     ctxt = xmlCreateFileParserCtxt(filename);
13900     if (ctxt == NULL) {
13901 	return(NULL);
13902     }
13903     if (sax != NULL) {
13904 	if (ctxt->sax != NULL)
13905 	    xmlFree(ctxt->sax);
13906         ctxt->sax = sax;
13907         ctxt->userData = NULL;
13908     }
13909 
13910     xmlParseExtParsedEnt(ctxt);
13911 
13912     if (ctxt->wellFormed)
13913 	ret = ctxt->myDoc;
13914     else {
13915         ret = NULL;
13916         xmlFreeDoc(ctxt->myDoc);
13917         ctxt->myDoc = NULL;
13918     }
13919     if (sax != NULL)
13920         ctxt->sax = NULL;
13921     xmlFreeParserCtxt(ctxt);
13922 
13923     return(ret);
13924 }
13925 
13926 /**
13927  * xmlParseEntity:
13928  * @filename:  the filename
13929  *
13930  * parse an XML external entity out of context and build a tree.
13931  *
13932  * [78] extParsedEnt ::= TextDecl? content
13933  *
13934  * This correspond to a "Well Balanced" chunk
13935  *
13936  * Returns the resulting document tree
13937  */
13938 
13939 xmlDocPtr
xmlParseEntity(const char * filename)13940 xmlParseEntity(const char *filename) {
13941     return(xmlSAXParseEntity(NULL, filename));
13942 }
13943 #endif /* LIBXML_SAX1_ENABLED */
13944 
13945 /**
13946  * xmlCreateEntityParserCtxtInternal:
13947  * @URL:  the entity URL
13948  * @ID:  the entity PUBLIC ID
13949  * @base:  a possible base for the target URI
13950  * @pctx:  parser context used to set options on new context
13951  *
13952  * Create a parser context for an external entity
13953  * Automatic support for ZLIB/Compress compressed document is provided
13954  * by default if found at compile-time.
13955  *
13956  * Returns the new parser context or NULL
13957  */
13958 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)13959 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13960 	                  const xmlChar *base, xmlParserCtxtPtr pctx) {
13961     xmlParserCtxtPtr ctxt;
13962     xmlParserInputPtr inputStream;
13963     char *directory = NULL;
13964     xmlChar *uri;
13965 
13966     ctxt = xmlNewParserCtxt();
13967     if (ctxt == NULL) {
13968 	return(NULL);
13969     }
13970 
13971     if (pctx != NULL) {
13972         ctxt->options = pctx->options;
13973         ctxt->_private = pctx->_private;
13974 	/*
13975 	 * this is a subparser of pctx, so the input_id should be
13976 	 * incremented to distinguish from main entity
13977 	 */
13978 	ctxt->input_id = pctx->input_id + 1;
13979     }
13980 
13981     /* Don't read from stdin. */
13982     if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13983         URL = BAD_CAST "./-";
13984 
13985     uri = xmlBuildURI(URL, base);
13986 
13987     if (uri == NULL) {
13988 	inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13989 	if (inputStream == NULL) {
13990 	    xmlFreeParserCtxt(ctxt);
13991 	    return(NULL);
13992 	}
13993 
13994 	inputPush(ctxt, inputStream);
13995 
13996 	if ((ctxt->directory == NULL) && (directory == NULL))
13997 	    directory = xmlParserGetDirectory((char *)URL);
13998 	if ((ctxt->directory == NULL) && (directory != NULL))
13999 	    ctxt->directory = directory;
14000     } else {
14001 	inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14002 	if (inputStream == NULL) {
14003 	    xmlFree(uri);
14004 	    xmlFreeParserCtxt(ctxt);
14005 	    return(NULL);
14006 	}
14007 
14008 	inputPush(ctxt, inputStream);
14009 
14010 	if ((ctxt->directory == NULL) && (directory == NULL))
14011 	    directory = xmlParserGetDirectory((char *)uri);
14012 	if ((ctxt->directory == NULL) && (directory != NULL))
14013 	    ctxt->directory = directory;
14014 	xmlFree(uri);
14015     }
14016     return(ctxt);
14017 }
14018 
14019 /**
14020  * xmlCreateEntityParserCtxt:
14021  * @URL:  the entity URL
14022  * @ID:  the entity PUBLIC ID
14023  * @base:  a possible base for the target URI
14024  *
14025  * Create a parser context for an external entity
14026  * Automatic support for ZLIB/Compress compressed document is provided
14027  * by default if found at compile-time.
14028  *
14029  * Returns the new parser context or NULL
14030  */
14031 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)14032 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14033 	                  const xmlChar *base) {
14034     return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14035 
14036 }
14037 
14038 /************************************************************************
14039  *									*
14040  *		Front ends when parsing from a file			*
14041  *									*
14042  ************************************************************************/
14043 
14044 /**
14045  * xmlCreateURLParserCtxt:
14046  * @filename:  the filename or URL
14047  * @options:  a combination of xmlParserOption
14048  *
14049  * Create a parser context for a file or URL content.
14050  * Automatic support for ZLIB/Compress compressed document is provided
14051  * by default if found at compile-time and for file accesses
14052  *
14053  * Returns the new parser context or NULL
14054  */
14055 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)14056 xmlCreateURLParserCtxt(const char *filename, int options)
14057 {
14058     xmlParserCtxtPtr ctxt;
14059     xmlParserInputPtr inputStream;
14060     char *directory = NULL;
14061 
14062     ctxt = xmlNewParserCtxt();
14063     if (ctxt == NULL) {
14064 	xmlErrMemory(NULL, "cannot allocate parser context");
14065 	return(NULL);
14066     }
14067 
14068     if (options)
14069 	xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14070     ctxt->linenumbers = 1;
14071 
14072     inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14073     if (inputStream == NULL) {
14074 	xmlFreeParserCtxt(ctxt);
14075 	return(NULL);
14076     }
14077 
14078     inputPush(ctxt, inputStream);
14079     if ((ctxt->directory == NULL) && (directory == NULL))
14080         directory = xmlParserGetDirectory(filename);
14081     if ((ctxt->directory == NULL) && (directory != NULL))
14082         ctxt->directory = directory;
14083 
14084     return(ctxt);
14085 }
14086 
14087 /**
14088  * xmlCreateFileParserCtxt:
14089  * @filename:  the filename
14090  *
14091  * Create a parser context for a file content.
14092  * Automatic support for ZLIB/Compress compressed document is provided
14093  * by default if found at compile-time.
14094  *
14095  * Returns the new parser context or NULL
14096  */
14097 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)14098 xmlCreateFileParserCtxt(const char *filename)
14099 {
14100     return(xmlCreateURLParserCtxt(filename, 0));
14101 }
14102 
14103 #ifdef LIBXML_SAX1_ENABLED
14104 /**
14105  * xmlSAXParseFileWithData:
14106  * @sax:  the SAX handler block
14107  * @filename:  the filename
14108  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14109  *             documents
14110  * @data:  the userdata
14111  *
14112  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14113  * compressed document is provided by default if found at compile-time.
14114  * It use the given SAX function block to handle the parsing callback.
14115  * If sax is NULL, fallback to the default DOM tree building routines.
14116  *
14117  * User data (void *) is stored within the parser context in the
14118  * context's _private member, so it is available nearly everywhere in libxml
14119  *
14120  * Returns the resulting document tree
14121  */
14122 
14123 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)14124 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14125                         int recovery, void *data) {
14126     xmlDocPtr ret;
14127     xmlParserCtxtPtr ctxt;
14128 
14129     xmlInitParser();
14130 
14131     ctxt = xmlCreateFileParserCtxt(filename);
14132     if (ctxt == NULL) {
14133 	return(NULL);
14134     }
14135     if (sax != NULL) {
14136 	if (ctxt->sax != NULL)
14137 	    xmlFree(ctxt->sax);
14138         ctxt->sax = sax;
14139     }
14140     xmlDetectSAX2(ctxt);
14141     if (data!=NULL) {
14142 	ctxt->_private = data;
14143     }
14144 
14145     if (ctxt->directory == NULL)
14146         ctxt->directory = xmlParserGetDirectory(filename);
14147 
14148     ctxt->recovery = recovery;
14149 
14150     xmlParseDocument(ctxt);
14151 
14152     if ((ctxt->wellFormed) || recovery) {
14153         ret = ctxt->myDoc;
14154 	if ((ret != NULL) && (ctxt->input->buf != NULL)) {
14155 	    if (ctxt->input->buf->compressed > 0)
14156 		ret->compression = 9;
14157 	    else
14158 		ret->compression = ctxt->input->buf->compressed;
14159 	}
14160     }
14161     else {
14162        ret = NULL;
14163        xmlFreeDoc(ctxt->myDoc);
14164        ctxt->myDoc = NULL;
14165     }
14166     if (sax != NULL)
14167         ctxt->sax = NULL;
14168     xmlFreeParserCtxt(ctxt);
14169 
14170     return(ret);
14171 }
14172 
14173 /**
14174  * xmlSAXParseFile:
14175  * @sax:  the SAX handler block
14176  * @filename:  the filename
14177  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14178  *             documents
14179  *
14180  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14181  * compressed document is provided by default if found at compile-time.
14182  * It use the given SAX function block to handle the parsing callback.
14183  * If sax is NULL, fallback to the default DOM tree building routines.
14184  *
14185  * Returns the resulting document tree
14186  */
14187 
14188 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)14189 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14190                           int recovery) {
14191     return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14192 }
14193 
14194 /**
14195  * xmlRecoverDoc:
14196  * @cur:  a pointer to an array of xmlChar
14197  *
14198  * parse an XML in-memory document and build a tree.
14199  * In the case the document is not Well Formed, a attempt to build a
14200  * tree is tried anyway
14201  *
14202  * Returns the resulting document tree or NULL in case of failure
14203  */
14204 
14205 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)14206 xmlRecoverDoc(const xmlChar *cur) {
14207     return(xmlSAXParseDoc(NULL, cur, 1));
14208 }
14209 
14210 /**
14211  * xmlParseFile:
14212  * @filename:  the filename
14213  *
14214  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14215  * compressed document is provided by default if found at compile-time.
14216  *
14217  * Returns the resulting document tree if the file was wellformed,
14218  * NULL otherwise.
14219  */
14220 
14221 xmlDocPtr
xmlParseFile(const char * filename)14222 xmlParseFile(const char *filename) {
14223     return(xmlSAXParseFile(NULL, filename, 0));
14224 }
14225 
14226 /**
14227  * xmlRecoverFile:
14228  * @filename:  the filename
14229  *
14230  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14231  * compressed document is provided by default if found at compile-time.
14232  * In the case the document is not Well Formed, it attempts to build
14233  * a tree anyway
14234  *
14235  * Returns the resulting document tree or NULL in case of failure
14236  */
14237 
14238 xmlDocPtr
xmlRecoverFile(const char * filename)14239 xmlRecoverFile(const char *filename) {
14240     return(xmlSAXParseFile(NULL, filename, 1));
14241 }
14242 
14243 
14244 /**
14245  * xmlSetupParserForBuffer:
14246  * @ctxt:  an XML parser context
14247  * @buffer:  a xmlChar * buffer
14248  * @filename:  a file name
14249  *
14250  * Setup the parser context to parse a new buffer; Clears any prior
14251  * contents from the parser context. The buffer parameter must not be
14252  * NULL, but the filename parameter can be
14253  */
14254 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)14255 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14256                              const char* filename)
14257 {
14258     xmlParserInputPtr input;
14259 
14260     if ((ctxt == NULL) || (buffer == NULL))
14261         return;
14262 
14263     input = xmlNewInputStream(ctxt);
14264     if (input == NULL) {
14265         xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14266         xmlClearParserCtxt(ctxt);
14267         return;
14268     }
14269 
14270     xmlClearParserCtxt(ctxt);
14271     if (filename != NULL)
14272         input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14273     input->base = buffer;
14274     input->cur = buffer;
14275     input->end = &buffer[xmlStrlen(buffer)];
14276     inputPush(ctxt, input);
14277 }
14278 
14279 /**
14280  * xmlSAXUserParseFile:
14281  * @sax:  a SAX handler
14282  * @user_data:  The user data returned on SAX callbacks
14283  * @filename:  a file name
14284  *
14285  * parse an XML file and call the given SAX handler routines.
14286  * Automatic support for ZLIB/Compress compressed document is provided
14287  *
14288  * Returns 0 in case of success or a error number otherwise
14289  */
14290 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)14291 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14292                     const char *filename) {
14293     int ret = 0;
14294     xmlParserCtxtPtr ctxt;
14295 
14296     ctxt = xmlCreateFileParserCtxt(filename);
14297     if (ctxt == NULL) return -1;
14298     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14299 	xmlFree(ctxt->sax);
14300     ctxt->sax = sax;
14301     xmlDetectSAX2(ctxt);
14302 
14303     if (user_data != NULL)
14304 	ctxt->userData = user_data;
14305 
14306     xmlParseDocument(ctxt);
14307 
14308     if (ctxt->wellFormed)
14309 	ret = 0;
14310     else {
14311         if (ctxt->errNo != 0)
14312 	    ret = ctxt->errNo;
14313 	else
14314 	    ret = -1;
14315     }
14316     if (sax != NULL)
14317 	ctxt->sax = NULL;
14318     if (ctxt->myDoc != NULL) {
14319         xmlFreeDoc(ctxt->myDoc);
14320 	ctxt->myDoc = NULL;
14321     }
14322     xmlFreeParserCtxt(ctxt);
14323 
14324     return ret;
14325 }
14326 #endif /* LIBXML_SAX1_ENABLED */
14327 
14328 /************************************************************************
14329  *									*
14330  *		Front ends when parsing from memory			*
14331  *									*
14332  ************************************************************************/
14333 
14334 /**
14335  * xmlCreateMemoryParserCtxt:
14336  * @buffer:  a pointer to a char array
14337  * @size:  the size of the array
14338  *
14339  * Create a parser context for an XML in-memory document.
14340  *
14341  * Returns the new parser context or NULL
14342  */
14343 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)14344 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14345     xmlParserCtxtPtr ctxt;
14346     xmlParserInputPtr input;
14347     xmlParserInputBufferPtr buf;
14348 
14349     if (buffer == NULL)
14350 	return(NULL);
14351     if (size <= 0)
14352 	return(NULL);
14353 
14354     ctxt = xmlNewParserCtxt();
14355     if (ctxt == NULL)
14356 	return(NULL);
14357 
14358     /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14359     buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14360     if (buf == NULL) {
14361 	xmlFreeParserCtxt(ctxt);
14362 	return(NULL);
14363     }
14364 
14365     input = xmlNewInputStream(ctxt);
14366     if (input == NULL) {
14367 	xmlFreeParserInputBuffer(buf);
14368 	xmlFreeParserCtxt(ctxt);
14369 	return(NULL);
14370     }
14371 
14372     input->filename = NULL;
14373     input->buf = buf;
14374     xmlBufResetInput(input->buf->buffer, input);
14375 
14376     inputPush(ctxt, input);
14377     return(ctxt);
14378 }
14379 
14380 #ifdef LIBXML_SAX1_ENABLED
14381 /**
14382  * xmlSAXParseMemoryWithData:
14383  * @sax:  the SAX handler block
14384  * @buffer:  an pointer to a char array
14385  * @size:  the size of the array
14386  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14387  *             documents
14388  * @data:  the userdata
14389  *
14390  * parse an XML in-memory block and use the given SAX function block
14391  * to handle the parsing callback. If sax is NULL, fallback to the default
14392  * DOM tree building routines.
14393  *
14394  * User data (void *) is stored within the parser context in the
14395  * context's _private member, so it is available nearly everywhere in libxml
14396  *
14397  * Returns the resulting document tree
14398  */
14399 
14400 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)14401 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14402 	          int size, int recovery, void *data) {
14403     xmlDocPtr ret;
14404     xmlParserCtxtPtr ctxt;
14405 
14406     xmlInitParser();
14407 
14408     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14409     if (ctxt == NULL) return(NULL);
14410     if (sax != NULL) {
14411 	if (ctxt->sax != NULL)
14412 	    xmlFree(ctxt->sax);
14413         ctxt->sax = sax;
14414     }
14415     xmlDetectSAX2(ctxt);
14416     if (data!=NULL) {
14417 	ctxt->_private=data;
14418     }
14419 
14420     ctxt->recovery = recovery;
14421 
14422     xmlParseDocument(ctxt);
14423 
14424     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14425     else {
14426        ret = NULL;
14427        xmlFreeDoc(ctxt->myDoc);
14428        ctxt->myDoc = NULL;
14429     }
14430     if (sax != NULL)
14431 	ctxt->sax = NULL;
14432     xmlFreeParserCtxt(ctxt);
14433 
14434     return(ret);
14435 }
14436 
14437 /**
14438  * xmlSAXParseMemory:
14439  * @sax:  the SAX handler block
14440  * @buffer:  an pointer to a char array
14441  * @size:  the size of the array
14442  * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14443  *             documents
14444  *
14445  * parse an XML in-memory block and use the given SAX function block
14446  * to handle the parsing callback. If sax is NULL, fallback to the default
14447  * DOM tree building routines.
14448  *
14449  * Returns the resulting document tree
14450  */
14451 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)14452 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14453 	          int size, int recovery) {
14454     return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14455 }
14456 
14457 /**
14458  * xmlParseMemory:
14459  * @buffer:  an pointer to a char array
14460  * @size:  the size of the array
14461  *
14462  * parse an XML in-memory block and build a tree.
14463  *
14464  * Returns the resulting document tree
14465  */
14466 
xmlParseMemory(const char * buffer,int size)14467 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14468    return(xmlSAXParseMemory(NULL, buffer, size, 0));
14469 }
14470 
14471 /**
14472  * xmlRecoverMemory:
14473  * @buffer:  an pointer to a char array
14474  * @size:  the size of the array
14475  *
14476  * parse an XML in-memory block and build a tree.
14477  * In the case the document is not Well Formed, an attempt to
14478  * build a tree is tried anyway
14479  *
14480  * Returns the resulting document tree or NULL in case of error
14481  */
14482 
xmlRecoverMemory(const char * buffer,int size)14483 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14484    return(xmlSAXParseMemory(NULL, buffer, size, 1));
14485 }
14486 
14487 /**
14488  * xmlSAXUserParseMemory:
14489  * @sax:  a SAX handler
14490  * @user_data:  The user data returned on SAX callbacks
14491  * @buffer:  an in-memory XML document input
14492  * @size:  the length of the XML document in bytes
14493  *
14494  * A better SAX parsing routine.
14495  * parse an XML in-memory buffer and call the given SAX handler routines.
14496  *
14497  * Returns 0 in case of success or a error number otherwise
14498  */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)14499 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14500 			  const char *buffer, int size) {
14501     int ret = 0;
14502     xmlParserCtxtPtr ctxt;
14503 
14504     xmlInitParser();
14505 
14506     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14507     if (ctxt == NULL) return -1;
14508     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14509         xmlFree(ctxt->sax);
14510     ctxt->sax = sax;
14511     xmlDetectSAX2(ctxt);
14512 
14513     if (user_data != NULL)
14514 	ctxt->userData = user_data;
14515 
14516     xmlParseDocument(ctxt);
14517 
14518     if (ctxt->wellFormed)
14519 	ret = 0;
14520     else {
14521         if (ctxt->errNo != 0)
14522 	    ret = ctxt->errNo;
14523 	else
14524 	    ret = -1;
14525     }
14526     if (sax != NULL)
14527         ctxt->sax = NULL;
14528     if (ctxt->myDoc != NULL) {
14529         xmlFreeDoc(ctxt->myDoc);
14530 	ctxt->myDoc = NULL;
14531     }
14532     xmlFreeParserCtxt(ctxt);
14533 
14534     return ret;
14535 }
14536 #endif /* LIBXML_SAX1_ENABLED */
14537 
14538 /**
14539  * xmlCreateDocParserCtxt:
14540  * @cur:  a pointer to an array of xmlChar
14541  *
14542  * Creates a parser context for an XML in-memory document.
14543  *
14544  * Returns the new parser context or NULL
14545  */
14546 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)14547 xmlCreateDocParserCtxt(const xmlChar *cur) {
14548     int len;
14549 
14550     if (cur == NULL)
14551 	return(NULL);
14552     len = xmlStrlen(cur);
14553     return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14554 }
14555 
14556 #ifdef LIBXML_SAX1_ENABLED
14557 /**
14558  * xmlSAXParseDoc:
14559  * @sax:  the SAX handler block
14560  * @cur:  a pointer to an array of xmlChar
14561  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14562  *             documents
14563  *
14564  * parse an XML in-memory document and build a tree.
14565  * It use the given SAX function block to handle the parsing callback.
14566  * If sax is NULL, fallback to the default DOM tree building routines.
14567  *
14568  * Returns the resulting document tree
14569  */
14570 
14571 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14572 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14573     xmlDocPtr ret;
14574     xmlParserCtxtPtr ctxt;
14575     xmlSAXHandlerPtr oldsax = NULL;
14576 
14577     if (cur == NULL) return(NULL);
14578 
14579 
14580     ctxt = xmlCreateDocParserCtxt(cur);
14581     if (ctxt == NULL) return(NULL);
14582     if (sax != NULL) {
14583         oldsax = ctxt->sax;
14584         ctxt->sax = sax;
14585         ctxt->userData = NULL;
14586     }
14587     xmlDetectSAX2(ctxt);
14588 
14589     xmlParseDocument(ctxt);
14590     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14591     else {
14592        ret = NULL;
14593        xmlFreeDoc(ctxt->myDoc);
14594        ctxt->myDoc = NULL;
14595     }
14596     if (sax != NULL)
14597 	ctxt->sax = oldsax;
14598     xmlFreeParserCtxt(ctxt);
14599 
14600     return(ret);
14601 }
14602 
14603 /**
14604  * xmlParseDoc:
14605  * @cur:  a pointer to an array of xmlChar
14606  *
14607  * parse an XML in-memory document and build a tree.
14608  *
14609  * Returns the resulting document tree
14610  */
14611 
14612 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14613 xmlParseDoc(const xmlChar *cur) {
14614     return(xmlSAXParseDoc(NULL, cur, 0));
14615 }
14616 #endif /* LIBXML_SAX1_ENABLED */
14617 
14618 #ifdef LIBXML_LEGACY_ENABLED
14619 /************************************************************************
14620  *									*
14621  *	Specific function to keep track of entities references		*
14622  *	and used by the XSLT debugger					*
14623  *									*
14624  ************************************************************************/
14625 
/* Callback invoked by xmlAddEntityReference(); NULL means no callback is installed. */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14627 
14628 /**
14629  * xmlAddEntityReference:
14630  * @ent : A valid entity
14631  * @firstNode : A valid first node for children of entity
14632  * @lastNode : A valid last node of children entity
14633  *
14634  * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14635  */
14636 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14637 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14638                       xmlNodePtr lastNode)
14639 {
14640     if (xmlEntityRefFunc != NULL) {
14641         (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14642     }
14643 }
14644 
14645 
14646 /**
14647  * xmlSetEntityReferenceFunc:
14648  * @func: A valid function
14649  *
14650  * Set the function to call call back when a xml reference has been made
14651  */
14652 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14653 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14654 {
14655     xmlEntityRefFunc = func;
14656 }
14657 #endif /* LIBXML_LEGACY_ENABLED */
14658 
14659 /************************************************************************
14660  *									*
14661  *				Miscellaneous				*
14662  *									*
14663  ************************************************************************/
14664 
14665 #ifdef LIBXML_XPATH_ENABLED
14666 #include <libxml/xpath.h>
14667 #endif
14668 
/* Default generic error handler, defined elsewhere; compared against xmlGenericError in xmlInitParser(). */
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Set to 1 once xmlInitParser() has completed and back to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
14671 
/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Calls made after initialization has completed return immediately.
 * The first call runs the library's init routines in the order listed
 * below and then records completion in xmlParserInitialized.
 */

void
xmlInitParser(void) {
    /* Fast path: already initialized (checked without holding the mutex). */
    if (xmlParserInitialized != 0)
	return;

#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
    /*
     * On these Windows builds, arrange for cleanup at process exit, but
     * only when the default C library free() is the active deallocator.
     * NOTE(review): this runs before the init mutex is taken, so threads
     * racing through the check above could each register the handler;
     * confirm whether that matters.
     */
    if (xmlFree == free)
        atexit(xmlCleanupParser);
#endif

#ifdef LIBXML_THREAD_ENABLED
    /* Take the global init mutex and re-check the flag under it. */
    __xmlGlobalInitMutexLock();
    if (xmlParserInitialized == 0) {
#endif
	xmlInitThreads();
	xmlInitGlobals();
	/* Install the default generic error handler unless one was set. */
	if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
	    (xmlGenericError == NULL))
	    initGenericErrorDefaultFunc(NULL);
	xmlInitMemory();
        xmlInitializeDict();
	xmlInitCharEncodingHandlers();
	xmlDefaultSAXHandlerInit();
	xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
	xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
	htmlInitAutoClose();
	htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
	xmlXPathInit();
#endif
	/* Mark initialization complete only after every step has run. */
	xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
14720 
/**
 * xmlCleanupParser:
 *
 * This function name is somewhat misleading. It does not clean up
 * parser state, it cleans up memory allocated by the library itself.
 * It is a cleanup function for the XML library. It tries to reclaim all
 * related global memory allocated for the library processing.
 * It doesn't deallocate any document related memory. One should
 * call xmlCleanupParser() only when the process has finished using
 * the library and all XML/HTML documents built with it.
 * See also xmlInitParser() which has the opposite function of preparing
 * the library for operations.
 *
 * The call is a no-op when the library has not been initialized.
 *
 * WARNING: if your application is multithreaded or has plugin support
 *          calling this may crash the application if another thread or
 *          a plugin is still using libxml2. It's sometimes very hard to
 *          guess if libxml2 is in use in the application, some libraries
 *          or plugins may use it without notice. In case of doubt abstain
 *          from calling this function or do it just before calling exit()
 *          to avoid leak reports from valgrind !
 */

void
xmlCleanupParser(void) {
    /* Nothing to release if xmlInitParser() has not completed. */
    if (!xmlParserInitialized)
	return;

    xmlCleanupCharEncodingHandlers();
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlDictCleanup();
    xmlCleanupInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlCleanupOutputCallbacks();
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
    xmlSchemaCleanupTypes();
    xmlRelaxNGCleanupTypes();
#endif
    xmlResetLastError();
    xmlCleanupGlobals();
    xmlCleanupThreads(); /* must be last if called not from the main thread */
    xmlCleanupMemory();
    /* Clear the flag so a later xmlInitParser() runs its setup again. */
    xmlParserInitialized = 0;
}
14767 
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Function tagged with ATTRIBUTE_DESTRUCTOR that runs the library
 * cleanup. The cleanup is skipped when a custom deallocator has been
 * installed, because calling custom deallocation functions in a
 * destructor can cause problems, for example with Nokogiri.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    if (xmlFree != free)
        return;
    xmlCleanupParser();
}
#endif
14781 
14782 /************************************************************************
14783  *									*
14784  *	New set (2.6.0) of simpler and more flexible APIs		*
14785  *									*
14786  ************************************************************************/
14787 
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used; a NULL dict means every non-NULL string is freed.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a
 * single statement, including as the unbraced body of an if/else. Every
 * use must therefore end with a semicolon. @str is evaluated more than
 * once, so it must not have side effects.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14799 
14800 /**
14801  * xmlCtxtReset:
14802  * @ctxt: an XML parser context
14803  *
14804  * Reset a parser context
14805  */
14806 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)14807 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14808 {
14809     xmlParserInputPtr input;
14810     xmlDictPtr dict;
14811 
14812     if (ctxt == NULL)
14813         return;
14814 
14815     dict = ctxt->dict;
14816 
14817     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14818         xmlFreeInputStream(input);
14819     }
14820     ctxt->inputNr = 0;
14821     ctxt->input = NULL;
14822 
14823     ctxt->spaceNr = 0;
14824     if (ctxt->spaceTab != NULL) {
14825 	ctxt->spaceTab[0] = -1;
14826 	ctxt->space = &ctxt->spaceTab[0];
14827     } else {
14828         ctxt->space = NULL;
14829     }
14830 
14831 
14832     ctxt->nodeNr = 0;
14833     ctxt->node = NULL;
14834 
14835     ctxt->nameNr = 0;
14836     ctxt->name = NULL;
14837 
14838     DICT_FREE(ctxt->version);
14839     ctxt->version = NULL;
14840     DICT_FREE(ctxt->encoding);
14841     ctxt->encoding = NULL;
14842     DICT_FREE(ctxt->directory);
14843     ctxt->directory = NULL;
14844     DICT_FREE(ctxt->extSubURI);
14845     ctxt->extSubURI = NULL;
14846     DICT_FREE(ctxt->extSubSystem);
14847     ctxt->extSubSystem = NULL;
14848     if (ctxt->myDoc != NULL)
14849         xmlFreeDoc(ctxt->myDoc);
14850     ctxt->myDoc = NULL;
14851 
14852     ctxt->standalone = -1;
14853     ctxt->hasExternalSubset = 0;
14854     ctxt->hasPErefs = 0;
14855     ctxt->html = 0;
14856     ctxt->external = 0;
14857     ctxt->instate = XML_PARSER_START;
14858     ctxt->token = 0;
14859 
14860     ctxt->wellFormed = 1;
14861     ctxt->nsWellFormed = 1;
14862     ctxt->disableSAX = 0;
14863     ctxt->valid = 1;
14864 #if 0
14865     ctxt->vctxt.userData = ctxt;
14866     ctxt->vctxt.error = xmlParserValidityError;
14867     ctxt->vctxt.warning = xmlParserValidityWarning;
14868 #endif
14869     ctxt->record_info = 0;
14870     ctxt->checkIndex = 0;
14871     ctxt->inSubset = 0;
14872     ctxt->errNo = XML_ERR_OK;
14873     ctxt->depth = 0;
14874     ctxt->charset = XML_CHAR_ENCODING_UTF8;
14875     ctxt->catalogs = NULL;
14876     ctxt->nbentities = 0;
14877     ctxt->sizeentities = 0;
14878     ctxt->sizeentcopy = 0;
14879     xmlInitNodeInfoSeq(&ctxt->node_seq);
14880 
14881     if (ctxt->attsDefault != NULL) {
14882         xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14883         ctxt->attsDefault = NULL;
14884     }
14885     if (ctxt->attsSpecial != NULL) {
14886         xmlHashFree(ctxt->attsSpecial, NULL);
14887         ctxt->attsSpecial = NULL;
14888     }
14889 
14890 #ifdef LIBXML_CATALOG_ENABLED
14891     if (ctxt->catalogs != NULL)
14892 	xmlCatalogFreeLocal(ctxt->catalogs);
14893 #endif
14894     if (ctxt->lastError.code != XML_ERR_OK)
14895         xmlResetError(&ctxt->lastError);
14896 }
14897 
14898 /**
14899  * xmlCtxtResetPush:
14900  * @ctxt: an XML parser context
14901  * @chunk:  a pointer to an array of chars
14902  * @size:  number of chars in the array
14903  * @filename:  an optional file name or URI
14904  * @encoding:  the document encoding, or NULL
14905  *
14906  * Reset a push parser context
14907  *
14908  * Returns 0 in case of success and 1 in case of error
14909  */
14910 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)14911 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14912                  int size, const char *filename, const char *encoding)
14913 {
14914     xmlParserInputPtr inputStream;
14915     xmlParserInputBufferPtr buf;
14916     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14917 
14918     if (ctxt == NULL)
14919         return(1);
14920 
14921     if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14922         enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14923 
14924     buf = xmlAllocParserInputBuffer(enc);
14925     if (buf == NULL)
14926         return(1);
14927 
14928     if (ctxt == NULL) {
14929         xmlFreeParserInputBuffer(buf);
14930         return(1);
14931     }
14932 
14933     xmlCtxtReset(ctxt);
14934 
14935     if (filename == NULL) {
14936         ctxt->directory = NULL;
14937     } else {
14938         ctxt->directory = xmlParserGetDirectory(filename);
14939     }
14940 
14941     inputStream = xmlNewInputStream(ctxt);
14942     if (inputStream == NULL) {
14943         xmlFreeParserInputBuffer(buf);
14944         return(1);
14945     }
14946 
14947     if (filename == NULL)
14948         inputStream->filename = NULL;
14949     else
14950         inputStream->filename = (char *)
14951             xmlCanonicPath((const xmlChar *) filename);
14952     inputStream->buf = buf;
14953     xmlBufResetInput(buf->buffer, inputStream);
14954 
14955     inputPush(ctxt, inputStream);
14956 
14957     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14958         (ctxt->input->buf != NULL)) {
14959 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14960         size_t cur = ctxt->input->cur - ctxt->input->base;
14961 
14962         xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14963 
14964         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14965 #ifdef DEBUG_PUSH
14966         xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14967 #endif
14968     }
14969 
14970     if (encoding != NULL) {
14971         xmlCharEncodingHandlerPtr hdlr;
14972 
14973         if (ctxt->encoding != NULL)
14974 	    xmlFree((xmlChar *) ctxt->encoding);
14975         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14976 
14977         hdlr = xmlFindCharEncodingHandler(encoding);
14978         if (hdlr != NULL) {
14979             xmlSwitchToEncoding(ctxt, hdlr);
14980 	} else {
14981 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14982 			      "Unsupported encoding %s\n", BAD_CAST encoding);
14983         }
14984     } else if (enc != XML_CHAR_ENCODING_NONE) {
14985         xmlSwitchEncoding(ctxt, enc);
14986     }
14987 
14988     return(0);
14989 }
14990 
14991 
14992 /**
14993  * xmlCtxtUseOptionsInternal:
14994  * @ctxt: an XML parser context
14995  * @options:  a combination of xmlParserOption
14996  * @encoding:  the user provided encoding to use
14997  *
14998  * Applies the options to the parser context
14999  *
15000  * Returns 0 in case of success, the set of unknown or unimplemented options
15001  *         in case of error.
15002  */
15003 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)15004 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15005 {
15006     if (ctxt == NULL)
15007         return(-1);
15008     if (encoding != NULL) {
15009         if (ctxt->encoding != NULL)
15010 	    xmlFree((xmlChar *) ctxt->encoding);
15011         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15012     }
15013     if (options & XML_PARSE_RECOVER) {
15014         ctxt->recovery = 1;
15015         options -= XML_PARSE_RECOVER;
15016 	ctxt->options |= XML_PARSE_RECOVER;
15017     } else
15018         ctxt->recovery = 0;
15019     if (options & XML_PARSE_DTDLOAD) {
15020         ctxt->loadsubset = XML_DETECT_IDS;
15021         options -= XML_PARSE_DTDLOAD;
15022 	ctxt->options |= XML_PARSE_DTDLOAD;
15023     } else
15024         ctxt->loadsubset = 0;
15025     if (options & XML_PARSE_DTDATTR) {
15026         ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15027         options -= XML_PARSE_DTDATTR;
15028 	ctxt->options |= XML_PARSE_DTDATTR;
15029     }
15030     if (options & XML_PARSE_NOENT) {
15031         ctxt->replaceEntities = 1;
15032         /* ctxt->loadsubset |= XML_DETECT_IDS; */
15033         options -= XML_PARSE_NOENT;
15034 	ctxt->options |= XML_PARSE_NOENT;
15035     } else
15036         ctxt->replaceEntities = 0;
15037     if (options & XML_PARSE_PEDANTIC) {
15038         ctxt->pedantic = 1;
15039         options -= XML_PARSE_PEDANTIC;
15040 	ctxt->options |= XML_PARSE_PEDANTIC;
15041     } else
15042         ctxt->pedantic = 0;
15043     if (options & XML_PARSE_NOBLANKS) {
15044         ctxt->keepBlanks = 0;
15045         ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15046         options -= XML_PARSE_NOBLANKS;
15047 	ctxt->options |= XML_PARSE_NOBLANKS;
15048     } else
15049         ctxt->keepBlanks = 1;
15050     if (options & XML_PARSE_DTDVALID) {
15051         ctxt->validate = 1;
15052         if (options & XML_PARSE_NOWARNING)
15053             ctxt->vctxt.warning = NULL;
15054         if (options & XML_PARSE_NOERROR)
15055             ctxt->vctxt.error = NULL;
15056         options -= XML_PARSE_DTDVALID;
15057 	ctxt->options |= XML_PARSE_DTDVALID;
15058     } else
15059         ctxt->validate = 0;
15060     if (options & XML_PARSE_NOWARNING) {
15061         ctxt->sax->warning = NULL;
15062         options -= XML_PARSE_NOWARNING;
15063     }
15064     if (options & XML_PARSE_NOERROR) {
15065         ctxt->sax->error = NULL;
15066         ctxt->sax->fatalError = NULL;
15067         options -= XML_PARSE_NOERROR;
15068     }
15069 #ifdef LIBXML_SAX1_ENABLED
15070     if (options & XML_PARSE_SAX1) {
15071         ctxt->sax->startElement = xmlSAX2StartElement;
15072         ctxt->sax->endElement = xmlSAX2EndElement;
15073         ctxt->sax->startElementNs = NULL;
15074         ctxt->sax->endElementNs = NULL;
15075         ctxt->sax->initialized = 1;
15076         options -= XML_PARSE_SAX1;
15077 	ctxt->options |= XML_PARSE_SAX1;
15078     }
15079 #endif /* LIBXML_SAX1_ENABLED */
15080     if (options & XML_PARSE_NODICT) {
15081         ctxt->dictNames = 0;
15082         options -= XML_PARSE_NODICT;
15083 	ctxt->options |= XML_PARSE_NODICT;
15084     } else {
15085         ctxt->dictNames = 1;
15086     }
15087     if (options & XML_PARSE_NOCDATA) {
15088         ctxt->sax->cdataBlock = NULL;
15089         options -= XML_PARSE_NOCDATA;
15090 	ctxt->options |= XML_PARSE_NOCDATA;
15091     }
15092     if (options & XML_PARSE_NSCLEAN) {
15093 	ctxt->options |= XML_PARSE_NSCLEAN;
15094         options -= XML_PARSE_NSCLEAN;
15095     }
15096     if (options & XML_PARSE_NONET) {
15097 	ctxt->options |= XML_PARSE_NONET;
15098         options -= XML_PARSE_NONET;
15099     }
15100     if (options & XML_PARSE_COMPACT) {
15101 	ctxt->options |= XML_PARSE_COMPACT;
15102         options -= XML_PARSE_COMPACT;
15103     }
15104     if (options & XML_PARSE_OLD10) {
15105 	ctxt->options |= XML_PARSE_OLD10;
15106         options -= XML_PARSE_OLD10;
15107     }
15108     if (options & XML_PARSE_NOBASEFIX) {
15109 	ctxt->options |= XML_PARSE_NOBASEFIX;
15110         options -= XML_PARSE_NOBASEFIX;
15111     }
15112     if (options & XML_PARSE_HUGE) {
15113 	ctxt->options |= XML_PARSE_HUGE;
15114         options -= XML_PARSE_HUGE;
15115         if (ctxt->dict != NULL)
15116             xmlDictSetLimit(ctxt->dict, 0);
15117     }
15118     if (options & XML_PARSE_OLDSAX) {
15119 	ctxt->options |= XML_PARSE_OLDSAX;
15120         options -= XML_PARSE_OLDSAX;
15121     }
15122     if (options & XML_PARSE_IGNORE_ENC) {
15123 	ctxt->options |= XML_PARSE_IGNORE_ENC;
15124         options -= XML_PARSE_IGNORE_ENC;
15125     }
15126     if (options & XML_PARSE_BIG_LINES) {
15127 	ctxt->options |= XML_PARSE_BIG_LINES;
15128         options -= XML_PARSE_BIG_LINES;
15129     }
15130     ctxt->linenumbers = 1;
15131     return (options);
15132 }
15133 
15134 /**
15135  * xmlCtxtUseOptions:
15136  * @ctxt: an XML parser context
15137  * @options:  a combination of xmlParserOption
15138  *
15139  * Applies the options to the parser context
15140  *
15141  * Returns 0 in case of success, the set of unknown or unimplemented options
15142  *         in case of error.
15143  */
15144 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)15145 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15146 {
15147    return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15148 }
15149 
15150 /**
15151  * xmlDoRead:
15152  * @ctxt:  an XML parser context
15153  * @URL:  the base URL to use for the document
15154  * @encoding:  the document encoding, or NULL
15155  * @options:  a combination of xmlParserOption
15156  * @reuse:  keep the context for reuse
15157  *
15158  * Common front-end for the xmlRead functions
15159  *
15160  * Returns the resulting document tree or NULL
15161  */
15162 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)15163 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15164           int options, int reuse)
15165 {
15166     xmlDocPtr ret;
15167 
15168     xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15169     if (encoding != NULL) {
15170         xmlCharEncodingHandlerPtr hdlr;
15171 
15172 	hdlr = xmlFindCharEncodingHandler(encoding);
15173 	if (hdlr != NULL)
15174 	    xmlSwitchToEncoding(ctxt, hdlr);
15175     }
15176     if ((URL != NULL) && (ctxt->input != NULL) &&
15177         (ctxt->input->filename == NULL))
15178         ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15179     xmlParseDocument(ctxt);
15180     if ((ctxt->wellFormed) || ctxt->recovery)
15181         ret = ctxt->myDoc;
15182     else {
15183         ret = NULL;
15184 	if (ctxt->myDoc != NULL) {
15185 	    xmlFreeDoc(ctxt->myDoc);
15186 	}
15187     }
15188     ctxt->myDoc = NULL;
15189     if (!reuse) {
15190 	xmlFreeParserCtxt(ctxt);
15191     }
15192 
15193     return (ret);
15194 }
15195 
15196 /**
15197  * xmlReadDoc:
15198  * @cur:  a pointer to a zero terminated string
15199  * @URL:  the base URL to use for the document
15200  * @encoding:  the document encoding, or NULL
15201  * @options:  a combination of xmlParserOption
15202  *
15203  * parse an XML in-memory document and build a tree.
15204  *
15205  * Returns the resulting document tree
15206  */
15207 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)15208 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15209 {
15210     xmlParserCtxtPtr ctxt;
15211 
15212     if (cur == NULL)
15213         return (NULL);
15214     xmlInitParser();
15215 
15216     ctxt = xmlCreateDocParserCtxt(cur);
15217     if (ctxt == NULL)
15218         return (NULL);
15219     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15220 }
15221 
15222 /**
15223  * xmlReadFile:
15224  * @filename:  a file or URL
15225  * @encoding:  the document encoding, or NULL
15226  * @options:  a combination of xmlParserOption
15227  *
15228  * parse an XML file from the filesystem or the network.
15229  *
15230  * Returns the resulting document tree
15231  */
15232 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)15233 xmlReadFile(const char *filename, const char *encoding, int options)
15234 {
15235     xmlParserCtxtPtr ctxt;
15236 
15237     xmlInitParser();
15238     ctxt = xmlCreateURLParserCtxt(filename, options);
15239     if (ctxt == NULL)
15240         return (NULL);
15241     return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15242 }
15243 
15244 /**
15245  * xmlReadMemory:
15246  * @buffer:  a pointer to a char array
15247  * @size:  the size of the array
15248  * @URL:  the base URL to use for the document
15249  * @encoding:  the document encoding, or NULL
15250  * @options:  a combination of xmlParserOption
15251  *
15252  * parse an XML in-memory document and build a tree.
15253  *
15254  * Returns the resulting document tree
15255  */
15256 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)15257 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15258 {
15259     xmlParserCtxtPtr ctxt;
15260 
15261     xmlInitParser();
15262     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15263     if (ctxt == NULL)
15264         return (NULL);
15265     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15266 }
15267 
15268 /**
15269  * xmlReadFd:
15270  * @fd:  an open file descriptor
15271  * @URL:  the base URL to use for the document
15272  * @encoding:  the document encoding, or NULL
15273  * @options:  a combination of xmlParserOption
15274  *
15275  * parse an XML from a file descriptor and build a tree.
15276  * NOTE that the file descriptor will not be closed when the
15277  *      reader is closed or reset.
15278  *
15279  * Returns the resulting document tree
15280  */
15281 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)15282 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15283 {
15284     xmlParserCtxtPtr ctxt;
15285     xmlParserInputBufferPtr input;
15286     xmlParserInputPtr stream;
15287 
15288     if (fd < 0)
15289         return (NULL);
15290     xmlInitParser();
15291 
15292     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15293     if (input == NULL)
15294         return (NULL);
15295     input->closecallback = NULL;
15296     ctxt = xmlNewParserCtxt();
15297     if (ctxt == NULL) {
15298         xmlFreeParserInputBuffer(input);
15299         return (NULL);
15300     }
15301     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15302     if (stream == NULL) {
15303         xmlFreeParserInputBuffer(input);
15304 	xmlFreeParserCtxt(ctxt);
15305         return (NULL);
15306     }
15307     inputPush(ctxt, stream);
15308     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15309 }
15310 
15311 /**
15312  * xmlReadIO:
15313  * @ioread:  an I/O read function
15314  * @ioclose:  an I/O close function
15315  * @ioctx:  an I/O handler
15316  * @URL:  the base URL to use for the document
15317  * @encoding:  the document encoding, or NULL
15318  * @options:  a combination of xmlParserOption
15319  *
15320  * parse an XML document from I/O functions and source and build a tree.
15321  *
15322  * Returns the resulting document tree
15323  */
15324 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15325 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15326           void *ioctx, const char *URL, const char *encoding, int options)
15327 {
15328     xmlParserCtxtPtr ctxt;
15329     xmlParserInputBufferPtr input;
15330     xmlParserInputPtr stream;
15331 
15332     if (ioread == NULL)
15333         return (NULL);
15334     xmlInitParser();
15335 
15336     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15337                                          XML_CHAR_ENCODING_NONE);
15338     if (input == NULL) {
15339         if (ioclose != NULL)
15340             ioclose(ioctx);
15341         return (NULL);
15342     }
15343     ctxt = xmlNewParserCtxt();
15344     if (ctxt == NULL) {
15345         xmlFreeParserInputBuffer(input);
15346         return (NULL);
15347     }
15348     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15349     if (stream == NULL) {
15350         xmlFreeParserInputBuffer(input);
15351 	xmlFreeParserCtxt(ctxt);
15352         return (NULL);
15353     }
15354     inputPush(ctxt, stream);
15355     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15356 }
15357 
15358 /**
15359  * xmlCtxtReadDoc:
15360  * @ctxt:  an XML parser context
15361  * @cur:  a pointer to a zero terminated string
15362  * @URL:  the base URL to use for the document
15363  * @encoding:  the document encoding, or NULL
15364  * @options:  a combination of xmlParserOption
15365  *
15366  * parse an XML in-memory document and build a tree.
15367  * This reuses the existing @ctxt parser context
15368  *
15369  * Returns the resulting document tree
15370  */
15371 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)15372 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15373                const char *URL, const char *encoding, int options)
15374 {
15375     xmlParserInputPtr stream;
15376 
15377     if (cur == NULL)
15378         return (NULL);
15379     if (ctxt == NULL)
15380         return (NULL);
15381     xmlInitParser();
15382 
15383     xmlCtxtReset(ctxt);
15384 
15385     stream = xmlNewStringInputStream(ctxt, cur);
15386     if (stream == NULL) {
15387         return (NULL);
15388     }
15389     inputPush(ctxt, stream);
15390     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15391 }
15392 
15393 /**
15394  * xmlCtxtReadFile:
15395  * @ctxt:  an XML parser context
15396  * @filename:  a file or URL
15397  * @encoding:  the document encoding, or NULL
15398  * @options:  a combination of xmlParserOption
15399  *
15400  * parse an XML file from the filesystem or the network.
15401  * This reuses the existing @ctxt parser context
15402  *
15403  * Returns the resulting document tree
15404  */
15405 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)15406 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15407                 const char *encoding, int options)
15408 {
15409     xmlParserInputPtr stream;
15410 
15411     if (filename == NULL)
15412         return (NULL);
15413     if (ctxt == NULL)
15414         return (NULL);
15415     xmlInitParser();
15416 
15417     xmlCtxtReset(ctxt);
15418 
15419     stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15420     if (stream == NULL) {
15421         return (NULL);
15422     }
15423     inputPush(ctxt, stream);
15424     return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15425 }
15426 
15427 /**
15428  * xmlCtxtReadMemory:
15429  * @ctxt:  an XML parser context
15430  * @buffer:  a pointer to a char array
15431  * @size:  the size of the array
15432  * @URL:  the base URL to use for the document
15433  * @encoding:  the document encoding, or NULL
15434  * @options:  a combination of xmlParserOption
15435  *
15436  * parse an XML in-memory document and build a tree.
15437  * This reuses the existing @ctxt parser context
15438  *
15439  * Returns the resulting document tree
15440  */
15441 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)15442 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15443                   const char *URL, const char *encoding, int options)
15444 {
15445     xmlParserInputBufferPtr input;
15446     xmlParserInputPtr stream;
15447 
15448     if (ctxt == NULL)
15449         return (NULL);
15450     if (buffer == NULL)
15451         return (NULL);
15452     xmlInitParser();
15453 
15454     xmlCtxtReset(ctxt);
15455 
15456     input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15457     if (input == NULL) {
15458 	return(NULL);
15459     }
15460 
15461     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15462     if (stream == NULL) {
15463 	xmlFreeParserInputBuffer(input);
15464 	return(NULL);
15465     }
15466 
15467     inputPush(ctxt, stream);
15468     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15469 }
15470 
15471 /**
15472  * xmlCtxtReadFd:
15473  * @ctxt:  an XML parser context
15474  * @fd:  an open file descriptor
15475  * @URL:  the base URL to use for the document
15476  * @encoding:  the document encoding, or NULL
15477  * @options:  a combination of xmlParserOption
15478  *
15479  * parse an XML from a file descriptor and build a tree.
15480  * This reuses the existing @ctxt parser context
15481  * NOTE that the file descriptor will not be closed when the
15482  *      reader is closed or reset.
15483  *
15484  * Returns the resulting document tree
15485  */
15486 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)15487 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15488               const char *URL, const char *encoding, int options)
15489 {
15490     xmlParserInputBufferPtr input;
15491     xmlParserInputPtr stream;
15492 
15493     if (fd < 0)
15494         return (NULL);
15495     if (ctxt == NULL)
15496         return (NULL);
15497     xmlInitParser();
15498 
15499     xmlCtxtReset(ctxt);
15500 
15501 
15502     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15503     if (input == NULL)
15504         return (NULL);
15505     input->closecallback = NULL;
15506     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15507     if (stream == NULL) {
15508         xmlFreeParserInputBuffer(input);
15509         return (NULL);
15510     }
15511     inputPush(ctxt, stream);
15512     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15513 }
15514 
15515 /**
15516  * xmlCtxtReadIO:
15517  * @ctxt:  an XML parser context
15518  * @ioread:  an I/O read function
15519  * @ioclose:  an I/O close function
15520  * @ioctx:  an I/O handler
15521  * @URL:  the base URL to use for the document
15522  * @encoding:  the document encoding, or NULL
15523  * @options:  a combination of xmlParserOption
15524  *
15525  * parse an XML document from I/O functions and source and build a tree.
15526  * This reuses the existing @ctxt parser context
15527  *
15528  * Returns the resulting document tree
15529  */
15530 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15531 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15532               xmlInputCloseCallback ioclose, void *ioctx,
15533 	      const char *URL,
15534               const char *encoding, int options)
15535 {
15536     xmlParserInputBufferPtr input;
15537     xmlParserInputPtr stream;
15538 
15539     if (ioread == NULL)
15540         return (NULL);
15541     if (ctxt == NULL)
15542         return (NULL);
15543     xmlInitParser();
15544 
15545     xmlCtxtReset(ctxt);
15546 
15547     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15548                                          XML_CHAR_ENCODING_NONE);
15549     if (input == NULL) {
15550         if (ioclose != NULL)
15551             ioclose(ioctx);
15552         return (NULL);
15553     }
15554     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15555     if (stream == NULL) {
15556         xmlFreeParserInputBuffer(input);
15557         return (NULL);
15558     }
15559     inputPush(ctxt, stream);
15560     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15561 }
15562 
15563 #define bottom_parser
15564 #include "elfgcchack.h"
15565