• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3  *            implemented on top of the SAX interfaces
4  *
5  * References:
6  *   The XML specification:
7  *     http://www.w3.org/TR/REC-xml
8  *   Original 1.0 version:
9  *     http://www.w3.org/TR/1998/REC-xml-19980210
10  *   XML second edition working draft
11  *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12  *
13  * Okay this is a big file, the parser core is around 7000 lines, then it
14  * is followed by the progressive parser top routines, then the various
15  * high level APIs to call the parser and a few miscellaneous functions.
16  * A number of helper functions and deprecated ones have been moved to
17  * parserInternals.c to reduce this file size.
18  * As much as possible the functions are associated with their relative
19  * production in the XML specification. A few productions defining the
20  * different ranges of character are actually implanted either in
21  * parserInternals.h or parserInternals.c
22  * The DOM tree build is realized from the default SAX callbacks in
23  * the module SAX.c.
24  * The routines doing the validation checks are in valid.c and called either
25  * from the SAX callbacks or as standalone functions using a preparsed
26  * document.
27  *
28  * See Copyright for the status of this software.
29  *
30  * daniel@veillard.com
31  */
32 
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37 
38 #define IN_LIBXML
39 #include "libxml.h"
40 
41 #if defined(_WIN32) && !defined (__CYGWIN__)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
46 
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <libxml/xmlmemory.h>
53 #include <libxml/threads.h>
54 #include <libxml/globals.h>
55 #include <libxml/tree.h>
56 #include <libxml/parser.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #ifdef LIBXML_CATALOG_ENABLED
65 #include <libxml/catalog.h>
66 #endif
67 #ifdef LIBXML_SCHEMAS_ENABLED
68 #include <libxml/xmlschemastypes.h>
69 #include <libxml/relaxng.h>
70 #endif
71 #ifdef HAVE_CTYPE_H
72 #include <ctype.h>
73 #endif
74 #ifdef HAVE_STDLIB_H
75 #include <stdlib.h>
76 #endif
77 #ifdef HAVE_SYS_STAT_H
78 #include <sys/stat.h>
79 #endif
80 #ifdef HAVE_FCNTL_H
81 #include <fcntl.h>
82 #endif
83 #ifdef HAVE_UNISTD_H
84 #include <unistd.h>
85 #endif
86 
87 #include "buf.h"
88 #include "enc.h"
89 
/*
 * Per start-tag record. Only the field names and types are visible in
 * this part of the file; the notes below are inferred from the names and
 * should be confirmed against the code that fills the structure in.
 */
struct _xmlStartTag {
    const xmlChar *prefix;  /* presumably the namespace prefix of the tag */
    const xmlChar *URI;     /* presumably the namespace URI of the tag */
    int line;               /* presumably the input line of the tag */
    int nsNr;               /* presumably a namespace count -- TODO confirm */
};
96 
/*
 * Forward declarations of static helpers that are referenced before
 * their definitions.
 */

/* Raises a fatal parser error; the definition is further down this file. */
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);

static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
	                  const xmlChar *base, xmlParserCtxtPtr pctx);

static void xmlHaltParser(xmlParserCtxtPtr ctxt);

static int
xmlParseElementStart(xmlParserCtxtPtr ctxt);

static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
111 
112 /************************************************************************
113  *									*
114  *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
115  *									*
116  ************************************************************************/
117 
/* NOTE(review): not referenced in the visible part of this file. */
#define XML_MAX_HUGE_LENGTH 1000000000

/*
 * XML_PARSER_BIG_ENTITY: in the size-based branch of
 * xmlParserEntityCheck(), sizes below this value are accepted without
 * looking at the amount of input consumed.
 */
#define XML_PARSER_BIG_ENTITY 1000
/* NOTE(review): not referenced in the visible part of this file. */
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 *    replacement over the size in bytes of the input indicates that you have
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
 *    replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
130 
131 /*
132  * xmlParserEntityCheck
133  *
134  * Function to check non-linear entity expansion behaviour
135  * This is here to detect and stop exponential linear entity expansion
136  * This is not a limitation of the parser but a safety
137  * boundary feature. It can be disabled with the XML_PARSE_HUGE
138  * parser option.
139  */
140 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,size_t size,xmlEntityPtr ent,size_t replacement)141 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
142                      xmlEntityPtr ent, size_t replacement)
143 {
144     size_t consumed = 0;
145     int i;
146 
147     if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
148         return (0);
149     if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
150         return (1);
151 
152     /*
153      * This may look absurd but is needed to detect
154      * entities problems
155      */
156     if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
157 	(ent->content != NULL) && (ent->checked == 0) &&
158 	(ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
159 	unsigned long oldnbent = ctxt->nbentities, diff;
160 	xmlChar *rep;
161 
162 	ent->checked = 1;
163 
164         ++ctxt->depth;
165 	rep = xmlStringDecodeEntities(ctxt, ent->content,
166 				  XML_SUBSTITUTE_REF, 0, 0, 0);
167         --ctxt->depth;
168 	if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
169 	    ent->content[0] = 0;
170 	}
171 
172         diff = ctxt->nbentities - oldnbent + 1;
173         if (diff > INT_MAX / 2)
174             diff = INT_MAX / 2;
175 	ent->checked = diff * 2;
176 	if (rep != NULL) {
177 	    if (xmlStrchr(rep, '<'))
178 		ent->checked |= 1;
179 	    xmlFree(rep);
180 	    rep = NULL;
181 	}
182     }
183 
184     /*
185      * Prevent entity exponential check, not just replacement while
186      * parsing the DTD
187      * The check is potentially costly so do that only once in a thousand
188      */
189     if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
190         (ctxt->nbentities % 1024 == 0)) {
191 	for (i = 0;i < ctxt->inputNr;i++) {
192 	    consumed += ctxt->inputTab[i]->consumed +
193 	               (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
194 	}
195 	if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
196 	    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
197 	    ctxt->instate = XML_PARSER_EOF;
198 	    return (1);
199 	}
200 	consumed = 0;
201     }
202 
203 
204 
205     if (replacement != 0) {
206 	if (replacement < XML_MAX_TEXT_LENGTH)
207 	    return(0);
208 
209         /*
210 	 * If the volume of entity copy reaches 10 times the
211 	 * amount of parsed data and over the large text threshold
212 	 * then that's very likely to be an abuse.
213 	 */
214         if (ctxt->input != NULL) {
215 	    consumed = ctxt->input->consumed +
216 	               (ctxt->input->cur - ctxt->input->base);
217 	}
218         consumed += ctxt->sizeentities;
219 
220         if (replacement < XML_PARSER_NON_LINEAR * consumed)
221 	    return(0);
222     } else if (size != 0) {
223         /*
224          * Do the check based on the replacement size of the entity
225          */
226         if (size < XML_PARSER_BIG_ENTITY)
227 	    return(0);
228 
229         /*
230          * A limit on the amount of text data reasonably used
231          */
232         if (ctxt->input != NULL) {
233             consumed = ctxt->input->consumed +
234                 (ctxt->input->cur - ctxt->input->base);
235         }
236         consumed += ctxt->sizeentities;
237 
238         if ((size < XML_PARSER_NON_LINEAR * consumed) &&
239 	    (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
240             return (0);
241     } else if (ent != NULL) {
242         /*
243          * use the number of parsed entities in the replacement
244          */
245         size = ent->checked / 2;
246 
247         /*
248          * The amount of data parsed counting entities size only once
249          */
250         if (ctxt->input != NULL) {
251             consumed = ctxt->input->consumed +
252                 (ctxt->input->cur - ctxt->input->base);
253         }
254         consumed += ctxt->sizeentities;
255 
256         /*
257          * Check the density of entities for the amount of data
258 	 * knowing an entity reference will take at least 3 bytes
259          */
260         if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
261             return (0);
262     } else {
263         /*
264          * strange we got no data for checking
265          */
266 	if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
267 	     (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
268 	    (ctxt->nbentities <= 10000))
269 	    return (0);
270     }
271     xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
272     return (1);
273 }
274 
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;



/* NOTE(review): none of the four macros below is used in the visible part of this file. */
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the available input characters
 * are followed by 0, which should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100

/*
 * List of XML prefixed PI targets allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
312 
313 
/*
 * Forward declarations of static helpers; their definitions are not in
 * the visible part of this file.
 */

/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                              const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
	              xmlSAXHandlerPtr sax,
		      void *user_data, int depth, const xmlChar *URL,
		      const xmlChar *ID, xmlNodePtr *list);

static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
                          const char *encoding);
/* Only declared when legacy support is compiled in. */
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
		      const xmlChar *string, void *user_data, xmlNodePtr *lst);

static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
339 
340 /************************************************************************
341  *									*
342  *		Some factorized error routines				*
343  *									*
344  ************************************************************************/
345 
346 /**
347  * xmlErrAttributeDup:
348  * @ctxt:  an XML parser context
349  * @prefix:  the attribute prefix
350  * @localname:  the attribute localname
351  *
352  * Handle a redefinition of attribute error
353  */
354 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)355 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
356                    const xmlChar * localname)
357 {
358     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
359         (ctxt->instate == XML_PARSER_EOF))
360 	return;
361     if (ctxt != NULL)
362 	ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
363 
364     if (prefix == NULL)
365         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
366                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
367                         (const char *) localname, NULL, NULL, 0, 0,
368                         "Attribute %s redefined\n", localname);
369     else
370         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
371                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
372                         (const char *) prefix, (const char *) localname,
373                         NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
374                         localname);
375     if (ctxt != NULL) {
376 	ctxt->wellFormed = 0;
377 	if (ctxt->recovery == 0)
378 	    ctxt->disableSAX = 1;
379     }
380 }
381 
382 /**
383  * xmlFatalErr:
384  * @ctxt:  an XML parser context
385  * @error:  the error number
386  * @extra:  extra information string
387  *
388  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
389  */
390 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)391 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
392 {
393     const char *errmsg;
394 
395     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
396         (ctxt->instate == XML_PARSER_EOF))
397 	return;
398     switch (error) {
399         case XML_ERR_INVALID_HEX_CHARREF:
400             errmsg = "CharRef: invalid hexadecimal value";
401             break;
402         case XML_ERR_INVALID_DEC_CHARREF:
403             errmsg = "CharRef: invalid decimal value";
404             break;
405         case XML_ERR_INVALID_CHARREF:
406             errmsg = "CharRef: invalid value";
407             break;
408         case XML_ERR_INTERNAL_ERROR:
409             errmsg = "internal error";
410             break;
411         case XML_ERR_PEREF_AT_EOF:
412             errmsg = "PEReference at end of document";
413             break;
414         case XML_ERR_PEREF_IN_PROLOG:
415             errmsg = "PEReference in prolog";
416             break;
417         case XML_ERR_PEREF_IN_EPILOG:
418             errmsg = "PEReference in epilog";
419             break;
420         case XML_ERR_PEREF_NO_NAME:
421             errmsg = "PEReference: no name";
422             break;
423         case XML_ERR_PEREF_SEMICOL_MISSING:
424             errmsg = "PEReference: expecting ';'";
425             break;
426         case XML_ERR_ENTITY_LOOP:
427             errmsg = "Detected an entity reference loop";
428             break;
429         case XML_ERR_ENTITY_NOT_STARTED:
430             errmsg = "EntityValue: \" or ' expected";
431             break;
432         case XML_ERR_ENTITY_PE_INTERNAL:
433             errmsg = "PEReferences forbidden in internal subset";
434             break;
435         case XML_ERR_ENTITY_NOT_FINISHED:
436             errmsg = "EntityValue: \" or ' expected";
437             break;
438         case XML_ERR_ATTRIBUTE_NOT_STARTED:
439             errmsg = "AttValue: \" or ' expected";
440             break;
441         case XML_ERR_LT_IN_ATTRIBUTE:
442             errmsg = "Unescaped '<' not allowed in attributes values";
443             break;
444         case XML_ERR_LITERAL_NOT_STARTED:
445             errmsg = "SystemLiteral \" or ' expected";
446             break;
447         case XML_ERR_LITERAL_NOT_FINISHED:
448             errmsg = "Unfinished System or Public ID \" or ' expected";
449             break;
450         case XML_ERR_MISPLACED_CDATA_END:
451             errmsg = "Sequence ']]>' not allowed in content";
452             break;
453         case XML_ERR_URI_REQUIRED:
454             errmsg = "SYSTEM or PUBLIC, the URI is missing";
455             break;
456         case XML_ERR_PUBID_REQUIRED:
457             errmsg = "PUBLIC, the Public Identifier is missing";
458             break;
459         case XML_ERR_HYPHEN_IN_COMMENT:
460             errmsg = "Comment must not contain '--' (double-hyphen)";
461             break;
462         case XML_ERR_PI_NOT_STARTED:
463             errmsg = "xmlParsePI : no target name";
464             break;
465         case XML_ERR_RESERVED_XML_NAME:
466             errmsg = "Invalid PI name";
467             break;
468         case XML_ERR_NOTATION_NOT_STARTED:
469             errmsg = "NOTATION: Name expected here";
470             break;
471         case XML_ERR_NOTATION_NOT_FINISHED:
472             errmsg = "'>' required to close NOTATION declaration";
473             break;
474         case XML_ERR_VALUE_REQUIRED:
475             errmsg = "Entity value required";
476             break;
477         case XML_ERR_URI_FRAGMENT:
478             errmsg = "Fragment not allowed";
479             break;
480         case XML_ERR_ATTLIST_NOT_STARTED:
481             errmsg = "'(' required to start ATTLIST enumeration";
482             break;
483         case XML_ERR_NMTOKEN_REQUIRED:
484             errmsg = "NmToken expected in ATTLIST enumeration";
485             break;
486         case XML_ERR_ATTLIST_NOT_FINISHED:
487             errmsg = "')' required to finish ATTLIST enumeration";
488             break;
489         case XML_ERR_MIXED_NOT_STARTED:
490             errmsg = "MixedContentDecl : '|' or ')*' expected";
491             break;
492         case XML_ERR_PCDATA_REQUIRED:
493             errmsg = "MixedContentDecl : '#PCDATA' expected";
494             break;
495         case XML_ERR_ELEMCONTENT_NOT_STARTED:
496             errmsg = "ContentDecl : Name or '(' expected";
497             break;
498         case XML_ERR_ELEMCONTENT_NOT_FINISHED:
499             errmsg = "ContentDecl : ',' '|' or ')' expected";
500             break;
501         case XML_ERR_PEREF_IN_INT_SUBSET:
502             errmsg =
503                 "PEReference: forbidden within markup decl in internal subset";
504             break;
505         case XML_ERR_GT_REQUIRED:
506             errmsg = "expected '>'";
507             break;
508         case XML_ERR_CONDSEC_INVALID:
509             errmsg = "XML conditional section '[' expected";
510             break;
511         case XML_ERR_EXT_SUBSET_NOT_FINISHED:
512             errmsg = "Content error in the external subset";
513             break;
514         case XML_ERR_CONDSEC_INVALID_KEYWORD:
515             errmsg =
516                 "conditional section INCLUDE or IGNORE keyword expected";
517             break;
518         case XML_ERR_CONDSEC_NOT_FINISHED:
519             errmsg = "XML conditional section not closed";
520             break;
521         case XML_ERR_XMLDECL_NOT_STARTED:
522             errmsg = "Text declaration '<?xml' required";
523             break;
524         case XML_ERR_XMLDECL_NOT_FINISHED:
525             errmsg = "parsing XML declaration: '?>' expected";
526             break;
527         case XML_ERR_EXT_ENTITY_STANDALONE:
528             errmsg = "external parsed entities cannot be standalone";
529             break;
530         case XML_ERR_ENTITYREF_SEMICOL_MISSING:
531             errmsg = "EntityRef: expecting ';'";
532             break;
533         case XML_ERR_DOCTYPE_NOT_FINISHED:
534             errmsg = "DOCTYPE improperly terminated";
535             break;
536         case XML_ERR_LTSLASH_REQUIRED:
537             errmsg = "EndTag: '</' not found";
538             break;
539         case XML_ERR_EQUAL_REQUIRED:
540             errmsg = "expected '='";
541             break;
542         case XML_ERR_STRING_NOT_CLOSED:
543             errmsg = "String not closed expecting \" or '";
544             break;
545         case XML_ERR_STRING_NOT_STARTED:
546             errmsg = "String not started expecting ' or \"";
547             break;
548         case XML_ERR_ENCODING_NAME:
549             errmsg = "Invalid XML encoding name";
550             break;
551         case XML_ERR_STANDALONE_VALUE:
552             errmsg = "standalone accepts only 'yes' or 'no'";
553             break;
554         case XML_ERR_DOCUMENT_EMPTY:
555             errmsg = "Document is empty";
556             break;
557         case XML_ERR_DOCUMENT_END:
558             errmsg = "Extra content at the end of the document";
559             break;
560         case XML_ERR_NOT_WELL_BALANCED:
561             errmsg = "chunk is not well balanced";
562             break;
563         case XML_ERR_EXTRA_CONTENT:
564             errmsg = "extra content at the end of well balanced chunk";
565             break;
566         case XML_ERR_VERSION_MISSING:
567             errmsg = "Malformed declaration expecting version";
568             break;
569         case XML_ERR_NAME_TOO_LONG:
570             errmsg = "Name too long";
571             break;
572 #if 0
573         case:
574             errmsg = "";
575             break;
576 #endif
577         default:
578             errmsg = "Unregistered error message";
579     }
580     if (ctxt != NULL)
581 	ctxt->errNo = error;
582     if (info == NULL) {
583         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
584                         XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
585                         errmsg);
586     } else {
587         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
588                         XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
589                         errmsg, info);
590     }
591     if (ctxt != NULL) {
592 	ctxt->wellFormed = 0;
593 	if (ctxt->recovery == 0)
594 	    ctxt->disableSAX = 1;
595     }
596 }
597 
598 /**
599  * xmlFatalErrMsg:
600  * @ctxt:  an XML parser context
601  * @error:  the error number
602  * @msg:  the error message
603  *
604  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
605  */
606 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)607 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
608                const char *msg)
609 {
610     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
611         (ctxt->instate == XML_PARSER_EOF))
612 	return;
613     if (ctxt != NULL)
614 	ctxt->errNo = error;
615     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
616                     XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
617     if (ctxt != NULL) {
618 	ctxt->wellFormed = 0;
619 	if (ctxt->recovery == 0)
620 	    ctxt->disableSAX = 1;
621     }
622 }
623 
624 /**
625  * xmlWarningMsg:
626  * @ctxt:  an XML parser context
627  * @error:  the error number
628  * @msg:  the error message
629  * @str1:  extra data
630  * @str2:  extra data
631  *
632  * Handle a warning.
633  */
634 static void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)635 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
636               const char *msg, const xmlChar *str1, const xmlChar *str2)
637 {
638     xmlStructuredErrorFunc schannel = NULL;
639 
640     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
641         (ctxt->instate == XML_PARSER_EOF))
642 	return;
643     if ((ctxt != NULL) && (ctxt->sax != NULL) &&
644         (ctxt->sax->initialized == XML_SAX2_MAGIC))
645         schannel = ctxt->sax->serror;
646     if (ctxt != NULL) {
647         __xmlRaiseError(schannel,
648                     (ctxt->sax) ? ctxt->sax->warning : NULL,
649                     ctxt->userData,
650                     ctxt, NULL, XML_FROM_PARSER, error,
651                     XML_ERR_WARNING, NULL, 0,
652 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
653 		    msg, (const char *) str1, (const char *) str2);
654     } else {
655         __xmlRaiseError(schannel, NULL, NULL,
656                     ctxt, NULL, XML_FROM_PARSER, error,
657                     XML_ERR_WARNING, NULL, 0,
658 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
659 		    msg, (const char *) str1, (const char *) str2);
660     }
661 }
662 
663 /**
664  * xmlValidityError:
665  * @ctxt:  an XML parser context
666  * @error:  the error number
667  * @msg:  the error message
668  * @str1:  extra data
669  *
670  * Handle a validity error.
671  */
672 static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)673 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
674               const char *msg, const xmlChar *str1, const xmlChar *str2)
675 {
676     xmlStructuredErrorFunc schannel = NULL;
677 
678     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
679         (ctxt->instate == XML_PARSER_EOF))
680 	return;
681     if (ctxt != NULL) {
682 	ctxt->errNo = error;
683 	if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
684 	    schannel = ctxt->sax->serror;
685     }
686     if (ctxt != NULL) {
687         __xmlRaiseError(schannel,
688                     ctxt->vctxt.error, ctxt->vctxt.userData,
689                     ctxt, NULL, XML_FROM_DTD, error,
690                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
691 		    (const char *) str2, NULL, 0, 0,
692 		    msg, (const char *) str1, (const char *) str2);
693 	ctxt->valid = 0;
694     } else {
695         __xmlRaiseError(schannel, NULL, NULL,
696                     ctxt, NULL, XML_FROM_DTD, error,
697                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
698 		    (const char *) str2, NULL, 0, 0,
699 		    msg, (const char *) str1, (const char *) str2);
700     }
701 }
702 
703 /**
704  * xmlFatalErrMsgInt:
705  * @ctxt:  an XML parser context
706  * @error:  the error number
707  * @msg:  the error message
708  * @val:  an integer value
709  *
710  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
711  */
712 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)713 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714                   const char *msg, int val)
715 {
716     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
717         (ctxt->instate == XML_PARSER_EOF))
718 	return;
719     if (ctxt != NULL)
720 	ctxt->errNo = error;
721     __xmlRaiseError(NULL, NULL, NULL,
722                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
723                     NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
724     if (ctxt != NULL) {
725 	ctxt->wellFormed = 0;
726 	if (ctxt->recovery == 0)
727 	    ctxt->disableSAX = 1;
728     }
729 }
730 
731 /**
732  * xmlFatalErrMsgStrIntStr:
733  * @ctxt:  an XML parser context
734  * @error:  the error number
735  * @msg:  the error message
736  * @str1:  an string info
737  * @val:  an integer value
738  * @str2:  an string info
739  *
740  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
741  */
742 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)743 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
744                   const char *msg, const xmlChar *str1, int val,
745 		  const xmlChar *str2)
746 {
747     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
748         (ctxt->instate == XML_PARSER_EOF))
749 	return;
750     if (ctxt != NULL)
751 	ctxt->errNo = error;
752     __xmlRaiseError(NULL, NULL, NULL,
753                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
754                     NULL, 0, (const char *) str1, (const char *) str2,
755 		    NULL, val, 0, msg, str1, val, str2);
756     if (ctxt != NULL) {
757 	ctxt->wellFormed = 0;
758 	if (ctxt->recovery == 0)
759 	    ctxt->disableSAX = 1;
760     }
761 }
762 
763 /**
764  * xmlFatalErrMsgStr:
765  * @ctxt:  an XML parser context
766  * @error:  the error number
767  * @msg:  the error message
768  * @val:  a string value
769  *
770  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
771  */
772 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)773 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
774                   const char *msg, const xmlChar * val)
775 {
776     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
777         (ctxt->instate == XML_PARSER_EOF))
778 	return;
779     if (ctxt != NULL)
780 	ctxt->errNo = error;
781     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
782                     XML_FROM_PARSER, error, XML_ERR_FATAL,
783                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
784                     val);
785     if (ctxt != NULL) {
786 	ctxt->wellFormed = 0;
787 	if (ctxt->recovery == 0)
788 	    ctxt->disableSAX = 1;
789     }
790 }
791 
792 /**
793  * xmlErrMsgStr:
794  * @ctxt:  an XML parser context
795  * @error:  the error number
796  * @msg:  the error message
797  * @val:  a string value
798  *
799  * Handle a non fatal parser error
800  */
801 static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)802 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
803                   const char *msg, const xmlChar * val)
804 {
805     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
806         (ctxt->instate == XML_PARSER_EOF))
807 	return;
808     if (ctxt != NULL)
809 	ctxt->errNo = error;
810     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
811                     XML_FROM_PARSER, error, XML_ERR_ERROR,
812                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
813                     val);
814 }
815 
816 /**
817  * xmlNsErr:
818  * @ctxt:  an XML parser context
819  * @error:  the error number
820  * @msg:  the message
821  * @info1:  extra information string
822  * @info2:  extra information string
823  *
824  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
825  */
826 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)827 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
828          const char *msg,
829          const xmlChar * info1, const xmlChar * info2,
830          const xmlChar * info3)
831 {
832     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
833         (ctxt->instate == XML_PARSER_EOF))
834 	return;
835     if (ctxt != NULL)
836 	ctxt->errNo = error;
837     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
838                     XML_ERR_ERROR, NULL, 0, (const char *) info1,
839                     (const char *) info2, (const char *) info3, 0, 0, msg,
840                     info1, info2, info3);
841     if (ctxt != NULL)
842 	ctxt->nsWellFormed = 0;
843 }
844 
845 /**
846  * xmlNsWarn
847  * @ctxt:  an XML parser context
848  * @error:  the error number
849  * @msg:  the message
850  * @info1:  extra information string
851  * @info2:  extra information string
852  *
853  * Handle a namespace warning error
854  */
855 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)856 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
857          const char *msg,
858          const xmlChar * info1, const xmlChar * info2,
859          const xmlChar * info3)
860 {
861     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
862         (ctxt->instate == XML_PARSER_EOF))
863 	return;
864     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
865                     XML_ERR_WARNING, NULL, 0, (const char *) info1,
866                     (const char *) info2, (const char *) info3, 0, 0, msg,
867                     info1, info2, info3);
868 }
869 
870 /************************************************************************
871  *									*
872  *		Library wide options					*
873  *									*
874  ************************************************************************/
875 
876 /**
877   * xmlHasFeature:
878   * @feature: the feature to be examined
879   *
880   * Examines if the library has been compiled with a given feature.
881   *
882   * Returns a non-zero value if the feature exist, otherwise zero.
883   * Returns zero (0) if the feature does not exist or an unknown
884   * unknown feature is requested, non-zero otherwise.
885   */
886 int
xmlHasFeature(xmlFeature feature)887 xmlHasFeature(xmlFeature feature)
888 {
889     switch (feature) {
890 	case XML_WITH_THREAD:
891 #ifdef LIBXML_THREAD_ENABLED
892 	    return(1);
893 #else
894 	    return(0);
895 #endif
896         case XML_WITH_TREE:
897 #ifdef LIBXML_TREE_ENABLED
898             return(1);
899 #else
900             return(0);
901 #endif
902         case XML_WITH_OUTPUT:
903 #ifdef LIBXML_OUTPUT_ENABLED
904             return(1);
905 #else
906             return(0);
907 #endif
908         case XML_WITH_PUSH:
909 #ifdef LIBXML_PUSH_ENABLED
910             return(1);
911 #else
912             return(0);
913 #endif
914         case XML_WITH_READER:
915 #ifdef LIBXML_READER_ENABLED
916             return(1);
917 #else
918             return(0);
919 #endif
920         case XML_WITH_PATTERN:
921 #ifdef LIBXML_PATTERN_ENABLED
922             return(1);
923 #else
924             return(0);
925 #endif
926         case XML_WITH_WRITER:
927 #ifdef LIBXML_WRITER_ENABLED
928             return(1);
929 #else
930             return(0);
931 #endif
932         case XML_WITH_SAX1:
933 #ifdef LIBXML_SAX1_ENABLED
934             return(1);
935 #else
936             return(0);
937 #endif
938         case XML_WITH_FTP:
939 #ifdef LIBXML_FTP_ENABLED
940             return(1);
941 #else
942             return(0);
943 #endif
944         case XML_WITH_HTTP:
945 #ifdef LIBXML_HTTP_ENABLED
946             return(1);
947 #else
948             return(0);
949 #endif
950         case XML_WITH_VALID:
951 #ifdef LIBXML_VALID_ENABLED
952             return(1);
953 #else
954             return(0);
955 #endif
956         case XML_WITH_HTML:
957 #ifdef LIBXML_HTML_ENABLED
958             return(1);
959 #else
960             return(0);
961 #endif
962         case XML_WITH_LEGACY:
963 #ifdef LIBXML_LEGACY_ENABLED
964             return(1);
965 #else
966             return(0);
967 #endif
968         case XML_WITH_C14N:
969 #ifdef LIBXML_C14N_ENABLED
970             return(1);
971 #else
972             return(0);
973 #endif
974         case XML_WITH_CATALOG:
975 #ifdef LIBXML_CATALOG_ENABLED
976             return(1);
977 #else
978             return(0);
979 #endif
980         case XML_WITH_XPATH:
981 #ifdef LIBXML_XPATH_ENABLED
982             return(1);
983 #else
984             return(0);
985 #endif
986         case XML_WITH_XPTR:
987 #ifdef LIBXML_XPTR_ENABLED
988             return(1);
989 #else
990             return(0);
991 #endif
992         case XML_WITH_XINCLUDE:
993 #ifdef LIBXML_XINCLUDE_ENABLED
994             return(1);
995 #else
996             return(0);
997 #endif
998         case XML_WITH_ICONV:
999 #ifdef LIBXML_ICONV_ENABLED
1000             return(1);
1001 #else
1002             return(0);
1003 #endif
1004         case XML_WITH_ISO8859X:
1005 #ifdef LIBXML_ISO8859X_ENABLED
1006             return(1);
1007 #else
1008             return(0);
1009 #endif
1010         case XML_WITH_UNICODE:
1011 #ifdef LIBXML_UNICODE_ENABLED
1012             return(1);
1013 #else
1014             return(0);
1015 #endif
1016         case XML_WITH_REGEXP:
1017 #ifdef LIBXML_REGEXP_ENABLED
1018             return(1);
1019 #else
1020             return(0);
1021 #endif
1022         case XML_WITH_AUTOMATA:
1023 #ifdef LIBXML_AUTOMATA_ENABLED
1024             return(1);
1025 #else
1026             return(0);
1027 #endif
1028         case XML_WITH_EXPR:
1029 #ifdef LIBXML_EXPR_ENABLED
1030             return(1);
1031 #else
1032             return(0);
1033 #endif
1034         case XML_WITH_SCHEMAS:
1035 #ifdef LIBXML_SCHEMAS_ENABLED
1036             return(1);
1037 #else
1038             return(0);
1039 #endif
1040         case XML_WITH_SCHEMATRON:
1041 #ifdef LIBXML_SCHEMATRON_ENABLED
1042             return(1);
1043 #else
1044             return(0);
1045 #endif
1046         case XML_WITH_MODULES:
1047 #ifdef LIBXML_MODULES_ENABLED
1048             return(1);
1049 #else
1050             return(0);
1051 #endif
1052         case XML_WITH_DEBUG:
1053 #ifdef LIBXML_DEBUG_ENABLED
1054             return(1);
1055 #else
1056             return(0);
1057 #endif
1058         case XML_WITH_DEBUG_MEM:
1059 #ifdef DEBUG_MEMORY_LOCATION
1060             return(1);
1061 #else
1062             return(0);
1063 #endif
1064         case XML_WITH_DEBUG_RUN:
1065 #ifdef LIBXML_DEBUG_RUNTIME
1066             return(1);
1067 #else
1068             return(0);
1069 #endif
1070         case XML_WITH_ZLIB:
1071 #ifdef LIBXML_ZLIB_ENABLED
1072             return(1);
1073 #else
1074             return(0);
1075 #endif
1076         case XML_WITH_LZMA:
1077 #ifdef LIBXML_LZMA_ENABLED
1078             return(1);
1079 #else
1080             return(0);
1081 #endif
1082         case XML_WITH_ICU:
1083 #ifdef LIBXML_ICU_ENABLED
1084             return(1);
1085 #else
1086             return(0);
1087 #endif
1088         default:
1089 	    break;
1090      }
1091      return(0);
1092 }
1093 
1094 /************************************************************************
1095  *									*
1096  *		SAX2 defaulted attributes handling			*
1097  *									*
1098  ************************************************************************/
1099 
1100 /**
1101  * xmlDetectSAX2:
1102  * @ctxt:  an XML parser context
1103  *
1104  * Do the SAX2 detection and specific initialization
1105  */
1106 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)1107 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1108     xmlSAXHandlerPtr sax;
1109 
1110     /* Avoid unused variable warning if features are disabled. */
1111     (void) sax;
1112 
1113     if (ctxt == NULL) return;
1114     sax = ctxt->sax;
1115 #ifdef LIBXML_SAX1_ENABLED
1116     if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1117         ((sax->startElementNs != NULL) ||
1118          (sax->endElementNs != NULL) ||
1119          ((sax->startElement == NULL) && (sax->endElement == NULL))))
1120         ctxt->sax2 = 1;
1121 #else
1122     ctxt->sax2 = 1;
1123 #endif /* LIBXML_SAX1_ENABLED */
1124 
1125     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1126     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1127     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1128     if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1129 		(ctxt->str_xml_ns == NULL)) {
1130         xmlErrMemory(ctxt, NULL);
1131     }
1132 }
1133 
/*
 * Table of the defaulted attributes declared for one element.
 * Each attribute occupies 5 consecutive slots of @values:
 * localname, prefix, value, end of value, and an "external" marker
 * which is NULL when the declaration was not external.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;        /* number of defaulted attributes on that element */
    int maxAttrs;       /* capacity of the array, counted in attributes */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
    /* Using a C99 flexible array member avoids UBSan errors. */
    const xmlChar *values[]; /* 5 slots per attribute, see above */
#else
    const xmlChar *values[5]; /* first attribute, more follow in memory */
#endif
};
1146 
/**
 * xmlAttrNormalizeSpace:
 * @src: the source string
 * @dst: the target string
 *
 * Normalize the space in non CDATA attribute values:
 * If the attribute type is not CDATA, then the XML processor MUST further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20) character.
 * Note that the size of dst needs to be at least that of src, and if one
 * doesn't need to preserve src (and it doesn't come from a dictionary or
 * read-only memory) then passing src as dst is just fine.
 *
 * Returns a pointer to the start of the normalized value (the original
 *         @dst), or NULL if an argument is NULL or if no conversion was
 *         needed (in place call which left the string unchanged).
 */
static xmlChar *
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
{
    xmlChar *out;

    if ((src == NULL) || (dst == NULL))
        return(NULL);

    /* remember the start of the output: dst is advanced while writing */
    out = dst;

    /* drop leading spaces */
    while (*src == 0x20) src++;
    while (*src != 0) {
        if (*src == 0x20) {
            /* collapse a run of spaces, dropping it if it is trailing */
            while (*src == 0x20) src++;
            if (*src != 0)
                *dst++ = 0x20;
        } else {
            *dst++ = *src++;
        }
    }
    *dst = 0;

    /*
     * Both cursors ending on the same byte means the call was done in
     * place and nothing was removed.
     */
    if (dst == src)
        return(NULL);
    return(out);
}
1185 
1186 /**
1187  * xmlAttrNormalizeSpace2:
1188  * @src: the source string
1189  *
1190  * Normalize the space in non CDATA attribute values, a slightly more complex
1191  * front end to avoid allocation problems when running on attribute values
1192  * coming from the input.
1193  *
1194  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1195  *         is needed.
1196  */
1197 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1198 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1199 {
1200     int i;
1201     int remove_head = 0;
1202     int need_realloc = 0;
1203     const xmlChar *cur;
1204 
1205     if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1206         return(NULL);
1207     i = *len;
1208     if (i <= 0)
1209         return(NULL);
1210 
1211     cur = src;
1212     while (*cur == 0x20) {
1213         cur++;
1214 	remove_head++;
1215     }
1216     while (*cur != 0) {
1217 	if (*cur == 0x20) {
1218 	    cur++;
1219 	    if ((*cur == 0x20) || (*cur == 0)) {
1220 	        need_realloc = 1;
1221 		break;
1222 	    }
1223 	} else
1224 	    cur++;
1225     }
1226     if (need_realloc) {
1227         xmlChar *ret;
1228 
1229 	ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1230 	if (ret == NULL) {
1231 	    xmlErrMemory(ctxt, NULL);
1232 	    return(NULL);
1233 	}
1234 	xmlAttrNormalizeSpace(ret, ret);
1235 	*len = (int) strlen((const char *)ret);
1236         return(ret);
1237     } else if (remove_head) {
1238         *len -= remove_head;
1239         memmove(src, src + remove_head, 1 + *len);
1240 	return(src);
1241     }
1242     return(NULL);
1243 }
1244 
1245 /**
1246  * xmlAddDefAttrs:
1247  * @ctxt:  an XML parser context
1248  * @fullname:  the element fullname
1249  * @fullattr:  the attribute fullname
1250  * @value:  the attribute value
1251  *
1252  * Add a defaulted attribute for an element
1253  */
1254 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1255 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1256                const xmlChar *fullname,
1257                const xmlChar *fullattr,
1258                const xmlChar *value) {
1259     xmlDefAttrsPtr defaults;
1260     int len;
1261     const xmlChar *name;
1262     const xmlChar *prefix;
1263 
1264     /*
1265      * Allows to detect attribute redefinitions
1266      */
1267     if (ctxt->attsSpecial != NULL) {
1268         if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1269 	    return;
1270     }
1271 
1272     if (ctxt->attsDefault == NULL) {
1273         ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1274 	if (ctxt->attsDefault == NULL)
1275 	    goto mem_error;
1276     }
1277 
1278     /*
1279      * split the element name into prefix:localname , the string found
1280      * are within the DTD and then not associated to namespace names.
1281      */
1282     name = xmlSplitQName3(fullname, &len);
1283     if (name == NULL) {
1284         name = xmlDictLookup(ctxt->dict, fullname, -1);
1285 	prefix = NULL;
1286     } else {
1287         name = xmlDictLookup(ctxt->dict, name, -1);
1288 	prefix = xmlDictLookup(ctxt->dict, fullname, len);
1289     }
1290 
1291     /*
1292      * make sure there is some storage
1293      */
1294     defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1295     if (defaults == NULL) {
1296         defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1297 	                   (4 * 5) * sizeof(const xmlChar *));
1298 	if (defaults == NULL)
1299 	    goto mem_error;
1300 	defaults->nbAttrs = 0;
1301 	defaults->maxAttrs = 4;
1302 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1303 	                        defaults, NULL) < 0) {
1304 	    xmlFree(defaults);
1305 	    goto mem_error;
1306 	}
1307     } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1308         xmlDefAttrsPtr temp;
1309 
1310         temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1311 		       (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1312 	if (temp == NULL)
1313 	    goto mem_error;
1314 	defaults = temp;
1315 	defaults->maxAttrs *= 2;
1316 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1317 	                        defaults, NULL) < 0) {
1318 	    xmlFree(defaults);
1319 	    goto mem_error;
1320 	}
1321     }
1322 
1323     /*
1324      * Split the element name into prefix:localname , the string found
1325      * are within the DTD and hen not associated to namespace names.
1326      */
1327     name = xmlSplitQName3(fullattr, &len);
1328     if (name == NULL) {
1329         name = xmlDictLookup(ctxt->dict, fullattr, -1);
1330 	prefix = NULL;
1331     } else {
1332         name = xmlDictLookup(ctxt->dict, name, -1);
1333 	prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1334     }
1335 
1336     defaults->values[5 * defaults->nbAttrs] = name;
1337     defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1338     /* intern the string and precompute the end */
1339     len = xmlStrlen(value);
1340     value = xmlDictLookup(ctxt->dict, value, len);
1341     defaults->values[5 * defaults->nbAttrs + 2] = value;
1342     defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1343     if (ctxt->external)
1344         defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1345     else
1346         defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1347     defaults->nbAttrs++;
1348 
1349     return;
1350 
1351 mem_error:
1352     xmlErrMemory(ctxt, NULL);
1353     return;
1354 }
1355 
1356 /**
1357  * xmlAddSpecialAttr:
1358  * @ctxt:  an XML parser context
1359  * @fullname:  the element fullname
1360  * @fullattr:  the attribute fullname
1361  * @type:  the attribute type
1362  *
1363  * Register this attribute type
1364  */
1365 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1366 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1367 		  const xmlChar *fullname,
1368 		  const xmlChar *fullattr,
1369 		  int type)
1370 {
1371     if (ctxt->attsSpecial == NULL) {
1372         ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1373 	if (ctxt->attsSpecial == NULL)
1374 	    goto mem_error;
1375     }
1376 
1377     if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1378         return;
1379 
1380     xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1381                      (void *) (ptrdiff_t) type);
1382     return;
1383 
1384 mem_error:
1385     xmlErrMemory(ctxt, NULL);
1386     return;
1387 }
1388 
1389 /**
1390  * xmlCleanSpecialAttrCallback:
1391  *
1392  * Removes CDATA attributes from the special attribute table
1393  */
1394 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1395 xmlCleanSpecialAttrCallback(void *payload, void *data,
1396                             const xmlChar *fullname, const xmlChar *fullattr,
1397                             const xmlChar *unused ATTRIBUTE_UNUSED) {
1398     xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1399 
1400     if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1401         xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1402     }
1403 }
1404 
1405 /**
1406  * xmlCleanSpecialAttr:
1407  * @ctxt:  an XML parser context
1408  *
1409  * Trim the list of attributes defined to remove all those of type
1410  * CDATA as they are not special. This call should be done when finishing
1411  * to parse the DTD and before starting to parse the document root.
1412  */
1413 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1414 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1415 {
1416     if (ctxt->attsSpecial == NULL)
1417         return;
1418 
1419     xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1420 
1421     if (xmlHashSize(ctxt->attsSpecial) == 0) {
1422         xmlHashFree(ctxt->attsSpecial, NULL);
1423         ctxt->attsSpecial = NULL;
1424     }
1425     return;
1426 }
1427 
/*
 * xmlLangSkipAlpha:
 * @p:  pointer into a zero terminated string
 *
 * Returns a pointer to the first byte at or after @p which is not an
 * ASCII letter ([a-z] | [A-Z]).
 */
static const xmlChar *
xmlLangSkipAlpha(const xmlChar *p)
{
    while (((*p >= 'A') && (*p <= 'Z')) ||
           ((*p >= 'a') && (*p <= 'z')))
        p++;
    return(p);
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The current REC references the successors of RFC 1766, currently 5646
 *
 * http://www.rfc-editor.org/rfc/rfc5646.txt
 * langtag       = language
 *                 ["-" script]
 *                 ["-" region]
 *                 *("-" variant)
 *                 *("-" extension)
 *                 ["-" privateuse]
 * language      = 2*3ALPHA            ; shortest ISO 639 code
 *                 ["-" extlang]       ; sometimes followed by
 *                                     ; extended language subtags
 *               / 4ALPHA              ; or reserved for future use
 *               / 5*8ALPHA            ; or registered language subtag
 *
 * extlang       = 3ALPHA              ; selected ISO 639 codes
 *                 *2("-" 3ALPHA)      ; permanently reserved
 *
 * script        = 4ALPHA              ; ISO 15924 code
 *
 * region        = 2ALPHA              ; ISO 3166-1 code
 *               / 3DIGIT              ; UN M.49 code
 *
 * variant       = 5*8alphanum         ; registered variants
 *               / (DIGIT 3alphanum)
 *
 * extension     = singleton 1*("-" (2*8alphanum))
 *
 *                                     ; Single alphanumerics
 *                                     ; "x" reserved for private use
 * singleton     = DIGIT               ; 0 - 9
 *               / %x41-57             ; A - W
 *               / %x59-5A             ; Y - Z
 *               / %x61-77             ; a - w
 *               / %x79-7A             ; y - z
 *
 * Irregular i-xxx IANA codes and x-xxx user codes are still accepted.
 * What is actually checked by this implementation:
 *  - at most one extlang, and variants made of 5 to 8 letters only;
 *  - whatever follows the '-' after a first variant is not examined,
 *    extension and privateuse subtags included, since handling them
 *    would not be interoperable anyway.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *start;   /* first byte of the subtag being checked */
    const xmlChar *end;     /* first byte not consumed yet */
    ptrdiff_t run;          /* length of the last run of letters */

    if (lang == NULL)
        return(0);

    switch (lang[0]) {
        case 'i': case 'I':
        case 'x': case 'X':
            if (lang[1] == '-') {
                /*
                 * Still allow IANA code and user code which were coming
                 * from the previous version of the XML-1.0 specification
                 * it's deprecated but we should not fail
                 */
                end = xmlLangSkipAlpha(lang + 2);
                return(end[0] == 0);
            }
            break;
        default:
            break;
    }

    start = lang;
    end = xmlLangSkipAlpha(start);
    run = end - start;
    if (run >= 4) {
        /* reserved or registered language subtag: it must stand alone */
        return((run <= 8) && (end[0] == 0));
    }
    if (run < 2)
        return(0);

    /* we got an ISO 639 code */
    if (end[0] == 0)
        return(1);
    if (end[0] != '-')
        return(0);

    /* now we can have extlang or script or region or variant */
    start = ++end;
    if ((end[0] >= '0') && (end[0] <= '9'))
        goto region_m49;

    end = xmlLangSkipAlpha(end);
    run = end - start;
    switch (run) {
        case 4:
            goto script;
        case 2:
            goto region;
        case 5: case 6: case 7: case 8:
            goto variant;
        case 3:
            break;
        default:
            return(0);
    }

    /* we parsed an extlang */
    if (end[0] == 0)
        return(1);
    if (end[0] != '-')
        return(0);

    /* now we can have script or region or variant */
    start = ++end;
    if ((end[0] >= '0') && (end[0] <= '9'))
        goto region_m49;

    end = xmlLangSkipAlpha(end);
    run = end - start;
    if (run == 2)
        goto region;
    if ((run >= 5) && (run <= 8))
        goto variant;
    if (run != 4)
        return(0);

    /* we parsed a script */
script:
    if (end[0] == 0)
        return(1);
    if (end[0] != '-')
        return(0);

    /* now we can have region or variant */
    start = ++end;
    if ((end[0] >= '0') && (end[0] <= '9'))
        goto region_m49;

    end = xmlLangSkipAlpha(end);
    run = end - start;
    if ((run >= 5) && (run <= 8))
        goto variant;
    if (run != 2)
        return(0);

    /* we parsed a region */
region:
    if (end[0] == 0)
        return(1);
    if (end[0] != '-')
        return(0);

    /* now we can just have a variant */
    start = ++end;
    end = xmlLangSkipAlpha(end);
    run = end - start;
    if ((run < 5) || (run > 8))
        return(0);

    /* we parsed a variant */
variant:
    /* extensions and private use subtags not checked */
    return((end[0] == 0) || (end[0] == '-'));

region_m49:
    /* end points to a first digit, two more are required */
    if (((end[1] >= '0') && (end[1] <= '9')) &&
        ((end[2] >= '0') && (end[2] <= '9'))) {
        end += 3;
        goto region;
    }
    return(0);
}
1621 
1622 /************************************************************************
1623  *									*
1624  *		Parser stacks related functions and macros		*
1625  *									*
1626  ************************************************************************/
1627 
1628 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1629                                             const xmlChar ** str);
1630 
1631 #ifdef SAX2
1632 /**
1633  * nsPush:
1634  * @ctxt:  an XML parser context
1635  * @prefix:  the namespace prefix or NULL
1636  * @URL:  the namespace name
1637  *
1638  * Pushes a new parser namespace on top of the ns stack
1639  *
1640  * Returns -1 in case of error, -2 if the namespace should be discarded
1641  *	   and the index in the stack otherwise.
1642  */
1643 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1644 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1645 {
1646     if (ctxt->options & XML_PARSE_NSCLEAN) {
1647         int i;
1648 	for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1649 	    if (ctxt->nsTab[i] == prefix) {
1650 		/* in scope */
1651 	        if (ctxt->nsTab[i + 1] == URL)
1652 		    return(-2);
1653 		/* out of scope keep it */
1654 		break;
1655 	    }
1656 	}
1657     }
1658     if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1659 	ctxt->nsMax = 10;
1660 	ctxt->nsNr = 0;
1661 	ctxt->nsTab = (const xmlChar **)
1662 	              xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1663 	if (ctxt->nsTab == NULL) {
1664 	    xmlErrMemory(ctxt, NULL);
1665 	    ctxt->nsMax = 0;
1666             return (-1);
1667 	}
1668     } else if (ctxt->nsNr >= ctxt->nsMax) {
1669         const xmlChar ** tmp;
1670         ctxt->nsMax *= 2;
1671         tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1672 				    ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1673         if (tmp == NULL) {
1674             xmlErrMemory(ctxt, NULL);
1675 	    ctxt->nsMax /= 2;
1676             return (-1);
1677         }
1678 	ctxt->nsTab = tmp;
1679     }
1680     ctxt->nsTab[ctxt->nsNr++] = prefix;
1681     ctxt->nsTab[ctxt->nsNr++] = URL;
1682     return (ctxt->nsNr);
1683 }
1684 /**
1685  * nsPop:
1686  * @ctxt: an XML parser context
1687  * @nr:  the number to pop
1688  *
1689  * Pops the top @nr parser prefix/namespace from the ns stack
1690  *
1691  * Returns the number of namespaces removed
1692  */
1693 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1694 nsPop(xmlParserCtxtPtr ctxt, int nr)
1695 {
1696     int i;
1697 
1698     if (ctxt->nsTab == NULL) return(0);
1699     if (ctxt->nsNr < nr) {
1700         xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1701         nr = ctxt->nsNr;
1702     }
1703     if (ctxt->nsNr <= 0)
1704         return (0);
1705 
1706     for (i = 0;i < nr;i++) {
1707          ctxt->nsNr--;
1708 	 ctxt->nsTab[ctxt->nsNr] = NULL;
1709     }
1710     return(nr);
1711 }
1712 #endif
1713 
1714 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1715 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1716     const xmlChar **atts;
1717     int *attallocs;
1718     int maxatts;
1719 
1720     if (ctxt->atts == NULL) {
1721 	maxatts = 55; /* allow for 10 attrs by default */
1722 	atts = (const xmlChar **)
1723 	       xmlMalloc(maxatts * sizeof(xmlChar *));
1724 	if (atts == NULL) goto mem_error;
1725 	ctxt->atts = atts;
1726 	attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1727 	if (attallocs == NULL) goto mem_error;
1728 	ctxt->attallocs = attallocs;
1729 	ctxt->maxatts = maxatts;
1730     } else if (nr + 5 > ctxt->maxatts) {
1731 	maxatts = (nr + 5) * 2;
1732 	atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1733 				     maxatts * sizeof(const xmlChar *));
1734 	if (atts == NULL) goto mem_error;
1735 	ctxt->atts = atts;
1736 	attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1737 	                             (maxatts / 5) * sizeof(int));
1738 	if (attallocs == NULL) goto mem_error;
1739 	ctxt->attallocs = attallocs;
1740 	ctxt->maxatts = maxatts;
1741     }
1742     return(ctxt->maxatts);
1743 mem_error:
1744     xmlErrMemory(ctxt, NULL);
1745     return(-1);
1746 }
1747 
1748 /**
1749  * inputPush:
1750  * @ctxt:  an XML parser context
1751  * @value:  the parser input
1752  *
1753  * Pushes a new parser input on top of the input stack
1754  *
1755  * Returns -1 in case of error, the index in the stack otherwise
1756  */
1757 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1758 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1759 {
1760     if ((ctxt == NULL) || (value == NULL))
1761         return(-1);
1762     if (ctxt->inputNr >= ctxt->inputMax) {
1763         ctxt->inputMax *= 2;
1764         ctxt->inputTab =
1765             (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1766                                              ctxt->inputMax *
1767                                              sizeof(ctxt->inputTab[0]));
1768         if (ctxt->inputTab == NULL) {
1769             xmlErrMemory(ctxt, NULL);
1770 	    xmlFreeInputStream(value);
1771 	    ctxt->inputMax /= 2;
1772 	    value = NULL;
1773             return (-1);
1774         }
1775     }
1776     ctxt->inputTab[ctxt->inputNr] = value;
1777     ctxt->input = value;
1778     return (ctxt->inputNr++);
1779 }
1780 /**
1781  * inputPop:
1782  * @ctxt: an XML parser context
1783  *
1784  * Pops the top parser input from the input stack
1785  *
1786  * Returns the input just removed
1787  */
1788 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1789 inputPop(xmlParserCtxtPtr ctxt)
1790 {
1791     xmlParserInputPtr ret;
1792 
1793     if (ctxt == NULL)
1794         return(NULL);
1795     if (ctxt->inputNr <= 0)
1796         return (NULL);
1797     ctxt->inputNr--;
1798     if (ctxt->inputNr > 0)
1799         ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1800     else
1801         ctxt->input = NULL;
1802     ret = ctxt->inputTab[ctxt->inputNr];
1803     ctxt->inputTab[ctxt->inputNr] = NULL;
1804     return (ret);
1805 }
1806 /**
1807  * nodePush:
1808  * @ctxt:  an XML parser context
1809  * @value:  the element node
1810  *
1811  * Pushes a new element node on top of the node stack
1812  *
1813  * Returns -1 in case of error, the index in the stack otherwise
1814  */
1815 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1816 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1817 {
1818     if (ctxt == NULL) return(0);
1819     if (ctxt->nodeNr >= ctxt->nodeMax) {
1820         xmlNodePtr *tmp;
1821 
1822 	tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1823                                       ctxt->nodeMax * 2 *
1824                                       sizeof(ctxt->nodeTab[0]));
1825         if (tmp == NULL) {
1826             xmlErrMemory(ctxt, NULL);
1827             return (-1);
1828         }
1829         ctxt->nodeTab = tmp;
1830 	ctxt->nodeMax *= 2;
1831     }
1832     if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1833         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1834 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1835 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1836 			  xmlParserMaxDepth);
1837 	xmlHaltParser(ctxt);
1838 	return(-1);
1839     }
1840     ctxt->nodeTab[ctxt->nodeNr] = value;
1841     ctxt->node = value;
1842     return (ctxt->nodeNr++);
1843 }
1844 
1845 /**
1846  * nodePop:
1847  * @ctxt: an XML parser context
1848  *
1849  * Pops the top element node from the node stack
1850  *
1851  * Returns the node just removed
1852  */
1853 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1854 nodePop(xmlParserCtxtPtr ctxt)
1855 {
1856     xmlNodePtr ret;
1857 
1858     if (ctxt == NULL) return(NULL);
1859     if (ctxt->nodeNr <= 0)
1860         return (NULL);
1861     ctxt->nodeNr--;
1862     if (ctxt->nodeNr > 0)
1863         ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1864     else
1865         ctxt->node = NULL;
1866     ret = ctxt->nodeTab[ctxt->nodeNr];
1867     ctxt->nodeTab[ctxt->nodeNr] = NULL;
1868     return (ret);
1869 }
1870 
1871 /**
1872  * nameNsPush:
1873  * @ctxt:  an XML parser context
1874  * @value:  the element name
1875  * @prefix:  the element prefix
1876  * @URI:  the element namespace name
1877  * @line:  the current line number for error messages
1878  * @nsNr:  the number of namespaces pushed on the namespace table
1879  *
1880  * Pushes a new element name/prefix/URL on top of the name stack
1881  *
1882  * Returns -1 in case of error, the index in the stack otherwise
1883  */
1884 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr)1885 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1886            const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1887 {
1888     xmlStartTag *tag;
1889 
1890     if (ctxt->nameNr >= ctxt->nameMax) {
1891         const xmlChar * *tmp;
1892         xmlStartTag *tmp2;
1893         ctxt->nameMax *= 2;
1894         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1895                                     ctxt->nameMax *
1896                                     sizeof(ctxt->nameTab[0]));
1897         if (tmp == NULL) {
1898 	    ctxt->nameMax /= 2;
1899 	    goto mem_error;
1900         }
1901 	ctxt->nameTab = tmp;
1902         tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1903                                     ctxt->nameMax *
1904                                     sizeof(ctxt->pushTab[0]));
1905         if (tmp2 == NULL) {
1906 	    ctxt->nameMax /= 2;
1907 	    goto mem_error;
1908         }
1909 	ctxt->pushTab = tmp2;
1910     } else if (ctxt->pushTab == NULL) {
1911         ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1912                                             sizeof(ctxt->pushTab[0]));
1913         if (ctxt->pushTab == NULL)
1914             goto mem_error;
1915     }
1916     ctxt->nameTab[ctxt->nameNr] = value;
1917     ctxt->name = value;
1918     tag = &ctxt->pushTab[ctxt->nameNr];
1919     tag->prefix = prefix;
1920     tag->URI = URI;
1921     tag->line = line;
1922     tag->nsNr = nsNr;
1923     return (ctxt->nameNr++);
1924 mem_error:
1925     xmlErrMemory(ctxt, NULL);
1926     return (-1);
1927 }
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the top entry of the name stack and makes the name below it,
 * if there is one, the current name.
 *
 * Returns the name just removed, or NULL when the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return (NULL);

    top = --ctxt->nameNr;

    /* Take the name out of the table and clear its slot. */
    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;

    /* The new current name is whatever is now on top, if anything. */
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    return (popped);
}
#endif /* LIBXML_PUSH_ENABLED */
1954 
1955 /**
1956  * namePush:
1957  * @ctxt:  an XML parser context
1958  * @value:  the element name
1959  *
1960  * Pushes a new element name on top of the name stack
1961  *
1962  * Returns -1 in case of error, the index in the stack otherwise
1963  */
1964 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1965 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1966 {
1967     if (ctxt == NULL) return (-1);
1968 
1969     if (ctxt->nameNr >= ctxt->nameMax) {
1970         const xmlChar * *tmp;
1971         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1972                                     ctxt->nameMax * 2 *
1973                                     sizeof(ctxt->nameTab[0]));
1974         if (tmp == NULL) {
1975 	    goto mem_error;
1976         }
1977 	ctxt->nameTab = tmp;
1978         ctxt->nameMax *= 2;
1979     }
1980     ctxt->nameTab[ctxt->nameNr] = value;
1981     ctxt->name = value;
1982     return (ctxt->nameNr++);
1983 mem_error:
1984     xmlErrMemory(ctxt, NULL);
1985     return (-1);
1986 }
1987 /**
1988  * namePop:
1989  * @ctxt: an XML parser context
1990  *
1991  * Pops the top element name from the name stack
1992  *
1993  * Returns the name just removed
1994  */
1995 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1996 namePop(xmlParserCtxtPtr ctxt)
1997 {
1998     const xmlChar *ret;
1999 
2000     if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2001         return (NULL);
2002     ctxt->nameNr--;
2003     if (ctxt->nameNr > 0)
2004         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2005     else
2006         ctxt->name = NULL;
2007     ret = ctxt->nameTab[ctxt->nameNr];
2008     ctxt->nameTab[ctxt->nameNr] = NULL;
2009     return (ret);
2010 }
2011 
/*
 * spacePush:
 * Pushes val on top of the ctxt->spaceTab stack, enlarging the table when
 * it is full, and points ctxt->space at the new top entry.
 *
 * Returns -1 in case of allocation failure, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *tmp;
        int newMax;

        /*
         * Compute the new capacity first and store it only after the
         * reallocation succeeded. Doubling is refused once the int would
         * wrap; a table with no capacity yet gets a small initial size.
         */
        if (ctxt->spaceMax <= 0) {
            newMax = 10;
        } else if (ctxt->spaceMax > INT_MAX / 2) {
            xmlErrMemory(ctxt, NULL);
            return(-1);
        } else {
            newMax = ctxt->spaceMax * 2;
        }
        /* The byte count handed to the allocator must fit in a size_t. */
        if ((size_t) newMax > ((size_t) -1) / sizeof(ctxt->spaceTab[0])) {
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }

        tmp = (int *) xmlRealloc(ctxt->spaceTab,
                                 (size_t) newMax * sizeof(ctxt->spaceTab[0]));
        if (tmp == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }
        ctxt->spaceTab = tmp;
        ctxt->spaceMax = newMax;
    }
    ctxt->spaceTab[ctxt->spaceNr] = val;
    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
    return(ctxt->spaceNr++);
}
2030 
/*
 * spacePop:
 * Removes the top entry of the ctxt->spaceTab stack, overwrites its slot
 * with -1 and points ctxt->space at the entry below it (or at slot 0 when
 * nothing is left).
 *
 * Returns the value removed, or 0 when the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;

    /* ctxt->space always points into the table, slot 0 at the very least. */
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2043 
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR returns the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Like CUR, it should be used only
 *           to compare against ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operates on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
2078 
/*
 * Accessors for the current input. They all expand to expressions on a
 * variable named ctxt that must be in scope at the point of use.
 * RAW and CUR expand to the same expression.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes at s equal c1..cn.
 * Bytes are compared as unsigned char. Every argument is parenthesized so
 * that the cast and the comparison apply to the whole argument expression,
 * and the cast keeps the pointee const since the bytes are only read.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2102 
/*
 * SKIP(val): advance the input cursor and the column counter by val, then
 * ask for more input if the cursor landed on a 0 byte.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): advance the input cursor by val bytes one at a time so that
 * the line and column counters follow every newline that is crossed, then
 * ask for more input if the cursor landed on a 0 byte.
 * val is parenthesized so that any argument expression compares as a whole.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SHRINK: outside progressive mode, call xmlSHRINK() once more than
 * 2 * INPUT_CHUNK bytes lie behind the cursor and fewer than
 * 2 * INPUT_CHUNK bytes lie ahead of it.
 * The expansion is a bare if statement that already ends with a semicolon.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);
2125 
/*
 * xmlSHRINK:
 * Shrinks the current input via xmlParserInputShrink() and, if the cursor
 * then sits on a 0 byte, asks for INPUT_CHUNK more bytes of input.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputShrink(ctxt->input);

    /* Nothing more to do while there is still data under the cursor. */
    if (*ctxt->input->cur != 0)
        return;

    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
2131 
2132 #define GROW if ((ctxt->progressive == 0) &&				\
2133 		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
2134 	xmlGROW (ctxt);
2135 
xmlGROW(xmlParserCtxtPtr ctxt)2136 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2137     ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2138     ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2139 
2140     if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2141          (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2142          ((ctxt->input->buf) &&
2143           (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
2144         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2145         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2146         xmlHaltParser(ctxt);
2147 	return;
2148     }
2149     xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2150     if ((ctxt->input->cur > ctxt->input->end) ||
2151         (ctxt->input->cur < ctxt->input->base)) {
2152         xmlHaltParser(ctxt);
2153         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2154 	return;
2155     }
2156     if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2157         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2158 }
2159 
/* Shorthands for the character-level helpers, applied to the ctxt in scope. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the cursor and the column counter by one, then ask for
 * more input if the cursor landed on a 0 byte.
 * The expansion is a bare brace block, kept as found.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l bytes long,
 * updating the line and column counters. No input is requested here.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
  } while (0)

/* The length argument is an lvalue that receives the byte count. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and advance
 * i. A length l of 1 stores a single byte; any other length goes through
 * xmlCopyCharMultiByte(). The expansion is a bare if/else statement without
 * a trailing semicolon, kept as found; arguments are parenthesized.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2184 
2185 /**
2186  * xmlSkipBlankChars:
2187  * @ctxt:  the XML parser context
2188  *
2189  * skip all blanks character found at that point in the input streams.
2190  * It pops up finished entities in the process if allowable at that point.
2191  *
2192  * Returns the number of space chars skipped
2193  */
2194 
2195 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2196 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2197     int res = 0;
2198 
2199     /*
2200      * It's Okay to use CUR/NEXT here since all the blanks are on
2201      * the ASCII range.
2202      */
2203     if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2204         (ctxt->instate == XML_PARSER_START)) {
2205 	const xmlChar *cur;
2206 	/*
2207 	 * if we are in the document content, go really fast
2208 	 */
2209 	cur = ctxt->input->cur;
2210 	while (IS_BLANK_CH(*cur)) {
2211 	    if (*cur == '\n') {
2212 		ctxt->input->line++; ctxt->input->col = 1;
2213 	    } else {
2214 		ctxt->input->col++;
2215 	    }
2216 	    cur++;
2217 	    res++;
2218 	    if (*cur == 0) {
2219 		ctxt->input->cur = cur;
2220 		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2221 		cur = ctxt->input->cur;
2222 	    }
2223 	}
2224 	ctxt->input->cur = cur;
2225     } else {
2226         int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2227 
2228 	while (1) {
2229             if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2230 		NEXT;
2231 	    } else if (CUR == '%') {
2232                 /*
2233                  * Need to handle support of entities branching here
2234                  */
2235 	        if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2236                     break;
2237 	        xmlParsePEReference(ctxt);
2238             } else if (CUR == 0) {
2239                 if (ctxt->inputNr <= 1)
2240                     break;
2241                 xmlPopInput(ctxt);
2242             } else {
2243                 break;
2244             }
2245 
2246             /*
2247              * Also increase the counter when entering or exiting a PERef.
2248              * The spec says: "When a parameter-entity reference is recognized
2249              * in the DTD and included, its replacement text MUST be enlarged
2250              * by the attachment of one leading and one following space (#x20)
2251              * character."
2252              */
2253 	    res++;
2254         }
2255     }
2256     return(res);
2257 }
2258 
2259 /************************************************************************
2260  *									*
2261  *		Commodity functions to handle entities			*
2262  *									*
2263  ************************************************************************/
2264 
2265 /**
2266  * xmlPopInput:
2267  * @ctxt:  an XML parser context
2268  *
2269  * xmlPopInput: the current input pointed by ctxt->input came to an end
2270  *          pop it and return the next char.
2271  *
2272  * Returns the current xmlChar in the parser context
2273  */
2274 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2275 xmlPopInput(xmlParserCtxtPtr ctxt) {
2276     if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2277     if (xmlParserDebugEntities)
2278 	xmlGenericError(xmlGenericErrorContext,
2279 		"Popping input %d\n", ctxt->inputNr);
2280     if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2281         (ctxt->instate != XML_PARSER_EOF))
2282         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2283                     "Unfinished entity outside the DTD");
2284     xmlFreeInputStream(inputPop(ctxt));
2285     if (*ctxt->input->cur == 0)
2286         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2287     return(CUR);
2288 }
2289 
2290 /**
2291  * xmlPushInput:
2292  * @ctxt:  an XML parser context
2293  * @input:  an XML parser input fragment (entity, XML fragment ...).
2294  *
2295  * xmlPushInput: switch to a new input stream which is stacked on top
2296  *               of the previous one(s).
2297  * Returns -1 in case of error or the index in the input stack
2298  */
2299 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2300 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2301     int ret;
2302     if (input == NULL) return(-1);
2303 
2304     if (xmlParserDebugEntities) {
2305 	if ((ctxt->input != NULL) && (ctxt->input->filename))
2306 	    xmlGenericError(xmlGenericErrorContext,
2307 		    "%s(%d): ", ctxt->input->filename,
2308 		    ctxt->input->line);
2309 	xmlGenericError(xmlGenericErrorContext,
2310 		"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2311     }
2312     if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2313         (ctxt->inputNr > 1024)) {
2314         xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2315         while (ctxt->inputNr > 1)
2316             xmlFreeInputStream(inputPop(ctxt));
2317 	return(-1);
2318     }
2319     ret = inputPush(ctxt, input);
2320     if (ctxt->instate == XML_PARSER_EOF)
2321         return(-1);
2322     GROW;
2323     return(ret);
2324 }
2325 
2326 /**
2327  * xmlParseCharRef:
2328  * @ctxt:  an XML parser context
2329  *
2330  * parse Reference declarations
2331  *
2332  * [66] CharRef ::= '&#' [0-9]+ ';' |
2333  *                  '&#x' [0-9a-fA-F]+ ';'
2334  *
2335  * [ WFC: Legal Character ]
2336  * Characters referred to using character references must match the
2337  * production for Char.
2338  *
2339  * Returns the value parsed (as an int), 0 in case of error
2340  */
2341 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2342 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2343     int val = 0;
2344     int count = 0;
2345 
2346     /*
2347      * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2348      */
2349     if ((RAW == '&') && (NXT(1) == '#') &&
2350         (NXT(2) == 'x')) {
2351 	SKIP(3);
2352 	GROW;
2353 	while (RAW != ';') { /* loop blocked by count */
2354 	    if (count++ > 20) {
2355 		count = 0;
2356 		GROW;
2357                 if (ctxt->instate == XML_PARSER_EOF)
2358                     return(0);
2359 	    }
2360 	    if ((RAW >= '0') && (RAW <= '9'))
2361 	        val = val * 16 + (CUR - '0');
2362 	    else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2363 	        val = val * 16 + (CUR - 'a') + 10;
2364 	    else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2365 	        val = val * 16 + (CUR - 'A') + 10;
2366 	    else {
2367 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2368 		val = 0;
2369 		break;
2370 	    }
2371 	    if (val > 0x110000)
2372 	        val = 0x110000;
2373 
2374 	    NEXT;
2375 	    count++;
2376 	}
2377 	if (RAW == ';') {
2378 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2379 	    ctxt->input->col++;
2380 	    ctxt->input->cur++;
2381 	}
2382     } else if  ((RAW == '&') && (NXT(1) == '#')) {
2383 	SKIP(2);
2384 	GROW;
2385 	while (RAW != ';') { /* loop blocked by count */
2386 	    if (count++ > 20) {
2387 		count = 0;
2388 		GROW;
2389                 if (ctxt->instate == XML_PARSER_EOF)
2390                     return(0);
2391 	    }
2392 	    if ((RAW >= '0') && (RAW <= '9'))
2393 	        val = val * 10 + (CUR - '0');
2394 	    else {
2395 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2396 		val = 0;
2397 		break;
2398 	    }
2399 	    if (val > 0x110000)
2400 	        val = 0x110000;
2401 
2402 	    NEXT;
2403 	    count++;
2404 	}
2405 	if (RAW == ';') {
2406 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2407 	    ctxt->input->col++;
2408 	    ctxt->input->cur++;
2409 	}
2410     } else {
2411         xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2412     }
2413 
2414     /*
2415      * [ WFC: Legal Character ]
2416      * Characters referred to using character references must match the
2417      * production for Char.
2418      */
2419     if (val >= 0x110000) {
2420         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2421                 "xmlParseCharRef: character reference out of bounds\n",
2422 	        val);
2423     } else if (IS_CHAR(val)) {
2424         return(val);
2425     } else {
2426         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2427                           "xmlParseCharRef: invalid xmlChar value %d\n",
2428 	                  val);
2429     }
2430     return(0);
2431 }
2432 
2433 /**
2434  * xmlParseStringCharRef:
2435  * @ctxt:  an XML parser context
2436  * @str:  a pointer to an index in the string
2437  *
2438  * parse Reference declarations, variant parsing from a string rather
2439  * than an an input flow.
2440  *
2441  * [66] CharRef ::= '&#' [0-9]+ ';' |
2442  *                  '&#x' [0-9a-fA-F]+ ';'
2443  *
2444  * [ WFC: Legal Character ]
2445  * Characters referred to using character references must match the
2446  * production for Char.
2447  *
2448  * Returns the value parsed (as an int), 0 in case of error, str will be
2449  *         updated to the current value of the index
2450  */
2451 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2452 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2453     const xmlChar *ptr;
2454     xmlChar cur;
2455     int val = 0;
2456 
2457     if ((str == NULL) || (*str == NULL)) return(0);
2458     ptr = *str;
2459     cur = *ptr;
2460     if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2461 	ptr += 3;
2462 	cur = *ptr;
2463 	while (cur != ';') { /* Non input consuming loop */
2464 	    if ((cur >= '0') && (cur <= '9'))
2465 	        val = val * 16 + (cur - '0');
2466 	    else if ((cur >= 'a') && (cur <= 'f'))
2467 	        val = val * 16 + (cur - 'a') + 10;
2468 	    else if ((cur >= 'A') && (cur <= 'F'))
2469 	        val = val * 16 + (cur - 'A') + 10;
2470 	    else {
2471 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2472 		val = 0;
2473 		break;
2474 	    }
2475 	    if (val > 0x110000)
2476 	        val = 0x110000;
2477 
2478 	    ptr++;
2479 	    cur = *ptr;
2480 	}
2481 	if (cur == ';')
2482 	    ptr++;
2483     } else if  ((cur == '&') && (ptr[1] == '#')){
2484 	ptr += 2;
2485 	cur = *ptr;
2486 	while (cur != ';') { /* Non input consuming loops */
2487 	    if ((cur >= '0') && (cur <= '9'))
2488 	        val = val * 10 + (cur - '0');
2489 	    else {
2490 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2491 		val = 0;
2492 		break;
2493 	    }
2494 	    if (val > 0x110000)
2495 	        val = 0x110000;
2496 
2497 	    ptr++;
2498 	    cur = *ptr;
2499 	}
2500 	if (cur == ';')
2501 	    ptr++;
2502     } else {
2503 	xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2504 	return(0);
2505     }
2506     *str = ptr;
2507 
2508     /*
2509      * [ WFC: Legal Character ]
2510      * Characters referred to using character references must match the
2511      * production for Char.
2512      */
2513     if (val >= 0x110000) {
2514         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2515                 "xmlParseStringCharRef: character reference out of bounds\n",
2516                 val);
2517     } else if (IS_CHAR(val)) {
2518         return(val);
2519     } else {
2520         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2521 			  "xmlParseStringCharRef: invalid xmlChar value %d\n",
2522 			  val);
2523     }
2524     return(0);
2525 }
2526 
2527 /**
2528  * xmlParserHandlePEReference:
2529  * @ctxt:  the parser context
2530  *
2531  * [69] PEReference ::= '%' Name ';'
2532  *
2533  * [ WFC: No Recursion ]
2534  * A parsed entity must not contain a recursive
2535  * reference to itself, either directly or indirectly.
2536  *
2537  * [ WFC: Entity Declared ]
2538  * In a document without any DTD, a document with only an internal DTD
2539  * subset which contains no parameter entity references, or a document
2540  * with "standalone='yes'", ...  ... The declaration of a parameter
2541  * entity must precede any reference to it...
2542  *
2543  * [ VC: Entity Declared ]
2544  * In a document with an external subset or external parameter entities
2545  * with "standalone='no'", ...  ... The declaration of a parameter entity
2546  * must precede any reference to it...
2547  *
2548  * [ WFC: In DTD ]
2549  * Parameter-entity references may only appear in the DTD.
2550  * NOTE: misleading but this is handled.
2551  *
2552  * A PEReference may have been detected in the current input stream
2553  * the handling is done accordingly to
2554  *      http://www.w3.org/TR/REC-xml#entproc
2555  * i.e.
2556  *   - Included in literal in entity values
2557  *   - Included as Parameter Entity reference within DTDs
2558  */
2559 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2560 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2561     switch(ctxt->instate) {
2562 	case XML_PARSER_CDATA_SECTION:
2563 	    return;
2564         case XML_PARSER_COMMENT:
2565 	    return;
2566 	case XML_PARSER_START_TAG:
2567 	    return;
2568 	case XML_PARSER_END_TAG:
2569 	    return;
2570         case XML_PARSER_EOF:
2571 	    xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2572 	    return;
2573         case XML_PARSER_PROLOG:
2574 	case XML_PARSER_START:
2575 	case XML_PARSER_MISC:
2576 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2577 	    return;
2578 	case XML_PARSER_ENTITY_DECL:
2579         case XML_PARSER_CONTENT:
2580         case XML_PARSER_ATTRIBUTE_VALUE:
2581         case XML_PARSER_PI:
2582 	case XML_PARSER_SYSTEM_LITERAL:
2583 	case XML_PARSER_PUBLIC_LITERAL:
2584 	    /* we just ignore it there */
2585 	    return;
2586         case XML_PARSER_EPILOG:
2587 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2588 	    return;
2589 	case XML_PARSER_ENTITY_VALUE:
2590 	    /*
2591 	     * NOTE: in the case of entity values, we don't do the
2592 	     *       substitution here since we need the literal
2593 	     *       entity value to be able to save the internal
2594 	     *       subset of the document.
2595 	     *       This will be handled by xmlStringDecodeEntities
2596 	     */
2597 	    return;
2598         case XML_PARSER_DTD:
2599 	    /*
2600 	     * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2601 	     * In the internal DTD subset, parameter-entity references
2602 	     * can occur only where markup declarations can occur, not
2603 	     * within markup declarations.
2604 	     * In that case this is handled in xmlParseMarkupDecl
2605 	     */
2606 	    if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2607 		return;
2608 	    if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2609 		return;
2610             break;
2611         case XML_PARSER_IGNORE:
2612             return;
2613     }
2614 
2615     xmlParsePEReference(ctxt);
2616 }
2617 
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is also
 * the target when the new size wraps around.
 * n is parenthesized so that any argument expression is added as a whole.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2632 
2633 /**
2634  * xmlStringLenDecodeEntities:
2635  * @ctxt:  the parser context
2636  * @str:  the input string
2637  * @len: the string length
2638  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2639  * @end:  an end marker xmlChar, 0 if none
2640  * @end2:  an end marker xmlChar, 0 if none
2641  * @end3:  an end marker xmlChar, 0 if none
2642  *
2643  * Takes a entity string content and process to do the adequate substitutions.
2644  *
2645  * [67] Reference ::= EntityRef | CharRef
2646  *
2647  * [69] PEReference ::= '%' Name ';'
2648  *
2649  * Returns A newly allocated string with the substitution done. The caller
2650  *      must deallocate it !
2651  */
2652 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what,xmlChar end,xmlChar end2,xmlChar end3)2653 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2654 		      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2655     xmlChar *buffer = NULL;
2656     size_t buffer_size = 0;
2657     size_t nbchars = 0;
2658 
2659     xmlChar *current = NULL;
2660     xmlChar *rep = NULL;
2661     const xmlChar *last;
2662     xmlEntityPtr ent;
2663     int c,l;
2664 
2665     if ((ctxt == NULL) || (str == NULL) || (len < 0))
2666 	return(NULL);
2667     last = str + len;
2668 
2669     if (((ctxt->depth > 40) &&
2670          ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2671 	(ctxt->depth > 1024)) {
2672 	xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2673 	return(NULL);
2674     }
2675 
2676     /*
2677      * allocate a translation buffer.
2678      */
2679     buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2680     buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2681     if (buffer == NULL) goto mem_error;
2682 
2683     /*
2684      * OK loop until we reach one of the ending char or a size limit.
2685      * we are operating on already parsed values.
2686      */
2687     if (str < last)
2688 	c = CUR_SCHAR(str, l);
2689     else
2690         c = 0;
2691     while ((c != 0) && (c != end) && /* non input consuming loop */
2692            (c != end2) && (c != end3) &&
2693            (ctxt->instate != XML_PARSER_EOF)) {
2694 
2695 	if (c == 0) break;
2696         if ((c == '&') && (str[1] == '#')) {
2697 	    int val = xmlParseStringCharRef(ctxt, &str);
2698 	    if (val == 0)
2699                 goto int_error;
2700 	    COPY_BUF(0,buffer,nbchars,val);
2701 	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2702 	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2703 	    }
2704 	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2705 	    if (xmlParserDebugEntities)
2706 		xmlGenericError(xmlGenericErrorContext,
2707 			"String decoding Entity Reference: %.30s\n",
2708 			str);
2709 	    ent = xmlParseStringEntityRef(ctxt, &str);
2710 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
2711 	    if (ent != NULL)
2712 	        ctxt->nbentities += ent->checked / 2;
2713 	    if ((ent != NULL) &&
2714 		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2715 		if (ent->content != NULL) {
2716 		    COPY_BUF(0,buffer,nbchars,ent->content[0]);
2717 		    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2718 			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2719 		    }
2720 		} else {
2721 		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2722 			    "predefined entity has no content\n");
2723                     goto int_error;
2724 		}
2725 	    } else if ((ent != NULL) && (ent->content != NULL)) {
2726 		ctxt->depth++;
2727 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2728 			                      0, 0, 0);
2729 		ctxt->depth--;
2730 		if (rep == NULL) {
2731                     ent->content[0] = 0;
2732                     goto int_error;
2733                 }
2734 
2735                 current = rep;
2736                 while (*current != 0) { /* non input consuming loop */
2737                     buffer[nbchars++] = *current++;
2738                     if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2739                         if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2740                             goto int_error;
2741                         growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2742                     }
2743                 }
2744                 xmlFree(rep);
2745                 rep = NULL;
2746 	    } else if (ent != NULL) {
2747 		int i = xmlStrlen(ent->name);
2748 		const xmlChar *cur = ent->name;
2749 
2750 		buffer[nbchars++] = '&';
2751 		if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2752 		    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2753 		}
2754 		for (;i > 0;i--)
2755 		    buffer[nbchars++] = *cur++;
2756 		buffer[nbchars++] = ';';
2757 	    }
2758 	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2759 	    if (xmlParserDebugEntities)
2760 		xmlGenericError(xmlGenericErrorContext,
2761 			"String decoding PE Reference: %.30s\n", str);
2762 	    ent = xmlParseStringPEReference(ctxt, &str);
2763 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
2764 	    if (ent != NULL)
2765 	        ctxt->nbentities += ent->checked / 2;
2766 	    if (ent != NULL) {
2767                 if (ent->content == NULL) {
2768 		    /*
2769 		     * Note: external parsed entities will not be loaded,
2770 		     * it is not required for a non-validating parser to
2771 		     * complete external PEReferences coming from the
2772 		     * internal subset
2773 		     */
2774 		    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2775 			((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2776 			(ctxt->validate != 0)) {
2777 			xmlLoadEntityContent(ctxt, ent);
2778 		    } else {
2779 			xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2780 		  "not validating will not read content for PE entity %s\n",
2781 		                      ent->name, NULL);
2782 		    }
2783 		}
2784 		ctxt->depth++;
2785 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2786 			                      0, 0, 0);
2787 		ctxt->depth--;
2788 		if (rep == NULL) {
2789                     if (ent->content != NULL)
2790                         ent->content[0] = 0;
2791                     goto int_error;
2792                 }
2793                 current = rep;
2794                 while (*current != 0) { /* non input consuming loop */
2795                     buffer[nbchars++] = *current++;
2796                     if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2797                         if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2798                             goto int_error;
2799                         growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2800                     }
2801                 }
2802                 xmlFree(rep);
2803                 rep = NULL;
2804 	    }
2805 	} else {
2806 	    COPY_BUF(l,buffer,nbchars,c);
2807 	    str += l;
2808 	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2809 	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2810 	    }
2811 	}
2812 	if (str < last)
2813 	    c = CUR_SCHAR(str, l);
2814 	else
2815 	    c = 0;
2816     }
2817     buffer[nbchars] = 0;
2818     return(buffer);
2819 
2820 mem_error:
2821     xmlErrMemory(ctxt, NULL);
2822 int_error:
2823     if (rep != NULL)
2824         xmlFree(rep);
2825     if (buffer != NULL)
2826         xmlFree(buffer);
2827     return(NULL);
2828 }
2829 
2830 /**
2831  * xmlStringDecodeEntities:
2832  * @ctxt:  the parser context
2833  * @str:  the input string
2834  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2835  * @end:  an end marker xmlChar, 0 if none
2836  * @end2:  an end marker xmlChar, 0 if none
2837  * @end3:  an end marker xmlChar, 0 if none
2838  *
2839  * Takes a entity string content and process to do the adequate substitutions.
2840  *
2841  * [67] Reference ::= EntityRef | CharRef
2842  *
2843  * [69] PEReference ::= '%' Name ';'
2844  *
2845  * Returns A newly allocated string with the substitution done. The caller
2846  *      must deallocate it !
2847  */
2848 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2849 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2850 		        xmlChar end, xmlChar  end2, xmlChar end3) {
2851     if ((ctxt == NULL) || (str == NULL)) return(NULL);
2852     return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2853            end, end2, end3));
2854 }
2855 
2856 /************************************************************************
2857  *									*
2858  *		Commodity functions, cleanup needed ?			*
2859  *									*
2860  ************************************************************************/
2861 
2862 /**
2863  * areBlanks:
2864  * @ctxt:  an XML parser context
2865  * @str:  a xmlChar *
2866  * @len:  the size of @str
2867  * @blank_chars: we know the chars are blanks
2868  *
2869  * Is this a sequence of blank chars that one can ignore ?
2870  *
2871  * Returns 1 if ignorable 0 otherwise.
2872  */
2873 
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2874 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2875                      int blank_chars) {
2876     int i, ret;
2877     xmlNodePtr lastChild;
2878 
2879     /*
2880      * Don't spend time trying to differentiate them, the same callback is
2881      * used !
2882      */
2883     if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2884 	return(0);
2885 
2886     /*
2887      * Check for xml:space value.
2888      */
2889     if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2890         (*(ctxt->space) == -2))
2891 	return(0);
2892 
2893     /*
2894      * Check that the string is made of blanks
2895      */
2896     if (blank_chars == 0) {
2897 	for (i = 0;i < len;i++)
2898 	    if (!(IS_BLANK_CH(str[i]))) return(0);
2899     }
2900 
2901     /*
2902      * Look if the element is mixed content in the DTD if available
2903      */
2904     if (ctxt->node == NULL) return(0);
2905     if (ctxt->myDoc != NULL) {
2906 	ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2907         if (ret == 0) return(1);
2908         if (ret == 1) return(0);
2909     }
2910 
2911     /*
2912      * Otherwise, heuristic :-\
2913      */
2914     if ((RAW != '<') && (RAW != 0xD)) return(0);
2915     if ((ctxt->node->children == NULL) &&
2916 	(RAW == '<') && (NXT(1) == '/')) return(0);
2917 
2918     lastChild = xmlGetLastChild(ctxt->node);
2919     if (lastChild == NULL) {
2920         if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2921             (ctxt->node->content != NULL)) return(0);
2922     } else if (xmlNodeIsText(lastChild))
2923         return(0);
2924     else if ((ctxt->node->children != NULL) &&
2925              (xmlNodeIsText(ctxt->node->children)))
2926         return(0);
2927     return(1);
2928 }
2929 
2930 /************************************************************************
2931  *									*
2932  *		Extra stuff for namespace support			*
2933  *	Relates to http://www.w3.org/TR/WD-xml-names			*
2934  *									*
2935  ************************************************************************/
2936 
2937 /**
2938  * xmlSplitQName:
2939  * @ctxt:  an XML parser context
2940  * @name:  an XML parser context
2941  * @prefix:  a xmlChar **
2942  *
2943  * parse an UTF8 encoded XML qualified name string
2944  *
2945  * [NS 5] QName ::= (Prefix ':')? LocalPart
2946  *
2947  * [NS 6] Prefix ::= NCName
2948  *
2949  * [NS 7] LocalPart ::= NCName
2950  *
2951  * Returns the local part, and prefix is updated
2952  *   to get the Prefix if any.
2953  */
2954 
2955 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefix)2956 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2957     xmlChar buf[XML_MAX_NAMELEN + 5];
2958     xmlChar *buffer = NULL;
2959     int len = 0;
2960     int max = XML_MAX_NAMELEN;
2961     xmlChar *ret = NULL;
2962     const xmlChar *cur = name;
2963     int c;
2964 
2965     if (prefix == NULL) return(NULL);
2966     *prefix = NULL;
2967 
2968     if (cur == NULL) return(NULL);
2969 
2970 #ifndef XML_XML_NAMESPACE
2971     /* xml: prefix is not really a namespace */
2972     if ((cur[0] == 'x') && (cur[1] == 'm') &&
2973         (cur[2] == 'l') && (cur[3] == ':'))
2974 	return(xmlStrdup(name));
2975 #endif
2976 
2977     /* nasty but well=formed */
2978     if (cur[0] == ':')
2979 	return(xmlStrdup(name));
2980 
2981     c = *cur++;
2982     while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2983 	buf[len++] = c;
2984 	c = *cur++;
2985     }
2986     if (len >= max) {
2987 	/*
2988 	 * Okay someone managed to make a huge name, so he's ready to pay
2989 	 * for the processing speed.
2990 	 */
2991 	max = len * 2;
2992 
2993 	buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2994 	if (buffer == NULL) {
2995 	    xmlErrMemory(ctxt, NULL);
2996 	    return(NULL);
2997 	}
2998 	memcpy(buffer, buf, len);
2999 	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3000 	    if (len + 10 > max) {
3001 	        xmlChar *tmp;
3002 
3003 		max *= 2;
3004 		tmp = (xmlChar *) xmlRealloc(buffer,
3005 						max * sizeof(xmlChar));
3006 		if (tmp == NULL) {
3007 		    xmlFree(buffer);
3008 		    xmlErrMemory(ctxt, NULL);
3009 		    return(NULL);
3010 		}
3011 		buffer = tmp;
3012 	    }
3013 	    buffer[len++] = c;
3014 	    c = *cur++;
3015 	}
3016 	buffer[len] = 0;
3017     }
3018 
3019     if ((c == ':') && (*cur == 0)) {
3020         if (buffer != NULL)
3021 	    xmlFree(buffer);
3022 	*prefix = NULL;
3023 	return(xmlStrdup(name));
3024     }
3025 
3026     if (buffer == NULL)
3027 	ret = xmlStrndup(buf, len);
3028     else {
3029 	ret = buffer;
3030 	buffer = NULL;
3031 	max = XML_MAX_NAMELEN;
3032     }
3033 
3034 
3035     if (c == ':') {
3036 	c = *cur;
3037         *prefix = ret;
3038 	if (c == 0) {
3039 	    return(xmlStrndup(BAD_CAST "", 0));
3040 	}
3041 	len = 0;
3042 
3043 	/*
3044 	 * Check that the first character is proper to start
3045 	 * a new name
3046 	 */
3047 	if (!(((c >= 0x61) && (c <= 0x7A)) ||
3048 	      ((c >= 0x41) && (c <= 0x5A)) ||
3049 	      (c == '_') || (c == ':'))) {
3050 	    int l;
3051 	    int first = CUR_SCHAR(cur, l);
3052 
3053 	    if (!IS_LETTER(first) && (first != '_')) {
3054 		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3055 			    "Name %s is not XML Namespace compliant\n",
3056 				  name);
3057 	    }
3058 	}
3059 	cur++;
3060 
3061 	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3062 	    buf[len++] = c;
3063 	    c = *cur++;
3064 	}
3065 	if (len >= max) {
3066 	    /*
3067 	     * Okay someone managed to make a huge name, so he's ready to pay
3068 	     * for the processing speed.
3069 	     */
3070 	    max = len * 2;
3071 
3072 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3073 	    if (buffer == NULL) {
3074 	        xmlErrMemory(ctxt, NULL);
3075 		return(NULL);
3076 	    }
3077 	    memcpy(buffer, buf, len);
3078 	    while (c != 0) { /* tested bigname2.xml */
3079 		if (len + 10 > max) {
3080 		    xmlChar *tmp;
3081 
3082 		    max *= 2;
3083 		    tmp = (xmlChar *) xmlRealloc(buffer,
3084 						    max * sizeof(xmlChar));
3085 		    if (tmp == NULL) {
3086 			xmlErrMemory(ctxt, NULL);
3087 			xmlFree(buffer);
3088 			return(NULL);
3089 		    }
3090 		    buffer = tmp;
3091 		}
3092 		buffer[len++] = c;
3093 		c = *cur++;
3094 	    }
3095 	    buffer[len] = 0;
3096 	}
3097 
3098 	if (buffer == NULL)
3099 	    ret = xmlStrndup(buf, len);
3100 	else {
3101 	    ret = buffer;
3102 	}
3103     }
3104 
3105     return(ret);
3106 }
3107 
3108 /************************************************************************
3109  *									*
3110  *			The parser itself				*
3111  *	Relates to http://www.w3.org/TR/REC-xml				*
3112  *									*
3113  ************************************************************************/
3114 
3115 /************************************************************************
3116  *									*
3117  *	Routines to parse Name, NCName and NmToken			*
3118  *									*
3119  ************************************************************************/
#if defined(DEBUG)
/*
 * Call counters, only compiled in DEBUG builds. Each one is incremented
 * on entry to the parsing routine it is named after.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseStringName = 0;
static unsigned long nbParseNmToken = 0;
#endif /* DEBUG */
3128 
3129 /*
3130  * The two following functions are related to the change of accepted
3131  * characters for Name and NmToken in the Revision 5 of XML-1.0
3132  * They correspond to the modified production [4] and the new production [4a]
3133  * changes in that revision. Also note that the macros used for the
3134  * productions Letter, Digit, CombiningChar and Extender are not needed
3135  * anymore.
3136  * We still keep compatibility with the pre-revision-5 parsing semantics if
3137  * the new XML_PARSE_OLD10 option is given to the parser.
3138  */
3139 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3140 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3141     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3142         /*
3143 	 * Use the new checks of production [4] [4a] amd [5] of the
3144 	 * Update 5 of XML-1.0
3145 	 */
3146 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3147 	    (((c >= 'a') && (c <= 'z')) ||
3148 	     ((c >= 'A') && (c <= 'Z')) ||
3149 	     (c == '_') || (c == ':') ||
3150 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3151 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3152 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3153 	     ((c >= 0x370) && (c <= 0x37D)) ||
3154 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3155 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3156 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3157 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3158 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3159 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3160 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3161 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3162 	    return(1);
3163     } else {
3164         if (IS_LETTER(c) || (c == '_') || (c == ':'))
3165 	    return(1);
3166     }
3167     return(0);
3168 }
3169 
3170 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3171 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3172     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3173         /*
3174 	 * Use the new checks of production [4] [4a] amd [5] of the
3175 	 * Update 5 of XML-1.0
3176 	 */
3177 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3178 	    (((c >= 'a') && (c <= 'z')) ||
3179 	     ((c >= 'A') && (c <= 'Z')) ||
3180 	     ((c >= '0') && (c <= '9')) || /* !start */
3181 	     (c == '_') || (c == ':') ||
3182 	     (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3183 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3184 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3185 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3186 	     ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3187 	     ((c >= 0x370) && (c <= 0x37D)) ||
3188 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3189 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3190 	     ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3191 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3192 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3193 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3194 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3195 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3196 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3197 	     return(1);
3198     } else {
3199         if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3200             (c == '.') || (c == '-') ||
3201 	    (c == '_') || (c == ':') ||
3202 	    (IS_COMBINING(c)) ||
3203 	    (IS_EXTENDER(c)))
3204 	    return(1);
3205     }
3206     return(0);
3207 }
3208 
3209 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3210                                           int *len, int *alloc, int normalize);
3211 
3212 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3213 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3214     int len = 0, l;
3215     int c;
3216     int count = 0;
3217     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3218                     XML_MAX_TEXT_LENGTH :
3219                     XML_MAX_NAME_LENGTH;
3220 
3221 #ifdef DEBUG
3222     nbParseNameComplex++;
3223 #endif
3224 
3225     /*
3226      * Handler for more complex cases
3227      */
3228     GROW;
3229     if (ctxt->instate == XML_PARSER_EOF)
3230         return(NULL);
3231     c = CUR_CHAR(l);
3232     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3233         /*
3234 	 * Use the new checks of production [4] [4a] amd [5] of the
3235 	 * Update 5 of XML-1.0
3236 	 */
3237 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3238 	    (!(((c >= 'a') && (c <= 'z')) ||
3239 	       ((c >= 'A') && (c <= 'Z')) ||
3240 	       (c == '_') || (c == ':') ||
3241 	       ((c >= 0xC0) && (c <= 0xD6)) ||
3242 	       ((c >= 0xD8) && (c <= 0xF6)) ||
3243 	       ((c >= 0xF8) && (c <= 0x2FF)) ||
3244 	       ((c >= 0x370) && (c <= 0x37D)) ||
3245 	       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3246 	       ((c >= 0x200C) && (c <= 0x200D)) ||
3247 	       ((c >= 0x2070) && (c <= 0x218F)) ||
3248 	       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3249 	       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3250 	       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3251 	       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3252 	       ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3253 	    return(NULL);
3254 	}
3255 	len += l;
3256 	NEXTL(l);
3257 	c = CUR_CHAR(l);
3258 	while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3259 	       (((c >= 'a') && (c <= 'z')) ||
3260 	        ((c >= 'A') && (c <= 'Z')) ||
3261 	        ((c >= '0') && (c <= '9')) || /* !start */
3262 	        (c == '_') || (c == ':') ||
3263 	        (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3264 	        ((c >= 0xC0) && (c <= 0xD6)) ||
3265 	        ((c >= 0xD8) && (c <= 0xF6)) ||
3266 	        ((c >= 0xF8) && (c <= 0x2FF)) ||
3267 	        ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3268 	        ((c >= 0x370) && (c <= 0x37D)) ||
3269 	        ((c >= 0x37F) && (c <= 0x1FFF)) ||
3270 	        ((c >= 0x200C) && (c <= 0x200D)) ||
3271 	        ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3272 	        ((c >= 0x2070) && (c <= 0x218F)) ||
3273 	        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3274 	        ((c >= 0x3001) && (c <= 0xD7FF)) ||
3275 	        ((c >= 0xF900) && (c <= 0xFDCF)) ||
3276 	        ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3277 	        ((c >= 0x10000) && (c <= 0xEFFFF))
3278 		)) {
3279 	    if (count++ > XML_PARSER_CHUNK_SIZE) {
3280 		count = 0;
3281 		GROW;
3282                 if (ctxt->instate == XML_PARSER_EOF)
3283                     return(NULL);
3284 	    }
3285             if (len <= INT_MAX - l)
3286 	        len += l;
3287 	    NEXTL(l);
3288 	    c = CUR_CHAR(l);
3289 	}
3290     } else {
3291 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3292 	    (!IS_LETTER(c) && (c != '_') &&
3293 	     (c != ':'))) {
3294 	    return(NULL);
3295 	}
3296 	len += l;
3297 	NEXTL(l);
3298 	c = CUR_CHAR(l);
3299 
3300 	while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3301 	       ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3302 		(c == '.') || (c == '-') ||
3303 		(c == '_') || (c == ':') ||
3304 		(IS_COMBINING(c)) ||
3305 		(IS_EXTENDER(c)))) {
3306 	    if (count++ > XML_PARSER_CHUNK_SIZE) {
3307 		count = 0;
3308 		GROW;
3309                 if (ctxt->instate == XML_PARSER_EOF)
3310                     return(NULL);
3311 	    }
3312             if (len <= INT_MAX - l)
3313 	        len += l;
3314 	    NEXTL(l);
3315 	    c = CUR_CHAR(l);
3316 	}
3317     }
3318     if (len > maxLength) {
3319         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3320         return(NULL);
3321     }
3322     if (ctxt->input->cur - ctxt->input->base < len) {
3323         /*
3324          * There were a couple of bugs where PERefs lead to to a change
3325          * of the buffer. Check the buffer size to avoid passing an invalid
3326          * pointer to xmlDictLookup.
3327          */
3328         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3329                     "unexpected change of input buffer");
3330         return (NULL);
3331     }
3332     if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3333         return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3334     return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3335 }
3336 
3337 /**
3338  * xmlParseName:
3339  * @ctxt:  an XML parser context
3340  *
3341  * parse an XML name.
3342  *
3343  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3344  *                  CombiningChar | Extender
3345  *
3346  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3347  *
3348  * [6] Names ::= Name (#x20 Name)*
3349  *
3350  * Returns the Name parsed or NULL
3351  */
3352 
3353 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3354 xmlParseName(xmlParserCtxtPtr ctxt) {
3355     const xmlChar *in;
3356     const xmlChar *ret;
3357     size_t count = 0;
3358     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3359                        XML_MAX_TEXT_LENGTH :
3360                        XML_MAX_NAME_LENGTH;
3361 
3362     GROW;
3363 
3364 #ifdef DEBUG
3365     nbParseName++;
3366 #endif
3367 
3368     /*
3369      * Accelerator for simple ASCII names
3370      */
3371     in = ctxt->input->cur;
3372     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3373 	((*in >= 0x41) && (*in <= 0x5A)) ||
3374 	(*in == '_') || (*in == ':')) {
3375 	in++;
3376 	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3377 	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3378 	       ((*in >= 0x30) && (*in <= 0x39)) ||
3379 	       (*in == '_') || (*in == '-') ||
3380 	       (*in == ':') || (*in == '.'))
3381 	    in++;
3382 	if ((*in > 0) && (*in < 0x80)) {
3383 	    count = in - ctxt->input->cur;
3384             if (count > maxLength) {
3385                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3386                 return(NULL);
3387             }
3388 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3389 	    ctxt->input->cur = in;
3390 	    ctxt->input->col += count;
3391 	    if (ret == NULL)
3392 	        xmlErrMemory(ctxt, NULL);
3393 	    return(ret);
3394 	}
3395     }
3396     /* accelerator for special cases */
3397     return(xmlParseNameComplex(ctxt));
3398 }
3399 
3400 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3401 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3402     int len = 0, l;
3403     int c;
3404     int count = 0;
3405     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3406                     XML_MAX_TEXT_LENGTH :
3407                     XML_MAX_NAME_LENGTH;
3408     size_t startPosition = 0;
3409 
3410 #ifdef DEBUG
3411     nbParseNCNameComplex++;
3412 #endif
3413 
3414     /*
3415      * Handler for more complex cases
3416      */
3417     GROW;
3418     startPosition = CUR_PTR - BASE_PTR;
3419     c = CUR_CHAR(l);
3420     if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3421 	(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3422 	return(NULL);
3423     }
3424 
3425     while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3426 	   (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3427 	if (count++ > XML_PARSER_CHUNK_SIZE) {
3428 	    count = 0;
3429 	    GROW;
3430             if (ctxt->instate == XML_PARSER_EOF)
3431                 return(NULL);
3432 	}
3433         if (len <= INT_MAX - l)
3434 	    len += l;
3435 	NEXTL(l);
3436 	c = CUR_CHAR(l);
3437 	if (c == 0) {
3438 	    count = 0;
3439 	    /*
3440 	     * when shrinking to extend the buffer we really need to preserve
3441 	     * the part of the name we already parsed. Hence rolling back
3442 	     * by current length.
3443 	     */
3444 	    ctxt->input->cur -= l;
3445 	    GROW;
3446             if (ctxt->instate == XML_PARSER_EOF)
3447                 return(NULL);
3448 	    ctxt->input->cur += l;
3449 	    c = CUR_CHAR(l);
3450 	}
3451     }
3452     if (len > maxLength) {
3453         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3454         return(NULL);
3455     }
3456     return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3457 }
3458 
3459 /**
3460  * xmlParseNCName:
3461  * @ctxt:  an XML parser context
3462  * @len:  length of the string parsed
3463  *
3464  * parse an XML name.
3465  *
3466  * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3467  *                      CombiningChar | Extender
3468  *
3469  * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3470  *
3471  * Returns the Name parsed or NULL
3472  */
3473 
3474 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3475 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3476     const xmlChar *in, *e;
3477     const xmlChar *ret;
3478     size_t count = 0;
3479     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3480                        XML_MAX_TEXT_LENGTH :
3481                        XML_MAX_NAME_LENGTH;
3482 
3483 #ifdef DEBUG
3484     nbParseNCName++;
3485 #endif
3486 
3487     /*
3488      * Accelerator for simple ASCII names
3489      */
3490     in = ctxt->input->cur;
3491     e = ctxt->input->end;
3492     if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3493 	 ((*in >= 0x41) && (*in <= 0x5A)) ||
3494 	 (*in == '_')) && (in < e)) {
3495 	in++;
3496 	while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3497 	        ((*in >= 0x41) && (*in <= 0x5A)) ||
3498 	        ((*in >= 0x30) && (*in <= 0x39)) ||
3499 	        (*in == '_') || (*in == '-') ||
3500 	        (*in == '.')) && (in < e))
3501 	    in++;
3502 	if (in >= e)
3503 	    goto complex;
3504 	if ((*in > 0) && (*in < 0x80)) {
3505 	    count = in - ctxt->input->cur;
3506             if (count > maxLength) {
3507                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3508                 return(NULL);
3509             }
3510 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3511 	    ctxt->input->cur = in;
3512 	    ctxt->input->col += count;
3513 	    if (ret == NULL) {
3514 	        xmlErrMemory(ctxt, NULL);
3515 	    }
3516 	    return(ret);
3517 	}
3518     }
3519 complex:
3520     return(xmlParseNCNameComplex(ctxt));
3521 }
3522 
3523 /**
3524  * xmlParseNameAndCompare:
3525  * @ctxt:  an XML parser context
3526  *
3527  * parse an XML name and compares for match
3528  * (specialized for endtag parsing)
3529  *
3530  * Returns NULL for an illegal name, (xmlChar*) 1 for success
3531  * and the name for mismatch
3532  */
3533 
3534 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3535 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3536     register const xmlChar *cmp = other;
3537     register const xmlChar *in;
3538     const xmlChar *ret;
3539 
3540     GROW;
3541     if (ctxt->instate == XML_PARSER_EOF)
3542         return(NULL);
3543 
3544     in = ctxt->input->cur;
3545     while (*in != 0 && *in == *cmp) {
3546 	++in;
3547 	++cmp;
3548     }
3549     if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3550 	/* success */
3551 	ctxt->input->col += in - ctxt->input->cur;
3552 	ctxt->input->cur = in;
3553 	return (const xmlChar*) 1;
3554     }
3555     /* failure (or end of input buffer), check with full function */
3556     ret = xmlParseName (ctxt);
3557     /* strings coming from the dictionary direct compare possible */
3558     if (ret == other) {
3559 	return (const xmlChar*) 1;
3560     }
3561     return ret;
3562 }
3563 
3564 /**
3565  * xmlParseStringName:
3566  * @ctxt:  an XML parser context
3567  * @str:  a pointer to the string pointer (IN/OUT)
3568  *
3569  * parse an XML name.
3570  *
3571  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3572  *                  CombiningChar | Extender
3573  *
3574  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3575  *
3576  * [6] Names ::= Name (#x20 Name)*
3577  *
3578  * Returns the Name parsed or NULL. The @str pointer
3579  * is updated to the current location in the string.
3580  */
3581 
3582 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3583 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3584     xmlChar buf[XML_MAX_NAMELEN + 5];
3585     const xmlChar *cur = *str;
3586     int len = 0, l;
3587     int c;
3588     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3589                     XML_MAX_TEXT_LENGTH :
3590                     XML_MAX_NAME_LENGTH;
3591 
3592 #ifdef DEBUG
3593     nbParseStringName++;
3594 #endif
3595 
3596     c = CUR_SCHAR(cur, l);
3597     if (!xmlIsNameStartChar(ctxt, c)) {
3598 	return(NULL);
3599     }
3600 
3601     COPY_BUF(l,buf,len,c);
3602     cur += l;
3603     c = CUR_SCHAR(cur, l);
3604     while (xmlIsNameChar(ctxt, c)) {
3605 	COPY_BUF(l,buf,len,c);
3606 	cur += l;
3607 	c = CUR_SCHAR(cur, l);
3608 	if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3609 	    /*
3610 	     * Okay someone managed to make a huge name, so he's ready to pay
3611 	     * for the processing speed.
3612 	     */
3613 	    xmlChar *buffer;
3614 	    int max = len * 2;
3615 
3616 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3617 	    if (buffer == NULL) {
3618 	        xmlErrMemory(ctxt, NULL);
3619 		return(NULL);
3620 	    }
3621 	    memcpy(buffer, buf, len);
3622 	    while (xmlIsNameChar(ctxt, c)) {
3623 		if (len + 10 > max) {
3624 		    xmlChar *tmp;
3625 
3626 		    max *= 2;
3627 		    tmp = (xmlChar *) xmlRealloc(buffer,
3628 			                            max * sizeof(xmlChar));
3629 		    if (tmp == NULL) {
3630 			xmlErrMemory(ctxt, NULL);
3631 			xmlFree(buffer);
3632 			return(NULL);
3633 		    }
3634 		    buffer = tmp;
3635 		}
3636 		COPY_BUF(l,buffer,len,c);
3637 		cur += l;
3638 		c = CUR_SCHAR(cur, l);
3639                 if (len > maxLength) {
3640                     xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3641                     xmlFree(buffer);
3642                     return(NULL);
3643                 }
3644 	    }
3645 	    buffer[len] = 0;
3646 	    *str = cur;
3647 	    return(buffer);
3648 	}
3649     }
3650     if (len > maxLength) {
3651         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3652         return(NULL);
3653     }
3654     *str = cur;
3655     return(xmlStrndup(buf, len));
3656 }
3657 
3658 /**
3659  * xmlParseNmtoken:
3660  * @ctxt:  an XML parser context
3661  *
3662  * parse an XML Nmtoken.
3663  *
3664  * [7] Nmtoken ::= (NameChar)+
3665  *
3666  * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3667  *
3668  * Returns the Nmtoken parsed or NULL
3669  */
3670 
3671 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3672 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3673     xmlChar buf[XML_MAX_NAMELEN + 5];
3674     int len = 0, l;
3675     int c;
3676     int count = 0;
3677     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3678                     XML_MAX_TEXT_LENGTH :
3679                     XML_MAX_NAME_LENGTH;
3680 
3681 #ifdef DEBUG
3682     nbParseNmToken++;
3683 #endif
3684 
3685     GROW;
3686     if (ctxt->instate == XML_PARSER_EOF)
3687         return(NULL);
3688     c = CUR_CHAR(l);
3689 
3690     while (xmlIsNameChar(ctxt, c)) {
3691 	if (count++ > XML_PARSER_CHUNK_SIZE) {
3692 	    count = 0;
3693 	    GROW;
3694 	}
3695 	COPY_BUF(l,buf,len,c);
3696 	NEXTL(l);
3697 	c = CUR_CHAR(l);
3698 	if (c == 0) {
3699 	    count = 0;
3700 	    GROW;
3701 	    if (ctxt->instate == XML_PARSER_EOF)
3702 		return(NULL);
3703             c = CUR_CHAR(l);
3704 	}
3705 	if (len >= XML_MAX_NAMELEN) {
3706 	    /*
3707 	     * Okay someone managed to make a huge token, so he's ready to pay
3708 	     * for the processing speed.
3709 	     */
3710 	    xmlChar *buffer;
3711 	    int max = len * 2;
3712 
3713 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3714 	    if (buffer == NULL) {
3715 	        xmlErrMemory(ctxt, NULL);
3716 		return(NULL);
3717 	    }
3718 	    memcpy(buffer, buf, len);
3719 	    while (xmlIsNameChar(ctxt, c)) {
3720 		if (count++ > XML_PARSER_CHUNK_SIZE) {
3721 		    count = 0;
3722 		    GROW;
3723                     if (ctxt->instate == XML_PARSER_EOF) {
3724                         xmlFree(buffer);
3725                         return(NULL);
3726                     }
3727 		}
3728 		if (len + 10 > max) {
3729 		    xmlChar *tmp;
3730 
3731 		    max *= 2;
3732 		    tmp = (xmlChar *) xmlRealloc(buffer,
3733 			                            max * sizeof(xmlChar));
3734 		    if (tmp == NULL) {
3735 			xmlErrMemory(ctxt, NULL);
3736 			xmlFree(buffer);
3737 			return(NULL);
3738 		    }
3739 		    buffer = tmp;
3740 		}
3741 		COPY_BUF(l,buffer,len,c);
3742 		NEXTL(l);
3743 		c = CUR_CHAR(l);
3744                 if (len > maxLength) {
3745                     xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3746                     xmlFree(buffer);
3747                     return(NULL);
3748                 }
3749 	    }
3750 	    buffer[len] = 0;
3751 	    return(buffer);
3752 	}
3753     }
3754     if (len == 0)
3755         return(NULL);
3756     if (len > maxLength) {
3757         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3758         return(NULL);
3759     }
3760     return(xmlStrndup(buf, len));
3761 }
3762 
3763 /**
3764  * xmlParseEntityValue:
3765  * @ctxt:  an XML parser context
3766  * @orig:  if non-NULL store a copy of the original entity value
3767  *
3768  * parse a value for ENTITY declarations
3769  *
3770  * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3771  *	               "'" ([^%&'] | PEReference | Reference)* "'"
3772  *
3773  * Returns the EntityValue parsed with reference substituted or NULL
3774  */
3775 
3776 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3777 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3778     xmlChar *buf = NULL;
3779     int len = 0;
3780     int size = XML_PARSER_BUFFER_SIZE;
3781     int c, l;
3782     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3783                     XML_MAX_HUGE_LENGTH :
3784                     XML_MAX_TEXT_LENGTH;
3785     xmlChar stop;
3786     xmlChar *ret = NULL;
3787     const xmlChar *cur = NULL;
3788     xmlParserInputPtr input;
3789 
3790     if (RAW == '"') stop = '"';
3791     else if (RAW == '\'') stop = '\'';
3792     else {
3793 	xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3794 	return(NULL);
3795     }
3796     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3797     if (buf == NULL) {
3798 	xmlErrMemory(ctxt, NULL);
3799 	return(NULL);
3800     }
3801 
3802     /*
3803      * The content of the entity definition is copied in a buffer.
3804      */
3805 
3806     ctxt->instate = XML_PARSER_ENTITY_VALUE;
3807     input = ctxt->input;
3808     GROW;
3809     if (ctxt->instate == XML_PARSER_EOF)
3810         goto error;
3811     NEXT;
3812     c = CUR_CHAR(l);
3813     /*
3814      * NOTE: 4.4.5 Included in Literal
3815      * When a parameter entity reference appears in a literal entity
3816      * value, ... a single or double quote character in the replacement
3817      * text is always treated as a normal data character and will not
3818      * terminate the literal.
3819      * In practice it means we stop the loop only when back at parsing
3820      * the initial entity and the quote is found
3821      */
3822     while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3823 	    (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3824 	if (len + 5 >= size) {
3825 	    xmlChar *tmp;
3826 
3827 	    size *= 2;
3828 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3829 	    if (tmp == NULL) {
3830 		xmlErrMemory(ctxt, NULL);
3831                 goto error;
3832 	    }
3833 	    buf = tmp;
3834 	}
3835 	COPY_BUF(l,buf,len,c);
3836 	NEXTL(l);
3837 
3838 	GROW;
3839 	c = CUR_CHAR(l);
3840 	if (c == 0) {
3841 	    GROW;
3842 	    c = CUR_CHAR(l);
3843 	}
3844 
3845         if (len > maxLength) {
3846             xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3847                            "entity value too long\n");
3848             goto error;
3849         }
3850     }
3851     buf[len] = 0;
3852     if (ctxt->instate == XML_PARSER_EOF)
3853         goto error;
3854     if (c != stop) {
3855         xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3856         goto error;
3857     }
3858     NEXT;
3859 
3860     /*
3861      * Raise problem w.r.t. '&' and '%' being used in non-entities
3862      * reference constructs. Note Charref will be handled in
3863      * xmlStringDecodeEntities()
3864      */
3865     cur = buf;
3866     while (*cur != 0) { /* non input consuming */
3867 	if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3868 	    xmlChar *name;
3869 	    xmlChar tmp = *cur;
3870             int nameOk = 0;
3871 
3872 	    cur++;
3873 	    name = xmlParseStringName(ctxt, &cur);
3874             if (name != NULL) {
3875                 nameOk = 1;
3876                 xmlFree(name);
3877             }
3878             if ((nameOk == 0) || (*cur != ';')) {
3879 		xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3880 	    "EntityValue: '%c' forbidden except for entities references\n",
3881 	                          tmp);
3882                 goto error;
3883 	    }
3884 	    if ((tmp == '%') && (ctxt->inSubset == 1) &&
3885 		(ctxt->inputNr == 1)) {
3886 		xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3887                 goto error;
3888 	    }
3889 	    if (*cur == 0)
3890 	        break;
3891 	}
3892 	cur++;
3893     }
3894 
3895     /*
3896      * Then PEReference entities are substituted.
3897      *
3898      * NOTE: 4.4.7 Bypassed
3899      * When a general entity reference appears in the EntityValue in
3900      * an entity declaration, it is bypassed and left as is.
3901      * so XML_SUBSTITUTE_REF is not set here.
3902      */
3903     ++ctxt->depth;
3904     ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3905                                   0, 0, 0);
3906     --ctxt->depth;
3907     if (orig != NULL) {
3908         *orig = buf;
3909         buf = NULL;
3910     }
3911 
3912 error:
3913     if (buf != NULL)
3914         xmlFree(buf);
3915     return(ret);
3916 }
3917 
3918 /**
3919  * xmlParseAttValueComplex:
3920  * @ctxt:  an XML parser context
3921  * @len:   the resulting attribute len
3922  * @normalize:  whether to apply the inner normalization
3923  *
3924  * parse a value for an attribute, this is the fallback function
3925  * of xmlParseAttValue() when the attribute parsing requires handling
3926  * of non-ASCII characters, or normalization compaction.
3927  *
3928  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3929  */
3930 static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt,int * attlen,int normalize)3931 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3932     xmlChar limit = 0;
3933     xmlChar *buf = NULL;
3934     xmlChar *rep = NULL;
3935     size_t len = 0;
3936     size_t buf_size = 0;
3937     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3938                        XML_MAX_HUGE_LENGTH :
3939                        XML_MAX_TEXT_LENGTH;
3940     int c, l, in_space = 0;
3941     xmlChar *current = NULL;
3942     xmlEntityPtr ent;
3943 
3944     if (NXT(0) == '"') {
3945 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3946 	limit = '"';
3947         NEXT;
3948     } else if (NXT(0) == '\'') {
3949 	limit = '\'';
3950 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3951         NEXT;
3952     } else {
3953 	xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3954 	return(NULL);
3955     }
3956 
3957     /*
3958      * allocate a translation buffer.
3959      */
3960     buf_size = XML_PARSER_BUFFER_SIZE;
3961     buf = (xmlChar *) xmlMallocAtomic(buf_size);
3962     if (buf == NULL) goto mem_error;
3963 
3964     /*
3965      * OK loop until we reach one of the ending char or a size limit.
3966      */
3967     c = CUR_CHAR(l);
3968     while (((NXT(0) != limit) && /* checked */
3969             (IS_CHAR(c)) && (c != '<')) &&
3970             (ctxt->instate != XML_PARSER_EOF)) {
3971 	if (c == '&') {
3972 	    in_space = 0;
3973 	    if (NXT(1) == '#') {
3974 		int val = xmlParseCharRef(ctxt);
3975 
3976 		if (val == '&') {
3977 		    if (ctxt->replaceEntities) {
3978 			if (len + 10 > buf_size) {
3979 			    growBuffer(buf, 10);
3980 			}
3981 			buf[len++] = '&';
3982 		    } else {
3983 			/*
3984 			 * The reparsing will be done in xmlStringGetNodeList()
3985 			 * called by the attribute() function in SAX.c
3986 			 */
3987 			if (len + 10 > buf_size) {
3988 			    growBuffer(buf, 10);
3989 			}
3990 			buf[len++] = '&';
3991 			buf[len++] = '#';
3992 			buf[len++] = '3';
3993 			buf[len++] = '8';
3994 			buf[len++] = ';';
3995 		    }
3996 		} else if (val != 0) {
3997 		    if (len + 10 > buf_size) {
3998 			growBuffer(buf, 10);
3999 		    }
4000 		    len += xmlCopyChar(0, &buf[len], val);
4001 		}
4002 	    } else {
4003 		ent = xmlParseEntityRef(ctxt);
4004 		ctxt->nbentities++;
4005 		if (ent != NULL)
4006 		    ctxt->nbentities += ent->owner;
4007 		if ((ent != NULL) &&
4008 		    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4009 		    if (len + 10 > buf_size) {
4010 			growBuffer(buf, 10);
4011 		    }
4012 		    if ((ctxt->replaceEntities == 0) &&
4013 		        (ent->content[0] == '&')) {
4014 			buf[len++] = '&';
4015 			buf[len++] = '#';
4016 			buf[len++] = '3';
4017 			buf[len++] = '8';
4018 			buf[len++] = ';';
4019 		    } else {
4020 			buf[len++] = ent->content[0];
4021 		    }
4022 		} else if ((ent != NULL) &&
4023 		           (ctxt->replaceEntities != 0)) {
4024 		    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4025 			++ctxt->depth;
4026 			rep = xmlStringDecodeEntities(ctxt, ent->content,
4027 						      XML_SUBSTITUTE_REF,
4028 						      0, 0, 0);
4029 			--ctxt->depth;
4030 			if (rep != NULL) {
4031 			    current = rep;
4032 			    while (*current != 0) { /* non input consuming */
4033                                 if ((*current == 0xD) || (*current == 0xA) ||
4034                                     (*current == 0x9)) {
4035                                     buf[len++] = 0x20;
4036                                     current++;
4037                                 } else
4038                                     buf[len++] = *current++;
4039 				if (len + 10 > buf_size) {
4040 				    growBuffer(buf, 10);
4041 				}
4042 			    }
4043 			    xmlFree(rep);
4044 			    rep = NULL;
4045 			}
4046 		    } else {
4047 			if (len + 10 > buf_size) {
4048 			    growBuffer(buf, 10);
4049 			}
4050 			if (ent->content != NULL)
4051 			    buf[len++] = ent->content[0];
4052 		    }
4053 		} else if (ent != NULL) {
4054 		    int i = xmlStrlen(ent->name);
4055 		    const xmlChar *cur = ent->name;
4056 
4057 		    /*
4058 		     * This may look absurd but is needed to detect
4059 		     * entities problems
4060 		     */
4061 		    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4062 			(ent->content != NULL) && (ent->checked == 0)) {
4063 			unsigned long oldnbent = ctxt->nbentities, diff;
4064 
4065 			++ctxt->depth;
4066 			rep = xmlStringDecodeEntities(ctxt, ent->content,
4067 						  XML_SUBSTITUTE_REF, 0, 0, 0);
4068 			--ctxt->depth;
4069 
4070                         diff = ctxt->nbentities - oldnbent + 1;
4071                         if (diff > INT_MAX / 2)
4072                             diff = INT_MAX / 2;
4073                         ent->checked = diff * 2;
4074 			if (rep != NULL) {
4075 			    if (xmlStrchr(rep, '<'))
4076 			        ent->checked |= 1;
4077 			    xmlFree(rep);
4078 			    rep = NULL;
4079 			} else {
4080                             ent->content[0] = 0;
4081                         }
4082 		    }
4083 
4084 		    /*
4085 		     * Just output the reference
4086 		     */
4087 		    buf[len++] = '&';
4088 		    while (len + i + 10 > buf_size) {
4089 			growBuffer(buf, i + 10);
4090 		    }
4091 		    for (;i > 0;i--)
4092 			buf[len++] = *cur++;
4093 		    buf[len++] = ';';
4094 		}
4095 	    }
4096 	} else {
4097 	    if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4098 	        if ((len != 0) || (!normalize)) {
4099 		    if ((!normalize) || (!in_space)) {
4100 			COPY_BUF(l,buf,len,0x20);
4101 			while (len + 10 > buf_size) {
4102 			    growBuffer(buf, 10);
4103 			}
4104 		    }
4105 		    in_space = 1;
4106 		}
4107 	    } else {
4108 	        in_space = 0;
4109 		COPY_BUF(l,buf,len,c);
4110 		if (len + 10 > buf_size) {
4111 		    growBuffer(buf, 10);
4112 		}
4113 	    }
4114 	    NEXTL(l);
4115 	}
4116 	GROW;
4117 	c = CUR_CHAR(l);
4118         if (len > maxLength) {
4119             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4120                            "AttValue length too long\n");
4121             goto mem_error;
4122         }
4123     }
4124     if (ctxt->instate == XML_PARSER_EOF)
4125         goto error;
4126 
4127     if ((in_space) && (normalize)) {
4128         while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4129     }
4130     buf[len] = 0;
4131     if (RAW == '<') {
4132 	xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4133     } else if (RAW != limit) {
4134 	if ((c != 0) && (!IS_CHAR(c))) {
4135 	    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4136 			   "invalid character in attribute value\n");
4137 	} else {
4138 	    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4139 			   "AttValue: ' expected\n");
4140         }
4141     } else
4142 	NEXT;
4143 
4144     if (attlen != NULL) *attlen = (int) len;
4145     return(buf);
4146 
4147 mem_error:
4148     xmlErrMemory(ctxt, NULL);
4149 error:
4150     if (buf != NULL)
4151         xmlFree(buf);
4152     if (rep != NULL)
4153         xmlFree(rep);
4154     return(NULL);
4155 }
4156 
4157 /**
4158  * xmlParseAttValue:
4159  * @ctxt:  an XML parser context
4160  *
4161  * parse a value for an attribute
4162  * Note: the parser won't do substitution of entities here, this
4163  * will be handled later in xmlStringGetNodeList
4164  *
4165  * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4166  *                   "'" ([^<&'] | Reference)* "'"
4167  *
4168  * 3.3.3 Attribute-Value Normalization:
4169  * Before the value of an attribute is passed to the application or
4170  * checked for validity, the XML processor must normalize it as follows:
4171  * - a character reference is processed by appending the referenced
4172  *   character to the attribute value
4173  * - an entity reference is processed by recursively processing the
4174  *   replacement text of the entity
4175  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4176  *   appending #x20 to the normalized value, except that only a single
4177  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4178  *   parsed entity or the literal entity value of an internal parsed entity
4179  * - other characters are processed by appending them to the normalized value
4180  * If the declared value is not CDATA, then the XML processor must further
4181  * process the normalized attribute value by discarding any leading and
4182  * trailing space (#x20) characters, and by replacing sequences of space
4183  * (#x20) characters by a single space (#x20) character.
4184  * All attributes for which no declaration has been read should be treated
4185  * by a non-validating parser as if declared CDATA.
4186  *
4187  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4188  */
4189 
4190 
4191 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4192 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4193     if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4194     return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4195 }
4196 
4197 /**
4198  * xmlParseSystemLiteral:
4199  * @ctxt:  an XML parser context
4200  *
4201  * parse an XML Literal
4202  *
4203  * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4204  *
4205  * Returns the SystemLiteral parsed or NULL
4206  */
4207 
4208 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4209 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4210     xmlChar *buf = NULL;
4211     int len = 0;
4212     int size = XML_PARSER_BUFFER_SIZE;
4213     int cur, l;
4214     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4215                     XML_MAX_TEXT_LENGTH :
4216                     XML_MAX_NAME_LENGTH;
4217     xmlChar stop;
4218     int state = ctxt->instate;
4219     int count = 0;
4220 
4221     SHRINK;
4222     if (RAW == '"') {
4223         NEXT;
4224 	stop = '"';
4225     } else if (RAW == '\'') {
4226         NEXT;
4227 	stop = '\'';
4228     } else {
4229 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4230 	return(NULL);
4231     }
4232 
4233     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4234     if (buf == NULL) {
4235         xmlErrMemory(ctxt, NULL);
4236 	return(NULL);
4237     }
4238     ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4239     cur = CUR_CHAR(l);
4240     while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4241 	if (len + 5 >= size) {
4242 	    xmlChar *tmp;
4243 
4244 	    size *= 2;
4245 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4246 	    if (tmp == NULL) {
4247 	        xmlFree(buf);
4248 		xmlErrMemory(ctxt, NULL);
4249 		ctxt->instate = (xmlParserInputState) state;
4250 		return(NULL);
4251 	    }
4252 	    buf = tmp;
4253 	}
4254 	count++;
4255 	if (count > 50) {
4256 	    SHRINK;
4257 	    GROW;
4258 	    count = 0;
4259             if (ctxt->instate == XML_PARSER_EOF) {
4260 	        xmlFree(buf);
4261 		return(NULL);
4262             }
4263 	}
4264 	COPY_BUF(l,buf,len,cur);
4265 	NEXTL(l);
4266 	cur = CUR_CHAR(l);
4267 	if (cur == 0) {
4268 	    GROW;
4269 	    SHRINK;
4270 	    cur = CUR_CHAR(l);
4271 	}
4272         if (len > maxLength) {
4273             xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4274             xmlFree(buf);
4275             ctxt->instate = (xmlParserInputState) state;
4276             return(NULL);
4277         }
4278     }
4279     buf[len] = 0;
4280     ctxt->instate = (xmlParserInputState) state;
4281     if (!IS_CHAR(cur)) {
4282 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4283     } else {
4284 	NEXT;
4285     }
4286     return(buf);
4287 }
4288 
4289 /**
4290  * xmlParsePubidLiteral:
4291  * @ctxt:  an XML parser context
4292  *
4293  * parse an XML public literal
4294  *
4295  * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4296  *
4297  * Returns the PubidLiteral parsed or NULL.
4298  */
4299 
4300 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4301 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4302     xmlChar *buf = NULL;
4303     int len = 0;
4304     int size = XML_PARSER_BUFFER_SIZE;
4305     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4306                     XML_MAX_TEXT_LENGTH :
4307                     XML_MAX_NAME_LENGTH;
4308     xmlChar cur;
4309     xmlChar stop;
4310     int count = 0;
4311     xmlParserInputState oldstate = ctxt->instate;
4312 
4313     SHRINK;
4314     if (RAW == '"') {
4315         NEXT;
4316 	stop = '"';
4317     } else if (RAW == '\'') {
4318         NEXT;
4319 	stop = '\'';
4320     } else {
4321 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4322 	return(NULL);
4323     }
4324     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4325     if (buf == NULL) {
4326 	xmlErrMemory(ctxt, NULL);
4327 	return(NULL);
4328     }
4329     ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4330     cur = CUR;
4331     while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4332 	if (len + 1 >= size) {
4333 	    xmlChar *tmp;
4334 
4335 	    size *= 2;
4336 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4337 	    if (tmp == NULL) {
4338 		xmlErrMemory(ctxt, NULL);
4339 		xmlFree(buf);
4340 		return(NULL);
4341 	    }
4342 	    buf = tmp;
4343 	}
4344 	buf[len++] = cur;
4345 	count++;
4346 	if (count > 50) {
4347 	    SHRINK;
4348 	    GROW;
4349 	    count = 0;
4350             if (ctxt->instate == XML_PARSER_EOF) {
4351 		xmlFree(buf);
4352 		return(NULL);
4353             }
4354 	}
4355 	NEXT;
4356 	cur = CUR;
4357 	if (cur == 0) {
4358 	    GROW;
4359 	    SHRINK;
4360 	    cur = CUR;
4361 	}
4362         if (len > maxLength) {
4363             xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4364             xmlFree(buf);
4365             return(NULL);
4366         }
4367     }
4368     buf[len] = 0;
4369     if (cur != stop) {
4370 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4371     } else {
4372 	NEXT;
4373     }
4374     ctxt->instate = oldstate;
4375     return(buf);
4376 }
4377 
4378 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4379 
4380 /*
4381  * used for the test in the inner loop of the char data testing
4382  */
4383 static const unsigned char test_char_data[256] = {
4384     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4385     0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4386     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4387     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4388     0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4389     0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4390     0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4391     0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4392     0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4393     0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4394     0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4395     0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4396     0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4397     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4398     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4399     0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4400     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4401     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4402     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4403     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4404     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4405     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4406     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4407     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4408     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4409     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4410     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4411     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4412     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4413     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4414     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4415     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4416 };
4417 
4418 /**
4419  * xmlParseCharData:
4420  * @ctxt:  an XML parser context
4421  * @cdata:  int indicating whether we are within a CDATA section
4422  *
4423  * parse a CharData section.
4424  * if we are within a CDATA section ']]>' marks an end of section.
4425  *
4426  * The right angle bracket (>) may be represented using the string "&gt;",
4427  * and must, for compatibility, be escaped using "&gt;" or a character
4428  * reference when it appears in the string "]]>" in content, when that
4429  * string is not marking the end of a CDATA section.
4430  *
4431  * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4432  */
4433 
4434 void
xmlParseCharData(xmlParserCtxtPtr ctxt,int cdata)4435 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4436     const xmlChar *in;
4437     int nbchar = 0;
4438     int line = ctxt->input->line;
4439     int col = ctxt->input->col;
4440     int ccol;
4441 
4442     SHRINK;
4443     GROW;
4444     /*
4445      * Accelerated common case where input don't need to be
4446      * modified before passing it to the handler.
4447      */
4448     if (!cdata) {
4449 	in = ctxt->input->cur;
4450 	do {
4451 get_more_space:
4452 	    while (*in == 0x20) { in++; ctxt->input->col++; }
4453 	    if (*in == 0xA) {
4454 		do {
4455 		    ctxt->input->line++; ctxt->input->col = 1;
4456 		    in++;
4457 		} while (*in == 0xA);
4458 		goto get_more_space;
4459 	    }
4460 	    if (*in == '<') {
4461 		nbchar = in - ctxt->input->cur;
4462 		if (nbchar > 0) {
4463 		    const xmlChar *tmp = ctxt->input->cur;
4464 		    ctxt->input->cur = in;
4465 
4466 		    if ((ctxt->sax != NULL) &&
4467 		        (ctxt->sax->ignorableWhitespace !=
4468 		         ctxt->sax->characters)) {
4469 			if (areBlanks(ctxt, tmp, nbchar, 1)) {
4470 			    if (ctxt->sax->ignorableWhitespace != NULL)
4471 				ctxt->sax->ignorableWhitespace(ctxt->userData,
4472 						       tmp, nbchar);
4473 			} else {
4474 			    if (ctxt->sax->characters != NULL)
4475 				ctxt->sax->characters(ctxt->userData,
4476 						      tmp, nbchar);
4477 			    if (*ctxt->space == -1)
4478 			        *ctxt->space = -2;
4479 			}
4480 		    } else if ((ctxt->sax != NULL) &&
4481 		               (ctxt->sax->characters != NULL)) {
4482 			ctxt->sax->characters(ctxt->userData,
4483 					      tmp, nbchar);
4484 		    }
4485 		}
4486 		return;
4487 	    }
4488 
4489 get_more:
4490             ccol = ctxt->input->col;
4491 	    while (test_char_data[*in]) {
4492 		in++;
4493 		ccol++;
4494 	    }
4495 	    ctxt->input->col = ccol;
4496 	    if (*in == 0xA) {
4497 		do {
4498 		    ctxt->input->line++; ctxt->input->col = 1;
4499 		    in++;
4500 		} while (*in == 0xA);
4501 		goto get_more;
4502 	    }
4503 	    if (*in == ']') {
4504 		if ((in[1] == ']') && (in[2] == '>')) {
4505 		    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4506 		    ctxt->input->cur = in + 1;
4507 		    return;
4508 		}
4509 		in++;
4510 		ctxt->input->col++;
4511 		goto get_more;
4512 	    }
4513 	    nbchar = in - ctxt->input->cur;
4514 	    if (nbchar > 0) {
4515 		if ((ctxt->sax != NULL) &&
4516 		    (ctxt->sax->ignorableWhitespace !=
4517 		     ctxt->sax->characters) &&
4518 		    (IS_BLANK_CH(*ctxt->input->cur))) {
4519 		    const xmlChar *tmp = ctxt->input->cur;
4520 		    ctxt->input->cur = in;
4521 
4522 		    if (areBlanks(ctxt, tmp, nbchar, 0)) {
4523 		        if (ctxt->sax->ignorableWhitespace != NULL)
4524 			    ctxt->sax->ignorableWhitespace(ctxt->userData,
4525 							   tmp, nbchar);
4526 		    } else {
4527 		        if (ctxt->sax->characters != NULL)
4528 			    ctxt->sax->characters(ctxt->userData,
4529 						  tmp, nbchar);
4530 			if (*ctxt->space == -1)
4531 			    *ctxt->space = -2;
4532 		    }
4533                     line = ctxt->input->line;
4534                     col = ctxt->input->col;
4535 		} else if (ctxt->sax != NULL) {
4536 		    if (ctxt->sax->characters != NULL)
4537 			ctxt->sax->characters(ctxt->userData,
4538 					      ctxt->input->cur, nbchar);
4539                     line = ctxt->input->line;
4540                     col = ctxt->input->col;
4541 		}
4542                 /* something really bad happened in the SAX callback */
4543                 if (ctxt->instate != XML_PARSER_CONTENT)
4544                     return;
4545 	    }
4546 	    ctxt->input->cur = in;
4547 	    if (*in == 0xD) {
4548 		in++;
4549 		if (*in == 0xA) {
4550 		    ctxt->input->cur = in;
4551 		    in++;
4552 		    ctxt->input->line++; ctxt->input->col = 1;
4553 		    continue; /* while */
4554 		}
4555 		in--;
4556 	    }
4557 	    if (*in == '<') {
4558 		return;
4559 	    }
4560 	    if (*in == '&') {
4561 		return;
4562 	    }
4563 	    SHRINK;
4564 	    GROW;
4565             if (ctxt->instate == XML_PARSER_EOF)
4566 		return;
4567 	    in = ctxt->input->cur;
4568 	} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
4569 	nbchar = 0;
4570     }
4571     ctxt->input->line = line;
4572     ctxt->input->col = col;
4573     xmlParseCharDataComplex(ctxt, cdata);
4574 }
4575 
4576 /**
4577  * xmlParseCharDataComplex:
4578  * @ctxt:  an XML parser context
4579  * @cdata:  int indicating whether we are within a CDATA section
4580  *
4581  * parse a CharData section.this is the fallback function
4582  * of xmlParseCharData() when the parsing requires handling
4583  * of non-ASCII characters.
4584  */
4585 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4586 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4587     xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4588     int nbchar = 0;
4589     int cur, l;
4590     int count = 0;
4591 
4592     SHRINK;
4593     GROW;
4594     cur = CUR_CHAR(l);
4595     while ((cur != '<') && /* checked */
4596            (cur != '&') &&
4597 	   (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4598 	if ((cur == ']') && (NXT(1) == ']') &&
4599 	    (NXT(2) == '>')) {
4600 	    if (cdata) break;
4601 	    else {
4602 		xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4603 	    }
4604 	}
4605 	COPY_BUF(l,buf,nbchar,cur);
4606 	/* move current position before possible calling of ctxt->sax->characters */
4607 	NEXTL(l);
4608 	cur = CUR_CHAR(l);
4609 	if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4610 	    buf[nbchar] = 0;
4611 
4612 	    /*
4613 	     * OK the segment is to be consumed as chars.
4614 	     */
4615 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4616 		if (areBlanks(ctxt, buf, nbchar, 0)) {
4617 		    if (ctxt->sax->ignorableWhitespace != NULL)
4618 			ctxt->sax->ignorableWhitespace(ctxt->userData,
4619 			                               buf, nbchar);
4620 		} else {
4621 		    if (ctxt->sax->characters != NULL)
4622 			ctxt->sax->characters(ctxt->userData, buf, nbchar);
4623 		    if ((ctxt->sax->characters !=
4624 		         ctxt->sax->ignorableWhitespace) &&
4625 			(*ctxt->space == -1))
4626 			*ctxt->space = -2;
4627 		}
4628 	    }
4629 	    nbchar = 0;
4630             /* something really bad happened in the SAX callback */
4631             if (ctxt->instate != XML_PARSER_CONTENT)
4632                 return;
4633 	}
4634 	count++;
4635 	if (count > 50) {
4636 	    SHRINK;
4637 	    GROW;
4638 	    count = 0;
4639             if (ctxt->instate == XML_PARSER_EOF)
4640 		return;
4641 	}
4642     }
4643     if (nbchar != 0) {
4644         buf[nbchar] = 0;
4645 	/*
4646 	 * OK the segment is to be consumed as chars.
4647 	 */
4648 	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4649 	    if (areBlanks(ctxt, buf, nbchar, 0)) {
4650 		if (ctxt->sax->ignorableWhitespace != NULL)
4651 		    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4652 	    } else {
4653 		if (ctxt->sax->characters != NULL)
4654 		    ctxt->sax->characters(ctxt->userData, buf, nbchar);
4655 		if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4656 		    (*ctxt->space == -1))
4657 		    *ctxt->space = -2;
4658 	    }
4659 	}
4660     }
4661     if ((cur != 0) && (!IS_CHAR(cur))) {
4662 	/* Generate the error and skip the offending character */
4663         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4664                           "PCDATA invalid Char value %d\n",
4665 	                  cur);
4666 	NEXTL(l);
4667     }
4668 }
4669 
4670 /**
4671  * xmlParseExternalID:
4672  * @ctxt:  an XML parser context
4673  * @publicID:  a xmlChar** receiving PubidLiteral
4674  * @strict: indicate whether we should restrict parsing to only
4675  *          production [75], see NOTE below
4676  *
4677  * Parse an External ID or a Public ID
4678  *
4679  * NOTE: Productions [75] and [83] interact badly since [75] can generate
4680  *       'PUBLIC' S PubidLiteral S SystemLiteral
4681  *
4682  * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4683  *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4684  *
4685  * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4686  *
4687  * Returns the function returns SystemLiteral and in the second
4688  *                case publicID receives PubidLiteral, is strict is off
4689  *                it is possible to return NULL and have publicID set.
4690  */
4691 
4692 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4693 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4694     xmlChar *URI = NULL;
4695 
4696     SHRINK;
4697 
4698     *publicID = NULL;
4699     if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4700         SKIP(6);
4701 	if (SKIP_BLANKS == 0) {
4702 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4703 	                   "Space required after 'SYSTEM'\n");
4704 	}
4705 	URI = xmlParseSystemLiteral(ctxt);
4706 	if (URI == NULL) {
4707 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4708         }
4709     } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4710         SKIP(6);
4711 	if (SKIP_BLANKS == 0) {
4712 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4713 		    "Space required after 'PUBLIC'\n");
4714 	}
4715 	*publicID = xmlParsePubidLiteral(ctxt);
4716 	if (*publicID == NULL) {
4717 	    xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4718 	}
4719 	if (strict) {
4720 	    /*
4721 	     * We don't handle [83] so "S SystemLiteral" is required.
4722 	     */
4723 	    if (SKIP_BLANKS == 0) {
4724 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4725 			"Space required after the Public Identifier\n");
4726 	    }
4727 	} else {
4728 	    /*
4729 	     * We handle [83] so we return immediately, if
4730 	     * "S SystemLiteral" is not detected. We skip blanks if no
4731              * system literal was found, but this is harmless since we must
4732              * be at the end of a NotationDecl.
4733 	     */
4734 	    if (SKIP_BLANKS == 0) return(NULL);
4735 	    if ((CUR != '\'') && (CUR != '"')) return(NULL);
4736 	}
4737 	URI = xmlParseSystemLiteral(ctxt);
4738 	if (URI == NULL) {
4739 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4740         }
4741     }
4742     return(URI);
4743 }
4744 
4745 /**
4746  * xmlParseCommentComplex:
4747  * @ctxt:  an XML parser context
4748  * @buf:  the already parsed part of the buffer
4749  * @len:  number of bytes in the buffer
4750  * @size:  allocated size of the buffer
4751  *
4752  * Skip an XML (SGML) comment <!-- .... -->
4753  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4754  *  must not occur within comments. "
4755  * This is the slow routine in case the accelerator for ascii didn't work
4756  *
4757  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4758  */
4759 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)4760 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4761                        size_t len, size_t size) {
4762     int q, ql;
4763     int r, rl;
4764     int cur, l;
4765     size_t count = 0;
4766     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4767                        XML_MAX_HUGE_LENGTH :
4768                        XML_MAX_TEXT_LENGTH;
4769     int inputid;
4770 
4771     inputid = ctxt->input->id;
4772 
4773     if (buf == NULL) {
4774         len = 0;
4775 	size = XML_PARSER_BUFFER_SIZE;
4776 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4777 	if (buf == NULL) {
4778 	    xmlErrMemory(ctxt, NULL);
4779 	    return;
4780 	}
4781     }
4782     GROW;	/* Assure there's enough input data */
4783     q = CUR_CHAR(ql);
4784     if (q == 0)
4785         goto not_terminated;
4786     if (!IS_CHAR(q)) {
4787         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4788                           "xmlParseComment: invalid xmlChar value %d\n",
4789 	                  q);
4790 	xmlFree (buf);
4791 	return;
4792     }
4793     NEXTL(ql);
4794     r = CUR_CHAR(rl);
4795     if (r == 0)
4796         goto not_terminated;
4797     if (!IS_CHAR(r)) {
4798         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4799                           "xmlParseComment: invalid xmlChar value %d\n",
4800 	                  q);
4801 	xmlFree (buf);
4802 	return;
4803     }
4804     NEXTL(rl);
4805     cur = CUR_CHAR(l);
4806     if (cur == 0)
4807         goto not_terminated;
4808     while (IS_CHAR(cur) && /* checked */
4809            ((cur != '>') ||
4810 	    (r != '-') || (q != '-'))) {
4811 	if ((r == '-') && (q == '-')) {
4812 	    xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4813 	}
4814 	if (len + 5 >= size) {
4815 	    xmlChar *new_buf;
4816             size_t new_size;
4817 
4818 	    new_size = size * 2;
4819 	    new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4820 	    if (new_buf == NULL) {
4821 		xmlFree (buf);
4822 		xmlErrMemory(ctxt, NULL);
4823 		return;
4824 	    }
4825 	    buf = new_buf;
4826             size = new_size;
4827 	}
4828 	COPY_BUF(ql,buf,len,q);
4829 	q = r;
4830 	ql = rl;
4831 	r = cur;
4832 	rl = l;
4833 
4834 	count++;
4835 	if (count > 50) {
4836 	    SHRINK;
4837 	    GROW;
4838 	    count = 0;
4839             if (ctxt->instate == XML_PARSER_EOF) {
4840 		xmlFree(buf);
4841 		return;
4842             }
4843 	}
4844 	NEXTL(l);
4845 	cur = CUR_CHAR(l);
4846 	if (cur == 0) {
4847 	    SHRINK;
4848 	    GROW;
4849 	    cur = CUR_CHAR(l);
4850 	}
4851 
4852         if (len > maxLength) {
4853             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4854                          "Comment too big found", NULL);
4855             xmlFree (buf);
4856             return;
4857         }
4858     }
4859     buf[len] = 0;
4860     if (cur == 0) {
4861 	xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4862 	                     "Comment not terminated \n<!--%.50s\n", buf);
4863     } else if (!IS_CHAR(cur)) {
4864         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4865                           "xmlParseComment: invalid xmlChar value %d\n",
4866 	                  cur);
4867     } else {
4868 	if (inputid != ctxt->input->id) {
4869 	    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4870 		           "Comment doesn't start and stop in the same"
4871                            " entity\n");
4872 	}
4873         NEXT;
4874 	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4875 	    (!ctxt->disableSAX))
4876 	    ctxt->sax->comment(ctxt->userData, buf);
4877     }
4878     xmlFree(buf);
4879     return;
4880 not_terminated:
4881     xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4882 			 "Comment not terminated\n", NULL);
4883     xmlFree(buf);
4884     return;
4885 }
4886 
4887 /**
4888  * xmlParseComment:
4889  * @ctxt:  an XML parser context
4890  *
4891  * Skip an XML (SGML) comment <!-- .... -->
4892  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4893  *  must not occur within comments. "
4894  *
4895  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4896  */
4897 void
xmlParseComment(xmlParserCtxtPtr ctxt)4898 xmlParseComment(xmlParserCtxtPtr ctxt) {
4899     xmlChar *buf = NULL;
4900     size_t size = XML_PARSER_BUFFER_SIZE;
4901     size_t len = 0;
4902     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4903                        XML_MAX_HUGE_LENGTH :
4904                        XML_MAX_TEXT_LENGTH;
4905     xmlParserInputState state;
4906     const xmlChar *in;
4907     size_t nbchar = 0;
4908     int ccol;
4909     int inputid;
4910 
4911     /*
4912      * Check that there is a comment right here.
4913      */
4914     if ((RAW != '<') || (NXT(1) != '!') ||
4915         (NXT(2) != '-') || (NXT(3) != '-')) return;
4916     state = ctxt->instate;
4917     ctxt->instate = XML_PARSER_COMMENT;
4918     inputid = ctxt->input->id;
4919     SKIP(4);
4920     SHRINK;
4921     GROW;
4922 
4923     /*
4924      * Accelerated common case where input don't need to be
4925      * modified before passing it to the handler.
4926      */
4927     in = ctxt->input->cur;
4928     do {
4929 	if (*in == 0xA) {
4930 	    do {
4931 		ctxt->input->line++; ctxt->input->col = 1;
4932 		in++;
4933 	    } while (*in == 0xA);
4934 	}
4935 get_more:
4936         ccol = ctxt->input->col;
4937 	while (((*in > '-') && (*in <= 0x7F)) ||
4938 	       ((*in >= 0x20) && (*in < '-')) ||
4939 	       (*in == 0x09)) {
4940 		    in++;
4941 		    ccol++;
4942 	}
4943 	ctxt->input->col = ccol;
4944 	if (*in == 0xA) {
4945 	    do {
4946 		ctxt->input->line++; ctxt->input->col = 1;
4947 		in++;
4948 	    } while (*in == 0xA);
4949 	    goto get_more;
4950 	}
4951 	nbchar = in - ctxt->input->cur;
4952 	/*
4953 	 * save current set of data
4954 	 */
4955 	if (nbchar > 0) {
4956 	    if ((ctxt->sax != NULL) &&
4957 		(ctxt->sax->comment != NULL)) {
4958 		if (buf == NULL) {
4959 		    if ((*in == '-') && (in[1] == '-'))
4960 		        size = nbchar + 1;
4961 		    else
4962 		        size = XML_PARSER_BUFFER_SIZE + nbchar;
4963 		    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4964 		    if (buf == NULL) {
4965 		        xmlErrMemory(ctxt, NULL);
4966 			ctxt->instate = state;
4967 			return;
4968 		    }
4969 		    len = 0;
4970 		} else if (len + nbchar + 1 >= size) {
4971 		    xmlChar *new_buf;
4972 		    size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4973 		    new_buf = (xmlChar *) xmlRealloc(buf,
4974 		                                     size * sizeof(xmlChar));
4975 		    if (new_buf == NULL) {
4976 		        xmlFree (buf);
4977 			xmlErrMemory(ctxt, NULL);
4978 			ctxt->instate = state;
4979 			return;
4980 		    }
4981 		    buf = new_buf;
4982 		}
4983 		memcpy(&buf[len], ctxt->input->cur, nbchar);
4984 		len += nbchar;
4985 		buf[len] = 0;
4986 	    }
4987 	}
4988         if (len > maxLength) {
4989             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4990                          "Comment too big found", NULL);
4991             xmlFree (buf);
4992             return;
4993         }
4994 	ctxt->input->cur = in;
4995 	if (*in == 0xA) {
4996 	    in++;
4997 	    ctxt->input->line++; ctxt->input->col = 1;
4998 	}
4999 	if (*in == 0xD) {
5000 	    in++;
5001 	    if (*in == 0xA) {
5002 		ctxt->input->cur = in;
5003 		in++;
5004 		ctxt->input->line++; ctxt->input->col = 1;
5005 		goto get_more;
5006 	    }
5007 	    in--;
5008 	}
5009 	SHRINK;
5010 	GROW;
5011         if (ctxt->instate == XML_PARSER_EOF) {
5012             xmlFree(buf);
5013             return;
5014         }
5015 	in = ctxt->input->cur;
5016 	if (*in == '-') {
5017 	    if (in[1] == '-') {
5018 	        if (in[2] == '>') {
5019 		    if (ctxt->input->id != inputid) {
5020 			xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5021 			               "comment doesn't start and stop in the"
5022                                        " same entity\n");
5023 		    }
5024 		    SKIP(3);
5025 		    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5026 		        (!ctxt->disableSAX)) {
5027 			if (buf != NULL)
5028 			    ctxt->sax->comment(ctxt->userData, buf);
5029 			else
5030 			    ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5031 		    }
5032 		    if (buf != NULL)
5033 		        xmlFree(buf);
5034 		    if (ctxt->instate != XML_PARSER_EOF)
5035 			ctxt->instate = state;
5036 		    return;
5037 		}
5038 		if (buf != NULL) {
5039 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5040 		                      "Double hyphen within comment: "
5041                                       "<!--%.50s\n",
5042 				      buf);
5043 		} else
5044 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5045 		                      "Double hyphen within comment\n", NULL);
5046                 if (ctxt->instate == XML_PARSER_EOF) {
5047                     xmlFree(buf);
5048                     return;
5049                 }
5050 		in++;
5051 		ctxt->input->col++;
5052 	    }
5053 	    in++;
5054 	    ctxt->input->col++;
5055 	    goto get_more;
5056 	}
5057     } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5058     xmlParseCommentComplex(ctxt, buf, len, size);
5059     ctxt->instate = state;
5060     return;
5061 }
5062 
5063 
5064 /**
5065  * xmlParsePITarget:
5066  * @ctxt:  an XML parser context
5067  *
5068  * parse the name of a PI
5069  *
5070  * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5071  *
5072  * Returns the PITarget name or NULL
5073  */
5074 
5075 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)5076 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5077     const xmlChar *name;
5078 
5079     name = xmlParseName(ctxt);
5080     if ((name != NULL) &&
5081         ((name[0] == 'x') || (name[0] == 'X')) &&
5082         ((name[1] == 'm') || (name[1] == 'M')) &&
5083         ((name[2] == 'l') || (name[2] == 'L'))) {
5084 	int i;
5085 	if ((name[0] == 'x') && (name[1] == 'm') &&
5086 	    (name[2] == 'l') && (name[3] == 0)) {
5087 	    xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5088 		 "XML declaration allowed only at the start of the document\n");
5089 	    return(name);
5090 	} else if (name[3] == 0) {
5091 	    xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5092 	    return(name);
5093 	}
5094 	for (i = 0;;i++) {
5095 	    if (xmlW3CPIs[i] == NULL) break;
5096 	    if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5097 	        return(name);
5098 	}
5099 	xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5100 		      "xmlParsePITarget: invalid name prefix 'xml'\n",
5101 		      NULL, NULL);
5102     }
5103     if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5104 	xmlNsErr(ctxt, XML_NS_ERR_COLON,
5105 		 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5106     }
5107     return(name);
5108 }
5109 
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The expected content is: catalog S? '=' S? quoted-URL S?
 * Any deviation raises an XML_WAR_CATALOG_PI warning, except a missing
 * '=' after the "catalog" keyword, which is silently ignored.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* The pseudo-attribute name comes first. */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* Then the equal sign; give up without any warning if absent. */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* Then the value, between matching single or double quotes. */
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if (!((quote == '\'') || (quote == '"')))
        goto error;
    p++;
    for (start = p; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* Only blanks may follow the closing quote. */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5171 
5172 /**
5173  * xmlParsePI:
5174  * @ctxt:  an XML parser context
5175  *
5176  * parse an XML Processing Instruction.
5177  *
5178  * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5179  *
5180  * The processing is transferred to SAX once parsed.
5181  */
5182 
5183 void
xmlParsePI(xmlParserCtxtPtr ctxt)5184 xmlParsePI(xmlParserCtxtPtr ctxt) {
5185     xmlChar *buf = NULL;
5186     size_t len = 0;
5187     size_t size = XML_PARSER_BUFFER_SIZE;
5188     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5189                        XML_MAX_HUGE_LENGTH :
5190                        XML_MAX_TEXT_LENGTH;
5191     int cur, l;
5192     const xmlChar *target;
5193     xmlParserInputState state;
5194     int count = 0;
5195 
5196     if ((RAW == '<') && (NXT(1) == '?')) {
5197 	int inputid = ctxt->input->id;
5198 	state = ctxt->instate;
5199         ctxt->instate = XML_PARSER_PI;
5200 	/*
5201 	 * this is a Processing Instruction.
5202 	 */
5203 	SKIP(2);
5204 	SHRINK;
5205 
5206 	/*
5207 	 * Parse the target name and check for special support like
5208 	 * namespace.
5209 	 */
5210         target = xmlParsePITarget(ctxt);
5211 	if (target != NULL) {
5212 	    if ((RAW == '?') && (NXT(1) == '>')) {
5213 		if (inputid != ctxt->input->id) {
5214 		    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5215 	                           "PI declaration doesn't start and stop in"
5216                                    " the same entity\n");
5217 		}
5218 		SKIP(2);
5219 
5220 		/*
5221 		 * SAX: PI detected.
5222 		 */
5223 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5224 		    (ctxt->sax->processingInstruction != NULL))
5225 		    ctxt->sax->processingInstruction(ctxt->userData,
5226 		                                     target, NULL);
5227 		if (ctxt->instate != XML_PARSER_EOF)
5228 		    ctxt->instate = state;
5229 		return;
5230 	    }
5231 	    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5232 	    if (buf == NULL) {
5233 		xmlErrMemory(ctxt, NULL);
5234 		ctxt->instate = state;
5235 		return;
5236 	    }
5237 	    if (SKIP_BLANKS == 0) {
5238 		xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5239 			  "ParsePI: PI %s space expected\n", target);
5240 	    }
5241 	    cur = CUR_CHAR(l);
5242 	    while (IS_CHAR(cur) && /* checked */
5243 		   ((cur != '?') || (NXT(1) != '>'))) {
5244 		if (len + 5 >= size) {
5245 		    xmlChar *tmp;
5246                     size_t new_size = size * 2;
5247 		    tmp = (xmlChar *) xmlRealloc(buf, new_size);
5248 		    if (tmp == NULL) {
5249 			xmlErrMemory(ctxt, NULL);
5250 			xmlFree(buf);
5251 			ctxt->instate = state;
5252 			return;
5253 		    }
5254 		    buf = tmp;
5255                     size = new_size;
5256 		}
5257 		count++;
5258 		if (count > 50) {
5259 		    SHRINK;
5260 		    GROW;
5261                     if (ctxt->instate == XML_PARSER_EOF) {
5262                         xmlFree(buf);
5263                         return;
5264                     }
5265 		    count = 0;
5266 		}
5267 		COPY_BUF(l,buf,len,cur);
5268 		NEXTL(l);
5269 		cur = CUR_CHAR(l);
5270 		if (cur == 0) {
5271 		    SHRINK;
5272 		    GROW;
5273 		    cur = CUR_CHAR(l);
5274 		}
5275                 if (len > maxLength) {
5276                     xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5277                                       "PI %s too big found", target);
5278                     xmlFree(buf);
5279                     ctxt->instate = state;
5280                     return;
5281                 }
5282 	    }
5283 	    buf[len] = 0;
5284 	    if (cur != '?') {
5285 		xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5286 		      "ParsePI: PI %s never end ...\n", target);
5287 	    } else {
5288 		if (inputid != ctxt->input->id) {
5289 		    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5290 	                           "PI declaration doesn't start and stop in"
5291                                    " the same entity\n");
5292 		}
5293 		SKIP(2);
5294 
5295 #ifdef LIBXML_CATALOG_ENABLED
5296 		if (((state == XML_PARSER_MISC) ||
5297 	             (state == XML_PARSER_START)) &&
5298 		    (xmlStrEqual(target, XML_CATALOG_PI))) {
5299 		    xmlCatalogAllow allow = xmlCatalogGetDefaults();
5300 		    if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5301 			(allow == XML_CATA_ALLOW_ALL))
5302 			xmlParseCatalogPI(ctxt, buf);
5303 		}
5304 #endif
5305 
5306 
5307 		/*
5308 		 * SAX: PI detected.
5309 		 */
5310 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5311 		    (ctxt->sax->processingInstruction != NULL))
5312 		    ctxt->sax->processingInstruction(ctxt->userData,
5313 		                                     target, buf);
5314 	    }
5315 	    xmlFree(buf);
5316 	} else {
5317 	    xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5318 	}
5319 	if (ctxt->instate != XML_PARSER_EOF)
5320 	    ctxt->instate = state;
5321     }
5322 }
5323 
5324 /**
5325  * xmlParseNotationDecl:
5326  * @ctxt:  an XML parser context
5327  *
5328  * parse a notation declaration
5329  *
5330  * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5331  *
5332  * Hence there is actually 3 choices:
5333  *     'PUBLIC' S PubidLiteral
5334  *     'PUBLIC' S PubidLiteral S SystemLiteral
5335  * and 'SYSTEM' S SystemLiteral
5336  *
5337  * See the NOTE on xmlParseExternalID().
5338  */
5339 
5340 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5341 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5342     const xmlChar *name;
5343     xmlChar *Pubid;
5344     xmlChar *Systemid;
5345 
5346     if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5347 	int inputid = ctxt->input->id;
5348 	SHRINK;
5349 	SKIP(10);
5350 	if (SKIP_BLANKS == 0) {
5351 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5352 			   "Space required after '<!NOTATION'\n");
5353 	    return;
5354 	}
5355 
5356         name = xmlParseName(ctxt);
5357 	if (name == NULL) {
5358 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5359 	    return;
5360 	}
5361 	if (xmlStrchr(name, ':') != NULL) {
5362 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5363 		     "colons are forbidden from notation names '%s'\n",
5364 		     name, NULL, NULL);
5365 	}
5366 	if (SKIP_BLANKS == 0) {
5367 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5368 		     "Space required after the NOTATION name'\n");
5369 	    return;
5370 	}
5371 
5372 	/*
5373 	 * Parse the IDs.
5374 	 */
5375 	Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5376 	SKIP_BLANKS;
5377 
5378 	if (RAW == '>') {
5379 	    if (inputid != ctxt->input->id) {
5380 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5381 	                       "Notation declaration doesn't start and stop"
5382                                " in the same entity\n");
5383 	    }
5384 	    NEXT;
5385 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5386 		(ctxt->sax->notationDecl != NULL))
5387 		ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5388 	} else {
5389 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5390 	}
5391 	if (Systemid != NULL) xmlFree(Systemid);
5392 	if (Pubid != NULL) xmlFree(Pubid);
5393     }
5394 }
5395 
5396 /**
5397  * xmlParseEntityDecl:
5398  * @ctxt:  an XML parser context
5399  *
5400  * parse <!ENTITY declarations
5401  *
5402  * [70] EntityDecl ::= GEDecl | PEDecl
5403  *
5404  * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5405  *
5406  * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5407  *
5408  * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5409  *
5410  * [74] PEDef ::= EntityValue | ExternalID
5411  *
5412  * [76] NDataDecl ::= S 'NDATA' S Name
5413  *
5414  * [ VC: Notation Declared ]
5415  * The Name must match the declared name of a notation.
5416  */
5417 
5418 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5419 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5420     const xmlChar *name = NULL;
5421     xmlChar *value = NULL;
5422     xmlChar *URI = NULL, *literal = NULL;
5423     const xmlChar *ndata = NULL;
5424     int isParameter = 0;
5425     xmlChar *orig = NULL;
5426 
5427     /* GROW; done in the caller */
5428     if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5429 	int inputid = ctxt->input->id;
5430 	SHRINK;
5431 	SKIP(8);
5432 	if (SKIP_BLANKS == 0) {
5433 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5434 			   "Space required after '<!ENTITY'\n");
5435 	}
5436 
5437 	if (RAW == '%') {
5438 	    NEXT;
5439 	    if (SKIP_BLANKS == 0) {
5440 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5441 			       "Space required after '%%'\n");
5442 	    }
5443 	    isParameter = 1;
5444 	}
5445 
5446         name = xmlParseName(ctxt);
5447 	if (name == NULL) {
5448 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5449 	                   "xmlParseEntityDecl: no name\n");
5450             return;
5451 	}
5452 	if (xmlStrchr(name, ':') != NULL) {
5453 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5454 		     "colons are forbidden from entities names '%s'\n",
5455 		     name, NULL, NULL);
5456 	}
5457 	if (SKIP_BLANKS == 0) {
5458 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5459 			   "Space required after the entity name\n");
5460 	}
5461 
5462 	ctxt->instate = XML_PARSER_ENTITY_DECL;
5463 	/*
5464 	 * handle the various case of definitions...
5465 	 */
5466 	if (isParameter) {
5467 	    if ((RAW == '"') || (RAW == '\'')) {
5468 	        value = xmlParseEntityValue(ctxt, &orig);
5469 		if (value) {
5470 		    if ((ctxt->sax != NULL) &&
5471 			(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5472 			ctxt->sax->entityDecl(ctxt->userData, name,
5473 		                    XML_INTERNAL_PARAMETER_ENTITY,
5474 				    NULL, NULL, value);
5475 		}
5476 	    } else {
5477 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5478 		if ((URI == NULL) && (literal == NULL)) {
5479 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5480 		}
5481 		if (URI) {
5482 		    xmlURIPtr uri;
5483 
5484 		    uri = xmlParseURI((const char *) URI);
5485 		    if (uri == NULL) {
5486 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5487 				     "Invalid URI: %s\n", URI);
5488 			/*
5489 			 * This really ought to be a well formedness error
5490 			 * but the XML Core WG decided otherwise c.f. issue
5491 			 * E26 of the XML erratas.
5492 			 */
5493 		    } else {
5494 			if (uri->fragment != NULL) {
5495 			    /*
5496 			     * Okay this is foolish to block those but not
5497 			     * invalid URIs.
5498 			     */
5499 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5500 			} else {
5501 			    if ((ctxt->sax != NULL) &&
5502 				(!ctxt->disableSAX) &&
5503 				(ctxt->sax->entityDecl != NULL))
5504 				ctxt->sax->entityDecl(ctxt->userData, name,
5505 					    XML_EXTERNAL_PARAMETER_ENTITY,
5506 					    literal, URI, NULL);
5507 			}
5508 			xmlFreeURI(uri);
5509 		    }
5510 		}
5511 	    }
5512 	} else {
5513 	    if ((RAW == '"') || (RAW == '\'')) {
5514 	        value = xmlParseEntityValue(ctxt, &orig);
5515 		if ((ctxt->sax != NULL) &&
5516 		    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5517 		    ctxt->sax->entityDecl(ctxt->userData, name,
5518 				XML_INTERNAL_GENERAL_ENTITY,
5519 				NULL, NULL, value);
5520 		/*
5521 		 * For expat compatibility in SAX mode.
5522 		 */
5523 		if ((ctxt->myDoc == NULL) ||
5524 		    (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5525 		    if (ctxt->myDoc == NULL) {
5526 			ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5527 			if (ctxt->myDoc == NULL) {
5528 			    xmlErrMemory(ctxt, "New Doc failed");
5529 			    return;
5530 			}
5531 			ctxt->myDoc->properties = XML_DOC_INTERNAL;
5532 		    }
5533 		    if (ctxt->myDoc->intSubset == NULL)
5534 			ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5535 					    BAD_CAST "fake", NULL, NULL);
5536 
5537 		    xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5538 			              NULL, NULL, value);
5539 		}
5540 	    } else {
5541 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5542 		if ((URI == NULL) && (literal == NULL)) {
5543 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5544 		}
5545 		if (URI) {
5546 		    xmlURIPtr uri;
5547 
5548 		    uri = xmlParseURI((const char *)URI);
5549 		    if (uri == NULL) {
5550 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5551 				     "Invalid URI: %s\n", URI);
5552 			/*
5553 			 * This really ought to be a well formedness error
5554 			 * but the XML Core WG decided otherwise c.f. issue
5555 			 * E26 of the XML erratas.
5556 			 */
5557 		    } else {
5558 			if (uri->fragment != NULL) {
5559 			    /*
5560 			     * Okay this is foolish to block those but not
5561 			     * invalid URIs.
5562 			     */
5563 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5564 			}
5565 			xmlFreeURI(uri);
5566 		    }
5567 		}
5568 		if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5569 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5570 				   "Space required before 'NDATA'\n");
5571 		}
5572 		if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5573 		    SKIP(5);
5574 		    if (SKIP_BLANKS == 0) {
5575 			xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5576 				       "Space required after 'NDATA'\n");
5577 		    }
5578 		    ndata = xmlParseName(ctxt);
5579 		    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5580 		        (ctxt->sax->unparsedEntityDecl != NULL))
5581 			ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5582 				    literal, URI, ndata);
5583 		} else {
5584 		    if ((ctxt->sax != NULL) &&
5585 		        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5586 			ctxt->sax->entityDecl(ctxt->userData, name,
5587 				    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5588 				    literal, URI, NULL);
5589 		    /*
5590 		     * For expat compatibility in SAX mode.
5591 		     * assuming the entity replacement was asked for
5592 		     */
5593 		    if ((ctxt->replaceEntities != 0) &&
5594 			((ctxt->myDoc == NULL) ||
5595 			(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5596 			if (ctxt->myDoc == NULL) {
5597 			    ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5598 			    if (ctxt->myDoc == NULL) {
5599 			        xmlErrMemory(ctxt, "New Doc failed");
5600 				return;
5601 			    }
5602 			    ctxt->myDoc->properties = XML_DOC_INTERNAL;
5603 			}
5604 
5605 			if (ctxt->myDoc->intSubset == NULL)
5606 			    ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5607 						BAD_CAST "fake", NULL, NULL);
5608 			xmlSAX2EntityDecl(ctxt, name,
5609 				          XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5610 				          literal, URI, NULL);
5611 		    }
5612 		}
5613 	    }
5614 	}
5615 	if (ctxt->instate == XML_PARSER_EOF)
5616 	    goto done;
5617 	SKIP_BLANKS;
5618 	if (RAW != '>') {
5619 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5620 	            "xmlParseEntityDecl: entity %s not terminated\n", name);
5621 	    xmlHaltParser(ctxt);
5622 	} else {
5623 	    if (inputid != ctxt->input->id) {
5624 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5625 	                       "Entity declaration doesn't start and stop in"
5626                                " the same entity\n");
5627 	    }
5628 	    NEXT;
5629 	}
5630 	if (orig != NULL) {
5631 	    /*
5632 	     * Ugly mechanism to save the raw entity value.
5633 	     */
5634 	    xmlEntityPtr cur = NULL;
5635 
5636 	    if (isParameter) {
5637 	        if ((ctxt->sax != NULL) &&
5638 		    (ctxt->sax->getParameterEntity != NULL))
5639 		    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5640 	    } else {
5641 	        if ((ctxt->sax != NULL) &&
5642 		    (ctxt->sax->getEntity != NULL))
5643 		    cur = ctxt->sax->getEntity(ctxt->userData, name);
5644 		if ((cur == NULL) && (ctxt->userData==ctxt)) {
5645 		    cur = xmlSAX2GetEntity(ctxt, name);
5646 		}
5647 	    }
5648             if ((cur != NULL) && (cur->orig == NULL)) {
5649 		cur->orig = orig;
5650                 orig = NULL;
5651 	    }
5652 	}
5653 
5654 done:
5655 	if (value != NULL) xmlFree(value);
5656 	if (URI != NULL) xmlFree(URI);
5657 	if (literal != NULL) xmlFree(literal);
5658         if (orig != NULL) xmlFree(orig);
5659     }
5660 }
5661 
5662 /**
5663  * xmlParseDefaultDecl:
5664  * @ctxt:  an XML parser context
5665  * @value:  Receive a possible fixed default value for the attribute
5666  *
5667  * Parse an attribute default declaration
5668  *
5669  * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5670  *
5671  * [ VC: Required Attribute ]
5672  * if the default declaration is the keyword #REQUIRED, then the
5673  * attribute must be specified for all elements of the type in the
5674  * attribute-list declaration.
5675  *
5676  * [ VC: Attribute Default Legal ]
5677  * The declared default value must meet the lexical constraints of
5678  * the declared attribute type c.f. xmlValidateAttributeDecl()
5679  *
5680  * [ VC: Fixed Attribute Default ]
5681  * if an attribute has a default value declared with the #FIXED
5682  * keyword, instances of that attribute must match the default value.
5683  *
5684  * [ WFC: No < in Attribute Values ]
5685  * handled in xmlParseAttValue()
5686  *
5687  * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5688  *          or XML_ATTRIBUTE_FIXED.
5689  */
5690 
5691 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5692 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5693     int val;
5694     xmlChar *ret;
5695 
5696     *value = NULL;
5697     if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5698 	SKIP(9);
5699 	return(XML_ATTRIBUTE_REQUIRED);
5700     }
5701     if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5702 	SKIP(8);
5703 	return(XML_ATTRIBUTE_IMPLIED);
5704     }
5705     val = XML_ATTRIBUTE_NONE;
5706     if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5707 	SKIP(6);
5708 	val = XML_ATTRIBUTE_FIXED;
5709 	if (SKIP_BLANKS == 0) {
5710 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5711 			   "Space required after '#FIXED'\n");
5712 	}
5713     }
5714     ret = xmlParseAttValue(ctxt);
5715     ctxt->instate = XML_PARSER_DTD;
5716     if (ret == NULL) {
5717 	xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5718 		       "Attribute default value declaration error\n");
5719     } else
5720         *value = ret;
5721     return(val);
5722 }
5723 
5724 /**
5725  * xmlParseNotationType:
5726  * @ctxt:  an XML parser context
5727  *
5728  * parse an Notation attribute type.
5729  *
5730  * Note: the leading 'NOTATION' S part has already being parsed...
5731  *
5732  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5733  *
5734  * [ VC: Notation Attributes ]
5735  * Values of this type must match one of the notation names included
5736  * in the declaration; all notation names in the declaration must be declared.
5737  *
5738  * Returns: the notation attribute tree built while parsing
5739  */
5740 
5741 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5742 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5743     const xmlChar *name;
5744     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5745 
5746     if (RAW != '(') {
5747 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5748 	return(NULL);
5749     }
5750     SHRINK;
5751     do {
5752         NEXT;
5753 	SKIP_BLANKS;
5754         name = xmlParseName(ctxt);
5755 	if (name == NULL) {
5756 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5757 			   "Name expected in NOTATION declaration\n");
5758             xmlFreeEnumeration(ret);
5759 	    return(NULL);
5760 	}
5761 	tmp = ret;
5762 	while (tmp != NULL) {
5763 	    if (xmlStrEqual(name, tmp->name)) {
5764 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5765 	  "standalone: attribute notation value token %s duplicated\n",
5766 				 name, NULL);
5767 		if (!xmlDictOwns(ctxt->dict, name))
5768 		    xmlFree((xmlChar *) name);
5769 		break;
5770 	    }
5771 	    tmp = tmp->next;
5772 	}
5773 	if (tmp == NULL) {
5774 	    cur = xmlCreateEnumeration(name);
5775 	    if (cur == NULL) {
5776                 xmlFreeEnumeration(ret);
5777                 return(NULL);
5778             }
5779 	    if (last == NULL) ret = last = cur;
5780 	    else {
5781 		last->next = cur;
5782 		last = cur;
5783 	    }
5784 	}
5785 	SKIP_BLANKS;
5786     } while (RAW == '|');
5787     if (RAW != ')') {
5788 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5789         xmlFreeEnumeration(ret);
5790 	return(NULL);
5791     }
5792     NEXT;
5793     return(ret);
5794 }
5795 
5796 /**
5797  * xmlParseEnumerationType:
5798  * @ctxt:  an XML parser context
5799  *
5800  * parse an Enumeration attribute type.
5801  *
5802  * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5803  *
5804  * [ VC: Enumeration ]
5805  * Values of this type must match one of the Nmtoken tokens in
5806  * the declaration
5807  *
5808  * Returns: the enumeration attribute tree built while parsing
5809  */
5810 
5811 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5812 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5813     xmlChar *name;
5814     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5815 
5816     if (RAW != '(') {
5817 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5818 	return(NULL);
5819     }
5820     SHRINK;
5821     do {
5822         NEXT;
5823 	SKIP_BLANKS;
5824         name = xmlParseNmtoken(ctxt);
5825 	if (name == NULL) {
5826 	    xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5827 	    return(ret);
5828 	}
5829 	tmp = ret;
5830 	while (tmp != NULL) {
5831 	    if (xmlStrEqual(name, tmp->name)) {
5832 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5833 	  "standalone: attribute enumeration value token %s duplicated\n",
5834 				 name, NULL);
5835 		if (!xmlDictOwns(ctxt->dict, name))
5836 		    xmlFree(name);
5837 		break;
5838 	    }
5839 	    tmp = tmp->next;
5840 	}
5841 	if (tmp == NULL) {
5842 	    cur = xmlCreateEnumeration(name);
5843 	    if (!xmlDictOwns(ctxt->dict, name))
5844 		xmlFree(name);
5845 	    if (cur == NULL) {
5846                 xmlFreeEnumeration(ret);
5847                 return(NULL);
5848             }
5849 	    if (last == NULL) ret = last = cur;
5850 	    else {
5851 		last->next = cur;
5852 		last = cur;
5853 	    }
5854 	}
5855 	SKIP_BLANKS;
5856     } while (RAW == '|');
5857     if (RAW != ')') {
5858 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5859 	return(ret);
5860     }
5861     NEXT;
5862     return(ret);
5863 }
5864 
5865 /**
5866  * xmlParseEnumeratedType:
5867  * @ctxt:  an XML parser context
5868  * @tree:  the enumeration tree built while parsing
5869  *
5870  * parse an Enumerated attribute type.
5871  *
5872  * [57] EnumeratedType ::= NotationType | Enumeration
5873  *
5874  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5875  *
5876  *
5877  * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5878  */
5879 
5880 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5881 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5882     if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5883 	SKIP(8);
5884 	if (SKIP_BLANKS == 0) {
5885 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5886 			   "Space required after 'NOTATION'\n");
5887 	    return(0);
5888 	}
5889 	*tree = xmlParseNotationType(ctxt);
5890 	if (*tree == NULL) return(0);
5891 	return(XML_ATTRIBUTE_NOTATION);
5892     }
5893     *tree = xmlParseEnumerationType(ctxt);
5894     if (*tree == NULL) return(0);
5895     return(XML_ATTRIBUTE_ENUMERATION);
5896 }
5897 
5898 /**
5899  * xmlParseAttributeType:
5900  * @ctxt:  an XML parser context
5901  * @tree:  the enumeration tree built while parsing
5902  *
5903  * parse the Attribute list def for an element
5904  *
5905  * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5906  *
5907  * [55] StringType ::= 'CDATA'
5908  *
5909  * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5910  *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5911  *
5912  * Validity constraints for attribute values syntax are checked in
5913  * xmlValidateAttributeValue()
5914  *
5915  * [ VC: ID ]
5916  * Values of type ID must match the Name production. A name must not
5917  * appear more than once in an XML document as a value of this type;
5918  * i.e., ID values must uniquely identify the elements which bear them.
5919  *
5920  * [ VC: One ID per Element Type ]
5921  * No element type may have more than one ID attribute specified.
5922  *
5923  * [ VC: ID Attribute Default ]
5924  * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5925  *
5926  * [ VC: IDREF ]
5927  * Values of type IDREF must match the Name production, and values
5928  * of type IDREFS must match Names; each IDREF Name must match the value
5929  * of an ID attribute on some element in the XML document; i.e. IDREF
5930  * values must match the value of some ID attribute.
5931  *
5932  * [ VC: Entity Name ]
5933  * Values of type ENTITY must match the Name production, values
5934  * of type ENTITIES must match Names; each Entity Name must match the
5935  * name of an unparsed entity declared in the DTD.
5936  *
5937  * [ VC: Name Token ]
5938  * Values of type NMTOKEN must match the Nmtoken production; values
5939  * of type NMTOKENS must match Nmtokens.
5940  *
5941  * Returns the attribute type
5942  */
5943 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5944 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5945     SHRINK;
5946     if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5947 	SKIP(5);
5948 	return(XML_ATTRIBUTE_CDATA);
5949      } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5950 	SKIP(6);
5951 	return(XML_ATTRIBUTE_IDREFS);
5952      } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5953 	SKIP(5);
5954 	return(XML_ATTRIBUTE_IDREF);
5955      } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5956         SKIP(2);
5957 	return(XML_ATTRIBUTE_ID);
5958      } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5959 	SKIP(6);
5960 	return(XML_ATTRIBUTE_ENTITY);
5961      } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5962 	SKIP(8);
5963 	return(XML_ATTRIBUTE_ENTITIES);
5964      } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5965 	SKIP(8);
5966 	return(XML_ATTRIBUTE_NMTOKENS);
5967      } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5968 	SKIP(7);
5969 	return(XML_ATTRIBUTE_NMTOKEN);
5970      }
5971      return(xmlParseEnumeratedType(ctxt, tree));
5972 }
5973 
5974 /**
5975  * xmlParseAttributeListDecl:
5976  * @ctxt:  an XML parser context
5977  *
5978  * : parse the Attribute list def for an element
5979  *
5980  * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5981  *
5982  * [53] AttDef ::= S Name S AttType S DefaultDecl
5983  *
5984  */
5985 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)5986 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5987     const xmlChar *elemName;
5988     const xmlChar *attrName;
5989     xmlEnumerationPtr tree;
5990 
5991     if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5992 	int inputid = ctxt->input->id;
5993 
5994 	SKIP(9);
5995 	if (SKIP_BLANKS == 0) {
5996 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5997 		                 "Space required after '<!ATTLIST'\n");
5998 	}
5999         elemName = xmlParseName(ctxt);
6000 	if (elemName == NULL) {
6001 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6002 			   "ATTLIST: no name for Element\n");
6003 	    return;
6004 	}
6005 	SKIP_BLANKS;
6006 	GROW;
6007 	while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6008 	    int type;
6009 	    int def;
6010 	    xmlChar *defaultValue = NULL;
6011 
6012 	    GROW;
6013             tree = NULL;
6014 	    attrName = xmlParseName(ctxt);
6015 	    if (attrName == NULL) {
6016 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6017 			       "ATTLIST: no name for Attribute\n");
6018 		break;
6019 	    }
6020 	    GROW;
6021 	    if (SKIP_BLANKS == 0) {
6022 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6023 		        "Space required after the attribute name\n");
6024 		break;
6025 	    }
6026 
6027 	    type = xmlParseAttributeType(ctxt, &tree);
6028 	    if (type <= 0) {
6029 	        break;
6030 	    }
6031 
6032 	    GROW;
6033 	    if (SKIP_BLANKS == 0) {
6034 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6035 			       "Space required after the attribute type\n");
6036 	        if (tree != NULL)
6037 		    xmlFreeEnumeration(tree);
6038 		break;
6039 	    }
6040 
6041 	    def = xmlParseDefaultDecl(ctxt, &defaultValue);
6042 	    if (def <= 0) {
6043                 if (defaultValue != NULL)
6044 		    xmlFree(defaultValue);
6045 	        if (tree != NULL)
6046 		    xmlFreeEnumeration(tree);
6047 	        break;
6048 	    }
6049 	    if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6050 	        xmlAttrNormalizeSpace(defaultValue, defaultValue);
6051 
6052 	    GROW;
6053             if (RAW != '>') {
6054 		if (SKIP_BLANKS == 0) {
6055 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6056 			"Space required after the attribute default value\n");
6057 		    if (defaultValue != NULL)
6058 			xmlFree(defaultValue);
6059 		    if (tree != NULL)
6060 			xmlFreeEnumeration(tree);
6061 		    break;
6062 		}
6063 	    }
6064 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6065 		(ctxt->sax->attributeDecl != NULL))
6066 		ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6067 	                        type, def, defaultValue, tree);
6068 	    else if (tree != NULL)
6069 		xmlFreeEnumeration(tree);
6070 
6071 	    if ((ctxt->sax2) && (defaultValue != NULL) &&
6072 	        (def != XML_ATTRIBUTE_IMPLIED) &&
6073 		(def != XML_ATTRIBUTE_REQUIRED)) {
6074 		xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6075 	    }
6076 	    if (ctxt->sax2) {
6077 		xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6078 	    }
6079 	    if (defaultValue != NULL)
6080 	        xmlFree(defaultValue);
6081 	    GROW;
6082 	}
6083 	if (RAW == '>') {
6084 	    if (inputid != ctxt->input->id) {
6085 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6086                                "Attribute list declaration doesn't start and"
6087                                " stop in the same entity\n");
6088 	    }
6089 	    NEXT;
6090 	}
6091     }
6092 }
6093 
6094 /**
6095  * xmlParseElementMixedContentDecl:
6096  * @ctxt:  an XML parser context
6097  * @inputchk:  the input used for the current entity, needed for boundary checks
6098  *
6099  * parse the declaration for a Mixed Element content
6100  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6101  *
6102  * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6103  *                '(' S? '#PCDATA' S? ')'
6104  *
6105  * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6106  *
6107  * [ VC: No Duplicate Types ]
6108  * The same name must not appear more than once in a single
6109  * mixed-content declaration.
6110  *
6111  * returns: the list of the xmlElementContentPtr describing the element choices
6112  */
6113 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6114 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6115     xmlElementContentPtr ret = NULL, cur = NULL, n;
6116     const xmlChar *elem = NULL;
6117 
6118     GROW;
6119     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6120 	SKIP(7);
6121 	SKIP_BLANKS;
6122 	SHRINK;
6123 	if (RAW == ')') {
6124 	    if (ctxt->input->id != inputchk) {
6125 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6126                                "Element content declaration doesn't start and"
6127                                " stop in the same entity\n");
6128 	    }
6129 	    NEXT;
6130 	    ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6131 	    if (ret == NULL)
6132 	        return(NULL);
6133 	    if (RAW == '*') {
6134 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6135 		NEXT;
6136 	    }
6137 	    return(ret);
6138 	}
6139 	if ((RAW == '(') || (RAW == '|')) {
6140 	    ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6141 	    if (ret == NULL) return(NULL);
6142 	}
6143 	while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6144 	    NEXT;
6145 	    if (elem == NULL) {
6146 	        ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6147 		if (ret == NULL) {
6148 		    xmlFreeDocElementContent(ctxt->myDoc, cur);
6149                     return(NULL);
6150                 }
6151 		ret->c1 = cur;
6152 		if (cur != NULL)
6153 		    cur->parent = ret;
6154 		cur = ret;
6155 	    } else {
6156 	        n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6157 		if (n == NULL) {
6158 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6159                     return(NULL);
6160                 }
6161 		n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6162 		if (n->c1 != NULL)
6163 		    n->c1->parent = n;
6164 	        cur->c2 = n;
6165 		if (n != NULL)
6166 		    n->parent = cur;
6167 		cur = n;
6168 	    }
6169 	    SKIP_BLANKS;
6170 	    elem = xmlParseName(ctxt);
6171 	    if (elem == NULL) {
6172 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6173 			"xmlParseElementMixedContentDecl : Name expected\n");
6174 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6175 		return(NULL);
6176 	    }
6177 	    SKIP_BLANKS;
6178 	    GROW;
6179 	}
6180 	if ((RAW == ')') && (NXT(1) == '*')) {
6181 	    if (elem != NULL) {
6182 		cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6183 		                               XML_ELEMENT_CONTENT_ELEMENT);
6184 		if (cur->c2 != NULL)
6185 		    cur->c2->parent = cur;
6186             }
6187             if (ret != NULL)
6188                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6189 	    if (ctxt->input->id != inputchk) {
6190 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6191                                "Element content declaration doesn't start and"
6192                                " stop in the same entity\n");
6193 	    }
6194 	    SKIP(2);
6195 	} else {
6196 	    xmlFreeDocElementContent(ctxt->myDoc, ret);
6197 	    xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6198 	    return(NULL);
6199 	}
6200 
6201     } else {
6202 	xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6203     }
6204     return(ret);
6205 }
6206 
/**
 * xmlParseElementChildrenContentDeclPriv:
 * @ctxt:  an XML parser context
 * @inputchk:  the input used for the current entity, needed for boundary checks
 * @depth: the level of recursion
 *
 * Parse the declaration of an element-children content model (a choice or
 * a sequence group); Mixed content is handled by
 * xmlParseElementMixedContentDecl.
 * The opening '(' of the group being parsed has already been consumed by
 * the caller. The function calls itself with depth + 1 for every nested
 * '(' group and refuses to go deeper than 128 levels, or 2048 levels when
 * XML_PARSE_HUGE is set in ctxt->options.
 *
 *
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
 *
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
 *
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
 *
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
 * TODO Parameter-entity replacement text must be properly nested
 *	with parenthesized groups. That is to say, if either of the
 *	opening or closing parentheses in a choice, seq, or Mixed
 *	construct is contained in the replacement text for a parameter
 *	entity, both must be contained in the same replacement text. For
 *	interoperability, if a parameter-entity reference appears in a
 *	choice, seq, or Mixed construct, its replacement text should not
 *	be empty, and neither the first nor last non-blank character of
 *	the replacement text should be a connector (| or ,).
 *
 * Returns the tree of xmlElementContentPtr describing the element
 *          hierarchy, or NULL when an error was raised (the partially
 *          built tree is freed on those paths).
 */
static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
                                       int depth) {
    /*
     * ret:  root of the tree built so far
     * cur:  the first child until an operator exists, then the most
     *       recently created operator node
     * last: most recently parsed child, not linked into the tree yet
     * op:   operator node created for the separator being processed
     */
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
    const xmlChar *elem;
    /* Separator of this group: 0 until the first ',' or '|' is seen. */
    xmlChar type = 0;

    /* Bound the recursion depth (see the function header for the limits). */
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
        (depth >  2048)) {
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
                          depth);
	return(NULL);
    }
    SKIP_BLANKS;
    GROW;
    /* First child of the group: either a nested group or a Name. */
    if (RAW == '(') {
	int inputid = ctxt->input->id;

        /* Recurse on first child */
	NEXT;
	SKIP_BLANKS;
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
                                                           depth + 1);
        if (cur == NULL)
            return(NULL);
	SKIP_BLANKS;
	GROW;
    } else {
	elem = xmlParseName(ctxt);
	if (elem == NULL) {
	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
	    return(NULL);
	}
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
	if (cur == NULL) {
	    xmlErrMemory(ctxt, NULL);
	    return(NULL);
	}
	GROW;
	/* Optional occurrence indicator directly after the Name. */
	if (RAW == '?') {
	    cur->ocur = XML_ELEMENT_CONTENT_OPT;
	    NEXT;
	} else if (RAW == '*') {
	    cur->ocur = XML_ELEMENT_CONTENT_MULT;
	    NEXT;
	} else if (RAW == '+') {
	    cur->ocur = XML_ELEMENT_CONTENT_PLUS;
	    NEXT;
	} else {
	    cur->ocur = XML_ELEMENT_CONTENT_ONCE;
	}
	GROW;
    }
    SKIP_BLANKS;
    SHRINK;
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
        /*
	 * Each loop we parse one separator and one element.
	 */
        if (RAW == ',') {
	    if (type == 0) type = CUR;

	    /*
	     * Detect "Name | Name , Name" error
	     */
	    else if (type != CUR) {
		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
		                  type);
		/* last is not part of ret yet, so it is freed separately */
		if ((last != NULL) && (last != ret))
		    xmlFreeDocElementContent(ctxt->myDoc, last);
		if (ret != NULL)
		    xmlFreeDocElementContent(ctxt->myDoc, ret);
		return(NULL);
	    }
	    NEXT;

	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
	    if (op == NULL) {
		if ((last != NULL) && (last != ret))
		    xmlFreeDocElementContent(ctxt->myDoc, last);
	        xmlFreeDocElementContent(ctxt->myDoc, ret);
		return(NULL);
	    }
	    /*
	     * First separator: op becomes the new root with the first child
	     * as c1. Later separators: op is hung as c2 of the previous
	     * operator and takes the pending child as its c1.
	     */
	    if (last == NULL) {
		op->c1 = ret;
		if (ret != NULL)
		    ret->parent = op;
		ret = cur = op;
	    } else {
	        cur->c2 = op;
		if (op != NULL)
		    op->parent = cur;
		op->c1 = last;
		if (last != NULL)
		    last->parent = op;
		cur =op;
		last = NULL;
	    }
	} else if (RAW == '|') {
	    if (type == 0) type = CUR;

	    /*
	     * Detect "Name , Name | Name" error
	     */
	    else if (type != CUR) {
		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
				  type);
		if ((last != NULL) && (last != ret))
		    xmlFreeDocElementContent(ctxt->myDoc, last);
		if (ret != NULL)
		    xmlFreeDocElementContent(ctxt->myDoc, ret);
		return(NULL);
	    }
	    NEXT;

	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
	    if (op == NULL) {
		if ((last != NULL) && (last != ret))
		    xmlFreeDocElementContent(ctxt->myDoc, last);
		if (ret != NULL)
		    xmlFreeDocElementContent(ctxt->myDoc, ret);
		return(NULL);
	    }
	    /* Same linking scheme as for the ',' separator above. */
	    if (last == NULL) {
		op->c1 = ret;
		if (ret != NULL)
		    ret->parent = op;
		ret = cur = op;
	    } else {
	        cur->c2 = op;
		if (op != NULL)
		    op->parent = cur;
		op->c1 = last;
		if (last != NULL)
		    last->parent = op;
		cur =op;
		last = NULL;
	    }
	} else {
	    /* Neither a separator nor ')': the group is malformed. */
	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
	    if ((last != NULL) && (last != ret))
	        xmlFreeDocElementContent(ctxt->myDoc, last);
	    if (ret != NULL)
		xmlFreeDocElementContent(ctxt->myDoc, ret);
	    return(NULL);
	}
	GROW;
	SKIP_BLANKS;
	GROW;
	/* Child following the separator; it stays in last until linked. */
	if (RAW == '(') {
	    int inputid = ctxt->input->id;
	    /* Recurse on second child */
	    NEXT;
	    SKIP_BLANKS;
	    last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
                                                          depth + 1);
            if (last == NULL) {
		if (ret != NULL)
		    xmlFreeDocElementContent(ctxt->myDoc, ret);
		return(NULL);
            }
	    SKIP_BLANKS;
	} else {
	    elem = xmlParseName(ctxt);
	    if (elem == NULL) {
		xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
		if (ret != NULL)
		    xmlFreeDocElementContent(ctxt->myDoc, ret);
		return(NULL);
	    }
	    last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
	    if (last == NULL) {
		if (ret != NULL)
		    xmlFreeDocElementContent(ctxt->myDoc, ret);
		return(NULL);
	    }
	    if (RAW == '?') {
		last->ocur = XML_ELEMENT_CONTENT_OPT;
		NEXT;
	    } else if (RAW == '*') {
		last->ocur = XML_ELEMENT_CONTENT_MULT;
		NEXT;
	    } else if (RAW == '+') {
		last->ocur = XML_ELEMENT_CONTENT_PLUS;
		NEXT;
	    } else {
		last->ocur = XML_ELEMENT_CONTENT_ONCE;
	    }
	}
	SKIP_BLANKS;
	GROW;
    }
    /* Link the final pending child as right operand of the last operator. */
    if ((cur != NULL) && (last != NULL)) {
        cur->c2 = last;
	if (last != NULL)
	    last->parent = cur;
    }
    if (ctxt->input->id != inputchk) {
	xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                       "Element content declaration doesn't start and stop in"
                       " the same entity\n");
    }
    /*
     * Step over the character that ended the loop: the closing ')',
     * unless the loop stopped because the parser reached XML_PARSER_EOF.
     */
    NEXT;
    /* Occurrence indicator applying to the whole group. */
    if (RAW == '?') {
	if (ret != NULL) {
	    /* '?' on a root already marked '+' or '*' is stored as '*' */
	    if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
	    else
	        ret->ocur = XML_ELEMENT_CONTENT_OPT;
	}
	NEXT;
    } else if (RAW == '*') {
	if (ret != NULL) {
	    ret->ocur = XML_ELEMENT_CONTENT_MULT;
	    cur = ret;
	    /*
	     * Some normalization:
	     * (a | b* | c?)* == (a | b | c)*
	     */
	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
		if ((cur->c1 != NULL) &&
	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
		if ((cur->c2 != NULL) &&
	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
		cur = cur->c2;
	    }
	}
	NEXT;
    } else if (RAW == '+') {
	if (ret != NULL) {
	    int found = 0;

	    if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
	    else
	        ret->ocur = XML_ELEMENT_CONTENT_PLUS;
	    /*
	     * Some normalization:
	     * (a | b*)+ == (a | b)*
	     * (a | b?)+ == (a | b)*
	     */
	    /*
	     * NOTE(review): unlike the '*' branch above, cur is not reset
	     * to ret here, so this walk starts at whatever cur was left at
	     * by the parsing loop (the last operator node created) rather
	     * than at the root. Confirm whether that is intended.
	     */
	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
		if ((cur->c1 != NULL) &&
	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
		    found = 1;
		}
		if ((cur->c2 != NULL) &&
	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
		    found = 1;
		}
		cur = cur->c2;
	    }
	    /* an optional alternative makes the whole group optional */
	    if (found)
		ret->ocur = XML_ELEMENT_CONTENT_MULT;
	}
	NEXT;
    }
    return(ret);
}
6511 
6512 /**
6513  * xmlParseElementChildrenContentDecl:
6514  * @ctxt:  an XML parser context
6515  * @inputchk:  the input used for the current entity, needed for boundary checks
6516  *
6517  * parse the declaration for a Mixed Element content
6518  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6519  *
6520  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6521  *
6522  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6523  *
6524  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6525  *
6526  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6527  *
6528  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6529  * TODO Parameter-entity replacement text must be properly nested
6530  *	with parenthesized groups. That is to say, if either of the
6531  *	opening or closing parentheses in a choice, seq, or Mixed
6532  *	construct is contained in the replacement text for a parameter
6533  *	entity, both must be contained in the same replacement text. For
6534  *	interoperability, if a parameter-entity reference appears in a
6535  *	choice, seq, or Mixed construct, its replacement text should not
6536  *	be empty, and neither the first nor last non-blank character of
6537  *	the replacement text should be a connector (| or ,).
6538  *
6539  * Returns the tree of xmlElementContentPtr describing the element
6540  *          hierarchy.
6541  */
6542 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6543 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6544     /* stub left for API/ABI compat */
6545     return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6546 }
6547 
6548 /**
6549  * xmlParseElementContentDecl:
6550  * @ctxt:  an XML parser context
6551  * @name:  the name of the element being defined.
6552  * @result:  the Element Content pointer will be stored here if any
6553  *
6554  * parse the declaration for an Element content either Mixed or Children,
6555  * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6556  *
6557  * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6558  *
6559  * returns: the type of element content XML_ELEMENT_TYPE_xxx
6560  */
6561 
6562 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6563 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6564                            xmlElementContentPtr *result) {
6565 
6566     xmlElementContentPtr tree = NULL;
6567     int inputid = ctxt->input->id;
6568     int res;
6569 
6570     *result = NULL;
6571 
6572     if (RAW != '(') {
6573 	xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6574 		"xmlParseElementContentDecl : %s '(' expected\n", name);
6575 	return(-1);
6576     }
6577     NEXT;
6578     GROW;
6579     if (ctxt->instate == XML_PARSER_EOF)
6580         return(-1);
6581     SKIP_BLANKS;
6582     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6583         tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6584 	res = XML_ELEMENT_TYPE_MIXED;
6585     } else {
6586         tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6587 	res = XML_ELEMENT_TYPE_ELEMENT;
6588     }
6589     SKIP_BLANKS;
6590     *result = tree;
6591     return(res);
6592 }
6593 
6594 /**
6595  * xmlParseElementDecl:
6596  * @ctxt:  an XML parser context
6597  *
6598  * parse an Element declaration.
6599  *
6600  * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6601  *
6602  * [ VC: Unique Element Type Declaration ]
6603  * No element type may be declared more than once
6604  *
6605  * Returns the type of the element, or -1 in case of error
6606  */
6607 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6608 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6609     const xmlChar *name;
6610     int ret = -1;
6611     xmlElementContentPtr content  = NULL;
6612 
6613     /* GROW; done in the caller */
6614     if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6615 	int inputid = ctxt->input->id;
6616 
6617 	SKIP(9);
6618 	if (SKIP_BLANKS == 0) {
6619 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6620 		           "Space required after 'ELEMENT'\n");
6621 	    return(-1);
6622 	}
6623         name = xmlParseName(ctxt);
6624 	if (name == NULL) {
6625 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6626 			   "xmlParseElementDecl: no name for Element\n");
6627 	    return(-1);
6628 	}
6629 	if (SKIP_BLANKS == 0) {
6630 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6631 			   "Space required after the element name\n");
6632 	}
6633 	if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6634 	    SKIP(5);
6635 	    /*
6636 	     * Element must always be empty.
6637 	     */
6638 	    ret = XML_ELEMENT_TYPE_EMPTY;
6639 	} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6640 	           (NXT(2) == 'Y')) {
6641 	    SKIP(3);
6642 	    /*
6643 	     * Element is a generic container.
6644 	     */
6645 	    ret = XML_ELEMENT_TYPE_ANY;
6646 	} else if (RAW == '(') {
6647 	    ret = xmlParseElementContentDecl(ctxt, name, &content);
6648 	} else {
6649 	    /*
6650 	     * [ WFC: PEs in Internal Subset ] error handling.
6651 	     */
6652 	    if ((RAW == '%') && (ctxt->external == 0) &&
6653 	        (ctxt->inputNr == 1)) {
6654 		xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6655 	  "PEReference: forbidden within markup decl in internal subset\n");
6656 	    } else {
6657 		xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6658 		      "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6659             }
6660 	    return(-1);
6661 	}
6662 
6663 	SKIP_BLANKS;
6664 
6665 	if (RAW != '>') {
6666 	    xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6667 	    if (content != NULL) {
6668 		xmlFreeDocElementContent(ctxt->myDoc, content);
6669 	    }
6670 	} else {
6671 	    if (inputid != ctxt->input->id) {
6672 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6673                                "Element declaration doesn't start and stop in"
6674                                " the same entity\n");
6675 	    }
6676 
6677 	    NEXT;
6678 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6679 		(ctxt->sax->elementDecl != NULL)) {
6680 		if (content != NULL)
6681 		    content->parent = NULL;
6682 	        ctxt->sax->elementDecl(ctxt->userData, name, ret,
6683 		                       content);
6684 		if ((content != NULL) && (content->parent == NULL)) {
6685 		    /*
6686 		     * this is a trick: if xmlAddElementDecl is called,
6687 		     * instead of copying the full tree it is plugged directly
6688 		     * if called from the parser. Avoid duplicating the
6689 		     * interfaces or change the API/ABI
6690 		     */
6691 		    xmlFreeDocElementContent(ctxt->myDoc, content);
6692 		}
6693 	    } else if (content != NULL) {
6694 		xmlFreeDocElementContent(ctxt->myDoc, content);
6695 	    }
6696 	}
6697     }
6698     return(ret);
6699 }
6700 
6701 /**
6702  * xmlParseConditionalSections
6703  * @ctxt:  an XML parser context
6704  *
6705  * [61] conditionalSect ::= includeSect | ignoreSect
6706  * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6707  * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6708  * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6709  * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6710  */
6711 
6712 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6713 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6714     int *inputIds = NULL;
6715     size_t inputIdsSize = 0;
6716     size_t depth = 0;
6717 
6718     while (ctxt->instate != XML_PARSER_EOF) {
6719         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6720             int id = ctxt->input->id;
6721 
6722             SKIP(3);
6723             SKIP_BLANKS;
6724 
6725             if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6726                 SKIP(7);
6727                 SKIP_BLANKS;
6728                 if (RAW != '[') {
6729                     xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6730                     xmlHaltParser(ctxt);
6731                     goto error;
6732                 }
6733                 if (ctxt->input->id != id) {
6734                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6735                                    "All markup of the conditional section is"
6736                                    " not in the same entity\n");
6737                 }
6738                 NEXT;
6739 
6740                 if (inputIdsSize <= depth) {
6741                     int *tmp;
6742 
6743                     inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6744                     tmp = (int *) xmlRealloc(inputIds,
6745                             inputIdsSize * sizeof(int));
6746                     if (tmp == NULL) {
6747                         xmlErrMemory(ctxt, NULL);
6748                         goto error;
6749                     }
6750                     inputIds = tmp;
6751                 }
6752                 inputIds[depth] = id;
6753                 depth++;
6754             } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6755                 int state;
6756                 xmlParserInputState instate;
6757                 size_t ignoreDepth = 0;
6758 
6759                 SKIP(6);
6760                 SKIP_BLANKS;
6761                 if (RAW != '[') {
6762                     xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6763                     xmlHaltParser(ctxt);
6764                     goto error;
6765                 }
6766                 if (ctxt->input->id != id) {
6767                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6768                                    "All markup of the conditional section is"
6769                                    " not in the same entity\n");
6770                 }
6771                 NEXT;
6772 
6773                 /*
6774                  * Parse up to the end of the conditional section but disable
6775                  * SAX event generating DTD building in the meantime
6776                  */
6777                 state = ctxt->disableSAX;
6778                 instate = ctxt->instate;
6779                 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6780                 ctxt->instate = XML_PARSER_IGNORE;
6781 
6782                 while (RAW != 0) {
6783                     if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6784                         SKIP(3);
6785                         ignoreDepth++;
6786                         /* Check for integer overflow */
6787                         if (ignoreDepth == 0) {
6788                             xmlErrMemory(ctxt, NULL);
6789                             goto error;
6790                         }
6791                     } else if ((RAW == ']') && (NXT(1) == ']') &&
6792                                (NXT(2) == '>')) {
6793                         if (ignoreDepth == 0)
6794                             break;
6795                         SKIP(3);
6796                         ignoreDepth--;
6797                     } else {
6798                         NEXT;
6799                     }
6800                 }
6801 
6802                 ctxt->disableSAX = state;
6803                 ctxt->instate = instate;
6804 
6805 		if (RAW == 0) {
6806 		    xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6807                     goto error;
6808 		}
6809                 if (ctxt->input->id != id) {
6810                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6811                                    "All markup of the conditional section is"
6812                                    " not in the same entity\n");
6813                 }
6814                 SKIP(3);
6815             } else {
6816                 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6817                 xmlHaltParser(ctxt);
6818                 goto error;
6819             }
6820         } else if ((depth > 0) &&
6821                    (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6822             depth--;
6823             if (ctxt->input->id != inputIds[depth]) {
6824                 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6825                                "All markup of the conditional section is not"
6826                                " in the same entity\n");
6827             }
6828             SKIP(3);
6829         } else {
6830             const xmlChar *check = CUR_PTR;
6831             unsigned int cons = ctxt->input->consumed;
6832 
6833             xmlParseMarkupDecl(ctxt);
6834 
6835             if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6836                 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6837                 xmlHaltParser(ctxt);
6838                 goto error;
6839             }
6840         }
6841 
6842         if (depth == 0)
6843             break;
6844 
6845         SKIP_BLANKS;
6846         GROW;
6847     }
6848 
6849 error:
6850     xmlFree(inputIds);
6851 }
6852 
6853 /**
6854  * xmlParseMarkupDecl:
6855  * @ctxt:  an XML parser context
6856  *
6857  * parse Markup declarations
6858  *
6859  * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6860  *                     NotationDecl | PI | Comment
6861  *
6862  * [ VC: Proper Declaration/PE Nesting ]
6863  * Parameter-entity replacement text must be properly nested with
6864  * markup declarations. That is to say, if either the first character
6865  * or the last character of a markup declaration (markupdecl above) is
6866  * contained in the replacement text for a parameter-entity reference,
6867  * both must be contained in the same replacement text.
6868  *
6869  * [ WFC: PEs in Internal Subset ]
6870  * In the internal DTD subset, parameter-entity references can occur
6871  * only where markup declarations can occur, not within markup declarations.
6872  * (This does not apply to references that occur in external parameter
6873  * entities or to the external subset.)
6874  */
6875 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6876 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6877     GROW;
6878     if (CUR == '<') {
6879         if (NXT(1) == '!') {
6880 	    switch (NXT(2)) {
6881 	        case 'E':
6882 		    if (NXT(3) == 'L')
6883 			xmlParseElementDecl(ctxt);
6884 		    else if (NXT(3) == 'N')
6885 			xmlParseEntityDecl(ctxt);
6886 		    break;
6887 	        case 'A':
6888 		    xmlParseAttributeListDecl(ctxt);
6889 		    break;
6890 	        case 'N':
6891 		    xmlParseNotationDecl(ctxt);
6892 		    break;
6893 	        case '-':
6894 		    xmlParseComment(ctxt);
6895 		    break;
6896 		default:
6897 		    /* there is an error but it will be detected later */
6898 		    break;
6899 	    }
6900 	} else if (NXT(1) == '?') {
6901 	    xmlParsePI(ctxt);
6902 	}
6903     }
6904 
6905     /*
6906      * detect requirement to exit there and act accordingly
6907      * and avoid having instate overridden later on
6908      */
6909     if (ctxt->instate == XML_PARSER_EOF)
6910         return;
6911 
6912     ctxt->instate = XML_PARSER_DTD;
6913 }
6914 
6915 /**
6916  * xmlParseTextDecl:
6917  * @ctxt:  an XML parser context
6918  *
6919  * parse an XML declaration header for external entities
6920  *
6921  * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6922  */
6923 
6924 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)6925 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6926     xmlChar *version;
6927     const xmlChar *encoding;
6928     int oldstate;
6929 
6930     /*
6931      * We know that '<?xml' is here.
6932      */
6933     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6934 	SKIP(5);
6935     } else {
6936 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6937 	return;
6938     }
6939 
6940     /* Avoid expansion of parameter entities when skipping blanks. */
6941     oldstate = ctxt->instate;
6942     ctxt->instate = XML_PARSER_START;
6943 
6944     if (SKIP_BLANKS == 0) {
6945 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6946 		       "Space needed after '<?xml'\n");
6947     }
6948 
6949     /*
6950      * We may have the VersionInfo here.
6951      */
6952     version = xmlParseVersionInfo(ctxt);
6953     if (version == NULL)
6954 	version = xmlCharStrdup(XML_DEFAULT_VERSION);
6955     else {
6956 	if (SKIP_BLANKS == 0) {
6957 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6958 		           "Space needed here\n");
6959 	}
6960     }
6961     ctxt->input->version = version;
6962 
6963     /*
6964      * We must have the encoding declaration
6965      */
6966     encoding = xmlParseEncodingDecl(ctxt);
6967     if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6968 	/*
6969 	 * The XML REC instructs us to stop parsing right here
6970 	 */
6971         ctxt->instate = oldstate;
6972         return;
6973     }
6974     if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6975 	xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6976 		       "Missing encoding in text declaration\n");
6977     }
6978 
6979     SKIP_BLANKS;
6980     if ((RAW == '?') && (NXT(1) == '>')) {
6981         SKIP(2);
6982     } else if (RAW == '>') {
6983         /* Deprecated old WD ... */
6984 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6985 	NEXT;
6986     } else {
6987 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6988 	MOVETO_ENDTAG(CUR_PTR);
6989 	NEXT;
6990     }
6991 
6992     ctxt->instate = oldstate;
6993 }
6994 
6995 /**
6996  * xmlParseExternalSubset:
6997  * @ctxt:  an XML parser context
6998  * @ExternalID: the external identifier
6999  * @SystemID: the system identifier (or URL)
7000  *
7001  * parse Markup declarations from an external subset
7002  *
7003  * [30] extSubset ::= textDecl? extSubsetDecl
7004  *
7005  * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7006  */
7007 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)7008 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7009                        const xmlChar *SystemID) {
7010     xmlDetectSAX2(ctxt);
7011     GROW;
7012 
7013     if ((ctxt->encoding == NULL) &&
7014         (ctxt->input->end - ctxt->input->cur >= 4)) {
7015         xmlChar start[4];
7016 	xmlCharEncoding enc;
7017 
7018 	start[0] = RAW;
7019 	start[1] = NXT(1);
7020 	start[2] = NXT(2);
7021 	start[3] = NXT(3);
7022 	enc = xmlDetectCharEncoding(start, 4);
7023 	if (enc != XML_CHAR_ENCODING_NONE)
7024 	    xmlSwitchEncoding(ctxt, enc);
7025     }
7026 
7027     if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7028 	xmlParseTextDecl(ctxt);
7029 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7030 	    /*
7031 	     * The XML REC instructs us to stop parsing right here
7032 	     */
7033 	    xmlHaltParser(ctxt);
7034 	    return;
7035 	}
7036     }
7037     if (ctxt->myDoc == NULL) {
7038         ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7039 	if (ctxt->myDoc == NULL) {
7040 	    xmlErrMemory(ctxt, "New Doc failed");
7041 	    return;
7042 	}
7043 	ctxt->myDoc->properties = XML_DOC_INTERNAL;
7044     }
7045     if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7046         xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7047 
7048     ctxt->instate = XML_PARSER_DTD;
7049     ctxt->external = 1;
7050     SKIP_BLANKS;
7051     while (((RAW == '<') && (NXT(1) == '?')) ||
7052            ((RAW == '<') && (NXT(1) == '!')) ||
7053 	   (RAW == '%')) {
7054 	const xmlChar *check = CUR_PTR;
7055 	unsigned int cons = ctxt->input->consumed;
7056 
7057 	GROW;
7058         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7059 	    xmlParseConditionalSections(ctxt);
7060 	} else
7061 	    xmlParseMarkupDecl(ctxt);
7062         SKIP_BLANKS;
7063 
7064 	if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7065 	    xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7066 	    break;
7067 	}
7068     }
7069 
7070     if (RAW != 0) {
7071 	xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7072     }
7073 
7074 }
7075 
7076 /**
7077  * xmlParseReference:
7078  * @ctxt:  an XML parser context
7079  *
7080  * parse and handle entity references in content, depending on the SAX
7081  * interface, this may end-up in a call to character() if this is a
7082  * CharRef, a predefined entity, if there is no reference() callback.
7083  * or if the parser was asked to switch to that mode.
      *
      * The work is done in stages: character references are emitted right
      * away; predefined entities are inlined through characters(); for any
      * other entity the first reference parses the replacement text into a
      * node list, after which the content is handed to the application
      * either through the reference() callback or by attaching or copying
      * the nodes under ctxt->node.
7084  *
7085  * [67] Reference ::= EntityRef | CharRef
7086  */
7087 void
xmlParseReference(xmlParserCtxtPtr ctxt)7088 xmlParseReference(xmlParserCtxtPtr ctxt) {
7089     xmlEntityPtr ent;
7090     xmlChar *val;
7091     int was_checked;
7092     xmlNodePtr list = NULL;
7093     xmlParserErrors ret = XML_ERR_OK;
7094 
7095 
7096     if (RAW != '&')
7097         return;
7098 
7099     /*
7100      * Simple case of a CharRef
7101      */
7102     if (NXT(1) == '#') {
7103 	int i = 0;
7104 	xmlChar out[16];
     	/*
     	 * Peek at the character following "&#" before xmlParseCharRef()
     	 * runs; it selects hexadecimal or decimal output in the
     	 * non-UTF-8 fallback below.
     	 */
7105 	int hex = NXT(2);
7106 	int value = xmlParseCharRef(ctxt);
7107 
7108 	if (value == 0)
7109 	    return;
7110 	if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7111 	    /*
7112 	     * So we are using non-UTF-8 buffers
7113 	     * Check that the char fit on 8bits, if not
7114 	     * generate a CharRef.
7115 	     */
7116 	    if (value <= 0xFF) {
7117 		out[0] = value;
7118 		out[1] = 0;
7119 		if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7120 		    (!ctxt->disableSAX))
7121 		    ctxt->sax->characters(ctxt->userData, out, 1);
7122 	    } else {
7123 		if ((hex == 'x') || (hex == 'X'))
7124 		    snprintf((char *)out, sizeof(out), "#x%X", value);
7125 		else
7126 		    snprintf((char *)out, sizeof(out), "#%d", value);
7127 		if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7128 		    (!ctxt->disableSAX))
7129 		    ctxt->sax->reference(ctxt->userData, out);
7130 	    }
7131 	} else {
7132 	    /*
7133 	     * Just encode the value in UTF-8
7134 	     */
7135 	    COPY_BUF(0 ,out, i, value);
7136 	    out[i] = 0;
7137 	    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7138 		(!ctxt->disableSAX))
7139 		ctxt->sax->characters(ctxt->userData, out, i);
7140 	}
7141 	return;
7142     }
7143 
7144     /*
7145      * We are seeing an entity reference
7146      */
7147     ent = xmlParseEntityRef(ctxt);
7148     if (ent == NULL) return;
         /* Nothing further is done once the document is not well-formed. */
7149     if (!ctxt->wellFormed)
7150 	return;
         /*
          * Snapshot of ent->checked taken before the block below may
          * update it; it is reset to 0 there when the entity gets parsed.
          */
7151     was_checked = ent->checked;
7152 
7153     /* special case of predefined entities */
7154     if ((ent->name == NULL) ||
7155         (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7156 	val = ent->content;
7157 	if (val == NULL) return;
7158 	/*
7159 	 * inline the entity.
7160 	 */
7161 	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7162 	    (!ctxt->disableSAX))
7163 	    ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7164 	return;
7165     }
7166 
7167     /*
7168      * The first reference to the entity trigger a parsing phase
7169      * where the ent->children is filled with the result from
7170      * the parsing.
7171      * Note: external parsed entities will not be loaded, it is not
7172      * required for a non-validating parser, unless the parsing option
7173      * of validating, or substituting entities were given. Doing so is
7174      * far more secure as the parser will only process data coming from
7175      * the document entity by default.
7176      */
7177     if (((ent->checked == 0) ||
7178          ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7179         ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7180          (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7181 	unsigned long oldnbent = ctxt->nbentities, diff;
7182 
7183 	/*
7184 	 * This is a bit hackish but this seems the best
7185 	 * way to make sure both SAX and DOM entity support
7186 	 * behaves okay.
7187 	 */
7188 	void *user_data;
7189 	if (ctxt->userData == ctxt)
7190 	    user_data = NULL;
7191 	else
7192 	    user_data = ctxt->userData;
7193 
7194 	/*
7195 	 * Check that this entity is well formed
7196 	 * 4.3.2: An internal general parsed entity is well-formed
7197 	 * if its replacement text matches the production labeled
7198 	 * content.
7199 	 */
7200 	if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7201 	    ctxt->depth++;
7202 	    ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7203 	                                              user_data, &list);
7204 	    ctxt->depth--;
7205 
7206 	} else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7207 	    ctxt->depth++;
7208 	    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7209 	                                   user_data, ctxt->depth, ent->URI,
7210 					   ent->ExternalID, &list);
7211 	    ctxt->depth--;
7212 	} else {
7213 	    ret = XML_ERR_ENTITY_PE_INTERNAL;
7214 	    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7215 			 "invalid entity type found\n", NULL);
7216 	}
7217 
7218 	/*
7219 	 * Store the number of entities needing parsing for this entity
7220 	 * content and do checkings
     	 *
     	 * ent->checked ends up holding twice the entity count (capped at
     	 * INT_MAX / 2 before doubling); bit 0 is set when the entity
     	 * content contains a '<'.
7221 	 */
7222         diff = ctxt->nbentities - oldnbent + 1;
7223         if (diff > INT_MAX / 2)
7224             diff = INT_MAX / 2;
7225         ent->checked = diff * 2;
7226 	if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7227 	    ent->checked |= 1;
7228 	if (ret == XML_ERR_ENTITY_LOOP) {
7229 	    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7230             xmlHaltParser(ctxt);
7231 	    xmlFreeNodeList(list);
7232 	    return;
7233 	}
7234 	if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7235 	    xmlFreeNodeList(list);
7236 	    return;
7237 	}
7238 
7239 	if ((ret == XML_ERR_OK) && (list != NULL)) {
7240 	    if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7241 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7242 		(ent->children == NULL)) {
7243 		ent->children = list;
7244                 /*
7245                  * Prune it directly in the generated document
7246                  * except for single text nodes.
7247                  */
7248                 if ((ctxt->replaceEntities == 0) ||
7249                     (ctxt->parseMode == XML_PARSE_READER) ||
7250                     ((list->type == XML_TEXT_NODE) &&
7251                      (list->next == NULL))) {
                         /* The entity owns the nodes; list is cleared below. */
7252                     ent->owner = 1;
7253                     while (list != NULL) {
7254                         list->parent = (xmlNodePtr) ent;
7255                         xmlSetTreeDoc(list, ent->doc);
7256                         if (list->next == NULL)
7257                             ent->last = list;
7258                         list = list->next;
7259                     }
7260                     list = NULL;
7261                 } else {
                         /*
                          * The nodes are re-parented to ctxt->node and list
                          * is kept pointing at them for the code further down.
                          */
7262                     ent->owner = 0;
7263                     while (list != NULL) {
7264                         list->parent = (xmlNodePtr) ctxt->node;
7265                         list->doc = ctxt->myDoc;
7266                         if (list->next == NULL)
7267                             ent->last = list;
7268                         list = list->next;
7269                     }
7270                     list = ent->children;
7271 #ifdef LIBXML_LEGACY_ENABLED
7272                     if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7273                         xmlAddEntityReference(ent, list, NULL);
7274 #endif /* LIBXML_LEGACY_ENABLED */
7275                 }
7276 	    } else {
7277 		xmlFreeNodeList(list);
7278 		list = NULL;
7279 	    }
7280 	} else if ((ret != XML_ERR_OK) &&
7281 		   (ret != XML_WAR_UNDECLARED_ENTITY)) {
7282 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7283 		     "Entity '%s' failed to parse\n", ent->name);
     	    /* Truncate the stored content of the entity that failed. */
7284             if (ent->content != NULL)
7285                 ent->content[0] = 0;
7286 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
7287 	} else if (list != NULL) {
7288 	    xmlFreeNodeList(list);
7289 	    list = NULL;
7290 	}
7291 	if (ent->checked == 0)
7292 	    ent->checked = 2;
7293 
7294         /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7295         was_checked = 0;
7296     } else if (ent->checked != 1) {
     	/*
     	 * Entity not parsed by this call: add the entity count recorded
     	 * in ent->checked to the context total.
     	 */
7297 	ctxt->nbentities += ent->checked / 2;
7298     }
7299 
7300     /*
7301      * Now that the entity content has been gathered
7302      * provide it to the application, this can take different forms based
7303      * on the parsing modes.
7304      */
7305     if (ent->children == NULL) {
7306 	/*
7307 	 * Probably running in SAX mode and the callbacks don't
7308 	 * build the entity content. So unless we already went
7309 	 * though parsing for first checking go though the entity
7310 	 * content to generate callbacks associated to the entity
7311 	 */
7312 	if (was_checked != 0) {
7313 	    void *user_data;
7314 	    /*
7315 	     * This is a bit hackish but this seems the best
7316 	     * way to make sure both SAX and DOM entity support
7317 	     * behaves okay.
7318 	     */
7319 	    if (ctxt->userData == ctxt)
7320 		user_data = NULL;
7321 	    else
7322 		user_data = ctxt->userData;
7323 
7324 	    if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7325 		ctxt->depth++;
7326 		ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7327 				   ent->content, user_data, NULL);
7328 		ctxt->depth--;
7329 	    } else if (ent->etype ==
7330 		       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7331 		ctxt->depth++;
7332 		ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7333 			   ctxt->sax, user_data, ctxt->depth,
7334 			   ent->URI, ent->ExternalID, NULL);
7335 		ctxt->depth--;
7336 	    } else {
7337 		ret = XML_ERR_ENTITY_PE_INTERNAL;
7338 		xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7339 			     "invalid entity type found\n", NULL);
7340 	    }
     	    /*
     	     * NOTE(review): unlike the first-parse path above, the loop
     	     * error is reported here without calling xmlHaltParser();
     	     * confirm this difference is intended.
     	     */
7341 	    if (ret == XML_ERR_ENTITY_LOOP) {
7342 		xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7343 		return;
7344 	    }
7345 	}
7346 	if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7347 	    (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7348 	    /*
7349 	     * Entity reference callback comes second, it's somewhat
7350 	     * superfluous but a compatibility to historical behaviour
7351 	     */
7352 	    ctxt->sax->reference(ctxt->userData, ent->name);
7353 	}
7354 	return;
7355     }
7356 
7357     /*
7358      * If we didn't get any children for the entity being built
7359      */
7360     if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7361 	(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7362 	/*
7363 	 * Create a node.
7364 	 */
7365 	ctxt->sax->reference(ctxt->userData, ent->name);
7366 	return;
7367     }
7368 
         /*
          * NOTE(review): the (ent->children == NULL) case already returned
          * above, so that half of the test looks redundant here; verify.
          */
7369     if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7370 	/*
7371 	 * There is a problem on the handling of _private for entities
7372 	 * (bug 155816): Should we copy the content of the field from
7373 	 * the entity (possibly overwriting some value set by the user
7374 	 * when a copy is created), should we leave it alone, or should
7375 	 * we try to take care of different situations?  The problem
7376 	 * is exacerbated by the usage of this field by the xmlReader.
7377 	 * To fix this bug, we look at _private on the created node
7378 	 * and, if it's NULL, we copy in whatever was in the entity.
7379 	 * If it's not NULL we leave it alone.  This is somewhat of a
7380 	 * hack - maybe we should have further tests to determine
7381 	 * what to do.
7382 	 */
7383 	if ((ctxt->node != NULL) && (ent->children != NULL)) {
7384 	    /*
7385 	     * Seems we are generating the DOM content, do
7386 	     * a simple tree copy for all references except the first
7387 	     * In the first occurrence list contains the replacement.
7388 	     */
7389 	    if (((list == NULL) && (ent->owner == 0)) ||
7390 		(ctxt->parseMode == XML_PARSE_READER)) {
     		/*
     		 * Case 1: copy every child of the entity under ctxt->node;
     		 * ent->children itself is left in place.
     		 */
7391 		xmlNodePtr nw = NULL, cur, firstChild = NULL;
7392 
7393 		/*
7394 		 * We are copying here, make sure there is no abuse
7395 		 */
7396 		ctxt->sizeentcopy += ent->length + 5;
7397 		if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7398 		    return;
7399 
7400 		/*
7401 		 * when operating on a reader, the entities definitions
7402 		 * are always owning the entities subtree.
7403 		if (ctxt->parseMode == XML_PARSE_READER)
7404 		    ent->owner = 1;
7405 		 */
7406 
7407 		cur = ent->children;
7408 		while (cur != NULL) {
7409 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7410 		    if (nw != NULL) {
7411 			if (nw->_private == NULL)
7412 			    nw->_private = cur->_private;
7413 			if (firstChild == NULL){
7414 			    firstChild = nw;
7415 			}
7416 			nw = xmlAddChild(ctxt->node, nw);
7417 		    }
7418 		    if (cur == ent->last) {
7419 			/*
7420 			 * needed to detect some strange empty
7421 			 * node cases in the reader tests
7422 			 */
7423 			if ((ctxt->parseMode == XML_PARSE_READER) &&
7424 			    (nw != NULL) &&
7425 			    (nw->type == XML_ELEMENT_NODE) &&
7426 			    (nw->children == NULL))
7427 			    nw->extra = 1;
7428 
7429 			break;
7430 		    }
7431 		    cur = cur->next;
7432 		}
7433 #ifdef LIBXML_LEGACY_ENABLED
7434 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7435 		  xmlAddEntityReference(ent, firstChild, nw);
7436 #endif /* LIBXML_LEGACY_ENABLED */
7437 	    } else if ((list == NULL) || (ctxt->inputNr > 0)) {
     		/*
     		 * Case 2: move the original children under ctxt->node and
     		 * give the entity fresh copies of them instead.
     		 */
7438 		xmlNodePtr nw = NULL, cur, next, last,
7439 			   firstChild = NULL;
7440 
7441 		/*
7442 		 * We are copying here, make sure there is no abuse
7443 		 */
7444 		ctxt->sizeentcopy += ent->length + 5;
7445 		if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7446 		    return;
7447 
7448 		/*
7449 		 * Copy the entity child list and make it the new
7450 		 * entity child list. The goal is to make sure any
7451 		 * ID or REF referenced will be the one from the
7452 		 * document content and not the entity copy.
7453 		 */
7454 		cur = ent->children;
7455 		ent->children = NULL;
7456 		last = ent->last;
7457 		ent->last = NULL;
7458 		while (cur != NULL) {
7459 		    next = cur->next;
7460 		    cur->next = NULL;
7461 		    cur->parent = NULL;
7462 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
     		    /*
     		     * NOTE(review): when the copy fails, cur has already been
     		     * unlinked and is attached nowhere; check whether it is
     		     * leaked on that path.
     		     */
7463 		    if (nw != NULL) {
7464 			if (nw->_private == NULL)
7465 			    nw->_private = cur->_private;
7466 			if (firstChild == NULL){
7467 			    firstChild = cur;
7468 			}
7469 			xmlAddChild((xmlNodePtr) ent, nw);
7470 			xmlAddChild(ctxt->node, cur);
7471 		    }
7472 		    if (cur == last)
7473 			break;
7474 		    cur = next;
7475 		}
7476 		if (ent->owner == 0)
7477 		    ent->owner = 1;
7478 #ifdef LIBXML_LEGACY_ENABLED
7479 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7480 		  xmlAddEntityReference(ent, firstChild, nw);
7481 #endif /* LIBXML_LEGACY_ENABLED */
7482 	    } else {
     		/*
     		 * Case 3: attach the entity's own child list directly
     		 * under ctxt->node.
     		 */
7483 		const xmlChar *nbktext;
7484 
7485 		/*
7486 		 * the name change is to avoid coalescing of the
7487 		 * node with a possible previous text one which
7488 		 * would make ent->children a dangling pointer
7489 		 */
7490 		nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7491 					-1);
7492 		if (ent->children->type == XML_TEXT_NODE)
7493 		    ent->children->name = nbktext;
7494 		if ((ent->last != ent->children) &&
7495 		    (ent->last->type == XML_TEXT_NODE))
7496 		    ent->last->name = nbktext;
7497 		xmlAddChildList(ctxt->node, ent->children);
7498 	    }
7499 
7500 	    /*
7501 	     * This is to avoid a nasty side effect, see
7502 	     * characters() in SAX.c
7503 	     */
7504 	    ctxt->nodemem = 0;
7505 	    ctxt->nodelen = 0;
7506 	    return;
7507 	}
7508     }
7509 }
7510 
7511 /**
7512  * xmlParseEntityRef:
7513  * @ctxt:  an XML parser context
7514  *
7515  * parse ENTITY references declarations
7516  *
7517  * [68] EntityRef ::= '&' Name ';'
7518  *
7519  * [ WFC: Entity Declared ]
7520  * In a document without any DTD, a document with only an internal DTD
7521  * subset which contains no parameter entity references, or a document
7522  * with "standalone='yes'", the Name given in the entity reference
7523  * must match that in an entity declaration, except that well-formed
7524  * documents need not declare any of the following entities: amp, lt,
7525  * gt, apos, quot.  The declaration of a parameter entity must precede
7526  * any reference to it.  Similarly, the declaration of a general entity
7527  * must precede any reference to it which appears in a default value in an
7528  * attribute-list declaration. Note that if entities are declared in the
7529  * external subset or in external parameter entities, a non-validating
7530  * processor is not obligated to read and process their declarations;
7531  * for such documents, the rule that an entity must be declared is a
7532  * well-formedness constraint only if standalone='yes'.
7533  *
7534  * [ WFC: Parsed Entity ]
7535  * An entity reference must not contain the name of an unparsed entity
7536  *
7537  * Returns the xmlEntityPtr if found, or NULL otherwise.
7538  */
7539 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7540 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7541     const xmlChar *name;
7542     xmlEntityPtr ent = NULL;
7543 
7544     GROW;
7545     if (ctxt->instate == XML_PARSER_EOF)
7546         return(NULL);
7547 
7548     if (RAW != '&')
7549         return(NULL);
7550     NEXT;
7551     name = xmlParseName(ctxt);
7552     if (name == NULL) {
7553 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7554 		       "xmlParseEntityRef: no name\n");
7555         return(NULL);
7556     }
7557     if (RAW != ';') {
7558 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7559 	return(NULL);
7560     }
7561     NEXT;
7562 
7563     /*
7564      * Predefined entities override any extra definition
7565      */
7566     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7567         ent = xmlGetPredefinedEntity(name);
7568         if (ent != NULL)
7569             return(ent);
7570     }
7571 
7572     /*
7573      * Increase the number of entity references parsed
7574      */
7575     ctxt->nbentities++;
7576 
7577     /*
7578      * Ask first SAX for entity resolution, otherwise try the
7579      * entities which may have stored in the parser context.
7580      */
7581     if (ctxt->sax != NULL) {
7582 	if (ctxt->sax->getEntity != NULL)
7583 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7584 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7585 	    (ctxt->options & XML_PARSE_OLDSAX))
7586 	    ent = xmlGetPredefinedEntity(name);
7587 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7588 	    (ctxt->userData==ctxt)) {
7589 	    ent = xmlSAX2GetEntity(ctxt, name);
7590 	}
7591     }
7592     if (ctxt->instate == XML_PARSER_EOF)
7593 	return(NULL);
7594     /*
7595      * [ WFC: Entity Declared ]
7596      * In a document without any DTD, a document with only an
7597      * internal DTD subset which contains no parameter entity
7598      * references, or a document with "standalone='yes'", the
7599      * Name given in the entity reference must match that in an
7600      * entity declaration, except that well-formed documents
7601      * need not declare any of the following entities: amp, lt,
7602      * gt, apos, quot.
7603      * The declaration of a parameter entity must precede any
7604      * reference to it.
7605      * Similarly, the declaration of a general entity must
7606      * precede any reference to it which appears in a default
7607      * value in an attribute-list declaration. Note that if
7608      * entities are declared in the external subset or in
7609      * external parameter entities, a non-validating processor
7610      * is not obligated to read and process their declarations;
7611      * for such documents, the rule that an entity must be
7612      * declared is a well-formedness constraint only if
7613      * standalone='yes'.
7614      */
7615     if (ent == NULL) {
7616 	if ((ctxt->standalone == 1) ||
7617 	    ((ctxt->hasExternalSubset == 0) &&
7618 	     (ctxt->hasPErefs == 0))) {
7619 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7620 		     "Entity '%s' not defined\n", name);
7621 	} else {
7622 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7623 		     "Entity '%s' not defined\n", name);
7624 	    if ((ctxt->inSubset == 0) &&
7625 		(ctxt->sax != NULL) &&
7626 		(ctxt->sax->reference != NULL)) {
7627 		ctxt->sax->reference(ctxt->userData, name);
7628 	    }
7629 	}
7630 	xmlParserEntityCheck(ctxt, 0, ent, 0);
7631 	ctxt->valid = 0;
7632     }
7633 
7634     /*
7635      * [ WFC: Parsed Entity ]
7636      * An entity reference must not contain the name of an
7637      * unparsed entity
7638      */
7639     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7640 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7641 		 "Entity reference to unparsed entity %s\n", name);
7642     }
7643 
7644     /*
7645      * [ WFC: No External Entity References ]
7646      * Attribute values cannot contain direct or indirect
7647      * entity references to external entities.
7648      */
7649     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7650 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7651 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7652 	     "Attribute references external entity '%s'\n", name);
7653     }
7654     /*
7655      * [ WFC: No < in Attribute Values ]
7656      * The replacement text of any entity referred to directly or
7657      * indirectly in an attribute value (other than "&lt;") must
7658      * not contain a <.
7659      */
7660     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7661 	     (ent != NULL) &&
7662 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7663 	if (((ent->checked & 1) || (ent->checked == 0)) &&
7664 	     (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7665 	    xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7666 	"'<' in entity '%s' is not allowed in attributes values\n", name);
7667         }
7668     }
7669 
7670     /*
7671      * Internal check, no parameter entities here ...
7672      */
7673     else {
7674 	switch (ent->etype) {
7675 	    case XML_INTERNAL_PARAMETER_ENTITY:
7676 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7677 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7678 	     "Attempt to reference the parameter entity '%s'\n",
7679 			      name);
7680 	    break;
7681 	    default:
7682 	    break;
7683 	}
7684     }
7685 
7686     /*
7687      * [ WFC: No Recursion ]
7688      * A parsed entity must not contain a recursive reference
7689      * to itself, either directly or indirectly.
7690      * Done somewhere else
7691      */
7692     return(ent);
7693 }
7694 
7695 /**
7696  * xmlParseStringEntityRef:
7697  * @ctxt:  an XML parser context
7698  * @str:  a pointer to an index in the string
7699  *
7700  * parse ENTITY references declarations, but this version parses it from
7701  * a string value.
7702  *
7703  * [68] EntityRef ::= '&' Name ';'
7704  *
7705  * [ WFC: Entity Declared ]
7706  * In a document without any DTD, a document with only an internal DTD
7707  * subset which contains no parameter entity references, or a document
7708  * with "standalone='yes'", the Name given in the entity reference
7709  * must match that in an entity declaration, except that well-formed
7710  * documents need not declare any of the following entities: amp, lt,
7711  * gt, apos, quot.  The declaration of a parameter entity must precede
7712  * any reference to it.  Similarly, the declaration of a general entity
7713  * must precede any reference to it which appears in a default value in an
7714  * attribute-list declaration. Note that if entities are declared in the
7715  * external subset or in external parameter entities, a non-validating
7716  * processor is not obligated to read and process their declarations;
7717  * for such documents, the rule that an entity must be declared is a
7718  * well-formedness constraint only if standalone='yes'.
7719  *
7720  * [ WFC: Parsed Entity ]
7721  * An entity reference must not contain the name of an unparsed entity
7722  *
7723  * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7724  * is updated to the current location in the string.
7725  */
7726 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7727 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7728     xmlChar *name;
7729     const xmlChar *ptr;
7730     xmlChar cur;
7731     xmlEntityPtr ent = NULL;
7732 
7733     if ((str == NULL) || (*str == NULL))
7734         return(NULL);
7735     ptr = *str;
7736     cur = *ptr;
7737     if (cur != '&')
7738 	return(NULL);
7739 
7740     ptr++;
7741     name = xmlParseStringName(ctxt, &ptr);
7742     if (name == NULL) {
7743 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7744 		       "xmlParseStringEntityRef: no name\n");
7745 	*str = ptr;
7746 	return(NULL);
7747     }
7748     if (*ptr != ';') {
7749 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7750         xmlFree(name);
7751 	*str = ptr;
7752 	return(NULL);
7753     }
7754     ptr++;
7755 
7756 
7757     /*
7758      * Predefined entities override any extra definition
7759      */
7760     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7761         ent = xmlGetPredefinedEntity(name);
7762         if (ent != NULL) {
7763             xmlFree(name);
7764             *str = ptr;
7765             return(ent);
7766         }
7767     }
7768 
7769     /*
7770      * Increase the number of entity references parsed
7771      */
7772     ctxt->nbentities++;
7773 
7774     /*
7775      * Ask first SAX for entity resolution, otherwise try the
7776      * entities which may have stored in the parser context.
7777      */
7778     if (ctxt->sax != NULL) {
7779 	if (ctxt->sax->getEntity != NULL)
7780 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7781 	if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7782 	    ent = xmlGetPredefinedEntity(name);
7783 	if ((ent == NULL) && (ctxt->userData==ctxt)) {
7784 	    ent = xmlSAX2GetEntity(ctxt, name);
7785 	}
7786     }
7787     if (ctxt->instate == XML_PARSER_EOF) {
7788 	xmlFree(name);
7789 	return(NULL);
7790     }
7791 
7792     /*
7793      * [ WFC: Entity Declared ]
7794      * In a document without any DTD, a document with only an
7795      * internal DTD subset which contains no parameter entity
7796      * references, or a document with "standalone='yes'", the
7797      * Name given in the entity reference must match that in an
7798      * entity declaration, except that well-formed documents
7799      * need not declare any of the following entities: amp, lt,
7800      * gt, apos, quot.
7801      * The declaration of a parameter entity must precede any
7802      * reference to it.
7803      * Similarly, the declaration of a general entity must
7804      * precede any reference to it which appears in a default
7805      * value in an attribute-list declaration. Note that if
7806      * entities are declared in the external subset or in
7807      * external parameter entities, a non-validating processor
7808      * is not obligated to read and process their declarations;
7809      * for such documents, the rule that an entity must be
7810      * declared is a well-formedness constraint only if
7811      * standalone='yes'.
7812      */
7813     if (ent == NULL) {
7814 	if ((ctxt->standalone == 1) ||
7815 	    ((ctxt->hasExternalSubset == 0) &&
7816 	     (ctxt->hasPErefs == 0))) {
7817 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7818 		     "Entity '%s' not defined\n", name);
7819 	} else {
7820 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7821 			  "Entity '%s' not defined\n",
7822 			  name);
7823 	}
7824 	xmlParserEntityCheck(ctxt, 0, ent, 0);
7825 	/* TODO ? check regressions ctxt->valid = 0; */
7826     }
7827 
7828     /*
7829      * [ WFC: Parsed Entity ]
7830      * An entity reference must not contain the name of an
7831      * unparsed entity
7832      */
7833     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7834 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7835 		 "Entity reference to unparsed entity %s\n", name);
7836     }
7837 
7838     /*
7839      * [ WFC: No External Entity References ]
7840      * Attribute values cannot contain direct or indirect
7841      * entity references to external entities.
7842      */
7843     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7844 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7845 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7846 	 "Attribute references external entity '%s'\n", name);
7847     }
7848     /*
7849      * [ WFC: No < in Attribute Values ]
7850      * The replacement text of any entity referred to directly or
7851      * indirectly in an attribute value (other than "&lt;") must
7852      * not contain a <.
7853      */
7854     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7855 	     (ent != NULL) && (ent->content != NULL) &&
7856 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7857 	     (xmlStrchr(ent->content, '<'))) {
7858 	xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7859      "'<' in entity '%s' is not allowed in attributes values\n",
7860 			  name);
7861     }
7862 
7863     /*
7864      * Internal check, no parameter entities here ...
7865      */
7866     else {
7867 	switch (ent->etype) {
7868 	    case XML_INTERNAL_PARAMETER_ENTITY:
7869 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7870 		xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7871 	     "Attempt to reference the parameter entity '%s'\n",
7872 				  name);
7873 	    break;
7874 	    default:
7875 	    break;
7876 	}
7877     }
7878 
7879     /*
7880      * [ WFC: No Recursion ]
7881      * A parsed entity must not contain a recursive reference
7882      * to itself, either directly or indirectly.
7883      * Done somewhere else
7884      */
7885 
7886     xmlFree(name);
7887     *str = ptr;
7888     return(ent);
7889 }
7890 
7891 /**
7892  * xmlParsePEReference:
7893  * @ctxt:  an XML parser context
7894  *
7895  * parse PEReference declarations
7896  * The entity content is handled directly by pushing it's content as
7897  * a new input stream.
7898  *
7899  * [69] PEReference ::= '%' Name ';'
7900  *
7901  * [ WFC: No Recursion ]
7902  * A parsed entity must not contain a recursive
7903  * reference to itself, either directly or indirectly.
7904  *
7905  * [ WFC: Entity Declared ]
7906  * In a document without any DTD, a document with only an internal DTD
7907  * subset which contains no parameter entity references, or a document
7908  * with "standalone='yes'", ...  ... The declaration of a parameter
7909  * entity must precede any reference to it...
7910  *
7911  * [ VC: Entity Declared ]
7912  * In a document with an external subset or external parameter entities
7913  * with "standalone='no'", ...  ... The declaration of a parameter entity
7914  * must precede any reference to it...
7915  *
7916  * [ WFC: In DTD ]
7917  * Parameter-entity references may only appear in the DTD.
7918  * NOTE: misleading but this is handled.
7919  */
7920 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7921 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7922 {
7923     const xmlChar *name;
7924     xmlEntityPtr entity = NULL;
7925     xmlParserInputPtr input;
7926 
7927     if (RAW != '%')
7928         return;
7929     NEXT;
7930     name = xmlParseName(ctxt);
7931     if (name == NULL) {
7932 	xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7933 	return;
7934     }
7935     if (xmlParserDebugEntities)
7936 	xmlGenericError(xmlGenericErrorContext,
7937 		"PEReference: %s\n", name);
7938     if (RAW != ';') {
7939 	xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7940         return;
7941     }
7942 
7943     NEXT;
7944 
7945     /*
7946      * Increase the number of entity references parsed
7947      */
7948     ctxt->nbentities++;
7949 
7950     /*
7951      * Request the entity from SAX
7952      */
7953     if ((ctxt->sax != NULL) &&
7954 	(ctxt->sax->getParameterEntity != NULL))
7955 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7956     if (ctxt->instate == XML_PARSER_EOF)
7957 	return;
7958     if (entity == NULL) {
7959 	/*
7960 	 * [ WFC: Entity Declared ]
7961 	 * In a document without any DTD, a document with only an
7962 	 * internal DTD subset which contains no parameter entity
7963 	 * references, or a document with "standalone='yes'", ...
7964 	 * ... The declaration of a parameter entity must precede
7965 	 * any reference to it...
7966 	 */
7967 	if ((ctxt->standalone == 1) ||
7968 	    ((ctxt->hasExternalSubset == 0) &&
7969 	     (ctxt->hasPErefs == 0))) {
7970 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7971 			      "PEReference: %%%s; not found\n",
7972 			      name);
7973 	} else {
7974 	    /*
7975 	     * [ VC: Entity Declared ]
7976 	     * In a document with an external subset or external
7977 	     * parameter entities with "standalone='no'", ...
7978 	     * ... The declaration of a parameter entity must
7979 	     * precede any reference to it...
7980 	     */
7981             if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7982                 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7983                                  "PEReference: %%%s; not found\n",
7984                                  name, NULL);
7985             } else
7986                 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7987                               "PEReference: %%%s; not found\n",
7988                               name, NULL);
7989             ctxt->valid = 0;
7990 	}
7991 	xmlParserEntityCheck(ctxt, 0, NULL, 0);
7992     } else {
7993 	/*
7994 	 * Internal checking in case the entity quest barfed
7995 	 */
7996 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7997 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7998 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7999 		  "Internal: %%%s; is not a parameter entity\n",
8000 			  name, NULL);
8001 	} else {
8002             xmlChar start[4];
8003             xmlCharEncoding enc;
8004 
8005 	    if (xmlParserEntityCheck(ctxt, 0, entity, 0))
8006 	        return;
8007 
8008 	    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8009 	        ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8010 		((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8011 		((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8012 		((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8013 		(ctxt->replaceEntities == 0) &&
8014 		(ctxt->validate == 0))
8015 		return;
8016 
8017 	    input = xmlNewEntityInputStream(ctxt, entity);
8018 	    if (xmlPushInput(ctxt, input) < 0) {
8019                 xmlFreeInputStream(input);
8020 		return;
8021             }
8022 
8023 	    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8024                 /*
8025                  * Get the 4 first bytes and decode the charset
8026                  * if enc != XML_CHAR_ENCODING_NONE
8027                  * plug some encoding conversion routines.
8028                  * Note that, since we may have some non-UTF8
8029                  * encoding (like UTF16, bug 135229), the 'length'
8030                  * is not known, but we can calculate based upon
8031                  * the amount of data in the buffer.
8032                  */
8033                 GROW
8034                 if (ctxt->instate == XML_PARSER_EOF)
8035                     return;
8036                 if ((ctxt->input->end - ctxt->input->cur)>=4) {
8037                     start[0] = RAW;
8038                     start[1] = NXT(1);
8039                     start[2] = NXT(2);
8040                     start[3] = NXT(3);
8041                     enc = xmlDetectCharEncoding(start, 4);
8042                     if (enc != XML_CHAR_ENCODING_NONE) {
8043                         xmlSwitchEncoding(ctxt, enc);
8044                     }
8045                 }
8046 
8047                 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8048                     (IS_BLANK_CH(NXT(5)))) {
8049                     xmlParseTextDecl(ctxt);
8050                 }
8051             }
8052 	}
8053     }
8054     ctxt->hasPErefs = 1;
8055 }
8056 
8057 /**
8058  * xmlLoadEntityContent:
8059  * @ctxt:  an XML parser context
8060  * @entity: an unloaded system entity
8061  *
8062  * Load the original content of the given system entity from the
8063  * ExternalID/SystemID given. This is to be used for Included in Literal
8064  * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8065  *
8066  * Returns 0 in case of success and -1 in case of failure
8067  */
8068 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)8069 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8070     xmlParserInputPtr input;
8071     xmlBufferPtr buf;
8072     int l, c;
8073     int count = 0;
8074 
8075     if ((ctxt == NULL) || (entity == NULL) ||
8076         ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8077 	 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8078 	(entity->content != NULL)) {
8079 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8080 	            "xmlLoadEntityContent parameter error");
8081         return(-1);
8082     }
8083 
8084     if (xmlParserDebugEntities)
8085 	xmlGenericError(xmlGenericErrorContext,
8086 		"Reading %s entity content input\n", entity->name);
8087 
8088     buf = xmlBufferCreate();
8089     if (buf == NULL) {
8090 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8091 	            "xmlLoadEntityContent parameter error");
8092         return(-1);
8093     }
8094 
8095     input = xmlNewEntityInputStream(ctxt, entity);
8096     if (input == NULL) {
8097 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8098 	            "xmlLoadEntityContent input error");
8099 	xmlBufferFree(buf);
8100         return(-1);
8101     }
8102 
8103     /*
8104      * Push the entity as the current input, read char by char
8105      * saving to the buffer until the end of the entity or an error
8106      */
8107     if (xmlPushInput(ctxt, input) < 0) {
8108         xmlBufferFree(buf);
8109 	return(-1);
8110     }
8111 
8112     GROW;
8113     c = CUR_CHAR(l);
8114     while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8115            (IS_CHAR(c))) {
8116         xmlBufferAdd(buf, ctxt->input->cur, l);
8117 	if (count++ > XML_PARSER_CHUNK_SIZE) {
8118 	    count = 0;
8119 	    GROW;
8120             if (ctxt->instate == XML_PARSER_EOF) {
8121                 xmlBufferFree(buf);
8122                 return(-1);
8123             }
8124 	}
8125 	NEXTL(l);
8126 	c = CUR_CHAR(l);
8127 	if (c == 0) {
8128 	    count = 0;
8129 	    GROW;
8130             if (ctxt->instate == XML_PARSER_EOF) {
8131                 xmlBufferFree(buf);
8132                 return(-1);
8133             }
8134 	    c = CUR_CHAR(l);
8135 	}
8136     }
8137 
8138     if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8139         xmlPopInput(ctxt);
8140     } else if (!IS_CHAR(c)) {
8141         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8142                           "xmlLoadEntityContent: invalid char value %d\n",
8143 	                  c);
8144 	xmlBufferFree(buf);
8145 	return(-1);
8146     }
8147     entity->content = buf->content;
8148     buf->content = NULL;
8149     xmlBufferFree(buf);
8150 
8151     return(0);
8152 }
8153 
8154 /**
8155  * xmlParseStringPEReference:
8156  * @ctxt:  an XML parser context
8157  * @str:  a pointer to an index in the string
8158  *
8159  * parse PEReference declarations
8160  *
8161  * [69] PEReference ::= '%' Name ';'
8162  *
8163  * [ WFC: No Recursion ]
8164  * A parsed entity must not contain a recursive
8165  * reference to itself, either directly or indirectly.
8166  *
8167  * [ WFC: Entity Declared ]
8168  * In a document without any DTD, a document with only an internal DTD
8169  * subset which contains no parameter entity references, or a document
8170  * with "standalone='yes'", ...  ... The declaration of a parameter
8171  * entity must precede any reference to it...
8172  *
8173  * [ VC: Entity Declared ]
8174  * In a document with an external subset or external parameter entities
8175  * with "standalone='no'", ...  ... The declaration of a parameter entity
8176  * must precede any reference to it...
8177  *
8178  * [ WFC: In DTD ]
8179  * Parameter-entity references may only appear in the DTD.
8180  * NOTE: misleading but this is handled.
8181  *
8182  * Returns the string of the entity content.
8183  *         str is updated to the current value of the index
8184  */
8185 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8186 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8187     const xmlChar *ptr;
8188     xmlChar cur;
8189     xmlChar *name;
8190     xmlEntityPtr entity = NULL;
8191 
8192     if ((str == NULL) || (*str == NULL)) return(NULL);
8193     ptr = *str;
8194     cur = *ptr;
8195     if (cur != '%')
8196         return(NULL);
8197     ptr++;
8198     name = xmlParseStringName(ctxt, &ptr);
8199     if (name == NULL) {
8200 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8201 		       "xmlParseStringPEReference: no name\n");
8202 	*str = ptr;
8203 	return(NULL);
8204     }
8205     cur = *ptr;
8206     if (cur != ';') {
8207 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8208 	xmlFree(name);
8209 	*str = ptr;
8210 	return(NULL);
8211     }
8212     ptr++;
8213 
8214     /*
8215      * Increase the number of entity references parsed
8216      */
8217     ctxt->nbentities++;
8218 
8219     /*
8220      * Request the entity from SAX
8221      */
8222     if ((ctxt->sax != NULL) &&
8223 	(ctxt->sax->getParameterEntity != NULL))
8224 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8225     if (ctxt->instate == XML_PARSER_EOF) {
8226 	xmlFree(name);
8227 	*str = ptr;
8228 	return(NULL);
8229     }
8230     if (entity == NULL) {
8231 	/*
8232 	 * [ WFC: Entity Declared ]
8233 	 * In a document without any DTD, a document with only an
8234 	 * internal DTD subset which contains no parameter entity
8235 	 * references, or a document with "standalone='yes'", ...
8236 	 * ... The declaration of a parameter entity must precede
8237 	 * any reference to it...
8238 	 */
8239 	if ((ctxt->standalone == 1) ||
8240 	    ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8241 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8242 		 "PEReference: %%%s; not found\n", name);
8243 	} else {
8244 	    /*
8245 	     * [ VC: Entity Declared ]
8246 	     * In a document with an external subset or external
8247 	     * parameter entities with "standalone='no'", ...
8248 	     * ... The declaration of a parameter entity must
8249 	     * precede any reference to it...
8250 	     */
8251 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8252 			  "PEReference: %%%s; not found\n",
8253 			  name, NULL);
8254 	    ctxt->valid = 0;
8255 	}
8256 	xmlParserEntityCheck(ctxt, 0, NULL, 0);
8257     } else {
8258 	/*
8259 	 * Internal checking in case the entity quest barfed
8260 	 */
8261 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8262 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8263 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8264 			  "%%%s; is not a parameter entity\n",
8265 			  name, NULL);
8266 	}
8267     }
8268     ctxt->hasPErefs = 1;
8269     xmlFree(name);
8270     *str = ptr;
8271     return(entity);
8272 }
8273 
8274 /**
8275  * xmlParseDocTypeDecl:
8276  * @ctxt:  an XML parser context
8277  *
8278  * parse a DOCTYPE declaration
8279  *
8280  * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8281  *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8282  *
8283  * [ VC: Root Element Type ]
8284  * The Name in the document type declaration must match the element
8285  * type of the root element.
8286  */
8287 
8288 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8289 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8290     const xmlChar *name = NULL;
8291     xmlChar *ExternalID = NULL;
8292     xmlChar *URI = NULL;
8293 
8294     /*
8295      * We know that '<!DOCTYPE' has been detected.
8296      */
8297     SKIP(9);
8298 
8299     SKIP_BLANKS;
8300 
8301     /*
8302      * Parse the DOCTYPE name.
8303      */
8304     name = xmlParseName(ctxt);
8305     if (name == NULL) {
8306 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8307 		       "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8308     }
8309     ctxt->intSubName = name;
8310 
8311     SKIP_BLANKS;
8312 
8313     /*
8314      * Check for SystemID and ExternalID
8315      */
8316     URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8317 
8318     if ((URI != NULL) || (ExternalID != NULL)) {
8319         ctxt->hasExternalSubset = 1;
8320     }
8321     ctxt->extSubURI = URI;
8322     ctxt->extSubSystem = ExternalID;
8323 
8324     SKIP_BLANKS;
8325 
8326     /*
8327      * Create and update the internal subset.
8328      */
8329     if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8330 	(!ctxt->disableSAX))
8331 	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8332     if (ctxt->instate == XML_PARSER_EOF)
8333 	return;
8334 
8335     /*
8336      * Is there any internal subset declarations ?
8337      * they are handled separately in xmlParseInternalSubset()
8338      */
8339     if (RAW == '[')
8340 	return;
8341 
8342     /*
8343      * We should be at the end of the DOCTYPE declaration.
8344      */
8345     if (RAW != '>') {
8346 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8347     }
8348     NEXT;
8349 }
8350 
8351 /**
8352  * xmlParseInternalSubset:
8353  * @ctxt:  an XML parser context
8354  *
8355  * parse the internal subset declaration
8356  *
8357  * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8358  */
8359 
8360 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8361 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8362     /*
8363      * Is there any DTD definition ?
8364      */
8365     if (RAW == '[') {
8366         int baseInputNr = ctxt->inputNr;
8367         ctxt->instate = XML_PARSER_DTD;
8368         NEXT;
8369 	/*
8370 	 * Parse the succession of Markup declarations and
8371 	 * PEReferences.
8372 	 * Subsequence (markupdecl | PEReference | S)*
8373 	 */
8374 	while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8375                (ctxt->instate != XML_PARSER_EOF)) {
8376 	    const xmlChar *check = CUR_PTR;
8377 	    unsigned int cons = ctxt->input->consumed;
8378 
8379 	    SKIP_BLANKS;
8380 	    xmlParseMarkupDecl(ctxt);
8381 	    xmlParsePEReference(ctxt);
8382 
8383             /*
8384              * Conditional sections are allowed from external entities included
8385              * by PE References in the internal subset.
8386              */
8387             if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8388                 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8389                 xmlParseConditionalSections(ctxt);
8390             }
8391 
8392 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8393 		xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8394 	     "xmlParseInternalSubset: error detected in Markup declaration\n");
8395                 if (ctxt->inputNr > baseInputNr)
8396                     xmlPopInput(ctxt);
8397                 else
8398 		    break;
8399 	    }
8400 	}
8401 	if (RAW == ']') {
8402 	    NEXT;
8403 	    SKIP_BLANKS;
8404 	}
8405     }
8406 
8407     /*
8408      * We should be at the end of the DOCTYPE declaration.
8409      */
8410     if (RAW != '>') {
8411 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8412 	return;
8413     }
8414     NEXT;
8415 }
8416 
8417 #ifdef LIBXML_SAX1_ENABLED
8418 /**
8419  * xmlParseAttribute:
8420  * @ctxt:  an XML parser context
8421  * @value:  a xmlChar ** used to store the value of the attribute
8422  *
8423  * parse an attribute
8424  *
8425  * [41] Attribute ::= Name Eq AttValue
8426  *
8427  * [ WFC: No External Entity References ]
8428  * Attribute values cannot contain direct or indirect entity references
8429  * to external entities.
8430  *
8431  * [ WFC: No < in Attribute Values ]
8432  * The replacement text of any entity referred to directly or indirectly in
8433  * an attribute value (other than "&lt;") must not contain a <.
8434  *
8435  * [ VC: Attribute Value Type ]
8436  * The attribute must have been declared; the value must be of the type
8437  * declared for it.
8438  *
8439  * [25] Eq ::= S? '=' S?
8440  *
8441  * With namespace:
8442  *
8443  * [NS 11] Attribute ::= QName Eq AttValue
8444  *
8445  * Also the case QName == xmlns:??? is handled independently as a namespace
8446  * definition.
8447  *
8448  * Returns the attribute name, and the value in *value.
8449  */
8450 
8451 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8452 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8453     const xmlChar *name;
8454     xmlChar *val;
8455 
8456     *value = NULL;
8457     GROW;
8458     name = xmlParseName(ctxt);
8459     if (name == NULL) {
8460 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8461 	               "error parsing attribute name\n");
8462         return(NULL);
8463     }
8464 
8465     /*
8466      * read the value
8467      */
8468     SKIP_BLANKS;
8469     if (RAW == '=') {
8470         NEXT;
8471 	SKIP_BLANKS;
8472 	val = xmlParseAttValue(ctxt);
8473 	ctxt->instate = XML_PARSER_CONTENT;
8474     } else {
8475 	xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8476 	       "Specification mandates value for attribute %s\n", name);
8477 	return(NULL);
8478     }
8479 
8480     /*
8481      * Check that xml:lang conforms to the specification
8482      * No more registered as an error, just generate a warning now
8483      * since this was deprecated in XML second edition
8484      */
8485     if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8486 	if (!xmlCheckLanguageID(val)) {
8487 	    xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8488 		          "Malformed value for xml:lang : %s\n",
8489 			  val, NULL);
8490 	}
8491     }
8492 
8493     /*
8494      * Check that xml:space conforms to the specification
8495      */
8496     if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8497 	if (xmlStrEqual(val, BAD_CAST "default"))
8498 	    *(ctxt->space) = 0;
8499 	else if (xmlStrEqual(val, BAD_CAST "preserve"))
8500 	    *(ctxt->space) = 1;
8501 	else {
8502 		xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8503 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8504                                  val, NULL);
8505 	}
8506     }
8507 
8508     *value = val;
8509     return(name);
8510 }
8511 
8512 /**
8513  * xmlParseStartTag:
8514  * @ctxt:  an XML parser context
8515  *
8516  * parse a start of tag either for rule element or
8517  * EmptyElement. In both case we don't parse the tag closing chars.
8518  *
8519  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8520  *
8521  * [ WFC: Unique Att Spec ]
8522  * No attribute name may appear more than once in the same start-tag or
8523  * empty-element tag.
8524  *
8525  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8526  *
8527  * [ WFC: Unique Att Spec ]
8528  * No attribute name may appear more than once in the same start-tag or
8529  * empty-element tag.
8530  *
8531  * With namespace:
8532  *
8533  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8534  *
8535  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8536  *
8537  * Returns the element name parsed
8538  */
8539 
8540 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8541 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8542     const xmlChar *name;
8543     const xmlChar *attname;
8544     xmlChar *attvalue;
8545     const xmlChar **atts = ctxt->atts;
8546     int nbatts = 0;
8547     int maxatts = ctxt->maxatts;
8548     int i;
8549 
8550     if (RAW != '<') return(NULL);
8551     NEXT1;
8552 
8553     name = xmlParseName(ctxt);
8554     if (name == NULL) {
8555 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8556 	     "xmlParseStartTag: invalid element name\n");
8557         return(NULL);
8558     }
8559 
8560     /*
8561      * Now parse the attributes, it ends up with the ending
8562      *
8563      * (S Attribute)* S?
8564      */
8565     SKIP_BLANKS;
8566     GROW;
8567 
8568     while (((RAW != '>') &&
8569 	   ((RAW != '/') || (NXT(1) != '>')) &&
8570 	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8571 	const xmlChar *q = CUR_PTR;
8572 	unsigned int cons = ctxt->input->consumed;
8573 
8574 	attname = xmlParseAttribute(ctxt, &attvalue);
8575         if ((attname != NULL) && (attvalue != NULL)) {
8576 	    /*
8577 	     * [ WFC: Unique Att Spec ]
8578 	     * No attribute name may appear more than once in the same
8579 	     * start-tag or empty-element tag.
8580 	     */
8581 	    for (i = 0; i < nbatts;i += 2) {
8582 	        if (xmlStrEqual(atts[i], attname)) {
8583 		    xmlErrAttributeDup(ctxt, NULL, attname);
8584 		    xmlFree(attvalue);
8585 		    goto failed;
8586 		}
8587 	    }
8588 	    /*
8589 	     * Add the pair to atts
8590 	     */
8591 	    if (atts == NULL) {
8592 	        maxatts = 22; /* allow for 10 attrs by default */
8593 	        atts = (const xmlChar **)
8594 		       xmlMalloc(maxatts * sizeof(xmlChar *));
8595 		if (atts == NULL) {
8596 		    xmlErrMemory(ctxt, NULL);
8597 		    if (attvalue != NULL)
8598 			xmlFree(attvalue);
8599 		    goto failed;
8600 		}
8601 		ctxt->atts = atts;
8602 		ctxt->maxatts = maxatts;
8603 	    } else if (nbatts + 4 > maxatts) {
8604 	        const xmlChar **n;
8605 
8606 	        maxatts *= 2;
8607 	        n = (const xmlChar **) xmlRealloc((void *) atts,
8608 					     maxatts * sizeof(const xmlChar *));
8609 		if (n == NULL) {
8610 		    xmlErrMemory(ctxt, NULL);
8611 		    if (attvalue != NULL)
8612 			xmlFree(attvalue);
8613 		    goto failed;
8614 		}
8615 		atts = n;
8616 		ctxt->atts = atts;
8617 		ctxt->maxatts = maxatts;
8618 	    }
8619 	    atts[nbatts++] = attname;
8620 	    atts[nbatts++] = attvalue;
8621 	    atts[nbatts] = NULL;
8622 	    atts[nbatts + 1] = NULL;
8623 	} else {
8624 	    if (attvalue != NULL)
8625 		xmlFree(attvalue);
8626 	}
8627 
8628 failed:
8629 
8630 	GROW
8631 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8632 	    break;
8633 	if (SKIP_BLANKS == 0) {
8634 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8635 			   "attributes construct error\n");
8636 	}
8637         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8638             (attname == NULL) && (attvalue == NULL)) {
8639 	    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8640 			   "xmlParseStartTag: problem parsing attributes\n");
8641 	    break;
8642 	}
8643 	SHRINK;
8644         GROW;
8645     }
8646 
8647     /*
8648      * SAX: Start of Element !
8649      */
8650     if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8651 	(!ctxt->disableSAX)) {
8652 	if (nbatts > 0)
8653 	    ctxt->sax->startElement(ctxt->userData, name, atts);
8654 	else
8655 	    ctxt->sax->startElement(ctxt->userData, name, NULL);
8656     }
8657 
8658     if (atts != NULL) {
8659         /* Free only the content strings */
8660         for (i = 1;i < nbatts;i+=2)
8661 	    if (atts[i] != NULL)
8662 	       xmlFree((xmlChar *) atts[i]);
8663     }
8664     return(name);
8665 }
8666 
8667 /**
8668  * xmlParseEndTag1:
8669  * @ctxt:  an XML parser context
8670  * @line:  line of the start tag
8671  * @nsNr:  number of namespaces on the start tag
8672  *
8673  * parse an end of tag
8674  *
8675  * [42] ETag ::= '</' Name S? '>'
8676  *
8677  * With namespace
8678  *
8679  * [NS 9] ETag ::= '</' QName S? '>'
8680  */
8681 
8682 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8683 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8684     const xmlChar *name;
8685 
8686     GROW;
8687     if ((RAW != '<') || (NXT(1) != '/')) {
8688 	xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8689 		       "xmlParseEndTag: '</' not found\n");
8690 	return;
8691     }
8692     SKIP(2);
8693 
8694     name = xmlParseNameAndCompare(ctxt,ctxt->name);
8695 
8696     /*
8697      * We should definitely be at the ending "S? '>'" part
8698      */
8699     GROW;
8700     SKIP_BLANKS;
8701     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8702 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8703     } else
8704 	NEXT1;
8705 
8706     /*
8707      * [ WFC: Element Type Match ]
8708      * The Name in an element's end-tag must match the element type in the
8709      * start-tag.
8710      *
8711      */
8712     if (name != (xmlChar*)1) {
8713         if (name == NULL) name = BAD_CAST "unparsable";
8714         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8715 		     "Opening and ending tag mismatch: %s line %d and %s\n",
8716 		                ctxt->name, line, name);
8717     }
8718 
8719     /*
8720      * SAX: End of Tag
8721      */
8722     if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8723 	(!ctxt->disableSAX))
8724         ctxt->sax->endElement(ctxt->userData, ctxt->name);
8725 
8726     namePop(ctxt);
8727     spacePop(ctxt);
8728     return;
8729 }
8730 
8731 /**
8732  * xmlParseEndTag:
8733  * @ctxt:  an XML parser context
8734  *
8735  * parse an end of tag
8736  *
8737  * [42] ETag ::= '</' Name S? '>'
8738  *
8739  * With namespace
8740  *
8741  * [NS 9] ETag ::= '</' QName S? '>'
8742  */
8743 
8744 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8745 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8746     xmlParseEndTag1(ctxt, 0);
8747 }
8748 #endif /* LIBXML_SAX1_ENABLED */
8749 
8750 /************************************************************************
8751  *									*
8752  *		      SAX 2 specific operations				*
8753  *									*
8754  ************************************************************************/
8755 
8756 /*
8757  * xmlGetNamespace:
8758  * @ctxt:  an XML parser context
8759  * @prefix:  the prefix to lookup
8760  *
8761  * Lookup the namespace name for the @prefix (which ca be NULL)
8762  * The prefix must come from the @ctxt->dict dictionary
8763  *
8764  * Returns the namespace name or NULL if not bound
8765  */
8766 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8767 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8768     int i;
8769 
8770     if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8771     for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8772         if (ctxt->nsTab[i] == prefix) {
8773 	    if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8774 	        return(NULL);
8775 	    return(ctxt->nsTab[i + 1]);
8776 	}
8777     return(NULL);
8778 }
8779 
8780 /**
8781  * xmlParseQName:
8782  * @ctxt:  an XML parser context
8783  * @prefix:  pointer to store the prefix part
8784  *
8785  * parse an XML Namespace QName
8786  *
8787  * [6]  QName  ::= (Prefix ':')? LocalPart
8788  * [7]  Prefix  ::= NCName
8789  * [8]  LocalPart  ::= NCName
8790  *
8791  * Returns the Name parsed or NULL
8792  */
8793 
8794 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8795 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8796     const xmlChar *l, *p;
8797 
8798     GROW;
8799 
8800     l = xmlParseNCName(ctxt);
8801     if (l == NULL) {
8802         if (CUR == ':') {
8803 	    l = xmlParseName(ctxt);
8804 	    if (l != NULL) {
8805 	        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8806 		         "Failed to parse QName '%s'\n", l, NULL, NULL);
8807 		*prefix = NULL;
8808 		return(l);
8809 	    }
8810 	}
8811         return(NULL);
8812     }
8813     if (CUR == ':') {
8814         NEXT;
8815 	p = l;
8816 	l = xmlParseNCName(ctxt);
8817 	if (l == NULL) {
8818 	    xmlChar *tmp;
8819 
8820             if (ctxt->instate == XML_PARSER_EOF)
8821                 return(NULL);
8822             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8823 	             "Failed to parse QName '%s:'\n", p, NULL, NULL);
8824 	    l = xmlParseNmtoken(ctxt);
8825 	    if (l == NULL) {
8826                 if (ctxt->instate == XML_PARSER_EOF)
8827                     return(NULL);
8828 		tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8829             } else {
8830 		tmp = xmlBuildQName(l, p, NULL, 0);
8831 		xmlFree((char *)l);
8832 	    }
8833 	    p = xmlDictLookup(ctxt->dict, tmp, -1);
8834 	    if (tmp != NULL) xmlFree(tmp);
8835 	    *prefix = NULL;
8836 	    return(p);
8837 	}
8838 	if (CUR == ':') {
8839 	    xmlChar *tmp;
8840 
8841             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8842 	             "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8843 	    NEXT;
8844 	    tmp = (xmlChar *) xmlParseName(ctxt);
8845 	    if (tmp != NULL) {
8846 	        tmp = xmlBuildQName(tmp, l, NULL, 0);
8847 		l = xmlDictLookup(ctxt->dict, tmp, -1);
8848 		if (tmp != NULL) xmlFree(tmp);
8849 		*prefix = p;
8850 		return(l);
8851 	    }
8852             if (ctxt->instate == XML_PARSER_EOF)
8853                 return(NULL);
8854 	    tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8855 	    l = xmlDictLookup(ctxt->dict, tmp, -1);
8856 	    if (tmp != NULL) xmlFree(tmp);
8857 	    *prefix = p;
8858 	    return(l);
8859 	}
8860 	*prefix = p;
8861     } else
8862         *prefix = NULL;
8863     return(l);
8864 }
8865 
8866 /**
8867  * xmlParseQNameAndCompare:
8868  * @ctxt:  an XML parser context
8869  * @name:  the localname
8870  * @prefix:  the prefix, if any.
8871  *
8872  * parse an XML name and compares for match
8873  * (specialized for endtag parsing)
8874  *
8875  * Returns NULL for an illegal name, (xmlChar*) 1 for success
8876  * and the name for mismatch
8877  */
8878 
8879 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8880 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8881                         xmlChar const *prefix) {
8882     const xmlChar *cmp;
8883     const xmlChar *in;
8884     const xmlChar *ret;
8885     const xmlChar *prefix2;
8886 
8887     if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8888 
8889     GROW;
8890     in = ctxt->input->cur;
8891 
8892     cmp = prefix;
8893     while (*in != 0 && *in == *cmp) {
8894 	++in;
8895 	++cmp;
8896     }
8897     if ((*cmp == 0) && (*in == ':')) {
8898         in++;
8899 	cmp = name;
8900 	while (*in != 0 && *in == *cmp) {
8901 	    ++in;
8902 	    ++cmp;
8903 	}
8904 	if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8905 	    /* success */
8906             ctxt->input->col += in - ctxt->input->cur;
8907 	    ctxt->input->cur = in;
8908 	    return((const xmlChar*) 1);
8909 	}
8910     }
8911     /*
8912      * all strings coms from the dictionary, equality can be done directly
8913      */
8914     ret = xmlParseQName (ctxt, &prefix2);
8915     if ((ret == name) && (prefix == prefix2))
8916 	return((const xmlChar*) 1);
8917     return ret;
8918 }
8919 
8920 /**
8921  * xmlParseAttValueInternal:
8922  * @ctxt:  an XML parser context
8923  * @len:  attribute len result
8924  * @alloc:  whether the attribute was reallocated as a new string
8925  * @normalize:  if 1 then further non-CDATA normalization must be done
8926  *
8927  * parse a value for an attribute.
8928  * NOTE: if no normalization is needed, the routine will return pointers
8929  *       directly from the data buffer.
8930  *
8931  * 3.3.3 Attribute-Value Normalization:
8932  * Before the value of an attribute is passed to the application or
8933  * checked for validity, the XML processor must normalize it as follows:
8934  * - a character reference is processed by appending the referenced
8935  *   character to the attribute value
8936  * - an entity reference is processed by recursively processing the
8937  *   replacement text of the entity
8938  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8939  *   appending #x20 to the normalized value, except that only a single
8940  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8941  *   parsed entity or the literal entity value of an internal parsed entity
8942  * - other characters are processed by appending them to the normalized value
8943  * If the declared value is not CDATA, then the XML processor must further
8944  * process the normalized attribute value by discarding any leading and
8945  * trailing space (#x20) characters, and by replacing sequences of space
8946  * (#x20) characters by a single space (#x20) character.
8947  * All attributes for which no declaration has been read should be treated
8948  * by a non-validating parser as if declared CDATA.
8949  *
8950  * Returns the AttValue parsed or NULL. The value has to be freed by the
8951  *     caller if it was copied, this can be detected by val[*len] == 0.
8952  */
8953 
8954 #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8955     const xmlChar *oldbase = ctxt->input->base;\
8956     GROW;\
8957     if (ctxt->instate == XML_PARSER_EOF)\
8958         return(NULL);\
8959     if (oldbase != ctxt->input->base) {\
8960         ptrdiff_t delta = ctxt->input->base - oldbase;\
8961         start = start + delta;\
8962         in = in + delta;\
8963     }\
8964     end = ctxt->input->end;
8965 
8966 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)8967 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8968                          int normalize)
8969 {
8970     xmlChar limit = 0;
8971     const xmlChar *in = NULL, *start, *end, *last;
8972     xmlChar *ret = NULL;
8973     int line, col;
8974     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
8975                     XML_MAX_HUGE_LENGTH :
8976                     XML_MAX_TEXT_LENGTH;
8977 
8978     GROW;
8979     in = (xmlChar *) CUR_PTR;
8980     line = ctxt->input->line;
8981     col = ctxt->input->col;
8982     if (*in != '"' && *in != '\'') {
8983         xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8984         return (NULL);
8985     }
8986     ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8987 
8988     /*
8989      * try to handle in this routine the most common case where no
8990      * allocation of a new string is required and where content is
8991      * pure ASCII.
8992      */
8993     limit = *in++;
8994     col++;
8995     end = ctxt->input->end;
8996     start = in;
8997     if (in >= end) {
8998         GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8999     }
9000     if (normalize) {
9001         /*
9002 	 * Skip any leading spaces
9003 	 */
9004 	while ((in < end) && (*in != limit) &&
9005 	       ((*in == 0x20) || (*in == 0x9) ||
9006 	        (*in == 0xA) || (*in == 0xD))) {
9007 	    if (*in == 0xA) {
9008 	        line++; col = 1;
9009 	    } else {
9010 	        col++;
9011 	    }
9012 	    in++;
9013 	    start = in;
9014 	    if (in >= end) {
9015                 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9016                 if ((in - start) > maxLength) {
9017                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9018                                    "AttValue length too long\n");
9019                     return(NULL);
9020                 }
9021 	    }
9022 	}
9023 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9024 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9025 	    col++;
9026 	    if ((*in++ == 0x20) && (*in == 0x20)) break;
9027 	    if (in >= end) {
9028                 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9029                 if ((in - start) > maxLength) {
9030                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9031                                    "AttValue length too long\n");
9032                     return(NULL);
9033                 }
9034 	    }
9035 	}
9036 	last = in;
9037 	/*
9038 	 * skip the trailing blanks
9039 	 */
9040 	while ((last[-1] == 0x20) && (last > start)) last--;
9041 	while ((in < end) && (*in != limit) &&
9042 	       ((*in == 0x20) || (*in == 0x9) ||
9043 	        (*in == 0xA) || (*in == 0xD))) {
9044 	    if (*in == 0xA) {
9045 	        line++, col = 1;
9046 	    } else {
9047 	        col++;
9048 	    }
9049 	    in++;
9050 	    if (in >= end) {
9051 		const xmlChar *oldbase = ctxt->input->base;
9052 		GROW;
9053                 if (ctxt->instate == XML_PARSER_EOF)
9054                     return(NULL);
9055 		if (oldbase != ctxt->input->base) {
9056 		    ptrdiff_t delta = ctxt->input->base - oldbase;
9057 		    start = start + delta;
9058 		    in = in + delta;
9059 		    last = last + delta;
9060 		}
9061 		end = ctxt->input->end;
9062                 if ((in - start) > maxLength) {
9063                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9064                                    "AttValue length too long\n");
9065                     return(NULL);
9066                 }
9067 	    }
9068 	}
9069         if ((in - start) > maxLength) {
9070             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9071                            "AttValue length too long\n");
9072             return(NULL);
9073         }
9074 	if (*in != limit) goto need_complex;
9075     } else {
9076 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9077 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9078 	    in++;
9079 	    col++;
9080 	    if (in >= end) {
9081                 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9082                 if ((in - start) > maxLength) {
9083                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9084                                    "AttValue length too long\n");
9085                     return(NULL);
9086                 }
9087 	    }
9088 	}
9089 	last = in;
9090         if ((in - start) > maxLength) {
9091             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9092                            "AttValue length too long\n");
9093             return(NULL);
9094         }
9095 	if (*in != limit) goto need_complex;
9096     }
9097     in++;
9098     col++;
9099     if (len != NULL) {
9100         *len = last - start;
9101         ret = (xmlChar *) start;
9102     } else {
9103         if (alloc) *alloc = 1;
9104         ret = xmlStrndup(start, last - start);
9105     }
9106     CUR_PTR = in;
9107     ctxt->input->line = line;
9108     ctxt->input->col = col;
9109     if (alloc) *alloc = 0;
9110     return ret;
9111 need_complex:
9112     if (alloc) *alloc = 1;
9113     return xmlParseAttValueComplex(ctxt, len, normalize);
9114 }
9115 
9116 /**
9117  * xmlParseAttribute2:
9118  * @ctxt:  an XML parser context
9119  * @pref:  the element prefix
9120  * @elem:  the element name
9121  * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9122  * @value:  a xmlChar ** used to store the value of the attribute
9123  * @len:  an int * to save the length of the attribute
9124  * @alloc:  an int * to indicate if the attribute was allocated
9125  *
9126  * parse an attribute in the new SAX2 framework.
9127  *
9128  * Returns the attribute name, and the value in *value, .
9129  */
9130 
9131 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)9132 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9133                    const xmlChar * pref, const xmlChar * elem,
9134                    const xmlChar ** prefix, xmlChar ** value,
9135                    int *len, int *alloc)
9136 {
9137     const xmlChar *name;
9138     xmlChar *val, *internal_val = NULL;
9139     int normalize = 0;
9140 
9141     *value = NULL;
9142     GROW;
9143     name = xmlParseQName(ctxt, prefix);
9144     if (name == NULL) {
9145         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9146                        "error parsing attribute name\n");
9147         return (NULL);
9148     }
9149 
9150     /*
9151      * get the type if needed
9152      */
9153     if (ctxt->attsSpecial != NULL) {
9154         int type;
9155 
9156         type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9157                                                  pref, elem, *prefix, name);
9158         if (type != 0)
9159             normalize = 1;
9160     }
9161 
9162     /*
9163      * read the value
9164      */
9165     SKIP_BLANKS;
9166     if (RAW == '=') {
9167         NEXT;
9168         SKIP_BLANKS;
9169         val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9170 	if (normalize) {
9171 	    /*
9172 	     * Sometimes a second normalisation pass for spaces is needed
9173 	     * but that only happens if charrefs or entities references
9174 	     * have been used in the attribute value, i.e. the attribute
9175 	     * value have been extracted in an allocated string already.
9176 	     */
9177 	    if (*alloc) {
9178 	        const xmlChar *val2;
9179 
9180 	        val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9181 		if ((val2 != NULL) && (val2 != val)) {
9182 		    xmlFree(val);
9183 		    val = (xmlChar *) val2;
9184 		}
9185 	    }
9186 	}
9187         ctxt->instate = XML_PARSER_CONTENT;
9188     } else {
9189         xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9190                           "Specification mandates value for attribute %s\n",
9191                           name);
9192         return (NULL);
9193     }
9194 
9195     if (*prefix == ctxt->str_xml) {
9196         /*
9197          * Check that xml:lang conforms to the specification
9198          * No more registered as an error, just generate a warning now
9199          * since this was deprecated in XML second edition
9200          */
9201         if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9202             internal_val = xmlStrndup(val, *len);
9203             if (!xmlCheckLanguageID(internal_val)) {
9204                 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9205                               "Malformed value for xml:lang : %s\n",
9206                               internal_val, NULL);
9207             }
9208         }
9209 
9210         /*
9211          * Check that xml:space conforms to the specification
9212          */
9213         if (xmlStrEqual(name, BAD_CAST "space")) {
9214             internal_val = xmlStrndup(val, *len);
9215             if (xmlStrEqual(internal_val, BAD_CAST "default"))
9216                 *(ctxt->space) = 0;
9217             else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9218                 *(ctxt->space) = 1;
9219             else {
9220                 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9221                               "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9222                               internal_val, NULL);
9223             }
9224         }
9225         if (internal_val) {
9226             xmlFree(internal_val);
9227         }
9228     }
9229 
9230     *value = val;
9231     return (name);
9232 }
9233 /**
9234  * xmlParseStartTag2:
9235  * @ctxt:  an XML parser context
9236  *
9237  * parse a start of tag either for rule element or
9238  * EmptyElement. In both cases we don't parse the tag closing chars.
9239  * This routine is called when running SAX2 parsing
9240  *
9241  * [40] STag ::= '<' Name (S Attribute)* S? '>'
9242  *
9243  * [ WFC: Unique Att Spec ]
9244  * No attribute name may appear more than once in the same start-tag or
9245  * empty-element tag.
9246  *
9247  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9248  *
9249  * [ WFC: Unique Att Spec ]
9250  * No attribute name may appear more than once in the same start-tag or
9251  * empty-element tag.
9252  *
9253  * With namespace:
9254  *
9255  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9256  *
9257  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9258  *
9259  * Returns the element name parsed
9260  */
9261 
9262 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)9263 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9264                   const xmlChar **URI, int *tlen) {
9265     const xmlChar *localname;
9266     const xmlChar *prefix;
9267     const xmlChar *attname;
9268     const xmlChar *aprefix;
9269     const xmlChar *nsname;
9270     xmlChar *attvalue;
9271     const xmlChar **atts = ctxt->atts;
9272     int maxatts = ctxt->maxatts;
9273     int nratts, nbatts, nbdef, inputid;
9274     int i, j, nbNs, attval;
9275     unsigned long cur;
9276     int nsNr = ctxt->nsNr;
9277 
9278     if (RAW != '<') return(NULL);
9279     NEXT1;
9280 
9281     /*
9282      * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9283      *       point since the attribute values may be stored as pointers to
9284      *       the buffer and calling SHRINK would destroy them !
9285      *       The Shrinking is only possible once the full set of attribute
9286      *       callbacks have been done.
9287      */
9288     SHRINK;
9289     cur = ctxt->input->cur - ctxt->input->base;
9290     inputid = ctxt->input->id;
9291     nbatts = 0;
9292     nratts = 0;
9293     nbdef = 0;
9294     nbNs = 0;
9295     attval = 0;
9296     /* Forget any namespaces added during an earlier parse of this element. */
9297     ctxt->nsNr = nsNr;
9298 
9299     localname = xmlParseQName(ctxt, &prefix);
9300     if (localname == NULL) {
9301 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9302 		       "StartTag: invalid element name\n");
9303         return(NULL);
9304     }
9305     *tlen = ctxt->input->cur - ctxt->input->base - cur;
9306 
9307     /*
9308      * Now parse the attributes, it ends up with the ending
9309      *
9310      * (S Attribute)* S?
9311      */
9312     SKIP_BLANKS;
9313     GROW;
9314 
9315     while (((RAW != '>') &&
9316 	   ((RAW != '/') || (NXT(1) != '>')) &&
9317 	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9318 	const xmlChar *q = CUR_PTR;
9319 	unsigned int cons = ctxt->input->consumed;
9320 	int len = -1, alloc = 0;
9321 
9322 	attname = xmlParseAttribute2(ctxt, prefix, localname,
9323 	                             &aprefix, &attvalue, &len, &alloc);
9324         if ((attname == NULL) || (attvalue == NULL))
9325             goto next_attr;
9326 	if (len < 0) len = xmlStrlen(attvalue);
9327 
9328         if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9329             const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9330             xmlURIPtr uri;
9331 
9332             if (URL == NULL) {
9333                 xmlErrMemory(ctxt, "dictionary allocation failure");
9334                 if ((attvalue != NULL) && (alloc != 0))
9335                     xmlFree(attvalue);
9336                 localname = NULL;
9337                 goto done;
9338             }
9339             if (*URL != 0) {
9340                 uri = xmlParseURI((const char *) URL);
9341                 if (uri == NULL) {
9342                     xmlNsErr(ctxt, XML_WAR_NS_URI,
9343                              "xmlns: '%s' is not a valid URI\n",
9344                                        URL, NULL, NULL);
9345                 } else {
9346                     if (uri->scheme == NULL) {
9347                         xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9348                                   "xmlns: URI %s is not absolute\n",
9349                                   URL, NULL, NULL);
9350                     }
9351                     xmlFreeURI(uri);
9352                 }
9353                 if (URL == ctxt->str_xml_ns) {
9354                     if (attname != ctxt->str_xml) {
9355                         xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9356                      "xml namespace URI cannot be the default namespace\n",
9357                                  NULL, NULL, NULL);
9358                     }
9359                     goto next_attr;
9360                 }
9361                 if ((len == 29) &&
9362                     (xmlStrEqual(URL,
9363                              BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9364                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9365                          "reuse of the xmlns namespace name is forbidden\n",
9366                              NULL, NULL, NULL);
9367                     goto next_attr;
9368                 }
9369             }
9370             /*
9371              * check that it's not a defined namespace
9372              */
9373             for (j = 1;j <= nbNs;j++)
9374                 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9375                     break;
9376             if (j <= nbNs)
9377                 xmlErrAttributeDup(ctxt, NULL, attname);
9378             else
9379                 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9380 
9381         } else if (aprefix == ctxt->str_xmlns) {
9382             const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9383             xmlURIPtr uri;
9384 
9385             if (attname == ctxt->str_xml) {
9386                 if (URL != ctxt->str_xml_ns) {
9387                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9388                              "xml namespace prefix mapped to wrong URI\n",
9389                              NULL, NULL, NULL);
9390                 }
9391                 /*
9392                  * Do not keep a namespace definition node
9393                  */
9394                 goto next_attr;
9395             }
9396             if (URL == ctxt->str_xml_ns) {
9397                 if (attname != ctxt->str_xml) {
9398                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9399                              "xml namespace URI mapped to wrong prefix\n",
9400                              NULL, NULL, NULL);
9401                 }
9402                 goto next_attr;
9403             }
9404             if (attname == ctxt->str_xmlns) {
9405                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9406                          "redefinition of the xmlns prefix is forbidden\n",
9407                          NULL, NULL, NULL);
9408                 goto next_attr;
9409             }
9410             if ((len == 29) &&
9411                 (xmlStrEqual(URL,
9412                              BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9413                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9414                          "reuse of the xmlns namespace name is forbidden\n",
9415                          NULL, NULL, NULL);
9416                 goto next_attr;
9417             }
9418             if ((URL == NULL) || (URL[0] == 0)) {
9419                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9420                          "xmlns:%s: Empty XML namespace is not allowed\n",
9421                               attname, NULL, NULL);
9422                 goto next_attr;
9423             } else {
9424                 uri = xmlParseURI((const char *) URL);
9425                 if (uri == NULL) {
9426                     xmlNsErr(ctxt, XML_WAR_NS_URI,
9427                          "xmlns:%s: '%s' is not a valid URI\n",
9428                                        attname, URL, NULL);
9429                 } else {
9430                     if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9431                         xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9432                                   "xmlns:%s: URI %s is not absolute\n",
9433                                   attname, URL, NULL);
9434                     }
9435                     xmlFreeURI(uri);
9436                 }
9437             }
9438 
9439             /*
9440              * check that it's not a defined namespace
9441              */
9442             for (j = 1;j <= nbNs;j++)
9443                 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9444                     break;
9445             if (j <= nbNs)
9446                 xmlErrAttributeDup(ctxt, aprefix, attname);
9447             else
9448                 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9449 
9450         } else {
9451             /*
9452              * Add the pair to atts
9453              */
9454             if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9455                 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9456                     goto next_attr;
9457                 }
9458                 maxatts = ctxt->maxatts;
9459                 atts = ctxt->atts;
9460             }
9461             ctxt->attallocs[nratts++] = alloc;
9462             atts[nbatts++] = attname;
9463             atts[nbatts++] = aprefix;
9464             /*
9465              * The namespace URI field is used temporarily to point at the
9466              * base of the current input buffer for non-alloced attributes.
9467              * When the input buffer is reallocated, all the pointers become
9468              * invalid, but they can be reconstructed later.
9469              */
9470             if (alloc)
9471                 atts[nbatts++] = NULL;
9472             else
9473                 atts[nbatts++] = ctxt->input->base;
9474             atts[nbatts++] = attvalue;
9475             attvalue += len;
9476             atts[nbatts++] = attvalue;
9477             /*
9478              * tag if some deallocation is needed
9479              */
9480             if (alloc != 0) attval = 1;
9481             attvalue = NULL; /* moved into atts */
9482         }
9483 
9484 next_attr:
9485         if ((attvalue != NULL) && (alloc != 0)) {
9486             xmlFree(attvalue);
9487             attvalue = NULL;
9488         }
9489 
9490 	GROW
9491         if (ctxt->instate == XML_PARSER_EOF)
9492             break;
9493 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9494 	    break;
9495 	if (SKIP_BLANKS == 0) {
9496 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9497 			   "attributes construct error\n");
9498 	    break;
9499 	}
9500         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9501             (attname == NULL) && (attvalue == NULL)) {
9502 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9503 	         "xmlParseStartTag: problem parsing attributes\n");
9504 	    break;
9505 	}
9506         GROW;
9507     }
9508 
9509     if (ctxt->input->id != inputid) {
9510         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9511                     "Unexpected change of input\n");
9512         localname = NULL;
9513         goto done;
9514     }
9515 
9516     /* Reconstruct attribute value pointers. */
9517     for (i = 0, j = 0; j < nratts; i += 5, j++) {
9518         if (atts[i+2] != NULL) {
9519             /*
9520              * Arithmetic on dangling pointers is technically undefined
9521              * behavior, but well...
9522              */
9523             ptrdiff_t offset = ctxt->input->base - atts[i+2];
9524             atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9525             atts[i+3] += offset;  /* value */
9526             atts[i+4] += offset;  /* valuend */
9527         }
9528     }
9529 
9530     /*
9531      * The attributes defaulting
9532      */
9533     if (ctxt->attsDefault != NULL) {
9534         xmlDefAttrsPtr defaults;
9535 
9536 	defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9537 	if (defaults != NULL) {
9538 	    for (i = 0;i < defaults->nbAttrs;i++) {
9539 	        attname = defaults->values[5 * i];
9540 		aprefix = defaults->values[5 * i + 1];
9541 
9542                 /*
9543 		 * special work for namespaces defaulted defs
9544 		 */
9545 		if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9546 		    /*
9547 		     * check that it's not a defined namespace
9548 		     */
9549 		    for (j = 1;j <= nbNs;j++)
9550 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9551 			    break;
9552 	            if (j <= nbNs) continue;
9553 
9554 		    nsname = xmlGetNamespace(ctxt, NULL);
9555 		    if (nsname != defaults->values[5 * i + 2]) {
9556 			if (nsPush(ctxt, NULL,
9557 			           defaults->values[5 * i + 2]) > 0)
9558 			    nbNs++;
9559 		    }
9560 		} else if (aprefix == ctxt->str_xmlns) {
9561 		    /*
9562 		     * check that it's not a defined namespace
9563 		     */
9564 		    for (j = 1;j <= nbNs;j++)
9565 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9566 			    break;
9567 	            if (j <= nbNs) continue;
9568 
9569 		    nsname = xmlGetNamespace(ctxt, attname);
9570 		    if (nsname != defaults->values[2]) {
9571 			if (nsPush(ctxt, attname,
9572 			           defaults->values[5 * i + 2]) > 0)
9573 			    nbNs++;
9574 		    }
9575 		} else {
9576 		    /*
9577 		     * check that it's not a defined attribute
9578 		     */
9579 		    for (j = 0;j < nbatts;j+=5) {
9580 			if ((attname == atts[j]) && (aprefix == atts[j+1]))
9581 			    break;
9582 		    }
9583 		    if (j < nbatts) continue;
9584 
9585 		    if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9586 			if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9587                             localname = NULL;
9588                             goto done;
9589 			}
9590 			maxatts = ctxt->maxatts;
9591 			atts = ctxt->atts;
9592 		    }
9593 		    atts[nbatts++] = attname;
9594 		    atts[nbatts++] = aprefix;
9595 		    if (aprefix == NULL)
9596 			atts[nbatts++] = NULL;
9597 		    else
9598 		        atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9599 		    atts[nbatts++] = defaults->values[5 * i + 2];
9600 		    atts[nbatts++] = defaults->values[5 * i + 3];
9601 		    if ((ctxt->standalone == 1) &&
9602 		        (defaults->values[5 * i + 4] != NULL)) {
9603 			xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9604 	  "standalone: attribute %s on %s defaulted from external subset\n",
9605 	                                 attname, localname);
9606 		    }
9607 		    nbdef++;
9608 		}
9609 	    }
9610 	}
9611     }
9612 
9613     /*
9614      * The attributes checkings
9615      */
9616     for (i = 0; i < nbatts;i += 5) {
9617         /*
9618 	* The default namespace does not apply to attribute names.
9619 	*/
9620 	if (atts[i + 1] != NULL) {
9621 	    nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9622 	    if (nsname == NULL) {
9623 		xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9624 		    "Namespace prefix %s for %s on %s is not defined\n",
9625 		    atts[i + 1], atts[i], localname);
9626 	    }
9627 	    atts[i + 2] = nsname;
9628 	} else
9629 	    nsname = NULL;
9630 	/*
9631 	 * [ WFC: Unique Att Spec ]
9632 	 * No attribute name may appear more than once in the same
9633 	 * start-tag or empty-element tag.
9634 	 * As extended by the Namespace in XML REC.
9635 	 */
9636         for (j = 0; j < i;j += 5) {
9637 	    if (atts[i] == atts[j]) {
9638 	        if (atts[i+1] == atts[j+1]) {
9639 		    xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9640 		    break;
9641 		}
9642 		if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9643 		    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9644 			     "Namespaced Attribute %s in '%s' redefined\n",
9645 			     atts[i], nsname, NULL);
9646 		    break;
9647 		}
9648 	    }
9649 	}
9650     }
9651 
9652     nsname = xmlGetNamespace(ctxt, prefix);
9653     if ((prefix != NULL) && (nsname == NULL)) {
9654 	xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9655 	         "Namespace prefix %s on %s is not defined\n",
9656 		 prefix, localname, NULL);
9657     }
9658     *pref = prefix;
9659     *URI = nsname;
9660 
9661     /*
9662      * SAX: Start of Element !
9663      */
9664     if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9665 	(!ctxt->disableSAX)) {
9666 	if (nbNs > 0)
9667 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9668 			  nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9669 			  nbatts / 5, nbdef, atts);
9670 	else
9671 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9672 	                  nsname, 0, NULL, nbatts / 5, nbdef, atts);
9673     }
9674 
9675 done:
9676     /*
9677      * Free up attribute allocated strings if needed
9678      */
9679     if (attval != 0) {
9680 	for (i = 3,j = 0; j < nratts;i += 5,j++)
9681 	    if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9682 	        xmlFree((xmlChar *) atts[i]);
9683     }
9684 
9685     return(localname);
9686 }
9687 
9688 /**
9689  * xmlParseEndTag2:
9690  * @ctxt:  an XML parser context
9691  * @line:  line of the start tag
9692  * @nsNr:  number of namespaces on the start tag
9693  *
9694  * parse an end of tag
9695  *
9696  * [42] ETag ::= '</' Name S? '>'
9697  *
9698  * With namespace
9699  *
9700  * [NS 9] ETag ::= '</' QName S? '>'
9701  */
9702 
9703 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlStartTag * tag)9704 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9705     const xmlChar *name;
9706 
9707     GROW;
9708     if ((RAW != '<') || (NXT(1) != '/')) {
9709 	xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9710 	return;
9711     }
9712     SKIP(2);
9713 
9714     if (tag->prefix == NULL)
9715         name = xmlParseNameAndCompare(ctxt, ctxt->name);
9716     else
9717         name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9718 
9719     /*
9720      * We should definitely be at the ending "S? '>'" part
9721      */
9722     GROW;
9723     if (ctxt->instate == XML_PARSER_EOF)
9724         return;
9725     SKIP_BLANKS;
9726     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9727 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9728     } else
9729 	NEXT1;
9730 
9731     /*
9732      * [ WFC: Element Type Match ]
9733      * The Name in an element's end-tag must match the element type in the
9734      * start-tag.
9735      *
9736      */
9737     if (name != (xmlChar*)1) {
9738         if (name == NULL) name = BAD_CAST "unparsable";
9739         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9740 		     "Opening and ending tag mismatch: %s line %d and %s\n",
9741 		                ctxt->name, tag->line, name);
9742     }
9743 
9744     /*
9745      * SAX: End of Tag
9746      */
9747     if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9748 	(!ctxt->disableSAX))
9749 	ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9750                                 tag->URI);
9751 
9752     spacePop(ctxt);
9753     if (tag->nsNr != 0)
9754 	nsPop(ctxt, tag->nsNr);
9755 }
9756 
9757 /**
9758  * xmlParseCDSect:
9759  * @ctxt:  an XML parser context
9760  *
9761  * Parse escaped pure raw content.
9762  *
9763  * [18] CDSect ::= CDStart CData CDEnd
9764  *
9765  * [19] CDStart ::= '<![CDATA['
9766  *
9767  * [20] Data ::= (Char* - (Char* ']]>' Char*))
9768  *
9769  * [21] CDEnd ::= ']]>'
9770  */
9771 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9772 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9773     xmlChar *buf = NULL;
9774     int len = 0;
9775     int size = XML_PARSER_BUFFER_SIZE;
9776     int r, rl;
9777     int	s, sl;
9778     int cur, l;
9779     int count = 0;
9780     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9781                     XML_MAX_HUGE_LENGTH :
9782                     XML_MAX_TEXT_LENGTH;
9783 
9784     /* Check 2.6.0 was NXT(0) not RAW */
9785     if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9786 	SKIP(9);
9787     } else
9788         return;
9789 
9790     ctxt->instate = XML_PARSER_CDATA_SECTION;
9791     r = CUR_CHAR(rl);
9792     if (!IS_CHAR(r)) {
9793 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9794 	ctxt->instate = XML_PARSER_CONTENT;
9795         return;
9796     }
9797     NEXTL(rl);
9798     s = CUR_CHAR(sl);
9799     if (!IS_CHAR(s)) {
9800 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9801 	ctxt->instate = XML_PARSER_CONTENT;
9802         return;
9803     }
9804     NEXTL(sl);
9805     cur = CUR_CHAR(l);
9806     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9807     if (buf == NULL) {
9808 	xmlErrMemory(ctxt, NULL);
9809 	return;
9810     }
9811     while (IS_CHAR(cur) &&
9812            ((r != ']') || (s != ']') || (cur != '>'))) {
9813 	if (len + 5 >= size) {
9814 	    xmlChar *tmp;
9815 
9816 	    tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9817 	    if (tmp == NULL) {
9818 	        xmlFree(buf);
9819 		xmlErrMemory(ctxt, NULL);
9820 		return;
9821 	    }
9822 	    buf = tmp;
9823 	    size *= 2;
9824 	}
9825 	COPY_BUF(rl,buf,len,r);
9826 	r = s;
9827 	rl = sl;
9828 	s = cur;
9829 	sl = l;
9830 	count++;
9831 	if (count > 50) {
9832 	    SHRINK;
9833 	    GROW;
9834             if (ctxt->instate == XML_PARSER_EOF) {
9835 		xmlFree(buf);
9836 		return;
9837             }
9838 	    count = 0;
9839 	}
9840 	NEXTL(l);
9841 	cur = CUR_CHAR(l);
9842         if (len > maxLength) {
9843             xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9844                            "CData section too big found\n");
9845             xmlFree(buf);
9846             return;
9847         }
9848     }
9849     buf[len] = 0;
9850     ctxt->instate = XML_PARSER_CONTENT;
9851     if (cur != '>') {
9852 	xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9853 	                     "CData section not finished\n%.50s\n", buf);
9854 	xmlFree(buf);
9855         return;
9856     }
9857     NEXTL(l);
9858 
9859     /*
9860      * OK the buffer is to be consumed as cdata.
9861      */
9862     if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9863 	if (ctxt->sax->cdataBlock != NULL)
9864 	    ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9865 	else if (ctxt->sax->characters != NULL)
9866 	    ctxt->sax->characters(ctxt->userData, buf, len);
9867     }
9868     xmlFree(buf);
9869 }
9870 
9871 /**
9872  * xmlParseContentInternal:
9873  * @ctxt:  an XML parser context
9874  *
9875  * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9876  * unexpected EOF to the caller.
9877  */
9878 
9879 static void
xmlParseContentInternal(xmlParserCtxtPtr ctxt)9880 xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9881     int nameNr = ctxt->nameNr;
9882 
9883     GROW;
9884     while ((RAW != 0) &&
9885 	   (ctxt->instate != XML_PARSER_EOF)) {
9886 	const xmlChar *test = CUR_PTR;
9887 	unsigned int cons = ctxt->input->consumed;
9888 	const xmlChar *cur = ctxt->input->cur;
9889 
9890 	/*
9891 	 * First case : a Processing Instruction.
9892 	 */
9893 	if ((*cur == '<') && (cur[1] == '?')) {
9894 	    xmlParsePI(ctxt);
9895 	}
9896 
9897 	/*
9898 	 * Second case : a CDSection
9899 	 */
9900 	/* 2.6.0 test was *cur not RAW */
9901 	else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9902 	    xmlParseCDSect(ctxt);
9903 	}
9904 
9905 	/*
9906 	 * Third case :  a comment
9907 	 */
9908 	else if ((*cur == '<') && (NXT(1) == '!') &&
9909 		 (NXT(2) == '-') && (NXT(3) == '-')) {
9910 	    xmlParseComment(ctxt);
9911 	    ctxt->instate = XML_PARSER_CONTENT;
9912 	}
9913 
9914 	/*
9915 	 * Fourth case :  a sub-element.
9916 	 */
9917 	else if (*cur == '<') {
9918             if (NXT(1) == '/') {
9919                 if (ctxt->nameNr <= nameNr)
9920                     break;
9921 	        xmlParseElementEnd(ctxt);
9922             } else {
9923 	        xmlParseElementStart(ctxt);
9924             }
9925 	}
9926 
9927 	/*
9928 	 * Fifth case : a reference. If if has not been resolved,
9929 	 *    parsing returns it's Name, create the node
9930 	 */
9931 
9932 	else if (*cur == '&') {
9933 	    xmlParseReference(ctxt);
9934 	}
9935 
9936 	/*
9937 	 * Last case, text. Note that References are handled directly.
9938 	 */
9939 	else {
9940 	    xmlParseCharData(ctxt, 0);
9941 	}
9942 
9943 	GROW;
9944 	SHRINK;
9945 
9946 	if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9947 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9948 	                "detected an error in element content\n");
9949 	    xmlHaltParser(ctxt);
9950             break;
9951 	}
9952     }
9953 }
9954 
9955 /**
9956  * xmlParseContent:
9957  * @ctxt:  an XML parser context
9958  *
9959  * Parse a content sequence. Stops at EOF or '</'.
9960  *
9961  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9962  */
9963 
9964 void
xmlParseContent(xmlParserCtxtPtr ctxt)9965 xmlParseContent(xmlParserCtxtPtr ctxt) {
9966     int nameNr = ctxt->nameNr;
9967 
9968     xmlParseContentInternal(ctxt);
9969 
9970     if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9971         const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9972         int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9973         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9974                 "Premature end of data in tag %s line %d\n",
9975 		name, line, NULL);
9976     }
9977 }
9978 
9979 /**
9980  * xmlParseElement:
9981  * @ctxt:  an XML parser context
9982  *
9983  * parse an XML element
9984  *
9985  * [39] element ::= EmptyElemTag | STag content ETag
9986  *
9987  * [ WFC: Element Type Match ]
9988  * The Name in an element's end-tag must match the element type in the
9989  * start-tag.
9990  *
9991  */
9992 
9993 void
xmlParseElement(xmlParserCtxtPtr ctxt)9994 xmlParseElement(xmlParserCtxtPtr ctxt) {
9995     if (xmlParseElementStart(ctxt) != 0)
9996         return;
9997 
9998     xmlParseContentInternal(ctxt);
9999     if (ctxt->instate == XML_PARSER_EOF)
10000 	return;
10001 
10002     if (CUR == 0) {
10003         const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10004         int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10005         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10006                 "Premature end of data in tag %s line %d\n",
10007 		name, line, NULL);
10008         return;
10009     }
10010 
10011     xmlParseElementEnd(ctxt);
10012 }
10013 
10014 /**
10015  * xmlParseElementStart:
10016  * @ctxt:  an XML parser context
10017  *
10018  * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10019  * opening tag was parsed, 1 if an empty element was parsed.
10020  */
10021 static int
xmlParseElementStart(xmlParserCtxtPtr ctxt)10022 xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10023     const xmlChar *name;
10024     const xmlChar *prefix = NULL;
10025     const xmlChar *URI = NULL;
10026     xmlParserNodeInfo node_info;
10027     int line, tlen = 0;
10028     xmlNodePtr ret;
10029     int nsNr = ctxt->nsNr;
10030 
10031     if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10032         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10033 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10034 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10035 			  xmlParserMaxDepth);
10036 	xmlHaltParser(ctxt);
10037 	return(-1);
10038     }
10039 
10040     /* Capture start position */
10041     if (ctxt->record_info) {
10042         node_info.begin_pos = ctxt->input->consumed +
10043                           (CUR_PTR - ctxt->input->base);
10044 	node_info.begin_line = ctxt->input->line;
10045     }
10046 
10047     if (ctxt->spaceNr == 0)
10048 	spacePush(ctxt, -1);
10049     else if (*ctxt->space == -2)
10050 	spacePush(ctxt, -1);
10051     else
10052 	spacePush(ctxt, *ctxt->space);
10053 
10054     line = ctxt->input->line;
10055 #ifdef LIBXML_SAX1_ENABLED
10056     if (ctxt->sax2)
10057 #endif /* LIBXML_SAX1_ENABLED */
10058         name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10059 #ifdef LIBXML_SAX1_ENABLED
10060     else
10061 	name = xmlParseStartTag(ctxt);
10062 #endif /* LIBXML_SAX1_ENABLED */
10063     if (ctxt->instate == XML_PARSER_EOF)
10064 	return(-1);
10065     if (name == NULL) {
10066 	spacePop(ctxt);
10067         return(-1);
10068     }
10069     nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10070     ret = ctxt->node;
10071 
10072 #ifdef LIBXML_VALID_ENABLED
10073     /*
10074      * [ VC: Root Element Type ]
10075      * The Name in the document type declaration must match the element
10076      * type of the root element.
10077      */
10078     if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10079         ctxt->node && (ctxt->node == ctxt->myDoc->children))
10080         ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10081 #endif /* LIBXML_VALID_ENABLED */
10082 
10083     /*
10084      * Check for an Empty Element.
10085      */
10086     if ((RAW == '/') && (NXT(1) == '>')) {
10087         SKIP(2);
10088 	if (ctxt->sax2) {
10089 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10090 		(!ctxt->disableSAX))
10091 		ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10092 #ifdef LIBXML_SAX1_ENABLED
10093 	} else {
10094 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10095 		(!ctxt->disableSAX))
10096 		ctxt->sax->endElement(ctxt->userData, name);
10097 #endif /* LIBXML_SAX1_ENABLED */
10098 	}
10099 	namePop(ctxt);
10100 	spacePop(ctxt);
10101 	if (nsNr != ctxt->nsNr)
10102 	    nsPop(ctxt, ctxt->nsNr - nsNr);
10103 	if ( ret != NULL && ctxt->record_info ) {
10104 	   node_info.end_pos = ctxt->input->consumed +
10105 			      (CUR_PTR - ctxt->input->base);
10106 	   node_info.end_line = ctxt->input->line;
10107 	   node_info.node = ret;
10108 	   xmlParserAddNodeInfo(ctxt, &node_info);
10109 	}
10110 	return(1);
10111     }
10112     if (RAW == '>') {
10113         NEXT1;
10114     } else {
10115         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10116 		     "Couldn't find end of Start Tag %s line %d\n",
10117 		                name, line, NULL);
10118 
10119 	/*
10120 	 * end of parsing of this node.
10121 	 */
10122 	nodePop(ctxt);
10123 	namePop(ctxt);
10124 	spacePop(ctxt);
10125 	if (nsNr != ctxt->nsNr)
10126 	    nsPop(ctxt, ctxt->nsNr - nsNr);
10127 
10128 	/*
10129 	 * Capture end position and add node
10130 	 */
10131 	if ( ret != NULL && ctxt->record_info ) {
10132 	   node_info.end_pos = ctxt->input->consumed +
10133 			      (CUR_PTR - ctxt->input->base);
10134 	   node_info.end_line = ctxt->input->line;
10135 	   node_info.node = ret;
10136 	   xmlParserAddNodeInfo(ctxt, &node_info);
10137 	}
10138 	return(-1);
10139     }
10140 
10141     return(0);
10142 }
10143 
10144 /**
10145  * xmlParseElementEnd:
10146  * @ctxt:  an XML parser context
10147  *
10148  * Parse the end of an XML element.
10149  */
10150 static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt)10151 xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10152     xmlParserNodeInfo node_info;
10153     xmlNodePtr ret = ctxt->node;
10154 
10155     if (ctxt->nameNr <= 0)
10156         return;
10157 
10158     /*
10159      * parse the end of tag: '</' should be here.
10160      */
10161     if (ctxt->sax2) {
10162 	xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10163 	namePop(ctxt);
10164     }
10165 #ifdef LIBXML_SAX1_ENABLED
10166     else
10167 	xmlParseEndTag1(ctxt, 0);
10168 #endif /* LIBXML_SAX1_ENABLED */
10169 
10170     /*
10171      * Capture end position and add node
10172      */
10173     if ( ret != NULL && ctxt->record_info ) {
10174        node_info.end_pos = ctxt->input->consumed +
10175                           (CUR_PTR - ctxt->input->base);
10176        node_info.end_line = ctxt->input->line;
10177        node_info.node = ret;
10178        xmlParserAddNodeInfo(ctxt, &node_info);
10179     }
10180 }
10181 
10182 /**
10183  * xmlParseVersionNum:
10184  * @ctxt:  an XML parser context
10185  *
10186  * parse the XML version value.
10187  *
10188  * [26] VersionNum ::= '1.' [0-9]+
10189  *
10190  * In practice allow [0-9].[0-9]+ at that level
10191  *
10192  * Returns the string giving the XML version number, or NULL
10193  */
10194 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10195 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10196     xmlChar *buf = NULL;
10197     int len = 0;
10198     int size = 10;
10199     xmlChar cur;
10200 
10201     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10202     if (buf == NULL) {
10203 	xmlErrMemory(ctxt, NULL);
10204 	return(NULL);
10205     }
10206     cur = CUR;
10207     if (!((cur >= '0') && (cur <= '9'))) {
10208 	xmlFree(buf);
10209 	return(NULL);
10210     }
10211     buf[len++] = cur;
10212     NEXT;
10213     cur=CUR;
10214     if (cur != '.') {
10215 	xmlFree(buf);
10216 	return(NULL);
10217     }
10218     buf[len++] = cur;
10219     NEXT;
10220     cur=CUR;
10221     while ((cur >= '0') && (cur <= '9')) {
10222 	if (len + 1 >= size) {
10223 	    xmlChar *tmp;
10224 
10225 	    size *= 2;
10226 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10227 	    if (tmp == NULL) {
10228 	        xmlFree(buf);
10229 		xmlErrMemory(ctxt, NULL);
10230 		return(NULL);
10231 	    }
10232 	    buf = tmp;
10233 	}
10234 	buf[len++] = cur;
10235 	NEXT;
10236 	cur=CUR;
10237     }
10238     buf[len] = 0;
10239     return(buf);
10240 }
10241 
10242 /**
10243  * xmlParseVersionInfo:
10244  * @ctxt:  an XML parser context
10245  *
10246  * parse the XML version.
10247  *
10248  * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10249  *
10250  * [25] Eq ::= S? '=' S?
10251  *
10252  * Returns the version string, e.g. "1.0"
10253  */
10254 
10255 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10256 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10257     xmlChar *version = NULL;
10258 
10259     if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10260 	SKIP(7);
10261 	SKIP_BLANKS;
10262 	if (RAW != '=') {
10263 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10264 	    return(NULL);
10265         }
10266 	NEXT;
10267 	SKIP_BLANKS;
10268 	if (RAW == '"') {
10269 	    NEXT;
10270 	    version = xmlParseVersionNum(ctxt);
10271 	    if (RAW != '"') {
10272 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10273 	    } else
10274 	        NEXT;
10275 	} else if (RAW == '\''){
10276 	    NEXT;
10277 	    version = xmlParseVersionNum(ctxt);
10278 	    if (RAW != '\'') {
10279 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10280 	    } else
10281 	        NEXT;
10282 	} else {
10283 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10284 	}
10285     }
10286     return(version);
10287 }
10288 
10289 /**
10290  * xmlParseEncName:
10291  * @ctxt:  an XML parser context
10292  *
10293  * parse the XML encoding name
10294  *
10295  * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10296  *
10297  * Returns the encoding name value or NULL
10298  */
10299 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10300 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10301     xmlChar *buf = NULL;
10302     int len = 0;
10303     int size = 10;
10304     xmlChar cur;
10305 
10306     cur = CUR;
10307     if (((cur >= 'a') && (cur <= 'z')) ||
10308         ((cur >= 'A') && (cur <= 'Z'))) {
10309 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10310 	if (buf == NULL) {
10311 	    xmlErrMemory(ctxt, NULL);
10312 	    return(NULL);
10313 	}
10314 
10315 	buf[len++] = cur;
10316 	NEXT;
10317 	cur = CUR;
10318 	while (((cur >= 'a') && (cur <= 'z')) ||
10319 	       ((cur >= 'A') && (cur <= 'Z')) ||
10320 	       ((cur >= '0') && (cur <= '9')) ||
10321 	       (cur == '.') || (cur == '_') ||
10322 	       (cur == '-')) {
10323 	    if (len + 1 >= size) {
10324 	        xmlChar *tmp;
10325 
10326 		size *= 2;
10327 		tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10328 		if (tmp == NULL) {
10329 		    xmlErrMemory(ctxt, NULL);
10330 		    xmlFree(buf);
10331 		    return(NULL);
10332 		}
10333 		buf = tmp;
10334 	    }
10335 	    buf[len++] = cur;
10336 	    NEXT;
10337 	    cur = CUR;
10338 	    if (cur == 0) {
10339 	        SHRINK;
10340 		GROW;
10341 		cur = CUR;
10342 	    }
10343         }
10344 	buf[len] = 0;
10345     } else {
10346 	xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10347     }
10348     return(buf);
10349 }
10350 
10351 /**
10352  * xmlParseEncodingDecl:
10353  * @ctxt:  an XML parser context
10354  *
10355  * parse the XML encoding declaration
10356  *
10357  * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10358  *
10359  * this setups the conversion filters.
10360  *
10361  * Returns the encoding value or NULL
10362  */
10363 
10364 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10365 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10366     xmlChar *encoding = NULL;
10367 
10368     SKIP_BLANKS;
10369     if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10370 	SKIP(8);
10371 	SKIP_BLANKS;
10372 	if (RAW != '=') {
10373 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10374 	    return(NULL);
10375         }
10376 	NEXT;
10377 	SKIP_BLANKS;
10378 	if (RAW == '"') {
10379 	    NEXT;
10380 	    encoding = xmlParseEncName(ctxt);
10381 	    if (RAW != '"') {
10382 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10383 		xmlFree((xmlChar *) encoding);
10384 		return(NULL);
10385 	    } else
10386 	        NEXT;
10387 	} else if (RAW == '\''){
10388 	    NEXT;
10389 	    encoding = xmlParseEncName(ctxt);
10390 	    if (RAW != '\'') {
10391 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10392 		xmlFree((xmlChar *) encoding);
10393 		return(NULL);
10394 	    } else
10395 	        NEXT;
10396 	} else {
10397 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10398 	}
10399 
10400         /*
10401          * Non standard parsing, allowing the user to ignore encoding
10402          */
10403         if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10404 	    xmlFree((xmlChar *) encoding);
10405             return(NULL);
10406 	}
10407 
10408 	/*
10409 	 * UTF-16 encoding switch has already taken place at this stage,
10410 	 * more over the little-endian/big-endian selection is already done
10411 	 */
10412         if ((encoding != NULL) &&
10413 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10414 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10415 	    /*
10416 	     * If no encoding was passed to the parser, that we are
10417 	     * using UTF-16 and no decoder is present i.e. the
10418 	     * document is apparently UTF-8 compatible, then raise an
10419 	     * encoding mismatch fatal error
10420 	     */
10421 	    if ((ctxt->encoding == NULL) &&
10422 	        (ctxt->input->buf != NULL) &&
10423 	        (ctxt->input->buf->encoder == NULL)) {
10424 		xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10425 		  "Document labelled UTF-16 but has UTF-8 content\n");
10426 	    }
10427 	    if (ctxt->encoding != NULL)
10428 		xmlFree((xmlChar *) ctxt->encoding);
10429 	    ctxt->encoding = encoding;
10430 	}
10431 	/*
10432 	 * UTF-8 encoding is handled natively
10433 	 */
10434         else if ((encoding != NULL) &&
10435 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10436 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10437 	    if (ctxt->encoding != NULL)
10438 		xmlFree((xmlChar *) ctxt->encoding);
10439 	    ctxt->encoding = encoding;
10440 	}
10441 	else if (encoding != NULL) {
10442 	    xmlCharEncodingHandlerPtr handler;
10443 
10444 	    if (ctxt->input->encoding != NULL)
10445 		xmlFree((xmlChar *) ctxt->input->encoding);
10446 	    ctxt->input->encoding = encoding;
10447 
10448             handler = xmlFindCharEncodingHandler((const char *) encoding);
10449 	    if (handler != NULL) {
10450 		if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10451 		    /* failed to convert */
10452 		    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10453 		    return(NULL);
10454 		}
10455 	    } else {
10456 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10457 			"Unsupported encoding %s\n", encoding);
10458 		return(NULL);
10459 	    }
10460 	}
10461     }
10462     return(encoding);
10463 }
10464 
10465 /**
10466  * xmlParseSDDecl:
10467  * @ctxt:  an XML parser context
10468  *
10469  * parse the XML standalone declaration
10470  *
10471  * [32] SDDecl ::= S 'standalone' Eq
10472  *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10473  *
10474  * [ VC: Standalone Document Declaration ]
10475  * TODO The standalone document declaration must have the value "no"
10476  * if any external markup declarations contain declarations of:
10477  *  - attributes with default values, if elements to which these
10478  *    attributes apply appear in the document without specifications
10479  *    of values for these attributes, or
10480  *  - entities (other than amp, lt, gt, apos, quot), if references
10481  *    to those entities appear in the document, or
10482  *  - attributes with values subject to normalization, where the
10483  *    attribute appears in the document with a value which will change
10484  *    as a result of normalization, or
10485  *  - element types with element content, if white space occurs directly
10486  *    within any instance of those types.
10487  *
10488  * Returns:
10489  *   1 if standalone="yes"
10490  *   0 if standalone="no"
10491  *  -2 if standalone attribute is missing or invalid
10492  *	  (A standalone value of -2 means that the XML declaration was found,
10493  *	   but no value was specified for the standalone attribute).
10494  */
10495 
10496 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10497 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10498     int standalone = -2;
10499 
10500     SKIP_BLANKS;
10501     if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10502 	SKIP(10);
10503         SKIP_BLANKS;
10504 	if (RAW != '=') {
10505 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10506 	    return(standalone);
10507         }
10508 	NEXT;
10509 	SKIP_BLANKS;
10510         if (RAW == '\''){
10511 	    NEXT;
10512 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10513 	        standalone = 0;
10514                 SKIP(2);
10515 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10516 	               (NXT(2) == 's')) {
10517 	        standalone = 1;
10518 		SKIP(3);
10519             } else {
10520 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10521 	    }
10522 	    if (RAW != '\'') {
10523 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10524 	    } else
10525 	        NEXT;
10526 	} else if (RAW == '"'){
10527 	    NEXT;
10528 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10529 	        standalone = 0;
10530 		SKIP(2);
10531 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10532 	               (NXT(2) == 's')) {
10533 	        standalone = 1;
10534                 SKIP(3);
10535             } else {
10536 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10537 	    }
10538 	    if (RAW != '"') {
10539 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10540 	    } else
10541 	        NEXT;
10542 	} else {
10543 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10544         }
10545     }
10546     return(standalone);
10547 }
10548 
10549 /**
10550  * xmlParseXMLDecl:
10551  * @ctxt:  an XML parser context
10552  *
10553  * parse an XML declaration header
10554  *
10555  * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10556  */
10557 
10558 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10559 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10560     xmlChar *version;
10561 
10562     /*
10563      * This value for standalone indicates that the document has an
10564      * XML declaration but it does not have a standalone attribute.
10565      * It will be overwritten later if a standalone attribute is found.
10566      */
10567     ctxt->input->standalone = -2;
10568 
10569     /*
10570      * We know that '<?xml' is here.
10571      */
10572     SKIP(5);
10573 
10574     if (!IS_BLANK_CH(RAW)) {
10575 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10576 	               "Blank needed after '<?xml'\n");
10577     }
10578     SKIP_BLANKS;
10579 
10580     /*
10581      * We must have the VersionInfo here.
10582      */
10583     version = xmlParseVersionInfo(ctxt);
10584     if (version == NULL) {
10585 	xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10586     } else {
10587 	if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10588 	    /*
10589 	     * Changed here for XML-1.0 5th edition
10590 	     */
10591 	    if (ctxt->options & XML_PARSE_OLD10) {
10592 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10593 			          "Unsupported version '%s'\n",
10594 			          version);
10595 	    } else {
10596 	        if ((version[0] == '1') && ((version[1] == '.'))) {
10597 		    xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10598 		                  "Unsupported version '%s'\n",
10599 				  version, NULL);
10600 		} else {
10601 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10602 				      "Unsupported version '%s'\n",
10603 				      version);
10604 		}
10605 	    }
10606 	}
10607 	if (ctxt->version != NULL)
10608 	    xmlFree((void *) ctxt->version);
10609 	ctxt->version = version;
10610     }
10611 
10612     /*
10613      * We may have the encoding declaration
10614      */
10615     if (!IS_BLANK_CH(RAW)) {
10616         if ((RAW == '?') && (NXT(1) == '>')) {
10617 	    SKIP(2);
10618 	    return;
10619 	}
10620 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10621     }
10622     xmlParseEncodingDecl(ctxt);
10623     if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10624          (ctxt->instate == XML_PARSER_EOF)) {
10625 	/*
10626 	 * The XML REC instructs us to stop parsing right here
10627 	 */
10628         return;
10629     }
10630 
10631     /*
10632      * We may have the standalone status.
10633      */
10634     if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10635         if ((RAW == '?') && (NXT(1) == '>')) {
10636 	    SKIP(2);
10637 	    return;
10638 	}
10639 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10640     }
10641 
10642     /*
10643      * We can grow the input buffer freely at that point
10644      */
10645     GROW;
10646 
10647     SKIP_BLANKS;
10648     ctxt->input->standalone = xmlParseSDDecl(ctxt);
10649 
10650     SKIP_BLANKS;
10651     if ((RAW == '?') && (NXT(1) == '>')) {
10652         SKIP(2);
10653     } else if (RAW == '>') {
10654         /* Deprecated old WD ... */
10655 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10656 	NEXT;
10657     } else {
10658 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10659 	MOVETO_ENDTAG(CUR_PTR);
10660 	NEXT;
10661     }
10662 }
10663 
10664 /**
10665  * xmlParseMisc:
10666  * @ctxt:  an XML parser context
10667  *
10668  * parse an XML Misc* optional field.
10669  *
10670  * [27] Misc ::= Comment | PI |  S
10671  */
10672 
10673 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10674 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10675     while (ctxt->instate != XML_PARSER_EOF) {
10676         SKIP_BLANKS;
10677         GROW;
10678         if ((RAW == '<') && (NXT(1) == '?')) {
10679 	    xmlParsePI(ctxt);
10680         } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10681 	    xmlParseComment(ctxt);
10682         } else {
10683             break;
10684         }
10685     }
10686 }
10687 
10688 /**
10689  * xmlParseDocument:
10690  * @ctxt:  an XML parser context
10691  *
10692  * parse an XML document (and build a tree if using the standard SAX
10693  * interface).
10694  *
10695  * [1] document ::= prolog element Misc*
10696  *
10697  * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10698  *
10699  * Returns 0, -1 in case of error. the parser context is augmented
10700  *                as a result of the parsing.
10701  */
10702 
10703 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10704 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10705     xmlChar start[4];
10706     xmlCharEncoding enc;
10707 
10708     xmlInitParser();
10709 
10710     if ((ctxt == NULL) || (ctxt->input == NULL))
10711         return(-1);
10712 
10713     GROW;
10714 
10715     /*
10716      * SAX: detecting the level.
10717      */
10718     xmlDetectSAX2(ctxt);
10719 
10720     /*
10721      * SAX: beginning of the document processing.
10722      */
10723     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10724         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10725     if (ctxt->instate == XML_PARSER_EOF)
10726 	return(-1);
10727 
10728     if ((ctxt->encoding == NULL) &&
10729         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10730 	/*
10731 	 * Get the 4 first bytes and decode the charset
10732 	 * if enc != XML_CHAR_ENCODING_NONE
10733 	 * plug some encoding conversion routines.
10734 	 */
10735 	start[0] = RAW;
10736 	start[1] = NXT(1);
10737 	start[2] = NXT(2);
10738 	start[3] = NXT(3);
10739 	enc = xmlDetectCharEncoding(&start[0], 4);
10740 	if (enc != XML_CHAR_ENCODING_NONE) {
10741 	    xmlSwitchEncoding(ctxt, enc);
10742 	}
10743     }
10744 
10745 
10746     if (CUR == 0) {
10747 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10748 	return(-1);
10749     }
10750 
10751     /*
10752      * Check for the XMLDecl in the Prolog.
10753      * do not GROW here to avoid the detected encoder to decode more
10754      * than just the first line, unless the amount of data is really
10755      * too small to hold "<?xml version="1.0" encoding="foo"
10756      */
10757     if ((ctxt->input->end - ctxt->input->cur) < 35) {
10758        GROW;
10759     }
10760     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10761 
10762 	/*
10763 	 * Note that we will switch encoding on the fly.
10764 	 */
10765 	xmlParseXMLDecl(ctxt);
10766 	if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10767 	    (ctxt->instate == XML_PARSER_EOF)) {
10768 	    /*
10769 	     * The XML REC instructs us to stop parsing right here
10770 	     */
10771 	    return(-1);
10772 	}
10773 	ctxt->standalone = ctxt->input->standalone;
10774 	SKIP_BLANKS;
10775     } else {
10776 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10777     }
10778     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10779         ctxt->sax->startDocument(ctxt->userData);
10780     if (ctxt->instate == XML_PARSER_EOF)
10781 	return(-1);
10782     if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10783         (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10784 	ctxt->myDoc->compression = ctxt->input->buf->compressed;
10785     }
10786 
10787     /*
10788      * The Misc part of the Prolog
10789      */
10790     xmlParseMisc(ctxt);
10791 
10792     /*
10793      * Then possibly doc type declaration(s) and more Misc
10794      * (doctypedecl Misc*)?
10795      */
10796     GROW;
10797     if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10798 
10799 	ctxt->inSubset = 1;
10800 	xmlParseDocTypeDecl(ctxt);
10801 	if (RAW == '[') {
10802 	    ctxt->instate = XML_PARSER_DTD;
10803 	    xmlParseInternalSubset(ctxt);
10804 	    if (ctxt->instate == XML_PARSER_EOF)
10805 		return(-1);
10806 	}
10807 
10808 	/*
10809 	 * Create and update the external subset.
10810 	 */
10811 	ctxt->inSubset = 2;
10812 	if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10813 	    (!ctxt->disableSAX))
10814 	    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10815 	                              ctxt->extSubSystem, ctxt->extSubURI);
10816 	if (ctxt->instate == XML_PARSER_EOF)
10817 	    return(-1);
10818 	ctxt->inSubset = 0;
10819 
10820         xmlCleanSpecialAttr(ctxt);
10821 
10822 	ctxt->instate = XML_PARSER_PROLOG;
10823 	xmlParseMisc(ctxt);
10824     }
10825 
10826     /*
10827      * Time to start parsing the tree itself
10828      */
10829     GROW;
10830     if (RAW != '<') {
10831 	xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10832 		       "Start tag expected, '<' not found\n");
10833     } else {
10834 	ctxt->instate = XML_PARSER_CONTENT;
10835 	xmlParseElement(ctxt);
10836 	ctxt->instate = XML_PARSER_EPILOG;
10837 
10838 
10839 	/*
10840 	 * The Misc part at the end
10841 	 */
10842 	xmlParseMisc(ctxt);
10843 
10844 	if (RAW != 0) {
10845 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10846 	}
10847 	ctxt->instate = XML_PARSER_EOF;
10848     }
10849 
10850     /*
10851      * SAX: end of the document processing.
10852      */
10853     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10854         ctxt->sax->endDocument(ctxt->userData);
10855 
10856     /*
10857      * Remove locally kept entity definitions if the tree was not built
10858      */
10859     if ((ctxt->myDoc != NULL) &&
10860 	(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10861 	xmlFreeDoc(ctxt->myDoc);
10862 	ctxt->myDoc = NULL;
10863     }
10864 
10865     if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10866         ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10867 	if (ctxt->valid)
10868 	    ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10869 	if (ctxt->nsWellFormed)
10870 	    ctxt->myDoc->properties |= XML_DOC_NSVALID;
10871 	if (ctxt->options & XML_PARSE_OLD10)
10872 	    ctxt->myDoc->properties |= XML_DOC_OLD10;
10873     }
10874     if (! ctxt->wellFormed) {
10875 	ctxt->valid = 0;
10876 	return(-1);
10877     }
10878     return(0);
10879 }
10880 
10881 /**
10882  * xmlParseExtParsedEnt:
10883  * @ctxt:  an XML parser context
10884  *
10885  * parse a general parsed entity
10886  * An external general parsed entity is well-formed if it matches the
10887  * production labeled extParsedEnt.
10888  *
10889  * [78] extParsedEnt ::= TextDecl? content
10890  *
10891  * Returns 0, -1 in case of error. the parser context is augmented
10892  *                as a result of the parsing.
10893  */
10894 
10895 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10896 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10897     xmlChar start[4];
10898     xmlCharEncoding enc;
10899 
10900     if ((ctxt == NULL) || (ctxt->input == NULL))
10901         return(-1);
10902 
10903     xmlDefaultSAXHandlerInit();
10904 
10905     xmlDetectSAX2(ctxt);
10906 
10907     GROW;
10908 
10909     /*
10910      * SAX: beginning of the document processing.
10911      */
10912     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10913         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10914 
10915     /*
10916      * Get the 4 first bytes and decode the charset
10917      * if enc != XML_CHAR_ENCODING_NONE
10918      * plug some encoding conversion routines.
10919      */
10920     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10921 	start[0] = RAW;
10922 	start[1] = NXT(1);
10923 	start[2] = NXT(2);
10924 	start[3] = NXT(3);
10925 	enc = xmlDetectCharEncoding(start, 4);
10926 	if (enc != XML_CHAR_ENCODING_NONE) {
10927 	    xmlSwitchEncoding(ctxt, enc);
10928 	}
10929     }
10930 
10931 
10932     if (CUR == 0) {
10933 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10934     }
10935 
10936     /*
10937      * Check for the XMLDecl in the Prolog.
10938      */
10939     GROW;
10940     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10941 
10942 	/*
10943 	 * Note that we will switch encoding on the fly.
10944 	 */
10945 	xmlParseXMLDecl(ctxt);
10946 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10947 	    /*
10948 	     * The XML REC instructs us to stop parsing right here
10949 	     */
10950 	    return(-1);
10951 	}
10952 	SKIP_BLANKS;
10953     } else {
10954 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10955     }
10956     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10957         ctxt->sax->startDocument(ctxt->userData);
10958     if (ctxt->instate == XML_PARSER_EOF)
10959 	return(-1);
10960 
10961     /*
10962      * Doing validity checking on chunk doesn't make sense
10963      */
10964     ctxt->instate = XML_PARSER_CONTENT;
10965     ctxt->validate = 0;
10966     ctxt->loadsubset = 0;
10967     ctxt->depth = 0;
10968 
10969     xmlParseContent(ctxt);
10970     if (ctxt->instate == XML_PARSER_EOF)
10971 	return(-1);
10972 
10973     if ((RAW == '<') && (NXT(1) == '/')) {
10974 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10975     } else if (RAW != 0) {
10976 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10977     }
10978 
10979     /*
10980      * SAX: end of the document processing.
10981      */
10982     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10983         ctxt->sax->endDocument(ctxt->userData);
10984 
10985     if (! ctxt->wellFormed) return(-1);
10986     return(0);
10987 }
10988 
10989 #ifdef LIBXML_PUSH_ENABLED
10990 /************************************************************************
10991  *									*
10992  *		Progressive parsing interfaces				*
10993  *									*
10994  ************************************************************************/
10995 
10996 /**
10997  * xmlParseLookupSequence:
10998  * @ctxt:  an XML parser context
10999  * @first:  the first char to lookup
11000  * @next:  the next char to lookup or zero
11001  * @third:  the next char to lookup or zero
11002  *
11003  * Try to find if a sequence (first, next, third) or  just (first next) or
11004  * (first) is available in the input stream.
11005  * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11006  * to avoid rescanning sequences of bytes, it DOES change the state of the
11007  * parser, do not use liberally.
11008  *
11009  * Returns the index to the current parsing point if the full sequence
11010  *      is available, -1 otherwise.
11011  */
11012 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)11013 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11014                        xmlChar next, xmlChar third) {
11015     int base, len;
11016     xmlParserInputPtr in;
11017     const xmlChar *buf;
11018 
11019     in = ctxt->input;
11020     if (in == NULL) return(-1);
11021     base = in->cur - in->base;
11022     if (base < 0) return(-1);
11023     if (ctxt->checkIndex > base)
11024         base = ctxt->checkIndex;
11025     if (in->buf == NULL) {
11026 	buf = in->base;
11027 	len = in->length;
11028     } else {
11029 	buf = xmlBufContent(in->buf->buffer);
11030 	len = xmlBufUse(in->buf->buffer);
11031     }
11032     /* take into account the sequence length */
11033     if (third) len -= 2;
11034     else if (next) len --;
11035     for (;base < len;base++) {
11036         if (buf[base] == first) {
11037 	    if (third != 0) {
11038 		if ((buf[base + 1] != next) ||
11039 		    (buf[base + 2] != third)) continue;
11040 	    } else if (next != 0) {
11041 		if (buf[base + 1] != next) continue;
11042 	    }
11043 	    ctxt->checkIndex = 0;
11044 #ifdef DEBUG_PUSH
11045 	    if (next == 0)
11046 		xmlGenericError(xmlGenericErrorContext,
11047 			"PP: lookup '%c' found at %d\n",
11048 			first, base);
11049 	    else if (third == 0)
11050 		xmlGenericError(xmlGenericErrorContext,
11051 			"PP: lookup '%c%c' found at %d\n",
11052 			first, next, base);
11053 	    else
11054 		xmlGenericError(xmlGenericErrorContext,
11055 			"PP: lookup '%c%c%c' found at %d\n",
11056 			first, next, third, base);
11057 #endif
11058 	    return(base - (in->cur - in->base));
11059 	}
11060     }
11061     ctxt->checkIndex = base;
11062 #ifdef DEBUG_PUSH
11063     if (next == 0)
11064 	xmlGenericError(xmlGenericErrorContext,
11065 		"PP: lookup '%c' failed\n", first);
11066     else if (third == 0)
11067 	xmlGenericError(xmlGenericErrorContext,
11068 		"PP: lookup '%c%c' failed\n", first, next);
11069     else
11070 	xmlGenericError(xmlGenericErrorContext,
11071 		"PP: lookup '%c%c%c' failed\n", first, next, third);
11072 #endif
11073     return(-1);
11074 }
11075 
11076 /**
11077  * xmlParseGetLasts:
11078  * @ctxt:  an XML parser context
11079  * @lastlt:  pointer to store the last '<' from the input
11080  * @lastgt:  pointer to store the last '>' from the input
11081  *
11082  * Lookup the last < and > in the current chunk
11083  */
11084 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)11085 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11086                  const xmlChar **lastgt) {
11087     const xmlChar *tmp;
11088 
11089     if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11090 	xmlGenericError(xmlGenericErrorContext,
11091 		    "Internal error: xmlParseGetLasts\n");
11092 	return;
11093     }
11094     if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11095         tmp = ctxt->input->end;
11096 	tmp--;
11097 	while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11098 	if (tmp < ctxt->input->base) {
11099 	    *lastlt = NULL;
11100 	    *lastgt = NULL;
11101 	} else {
11102 	    *lastlt = tmp;
11103 	    tmp++;
11104 	    while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11105 	        if (*tmp == '\'') {
11106 		    tmp++;
11107 		    while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11108 		    if (tmp < ctxt->input->end) tmp++;
11109 		} else if (*tmp == '"') {
11110 		    tmp++;
11111 		    while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11112 		    if (tmp < ctxt->input->end) tmp++;
11113 		} else
11114 		    tmp++;
11115 	    }
11116 	    if (tmp < ctxt->input->end)
11117 	        *lastgt = tmp;
11118 	    else {
11119 	        tmp = *lastlt;
11120 		tmp--;
11121 		while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11122 		if (tmp >= ctxt->input->base)
11123 		    *lastgt = tmp;
11124 		else
11125 		    *lastgt = NULL;
11126 	    }
11127 	}
11128     } else {
11129         *lastlt = NULL;
11130 	*lastgt = NULL;
11131     }
11132 }
11133 /**
11134  * xmlCheckCdataPush:
11135  * @cur: pointer to the block of characters
11136  * @len: length of the block in bytes
11137  * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11138  *
11139  * Check that the block of characters is okay as SCdata content [20]
11140  *
11141  * Returns the number of bytes to pass if okay, a negative index where an
11142  *         UTF-8 error occurred otherwise
11143  */
11144 static int
xmlCheckCdataPush(const xmlChar * utf,int len,int complete)11145 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11146     int ix;
11147     unsigned char c;
11148     int codepoint;
11149 
11150     if ((utf == NULL) || (len <= 0))
11151         return(0);
11152 
11153     for (ix = 0; ix < len;) {      /* string is 0-terminated */
11154         c = utf[ix];
11155         if ((c & 0x80) == 0x00) {	/* 1-byte code, starts with 10 */
11156 	    if (c >= 0x20)
11157 		ix++;
11158 	    else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11159 	        ix++;
11160 	    else
11161 	        return(-ix);
11162 	} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11163 	    if (ix + 2 > len) return(complete ? -ix : ix);
11164 	    if ((utf[ix+1] & 0xc0 ) != 0x80)
11165 	        return(-ix);
11166 	    codepoint = (utf[ix] & 0x1f) << 6;
11167 	    codepoint |= utf[ix+1] & 0x3f;
11168 	    if (!xmlIsCharQ(codepoint))
11169 	        return(-ix);
11170 	    ix += 2;
11171 	} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11172 	    if (ix + 3 > len) return(complete ? -ix : ix);
11173 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11174 	        ((utf[ix+2] & 0xc0) != 0x80))
11175 		    return(-ix);
11176 	    codepoint = (utf[ix] & 0xf) << 12;
11177 	    codepoint |= (utf[ix+1] & 0x3f) << 6;
11178 	    codepoint |= utf[ix+2] & 0x3f;
11179 	    if (!xmlIsCharQ(codepoint))
11180 	        return(-ix);
11181 	    ix += 3;
11182 	} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11183 	    if (ix + 4 > len) return(complete ? -ix : ix);
11184 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11185 	        ((utf[ix+2] & 0xc0) != 0x80) ||
11186 		((utf[ix+3] & 0xc0) != 0x80))
11187 		    return(-ix);
11188 	    codepoint = (utf[ix] & 0x7) << 18;
11189 	    codepoint |= (utf[ix+1] & 0x3f) << 12;
11190 	    codepoint |= (utf[ix+2] & 0x3f) << 6;
11191 	    codepoint |= utf[ix+3] & 0x3f;
11192 	    if (!xmlIsCharQ(codepoint))
11193 	        return(-ix);
11194 	    ix += 4;
11195 	} else				/* unknown encoding */
11196 	    return(-ix);
11197       }
11198       return(ix);
11199 }
11200 
11201 /**
11202  * xmlParseTryOrFinish:
11203  * @ctxt:  an XML parser context
11204  * @terminate:  last chunk indicator
11205  *
11206  * Try to progress on parsing
11207  *
11208  * Returns zero if no parsing was possible
11209  */
11210 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11211 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11212     int ret = 0;
11213     int avail, tlen;
11214     xmlChar cur, next;
11215     const xmlChar *lastlt, *lastgt;
11216 
11217     if (ctxt->input == NULL)
11218         return(0);
11219 
11220 #ifdef DEBUG_PUSH
11221     switch (ctxt->instate) {
11222 	case XML_PARSER_EOF:
11223 	    xmlGenericError(xmlGenericErrorContext,
11224 		    "PP: try EOF\n"); break;
11225 	case XML_PARSER_START:
11226 	    xmlGenericError(xmlGenericErrorContext,
11227 		    "PP: try START\n"); break;
11228 	case XML_PARSER_MISC:
11229 	    xmlGenericError(xmlGenericErrorContext,
11230 		    "PP: try MISC\n");break;
11231 	case XML_PARSER_COMMENT:
11232 	    xmlGenericError(xmlGenericErrorContext,
11233 		    "PP: try COMMENT\n");break;
11234 	case XML_PARSER_PROLOG:
11235 	    xmlGenericError(xmlGenericErrorContext,
11236 		    "PP: try PROLOG\n");break;
11237 	case XML_PARSER_START_TAG:
11238 	    xmlGenericError(xmlGenericErrorContext,
11239 		    "PP: try START_TAG\n");break;
11240 	case XML_PARSER_CONTENT:
11241 	    xmlGenericError(xmlGenericErrorContext,
11242 		    "PP: try CONTENT\n");break;
11243 	case XML_PARSER_CDATA_SECTION:
11244 	    xmlGenericError(xmlGenericErrorContext,
11245 		    "PP: try CDATA_SECTION\n");break;
11246 	case XML_PARSER_END_TAG:
11247 	    xmlGenericError(xmlGenericErrorContext,
11248 		    "PP: try END_TAG\n");break;
11249 	case XML_PARSER_ENTITY_DECL:
11250 	    xmlGenericError(xmlGenericErrorContext,
11251 		    "PP: try ENTITY_DECL\n");break;
11252 	case XML_PARSER_ENTITY_VALUE:
11253 	    xmlGenericError(xmlGenericErrorContext,
11254 		    "PP: try ENTITY_VALUE\n");break;
11255 	case XML_PARSER_ATTRIBUTE_VALUE:
11256 	    xmlGenericError(xmlGenericErrorContext,
11257 		    "PP: try ATTRIBUTE_VALUE\n");break;
11258 	case XML_PARSER_DTD:
11259 	    xmlGenericError(xmlGenericErrorContext,
11260 		    "PP: try DTD\n");break;
11261 	case XML_PARSER_EPILOG:
11262 	    xmlGenericError(xmlGenericErrorContext,
11263 		    "PP: try EPILOG\n");break;
11264 	case XML_PARSER_PI:
11265 	    xmlGenericError(xmlGenericErrorContext,
11266 		    "PP: try PI\n");break;
11267         case XML_PARSER_IGNORE:
11268             xmlGenericError(xmlGenericErrorContext,
11269 		    "PP: try IGNORE\n");break;
11270     }
11271 #endif
11272 
11273     if ((ctxt->input != NULL) &&
11274         (ctxt->input->cur - ctxt->input->base > 4096)) {
11275 	xmlSHRINK(ctxt);
11276 	ctxt->checkIndex = 0;
11277     }
11278     xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11279 
11280     while (ctxt->instate != XML_PARSER_EOF) {
11281 	if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11282 	    return(0);
11283 
11284 	if (ctxt->input == NULL) break;
11285 	if (ctxt->input->buf == NULL)
11286 	    avail = ctxt->input->length -
11287 	            (ctxt->input->cur - ctxt->input->base);
11288 	else {
11289 	    /*
11290 	     * If we are operating on converted input, try to flush
11291 	     * remaining chars to avoid them stalling in the non-converted
11292 	     * buffer. But do not do this in document start where
11293 	     * encoding="..." may not have been read and we work on a
11294 	     * guessed encoding.
11295 	     */
11296 	    if ((ctxt->instate != XML_PARSER_START) &&
11297 	        (ctxt->input->buf->raw != NULL) &&
11298 		(xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11299                 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11300                                                  ctxt->input);
11301 		size_t current = ctxt->input->cur - ctxt->input->base;
11302 
11303 		xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11304                 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11305                                       base, current);
11306 	    }
11307 	    avail = xmlBufUse(ctxt->input->buf->buffer) -
11308 		    (ctxt->input->cur - ctxt->input->base);
11309 	}
11310         if (avail < 1)
11311 	    goto done;
11312         switch (ctxt->instate) {
11313             case XML_PARSER_EOF:
11314 	        /*
11315 		 * Document parsing is done !
11316 		 */
11317 	        goto done;
11318             case XML_PARSER_START:
11319 		if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11320 		    xmlChar start[4];
11321 		    xmlCharEncoding enc;
11322 
11323 		    /*
11324 		     * Very first chars read from the document flow.
11325 		     */
11326 		    if (avail < 4)
11327 			goto done;
11328 
11329 		    /*
11330 		     * Get the 4 first bytes and decode the charset
11331 		     * if enc != XML_CHAR_ENCODING_NONE
11332 		     * plug some encoding conversion routines,
11333 		     * else xmlSwitchEncoding will set to (default)
11334 		     * UTF8.
11335 		     */
11336 		    start[0] = RAW;
11337 		    start[1] = NXT(1);
11338 		    start[2] = NXT(2);
11339 		    start[3] = NXT(3);
11340 		    enc = xmlDetectCharEncoding(start, 4);
11341 		    xmlSwitchEncoding(ctxt, enc);
11342 		    break;
11343 		}
11344 
11345 		if (avail < 2)
11346 		    goto done;
11347 		cur = ctxt->input->cur[0];
11348 		next = ctxt->input->cur[1];
11349 		if (cur == 0) {
11350 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11351 			ctxt->sax->setDocumentLocator(ctxt->userData,
11352 						      &xmlDefaultSAXLocator);
11353 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11354 		    xmlHaltParser(ctxt);
11355 #ifdef DEBUG_PUSH
11356 		    xmlGenericError(xmlGenericErrorContext,
11357 			    "PP: entering EOF\n");
11358 #endif
11359 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11360 			ctxt->sax->endDocument(ctxt->userData);
11361 		    goto done;
11362 		}
11363 	        if ((cur == '<') && (next == '?')) {
11364 		    /* PI or XML decl */
11365 		    if (avail < 5) return(ret);
11366 		    if ((!terminate) &&
11367 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11368 			return(ret);
11369 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11370 			ctxt->sax->setDocumentLocator(ctxt->userData,
11371 						      &xmlDefaultSAXLocator);
11372 		    if ((ctxt->input->cur[2] == 'x') &&
11373 			(ctxt->input->cur[3] == 'm') &&
11374 			(ctxt->input->cur[4] == 'l') &&
11375 			(IS_BLANK_CH(ctxt->input->cur[5]))) {
11376 			ret += 5;
11377 #ifdef DEBUG_PUSH
11378 			xmlGenericError(xmlGenericErrorContext,
11379 				"PP: Parsing XML Decl\n");
11380 #endif
11381 			xmlParseXMLDecl(ctxt);
11382 			if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11383 			    /*
11384 			     * The XML REC instructs us to stop parsing right
11385 			     * here
11386 			     */
11387 			    xmlHaltParser(ctxt);
11388 			    return(0);
11389 			}
11390 			ctxt->standalone = ctxt->input->standalone;
11391 			if ((ctxt->encoding == NULL) &&
11392 			    (ctxt->input->encoding != NULL))
11393 			    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11394 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11395 			    (!ctxt->disableSAX))
11396 			    ctxt->sax->startDocument(ctxt->userData);
11397 			ctxt->instate = XML_PARSER_MISC;
11398 #ifdef DEBUG_PUSH
11399 			xmlGenericError(xmlGenericErrorContext,
11400 				"PP: entering MISC\n");
11401 #endif
11402 		    } else {
11403 			ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11404 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11405 			    (!ctxt->disableSAX))
11406 			    ctxt->sax->startDocument(ctxt->userData);
11407 			ctxt->instate = XML_PARSER_MISC;
11408 #ifdef DEBUG_PUSH
11409 			xmlGenericError(xmlGenericErrorContext,
11410 				"PP: entering MISC\n");
11411 #endif
11412 		    }
11413 		} else {
11414 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11415 			ctxt->sax->setDocumentLocator(ctxt->userData,
11416 						      &xmlDefaultSAXLocator);
11417 		    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11418 		    if (ctxt->version == NULL) {
11419 		        xmlErrMemory(ctxt, NULL);
11420 			break;
11421 		    }
11422 		    if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11423 		        (!ctxt->disableSAX))
11424 			ctxt->sax->startDocument(ctxt->userData);
11425 		    ctxt->instate = XML_PARSER_MISC;
11426 #ifdef DEBUG_PUSH
11427 		    xmlGenericError(xmlGenericErrorContext,
11428 			    "PP: entering MISC\n");
11429 #endif
11430 		}
11431 		break;
11432             case XML_PARSER_START_TAG: {
11433 	        const xmlChar *name;
11434 		const xmlChar *prefix = NULL;
11435 		const xmlChar *URI = NULL;
11436                 int line = ctxt->input->line;
11437 		int nsNr = ctxt->nsNr;
11438 
11439 		if ((avail < 2) && (ctxt->inputNr == 1))
11440 		    goto done;
11441 		cur = ctxt->input->cur[0];
11442 	        if (cur != '<') {
11443 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11444 		    xmlHaltParser(ctxt);
11445 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11446 			ctxt->sax->endDocument(ctxt->userData);
11447 		    goto done;
11448 		}
11449 		if (!terminate) {
11450 		    if (ctxt->progressive) {
11451 		        /* > can be found unescaped in attribute values */
11452 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11453 			    goto done;
11454 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11455 			goto done;
11456 		    }
11457 		}
11458 		if (ctxt->spaceNr == 0)
11459 		    spacePush(ctxt, -1);
11460 		else if (*ctxt->space == -2)
11461 		    spacePush(ctxt, -1);
11462 		else
11463 		    spacePush(ctxt, *ctxt->space);
11464 #ifdef LIBXML_SAX1_ENABLED
11465 		if (ctxt->sax2)
11466 #endif /* LIBXML_SAX1_ENABLED */
11467 		    name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11468 #ifdef LIBXML_SAX1_ENABLED
11469 		else
11470 		    name = xmlParseStartTag(ctxt);
11471 #endif /* LIBXML_SAX1_ENABLED */
11472 		if (ctxt->instate == XML_PARSER_EOF)
11473 		    goto done;
11474 		if (name == NULL) {
11475 		    spacePop(ctxt);
11476 		    xmlHaltParser(ctxt);
11477 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11478 			ctxt->sax->endDocument(ctxt->userData);
11479 		    goto done;
11480 		}
11481 #ifdef LIBXML_VALID_ENABLED
11482 		/*
11483 		 * [ VC: Root Element Type ]
11484 		 * The Name in the document type declaration must match
11485 		 * the element type of the root element.
11486 		 */
11487 		if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11488 		    ctxt->node && (ctxt->node == ctxt->myDoc->children))
11489 		    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11490 #endif /* LIBXML_VALID_ENABLED */
11491 
11492 		/*
11493 		 * Check for an Empty Element.
11494 		 */
11495 		if ((RAW == '/') && (NXT(1) == '>')) {
11496 		    SKIP(2);
11497 
11498 		    if (ctxt->sax2) {
11499 			if ((ctxt->sax != NULL) &&
11500 			    (ctxt->sax->endElementNs != NULL) &&
11501 			    (!ctxt->disableSAX))
11502 			    ctxt->sax->endElementNs(ctxt->userData, name,
11503 			                            prefix, URI);
11504 			if (ctxt->nsNr - nsNr > 0)
11505 			    nsPop(ctxt, ctxt->nsNr - nsNr);
11506 #ifdef LIBXML_SAX1_ENABLED
11507 		    } else {
11508 			if ((ctxt->sax != NULL) &&
11509 			    (ctxt->sax->endElement != NULL) &&
11510 			    (!ctxt->disableSAX))
11511 			    ctxt->sax->endElement(ctxt->userData, name);
11512 #endif /* LIBXML_SAX1_ENABLED */
11513 		    }
11514 		    if (ctxt->instate == XML_PARSER_EOF)
11515 			goto done;
11516 		    spacePop(ctxt);
11517 		    if (ctxt->nameNr == 0) {
11518 			ctxt->instate = XML_PARSER_EPILOG;
11519 		    } else {
11520 			ctxt->instate = XML_PARSER_CONTENT;
11521 		    }
11522                     ctxt->progressive = 1;
11523 		    break;
11524 		}
11525 		if (RAW == '>') {
11526 		    NEXT;
11527 		} else {
11528 		    xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11529 					 "Couldn't find end of Start Tag %s\n",
11530 					 name);
11531 		    nodePop(ctxt);
11532 		    spacePop(ctxt);
11533 		}
11534                 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11535 
11536 		ctxt->instate = XML_PARSER_CONTENT;
11537                 ctxt->progressive = 1;
11538                 break;
11539 	    }
11540             case XML_PARSER_CONTENT: {
11541 		const xmlChar *test;
11542 		unsigned int cons;
11543 		if ((avail < 2) && (ctxt->inputNr == 1))
11544 		    goto done;
11545 		cur = ctxt->input->cur[0];
11546 		next = ctxt->input->cur[1];
11547 
11548 		test = CUR_PTR;
11549 	        cons = ctxt->input->consumed;
11550 		if ((cur == '<') && (next == '/')) {
11551 		    ctxt->instate = XML_PARSER_END_TAG;
11552 		    break;
11553 	        } else if ((cur == '<') && (next == '?')) {
11554 		    if ((!terminate) &&
11555 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11556                         ctxt->progressive = XML_PARSER_PI;
11557 			goto done;
11558                     }
11559 		    xmlParsePI(ctxt);
11560 		    ctxt->instate = XML_PARSER_CONTENT;
11561                     ctxt->progressive = 1;
11562 		} else if ((cur == '<') && (next != '!')) {
11563 		    ctxt->instate = XML_PARSER_START_TAG;
11564 		    break;
11565 		} else if ((cur == '<') && (next == '!') &&
11566 		           (ctxt->input->cur[2] == '-') &&
11567 			   (ctxt->input->cur[3] == '-')) {
11568 		    int term;
11569 
11570 	            if (avail < 4)
11571 		        goto done;
11572 		    ctxt->input->cur += 4;
11573 		    term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11574 		    ctxt->input->cur -= 4;
11575 		    if ((!terminate) && (term < 0)) {
11576                         ctxt->progressive = XML_PARSER_COMMENT;
11577 			goto done;
11578                     }
11579 		    xmlParseComment(ctxt);
11580 		    ctxt->instate = XML_PARSER_CONTENT;
11581                     ctxt->progressive = 1;
11582 		} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11583 		    (ctxt->input->cur[2] == '[') &&
11584 		    (ctxt->input->cur[3] == 'C') &&
11585 		    (ctxt->input->cur[4] == 'D') &&
11586 		    (ctxt->input->cur[5] == 'A') &&
11587 		    (ctxt->input->cur[6] == 'T') &&
11588 		    (ctxt->input->cur[7] == 'A') &&
11589 		    (ctxt->input->cur[8] == '[')) {
11590 		    SKIP(9);
11591 		    ctxt->instate = XML_PARSER_CDATA_SECTION;
11592 		    break;
11593 		} else if ((cur == '<') && (next == '!') &&
11594 		           (avail < 9)) {
11595 		    goto done;
11596 		} else if (cur == '&') {
11597 		    if ((!terminate) &&
11598 		        (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11599 			goto done;
11600 		    xmlParseReference(ctxt);
11601 		} else {
11602 		    /* TODO Avoid the extra copy, handle directly !!! */
11603 		    /*
11604 		     * Goal of the following test is:
11605 		     *  - minimize calls to the SAX 'character' callback
11606 		     *    when they are mergeable
11607 		     *  - handle an problem for isBlank when we only parse
11608 		     *    a sequence of blank chars and the next one is
11609 		     *    not available to check against '<' presence.
11610 		     *  - tries to homogenize the differences in SAX
11611 		     *    callbacks between the push and pull versions
11612 		     *    of the parser.
11613 		     */
11614 		    if ((ctxt->inputNr == 1) &&
11615 		        (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11616 			if (!terminate) {
11617 			    if (ctxt->progressive) {
11618 				if ((lastlt == NULL) ||
11619 				    (ctxt->input->cur > lastlt))
11620 				    goto done;
11621 			    } else if (xmlParseLookupSequence(ctxt,
11622 			                                      '<', 0, 0) < 0) {
11623 				goto done;
11624 			    }
11625 			}
11626                     }
11627 		    ctxt->checkIndex = 0;
11628 		    xmlParseCharData(ctxt, 0);
11629 		}
11630 		if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11631 		    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11632 		                "detected an error in element content\n");
11633 		    xmlHaltParser(ctxt);
11634 		    break;
11635 		}
11636 		break;
11637 	    }
11638             case XML_PARSER_END_TAG:
11639 		if (avail < 2)
11640 		    goto done;
11641 		if (!terminate) {
11642 		    if (ctxt->progressive) {
11643 		        /* > can be found unescaped in attribute values */
11644 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11645 			    goto done;
11646 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11647 			goto done;
11648 		    }
11649 		}
11650 		if (ctxt->sax2) {
11651 	            xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11652 		    nameNsPop(ctxt);
11653 		}
11654 #ifdef LIBXML_SAX1_ENABLED
11655 		  else
11656 		    xmlParseEndTag1(ctxt, 0);
11657 #endif /* LIBXML_SAX1_ENABLED */
11658 		if (ctxt->instate == XML_PARSER_EOF) {
11659 		    /* Nothing */
11660 		} else if (ctxt->nameNr == 0) {
11661 		    ctxt->instate = XML_PARSER_EPILOG;
11662 		} else {
11663 		    ctxt->instate = XML_PARSER_CONTENT;
11664 		}
11665 		break;
11666             case XML_PARSER_CDATA_SECTION: {
11667 	        /*
11668 		 * The Push mode need to have the SAX callback for
11669 		 * cdataBlock merge back contiguous callbacks.
11670 		 */
11671 		int base;
11672 
11673 		base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11674 		if (base < 0) {
11675 		    if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11676 		        int tmp;
11677 
11678 			tmp = xmlCheckCdataPush(ctxt->input->cur,
11679 			                        XML_PARSER_BIG_BUFFER_SIZE, 0);
11680 			if (tmp < 0) {
11681 			    tmp = -tmp;
11682 			    ctxt->input->cur += tmp;
11683 			    goto encoding_error;
11684 			}
11685 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11686 			    if (ctxt->sax->cdataBlock != NULL)
11687 				ctxt->sax->cdataBlock(ctxt->userData,
11688 				                      ctxt->input->cur, tmp);
11689 			    else if (ctxt->sax->characters != NULL)
11690 				ctxt->sax->characters(ctxt->userData,
11691 				                      ctxt->input->cur, tmp);
11692 			}
11693 			if (ctxt->instate == XML_PARSER_EOF)
11694 			    goto done;
11695 			SKIPL(tmp);
11696 			ctxt->checkIndex = 0;
11697 		    }
11698 		    goto done;
11699 		} else {
11700 		    int tmp;
11701 
11702 		    tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11703 		    if ((tmp < 0) || (tmp != base)) {
11704 			tmp = -tmp;
11705 			ctxt->input->cur += tmp;
11706 			goto encoding_error;
11707 		    }
11708 		    if ((ctxt->sax != NULL) && (base == 0) &&
11709 		        (ctxt->sax->cdataBlock != NULL) &&
11710 		        (!ctxt->disableSAX)) {
11711 			/*
11712 			 * Special case to provide identical behaviour
11713 			 * between pull and push parsers on enpty CDATA
11714 			 * sections
11715 			 */
11716 			 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11717 			     (!strncmp((const char *)&ctxt->input->cur[-9],
11718 			               "<![CDATA[", 9)))
11719 			     ctxt->sax->cdataBlock(ctxt->userData,
11720 			                           BAD_CAST "", 0);
11721 		    } else if ((ctxt->sax != NULL) && (base > 0) &&
11722 			(!ctxt->disableSAX)) {
11723 			if (ctxt->sax->cdataBlock != NULL)
11724 			    ctxt->sax->cdataBlock(ctxt->userData,
11725 						  ctxt->input->cur, base);
11726 			else if (ctxt->sax->characters != NULL)
11727 			    ctxt->sax->characters(ctxt->userData,
11728 						  ctxt->input->cur, base);
11729 		    }
11730 		    if (ctxt->instate == XML_PARSER_EOF)
11731 			goto done;
11732 		    SKIPL(base + 3);
11733 		    ctxt->checkIndex = 0;
11734 		    ctxt->instate = XML_PARSER_CONTENT;
11735 #ifdef DEBUG_PUSH
11736 		    xmlGenericError(xmlGenericErrorContext,
11737 			    "PP: entering CONTENT\n");
11738 #endif
11739 		}
11740 		break;
11741 	    }
11742             case XML_PARSER_MISC:
11743 		SKIP_BLANKS;
11744 		if (ctxt->input->buf == NULL)
11745 		    avail = ctxt->input->length -
11746 		            (ctxt->input->cur - ctxt->input->base);
11747 		else
11748 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11749 		            (ctxt->input->cur - ctxt->input->base);
11750 		if (avail < 2)
11751 		    goto done;
11752 		cur = ctxt->input->cur[0];
11753 		next = ctxt->input->cur[1];
11754 	        if ((cur == '<') && (next == '?')) {
11755 		    if ((!terminate) &&
11756 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11757                         ctxt->progressive = XML_PARSER_PI;
11758 			goto done;
11759                     }
11760 #ifdef DEBUG_PUSH
11761 		    xmlGenericError(xmlGenericErrorContext,
11762 			    "PP: Parsing PI\n");
11763 #endif
11764 		    xmlParsePI(ctxt);
11765 		    if (ctxt->instate == XML_PARSER_EOF)
11766 			goto done;
11767 		    ctxt->instate = XML_PARSER_MISC;
11768                     ctxt->progressive = 1;
11769 		    ctxt->checkIndex = 0;
11770 		} else if ((cur == '<') && (next == '!') &&
11771 		    (ctxt->input->cur[2] == '-') &&
11772 		    (ctxt->input->cur[3] == '-')) {
11773 		    if ((!terminate) &&
11774 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11775                         ctxt->progressive = XML_PARSER_COMMENT;
11776 			goto done;
11777                     }
11778 #ifdef DEBUG_PUSH
11779 		    xmlGenericError(xmlGenericErrorContext,
11780 			    "PP: Parsing Comment\n");
11781 #endif
11782 		    xmlParseComment(ctxt);
11783 		    if (ctxt->instate == XML_PARSER_EOF)
11784 			goto done;
11785 		    ctxt->instate = XML_PARSER_MISC;
11786                     ctxt->progressive = 1;
11787 		    ctxt->checkIndex = 0;
11788 		} else if ((cur == '<') && (next == '!') &&
11789 		    (ctxt->input->cur[2] == 'D') &&
11790 		    (ctxt->input->cur[3] == 'O') &&
11791 		    (ctxt->input->cur[4] == 'C') &&
11792 		    (ctxt->input->cur[5] == 'T') &&
11793 		    (ctxt->input->cur[6] == 'Y') &&
11794 		    (ctxt->input->cur[7] == 'P') &&
11795 		    (ctxt->input->cur[8] == 'E')) {
11796 		    if ((!terminate) &&
11797 		        (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11798                         ctxt->progressive = XML_PARSER_DTD;
11799 			goto done;
11800                     }
11801 #ifdef DEBUG_PUSH
11802 		    xmlGenericError(xmlGenericErrorContext,
11803 			    "PP: Parsing internal subset\n");
11804 #endif
11805 		    ctxt->inSubset = 1;
11806                     ctxt->progressive = 0;
11807 		    ctxt->checkIndex = 0;
11808 		    xmlParseDocTypeDecl(ctxt);
11809 		    if (ctxt->instate == XML_PARSER_EOF)
11810 			goto done;
11811 		    if (RAW == '[') {
11812 			ctxt->instate = XML_PARSER_DTD;
11813 #ifdef DEBUG_PUSH
11814 			xmlGenericError(xmlGenericErrorContext,
11815 				"PP: entering DTD\n");
11816 #endif
11817 		    } else {
11818 			/*
11819 			 * Create and update the external subset.
11820 			 */
11821 			ctxt->inSubset = 2;
11822 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11823 			    (ctxt->sax->externalSubset != NULL))
11824 			    ctxt->sax->externalSubset(ctxt->userData,
11825 				    ctxt->intSubName, ctxt->extSubSystem,
11826 				    ctxt->extSubURI);
11827 			ctxt->inSubset = 0;
11828 			xmlCleanSpecialAttr(ctxt);
11829 			ctxt->instate = XML_PARSER_PROLOG;
11830 #ifdef DEBUG_PUSH
11831 			xmlGenericError(xmlGenericErrorContext,
11832 				"PP: entering PROLOG\n");
11833 #endif
11834 		    }
11835 		} else if ((cur == '<') && (next == '!') &&
11836 		           (avail < 9)) {
11837 		    goto done;
11838 		} else {
11839 		    ctxt->instate = XML_PARSER_START_TAG;
11840 		    ctxt->progressive = XML_PARSER_START_TAG;
11841 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11842 #ifdef DEBUG_PUSH
11843 		    xmlGenericError(xmlGenericErrorContext,
11844 			    "PP: entering START_TAG\n");
11845 #endif
11846 		}
11847 		break;
11848             case XML_PARSER_PROLOG:
11849 		SKIP_BLANKS;
11850 		if (ctxt->input->buf == NULL)
11851 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11852 		else
11853 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11854                             (ctxt->input->cur - ctxt->input->base);
11855 		if (avail < 2)
11856 		    goto done;
11857 		cur = ctxt->input->cur[0];
11858 		next = ctxt->input->cur[1];
11859 	        if ((cur == '<') && (next == '?')) {
11860 		    if ((!terminate) &&
11861 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11862                         ctxt->progressive = XML_PARSER_PI;
11863 			goto done;
11864                     }
11865 #ifdef DEBUG_PUSH
11866 		    xmlGenericError(xmlGenericErrorContext,
11867 			    "PP: Parsing PI\n");
11868 #endif
11869 		    xmlParsePI(ctxt);
11870 		    if (ctxt->instate == XML_PARSER_EOF)
11871 			goto done;
11872 		    ctxt->instate = XML_PARSER_PROLOG;
11873                     ctxt->progressive = 1;
11874 		} else if ((cur == '<') && (next == '!') &&
11875 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11876 		    if ((!terminate) &&
11877 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11878                         ctxt->progressive = XML_PARSER_COMMENT;
11879 			goto done;
11880                     }
11881 #ifdef DEBUG_PUSH
11882 		    xmlGenericError(xmlGenericErrorContext,
11883 			    "PP: Parsing Comment\n");
11884 #endif
11885 		    xmlParseComment(ctxt);
11886 		    if (ctxt->instate == XML_PARSER_EOF)
11887 			goto done;
11888 		    ctxt->instate = XML_PARSER_PROLOG;
11889                     ctxt->progressive = 1;
11890 		} else if ((cur == '<') && (next == '!') &&
11891 		           (avail < 4)) {
11892 		    goto done;
11893 		} else {
11894 		    ctxt->instate = XML_PARSER_START_TAG;
11895 		    if (ctxt->progressive == 0)
11896 			ctxt->progressive = XML_PARSER_START_TAG;
11897 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11898 #ifdef DEBUG_PUSH
11899 		    xmlGenericError(xmlGenericErrorContext,
11900 			    "PP: entering START_TAG\n");
11901 #endif
11902 		}
11903 		break;
11904             case XML_PARSER_EPILOG:
11905 		SKIP_BLANKS;
11906 		if (ctxt->input->buf == NULL)
11907 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11908 		else
11909 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11910                             (ctxt->input->cur - ctxt->input->base);
11911 		if (avail < 2)
11912 		    goto done;
11913 		cur = ctxt->input->cur[0];
11914 		next = ctxt->input->cur[1];
11915 	        if ((cur == '<') && (next == '?')) {
11916 		    if ((!terminate) &&
11917 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11918                         ctxt->progressive = XML_PARSER_PI;
11919 			goto done;
11920                     }
11921 #ifdef DEBUG_PUSH
11922 		    xmlGenericError(xmlGenericErrorContext,
11923 			    "PP: Parsing PI\n");
11924 #endif
11925 		    xmlParsePI(ctxt);
11926 		    if (ctxt->instate == XML_PARSER_EOF)
11927 			goto done;
11928 		    ctxt->instate = XML_PARSER_EPILOG;
11929                     ctxt->progressive = 1;
11930 		} else if ((cur == '<') && (next == '!') &&
11931 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11932 		    if ((!terminate) &&
11933 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11934                         ctxt->progressive = XML_PARSER_COMMENT;
11935 			goto done;
11936                     }
11937 #ifdef DEBUG_PUSH
11938 		    xmlGenericError(xmlGenericErrorContext,
11939 			    "PP: Parsing Comment\n");
11940 #endif
11941 		    xmlParseComment(ctxt);
11942 		    if (ctxt->instate == XML_PARSER_EOF)
11943 			goto done;
11944 		    ctxt->instate = XML_PARSER_EPILOG;
11945                     ctxt->progressive = 1;
11946 		} else if ((cur == '<') && (next == '!') &&
11947 		           (avail < 4)) {
11948 		    goto done;
11949 		} else {
11950 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11951 		    xmlHaltParser(ctxt);
11952 #ifdef DEBUG_PUSH
11953 		    xmlGenericError(xmlGenericErrorContext,
11954 			    "PP: entering EOF\n");
11955 #endif
11956 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11957 			ctxt->sax->endDocument(ctxt->userData);
11958 		    goto done;
11959 		}
11960 		break;
11961             case XML_PARSER_DTD: {
11962 	        /*
11963 		 * Sorry but progressive parsing of the internal subset
11964 		 * is not expected to be supported. We first check that
11965 		 * the full content of the internal subset is available and
11966 		 * the parsing is launched only at that point.
11967 		 * Internal subset ends up with "']' S? '>'" in an unescaped
11968 		 * section and not in a ']]>' sequence which are conditional
11969 		 * sections (whoever argued to keep that crap in XML deserve
11970 		 * a place in hell !).
11971 		 */
11972 		int base, i;
11973 		xmlChar *buf;
11974 	        xmlChar quote = 0;
11975                 size_t use;
11976 
11977 		base = ctxt->input->cur - ctxt->input->base;
11978 		if (base < 0) return(0);
11979 		if (ctxt->checkIndex > base)
11980 		    base = ctxt->checkIndex;
11981 		buf = xmlBufContent(ctxt->input->buf->buffer);
11982                 use = xmlBufUse(ctxt->input->buf->buffer);
11983 		for (;(unsigned int) base < use; base++) {
11984 		    if (quote != 0) {
11985 		        if (buf[base] == quote)
11986 			    quote = 0;
11987 			continue;
11988 		    }
11989 		    if ((quote == 0) && (buf[base] == '<')) {
11990 		        int found  = 0;
11991 			/* special handling of comments */
11992 		        if (((unsigned int) base + 4 < use) &&
11993 			    (buf[base + 1] == '!') &&
11994 			    (buf[base + 2] == '-') &&
11995 			    (buf[base + 3] == '-')) {
11996 			    for (;(unsigned int) base + 3 < use; base++) {
11997 				if ((buf[base] == '-') &&
11998 				    (buf[base + 1] == '-') &&
11999 				    (buf[base + 2] == '>')) {
12000 				    found = 1;
12001 				    base += 2;
12002 				    break;
12003 				}
12004 		            }
12005 			    if (!found) {
12006 #if 0
12007 			        fprintf(stderr, "unfinished comment\n");
12008 #endif
12009 			        break; /* for */
12010 		            }
12011 		            continue;
12012 			}
12013 		    }
12014 		    if (buf[base] == '"') {
12015 		        quote = '"';
12016 			continue;
12017 		    }
12018 		    if (buf[base] == '\'') {
12019 		        quote = '\'';
12020 			continue;
12021 		    }
12022 		    if (buf[base] == ']') {
12023 #if 0
12024 		        fprintf(stderr, "%c%c%c%c: ", buf[base],
12025 			        buf[base + 1], buf[base + 2], buf[base + 3]);
12026 #endif
12027 		        if ((unsigned int) base +1 >= use)
12028 			    break;
12029 			if (buf[base + 1] == ']') {
12030 			    /* conditional crap, skip both ']' ! */
12031 			    base++;
12032 			    continue;
12033 			}
12034 		        for (i = 1; (unsigned int) base + i < use; i++) {
12035 			    if (buf[base + i] == '>') {
12036 #if 0
12037 			        fprintf(stderr, "found\n");
12038 #endif
12039 			        goto found_end_int_subset;
12040 			    }
12041 			    if (!IS_BLANK_CH(buf[base + i])) {
12042 #if 0
12043 			        fprintf(stderr, "not found\n");
12044 #endif
12045 			        goto not_end_of_int_subset;
12046 			    }
12047 			}
12048 #if 0
12049 			fprintf(stderr, "end of stream\n");
12050 #endif
12051 		        break;
12052 
12053 		    }
12054 not_end_of_int_subset:
12055                     continue; /* for */
12056 		}
12057 		/*
12058 		 * We didn't found the end of the Internal subset
12059 		 */
12060                 if (quote == 0)
12061                     ctxt->checkIndex = base;
12062                 else
12063                     ctxt->checkIndex = 0;
12064 #ifdef DEBUG_PUSH
12065 		if (next == 0)
12066 		    xmlGenericError(xmlGenericErrorContext,
12067 			    "PP: lookup of int subset end filed\n");
12068 #endif
12069 	        goto done;
12070 
12071 found_end_int_subset:
12072                 ctxt->checkIndex = 0;
12073 		xmlParseInternalSubset(ctxt);
12074 		if (ctxt->instate == XML_PARSER_EOF)
12075 		    goto done;
12076 		ctxt->inSubset = 2;
12077 		if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12078 		    (ctxt->sax->externalSubset != NULL))
12079 		    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12080 			    ctxt->extSubSystem, ctxt->extSubURI);
12081 		ctxt->inSubset = 0;
12082 		xmlCleanSpecialAttr(ctxt);
12083 		if (ctxt->instate == XML_PARSER_EOF)
12084 		    goto done;
12085 		ctxt->instate = XML_PARSER_PROLOG;
12086 		ctxt->checkIndex = 0;
12087 #ifdef DEBUG_PUSH
12088 		xmlGenericError(xmlGenericErrorContext,
12089 			"PP: entering PROLOG\n");
12090 #endif
12091                 break;
12092 	    }
12093             case XML_PARSER_COMMENT:
12094 		xmlGenericError(xmlGenericErrorContext,
12095 			"PP: internal error, state == COMMENT\n");
12096 		ctxt->instate = XML_PARSER_CONTENT;
12097 #ifdef DEBUG_PUSH
12098 		xmlGenericError(xmlGenericErrorContext,
12099 			"PP: entering CONTENT\n");
12100 #endif
12101 		break;
12102             case XML_PARSER_IGNORE:
12103 		xmlGenericError(xmlGenericErrorContext,
12104 			"PP: internal error, state == IGNORE");
12105 	        ctxt->instate = XML_PARSER_DTD;
12106 #ifdef DEBUG_PUSH
12107 		xmlGenericError(xmlGenericErrorContext,
12108 			"PP: entering DTD\n");
12109 #endif
12110 	        break;
12111             case XML_PARSER_PI:
12112 		xmlGenericError(xmlGenericErrorContext,
12113 			"PP: internal error, state == PI\n");
12114 		ctxt->instate = XML_PARSER_CONTENT;
12115 #ifdef DEBUG_PUSH
12116 		xmlGenericError(xmlGenericErrorContext,
12117 			"PP: entering CONTENT\n");
12118 #endif
12119 		break;
12120             case XML_PARSER_ENTITY_DECL:
12121 		xmlGenericError(xmlGenericErrorContext,
12122 			"PP: internal error, state == ENTITY_DECL\n");
12123 		ctxt->instate = XML_PARSER_DTD;
12124 #ifdef DEBUG_PUSH
12125 		xmlGenericError(xmlGenericErrorContext,
12126 			"PP: entering DTD\n");
12127 #endif
12128 		break;
12129             case XML_PARSER_ENTITY_VALUE:
12130 		xmlGenericError(xmlGenericErrorContext,
12131 			"PP: internal error, state == ENTITY_VALUE\n");
12132 		ctxt->instate = XML_PARSER_CONTENT;
12133 #ifdef DEBUG_PUSH
12134 		xmlGenericError(xmlGenericErrorContext,
12135 			"PP: entering DTD\n");
12136 #endif
12137 		break;
12138             case XML_PARSER_ATTRIBUTE_VALUE:
12139 		xmlGenericError(xmlGenericErrorContext,
12140 			"PP: internal error, state == ATTRIBUTE_VALUE\n");
12141 		ctxt->instate = XML_PARSER_START_TAG;
12142 #ifdef DEBUG_PUSH
12143 		xmlGenericError(xmlGenericErrorContext,
12144 			"PP: entering START_TAG\n");
12145 #endif
12146 		break;
12147             case XML_PARSER_SYSTEM_LITERAL:
12148 		xmlGenericError(xmlGenericErrorContext,
12149 			"PP: internal error, state == SYSTEM_LITERAL\n");
12150 		ctxt->instate = XML_PARSER_START_TAG;
12151 #ifdef DEBUG_PUSH
12152 		xmlGenericError(xmlGenericErrorContext,
12153 			"PP: entering START_TAG\n");
12154 #endif
12155 		break;
12156             case XML_PARSER_PUBLIC_LITERAL:
12157 		xmlGenericError(xmlGenericErrorContext,
12158 			"PP: internal error, state == PUBLIC_LITERAL\n");
12159 		ctxt->instate = XML_PARSER_START_TAG;
12160 #ifdef DEBUG_PUSH
12161 		xmlGenericError(xmlGenericErrorContext,
12162 			"PP: entering START_TAG\n");
12163 #endif
12164 		break;
12165 	}
12166     }
12167 done:
12168 #ifdef DEBUG_PUSH
12169     xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12170 #endif
12171     return(ret);
12172 encoding_error:
12173     if (ctxt->input->end - ctxt->input->cur < 4) {
12174 	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12175 		     "Input is not proper UTF-8, indicate encoding !\n",
12176 		     NULL, NULL);
12177     } else {
12178         char buffer[150];
12179 
12180 	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12181 			ctxt->input->cur[0], ctxt->input->cur[1],
12182 			ctxt->input->cur[2], ctxt->input->cur[3]);
12183 	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12184 		     "Input is not proper UTF-8, indicate encoding !\n%s",
12185 		     BAD_CAST buffer, NULL);
12186     }
12187     return(0);
12188 }
12189 
12190 /**
12191  * xmlParseCheckTransition:
12192  * @ctxt:  an XML parser context
12193  * @chunk:  a char array
12194  * @size:  the size in byte of the chunk
12195  *
12196  * Check depending on the current parser state if the chunk given must be
12197  * processed immediately or one need more data to advance on parsing.
12198  *
12199  * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12200  */
12201 static int
xmlParseCheckTransition(xmlParserCtxtPtr ctxt,const char * chunk,int size)12202 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12203     if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12204         return(-1);
12205     if (ctxt->instate == XML_PARSER_START_TAG) {
12206         if (memchr(chunk, '>', size) != NULL)
12207             return(1);
12208         return(0);
12209     }
12210     if (ctxt->progressive == XML_PARSER_COMMENT) {
12211         if (memchr(chunk, '>', size) != NULL)
12212             return(1);
12213         return(0);
12214     }
12215     if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12216         if (memchr(chunk, '>', size) != NULL)
12217             return(1);
12218         return(0);
12219     }
12220     if (ctxt->progressive == XML_PARSER_PI) {
12221         if (memchr(chunk, '>', size) != NULL)
12222             return(1);
12223         return(0);
12224     }
12225     if (ctxt->instate == XML_PARSER_END_TAG) {
12226         if (memchr(chunk, '>', size) != NULL)
12227             return(1);
12228         return(0);
12229     }
12230     if ((ctxt->progressive == XML_PARSER_DTD) ||
12231         (ctxt->instate == XML_PARSER_DTD)) {
12232         if (memchr(chunk, '>', size) != NULL)
12233             return(1);
12234         return(0);
12235     }
12236     return(1);
12237 }
12238 
12239 /**
12240  * xmlParseChunk:
12241  * @ctxt:  an XML parser context
12242  * @chunk:  an char array
12243  * @size:  the size in byte of the chunk
12244  * @terminate:  last chunk indicator
12245  *
12246  * Parse a Chunk of memory
12247  *
12248  * Returns zero if no error, the xmlParserErrors otherwise.
12249  */
12250 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)12251 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12252               int terminate) {
12253     int end_in_lf = 0;
12254     int remain = 0;
12255     size_t old_avail = 0;
12256     size_t avail = 0;
12257 
12258     if (ctxt == NULL)
12259         return(XML_ERR_INTERNAL_ERROR);
12260     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12261         return(ctxt->errNo);
12262     if (ctxt->instate == XML_PARSER_EOF)
12263         return(-1);
12264     if (ctxt->instate == XML_PARSER_START)
12265         xmlDetectSAX2(ctxt);
12266     if ((size > 0) && (chunk != NULL) && (!terminate) &&
12267         (chunk[size - 1] == '\r')) {
12268 	end_in_lf = 1;
12269 	size--;
12270     }
12271 
12272 xmldecl_done:
12273 
12274     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12275         (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12276 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12277 	size_t cur = ctxt->input->cur - ctxt->input->base;
12278 	int res;
12279 
12280         old_avail = xmlBufUse(ctxt->input->buf->buffer);
12281         /*
12282          * Specific handling if we autodetected an encoding, we should not
12283          * push more than the first line ... which depend on the encoding
12284          * And only push the rest once the final encoding was detected
12285          */
12286         if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12287             (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12288             unsigned int len = 45;
12289 
12290             if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12291                                BAD_CAST "UTF-16")) ||
12292                 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12293                                BAD_CAST "UTF16")))
12294                 len = 90;
12295             else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12296                                     BAD_CAST "UCS-4")) ||
12297                      (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12298                                     BAD_CAST "UCS4")))
12299                 len = 180;
12300 
12301             if (ctxt->input->buf->rawconsumed < len)
12302                 len -= ctxt->input->buf->rawconsumed;
12303 
12304             /*
12305              * Change size for reading the initial declaration only
12306              * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12307              * will blindly copy extra bytes from memory.
12308              */
12309             if ((unsigned int) size > len) {
12310                 remain = size - len;
12311                 size = len;
12312             } else {
12313                 remain = 0;
12314             }
12315         }
12316 	res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12317         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12318 	if (res < 0) {
12319 	    ctxt->errNo = XML_PARSER_EOF;
12320 	    xmlHaltParser(ctxt);
12321 	    return (XML_PARSER_EOF);
12322 	}
12323 #ifdef DEBUG_PUSH
12324 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12325 #endif
12326 
12327     } else if (ctxt->instate != XML_PARSER_EOF) {
12328 	if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12329 	    xmlParserInputBufferPtr in = ctxt->input->buf;
12330 	    if ((in->encoder != NULL) && (in->buffer != NULL) &&
12331 		    (in->raw != NULL)) {
12332 		int nbchars;
12333 		size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12334 		size_t current = ctxt->input->cur - ctxt->input->base;
12335 
12336 		nbchars = xmlCharEncInput(in, terminate);
12337 		xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12338 		if (nbchars < 0) {
12339 		    /* TODO 2.6.0 */
12340 		    xmlGenericError(xmlGenericErrorContext,
12341 				    "xmlParseChunk: encoder error\n");
12342                     xmlHaltParser(ctxt);
12343 		    return(XML_ERR_INVALID_ENCODING);
12344 		}
12345 	    }
12346 	}
12347     }
12348     if (remain != 0) {
12349         xmlParseTryOrFinish(ctxt, 0);
12350     } else {
12351         if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12352             avail = xmlBufUse(ctxt->input->buf->buffer);
12353         /*
12354          * Depending on the current state it may not be such
12355          * a good idea to try parsing if there is nothing in the chunk
12356          * which would be worth doing a parser state transition and we
12357          * need to wait for more data
12358          */
12359         if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12360             (old_avail == 0) || (avail == 0) ||
12361             (xmlParseCheckTransition(ctxt,
12362                        (const char *)&ctxt->input->base[old_avail],
12363                                      avail - old_avail)))
12364             xmlParseTryOrFinish(ctxt, terminate);
12365     }
12366     if (ctxt->instate == XML_PARSER_EOF)
12367         return(ctxt->errNo);
12368 
12369     if ((ctxt->input != NULL) &&
12370          (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12371          ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12372         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12373         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12374         xmlHaltParser(ctxt);
12375     }
12376     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12377         return(ctxt->errNo);
12378 
12379     if (remain != 0) {
12380         chunk += size;
12381         size = remain;
12382         remain = 0;
12383         goto xmldecl_done;
12384     }
12385     if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12386         (ctxt->input->buf != NULL)) {
12387 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12388 					 ctxt->input);
12389 	size_t current = ctxt->input->cur - ctxt->input->base;
12390 
12391 	xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12392 
12393 	xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12394 			      base, current);
12395     }
12396     if (terminate) {
12397 	/*
12398 	 * Check for termination
12399 	 */
12400 	int cur_avail = 0;
12401 
12402 	if (ctxt->input != NULL) {
12403 	    if (ctxt->input->buf == NULL)
12404 		cur_avail = ctxt->input->length -
12405 			    (ctxt->input->cur - ctxt->input->base);
12406 	    else
12407 		cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12408 			              (ctxt->input->cur - ctxt->input->base);
12409 	}
12410 
12411 	if ((ctxt->instate != XML_PARSER_EOF) &&
12412 	    (ctxt->instate != XML_PARSER_EPILOG)) {
12413 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12414 	}
12415 	if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12416 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12417 	}
12418 	if (ctxt->instate != XML_PARSER_EOF) {
12419 	    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12420 		ctxt->sax->endDocument(ctxt->userData);
12421 	}
12422 	ctxt->instate = XML_PARSER_EOF;
12423     }
12424     if (ctxt->wellFormed == 0)
12425 	return((xmlParserErrors) ctxt->errNo);
12426     else
12427         return(0);
12428 }
12429 
12430 /************************************************************************
12431  *									*
12432  *		I/O front end functions to the parser			*
12433  *									*
12434  ************************************************************************/
12435 
12436 /**
12437  * xmlCreatePushParserCtxt:
12438  * @sax:  a SAX handler
12439  * @user_data:  The user data returned on SAX callbacks
12440  * @chunk:  a pointer to an array of chars
12441  * @size:  number of chars in the array
12442  * @filename:  an optional file name or URI
12443  *
12444  * Create a parser context for using the XML parser in push mode.
12445  * If @buffer and @size are non-NULL, the data is used to detect
12446  * the encoding.  The remaining characters will be parsed so they
12447  * don't need to be fed in again through xmlParseChunk.
12448  * To allow content encoding detection, @size should be >= 4
12449  * The value of @filename is used for fetching external entities
12450  * and error/warning reports.
12451  *
12452  * Returns the new parser context or NULL
12453  */
12454 
12455 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)12456 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12457                         const char *chunk, int size, const char *filename) {
12458     xmlParserCtxtPtr ctxt;
12459     xmlParserInputPtr inputStream;
12460     xmlParserInputBufferPtr buf;
12461     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12462 
12463     /*
12464      * plug some encoding conversion routines
12465      */
12466     if ((chunk != NULL) && (size >= 4))
12467 	enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12468 
12469     buf = xmlAllocParserInputBuffer(enc);
12470     if (buf == NULL) return(NULL);
12471 
12472     ctxt = xmlNewParserCtxt();
12473     if (ctxt == NULL) {
12474         xmlErrMemory(NULL, "creating parser: out of memory\n");
12475 	xmlFreeParserInputBuffer(buf);
12476 	return(NULL);
12477     }
12478     ctxt->dictNames = 1;
12479     if (sax != NULL) {
12480 #ifdef LIBXML_SAX1_ENABLED
12481 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12482 #endif /* LIBXML_SAX1_ENABLED */
12483 	    xmlFree(ctxt->sax);
12484 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12485 	if (ctxt->sax == NULL) {
12486 	    xmlErrMemory(ctxt, NULL);
12487 	    xmlFreeParserInputBuffer(buf);
12488 	    xmlFreeParserCtxt(ctxt);
12489 	    return(NULL);
12490 	}
12491 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12492 	if (sax->initialized == XML_SAX2_MAGIC)
12493 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12494 	else
12495 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12496 	if (user_data != NULL)
12497 	    ctxt->userData = user_data;
12498     }
12499     if (filename == NULL) {
12500 	ctxt->directory = NULL;
12501     } else {
12502         ctxt->directory = xmlParserGetDirectory(filename);
12503     }
12504 
12505     inputStream = xmlNewInputStream(ctxt);
12506     if (inputStream == NULL) {
12507 	xmlFreeParserCtxt(ctxt);
12508 	xmlFreeParserInputBuffer(buf);
12509 	return(NULL);
12510     }
12511 
12512     if (filename == NULL)
12513 	inputStream->filename = NULL;
12514     else {
12515 	inputStream->filename = (char *)
12516 	    xmlCanonicPath((const xmlChar *) filename);
12517 	if (inputStream->filename == NULL) {
12518 	    xmlFreeParserCtxt(ctxt);
12519 	    xmlFreeParserInputBuffer(buf);
12520 	    return(NULL);
12521 	}
12522     }
12523     inputStream->buf = buf;
12524     xmlBufResetInput(inputStream->buf->buffer, inputStream);
12525     inputPush(ctxt, inputStream);
12526 
12527     /*
12528      * If the caller didn't provide an initial 'chunk' for determining
12529      * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12530      * that it can be automatically determined later
12531      */
12532     if ((size == 0) || (chunk == NULL)) {
12533 	ctxt->charset = XML_CHAR_ENCODING_NONE;
12534     } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12535 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12536 	size_t cur = ctxt->input->cur - ctxt->input->base;
12537 
12538 	xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12539 
12540         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12541 #ifdef DEBUG_PUSH
12542 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12543 #endif
12544     }
12545 
12546     if (enc != XML_CHAR_ENCODING_NONE) {
12547         xmlSwitchEncoding(ctxt, enc);
12548     }
12549 
12550     return(ctxt);
12551 }
12552 #endif /* LIBXML_PUSH_ENABLED */
12553 
12554 /**
12555  * xmlHaltParser:
12556  * @ctxt:  an XML parser context
12557  *
12558  * Blocks further parser processing don't override error
12559  * for internal use
12560  */
12561 static void
xmlHaltParser(xmlParserCtxtPtr ctxt)12562 xmlHaltParser(xmlParserCtxtPtr ctxt) {
12563     if (ctxt == NULL)
12564         return;
12565     ctxt->instate = XML_PARSER_EOF;
12566     ctxt->disableSAX = 1;
12567     while (ctxt->inputNr > 1)
12568         xmlFreeInputStream(inputPop(ctxt));
12569     if (ctxt->input != NULL) {
12570         /*
12571 	 * in case there was a specific allocation deallocate before
12572 	 * overriding base
12573 	 */
12574         if (ctxt->input->free != NULL) {
12575 	    ctxt->input->free((xmlChar *) ctxt->input->base);
12576 	    ctxt->input->free = NULL;
12577 	}
12578         if (ctxt->input->buf != NULL) {
12579             xmlFreeParserInputBuffer(ctxt->input->buf);
12580             ctxt->input->buf = NULL;
12581         }
12582 	ctxt->input->cur = BAD_CAST"";
12583         ctxt->input->length = 0;
12584 	ctxt->input->base = ctxt->input->cur;
12585         ctxt->input->end = ctxt->input->cur;
12586     }
12587 }
12588 
12589 /**
12590  * xmlStopParser:
12591  * @ctxt:  an XML parser context
12592  *
12593  * Blocks further parser processing
12594  */
12595 void
xmlStopParser(xmlParserCtxtPtr ctxt)12596 xmlStopParser(xmlParserCtxtPtr ctxt) {
12597     if (ctxt == NULL)
12598         return;
12599     xmlHaltParser(ctxt);
12600     ctxt->errNo = XML_ERR_USER_STOP;
12601 }
12602 
12603 /**
12604  * xmlCreateIOParserCtxt:
12605  * @sax:  a SAX handler
12606  * @user_data:  The user data returned on SAX callbacks
12607  * @ioread:  an I/O read function
12608  * @ioclose:  an I/O close function
12609  * @ioctx:  an I/O handler
12610  * @enc:  the charset encoding if known
12611  *
12612  * Create a parser context for using the XML parser with an existing
12613  * I/O stream
12614  *
12615  * Returns the new parser context or NULL
12616  */
12617 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)12618 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12619 	xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12620 	void *ioctx, xmlCharEncoding enc) {
12621     xmlParserCtxtPtr ctxt;
12622     xmlParserInputPtr inputStream;
12623     xmlParserInputBufferPtr buf;
12624 
12625     if (ioread == NULL) return(NULL);
12626 
12627     buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12628     if (buf == NULL) {
12629         if (ioclose != NULL)
12630             ioclose(ioctx);
12631         return (NULL);
12632     }
12633 
12634     ctxt = xmlNewParserCtxt();
12635     if (ctxt == NULL) {
12636 	xmlFreeParserInputBuffer(buf);
12637 	return(NULL);
12638     }
12639     if (sax != NULL) {
12640 #ifdef LIBXML_SAX1_ENABLED
12641 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12642 #endif /* LIBXML_SAX1_ENABLED */
12643 	    xmlFree(ctxt->sax);
12644 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12645 	if (ctxt->sax == NULL) {
12646 	    xmlFreeParserInputBuffer(buf);
12647 	    xmlErrMemory(ctxt, NULL);
12648 	    xmlFreeParserCtxt(ctxt);
12649 	    return(NULL);
12650 	}
12651 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12652 	if (sax->initialized == XML_SAX2_MAGIC)
12653 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12654 	else
12655 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12656 	if (user_data != NULL)
12657 	    ctxt->userData = user_data;
12658     }
12659 
12660     inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12661     if (inputStream == NULL) {
12662 	xmlFreeParserCtxt(ctxt);
12663 	return(NULL);
12664     }
12665     inputPush(ctxt, inputStream);
12666 
12667     return(ctxt);
12668 }
12669 
12670 #ifdef LIBXML_VALID_ENABLED
12671 /************************************************************************
12672  *									*
12673  *		Front ends when parsing a DTD				*
12674  *									*
12675  ************************************************************************/
12676 
12677 /**
12678  * xmlIOParseDTD:
12679  * @sax:  the SAX handler block or NULL
12680  * @input:  an Input Buffer
12681  * @enc:  the charset encoding if known
12682  *
12683  * Load and parse a DTD
12684  *
12685  * Returns the resulting xmlDtdPtr or NULL in case of error.
12686  * @input will be freed by the function in any case.
12687  */
12688 
12689 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12690 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12691 	      xmlCharEncoding enc) {
12692     xmlDtdPtr ret = NULL;
12693     xmlParserCtxtPtr ctxt;
12694     xmlParserInputPtr pinput = NULL;
12695     xmlChar start[4];
12696 
12697     if (input == NULL)
12698 	return(NULL);
12699 
12700     ctxt = xmlNewParserCtxt();
12701     if (ctxt == NULL) {
12702         xmlFreeParserInputBuffer(input);
12703 	return(NULL);
12704     }
12705 
12706     /* We are loading a DTD */
12707     ctxt->options |= XML_PARSE_DTDLOAD;
12708 
12709     /*
12710      * Set-up the SAX context
12711      */
12712     if (sax != NULL) {
12713 	if (ctxt->sax != NULL)
12714 	    xmlFree(ctxt->sax);
12715         ctxt->sax = sax;
12716         ctxt->userData = ctxt;
12717     }
12718     xmlDetectSAX2(ctxt);
12719 
12720     /*
12721      * generate a parser input from the I/O handler
12722      */
12723 
12724     pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12725     if (pinput == NULL) {
12726         if (sax != NULL) ctxt->sax = NULL;
12727         xmlFreeParserInputBuffer(input);
12728 	xmlFreeParserCtxt(ctxt);
12729 	return(NULL);
12730     }
12731 
12732     /*
12733      * plug some encoding conversion routines here.
12734      */
12735     if (xmlPushInput(ctxt, pinput) < 0) {
12736         if (sax != NULL) ctxt->sax = NULL;
12737 	xmlFreeParserCtxt(ctxt);
12738 	return(NULL);
12739     }
12740     if (enc != XML_CHAR_ENCODING_NONE) {
12741         xmlSwitchEncoding(ctxt, enc);
12742     }
12743 
12744     pinput->filename = NULL;
12745     pinput->line = 1;
12746     pinput->col = 1;
12747     pinput->base = ctxt->input->cur;
12748     pinput->cur = ctxt->input->cur;
12749     pinput->free = NULL;
12750 
12751     /*
12752      * let's parse that entity knowing it's an external subset.
12753      */
12754     ctxt->inSubset = 2;
12755     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12756     if (ctxt->myDoc == NULL) {
12757 	xmlErrMemory(ctxt, "New Doc failed");
12758 	return(NULL);
12759     }
12760     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12761     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12762 	                               BAD_CAST "none", BAD_CAST "none");
12763 
12764     if ((enc == XML_CHAR_ENCODING_NONE) &&
12765         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12766 	/*
12767 	 * Get the 4 first bytes and decode the charset
12768 	 * if enc != XML_CHAR_ENCODING_NONE
12769 	 * plug some encoding conversion routines.
12770 	 */
12771 	start[0] = RAW;
12772 	start[1] = NXT(1);
12773 	start[2] = NXT(2);
12774 	start[3] = NXT(3);
12775 	enc = xmlDetectCharEncoding(start, 4);
12776 	if (enc != XML_CHAR_ENCODING_NONE) {
12777 	    xmlSwitchEncoding(ctxt, enc);
12778 	}
12779     }
12780 
12781     xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12782 
12783     if (ctxt->myDoc != NULL) {
12784 	if (ctxt->wellFormed) {
12785 	    ret = ctxt->myDoc->extSubset;
12786 	    ctxt->myDoc->extSubset = NULL;
12787 	    if (ret != NULL) {
12788 		xmlNodePtr tmp;
12789 
12790 		ret->doc = NULL;
12791 		tmp = ret->children;
12792 		while (tmp != NULL) {
12793 		    tmp->doc = NULL;
12794 		    tmp = tmp->next;
12795 		}
12796 	    }
12797 	} else {
12798 	    ret = NULL;
12799 	}
12800         xmlFreeDoc(ctxt->myDoc);
12801         ctxt->myDoc = NULL;
12802     }
12803     if (sax != NULL) ctxt->sax = NULL;
12804     xmlFreeParserCtxt(ctxt);
12805 
12806     return(ret);
12807 }
12808 
12809 /**
12810  * xmlSAXParseDTD:
12811  * @sax:  the SAX handler block
12812  * @ExternalID:  a NAME* containing the External ID of the DTD
12813  * @SystemID:  a NAME* containing the URL to the DTD
12814  *
12815  * Load and parse an external subset.
12816  *
12817  * Returns the resulting xmlDtdPtr or NULL in case of error.
12818  */
12819 
12820 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12821 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12822                           const xmlChar *SystemID) {
12823     xmlDtdPtr ret = NULL;
12824     xmlParserCtxtPtr ctxt;
12825     xmlParserInputPtr input = NULL;
12826     xmlCharEncoding enc;
12827     xmlChar* systemIdCanonic;
12828 
12829     if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12830 
12831     ctxt = xmlNewParserCtxt();
12832     if (ctxt == NULL) {
12833 	return(NULL);
12834     }
12835 
12836     /* We are loading a DTD */
12837     ctxt->options |= XML_PARSE_DTDLOAD;
12838 
12839     /*
12840      * Set-up the SAX context
12841      */
12842     if (sax != NULL) {
12843 	if (ctxt->sax != NULL)
12844 	    xmlFree(ctxt->sax);
12845         ctxt->sax = sax;
12846         ctxt->userData = ctxt;
12847     }
12848 
12849     /*
12850      * Canonicalise the system ID
12851      */
12852     systemIdCanonic = xmlCanonicPath(SystemID);
12853     if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12854 	xmlFreeParserCtxt(ctxt);
12855 	return(NULL);
12856     }
12857 
12858     /*
12859      * Ask the Entity resolver to load the damn thing
12860      */
12861 
12862     if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12863 	input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12864 	                                 systemIdCanonic);
12865     if (input == NULL) {
12866         if (sax != NULL) ctxt->sax = NULL;
12867 	xmlFreeParserCtxt(ctxt);
12868 	if (systemIdCanonic != NULL)
12869 	    xmlFree(systemIdCanonic);
12870 	return(NULL);
12871     }
12872 
12873     /*
12874      * plug some encoding conversion routines here.
12875      */
12876     if (xmlPushInput(ctxt, input) < 0) {
12877         if (sax != NULL) ctxt->sax = NULL;
12878 	xmlFreeParserCtxt(ctxt);
12879 	if (systemIdCanonic != NULL)
12880 	    xmlFree(systemIdCanonic);
12881 	return(NULL);
12882     }
12883     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12884 	enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12885 	xmlSwitchEncoding(ctxt, enc);
12886     }
12887 
12888     if (input->filename == NULL)
12889 	input->filename = (char *) systemIdCanonic;
12890     else
12891 	xmlFree(systemIdCanonic);
12892     input->line = 1;
12893     input->col = 1;
12894     input->base = ctxt->input->cur;
12895     input->cur = ctxt->input->cur;
12896     input->free = NULL;
12897 
12898     /*
12899      * let's parse that entity knowing it's an external subset.
12900      */
12901     ctxt->inSubset = 2;
12902     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12903     if (ctxt->myDoc == NULL) {
12904 	xmlErrMemory(ctxt, "New Doc failed");
12905         if (sax != NULL) ctxt->sax = NULL;
12906 	xmlFreeParserCtxt(ctxt);
12907 	return(NULL);
12908     }
12909     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12910     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12911 	                               ExternalID, SystemID);
12912     xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12913 
12914     if (ctxt->myDoc != NULL) {
12915 	if (ctxt->wellFormed) {
12916 	    ret = ctxt->myDoc->extSubset;
12917 	    ctxt->myDoc->extSubset = NULL;
12918 	    if (ret != NULL) {
12919 		xmlNodePtr tmp;
12920 
12921 		ret->doc = NULL;
12922 		tmp = ret->children;
12923 		while (tmp != NULL) {
12924 		    tmp->doc = NULL;
12925 		    tmp = tmp->next;
12926 		}
12927 	    }
12928 	} else {
12929 	    ret = NULL;
12930 	}
12931         xmlFreeDoc(ctxt->myDoc);
12932         ctxt->myDoc = NULL;
12933     }
12934     if (sax != NULL) ctxt->sax = NULL;
12935     xmlFreeParserCtxt(ctxt);
12936 
12937     return(ret);
12938 }
12939 
12940 
12941 /**
12942  * xmlParseDTD:
12943  * @ExternalID:  a NAME* containing the External ID of the DTD
12944  * @SystemID:  a NAME* containing the URL to the DTD
12945  *
12946  * Load and parse an external subset.
12947  *
12948  * Returns the resulting xmlDtdPtr or NULL in case of error.
12949  */
12950 
12951 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)12952 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12953     return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12954 }
12955 #endif /* LIBXML_VALID_ENABLED */
12956 
12957 /************************************************************************
12958  *									*
12959  *		Front ends when parsing an Entity			*
12960  *									*
12961  ************************************************************************/
12962 
12963 /**
12964  * xmlParseCtxtExternalEntity:
12965  * @ctx:  the existing parsing context
12966  * @URL:  the URL for the entity to load
12967  * @ID:  the System ID for the entity to load
12968  * @lst:  the return value for the set of parsed nodes
12969  *
12970  * Parse an external general entity within an existing parsing context
12971  * An external general parsed entity is well-formed if it matches the
12972  * production labeled extParsedEnt.
12973  *
12974  * [78] extParsedEnt ::= TextDecl? content
12975  *
12976  * Returns 0 if the entity is well formed, -1 in case of args problem and
12977  *    the parser error code otherwise
12978  */
12979 
12980 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12981 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12982 	               const xmlChar *ID, xmlNodePtr *lst) {
12983     void *userData;
12984 
12985     if (ctx == NULL) return(-1);
12986     /*
12987      * If the user provided their own SAX callbacks, then reuse the
12988      * userData callback field, otherwise the expected setup in a
12989      * DOM builder is to have userData == ctxt
12990      */
12991     if (ctx->userData == ctx)
12992         userData = NULL;
12993     else
12994         userData = ctx->userData;
12995     return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12996                                          userData, ctx->depth + 1,
12997                                          URL, ID, lst);
12998 }
12999 
13000 /**
13001  * xmlParseExternalEntityPrivate:
13002  * @doc:  the document the chunk pertains to
13003  * @oldctxt:  the previous parser context if available
13004  * @sax:  the SAX handler block (possibly NULL)
13005  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13006  * @depth:  Used for loop detection, use 0
13007  * @URL:  the URL for the entity to load
13008  * @ID:  the System ID for the entity to load
13009  * @list:  the return value for the set of parsed nodes
13010  *
13011  * Private version of xmlParseExternalEntity()
13012  *
13013  * Returns 0 if the entity is well formed, -1 in case of args problem and
13014  *    the parser error code otherwise
13015  */
13016 
13017 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)13018 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13019 	              xmlSAXHandlerPtr sax,
13020 		      void *user_data, int depth, const xmlChar *URL,
13021 		      const xmlChar *ID, xmlNodePtr *list) {
13022     xmlParserCtxtPtr ctxt;
13023     xmlDocPtr newDoc;
13024     xmlNodePtr newRoot;
13025     xmlSAXHandlerPtr oldsax = NULL;
13026     xmlParserErrors ret = XML_ERR_OK;
13027     xmlChar start[4];
13028     xmlCharEncoding enc;
13029 
13030     if (((depth > 40) &&
13031 	((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13032 	(depth > 1024)) {
13033 	return(XML_ERR_ENTITY_LOOP);
13034     }
13035 
13036     if (list != NULL)
13037         *list = NULL;
13038     if ((URL == NULL) && (ID == NULL))
13039 	return(XML_ERR_INTERNAL_ERROR);
13040     if (doc == NULL)
13041 	return(XML_ERR_INTERNAL_ERROR);
13042 
13043 
13044     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13045     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13046     ctxt->userData = ctxt;
13047     if (sax != NULL) {
13048 	oldsax = ctxt->sax;
13049         ctxt->sax = sax;
13050 	if (user_data != NULL)
13051 	    ctxt->userData = user_data;
13052     }
13053     xmlDetectSAX2(ctxt);
13054     newDoc = xmlNewDoc(BAD_CAST "1.0");
13055     if (newDoc == NULL) {
13056 	xmlFreeParserCtxt(ctxt);
13057 	return(XML_ERR_INTERNAL_ERROR);
13058     }
13059     newDoc->properties = XML_DOC_INTERNAL;
13060     if (doc) {
13061         newDoc->intSubset = doc->intSubset;
13062         newDoc->extSubset = doc->extSubset;
13063         if (doc->dict) {
13064             newDoc->dict = doc->dict;
13065             xmlDictReference(newDoc->dict);
13066         }
13067         if (doc->URL != NULL) {
13068             newDoc->URL = xmlStrdup(doc->URL);
13069         }
13070     }
13071     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13072     if (newRoot == NULL) {
13073 	if (sax != NULL)
13074 	    ctxt->sax = oldsax;
13075 	xmlFreeParserCtxt(ctxt);
13076 	newDoc->intSubset = NULL;
13077 	newDoc->extSubset = NULL;
13078         xmlFreeDoc(newDoc);
13079 	return(XML_ERR_INTERNAL_ERROR);
13080     }
13081     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13082     nodePush(ctxt, newDoc->children);
13083     if (doc == NULL) {
13084         ctxt->myDoc = newDoc;
13085     } else {
13086         ctxt->myDoc = doc;
13087         newRoot->doc = doc;
13088     }
13089 
13090     /*
13091      * Get the 4 first bytes and decode the charset
13092      * if enc != XML_CHAR_ENCODING_NONE
13093      * plug some encoding conversion routines.
13094      */
13095     GROW;
13096     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13097 	start[0] = RAW;
13098 	start[1] = NXT(1);
13099 	start[2] = NXT(2);
13100 	start[3] = NXT(3);
13101 	enc = xmlDetectCharEncoding(start, 4);
13102 	if (enc != XML_CHAR_ENCODING_NONE) {
13103 	    xmlSwitchEncoding(ctxt, enc);
13104 	}
13105     }
13106 
13107     /*
13108      * Parse a possible text declaration first
13109      */
13110     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13111 	xmlParseTextDecl(ctxt);
13112         /*
13113          * An XML-1.0 document can't reference an entity not XML-1.0
13114          */
13115         if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13116             (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13117             xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13118                            "Version mismatch between document and entity\n");
13119         }
13120     }
13121 
13122     ctxt->instate = XML_PARSER_CONTENT;
13123     ctxt->depth = depth;
13124     if (oldctxt != NULL) {
13125 	ctxt->_private = oldctxt->_private;
13126 	ctxt->loadsubset = oldctxt->loadsubset;
13127 	ctxt->validate = oldctxt->validate;
13128 	ctxt->valid = oldctxt->valid;
13129 	ctxt->replaceEntities = oldctxt->replaceEntities;
13130         if (oldctxt->validate) {
13131             ctxt->vctxt.error = oldctxt->vctxt.error;
13132             ctxt->vctxt.warning = oldctxt->vctxt.warning;
13133             ctxt->vctxt.userData = oldctxt->vctxt.userData;
13134         }
13135 	ctxt->external = oldctxt->external;
13136         if (ctxt->dict) xmlDictFree(ctxt->dict);
13137         ctxt->dict = oldctxt->dict;
13138         ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13139         ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13140         ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13141         ctxt->dictNames = oldctxt->dictNames;
13142         ctxt->attsDefault = oldctxt->attsDefault;
13143         ctxt->attsSpecial = oldctxt->attsSpecial;
13144         ctxt->linenumbers = oldctxt->linenumbers;
13145 	ctxt->record_info = oldctxt->record_info;
13146 	ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13147 	ctxt->node_seq.length = oldctxt->node_seq.length;
13148 	ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13149     } else {
13150 	/*
13151 	 * Doing validity checking on chunk without context
13152 	 * doesn't make sense
13153 	 */
13154 	ctxt->_private = NULL;
13155 	ctxt->validate = 0;
13156 	ctxt->external = 2;
13157 	ctxt->loadsubset = 0;
13158     }
13159 
13160     xmlParseContent(ctxt);
13161 
13162     if ((RAW == '<') && (NXT(1) == '/')) {
13163 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13164     } else if (RAW != 0) {
13165 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13166     }
13167     if (ctxt->node != newDoc->children) {
13168 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13169     }
13170 
13171     if (!ctxt->wellFormed) {
13172         if (ctxt->errNo == 0)
13173 	    ret = XML_ERR_INTERNAL_ERROR;
13174 	else
13175 	    ret = (xmlParserErrors)ctxt->errNo;
13176     } else {
13177 	if (list != NULL) {
13178 	    xmlNodePtr cur;
13179 
13180 	    /*
13181 	     * Return the newly created nodeset after unlinking it from
13182 	     * they pseudo parent.
13183 	     */
13184 	    cur = newDoc->children->children;
13185 	    *list = cur;
13186 	    while (cur != NULL) {
13187 		cur->parent = NULL;
13188 		cur = cur->next;
13189 	    }
13190             newDoc->children->children = NULL;
13191 	}
13192 	ret = XML_ERR_OK;
13193     }
13194 
13195     /*
13196      * Record in the parent context the number of entities replacement
13197      * done when parsing that reference.
13198      */
13199     if (oldctxt != NULL)
13200         oldctxt->nbentities += ctxt->nbentities;
13201 
13202     /*
13203      * Also record the size of the entity parsed
13204      */
13205     if (ctxt->input != NULL && oldctxt != NULL) {
13206 	oldctxt->sizeentities += ctxt->input->consumed;
13207 	oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13208     }
13209     /*
13210      * And record the last error if any
13211      */
13212     if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13213         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13214 
13215     if (sax != NULL)
13216 	ctxt->sax = oldsax;
13217     if (oldctxt != NULL) {
13218         ctxt->dict = NULL;
13219         ctxt->attsDefault = NULL;
13220         ctxt->attsSpecial = NULL;
13221         oldctxt->validate = ctxt->validate;
13222         oldctxt->valid = ctxt->valid;
13223         oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13224         oldctxt->node_seq.length = ctxt->node_seq.length;
13225         oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13226     }
13227     ctxt->node_seq.maximum = 0;
13228     ctxt->node_seq.length = 0;
13229     ctxt->node_seq.buffer = NULL;
13230     xmlFreeParserCtxt(ctxt);
13231     newDoc->intSubset = NULL;
13232     newDoc->extSubset = NULL;
13233     xmlFreeDoc(newDoc);
13234 
13235     return(ret);
13236 }
13237 
13238 #ifdef LIBXML_SAX1_ENABLED
13239 /**
13240  * xmlParseExternalEntity:
13241  * @doc:  the document the chunk pertains to
13242  * @sax:  the SAX handler block (possibly NULL)
13243  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13244  * @depth:  Used for loop detection, use 0
13245  * @URL:  the URL for the entity to load
13246  * @ID:  the System ID for the entity to load
13247  * @lst:  the return value for the set of parsed nodes
13248  *
13249  * Parse an external general entity
13250  * An external general parsed entity is well-formed if it matches the
13251  * production labeled extParsedEnt.
13252  *
13253  * [78] extParsedEnt ::= TextDecl? content
13254  *
13255  * Returns 0 if the entity is well formed, -1 in case of args problem and
13256  *    the parser error code otherwise
13257  */
13258 
13259 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)13260 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13261 	  int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13262     return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13263 		                       ID, lst));
13264 }
13265 
13266 /**
13267  * xmlParseBalancedChunkMemory:
13268  * @doc:  the document the chunk pertains to (must not be NULL)
13269  * @sax:  the SAX handler block (possibly NULL)
13270  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13271  * @depth:  Used for loop detection, use 0
13272  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13273  * @lst:  the return value for the set of parsed nodes
13274  *
13275  * Parse a well-balanced chunk of an XML document
13276  * called by the parser
13277  * The allowed sequence for the Well Balanced Chunk is the one defined by
13278  * the content production in the XML grammar:
13279  *
13280  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13281  *
13282  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13283  *    the parser error code otherwise
13284  */
13285 
13286 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)13287 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13288      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13289     return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13290                                                 depth, string, lst, 0 );
13291 }
13292 #endif /* LIBXML_SAX1_ENABLED */
13293 
13294 /**
13295  * xmlParseBalancedChunkMemoryInternal:
13296  * @oldctxt:  the existing parsing context
13297  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13298  * @user_data:  the user data field for the parser context
13299  * @lst:  the return value for the set of parsed nodes
13300  *
13301  *
13302  * Parse a well-balanced chunk of an XML document
13303  * called by the parser
13304  * The allowed sequence for the Well Balanced Chunk is the one defined by
13305  * the content production in the XML grammar:
13306  *
13307  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13308  *
13309  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13310  * error code otherwise
13311  *
13312  * In case recover is set to 1, the nodelist will not be empty even if
13313  * the parsed chunk is not well balanced.
13314  */
13315 static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,const xmlChar * string,void * user_data,xmlNodePtr * lst)13316 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13317 	const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13318     xmlParserCtxtPtr ctxt;
13319     xmlDocPtr newDoc = NULL;
13320     xmlNodePtr newRoot;
13321     xmlSAXHandlerPtr oldsax = NULL;
13322     xmlNodePtr content = NULL;
13323     xmlNodePtr last = NULL;
13324     int size;
13325     xmlParserErrors ret = XML_ERR_OK;
13326 #ifdef SAX2
13327     int i;
13328 #endif
13329 
13330     if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13331         (oldctxt->depth >  1024)) {
13332 	return(XML_ERR_ENTITY_LOOP);
13333     }
13334 
13335 
13336     if (lst != NULL)
13337         *lst = NULL;
13338     if (string == NULL)
13339         return(XML_ERR_INTERNAL_ERROR);
13340 
13341     size = xmlStrlen(string);
13342 
13343     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13344     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13345     if (user_data != NULL)
13346 	ctxt->userData = user_data;
13347     else
13348 	ctxt->userData = ctxt;
13349     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13350     ctxt->dict = oldctxt->dict;
13351     ctxt->input_id = oldctxt->input_id + 1;
13352     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13353     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13354     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13355 
13356 #ifdef SAX2
13357     /* propagate namespaces down the entity */
13358     for (i = 0;i < oldctxt->nsNr;i += 2) {
13359         nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13360     }
13361 #endif
13362 
13363     oldsax = ctxt->sax;
13364     ctxt->sax = oldctxt->sax;
13365     xmlDetectSAX2(ctxt);
13366     ctxt->replaceEntities = oldctxt->replaceEntities;
13367     ctxt->options = oldctxt->options;
13368 
13369     ctxt->_private = oldctxt->_private;
13370     if (oldctxt->myDoc == NULL) {
13371 	newDoc = xmlNewDoc(BAD_CAST "1.0");
13372 	if (newDoc == NULL) {
13373 	    ctxt->sax = oldsax;
13374 	    ctxt->dict = NULL;
13375 	    xmlFreeParserCtxt(ctxt);
13376 	    return(XML_ERR_INTERNAL_ERROR);
13377 	}
13378 	newDoc->properties = XML_DOC_INTERNAL;
13379 	newDoc->dict = ctxt->dict;
13380 	xmlDictReference(newDoc->dict);
13381 	ctxt->myDoc = newDoc;
13382     } else {
13383 	ctxt->myDoc = oldctxt->myDoc;
13384         content = ctxt->myDoc->children;
13385 	last = ctxt->myDoc->last;
13386     }
13387     newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13388     if (newRoot == NULL) {
13389 	ctxt->sax = oldsax;
13390 	ctxt->dict = NULL;
13391 	xmlFreeParserCtxt(ctxt);
13392 	if (newDoc != NULL) {
13393 	    xmlFreeDoc(newDoc);
13394 	}
13395 	return(XML_ERR_INTERNAL_ERROR);
13396     }
13397     ctxt->myDoc->children = NULL;
13398     ctxt->myDoc->last = NULL;
13399     xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13400     nodePush(ctxt, ctxt->myDoc->children);
13401     ctxt->instate = XML_PARSER_CONTENT;
13402     ctxt->depth = oldctxt->depth + 1;
13403 
13404     ctxt->validate = 0;
13405     ctxt->loadsubset = oldctxt->loadsubset;
13406     if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13407 	/*
13408 	 * ID/IDREF registration will be done in xmlValidateElement below
13409 	 */
13410 	ctxt->loadsubset |= XML_SKIP_IDS;
13411     }
13412     ctxt->dictNames = oldctxt->dictNames;
13413     ctxt->attsDefault = oldctxt->attsDefault;
13414     ctxt->attsSpecial = oldctxt->attsSpecial;
13415 
13416     xmlParseContent(ctxt);
13417     if ((RAW == '<') && (NXT(1) == '/')) {
13418 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13419     } else if (RAW != 0) {
13420 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13421     }
13422     if (ctxt->node != ctxt->myDoc->children) {
13423 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13424     }
13425 
13426     if (!ctxt->wellFormed) {
13427         if (ctxt->errNo == 0)
13428 	    ret = XML_ERR_INTERNAL_ERROR;
13429 	else
13430 	    ret = (xmlParserErrors)ctxt->errNo;
13431     } else {
13432       ret = XML_ERR_OK;
13433     }
13434 
13435     if ((lst != NULL) && (ret == XML_ERR_OK)) {
13436 	xmlNodePtr cur;
13437 
13438 	/*
13439 	 * Return the newly created nodeset after unlinking it from
13440 	 * they pseudo parent.
13441 	 */
13442 	cur = ctxt->myDoc->children->children;
13443 	*lst = cur;
13444 	while (cur != NULL) {
13445 #ifdef LIBXML_VALID_ENABLED
13446 	    if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13447 		(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13448 		(cur->type == XML_ELEMENT_NODE)) {
13449 		oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13450 			oldctxt->myDoc, cur);
13451 	    }
13452 #endif /* LIBXML_VALID_ENABLED */
13453 	    cur->parent = NULL;
13454 	    cur = cur->next;
13455 	}
13456 	ctxt->myDoc->children->children = NULL;
13457     }
13458     if (ctxt->myDoc != NULL) {
13459 	xmlFreeNode(ctxt->myDoc->children);
13460         ctxt->myDoc->children = content;
13461         ctxt->myDoc->last = last;
13462     }
13463 
13464     /*
13465      * Record in the parent context the number of entities replacement
13466      * done when parsing that reference.
13467      */
13468     if (oldctxt != NULL)
13469         oldctxt->nbentities += ctxt->nbentities;
13470 
13471     /*
13472      * Also record the last error if any
13473      */
13474     if (ctxt->lastError.code != XML_ERR_OK)
13475         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13476 
13477     ctxt->sax = oldsax;
13478     ctxt->dict = NULL;
13479     ctxt->attsDefault = NULL;
13480     ctxt->attsSpecial = NULL;
13481     xmlFreeParserCtxt(ctxt);
13482     if (newDoc != NULL) {
13483 	xmlFreeDoc(newDoc);
13484     }
13485 
13486     return(ret);
13487 }
13488 
13489 /**
13490  * xmlParseInNodeContext:
13491  * @node:  the context node
13492  * @data:  the input string
13493  * @datalen:  the input string length in bytes
13494  * @options:  a combination of xmlParserOption
13495  * @lst:  the return value for the set of parsed nodes
13496  *
13497  * Parse a well-balanced chunk of an XML document
13498  * within the context (DTD, namespaces, etc ...) of the given node.
13499  *
13500  * The allowed sequence for the data is a Well Balanced Chunk defined by
13501  * the content production in the XML grammar:
13502  *
13503  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13504  *
13505  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13506  * error code otherwise
13507  */
13508 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)13509 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13510                       int options, xmlNodePtr *lst) {
13511 #ifdef SAX2
13512     xmlParserCtxtPtr ctxt;
13513     xmlDocPtr doc = NULL;
13514     xmlNodePtr fake, cur;
13515     int nsnr = 0;
13516 
13517     xmlParserErrors ret = XML_ERR_OK;
13518 
13519     /*
13520      * check all input parameters, grab the document
13521      */
13522     if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13523         return(XML_ERR_INTERNAL_ERROR);
13524     switch (node->type) {
13525         case XML_ELEMENT_NODE:
13526         case XML_ATTRIBUTE_NODE:
13527         case XML_TEXT_NODE:
13528         case XML_CDATA_SECTION_NODE:
13529         case XML_ENTITY_REF_NODE:
13530         case XML_PI_NODE:
13531         case XML_COMMENT_NODE:
13532         case XML_DOCUMENT_NODE:
13533         case XML_HTML_DOCUMENT_NODE:
13534 	    break;
13535 	default:
13536 	    return(XML_ERR_INTERNAL_ERROR);
13537 
13538     }
13539     while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13540            (node->type != XML_DOCUMENT_NODE) &&
13541 	   (node->type != XML_HTML_DOCUMENT_NODE))
13542 	node = node->parent;
13543     if (node == NULL)
13544 	return(XML_ERR_INTERNAL_ERROR);
13545     if (node->type == XML_ELEMENT_NODE)
13546 	doc = node->doc;
13547     else
13548         doc = (xmlDocPtr) node;
13549     if (doc == NULL)
13550 	return(XML_ERR_INTERNAL_ERROR);
13551 
13552     /*
13553      * allocate a context and set-up everything not related to the
13554      * node position in the tree
13555      */
13556     if (doc->type == XML_DOCUMENT_NODE)
13557 	ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13558 #ifdef LIBXML_HTML_ENABLED
13559     else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13560 	ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13561         /*
13562          * When parsing in context, it makes no sense to add implied
13563          * elements like html/body/etc...
13564          */
13565         options |= HTML_PARSE_NOIMPLIED;
13566     }
13567 #endif
13568     else
13569         return(XML_ERR_INTERNAL_ERROR);
13570 
13571     if (ctxt == NULL)
13572         return(XML_ERR_NO_MEMORY);
13573 
13574     /*
13575      * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13576      * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13577      * we must wait until the last moment to free the original one.
13578      */
13579     if (doc->dict != NULL) {
13580         if (ctxt->dict != NULL)
13581 	    xmlDictFree(ctxt->dict);
13582 	ctxt->dict = doc->dict;
13583     } else
13584         options |= XML_PARSE_NODICT;
13585 
13586     if (doc->encoding != NULL) {
13587         xmlCharEncodingHandlerPtr hdlr;
13588 
13589         if (ctxt->encoding != NULL)
13590 	    xmlFree((xmlChar *) ctxt->encoding);
13591         ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13592 
13593         hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13594         if (hdlr != NULL) {
13595             xmlSwitchToEncoding(ctxt, hdlr);
13596 	} else {
13597             return(XML_ERR_UNSUPPORTED_ENCODING);
13598         }
13599     }
13600 
13601     xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13602     xmlDetectSAX2(ctxt);
13603     ctxt->myDoc = doc;
13604     /* parsing in context, i.e. as within existing content */
13605     ctxt->input_id = 2;
13606     ctxt->instate = XML_PARSER_CONTENT;
13607 
13608     fake = xmlNewComment(NULL);
13609     if (fake == NULL) {
13610         xmlFreeParserCtxt(ctxt);
13611 	return(XML_ERR_NO_MEMORY);
13612     }
13613     xmlAddChild(node, fake);
13614 
13615     if (node->type == XML_ELEMENT_NODE) {
13616 	nodePush(ctxt, node);
13617 	/*
13618 	 * initialize the SAX2 namespaces stack
13619 	 */
13620 	cur = node;
13621 	while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13622 	    xmlNsPtr ns = cur->nsDef;
13623 	    const xmlChar *iprefix, *ihref;
13624 
13625 	    while (ns != NULL) {
13626 		if (ctxt->dict) {
13627 		    iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13628 		    ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13629 		} else {
13630 		    iprefix = ns->prefix;
13631 		    ihref = ns->href;
13632 		}
13633 
13634 	        if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13635 		    nsPush(ctxt, iprefix, ihref);
13636 		    nsnr++;
13637 		}
13638 		ns = ns->next;
13639 	    }
13640 	    cur = cur->parent;
13641 	}
13642     }
13643 
13644     if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13645 	/*
13646 	 * ID/IDREF registration will be done in xmlValidateElement below
13647 	 */
13648 	ctxt->loadsubset |= XML_SKIP_IDS;
13649     }
13650 
13651 #ifdef LIBXML_HTML_ENABLED
13652     if (doc->type == XML_HTML_DOCUMENT_NODE)
13653         __htmlParseContent(ctxt);
13654     else
13655 #endif
13656 	xmlParseContent(ctxt);
13657 
13658     nsPop(ctxt, nsnr);
13659     if ((RAW == '<') && (NXT(1) == '/')) {
13660 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13661     } else if (RAW != 0) {
13662 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13663     }
13664     if ((ctxt->node != NULL) && (ctxt->node != node)) {
13665 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13666 	ctxt->wellFormed = 0;
13667     }
13668 
13669     if (!ctxt->wellFormed) {
13670         if (ctxt->errNo == 0)
13671 	    ret = XML_ERR_INTERNAL_ERROR;
13672 	else
13673 	    ret = (xmlParserErrors)ctxt->errNo;
13674     } else {
13675         ret = XML_ERR_OK;
13676     }
13677 
13678     /*
13679      * Return the newly created nodeset after unlinking it from
13680      * the pseudo sibling.
13681      */
13682 
13683     cur = fake->next;
13684     fake->next = NULL;
13685     node->last = fake;
13686 
13687     if (cur != NULL) {
13688 	cur->prev = NULL;
13689     }
13690 
13691     *lst = cur;
13692 
13693     while (cur != NULL) {
13694 	cur->parent = NULL;
13695 	cur = cur->next;
13696     }
13697 
13698     xmlUnlinkNode(fake);
13699     xmlFreeNode(fake);
13700 
13701 
13702     if (ret != XML_ERR_OK) {
13703         xmlFreeNodeList(*lst);
13704 	*lst = NULL;
13705     }
13706 
13707     if (doc->dict != NULL)
13708         ctxt->dict = NULL;
13709     xmlFreeParserCtxt(ctxt);
13710 
13711     return(ret);
13712 #else /* !SAX2 */
13713     return(XML_ERR_INTERNAL_ERROR);
13714 #endif
13715 }
13716 
13717 #ifdef LIBXML_SAX1_ENABLED
13718 /**
13719  * xmlParseBalancedChunkMemoryRecover:
13720  * @doc:  the document the chunk pertains to (must not be NULL)
13721  * @sax:  the SAX handler block (possibly NULL)
13722  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13723  * @depth:  Used for loop detection, use 0
13724  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13725  * @lst:  the return value for the set of parsed nodes
13726  * @recover: return nodes even if the data is broken (use 0)
13727  *
13728  *
13729  * Parse a well-balanced chunk of an XML document
13730  * called by the parser
13731  * The allowed sequence for the Well Balanced Chunk is the one defined by
13732  * the content production in the XML grammar:
13733  *
13734  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13735  *
13736  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13737  *    the parser error code otherwise
13738  *
13739  * In case recover is set to 1, the nodelist will not be empty even if
13740  * the parsed chunk is not well balanced, assuming the parsing succeeded to
13741  * some extent.
13742  */
13743 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)13744 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13745      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13746      int recover) {
13747     xmlParserCtxtPtr ctxt;
13748     xmlDocPtr newDoc;
13749     xmlSAXHandlerPtr oldsax = NULL;
13750     xmlNodePtr content, newRoot;
13751     int size;
13752     int ret = 0;
13753 
13754     if (depth > 40) {
13755 	return(XML_ERR_ENTITY_LOOP);
13756     }
13757 
13758 
13759     if (lst != NULL)
13760         *lst = NULL;
13761     if (string == NULL)
13762         return(-1);
13763 
13764     size = xmlStrlen(string);
13765 
13766     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13767     if (ctxt == NULL) return(-1);
13768     ctxt->userData = ctxt;
13769     if (sax != NULL) {
13770 	oldsax = ctxt->sax;
13771         ctxt->sax = sax;
13772 	if (user_data != NULL)
13773 	    ctxt->userData = user_data;
13774     }
13775     newDoc = xmlNewDoc(BAD_CAST "1.0");
13776     if (newDoc == NULL) {
13777 	xmlFreeParserCtxt(ctxt);
13778 	return(-1);
13779     }
13780     newDoc->properties = XML_DOC_INTERNAL;
13781     if ((doc != NULL) && (doc->dict != NULL)) {
13782         xmlDictFree(ctxt->dict);
13783 	ctxt->dict = doc->dict;
13784 	xmlDictReference(ctxt->dict);
13785 	ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13786 	ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13787 	ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13788 	ctxt->dictNames = 1;
13789     } else {
13790 	xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13791     }
13792     /* doc == NULL is only supported for historic reasons */
13793     if (doc != NULL) {
13794 	newDoc->intSubset = doc->intSubset;
13795 	newDoc->extSubset = doc->extSubset;
13796     }
13797     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13798     if (newRoot == NULL) {
13799 	if (sax != NULL)
13800 	    ctxt->sax = oldsax;
13801 	xmlFreeParserCtxt(ctxt);
13802 	newDoc->intSubset = NULL;
13803 	newDoc->extSubset = NULL;
13804         xmlFreeDoc(newDoc);
13805 	return(-1);
13806     }
13807     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13808     nodePush(ctxt, newRoot);
13809     /* doc == NULL is only supported for historic reasons */
13810     if (doc == NULL) {
13811 	ctxt->myDoc = newDoc;
13812     } else {
13813 	ctxt->myDoc = newDoc;
13814 	newDoc->children->doc = doc;
13815 	/* Ensure that doc has XML spec namespace */
13816 	xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13817 	newDoc->oldNs = doc->oldNs;
13818     }
13819     ctxt->instate = XML_PARSER_CONTENT;
13820     ctxt->input_id = 2;
13821     ctxt->depth = depth;
13822 
13823     /*
13824      * Doing validity checking on chunk doesn't make sense
13825      */
13826     ctxt->validate = 0;
13827     ctxt->loadsubset = 0;
13828     xmlDetectSAX2(ctxt);
13829 
13830     if ( doc != NULL ){
13831         content = doc->children;
13832         doc->children = NULL;
13833         xmlParseContent(ctxt);
13834         doc->children = content;
13835     }
13836     else {
13837         xmlParseContent(ctxt);
13838     }
13839     if ((RAW == '<') && (NXT(1) == '/')) {
13840 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13841     } else if (RAW != 0) {
13842 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13843     }
13844     if (ctxt->node != newDoc->children) {
13845 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13846     }
13847 
13848     if (!ctxt->wellFormed) {
13849         if (ctxt->errNo == 0)
13850 	    ret = 1;
13851 	else
13852 	    ret = ctxt->errNo;
13853     } else {
13854       ret = 0;
13855     }
13856 
13857     if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13858 	xmlNodePtr cur;
13859 
13860 	/*
13861 	 * Return the newly created nodeset after unlinking it from
13862 	 * they pseudo parent.
13863 	 */
13864 	cur = newDoc->children->children;
13865 	*lst = cur;
13866 	while (cur != NULL) {
13867 	    xmlSetTreeDoc(cur, doc);
13868 	    cur->parent = NULL;
13869 	    cur = cur->next;
13870 	}
13871 	newDoc->children->children = NULL;
13872     }
13873 
13874     if (sax != NULL)
13875 	ctxt->sax = oldsax;
13876     xmlFreeParserCtxt(ctxt);
13877     newDoc->intSubset = NULL;
13878     newDoc->extSubset = NULL;
13879     /* This leaks the namespace list if doc == NULL */
13880     newDoc->oldNs = NULL;
13881     xmlFreeDoc(newDoc);
13882 
13883     return(ret);
13884 }
13885 
13886 /**
13887  * xmlSAXParseEntity:
13888  * @sax:  the SAX handler block
13889  * @filename:  the filename
13890  *
13891  * parse an XML external entity out of context and build a tree.
13892  * It use the given SAX function block to handle the parsing callback.
13893  * If sax is NULL, fallback to the default DOM tree building routines.
13894  *
13895  * [78] extParsedEnt ::= TextDecl? content
13896  *
13897  * This correspond to a "Well Balanced" chunk
13898  *
13899  * Returns the resulting document tree
13900  */
13901 
13902 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)13903 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13904     xmlDocPtr ret;
13905     xmlParserCtxtPtr ctxt;
13906 
13907     ctxt = xmlCreateFileParserCtxt(filename);
13908     if (ctxt == NULL) {
13909 	return(NULL);
13910     }
13911     if (sax != NULL) {
13912 	if (ctxt->sax != NULL)
13913 	    xmlFree(ctxt->sax);
13914         ctxt->sax = sax;
13915         ctxt->userData = NULL;
13916     }
13917 
13918     xmlParseExtParsedEnt(ctxt);
13919 
13920     if (ctxt->wellFormed)
13921 	ret = ctxt->myDoc;
13922     else {
13923         ret = NULL;
13924         xmlFreeDoc(ctxt->myDoc);
13925         ctxt->myDoc = NULL;
13926     }
13927     if (sax != NULL)
13928         ctxt->sax = NULL;
13929     xmlFreeParserCtxt(ctxt);
13930 
13931     return(ret);
13932 }
13933 
13934 /**
13935  * xmlParseEntity:
13936  * @filename:  the filename
13937  *
13938  * parse an XML external entity out of context and build a tree.
13939  *
13940  * [78] extParsedEnt ::= TextDecl? content
13941  *
13942  * This correspond to a "Well Balanced" chunk
13943  *
13944  * Returns the resulting document tree
13945  */
13946 
13947 xmlDocPtr
xmlParseEntity(const char * filename)13948 xmlParseEntity(const char *filename) {
13949     return(xmlSAXParseEntity(NULL, filename));
13950 }
13951 #endif /* LIBXML_SAX1_ENABLED */
13952 
13953 /**
13954  * xmlCreateEntityParserCtxtInternal:
13955  * @URL:  the entity URL
13956  * @ID:  the entity PUBLIC ID
13957  * @base:  a possible base for the target URI
13958  * @pctx:  parser context used to set options on new context
13959  *
13960  * Create a parser context for an external entity
13961  * Automatic support for ZLIB/Compress compressed document is provided
13962  * by default if found at compile-time.
13963  *
13964  * Returns the new parser context or NULL
13965  */
13966 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)13967 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13968 	                  const xmlChar *base, xmlParserCtxtPtr pctx) {
13969     xmlParserCtxtPtr ctxt;
13970     xmlParserInputPtr inputStream;
13971     char *directory = NULL;
13972     xmlChar *uri;
13973 
13974     ctxt = xmlNewParserCtxt();
13975     if (ctxt == NULL) {
13976 	return(NULL);
13977     }
13978 
13979     if (pctx != NULL) {
13980         ctxt->options = pctx->options;
13981         ctxt->_private = pctx->_private;
13982 	/*
13983 	 * this is a subparser of pctx, so the input_id should be
13984 	 * incremented to distinguish from main entity
13985 	 */
13986 	ctxt->input_id = pctx->input_id + 1;
13987     }
13988 
13989     /* Don't read from stdin. */
13990     if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13991         URL = BAD_CAST "./-";
13992 
13993     uri = xmlBuildURI(URL, base);
13994 
13995     if (uri == NULL) {
13996 	inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13997 	if (inputStream == NULL) {
13998 	    xmlFreeParserCtxt(ctxt);
13999 	    return(NULL);
14000 	}
14001 
14002 	inputPush(ctxt, inputStream);
14003 
14004 	if ((ctxt->directory == NULL) && (directory == NULL))
14005 	    directory = xmlParserGetDirectory((char *)URL);
14006 	if ((ctxt->directory == NULL) && (directory != NULL))
14007 	    ctxt->directory = directory;
14008     } else {
14009 	inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14010 	if (inputStream == NULL) {
14011 	    xmlFree(uri);
14012 	    xmlFreeParserCtxt(ctxt);
14013 	    return(NULL);
14014 	}
14015 
14016 	inputPush(ctxt, inputStream);
14017 
14018 	if ((ctxt->directory == NULL) && (directory == NULL))
14019 	    directory = xmlParserGetDirectory((char *)uri);
14020 	if ((ctxt->directory == NULL) && (directory != NULL))
14021 	    ctxt->directory = directory;
14022 	xmlFree(uri);
14023     }
14024     return(ctxt);
14025 }
14026 
14027 /**
14028  * xmlCreateEntityParserCtxt:
14029  * @URL:  the entity URL
14030  * @ID:  the entity PUBLIC ID
14031  * @base:  a possible base for the target URI
14032  *
14033  * Create a parser context for an external entity
14034  * Automatic support for ZLIB/Compress compressed document is provided
14035  * by default if found at compile-time.
14036  *
14037  * Returns the new parser context or NULL
14038  */
14039 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)14040 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14041 	                  const xmlChar *base) {
14042     return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14043 
14044 }
14045 
14046 /************************************************************************
14047  *									*
14048  *		Front ends when parsing from a file			*
14049  *									*
14050  ************************************************************************/
14051 
14052 /**
14053  * xmlCreateURLParserCtxt:
14054  * @filename:  the filename or URL
14055  * @options:  a combination of xmlParserOption
14056  *
14057  * Create a parser context for a file or URL content.
14058  * Automatic support for ZLIB/Compress compressed document is provided
14059  * by default if found at compile-time and for file accesses
14060  *
14061  * Returns the new parser context or NULL
14062  */
14063 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)14064 xmlCreateURLParserCtxt(const char *filename, int options)
14065 {
14066     xmlParserCtxtPtr ctxt;
14067     xmlParserInputPtr inputStream;
14068     char *directory = NULL;
14069 
14070     ctxt = xmlNewParserCtxt();
14071     if (ctxt == NULL) {
14072 	xmlErrMemory(NULL, "cannot allocate parser context");
14073 	return(NULL);
14074     }
14075 
14076     if (options)
14077 	xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14078     ctxt->linenumbers = 1;
14079 
14080     inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14081     if (inputStream == NULL) {
14082 	xmlFreeParserCtxt(ctxt);
14083 	return(NULL);
14084     }
14085 
14086     inputPush(ctxt, inputStream);
14087     if ((ctxt->directory == NULL) && (directory == NULL))
14088         directory = xmlParserGetDirectory(filename);
14089     if ((ctxt->directory == NULL) && (directory != NULL))
14090         ctxt->directory = directory;
14091 
14092     return(ctxt);
14093 }
14094 
14095 /**
14096  * xmlCreateFileParserCtxt:
14097  * @filename:  the filename
14098  *
14099  * Create a parser context for a file content.
14100  * Automatic support for ZLIB/Compress compressed document is provided
14101  * by default if found at compile-time.
14102  *
14103  * Returns the new parser context or NULL
14104  */
14105 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)14106 xmlCreateFileParserCtxt(const char *filename)
14107 {
14108     return(xmlCreateURLParserCtxt(filename, 0));
14109 }
14110 
14111 #ifdef LIBXML_SAX1_ENABLED
14112 /**
14113  * xmlSAXParseFileWithData:
14114  * @sax:  the SAX handler block
14115  * @filename:  the filename
14116  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14117  *             documents
14118  * @data:  the userdata
14119  *
14120  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14121  * compressed document is provided by default if found at compile-time.
14122  * It use the given SAX function block to handle the parsing callback.
14123  * If sax is NULL, fallback to the default DOM tree building routines.
14124  *
14125  * User data (void *) is stored within the parser context in the
14126  * context's _private member, so it is available nearly everywhere in libxml
14127  *
14128  * Returns the resulting document tree
14129  */
14130 
14131 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)14132 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14133                         int recovery, void *data) {
14134     xmlDocPtr ret;
14135     xmlParserCtxtPtr ctxt;
14136 
14137     xmlInitParser();
14138 
14139     ctxt = xmlCreateFileParserCtxt(filename);
14140     if (ctxt == NULL) {
14141 	return(NULL);
14142     }
14143     if (sax != NULL) {
14144 	if (ctxt->sax != NULL)
14145 	    xmlFree(ctxt->sax);
14146         ctxt->sax = sax;
14147     }
14148     xmlDetectSAX2(ctxt);
14149     if (data!=NULL) {
14150 	ctxt->_private = data;
14151     }
14152 
14153     if (ctxt->directory == NULL)
14154         ctxt->directory = xmlParserGetDirectory(filename);
14155 
14156     ctxt->recovery = recovery;
14157 
14158     xmlParseDocument(ctxt);
14159 
14160     if ((ctxt->wellFormed) || recovery) {
14161         ret = ctxt->myDoc;
14162 	if ((ret != NULL) && (ctxt->input->buf != NULL)) {
14163 	    if (ctxt->input->buf->compressed > 0)
14164 		ret->compression = 9;
14165 	    else
14166 		ret->compression = ctxt->input->buf->compressed;
14167 	}
14168     }
14169     else {
14170        ret = NULL;
14171        xmlFreeDoc(ctxt->myDoc);
14172        ctxt->myDoc = NULL;
14173     }
14174     if (sax != NULL)
14175         ctxt->sax = NULL;
14176     xmlFreeParserCtxt(ctxt);
14177 
14178     return(ret);
14179 }
14180 
14181 /**
14182  * xmlSAXParseFile:
14183  * @sax:  the SAX handler block
14184  * @filename:  the filename
14185  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14186  *             documents
14187  *
14188  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14189  * compressed document is provided by default if found at compile-time.
14190  * It use the given SAX function block to handle the parsing callback.
14191  * If sax is NULL, fallback to the default DOM tree building routines.
14192  *
14193  * Returns the resulting document tree
14194  */
14195 
14196 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)14197 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14198                           int recovery) {
14199     return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14200 }
14201 
14202 /**
14203  * xmlRecoverDoc:
14204  * @cur:  a pointer to an array of xmlChar
14205  *
14206  * parse an XML in-memory document and build a tree.
14207  * In the case the document is not Well Formed, a attempt to build a
14208  * tree is tried anyway
14209  *
14210  * Returns the resulting document tree or NULL in case of failure
14211  */
14212 
14213 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)14214 xmlRecoverDoc(const xmlChar *cur) {
14215     return(xmlSAXParseDoc(NULL, cur, 1));
14216 }
14217 
14218 /**
14219  * xmlParseFile:
14220  * @filename:  the filename
14221  *
14222  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14223  * compressed document is provided by default if found at compile-time.
14224  *
14225  * Returns the resulting document tree if the file was wellformed,
14226  * NULL otherwise.
14227  */
14228 
14229 xmlDocPtr
xmlParseFile(const char * filename)14230 xmlParseFile(const char *filename) {
14231     return(xmlSAXParseFile(NULL, filename, 0));
14232 }
14233 
14234 /**
14235  * xmlRecoverFile:
14236  * @filename:  the filename
14237  *
14238  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14239  * compressed document is provided by default if found at compile-time.
14240  * In the case the document is not Well Formed, it attempts to build
14241  * a tree anyway
14242  *
14243  * Returns the resulting document tree or NULL in case of failure
14244  */
14245 
14246 xmlDocPtr
xmlRecoverFile(const char * filename)14247 xmlRecoverFile(const char *filename) {
14248     return(xmlSAXParseFile(NULL, filename, 1));
14249 }
14250 
14251 
14252 /**
14253  * xmlSetupParserForBuffer:
14254  * @ctxt:  an XML parser context
14255  * @buffer:  a xmlChar * buffer
14256  * @filename:  a file name
14257  *
14258  * Setup the parser context to parse a new buffer; Clears any prior
14259  * contents from the parser context. The buffer parameter must not be
14260  * NULL, but the filename parameter can be
14261  */
14262 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)14263 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14264                              const char* filename)
14265 {
14266     xmlParserInputPtr input;
14267 
14268     if ((ctxt == NULL) || (buffer == NULL))
14269         return;
14270 
14271     input = xmlNewInputStream(ctxt);
14272     if (input == NULL) {
14273         xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14274         xmlClearParserCtxt(ctxt);
14275         return;
14276     }
14277 
14278     xmlClearParserCtxt(ctxt);
14279     if (filename != NULL)
14280         input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14281     input->base = buffer;
14282     input->cur = buffer;
14283     input->end = &buffer[xmlStrlen(buffer)];
14284     inputPush(ctxt, input);
14285 }
14286 
14287 /**
14288  * xmlSAXUserParseFile:
14289  * @sax:  a SAX handler
14290  * @user_data:  The user data returned on SAX callbacks
14291  * @filename:  a file name
14292  *
14293  * parse an XML file and call the given SAX handler routines.
14294  * Automatic support for ZLIB/Compress compressed document is provided
14295  *
14296  * Returns 0 in case of success or a error number otherwise
14297  */
14298 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)14299 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14300                     const char *filename) {
14301     int ret = 0;
14302     xmlParserCtxtPtr ctxt;
14303 
14304     ctxt = xmlCreateFileParserCtxt(filename);
14305     if (ctxt == NULL) return -1;
14306     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14307 	xmlFree(ctxt->sax);
14308     ctxt->sax = sax;
14309     xmlDetectSAX2(ctxt);
14310 
14311     if (user_data != NULL)
14312 	ctxt->userData = user_data;
14313 
14314     xmlParseDocument(ctxt);
14315 
14316     if (ctxt->wellFormed)
14317 	ret = 0;
14318     else {
14319         if (ctxt->errNo != 0)
14320 	    ret = ctxt->errNo;
14321 	else
14322 	    ret = -1;
14323     }
14324     if (sax != NULL)
14325 	ctxt->sax = NULL;
14326     if (ctxt->myDoc != NULL) {
14327         xmlFreeDoc(ctxt->myDoc);
14328 	ctxt->myDoc = NULL;
14329     }
14330     xmlFreeParserCtxt(ctxt);
14331 
14332     return ret;
14333 }
14334 #endif /* LIBXML_SAX1_ENABLED */
14335 
14336 /************************************************************************
14337  *									*
14338  *		Front ends when parsing from memory			*
14339  *									*
14340  ************************************************************************/
14341 
14342 /**
14343  * xmlCreateMemoryParserCtxt:
14344  * @buffer:  a pointer to a char array
14345  * @size:  the size of the array
14346  *
14347  * Create a parser context for an XML in-memory document.
14348  *
14349  * Returns the new parser context or NULL
14350  */
14351 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)14352 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14353     xmlParserCtxtPtr ctxt;
14354     xmlParserInputPtr input;
14355     xmlParserInputBufferPtr buf;
14356 
14357     if (buffer == NULL)
14358 	return(NULL);
14359     if (size <= 0)
14360 	return(NULL);
14361 
14362     ctxt = xmlNewParserCtxt();
14363     if (ctxt == NULL)
14364 	return(NULL);
14365 
14366     /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14367     buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14368     if (buf == NULL) {
14369 	xmlFreeParserCtxt(ctxt);
14370 	return(NULL);
14371     }
14372 
14373     input = xmlNewInputStream(ctxt);
14374     if (input == NULL) {
14375 	xmlFreeParserInputBuffer(buf);
14376 	xmlFreeParserCtxt(ctxt);
14377 	return(NULL);
14378     }
14379 
14380     input->filename = NULL;
14381     input->buf = buf;
14382     xmlBufResetInput(input->buf->buffer, input);
14383 
14384     inputPush(ctxt, input);
14385     return(ctxt);
14386 }
14387 
14388 #ifdef LIBXML_SAX1_ENABLED
14389 /**
14390  * xmlSAXParseMemoryWithData:
14391  * @sax:  the SAX handler block
14392  * @buffer:  an pointer to a char array
14393  * @size:  the size of the array
14394  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14395  *             documents
14396  * @data:  the userdata
14397  *
14398  * parse an XML in-memory block and use the given SAX function block
14399  * to handle the parsing callback. If sax is NULL, fallback to the default
14400  * DOM tree building routines.
14401  *
14402  * User data (void *) is stored within the parser context in the
14403  * context's _private member, so it is available nearly everywhere in libxml
14404  *
14405  * Returns the resulting document tree
14406  */
14407 
14408 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)14409 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14410 	          int size, int recovery, void *data) {
14411     xmlDocPtr ret;
14412     xmlParserCtxtPtr ctxt;
14413 
14414     xmlInitParser();
14415 
14416     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14417     if (ctxt == NULL) return(NULL);
14418     if (sax != NULL) {
14419 	if (ctxt->sax != NULL)
14420 	    xmlFree(ctxt->sax);
14421         ctxt->sax = sax;
14422     }
14423     xmlDetectSAX2(ctxt);
14424     if (data!=NULL) {
14425 	ctxt->_private=data;
14426     }
14427 
14428     ctxt->recovery = recovery;
14429 
14430     xmlParseDocument(ctxt);
14431 
14432     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433     else {
14434        ret = NULL;
14435        xmlFreeDoc(ctxt->myDoc);
14436        ctxt->myDoc = NULL;
14437     }
14438     if (sax != NULL)
14439 	ctxt->sax = NULL;
14440     xmlFreeParserCtxt(ctxt);
14441 
14442     return(ret);
14443 }
14444 
14445 /**
14446  * xmlSAXParseMemory:
14447  * @sax:  the SAX handler block
14448  * @buffer:  an pointer to a char array
14449  * @size:  the size of the array
14450  * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14451  *             documents
14452  *
14453  * parse an XML in-memory block and use the given SAX function block
14454  * to handle the parsing callback. If sax is NULL, fallback to the default
14455  * DOM tree building routines.
14456  *
14457  * Returns the resulting document tree
14458  */
14459 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)14460 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14461 	          int size, int recovery) {
14462     return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14463 }
14464 
14465 /**
14466  * xmlParseMemory:
14467  * @buffer:  an pointer to a char array
14468  * @size:  the size of the array
14469  *
14470  * parse an XML in-memory block and build a tree.
14471  *
14472  * Returns the resulting document tree
14473  */
14474 
xmlParseMemory(const char * buffer,int size)14475 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14476    return(xmlSAXParseMemory(NULL, buffer, size, 0));
14477 }
14478 
14479 /**
14480  * xmlRecoverMemory:
14481  * @buffer:  an pointer to a char array
14482  * @size:  the size of the array
14483  *
14484  * parse an XML in-memory block and build a tree.
14485  * In the case the document is not Well Formed, an attempt to
14486  * build a tree is tried anyway
14487  *
14488  * Returns the resulting document tree or NULL in case of error
14489  */
14490 
xmlRecoverMemory(const char * buffer,int size)14491 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14492    return(xmlSAXParseMemory(NULL, buffer, size, 1));
14493 }
14494 
14495 /**
14496  * xmlSAXUserParseMemory:
14497  * @sax:  a SAX handler
14498  * @user_data:  The user data returned on SAX callbacks
14499  * @buffer:  an in-memory XML document input
14500  * @size:  the length of the XML document in bytes
14501  *
14502  * A better SAX parsing routine.
14503  * parse an XML in-memory buffer and call the given SAX handler routines.
14504  *
14505  * Returns 0 in case of success or a error number otherwise
14506  */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)14507 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14508 			  const char *buffer, int size) {
14509     int ret = 0;
14510     xmlParserCtxtPtr ctxt;
14511 
14512     xmlInitParser();
14513 
14514     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14515     if (ctxt == NULL) return -1;
14516     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14517         xmlFree(ctxt->sax);
14518     ctxt->sax = sax;
14519     xmlDetectSAX2(ctxt);
14520 
14521     if (user_data != NULL)
14522 	ctxt->userData = user_data;
14523 
14524     xmlParseDocument(ctxt);
14525 
14526     if (ctxt->wellFormed)
14527 	ret = 0;
14528     else {
14529         if (ctxt->errNo != 0)
14530 	    ret = ctxt->errNo;
14531 	else
14532 	    ret = -1;
14533     }
14534     if (sax != NULL)
14535         ctxt->sax = NULL;
14536     if (ctxt->myDoc != NULL) {
14537         xmlFreeDoc(ctxt->myDoc);
14538 	ctxt->myDoc = NULL;
14539     }
14540     xmlFreeParserCtxt(ctxt);
14541 
14542     return ret;
14543 }
14544 #endif /* LIBXML_SAX1_ENABLED */
14545 
14546 /**
14547  * xmlCreateDocParserCtxt:
14548  * @cur:  a pointer to an array of xmlChar
14549  *
14550  * Creates a parser context for an XML in-memory document.
14551  *
14552  * Returns the new parser context or NULL
14553  */
14554 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)14555 xmlCreateDocParserCtxt(const xmlChar *cur) {
14556     int len;
14557 
14558     if (cur == NULL)
14559 	return(NULL);
14560     len = xmlStrlen(cur);
14561     return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14562 }
14563 
14564 #ifdef LIBXML_SAX1_ENABLED
14565 /**
14566  * xmlSAXParseDoc:
14567  * @sax:  the SAX handler block
14568  * @cur:  a pointer to an array of xmlChar
14569  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14570  *             documents
14571  *
14572  * parse an XML in-memory document and build a tree.
14573  * It use the given SAX function block to handle the parsing callback.
14574  * If sax is NULL, fallback to the default DOM tree building routines.
14575  *
14576  * Returns the resulting document tree
14577  */
14578 
14579 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14580 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14581     xmlDocPtr ret;
14582     xmlParserCtxtPtr ctxt;
14583     xmlSAXHandlerPtr oldsax = NULL;
14584 
14585     if (cur == NULL) return(NULL);
14586 
14587 
14588     ctxt = xmlCreateDocParserCtxt(cur);
14589     if (ctxt == NULL) return(NULL);
14590     if (sax != NULL) {
14591         oldsax = ctxt->sax;
14592         ctxt->sax = sax;
14593         ctxt->userData = NULL;
14594     }
14595     xmlDetectSAX2(ctxt);
14596 
14597     xmlParseDocument(ctxt);
14598     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14599     else {
14600        ret = NULL;
14601        xmlFreeDoc(ctxt->myDoc);
14602        ctxt->myDoc = NULL;
14603     }
14604     if (sax != NULL)
14605 	ctxt->sax = oldsax;
14606     xmlFreeParserCtxt(ctxt);
14607 
14608     return(ret);
14609 }
14610 
14611 /**
14612  * xmlParseDoc:
14613  * @cur:  a pointer to an array of xmlChar
14614  *
14615  * parse an XML in-memory document and build a tree.
14616  *
14617  * Returns the resulting document tree
14618  */
14619 
14620 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14621 xmlParseDoc(const xmlChar *cur) {
14622     return(xmlSAXParseDoc(NULL, cur, 0));
14623 }
14624 #endif /* LIBXML_SAX1_ENABLED */
14625 
14626 #ifdef LIBXML_LEGACY_ENABLED
14627 /************************************************************************
14628  *									*
14629  *	Specific function to keep track of entities references		*
14630  *	and used by the XSLT debugger					*
14631  *									*
14632  ************************************************************************/
14633 
/*
 * Entity reference callback: installed by xmlSetEntityReferenceFunc() and
 * invoked by xmlAddEntityReference() when it is not NULL.
 */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14635 
14636 /**
14637  * xmlAddEntityReference:
14638  * @ent : A valid entity
14639  * @firstNode : A valid first node for children of entity
14640  * @lastNode : A valid last node of children entity
14641  *
14642  * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14643  */
14644 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14645 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14646                       xmlNodePtr lastNode)
14647 {
14648     if (xmlEntityRefFunc != NULL) {
14649         (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14650     }
14651 }
14652 
14653 
14654 /**
14655  * xmlSetEntityReferenceFunc:
14656  * @func: A valid function
14657  *
14658  * Set the function to call call back when a xml reference has been made
14659  */
14660 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14661 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14662 {
14663     xmlEntityRefFunc = func;
14664 }
14665 #endif /* LIBXML_LEGACY_ENABLED */
14666 
14667 /************************************************************************
14668  *									*
14669  *				Miscellaneous				*
14670  *									*
14671  ************************************************************************/
14672 
14673 #ifdef LIBXML_XPATH_ENABLED
14674 #include <libxml/xpath.h>
14675 #endif
14676 
/*
 * Default generic error handler, declared here so that xmlInitParser()
 * can compare the current xmlGenericError against it.
 */
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Set to 1 by xmlInitParser() and back to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
14679 
/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * The function returns immediately when the library is already marked
 * as initialized; otherwise it runs the initializers of the various
 * modules and sets the xmlParserInitialized flag.
 */

void
xmlInitParser(void) {
    /* Fast path: already initialized, nothing to do. */
    if (xmlParserInitialized != 0)
	return;

#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
    /*
     * On these Windows builds, register the cleanup at process exit, but
     * only while the default free() is still the deallocator in use.
     */
    if (xmlFree == free)
        atexit(xmlCleanupParser);
#endif

#ifdef LIBXML_THREAD_ENABLED
    /*
     * With thread support, take the global init mutex and test the flag
     * again under the lock before running the initializers.
     */
    __xmlGlobalInitMutexLock();
    if (xmlParserInitialized == 0) {
#endif
	xmlInitThreads();
	xmlInitGlobals();
	/* Install the default generic error function if none was set. */
	if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
	    (xmlGenericError == NULL))
	    initGenericErrorDefaultFunc(NULL);
	xmlInitMemory();
        xmlInitializeDict();
	xmlInitCharEncodingHandlers();
	xmlDefaultSAXHandlerInit();
	xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
	xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
	htmlInitAutoClose();
	htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
	xmlXPathInit();
#endif
	/* Mark the library as initialized only once everything above ran. */
	xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
14728 
/**
 * xmlCleanupParser:
 *
 * This function name is somewhat misleading. It does not clean up
 * parser state, it cleans up memory allocated by the library itself.
 * It is a cleanup function for the XML library. It tries to reclaim all
 * related global memory allocated for the library processing.
 * It doesn't deallocate any document related memory. One should
 * call xmlCleanupParser() only when the process has finished using
 * the library and all XML/HTML documents built with it.
 * See also xmlInitParser() which has the opposite function of preparing
 * the library for operations.
 *
 * WARNING: if your application is multithreaded or has plugin support
 *          calling this may crash the application if another thread or
 *          a plugin is still using libxml2. It's sometimes very hard to
 *          guess if libxml2 is in use in the application, some libraries
 *          or plugins may use it without notice. In case of doubt abstain
 *          from calling this function or do it just before calling exit()
 *          to avoid leak reports from valgrind !
 */

void
xmlCleanupParser(void) {
    /* Nothing to do when the library is not marked as initialized. */
    if (!xmlParserInitialized)
	return;

    /* Per-module cleanups, some depending on compile-time features. */
    xmlCleanupCharEncodingHandlers();
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlDictCleanup();
    xmlCleanupInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlCleanupOutputCallbacks();
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
    xmlSchemaCleanupTypes();
    xmlRelaxNGCleanupTypes();
#endif
    xmlResetLastError();
    xmlCleanupGlobals();
    xmlCleanupThreads(); /* must be last if called not from the main thread */
    xmlCleanupMemory();
    /* Clear the flag so that a later xmlInitParser() initializes again. */
    xmlParserInitialized = 0;
}
14775 
14776 #if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
14777     !defined(_WIN32)
14778 static void
14779 ATTRIBUTE_DESTRUCTOR
xmlDestructor(void)14780 xmlDestructor(void) {
14781     /*
14782      * Calling custom deallocation functions in a destructor can cause
14783      * problems, for example with Nokogiri.
14784      */
14785     if (xmlFree == free)
14786         xmlCleanupParser();
14787 }
14788 #endif
14789 
14790 /************************************************************************
14791  *									*
14792  *	New set (2.6.0) of simpler and more flexible APIs		*
14793  *									*
14794  ************************************************************************/
14795 
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is used; when it is NULL the string is always freed.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a
 * single statement (safe inside unbraced if/else) and takes the usual
 * trailing semicolon at the call site.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14807 
14808 /**
14809  * xmlCtxtReset:
14810  * @ctxt: an XML parser context
14811  *
14812  * Reset a parser context
14813  */
14814 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)14815 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14816 {
14817     xmlParserInputPtr input;
14818     xmlDictPtr dict;
14819 
14820     if (ctxt == NULL)
14821         return;
14822 
14823     dict = ctxt->dict;
14824 
14825     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14826         xmlFreeInputStream(input);
14827     }
14828     ctxt->inputNr = 0;
14829     ctxt->input = NULL;
14830 
14831     ctxt->spaceNr = 0;
14832     if (ctxt->spaceTab != NULL) {
14833 	ctxt->spaceTab[0] = -1;
14834 	ctxt->space = &ctxt->spaceTab[0];
14835     } else {
14836         ctxt->space = NULL;
14837     }
14838 
14839 
14840     ctxt->nodeNr = 0;
14841     ctxt->node = NULL;
14842 
14843     ctxt->nameNr = 0;
14844     ctxt->name = NULL;
14845 
14846     DICT_FREE(ctxt->version);
14847     ctxt->version = NULL;
14848     DICT_FREE(ctxt->encoding);
14849     ctxt->encoding = NULL;
14850     DICT_FREE(ctxt->directory);
14851     ctxt->directory = NULL;
14852     DICT_FREE(ctxt->extSubURI);
14853     ctxt->extSubURI = NULL;
14854     DICT_FREE(ctxt->extSubSystem);
14855     ctxt->extSubSystem = NULL;
14856     if (ctxt->myDoc != NULL)
14857         xmlFreeDoc(ctxt->myDoc);
14858     ctxt->myDoc = NULL;
14859 
14860     ctxt->standalone = -1;
14861     ctxt->hasExternalSubset = 0;
14862     ctxt->hasPErefs = 0;
14863     ctxt->html = 0;
14864     ctxt->external = 0;
14865     ctxt->instate = XML_PARSER_START;
14866     ctxt->token = 0;
14867 
14868     ctxt->wellFormed = 1;
14869     ctxt->nsWellFormed = 1;
14870     ctxt->disableSAX = 0;
14871     ctxt->valid = 1;
14872 #if 0
14873     ctxt->vctxt.userData = ctxt;
14874     ctxt->vctxt.error = xmlParserValidityError;
14875     ctxt->vctxt.warning = xmlParserValidityWarning;
14876 #endif
14877     ctxt->record_info = 0;
14878     ctxt->checkIndex = 0;
14879     ctxt->inSubset = 0;
14880     ctxt->errNo = XML_ERR_OK;
14881     ctxt->depth = 0;
14882     ctxt->charset = XML_CHAR_ENCODING_UTF8;
14883     ctxt->catalogs = NULL;
14884     ctxt->nbentities = 0;
14885     ctxt->sizeentities = 0;
14886     ctxt->sizeentcopy = 0;
14887     xmlInitNodeInfoSeq(&ctxt->node_seq);
14888 
14889     if (ctxt->attsDefault != NULL) {
14890         xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14891         ctxt->attsDefault = NULL;
14892     }
14893     if (ctxt->attsSpecial != NULL) {
14894         xmlHashFree(ctxt->attsSpecial, NULL);
14895         ctxt->attsSpecial = NULL;
14896     }
14897 
14898 #ifdef LIBXML_CATALOG_ENABLED
14899     if (ctxt->catalogs != NULL)
14900 	xmlCatalogFreeLocal(ctxt->catalogs);
14901 #endif
14902     if (ctxt->lastError.code != XML_ERR_OK)
14903         xmlResetError(&ctxt->lastError);
14904 }
14905 
14906 /**
14907  * xmlCtxtResetPush:
14908  * @ctxt: an XML parser context
14909  * @chunk:  a pointer to an array of chars
14910  * @size:  number of chars in the array
14911  * @filename:  an optional file name or URI
14912  * @encoding:  the document encoding, or NULL
14913  *
14914  * Reset a push parser context
14915  *
14916  * Returns 0 in case of success and 1 in case of error
14917  */
14918 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)14919 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14920                  int size, const char *filename, const char *encoding)
14921 {
14922     xmlParserInputPtr inputStream;
14923     xmlParserInputBufferPtr buf;
14924     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14925 
14926     if (ctxt == NULL)
14927         return(1);
14928 
14929     if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14930         enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14931 
14932     buf = xmlAllocParserInputBuffer(enc);
14933     if (buf == NULL)
14934         return(1);
14935 
14936     if (ctxt == NULL) {
14937         xmlFreeParserInputBuffer(buf);
14938         return(1);
14939     }
14940 
14941     xmlCtxtReset(ctxt);
14942 
14943     if (filename == NULL) {
14944         ctxt->directory = NULL;
14945     } else {
14946         ctxt->directory = xmlParserGetDirectory(filename);
14947     }
14948 
14949     inputStream = xmlNewInputStream(ctxt);
14950     if (inputStream == NULL) {
14951         xmlFreeParserInputBuffer(buf);
14952         return(1);
14953     }
14954 
14955     if (filename == NULL)
14956         inputStream->filename = NULL;
14957     else
14958         inputStream->filename = (char *)
14959             xmlCanonicPath((const xmlChar *) filename);
14960     inputStream->buf = buf;
14961     xmlBufResetInput(buf->buffer, inputStream);
14962 
14963     inputPush(ctxt, inputStream);
14964 
14965     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14966         (ctxt->input->buf != NULL)) {
14967 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14968         size_t cur = ctxt->input->cur - ctxt->input->base;
14969 
14970         xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14971 
14972         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14973 #ifdef DEBUG_PUSH
14974         xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14975 #endif
14976     }
14977 
14978     if (encoding != NULL) {
14979         xmlCharEncodingHandlerPtr hdlr;
14980 
14981         if (ctxt->encoding != NULL)
14982 	    xmlFree((xmlChar *) ctxt->encoding);
14983         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14984 
14985         hdlr = xmlFindCharEncodingHandler(encoding);
14986         if (hdlr != NULL) {
14987             xmlSwitchToEncoding(ctxt, hdlr);
14988 	} else {
14989 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14990 			      "Unsupported encoding %s\n", BAD_CAST encoding);
14991         }
14992     } else if (enc != XML_CHAR_ENCODING_NONE) {
14993         xmlSwitchEncoding(ctxt, enc);
14994     }
14995 
14996     return(0);
14997 }
14998 
14999 
15000 /**
15001  * xmlCtxtUseOptionsInternal:
15002  * @ctxt: an XML parser context
15003  * @options:  a combination of xmlParserOption
15004  * @encoding:  the user provided encoding to use
15005  *
15006  * Applies the options to the parser context
15007  *
15008  * Returns 0 in case of success, the set of unknown or unimplemented options
15009  *         in case of error.
15010  */
15011 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)15012 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15013 {
15014     if (ctxt == NULL)
15015         return(-1);
15016     if (encoding != NULL) {
15017         if (ctxt->encoding != NULL)
15018 	    xmlFree((xmlChar *) ctxt->encoding);
15019         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15020     }
15021     if (options & XML_PARSE_RECOVER) {
15022         ctxt->recovery = 1;
15023         options -= XML_PARSE_RECOVER;
15024 	ctxt->options |= XML_PARSE_RECOVER;
15025     } else
15026         ctxt->recovery = 0;
15027     if (options & XML_PARSE_DTDLOAD) {
15028         ctxt->loadsubset = XML_DETECT_IDS;
15029         options -= XML_PARSE_DTDLOAD;
15030 	ctxt->options |= XML_PARSE_DTDLOAD;
15031     } else
15032         ctxt->loadsubset = 0;
15033     if (options & XML_PARSE_DTDATTR) {
15034         ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15035         options -= XML_PARSE_DTDATTR;
15036 	ctxt->options |= XML_PARSE_DTDATTR;
15037     }
15038     if (options & XML_PARSE_NOENT) {
15039         ctxt->replaceEntities = 1;
15040         /* ctxt->loadsubset |= XML_DETECT_IDS; */
15041         options -= XML_PARSE_NOENT;
15042 	ctxt->options |= XML_PARSE_NOENT;
15043     } else
15044         ctxt->replaceEntities = 0;
15045     if (options & XML_PARSE_PEDANTIC) {
15046         ctxt->pedantic = 1;
15047         options -= XML_PARSE_PEDANTIC;
15048 	ctxt->options |= XML_PARSE_PEDANTIC;
15049     } else
15050         ctxt->pedantic = 0;
15051     if (options & XML_PARSE_NOBLANKS) {
15052         ctxt->keepBlanks = 0;
15053         ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15054         options -= XML_PARSE_NOBLANKS;
15055 	ctxt->options |= XML_PARSE_NOBLANKS;
15056     } else
15057         ctxt->keepBlanks = 1;
15058     if (options & XML_PARSE_DTDVALID) {
15059         ctxt->validate = 1;
15060         if (options & XML_PARSE_NOWARNING)
15061             ctxt->vctxt.warning = NULL;
15062         if (options & XML_PARSE_NOERROR)
15063             ctxt->vctxt.error = NULL;
15064         options -= XML_PARSE_DTDVALID;
15065 	ctxt->options |= XML_PARSE_DTDVALID;
15066     } else
15067         ctxt->validate = 0;
15068     if (options & XML_PARSE_NOWARNING) {
15069         ctxt->sax->warning = NULL;
15070         options -= XML_PARSE_NOWARNING;
15071     }
15072     if (options & XML_PARSE_NOERROR) {
15073         ctxt->sax->error = NULL;
15074         ctxt->sax->fatalError = NULL;
15075         options -= XML_PARSE_NOERROR;
15076     }
15077 #ifdef LIBXML_SAX1_ENABLED
15078     if (options & XML_PARSE_SAX1) {
15079         ctxt->sax->startElement = xmlSAX2StartElement;
15080         ctxt->sax->endElement = xmlSAX2EndElement;
15081         ctxt->sax->startElementNs = NULL;
15082         ctxt->sax->endElementNs = NULL;
15083         ctxt->sax->initialized = 1;
15084         options -= XML_PARSE_SAX1;
15085 	ctxt->options |= XML_PARSE_SAX1;
15086     }
15087 #endif /* LIBXML_SAX1_ENABLED */
15088     if (options & XML_PARSE_NODICT) {
15089         ctxt->dictNames = 0;
15090         options -= XML_PARSE_NODICT;
15091 	ctxt->options |= XML_PARSE_NODICT;
15092     } else {
15093         ctxt->dictNames = 1;
15094     }
15095     if (options & XML_PARSE_NOCDATA) {
15096         ctxt->sax->cdataBlock = NULL;
15097         options -= XML_PARSE_NOCDATA;
15098 	ctxt->options |= XML_PARSE_NOCDATA;
15099     }
15100     if (options & XML_PARSE_NSCLEAN) {
15101 	ctxt->options |= XML_PARSE_NSCLEAN;
15102         options -= XML_PARSE_NSCLEAN;
15103     }
15104     if (options & XML_PARSE_NONET) {
15105 	ctxt->options |= XML_PARSE_NONET;
15106         options -= XML_PARSE_NONET;
15107     }
15108     if (options & XML_PARSE_COMPACT) {
15109 	ctxt->options |= XML_PARSE_COMPACT;
15110         options -= XML_PARSE_COMPACT;
15111     }
15112     if (options & XML_PARSE_OLD10) {
15113 	ctxt->options |= XML_PARSE_OLD10;
15114         options -= XML_PARSE_OLD10;
15115     }
15116     if (options & XML_PARSE_NOBASEFIX) {
15117 	ctxt->options |= XML_PARSE_NOBASEFIX;
15118         options -= XML_PARSE_NOBASEFIX;
15119     }
15120     if (options & XML_PARSE_HUGE) {
15121 	ctxt->options |= XML_PARSE_HUGE;
15122         options -= XML_PARSE_HUGE;
15123         if (ctxt->dict != NULL)
15124             xmlDictSetLimit(ctxt->dict, 0);
15125     }
15126     if (options & XML_PARSE_OLDSAX) {
15127 	ctxt->options |= XML_PARSE_OLDSAX;
15128         options -= XML_PARSE_OLDSAX;
15129     }
15130     if (options & XML_PARSE_IGNORE_ENC) {
15131 	ctxt->options |= XML_PARSE_IGNORE_ENC;
15132         options -= XML_PARSE_IGNORE_ENC;
15133     }
15134     if (options & XML_PARSE_BIG_LINES) {
15135 	ctxt->options |= XML_PARSE_BIG_LINES;
15136         options -= XML_PARSE_BIG_LINES;
15137     }
15138     ctxt->linenumbers = 1;
15139     return (options);
15140 }
15141 
15142 /**
15143  * xmlCtxtUseOptions:
15144  * @ctxt: an XML parser context
15145  * @options:  a combination of xmlParserOption
15146  *
15147  * Applies the options to the parser context
15148  *
15149  * Returns 0 in case of success, the set of unknown or unimplemented options
15150  *         in case of error.
15151  */
15152 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)15153 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15154 {
15155    return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15156 }
15157 
15158 /**
15159  * xmlDoRead:
15160  * @ctxt:  an XML parser context
15161  * @URL:  the base URL to use for the document
15162  * @encoding:  the document encoding, or NULL
15163  * @options:  a combination of xmlParserOption
15164  * @reuse:  keep the context for reuse
15165  *
15166  * Common front-end for the xmlRead functions
15167  *
15168  * Returns the resulting document tree or NULL
15169  */
15170 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)15171 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15172           int options, int reuse)
15173 {
15174     xmlDocPtr ret;
15175 
15176     xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15177     if (encoding != NULL) {
15178         xmlCharEncodingHandlerPtr hdlr;
15179 
15180 	hdlr = xmlFindCharEncodingHandler(encoding);
15181 	if (hdlr != NULL)
15182 	    xmlSwitchToEncoding(ctxt, hdlr);
15183     }
15184     if ((URL != NULL) && (ctxt->input != NULL) &&
15185         (ctxt->input->filename == NULL))
15186         ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15187     xmlParseDocument(ctxt);
15188     if ((ctxt->wellFormed) || ctxt->recovery)
15189         ret = ctxt->myDoc;
15190     else {
15191         ret = NULL;
15192 	if (ctxt->myDoc != NULL) {
15193 	    xmlFreeDoc(ctxt->myDoc);
15194 	}
15195     }
15196     ctxt->myDoc = NULL;
15197     if (!reuse) {
15198 	xmlFreeParserCtxt(ctxt);
15199     }
15200 
15201     return (ret);
15202 }
15203 
15204 /**
15205  * xmlReadDoc:
15206  * @cur:  a pointer to a zero terminated string
15207  * @URL:  the base URL to use for the document
15208  * @encoding:  the document encoding, or NULL
15209  * @options:  a combination of xmlParserOption
15210  *
15211  * parse an XML in-memory document and build a tree.
15212  *
15213  * Returns the resulting document tree
15214  */
15215 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)15216 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15217 {
15218     xmlParserCtxtPtr ctxt;
15219 
15220     if (cur == NULL)
15221         return (NULL);
15222     xmlInitParser();
15223 
15224     ctxt = xmlCreateDocParserCtxt(cur);
15225     if (ctxt == NULL)
15226         return (NULL);
15227     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15228 }
15229 
15230 /**
15231  * xmlReadFile:
15232  * @filename:  a file or URL
15233  * @encoding:  the document encoding, or NULL
15234  * @options:  a combination of xmlParserOption
15235  *
15236  * parse an XML file from the filesystem or the network.
15237  *
15238  * Returns the resulting document tree
15239  */
15240 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)15241 xmlReadFile(const char *filename, const char *encoding, int options)
15242 {
15243     xmlParserCtxtPtr ctxt;
15244 
15245     xmlInitParser();
15246     ctxt = xmlCreateURLParserCtxt(filename, options);
15247     if (ctxt == NULL)
15248         return (NULL);
15249     return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15250 }
15251 
15252 /**
15253  * xmlReadMemory:
15254  * @buffer:  a pointer to a char array
15255  * @size:  the size of the array
15256  * @URL:  the base URL to use for the document
15257  * @encoding:  the document encoding, or NULL
15258  * @options:  a combination of xmlParserOption
15259  *
15260  * parse an XML in-memory document and build a tree.
15261  *
15262  * Returns the resulting document tree
15263  */
15264 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)15265 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15266 {
15267     xmlParserCtxtPtr ctxt;
15268 
15269     xmlInitParser();
15270     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15271     if (ctxt == NULL)
15272         return (NULL);
15273     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15274 }
15275 
15276 /**
15277  * xmlReadFd:
15278  * @fd:  an open file descriptor
15279  * @URL:  the base URL to use for the document
15280  * @encoding:  the document encoding, or NULL
15281  * @options:  a combination of xmlParserOption
15282  *
15283  * parse an XML from a file descriptor and build a tree.
15284  * NOTE that the file descriptor will not be closed when the
15285  *      reader is closed or reset.
15286  *
15287  * Returns the resulting document tree
15288  */
15289 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)15290 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15291 {
15292     xmlParserCtxtPtr ctxt;
15293     xmlParserInputBufferPtr input;
15294     xmlParserInputPtr stream;
15295 
15296     if (fd < 0)
15297         return (NULL);
15298     xmlInitParser();
15299 
15300     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15301     if (input == NULL)
15302         return (NULL);
15303     input->closecallback = NULL;
15304     ctxt = xmlNewParserCtxt();
15305     if (ctxt == NULL) {
15306         xmlFreeParserInputBuffer(input);
15307         return (NULL);
15308     }
15309     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15310     if (stream == NULL) {
15311         xmlFreeParserInputBuffer(input);
15312 	xmlFreeParserCtxt(ctxt);
15313         return (NULL);
15314     }
15315     inputPush(ctxt, stream);
15316     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15317 }
15318 
15319 /**
15320  * xmlReadIO:
15321  * @ioread:  an I/O read function
15322  * @ioclose:  an I/O close function
15323  * @ioctx:  an I/O handler
15324  * @URL:  the base URL to use for the document
15325  * @encoding:  the document encoding, or NULL
15326  * @options:  a combination of xmlParserOption
15327  *
15328  * parse an XML document from I/O functions and source and build a tree.
15329  *
15330  * Returns the resulting document tree
15331  */
15332 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15333 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15334           void *ioctx, const char *URL, const char *encoding, int options)
15335 {
15336     xmlParserCtxtPtr ctxt;
15337     xmlParserInputBufferPtr input;
15338     xmlParserInputPtr stream;
15339 
15340     if (ioread == NULL)
15341         return (NULL);
15342     xmlInitParser();
15343 
15344     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15345                                          XML_CHAR_ENCODING_NONE);
15346     if (input == NULL) {
15347         if (ioclose != NULL)
15348             ioclose(ioctx);
15349         return (NULL);
15350     }
15351     ctxt = xmlNewParserCtxt();
15352     if (ctxt == NULL) {
15353         xmlFreeParserInputBuffer(input);
15354         return (NULL);
15355     }
15356     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15357     if (stream == NULL) {
15358         xmlFreeParserInputBuffer(input);
15359 	xmlFreeParserCtxt(ctxt);
15360         return (NULL);
15361     }
15362     inputPush(ctxt, stream);
15363     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15364 }
15365 
15366 /**
15367  * xmlCtxtReadDoc:
15368  * @ctxt:  an XML parser context
15369  * @cur:  a pointer to a zero terminated string
15370  * @URL:  the base URL to use for the document
15371  * @encoding:  the document encoding, or NULL
15372  * @options:  a combination of xmlParserOption
15373  *
15374  * parse an XML in-memory document and build a tree.
15375  * This reuses the existing @ctxt parser context
15376  *
15377  * Returns the resulting document tree
15378  */
15379 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)15380 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15381                const char *URL, const char *encoding, int options)
15382 {
15383     xmlParserInputPtr stream;
15384 
15385     if (cur == NULL)
15386         return (NULL);
15387     if (ctxt == NULL)
15388         return (NULL);
15389     xmlInitParser();
15390 
15391     xmlCtxtReset(ctxt);
15392 
15393     stream = xmlNewStringInputStream(ctxt, cur);
15394     if (stream == NULL) {
15395         return (NULL);
15396     }
15397     inputPush(ctxt, stream);
15398     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15399 }
15400 
15401 /**
15402  * xmlCtxtReadFile:
15403  * @ctxt:  an XML parser context
15404  * @filename:  a file or URL
15405  * @encoding:  the document encoding, or NULL
15406  * @options:  a combination of xmlParserOption
15407  *
15408  * parse an XML file from the filesystem or the network.
15409  * This reuses the existing @ctxt parser context
15410  *
15411  * Returns the resulting document tree
15412  */
15413 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)15414 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15415                 const char *encoding, int options)
15416 {
15417     xmlParserInputPtr stream;
15418 
15419     if (filename == NULL)
15420         return (NULL);
15421     if (ctxt == NULL)
15422         return (NULL);
15423     xmlInitParser();
15424 
15425     xmlCtxtReset(ctxt);
15426 
15427     stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15428     if (stream == NULL) {
15429         return (NULL);
15430     }
15431     inputPush(ctxt, stream);
15432     return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15433 }
15434 
15435 /**
15436  * xmlCtxtReadMemory:
15437  * @ctxt:  an XML parser context
15438  * @buffer:  a pointer to a char array
15439  * @size:  the size of the array
15440  * @URL:  the base URL to use for the document
15441  * @encoding:  the document encoding, or NULL
15442  * @options:  a combination of xmlParserOption
15443  *
15444  * parse an XML in-memory document and build a tree.
15445  * This reuses the existing @ctxt parser context
15446  *
15447  * Returns the resulting document tree
15448  */
15449 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)15450 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15451                   const char *URL, const char *encoding, int options)
15452 {
15453     xmlParserInputBufferPtr input;
15454     xmlParserInputPtr stream;
15455 
15456     if (ctxt == NULL)
15457         return (NULL);
15458     if (buffer == NULL)
15459         return (NULL);
15460     xmlInitParser();
15461 
15462     xmlCtxtReset(ctxt);
15463 
15464     input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15465     if (input == NULL) {
15466 	return(NULL);
15467     }
15468 
15469     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15470     if (stream == NULL) {
15471 	xmlFreeParserInputBuffer(input);
15472 	return(NULL);
15473     }
15474 
15475     inputPush(ctxt, stream);
15476     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15477 }
15478 
15479 /**
15480  * xmlCtxtReadFd:
15481  * @ctxt:  an XML parser context
15482  * @fd:  an open file descriptor
15483  * @URL:  the base URL to use for the document
15484  * @encoding:  the document encoding, or NULL
15485  * @options:  a combination of xmlParserOption
15486  *
15487  * parse an XML from a file descriptor and build a tree.
15488  * This reuses the existing @ctxt parser context
15489  * NOTE that the file descriptor will not be closed when the
15490  *      reader is closed or reset.
15491  *
15492  * Returns the resulting document tree
15493  */
15494 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)15495 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15496               const char *URL, const char *encoding, int options)
15497 {
15498     xmlParserInputBufferPtr input;
15499     xmlParserInputPtr stream;
15500 
15501     if (fd < 0)
15502         return (NULL);
15503     if (ctxt == NULL)
15504         return (NULL);
15505     xmlInitParser();
15506 
15507     xmlCtxtReset(ctxt);
15508 
15509 
15510     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15511     if (input == NULL)
15512         return (NULL);
15513     input->closecallback = NULL;
15514     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15515     if (stream == NULL) {
15516         xmlFreeParserInputBuffer(input);
15517         return (NULL);
15518     }
15519     inputPush(ctxt, stream);
15520     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15521 }
15522 
15523 /**
15524  * xmlCtxtReadIO:
15525  * @ctxt:  an XML parser context
15526  * @ioread:  an I/O read function
15527  * @ioclose:  an I/O close function
15528  * @ioctx:  an I/O handler
15529  * @URL:  the base URL to use for the document
15530  * @encoding:  the document encoding, or NULL
15531  * @options:  a combination of xmlParserOption
15532  *
15533  * parse an XML document from I/O functions and source and build a tree.
15534  * This reuses the existing @ctxt parser context
15535  *
15536  * Returns the resulting document tree
15537  */
15538 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15539 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15540               xmlInputCloseCallback ioclose, void *ioctx,
15541 	      const char *URL,
15542               const char *encoding, int options)
15543 {
15544     xmlParserInputBufferPtr input;
15545     xmlParserInputPtr stream;
15546 
15547     if (ioread == NULL)
15548         return (NULL);
15549     if (ctxt == NULL)
15550         return (NULL);
15551     xmlInitParser();
15552 
15553     xmlCtxtReset(ctxt);
15554 
15555     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15556                                          XML_CHAR_ENCODING_NONE);
15557     if (input == NULL) {
15558         if (ioclose != NULL)
15559             ioclose(ioctx);
15560         return (NULL);
15561     }
15562     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15563     if (stream == NULL) {
15564         xmlFreeParserInputBuffer(input);
15565         return (NULL);
15566     }
15567     inputPush(ctxt, stream);
15568     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15569 }
15570 
15571