• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3  *            implemented on top of the SAX interfaces
4  *
5  * References:
6  *   The XML specification:
7  *     http://www.w3.org/TR/REC-xml
8  *   Original 1.0 version:
9  *     http://www.w3.org/TR/1998/REC-xml-19980210
10  *   XML second edition working draft
11  *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12  *
13  * Okay this is a big file, the parser core is around 7000 lines, then it
14  * is followed by the progressive parser top routines, then the various
15  * high level APIs to call the parser and a few miscellaneous functions.
16  * A number of helper functions and deprecated ones have been moved to
17  * parserInternals.c to reduce this file size.
18  * As much as possible the functions are associated with their relative
19  * production in the XML specification. A few productions defining the
20  * different ranges of character are actually implemented either in
21  * parserInternals.h or parserInternals.c
22  * The DOM tree build is realized from the default SAX callbacks in
23  * the module SAX.c.
24  * The routines doing the validation checks are in valid.c and called either
25  * from the SAX callbacks or as standalone functions using a preparsed
26  * document.
27  *
28  * See Copyright for the status of this software.
29  *
30  * daniel@veillard.com
31  */
32 
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37 
38 #define IN_LIBXML
39 #include "libxml.h"
40 
41 #if defined(_WIN32) && !defined (__CYGWIN__)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
46 
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <libxml/xmlmemory.h>
53 #include <libxml/threads.h>
54 #include <libxml/globals.h>
55 #include <libxml/tree.h>
56 #include <libxml/parser.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #ifdef LIBXML_CATALOG_ENABLED
65 #include <libxml/catalog.h>
66 #endif
67 #ifdef LIBXML_SCHEMAS_ENABLED
68 #include <libxml/xmlschemastypes.h>
69 #include <libxml/relaxng.h>
70 #endif
71 #ifdef HAVE_CTYPE_H
72 #include <ctype.h>
73 #endif
74 #ifdef HAVE_STDLIB_H
75 #include <stdlib.h>
76 #endif
77 #ifdef HAVE_SYS_STAT_H
78 #include <sys/stat.h>
79 #endif
80 #ifdef HAVE_FCNTL_H
81 #include <fcntl.h>
82 #endif
83 #ifdef HAVE_UNISTD_H
84 #include <unistd.h>
85 #endif
86 
87 #include "buf.h"
88 #include "enc.h"
89 
90 static void
91 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92 
93 static xmlParserCtxtPtr
94 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 	                  const xmlChar *base, xmlParserCtxtPtr pctx);
96 
97 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
98 
99 /************************************************************************
100  *									*
101  *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
102  *									*
103  ************************************************************************/
104 
105 #define XML_PARSER_BIG_ENTITY 1000
106 #define XML_PARSER_LOT_ENTITY 5000
107 
108 /*
109  * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
110  *    replacement over the size in bytes of the input indicates that you have
111  *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
112  *    replacements per byte of input.
113  */
114 #define XML_PARSER_NON_LINEAR 10
115 
116 /*
117  * xmlParserEntityCheck
118  *
119  * Function to check non-linear entity expansion behaviour
120  * This is here to detect and stop exponential linear entity expansion
121  * This is not a limitation of the parser but a safety
122  * boundary feature. It can be disabled with the XML_PARSE_HUGE
123  * parser option.
124  */
125 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,size_t size,xmlEntityPtr ent,size_t replacement)126 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
127                      xmlEntityPtr ent, size_t replacement)
128 {
129     size_t consumed = 0;
130 
131     if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
132         return (0);
133     if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
134         return (1);
135 
136     /*
137      * This may look absurd but is needed to detect
138      * entities problems
139      */
140     if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
141 	(ent->content != NULL) && (ent->checked == 0) &&
142 	(ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
143 	unsigned long oldnbent = ctxt->nbentities;
144 	xmlChar *rep;
145 
146 	ent->checked = 1;
147 
148         ++ctxt->depth;
149 	rep = xmlStringDecodeEntities(ctxt, ent->content,
150 				  XML_SUBSTITUTE_REF, 0, 0, 0);
151         --ctxt->depth;
152 	if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
153 	    ent->content[0] = 0;
154 	}
155 
156 	ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
157 	if (rep != NULL) {
158 	    if (xmlStrchr(rep, '<'))
159 		ent->checked |= 1;
160 	    xmlFree(rep);
161 	    rep = NULL;
162 	}
163     }
164     if (replacement != 0) {
165 	if (replacement < XML_MAX_TEXT_LENGTH)
166 	    return(0);
167 
168         /*
169 	 * If the volume of entity copy reaches 10 times the
170 	 * amount of parsed data and over the large text threshold
171 	 * then that's very likely to be an abuse.
172 	 */
173         if (ctxt->input != NULL) {
174 	    consumed = ctxt->input->consumed +
175 	               (ctxt->input->cur - ctxt->input->base);
176 	}
177         consumed += ctxt->sizeentities;
178 
179         if (replacement < XML_PARSER_NON_LINEAR * consumed)
180 	    return(0);
181     } else if (size != 0) {
182         /*
183          * Do the check based on the replacement size of the entity
184          */
185         if (size < XML_PARSER_BIG_ENTITY)
186 	    return(0);
187 
188         /*
189          * A limit on the amount of text data reasonably used
190          */
191         if (ctxt->input != NULL) {
192             consumed = ctxt->input->consumed +
193                 (ctxt->input->cur - ctxt->input->base);
194         }
195         consumed += ctxt->sizeentities;
196 
197         if ((size < XML_PARSER_NON_LINEAR * consumed) &&
198 	    (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
199             return (0);
200     } else if (ent != NULL) {
201         /*
202          * use the number of parsed entities in the replacement
203          */
204         size = ent->checked / 2;
205 
206         /*
207          * The amount of data parsed counting entities size only once
208          */
209         if (ctxt->input != NULL) {
210             consumed = ctxt->input->consumed +
211                 (ctxt->input->cur - ctxt->input->base);
212         }
213         consumed += ctxt->sizeentities;
214 
215         /*
216          * Check the density of entities for the amount of data
217 	 * knowing an entity reference will take at least 3 bytes
218          */
219         if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
220             return (0);
221     } else {
222         /*
223          * strange we got no data for checking
224          */
225 	if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
226 	     (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
227 	    (ctxt->nbentities <= 10000))
228 	    return (0);
229     }
230     xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
231     return (1);
232 }
233 
234 /**
235  * xmlParserMaxDepth:
236  *
237  * Arbitrary depth limit (256 by default) for the XML documents that we
238  * allow to be processed. This is not a limitation of the parser but a
239  * safety boundary feature. It can be disabled with the XML_PARSE_HUGE
240  * parser option.
241  */
242 unsigned int xmlParserMaxDepth = 256;
243 
244 
245 
246 #define SAX2 1
247 #define XML_PARSER_BIG_BUFFER_SIZE 300
248 #define XML_PARSER_BUFFER_SIZE 100
249 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
250 
251 /**
252  * XML_PARSER_CHUNK_SIZE
253  *
254  * When calling GROW that's the minimal amount of data
255  * the parser expected to have received. It is not a hard
256  * limit but an optimization when reading strings like Names
257  * It is not strictly needed as long as inputs available characters
258  * are followed by 0, which should be provided by the I/O level
259  */
260 #define XML_PARSER_CHUNK_SIZE 100
261 
262 /*
263  * NULL terminated list of XML prefixed PI allowed by W3C specs
264  */
265 
266 static const char *xmlW3CPIs[] = {
267     "xml-stylesheet",
268     "xml-model",
269     NULL
270 };
271 
272 
273 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
274 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
275                                               const xmlChar **str);
276 
277 static xmlParserErrors
278 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
279 	              xmlSAXHandlerPtr sax,
280 		      void *user_data, int depth, const xmlChar *URL,
281 		      const xmlChar *ID, xmlNodePtr *list);
282 
283 static int
284 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
285                           const char *encoding);
286 #ifdef LIBXML_LEGACY_ENABLED
287 static void
288 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
289                       xmlNodePtr lastNode);
290 #endif /* LIBXML_LEGACY_ENABLED */
291 
292 static xmlParserErrors
293 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
294 		      const xmlChar *string, void *user_data, xmlNodePtr *lst);
295 
296 static int
297 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
298 
299 /************************************************************************
300  *									*
301  *		Some factorized error routines				*
302  *									*
303  ************************************************************************/
304 
305 /**
306  * xmlErrAttributeDup:
307  * @ctxt:  an XML parser context
308  * @prefix:  the attribute prefix
309  * @localname:  the attribute localname
310  *
311  * Handle a redefinition of attribute error
312  */
313 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)314 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
315                    const xmlChar * localname)
316 {
317     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
318         (ctxt->instate == XML_PARSER_EOF))
319 	return;
320     if (ctxt != NULL)
321 	ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
322 
323     if (prefix == NULL)
324         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
325                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
326                         (const char *) localname, NULL, NULL, 0, 0,
327                         "Attribute %s redefined\n", localname);
328     else
329         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
330                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
331                         (const char *) prefix, (const char *) localname,
332                         NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
333                         localname);
334     if (ctxt != NULL) {
335 	ctxt->wellFormed = 0;
336 	if (ctxt->recovery == 0)
337 	    ctxt->disableSAX = 1;
338     }
339 }
340 
341 /**
342  * xmlFatalErr:
343  * @ctxt:  an XML parser context
344  * @error:  the error number
345  * @extra:  extra information string
346  *
347  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
348  */
349 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)350 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
351 {
352     const char *errmsg;
353 
354     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
355         (ctxt->instate == XML_PARSER_EOF))
356 	return;
357     switch (error) {
358         case XML_ERR_INVALID_HEX_CHARREF:
359             errmsg = "CharRef: invalid hexadecimal value";
360             break;
361         case XML_ERR_INVALID_DEC_CHARREF:
362             errmsg = "CharRef: invalid decimal value";
363             break;
364         case XML_ERR_INVALID_CHARREF:
365             errmsg = "CharRef: invalid value";
366             break;
367         case XML_ERR_INTERNAL_ERROR:
368             errmsg = "internal error";
369             break;
370         case XML_ERR_PEREF_AT_EOF:
371             errmsg = "PEReference at end of document";
372             break;
373         case XML_ERR_PEREF_IN_PROLOG:
374             errmsg = "PEReference in prolog";
375             break;
376         case XML_ERR_PEREF_IN_EPILOG:
377             errmsg = "PEReference in epilog";
378             break;
379         case XML_ERR_PEREF_NO_NAME:
380             errmsg = "PEReference: no name";
381             break;
382         case XML_ERR_PEREF_SEMICOL_MISSING:
383             errmsg = "PEReference: expecting ';'";
384             break;
385         case XML_ERR_ENTITY_LOOP:
386             errmsg = "Detected an entity reference loop";
387             break;
388         case XML_ERR_ENTITY_NOT_STARTED:
389             errmsg = "EntityValue: \" or ' expected";
390             break;
391         case XML_ERR_ENTITY_PE_INTERNAL:
392             errmsg = "PEReferences forbidden in internal subset";
393             break;
394         case XML_ERR_ENTITY_NOT_FINISHED:
395             errmsg = "EntityValue: \" or ' expected";
396             break;
397         case XML_ERR_ATTRIBUTE_NOT_STARTED:
398             errmsg = "AttValue: \" or ' expected";
399             break;
400         case XML_ERR_LT_IN_ATTRIBUTE:
401             errmsg = "Unescaped '<' not allowed in attributes values";
402             break;
403         case XML_ERR_LITERAL_NOT_STARTED:
404             errmsg = "SystemLiteral \" or ' expected";
405             break;
406         case XML_ERR_LITERAL_NOT_FINISHED:
407             errmsg = "Unfinished System or Public ID \" or ' expected";
408             break;
409         case XML_ERR_MISPLACED_CDATA_END:
410             errmsg = "Sequence ']]>' not allowed in content";
411             break;
412         case XML_ERR_URI_REQUIRED:
413             errmsg = "SYSTEM or PUBLIC, the URI is missing";
414             break;
415         case XML_ERR_PUBID_REQUIRED:
416             errmsg = "PUBLIC, the Public Identifier is missing";
417             break;
418         case XML_ERR_HYPHEN_IN_COMMENT:
419             errmsg = "Comment must not contain '--' (double-hyphen)";
420             break;
421         case XML_ERR_PI_NOT_STARTED:
422             errmsg = "xmlParsePI : no target name";
423             break;
424         case XML_ERR_RESERVED_XML_NAME:
425             errmsg = "Invalid PI name";
426             break;
427         case XML_ERR_NOTATION_NOT_STARTED:
428             errmsg = "NOTATION: Name expected here";
429             break;
430         case XML_ERR_NOTATION_NOT_FINISHED:
431             errmsg = "'>' required to close NOTATION declaration";
432             break;
433         case XML_ERR_VALUE_REQUIRED:
434             errmsg = "Entity value required";
435             break;
436         case XML_ERR_URI_FRAGMENT:
437             errmsg = "Fragment not allowed";
438             break;
439         case XML_ERR_ATTLIST_NOT_STARTED:
440             errmsg = "'(' required to start ATTLIST enumeration";
441             break;
442         case XML_ERR_NMTOKEN_REQUIRED:
443             errmsg = "NmToken expected in ATTLIST enumeration";
444             break;
445         case XML_ERR_ATTLIST_NOT_FINISHED:
446             errmsg = "')' required to finish ATTLIST enumeration";
447             break;
448         case XML_ERR_MIXED_NOT_STARTED:
449             errmsg = "MixedContentDecl : '|' or ')*' expected";
450             break;
451         case XML_ERR_PCDATA_REQUIRED:
452             errmsg = "MixedContentDecl : '#PCDATA' expected";
453             break;
454         case XML_ERR_ELEMCONTENT_NOT_STARTED:
455             errmsg = "ContentDecl : Name or '(' expected";
456             break;
457         case XML_ERR_ELEMCONTENT_NOT_FINISHED:
458             errmsg = "ContentDecl : ',' '|' or ')' expected";
459             break;
460         case XML_ERR_PEREF_IN_INT_SUBSET:
461             errmsg =
462                 "PEReference: forbidden within markup decl in internal subset";
463             break;
464         case XML_ERR_GT_REQUIRED:
465             errmsg = "expected '>'";
466             break;
467         case XML_ERR_CONDSEC_INVALID:
468             errmsg = "XML conditional section '[' expected";
469             break;
470         case XML_ERR_EXT_SUBSET_NOT_FINISHED:
471             errmsg = "Content error in the external subset";
472             break;
473         case XML_ERR_CONDSEC_INVALID_KEYWORD:
474             errmsg =
475                 "conditional section INCLUDE or IGNORE keyword expected";
476             break;
477         case XML_ERR_CONDSEC_NOT_FINISHED:
478             errmsg = "XML conditional section not closed";
479             break;
480         case XML_ERR_XMLDECL_NOT_STARTED:
481             errmsg = "Text declaration '<?xml' required";
482             break;
483         case XML_ERR_XMLDECL_NOT_FINISHED:
484             errmsg = "parsing XML declaration: '?>' expected";
485             break;
486         case XML_ERR_EXT_ENTITY_STANDALONE:
487             errmsg = "external parsed entities cannot be standalone";
488             break;
489         case XML_ERR_ENTITYREF_SEMICOL_MISSING:
490             errmsg = "EntityRef: expecting ';'";
491             break;
492         case XML_ERR_DOCTYPE_NOT_FINISHED:
493             errmsg = "DOCTYPE improperly terminated";
494             break;
495         case XML_ERR_LTSLASH_REQUIRED:
496             errmsg = "EndTag: '</' not found";
497             break;
498         case XML_ERR_EQUAL_REQUIRED:
499             errmsg = "expected '='";
500             break;
501         case XML_ERR_STRING_NOT_CLOSED:
502             errmsg = "String not closed expecting \" or '";
503             break;
504         case XML_ERR_STRING_NOT_STARTED:
505             errmsg = "String not started expecting ' or \"";
506             break;
507         case XML_ERR_ENCODING_NAME:
508             errmsg = "Invalid XML encoding name";
509             break;
510         case XML_ERR_STANDALONE_VALUE:
511             errmsg = "standalone accepts only 'yes' or 'no'";
512             break;
513         case XML_ERR_DOCUMENT_EMPTY:
514             errmsg = "Document is empty";
515             break;
516         case XML_ERR_DOCUMENT_END:
517             errmsg = "Extra content at the end of the document";
518             break;
519         case XML_ERR_NOT_WELL_BALANCED:
520             errmsg = "chunk is not well balanced";
521             break;
522         case XML_ERR_EXTRA_CONTENT:
523             errmsg = "extra content at the end of well balanced chunk";
524             break;
525         case XML_ERR_VERSION_MISSING:
526             errmsg = "Malformed declaration expecting version";
527             break;
528         case XML_ERR_NAME_TOO_LONG:
529             errmsg = "Name too long use XML_PARSE_HUGE option";
530             break;
531 #if 0
532         case:
533             errmsg = "";
534             break;
535 #endif
536         default:
537             errmsg = "Unregistered error message";
538     }
539     if (ctxt != NULL)
540 	ctxt->errNo = error;
541     if (info == NULL) {
542         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
543                         XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
544                         errmsg);
545     } else {
546         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
547                         XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
548                         errmsg, info);
549     }
550     if (ctxt != NULL) {
551 	ctxt->wellFormed = 0;
552 	if (ctxt->recovery == 0)
553 	    ctxt->disableSAX = 1;
554     }
555 }
556 
557 /**
558  * xmlFatalErrMsg:
559  * @ctxt:  an XML parser context
560  * @error:  the error number
561  * @msg:  the error message
562  *
563  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564  */
565 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)566 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567                const char *msg)
568 {
569     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
570         (ctxt->instate == XML_PARSER_EOF))
571 	return;
572     if (ctxt != NULL)
573 	ctxt->errNo = error;
574     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
575                     XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
576     if (ctxt != NULL) {
577 	ctxt->wellFormed = 0;
578 	if (ctxt->recovery == 0)
579 	    ctxt->disableSAX = 1;
580     }
581 }
582 
583 /**
584  * xmlWarningMsg:
585  * @ctxt:  an XML parser context
586  * @error:  the error number
587  * @msg:  the error message
588  * @str1:  extra data
589  * @str2:  extra data
590  *
591  * Handle a warning.
592  */
593 static void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)594 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595               const char *msg, const xmlChar *str1, const xmlChar *str2)
596 {
597     xmlStructuredErrorFunc schannel = NULL;
598 
599     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
600         (ctxt->instate == XML_PARSER_EOF))
601 	return;
602     if ((ctxt != NULL) && (ctxt->sax != NULL) &&
603         (ctxt->sax->initialized == XML_SAX2_MAGIC))
604         schannel = ctxt->sax->serror;
605     if (ctxt != NULL) {
606         __xmlRaiseError(schannel,
607                     (ctxt->sax) ? ctxt->sax->warning : NULL,
608                     ctxt->userData,
609                     ctxt, NULL, XML_FROM_PARSER, error,
610                     XML_ERR_WARNING, NULL, 0,
611 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
612 		    msg, (const char *) str1, (const char *) str2);
613     } else {
614         __xmlRaiseError(schannel, NULL, NULL,
615                     ctxt, NULL, XML_FROM_PARSER, error,
616                     XML_ERR_WARNING, NULL, 0,
617 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
618 		    msg, (const char *) str1, (const char *) str2);
619     }
620 }
621 
622 /**
623  * xmlValidityError:
624  * @ctxt:  an XML parser context
625  * @error:  the error number
626  * @msg:  the error message
627  * @str1:  extra data
628  *
629  * Handle a validity error.
630  */
631 static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)632 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
633               const char *msg, const xmlChar *str1, const xmlChar *str2)
634 {
635     xmlStructuredErrorFunc schannel = NULL;
636 
637     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
638         (ctxt->instate == XML_PARSER_EOF))
639 	return;
640     if (ctxt != NULL) {
641 	ctxt->errNo = error;
642 	if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
643 	    schannel = ctxt->sax->serror;
644     }
645     if (ctxt != NULL) {
646         __xmlRaiseError(schannel,
647                     ctxt->vctxt.error, ctxt->vctxt.userData,
648                     ctxt, NULL, XML_FROM_DTD, error,
649                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
650 		    (const char *) str2, NULL, 0, 0,
651 		    msg, (const char *) str1, (const char *) str2);
652 	ctxt->valid = 0;
653     } else {
654         __xmlRaiseError(schannel, NULL, NULL,
655                     ctxt, NULL, XML_FROM_DTD, error,
656                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
657 		    (const char *) str2, NULL, 0, 0,
658 		    msg, (const char *) str1, (const char *) str2);
659     }
660 }
661 
662 /**
663  * xmlFatalErrMsgInt:
664  * @ctxt:  an XML parser context
665  * @error:  the error number
666  * @msg:  the error message
667  * @val:  an integer value
668  *
669  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
670  */
671 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)672 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
673                   const char *msg, int val)
674 {
675     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
676         (ctxt->instate == XML_PARSER_EOF))
677 	return;
678     if (ctxt != NULL)
679 	ctxt->errNo = error;
680     __xmlRaiseError(NULL, NULL, NULL,
681                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
682                     NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
683     if (ctxt != NULL) {
684 	ctxt->wellFormed = 0;
685 	if (ctxt->recovery == 0)
686 	    ctxt->disableSAX = 1;
687     }
688 }
689 
690 /**
691  * xmlFatalErrMsgStrIntStr:
692  * @ctxt:  an XML parser context
693  * @error:  the error number
694  * @msg:  the error message
695  * @str1:  an string info
696  * @val:  an integer value
697  * @str2:  an string info
698  *
699  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
700  */
701 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)702 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
703                   const char *msg, const xmlChar *str1, int val,
704 		  const xmlChar *str2)
705 {
706     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
707         (ctxt->instate == XML_PARSER_EOF))
708 	return;
709     if (ctxt != NULL)
710 	ctxt->errNo = error;
711     __xmlRaiseError(NULL, NULL, NULL,
712                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
713                     NULL, 0, (const char *) str1, (const char *) str2,
714 		    NULL, val, 0, msg, str1, val, str2);
715     if (ctxt != NULL) {
716 	ctxt->wellFormed = 0;
717 	if (ctxt->recovery == 0)
718 	    ctxt->disableSAX = 1;
719     }
720 }
721 
722 /**
723  * xmlFatalErrMsgStr:
724  * @ctxt:  an XML parser context
725  * @error:  the error number
726  * @msg:  the error message
727  * @val:  a string value
728  *
729  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
730  */
731 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)732 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
733                   const char *msg, const xmlChar * val)
734 {
735     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
736         (ctxt->instate == XML_PARSER_EOF))
737 	return;
738     if (ctxt != NULL)
739 	ctxt->errNo = error;
740     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
741                     XML_FROM_PARSER, error, XML_ERR_FATAL,
742                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
743                     val);
744     if (ctxt != NULL) {
745 	ctxt->wellFormed = 0;
746 	if (ctxt->recovery == 0)
747 	    ctxt->disableSAX = 1;
748     }
749 }
750 
751 /**
752  * xmlErrMsgStr:
753  * @ctxt:  an XML parser context
754  * @error:  the error number
755  * @msg:  the error message
756  * @val:  a string value
757  *
758  * Handle a non fatal parser error
759  */
760 static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)761 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
762                   const char *msg, const xmlChar * val)
763 {
764     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
765         (ctxt->instate == XML_PARSER_EOF))
766 	return;
767     if (ctxt != NULL)
768 	ctxt->errNo = error;
769     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
770                     XML_FROM_PARSER, error, XML_ERR_ERROR,
771                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
772                     val);
773 }
774 
775 /**
776  * xmlNsErr:
777  * @ctxt:  an XML parser context
778  * @error:  the error number
779  * @msg:  the message
780  * @info1:  extra information string
781  * @info2:  extra information string
782  *
783  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
784  */
785 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)786 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
787          const char *msg,
788          const xmlChar * info1, const xmlChar * info2,
789          const xmlChar * info3)
790 {
791     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
792         (ctxt->instate == XML_PARSER_EOF))
793 	return;
794     if (ctxt != NULL)
795 	ctxt->errNo = error;
796     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
797                     XML_ERR_ERROR, NULL, 0, (const char *) info1,
798                     (const char *) info2, (const char *) info3, 0, 0, msg,
799                     info1, info2, info3);
800     if (ctxt != NULL)
801 	ctxt->nsWellFormed = 0;
802 }
803 
804 /**
805  * xmlNsWarn
806  * @ctxt:  an XML parser context
807  * @error:  the error number
808  * @msg:  the message
809  * @info1:  extra information string
810  * @info2:  extra information string
811  *
812  * Handle a namespace warning error
813  */
814 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)815 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
816          const char *msg,
817          const xmlChar * info1, const xmlChar * info2,
818          const xmlChar * info3)
819 {
820     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
821         (ctxt->instate == XML_PARSER_EOF))
822 	return;
823     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
824                     XML_ERR_WARNING, NULL, 0, (const char *) info1,
825                     (const char *) info2, (const char *) info3, 0, 0, msg,
826                     info1, info2, info3);
827 }
828 
829 /************************************************************************
830  *									*
831  *		Library wide options					*
832  *									*
833  ************************************************************************/
834 
835 /**
836   * xmlHasFeature:
837   * @feature: the feature to be examined
838   *
839   * Examines if the library has been compiled with a given feature.
840   *
841   * Returns a non-zero value if the feature exists, otherwise zero.
842   * Returns zero (0) if the feature does not exist or an
843   * unknown feature is requested, non-zero otherwise.
844   */
845 int
xmlHasFeature(xmlFeature feature)846 xmlHasFeature(xmlFeature feature)
847 {
848     switch (feature) {
849 	case XML_WITH_THREAD:
850 #ifdef LIBXML_THREAD_ENABLED
851 	    return(1);
852 #else
853 	    return(0);
854 #endif
855         case XML_WITH_TREE:
856 #ifdef LIBXML_TREE_ENABLED
857             return(1);
858 #else
859             return(0);
860 #endif
861         case XML_WITH_OUTPUT:
862 #ifdef LIBXML_OUTPUT_ENABLED
863             return(1);
864 #else
865             return(0);
866 #endif
867         case XML_WITH_PUSH:
868 #ifdef LIBXML_PUSH_ENABLED
869             return(1);
870 #else
871             return(0);
872 #endif
873         case XML_WITH_READER:
874 #ifdef LIBXML_READER_ENABLED
875             return(1);
876 #else
877             return(0);
878 #endif
879         case XML_WITH_PATTERN:
880 #ifdef LIBXML_PATTERN_ENABLED
881             return(1);
882 #else
883             return(0);
884 #endif
885         case XML_WITH_WRITER:
886 #ifdef LIBXML_WRITER_ENABLED
887             return(1);
888 #else
889             return(0);
890 #endif
891         case XML_WITH_SAX1:
892 #ifdef LIBXML_SAX1_ENABLED
893             return(1);
894 #else
895             return(0);
896 #endif
897         case XML_WITH_FTP:
898 #ifdef LIBXML_FTP_ENABLED
899             return(1);
900 #else
901             return(0);
902 #endif
903         case XML_WITH_HTTP:
904 #ifdef LIBXML_HTTP_ENABLED
905             return(1);
906 #else
907             return(0);
908 #endif
909         case XML_WITH_VALID:
910 #ifdef LIBXML_VALID_ENABLED
911             return(1);
912 #else
913             return(0);
914 #endif
915         case XML_WITH_HTML:
916 #ifdef LIBXML_HTML_ENABLED
917             return(1);
918 #else
919             return(0);
920 #endif
921         case XML_WITH_LEGACY:
922 #ifdef LIBXML_LEGACY_ENABLED
923             return(1);
924 #else
925             return(0);
926 #endif
927         case XML_WITH_C14N:
928 #ifdef LIBXML_C14N_ENABLED
929             return(1);
930 #else
931             return(0);
932 #endif
933         case XML_WITH_CATALOG:
934 #ifdef LIBXML_CATALOG_ENABLED
935             return(1);
936 #else
937             return(0);
938 #endif
939         case XML_WITH_XPATH:
940 #ifdef LIBXML_XPATH_ENABLED
941             return(1);
942 #else
943             return(0);
944 #endif
945         case XML_WITH_XPTR:
946 #ifdef LIBXML_XPTR_ENABLED
947             return(1);
948 #else
949             return(0);
950 #endif
951         case XML_WITH_XINCLUDE:
952 #ifdef LIBXML_XINCLUDE_ENABLED
953             return(1);
954 #else
955             return(0);
956 #endif
957         case XML_WITH_ICONV:
958 #ifdef LIBXML_ICONV_ENABLED
959             return(1);
960 #else
961             return(0);
962 #endif
963         case XML_WITH_ISO8859X:
964 #ifdef LIBXML_ISO8859X_ENABLED
965             return(1);
966 #else
967             return(0);
968 #endif
969         case XML_WITH_UNICODE:
970 #ifdef LIBXML_UNICODE_ENABLED
971             return(1);
972 #else
973             return(0);
974 #endif
975         case XML_WITH_REGEXP:
976 #ifdef LIBXML_REGEXP_ENABLED
977             return(1);
978 #else
979             return(0);
980 #endif
981         case XML_WITH_AUTOMATA:
982 #ifdef LIBXML_AUTOMATA_ENABLED
983             return(1);
984 #else
985             return(0);
986 #endif
987         case XML_WITH_EXPR:
988 #ifdef LIBXML_EXPR_ENABLED
989             return(1);
990 #else
991             return(0);
992 #endif
993         case XML_WITH_SCHEMAS:
994 #ifdef LIBXML_SCHEMAS_ENABLED
995             return(1);
996 #else
997             return(0);
998 #endif
999         case XML_WITH_SCHEMATRON:
1000 #ifdef LIBXML_SCHEMATRON_ENABLED
1001             return(1);
1002 #else
1003             return(0);
1004 #endif
1005         case XML_WITH_MODULES:
1006 #ifdef LIBXML_MODULES_ENABLED
1007             return(1);
1008 #else
1009             return(0);
1010 #endif
1011         case XML_WITH_DEBUG:
1012 #ifdef LIBXML_DEBUG_ENABLED
1013             return(1);
1014 #else
1015             return(0);
1016 #endif
1017         case XML_WITH_DEBUG_MEM:
1018 #ifdef DEBUG_MEMORY_LOCATION
1019             return(1);
1020 #else
1021             return(0);
1022 #endif
1023         case XML_WITH_DEBUG_RUN:
1024 #ifdef LIBXML_DEBUG_RUNTIME
1025             return(1);
1026 #else
1027             return(0);
1028 #endif
1029         case XML_WITH_ZLIB:
1030 #ifdef LIBXML_ZLIB_ENABLED
1031             return(1);
1032 #else
1033             return(0);
1034 #endif
1035         case XML_WITH_LZMA:
1036 #ifdef LIBXML_LZMA_ENABLED
1037             return(1);
1038 #else
1039             return(0);
1040 #endif
1041         case XML_WITH_ICU:
1042 #ifdef LIBXML_ICU_ENABLED
1043             return(1);
1044 #else
1045             return(0);
1046 #endif
1047         default:
1048 	    break;
1049      }
1050      return(0);
1051 }
1052 
1053 /************************************************************************
1054  *									*
1055  *		SAX2 defaulted attributes handling			*
1056  *									*
1057  ************************************************************************/
1058 
1059 /**
1060  * xmlDetectSAX2:
1061  * @ctxt:  an XML parser context
1062  *
1063  * Do the SAX2 detection and specific intialization
1064  */
1065 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)1066 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1067     if (ctxt == NULL) return;
1068 #ifdef LIBXML_SAX1_ENABLED
1069     if ((ctxt->sax) &&  (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1070         ((ctxt->sax->startElementNs != NULL) ||
1071          (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1072 #else
1073     ctxt->sax2 = 1;
1074 #endif /* LIBXML_SAX1_ENABLED */
1075 
1076     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1077     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1078     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1079     if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1080 		(ctxt->str_xml_ns == NULL)) {
1081         xmlErrMemory(ctxt, NULL);
1082     }
1083 }
1084 
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
/*
 * Table of the defaulted attributes of one element, filled by
 * xmlAddDefAttrs(). Every attribute uses 5 consecutive slots of values[]:
 *   [0] local name, [1] prefix (or NULL), [2] default value,
 *   [3] pointer to the end of the value, [4] "external" marker (or NULL).
 */
struct _xmlDefAttrs {
    int nbAttrs;	/* number of defaulted attributes on that element */
    int maxAttrs;       /* capacity of values[], counted in attributes */
#if __STDC_VERSION__ >= 199901L
    /* Using a C99 flexible array member avoids UBSan errors. */
    const xmlChar *values[]; /* array of localname/prefix/values/external */
#else
    /* pre-C99 fallback: room for one attribute, the rest is over-allocated */
    const xmlChar *values[5];
#endif
};
1097 
1098 /**
1099  * xmlAttrNormalizeSpace:
1100  * @src: the source string
1101  * @dst: the target string
1102  *
1103  * Normalize the space in non CDATA attribute values:
1104  * If the attribute type is not CDATA, then the XML processor MUST further
1105  * process the normalized attribute value by discarding any leading and
1106  * trailing space (#x20) characters, and by replacing sequences of space
1107  * (#x20) characters by a single space (#x20) character.
1108  * Note that the size of dst need to be at least src, and if one doesn't need
1109  * to preserve dst (and it doesn't come from a dictionary or read-only) then
1110  * passing src as dst is just fine.
1111  *
1112  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1113  *         is needed.
1114  */
1115 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1116 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1117 {
1118     if ((src == NULL) || (dst == NULL))
1119         return(NULL);
1120 
1121     while (*src == 0x20) src++;
1122     while (*src != 0) {
1123 	if (*src == 0x20) {
1124 	    while (*src == 0x20) src++;
1125 	    if (*src != 0)
1126 		*dst++ = 0x20;
1127 	} else {
1128 	    *dst++ = *src++;
1129 	}
1130     }
1131     *dst = 0;
1132     if (dst == src)
1133        return(NULL);
1134     return(dst);
1135 }
1136 
1137 /**
1138  * xmlAttrNormalizeSpace2:
1139  * @src: the source string
1140  *
1141  * Normalize the space in non CDATA attribute values, a slightly more complex
1142  * front end to avoid allocation problems when running on attribute values
1143  * coming from the input.
1144  *
1145  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1146  *         is needed.
1147  */
1148 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1149 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1150 {
1151     int i;
1152     int remove_head = 0;
1153     int need_realloc = 0;
1154     const xmlChar *cur;
1155 
1156     if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1157         return(NULL);
1158     i = *len;
1159     if (i <= 0)
1160         return(NULL);
1161 
1162     cur = src;
1163     while (*cur == 0x20) {
1164         cur++;
1165 	remove_head++;
1166     }
1167     while (*cur != 0) {
1168 	if (*cur == 0x20) {
1169 	    cur++;
1170 	    if ((*cur == 0x20) || (*cur == 0)) {
1171 	        need_realloc = 1;
1172 		break;
1173 	    }
1174 	} else
1175 	    cur++;
1176     }
1177     if (need_realloc) {
1178         xmlChar *ret;
1179 
1180 	ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1181 	if (ret == NULL) {
1182 	    xmlErrMemory(ctxt, NULL);
1183 	    return(NULL);
1184 	}
1185 	xmlAttrNormalizeSpace(ret, ret);
1186 	*len = (int) strlen((const char *)ret);
1187         return(ret);
1188     } else if (remove_head) {
1189         *len -= remove_head;
1190         memmove(src, src + remove_head, 1 + *len);
1191 	return(src);
1192     }
1193     return(NULL);
1194 }
1195 
1196 /**
1197  * xmlAddDefAttrs:
1198  * @ctxt:  an XML parser context
1199  * @fullname:  the element fullname
1200  * @fullattr:  the attribute fullname
1201  * @value:  the attribute value
1202  *
1203  * Add a defaulted attribute for an element
1204  */
1205 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1206 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1207                const xmlChar *fullname,
1208                const xmlChar *fullattr,
1209                const xmlChar *value) {
1210     xmlDefAttrsPtr defaults;
1211     int len;
1212     const xmlChar *name;
1213     const xmlChar *prefix;
1214 
1215     /*
1216      * Allows to detect attribute redefinitions
1217      */
1218     if (ctxt->attsSpecial != NULL) {
1219         if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1220 	    return;
1221     }
1222 
1223     if (ctxt->attsDefault == NULL) {
1224         ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1225 	if (ctxt->attsDefault == NULL)
1226 	    goto mem_error;
1227     }
1228 
1229     /*
1230      * split the element name into prefix:localname , the string found
1231      * are within the DTD and then not associated to namespace names.
1232      */
1233     name = xmlSplitQName3(fullname, &len);
1234     if (name == NULL) {
1235         name = xmlDictLookup(ctxt->dict, fullname, -1);
1236 	prefix = NULL;
1237     } else {
1238         name = xmlDictLookup(ctxt->dict, name, -1);
1239 	prefix = xmlDictLookup(ctxt->dict, fullname, len);
1240     }
1241 
1242     /*
1243      * make sure there is some storage
1244      */
1245     defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1246     if (defaults == NULL) {
1247         defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1248 	                   (4 * 5) * sizeof(const xmlChar *));
1249 	if (defaults == NULL)
1250 	    goto mem_error;
1251 	defaults->nbAttrs = 0;
1252 	defaults->maxAttrs = 4;
1253 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1254 	                        defaults, NULL) < 0) {
1255 	    xmlFree(defaults);
1256 	    goto mem_error;
1257 	}
1258     } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1259         xmlDefAttrsPtr temp;
1260 
1261         temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1262 		       (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1263 	if (temp == NULL)
1264 	    goto mem_error;
1265 	defaults = temp;
1266 	defaults->maxAttrs *= 2;
1267 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1268 	                        defaults, NULL) < 0) {
1269 	    xmlFree(defaults);
1270 	    goto mem_error;
1271 	}
1272     }
1273 
1274     /*
1275      * Split the element name into prefix:localname , the string found
1276      * are within the DTD and hen not associated to namespace names.
1277      */
1278     name = xmlSplitQName3(fullattr, &len);
1279     if (name == NULL) {
1280         name = xmlDictLookup(ctxt->dict, fullattr, -1);
1281 	prefix = NULL;
1282     } else {
1283         name = xmlDictLookup(ctxt->dict, name, -1);
1284 	prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1285     }
1286 
1287     defaults->values[5 * defaults->nbAttrs] = name;
1288     defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1289     /* intern the string and precompute the end */
1290     len = xmlStrlen(value);
1291     value = xmlDictLookup(ctxt->dict, value, len);
1292     defaults->values[5 * defaults->nbAttrs + 2] = value;
1293     defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1294     if (ctxt->external)
1295         defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1296     else
1297         defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1298     defaults->nbAttrs++;
1299 
1300     return;
1301 
1302 mem_error:
1303     xmlErrMemory(ctxt, NULL);
1304     return;
1305 }
1306 
1307 /**
1308  * xmlAddSpecialAttr:
1309  * @ctxt:  an XML parser context
1310  * @fullname:  the element fullname
1311  * @fullattr:  the attribute fullname
1312  * @type:  the attribute type
1313  *
1314  * Register this attribute type
1315  */
1316 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1317 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1318 		  const xmlChar *fullname,
1319 		  const xmlChar *fullattr,
1320 		  int type)
1321 {
1322     if (ctxt->attsSpecial == NULL) {
1323         ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1324 	if (ctxt->attsSpecial == NULL)
1325 	    goto mem_error;
1326     }
1327 
1328     if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1329         return;
1330 
1331     xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1332                      (void *) (ptrdiff_t) type);
1333     return;
1334 
1335 mem_error:
1336     xmlErrMemory(ctxt, NULL);
1337     return;
1338 }
1339 
1340 /**
1341  * xmlCleanSpecialAttrCallback:
1342  *
1343  * Removes CDATA attributes from the special attribute table
1344  */
1345 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1346 xmlCleanSpecialAttrCallback(void *payload, void *data,
1347                             const xmlChar *fullname, const xmlChar *fullattr,
1348                             const xmlChar *unused ATTRIBUTE_UNUSED) {
1349     xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1350 
1351     if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1352         xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1353     }
1354 }
1355 
1356 /**
1357  * xmlCleanSpecialAttr:
1358  * @ctxt:  an XML parser context
1359  *
1360  * Trim the list of attributes defined to remove all those of type
1361  * CDATA as they are not special. This call should be done when finishing
1362  * to parse the DTD and before starting to parse the document root.
1363  */
1364 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1365 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1366 {
1367     if (ctxt->attsSpecial == NULL)
1368         return;
1369 
1370     xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1371 
1372     if (xmlHashSize(ctxt->attsSpecial) == 0) {
1373         xmlHashFree(ctxt->attsSpecial, NULL);
1374         ctxt->attsSpecial = NULL;
1375     }
1376     return;
1377 }
1378 
/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The current REC references the successors of RFC 1766, currently 5646
 *
 * http://www.rfc-editor.org/rfc/rfc5646.txt
 * langtag       = language
 *                 ["-" script]
 *                 ["-" region]
 *                 *("-" variant)
 *                 *("-" extension)
 *                 ["-" privateuse]
 * language      = 2*3ALPHA            ; shortest ISO 639 code
 *                 ["-" extlang]       ; sometimes followed by
 *                                     ; extended language subtags
 *               / 4ALPHA              ; or reserved for future use
 *               / 5*8ALPHA            ; or registered language subtag
 *
 * extlang       = 3ALPHA              ; selected ISO 639 codes
 *                 *2("-" 3ALPHA)      ; permanently reserved
 *
 * script        = 4ALPHA              ; ISO 15924 code
 *
 * region        = 2ALPHA              ; ISO 3166-1 code
 *               / 3DIGIT              ; UN M.49 code
 *
 * variant       = 5*8alphanum         ; registered variants
 *               / (DIGIT 3alphanum)
 *
 * extension     = singleton 1*("-" (2*8alphanum))
 *
 *                                     ; Single alphanumerics
 *                                     ; "x" reserved for private use
 * singleton     = DIGIT               ; 0 - 9
 *               / %x41-57             ; A - W
 *               / %x59-5A             ; Y - Z
 *               / %x61-77             ; a - w
 *               / %x79-7A             ; y - z
 *
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
 * The parser below doesn't try to cope with extension or privateuse
 * that could be added but that's not interoperable anyway
 *
 * The check walks the tag one subtag at a time: cur marks the start of the
 * current subtag, nxt is advanced over its letters, and the subtag length
 * (nxt - cur) selects which kind of subtag was read.
 *
 * NOTE(review): subtags are scanned as letters only, and a subtag starting
 * with a digit is always handled as a 3 digit region, so variants containing
 * digits (allowed by the grammar quoted above) are rejected - confirm that
 * this is intended.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang, *nxt;

    if (cur == NULL)
        return (0);
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
        ((cur[0] == 'I') && (cur[1] == '-')) ||
        ((cur[0] == 'x') && (cur[1] == '-')) ||
        ((cur[0] == 'X') && (cur[1] == '-'))) {
        /*
         * Still allow IANA code and user code which were coming
         * from the previous version of the XML-1.0 specification
         * it's deprecated but we should not fail
         */
        cur += 2;
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
            cur++;
        /* only letters may follow the dash; an empty tail is accepted too */
        return(cur[0] == 0);
    }
    /* primary language subtag: count the leading letters */
    nxt = cur;
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;
    if (nxt - cur >= 4) {
        /*
         * Reserved: a 4 to 8 letter language is accepted only when it is
         * the whole tag
         */
        if ((nxt - cur > 8) || (nxt[0] != 0))
            return(0);
        return(1);
    }
    if (nxt - cur < 2)
        return(0);
    /* we got an ISO 639 code */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have extlang or script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;
    /* the subtag length tells which production was read */
    if (nxt - cur == 4)
        goto script;
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 3)
        return(0);
    /* we parsed an extlang */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 4)
        return(0);
    /* we parsed a script */
script:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;

    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 2)
        return(0);
    /* we parsed a region */
region:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can just have a variant */
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;

    if ((nxt - cur < 5) || (nxt - cur > 8))
        return(0);

    /* we parsed a variant */
variant:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);
    /* extensions and private use subtags not checked */
    return (1);

    /*
     * Reached with nxt[0] being a digit: a region made of exactly 3 digits
     * is expected, what follows is checked at the region label.
     */
region_m49:
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
        nxt += 3;
        goto region;
    }
    return(0);
}
1572 
1573 /************************************************************************
1574  *									*
1575  *		Parser stacks related functions and macros		*
1576  *									*
1577  ************************************************************************/
1578 
/* Forward declaration, needed by code placed before the definition. */
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                            const xmlChar ** str);
1581 
1582 #ifdef SAX2
1583 /**
1584  * nsPush:
1585  * @ctxt:  an XML parser context
1586  * @prefix:  the namespace prefix or NULL
1587  * @URL:  the namespace name
1588  *
1589  * Pushes a new parser namespace on top of the ns stack
1590  *
1591  * Returns -1 in case of error, -2 if the namespace should be discarded
1592  *	   and the index in the stack otherwise.
1593  */
1594 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1595 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1596 {
1597     if (ctxt->options & XML_PARSE_NSCLEAN) {
1598         int i;
1599 	for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1600 	    if (ctxt->nsTab[i] == prefix) {
1601 		/* in scope */
1602 	        if (ctxt->nsTab[i + 1] == URL)
1603 		    return(-2);
1604 		/* out of scope keep it */
1605 		break;
1606 	    }
1607 	}
1608     }
1609     if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1610 	ctxt->nsMax = 10;
1611 	ctxt->nsNr = 0;
1612 	ctxt->nsTab = (const xmlChar **)
1613 	              xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1614 	if (ctxt->nsTab == NULL) {
1615 	    xmlErrMemory(ctxt, NULL);
1616 	    ctxt->nsMax = 0;
1617             return (-1);
1618 	}
1619     } else if (ctxt->nsNr >= ctxt->nsMax) {
1620         const xmlChar ** tmp;
1621         ctxt->nsMax *= 2;
1622         tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1623 				    ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1624         if (tmp == NULL) {
1625             xmlErrMemory(ctxt, NULL);
1626 	    ctxt->nsMax /= 2;
1627             return (-1);
1628         }
1629 	ctxt->nsTab = tmp;
1630     }
1631     ctxt->nsTab[ctxt->nsNr++] = prefix;
1632     ctxt->nsTab[ctxt->nsNr++] = URL;
1633     return (ctxt->nsNr);
1634 }
1635 /**
1636  * nsPop:
1637  * @ctxt: an XML parser context
1638  * @nr:  the number to pop
1639  *
1640  * Pops the top @nr parser prefix/namespace from the ns stack
1641  *
1642  * Returns the number of namespaces removed
1643  */
1644 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1645 nsPop(xmlParserCtxtPtr ctxt, int nr)
1646 {
1647     int i;
1648 
1649     if (ctxt->nsTab == NULL) return(0);
1650     if (ctxt->nsNr < nr) {
1651         xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1652         nr = ctxt->nsNr;
1653     }
1654     if (ctxt->nsNr <= 0)
1655         return (0);
1656 
1657     for (i = 0;i < nr;i++) {
1658          ctxt->nsNr--;
1659 	 ctxt->nsTab[ctxt->nsNr] = NULL;
1660     }
1661     return(nr);
1662 }
1663 #endif
1664 
1665 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1666 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1667     const xmlChar **atts;
1668     int *attallocs;
1669     int maxatts;
1670 
1671     if (ctxt->atts == NULL) {
1672 	maxatts = 55; /* allow for 10 attrs by default */
1673 	atts = (const xmlChar **)
1674 	       xmlMalloc(maxatts * sizeof(xmlChar *));
1675 	if (atts == NULL) goto mem_error;
1676 	ctxt->atts = atts;
1677 	attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1678 	if (attallocs == NULL) goto mem_error;
1679 	ctxt->attallocs = attallocs;
1680 	ctxt->maxatts = maxatts;
1681     } else if (nr + 5 > ctxt->maxatts) {
1682 	maxatts = (nr + 5) * 2;
1683 	atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1684 				     maxatts * sizeof(const xmlChar *));
1685 	if (atts == NULL) goto mem_error;
1686 	ctxt->atts = atts;
1687 	attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1688 	                             (maxatts / 5) * sizeof(int));
1689 	if (attallocs == NULL) goto mem_error;
1690 	ctxt->attallocs = attallocs;
1691 	ctxt->maxatts = maxatts;
1692     }
1693     return(ctxt->maxatts);
1694 mem_error:
1695     xmlErrMemory(ctxt, NULL);
1696     return(-1);
1697 }
1698 
1699 /**
1700  * inputPush:
1701  * @ctxt:  an XML parser context
1702  * @value:  the parser input
1703  *
1704  * Pushes a new parser input on top of the input stack
1705  *
1706  * Returns -1 in case of error, the index in the stack otherwise
1707  */
1708 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1709 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1710 {
1711     if ((ctxt == NULL) || (value == NULL))
1712         return(-1);
1713     if (ctxt->inputNr >= ctxt->inputMax) {
1714         ctxt->inputMax *= 2;
1715         ctxt->inputTab =
1716             (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1717                                              ctxt->inputMax *
1718                                              sizeof(ctxt->inputTab[0]));
1719         if (ctxt->inputTab == NULL) {
1720             xmlErrMemory(ctxt, NULL);
1721 	    xmlFreeInputStream(value);
1722 	    ctxt->inputMax /= 2;
1723 	    value = NULL;
1724             return (-1);
1725         }
1726     }
1727     ctxt->inputTab[ctxt->inputNr] = value;
1728     ctxt->input = value;
1729     return (ctxt->inputNr++);
1730 }
1731 /**
1732  * inputPop:
1733  * @ctxt: an XML parser context
1734  *
1735  * Pops the top parser input from the input stack
1736  *
1737  * Returns the input just removed
1738  */
1739 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1740 inputPop(xmlParserCtxtPtr ctxt)
1741 {
1742     xmlParserInputPtr ret;
1743 
1744     if (ctxt == NULL)
1745         return(NULL);
1746     if (ctxt->inputNr <= 0)
1747         return (NULL);
1748     ctxt->inputNr--;
1749     if (ctxt->inputNr > 0)
1750         ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1751     else
1752         ctxt->input = NULL;
1753     ret = ctxt->inputTab[ctxt->inputNr];
1754     ctxt->inputTab[ctxt->inputNr] = NULL;
1755     return (ret);
1756 }
1757 /**
1758  * nodePush:
1759  * @ctxt:  an XML parser context
1760  * @value:  the element node
1761  *
1762  * Pushes a new element node on top of the node stack
1763  *
1764  * Returns -1 in case of error, the index in the stack otherwise
1765  */
1766 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1767 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1768 {
1769     if (ctxt == NULL) return(0);
1770     if (ctxt->nodeNr >= ctxt->nodeMax) {
1771         xmlNodePtr *tmp;
1772 
1773 	tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1774                                       ctxt->nodeMax * 2 *
1775                                       sizeof(ctxt->nodeTab[0]));
1776         if (tmp == NULL) {
1777             xmlErrMemory(ctxt, NULL);
1778             return (-1);
1779         }
1780         ctxt->nodeTab = tmp;
1781 	ctxt->nodeMax *= 2;
1782     }
1783     if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1784         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1785 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1786 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1787 			  xmlParserMaxDepth);
1788 	xmlHaltParser(ctxt);
1789 	return(-1);
1790     }
1791     ctxt->nodeTab[ctxt->nodeNr] = value;
1792     ctxt->node = value;
1793     return (ctxt->nodeNr++);
1794 }
1795 
1796 /**
1797  * nodePop:
1798  * @ctxt: an XML parser context
1799  *
1800  * Pops the top element node from the node stack
1801  *
1802  * Returns the node just removed
1803  */
1804 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1805 nodePop(xmlParserCtxtPtr ctxt)
1806 {
1807     xmlNodePtr ret;
1808 
1809     if (ctxt == NULL) return(NULL);
1810     if (ctxt->nodeNr <= 0)
1811         return (NULL);
1812     ctxt->nodeNr--;
1813     if (ctxt->nodeNr > 0)
1814         ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1815     else
1816         ctxt->node = NULL;
1817     ret = ctxt->nodeTab[ctxt->nodeNr];
1818     ctxt->nodeTab[ctxt->nodeNr] = NULL;
1819     return (ret);
1820 }
1821 
1822 #ifdef LIBXML_PUSH_ENABLED
1823 /**
1824  * nameNsPush:
1825  * @ctxt:  an XML parser context
1826  * @value:  the element name
1827  * @prefix:  the element prefix
1828  * @URI:  the element namespace name
1829  *
1830  * Pushes a new element name/prefix/URL on top of the name stack
1831  *
1832  * Returns -1 in case of error, the index in the stack otherwise
1833  */
1834 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int nsNr)1835 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1836            const xmlChar *prefix, const xmlChar *URI, int nsNr)
1837 {
1838     if (ctxt->nameNr >= ctxt->nameMax) {
1839         const xmlChar * *tmp;
1840         void **tmp2;
1841         ctxt->nameMax *= 2;
1842         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1843                                     ctxt->nameMax *
1844                                     sizeof(ctxt->nameTab[0]));
1845         if (tmp == NULL) {
1846 	    ctxt->nameMax /= 2;
1847 	    goto mem_error;
1848         }
1849 	ctxt->nameTab = tmp;
1850         tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1851                                     ctxt->nameMax * 3 *
1852                                     sizeof(ctxt->pushTab[0]));
1853         if (tmp2 == NULL) {
1854 	    ctxt->nameMax /= 2;
1855 	    goto mem_error;
1856         }
1857 	ctxt->pushTab = tmp2;
1858     }
1859     ctxt->nameTab[ctxt->nameNr] = value;
1860     ctxt->name = value;
1861     ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1862     ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1863     ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (ptrdiff_t) nsNr;
1864     return (ctxt->nameNr++);
1865 mem_error:
1866     xmlErrMemory(ctxt, NULL);
1867     return (-1);
1868 }
1869 /**
1870  * nameNsPop:
1871  * @ctxt: an XML parser context
1872  *
1873  * Pops the top element/prefix/URI name from the name stack
1874  *
1875  * Returns the name just removed
1876  */
1877 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)1878 nameNsPop(xmlParserCtxtPtr ctxt)
1879 {
1880     const xmlChar *ret;
1881 
1882     if (ctxt->nameNr <= 0)
1883         return (NULL);
1884     ctxt->nameNr--;
1885     if (ctxt->nameNr > 0)
1886         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1887     else
1888         ctxt->name = NULL;
1889     ret = ctxt->nameTab[ctxt->nameNr];
1890     ctxt->nameTab[ctxt->nameNr] = NULL;
1891     return (ret);
1892 }
1893 #endif /* LIBXML_PUSH_ENABLED */
1894 
1895 /**
1896  * namePush:
1897  * @ctxt:  an XML parser context
1898  * @value:  the element name
1899  *
1900  * Pushes a new element name on top of the name stack
1901  *
1902  * Returns -1 in case of error, the index in the stack otherwise
1903  */
1904 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1905 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1906 {
1907     if (ctxt == NULL) return (-1);
1908 
1909     if (ctxt->nameNr >= ctxt->nameMax) {
1910         const xmlChar * *tmp;
1911         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1912                                     ctxt->nameMax * 2 *
1913                                     sizeof(ctxt->nameTab[0]));
1914         if (tmp == NULL) {
1915 	    goto mem_error;
1916         }
1917 	ctxt->nameTab = tmp;
1918         ctxt->nameMax *= 2;
1919     }
1920     ctxt->nameTab[ctxt->nameNr] = value;
1921     ctxt->name = value;
1922     return (ctxt->nameNr++);
1923 mem_error:
1924     xmlErrMemory(ctxt, NULL);
1925     return (-1);
1926 }
1927 /**
1928  * namePop:
1929  * @ctxt: an XML parser context
1930  *
1931  * Pops the top element name from the name stack
1932  *
1933  * Returns the name just removed
1934  */
1935 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1936 namePop(xmlParserCtxtPtr ctxt)
1937 {
1938     const xmlChar *ret;
1939 
1940     if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1941         return (NULL);
1942     ctxt->nameNr--;
1943     if (ctxt->nameNr > 0)
1944         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1945     else
1946         ctxt->name = NULL;
1947     ret = ctxt->nameTab[ctxt->nameNr];
1948     ctxt->nameTab[ctxt->nameNr] = NULL;
1949     return (ret);
1950 }
1951 
/*
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to store on the space stack
 *
 * Pushes @val on top of the space stack and points ctxt->space at the new
 * top entry. The table is doubled when full; on allocation failure the
 * previous capacity is restored.
 *
 * Returns -1 in case of error, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr++;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    return(slot);
}
1970 
/*
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value from the space stack. ctxt->space is moved to the new
 * top entry, or to slot 0 when the stack becomes empty, and the vacated
 * slot is overwritten with -1.
 *
 * Returns the value just removed, or 0 if the stack was empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1983 
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them. All of them expect a variable named `ctxt` in scope.
 *
 *   CUR_PTR returns the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done (both are currently defined
 *           identically).
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s equal c1..cn.
 * Every argument is parenthesized so that expression arguments (pointer
 * arithmetic, conditionals) are cast and compared as a whole.
 * s is evaluated several times: do not pass expressions with side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2042 
/*
 * SKIP(val): advance the current input by val xmlChars, adding val to the
 * column and to ctxt->nbChars, then try to grow the input if the new
 * current byte is 0. val is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): advance the current input by val xmlChars one at a time,
 * keeping line/col up to date when newlines are crossed, then try to grow
 * the input if the new current byte is 0.
 * val is parenthesized in the loop test so that expression arguments
 * (e.g. a conditional) are compared as a whole; it is re-evaluated on
 * every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2061 
2062 #define SHRINK if ((ctxt->progressive == 0) &&				\
2063 		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2064 		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2065 	xmlSHRINK (ctxt);
2066 
xmlSHRINK(xmlParserCtxtPtr ctxt)2067 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2068     xmlParserInputShrink(ctxt->input);
2069     if (*ctxt->input->cur == 0)
2070         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2071 }
2072 
2073 #define GROW if ((ctxt->progressive == 0) &&				\
2074 		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
2075 	xmlGROW (ctxt);
2076 
xmlGROW(xmlParserCtxtPtr ctxt)2077 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2078     unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2079     unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2080 
2081     if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2082          (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
2083          ((ctxt->input->buf) &&
2084           (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
2085         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2086         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2087         xmlHaltParser(ctxt);
2088 	return;
2089     }
2090     xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2091     if ((ctxt->input->cur > ctxt->input->end) ||
2092         (ctxt->input->cur < ctxt->input->base)) {
2093         xmlHaltParser(ctxt);
2094         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2095 	return;
2096     }
2097     if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2098         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2099 }
2100 
/* Skip blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Skip to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: skip exactly one xmlChar, bumping col and nbChars, then try to
 * grow the input if the new current byte is 0. Expands to a brace block.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character, which is l xmlChars long, updating
 * line/col depending on whether it is a newline.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
  } while (0)

/* Current character from the context / from a string; l receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and advance
 * i. When l is 1 the value is stored as a single xmlChar, otherwise it is
 * encoded through xmlCopyCharMultiByte().
 * Arguments are parenthesized so expression arguments keep their meaning.
 * This is an unbraced if/else: use it as a standalone statement followed
 * by ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2126 
2127 /**
2128  * xmlSkipBlankChars:
2129  * @ctxt:  the XML parser context
2130  *
2131  * skip all blanks character found at that point in the input streams.
2132  * It pops up finished entities in the process if allowable at that point.
2133  *
2134  * Returns the number of space chars skipped
2135  */
2136 
2137 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2138 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2139     int res = 0;
2140 
2141     /*
2142      * It's Okay to use CUR/NEXT here since all the blanks are on
2143      * the ASCII range.
2144      */
2145     if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2146 	const xmlChar *cur;
2147 	/*
2148 	 * if we are in the document content, go really fast
2149 	 */
2150 	cur = ctxt->input->cur;
2151 	while (IS_BLANK_CH(*cur)) {
2152 	    if (*cur == '\n') {
2153 		ctxt->input->line++; ctxt->input->col = 1;
2154 	    } else {
2155 		ctxt->input->col++;
2156 	    }
2157 	    cur++;
2158 	    res++;
2159 	    if (*cur == 0) {
2160 		ctxt->input->cur = cur;
2161 		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2162 		cur = ctxt->input->cur;
2163 	    }
2164 	}
2165 	ctxt->input->cur = cur;
2166     } else {
2167         int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2168 
2169 	while (1) {
2170             if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2171 		NEXT;
2172 	    } else if (CUR == '%') {
2173                 /*
2174                  * Need to handle support of entities branching here
2175                  */
2176 	        if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2177                     break;
2178 	        xmlParsePEReference(ctxt);
2179             } else if (CUR == 0) {
2180                 if (ctxt->inputNr <= 1)
2181                     break;
2182                 xmlPopInput(ctxt);
2183             } else {
2184                 break;
2185             }
2186 
2187             /*
2188              * Also increase the counter when entering or exiting a PERef.
2189              * The spec says: "When a parameter-entity reference is recognized
2190              * in the DTD and included, its replacement text MUST be enlarged
2191              * by the attachment of one leading and one following space (#x20)
2192              * character."
2193              */
2194 	    res++;
2195         }
2196     }
2197     return(res);
2198 }
2199 
2200 /************************************************************************
2201  *									*
2202  *		Commodity functions to handle entities			*
2203  *									*
2204  ************************************************************************/
2205 
2206 /**
2207  * xmlPopInput:
2208  * @ctxt:  an XML parser context
2209  *
2210  * xmlPopInput: the current input pointed by ctxt->input came to an end
2211  *          pop it and return the next char.
2212  *
2213  * Returns the current xmlChar in the parser context
2214  */
2215 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2216 xmlPopInput(xmlParserCtxtPtr ctxt) {
2217     if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2218     if (xmlParserDebugEntities)
2219 	xmlGenericError(xmlGenericErrorContext,
2220 		"Popping input %d\n", ctxt->inputNr);
2221     if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2222         (ctxt->instate != XML_PARSER_EOF))
2223         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2224                     "Unfinished entity outside the DTD");
2225     xmlFreeInputStream(inputPop(ctxt));
2226     if (*ctxt->input->cur == 0)
2227         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2228     return(CUR);
2229 }
2230 
2231 /**
2232  * xmlPushInput:
2233  * @ctxt:  an XML parser context
2234  * @input:  an XML parser input fragment (entity, XML fragment ...).
2235  *
2236  * xmlPushInput: switch to a new input stream which is stacked on top
2237  *               of the previous one(s).
2238  * Returns -1 in case of error or the index in the input stack
2239  */
2240 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2241 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2242     int ret;
2243     if (input == NULL) return(-1);
2244 
2245     if (xmlParserDebugEntities) {
2246 	if ((ctxt->input != NULL) && (ctxt->input->filename))
2247 	    xmlGenericError(xmlGenericErrorContext,
2248 		    "%s(%d): ", ctxt->input->filename,
2249 		    ctxt->input->line);
2250 	xmlGenericError(xmlGenericErrorContext,
2251 		"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2252     }
2253     if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2254         (ctxt->inputNr > 1024)) {
2255         xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2256         while (ctxt->inputNr > 1)
2257             xmlFreeInputStream(inputPop(ctxt));
2258 	return(-1);
2259     }
2260     ret = inputPush(ctxt, input);
2261     if (ctxt->instate == XML_PARSER_EOF)
2262         return(-1);
2263     GROW;
2264     return(ret);
2265 }
2266 
2267 /**
2268  * xmlParseCharRef:
2269  * @ctxt:  an XML parser context
2270  *
2271  * parse Reference declarations
2272  *
2273  * [66] CharRef ::= '&#' [0-9]+ ';' |
2274  *                  '&#x' [0-9a-fA-F]+ ';'
2275  *
2276  * [ WFC: Legal Character ]
2277  * Characters referred to using character references must match the
2278  * production for Char.
2279  *
2280  * Returns the value parsed (as an int), 0 in case of error
2281  */
2282 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2283 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2284     unsigned int val = 0;
2285     int count = 0;
2286     unsigned int outofrange = 0;
2287 
2288     /*
2289      * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2290      */
2291     if ((RAW == '&') && (NXT(1) == '#') &&
2292         (NXT(2) == 'x')) {
2293 	SKIP(3);
2294 	GROW;
2295 	while (RAW != ';') { /* loop blocked by count */
2296 	    if (count++ > 20) {
2297 		count = 0;
2298 		GROW;
2299                 if (ctxt->instate == XML_PARSER_EOF)
2300                     return(0);
2301 	    }
2302 	    if ((RAW >= '0') && (RAW <= '9'))
2303 	        val = val * 16 + (CUR - '0');
2304 	    else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2305 	        val = val * 16 + (CUR - 'a') + 10;
2306 	    else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2307 	        val = val * 16 + (CUR - 'A') + 10;
2308 	    else {
2309 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2310 		val = 0;
2311 		break;
2312 	    }
2313 	    if (val > 0x10FFFF)
2314 	        outofrange = val;
2315 
2316 	    NEXT;
2317 	    count++;
2318 	}
2319 	if (RAW == ';') {
2320 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2321 	    ctxt->input->col++;
2322 	    ctxt->nbChars ++;
2323 	    ctxt->input->cur++;
2324 	}
2325     } else if  ((RAW == '&') && (NXT(1) == '#')) {
2326 	SKIP(2);
2327 	GROW;
2328 	while (RAW != ';') { /* loop blocked by count */
2329 	    if (count++ > 20) {
2330 		count = 0;
2331 		GROW;
2332                 if (ctxt->instate == XML_PARSER_EOF)
2333                     return(0);
2334 	    }
2335 	    if ((RAW >= '0') && (RAW <= '9'))
2336 	        val = val * 10 + (CUR - '0');
2337 	    else {
2338 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2339 		val = 0;
2340 		break;
2341 	    }
2342 	    if (val > 0x10FFFF)
2343 	        outofrange = val;
2344 
2345 	    NEXT;
2346 	    count++;
2347 	}
2348 	if (RAW == ';') {
2349 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2350 	    ctxt->input->col++;
2351 	    ctxt->nbChars ++;
2352 	    ctxt->input->cur++;
2353 	}
2354     } else {
2355         xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2356     }
2357 
2358     /*
2359      * [ WFC: Legal Character ]
2360      * Characters referred to using character references must match the
2361      * production for Char.
2362      */
2363     if ((IS_CHAR(val) && (outofrange == 0))) {
2364         return(val);
2365     } else {
2366         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2367                           "xmlParseCharRef: invalid xmlChar value %d\n",
2368 	                  val);
2369     }
2370     return(0);
2371 }
2372 
2373 /**
2374  * xmlParseStringCharRef:
2375  * @ctxt:  an XML parser context
2376  * @str:  a pointer to an index in the string
2377  *
2378  * parse Reference declarations, variant parsing from a string rather
2379  * than an an input flow.
2380  *
2381  * [66] CharRef ::= '&#' [0-9]+ ';' |
2382  *                  '&#x' [0-9a-fA-F]+ ';'
2383  *
2384  * [ WFC: Legal Character ]
2385  * Characters referred to using character references must match the
2386  * production for Char.
2387  *
2388  * Returns the value parsed (as an int), 0 in case of error, str will be
2389  *         updated to the current value of the index
2390  */
2391 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2392 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2393     const xmlChar *ptr;
2394     xmlChar cur;
2395     unsigned int val = 0;
2396     unsigned int outofrange = 0;
2397 
2398     if ((str == NULL) || (*str == NULL)) return(0);
2399     ptr = *str;
2400     cur = *ptr;
2401     if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2402 	ptr += 3;
2403 	cur = *ptr;
2404 	while (cur != ';') { /* Non input consuming loop */
2405 	    if ((cur >= '0') && (cur <= '9'))
2406 	        val = val * 16 + (cur - '0');
2407 	    else if ((cur >= 'a') && (cur <= 'f'))
2408 	        val = val * 16 + (cur - 'a') + 10;
2409 	    else if ((cur >= 'A') && (cur <= 'F'))
2410 	        val = val * 16 + (cur - 'A') + 10;
2411 	    else {
2412 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2413 		val = 0;
2414 		break;
2415 	    }
2416 	    if (val > 0x10FFFF)
2417 	        outofrange = val;
2418 
2419 	    ptr++;
2420 	    cur = *ptr;
2421 	}
2422 	if (cur == ';')
2423 	    ptr++;
2424     } else if  ((cur == '&') && (ptr[1] == '#')){
2425 	ptr += 2;
2426 	cur = *ptr;
2427 	while (cur != ';') { /* Non input consuming loops */
2428 	    if ((cur >= '0') && (cur <= '9'))
2429 	        val = val * 10 + (cur - '0');
2430 	    else {
2431 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2432 		val = 0;
2433 		break;
2434 	    }
2435 	    if (val > 0x10FFFF)
2436 	        outofrange = val;
2437 
2438 	    ptr++;
2439 	    cur = *ptr;
2440 	}
2441 	if (cur == ';')
2442 	    ptr++;
2443     } else {
2444 	xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2445 	return(0);
2446     }
2447     *str = ptr;
2448 
2449     /*
2450      * [ WFC: Legal Character ]
2451      * Characters referred to using character references must match the
2452      * production for Char.
2453      */
2454     if ((IS_CHAR(val) && (outofrange == 0))) {
2455         return(val);
2456     } else {
2457         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2458 			  "xmlParseStringCharRef: invalid xmlChar value %d\n",
2459 			  val);
2460     }
2461     return(0);
2462 }
2463 
2464 /**
2465  * xmlParserHandlePEReference:
2466  * @ctxt:  the parser context
2467  *
2468  * [69] PEReference ::= '%' Name ';'
2469  *
2470  * [ WFC: No Recursion ]
2471  * A parsed entity must not contain a recursive
2472  * reference to itself, either directly or indirectly.
2473  *
2474  * [ WFC: Entity Declared ]
2475  * In a document without any DTD, a document with only an internal DTD
2476  * subset which contains no parameter entity references, or a document
2477  * with "standalone='yes'", ...  ... The declaration of a parameter
2478  * entity must precede any reference to it...
2479  *
2480  * [ VC: Entity Declared ]
2481  * In a document with an external subset or external parameter entities
2482  * with "standalone='no'", ...  ... The declaration of a parameter entity
2483  * must precede any reference to it...
2484  *
2485  * [ WFC: In DTD ]
2486  * Parameter-entity references may only appear in the DTD.
2487  * NOTE: misleading but this is handled.
2488  *
2489  * A PEReference may have been detected in the current input stream
2490  * the handling is done accordingly to
2491  *      http://www.w3.org/TR/REC-xml#entproc
2492  * i.e.
2493  *   - Included in literal in entity values
2494  *   - Included as Parameter Entity reference within DTDs
2495  */
2496 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2497 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2498     switch(ctxt->instate) {
2499 	case XML_PARSER_CDATA_SECTION:
2500 	    return;
2501         case XML_PARSER_COMMENT:
2502 	    return;
2503 	case XML_PARSER_START_TAG:
2504 	    return;
2505 	case XML_PARSER_END_TAG:
2506 	    return;
2507         case XML_PARSER_EOF:
2508 	    xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2509 	    return;
2510         case XML_PARSER_PROLOG:
2511 	case XML_PARSER_START:
2512 	case XML_PARSER_MISC:
2513 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2514 	    return;
2515 	case XML_PARSER_ENTITY_DECL:
2516         case XML_PARSER_CONTENT:
2517         case XML_PARSER_ATTRIBUTE_VALUE:
2518         case XML_PARSER_PI:
2519 	case XML_PARSER_SYSTEM_LITERAL:
2520 	case XML_PARSER_PUBLIC_LITERAL:
2521 	    /* we just ignore it there */
2522 	    return;
2523         case XML_PARSER_EPILOG:
2524 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2525 	    return;
2526 	case XML_PARSER_ENTITY_VALUE:
2527 	    /*
2528 	     * NOTE: in the case of entity values, we don't do the
2529 	     *       substitution here since we need the literal
2530 	     *       entity value to be able to save the internal
2531 	     *       subset of the document.
2532 	     *       This will be handled by xmlStringDecodeEntities
2533 	     */
2534 	    return;
2535         case XML_PARSER_DTD:
2536 	    /*
2537 	     * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2538 	     * In the internal DTD subset, parameter-entity references
2539 	     * can occur only where markup declarations can occur, not
2540 	     * within markup declarations.
2541 	     * In that case this is handled in xmlParseMarkupDecl
2542 	     */
2543 	    if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2544 		return;
2545 	    if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2546 		return;
2547             break;
2548         case XML_PARSER_IGNORE:
2549             return;
2550     }
2551 
2552     xmlParsePEReference(ctxt);
2553 }
2554 
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is also
 * the target when the new size computation wraps around. On failure the
 * buffer and its size are left unchanged.
 * n is parenthesized so that expression arguments are added as a whole.
 * Expands to a brace block declaring locals named tmp and new_size.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2569 
2570 /**
2571  * xmlStringLenDecodeEntities:
2572  * @ctxt:  the parser context
2573  * @str:  the input string
2574  * @len: the string length
2575  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2576  * @end:  an end marker xmlChar, 0 if none
2577  * @end2:  an end marker xmlChar, 0 if none
2578  * @end3:  an end marker xmlChar, 0 if none
2579  *
2580  * Takes a entity string content and process to do the adequate substitutions.
2581  *
2582  * [67] Reference ::= EntityRef | CharRef
2583  *
2584  * [69] PEReference ::= '%' Name ';'
2585  *
2586  * Returns A newly allocated string with the substitution done. The caller
2587  *      must deallocate it !
2588  */
2589 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what,xmlChar end,xmlChar end2,xmlChar end3)2590 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2591 		      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2592     xmlChar *buffer = NULL;
2593     size_t buffer_size = 0;
2594     size_t nbchars = 0;
2595 
2596     xmlChar *current = NULL;
2597     xmlChar *rep = NULL;
2598     const xmlChar *last;
2599     xmlEntityPtr ent;
2600     int c,l;
2601 
2602     if ((ctxt == NULL) || (str == NULL) || (len < 0))
2603 	return(NULL);
2604     last = str + len;
2605 
2606     if (((ctxt->depth > 40) &&
2607          ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2608 	(ctxt->depth > 1024)) {
2609 	xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2610 	return(NULL);
2611     }
2612 
2613     /*
2614      * allocate a translation buffer.
2615      */
2616     buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2617     buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2618     if (buffer == NULL) goto mem_error;
2619 
2620     /*
2621      * OK loop until we reach one of the ending char or a size limit.
2622      * we are operating on already parsed values.
2623      */
2624     if (str < last)
2625 	c = CUR_SCHAR(str, l);
2626     else
2627         c = 0;
2628     while ((c != 0) && (c != end) && /* non input consuming loop */
2629 	   (c != end2) && (c != end3)) {
2630 
2631 	if (c == 0) break;
2632         if ((c == '&') && (str[1] == '#')) {
2633 	    int val = xmlParseStringCharRef(ctxt, &str);
2634 	    if (val == 0)
2635                 goto int_error;
2636 	    COPY_BUF(0,buffer,nbchars,val);
2637 	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2638 	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2639 	    }
2640 	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2641 	    if (xmlParserDebugEntities)
2642 		xmlGenericError(xmlGenericErrorContext,
2643 			"String decoding Entity Reference: %.30s\n",
2644 			str);
2645 	    ent = xmlParseStringEntityRef(ctxt, &str);
2646 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
2647 	    if (ent != NULL)
2648 	        ctxt->nbentities += ent->checked / 2;
2649 	    if ((ent != NULL) &&
2650 		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2651 		if (ent->content != NULL) {
2652 		    COPY_BUF(0,buffer,nbchars,ent->content[0]);
2653 		    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2654 			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2655 		    }
2656 		} else {
2657 		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2658 			    "predefined entity has no content\n");
2659                     goto int_error;
2660 		}
2661 	    } else if ((ent != NULL) && (ent->content != NULL)) {
2662 		ctxt->depth++;
2663 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2664 			                      0, 0, 0);
2665 		ctxt->depth--;
2666 		if (rep == NULL)
2667                     goto int_error;
2668 
2669                 current = rep;
2670                 while (*current != 0) { /* non input consuming loop */
2671                     buffer[nbchars++] = *current++;
2672                     if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2673                         if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2674                             goto int_error;
2675                         growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2676                     }
2677                 }
2678                 xmlFree(rep);
2679                 rep = NULL;
2680 	    } else if (ent != NULL) {
2681 		int i = xmlStrlen(ent->name);
2682 		const xmlChar *cur = ent->name;
2683 
2684 		buffer[nbchars++] = '&';
2685 		if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2686 		    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2687 		}
2688 		for (;i > 0;i--)
2689 		    buffer[nbchars++] = *cur++;
2690 		buffer[nbchars++] = ';';
2691 	    }
2692 	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2693 	    if (xmlParserDebugEntities)
2694 		xmlGenericError(xmlGenericErrorContext,
2695 			"String decoding PE Reference: %.30s\n", str);
2696 	    ent = xmlParseStringPEReference(ctxt, &str);
2697 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
2698 	    if (ent != NULL)
2699 	        ctxt->nbentities += ent->checked / 2;
2700 	    if (ent != NULL) {
2701                 if (ent->content == NULL) {
2702 		    /*
2703 		     * Note: external parsed entities will not be loaded,
2704 		     * it is not required for a non-validating parser to
2705 		     * complete external PEreferences coming from the
2706 		     * internal subset
2707 		     */
2708 		    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2709 			((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2710 			(ctxt->validate != 0)) {
2711 			xmlLoadEntityContent(ctxt, ent);
2712 		    } else {
2713 			xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2714 		  "not validating will not read content for PE entity %s\n",
2715 		                      ent->name, NULL);
2716 		    }
2717 		}
2718 		ctxt->depth++;
2719 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2720 			                      0, 0, 0);
2721 		ctxt->depth--;
2722 		if (rep == NULL)
2723                     goto int_error;
2724                 current = rep;
2725                 while (*current != 0) { /* non input consuming loop */
2726                     buffer[nbchars++] = *current++;
2727                     if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2728                         if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2729                             goto int_error;
2730                         growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2731                     }
2732                 }
2733                 xmlFree(rep);
2734                 rep = NULL;
2735 	    }
2736 	} else {
2737 	    COPY_BUF(l,buffer,nbchars,c);
2738 	    str += l;
2739 	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2740 	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2741 	    }
2742 	}
2743 	if (str < last)
2744 	    c = CUR_SCHAR(str, l);
2745 	else
2746 	    c = 0;
2747     }
2748     buffer[nbchars] = 0;
2749     return(buffer);
2750 
2751 mem_error:
2752     xmlErrMemory(ctxt, NULL);
2753 int_error:
2754     if (rep != NULL)
2755         xmlFree(rep);
2756     if (buffer != NULL)
2757         xmlFree(buffer);
2758     return(NULL);
2759 }
2760 
2761 /**
2762  * xmlStringDecodeEntities:
2763  * @ctxt:  the parser context
2764  * @str:  the input string
2765  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2766  * @end:  an end marker xmlChar, 0 if none
2767  * @end2:  an end marker xmlChar, 0 if none
2768  * @end3:  an end marker xmlChar, 0 if none
2769  *
2770  * Takes a entity string content and process to do the adequate substitutions.
2771  *
2772  * [67] Reference ::= EntityRef | CharRef
2773  *
2774  * [69] PEReference ::= '%' Name ';'
2775  *
2776  * Returns A newly allocated string with the substitution done. The caller
2777  *      must deallocate it !
2778  */
2779 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2780 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2781 		        xmlChar end, xmlChar  end2, xmlChar end3) {
2782     if ((ctxt == NULL) || (str == NULL)) return(NULL);
2783     return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2784            end, end2, end3));
2785 }
2786 
2787 /************************************************************************
2788  *									*
2789  *		Commodity functions, cleanup needed ?			*
2790  *									*
2791  ************************************************************************/
2792 
2793 /**
2794  * areBlanks:
2795  * @ctxt:  an XML parser context
2796  * @str:  a xmlChar *
2797  * @len:  the size of @str
2798  * @blank_chars: we know the chars are blanks
2799  *
2800  * Is this a sequence of blank chars that one can ignore ?
2801  *
2802  * Returns 1 if ignorable 0 otherwise.
2803  */
2804 
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2805 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2806                      int blank_chars) {
2807     int i, ret;
2808     xmlNodePtr lastChild;
2809 
2810     /*
2811      * Don't spend time trying to differentiate them, the same callback is
2812      * used !
2813      */
2814     if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2815 	return(0);
2816 
2817     /*
2818      * Check for xml:space value.
2819      */
2820     if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2821         (*(ctxt->space) == -2))
2822 	return(0);
2823 
2824     /*
2825      * Check that the string is made of blanks
2826      */
2827     if (blank_chars == 0) {
2828 	for (i = 0;i < len;i++)
2829 	    if (!(IS_BLANK_CH(str[i]))) return(0);
2830     }
2831 
2832     /*
2833      * Look if the element is mixed content in the DTD if available
2834      */
2835     if (ctxt->node == NULL) return(0);
2836     if (ctxt->myDoc != NULL) {
2837 	ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2838         if (ret == 0) return(1);
2839         if (ret == 1) return(0);
2840     }
2841 
2842     /*
2843      * Otherwise, heuristic :-\
2844      */
2845     if ((RAW != '<') && (RAW != 0xD)) return(0);
2846     if ((ctxt->node->children == NULL) &&
2847 	(RAW == '<') && (NXT(1) == '/')) return(0);
2848 
2849     lastChild = xmlGetLastChild(ctxt->node);
2850     if (lastChild == NULL) {
2851         if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2852             (ctxt->node->content != NULL)) return(0);
2853     } else if (xmlNodeIsText(lastChild))
2854         return(0);
2855     else if ((ctxt->node->children != NULL) &&
2856              (xmlNodeIsText(ctxt->node->children)))
2857         return(0);
2858     return(1);
2859 }
2860 
2861 /************************************************************************
2862  *									*
2863  *		Extra stuff for namespace support			*
2864  *	Relates to http://www.w3.org/TR/WD-xml-names			*
2865  *									*
2866  ************************************************************************/
2867 
2868 /**
2869  * xmlSplitQName:
2870  * @ctxt:  an XML parser context
2871  * @name:  an XML parser context
2872  * @prefix:  a xmlChar **
2873  *
2874  * parse an UTF8 encoded XML qualified name string
2875  *
2876  * [NS 5] QName ::= (Prefix ':')? LocalPart
2877  *
2878  * [NS 6] Prefix ::= NCName
2879  *
2880  * [NS 7] LocalPart ::= NCName
2881  *
2882  * Returns the local part, and prefix is updated
2883  *   to get the Prefix if any.
2884  */
2885 
2886 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefix)2887 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2888     xmlChar buf[XML_MAX_NAMELEN + 5];
2889     xmlChar *buffer = NULL;
2890     int len = 0;
2891     int max = XML_MAX_NAMELEN;
2892     xmlChar *ret = NULL;
2893     const xmlChar *cur = name;
2894     int c;
2895 
2896     if (prefix == NULL) return(NULL);
2897     *prefix = NULL;
2898 
2899     if (cur == NULL) return(NULL);
2900 
2901 #ifndef XML_XML_NAMESPACE
2902     /* xml: prefix is not really a namespace */
2903     if ((cur[0] == 'x') && (cur[1] == 'm') &&
2904         (cur[2] == 'l') && (cur[3] == ':'))
2905 	return(xmlStrdup(name));
2906 #endif
2907 
2908     /* nasty but well=formed */
2909     if (cur[0] == ':')
2910 	return(xmlStrdup(name));
2911 
2912     c = *cur++;
2913     while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2914 	buf[len++] = c;
2915 	c = *cur++;
2916     }
2917     if (len >= max) {
2918 	/*
2919 	 * Okay someone managed to make a huge name, so he's ready to pay
2920 	 * for the processing speed.
2921 	 */
2922 	max = len * 2;
2923 
2924 	buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2925 	if (buffer == NULL) {
2926 	    xmlErrMemory(ctxt, NULL);
2927 	    return(NULL);
2928 	}
2929 	memcpy(buffer, buf, len);
2930 	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2931 	    if (len + 10 > max) {
2932 	        xmlChar *tmp;
2933 
2934 		max *= 2;
2935 		tmp = (xmlChar *) xmlRealloc(buffer,
2936 						max * sizeof(xmlChar));
2937 		if (tmp == NULL) {
2938 		    xmlFree(buffer);
2939 		    xmlErrMemory(ctxt, NULL);
2940 		    return(NULL);
2941 		}
2942 		buffer = tmp;
2943 	    }
2944 	    buffer[len++] = c;
2945 	    c = *cur++;
2946 	}
2947 	buffer[len] = 0;
2948     }
2949 
2950     if ((c == ':') && (*cur == 0)) {
2951         if (buffer != NULL)
2952 	    xmlFree(buffer);
2953 	*prefix = NULL;
2954 	return(xmlStrdup(name));
2955     }
2956 
2957     if (buffer == NULL)
2958 	ret = xmlStrndup(buf, len);
2959     else {
2960 	ret = buffer;
2961 	buffer = NULL;
2962 	max = XML_MAX_NAMELEN;
2963     }
2964 
2965 
2966     if (c == ':') {
2967 	c = *cur;
2968         *prefix = ret;
2969 	if (c == 0) {
2970 	    return(xmlStrndup(BAD_CAST "", 0));
2971 	}
2972 	len = 0;
2973 
2974 	/*
2975 	 * Check that the first character is proper to start
2976 	 * a new name
2977 	 */
2978 	if (!(((c >= 0x61) && (c <= 0x7A)) ||
2979 	      ((c >= 0x41) && (c <= 0x5A)) ||
2980 	      (c == '_') || (c == ':'))) {
2981 	    int l;
2982 	    int first = CUR_SCHAR(cur, l);
2983 
2984 	    if (!IS_LETTER(first) && (first != '_')) {
2985 		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2986 			    "Name %s is not XML Namespace compliant\n",
2987 				  name);
2988 	    }
2989 	}
2990 	cur++;
2991 
2992 	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2993 	    buf[len++] = c;
2994 	    c = *cur++;
2995 	}
2996 	if (len >= max) {
2997 	    /*
2998 	     * Okay someone managed to make a huge name, so he's ready to pay
2999 	     * for the processing speed.
3000 	     */
3001 	    max = len * 2;
3002 
3003 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3004 	    if (buffer == NULL) {
3005 	        xmlErrMemory(ctxt, NULL);
3006 		return(NULL);
3007 	    }
3008 	    memcpy(buffer, buf, len);
3009 	    while (c != 0) { /* tested bigname2.xml */
3010 		if (len + 10 > max) {
3011 		    xmlChar *tmp;
3012 
3013 		    max *= 2;
3014 		    tmp = (xmlChar *) xmlRealloc(buffer,
3015 						    max * sizeof(xmlChar));
3016 		    if (tmp == NULL) {
3017 			xmlErrMemory(ctxt, NULL);
3018 			xmlFree(buffer);
3019 			return(NULL);
3020 		    }
3021 		    buffer = tmp;
3022 		}
3023 		buffer[len++] = c;
3024 		c = *cur++;
3025 	    }
3026 	    buffer[len] = 0;
3027 	}
3028 
3029 	if (buffer == NULL)
3030 	    ret = xmlStrndup(buf, len);
3031 	else {
3032 	    ret = buffer;
3033 	}
3034     }
3035 
3036     return(ret);
3037 }
3038 
3039 /************************************************************************
3040  *									*
3041  *			The parser itself				*
3042  *	Relates to http://www.w3.org/TR/REC-xml				*
3043  *									*
3044  ************************************************************************/
3045 
3046 /************************************************************************
3047  *									*
3048  *	Routines to parse Name, NCName and NmToken			*
3049  *									*
3050  ************************************************************************/
#ifdef DEBUG
/*
 * Call counters, only present in DEBUG builds: each of the name parsing
 * routines of this section increments the counter carrying its name once
 * per call.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3059 
3060 /*
3061  * The two following functions are related to the change of accepted
3062  * characters for Name and NmToken in the Revision 5 of XML-1.0
3063  * They correspond to the modified production [4] and the new production [4a]
3064  * changes in that revision. Also note that the macros used for the
3065  * productions Letter, Digit, CombiningChar and Extender are not needed
3066  * anymore.
3067  * We still keep compatibility to pre-revision5 parsing semantic if the
3068  * new XML_PARSE_OLD10 option is given to the parser.
3069  */
3070 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3071 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3072     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3073         /*
3074 	 * Use the new checks of production [4] [4a] amd [5] of the
3075 	 * Update 5 of XML-1.0
3076 	 */
3077 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3078 	    (((c >= 'a') && (c <= 'z')) ||
3079 	     ((c >= 'A') && (c <= 'Z')) ||
3080 	     (c == '_') || (c == ':') ||
3081 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3082 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3083 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3084 	     ((c >= 0x370) && (c <= 0x37D)) ||
3085 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3086 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3087 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3088 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3089 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3090 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3091 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3092 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3093 	    return(1);
3094     } else {
3095         if (IS_LETTER(c) || (c == '_') || (c == ':'))
3096 	    return(1);
3097     }
3098     return(0);
3099 }
3100 
3101 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3102 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3103     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3104         /*
3105 	 * Use the new checks of production [4] [4a] amd [5] of the
3106 	 * Update 5 of XML-1.0
3107 	 */
3108 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3109 	    (((c >= 'a') && (c <= 'z')) ||
3110 	     ((c >= 'A') && (c <= 'Z')) ||
3111 	     ((c >= '0') && (c <= '9')) || /* !start */
3112 	     (c == '_') || (c == ':') ||
3113 	     (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3114 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3115 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3116 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3117 	     ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3118 	     ((c >= 0x370) && (c <= 0x37D)) ||
3119 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3120 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3121 	     ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3122 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3123 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3124 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3125 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3126 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3127 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3128 	     return(1);
3129     } else {
3130         if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3131             (c == '.') || (c == '-') ||
3132 	    (c == '_') || (c == ':') ||
3133 	    (IS_COMBINING(c)) ||
3134 	    (IS_EXTENDER(c)))
3135 	    return(1);
3136     }
3137     return(0);
3138 }
3139 
3140 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3141                                           int *len, int *alloc, int normalize);
3142 
3143 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3144 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3145     int len = 0, l;
3146     int c;
3147     int count = 0;
3148 
3149 #ifdef DEBUG
3150     nbParseNameComplex++;
3151 #endif
3152 
3153     /*
3154      * Handler for more complex cases
3155      */
3156     GROW;
3157     if (ctxt->instate == XML_PARSER_EOF)
3158         return(NULL);
3159     c = CUR_CHAR(l);
3160     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3161         /*
3162 	 * Use the new checks of production [4] [4a] amd [5] of the
3163 	 * Update 5 of XML-1.0
3164 	 */
3165 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3166 	    (!(((c >= 'a') && (c <= 'z')) ||
3167 	       ((c >= 'A') && (c <= 'Z')) ||
3168 	       (c == '_') || (c == ':') ||
3169 	       ((c >= 0xC0) && (c <= 0xD6)) ||
3170 	       ((c >= 0xD8) && (c <= 0xF6)) ||
3171 	       ((c >= 0xF8) && (c <= 0x2FF)) ||
3172 	       ((c >= 0x370) && (c <= 0x37D)) ||
3173 	       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3174 	       ((c >= 0x200C) && (c <= 0x200D)) ||
3175 	       ((c >= 0x2070) && (c <= 0x218F)) ||
3176 	       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3177 	       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3178 	       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3179 	       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3180 	       ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3181 	    return(NULL);
3182 	}
3183 	len += l;
3184 	NEXTL(l);
3185 	c = CUR_CHAR(l);
3186 	while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3187 	       (((c >= 'a') && (c <= 'z')) ||
3188 	        ((c >= 'A') && (c <= 'Z')) ||
3189 	        ((c >= '0') && (c <= '9')) || /* !start */
3190 	        (c == '_') || (c == ':') ||
3191 	        (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3192 	        ((c >= 0xC0) && (c <= 0xD6)) ||
3193 	        ((c >= 0xD8) && (c <= 0xF6)) ||
3194 	        ((c >= 0xF8) && (c <= 0x2FF)) ||
3195 	        ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3196 	        ((c >= 0x370) && (c <= 0x37D)) ||
3197 	        ((c >= 0x37F) && (c <= 0x1FFF)) ||
3198 	        ((c >= 0x200C) && (c <= 0x200D)) ||
3199 	        ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3200 	        ((c >= 0x2070) && (c <= 0x218F)) ||
3201 	        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3202 	        ((c >= 0x3001) && (c <= 0xD7FF)) ||
3203 	        ((c >= 0xF900) && (c <= 0xFDCF)) ||
3204 	        ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3205 	        ((c >= 0x10000) && (c <= 0xEFFFF))
3206 		)) {
3207 	    if (count++ > XML_PARSER_CHUNK_SIZE) {
3208 		count = 0;
3209 		GROW;
3210                 if (ctxt->instate == XML_PARSER_EOF)
3211                     return(NULL);
3212 	    }
3213 	    len += l;
3214 	    NEXTL(l);
3215 	    c = CUR_CHAR(l);
3216 	}
3217     } else {
3218 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3219 	    (!IS_LETTER(c) && (c != '_') &&
3220 	     (c != ':'))) {
3221 	    return(NULL);
3222 	}
3223 	len += l;
3224 	NEXTL(l);
3225 	c = CUR_CHAR(l);
3226 
3227 	while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3228 	       ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3229 		(c == '.') || (c == '-') ||
3230 		(c == '_') || (c == ':') ||
3231 		(IS_COMBINING(c)) ||
3232 		(IS_EXTENDER(c)))) {
3233 	    if (count++ > XML_PARSER_CHUNK_SIZE) {
3234 		count = 0;
3235 		GROW;
3236                 if (ctxt->instate == XML_PARSER_EOF)
3237                     return(NULL);
3238 	    }
3239 	    len += l;
3240 	    NEXTL(l);
3241 	    c = CUR_CHAR(l);
3242 	}
3243     }
3244     if ((len > XML_MAX_NAME_LENGTH) &&
3245         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3246         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3247         return(NULL);
3248     }
3249     if (ctxt->input->cur - ctxt->input->base < len) {
3250         /*
3251          * There were a couple of bugs where PERefs lead to to a change
3252          * of the buffer. Check the buffer size to avoid passing an invalid
3253          * pointer to xmlDictLookup.
3254          */
3255         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3256                     "unexpected change of input buffer");
3257         return (NULL);
3258     }
3259     if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3260         return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3261     return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3262 }
3263 
3264 /**
3265  * xmlParseName:
3266  * @ctxt:  an XML parser context
3267  *
3268  * parse an XML name.
3269  *
3270  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3271  *                  CombiningChar | Extender
3272  *
3273  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3274  *
3275  * [6] Names ::= Name (#x20 Name)*
3276  *
3277  * Returns the Name parsed or NULL
3278  */
3279 
3280 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3281 xmlParseName(xmlParserCtxtPtr ctxt) {
3282     const xmlChar *in;
3283     const xmlChar *ret;
3284     int count = 0;
3285 
3286     GROW;
3287 
3288 #ifdef DEBUG
3289     nbParseName++;
3290 #endif
3291 
3292     /*
3293      * Accelerator for simple ASCII names
3294      */
3295     in = ctxt->input->cur;
3296     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3297 	((*in >= 0x41) && (*in <= 0x5A)) ||
3298 	(*in == '_') || (*in == ':')) {
3299 	in++;
3300 	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3301 	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3302 	       ((*in >= 0x30) && (*in <= 0x39)) ||
3303 	       (*in == '_') || (*in == '-') ||
3304 	       (*in == ':') || (*in == '.'))
3305 	    in++;
3306 	if ((*in > 0) && (*in < 0x80)) {
3307 	    count = in - ctxt->input->cur;
3308             if ((count > XML_MAX_NAME_LENGTH) &&
3309                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3310                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3311                 return(NULL);
3312             }
3313 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3314 	    ctxt->input->cur = in;
3315 	    ctxt->nbChars += count;
3316 	    ctxt->input->col += count;
3317 	    if (ret == NULL)
3318 	        xmlErrMemory(ctxt, NULL);
3319 	    return(ret);
3320 	}
3321     }
3322     /* accelerator for special cases */
3323     return(xmlParseNameComplex(ctxt));
3324 }
3325 
3326 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3327 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3328     int len = 0, l;
3329     int c;
3330     int count = 0;
3331     size_t startPosition = 0;
3332 
3333 #ifdef DEBUG
3334     nbParseNCNameComplex++;
3335 #endif
3336 
3337     /*
3338      * Handler for more complex cases
3339      */
3340     GROW;
3341     startPosition = CUR_PTR - BASE_PTR;
3342     c = CUR_CHAR(l);
3343     if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3344 	(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3345 	return(NULL);
3346     }
3347 
3348     while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3349 	   (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3350 	if (count++ > XML_PARSER_CHUNK_SIZE) {
3351             if ((len > XML_MAX_NAME_LENGTH) &&
3352                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3353                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3354                 return(NULL);
3355             }
3356 	    count = 0;
3357 	    GROW;
3358             if (ctxt->instate == XML_PARSER_EOF)
3359                 return(NULL);
3360 	}
3361 	len += l;
3362 	NEXTL(l);
3363 	c = CUR_CHAR(l);
3364 	if (c == 0) {
3365 	    count = 0;
3366 	    /*
3367 	     * when shrinking to extend the buffer we really need to preserve
3368 	     * the part of the name we already parsed. Hence rolling back
3369 	     * by current lenght.
3370 	     */
3371 	    ctxt->input->cur -= l;
3372 	    GROW;
3373             if (ctxt->instate == XML_PARSER_EOF)
3374                 return(NULL);
3375 	    ctxt->input->cur += l;
3376 	    c = CUR_CHAR(l);
3377 	}
3378     }
3379     if ((len > XML_MAX_NAME_LENGTH) &&
3380         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3381         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3382         return(NULL);
3383     }
3384     return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3385 }
3386 
3387 /**
3388  * xmlParseNCName:
3389  * @ctxt:  an XML parser context
3390  * @len:  length of the string parsed
3391  *
3392  * parse an XML name.
3393  *
3394  * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3395  *                      CombiningChar | Extender
3396  *
3397  * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3398  *
3399  * Returns the Name parsed or NULL
3400  */
3401 
3402 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3403 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3404     const xmlChar *in, *e;
3405     const xmlChar *ret;
3406     int count = 0;
3407 
3408 #ifdef DEBUG
3409     nbParseNCName++;
3410 #endif
3411 
3412     /*
3413      * Accelerator for simple ASCII names
3414      */
3415     in = ctxt->input->cur;
3416     e = ctxt->input->end;
3417     if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3418 	 ((*in >= 0x41) && (*in <= 0x5A)) ||
3419 	 (*in == '_')) && (in < e)) {
3420 	in++;
3421 	while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3422 	        ((*in >= 0x41) && (*in <= 0x5A)) ||
3423 	        ((*in >= 0x30) && (*in <= 0x39)) ||
3424 	        (*in == '_') || (*in == '-') ||
3425 	        (*in == '.')) && (in < e))
3426 	    in++;
3427 	if (in >= e)
3428 	    goto complex;
3429 	if ((*in > 0) && (*in < 0x80)) {
3430 	    count = in - ctxt->input->cur;
3431             if ((count > XML_MAX_NAME_LENGTH) &&
3432                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3433                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3434                 return(NULL);
3435             }
3436 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3437 	    ctxt->input->cur = in;
3438 	    ctxt->nbChars += count;
3439 	    ctxt->input->col += count;
3440 	    if (ret == NULL) {
3441 	        xmlErrMemory(ctxt, NULL);
3442 	    }
3443 	    return(ret);
3444 	}
3445     }
3446 complex:
3447     return(xmlParseNCNameComplex(ctxt));
3448 }
3449 
3450 /**
3451  * xmlParseNameAndCompare:
3452  * @ctxt:  an XML parser context
3453  *
3454  * parse an XML name and compares for match
3455  * (specialized for endtag parsing)
3456  *
3457  * Returns NULL for an illegal name, (xmlChar*) 1 for success
3458  * and the name for mismatch
3459  */
3460 
3461 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3462 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3463     register const xmlChar *cmp = other;
3464     register const xmlChar *in;
3465     const xmlChar *ret;
3466 
3467     GROW;
3468     if (ctxt->instate == XML_PARSER_EOF)
3469         return(NULL);
3470 
3471     in = ctxt->input->cur;
3472     while (*in != 0 && *in == *cmp) {
3473 	++in;
3474 	++cmp;
3475 	ctxt->input->col++;
3476     }
3477     if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3478 	/* success */
3479 	ctxt->input->cur = in;
3480 	return (const xmlChar*) 1;
3481     }
3482     /* failure (or end of input buffer), check with full function */
3483     ret = xmlParseName (ctxt);
3484     /* strings coming from the dictionary direct compare possible */
3485     if (ret == other) {
3486 	return (const xmlChar*) 1;
3487     }
3488     return ret;
3489 }
3490 
3491 /**
3492  * xmlParseStringName:
3493  * @ctxt:  an XML parser context
3494  * @str:  a pointer to the string pointer (IN/OUT)
3495  *
3496  * parse an XML name.
3497  *
3498  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3499  *                  CombiningChar | Extender
3500  *
3501  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3502  *
3503  * [6] Names ::= Name (#x20 Name)*
3504  *
3505  * Returns the Name parsed or NULL. The @str pointer
3506  * is updated to the current location in the string.
3507  */
3508 
3509 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3510 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3511     xmlChar buf[XML_MAX_NAMELEN + 5];
3512     const xmlChar *cur = *str;
3513     int len = 0, l;
3514     int c;
3515 
3516 #ifdef DEBUG
3517     nbParseStringName++;
3518 #endif
3519 
3520     c = CUR_SCHAR(cur, l);
3521     if (!xmlIsNameStartChar(ctxt, c)) {
3522 	return(NULL);
3523     }
3524 
3525     COPY_BUF(l,buf,len,c);
3526     cur += l;
3527     c = CUR_SCHAR(cur, l);
3528     while (xmlIsNameChar(ctxt, c)) {
3529 	COPY_BUF(l,buf,len,c);
3530 	cur += l;
3531 	c = CUR_SCHAR(cur, l);
3532 	if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3533 	    /*
3534 	     * Okay someone managed to make a huge name, so he's ready to pay
3535 	     * for the processing speed.
3536 	     */
3537 	    xmlChar *buffer;
3538 	    int max = len * 2;
3539 
3540 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3541 	    if (buffer == NULL) {
3542 	        xmlErrMemory(ctxt, NULL);
3543 		return(NULL);
3544 	    }
3545 	    memcpy(buffer, buf, len);
3546 	    while (xmlIsNameChar(ctxt, c)) {
3547 		if (len + 10 > max) {
3548 		    xmlChar *tmp;
3549 
3550                     if ((len > XML_MAX_NAME_LENGTH) &&
3551                         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3552                         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3553 			xmlFree(buffer);
3554                         return(NULL);
3555                     }
3556 		    max *= 2;
3557 		    tmp = (xmlChar *) xmlRealloc(buffer,
3558 			                            max * sizeof(xmlChar));
3559 		    if (tmp == NULL) {
3560 			xmlErrMemory(ctxt, NULL);
3561 			xmlFree(buffer);
3562 			return(NULL);
3563 		    }
3564 		    buffer = tmp;
3565 		}
3566 		COPY_BUF(l,buffer,len,c);
3567 		cur += l;
3568 		c = CUR_SCHAR(cur, l);
3569 	    }
3570 	    buffer[len] = 0;
3571 	    *str = cur;
3572 	    return(buffer);
3573 	}
3574     }
3575     if ((len > XML_MAX_NAME_LENGTH) &&
3576         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3577         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3578         return(NULL);
3579     }
3580     *str = cur;
3581     return(xmlStrndup(buf, len));
3582 }
3583 
3584 /**
3585  * xmlParseNmtoken:
3586  * @ctxt:  an XML parser context
3587  *
3588  * parse an XML Nmtoken.
3589  *
3590  * [7] Nmtoken ::= (NameChar)+
3591  *
3592  * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3593  *
3594  * Returns the Nmtoken parsed or NULL
3595  */
3596 
3597 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3598 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3599     xmlChar buf[XML_MAX_NAMELEN + 5];
3600     int len = 0, l;
3601     int c;
3602     int count = 0;
3603 
3604 #ifdef DEBUG
3605     nbParseNmToken++;
3606 #endif
3607 
3608     GROW;
3609     if (ctxt->instate == XML_PARSER_EOF)
3610         return(NULL);
3611     c = CUR_CHAR(l);
3612 
3613     while (xmlIsNameChar(ctxt, c)) {
3614 	if (count++ > XML_PARSER_CHUNK_SIZE) {
3615 	    count = 0;
3616 	    GROW;
3617 	}
3618 	COPY_BUF(l,buf,len,c);
3619 	NEXTL(l);
3620 	c = CUR_CHAR(l);
3621 	if (c == 0) {
3622 	    count = 0;
3623 	    GROW;
3624 	    if (ctxt->instate == XML_PARSER_EOF)
3625 		return(NULL);
3626             c = CUR_CHAR(l);
3627 	}
3628 	if (len >= XML_MAX_NAMELEN) {
3629 	    /*
3630 	     * Okay someone managed to make a huge token, so he's ready to pay
3631 	     * for the processing speed.
3632 	     */
3633 	    xmlChar *buffer;
3634 	    int max = len * 2;
3635 
3636 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3637 	    if (buffer == NULL) {
3638 	        xmlErrMemory(ctxt, NULL);
3639 		return(NULL);
3640 	    }
3641 	    memcpy(buffer, buf, len);
3642 	    while (xmlIsNameChar(ctxt, c)) {
3643 		if (count++ > XML_PARSER_CHUNK_SIZE) {
3644 		    count = 0;
3645 		    GROW;
3646                     if (ctxt->instate == XML_PARSER_EOF) {
3647                         xmlFree(buffer);
3648                         return(NULL);
3649                     }
3650 		}
3651 		if (len + 10 > max) {
3652 		    xmlChar *tmp;
3653 
3654                     if ((max > XML_MAX_NAME_LENGTH) &&
3655                         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3656                         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3657                         xmlFree(buffer);
3658                         return(NULL);
3659                     }
3660 		    max *= 2;
3661 		    tmp = (xmlChar *) xmlRealloc(buffer,
3662 			                            max * sizeof(xmlChar));
3663 		    if (tmp == NULL) {
3664 			xmlErrMemory(ctxt, NULL);
3665 			xmlFree(buffer);
3666 			return(NULL);
3667 		    }
3668 		    buffer = tmp;
3669 		}
3670 		COPY_BUF(l,buffer,len,c);
3671 		NEXTL(l);
3672 		c = CUR_CHAR(l);
3673 	    }
3674 	    buffer[len] = 0;
3675 	    return(buffer);
3676 	}
3677     }
3678     if (len == 0)
3679         return(NULL);
3680     if ((len > XML_MAX_NAME_LENGTH) &&
3681         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3682         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3683         return(NULL);
3684     }
3685     return(xmlStrndup(buf, len));
3686 }
3687 
3688 /**
3689  * xmlParseEntityValue:
3690  * @ctxt:  an XML parser context
3691  * @orig:  if non-NULL store a copy of the original entity value
3692  *
3693  * parse a value for ENTITY declarations
3694  *
3695  * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3696  *	               "'" ([^%&'] | PEReference | Reference)* "'"
3697  *
3698  * Returns the EntityValue parsed with reference substituted or NULL
3699  */
3700 
3701 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3702 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3703     xmlChar *buf = NULL;
3704     int len = 0;
3705     int size = XML_PARSER_BUFFER_SIZE;
3706     int c, l;
3707     xmlChar stop;
3708     xmlChar *ret = NULL;
3709     const xmlChar *cur = NULL;
3710     xmlParserInputPtr input;
3711 
3712     if (RAW == '"') stop = '"';
3713     else if (RAW == '\'') stop = '\'';
3714     else {
3715 	xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3716 	return(NULL);
3717     }
3718     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3719     if (buf == NULL) {
3720 	xmlErrMemory(ctxt, NULL);
3721 	return(NULL);
3722     }
3723 
3724     /*
3725      * The content of the entity definition is copied in a buffer.
3726      */
3727 
3728     ctxt->instate = XML_PARSER_ENTITY_VALUE;
3729     input = ctxt->input;
3730     GROW;
3731     if (ctxt->instate == XML_PARSER_EOF)
3732         goto error;
3733     NEXT;
3734     c = CUR_CHAR(l);
3735     /*
3736      * NOTE: 4.4.5 Included in Literal
3737      * When a parameter entity reference appears in a literal entity
3738      * value, ... a single or double quote character in the replacement
3739      * text is always treated as a normal data character and will not
3740      * terminate the literal.
3741      * In practice it means we stop the loop only when back at parsing
3742      * the initial entity and the quote is found
3743      */
3744     while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3745 	    (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3746 	if (len + 5 >= size) {
3747 	    xmlChar *tmp;
3748 
3749 	    size *= 2;
3750 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3751 	    if (tmp == NULL) {
3752 		xmlErrMemory(ctxt, NULL);
3753                 goto error;
3754 	    }
3755 	    buf = tmp;
3756 	}
3757 	COPY_BUF(l,buf,len,c);
3758 	NEXTL(l);
3759 
3760 	GROW;
3761 	c = CUR_CHAR(l);
3762 	if (c == 0) {
3763 	    GROW;
3764 	    c = CUR_CHAR(l);
3765 	}
3766     }
3767     buf[len] = 0;
3768     if (ctxt->instate == XML_PARSER_EOF)
3769         goto error;
3770     if (c != stop) {
3771         xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3772         goto error;
3773     }
3774     NEXT;
3775 
3776     /*
3777      * Raise problem w.r.t. '&' and '%' being used in non-entities
3778      * reference constructs. Note Charref will be handled in
3779      * xmlStringDecodeEntities()
3780      */
3781     cur = buf;
3782     while (*cur != 0) { /* non input consuming */
3783 	if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3784 	    xmlChar *name;
3785 	    xmlChar tmp = *cur;
3786             int nameOk = 0;
3787 
3788 	    cur++;
3789 	    name = xmlParseStringName(ctxt, &cur);
3790             if (name != NULL) {
3791                 nameOk = 1;
3792                 xmlFree(name);
3793             }
3794             if ((nameOk == 0) || (*cur != ';')) {
3795 		xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3796 	    "EntityValue: '%c' forbidden except for entities references\n",
3797 	                          tmp);
3798                 goto error;
3799 	    }
3800 	    if ((tmp == '%') && (ctxt->inSubset == 1) &&
3801 		(ctxt->inputNr == 1)) {
3802 		xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3803                 goto error;
3804 	    }
3805 	    if (*cur == 0)
3806 	        break;
3807 	}
3808 	cur++;
3809     }
3810 
3811     /*
3812      * Then PEReference entities are substituted.
3813      *
3814      * NOTE: 4.4.7 Bypassed
3815      * When a general entity reference appears in the EntityValue in
3816      * an entity declaration, it is bypassed and left as is.
3817      * so XML_SUBSTITUTE_REF is not set here.
3818      */
3819     ++ctxt->depth;
3820     ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3821                                   0, 0, 0);
3822     --ctxt->depth;
3823     if (orig != NULL) {
3824         *orig = buf;
3825         buf = NULL;
3826     }
3827 
3828 error:
3829     if (buf != NULL)
3830         xmlFree(buf);
3831     return(ret);
3832 }
3833 
3834 /**
3835  * xmlParseAttValueComplex:
3836  * @ctxt:  an XML parser context
3837  * @len:   the resulting attribute len
3838  * @normalize:  wether to apply the inner normalization
3839  *
3840  * parse a value for an attribute, this is the fallback function
3841  * of xmlParseAttValue() when the attribute parsing requires handling
3842  * of non-ASCII characters, or normalization compaction.
3843  *
3844  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3845  */
3846 static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt,int * attlen,int normalize)3847 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3848     xmlChar limit = 0;
3849     xmlChar *buf = NULL;
3850     xmlChar *rep = NULL;
3851     size_t len = 0;
3852     size_t buf_size = 0;
3853     int c, l, in_space = 0;
3854     xmlChar *current = NULL;
3855     xmlEntityPtr ent;
3856 
3857     if (NXT(0) == '"') {
3858 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3859 	limit = '"';
3860         NEXT;
3861     } else if (NXT(0) == '\'') {
3862 	limit = '\'';
3863 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3864         NEXT;
3865     } else {
3866 	xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3867 	return(NULL);
3868     }
3869 
3870     /*
3871      * allocate a translation buffer.
3872      */
3873     buf_size = XML_PARSER_BUFFER_SIZE;
3874     buf = (xmlChar *) xmlMallocAtomic(buf_size);
3875     if (buf == NULL) goto mem_error;
3876 
3877     /*
3878      * OK loop until we reach one of the ending char or a size limit.
3879      */
3880     c = CUR_CHAR(l);
3881     while (((NXT(0) != limit) && /* checked */
3882             (IS_CHAR(c)) && (c != '<')) &&
3883             (ctxt->instate != XML_PARSER_EOF)) {
3884         /*
3885          * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3886          * special option is given
3887          */
3888         if ((len > XML_MAX_TEXT_LENGTH) &&
3889             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3890             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3891                            "AttValue length too long\n");
3892             goto mem_error;
3893         }
3894 	if (c == 0) break;
3895 	if (c == '&') {
3896 	    in_space = 0;
3897 	    if (NXT(1) == '#') {
3898 		int val = xmlParseCharRef(ctxt);
3899 
3900 		if (val == '&') {
3901 		    if (ctxt->replaceEntities) {
3902 			if (len + 10 > buf_size) {
3903 			    growBuffer(buf, 10);
3904 			}
3905 			buf[len++] = '&';
3906 		    } else {
3907 			/*
3908 			 * The reparsing will be done in xmlStringGetNodeList()
3909 			 * called by the attribute() function in SAX.c
3910 			 */
3911 			if (len + 10 > buf_size) {
3912 			    growBuffer(buf, 10);
3913 			}
3914 			buf[len++] = '&';
3915 			buf[len++] = '#';
3916 			buf[len++] = '3';
3917 			buf[len++] = '8';
3918 			buf[len++] = ';';
3919 		    }
3920 		} else if (val != 0) {
3921 		    if (len + 10 > buf_size) {
3922 			growBuffer(buf, 10);
3923 		    }
3924 		    len += xmlCopyChar(0, &buf[len], val);
3925 		}
3926 	    } else {
3927 		ent = xmlParseEntityRef(ctxt);
3928 		ctxt->nbentities++;
3929 		if (ent != NULL)
3930 		    ctxt->nbentities += ent->owner;
3931 		if ((ent != NULL) &&
3932 		    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3933 		    if (len + 10 > buf_size) {
3934 			growBuffer(buf, 10);
3935 		    }
3936 		    if ((ctxt->replaceEntities == 0) &&
3937 		        (ent->content[0] == '&')) {
3938 			buf[len++] = '&';
3939 			buf[len++] = '#';
3940 			buf[len++] = '3';
3941 			buf[len++] = '8';
3942 			buf[len++] = ';';
3943 		    } else {
3944 			buf[len++] = ent->content[0];
3945 		    }
3946 		} else if ((ent != NULL) &&
3947 		           (ctxt->replaceEntities != 0)) {
3948 		    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3949 			++ctxt->depth;
3950 			rep = xmlStringDecodeEntities(ctxt, ent->content,
3951 						      XML_SUBSTITUTE_REF,
3952 						      0, 0, 0);
3953 			--ctxt->depth;
3954 			if (rep != NULL) {
3955 			    current = rep;
3956 			    while (*current != 0) { /* non input consuming */
3957                                 if ((*current == 0xD) || (*current == 0xA) ||
3958                                     (*current == 0x9)) {
3959                                     buf[len++] = 0x20;
3960                                     current++;
3961                                 } else
3962                                     buf[len++] = *current++;
3963 				if (len + 10 > buf_size) {
3964 				    growBuffer(buf, 10);
3965 				}
3966 			    }
3967 			    xmlFree(rep);
3968 			    rep = NULL;
3969 			}
3970 		    } else {
3971 			if (len + 10 > buf_size) {
3972 			    growBuffer(buf, 10);
3973 			}
3974 			if (ent->content != NULL)
3975 			    buf[len++] = ent->content[0];
3976 		    }
3977 		} else if (ent != NULL) {
3978 		    int i = xmlStrlen(ent->name);
3979 		    const xmlChar *cur = ent->name;
3980 
3981 		    /*
3982 		     * This may look absurd but is needed to detect
3983 		     * entities problems
3984 		     */
3985 		    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3986 			(ent->content != NULL) && (ent->checked == 0)) {
3987 			unsigned long oldnbent = ctxt->nbentities;
3988 
3989 			++ctxt->depth;
3990 			rep = xmlStringDecodeEntities(ctxt, ent->content,
3991 						  XML_SUBSTITUTE_REF, 0, 0, 0);
3992 			--ctxt->depth;
3993 
3994 			ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
3995 			if (rep != NULL) {
3996 			    if (xmlStrchr(rep, '<'))
3997 			        ent->checked |= 1;
3998 			    xmlFree(rep);
3999 			    rep = NULL;
4000 			} else {
4001                             ent->content[0] = 0;
4002                         }
4003 		    }
4004 
4005 		    /*
4006 		     * Just output the reference
4007 		     */
4008 		    buf[len++] = '&';
4009 		    while (len + i + 10 > buf_size) {
4010 			growBuffer(buf, i + 10);
4011 		    }
4012 		    for (;i > 0;i--)
4013 			buf[len++] = *cur++;
4014 		    buf[len++] = ';';
4015 		}
4016 	    }
4017 	} else {
4018 	    if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4019 	        if ((len != 0) || (!normalize)) {
4020 		    if ((!normalize) || (!in_space)) {
4021 			COPY_BUF(l,buf,len,0x20);
4022 			while (len + 10 > buf_size) {
4023 			    growBuffer(buf, 10);
4024 			}
4025 		    }
4026 		    in_space = 1;
4027 		}
4028 	    } else {
4029 	        in_space = 0;
4030 		COPY_BUF(l,buf,len,c);
4031 		if (len + 10 > buf_size) {
4032 		    growBuffer(buf, 10);
4033 		}
4034 	    }
4035 	    NEXTL(l);
4036 	}
4037 	GROW;
4038 	c = CUR_CHAR(l);
4039     }
4040     if (ctxt->instate == XML_PARSER_EOF)
4041         goto error;
4042 
4043     if ((in_space) && (normalize)) {
4044         while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4045     }
4046     buf[len] = 0;
4047     if (RAW == '<') {
4048 	xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4049     } else if (RAW != limit) {
4050 	if ((c != 0) && (!IS_CHAR(c))) {
4051 	    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4052 			   "invalid character in attribute value\n");
4053 	} else {
4054 	    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4055 			   "AttValue: ' expected\n");
4056         }
4057     } else
4058 	NEXT;
4059 
4060     /*
4061      * There we potentially risk an overflow, don't allow attribute value of
4062      * length more than INT_MAX it is a very reasonnable assumption !
4063      */
4064     if (len >= INT_MAX) {
4065         xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4066                        "AttValue length too long\n");
4067         goto mem_error;
4068     }
4069 
4070     if (attlen != NULL) *attlen = (int) len;
4071     return(buf);
4072 
4073 mem_error:
4074     xmlErrMemory(ctxt, NULL);
4075 error:
4076     if (buf != NULL)
4077         xmlFree(buf);
4078     if (rep != NULL)
4079         xmlFree(rep);
4080     return(NULL);
4081 }
4082 
4083 /**
4084  * xmlParseAttValue:
4085  * @ctxt:  an XML parser context
4086  *
4087  * parse a value for an attribute
4088  * Note: the parser won't do substitution of entities here, this
4089  * will be handled later in xmlStringGetNodeList
4090  *
4091  * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4092  *                   "'" ([^<&'] | Reference)* "'"
4093  *
4094  * 3.3.3 Attribute-Value Normalization:
4095  * Before the value of an attribute is passed to the application or
4096  * checked for validity, the XML processor must normalize it as follows:
4097  * - a character reference is processed by appending the referenced
4098  *   character to the attribute value
4099  * - an entity reference is processed by recursively processing the
4100  *   replacement text of the entity
4101  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4102  *   appending #x20 to the normalized value, except that only a single
4103  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4104  *   parsed entity or the literal entity value of an internal parsed entity
4105  * - other characters are processed by appending them to the normalized value
4106  * If the declared value is not CDATA, then the XML processor must further
4107  * process the normalized attribute value by discarding any leading and
4108  * trailing space (#x20) characters, and by replacing sequences of space
4109  * (#x20) characters by a single space (#x20) character.
4110  * All attributes for which no declaration has been read should be treated
4111  * by a non-validating parser as if declared CDATA.
4112  *
4113  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4114  */
4115 
4116 
4117 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4118 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4119     if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4120     return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4121 }
4122 
4123 /**
4124  * xmlParseSystemLiteral:
4125  * @ctxt:  an XML parser context
4126  *
4127  * parse an XML Literal
4128  *
4129  * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4130  *
4131  * Returns the SystemLiteral parsed or NULL
4132  */
4133 
4134 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4135 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4136     xmlChar *buf = NULL;
4137     int len = 0;
4138     int size = XML_PARSER_BUFFER_SIZE;
4139     int cur, l;
4140     xmlChar stop;
4141     int state = ctxt->instate;
4142     int count = 0;
4143 
4144     SHRINK;
4145     if (RAW == '"') {
4146         NEXT;
4147 	stop = '"';
4148     } else if (RAW == '\'') {
4149         NEXT;
4150 	stop = '\'';
4151     } else {
4152 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4153 	return(NULL);
4154     }
4155 
4156     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4157     if (buf == NULL) {
4158         xmlErrMemory(ctxt, NULL);
4159 	return(NULL);
4160     }
4161     ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4162     cur = CUR_CHAR(l);
4163     while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4164 	if (len + 5 >= size) {
4165 	    xmlChar *tmp;
4166 
4167             if ((size > XML_MAX_NAME_LENGTH) &&
4168                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4169                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4170                 xmlFree(buf);
4171 		ctxt->instate = (xmlParserInputState) state;
4172                 return(NULL);
4173             }
4174 	    size *= 2;
4175 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4176 	    if (tmp == NULL) {
4177 	        xmlFree(buf);
4178 		xmlErrMemory(ctxt, NULL);
4179 		ctxt->instate = (xmlParserInputState) state;
4180 		return(NULL);
4181 	    }
4182 	    buf = tmp;
4183 	}
4184 	count++;
4185 	if (count > 50) {
4186 	    GROW;
4187 	    count = 0;
4188             if (ctxt->instate == XML_PARSER_EOF) {
4189 	        xmlFree(buf);
4190 		return(NULL);
4191             }
4192 	}
4193 	COPY_BUF(l,buf,len,cur);
4194 	NEXTL(l);
4195 	cur = CUR_CHAR(l);
4196 	if (cur == 0) {
4197 	    GROW;
4198 	    SHRINK;
4199 	    cur = CUR_CHAR(l);
4200 	}
4201     }
4202     buf[len] = 0;
4203     ctxt->instate = (xmlParserInputState) state;
4204     if (!IS_CHAR(cur)) {
4205 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4206     } else {
4207 	NEXT;
4208     }
4209     return(buf);
4210 }
4211 
4212 /**
4213  * xmlParsePubidLiteral:
4214  * @ctxt:  an XML parser context
4215  *
4216  * parse an XML public literal
4217  *
4218  * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4219  *
4220  * Returns the PubidLiteral parsed or NULL.
4221  */
4222 
4223 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4224 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4225     xmlChar *buf = NULL;
4226     int len = 0;
4227     int size = XML_PARSER_BUFFER_SIZE;
4228     xmlChar cur;
4229     xmlChar stop;
4230     int count = 0;
4231     xmlParserInputState oldstate = ctxt->instate;
4232 
4233     SHRINK;
4234     if (RAW == '"') {
4235         NEXT;
4236 	stop = '"';
4237     } else if (RAW == '\'') {
4238         NEXT;
4239 	stop = '\'';
4240     } else {
4241 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4242 	return(NULL);
4243     }
4244     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4245     if (buf == NULL) {
4246 	xmlErrMemory(ctxt, NULL);
4247 	return(NULL);
4248     }
4249     ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4250     cur = CUR;
4251     while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4252 	if (len + 1 >= size) {
4253 	    xmlChar *tmp;
4254 
4255             if ((size > XML_MAX_NAME_LENGTH) &&
4256                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4257                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4258                 xmlFree(buf);
4259                 return(NULL);
4260             }
4261 	    size *= 2;
4262 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4263 	    if (tmp == NULL) {
4264 		xmlErrMemory(ctxt, NULL);
4265 		xmlFree(buf);
4266 		return(NULL);
4267 	    }
4268 	    buf = tmp;
4269 	}
4270 	buf[len++] = cur;
4271 	count++;
4272 	if (count > 50) {
4273 	    GROW;
4274 	    count = 0;
4275             if (ctxt->instate == XML_PARSER_EOF) {
4276 		xmlFree(buf);
4277 		return(NULL);
4278             }
4279 	}
4280 	NEXT;
4281 	cur = CUR;
4282 	if (cur == 0) {
4283 	    GROW;
4284 	    SHRINK;
4285 	    cur = CUR;
4286 	}
4287     }
4288     buf[len] = 0;
4289     if (cur != stop) {
4290 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4291     } else {
4292 	NEXT;
4293     }
4294     ctxt->instate = oldstate;
4295     return(buf);
4296 }
4297 
4298 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4299 
/*
 * used for the test in the inner loop of the char data testing
 *
 * The table is indexed by byte value. A non-zero entry (equal to its
 * index) marks a byte the inner loop may step over; a zero entry stops
 * the loop. Zero entries are: control characters other than 0x9
 * (so CR and LF are handled separately), '&', '<', ']' and every byte
 * from 0x80 upwards (non-ASCII).
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4337 
4338 /**
4339  * xmlParseCharData:
4340  * @ctxt:  an XML parser context
4341  * @cdata:  int indicating whether we are within a CDATA section
4342  *
4343  * parse a CharData section.
4344  * if we are within a CDATA section ']]>' marks an end of section.
4345  *
4346  * The right angle bracket (>) may be represented using the string "&gt;",
4347  * and must, for compatibility, be escaped using "&gt;" or a character
4348  * reference when it appears in the string "]]>" in content, when that
4349  * string is not marking the end of a CDATA section.
4350  *
4351  * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4352  */
4353 
4354 void
xmlParseCharData(xmlParserCtxtPtr ctxt,int cdata)4355 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4356     const xmlChar *in;
4357     int nbchar = 0;
4358     int line = ctxt->input->line;
4359     int col = ctxt->input->col;
4360     int ccol;
4361 
4362     SHRINK;
4363     GROW;
4364     /*
4365      * Accelerated common case where input don't need to be
4366      * modified before passing it to the handler.
4367      */
4368     if (!cdata) {
4369 	in = ctxt->input->cur;
4370 	do {
4371 get_more_space:
4372 	    while (*in == 0x20) { in++; ctxt->input->col++; }
4373 	    if (*in == 0xA) {
4374 		do {
4375 		    ctxt->input->line++; ctxt->input->col = 1;
4376 		    in++;
4377 		} while (*in == 0xA);
4378 		goto get_more_space;
4379 	    }
4380 	    if (*in == '<') {
4381 		nbchar = in - ctxt->input->cur;
4382 		if (nbchar > 0) {
4383 		    const xmlChar *tmp = ctxt->input->cur;
4384 		    ctxt->input->cur = in;
4385 
4386 		    if ((ctxt->sax != NULL) &&
4387 		        (ctxt->sax->ignorableWhitespace !=
4388 		         ctxt->sax->characters)) {
4389 			if (areBlanks(ctxt, tmp, nbchar, 1)) {
4390 			    if (ctxt->sax->ignorableWhitespace != NULL)
4391 				ctxt->sax->ignorableWhitespace(ctxt->userData,
4392 						       tmp, nbchar);
4393 			} else {
4394 			    if (ctxt->sax->characters != NULL)
4395 				ctxt->sax->characters(ctxt->userData,
4396 						      tmp, nbchar);
4397 			    if (*ctxt->space == -1)
4398 			        *ctxt->space = -2;
4399 			}
4400 		    } else if ((ctxt->sax != NULL) &&
4401 		               (ctxt->sax->characters != NULL)) {
4402 			ctxt->sax->characters(ctxt->userData,
4403 					      tmp, nbchar);
4404 		    }
4405 		}
4406 		return;
4407 	    }
4408 
4409 get_more:
4410             ccol = ctxt->input->col;
4411 	    while (test_char_data[*in]) {
4412 		in++;
4413 		ccol++;
4414 	    }
4415 	    ctxt->input->col = ccol;
4416 	    if (*in == 0xA) {
4417 		do {
4418 		    ctxt->input->line++; ctxt->input->col = 1;
4419 		    in++;
4420 		} while (*in == 0xA);
4421 		goto get_more;
4422 	    }
4423 	    if (*in == ']') {
4424 		if ((in[1] == ']') && (in[2] == '>')) {
4425 		    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4426 		    ctxt->input->cur = in + 1;
4427 		    return;
4428 		}
4429 		in++;
4430 		ctxt->input->col++;
4431 		goto get_more;
4432 	    }
4433 	    nbchar = in - ctxt->input->cur;
4434 	    if (nbchar > 0) {
4435 		if ((ctxt->sax != NULL) &&
4436 		    (ctxt->sax->ignorableWhitespace !=
4437 		     ctxt->sax->characters) &&
4438 		    (IS_BLANK_CH(*ctxt->input->cur))) {
4439 		    const xmlChar *tmp = ctxt->input->cur;
4440 		    ctxt->input->cur = in;
4441 
4442 		    if (areBlanks(ctxt, tmp, nbchar, 0)) {
4443 		        if (ctxt->sax->ignorableWhitespace != NULL)
4444 			    ctxt->sax->ignorableWhitespace(ctxt->userData,
4445 							   tmp, nbchar);
4446 		    } else {
4447 		        if (ctxt->sax->characters != NULL)
4448 			    ctxt->sax->characters(ctxt->userData,
4449 						  tmp, nbchar);
4450 			if (*ctxt->space == -1)
4451 			    *ctxt->space = -2;
4452 		    }
4453                     line = ctxt->input->line;
4454                     col = ctxt->input->col;
4455 		} else if (ctxt->sax != NULL) {
4456 		    if (ctxt->sax->characters != NULL)
4457 			ctxt->sax->characters(ctxt->userData,
4458 					      ctxt->input->cur, nbchar);
4459                     line = ctxt->input->line;
4460                     col = ctxt->input->col;
4461 		}
4462                 /* something really bad happened in the SAX callback */
4463                 if (ctxt->instate != XML_PARSER_CONTENT)
4464                     return;
4465 	    }
4466 	    ctxt->input->cur = in;
4467 	    if (*in == 0xD) {
4468 		in++;
4469 		if (*in == 0xA) {
4470 		    ctxt->input->cur = in;
4471 		    in++;
4472 		    ctxt->input->line++; ctxt->input->col = 1;
4473 		    continue; /* while */
4474 		}
4475 		in--;
4476 	    }
4477 	    if (*in == '<') {
4478 		return;
4479 	    }
4480 	    if (*in == '&') {
4481 		return;
4482 	    }
4483 	    SHRINK;
4484 	    GROW;
4485             if (ctxt->instate == XML_PARSER_EOF)
4486 		return;
4487 	    in = ctxt->input->cur;
4488 	} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4489 	nbchar = 0;
4490     }
4491     ctxt->input->line = line;
4492     ctxt->input->col = col;
4493     xmlParseCharDataComplex(ctxt, cdata);
4494 }
4495 
4496 /**
4497  * xmlParseCharDataComplex:
4498  * @ctxt:  an XML parser context
4499  * @cdata:  int indicating whether we are within a CDATA section
4500  *
4501  * parse a CharData section.this is the fallback function
4502  * of xmlParseCharData() when the parsing requires handling
4503  * of non-ASCII characters.
4504  */
4505 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4506 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4507     xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4508     int nbchar = 0;
4509     int cur, l;
4510     int count = 0;
4511 
4512     SHRINK;
4513     GROW;
4514     cur = CUR_CHAR(l);
4515     while ((cur != '<') && /* checked */
4516            (cur != '&') &&
4517 	   (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4518 	if ((cur == ']') && (NXT(1) == ']') &&
4519 	    (NXT(2) == '>')) {
4520 	    if (cdata) break;
4521 	    else {
4522 		xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4523 	    }
4524 	}
4525 	COPY_BUF(l,buf,nbchar,cur);
4526 	if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4527 	    buf[nbchar] = 0;
4528 
4529 	    /*
4530 	     * OK the segment is to be consumed as chars.
4531 	     */
4532 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4533 		if (areBlanks(ctxt, buf, nbchar, 0)) {
4534 		    if (ctxt->sax->ignorableWhitespace != NULL)
4535 			ctxt->sax->ignorableWhitespace(ctxt->userData,
4536 			                               buf, nbchar);
4537 		} else {
4538 		    if (ctxt->sax->characters != NULL)
4539 			ctxt->sax->characters(ctxt->userData, buf, nbchar);
4540 		    if ((ctxt->sax->characters !=
4541 		         ctxt->sax->ignorableWhitespace) &&
4542 			(*ctxt->space == -1))
4543 			*ctxt->space = -2;
4544 		}
4545 	    }
4546 	    nbchar = 0;
4547             /* something really bad happened in the SAX callback */
4548             if (ctxt->instate != XML_PARSER_CONTENT)
4549                 return;
4550 	}
4551 	count++;
4552 	if (count > 50) {
4553 	    GROW;
4554 	    count = 0;
4555             if (ctxt->instate == XML_PARSER_EOF)
4556 		return;
4557 	}
4558 	NEXTL(l);
4559 	cur = CUR_CHAR(l);
4560     }
4561     if (nbchar != 0) {
4562         buf[nbchar] = 0;
4563 	/*
4564 	 * OK the segment is to be consumed as chars.
4565 	 */
4566 	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4567 	    if (areBlanks(ctxt, buf, nbchar, 0)) {
4568 		if (ctxt->sax->ignorableWhitespace != NULL)
4569 		    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4570 	    } else {
4571 		if (ctxt->sax->characters != NULL)
4572 		    ctxt->sax->characters(ctxt->userData, buf, nbchar);
4573 		if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4574 		    (*ctxt->space == -1))
4575 		    *ctxt->space = -2;
4576 	    }
4577 	}
4578     }
4579     if ((cur != 0) && (!IS_CHAR(cur))) {
4580 	/* Generate the error and skip the offending character */
4581         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4582                           "PCDATA invalid Char value %d\n",
4583 	                  cur);
4584 	NEXTL(l);
4585     }
4586 }
4587 
4588 /**
4589  * xmlParseExternalID:
4590  * @ctxt:  an XML parser context
4591  * @publicID:  a xmlChar** receiving PubidLiteral
4592  * @strict: indicate whether we should restrict parsing to only
4593  *          production [75], see NOTE below
4594  *
4595  * Parse an External ID or a Public ID
4596  *
4597  * NOTE: Productions [75] and [83] interact badly since [75] can generate
4598  *       'PUBLIC' S PubidLiteral S SystemLiteral
4599  *
4600  * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4601  *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4602  *
4603  * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4604  *
4605  * Returns the function returns SystemLiteral and in the second
4606  *                case publicID receives PubidLiteral, is strict is off
4607  *                it is possible to return NULL and have publicID set.
4608  */
4609 
4610 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4611 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4612     xmlChar *URI = NULL;
4613 
4614     SHRINK;
4615 
4616     *publicID = NULL;
4617     if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4618         SKIP(6);
4619 	if (SKIP_BLANKS == 0) {
4620 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4621 	                   "Space required after 'SYSTEM'\n");
4622 	}
4623 	URI = xmlParseSystemLiteral(ctxt);
4624 	if (URI == NULL) {
4625 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4626         }
4627     } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4628         SKIP(6);
4629 	if (SKIP_BLANKS == 0) {
4630 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4631 		    "Space required after 'PUBLIC'\n");
4632 	}
4633 	*publicID = xmlParsePubidLiteral(ctxt);
4634 	if (*publicID == NULL) {
4635 	    xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4636 	}
4637 	if (strict) {
4638 	    /*
4639 	     * We don't handle [83] so "S SystemLiteral" is required.
4640 	     */
4641 	    if (SKIP_BLANKS == 0) {
4642 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4643 			"Space required after the Public Identifier\n");
4644 	    }
4645 	} else {
4646 	    /*
4647 	     * We handle [83] so we return immediately, if
4648 	     * "S SystemLiteral" is not detected. We skip blanks if no
4649              * system literal was found, but this is harmless since we must
4650              * be at the end of a NotationDecl.
4651 	     */
4652 	    if (SKIP_BLANKS == 0) return(NULL);
4653 	    if ((CUR != '\'') && (CUR != '"')) return(NULL);
4654 	}
4655 	URI = xmlParseSystemLiteral(ctxt);
4656 	if (URI == NULL) {
4657 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4658         }
4659     }
4660     return(URI);
4661 }
4662 
4663 /**
4664  * xmlParseCommentComplex:
4665  * @ctxt:  an XML parser context
4666  * @buf:  the already parsed part of the buffer
4667  * @len:  number of bytes filles in the buffer
4668  * @size:  allocated size of the buffer
4669  *
4670  * Skip an XML (SGML) comment <!-- .... -->
4671  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4672  *  must not occur within comments. "
4673  * This is the slow routine in case the accelerator for ascii didn't work
4674  *
4675  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4676  */
4677 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)4678 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4679                        size_t len, size_t size) {
4680     int q, ql;
4681     int r, rl;
4682     int cur, l;
4683     size_t count = 0;
4684     int inputid;
4685 
4686     inputid = ctxt->input->id;
4687 
4688     if (buf == NULL) {
4689         len = 0;
4690 	size = XML_PARSER_BUFFER_SIZE;
4691 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4692 	if (buf == NULL) {
4693 	    xmlErrMemory(ctxt, NULL);
4694 	    return;
4695 	}
4696     }
4697     GROW;	/* Assure there's enough input data */
4698     q = CUR_CHAR(ql);
4699     if (q == 0)
4700         goto not_terminated;
4701     if (!IS_CHAR(q)) {
4702         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4703                           "xmlParseComment: invalid xmlChar value %d\n",
4704 	                  q);
4705 	xmlFree (buf);
4706 	return;
4707     }
4708     NEXTL(ql);
4709     r = CUR_CHAR(rl);
4710     if (r == 0)
4711         goto not_terminated;
4712     if (!IS_CHAR(r)) {
4713         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4714                           "xmlParseComment: invalid xmlChar value %d\n",
4715 	                  q);
4716 	xmlFree (buf);
4717 	return;
4718     }
4719     NEXTL(rl);
4720     cur = CUR_CHAR(l);
4721     if (cur == 0)
4722         goto not_terminated;
4723     while (IS_CHAR(cur) && /* checked */
4724            ((cur != '>') ||
4725 	    (r != '-') || (q != '-'))) {
4726 	if ((r == '-') && (q == '-')) {
4727 	    xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4728 	}
4729         if ((len > XML_MAX_TEXT_LENGTH) &&
4730             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4731             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4732                          "Comment too big found", NULL);
4733             xmlFree (buf);
4734             return;
4735         }
4736 	if (len + 5 >= size) {
4737 	    xmlChar *new_buf;
4738             size_t new_size;
4739 
4740 	    new_size = size * 2;
4741 	    new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4742 	    if (new_buf == NULL) {
4743 		xmlFree (buf);
4744 		xmlErrMemory(ctxt, NULL);
4745 		return;
4746 	    }
4747 	    buf = new_buf;
4748             size = new_size;
4749 	}
4750 	COPY_BUF(ql,buf,len,q);
4751 	q = r;
4752 	ql = rl;
4753 	r = cur;
4754 	rl = l;
4755 
4756 	count++;
4757 	if (count > 50) {
4758 	    GROW;
4759 	    count = 0;
4760             if (ctxt->instate == XML_PARSER_EOF) {
4761 		xmlFree(buf);
4762 		return;
4763             }
4764 	}
4765 	NEXTL(l);
4766 	cur = CUR_CHAR(l);
4767 	if (cur == 0) {
4768 	    SHRINK;
4769 	    GROW;
4770 	    cur = CUR_CHAR(l);
4771 	}
4772     }
4773     buf[len] = 0;
4774     if (cur == 0) {
4775 	xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4776 	                     "Comment not terminated \n<!--%.50s\n", buf);
4777     } else if (!IS_CHAR(cur)) {
4778         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4779                           "xmlParseComment: invalid xmlChar value %d\n",
4780 	                  cur);
4781     } else {
4782 	if (inputid != ctxt->input->id) {
4783 	    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4784 		           "Comment doesn't start and stop in the same"
4785                            " entity\n");
4786 	}
4787         NEXT;
4788 	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4789 	    (!ctxt->disableSAX))
4790 	    ctxt->sax->comment(ctxt->userData, buf);
4791     }
4792     xmlFree(buf);
4793     return;
4794 not_terminated:
4795     xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4796 			 "Comment not terminated\n", NULL);
4797     xmlFree(buf);
4798     return;
4799 }
4800 
4801 /**
4802  * xmlParseComment:
4803  * @ctxt:  an XML parser context
4804  *
4805  * Skip an XML (SGML) comment <!-- .... -->
4806  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4807  *  must not occur within comments. "
4808  *
4809  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4810  */
4811 void
xmlParseComment(xmlParserCtxtPtr ctxt)4812 xmlParseComment(xmlParserCtxtPtr ctxt) {
4813     xmlChar *buf = NULL;
4814     size_t size = XML_PARSER_BUFFER_SIZE;
4815     size_t len = 0;
4816     xmlParserInputState state;
4817     const xmlChar *in;
4818     size_t nbchar = 0;
4819     int ccol;
4820     int inputid;
4821 
4822     /*
4823      * Check that there is a comment right here.
4824      */
4825     if ((RAW != '<') || (NXT(1) != '!') ||
4826         (NXT(2) != '-') || (NXT(3) != '-')) return;
4827     state = ctxt->instate;
4828     ctxt->instate = XML_PARSER_COMMENT;
4829     inputid = ctxt->input->id;
4830     SKIP(4);
4831     SHRINK;
4832     GROW;
4833 
4834     /*
4835      * Accelerated common case where input don't need to be
4836      * modified before passing it to the handler.
4837      */
4838     in = ctxt->input->cur;
4839     do {
4840 	if (*in == 0xA) {
4841 	    do {
4842 		ctxt->input->line++; ctxt->input->col = 1;
4843 		in++;
4844 	    } while (*in == 0xA);
4845 	}
4846 get_more:
4847         ccol = ctxt->input->col;
4848 	while (((*in > '-') && (*in <= 0x7F)) ||
4849 	       ((*in >= 0x20) && (*in < '-')) ||
4850 	       (*in == 0x09)) {
4851 		    in++;
4852 		    ccol++;
4853 	}
4854 	ctxt->input->col = ccol;
4855 	if (*in == 0xA) {
4856 	    do {
4857 		ctxt->input->line++; ctxt->input->col = 1;
4858 		in++;
4859 	    } while (*in == 0xA);
4860 	    goto get_more;
4861 	}
4862 	nbchar = in - ctxt->input->cur;
4863 	/*
4864 	 * save current set of data
4865 	 */
4866 	if (nbchar > 0) {
4867 	    if ((ctxt->sax != NULL) &&
4868 		(ctxt->sax->comment != NULL)) {
4869 		if (buf == NULL) {
4870 		    if ((*in == '-') && (in[1] == '-'))
4871 		        size = nbchar + 1;
4872 		    else
4873 		        size = XML_PARSER_BUFFER_SIZE + nbchar;
4874 		    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4875 		    if (buf == NULL) {
4876 		        xmlErrMemory(ctxt, NULL);
4877 			ctxt->instate = state;
4878 			return;
4879 		    }
4880 		    len = 0;
4881 		} else if (len + nbchar + 1 >= size) {
4882 		    xmlChar *new_buf;
4883 		    size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4884 		    new_buf = (xmlChar *) xmlRealloc(buf,
4885 		                                     size * sizeof(xmlChar));
4886 		    if (new_buf == NULL) {
4887 		        xmlFree (buf);
4888 			xmlErrMemory(ctxt, NULL);
4889 			ctxt->instate = state;
4890 			return;
4891 		    }
4892 		    buf = new_buf;
4893 		}
4894 		memcpy(&buf[len], ctxt->input->cur, nbchar);
4895 		len += nbchar;
4896 		buf[len] = 0;
4897 	    }
4898 	}
4899         if ((len > XML_MAX_TEXT_LENGTH) &&
4900             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4901             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902                          "Comment too big found", NULL);
4903             xmlFree (buf);
4904             return;
4905         }
4906 	ctxt->input->cur = in;
4907 	if (*in == 0xA) {
4908 	    in++;
4909 	    ctxt->input->line++; ctxt->input->col = 1;
4910 	}
4911 	if (*in == 0xD) {
4912 	    in++;
4913 	    if (*in == 0xA) {
4914 		ctxt->input->cur = in;
4915 		in++;
4916 		ctxt->input->line++; ctxt->input->col = 1;
4917 		continue; /* while */
4918 	    }
4919 	    in--;
4920 	}
4921 	SHRINK;
4922 	GROW;
4923         if (ctxt->instate == XML_PARSER_EOF) {
4924             xmlFree(buf);
4925             return;
4926         }
4927 	in = ctxt->input->cur;
4928 	if (*in == '-') {
4929 	    if (in[1] == '-') {
4930 	        if (in[2] == '>') {
4931 		    if (ctxt->input->id != inputid) {
4932 			xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4933 			               "comment doesn't start and stop in the"
4934                                        " same entity\n");
4935 		    }
4936 		    SKIP(3);
4937 		    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4938 		        (!ctxt->disableSAX)) {
4939 			if (buf != NULL)
4940 			    ctxt->sax->comment(ctxt->userData, buf);
4941 			else
4942 			    ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4943 		    }
4944 		    if (buf != NULL)
4945 		        xmlFree(buf);
4946 		    if (ctxt->instate != XML_PARSER_EOF)
4947 			ctxt->instate = state;
4948 		    return;
4949 		}
4950 		if (buf != NULL) {
4951 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4952 		                      "Double hyphen within comment: "
4953                                       "<!--%.50s\n",
4954 				      buf);
4955 		} else
4956 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4957 		                      "Double hyphen within comment\n", NULL);
4958 		in++;
4959 		ctxt->input->col++;
4960 	    }
4961 	    in++;
4962 	    ctxt->input->col++;
4963 	    goto get_more;
4964 	}
4965     } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4966     xmlParseCommentComplex(ctxt, buf, len, size);
4967     ctxt->instate = state;
4968     return;
4969 }
4970 
4971 
4972 /**
4973  * xmlParsePITarget:
4974  * @ctxt:  an XML parser context
4975  *
4976  * parse the name of a PI
4977  *
4978  * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4979  *
4980  * Returns the PITarget name or NULL
4981  */
4982 
4983 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)4984 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4985     const xmlChar *name;
4986 
4987     name = xmlParseName(ctxt);
4988     if ((name != NULL) &&
4989         ((name[0] == 'x') || (name[0] == 'X')) &&
4990         ((name[1] == 'm') || (name[1] == 'M')) &&
4991         ((name[2] == 'l') || (name[2] == 'L'))) {
4992 	int i;
4993 	if ((name[0] == 'x') && (name[1] == 'm') &&
4994 	    (name[2] == 'l') && (name[3] == 0)) {
4995 	    xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4996 		 "XML declaration allowed only at the start of the document\n");
4997 	    return(name);
4998 	} else if (name[3] == 0) {
4999 	    xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5000 	    return(name);
5001 	}
5002 	for (i = 0;;i++) {
5003 	    if (xmlW3CPIs[i] == NULL) break;
5004 	    if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5005 	        return(name);
5006 	}
5007 	xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5008 		      "xmlParsePITarget: invalid name prefix 'xml'\n",
5009 		      NULL, NULL);
5010     }
5011     if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5012 	xmlNsErr(ctxt, XML_NS_ERR_COLON,
5013 		 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5014     }
5015     return(name);
5016 }
5017 
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * The value must be the word "catalog", an '=' sign and a quoted URL,
 * each optionally surrounded by blanks, with nothing after the closing
 * quote. On success the URL is added to the catalogs of the parsing
 * context. A value where "catalog" is not followed by '=' is ignored
 * silently; any other syntax problem raises a warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *url = NULL;
    xmlChar quote;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* Locate the matching closing quote. */
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    url = xmlStrndup(start, p - start);
    p++;

    /* Only blanks may follow the quoted value. */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
5079 
5080 /**
5081  * xmlParsePI:
5082  * @ctxt:  an XML parser context
5083  *
5084  * parse an XML Processing Instruction.
5085  *
5086  * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5087  *
5088  * The processing is transfered to SAX once parsed.
5089  */
5090 
5091 void
xmlParsePI(xmlParserCtxtPtr ctxt)5092 xmlParsePI(xmlParserCtxtPtr ctxt) {
5093     xmlChar *buf = NULL;
5094     size_t len = 0;
5095     size_t size = XML_PARSER_BUFFER_SIZE;
5096     int cur, l;
5097     const xmlChar *target;
5098     xmlParserInputState state;
5099     int count = 0;
5100 
5101     if ((RAW == '<') && (NXT(1) == '?')) {
5102 	int inputid = ctxt->input->id;
5103 	state = ctxt->instate;
5104         ctxt->instate = XML_PARSER_PI;
5105 	/*
5106 	 * this is a Processing Instruction.
5107 	 */
5108 	SKIP(2);
5109 	SHRINK;
5110 
5111 	/*
5112 	 * Parse the target name and check for special support like
5113 	 * namespace.
5114 	 */
5115         target = xmlParsePITarget(ctxt);
5116 	if (target != NULL) {
5117 	    if ((RAW == '?') && (NXT(1) == '>')) {
5118 		if (inputid != ctxt->input->id) {
5119 		    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5120 	                           "PI declaration doesn't start and stop in"
5121                                    " the same entity\n");
5122 		}
5123 		SKIP(2);
5124 
5125 		/*
5126 		 * SAX: PI detected.
5127 		 */
5128 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5129 		    (ctxt->sax->processingInstruction != NULL))
5130 		    ctxt->sax->processingInstruction(ctxt->userData,
5131 		                                     target, NULL);
5132 		if (ctxt->instate != XML_PARSER_EOF)
5133 		    ctxt->instate = state;
5134 		return;
5135 	    }
5136 	    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5137 	    if (buf == NULL) {
5138 		xmlErrMemory(ctxt, NULL);
5139 		ctxt->instate = state;
5140 		return;
5141 	    }
5142 	    if (SKIP_BLANKS == 0) {
5143 		xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5144 			  "ParsePI: PI %s space expected\n", target);
5145 	    }
5146 	    cur = CUR_CHAR(l);
5147 	    while (IS_CHAR(cur) && /* checked */
5148 		   ((cur != '?') || (NXT(1) != '>'))) {
5149 		if (len + 5 >= size) {
5150 		    xmlChar *tmp;
5151                     size_t new_size = size * 2;
5152 		    tmp = (xmlChar *) xmlRealloc(buf, new_size);
5153 		    if (tmp == NULL) {
5154 			xmlErrMemory(ctxt, NULL);
5155 			xmlFree(buf);
5156 			ctxt->instate = state;
5157 			return;
5158 		    }
5159 		    buf = tmp;
5160                     size = new_size;
5161 		}
5162 		count++;
5163 		if (count > 50) {
5164 		    GROW;
5165                     if (ctxt->instate == XML_PARSER_EOF) {
5166                         xmlFree(buf);
5167                         return;
5168                     }
5169 		    count = 0;
5170                     if ((len > XML_MAX_TEXT_LENGTH) &&
5171                         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5172                         xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5173                                           "PI %s too big found", target);
5174                         xmlFree(buf);
5175                         ctxt->instate = state;
5176                         return;
5177                     }
5178 		}
5179 		COPY_BUF(l,buf,len,cur);
5180 		NEXTL(l);
5181 		cur = CUR_CHAR(l);
5182 		if (cur == 0) {
5183 		    SHRINK;
5184 		    GROW;
5185 		    cur = CUR_CHAR(l);
5186 		}
5187 	    }
5188             if ((len > XML_MAX_TEXT_LENGTH) &&
5189                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5190                 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5191                                   "PI %s too big found", target);
5192                 xmlFree(buf);
5193                 ctxt->instate = state;
5194                 return;
5195             }
5196 	    buf[len] = 0;
5197 	    if (cur != '?') {
5198 		xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5199 		      "ParsePI: PI %s never end ...\n", target);
5200 	    } else {
5201 		if (inputid != ctxt->input->id) {
5202 		    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5203 	                           "PI declaration doesn't start and stop in"
5204                                    " the same entity\n");
5205 		}
5206 		SKIP(2);
5207 
5208 #ifdef LIBXML_CATALOG_ENABLED
5209 		if (((state == XML_PARSER_MISC) ||
5210 	             (state == XML_PARSER_START)) &&
5211 		    (xmlStrEqual(target, XML_CATALOG_PI))) {
5212 		    xmlCatalogAllow allow = xmlCatalogGetDefaults();
5213 		    if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5214 			(allow == XML_CATA_ALLOW_ALL))
5215 			xmlParseCatalogPI(ctxt, buf);
5216 		}
5217 #endif
5218 
5219 
5220 		/*
5221 		 * SAX: PI detected.
5222 		 */
5223 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5224 		    (ctxt->sax->processingInstruction != NULL))
5225 		    ctxt->sax->processingInstruction(ctxt->userData,
5226 		                                     target, buf);
5227 	    }
5228 	    xmlFree(buf);
5229 	} else {
5230 	    xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5231 	}
5232 	if (ctxt->instate != XML_PARSER_EOF)
5233 	    ctxt->instate = state;
5234     }
5235 }
5236 
5237 /**
5238  * xmlParseNotationDecl:
5239  * @ctxt:  an XML parser context
5240  *
5241  * parse a notation declaration
5242  *
5243  * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5244  *
5245  * Hence there is actually 3 choices:
5246  *     'PUBLIC' S PubidLiteral
5247  *     'PUBLIC' S PubidLiteral S SystemLiteral
5248  * and 'SYSTEM' S SystemLiteral
5249  *
5250  * See the NOTE on xmlParseExternalID().
5251  */
5252 
5253 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5254 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5255     const xmlChar *name;
5256     xmlChar *Pubid;
5257     xmlChar *Systemid;
5258 
5259     if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5260 	int inputid = ctxt->input->id;
5261 	SHRINK;
5262 	SKIP(10);
5263 	if (SKIP_BLANKS == 0) {
5264 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5265 			   "Space required after '<!NOTATION'\n");
5266 	    return;
5267 	}
5268 
5269         name = xmlParseName(ctxt);
5270 	if (name == NULL) {
5271 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5272 	    return;
5273 	}
5274 	if (xmlStrchr(name, ':') != NULL) {
5275 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5276 		     "colons are forbidden from notation names '%s'\n",
5277 		     name, NULL, NULL);
5278 	}
5279 	if (SKIP_BLANKS == 0) {
5280 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5281 		     "Space required after the NOTATION name'\n");
5282 	    return;
5283 	}
5284 
5285 	/*
5286 	 * Parse the IDs.
5287 	 */
5288 	Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5289 	SKIP_BLANKS;
5290 
5291 	if (RAW == '>') {
5292 	    if (inputid != ctxt->input->id) {
5293 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5294 	                       "Notation declaration doesn't start and stop"
5295                                " in the same entity\n");
5296 	    }
5297 	    NEXT;
5298 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5299 		(ctxt->sax->notationDecl != NULL))
5300 		ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5301 	} else {
5302 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5303 	}
5304 	if (Systemid != NULL) xmlFree(Systemid);
5305 	if (Pubid != NULL) xmlFree(Pubid);
5306     }
5307 }
5308 
5309 /**
5310  * xmlParseEntityDecl:
5311  * @ctxt:  an XML parser context
5312  *
5313  * parse <!ENTITY declarations
5314  *
5315  * [70] EntityDecl ::= GEDecl | PEDecl
5316  *
5317  * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5318  *
5319  * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5320  *
5321  * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5322  *
5323  * [74] PEDef ::= EntityValue | ExternalID
5324  *
5325  * [76] NDataDecl ::= S 'NDATA' S Name
5326  *
5327  * [ VC: Notation Declared ]
5328  * The Name must match the declared name of a notation.
5329  */
5330 
5331 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5332 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5333     const xmlChar *name = NULL;
5334     xmlChar *value = NULL;
5335     xmlChar *URI = NULL, *literal = NULL;
5336     const xmlChar *ndata = NULL;
5337     int isParameter = 0;
5338     xmlChar *orig = NULL;
5339 
5340     /* GROW; done in the caller */
5341     if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5342 	int inputid = ctxt->input->id;
5343 	SHRINK;
5344 	SKIP(8);
5345 	if (SKIP_BLANKS == 0) {
5346 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5347 			   "Space required after '<!ENTITY'\n");
5348 	}
5349 
5350 	if (RAW == '%') {
5351 	    NEXT;
5352 	    if (SKIP_BLANKS == 0) {
5353 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5354 			       "Space required after '%%'\n");
5355 	    }
5356 	    isParameter = 1;
5357 	}
5358 
5359         name = xmlParseName(ctxt);
5360 	if (name == NULL) {
5361 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5362 	                   "xmlParseEntityDecl: no name\n");
5363             return;
5364 	}
5365 	if (xmlStrchr(name, ':') != NULL) {
5366 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5367 		     "colons are forbidden from entities names '%s'\n",
5368 		     name, NULL, NULL);
5369 	}
5370 	if (SKIP_BLANKS == 0) {
5371 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5372 			   "Space required after the entity name\n");
5373 	}
5374 
5375 	ctxt->instate = XML_PARSER_ENTITY_DECL;
5376 	/*
5377 	 * handle the various case of definitions...
5378 	 */
5379 	if (isParameter) {
5380 	    if ((RAW == '"') || (RAW == '\'')) {
5381 	        value = xmlParseEntityValue(ctxt, &orig);
5382 		if (value) {
5383 		    if ((ctxt->sax != NULL) &&
5384 			(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5385 			ctxt->sax->entityDecl(ctxt->userData, name,
5386 		                    XML_INTERNAL_PARAMETER_ENTITY,
5387 				    NULL, NULL, value);
5388 		}
5389 	    } else {
5390 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5391 		if ((URI == NULL) && (literal == NULL)) {
5392 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5393 		}
5394 		if (URI) {
5395 		    xmlURIPtr uri;
5396 
5397 		    uri = xmlParseURI((const char *) URI);
5398 		    if (uri == NULL) {
5399 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5400 				     "Invalid URI: %s\n", URI);
5401 			/*
5402 			 * This really ought to be a well formedness error
5403 			 * but the XML Core WG decided otherwise c.f. issue
5404 			 * E26 of the XML erratas.
5405 			 */
5406 		    } else {
5407 			if (uri->fragment != NULL) {
5408 			    /*
5409 			     * Okay this is foolish to block those but not
5410 			     * invalid URIs.
5411 			     */
5412 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5413 			} else {
5414 			    if ((ctxt->sax != NULL) &&
5415 				(!ctxt->disableSAX) &&
5416 				(ctxt->sax->entityDecl != NULL))
5417 				ctxt->sax->entityDecl(ctxt->userData, name,
5418 					    XML_EXTERNAL_PARAMETER_ENTITY,
5419 					    literal, URI, NULL);
5420 			}
5421 			xmlFreeURI(uri);
5422 		    }
5423 		}
5424 	    }
5425 	} else {
5426 	    if ((RAW == '"') || (RAW == '\'')) {
5427 	        value = xmlParseEntityValue(ctxt, &orig);
5428 		if ((ctxt->sax != NULL) &&
5429 		    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5430 		    ctxt->sax->entityDecl(ctxt->userData, name,
5431 				XML_INTERNAL_GENERAL_ENTITY,
5432 				NULL, NULL, value);
5433 		/*
5434 		 * For expat compatibility in SAX mode.
5435 		 */
5436 		if ((ctxt->myDoc == NULL) ||
5437 		    (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5438 		    if (ctxt->myDoc == NULL) {
5439 			ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5440 			if (ctxt->myDoc == NULL) {
5441 			    xmlErrMemory(ctxt, "New Doc failed");
5442 			    return;
5443 			}
5444 			ctxt->myDoc->properties = XML_DOC_INTERNAL;
5445 		    }
5446 		    if (ctxt->myDoc->intSubset == NULL)
5447 			ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5448 					    BAD_CAST "fake", NULL, NULL);
5449 
5450 		    xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5451 			              NULL, NULL, value);
5452 		}
5453 	    } else {
5454 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5455 		if ((URI == NULL) && (literal == NULL)) {
5456 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5457 		}
5458 		if (URI) {
5459 		    xmlURIPtr uri;
5460 
5461 		    uri = xmlParseURI((const char *)URI);
5462 		    if (uri == NULL) {
5463 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5464 				     "Invalid URI: %s\n", URI);
5465 			/*
5466 			 * This really ought to be a well formedness error
5467 			 * but the XML Core WG decided otherwise c.f. issue
5468 			 * E26 of the XML erratas.
5469 			 */
5470 		    } else {
5471 			if (uri->fragment != NULL) {
5472 			    /*
5473 			     * Okay this is foolish to block those but not
5474 			     * invalid URIs.
5475 			     */
5476 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5477 			}
5478 			xmlFreeURI(uri);
5479 		    }
5480 		}
5481 		if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5482 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5483 				   "Space required before 'NDATA'\n");
5484 		}
5485 		if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5486 		    SKIP(5);
5487 		    if (SKIP_BLANKS == 0) {
5488 			xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5489 				       "Space required after 'NDATA'\n");
5490 		    }
5491 		    ndata = xmlParseName(ctxt);
5492 		    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5493 		        (ctxt->sax->unparsedEntityDecl != NULL))
5494 			ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5495 				    literal, URI, ndata);
5496 		} else {
5497 		    if ((ctxt->sax != NULL) &&
5498 		        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5499 			ctxt->sax->entityDecl(ctxt->userData, name,
5500 				    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5501 				    literal, URI, NULL);
5502 		    /*
5503 		     * For expat compatibility in SAX mode.
5504 		     * assuming the entity repalcement was asked for
5505 		     */
5506 		    if ((ctxt->replaceEntities != 0) &&
5507 			((ctxt->myDoc == NULL) ||
5508 			(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5509 			if (ctxt->myDoc == NULL) {
5510 			    ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5511 			    if (ctxt->myDoc == NULL) {
5512 			        xmlErrMemory(ctxt, "New Doc failed");
5513 				return;
5514 			    }
5515 			    ctxt->myDoc->properties = XML_DOC_INTERNAL;
5516 			}
5517 
5518 			if (ctxt->myDoc->intSubset == NULL)
5519 			    ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5520 						BAD_CAST "fake", NULL, NULL);
5521 			xmlSAX2EntityDecl(ctxt, name,
5522 				          XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5523 				          literal, URI, NULL);
5524 		    }
5525 		}
5526 	    }
5527 	}
5528 	if (ctxt->instate == XML_PARSER_EOF)
5529 	    goto done;
5530 	SKIP_BLANKS;
5531 	if (RAW != '>') {
5532 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5533 	            "xmlParseEntityDecl: entity %s not terminated\n", name);
5534 	    xmlHaltParser(ctxt);
5535 	} else {
5536 	    if (inputid != ctxt->input->id) {
5537 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5538 	                       "Entity declaration doesn't start and stop in"
5539                                " the same entity\n");
5540 	    }
5541 	    NEXT;
5542 	}
5543 	if (orig != NULL) {
5544 	    /*
5545 	     * Ugly mechanism to save the raw entity value.
5546 	     */
5547 	    xmlEntityPtr cur = NULL;
5548 
5549 	    if (isParameter) {
5550 	        if ((ctxt->sax != NULL) &&
5551 		    (ctxt->sax->getParameterEntity != NULL))
5552 		    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5553 	    } else {
5554 	        if ((ctxt->sax != NULL) &&
5555 		    (ctxt->sax->getEntity != NULL))
5556 		    cur = ctxt->sax->getEntity(ctxt->userData, name);
5557 		if ((cur == NULL) && (ctxt->userData==ctxt)) {
5558 		    cur = xmlSAX2GetEntity(ctxt, name);
5559 		}
5560 	    }
5561             if ((cur != NULL) && (cur->orig == NULL)) {
5562 		cur->orig = orig;
5563                 orig = NULL;
5564 	    }
5565 	}
5566 
5567 done:
5568 	if (value != NULL) xmlFree(value);
5569 	if (URI != NULL) xmlFree(URI);
5570 	if (literal != NULL) xmlFree(literal);
5571         if (orig != NULL) xmlFree(orig);
5572     }
5573 }
5574 
5575 /**
5576  * xmlParseDefaultDecl:
5577  * @ctxt:  an XML parser context
5578  * @value:  Receive a possible fixed default value for the attribute
5579  *
5580  * Parse an attribute default declaration
5581  *
5582  * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5583  *
5584  * [ VC: Required Attribute ]
5585  * if the default declaration is the keyword #REQUIRED, then the
5586  * attribute must be specified for all elements of the type in the
5587  * attribute-list declaration.
5588  *
5589  * [ VC: Attribute Default Legal ]
5590  * The declared default value must meet the lexical constraints of
5591  * the declared attribute type c.f. xmlValidateAttributeDecl()
5592  *
5593  * [ VC: Fixed Attribute Default ]
5594  * if an attribute has a default value declared with the #FIXED
5595  * keyword, instances of that attribute must match the default value.
5596  *
5597  * [ WFC: No < in Attribute Values ]
5598  * handled in xmlParseAttValue()
5599  *
5600  * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5601  *          or XML_ATTRIBUTE_FIXED.
5602  */
5603 
5604 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5605 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5606     int val;
5607     xmlChar *ret;
5608 
5609     *value = NULL;
5610     if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5611 	SKIP(9);
5612 	return(XML_ATTRIBUTE_REQUIRED);
5613     }
5614     if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5615 	SKIP(8);
5616 	return(XML_ATTRIBUTE_IMPLIED);
5617     }
5618     val = XML_ATTRIBUTE_NONE;
5619     if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5620 	SKIP(6);
5621 	val = XML_ATTRIBUTE_FIXED;
5622 	if (SKIP_BLANKS == 0) {
5623 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5624 			   "Space required after '#FIXED'\n");
5625 	}
5626     }
5627     ret = xmlParseAttValue(ctxt);
5628     ctxt->instate = XML_PARSER_DTD;
5629     if (ret == NULL) {
5630 	xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5631 		       "Attribute default value declaration error\n");
5632     } else
5633         *value = ret;
5634     return(val);
5635 }
5636 
5637 /**
5638  * xmlParseNotationType:
5639  * @ctxt:  an XML parser context
5640  *
5641  * parse an Notation attribute type.
5642  *
5643  * Note: the leading 'NOTATION' S part has already being parsed...
5644  *
5645  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5646  *
5647  * [ VC: Notation Attributes ]
5648  * Values of this type must match one of the notation names included
5649  * in the declaration; all notation names in the declaration must be declared.
5650  *
5651  * Returns: the notation attribute tree built while parsing
5652  */
5653 
5654 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5655 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5656     const xmlChar *name;
5657     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5658 
5659     if (RAW != '(') {
5660 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5661 	return(NULL);
5662     }
5663     SHRINK;
5664     do {
5665         NEXT;
5666 	SKIP_BLANKS;
5667         name = xmlParseName(ctxt);
5668 	if (name == NULL) {
5669 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5670 			   "Name expected in NOTATION declaration\n");
5671             xmlFreeEnumeration(ret);
5672 	    return(NULL);
5673 	}
5674 	tmp = ret;
5675 	while (tmp != NULL) {
5676 	    if (xmlStrEqual(name, tmp->name)) {
5677 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5678 	  "standalone: attribute notation value token %s duplicated\n",
5679 				 name, NULL);
5680 		if (!xmlDictOwns(ctxt->dict, name))
5681 		    xmlFree((xmlChar *) name);
5682 		break;
5683 	    }
5684 	    tmp = tmp->next;
5685 	}
5686 	if (tmp == NULL) {
5687 	    cur = xmlCreateEnumeration(name);
5688 	    if (cur == NULL) {
5689                 xmlFreeEnumeration(ret);
5690                 return(NULL);
5691             }
5692 	    if (last == NULL) ret = last = cur;
5693 	    else {
5694 		last->next = cur;
5695 		last = cur;
5696 	    }
5697 	}
5698 	SKIP_BLANKS;
5699     } while (RAW == '|');
5700     if (RAW != ')') {
5701 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5702         xmlFreeEnumeration(ret);
5703 	return(NULL);
5704     }
5705     NEXT;
5706     return(ret);
5707 }
5708 
5709 /**
5710  * xmlParseEnumerationType:
5711  * @ctxt:  an XML parser context
5712  *
5713  * parse an Enumeration attribute type.
5714  *
5715  * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5716  *
5717  * [ VC: Enumeration ]
5718  * Values of this type must match one of the Nmtoken tokens in
5719  * the declaration
5720  *
5721  * Returns: the enumeration attribute tree built while parsing
5722  */
5723 
5724 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5725 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5726     xmlChar *name;
5727     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5728 
5729     if (RAW != '(') {
5730 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5731 	return(NULL);
5732     }
5733     SHRINK;
5734     do {
5735         NEXT;
5736 	SKIP_BLANKS;
5737         name = xmlParseNmtoken(ctxt);
5738 	if (name == NULL) {
5739 	    xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5740 	    return(ret);
5741 	}
5742 	tmp = ret;
5743 	while (tmp != NULL) {
5744 	    if (xmlStrEqual(name, tmp->name)) {
5745 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5746 	  "standalone: attribute enumeration value token %s duplicated\n",
5747 				 name, NULL);
5748 		if (!xmlDictOwns(ctxt->dict, name))
5749 		    xmlFree(name);
5750 		break;
5751 	    }
5752 	    tmp = tmp->next;
5753 	}
5754 	if (tmp == NULL) {
5755 	    cur = xmlCreateEnumeration(name);
5756 	    if (!xmlDictOwns(ctxt->dict, name))
5757 		xmlFree(name);
5758 	    if (cur == NULL) {
5759                 xmlFreeEnumeration(ret);
5760                 return(NULL);
5761             }
5762 	    if (last == NULL) ret = last = cur;
5763 	    else {
5764 		last->next = cur;
5765 		last = cur;
5766 	    }
5767 	}
5768 	SKIP_BLANKS;
5769     } while (RAW == '|');
5770     if (RAW != ')') {
5771 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5772 	return(ret);
5773     }
5774     NEXT;
5775     return(ret);
5776 }
5777 
5778 /**
5779  * xmlParseEnumeratedType:
5780  * @ctxt:  an XML parser context
5781  * @tree:  the enumeration tree built while parsing
5782  *
5783  * parse an Enumerated attribute type.
5784  *
5785  * [57] EnumeratedType ::= NotationType | Enumeration
5786  *
5787  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5788  *
5789  *
5790  * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5791  */
5792 
5793 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5794 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5795     if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5796 	SKIP(8);
5797 	if (SKIP_BLANKS == 0) {
5798 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5799 			   "Space required after 'NOTATION'\n");
5800 	    return(0);
5801 	}
5802 	*tree = xmlParseNotationType(ctxt);
5803 	if (*tree == NULL) return(0);
5804 	return(XML_ATTRIBUTE_NOTATION);
5805     }
5806     *tree = xmlParseEnumerationType(ctxt);
5807     if (*tree == NULL) return(0);
5808     return(XML_ATTRIBUTE_ENUMERATION);
5809 }
5810 
5811 /**
5812  * xmlParseAttributeType:
5813  * @ctxt:  an XML parser context
5814  * @tree:  the enumeration tree built while parsing
5815  *
5816  * parse the Attribute list def for an element
5817  *
5818  * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5819  *
5820  * [55] StringType ::= 'CDATA'
5821  *
5822  * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5823  *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5824  *
5825  * Validity constraints for attribute values syntax are checked in
5826  * xmlValidateAttributeValue()
5827  *
5828  * [ VC: ID ]
5829  * Values of type ID must match the Name production. A name must not
5830  * appear more than once in an XML document as a value of this type;
5831  * i.e., ID values must uniquely identify the elements which bear them.
5832  *
5833  * [ VC: One ID per Element Type ]
5834  * No element type may have more than one ID attribute specified.
5835  *
5836  * [ VC: ID Attribute Default ]
5837  * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5838  *
5839  * [ VC: IDREF ]
5840  * Values of type IDREF must match the Name production, and values
5841  * of type IDREFS must match Names; each IDREF Name must match the value
5842  * of an ID attribute on some element in the XML document; i.e. IDREF
5843  * values must match the value of some ID attribute.
5844  *
5845  * [ VC: Entity Name ]
5846  * Values of type ENTITY must match the Name production, values
5847  * of type ENTITIES must match Names; each Entity Name must match the
5848  * name of an unparsed entity declared in the DTD.
5849  *
5850  * [ VC: Name Token ]
5851  * Values of type NMTOKEN must match the Nmtoken production; values
5852  * of type NMTOKENS must match Nmtokens.
5853  *
5854  * Returns the attribute type
5855  */
5856 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5857 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5858     SHRINK;
5859     if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5860 	SKIP(5);
5861 	return(XML_ATTRIBUTE_CDATA);
5862      } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5863 	SKIP(6);
5864 	return(XML_ATTRIBUTE_IDREFS);
5865      } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5866 	SKIP(5);
5867 	return(XML_ATTRIBUTE_IDREF);
5868      } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5869         SKIP(2);
5870 	return(XML_ATTRIBUTE_ID);
5871      } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5872 	SKIP(6);
5873 	return(XML_ATTRIBUTE_ENTITY);
5874      } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5875 	SKIP(8);
5876 	return(XML_ATTRIBUTE_ENTITIES);
5877      } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5878 	SKIP(8);
5879 	return(XML_ATTRIBUTE_NMTOKENS);
5880      } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5881 	SKIP(7);
5882 	return(XML_ATTRIBUTE_NMTOKEN);
5883      }
5884      return(xmlParseEnumeratedType(ctxt, tree));
5885 }
5886 
5887 /**
5888  * xmlParseAttributeListDecl:
5889  * @ctxt:  an XML parser context
5890  *
5891  * : parse the Attribute list def for an element
5892  *
5893  * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5894  *
5895  * [53] AttDef ::= S Name S AttType S DefaultDecl
5896  *
5897  */
5898 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)5899 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5900     const xmlChar *elemName;
5901     const xmlChar *attrName;
5902     xmlEnumerationPtr tree;
5903 
5904     if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5905 	int inputid = ctxt->input->id;
5906 
5907 	SKIP(9);
5908 	if (SKIP_BLANKS == 0) {
5909 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5910 		                 "Space required after '<!ATTLIST'\n");
5911 	}
5912         elemName = xmlParseName(ctxt);
5913 	if (elemName == NULL) {
5914 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5915 			   "ATTLIST: no name for Element\n");
5916 	    return;
5917 	}
5918 	SKIP_BLANKS;
5919 	GROW;
5920 	while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5921 	    int type;
5922 	    int def;
5923 	    xmlChar *defaultValue = NULL;
5924 
5925 	    GROW;
5926             tree = NULL;
5927 	    attrName = xmlParseName(ctxt);
5928 	    if (attrName == NULL) {
5929 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5930 			       "ATTLIST: no name for Attribute\n");
5931 		break;
5932 	    }
5933 	    GROW;
5934 	    if (SKIP_BLANKS == 0) {
5935 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5936 		        "Space required after the attribute name\n");
5937 		break;
5938 	    }
5939 
5940 	    type = xmlParseAttributeType(ctxt, &tree);
5941 	    if (type <= 0) {
5942 	        break;
5943 	    }
5944 
5945 	    GROW;
5946 	    if (SKIP_BLANKS == 0) {
5947 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5948 			       "Space required after the attribute type\n");
5949 	        if (tree != NULL)
5950 		    xmlFreeEnumeration(tree);
5951 		break;
5952 	    }
5953 
5954 	    def = xmlParseDefaultDecl(ctxt, &defaultValue);
5955 	    if (def <= 0) {
5956                 if (defaultValue != NULL)
5957 		    xmlFree(defaultValue);
5958 	        if (tree != NULL)
5959 		    xmlFreeEnumeration(tree);
5960 	        break;
5961 	    }
5962 	    if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5963 	        xmlAttrNormalizeSpace(defaultValue, defaultValue);
5964 
5965 	    GROW;
5966             if (RAW != '>') {
5967 		if (SKIP_BLANKS == 0) {
5968 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5969 			"Space required after the attribute default value\n");
5970 		    if (defaultValue != NULL)
5971 			xmlFree(defaultValue);
5972 		    if (tree != NULL)
5973 			xmlFreeEnumeration(tree);
5974 		    break;
5975 		}
5976 	    }
5977 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5978 		(ctxt->sax->attributeDecl != NULL))
5979 		ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5980 	                        type, def, defaultValue, tree);
5981 	    else if (tree != NULL)
5982 		xmlFreeEnumeration(tree);
5983 
5984 	    if ((ctxt->sax2) && (defaultValue != NULL) &&
5985 	        (def != XML_ATTRIBUTE_IMPLIED) &&
5986 		(def != XML_ATTRIBUTE_REQUIRED)) {
5987 		xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5988 	    }
5989 	    if (ctxt->sax2) {
5990 		xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5991 	    }
5992 	    if (defaultValue != NULL)
5993 	        xmlFree(defaultValue);
5994 	    GROW;
5995 	}
5996 	if (RAW == '>') {
5997 	    if (inputid != ctxt->input->id) {
5998 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5999                                "Attribute list declaration doesn't start and"
6000                                " stop in the same entity\n");
6001 	    }
6002 	    NEXT;
6003 	}
6004     }
6005 }
6006 
6007 /**
6008  * xmlParseElementMixedContentDecl:
6009  * @ctxt:  an XML parser context
6010  * @inputchk:  the input used for the current entity, needed for boundary checks
6011  *
6012  * parse the declaration for a Mixed Element content
6013  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6014  *
6015  * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6016  *                '(' S? '#PCDATA' S? ')'
6017  *
6018  * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6019  *
6020  * [ VC: No Duplicate Types ]
6021  * The same name must not appear more than once in a single
6022  * mixed-content declaration.
6023  *
6024  * returns: the list of the xmlElementContentPtr describing the element choices
6025  */
6026 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6027 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6028     xmlElementContentPtr ret = NULL, cur = NULL, n;
6029     const xmlChar *elem = NULL;
6030 
6031     GROW;
6032     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6033 	SKIP(7);
6034 	SKIP_BLANKS;
6035 	SHRINK;
6036 	if (RAW == ')') {
6037 	    if (ctxt->input->id != inputchk) {
6038 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6039                                "Element content declaration doesn't start and"
6040                                " stop in the same entity\n");
6041 	    }
6042 	    NEXT;
6043 	    ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6044 	    if (ret == NULL)
6045 	        return(NULL);
6046 	    if (RAW == '*') {
6047 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6048 		NEXT;
6049 	    }
6050 	    return(ret);
6051 	}
6052 	if ((RAW == '(') || (RAW == '|')) {
6053 	    ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6054 	    if (ret == NULL) return(NULL);
6055 	}
6056 	while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6057 	    NEXT;
6058 	    if (elem == NULL) {
6059 	        ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6060 		if (ret == NULL) return(NULL);
6061 		ret->c1 = cur;
6062 		if (cur != NULL)
6063 		    cur->parent = ret;
6064 		cur = ret;
6065 	    } else {
6066 	        n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6067 		if (n == NULL) return(NULL);
6068 		n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6069 		if (n->c1 != NULL)
6070 		    n->c1->parent = n;
6071 	        cur->c2 = n;
6072 		if (n != NULL)
6073 		    n->parent = cur;
6074 		cur = n;
6075 	    }
6076 	    SKIP_BLANKS;
6077 	    elem = xmlParseName(ctxt);
6078 	    if (elem == NULL) {
6079 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6080 			"xmlParseElementMixedContentDecl : Name expected\n");
6081 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6082 		return(NULL);
6083 	    }
6084 	    SKIP_BLANKS;
6085 	    GROW;
6086 	}
6087 	if ((RAW == ')') && (NXT(1) == '*')) {
6088 	    if (elem != NULL) {
6089 		cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6090 		                               XML_ELEMENT_CONTENT_ELEMENT);
6091 		if (cur->c2 != NULL)
6092 		    cur->c2->parent = cur;
6093             }
6094             if (ret != NULL)
6095                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6096 	    if (ctxt->input->id != inputchk) {
6097 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6098                                "Element content declaration doesn't start and"
6099                                " stop in the same entity\n");
6100 	    }
6101 	    SKIP(2);
6102 	} else {
6103 	    xmlFreeDocElementContent(ctxt->myDoc, ret);
6104 	    xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6105 	    return(NULL);
6106 	}
6107 
6108     } else {
6109 	xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6110     }
6111     return(ret);
6112 }
6113 
6114 /**
6115  * xmlParseElementChildrenContentDeclPriv:
6116  * @ctxt:  an XML parser context
6117  * @inputchk:  the input used for the current entity, needed for boundary checks
6118  * @depth: the level of recursion
6119  *
6120  * parse the declaration for a Mixed Element content
6121  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6122  *
6123  *
6124  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6125  *
6126  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6127  *
6128  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6129  *
6130  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6131  *
6132  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6133  * TODO Parameter-entity replacement text must be properly nested
6134  *	with parenthesized groups. That is to say, if either of the
6135  *	opening or closing parentheses in a choice, seq, or Mixed
6136  *	construct is contained in the replacement text for a parameter
6137  *	entity, both must be contained in the same replacement text. For
6138  *	interoperability, if a parameter-entity reference appears in a
6139  *	choice, seq, or Mixed construct, its replacement text should not
6140  *	be empty, and neither the first nor last non-blank character of
6141  *	the replacement text should be a connector (| or ,).
6142  *
6143  * Returns the tree of xmlElementContentPtr describing the element
6144  *          hierarchy.
6145  */
6146 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6147 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6148                                        int depth) {
6149     xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6150     const xmlChar *elem;
6151     xmlChar type = 0;
6152 
6153     if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6154         (depth >  2048)) {
6155         xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6156 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6157                           depth);
6158 	return(NULL);
6159     }
6160     SKIP_BLANKS;
6161     GROW;
6162     if (RAW == '(') {
6163 	int inputid = ctxt->input->id;
6164 
6165         /* Recurse on first child */
6166 	NEXT;
6167 	SKIP_BLANKS;
6168         cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6169                                                            depth + 1);
6170 	SKIP_BLANKS;
6171 	GROW;
6172     } else {
6173 	elem = xmlParseName(ctxt);
6174 	if (elem == NULL) {
6175 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6176 	    return(NULL);
6177 	}
6178         cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6179 	if (cur == NULL) {
6180 	    xmlErrMemory(ctxt, NULL);
6181 	    return(NULL);
6182 	}
6183 	GROW;
6184 	if (RAW == '?') {
6185 	    cur->ocur = XML_ELEMENT_CONTENT_OPT;
6186 	    NEXT;
6187 	} else if (RAW == '*') {
6188 	    cur->ocur = XML_ELEMENT_CONTENT_MULT;
6189 	    NEXT;
6190 	} else if (RAW == '+') {
6191 	    cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6192 	    NEXT;
6193 	} else {
6194 	    cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6195 	}
6196 	GROW;
6197     }
6198     SKIP_BLANKS;
6199     SHRINK;
6200     while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6201         /*
6202 	 * Each loop we parse one separator and one element.
6203 	 */
6204         if (RAW == ',') {
6205 	    if (type == 0) type = CUR;
6206 
6207 	    /*
6208 	     * Detect "Name | Name , Name" error
6209 	     */
6210 	    else if (type != CUR) {
6211 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6212 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6213 		                  type);
6214 		if ((last != NULL) && (last != ret))
6215 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6216 		if (ret != NULL)
6217 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6218 		return(NULL);
6219 	    }
6220 	    NEXT;
6221 
6222 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6223 	    if (op == NULL) {
6224 		if ((last != NULL) && (last != ret))
6225 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6226 	        xmlFreeDocElementContent(ctxt->myDoc, ret);
6227 		return(NULL);
6228 	    }
6229 	    if (last == NULL) {
6230 		op->c1 = ret;
6231 		if (ret != NULL)
6232 		    ret->parent = op;
6233 		ret = cur = op;
6234 	    } else {
6235 	        cur->c2 = op;
6236 		if (op != NULL)
6237 		    op->parent = cur;
6238 		op->c1 = last;
6239 		if (last != NULL)
6240 		    last->parent = op;
6241 		cur =op;
6242 		last = NULL;
6243 	    }
6244 	} else if (RAW == '|') {
6245 	    if (type == 0) type = CUR;
6246 
6247 	    /*
6248 	     * Detect "Name , Name | Name" error
6249 	     */
6250 	    else if (type != CUR) {
6251 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6252 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6253 				  type);
6254 		if ((last != NULL) && (last != ret))
6255 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6256 		if (ret != NULL)
6257 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6258 		return(NULL);
6259 	    }
6260 	    NEXT;
6261 
6262 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6263 	    if (op == NULL) {
6264 		if ((last != NULL) && (last != ret))
6265 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6266 		if (ret != NULL)
6267 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6268 		return(NULL);
6269 	    }
6270 	    if (last == NULL) {
6271 		op->c1 = ret;
6272 		if (ret != NULL)
6273 		    ret->parent = op;
6274 		ret = cur = op;
6275 	    } else {
6276 	        cur->c2 = op;
6277 		if (op != NULL)
6278 		    op->parent = cur;
6279 		op->c1 = last;
6280 		if (last != NULL)
6281 		    last->parent = op;
6282 		cur =op;
6283 		last = NULL;
6284 	    }
6285 	} else {
6286 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6287 	    if ((last != NULL) && (last != ret))
6288 	        xmlFreeDocElementContent(ctxt->myDoc, last);
6289 	    if (ret != NULL)
6290 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6291 	    return(NULL);
6292 	}
6293 	GROW;
6294 	SKIP_BLANKS;
6295 	GROW;
6296 	if (RAW == '(') {
6297 	    int inputid = ctxt->input->id;
6298 	    /* Recurse on second child */
6299 	    NEXT;
6300 	    SKIP_BLANKS;
6301 	    last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6302                                                           depth + 1);
6303 	    SKIP_BLANKS;
6304 	} else {
6305 	    elem = xmlParseName(ctxt);
6306 	    if (elem == NULL) {
6307 		xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6308 		if (ret != NULL)
6309 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6310 		return(NULL);
6311 	    }
6312 	    last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6313 	    if (last == NULL) {
6314 		if (ret != NULL)
6315 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6316 		return(NULL);
6317 	    }
6318 	    if (RAW == '?') {
6319 		last->ocur = XML_ELEMENT_CONTENT_OPT;
6320 		NEXT;
6321 	    } else if (RAW == '*') {
6322 		last->ocur = XML_ELEMENT_CONTENT_MULT;
6323 		NEXT;
6324 	    } else if (RAW == '+') {
6325 		last->ocur = XML_ELEMENT_CONTENT_PLUS;
6326 		NEXT;
6327 	    } else {
6328 		last->ocur = XML_ELEMENT_CONTENT_ONCE;
6329 	    }
6330 	}
6331 	SKIP_BLANKS;
6332 	GROW;
6333     }
6334     if ((cur != NULL) && (last != NULL)) {
6335         cur->c2 = last;
6336 	if (last != NULL)
6337 	    last->parent = cur;
6338     }
6339     if (ctxt->input->id != inputchk) {
6340 	xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6341                        "Element content declaration doesn't start and stop in"
6342                        " the same entity\n");
6343     }
6344     NEXT;
6345     if (RAW == '?') {
6346 	if (ret != NULL) {
6347 	    if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6348 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6349 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6350 	    else
6351 	        ret->ocur = XML_ELEMENT_CONTENT_OPT;
6352 	}
6353 	NEXT;
6354     } else if (RAW == '*') {
6355 	if (ret != NULL) {
6356 	    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6357 	    cur = ret;
6358 	    /*
6359 	     * Some normalization:
6360 	     * (a | b* | c?)* == (a | b | c)*
6361 	     */
6362 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6363 		if ((cur->c1 != NULL) &&
6364 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6365 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6366 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6367 		if ((cur->c2 != NULL) &&
6368 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6369 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6370 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6371 		cur = cur->c2;
6372 	    }
6373 	}
6374 	NEXT;
6375     } else if (RAW == '+') {
6376 	if (ret != NULL) {
6377 	    int found = 0;
6378 
6379 	    if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6380 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6381 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6382 	    else
6383 	        ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6384 	    /*
6385 	     * Some normalization:
6386 	     * (a | b*)+ == (a | b)*
6387 	     * (a | b?)+ == (a | b)*
6388 	     */
6389 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6390 		if ((cur->c1 != NULL) &&
6391 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6392 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6393 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6394 		    found = 1;
6395 		}
6396 		if ((cur->c2 != NULL) &&
6397 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6398 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6399 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6400 		    found = 1;
6401 		}
6402 		cur = cur->c2;
6403 	    }
6404 	    if (found)
6405 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6406 	}
6407 	NEXT;
6408     }
6409     return(ret);
6410 }
6411 
6412 /**
6413  * xmlParseElementChildrenContentDecl:
6414  * @ctxt:  an XML parser context
6415  * @inputchk:  the input used for the current entity, needed for boundary checks
6416  *
6417  * parse the declaration for a Mixed Element content
6418  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6419  *
6420  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6421  *
6422  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6423  *
6424  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6425  *
6426  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6427  *
6428  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6429  * TODO Parameter-entity replacement text must be properly nested
6430  *	with parenthesized groups. That is to say, if either of the
6431  *	opening or closing parentheses in a choice, seq, or Mixed
6432  *	construct is contained in the replacement text for a parameter
6433  *	entity, both must be contained in the same replacement text. For
6434  *	interoperability, if a parameter-entity reference appears in a
6435  *	choice, seq, or Mixed construct, its replacement text should not
6436  *	be empty, and neither the first nor last non-blank character of
6437  *	the replacement text should be a connector (| or ,).
6438  *
6439  * Returns the tree of xmlElementContentPtr describing the element
6440  *          hierarchy.
6441  */
6442 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6443 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6444     /* stub left for API/ABI compat */
6445     return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6446 }
6447 
6448 /**
6449  * xmlParseElementContentDecl:
6450  * @ctxt:  an XML parser context
6451  * @name:  the name of the element being defined.
6452  * @result:  the Element Content pointer will be stored here if any
6453  *
6454  * parse the declaration for an Element content either Mixed or Children,
6455  * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6456  *
6457  * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6458  *
6459  * returns: the type of element content XML_ELEMENT_TYPE_xxx
6460  */
6461 
6462 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6463 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6464                            xmlElementContentPtr *result) {
6465 
6466     xmlElementContentPtr tree = NULL;
6467     int inputid = ctxt->input->id;
6468     int res;
6469 
6470     *result = NULL;
6471 
6472     if (RAW != '(') {
6473 	xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6474 		"xmlParseElementContentDecl : %s '(' expected\n", name);
6475 	return(-1);
6476     }
6477     NEXT;
6478     GROW;
6479     if (ctxt->instate == XML_PARSER_EOF)
6480         return(-1);
6481     SKIP_BLANKS;
6482     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6483         tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6484 	res = XML_ELEMENT_TYPE_MIXED;
6485     } else {
6486         tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6487 	res = XML_ELEMENT_TYPE_ELEMENT;
6488     }
6489     SKIP_BLANKS;
6490     *result = tree;
6491     return(res);
6492 }
6493 
6494 /**
6495  * xmlParseElementDecl:
6496  * @ctxt:  an XML parser context
6497  *
6498  * parse an Element declaration.
6499  *
6500  * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6501  *
6502  * [ VC: Unique Element Type Declaration ]
6503  * No element type may be declared more than once
6504  *
6505  * Returns the type of the element, or -1 in case of error
6506  */
6507 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6508 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6509     const xmlChar *name;
6510     int ret = -1;
6511     xmlElementContentPtr content  = NULL;
6512 
6513     /* GROW; done in the caller */
6514     if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6515 	int inputid = ctxt->input->id;
6516 
6517 	SKIP(9);
6518 	if (SKIP_BLANKS == 0) {
6519 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6520 		           "Space required after 'ELEMENT'\n");
6521 	    return(-1);
6522 	}
6523         name = xmlParseName(ctxt);
6524 	if (name == NULL) {
6525 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6526 			   "xmlParseElementDecl: no name for Element\n");
6527 	    return(-1);
6528 	}
6529 	if (SKIP_BLANKS == 0) {
6530 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6531 			   "Space required after the element name\n");
6532 	}
6533 	if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6534 	    SKIP(5);
6535 	    /*
6536 	     * Element must always be empty.
6537 	     */
6538 	    ret = XML_ELEMENT_TYPE_EMPTY;
6539 	} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6540 	           (NXT(2) == 'Y')) {
6541 	    SKIP(3);
6542 	    /*
6543 	     * Element is a generic container.
6544 	     */
6545 	    ret = XML_ELEMENT_TYPE_ANY;
6546 	} else if (RAW == '(') {
6547 	    ret = xmlParseElementContentDecl(ctxt, name, &content);
6548 	} else {
6549 	    /*
6550 	     * [ WFC: PEs in Internal Subset ] error handling.
6551 	     */
6552 	    if ((RAW == '%') && (ctxt->external == 0) &&
6553 	        (ctxt->inputNr == 1)) {
6554 		xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6555 	  "PEReference: forbidden within markup decl in internal subset\n");
6556 	    } else {
6557 		xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6558 		      "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6559             }
6560 	    return(-1);
6561 	}
6562 
6563 	SKIP_BLANKS;
6564 
6565 	if (RAW != '>') {
6566 	    xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6567 	    if (content != NULL) {
6568 		xmlFreeDocElementContent(ctxt->myDoc, content);
6569 	    }
6570 	} else {
6571 	    if (inputid != ctxt->input->id) {
6572 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6573                                "Element declaration doesn't start and stop in"
6574                                " the same entity\n");
6575 	    }
6576 
6577 	    NEXT;
6578 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6579 		(ctxt->sax->elementDecl != NULL)) {
6580 		if (content != NULL)
6581 		    content->parent = NULL;
6582 	        ctxt->sax->elementDecl(ctxt->userData, name, ret,
6583 		                       content);
6584 		if ((content != NULL) && (content->parent == NULL)) {
6585 		    /*
6586 		     * this is a trick: if xmlAddElementDecl is called,
6587 		     * instead of copying the full tree it is plugged directly
6588 		     * if called from the parser. Avoid duplicating the
6589 		     * interfaces or change the API/ABI
6590 		     */
6591 		    xmlFreeDocElementContent(ctxt->myDoc, content);
6592 		}
6593 	    } else if (content != NULL) {
6594 		xmlFreeDocElementContent(ctxt->myDoc, content);
6595 	    }
6596 	}
6597     }
6598     return(ret);
6599 }
6600 
6601 /**
6602  * xmlParseConditionalSections
6603  * @ctxt:  an XML parser context
6604  *
6605  * [61] conditionalSect ::= includeSect | ignoreSect
6606  * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6607  * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6608  * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6609  * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6610  */
6611 
6612 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6613 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6614     int id = ctxt->input->id;
6615 
6616     SKIP(3);
6617     SKIP_BLANKS;
6618     if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6619 	SKIP(7);
6620 	SKIP_BLANKS;
6621 	if (RAW != '[') {
6622 	    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6623 	    xmlHaltParser(ctxt);
6624 	    return;
6625 	} else {
6626 	    if (ctxt->input->id != id) {
6627 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6628 	                       "All markup of the conditional section is not"
6629                                " in the same entity\n");
6630 	    }
6631 	    NEXT;
6632 	}
6633 	if (xmlParserDebugEntities) {
6634 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6635 		xmlGenericError(xmlGenericErrorContext,
6636 			"%s(%d): ", ctxt->input->filename,
6637 			ctxt->input->line);
6638 	    xmlGenericError(xmlGenericErrorContext,
6639 		    "Entering INCLUDE Conditional Section\n");
6640 	}
6641 
6642         SKIP_BLANKS;
6643         GROW;
6644 	while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6645 	        (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
6646 	    const xmlChar *check = CUR_PTR;
6647 	    unsigned int cons = ctxt->input->consumed;
6648 
6649 	    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6650 		xmlParseConditionalSections(ctxt);
6651 	    } else
6652 		xmlParseMarkupDecl(ctxt);
6653 
6654             SKIP_BLANKS;
6655             GROW;
6656 
6657 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6658 		xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6659 		xmlHaltParser(ctxt);
6660 		break;
6661 	    }
6662 	}
6663 	if (xmlParserDebugEntities) {
6664 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6665 		xmlGenericError(xmlGenericErrorContext,
6666 			"%s(%d): ", ctxt->input->filename,
6667 			ctxt->input->line);
6668 	    xmlGenericError(xmlGenericErrorContext,
6669 		    "Leaving INCLUDE Conditional Section\n");
6670 	}
6671 
6672     } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6673 	int state;
6674 	xmlParserInputState instate;
6675 	int depth = 0;
6676 
6677 	SKIP(6);
6678 	SKIP_BLANKS;
6679 	if (RAW != '[') {
6680 	    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6681 	    xmlHaltParser(ctxt);
6682 	    return;
6683 	} else {
6684 	    if (ctxt->input->id != id) {
6685 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6686 	                       "All markup of the conditional section is not"
6687                                " in the same entity\n");
6688 	    }
6689 	    NEXT;
6690 	}
6691 	if (xmlParserDebugEntities) {
6692 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6693 		xmlGenericError(xmlGenericErrorContext,
6694 			"%s(%d): ", ctxt->input->filename,
6695 			ctxt->input->line);
6696 	    xmlGenericError(xmlGenericErrorContext,
6697 		    "Entering IGNORE Conditional Section\n");
6698 	}
6699 
6700 	/*
6701 	 * Parse up to the end of the conditional section
6702 	 * But disable SAX event generating DTD building in the meantime
6703 	 */
6704 	state = ctxt->disableSAX;
6705 	instate = ctxt->instate;
6706 	if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6707 	ctxt->instate = XML_PARSER_IGNORE;
6708 
6709 	while (((depth >= 0) && (RAW != 0)) &&
6710                (ctxt->instate != XML_PARSER_EOF)) {
6711 	  if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6712 	    depth++;
6713 	    SKIP(3);
6714 	    continue;
6715 	  }
6716 	  if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6717 	    if (--depth >= 0) SKIP(3);
6718 	    continue;
6719 	  }
6720 	  NEXT;
6721 	  continue;
6722 	}
6723 
6724 	ctxt->disableSAX = state;
6725 	ctxt->instate = instate;
6726 
6727 	if (xmlParserDebugEntities) {
6728 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6729 		xmlGenericError(xmlGenericErrorContext,
6730 			"%s(%d): ", ctxt->input->filename,
6731 			ctxt->input->line);
6732 	    xmlGenericError(xmlGenericErrorContext,
6733 		    "Leaving IGNORE Conditional Section\n");
6734 	}
6735 
6736     } else {
6737 	xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6738 	xmlHaltParser(ctxt);
6739 	return;
6740     }
6741 
6742     if (RAW == 0)
6743         SHRINK;
6744 
6745     if (RAW == 0) {
6746 	xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6747     } else {
6748 	if (ctxt->input->id != id) {
6749 	    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6750 	                   "All markup of the conditional section is not in"
6751                            " the same entity\n");
6752 	}
6753 	if ((ctxt-> instate != XML_PARSER_EOF) &&
6754 	    ((ctxt->input->cur + 3) <= ctxt->input->end))
6755 	    SKIP(3);
6756     }
6757 }
6758 
6759 /**
6760  * xmlParseMarkupDecl:
6761  * @ctxt:  an XML parser context
6762  *
6763  * parse Markup declarations
6764  *
6765  * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6766  *                     NotationDecl | PI | Comment
6767  *
6768  * [ VC: Proper Declaration/PE Nesting ]
6769  * Parameter-entity replacement text must be properly nested with
6770  * markup declarations. That is to say, if either the first character
6771  * or the last character of a markup declaration (markupdecl above) is
6772  * contained in the replacement text for a parameter-entity reference,
6773  * both must be contained in the same replacement text.
6774  *
6775  * [ WFC: PEs in Internal Subset ]
6776  * In the internal DTD subset, parameter-entity references can occur
6777  * only where markup declarations can occur, not within markup declarations.
6778  * (This does not apply to references that occur in external parameter
6779  * entities or to the external subset.)
6780  */
6781 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6782 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6783     GROW;
6784     if (CUR == '<') {
6785         if (NXT(1) == '!') {
6786 	    switch (NXT(2)) {
6787 	        case 'E':
6788 		    if (NXT(3) == 'L')
6789 			xmlParseElementDecl(ctxt);
6790 		    else if (NXT(3) == 'N')
6791 			xmlParseEntityDecl(ctxt);
6792 		    break;
6793 	        case 'A':
6794 		    xmlParseAttributeListDecl(ctxt);
6795 		    break;
6796 	        case 'N':
6797 		    xmlParseNotationDecl(ctxt);
6798 		    break;
6799 	        case '-':
6800 		    xmlParseComment(ctxt);
6801 		    break;
6802 		default:
6803 		    /* there is an error but it will be detected later */
6804 		    break;
6805 	    }
6806 	} else if (NXT(1) == '?') {
6807 	    xmlParsePI(ctxt);
6808 	}
6809     }
6810 
6811     /*
6812      * detect requirement to exit there and act accordingly
6813      * and avoid having instate overriden later on
6814      */
6815     if (ctxt->instate == XML_PARSER_EOF)
6816         return;
6817 
6818     /*
6819      * Conditional sections are allowed from entities included
6820      * by PE References in the internal subset.
6821      */
6822     if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6823         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6824 	    xmlParseConditionalSections(ctxt);
6825 	}
6826     }
6827 
6828     ctxt->instate = XML_PARSER_DTD;
6829 }
6830 
6831 /**
6832  * xmlParseTextDecl:
6833  * @ctxt:  an XML parser context
6834  *
6835  * parse an XML declaration header for external entities
6836  *
6837  * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6838  */
6839 
6840 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)6841 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6842     xmlChar *version;
6843     const xmlChar *encoding;
6844 
6845     /*
6846      * We know that '<?xml' is here.
6847      */
6848     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6849 	SKIP(5);
6850     } else {
6851 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6852 	return;
6853     }
6854 
6855     if (SKIP_BLANKS == 0) {
6856 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6857 		       "Space needed after '<?xml'\n");
6858     }
6859 
6860     /*
6861      * We may have the VersionInfo here.
6862      */
6863     version = xmlParseVersionInfo(ctxt);
6864     if (version == NULL)
6865 	version = xmlCharStrdup(XML_DEFAULT_VERSION);
6866     else {
6867 	if (SKIP_BLANKS == 0) {
6868 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6869 		           "Space needed here\n");
6870 	}
6871     }
6872     ctxt->input->version = version;
6873 
6874     /*
6875      * We must have the encoding declaration
6876      */
6877     encoding = xmlParseEncodingDecl(ctxt);
6878     if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6879 	/*
6880 	 * The XML REC instructs us to stop parsing right here
6881 	 */
6882         return;
6883     }
6884     if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6885 	xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6886 		       "Missing encoding in text declaration\n");
6887     }
6888 
6889     SKIP_BLANKS;
6890     if ((RAW == '?') && (NXT(1) == '>')) {
6891         SKIP(2);
6892     } else if (RAW == '>') {
6893         /* Deprecated old WD ... */
6894 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6895 	NEXT;
6896     } else {
6897 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6898 	MOVETO_ENDTAG(CUR_PTR);
6899 	NEXT;
6900     }
6901 }
6902 
6903 /**
6904  * xmlParseExternalSubset:
6905  * @ctxt:  an XML parser context
6906  * @ExternalID: the external identifier
6907  * @SystemID: the system identifier (or URL)
6908  *
6909  * parse Markup declarations from an external subset
6910  *
6911  * [30] extSubset ::= textDecl? extSubsetDecl
6912  *
6913  * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6914  */
6915 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)6916 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6917                        const xmlChar *SystemID) {
6918     xmlDetectSAX2(ctxt);
6919     GROW;
6920 
6921     if ((ctxt->encoding == NULL) &&
6922         (ctxt->input->end - ctxt->input->cur >= 4)) {
6923         xmlChar start[4];
6924 	xmlCharEncoding enc;
6925 
6926 	start[0] = RAW;
6927 	start[1] = NXT(1);
6928 	start[2] = NXT(2);
6929 	start[3] = NXT(3);
6930 	enc = xmlDetectCharEncoding(start, 4);
6931 	if (enc != XML_CHAR_ENCODING_NONE)
6932 	    xmlSwitchEncoding(ctxt, enc);
6933     }
6934 
6935     if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6936 	xmlParseTextDecl(ctxt);
6937 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6938 	    /*
6939 	     * The XML REC instructs us to stop parsing right here
6940 	     */
6941 	    xmlHaltParser(ctxt);
6942 	    return;
6943 	}
6944     }
6945     if (ctxt->myDoc == NULL) {
6946         ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6947 	if (ctxt->myDoc == NULL) {
6948 	    xmlErrMemory(ctxt, "New Doc failed");
6949 	    return;
6950 	}
6951 	ctxt->myDoc->properties = XML_DOC_INTERNAL;
6952     }
6953     if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6954         xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6955 
6956     ctxt->instate = XML_PARSER_DTD;
6957     ctxt->external = 1;
6958     SKIP_BLANKS;
6959     while (((RAW == '<') && (NXT(1) == '?')) ||
6960            ((RAW == '<') && (NXT(1) == '!')) ||
6961 	   (RAW == '%')) {
6962 	const xmlChar *check = CUR_PTR;
6963 	unsigned int cons = ctxt->input->consumed;
6964 
6965 	GROW;
6966         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6967 	    xmlParseConditionalSections(ctxt);
6968 	} else
6969 	    xmlParseMarkupDecl(ctxt);
6970         SKIP_BLANKS;
6971 
6972 	if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6973 	    xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6974 	    break;
6975 	}
6976     }
6977 
6978     if (RAW != 0) {
6979 	xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6980     }
6981 
6982 }
6983 
6984 /**
6985  * xmlParseReference:
6986  * @ctxt:  an XML parser context
6987  *
6988  * parse and handle entity references in content, depending on the SAX
6989  * interface, this may end-up in a call to character() if this is a
6990  * CharRef, a predefined entity, if there is no reference() callback.
6991  * or if the parser was asked to switch to that mode.
6992  *
6993  * [67] Reference ::= EntityRef | CharRef
6994  */
6995 void
xmlParseReference(xmlParserCtxtPtr ctxt)6996 xmlParseReference(xmlParserCtxtPtr ctxt) {
6997     xmlEntityPtr ent;
6998     xmlChar *val;
6999     int was_checked;
7000     xmlNodePtr list = NULL;
7001     xmlParserErrors ret = XML_ERR_OK;
7002 
7003 
7004     if (RAW != '&')
7005         return;
7006 
7007     /*
7008      * Simple case of a CharRef
7009      */
7010     if (NXT(1) == '#') {
7011 	int i = 0;
7012 	xmlChar out[10];
7013 	int hex = NXT(2);
7014 	int value = xmlParseCharRef(ctxt);
7015 
7016 	if (value == 0)
7017 	    return;
7018 	if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7019 	    /*
7020 	     * So we are using non-UTF-8 buffers
7021 	     * Check that the char fit on 8bits, if not
7022 	     * generate a CharRef.
7023 	     */
7024 	    if (value <= 0xFF) {
7025 		out[0] = value;
7026 		out[1] = 0;
7027 		if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7028 		    (!ctxt->disableSAX))
7029 		    ctxt->sax->characters(ctxt->userData, out, 1);
7030 	    } else {
7031 		if ((hex == 'x') || (hex == 'X'))
7032 		    snprintf((char *)out, sizeof(out), "#x%X", value);
7033 		else
7034 		    snprintf((char *)out, sizeof(out), "#%d", value);
7035 		if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7036 		    (!ctxt->disableSAX))
7037 		    ctxt->sax->reference(ctxt->userData, out);
7038 	    }
7039 	} else {
7040 	    /*
7041 	     * Just encode the value in UTF-8
7042 	     */
7043 	    COPY_BUF(0 ,out, i, value);
7044 	    out[i] = 0;
7045 	    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7046 		(!ctxt->disableSAX))
7047 		ctxt->sax->characters(ctxt->userData, out, i);
7048 	}
7049 	return;
7050     }
7051 
7052     /*
7053      * We are seeing an entity reference
7054      */
7055     ent = xmlParseEntityRef(ctxt);
7056     if (ent == NULL) return;
7057     if (!ctxt->wellFormed)
7058 	return;
7059     was_checked = ent->checked;
7060 
7061     /* special case of predefined entities */
7062     if ((ent->name == NULL) ||
7063         (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7064 	val = ent->content;
7065 	if (val == NULL) return;
7066 	/*
7067 	 * inline the entity.
7068 	 */
7069 	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7070 	    (!ctxt->disableSAX))
7071 	    ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7072 	return;
7073     }
7074 
7075     /*
7076      * The first reference to the entity trigger a parsing phase
7077      * where the ent->children is filled with the result from
7078      * the parsing.
7079      * Note: external parsed entities will not be loaded, it is not
7080      * required for a non-validating parser, unless the parsing option
7081      * of validating, or substituting entities were given. Doing so is
7082      * far more secure as the parser will only process data coming from
7083      * the document entity by default.
7084      */
7085     if (((ent->checked == 0) ||
7086          ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7087         ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7088          (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7089 	unsigned long oldnbent = ctxt->nbentities;
7090 
7091 	/*
7092 	 * This is a bit hackish but this seems the best
7093 	 * way to make sure both SAX and DOM entity support
7094 	 * behaves okay.
7095 	 */
7096 	void *user_data;
7097 	if (ctxt->userData == ctxt)
7098 	    user_data = NULL;
7099 	else
7100 	    user_data = ctxt->userData;
7101 
7102 	/*
7103 	 * Check that this entity is well formed
7104 	 * 4.3.2: An internal general parsed entity is well-formed
7105 	 * if its replacement text matches the production labeled
7106 	 * content.
7107 	 */
7108 	if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7109 	    ctxt->depth++;
7110 	    ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7111 	                                              user_data, &list);
7112 	    ctxt->depth--;
7113 
7114 	} else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7115 	    ctxt->depth++;
7116 	    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7117 	                                   user_data, ctxt->depth, ent->URI,
7118 					   ent->ExternalID, &list);
7119 	    ctxt->depth--;
7120 	} else {
7121 	    ret = XML_ERR_ENTITY_PE_INTERNAL;
7122 	    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7123 			 "invalid entity type found\n", NULL);
7124 	}
7125 
7126 	/*
7127 	 * Store the number of entities needing parsing for this entity
7128 	 * content and do checkings
7129 	 */
7130 	ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7131 	if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7132 	    ent->checked |= 1;
7133 	if (ret == XML_ERR_ENTITY_LOOP) {
7134 	    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7135 	    xmlFreeNodeList(list);
7136 	    return;
7137 	}
7138 	if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7139 	    xmlFreeNodeList(list);
7140 	    return;
7141 	}
7142 
7143 	if ((ret == XML_ERR_OK) && (list != NULL)) {
7144 	    if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7145 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7146 		(ent->children == NULL)) {
7147 		ent->children = list;
7148 		if (ctxt->replaceEntities) {
7149 		    /*
7150 		     * Prune it directly in the generated document
7151 		     * except for single text nodes.
7152 		     */
7153 		    if (((list->type == XML_TEXT_NODE) &&
7154 			 (list->next == NULL)) ||
7155 			(ctxt->parseMode == XML_PARSE_READER)) {
7156 			list->parent = (xmlNodePtr) ent;
7157 			list = NULL;
7158 			ent->owner = 1;
7159 		    } else {
7160 			ent->owner = 0;
7161 			while (list != NULL) {
7162 			    list->parent = (xmlNodePtr) ctxt->node;
7163 			    list->doc = ctxt->myDoc;
7164 			    if (list->next == NULL)
7165 				ent->last = list;
7166 			    list = list->next;
7167 			}
7168 			list = ent->children;
7169 #ifdef LIBXML_LEGACY_ENABLED
7170 			if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7171 			  xmlAddEntityReference(ent, list, NULL);
7172 #endif /* LIBXML_LEGACY_ENABLED */
7173 		    }
7174 		} else {
7175 		    ent->owner = 1;
7176 		    while (list != NULL) {
7177 			list->parent = (xmlNodePtr) ent;
7178 			xmlSetTreeDoc(list, ent->doc);
7179 			if (list->next == NULL)
7180 			    ent->last = list;
7181 			list = list->next;
7182 		    }
7183 		}
7184 	    } else {
7185 		xmlFreeNodeList(list);
7186 		list = NULL;
7187 	    }
7188 	} else if ((ret != XML_ERR_OK) &&
7189 		   (ret != XML_WAR_UNDECLARED_ENTITY)) {
7190 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7191 		     "Entity '%s' failed to parse\n", ent->name);
7192             if (ent->content != NULL)
7193                 ent->content[0] = 0;
7194 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
7195 	} else if (list != NULL) {
7196 	    xmlFreeNodeList(list);
7197 	    list = NULL;
7198 	}
7199 	if (ent->checked == 0)
7200 	    ent->checked = 2;
7201 
7202         /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7203         was_checked = 0;
7204     } else if (ent->checked != 1) {
7205 	ctxt->nbentities += ent->checked / 2;
7206     }
7207 
7208     /*
7209      * Now that the entity content has been gathered
7210      * provide it to the application, this can take different forms based
7211      * on the parsing modes.
7212      */
7213     if (ent->children == NULL) {
7214 	/*
7215 	 * Probably running in SAX mode and the callbacks don't
7216 	 * build the entity content. So unless we already went
7217 	 * though parsing for first checking go though the entity
7218 	 * content to generate callbacks associated to the entity
7219 	 */
7220 	if (was_checked != 0) {
7221 	    void *user_data;
7222 	    /*
7223 	     * This is a bit hackish but this seems the best
7224 	     * way to make sure both SAX and DOM entity support
7225 	     * behaves okay.
7226 	     */
7227 	    if (ctxt->userData == ctxt)
7228 		user_data = NULL;
7229 	    else
7230 		user_data = ctxt->userData;
7231 
7232 	    if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7233 		ctxt->depth++;
7234 		ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7235 				   ent->content, user_data, NULL);
7236 		ctxt->depth--;
7237 	    } else if (ent->etype ==
7238 		       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7239 		ctxt->depth++;
7240 		ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7241 			   ctxt->sax, user_data, ctxt->depth,
7242 			   ent->URI, ent->ExternalID, NULL);
7243 		ctxt->depth--;
7244 	    } else {
7245 		ret = XML_ERR_ENTITY_PE_INTERNAL;
7246 		xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7247 			     "invalid entity type found\n", NULL);
7248 	    }
7249 	    if (ret == XML_ERR_ENTITY_LOOP) {
7250 		xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7251 		return;
7252 	    }
7253 	}
7254 	if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7255 	    (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7256 	    /*
7257 	     * Entity reference callback comes second, it's somewhat
7258 	     * superfluous but a compatibility to historical behaviour
7259 	     */
7260 	    ctxt->sax->reference(ctxt->userData, ent->name);
7261 	}
7262 	return;
7263     }
7264 
7265     /*
7266      * If we didn't get any children for the entity being built
7267      */
7268     if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7269 	(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7270 	/*
7271 	 * Create a node.
7272 	 */
7273 	ctxt->sax->reference(ctxt->userData, ent->name);
7274 	return;
7275     }
7276 
7277     if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7278 	/*
7279 	 * There is a problem on the handling of _private for entities
7280 	 * (bug 155816): Should we copy the content of the field from
7281 	 * the entity (possibly overwriting some value set by the user
7282 	 * when a copy is created), should we leave it alone, or should
7283 	 * we try to take care of different situations?  The problem
7284 	 * is exacerbated by the usage of this field by the xmlReader.
7285 	 * To fix this bug, we look at _private on the created node
7286 	 * and, if it's NULL, we copy in whatever was in the entity.
7287 	 * If it's not NULL we leave it alone.  This is somewhat of a
7288 	 * hack - maybe we should have further tests to determine
7289 	 * what to do.
7290 	 */
7291 	if ((ctxt->node != NULL) && (ent->children != NULL)) {
7292 	    /*
7293 	     * Seems we are generating the DOM content, do
7294 	     * a simple tree copy for all references except the first
7295 	     * In the first occurrence list contains the replacement.
7296 	     */
7297 	    if (((list == NULL) && (ent->owner == 0)) ||
7298 		(ctxt->parseMode == XML_PARSE_READER)) {
7299 		xmlNodePtr nw = NULL, cur, firstChild = NULL;
7300 
7301 		/*
7302 		 * We are copying here, make sure there is no abuse
7303 		 */
7304 		ctxt->sizeentcopy += ent->length + 5;
7305 		if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7306 		    return;
7307 
7308 		/*
7309 		 * when operating on a reader, the entities definitions
7310 		 * are always owning the entities subtree.
7311 		if (ctxt->parseMode == XML_PARSE_READER)
7312 		    ent->owner = 1;
7313 		 */
7314 
7315 		cur = ent->children;
7316 		while (cur != NULL) {
7317 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7318 		    if (nw != NULL) {
7319 			if (nw->_private == NULL)
7320 			    nw->_private = cur->_private;
7321 			if (firstChild == NULL){
7322 			    firstChild = nw;
7323 			}
7324 			nw = xmlAddChild(ctxt->node, nw);
7325 		    }
7326 		    if (cur == ent->last) {
7327 			/*
7328 			 * needed to detect some strange empty
7329 			 * node cases in the reader tests
7330 			 */
7331 			if ((ctxt->parseMode == XML_PARSE_READER) &&
7332 			    (nw != NULL) &&
7333 			    (nw->type == XML_ELEMENT_NODE) &&
7334 			    (nw->children == NULL))
7335 			    nw->extra = 1;
7336 
7337 			break;
7338 		    }
7339 		    cur = cur->next;
7340 		}
7341 #ifdef LIBXML_LEGACY_ENABLED
7342 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7343 		  xmlAddEntityReference(ent, firstChild, nw);
7344 #endif /* LIBXML_LEGACY_ENABLED */
7345 	    } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7346 		xmlNodePtr nw = NULL, cur, next, last,
7347 			   firstChild = NULL;
7348 
7349 		/*
7350 		 * We are copying here, make sure there is no abuse
7351 		 */
7352 		ctxt->sizeentcopy += ent->length + 5;
7353 		if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7354 		    return;
7355 
7356 		/*
7357 		 * Copy the entity child list and make it the new
7358 		 * entity child list. The goal is to make sure any
7359 		 * ID or REF referenced will be the one from the
7360 		 * document content and not the entity copy.
7361 		 */
7362 		cur = ent->children;
7363 		ent->children = NULL;
7364 		last = ent->last;
7365 		ent->last = NULL;
7366 		while (cur != NULL) {
7367 		    next = cur->next;
7368 		    cur->next = NULL;
7369 		    cur->parent = NULL;
7370 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7371 		    if (nw != NULL) {
7372 			if (nw->_private == NULL)
7373 			    nw->_private = cur->_private;
7374 			if (firstChild == NULL){
7375 			    firstChild = cur;
7376 			}
7377 			xmlAddChild((xmlNodePtr) ent, nw);
7378 			xmlAddChild(ctxt->node, cur);
7379 		    }
7380 		    if (cur == last)
7381 			break;
7382 		    cur = next;
7383 		}
7384 		if (ent->owner == 0)
7385 		    ent->owner = 1;
7386 #ifdef LIBXML_LEGACY_ENABLED
7387 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7388 		  xmlAddEntityReference(ent, firstChild, nw);
7389 #endif /* LIBXML_LEGACY_ENABLED */
7390 	    } else {
7391 		const xmlChar *nbktext;
7392 
7393 		/*
7394 		 * the name change is to avoid coalescing of the
7395 		 * node with a possible previous text one which
7396 		 * would make ent->children a dangling pointer
7397 		 */
7398 		nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7399 					-1);
7400 		if (ent->children->type == XML_TEXT_NODE)
7401 		    ent->children->name = nbktext;
7402 		if ((ent->last != ent->children) &&
7403 		    (ent->last->type == XML_TEXT_NODE))
7404 		    ent->last->name = nbktext;
7405 		xmlAddChildList(ctxt->node, ent->children);
7406 	    }
7407 
7408 	    /*
7409 	     * This is to avoid a nasty side effect, see
7410 	     * characters() in SAX.c
7411 	     */
7412 	    ctxt->nodemem = 0;
7413 	    ctxt->nodelen = 0;
7414 	    return;
7415 	}
7416     }
7417 }
7418 
7419 /**
7420  * xmlParseEntityRef:
7421  * @ctxt:  an XML parser context
7422  *
7423  * parse ENTITY references declarations
7424  *
7425  * [68] EntityRef ::= '&' Name ';'
7426  *
7427  * [ WFC: Entity Declared ]
7428  * In a document without any DTD, a document with only an internal DTD
7429  * subset which contains no parameter entity references, or a document
7430  * with "standalone='yes'", the Name given in the entity reference
7431  * must match that in an entity declaration, except that well-formed
7432  * documents need not declare any of the following entities: amp, lt,
7433  * gt, apos, quot.  The declaration of a parameter entity must precede
7434  * any reference to it.  Similarly, the declaration of a general entity
7435  * must precede any reference to it which appears in a default value in an
7436  * attribute-list declaration. Note that if entities are declared in the
7437  * external subset or in external parameter entities, a non-validating
7438  * processor is not obligated to read and process their declarations;
7439  * for such documents, the rule that an entity must be declared is a
7440  * well-formedness constraint only if standalone='yes'.
7441  *
7442  * [ WFC: Parsed Entity ]
7443  * An entity reference must not contain the name of an unparsed entity
7444  *
7445  * Returns the xmlEntityPtr if found, or NULL otherwise.
7446  */
7447 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7448 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7449     const xmlChar *name;
7450     xmlEntityPtr ent = NULL;
7451 
7452     GROW;
7453     if (ctxt->instate == XML_PARSER_EOF)
7454         return(NULL);
7455 
7456     if (RAW != '&')
7457         return(NULL);
7458     NEXT;
7459     name = xmlParseName(ctxt);
7460     if (name == NULL) {
7461 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7462 		       "xmlParseEntityRef: no name\n");
7463         return(NULL);
7464     }
7465     if (RAW != ';') {
7466 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7467 	return(NULL);
7468     }
7469     NEXT;
7470 
7471     /*
7472      * Predefined entities override any extra definition
7473      */
7474     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7475         ent = xmlGetPredefinedEntity(name);
7476         if (ent != NULL)
7477             return(ent);
7478     }
7479 
7480     /*
7481      * Increase the number of entity references parsed
7482      */
7483     ctxt->nbentities++;
7484 
7485     /*
7486      * Ask first SAX for entity resolution, otherwise try the
7487      * entities which may have stored in the parser context.
7488      */
7489     if (ctxt->sax != NULL) {
7490 	if (ctxt->sax->getEntity != NULL)
7491 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7492 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7493 	    (ctxt->options & XML_PARSE_OLDSAX))
7494 	    ent = xmlGetPredefinedEntity(name);
7495 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7496 	    (ctxt->userData==ctxt)) {
7497 	    ent = xmlSAX2GetEntity(ctxt, name);
7498 	}
7499     }
7500     if (ctxt->instate == XML_PARSER_EOF)
7501 	return(NULL);
7502     /*
7503      * [ WFC: Entity Declared ]
7504      * In a document without any DTD, a document with only an
7505      * internal DTD subset which contains no parameter entity
7506      * references, or a document with "standalone='yes'", the
7507      * Name given in the entity reference must match that in an
7508      * entity declaration, except that well-formed documents
7509      * need not declare any of the following entities: amp, lt,
7510      * gt, apos, quot.
7511      * The declaration of a parameter entity must precede any
7512      * reference to it.
7513      * Similarly, the declaration of a general entity must
7514      * precede any reference to it which appears in a default
7515      * value in an attribute-list declaration. Note that if
7516      * entities are declared in the external subset or in
7517      * external parameter entities, a non-validating processor
7518      * is not obligated to read and process their declarations;
7519      * for such documents, the rule that an entity must be
7520      * declared is a well-formedness constraint only if
7521      * standalone='yes'.
7522      */
7523     if (ent == NULL) {
7524 	if ((ctxt->standalone == 1) ||
7525 	    ((ctxt->hasExternalSubset == 0) &&
7526 	     (ctxt->hasPErefs == 0))) {
7527 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7528 		     "Entity '%s' not defined\n", name);
7529 	} else {
7530 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7531 		     "Entity '%s' not defined\n", name);
7532 	    if ((ctxt->inSubset == 0) &&
7533 		(ctxt->sax != NULL) &&
7534 		(ctxt->sax->reference != NULL)) {
7535 		ctxt->sax->reference(ctxt->userData, name);
7536 	    }
7537 	}
7538 	xmlParserEntityCheck(ctxt, 0, ent, 0);
7539 	ctxt->valid = 0;
7540     }
7541 
7542     /*
7543      * [ WFC: Parsed Entity ]
7544      * An entity reference must not contain the name of an
7545      * unparsed entity
7546      */
7547     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7548 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7549 		 "Entity reference to unparsed entity %s\n", name);
7550     }
7551 
7552     /*
7553      * [ WFC: No External Entity References ]
7554      * Attribute values cannot contain direct or indirect
7555      * entity references to external entities.
7556      */
7557     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7558 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7559 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7560 	     "Attribute references external entity '%s'\n", name);
7561     }
7562     /*
7563      * [ WFC: No < in Attribute Values ]
7564      * The replacement text of any entity referred to directly or
7565      * indirectly in an attribute value (other than "&lt;") must
7566      * not contain a <.
7567      */
7568     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7569 	     (ent != NULL) &&
7570 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7571 	if (((ent->checked & 1) || (ent->checked == 0)) &&
7572 	     (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7573 	    xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7574 	"'<' in entity '%s' is not allowed in attributes values\n", name);
7575         }
7576     }
7577 
7578     /*
7579      * Internal check, no parameter entities here ...
7580      */
7581     else {
7582 	switch (ent->etype) {
7583 	    case XML_INTERNAL_PARAMETER_ENTITY:
7584 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7585 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7586 	     "Attempt to reference the parameter entity '%s'\n",
7587 			      name);
7588 	    break;
7589 	    default:
7590 	    break;
7591 	}
7592     }
7593 
7594     /*
7595      * [ WFC: No Recursion ]
7596      * A parsed entity must not contain a recursive reference
7597      * to itself, either directly or indirectly.
7598      * Done somewhere else
7599      */
7600     return(ent);
7601 }
7602 
7603 /**
7604  * xmlParseStringEntityRef:
7605  * @ctxt:  an XML parser context
7606  * @str:  a pointer to an index in the string
7607  *
7608  * parse ENTITY references declarations, but this version parses it from
7609  * a string value.
7610  *
7611  * [68] EntityRef ::= '&' Name ';'
7612  *
7613  * [ WFC: Entity Declared ]
7614  * In a document without any DTD, a document with only an internal DTD
7615  * subset which contains no parameter entity references, or a document
7616  * with "standalone='yes'", the Name given in the entity reference
7617  * must match that in an entity declaration, except that well-formed
7618  * documents need not declare any of the following entities: amp, lt,
7619  * gt, apos, quot.  The declaration of a parameter entity must precede
7620  * any reference to it.  Similarly, the declaration of a general entity
7621  * must precede any reference to it which appears in a default value in an
7622  * attribute-list declaration. Note that if entities are declared in the
7623  * external subset or in external parameter entities, a non-validating
7624  * processor is not obligated to read and process their declarations;
7625  * for such documents, the rule that an entity must be declared is a
7626  * well-formedness constraint only if standalone='yes'.
7627  *
7628  * [ WFC: Parsed Entity ]
7629  * An entity reference must not contain the name of an unparsed entity
7630  *
7631  * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7632  * is updated to the current location in the string.
7633  */
7634 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7635 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7636     xmlChar *name;
7637     const xmlChar *ptr;
7638     xmlChar cur;
7639     xmlEntityPtr ent = NULL;
7640 
7641     if ((str == NULL) || (*str == NULL))
7642         return(NULL);
7643     ptr = *str;
7644     cur = *ptr;
7645     if (cur != '&')
7646 	return(NULL);
7647 
7648     ptr++;
7649     name = xmlParseStringName(ctxt, &ptr);
7650     if (name == NULL) {
7651 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7652 		       "xmlParseStringEntityRef: no name\n");
7653 	*str = ptr;
7654 	return(NULL);
7655     }
7656     if (*ptr != ';') {
7657 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7658         xmlFree(name);
7659 	*str = ptr;
7660 	return(NULL);
7661     }
7662     ptr++;
7663 
7664 
7665     /*
7666      * Predefined entities override any extra definition
7667      */
7668     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7669         ent = xmlGetPredefinedEntity(name);
7670         if (ent != NULL) {
7671             xmlFree(name);
7672             *str = ptr;
7673             return(ent);
7674         }
7675     }
7676 
7677     /*
7678      * Increate the number of entity references parsed
7679      */
7680     ctxt->nbentities++;
7681 
7682     /*
7683      * Ask first SAX for entity resolution, otherwise try the
7684      * entities which may have stored in the parser context.
7685      */
7686     if (ctxt->sax != NULL) {
7687 	if (ctxt->sax->getEntity != NULL)
7688 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7689 	if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7690 	    ent = xmlGetPredefinedEntity(name);
7691 	if ((ent == NULL) && (ctxt->userData==ctxt)) {
7692 	    ent = xmlSAX2GetEntity(ctxt, name);
7693 	}
7694     }
7695     if (ctxt->instate == XML_PARSER_EOF) {
7696 	xmlFree(name);
7697 	return(NULL);
7698     }
7699 
7700     /*
7701      * [ WFC: Entity Declared ]
7702      * In a document without any DTD, a document with only an
7703      * internal DTD subset which contains no parameter entity
7704      * references, or a document with "standalone='yes'", the
7705      * Name given in the entity reference must match that in an
7706      * entity declaration, except that well-formed documents
7707      * need not declare any of the following entities: amp, lt,
7708      * gt, apos, quot.
7709      * The declaration of a parameter entity must precede any
7710      * reference to it.
7711      * Similarly, the declaration of a general entity must
7712      * precede any reference to it which appears in a default
7713      * value in an attribute-list declaration. Note that if
7714      * entities are declared in the external subset or in
7715      * external parameter entities, a non-validating processor
7716      * is not obligated to read and process their declarations;
7717      * for such documents, the rule that an entity must be
7718      * declared is a well-formedness constraint only if
7719      * standalone='yes'.
7720      */
7721     if (ent == NULL) {
7722 	if ((ctxt->standalone == 1) ||
7723 	    ((ctxt->hasExternalSubset == 0) &&
7724 	     (ctxt->hasPErefs == 0))) {
7725 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7726 		     "Entity '%s' not defined\n", name);
7727 	} else {
7728 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7729 			  "Entity '%s' not defined\n",
7730 			  name);
7731 	}
7732 	xmlParserEntityCheck(ctxt, 0, ent, 0);
7733 	/* TODO ? check regressions ctxt->valid = 0; */
7734     }
7735 
7736     /*
7737      * [ WFC: Parsed Entity ]
7738      * An entity reference must not contain the name of an
7739      * unparsed entity
7740      */
7741     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7742 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7743 		 "Entity reference to unparsed entity %s\n", name);
7744     }
7745 
7746     /*
7747      * [ WFC: No External Entity References ]
7748      * Attribute values cannot contain direct or indirect
7749      * entity references to external entities.
7750      */
7751     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7752 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7753 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7754 	 "Attribute references external entity '%s'\n", name);
7755     }
7756     /*
7757      * [ WFC: No < in Attribute Values ]
7758      * The replacement text of any entity referred to directly or
7759      * indirectly in an attribute value (other than "&lt;") must
7760      * not contain a <.
7761      */
7762     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7763 	     (ent != NULL) && (ent->content != NULL) &&
7764 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7765 	     (xmlStrchr(ent->content, '<'))) {
7766 	xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7767      "'<' in entity '%s' is not allowed in attributes values\n",
7768 			  name);
7769     }
7770 
7771     /*
7772      * Internal check, no parameter entities here ...
7773      */
7774     else {
7775 	switch (ent->etype) {
7776 	    case XML_INTERNAL_PARAMETER_ENTITY:
7777 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7778 		xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7779 	     "Attempt to reference the parameter entity '%s'\n",
7780 				  name);
7781 	    break;
7782 	    default:
7783 	    break;
7784 	}
7785     }
7786 
7787     /*
7788      * [ WFC: No Recursion ]
7789      * A parsed entity must not contain a recursive reference
7790      * to itself, either directly or indirectly.
7791      * Done somewhere else
7792      */
7793 
7794     xmlFree(name);
7795     *str = ptr;
7796     return(ent);
7797 }
7798 
7799 /**
7800  * xmlParsePEReference:
7801  * @ctxt:  an XML parser context
7802  *
7803  * parse PEReference declarations
7804  * The entity content is handled directly by pushing it's content as
7805  * a new input stream.
7806  *
7807  * [69] PEReference ::= '%' Name ';'
7808  *
7809  * [ WFC: No Recursion ]
7810  * A parsed entity must not contain a recursive
7811  * reference to itself, either directly or indirectly.
7812  *
7813  * [ WFC: Entity Declared ]
7814  * In a document without any DTD, a document with only an internal DTD
7815  * subset which contains no parameter entity references, or a document
7816  * with "standalone='yes'", ...  ... The declaration of a parameter
7817  * entity must precede any reference to it...
7818  *
7819  * [ VC: Entity Declared ]
7820  * In a document with an external subset or external parameter entities
7821  * with "standalone='no'", ...  ... The declaration of a parameter entity
7822  * must precede any reference to it...
7823  *
7824  * [ WFC: In DTD ]
7825  * Parameter-entity references may only appear in the DTD.
7826  * NOTE: misleading but this is handled.
7827  */
7828 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7829 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7830 {
7831     const xmlChar *name;
7832     xmlEntityPtr entity = NULL;
7833     xmlParserInputPtr input;
7834 
7835     if (RAW != '%')
7836         return;
7837     NEXT;
7838     name = xmlParseName(ctxt);
7839     if (name == NULL) {
7840 	xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7841 	return;
7842     }
7843     if (xmlParserDebugEntities)
7844 	xmlGenericError(xmlGenericErrorContext,
7845 		"PEReference: %s\n", name);
7846     if (RAW != ';') {
7847 	xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7848         return;
7849     }
7850 
7851     NEXT;
7852 
7853     /*
7854      * Increate the number of entity references parsed
7855      */
7856     ctxt->nbentities++;
7857 
7858     /*
7859      * Request the entity from SAX
7860      */
7861     if ((ctxt->sax != NULL) &&
7862 	(ctxt->sax->getParameterEntity != NULL))
7863 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7864     if (ctxt->instate == XML_PARSER_EOF)
7865 	return;
7866     if (entity == NULL) {
7867 	/*
7868 	 * [ WFC: Entity Declared ]
7869 	 * In a document without any DTD, a document with only an
7870 	 * internal DTD subset which contains no parameter entity
7871 	 * references, or a document with "standalone='yes'", ...
7872 	 * ... The declaration of a parameter entity must precede
7873 	 * any reference to it...
7874 	 */
7875 	if ((ctxt->standalone == 1) ||
7876 	    ((ctxt->hasExternalSubset == 0) &&
7877 	     (ctxt->hasPErefs == 0))) {
7878 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7879 			      "PEReference: %%%s; not found\n",
7880 			      name);
7881 	} else {
7882 	    /*
7883 	     * [ VC: Entity Declared ]
7884 	     * In a document with an external subset or external
7885 	     * parameter entities with "standalone='no'", ...
7886 	     * ... The declaration of a parameter entity must
7887 	     * precede any reference to it...
7888 	     */
7889             if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7890                 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7891                                  "PEReference: %%%s; not found\n",
7892                                  name, NULL);
7893             } else
7894                 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7895                               "PEReference: %%%s; not found\n",
7896                               name, NULL);
7897             ctxt->valid = 0;
7898 	}
7899 	xmlParserEntityCheck(ctxt, 0, NULL, 0);
7900     } else {
7901 	/*
7902 	 * Internal checking in case the entity quest barfed
7903 	 */
7904 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7905 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7906 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7907 		  "Internal: %%%s; is not a parameter entity\n",
7908 			  name, NULL);
7909 	} else {
7910             xmlChar start[4];
7911             xmlCharEncoding enc;
7912 
7913 	    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7914 	        ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7915 		((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7916 		((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7917 		((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7918 		(ctxt->replaceEntities == 0) &&
7919 		(ctxt->validate == 0))
7920 		return;
7921 
7922 	    input = xmlNewEntityInputStream(ctxt, entity);
7923 	    if (xmlPushInput(ctxt, input) < 0) {
7924                 xmlFreeInputStream(input);
7925 		return;
7926             }
7927 
7928 	    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7929                 /*
7930                  * Get the 4 first bytes and decode the charset
7931                  * if enc != XML_CHAR_ENCODING_NONE
7932                  * plug some encoding conversion routines.
7933                  * Note that, since we may have some non-UTF8
7934                  * encoding (like UTF16, bug 135229), the 'length'
7935                  * is not known, but we can calculate based upon
7936                  * the amount of data in the buffer.
7937                  */
7938                 GROW
7939                 if (ctxt->instate == XML_PARSER_EOF)
7940                     return;
7941                 if ((ctxt->input->end - ctxt->input->cur)>=4) {
7942                     start[0] = RAW;
7943                     start[1] = NXT(1);
7944                     start[2] = NXT(2);
7945                     start[3] = NXT(3);
7946                     enc = xmlDetectCharEncoding(start, 4);
7947                     if (enc != XML_CHAR_ENCODING_NONE) {
7948                         xmlSwitchEncoding(ctxt, enc);
7949                     }
7950                 }
7951 
7952                 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7953                     (IS_BLANK_CH(NXT(5)))) {
7954                     xmlParseTextDecl(ctxt);
7955                 }
7956             }
7957 	}
7958     }
7959     ctxt->hasPErefs = 1;
7960 }
7961 
7962 /**
7963  * xmlLoadEntityContent:
7964  * @ctxt:  an XML parser context
7965  * @entity: an unloaded system entity
7966  *
7967  * Load the original content of the given system entity from the
7968  * ExternalID/SystemID given. This is to be used for Included in Literal
7969  * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7970  *
7971  * Returns 0 in case of success and -1 in case of failure
7972  */
7973 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)7974 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7975     xmlParserInputPtr input;
7976     xmlBufferPtr buf;
7977     int l, c;
7978     int count = 0;
7979 
7980     if ((ctxt == NULL) || (entity == NULL) ||
7981         ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7982 	 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7983 	(entity->content != NULL)) {
7984 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7985 	            "xmlLoadEntityContent parameter error");
7986         return(-1);
7987     }
7988 
7989     if (xmlParserDebugEntities)
7990 	xmlGenericError(xmlGenericErrorContext,
7991 		"Reading %s entity content input\n", entity->name);
7992 
7993     buf = xmlBufferCreate();
7994     if (buf == NULL) {
7995 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7996 	            "xmlLoadEntityContent parameter error");
7997         return(-1);
7998     }
7999 
8000     input = xmlNewEntityInputStream(ctxt, entity);
8001     if (input == NULL) {
8002 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8003 	            "xmlLoadEntityContent input error");
8004 	xmlBufferFree(buf);
8005         return(-1);
8006     }
8007 
8008     /*
8009      * Push the entity as the current input, read char by char
8010      * saving to the buffer until the end of the entity or an error
8011      */
8012     if (xmlPushInput(ctxt, input) < 0) {
8013         xmlBufferFree(buf);
8014 	return(-1);
8015     }
8016 
8017     GROW;
8018     c = CUR_CHAR(l);
8019     while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8020            (IS_CHAR(c))) {
8021         xmlBufferAdd(buf, ctxt->input->cur, l);
8022 	if (count++ > XML_PARSER_CHUNK_SIZE) {
8023 	    count = 0;
8024 	    GROW;
8025             if (ctxt->instate == XML_PARSER_EOF) {
8026                 xmlBufferFree(buf);
8027                 return(-1);
8028             }
8029 	}
8030 	NEXTL(l);
8031 	c = CUR_CHAR(l);
8032 	if (c == 0) {
8033 	    count = 0;
8034 	    GROW;
8035             if (ctxt->instate == XML_PARSER_EOF) {
8036                 xmlBufferFree(buf);
8037                 return(-1);
8038             }
8039 	    c = CUR_CHAR(l);
8040 	}
8041     }
8042 
8043     if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8044         xmlPopInput(ctxt);
8045     } else if (!IS_CHAR(c)) {
8046         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8047                           "xmlLoadEntityContent: invalid char value %d\n",
8048 	                  c);
8049 	xmlBufferFree(buf);
8050 	return(-1);
8051     }
8052     entity->content = buf->content;
8053     buf->content = NULL;
8054     xmlBufferFree(buf);
8055 
8056     return(0);
8057 }
8058 
8059 /**
8060  * xmlParseStringPEReference:
8061  * @ctxt:  an XML parser context
8062  * @str:  a pointer to an index in the string
8063  *
8064  * parse PEReference declarations
8065  *
8066  * [69] PEReference ::= '%' Name ';'
8067  *
8068  * [ WFC: No Recursion ]
8069  * A parsed entity must not contain a recursive
8070  * reference to itself, either directly or indirectly.
8071  *
8072  * [ WFC: Entity Declared ]
8073  * In a document without any DTD, a document with only an internal DTD
8074  * subset which contains no parameter entity references, or a document
8075  * with "standalone='yes'", ...  ... The declaration of a parameter
8076  * entity must precede any reference to it...
8077  *
8078  * [ VC: Entity Declared ]
8079  * In a document with an external subset or external parameter entities
8080  * with "standalone='no'", ...  ... The declaration of a parameter entity
8081  * must precede any reference to it...
8082  *
8083  * [ WFC: In DTD ]
8084  * Parameter-entity references may only appear in the DTD.
8085  * NOTE: misleading but this is handled.
8086  *
8087  * Returns the string of the entity content.
8088  *         str is updated to the current value of the index
8089  */
8090 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8091 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8092     const xmlChar *ptr;
8093     xmlChar cur;
8094     xmlChar *name;
8095     xmlEntityPtr entity = NULL;
8096 
8097     if ((str == NULL) || (*str == NULL)) return(NULL);
8098     ptr = *str;
8099     cur = *ptr;
8100     if (cur != '%')
8101         return(NULL);
8102     ptr++;
8103     name = xmlParseStringName(ctxt, &ptr);
8104     if (name == NULL) {
8105 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8106 		       "xmlParseStringPEReference: no name\n");
8107 	*str = ptr;
8108 	return(NULL);
8109     }
8110     cur = *ptr;
8111     if (cur != ';') {
8112 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8113 	xmlFree(name);
8114 	*str = ptr;
8115 	return(NULL);
8116     }
8117     ptr++;
8118 
8119     /*
8120      * Increate the number of entity references parsed
8121      */
8122     ctxt->nbentities++;
8123 
8124     /*
8125      * Request the entity from SAX
8126      */
8127     if ((ctxt->sax != NULL) &&
8128 	(ctxt->sax->getParameterEntity != NULL))
8129 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8130     if (ctxt->instate == XML_PARSER_EOF) {
8131 	xmlFree(name);
8132 	*str = ptr;
8133 	return(NULL);
8134     }
8135     if (entity == NULL) {
8136 	/*
8137 	 * [ WFC: Entity Declared ]
8138 	 * In a document without any DTD, a document with only an
8139 	 * internal DTD subset which contains no parameter entity
8140 	 * references, or a document with "standalone='yes'", ...
8141 	 * ... The declaration of a parameter entity must precede
8142 	 * any reference to it...
8143 	 */
8144 	if ((ctxt->standalone == 1) ||
8145 	    ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8146 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8147 		 "PEReference: %%%s; not found\n", name);
8148 	} else {
8149 	    /*
8150 	     * [ VC: Entity Declared ]
8151 	     * In a document with an external subset or external
8152 	     * parameter entities with "standalone='no'", ...
8153 	     * ... The declaration of a parameter entity must
8154 	     * precede any reference to it...
8155 	     */
8156 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8157 			  "PEReference: %%%s; not found\n",
8158 			  name, NULL);
8159 	    ctxt->valid = 0;
8160 	}
8161 	xmlParserEntityCheck(ctxt, 0, NULL, 0);
8162     } else {
8163 	/*
8164 	 * Internal checking in case the entity quest barfed
8165 	 */
8166 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8167 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8168 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8169 			  "%%%s; is not a parameter entity\n",
8170 			  name, NULL);
8171 	}
8172     }
8173     ctxt->hasPErefs = 1;
8174     xmlFree(name);
8175     *str = ptr;
8176     return(entity);
8177 }
8178 
8179 /**
8180  * xmlParseDocTypeDecl:
8181  * @ctxt:  an XML parser context
8182  *
8183  * parse a DOCTYPE declaration
8184  *
8185  * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8186  *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8187  *
8188  * [ VC: Root Element Type ]
8189  * The Name in the document type declaration must match the element
8190  * type of the root element.
8191  */
8192 
8193 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8194 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8195     const xmlChar *name = NULL;
8196     xmlChar *ExternalID = NULL;
8197     xmlChar *URI = NULL;
8198 
8199     /*
8200      * We know that '<!DOCTYPE' has been detected.
8201      */
8202     SKIP(9);
8203 
8204     SKIP_BLANKS;
8205 
8206     /*
8207      * Parse the DOCTYPE name.
8208      */
8209     name = xmlParseName(ctxt);
8210     if (name == NULL) {
8211 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8212 		       "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8213     }
8214     ctxt->intSubName = name;
8215 
8216     SKIP_BLANKS;
8217 
8218     /*
8219      * Check for SystemID and ExternalID
8220      */
8221     URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8222 
8223     if ((URI != NULL) || (ExternalID != NULL)) {
8224         ctxt->hasExternalSubset = 1;
8225     }
8226     ctxt->extSubURI = URI;
8227     ctxt->extSubSystem = ExternalID;
8228 
8229     SKIP_BLANKS;
8230 
8231     /*
8232      * Create and update the internal subset.
8233      */
8234     if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8235 	(!ctxt->disableSAX))
8236 	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8237     if (ctxt->instate == XML_PARSER_EOF)
8238 	return;
8239 
8240     /*
8241      * Is there any internal subset declarations ?
8242      * they are handled separately in xmlParseInternalSubset()
8243      */
8244     if (RAW == '[')
8245 	return;
8246 
8247     /*
8248      * We should be at the end of the DOCTYPE declaration.
8249      */
8250     if (RAW != '>') {
8251 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8252     }
8253     NEXT;
8254 }
8255 
8256 /**
8257  * xmlParseInternalSubset:
8258  * @ctxt:  an XML parser context
8259  *
8260  * parse the internal subset declaration
8261  *
8262  * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8263  */
8264 
8265 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8266 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8267     /*
8268      * Is there any DTD definition ?
8269      */
8270     if (RAW == '[') {
8271         int baseInputNr = ctxt->inputNr;
8272         ctxt->instate = XML_PARSER_DTD;
8273         NEXT;
8274 	/*
8275 	 * Parse the succession of Markup declarations and
8276 	 * PEReferences.
8277 	 * Subsequence (markupdecl | PEReference | S)*
8278 	 */
8279 	while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8280                (ctxt->instate != XML_PARSER_EOF)) {
8281 	    const xmlChar *check = CUR_PTR;
8282 	    unsigned int cons = ctxt->input->consumed;
8283 
8284 	    SKIP_BLANKS;
8285 	    xmlParseMarkupDecl(ctxt);
8286 	    xmlParsePEReference(ctxt);
8287 
8288 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8289 		xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8290 	     "xmlParseInternalSubset: error detected in Markup declaration\n");
8291                 if (ctxt->inputNr > baseInputNr)
8292                     xmlPopInput(ctxt);
8293                 else
8294 		    break;
8295 	    }
8296 	}
8297 	if (RAW == ']') {
8298 	    NEXT;
8299 	    SKIP_BLANKS;
8300 	}
8301     }
8302 
8303     /*
8304      * We should be at the end of the DOCTYPE declaration.
8305      */
8306     if (RAW != '>') {
8307 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8308 	return;
8309     }
8310     NEXT;
8311 }
8312 
8313 #ifdef LIBXML_SAX1_ENABLED
8314 /**
8315  * xmlParseAttribute:
8316  * @ctxt:  an XML parser context
8317  * @value:  a xmlChar ** used to store the value of the attribute
8318  *
8319  * parse an attribute
8320  *
8321  * [41] Attribute ::= Name Eq AttValue
8322  *
8323  * [ WFC: No External Entity References ]
8324  * Attribute values cannot contain direct or indirect entity references
8325  * to external entities.
8326  *
8327  * [ WFC: No < in Attribute Values ]
8328  * The replacement text of any entity referred to directly or indirectly in
8329  * an attribute value (other than "&lt;") must not contain a <.
8330  *
8331  * [ VC: Attribute Value Type ]
8332  * The attribute must have been declared; the value must be of the type
8333  * declared for it.
8334  *
8335  * [25] Eq ::= S? '=' S?
8336  *
8337  * With namespace:
8338  *
8339  * [NS 11] Attribute ::= QName Eq AttValue
8340  *
8341  * Also the case QName == xmlns:??? is handled independently as a namespace
8342  * definition.
8343  *
8344  * Returns the attribute name, and the value in *value.
8345  */
8346 
8347 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8348 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8349     const xmlChar *name;
8350     xmlChar *val;
8351 
8352     *value = NULL;
8353     GROW;
8354     name = xmlParseName(ctxt);
8355     if (name == NULL) {
8356 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8357 	               "error parsing attribute name\n");
8358         return(NULL);
8359     }
8360 
8361     /*
8362      * read the value
8363      */
8364     SKIP_BLANKS;
8365     if (RAW == '=') {
8366         NEXT;
8367 	SKIP_BLANKS;
8368 	val = xmlParseAttValue(ctxt);
8369 	ctxt->instate = XML_PARSER_CONTENT;
8370     } else {
8371 	xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8372 	       "Specification mandates value for attribute %s\n", name);
8373 	return(NULL);
8374     }
8375 
8376     /*
8377      * Check that xml:lang conforms to the specification
8378      * No more registered as an error, just generate a warning now
8379      * since this was deprecated in XML second edition
8380      */
8381     if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8382 	if (!xmlCheckLanguageID(val)) {
8383 	    xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8384 		          "Malformed value for xml:lang : %s\n",
8385 			  val, NULL);
8386 	}
8387     }
8388 
8389     /*
8390      * Check that xml:space conforms to the specification
8391      */
8392     if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8393 	if (xmlStrEqual(val, BAD_CAST "default"))
8394 	    *(ctxt->space) = 0;
8395 	else if (xmlStrEqual(val, BAD_CAST "preserve"))
8396 	    *(ctxt->space) = 1;
8397 	else {
8398 		xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8399 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8400                                  val, NULL);
8401 	}
8402     }
8403 
8404     *value = val;
8405     return(name);
8406 }
8407 
8408 /**
8409  * xmlParseStartTag:
8410  * @ctxt:  an XML parser context
8411  *
8412  * parse a start of tag either for rule element or
8413  * EmptyElement. In both case we don't parse the tag closing chars.
8414  *
8415  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8416  *
8417  * [ WFC: Unique Att Spec ]
8418  * No attribute name may appear more than once in the same start-tag or
8419  * empty-element tag.
8420  *
8421  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8422  *
8423  * [ WFC: Unique Att Spec ]
8424  * No attribute name may appear more than once in the same start-tag or
8425  * empty-element tag.
8426  *
8427  * With namespace:
8428  *
8429  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8430  *
8431  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8432  *
8433  * Returns the element name parsed
8434  */
8435 
8436 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8437 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8438     const xmlChar *name;
8439     const xmlChar *attname;
8440     xmlChar *attvalue;
8441     const xmlChar **atts = ctxt->atts;
8442     int nbatts = 0;
8443     int maxatts = ctxt->maxatts;
8444     int i;
8445 
8446     if (RAW != '<') return(NULL);
8447     NEXT1;
8448 
8449     name = xmlParseName(ctxt);
8450     if (name == NULL) {
8451 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8452 	     "xmlParseStartTag: invalid element name\n");
8453         return(NULL);
8454     }
8455 
8456     /*
8457      * Now parse the attributes, it ends up with the ending
8458      *
8459      * (S Attribute)* S?
8460      */
8461     SKIP_BLANKS;
8462     GROW;
8463 
8464     while (((RAW != '>') &&
8465 	   ((RAW != '/') || (NXT(1) != '>')) &&
8466 	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8467 	const xmlChar *q = CUR_PTR;
8468 	unsigned int cons = ctxt->input->consumed;
8469 
8470 	attname = xmlParseAttribute(ctxt, &attvalue);
8471         if ((attname != NULL) && (attvalue != NULL)) {
8472 	    /*
8473 	     * [ WFC: Unique Att Spec ]
8474 	     * No attribute name may appear more than once in the same
8475 	     * start-tag or empty-element tag.
8476 	     */
8477 	    for (i = 0; i < nbatts;i += 2) {
8478 	        if (xmlStrEqual(atts[i], attname)) {
8479 		    xmlErrAttributeDup(ctxt, NULL, attname);
8480 		    xmlFree(attvalue);
8481 		    goto failed;
8482 		}
8483 	    }
8484 	    /*
8485 	     * Add the pair to atts
8486 	     */
8487 	    if (atts == NULL) {
8488 	        maxatts = 22; /* allow for 10 attrs by default */
8489 	        atts = (const xmlChar **)
8490 		       xmlMalloc(maxatts * sizeof(xmlChar *));
8491 		if (atts == NULL) {
8492 		    xmlErrMemory(ctxt, NULL);
8493 		    if (attvalue != NULL)
8494 			xmlFree(attvalue);
8495 		    goto failed;
8496 		}
8497 		ctxt->atts = atts;
8498 		ctxt->maxatts = maxatts;
8499 	    } else if (nbatts + 4 > maxatts) {
8500 	        const xmlChar **n;
8501 
8502 	        maxatts *= 2;
8503 	        n = (const xmlChar **) xmlRealloc((void *) atts,
8504 					     maxatts * sizeof(const xmlChar *));
8505 		if (n == NULL) {
8506 		    xmlErrMemory(ctxt, NULL);
8507 		    if (attvalue != NULL)
8508 			xmlFree(attvalue);
8509 		    goto failed;
8510 		}
8511 		atts = n;
8512 		ctxt->atts = atts;
8513 		ctxt->maxatts = maxatts;
8514 	    }
8515 	    atts[nbatts++] = attname;
8516 	    atts[nbatts++] = attvalue;
8517 	    atts[nbatts] = NULL;
8518 	    atts[nbatts + 1] = NULL;
8519 	} else {
8520 	    if (attvalue != NULL)
8521 		xmlFree(attvalue);
8522 	}
8523 
8524 failed:
8525 
8526 	GROW
8527 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8528 	    break;
8529 	if (SKIP_BLANKS == 0) {
8530 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8531 			   "attributes construct error\n");
8532 	}
8533         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8534             (attname == NULL) && (attvalue == NULL)) {
8535 	    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8536 			   "xmlParseStartTag: problem parsing attributes\n");
8537 	    break;
8538 	}
8539 	SHRINK;
8540         GROW;
8541     }
8542 
8543     /*
8544      * SAX: Start of Element !
8545      */
8546     if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8547 	(!ctxt->disableSAX)) {
8548 	if (nbatts > 0)
8549 	    ctxt->sax->startElement(ctxt->userData, name, atts);
8550 	else
8551 	    ctxt->sax->startElement(ctxt->userData, name, NULL);
8552     }
8553 
8554     if (atts != NULL) {
8555         /* Free only the content strings */
8556         for (i = 1;i < nbatts;i+=2)
8557 	    if (atts[i] != NULL)
8558 	       xmlFree((xmlChar *) atts[i]);
8559     }
8560     return(name);
8561 }
8562 
8563 /**
8564  * xmlParseEndTag1:
8565  * @ctxt:  an XML parser context
8566  * @line:  line of the start tag
8567  * @nsNr:  number of namespaces on the start tag
8568  *
8569  * parse an end of tag
8570  *
8571  * [42] ETag ::= '</' Name S? '>'
8572  *
8573  * With namespace
8574  *
8575  * [NS 9] ETag ::= '</' QName S? '>'
8576  */
8577 
8578 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8579 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8580     const xmlChar *name;
8581 
8582     GROW;
8583     if ((RAW != '<') || (NXT(1) != '/')) {
8584 	xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8585 		       "xmlParseEndTag: '</' not found\n");
8586 	return;
8587     }
8588     SKIP(2);
8589 
8590     name = xmlParseNameAndCompare(ctxt,ctxt->name);
8591 
8592     /*
8593      * We should definitely be at the ending "S? '>'" part
8594      */
8595     GROW;
8596     SKIP_BLANKS;
8597     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8598 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8599     } else
8600 	NEXT1;
8601 
8602     /*
8603      * [ WFC: Element Type Match ]
8604      * The Name in an element's end-tag must match the element type in the
8605      * start-tag.
8606      *
8607      */
8608     if (name != (xmlChar*)1) {
8609         if (name == NULL) name = BAD_CAST "unparseable";
8610         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8611 		     "Opening and ending tag mismatch: %s line %d and %s\n",
8612 		                ctxt->name, line, name);
8613     }
8614 
8615     /*
8616      * SAX: End of Tag
8617      */
8618     if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8619 	(!ctxt->disableSAX))
8620         ctxt->sax->endElement(ctxt->userData, ctxt->name);
8621 
8622     namePop(ctxt);
8623     spacePop(ctxt);
8624     return;
8625 }
8626 
/**
 * xmlParseEndTag:
 * @ctxt:  an XML parser context
 *
 * Parse an end tag. This is a thin wrapper that forwards to
 * xmlParseEndTag1() passing 0 as the line of the start tag.
 *
 * [42] ETag ::= '</' Name S? '>'
 *
 * With namespace
 *
 * [NS 9] ETag ::= '</' QName S? '>'
 */

void
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
    xmlParseEndTag1(ctxt, 0);
}
8644 #endif /* LIBXML_SAX1_ENABLED */
8645 
8646 /************************************************************************
8647  *									*
8648  *		      SAX 2 specific operations				*
8649  *									*
8650  ************************************************************************/
8651 
8652 /*
8653  * xmlGetNamespace:
8654  * @ctxt:  an XML parser context
8655  * @prefix:  the prefix to lookup
8656  *
8657  * Lookup the namespace name for the @prefix (which ca be NULL)
8658  * The prefix must come from the @ctxt->dict dictionary
8659  *
8660  * Returns the namespace name or NULL if not bound
8661  */
8662 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8663 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8664     int i;
8665 
8666     if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8667     for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8668         if (ctxt->nsTab[i] == prefix) {
8669 	    if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8670 	        return(NULL);
8671 	    return(ctxt->nsTab[i + 1]);
8672 	}
8673     return(NULL);
8674 }
8675 
8676 /**
8677  * xmlParseQName:
8678  * @ctxt:  an XML parser context
8679  * @prefix:  pointer to store the prefix part
8680  *
8681  * parse an XML Namespace QName
8682  *
8683  * [6]  QName  ::= (Prefix ':')? LocalPart
8684  * [7]  Prefix  ::= NCName
8685  * [8]  LocalPart  ::= NCName
8686  *
8687  * Returns the Name parsed or NULL
8688  */
8689 
8690 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8691 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8692     const xmlChar *l, *p;
8693 
8694     GROW;
8695 
8696     l = xmlParseNCName(ctxt);
8697     if (l == NULL) {
8698         if (CUR == ':') {
8699 	    l = xmlParseName(ctxt);
8700 	    if (l != NULL) {
8701 	        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8702 		         "Failed to parse QName '%s'\n", l, NULL, NULL);
8703 		*prefix = NULL;
8704 		return(l);
8705 	    }
8706 	}
8707         return(NULL);
8708     }
8709     if (CUR == ':') {
8710         NEXT;
8711 	p = l;
8712 	l = xmlParseNCName(ctxt);
8713 	if (l == NULL) {
8714 	    xmlChar *tmp;
8715 
8716             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8717 	             "Failed to parse QName '%s:'\n", p, NULL, NULL);
8718 	    l = xmlParseNmtoken(ctxt);
8719 	    if (l == NULL)
8720 		tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8721 	    else {
8722 		tmp = xmlBuildQName(l, p, NULL, 0);
8723 		xmlFree((char *)l);
8724 	    }
8725 	    p = xmlDictLookup(ctxt->dict, tmp, -1);
8726 	    if (tmp != NULL) xmlFree(tmp);
8727 	    *prefix = NULL;
8728 	    return(p);
8729 	}
8730 	if (CUR == ':') {
8731 	    xmlChar *tmp;
8732 
8733             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8734 	             "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8735 	    NEXT;
8736 	    tmp = (xmlChar *) xmlParseName(ctxt);
8737 	    if (tmp != NULL) {
8738 	        tmp = xmlBuildQName(tmp, l, NULL, 0);
8739 		l = xmlDictLookup(ctxt->dict, tmp, -1);
8740 		if (tmp != NULL) xmlFree(tmp);
8741 		*prefix = p;
8742 		return(l);
8743 	    }
8744 	    tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8745 	    l = xmlDictLookup(ctxt->dict, tmp, -1);
8746 	    if (tmp != NULL) xmlFree(tmp);
8747 	    *prefix = p;
8748 	    return(l);
8749 	}
8750 	*prefix = p;
8751     } else
8752         *prefix = NULL;
8753     return(l);
8754 }
8755 
8756 /**
8757  * xmlParseQNameAndCompare:
8758  * @ctxt:  an XML parser context
8759  * @name:  the localname
8760  * @prefix:  the prefix, if any.
8761  *
8762  * parse an XML name and compares for match
8763  * (specialized for endtag parsing)
8764  *
8765  * Returns NULL for an illegal name, (xmlChar*) 1 for success
8766  * and the name for mismatch
8767  */
8768 
8769 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8770 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8771                         xmlChar const *prefix) {
8772     const xmlChar *cmp;
8773     const xmlChar *in;
8774     const xmlChar *ret;
8775     const xmlChar *prefix2;
8776 
8777     if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8778 
8779     GROW;
8780     in = ctxt->input->cur;
8781 
8782     cmp = prefix;
8783     while (*in != 0 && *in == *cmp) {
8784 	++in;
8785 	++cmp;
8786     }
8787     if ((*cmp == 0) && (*in == ':')) {
8788         in++;
8789 	cmp = name;
8790 	while (*in != 0 && *in == *cmp) {
8791 	    ++in;
8792 	    ++cmp;
8793 	}
8794 	if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8795 	    /* success */
8796 	    ctxt->input->cur = in;
8797 	    return((const xmlChar*) 1);
8798 	}
8799     }
8800     /*
8801      * all strings coms from the dictionary, equality can be done directly
8802      */
8803     ret = xmlParseQName (ctxt, &prefix2);
8804     if ((ret == name) && (prefix == prefix2))
8805 	return((const xmlChar*) 1);
8806     return ret;
8807 }
8808 
8809 /**
8810  * xmlParseAttValueInternal:
8811  * @ctxt:  an XML parser context
8812  * @len:  attribute len result
8813  * @alloc:  whether the attribute was reallocated as a new string
8814  * @normalize:  if 1 then further non-CDATA normalization must be done
8815  *
8816  * parse a value for an attribute.
8817  * NOTE: if no normalization is needed, the routine will return pointers
8818  *       directly from the data buffer.
8819  *
8820  * 3.3.3 Attribute-Value Normalization:
8821  * Before the value of an attribute is passed to the application or
8822  * checked for validity, the XML processor must normalize it as follows:
8823  * - a character reference is processed by appending the referenced
8824  *   character to the attribute value
8825  * - an entity reference is processed by recursively processing the
8826  *   replacement text of the entity
8827  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8828  *   appending #x20 to the normalized value, except that only a single
8829  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8830  *   parsed entity or the literal entity value of an internal parsed entity
8831  * - other characters are processed by appending them to the normalized value
8832  * If the declared value is not CDATA, then the XML processor must further
8833  * process the normalized attribute value by discarding any leading and
8834  * trailing space (#x20) characters, and by replacing sequences of space
8835  * (#x20) characters by a single space (#x20) character.
8836  * All attributes for which no declaration has been read should be treated
8837  * by a non-validating parser as if declared CDATA.
8838  *
8839  * Returns the AttValue parsed or NULL. The value has to be freed by the
8840  *     caller if it was copied, this can be detected by val[*len] == 0.
8841  */
8842 
8843 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)8844 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8845                          int normalize)
8846 {
8847     xmlChar limit = 0;
8848     const xmlChar *in = NULL, *start, *end, *last;
8849     xmlChar *ret = NULL;
8850     int line, col;
8851 
8852     GROW;
8853     in = (xmlChar *) CUR_PTR;
8854     line = ctxt->input->line;
8855     col = ctxt->input->col;
8856     if (*in != '"' && *in != '\'') {
8857         xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8858         return (NULL);
8859     }
8860     ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8861 
8862     /*
8863      * try to handle in this routine the most common case where no
8864      * allocation of a new string is required and where content is
8865      * pure ASCII.
8866      */
8867     limit = *in++;
8868     col++;
8869     end = ctxt->input->end;
8870     start = in;
8871     if (in >= end) {
8872         const xmlChar *oldbase = ctxt->input->base;
8873 	GROW;
8874 	if (oldbase != ctxt->input->base) {
8875 	    long delta = ctxt->input->base - oldbase;
8876 	    start = start + delta;
8877 	    in = in + delta;
8878 	}
8879 	end = ctxt->input->end;
8880     }
8881     if (normalize) {
8882         /*
8883 	 * Skip any leading spaces
8884 	 */
8885 	while ((in < end) && (*in != limit) &&
8886 	       ((*in == 0x20) || (*in == 0x9) ||
8887 	        (*in == 0xA) || (*in == 0xD))) {
8888 	    if (*in == 0xA) {
8889 	        line++; col = 1;
8890 	    } else {
8891 	        col++;
8892 	    }
8893 	    in++;
8894 	    start = in;
8895 	    if (in >= end) {
8896 		const xmlChar *oldbase = ctxt->input->base;
8897 		GROW;
8898                 if (ctxt->instate == XML_PARSER_EOF)
8899                     return(NULL);
8900 		if (oldbase != ctxt->input->base) {
8901 		    long delta = ctxt->input->base - oldbase;
8902 		    start = start + delta;
8903 		    in = in + delta;
8904 		}
8905 		end = ctxt->input->end;
8906                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8907                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8908                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8909                                    "AttValue length too long\n");
8910                     return(NULL);
8911                 }
8912 	    }
8913 	}
8914 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8915 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8916 	    col++;
8917 	    if ((*in++ == 0x20) && (*in == 0x20)) break;
8918 	    if (in >= end) {
8919 		const xmlChar *oldbase = ctxt->input->base;
8920 		GROW;
8921                 if (ctxt->instate == XML_PARSER_EOF)
8922                     return(NULL);
8923 		if (oldbase != ctxt->input->base) {
8924 		    long delta = ctxt->input->base - oldbase;
8925 		    start = start + delta;
8926 		    in = in + delta;
8927 		}
8928 		end = ctxt->input->end;
8929                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8930                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8931                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8932                                    "AttValue length too long\n");
8933                     return(NULL);
8934                 }
8935 	    }
8936 	}
8937 	last = in;
8938 	/*
8939 	 * skip the trailing blanks
8940 	 */
8941 	while ((last[-1] == 0x20) && (last > start)) last--;
8942 	while ((in < end) && (*in != limit) &&
8943 	       ((*in == 0x20) || (*in == 0x9) ||
8944 	        (*in == 0xA) || (*in == 0xD))) {
8945 	    if (*in == 0xA) {
8946 	        line++, col = 1;
8947 	    } else {
8948 	        col++;
8949 	    }
8950 	    in++;
8951 	    if (in >= end) {
8952 		const xmlChar *oldbase = ctxt->input->base;
8953 		GROW;
8954                 if (ctxt->instate == XML_PARSER_EOF)
8955                     return(NULL);
8956 		if (oldbase != ctxt->input->base) {
8957 		    long delta = ctxt->input->base - oldbase;
8958 		    start = start + delta;
8959 		    in = in + delta;
8960 		    last = last + delta;
8961 		}
8962 		end = ctxt->input->end;
8963                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8964                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8965                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8966                                    "AttValue length too long\n");
8967                     return(NULL);
8968                 }
8969 	    }
8970 	}
8971         if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8972             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8973             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8974                            "AttValue length too long\n");
8975             return(NULL);
8976         }
8977 	if (*in != limit) goto need_complex;
8978     } else {
8979 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8980 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8981 	    in++;
8982 	    col++;
8983 	    if (in >= end) {
8984 		const xmlChar *oldbase = ctxt->input->base;
8985 		GROW;
8986                 if (ctxt->instate == XML_PARSER_EOF)
8987                     return(NULL);
8988 		if (oldbase != ctxt->input->base) {
8989 		    long delta = ctxt->input->base - oldbase;
8990 		    start = start + delta;
8991 		    in = in + delta;
8992 		}
8993 		end = ctxt->input->end;
8994                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8995                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8996                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8997                                    "AttValue length too long\n");
8998                     return(NULL);
8999                 }
9000 	    }
9001 	}
9002 	last = in;
9003         if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9004             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9005             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9006                            "AttValue length too long\n");
9007             return(NULL);
9008         }
9009 	if (*in != limit) goto need_complex;
9010     }
9011     in++;
9012     col++;
9013     if (len != NULL) {
9014         *len = last - start;
9015         ret = (xmlChar *) start;
9016     } else {
9017         if (alloc) *alloc = 1;
9018         ret = xmlStrndup(start, last - start);
9019     }
9020     CUR_PTR = in;
9021     ctxt->input->line = line;
9022     ctxt->input->col = col;
9023     if (alloc) *alloc = 0;
9024     return ret;
9025 need_complex:
9026     if (alloc) *alloc = 1;
9027     return xmlParseAttValueComplex(ctxt, len, normalize);
9028 }
9029 
9030 /**
9031  * xmlParseAttribute2:
9032  * @ctxt:  an XML parser context
9033  * @pref:  the element prefix
9034  * @elem:  the element name
9035  * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9036  * @value:  a xmlChar ** used to store the value of the attribute
9037  * @len:  an int * to save the length of the attribute
9038  * @alloc:  an int * to indicate if the attribute was allocated
9039  *
9040  * parse an attribute in the new SAX2 framework.
9041  *
9042  * Returns the attribute name, and the value in *value, .
9043  */
9044 
9045 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)9046 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9047                    const xmlChar * pref, const xmlChar * elem,
9048                    const xmlChar ** prefix, xmlChar ** value,
9049                    int *len, int *alloc)
9050 {
9051     const xmlChar *name;
9052     xmlChar *val, *internal_val = NULL;
9053     int normalize = 0;
9054 
9055     *value = NULL;
9056     GROW;
9057     name = xmlParseQName(ctxt, prefix);
9058     if (name == NULL) {
9059         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9060                        "error parsing attribute name\n");
9061         return (NULL);
9062     }
9063 
9064     /*
9065      * get the type if needed
9066      */
9067     if (ctxt->attsSpecial != NULL) {
9068         int type;
9069 
9070         type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9071                                                  pref, elem, *prefix, name);
9072         if (type != 0)
9073             normalize = 1;
9074     }
9075 
9076     /*
9077      * read the value
9078      */
9079     SKIP_BLANKS;
9080     if (RAW == '=') {
9081         NEXT;
9082         SKIP_BLANKS;
9083         val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9084 	if (normalize) {
9085 	    /*
9086 	     * Sometimes a second normalisation pass for spaces is needed
9087 	     * but that only happens if charrefs or entities refernces
9088 	     * have been used in the attribute value, i.e. the attribute
9089 	     * value have been extracted in an allocated string already.
9090 	     */
9091 	    if (*alloc) {
9092 	        const xmlChar *val2;
9093 
9094 	        val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9095 		if ((val2 != NULL) && (val2 != val)) {
9096 		    xmlFree(val);
9097 		    val = (xmlChar *) val2;
9098 		}
9099 	    }
9100 	}
9101         ctxt->instate = XML_PARSER_CONTENT;
9102     } else {
9103         xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9104                           "Specification mandates value for attribute %s\n",
9105                           name);
9106         return (NULL);
9107     }
9108 
9109     if (*prefix == ctxt->str_xml) {
9110         /*
9111          * Check that xml:lang conforms to the specification
9112          * No more registered as an error, just generate a warning now
9113          * since this was deprecated in XML second edition
9114          */
9115         if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9116             internal_val = xmlStrndup(val, *len);
9117             if (!xmlCheckLanguageID(internal_val)) {
9118                 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9119                               "Malformed value for xml:lang : %s\n",
9120                               internal_val, NULL);
9121             }
9122         }
9123 
9124         /*
9125          * Check that xml:space conforms to the specification
9126          */
9127         if (xmlStrEqual(name, BAD_CAST "space")) {
9128             internal_val = xmlStrndup(val, *len);
9129             if (xmlStrEqual(internal_val, BAD_CAST "default"))
9130                 *(ctxt->space) = 0;
9131             else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9132                 *(ctxt->space) = 1;
9133             else {
9134                 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9135                               "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9136                               internal_val, NULL);
9137             }
9138         }
9139         if (internal_val) {
9140             xmlFree(internal_val);
9141         }
9142     }
9143 
9144     *value = val;
9145     return (name);
9146 }
9147 /**
9148  * xmlParseStartTag2:
9149  * @ctxt:  an XML parser context
9150  *
9151  * parse a start of tag either for rule element or
 * EmptyElement. In both cases we don't parse the tag closing chars.
9153  * This routine is called when running SAX2 parsing
9154  *
9155  * [40] STag ::= '<' Name (S Attribute)* S? '>'
9156  *
9157  * [ WFC: Unique Att Spec ]
9158  * No attribute name may appear more than once in the same start-tag or
9159  * empty-element tag.
9160  *
9161  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9162  *
9163  * [ WFC: Unique Att Spec ]
9164  * No attribute name may appear more than once in the same start-tag or
9165  * empty-element tag.
9166  *
9167  * With namespace:
9168  *
9169  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9170  *
9171  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9172  *
9173  * Returns the element name parsed
9174  */
9175 
9176 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)9177 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9178                   const xmlChar **URI, int *tlen) {
9179     const xmlChar *localname;
9180     const xmlChar *prefix;
9181     const xmlChar *attname;
9182     const xmlChar *aprefix;
9183     const xmlChar *nsname;
9184     xmlChar *attvalue;
9185     const xmlChar **atts = ctxt->atts;
9186     int maxatts = ctxt->maxatts;
9187     int nratts, nbatts, nbdef, inputid;
9188     int i, j, nbNs, attval;
9189     unsigned long cur;
9190     int nsNr = ctxt->nsNr;
9191 
9192     if (RAW != '<') return(NULL);
9193     NEXT1;
9194 
9195     /*
9196      * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9197      *       point since the attribute values may be stored as pointers to
9198      *       the buffer and calling SHRINK would destroy them !
9199      *       The Shrinking is only possible once the full set of attribute
9200      *       callbacks have been done.
9201      */
9202     SHRINK;
9203     cur = ctxt->input->cur - ctxt->input->base;
9204     inputid = ctxt->input->id;
9205     nbatts = 0;
9206     nratts = 0;
9207     nbdef = 0;
9208     nbNs = 0;
9209     attval = 0;
9210     /* Forget any namespaces added during an earlier parse of this element. */
9211     ctxt->nsNr = nsNr;
9212 
9213     localname = xmlParseQName(ctxt, &prefix);
9214     if (localname == NULL) {
9215 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9216 		       "StartTag: invalid element name\n");
9217         return(NULL);
9218     }
9219     *tlen = ctxt->input->cur - ctxt->input->base - cur;
9220 
9221     /*
9222      * Now parse the attributes, it ends up with the ending
9223      *
9224      * (S Attribute)* S?
9225      */
9226     SKIP_BLANKS;
9227     GROW;
9228 
9229     while (((RAW != '>') &&
9230 	   ((RAW != '/') || (NXT(1) != '>')) &&
9231 	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9232 	const xmlChar *q = CUR_PTR;
9233 	unsigned int cons = ctxt->input->consumed;
9234 	int len = -1, alloc = 0;
9235 
9236 	attname = xmlParseAttribute2(ctxt, prefix, localname,
9237 	                             &aprefix, &attvalue, &len, &alloc);
9238         if ((attname == NULL) || (attvalue == NULL))
9239             goto next_attr;
9240 	if (len < 0) len = xmlStrlen(attvalue);
9241 
9242         if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9243             const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9244             xmlURIPtr uri;
9245 
9246             if (URL == NULL) {
9247                 xmlErrMemory(ctxt, "dictionary allocation failure");
9248                 if ((attvalue != NULL) && (alloc != 0))
9249                     xmlFree(attvalue);
9250                 return(NULL);
9251             }
9252             if (*URL != 0) {
9253                 uri = xmlParseURI((const char *) URL);
9254                 if (uri == NULL) {
9255                     xmlNsErr(ctxt, XML_WAR_NS_URI,
9256                              "xmlns: '%s' is not a valid URI\n",
9257                                        URL, NULL, NULL);
9258                 } else {
9259                     if (uri->scheme == NULL) {
9260                         xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9261                                   "xmlns: URI %s is not absolute\n",
9262                                   URL, NULL, NULL);
9263                     }
9264                     xmlFreeURI(uri);
9265                 }
9266                 if (URL == ctxt->str_xml_ns) {
9267                     if (attname != ctxt->str_xml) {
9268                         xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9269                      "xml namespace URI cannot be the default namespace\n",
9270                                  NULL, NULL, NULL);
9271                     }
9272                     goto next_attr;
9273                 }
9274                 if ((len == 29) &&
9275                     (xmlStrEqual(URL,
9276                              BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9277                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9278                          "reuse of the xmlns namespace name is forbidden\n",
9279                              NULL, NULL, NULL);
9280                     goto next_attr;
9281                 }
9282             }
9283             /*
9284              * check that it's not a defined namespace
9285              */
9286             for (j = 1;j <= nbNs;j++)
9287                 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9288                     break;
9289             if (j <= nbNs)
9290                 xmlErrAttributeDup(ctxt, NULL, attname);
9291             else
9292                 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9293 
9294         } else if (aprefix == ctxt->str_xmlns) {
9295             const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9296             xmlURIPtr uri;
9297 
9298             if (attname == ctxt->str_xml) {
9299                 if (URL != ctxt->str_xml_ns) {
9300                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9301                              "xml namespace prefix mapped to wrong URI\n",
9302                              NULL, NULL, NULL);
9303                 }
9304                 /*
9305                  * Do not keep a namespace definition node
9306                  */
9307                 goto next_attr;
9308             }
9309             if (URL == ctxt->str_xml_ns) {
9310                 if (attname != ctxt->str_xml) {
9311                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9312                              "xml namespace URI mapped to wrong prefix\n",
9313                              NULL, NULL, NULL);
9314                 }
9315                 goto next_attr;
9316             }
9317             if (attname == ctxt->str_xmlns) {
9318                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9319                          "redefinition of the xmlns prefix is forbidden\n",
9320                          NULL, NULL, NULL);
9321                 goto next_attr;
9322             }
9323             if ((len == 29) &&
9324                 (xmlStrEqual(URL,
9325                              BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9326                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9327                          "reuse of the xmlns namespace name is forbidden\n",
9328                          NULL, NULL, NULL);
9329                 goto next_attr;
9330             }
9331             if ((URL == NULL) || (URL[0] == 0)) {
9332                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9333                          "xmlns:%s: Empty XML namespace is not allowed\n",
9334                               attname, NULL, NULL);
9335                 goto next_attr;
9336             } else {
9337                 uri = xmlParseURI((const char *) URL);
9338                 if (uri == NULL) {
9339                     xmlNsErr(ctxt, XML_WAR_NS_URI,
9340                          "xmlns:%s: '%s' is not a valid URI\n",
9341                                        attname, URL, NULL);
9342                 } else {
9343                     if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9344                         xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9345                                   "xmlns:%s: URI %s is not absolute\n",
9346                                   attname, URL, NULL);
9347                     }
9348                     xmlFreeURI(uri);
9349                 }
9350             }
9351 
9352             /*
9353              * check that it's not a defined namespace
9354              */
9355             for (j = 1;j <= nbNs;j++)
9356                 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9357                     break;
9358             if (j <= nbNs)
9359                 xmlErrAttributeDup(ctxt, aprefix, attname);
9360             else
9361                 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9362 
9363         } else {
9364             /*
9365              * Add the pair to atts
9366              */
9367             if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9368                 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9369                     goto next_attr;
9370                 }
9371                 maxatts = ctxt->maxatts;
9372                 atts = ctxt->atts;
9373             }
9374             ctxt->attallocs[nratts++] = alloc;
9375             atts[nbatts++] = attname;
9376             atts[nbatts++] = aprefix;
9377             /*
9378              * The namespace URI field is used temporarily to point at the
9379              * base of the current input buffer for non-alloced attributes.
9380              * When the input buffer is reallocated, all the pointers become
9381              * invalid, but they can be reconstructed later.
9382              */
9383             if (alloc)
9384                 atts[nbatts++] = NULL;
9385             else
9386                 atts[nbatts++] = ctxt->input->base;
9387             atts[nbatts++] = attvalue;
9388             attvalue += len;
9389             atts[nbatts++] = attvalue;
9390             /*
9391              * tag if some deallocation is needed
9392              */
9393             if (alloc != 0) attval = 1;
9394             attvalue = NULL; /* moved into atts */
9395         }
9396 
9397 next_attr:
9398         if ((attvalue != NULL) && (alloc != 0)) {
9399             xmlFree(attvalue);
9400             attvalue = NULL;
9401         }
9402 
9403 	GROW
9404         if (ctxt->instate == XML_PARSER_EOF)
9405             break;
9406 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9407 	    break;
9408 	if (SKIP_BLANKS == 0) {
9409 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9410 			   "attributes construct error\n");
9411 	    break;
9412 	}
9413         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9414             (attname == NULL) && (attvalue == NULL)) {
9415 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9416 	         "xmlParseStartTag: problem parsing attributes\n");
9417 	    break;
9418 	}
9419         GROW;
9420     }
9421 
9422     if (ctxt->input->id != inputid) {
9423         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9424                     "Unexpected change of input\n");
9425         localname = NULL;
9426         goto done;
9427     }
9428 
9429     /* Reconstruct attribute value pointers. */
9430     for (i = 0, j = 0; j < nratts; i += 5, j++) {
9431         if (atts[i+2] != NULL) {
9432             /*
9433              * Arithmetic on dangling pointers is technically undefined
9434              * behavior, but well...
9435              */
9436             ptrdiff_t offset = ctxt->input->base - atts[i+2];
9437             atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9438             atts[i+3] += offset;  /* value */
9439             atts[i+4] += offset;  /* valuend */
9440         }
9441     }
9442 
9443     /*
9444      * The attributes defaulting
9445      */
9446     if (ctxt->attsDefault != NULL) {
9447         xmlDefAttrsPtr defaults;
9448 
9449 	defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9450 	if (defaults != NULL) {
9451 	    for (i = 0;i < defaults->nbAttrs;i++) {
9452 	        attname = defaults->values[5 * i];
9453 		aprefix = defaults->values[5 * i + 1];
9454 
9455                 /*
9456 		 * special work for namespaces defaulted defs
9457 		 */
9458 		if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9459 		    /*
9460 		     * check that it's not a defined namespace
9461 		     */
9462 		    for (j = 1;j <= nbNs;j++)
9463 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9464 			    break;
9465 	            if (j <= nbNs) continue;
9466 
9467 		    nsname = xmlGetNamespace(ctxt, NULL);
9468 		    if (nsname != defaults->values[5 * i + 2]) {
9469 			if (nsPush(ctxt, NULL,
9470 			           defaults->values[5 * i + 2]) > 0)
9471 			    nbNs++;
9472 		    }
9473 		} else if (aprefix == ctxt->str_xmlns) {
9474 		    /*
9475 		     * check that it's not a defined namespace
9476 		     */
9477 		    for (j = 1;j <= nbNs;j++)
9478 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9479 			    break;
9480 	            if (j <= nbNs) continue;
9481 
9482 		    nsname = xmlGetNamespace(ctxt, attname);
9483 		    if (nsname != defaults->values[2]) {
9484 			if (nsPush(ctxt, attname,
9485 			           defaults->values[5 * i + 2]) > 0)
9486 			    nbNs++;
9487 		    }
9488 		} else {
9489 		    /*
9490 		     * check that it's not a defined attribute
9491 		     */
9492 		    for (j = 0;j < nbatts;j+=5) {
9493 			if ((attname == atts[j]) && (aprefix == atts[j+1]))
9494 			    break;
9495 		    }
9496 		    if (j < nbatts) continue;
9497 
9498 		    if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9499 			if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9500 			    return(NULL);
9501 			}
9502 			maxatts = ctxt->maxatts;
9503 			atts = ctxt->atts;
9504 		    }
9505 		    atts[nbatts++] = attname;
9506 		    atts[nbatts++] = aprefix;
9507 		    if (aprefix == NULL)
9508 			atts[nbatts++] = NULL;
9509 		    else
9510 		        atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9511 		    atts[nbatts++] = defaults->values[5 * i + 2];
9512 		    atts[nbatts++] = defaults->values[5 * i + 3];
9513 		    if ((ctxt->standalone == 1) &&
9514 		        (defaults->values[5 * i + 4] != NULL)) {
9515 			xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9516 	  "standalone: attribute %s on %s defaulted from external subset\n",
9517 	                                 attname, localname);
9518 		    }
9519 		    nbdef++;
9520 		}
9521 	    }
9522 	}
9523     }
9524 
9525     /*
9526      * The attributes checkings
9527      */
9528     for (i = 0; i < nbatts;i += 5) {
9529         /*
9530 	* The default namespace does not apply to attribute names.
9531 	*/
9532 	if (atts[i + 1] != NULL) {
9533 	    nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9534 	    if (nsname == NULL) {
9535 		xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9536 		    "Namespace prefix %s for %s on %s is not defined\n",
9537 		    atts[i + 1], atts[i], localname);
9538 	    }
9539 	    atts[i + 2] = nsname;
9540 	} else
9541 	    nsname = NULL;
9542 	/*
9543 	 * [ WFC: Unique Att Spec ]
9544 	 * No attribute name may appear more than once in the same
9545 	 * start-tag or empty-element tag.
9546 	 * As extended by the Namespace in XML REC.
9547 	 */
9548         for (j = 0; j < i;j += 5) {
9549 	    if (atts[i] == atts[j]) {
9550 	        if (atts[i+1] == atts[j+1]) {
9551 		    xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9552 		    break;
9553 		}
9554 		if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9555 		    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9556 			     "Namespaced Attribute %s in '%s' redefined\n",
9557 			     atts[i], nsname, NULL);
9558 		    break;
9559 		}
9560 	    }
9561 	}
9562     }
9563 
9564     nsname = xmlGetNamespace(ctxt, prefix);
9565     if ((prefix != NULL) && (nsname == NULL)) {
9566 	xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9567 	         "Namespace prefix %s on %s is not defined\n",
9568 		 prefix, localname, NULL);
9569     }
9570     *pref = prefix;
9571     *URI = nsname;
9572 
9573     /*
9574      * SAX: Start of Element !
9575      */
9576     if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9577 	(!ctxt->disableSAX)) {
9578 	if (nbNs > 0)
9579 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9580 			  nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9581 			  nbatts / 5, nbdef, atts);
9582 	else
9583 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9584 	                  nsname, 0, NULL, nbatts / 5, nbdef, atts);
9585     }
9586 
9587 done:
9588     /*
9589      * Free up attribute allocated strings if needed
9590      */
9591     if (attval != 0) {
9592 	for (i = 3,j = 0; j < nratts;i += 5,j++)
9593 	    if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9594 	        xmlFree((xmlChar *) atts[i]);
9595     }
9596 
9597     return(localname);
9598 }
9599 
9600 /**
9601  * xmlParseEndTag2:
9602  * @ctxt:  an XML parser context
9603  * @line:  line of the start tag
9604  * @nsNr:  number of namespaces on the start tag
9605  *
9606  * parse an end of tag
9607  *
9608  * [42] ETag ::= '</' Name S? '>'
9609  *
9610  * With namespace
9611  *
9612  * [NS 9] ETag ::= '</' QName S? '>'
9613  */
9614 
9615 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr,int tlen)9616 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9617                 const xmlChar *URI, int line, int nsNr, int tlen) {
9618     const xmlChar *name;
9619     size_t curLength;
9620 
9621     GROW;
9622     if ((RAW != '<') || (NXT(1) != '/')) {
9623 	xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9624 	return;
9625     }
9626     SKIP(2);
9627 
9628     curLength = ctxt->input->end - ctxt->input->cur;
9629     if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9630         (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9631         if ((curLength >= (size_t)(tlen + 1)) &&
9632 	    (ctxt->input->cur[tlen] == '>')) {
9633 	    ctxt->input->cur += tlen + 1;
9634 	    ctxt->input->col += tlen + 1;
9635 	    goto done;
9636 	}
9637 	ctxt->input->cur += tlen;
9638 	ctxt->input->col += tlen;
9639 	name = (xmlChar*)1;
9640     } else {
9641 	if (prefix == NULL)
9642 	    name = xmlParseNameAndCompare(ctxt, ctxt->name);
9643 	else
9644 	    name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9645     }
9646 
9647     /*
9648      * We should definitely be at the ending "S? '>'" part
9649      */
9650     GROW;
9651     if (ctxt->instate == XML_PARSER_EOF)
9652         return;
9653     SKIP_BLANKS;
9654     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9655 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9656     } else
9657 	NEXT1;
9658 
9659     /*
9660      * [ WFC: Element Type Match ]
9661      * The Name in an element's end-tag must match the element type in the
9662      * start-tag.
9663      *
9664      */
9665     if (name != (xmlChar*)1) {
9666         if (name == NULL) name = BAD_CAST "unparseable";
9667         if ((line == 0) && (ctxt->node != NULL))
9668             line = ctxt->node->line;
9669         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9670 		     "Opening and ending tag mismatch: %s line %d and %s\n",
9671 		                ctxt->name, line, name);
9672     }
9673 
9674     /*
9675      * SAX: End of Tag
9676      */
9677 done:
9678     if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9679 	(!ctxt->disableSAX))
9680 	ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9681 
9682     spacePop(ctxt);
9683     if (nsNr != 0)
9684 	nsPop(ctxt, nsNr);
9685     return;
9686 }
9687 
9688 /**
9689  * xmlParseCDSect:
9690  * @ctxt:  an XML parser context
9691  *
9692  * Parse escaped pure raw content.
9693  *
9694  * [18] CDSect ::= CDStart CData CDEnd
9695  *
9696  * [19] CDStart ::= '<![CDATA['
9697  *
9698  * [20] Data ::= (Char* - (Char* ']]>' Char*))
9699  *
9700  * [21] CDEnd ::= ']]>'
9701  */
9702 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9703 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9704     xmlChar *buf = NULL;
9705     int len = 0;
9706     int size = XML_PARSER_BUFFER_SIZE;
9707     int r, rl;
9708     int	s, sl;
9709     int cur, l;
9710     int count = 0;
9711 
9712     /* Check 2.6.0 was NXT(0) not RAW */
9713     if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9714 	SKIP(9);
9715     } else
9716         return;
9717 
9718     ctxt->instate = XML_PARSER_CDATA_SECTION;
9719     r = CUR_CHAR(rl);
9720     if (!IS_CHAR(r)) {
9721 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9722 	ctxt->instate = XML_PARSER_CONTENT;
9723         return;
9724     }
9725     NEXTL(rl);
9726     s = CUR_CHAR(sl);
9727     if (!IS_CHAR(s)) {
9728 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9729 	ctxt->instate = XML_PARSER_CONTENT;
9730         return;
9731     }
9732     NEXTL(sl);
9733     cur = CUR_CHAR(l);
9734     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9735     if (buf == NULL) {
9736 	xmlErrMemory(ctxt, NULL);
9737 	return;
9738     }
9739     while (IS_CHAR(cur) &&
9740            ((r != ']') || (s != ']') || (cur != '>'))) {
9741 	if (len + 5 >= size) {
9742 	    xmlChar *tmp;
9743 
9744             if ((size > XML_MAX_TEXT_LENGTH) &&
9745                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9746                 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9747                              "CData section too big found", NULL);
9748                 xmlFree (buf);
9749                 return;
9750             }
9751 	    tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9752 	    if (tmp == NULL) {
9753 	        xmlFree(buf);
9754 		xmlErrMemory(ctxt, NULL);
9755 		return;
9756 	    }
9757 	    buf = tmp;
9758 	    size *= 2;
9759 	}
9760 	COPY_BUF(rl,buf,len,r);
9761 	r = s;
9762 	rl = sl;
9763 	s = cur;
9764 	sl = l;
9765 	count++;
9766 	if (count > 50) {
9767 	    GROW;
9768             if (ctxt->instate == XML_PARSER_EOF) {
9769 		xmlFree(buf);
9770 		return;
9771             }
9772 	    count = 0;
9773 	}
9774 	NEXTL(l);
9775 	cur = CUR_CHAR(l);
9776     }
9777     buf[len] = 0;
9778     ctxt->instate = XML_PARSER_CONTENT;
9779     if (cur != '>') {
9780 	xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9781 	                     "CData section not finished\n%.50s\n", buf);
9782 	xmlFree(buf);
9783         return;
9784     }
9785     NEXTL(l);
9786 
9787     /*
9788      * OK the buffer is to be consumed as cdata.
9789      */
9790     if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9791 	if (ctxt->sax->cdataBlock != NULL)
9792 	    ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9793 	else if (ctxt->sax->characters != NULL)
9794 	    ctxt->sax->characters(ctxt->userData, buf, len);
9795     }
9796     xmlFree(buf);
9797 }
9798 
9799 /**
9800  * xmlParseContent:
9801  * @ctxt:  an XML parser context
9802  *
9803  * Parse a content:
9804  *
9805  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9806  */
9807 
9808 void
xmlParseContent(xmlParserCtxtPtr ctxt)9809 xmlParseContent(xmlParserCtxtPtr ctxt) {
9810     GROW;
9811     while ((RAW != 0) &&
9812 	   ((RAW != '<') || (NXT(1) != '/')) &&
9813 	   (ctxt->instate != XML_PARSER_EOF)) {
9814 	const xmlChar *test = CUR_PTR;
9815 	unsigned int cons = ctxt->input->consumed;
9816 	const xmlChar *cur = ctxt->input->cur;
9817 
9818 	/*
9819 	 * First case : a Processing Instruction.
9820 	 */
9821 	if ((*cur == '<') && (cur[1] == '?')) {
9822 	    xmlParsePI(ctxt);
9823 	}
9824 
9825 	/*
9826 	 * Second case : a CDSection
9827 	 */
9828 	/* 2.6.0 test was *cur not RAW */
9829 	else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9830 	    xmlParseCDSect(ctxt);
9831 	}
9832 
9833 	/*
9834 	 * Third case :  a comment
9835 	 */
9836 	else if ((*cur == '<') && (NXT(1) == '!') &&
9837 		 (NXT(2) == '-') && (NXT(3) == '-')) {
9838 	    xmlParseComment(ctxt);
9839 	    ctxt->instate = XML_PARSER_CONTENT;
9840 	}
9841 
9842 	/*
9843 	 * Fourth case :  a sub-element.
9844 	 */
9845 	else if (*cur == '<') {
9846 	    xmlParseElement(ctxt);
9847 	}
9848 
9849 	/*
9850 	 * Fifth case : a reference. If if has not been resolved,
9851 	 *    parsing returns it's Name, create the node
9852 	 */
9853 
9854 	else if (*cur == '&') {
9855 	    xmlParseReference(ctxt);
9856 	}
9857 
9858 	/*
9859 	 * Last case, text. Note that References are handled directly.
9860 	 */
9861 	else {
9862 	    xmlParseCharData(ctxt, 0);
9863 	}
9864 
9865 	GROW;
9866 	SHRINK;
9867 
9868 	if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9869 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9870 	                "detected an error in element content\n");
9871 	    xmlHaltParser(ctxt);
9872             break;
9873 	}
9874     }
9875 }
9876 
9877 /**
9878  * xmlParseElement:
9879  * @ctxt:  an XML parser context
9880  *
9881  * parse an XML element, this is highly recursive
9882  *
9883  * [39] element ::= EmptyElemTag | STag content ETag
9884  *
9885  * [ WFC: Element Type Match ]
9886  * The Name in an element's end-tag must match the element type in the
9887  * start-tag.
9888  *
9889  */
9890 
9891 void
xmlParseElement(xmlParserCtxtPtr ctxt)9892 xmlParseElement(xmlParserCtxtPtr ctxt) {
9893     const xmlChar *name;
9894     const xmlChar *prefix = NULL;
9895     const xmlChar *URI = NULL;
9896     xmlParserNodeInfo node_info;
9897     int line, tlen = 0;
9898     xmlNodePtr ret;
9899     int nsNr = ctxt->nsNr;
9900 
9901     if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9902         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9903 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9904 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9905 			  xmlParserMaxDepth);
9906 	xmlHaltParser(ctxt);
9907 	return;
9908     }
9909 
9910     /* Capture start position */
9911     if (ctxt->record_info) {
9912         node_info.begin_pos = ctxt->input->consumed +
9913                           (CUR_PTR - ctxt->input->base);
9914 	node_info.begin_line = ctxt->input->line;
9915     }
9916 
9917     if (ctxt->spaceNr == 0)
9918 	spacePush(ctxt, -1);
9919     else if (*ctxt->space == -2)
9920 	spacePush(ctxt, -1);
9921     else
9922 	spacePush(ctxt, *ctxt->space);
9923 
9924     line = ctxt->input->line;
9925 #ifdef LIBXML_SAX1_ENABLED
9926     if (ctxt->sax2)
9927 #endif /* LIBXML_SAX1_ENABLED */
9928         name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9929 #ifdef LIBXML_SAX1_ENABLED
9930     else
9931 	name = xmlParseStartTag(ctxt);
9932 #endif /* LIBXML_SAX1_ENABLED */
9933     if (ctxt->instate == XML_PARSER_EOF)
9934 	return;
9935     if (name == NULL) {
9936 	spacePop(ctxt);
9937         return;
9938     }
9939     namePush(ctxt, name);
9940     ret = ctxt->node;
9941 
9942 #ifdef LIBXML_VALID_ENABLED
9943     /*
9944      * [ VC: Root Element Type ]
9945      * The Name in the document type declaration must match the element
9946      * type of the root element.
9947      */
9948     if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9949         ctxt->node && (ctxt->node == ctxt->myDoc->children))
9950         ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9951 #endif /* LIBXML_VALID_ENABLED */
9952 
9953     /*
9954      * Check for an Empty Element.
9955      */
9956     if ((RAW == '/') && (NXT(1) == '>')) {
9957         SKIP(2);
9958 	if (ctxt->sax2) {
9959 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9960 		(!ctxt->disableSAX))
9961 		ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9962 #ifdef LIBXML_SAX1_ENABLED
9963 	} else {
9964 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9965 		(!ctxt->disableSAX))
9966 		ctxt->sax->endElement(ctxt->userData, name);
9967 #endif /* LIBXML_SAX1_ENABLED */
9968 	}
9969 	namePop(ctxt);
9970 	spacePop(ctxt);
9971 	if (nsNr != ctxt->nsNr)
9972 	    nsPop(ctxt, ctxt->nsNr - nsNr);
9973 	if ( ret != NULL && ctxt->record_info ) {
9974 	   node_info.end_pos = ctxt->input->consumed +
9975 			      (CUR_PTR - ctxt->input->base);
9976 	   node_info.end_line = ctxt->input->line;
9977 	   node_info.node = ret;
9978 	   xmlParserAddNodeInfo(ctxt, &node_info);
9979 	}
9980 	return;
9981     }
9982     if (RAW == '>') {
9983         NEXT1;
9984     } else {
9985         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9986 		     "Couldn't find end of Start Tag %s line %d\n",
9987 		                name, line, NULL);
9988 
9989 	/*
9990 	 * end of parsing of this node.
9991 	 */
9992 	nodePop(ctxt);
9993 	namePop(ctxt);
9994 	spacePop(ctxt);
9995 	if (nsNr != ctxt->nsNr)
9996 	    nsPop(ctxt, ctxt->nsNr - nsNr);
9997 
9998 	/*
9999 	 * Capture end position and add node
10000 	 */
10001 	if ( ret != NULL && ctxt->record_info ) {
10002 	   node_info.end_pos = ctxt->input->consumed +
10003 			      (CUR_PTR - ctxt->input->base);
10004 	   node_info.end_line = ctxt->input->line;
10005 	   node_info.node = ret;
10006 	   xmlParserAddNodeInfo(ctxt, &node_info);
10007 	}
10008 	return;
10009     }
10010 
10011     /*
10012      * Parse the content of the element:
10013      */
10014     xmlParseContent(ctxt);
10015     if (ctxt->instate == XML_PARSER_EOF)
10016 	return;
10017     if (!IS_BYTE_CHAR(RAW)) {
10018         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10019 	 "Premature end of data in tag %s line %d\n",
10020 		                name, line, NULL);
10021 
10022 	/*
10023 	 * end of parsing of this node.
10024 	 */
10025 	nodePop(ctxt);
10026 	namePop(ctxt);
10027 	spacePop(ctxt);
10028 	if (nsNr != ctxt->nsNr)
10029 	    nsPop(ctxt, ctxt->nsNr - nsNr);
10030 	return;
10031     }
10032 
10033     /*
10034      * parse the end of tag: '</' should be here.
10035      */
10036     if (ctxt->sax2) {
10037 	xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10038 	namePop(ctxt);
10039     }
10040 #ifdef LIBXML_SAX1_ENABLED
10041       else
10042 	xmlParseEndTag1(ctxt, line);
10043 #endif /* LIBXML_SAX1_ENABLED */
10044 
10045     /*
10046      * Capture end position and add node
10047      */
10048     if ( ret != NULL && ctxt->record_info ) {
10049        node_info.end_pos = ctxt->input->consumed +
10050                           (CUR_PTR - ctxt->input->base);
10051        node_info.end_line = ctxt->input->line;
10052        node_info.node = ret;
10053        xmlParserAddNodeInfo(ctxt, &node_info);
10054     }
10055 }
10056 
10057 /**
10058  * xmlParseVersionNum:
10059  * @ctxt:  an XML parser context
10060  *
10061  * parse the XML version value.
10062  *
10063  * [26] VersionNum ::= '1.' [0-9]+
10064  *
10065  * In practice allow [0-9].[0-9]+ at that level
10066  *
10067  * Returns the string giving the XML version number, or NULL
10068  */
10069 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10070 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10071     xmlChar *buf = NULL;
10072     int len = 0;
10073     int size = 10;
10074     xmlChar cur;
10075 
10076     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10077     if (buf == NULL) {
10078 	xmlErrMemory(ctxt, NULL);
10079 	return(NULL);
10080     }
10081     cur = CUR;
10082     if (!((cur >= '0') && (cur <= '9'))) {
10083 	xmlFree(buf);
10084 	return(NULL);
10085     }
10086     buf[len++] = cur;
10087     NEXT;
10088     cur=CUR;
10089     if (cur != '.') {
10090 	xmlFree(buf);
10091 	return(NULL);
10092     }
10093     buf[len++] = cur;
10094     NEXT;
10095     cur=CUR;
10096     while ((cur >= '0') && (cur <= '9')) {
10097 	if (len + 1 >= size) {
10098 	    xmlChar *tmp;
10099 
10100 	    size *= 2;
10101 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10102 	    if (tmp == NULL) {
10103 	        xmlFree(buf);
10104 		xmlErrMemory(ctxt, NULL);
10105 		return(NULL);
10106 	    }
10107 	    buf = tmp;
10108 	}
10109 	buf[len++] = cur;
10110 	NEXT;
10111 	cur=CUR;
10112     }
10113     buf[len] = 0;
10114     return(buf);
10115 }
10116 
10117 /**
10118  * xmlParseVersionInfo:
10119  * @ctxt:  an XML parser context
10120  *
10121  * parse the XML version.
10122  *
10123  * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10124  *
10125  * [25] Eq ::= S? '=' S?
10126  *
10127  * Returns the version string, e.g. "1.0"
10128  */
10129 
10130 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10131 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10132     xmlChar *version = NULL;
10133 
10134     if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10135 	SKIP(7);
10136 	SKIP_BLANKS;
10137 	if (RAW != '=') {
10138 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10139 	    return(NULL);
10140         }
10141 	NEXT;
10142 	SKIP_BLANKS;
10143 	if (RAW == '"') {
10144 	    NEXT;
10145 	    version = xmlParseVersionNum(ctxt);
10146 	    if (RAW != '"') {
10147 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10148 	    } else
10149 	        NEXT;
10150 	} else if (RAW == '\''){
10151 	    NEXT;
10152 	    version = xmlParseVersionNum(ctxt);
10153 	    if (RAW != '\'') {
10154 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10155 	    } else
10156 	        NEXT;
10157 	} else {
10158 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10159 	}
10160     }
10161     return(version);
10162 }
10163 
10164 /**
10165  * xmlParseEncName:
10166  * @ctxt:  an XML parser context
10167  *
10168  * parse the XML encoding name
10169  *
10170  * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10171  *
10172  * Returns the encoding name value or NULL
10173  */
10174 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10175 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10176     xmlChar *buf = NULL;
10177     int len = 0;
10178     int size = 10;
10179     xmlChar cur;
10180 
10181     cur = CUR;
10182     if (((cur >= 'a') && (cur <= 'z')) ||
10183         ((cur >= 'A') && (cur <= 'Z'))) {
10184 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10185 	if (buf == NULL) {
10186 	    xmlErrMemory(ctxt, NULL);
10187 	    return(NULL);
10188 	}
10189 
10190 	buf[len++] = cur;
10191 	NEXT;
10192 	cur = CUR;
10193 	while (((cur >= 'a') && (cur <= 'z')) ||
10194 	       ((cur >= 'A') && (cur <= 'Z')) ||
10195 	       ((cur >= '0') && (cur <= '9')) ||
10196 	       (cur == '.') || (cur == '_') ||
10197 	       (cur == '-')) {
10198 	    if (len + 1 >= size) {
10199 	        xmlChar *tmp;
10200 
10201 		size *= 2;
10202 		tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10203 		if (tmp == NULL) {
10204 		    xmlErrMemory(ctxt, NULL);
10205 		    xmlFree(buf);
10206 		    return(NULL);
10207 		}
10208 		buf = tmp;
10209 	    }
10210 	    buf[len++] = cur;
10211 	    NEXT;
10212 	    cur = CUR;
10213 	    if (cur == 0) {
10214 	        SHRINK;
10215 		GROW;
10216 		cur = CUR;
10217 	    }
10218         }
10219 	buf[len] = 0;
10220     } else {
10221 	xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10222     }
10223     return(buf);
10224 }
10225 
10226 /**
10227  * xmlParseEncodingDecl:
10228  * @ctxt:  an XML parser context
10229  *
10230  * parse the XML encoding declaration
10231  *
10232  * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10233  *
10234  * this setups the conversion filters.
10235  *
10236  * Returns the encoding value or NULL
10237  */
10238 
10239 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10240 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10241     xmlChar *encoding = NULL;
10242 
10243     SKIP_BLANKS;
10244     if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10245 	SKIP(8);
10246 	SKIP_BLANKS;
10247 	if (RAW != '=') {
10248 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10249 	    return(NULL);
10250         }
10251 	NEXT;
10252 	SKIP_BLANKS;
10253 	if (RAW == '"') {
10254 	    NEXT;
10255 	    encoding = xmlParseEncName(ctxt);
10256 	    if (RAW != '"') {
10257 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10258 		xmlFree((xmlChar *) encoding);
10259 		return(NULL);
10260 	    } else
10261 	        NEXT;
10262 	} else if (RAW == '\''){
10263 	    NEXT;
10264 	    encoding = xmlParseEncName(ctxt);
10265 	    if (RAW != '\'') {
10266 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10267 		xmlFree((xmlChar *) encoding);
10268 		return(NULL);
10269 	    } else
10270 	        NEXT;
10271 	} else {
10272 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10273 	}
10274 
10275         /*
10276          * Non standard parsing, allowing the user to ignore encoding
10277          */
10278         if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10279 	    xmlFree((xmlChar *) encoding);
10280             return(NULL);
10281 	}
10282 
10283 	/*
10284 	 * UTF-16 encoding stwich has already taken place at this stage,
10285 	 * more over the little-endian/big-endian selection is already done
10286 	 */
10287         if ((encoding != NULL) &&
10288 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10289 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10290 	    /*
10291 	     * If no encoding was passed to the parser, that we are
10292 	     * using UTF-16 and no decoder is present i.e. the
10293 	     * document is apparently UTF-8 compatible, then raise an
10294 	     * encoding mismatch fatal error
10295 	     */
10296 	    if ((ctxt->encoding == NULL) &&
10297 	        (ctxt->input->buf != NULL) &&
10298 	        (ctxt->input->buf->encoder == NULL)) {
10299 		xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10300 		  "Document labelled UTF-16 but has UTF-8 content\n");
10301 	    }
10302 	    if (ctxt->encoding != NULL)
10303 		xmlFree((xmlChar *) ctxt->encoding);
10304 	    ctxt->encoding = encoding;
10305 	}
10306 	/*
10307 	 * UTF-8 encoding is handled natively
10308 	 */
10309         else if ((encoding != NULL) &&
10310 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10311 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10312 	    if (ctxt->encoding != NULL)
10313 		xmlFree((xmlChar *) ctxt->encoding);
10314 	    ctxt->encoding = encoding;
10315 	}
10316 	else if (encoding != NULL) {
10317 	    xmlCharEncodingHandlerPtr handler;
10318 
10319 	    if (ctxt->input->encoding != NULL)
10320 		xmlFree((xmlChar *) ctxt->input->encoding);
10321 	    ctxt->input->encoding = encoding;
10322 
10323             handler = xmlFindCharEncodingHandler((const char *) encoding);
10324 	    if (handler != NULL) {
10325 		if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10326 		    /* failed to convert */
10327 		    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10328 		    return(NULL);
10329 		}
10330 	    } else {
10331 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10332 			"Unsupported encoding %s\n", encoding);
10333 		return(NULL);
10334 	    }
10335 	}
10336     }
10337     return(encoding);
10338 }
10339 
10340 /**
10341  * xmlParseSDDecl:
10342  * @ctxt:  an XML parser context
10343  *
10344  * parse the XML standalone declaration
10345  *
10346  * [32] SDDecl ::= S 'standalone' Eq
10347  *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10348  *
10349  * [ VC: Standalone Document Declaration ]
10350  * TODO The standalone document declaration must have the value "no"
10351  * if any external markup declarations contain declarations of:
10352  *  - attributes with default values, if elements to which these
10353  *    attributes apply appear in the document without specifications
10354  *    of values for these attributes, or
10355  *  - entities (other than amp, lt, gt, apos, quot), if references
10356  *    to those entities appear in the document, or
10357  *  - attributes with values subject to normalization, where the
10358  *    attribute appears in the document with a value which will change
10359  *    as a result of normalization, or
10360  *  - element types with element content, if white space occurs directly
10361  *    within any instance of those types.
10362  *
10363  * Returns:
10364  *   1 if standalone="yes"
10365  *   0 if standalone="no"
10366  *  -2 if standalone attribute is missing or invalid
10367  *	  (A standalone value of -2 means that the XML declaration was found,
10368  *	   but no value was specified for the standalone attribute).
10369  */
10370 
10371 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10372 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10373     int standalone = -2;
10374 
10375     SKIP_BLANKS;
10376     if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10377 	SKIP(10);
10378         SKIP_BLANKS;
10379 	if (RAW != '=') {
10380 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10381 	    return(standalone);
10382         }
10383 	NEXT;
10384 	SKIP_BLANKS;
10385         if (RAW == '\''){
10386 	    NEXT;
10387 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10388 	        standalone = 0;
10389                 SKIP(2);
10390 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10391 	               (NXT(2) == 's')) {
10392 	        standalone = 1;
10393 		SKIP(3);
10394             } else {
10395 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10396 	    }
10397 	    if (RAW != '\'') {
10398 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10399 	    } else
10400 	        NEXT;
10401 	} else if (RAW == '"'){
10402 	    NEXT;
10403 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10404 	        standalone = 0;
10405 		SKIP(2);
10406 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10407 	               (NXT(2) == 's')) {
10408 	        standalone = 1;
10409                 SKIP(3);
10410             } else {
10411 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10412 	    }
10413 	    if (RAW != '"') {
10414 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10415 	    } else
10416 	        NEXT;
10417 	} else {
10418 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10419         }
10420     }
10421     return(standalone);
10422 }
10423 
10424 /**
10425  * xmlParseXMLDecl:
10426  * @ctxt:  an XML parser context
10427  *
10428  * parse an XML declaration header
10429  *
10430  * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10431  */
10432 
10433 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10434 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10435     xmlChar *version;
10436 
10437     /*
10438      * This value for standalone indicates that the document has an
10439      * XML declaration but it does not have a standalone attribute.
10440      * It will be overwritten later if a standalone attribute is found.
10441      */
10442     ctxt->input->standalone = -2;
10443 
10444     /*
10445      * We know that '<?xml' is here.
10446      */
10447     SKIP(5);
10448 
10449     if (!IS_BLANK_CH(RAW)) {
10450 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10451 	               "Blank needed after '<?xml'\n");
10452     }
10453     SKIP_BLANKS;
10454 
10455     /*
10456      * We must have the VersionInfo here.
10457      */
10458     version = xmlParseVersionInfo(ctxt);
10459     if (version == NULL) {
10460 	xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10461     } else {
10462 	if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10463 	    /*
10464 	     * Changed here for XML-1.0 5th edition
10465 	     */
10466 	    if (ctxt->options & XML_PARSE_OLD10) {
10467 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10468 			          "Unsupported version '%s'\n",
10469 			          version);
10470 	    } else {
10471 	        if ((version[0] == '1') && ((version[1] == '.'))) {
10472 		    xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10473 		                  "Unsupported version '%s'\n",
10474 				  version, NULL);
10475 		} else {
10476 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10477 				      "Unsupported version '%s'\n",
10478 				      version);
10479 		}
10480 	    }
10481 	}
10482 	if (ctxt->version != NULL)
10483 	    xmlFree((void *) ctxt->version);
10484 	ctxt->version = version;
10485     }
10486 
10487     /*
10488      * We may have the encoding declaration
10489      */
10490     if (!IS_BLANK_CH(RAW)) {
10491         if ((RAW == '?') && (NXT(1) == '>')) {
10492 	    SKIP(2);
10493 	    return;
10494 	}
10495 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10496     }
10497     xmlParseEncodingDecl(ctxt);
10498     if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10499          (ctxt->instate == XML_PARSER_EOF)) {
10500 	/*
10501 	 * The XML REC instructs us to stop parsing right here
10502 	 */
10503         return;
10504     }
10505 
10506     /*
10507      * We may have the standalone status.
10508      */
10509     if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10510         if ((RAW == '?') && (NXT(1) == '>')) {
10511 	    SKIP(2);
10512 	    return;
10513 	}
10514 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10515     }
10516 
10517     /*
10518      * We can grow the input buffer freely at that point
10519      */
10520     GROW;
10521 
10522     SKIP_BLANKS;
10523     ctxt->input->standalone = xmlParseSDDecl(ctxt);
10524 
10525     SKIP_BLANKS;
10526     if ((RAW == '?') && (NXT(1) == '>')) {
10527         SKIP(2);
10528     } else if (RAW == '>') {
10529         /* Deprecated old WD ... */
10530 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10531 	NEXT;
10532     } else {
10533 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10534 	MOVETO_ENDTAG(CUR_PTR);
10535 	NEXT;
10536     }
10537 }
10538 
10539 /**
10540  * xmlParseMisc:
10541  * @ctxt:  an XML parser context
10542  *
10543  * parse an XML Misc* optional field.
10544  *
10545  * [27] Misc ::= Comment | PI |  S
10546  */
10547 
10548 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10549 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10550     while ((ctxt->instate != XML_PARSER_EOF) &&
10551            (((RAW == '<') && (NXT(1) == '?')) ||
10552             (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10553             IS_BLANK_CH(CUR))) {
10554         if ((RAW == '<') && (NXT(1) == '?')) {
10555 	    xmlParsePI(ctxt);
10556 	} else if (IS_BLANK_CH(CUR)) {
10557 	    NEXT;
10558 	} else
10559 	    xmlParseComment(ctxt);
10560     }
10561 }
10562 
10563 /**
10564  * xmlParseDocument:
10565  * @ctxt:  an XML parser context
10566  *
10567  * parse an XML document (and build a tree if using the standard SAX
10568  * interface).
10569  *
10570  * [1] document ::= prolog element Misc*
10571  *
10572  * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10573  *
10574  * Returns 0, -1 in case of error. the parser context is augmented
10575  *                as a result of the parsing.
10576  */
10577 
10578 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10579 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10580     xmlChar start[4];
10581     xmlCharEncoding enc;
10582 
10583     xmlInitParser();
10584 
10585     if ((ctxt == NULL) || (ctxt->input == NULL))
10586         return(-1);
10587 
10588     GROW;
10589 
10590     /*
10591      * SAX: detecting the level.
10592      */
10593     xmlDetectSAX2(ctxt);
10594 
10595     /*
10596      * SAX: beginning of the document processing.
10597      */
10598     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10599         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10600     if (ctxt->instate == XML_PARSER_EOF)
10601 	return(-1);
10602 
10603     if ((ctxt->encoding == NULL) &&
10604         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10605 	/*
10606 	 * Get the 4 first bytes and decode the charset
10607 	 * if enc != XML_CHAR_ENCODING_NONE
10608 	 * plug some encoding conversion routines.
10609 	 */
10610 	start[0] = RAW;
10611 	start[1] = NXT(1);
10612 	start[2] = NXT(2);
10613 	start[3] = NXT(3);
10614 	enc = xmlDetectCharEncoding(&start[0], 4);
10615 	if (enc != XML_CHAR_ENCODING_NONE) {
10616 	    xmlSwitchEncoding(ctxt, enc);
10617 	}
10618     }
10619 
10620 
10621     if (CUR == 0) {
10622 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10623 	return(-1);
10624     }
10625 
10626     /*
10627      * Check for the XMLDecl in the Prolog.
10628      * do not GROW here to avoid the detected encoder to decode more
10629      * than just the first line, unless the amount of data is really
10630      * too small to hold "<?xml version="1.0" encoding="foo"
10631      */
10632     if ((ctxt->input->end - ctxt->input->cur) < 35) {
10633        GROW;
10634     }
10635     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10636 
10637 	/*
10638 	 * Note that we will switch encoding on the fly.
10639 	 */
10640 	xmlParseXMLDecl(ctxt);
10641 	if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10642 	    (ctxt->instate == XML_PARSER_EOF)) {
10643 	    /*
10644 	     * The XML REC instructs us to stop parsing right here
10645 	     */
10646 	    return(-1);
10647 	}
10648 	ctxt->standalone = ctxt->input->standalone;
10649 	SKIP_BLANKS;
10650     } else {
10651 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10652     }
10653     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10654         ctxt->sax->startDocument(ctxt->userData);
10655     if (ctxt->instate == XML_PARSER_EOF)
10656 	return(-1);
10657     if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10658         (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10659 	ctxt->myDoc->compression = ctxt->input->buf->compressed;
10660     }
10661 
10662     /*
10663      * The Misc part of the Prolog
10664      */
10665     GROW;
10666     xmlParseMisc(ctxt);
10667 
10668     /*
10669      * Then possibly doc type declaration(s) and more Misc
10670      * (doctypedecl Misc*)?
10671      */
10672     GROW;
10673     if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10674 
10675 	ctxt->inSubset = 1;
10676 	xmlParseDocTypeDecl(ctxt);
10677 	if (RAW == '[') {
10678 	    ctxt->instate = XML_PARSER_DTD;
10679 	    xmlParseInternalSubset(ctxt);
10680 	    if (ctxt->instate == XML_PARSER_EOF)
10681 		return(-1);
10682 	}
10683 
10684 	/*
10685 	 * Create and update the external subset.
10686 	 */
10687 	ctxt->inSubset = 2;
10688 	if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10689 	    (!ctxt->disableSAX))
10690 	    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10691 	                              ctxt->extSubSystem, ctxt->extSubURI);
10692 	if (ctxt->instate == XML_PARSER_EOF)
10693 	    return(-1);
10694 	ctxt->inSubset = 0;
10695 
10696         xmlCleanSpecialAttr(ctxt);
10697 
10698 	ctxt->instate = XML_PARSER_PROLOG;
10699 	xmlParseMisc(ctxt);
10700     }
10701 
10702     /*
10703      * Time to start parsing the tree itself
10704      */
10705     GROW;
10706     if (RAW != '<') {
10707 	xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10708 		       "Start tag expected, '<' not found\n");
10709     } else {
10710 	ctxt->instate = XML_PARSER_CONTENT;
10711 	xmlParseElement(ctxt);
10712 	ctxt->instate = XML_PARSER_EPILOG;
10713 
10714 
10715 	/*
10716 	 * The Misc part at the end
10717 	 */
10718 	xmlParseMisc(ctxt);
10719 
10720 	if (RAW != 0) {
10721 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10722 	}
10723 	ctxt->instate = XML_PARSER_EOF;
10724     }
10725 
10726     /*
10727      * SAX: end of the document processing.
10728      */
10729     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10730         ctxt->sax->endDocument(ctxt->userData);
10731 
10732     /*
10733      * Remove locally kept entity definitions if the tree was not built
10734      */
10735     if ((ctxt->myDoc != NULL) &&
10736 	(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10737 	xmlFreeDoc(ctxt->myDoc);
10738 	ctxt->myDoc = NULL;
10739     }
10740 
10741     if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10742         ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10743 	if (ctxt->valid)
10744 	    ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10745 	if (ctxt->nsWellFormed)
10746 	    ctxt->myDoc->properties |= XML_DOC_NSVALID;
10747 	if (ctxt->options & XML_PARSE_OLD10)
10748 	    ctxt->myDoc->properties |= XML_DOC_OLD10;
10749     }
10750     if (! ctxt->wellFormed) {
10751 	ctxt->valid = 0;
10752 	return(-1);
10753     }
10754     return(0);
10755 }
10756 
10757 /**
10758  * xmlParseExtParsedEnt:
10759  * @ctxt:  an XML parser context
10760  *
10761  * parse a general parsed entity
10762  * An external general parsed entity is well-formed if it matches the
10763  * production labeled extParsedEnt.
10764  *
10765  * [78] extParsedEnt ::= TextDecl? content
10766  *
10767  * Returns 0, -1 in case of error. the parser context is augmented
10768  *                as a result of the parsing.
10769  */
10770 
10771 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10772 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10773     xmlChar start[4];
10774     xmlCharEncoding enc;
10775 
10776     if ((ctxt == NULL) || (ctxt->input == NULL))
10777         return(-1);
10778 
10779     xmlDefaultSAXHandlerInit();
10780 
10781     xmlDetectSAX2(ctxt);
10782 
10783     GROW;
10784 
10785     /*
10786      * SAX: beginning of the document processing.
10787      */
10788     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10789         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10790 
10791     /*
10792      * Get the 4 first bytes and decode the charset
10793      * if enc != XML_CHAR_ENCODING_NONE
10794      * plug some encoding conversion routines.
10795      */
10796     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10797 	start[0] = RAW;
10798 	start[1] = NXT(1);
10799 	start[2] = NXT(2);
10800 	start[3] = NXT(3);
10801 	enc = xmlDetectCharEncoding(start, 4);
10802 	if (enc != XML_CHAR_ENCODING_NONE) {
10803 	    xmlSwitchEncoding(ctxt, enc);
10804 	}
10805     }
10806 
10807 
10808     if (CUR == 0) {
10809 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10810     }
10811 
10812     /*
10813      * Check for the XMLDecl in the Prolog.
10814      */
10815     GROW;
10816     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10817 
10818 	/*
10819 	 * Note that we will switch encoding on the fly.
10820 	 */
10821 	xmlParseXMLDecl(ctxt);
10822 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10823 	    /*
10824 	     * The XML REC instructs us to stop parsing right here
10825 	     */
10826 	    return(-1);
10827 	}
10828 	SKIP_BLANKS;
10829     } else {
10830 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10831     }
10832     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10833         ctxt->sax->startDocument(ctxt->userData);
10834     if (ctxt->instate == XML_PARSER_EOF)
10835 	return(-1);
10836 
10837     /*
10838      * Doing validity checking on chunk doesn't make sense
10839      */
10840     ctxt->instate = XML_PARSER_CONTENT;
10841     ctxt->validate = 0;
10842     ctxt->loadsubset = 0;
10843     ctxt->depth = 0;
10844 
10845     xmlParseContent(ctxt);
10846     if (ctxt->instate == XML_PARSER_EOF)
10847 	return(-1);
10848 
10849     if ((RAW == '<') && (NXT(1) == '/')) {
10850 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10851     } else if (RAW != 0) {
10852 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10853     }
10854 
10855     /*
10856      * SAX: end of the document processing.
10857      */
10858     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10859         ctxt->sax->endDocument(ctxt->userData);
10860 
10861     if (! ctxt->wellFormed) return(-1);
10862     return(0);
10863 }
10864 
10865 #ifdef LIBXML_PUSH_ENABLED
10866 /************************************************************************
10867  *									*
10868  *		Progressive parsing interfaces				*
10869  *									*
10870  ************************************************************************/
10871 
10872 /**
10873  * xmlParseLookupSequence:
10874  * @ctxt:  an XML parser context
10875  * @first:  the first char to lookup
10876  * @next:  the next char to lookup or zero
10877  * @third:  the next char to lookup or zero
10878  *
10879  * Try to find if a sequence (first, next, third) or  just (first next) or
10880  * (first) is available in the input stream.
10881  * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10882  * to avoid rescanning sequences of bytes, it DOES change the state of the
10883  * parser, do not use liberally.
10884  *
10885  * Returns the index to the current parsing point if the full sequence
10886  *      is available, -1 otherwise.
10887  */
10888 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)10889 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10890                        xmlChar next, xmlChar third) {
10891     int base, len;
10892     xmlParserInputPtr in;
10893     const xmlChar *buf;
10894 
10895     in = ctxt->input;
10896     if (in == NULL) return(-1);
10897     base = in->cur - in->base;
10898     if (base < 0) return(-1);
10899     if (ctxt->checkIndex > base)
10900         base = ctxt->checkIndex;
10901     if (in->buf == NULL) {
10902 	buf = in->base;
10903 	len = in->length;
10904     } else {
10905 	buf = xmlBufContent(in->buf->buffer);
10906 	len = xmlBufUse(in->buf->buffer);
10907     }
10908     /* take into account the sequence length */
10909     if (third) len -= 2;
10910     else if (next) len --;
10911     for (;base < len;base++) {
10912         if (buf[base] == first) {
10913 	    if (third != 0) {
10914 		if ((buf[base + 1] != next) ||
10915 		    (buf[base + 2] != third)) continue;
10916 	    } else if (next != 0) {
10917 		if (buf[base + 1] != next) continue;
10918 	    }
10919 	    ctxt->checkIndex = 0;
10920 #ifdef DEBUG_PUSH
10921 	    if (next == 0)
10922 		xmlGenericError(xmlGenericErrorContext,
10923 			"PP: lookup '%c' found at %d\n",
10924 			first, base);
10925 	    else if (third == 0)
10926 		xmlGenericError(xmlGenericErrorContext,
10927 			"PP: lookup '%c%c' found at %d\n",
10928 			first, next, base);
10929 	    else
10930 		xmlGenericError(xmlGenericErrorContext,
10931 			"PP: lookup '%c%c%c' found at %d\n",
10932 			first, next, third, base);
10933 #endif
10934 	    return(base - (in->cur - in->base));
10935 	}
10936     }
10937     ctxt->checkIndex = base;
10938 #ifdef DEBUG_PUSH
10939     if (next == 0)
10940 	xmlGenericError(xmlGenericErrorContext,
10941 		"PP: lookup '%c' failed\n", first);
10942     else if (third == 0)
10943 	xmlGenericError(xmlGenericErrorContext,
10944 		"PP: lookup '%c%c' failed\n", first, next);
10945     else
10946 	xmlGenericError(xmlGenericErrorContext,
10947 		"PP: lookup '%c%c%c' failed\n", first, next, third);
10948 #endif
10949     return(-1);
10950 }
10951 
10952 /**
10953  * xmlParseGetLasts:
10954  * @ctxt:  an XML parser context
10955  * @lastlt:  pointer to store the last '<' from the input
10956  * @lastgt:  pointer to store the last '>' from the input
10957  *
10958  * Lookup the last < and > in the current chunk
10959  */
10960 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)10961 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10962                  const xmlChar **lastgt) {
10963     const xmlChar *tmp;
10964 
10965     if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10966 	xmlGenericError(xmlGenericErrorContext,
10967 		    "Internal error: xmlParseGetLasts\n");
10968 	return;
10969     }
10970     if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10971         tmp = ctxt->input->end;
10972 	tmp--;
10973 	while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10974 	if (tmp < ctxt->input->base) {
10975 	    *lastlt = NULL;
10976 	    *lastgt = NULL;
10977 	} else {
10978 	    *lastlt = tmp;
10979 	    tmp++;
10980 	    while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10981 	        if (*tmp == '\'') {
10982 		    tmp++;
10983 		    while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10984 		    if (tmp < ctxt->input->end) tmp++;
10985 		} else if (*tmp == '"') {
10986 		    tmp++;
10987 		    while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10988 		    if (tmp < ctxt->input->end) tmp++;
10989 		} else
10990 		    tmp++;
10991 	    }
10992 	    if (tmp < ctxt->input->end)
10993 	        *lastgt = tmp;
10994 	    else {
10995 	        tmp = *lastlt;
10996 		tmp--;
10997 		while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10998 		if (tmp >= ctxt->input->base)
10999 		    *lastgt = tmp;
11000 		else
11001 		    *lastgt = NULL;
11002 	    }
11003 	}
11004     } else {
11005         *lastlt = NULL;
11006 	*lastgt = NULL;
11007     }
11008 }
11009 /**
11010  * xmlCheckCdataPush:
11011  * @cur: pointer to the block of characters
11012  * @len: length of the block in bytes
11013  * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11014  *
11015  * Check that the block of characters is okay as SCdata content [20]
11016  *
11017  * Returns the number of bytes to pass if okay, a negative index where an
11018  *         UTF-8 error occurred otherwise
11019  */
11020 static int
xmlCheckCdataPush(const xmlChar * utf,int len,int complete)11021 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11022     int ix;
11023     unsigned char c;
11024     int codepoint;
11025 
11026     if ((utf == NULL) || (len <= 0))
11027         return(0);
11028 
11029     for (ix = 0; ix < len;) {      /* string is 0-terminated */
11030         c = utf[ix];
11031         if ((c & 0x80) == 0x00) {	/* 1-byte code, starts with 10 */
11032 	    if (c >= 0x20)
11033 		ix++;
11034 	    else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11035 	        ix++;
11036 	    else
11037 	        return(-ix);
11038 	} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11039 	    if (ix + 2 > len) return(complete ? -ix : ix);
11040 	    if ((utf[ix+1] & 0xc0 ) != 0x80)
11041 	        return(-ix);
11042 	    codepoint = (utf[ix] & 0x1f) << 6;
11043 	    codepoint |= utf[ix+1] & 0x3f;
11044 	    if (!xmlIsCharQ(codepoint))
11045 	        return(-ix);
11046 	    ix += 2;
11047 	} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11048 	    if (ix + 3 > len) return(complete ? -ix : ix);
11049 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11050 	        ((utf[ix+2] & 0xc0) != 0x80))
11051 		    return(-ix);
11052 	    codepoint = (utf[ix] & 0xf) << 12;
11053 	    codepoint |= (utf[ix+1] & 0x3f) << 6;
11054 	    codepoint |= utf[ix+2] & 0x3f;
11055 	    if (!xmlIsCharQ(codepoint))
11056 	        return(-ix);
11057 	    ix += 3;
11058 	} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11059 	    if (ix + 4 > len) return(complete ? -ix : ix);
11060 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11061 	        ((utf[ix+2] & 0xc0) != 0x80) ||
11062 		((utf[ix+3] & 0xc0) != 0x80))
11063 		    return(-ix);
11064 	    codepoint = (utf[ix] & 0x7) << 18;
11065 	    codepoint |= (utf[ix+1] & 0x3f) << 12;
11066 	    codepoint |= (utf[ix+2] & 0x3f) << 6;
11067 	    codepoint |= utf[ix+3] & 0x3f;
11068 	    if (!xmlIsCharQ(codepoint))
11069 	        return(-ix);
11070 	    ix += 4;
11071 	} else				/* unknown encoding */
11072 	    return(-ix);
11073       }
11074       return(ix);
11075 }
11076 
11077 /**
11078  * xmlParseTryOrFinish:
11079  * @ctxt:  an XML parser context
11080  * @terminate:  last chunk indicator
11081  *
11082  * Try to progress on parsing
11083  *
11084  * Returns zero if no parsing was possible
11085  */
11086 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11087 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11088     int ret = 0;
11089     int avail, tlen;
11090     xmlChar cur, next;
11091     const xmlChar *lastlt, *lastgt;
11092 
11093     if (ctxt->input == NULL)
11094         return(0);
11095 
11096 #ifdef DEBUG_PUSH
11097     switch (ctxt->instate) {
11098 	case XML_PARSER_EOF:
11099 	    xmlGenericError(xmlGenericErrorContext,
11100 		    "PP: try EOF\n"); break;
11101 	case XML_PARSER_START:
11102 	    xmlGenericError(xmlGenericErrorContext,
11103 		    "PP: try START\n"); break;
11104 	case XML_PARSER_MISC:
11105 	    xmlGenericError(xmlGenericErrorContext,
11106 		    "PP: try MISC\n");break;
11107 	case XML_PARSER_COMMENT:
11108 	    xmlGenericError(xmlGenericErrorContext,
11109 		    "PP: try COMMENT\n");break;
11110 	case XML_PARSER_PROLOG:
11111 	    xmlGenericError(xmlGenericErrorContext,
11112 		    "PP: try PROLOG\n");break;
11113 	case XML_PARSER_START_TAG:
11114 	    xmlGenericError(xmlGenericErrorContext,
11115 		    "PP: try START_TAG\n");break;
11116 	case XML_PARSER_CONTENT:
11117 	    xmlGenericError(xmlGenericErrorContext,
11118 		    "PP: try CONTENT\n");break;
11119 	case XML_PARSER_CDATA_SECTION:
11120 	    xmlGenericError(xmlGenericErrorContext,
11121 		    "PP: try CDATA_SECTION\n");break;
11122 	case XML_PARSER_END_TAG:
11123 	    xmlGenericError(xmlGenericErrorContext,
11124 		    "PP: try END_TAG\n");break;
11125 	case XML_PARSER_ENTITY_DECL:
11126 	    xmlGenericError(xmlGenericErrorContext,
11127 		    "PP: try ENTITY_DECL\n");break;
11128 	case XML_PARSER_ENTITY_VALUE:
11129 	    xmlGenericError(xmlGenericErrorContext,
11130 		    "PP: try ENTITY_VALUE\n");break;
11131 	case XML_PARSER_ATTRIBUTE_VALUE:
11132 	    xmlGenericError(xmlGenericErrorContext,
11133 		    "PP: try ATTRIBUTE_VALUE\n");break;
11134 	case XML_PARSER_DTD:
11135 	    xmlGenericError(xmlGenericErrorContext,
11136 		    "PP: try DTD\n");break;
11137 	case XML_PARSER_EPILOG:
11138 	    xmlGenericError(xmlGenericErrorContext,
11139 		    "PP: try EPILOG\n");break;
11140 	case XML_PARSER_PI:
11141 	    xmlGenericError(xmlGenericErrorContext,
11142 		    "PP: try PI\n");break;
11143         case XML_PARSER_IGNORE:
11144             xmlGenericError(xmlGenericErrorContext,
11145 		    "PP: try IGNORE\n");break;
11146     }
11147 #endif
11148 
11149     if ((ctxt->input != NULL) &&
11150         (ctxt->input->cur - ctxt->input->base > 4096)) {
11151 	xmlSHRINK(ctxt);
11152 	ctxt->checkIndex = 0;
11153     }
11154     xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11155 
11156     while (ctxt->instate != XML_PARSER_EOF) {
11157 	if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11158 	    return(0);
11159 
11160 	if (ctxt->input == NULL) break;
11161 	if (ctxt->input->buf == NULL)
11162 	    avail = ctxt->input->length -
11163 	            (ctxt->input->cur - ctxt->input->base);
11164 	else {
11165 	    /*
11166 	     * If we are operating on converted input, try to flush
11167 	     * remainng chars to avoid them stalling in the non-converted
11168 	     * buffer. But do not do this in document start where
11169 	     * encoding="..." may not have been read and we work on a
11170 	     * guessed encoding.
11171 	     */
11172 	    if ((ctxt->instate != XML_PARSER_START) &&
11173 	        (ctxt->input->buf->raw != NULL) &&
11174 		(xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11175                 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11176                                                  ctxt->input);
11177 		size_t current = ctxt->input->cur - ctxt->input->base;
11178 
11179 		xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11180                 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11181                                       base, current);
11182 	    }
11183 	    avail = xmlBufUse(ctxt->input->buf->buffer) -
11184 		    (ctxt->input->cur - ctxt->input->base);
11185 	}
11186         if (avail < 1)
11187 	    goto done;
11188         switch (ctxt->instate) {
11189             case XML_PARSER_EOF:
11190 	        /*
11191 		 * Document parsing is done !
11192 		 */
11193 	        goto done;
11194             case XML_PARSER_START:
11195 		if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11196 		    xmlChar start[4];
11197 		    xmlCharEncoding enc;
11198 
11199 		    /*
11200 		     * Very first chars read from the document flow.
11201 		     */
11202 		    if (avail < 4)
11203 			goto done;
11204 
11205 		    /*
11206 		     * Get the 4 first bytes and decode the charset
11207 		     * if enc != XML_CHAR_ENCODING_NONE
11208 		     * plug some encoding conversion routines,
11209 		     * else xmlSwitchEncoding will set to (default)
11210 		     * UTF8.
11211 		     */
11212 		    start[0] = RAW;
11213 		    start[1] = NXT(1);
11214 		    start[2] = NXT(2);
11215 		    start[3] = NXT(3);
11216 		    enc = xmlDetectCharEncoding(start, 4);
11217 		    xmlSwitchEncoding(ctxt, enc);
11218 		    break;
11219 		}
11220 
11221 		if (avail < 2)
11222 		    goto done;
11223 		cur = ctxt->input->cur[0];
11224 		next = ctxt->input->cur[1];
11225 		if (cur == 0) {
11226 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11227 			ctxt->sax->setDocumentLocator(ctxt->userData,
11228 						      &xmlDefaultSAXLocator);
11229 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11230 		    xmlHaltParser(ctxt);
11231 #ifdef DEBUG_PUSH
11232 		    xmlGenericError(xmlGenericErrorContext,
11233 			    "PP: entering EOF\n");
11234 #endif
11235 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11236 			ctxt->sax->endDocument(ctxt->userData);
11237 		    goto done;
11238 		}
11239 	        if ((cur == '<') && (next == '?')) {
11240 		    /* PI or XML decl */
11241 		    if (avail < 5) return(ret);
11242 		    if ((!terminate) &&
11243 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11244 			return(ret);
11245 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11246 			ctxt->sax->setDocumentLocator(ctxt->userData,
11247 						      &xmlDefaultSAXLocator);
11248 		    if ((ctxt->input->cur[2] == 'x') &&
11249 			(ctxt->input->cur[3] == 'm') &&
11250 			(ctxt->input->cur[4] == 'l') &&
11251 			(IS_BLANK_CH(ctxt->input->cur[5]))) {
11252 			ret += 5;
11253 #ifdef DEBUG_PUSH
11254 			xmlGenericError(xmlGenericErrorContext,
11255 				"PP: Parsing XML Decl\n");
11256 #endif
11257 			xmlParseXMLDecl(ctxt);
11258 			if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11259 			    /*
11260 			     * The XML REC instructs us to stop parsing right
11261 			     * here
11262 			     */
11263 			    xmlHaltParser(ctxt);
11264 			    return(0);
11265 			}
11266 			ctxt->standalone = ctxt->input->standalone;
11267 			if ((ctxt->encoding == NULL) &&
11268 			    (ctxt->input->encoding != NULL))
11269 			    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11270 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11271 			    (!ctxt->disableSAX))
11272 			    ctxt->sax->startDocument(ctxt->userData);
11273 			ctxt->instate = XML_PARSER_MISC;
11274 #ifdef DEBUG_PUSH
11275 			xmlGenericError(xmlGenericErrorContext,
11276 				"PP: entering MISC\n");
11277 #endif
11278 		    } else {
11279 			ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11280 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11281 			    (!ctxt->disableSAX))
11282 			    ctxt->sax->startDocument(ctxt->userData);
11283 			ctxt->instate = XML_PARSER_MISC;
11284 #ifdef DEBUG_PUSH
11285 			xmlGenericError(xmlGenericErrorContext,
11286 				"PP: entering MISC\n");
11287 #endif
11288 		    }
11289 		} else {
11290 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11291 			ctxt->sax->setDocumentLocator(ctxt->userData,
11292 						      &xmlDefaultSAXLocator);
11293 		    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11294 		    if (ctxt->version == NULL) {
11295 		        xmlErrMemory(ctxt, NULL);
11296 			break;
11297 		    }
11298 		    if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11299 		        (!ctxt->disableSAX))
11300 			ctxt->sax->startDocument(ctxt->userData);
11301 		    ctxt->instate = XML_PARSER_MISC;
11302 #ifdef DEBUG_PUSH
11303 		    xmlGenericError(xmlGenericErrorContext,
11304 			    "PP: entering MISC\n");
11305 #endif
11306 		}
11307 		break;
11308             case XML_PARSER_START_TAG: {
11309 	        const xmlChar *name;
11310 		const xmlChar *prefix = NULL;
11311 		const xmlChar *URI = NULL;
11312 		int nsNr = ctxt->nsNr;
11313 
11314 		if ((avail < 2) && (ctxt->inputNr == 1))
11315 		    goto done;
11316 		cur = ctxt->input->cur[0];
11317 	        if (cur != '<') {
11318 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11319 		    xmlHaltParser(ctxt);
11320 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11321 			ctxt->sax->endDocument(ctxt->userData);
11322 		    goto done;
11323 		}
11324 		if (!terminate) {
11325 		    if (ctxt->progressive) {
11326 		        /* > can be found unescaped in attribute values */
11327 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11328 			    goto done;
11329 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11330 			goto done;
11331 		    }
11332 		}
11333 		if (ctxt->spaceNr == 0)
11334 		    spacePush(ctxt, -1);
11335 		else if (*ctxt->space == -2)
11336 		    spacePush(ctxt, -1);
11337 		else
11338 		    spacePush(ctxt, *ctxt->space);
11339 #ifdef LIBXML_SAX1_ENABLED
11340 		if (ctxt->sax2)
11341 #endif /* LIBXML_SAX1_ENABLED */
11342 		    name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11343 #ifdef LIBXML_SAX1_ENABLED
11344 		else
11345 		    name = xmlParseStartTag(ctxt);
11346 #endif /* LIBXML_SAX1_ENABLED */
11347 		if (ctxt->instate == XML_PARSER_EOF)
11348 		    goto done;
11349 		if (name == NULL) {
11350 		    spacePop(ctxt);
11351 		    xmlHaltParser(ctxt);
11352 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11353 			ctxt->sax->endDocument(ctxt->userData);
11354 		    goto done;
11355 		}
11356 #ifdef LIBXML_VALID_ENABLED
11357 		/*
11358 		 * [ VC: Root Element Type ]
11359 		 * The Name in the document type declaration must match
11360 		 * the element type of the root element.
11361 		 */
11362 		if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11363 		    ctxt->node && (ctxt->node == ctxt->myDoc->children))
11364 		    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11365 #endif /* LIBXML_VALID_ENABLED */
11366 
11367 		/*
11368 		 * Check for an Empty Element.
11369 		 */
11370 		if ((RAW == '/') && (NXT(1) == '>')) {
11371 		    SKIP(2);
11372 
11373 		    if (ctxt->sax2) {
11374 			if ((ctxt->sax != NULL) &&
11375 			    (ctxt->sax->endElementNs != NULL) &&
11376 			    (!ctxt->disableSAX))
11377 			    ctxt->sax->endElementNs(ctxt->userData, name,
11378 			                            prefix, URI);
11379 			if (ctxt->nsNr - nsNr > 0)
11380 			    nsPop(ctxt, ctxt->nsNr - nsNr);
11381 #ifdef LIBXML_SAX1_ENABLED
11382 		    } else {
11383 			if ((ctxt->sax != NULL) &&
11384 			    (ctxt->sax->endElement != NULL) &&
11385 			    (!ctxt->disableSAX))
11386 			    ctxt->sax->endElement(ctxt->userData, name);
11387 #endif /* LIBXML_SAX1_ENABLED */
11388 		    }
11389 		    if (ctxt->instate == XML_PARSER_EOF)
11390 			goto done;
11391 		    spacePop(ctxt);
11392 		    if (ctxt->nameNr == 0) {
11393 			ctxt->instate = XML_PARSER_EPILOG;
11394 		    } else {
11395 			ctxt->instate = XML_PARSER_CONTENT;
11396 		    }
11397                     ctxt->progressive = 1;
11398 		    break;
11399 		}
11400 		if (RAW == '>') {
11401 		    NEXT;
11402 		} else {
11403 		    xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11404 					 "Couldn't find end of Start Tag %s\n",
11405 					 name);
11406 		    nodePop(ctxt);
11407 		    spacePop(ctxt);
11408 		}
11409 		if (ctxt->sax2)
11410 		    nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11411 #ifdef LIBXML_SAX1_ENABLED
11412 		else
11413 		    namePush(ctxt, name);
11414 #endif /* LIBXML_SAX1_ENABLED */
11415 
11416 		ctxt->instate = XML_PARSER_CONTENT;
11417                 ctxt->progressive = 1;
11418                 break;
11419 	    }
11420             case XML_PARSER_CONTENT: {
11421 		const xmlChar *test;
11422 		unsigned int cons;
11423 		if ((avail < 2) && (ctxt->inputNr == 1))
11424 		    goto done;
11425 		cur = ctxt->input->cur[0];
11426 		next = ctxt->input->cur[1];
11427 
11428 		test = CUR_PTR;
11429 	        cons = ctxt->input->consumed;
11430 		if ((cur == '<') && (next == '/')) {
11431 		    ctxt->instate = XML_PARSER_END_TAG;
11432 		    break;
11433 	        } else if ((cur == '<') && (next == '?')) {
11434 		    if ((!terminate) &&
11435 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11436                         ctxt->progressive = XML_PARSER_PI;
11437 			goto done;
11438                     }
11439 		    xmlParsePI(ctxt);
11440 		    ctxt->instate = XML_PARSER_CONTENT;
11441                     ctxt->progressive = 1;
11442 		} else if ((cur == '<') && (next != '!')) {
11443 		    ctxt->instate = XML_PARSER_START_TAG;
11444 		    break;
11445 		} else if ((cur == '<') && (next == '!') &&
11446 		           (ctxt->input->cur[2] == '-') &&
11447 			   (ctxt->input->cur[3] == '-')) {
11448 		    int term;
11449 
11450 	            if (avail < 4)
11451 		        goto done;
11452 		    ctxt->input->cur += 4;
11453 		    term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11454 		    ctxt->input->cur -= 4;
11455 		    if ((!terminate) && (term < 0)) {
11456                         ctxt->progressive = XML_PARSER_COMMENT;
11457 			goto done;
11458                     }
11459 		    xmlParseComment(ctxt);
11460 		    ctxt->instate = XML_PARSER_CONTENT;
11461                     ctxt->progressive = 1;
11462 		} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11463 		    (ctxt->input->cur[2] == '[') &&
11464 		    (ctxt->input->cur[3] == 'C') &&
11465 		    (ctxt->input->cur[4] == 'D') &&
11466 		    (ctxt->input->cur[5] == 'A') &&
11467 		    (ctxt->input->cur[6] == 'T') &&
11468 		    (ctxt->input->cur[7] == 'A') &&
11469 		    (ctxt->input->cur[8] == '[')) {
11470 		    SKIP(9);
11471 		    ctxt->instate = XML_PARSER_CDATA_SECTION;
11472 		    break;
11473 		} else if ((cur == '<') && (next == '!') &&
11474 		           (avail < 9)) {
11475 		    goto done;
11476 		} else if (cur == '&') {
11477 		    if ((!terminate) &&
11478 		        (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11479 			goto done;
11480 		    xmlParseReference(ctxt);
11481 		} else {
11482 		    /* TODO Avoid the extra copy, handle directly !!! */
11483 		    /*
11484 		     * Goal of the following test is:
11485 		     *  - minimize calls to the SAX 'character' callback
11486 		     *    when they are mergeable
11487 		     *  - handle an problem for isBlank when we only parse
11488 		     *    a sequence of blank chars and the next one is
11489 		     *    not available to check against '<' presence.
11490 		     *  - tries to homogenize the differences in SAX
11491 		     *    callbacks between the push and pull versions
11492 		     *    of the parser.
11493 		     */
11494 		    if ((ctxt->inputNr == 1) &&
11495 		        (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11496 			if (!terminate) {
11497 			    if (ctxt->progressive) {
11498 				if ((lastlt == NULL) ||
11499 				    (ctxt->input->cur > lastlt))
11500 				    goto done;
11501 			    } else if (xmlParseLookupSequence(ctxt,
11502 			                                      '<', 0, 0) < 0) {
11503 				goto done;
11504 			    }
11505 			}
11506                     }
11507 		    ctxt->checkIndex = 0;
11508 		    xmlParseCharData(ctxt, 0);
11509 		}
11510 		if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11511 		    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11512 		                "detected an error in element content\n");
11513 		    xmlHaltParser(ctxt);
11514 		    break;
11515 		}
11516 		break;
11517 	    }
11518             case XML_PARSER_END_TAG:
11519 		if (avail < 2)
11520 		    goto done;
11521 		if (!terminate) {
11522 		    if (ctxt->progressive) {
11523 		        /* > can be found unescaped in attribute values */
11524 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11525 			    goto done;
11526 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11527 			goto done;
11528 		    }
11529 		}
11530 		if (ctxt->sax2) {
11531 		    xmlParseEndTag2(ctxt,
11532 		            (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11533 		            (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11534 		            (int) (ptrdiff_t)
11535                                 ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11536 		    nameNsPop(ctxt);
11537 		}
11538 #ifdef LIBXML_SAX1_ENABLED
11539 		  else
11540 		    xmlParseEndTag1(ctxt, 0);
11541 #endif /* LIBXML_SAX1_ENABLED */
11542 		if (ctxt->instate == XML_PARSER_EOF) {
11543 		    /* Nothing */
11544 		} else if (ctxt->nameNr == 0) {
11545 		    ctxt->instate = XML_PARSER_EPILOG;
11546 		} else {
11547 		    ctxt->instate = XML_PARSER_CONTENT;
11548 		}
11549 		break;
11550             case XML_PARSER_CDATA_SECTION: {
11551 	        /*
11552 		 * The Push mode need to have the SAX callback for
11553 		 * cdataBlock merge back contiguous callbacks.
11554 		 */
11555 		int base;
11556 
11557 		base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11558 		if (base < 0) {
11559 		    if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11560 		        int tmp;
11561 
11562 			tmp = xmlCheckCdataPush(ctxt->input->cur,
11563 			                        XML_PARSER_BIG_BUFFER_SIZE, 0);
11564 			if (tmp < 0) {
11565 			    tmp = -tmp;
11566 			    ctxt->input->cur += tmp;
11567 			    goto encoding_error;
11568 			}
11569 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11570 			    if (ctxt->sax->cdataBlock != NULL)
11571 				ctxt->sax->cdataBlock(ctxt->userData,
11572 				                      ctxt->input->cur, tmp);
11573 			    else if (ctxt->sax->characters != NULL)
11574 				ctxt->sax->characters(ctxt->userData,
11575 				                      ctxt->input->cur, tmp);
11576 			}
11577 			if (ctxt->instate == XML_PARSER_EOF)
11578 			    goto done;
11579 			SKIPL(tmp);
11580 			ctxt->checkIndex = 0;
11581 		    }
11582 		    goto done;
11583 		} else {
11584 		    int tmp;
11585 
11586 		    tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11587 		    if ((tmp < 0) || (tmp != base)) {
11588 			tmp = -tmp;
11589 			ctxt->input->cur += tmp;
11590 			goto encoding_error;
11591 		    }
11592 		    if ((ctxt->sax != NULL) && (base == 0) &&
11593 		        (ctxt->sax->cdataBlock != NULL) &&
11594 		        (!ctxt->disableSAX)) {
11595 			/*
11596 			 * Special case to provide identical behaviour
11597 			 * between pull and push parsers on enpty CDATA
11598 			 * sections
11599 			 */
11600 			 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11601 			     (!strncmp((const char *)&ctxt->input->cur[-9],
11602 			               "<![CDATA[", 9)))
11603 			     ctxt->sax->cdataBlock(ctxt->userData,
11604 			                           BAD_CAST "", 0);
11605 		    } else if ((ctxt->sax != NULL) && (base > 0) &&
11606 			(!ctxt->disableSAX)) {
11607 			if (ctxt->sax->cdataBlock != NULL)
11608 			    ctxt->sax->cdataBlock(ctxt->userData,
11609 						  ctxt->input->cur, base);
11610 			else if (ctxt->sax->characters != NULL)
11611 			    ctxt->sax->characters(ctxt->userData,
11612 						  ctxt->input->cur, base);
11613 		    }
11614 		    if (ctxt->instate == XML_PARSER_EOF)
11615 			goto done;
11616 		    SKIPL(base + 3);
11617 		    ctxt->checkIndex = 0;
11618 		    ctxt->instate = XML_PARSER_CONTENT;
11619 #ifdef DEBUG_PUSH
11620 		    xmlGenericError(xmlGenericErrorContext,
11621 			    "PP: entering CONTENT\n");
11622 #endif
11623 		}
11624 		break;
11625 	    }
11626             case XML_PARSER_MISC:
11627 		SKIP_BLANKS;
11628 		if (ctxt->input->buf == NULL)
11629 		    avail = ctxt->input->length -
11630 		            (ctxt->input->cur - ctxt->input->base);
11631 		else
11632 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11633 		            (ctxt->input->cur - ctxt->input->base);
11634 		if (avail < 2)
11635 		    goto done;
11636 		cur = ctxt->input->cur[0];
11637 		next = ctxt->input->cur[1];
11638 	        if ((cur == '<') && (next == '?')) {
11639 		    if ((!terminate) &&
11640 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11641                         ctxt->progressive = XML_PARSER_PI;
11642 			goto done;
11643                     }
11644 #ifdef DEBUG_PUSH
11645 		    xmlGenericError(xmlGenericErrorContext,
11646 			    "PP: Parsing PI\n");
11647 #endif
11648 		    xmlParsePI(ctxt);
11649 		    if (ctxt->instate == XML_PARSER_EOF)
11650 			goto done;
11651 		    ctxt->instate = XML_PARSER_MISC;
11652                     ctxt->progressive = 1;
11653 		    ctxt->checkIndex = 0;
11654 		} else if ((cur == '<') && (next == '!') &&
11655 		    (ctxt->input->cur[2] == '-') &&
11656 		    (ctxt->input->cur[3] == '-')) {
11657 		    if ((!terminate) &&
11658 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11659                         ctxt->progressive = XML_PARSER_COMMENT;
11660 			goto done;
11661                     }
11662 #ifdef DEBUG_PUSH
11663 		    xmlGenericError(xmlGenericErrorContext,
11664 			    "PP: Parsing Comment\n");
11665 #endif
11666 		    xmlParseComment(ctxt);
11667 		    if (ctxt->instate == XML_PARSER_EOF)
11668 			goto done;
11669 		    ctxt->instate = XML_PARSER_MISC;
11670                     ctxt->progressive = 1;
11671 		    ctxt->checkIndex = 0;
11672 		} else if ((cur == '<') && (next == '!') &&
11673 		    (ctxt->input->cur[2] == 'D') &&
11674 		    (ctxt->input->cur[3] == 'O') &&
11675 		    (ctxt->input->cur[4] == 'C') &&
11676 		    (ctxt->input->cur[5] == 'T') &&
11677 		    (ctxt->input->cur[6] == 'Y') &&
11678 		    (ctxt->input->cur[7] == 'P') &&
11679 		    (ctxt->input->cur[8] == 'E')) {
11680 		    if ((!terminate) &&
11681 		        (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11682                         ctxt->progressive = XML_PARSER_DTD;
11683 			goto done;
11684                     }
11685 #ifdef DEBUG_PUSH
11686 		    xmlGenericError(xmlGenericErrorContext,
11687 			    "PP: Parsing internal subset\n");
11688 #endif
11689 		    ctxt->inSubset = 1;
11690                     ctxt->progressive = 0;
11691 		    ctxt->checkIndex = 0;
11692 		    xmlParseDocTypeDecl(ctxt);
11693 		    if (ctxt->instate == XML_PARSER_EOF)
11694 			goto done;
11695 		    if (RAW == '[') {
11696 			ctxt->instate = XML_PARSER_DTD;
11697 #ifdef DEBUG_PUSH
11698 			xmlGenericError(xmlGenericErrorContext,
11699 				"PP: entering DTD\n");
11700 #endif
11701 		    } else {
11702 			/*
11703 			 * Create and update the external subset.
11704 			 */
11705 			ctxt->inSubset = 2;
11706 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11707 			    (ctxt->sax->externalSubset != NULL))
11708 			    ctxt->sax->externalSubset(ctxt->userData,
11709 				    ctxt->intSubName, ctxt->extSubSystem,
11710 				    ctxt->extSubURI);
11711 			ctxt->inSubset = 0;
11712 			xmlCleanSpecialAttr(ctxt);
11713 			ctxt->instate = XML_PARSER_PROLOG;
11714 #ifdef DEBUG_PUSH
11715 			xmlGenericError(xmlGenericErrorContext,
11716 				"PP: entering PROLOG\n");
11717 #endif
11718 		    }
11719 		} else if ((cur == '<') && (next == '!') &&
11720 		           (avail < 9)) {
11721 		    goto done;
11722 		} else {
11723 		    ctxt->instate = XML_PARSER_START_TAG;
11724 		    ctxt->progressive = XML_PARSER_START_TAG;
11725 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11726 #ifdef DEBUG_PUSH
11727 		    xmlGenericError(xmlGenericErrorContext,
11728 			    "PP: entering START_TAG\n");
11729 #endif
11730 		}
11731 		break;
11732             case XML_PARSER_PROLOG:
11733 		SKIP_BLANKS;
11734 		if (ctxt->input->buf == NULL)
11735 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11736 		else
11737 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11738                             (ctxt->input->cur - ctxt->input->base);
11739 		if (avail < 2)
11740 		    goto done;
11741 		cur = ctxt->input->cur[0];
11742 		next = ctxt->input->cur[1];
11743 	        if ((cur == '<') && (next == '?')) {
11744 		    if ((!terminate) &&
11745 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11746                         ctxt->progressive = XML_PARSER_PI;
11747 			goto done;
11748                     }
11749 #ifdef DEBUG_PUSH
11750 		    xmlGenericError(xmlGenericErrorContext,
11751 			    "PP: Parsing PI\n");
11752 #endif
11753 		    xmlParsePI(ctxt);
11754 		    if (ctxt->instate == XML_PARSER_EOF)
11755 			goto done;
11756 		    ctxt->instate = XML_PARSER_PROLOG;
11757                     ctxt->progressive = 1;
11758 		} else if ((cur == '<') && (next == '!') &&
11759 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11760 		    if ((!terminate) &&
11761 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11762                         ctxt->progressive = XML_PARSER_COMMENT;
11763 			goto done;
11764                     }
11765 #ifdef DEBUG_PUSH
11766 		    xmlGenericError(xmlGenericErrorContext,
11767 			    "PP: Parsing Comment\n");
11768 #endif
11769 		    xmlParseComment(ctxt);
11770 		    if (ctxt->instate == XML_PARSER_EOF)
11771 			goto done;
11772 		    ctxt->instate = XML_PARSER_PROLOG;
11773                     ctxt->progressive = 1;
11774 		} else if ((cur == '<') && (next == '!') &&
11775 		           (avail < 4)) {
11776 		    goto done;
11777 		} else {
11778 		    ctxt->instate = XML_PARSER_START_TAG;
11779 		    if (ctxt->progressive == 0)
11780 			ctxt->progressive = XML_PARSER_START_TAG;
11781 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11782 #ifdef DEBUG_PUSH
11783 		    xmlGenericError(xmlGenericErrorContext,
11784 			    "PP: entering START_TAG\n");
11785 #endif
11786 		}
11787 		break;
11788             case XML_PARSER_EPILOG:
11789 		SKIP_BLANKS;
11790 		if (ctxt->input->buf == NULL)
11791 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11792 		else
11793 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11794                             (ctxt->input->cur - ctxt->input->base);
11795 		if (avail < 2)
11796 		    goto done;
11797 		cur = ctxt->input->cur[0];
11798 		next = ctxt->input->cur[1];
11799 	        if ((cur == '<') && (next == '?')) {
11800 		    if ((!terminate) &&
11801 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11802                         ctxt->progressive = XML_PARSER_PI;
11803 			goto done;
11804                     }
11805 #ifdef DEBUG_PUSH
11806 		    xmlGenericError(xmlGenericErrorContext,
11807 			    "PP: Parsing PI\n");
11808 #endif
11809 		    xmlParsePI(ctxt);
11810 		    if (ctxt->instate == XML_PARSER_EOF)
11811 			goto done;
11812 		    ctxt->instate = XML_PARSER_EPILOG;
11813                     ctxt->progressive = 1;
11814 		} else if ((cur == '<') && (next == '!') &&
11815 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11816 		    if ((!terminate) &&
11817 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11818                         ctxt->progressive = XML_PARSER_COMMENT;
11819 			goto done;
11820                     }
11821 #ifdef DEBUG_PUSH
11822 		    xmlGenericError(xmlGenericErrorContext,
11823 			    "PP: Parsing Comment\n");
11824 #endif
11825 		    xmlParseComment(ctxt);
11826 		    if (ctxt->instate == XML_PARSER_EOF)
11827 			goto done;
11828 		    ctxt->instate = XML_PARSER_EPILOG;
11829                     ctxt->progressive = 1;
11830 		} else if ((cur == '<') && (next == '!') &&
11831 		           (avail < 4)) {
11832 		    goto done;
11833 		} else {
11834 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11835 		    xmlHaltParser(ctxt);
11836 #ifdef DEBUG_PUSH
11837 		    xmlGenericError(xmlGenericErrorContext,
11838 			    "PP: entering EOF\n");
11839 #endif
11840 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11841 			ctxt->sax->endDocument(ctxt->userData);
11842 		    goto done;
11843 		}
11844 		break;
11845             case XML_PARSER_DTD: {
11846 	        /*
11847 		 * Sorry but progressive parsing of the internal subset
11848 		 * is not expected to be supported. We first check that
11849 		 * the full content of the internal subset is available and
11850 		 * the parsing is launched only at that point.
11851 		 * Internal subset ends up with "']' S? '>'" in an unescaped
11852 		 * section and not in a ']]>' sequence which are conditional
11853 		 * sections (whoever argued to keep that crap in XML deserve
11854 		 * a place in hell !).
11855 		 */
11856 		int base, i;
11857 		xmlChar *buf;
11858 	        xmlChar quote = 0;
11859                 size_t use;
11860 
11861 		base = ctxt->input->cur - ctxt->input->base;
11862 		if (base < 0) return(0);
11863 		if (ctxt->checkIndex > base)
11864 		    base = ctxt->checkIndex;
11865 		buf = xmlBufContent(ctxt->input->buf->buffer);
11866                 use = xmlBufUse(ctxt->input->buf->buffer);
11867 		for (;(unsigned int) base < use; base++) {
11868 		    if (quote != 0) {
11869 		        if (buf[base] == quote)
11870 			    quote = 0;
11871 			continue;
11872 		    }
11873 		    if ((quote == 0) && (buf[base] == '<')) {
11874 		        int found  = 0;
11875 			/* special handling of comments */
11876 		        if (((unsigned int) base + 4 < use) &&
11877 			    (buf[base + 1] == '!') &&
11878 			    (buf[base + 2] == '-') &&
11879 			    (buf[base + 3] == '-')) {
11880 			    for (;(unsigned int) base + 3 < use; base++) {
11881 				if ((buf[base] == '-') &&
11882 				    (buf[base + 1] == '-') &&
11883 				    (buf[base + 2] == '>')) {
11884 				    found = 1;
11885 				    base += 2;
11886 				    break;
11887 				}
11888 		            }
11889 			    if (!found) {
11890 #if 0
11891 			        fprintf(stderr, "unfinished comment\n");
11892 #endif
11893 			        break; /* for */
11894 		            }
11895 		            continue;
11896 			}
11897 		    }
11898 		    if (buf[base] == '"') {
11899 		        quote = '"';
11900 			continue;
11901 		    }
11902 		    if (buf[base] == '\'') {
11903 		        quote = '\'';
11904 			continue;
11905 		    }
11906 		    if (buf[base] == ']') {
11907 #if 0
11908 		        fprintf(stderr, "%c%c%c%c: ", buf[base],
11909 			        buf[base + 1], buf[base + 2], buf[base + 3]);
11910 #endif
11911 		        if ((unsigned int) base +1 >= use)
11912 			    break;
11913 			if (buf[base + 1] == ']') {
11914 			    /* conditional crap, skip both ']' ! */
11915 			    base++;
11916 			    continue;
11917 			}
11918 		        for (i = 1; (unsigned int) base + i < use; i++) {
11919 			    if (buf[base + i] == '>') {
11920 #if 0
11921 			        fprintf(stderr, "found\n");
11922 #endif
11923 			        goto found_end_int_subset;
11924 			    }
11925 			    if (!IS_BLANK_CH(buf[base + i])) {
11926 #if 0
11927 			        fprintf(stderr, "not found\n");
11928 #endif
11929 			        goto not_end_of_int_subset;
11930 			    }
11931 			}
11932 #if 0
11933 			fprintf(stderr, "end of stream\n");
11934 #endif
11935 		        break;
11936 
11937 		    }
11938 not_end_of_int_subset:
11939                     continue; /* for */
11940 		}
11941 		/*
11942 		 * We didn't found the end of the Internal subset
11943 		 */
11944                 if (quote == 0)
11945                     ctxt->checkIndex = base;
11946                 else
11947                     ctxt->checkIndex = 0;
11948 #ifdef DEBUG_PUSH
11949 		if (next == 0)
11950 		    xmlGenericError(xmlGenericErrorContext,
11951 			    "PP: lookup of int subset end filed\n");
11952 #endif
11953 	        goto done;
11954 
11955 found_end_int_subset:
11956                 ctxt->checkIndex = 0;
11957 		xmlParseInternalSubset(ctxt);
11958 		if (ctxt->instate == XML_PARSER_EOF)
11959 		    goto done;
11960 		ctxt->inSubset = 2;
11961 		if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11962 		    (ctxt->sax->externalSubset != NULL))
11963 		    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11964 			    ctxt->extSubSystem, ctxt->extSubURI);
11965 		ctxt->inSubset = 0;
11966 		xmlCleanSpecialAttr(ctxt);
11967 		if (ctxt->instate == XML_PARSER_EOF)
11968 		    goto done;
11969 		ctxt->instate = XML_PARSER_PROLOG;
11970 		ctxt->checkIndex = 0;
11971 #ifdef DEBUG_PUSH
11972 		xmlGenericError(xmlGenericErrorContext,
11973 			"PP: entering PROLOG\n");
11974 #endif
11975                 break;
11976 	    }
11977             case XML_PARSER_COMMENT:
11978 		xmlGenericError(xmlGenericErrorContext,
11979 			"PP: internal error, state == COMMENT\n");
11980 		ctxt->instate = XML_PARSER_CONTENT;
11981 #ifdef DEBUG_PUSH
11982 		xmlGenericError(xmlGenericErrorContext,
11983 			"PP: entering CONTENT\n");
11984 #endif
11985 		break;
11986             case XML_PARSER_IGNORE:
11987 		xmlGenericError(xmlGenericErrorContext,
11988 			"PP: internal error, state == IGNORE");
11989 	        ctxt->instate = XML_PARSER_DTD;
11990 #ifdef DEBUG_PUSH
11991 		xmlGenericError(xmlGenericErrorContext,
11992 			"PP: entering DTD\n");
11993 #endif
11994 	        break;
11995             case XML_PARSER_PI:
11996 		xmlGenericError(xmlGenericErrorContext,
11997 			"PP: internal error, state == PI\n");
11998 		ctxt->instate = XML_PARSER_CONTENT;
11999 #ifdef DEBUG_PUSH
12000 		xmlGenericError(xmlGenericErrorContext,
12001 			"PP: entering CONTENT\n");
12002 #endif
12003 		break;
12004             case XML_PARSER_ENTITY_DECL:
12005 		xmlGenericError(xmlGenericErrorContext,
12006 			"PP: internal error, state == ENTITY_DECL\n");
12007 		ctxt->instate = XML_PARSER_DTD;
12008 #ifdef DEBUG_PUSH
12009 		xmlGenericError(xmlGenericErrorContext,
12010 			"PP: entering DTD\n");
12011 #endif
12012 		break;
12013             case XML_PARSER_ENTITY_VALUE:
12014 		xmlGenericError(xmlGenericErrorContext,
12015 			"PP: internal error, state == ENTITY_VALUE\n");
12016 		ctxt->instate = XML_PARSER_CONTENT;
12017 #ifdef DEBUG_PUSH
12018 		xmlGenericError(xmlGenericErrorContext,
12019 			"PP: entering DTD\n");
12020 #endif
12021 		break;
12022             case XML_PARSER_ATTRIBUTE_VALUE:
12023 		xmlGenericError(xmlGenericErrorContext,
12024 			"PP: internal error, state == ATTRIBUTE_VALUE\n");
12025 		ctxt->instate = XML_PARSER_START_TAG;
12026 #ifdef DEBUG_PUSH
12027 		xmlGenericError(xmlGenericErrorContext,
12028 			"PP: entering START_TAG\n");
12029 #endif
12030 		break;
12031             case XML_PARSER_SYSTEM_LITERAL:
12032 		xmlGenericError(xmlGenericErrorContext,
12033 			"PP: internal error, state == SYSTEM_LITERAL\n");
12034 		ctxt->instate = XML_PARSER_START_TAG;
12035 #ifdef DEBUG_PUSH
12036 		xmlGenericError(xmlGenericErrorContext,
12037 			"PP: entering START_TAG\n");
12038 #endif
12039 		break;
12040             case XML_PARSER_PUBLIC_LITERAL:
12041 		xmlGenericError(xmlGenericErrorContext,
12042 			"PP: internal error, state == PUBLIC_LITERAL\n");
12043 		ctxt->instate = XML_PARSER_START_TAG;
12044 #ifdef DEBUG_PUSH
12045 		xmlGenericError(xmlGenericErrorContext,
12046 			"PP: entering START_TAG\n");
12047 #endif
12048 		break;
12049 	}
12050     }
12051 done:
12052 #ifdef DEBUG_PUSH
12053     xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12054 #endif
12055     return(ret);
12056 encoding_error:
12057     {
12058         char buffer[150];
12059 
12060 	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12061 			ctxt->input->cur[0], ctxt->input->cur[1],
12062 			ctxt->input->cur[2], ctxt->input->cur[3]);
12063 	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12064 		     "Input is not proper UTF-8, indicate encoding !\n%s",
12065 		     BAD_CAST buffer, NULL);
12066     }
12067     return(0);
12068 }
12069 
12070 /**
12071  * xmlParseCheckTransition:
12072  * @ctxt:  an XML parser context
12073  * @chunk:  a char array
12074  * @size:  the size in byte of the chunk
12075  *
12076  * Check depending on the current parser state if the chunk given must be
12077  * processed immediately or one need more data to advance on parsing.
12078  *
12079  * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12080  */
12081 static int
xmlParseCheckTransition(xmlParserCtxtPtr ctxt,const char * chunk,int size)12082 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12083     if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12084         return(-1);
12085     if (ctxt->instate == XML_PARSER_START_TAG) {
12086         if (memchr(chunk, '>', size) != NULL)
12087             return(1);
12088         return(0);
12089     }
12090     if (ctxt->progressive == XML_PARSER_COMMENT) {
12091         if (memchr(chunk, '>', size) != NULL)
12092             return(1);
12093         return(0);
12094     }
12095     if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12096         if (memchr(chunk, '>', size) != NULL)
12097             return(1);
12098         return(0);
12099     }
12100     if (ctxt->progressive == XML_PARSER_PI) {
12101         if (memchr(chunk, '>', size) != NULL)
12102             return(1);
12103         return(0);
12104     }
12105     if (ctxt->instate == XML_PARSER_END_TAG) {
12106         if (memchr(chunk, '>', size) != NULL)
12107             return(1);
12108         return(0);
12109     }
12110     if ((ctxt->progressive == XML_PARSER_DTD) ||
12111         (ctxt->instate == XML_PARSER_DTD)) {
12112         if (memchr(chunk, '>', size) != NULL)
12113             return(1);
12114         return(0);
12115     }
12116     return(1);
12117 }
12118 
/**
 * xmlParseChunk:
 * @ctxt:  an XML parser context
 * @chunk:  a char array
 * @size:  the size in bytes of the chunk
 * @terminate:  last chunk indicator
 *
 * Parse a Chunk of memory
 *
 * Returns zero if no error, the xmlParserErrors otherwise.
 */
12130 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)12131 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12132               int terminate) {
12133     int end_in_lf = 0;
12134     int remain = 0;
12135     size_t old_avail = 0;
12136     size_t avail = 0;
12137 
12138     if (ctxt == NULL)
12139         return(XML_ERR_INTERNAL_ERROR);
12140     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12141         return(ctxt->errNo);
12142     if (ctxt->instate == XML_PARSER_EOF)
12143         return(-1);
12144     if (ctxt->instate == XML_PARSER_START)
12145         xmlDetectSAX2(ctxt);
12146     if ((size > 0) && (chunk != NULL) && (!terminate) &&
12147         (chunk[size - 1] == '\r')) {
12148 	end_in_lf = 1;
12149 	size--;
12150     }
12151 
12152 xmldecl_done:
12153 
12154     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12155         (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12156 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12157 	size_t cur = ctxt->input->cur - ctxt->input->base;
12158 	int res;
12159 
12160         old_avail = xmlBufUse(ctxt->input->buf->buffer);
12161         /*
12162          * Specific handling if we autodetected an encoding, we should not
12163          * push more than the first line ... which depend on the encoding
12164          * And only push the rest once the final encoding was detected
12165          */
12166         if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12167             (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12168             unsigned int len = 45;
12169 
12170             if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12171                                BAD_CAST "UTF-16")) ||
12172                 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12173                                BAD_CAST "UTF16")))
12174                 len = 90;
12175             else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12176                                     BAD_CAST "UCS-4")) ||
12177                      (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12178                                     BAD_CAST "UCS4")))
12179                 len = 180;
12180 
12181             if (ctxt->input->buf->rawconsumed < len)
12182                 len -= ctxt->input->buf->rawconsumed;
12183 
12184             /*
12185              * Change size for reading the initial declaration only
12186              * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12187              * will blindly copy extra bytes from memory.
12188              */
12189             if ((unsigned int) size > len) {
12190                 remain = size - len;
12191                 size = len;
12192             } else {
12193                 remain = 0;
12194             }
12195         }
12196 	res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12197 	if (res < 0) {
12198 	    ctxt->errNo = XML_PARSER_EOF;
12199 	    xmlHaltParser(ctxt);
12200 	    return (XML_PARSER_EOF);
12201 	}
12202         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12203 #ifdef DEBUG_PUSH
12204 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12205 #endif
12206 
12207     } else if (ctxt->instate != XML_PARSER_EOF) {
12208 	if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12209 	    xmlParserInputBufferPtr in = ctxt->input->buf;
12210 	    if ((in->encoder != NULL) && (in->buffer != NULL) &&
12211 		    (in->raw != NULL)) {
12212 		int nbchars;
12213 		size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12214 		size_t current = ctxt->input->cur - ctxt->input->base;
12215 
12216 		nbchars = xmlCharEncInput(in, terminate);
12217 		if (nbchars < 0) {
12218 		    /* TODO 2.6.0 */
12219 		    xmlGenericError(xmlGenericErrorContext,
12220 				    "xmlParseChunk: encoder error\n");
12221                     xmlHaltParser(ctxt);
12222 		    return(XML_ERR_INVALID_ENCODING);
12223 		}
12224 		xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12225 	    }
12226 	}
12227     }
12228     if (remain != 0) {
12229         xmlParseTryOrFinish(ctxt, 0);
12230     } else {
12231         if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12232             avail = xmlBufUse(ctxt->input->buf->buffer);
12233         /*
12234          * Depending on the current state it may not be such
12235          * a good idea to try parsing if there is nothing in the chunk
12236          * which would be worth doing a parser state transition and we
12237          * need to wait for more data
12238          */
12239         if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12240             (old_avail == 0) || (avail == 0) ||
12241             (xmlParseCheckTransition(ctxt,
12242                        (const char *)&ctxt->input->base[old_avail],
12243                                      avail - old_avail)))
12244             xmlParseTryOrFinish(ctxt, terminate);
12245     }
12246     if (ctxt->instate == XML_PARSER_EOF)
12247         return(ctxt->errNo);
12248 
12249     if ((ctxt->input != NULL) &&
12250          (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12251          ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12252         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12253         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12254         xmlHaltParser(ctxt);
12255     }
12256     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12257         return(ctxt->errNo);
12258 
12259     if (remain != 0) {
12260         chunk += size;
12261         size = remain;
12262         remain = 0;
12263         goto xmldecl_done;
12264     }
12265     if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12266         (ctxt->input->buf != NULL)) {
12267 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12268 					 ctxt->input);
12269 	size_t current = ctxt->input->cur - ctxt->input->base;
12270 
12271 	xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12272 
12273 	xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12274 			      base, current);
12275     }
12276     if (terminate) {
12277 	/*
12278 	 * Check for termination
12279 	 */
12280 	int cur_avail = 0;
12281 
12282 	if (ctxt->input != NULL) {
12283 	    if (ctxt->input->buf == NULL)
12284 		cur_avail = ctxt->input->length -
12285 			    (ctxt->input->cur - ctxt->input->base);
12286 	    else
12287 		cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12288 			              (ctxt->input->cur - ctxt->input->base);
12289 	}
12290 
12291 	if ((ctxt->instate != XML_PARSER_EOF) &&
12292 	    (ctxt->instate != XML_PARSER_EPILOG)) {
12293 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12294 	}
12295 	if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12296 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12297 	}
12298 	if (ctxt->instate != XML_PARSER_EOF) {
12299 	    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12300 		ctxt->sax->endDocument(ctxt->userData);
12301 	}
12302 	ctxt->instate = XML_PARSER_EOF;
12303     }
12304     if (ctxt->wellFormed == 0)
12305 	return((xmlParserErrors) ctxt->errNo);
12306     else
12307         return(0);
12308 }
12309 
12310 /************************************************************************
12311  *									*
12312  *		I/O front end functions to the parser			*
12313  *									*
12314  ************************************************************************/
12315 
12316 /**
12317  * xmlCreatePushParserCtxt:
12318  * @sax:  a SAX handler
12319  * @user_data:  The user data returned on SAX callbacks
12320  * @chunk:  a pointer to an array of chars
12321  * @size:  number of chars in the array
12322  * @filename:  an optional file name or URI
12323  *
12324  * Create a parser context for using the XML parser in push mode.
12325  * If @buffer and @size are non-NULL, the data is used to detect
12326  * the encoding.  The remaining characters will be parsed so they
12327  * don't need to be fed in again through xmlParseChunk.
12328  * To allow content encoding detection, @size should be >= 4
12329  * The value of @filename is used for fetching external entities
12330  * and error/warning reports.
12331  *
12332  * Returns the new parser context or NULL
12333  */
12334 
12335 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)12336 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12337                         const char *chunk, int size, const char *filename) {
12338     xmlParserCtxtPtr ctxt;
12339     xmlParserInputPtr inputStream;
12340     xmlParserInputBufferPtr buf;
12341     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12342 
12343     /*
12344      * plug some encoding conversion routines
12345      */
12346     if ((chunk != NULL) && (size >= 4))
12347 	enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12348 
12349     buf = xmlAllocParserInputBuffer(enc);
12350     if (buf == NULL) return(NULL);
12351 
12352     ctxt = xmlNewParserCtxt();
12353     if (ctxt == NULL) {
12354         xmlErrMemory(NULL, "creating parser: out of memory\n");
12355 	xmlFreeParserInputBuffer(buf);
12356 	return(NULL);
12357     }
12358     ctxt->dictNames = 1;
12359     ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12360     if (ctxt->pushTab == NULL) {
12361         xmlErrMemory(ctxt, NULL);
12362 	xmlFreeParserInputBuffer(buf);
12363 	xmlFreeParserCtxt(ctxt);
12364 	return(NULL);
12365     }
12366     if (sax != NULL) {
12367 #ifdef LIBXML_SAX1_ENABLED
12368 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12369 #endif /* LIBXML_SAX1_ENABLED */
12370 	    xmlFree(ctxt->sax);
12371 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12372 	if (ctxt->sax == NULL) {
12373 	    xmlErrMemory(ctxt, NULL);
12374 	    xmlFreeParserInputBuffer(buf);
12375 	    xmlFreeParserCtxt(ctxt);
12376 	    return(NULL);
12377 	}
12378 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12379 	if (sax->initialized == XML_SAX2_MAGIC)
12380 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12381 	else
12382 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12383 	if (user_data != NULL)
12384 	    ctxt->userData = user_data;
12385     }
12386     if (filename == NULL) {
12387 	ctxt->directory = NULL;
12388     } else {
12389         ctxt->directory = xmlParserGetDirectory(filename);
12390     }
12391 
12392     inputStream = xmlNewInputStream(ctxt);
12393     if (inputStream == NULL) {
12394 	xmlFreeParserCtxt(ctxt);
12395 	xmlFreeParserInputBuffer(buf);
12396 	return(NULL);
12397     }
12398 
12399     if (filename == NULL)
12400 	inputStream->filename = NULL;
12401     else {
12402 	inputStream->filename = (char *)
12403 	    xmlCanonicPath((const xmlChar *) filename);
12404 	if (inputStream->filename == NULL) {
12405 	    xmlFreeParserCtxt(ctxt);
12406 	    xmlFreeParserInputBuffer(buf);
12407 	    return(NULL);
12408 	}
12409     }
12410     inputStream->buf = buf;
12411     xmlBufResetInput(inputStream->buf->buffer, inputStream);
12412     inputPush(ctxt, inputStream);
12413 
12414     /*
12415      * If the caller didn't provide an initial 'chunk' for determining
12416      * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12417      * that it can be automatically determined later
12418      */
12419     if ((size == 0) || (chunk == NULL)) {
12420 	ctxt->charset = XML_CHAR_ENCODING_NONE;
12421     } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12422 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12423 	size_t cur = ctxt->input->cur - ctxt->input->base;
12424 
12425 	xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12426 
12427         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12428 #ifdef DEBUG_PUSH
12429 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12430 #endif
12431     }
12432 
12433     if (enc != XML_CHAR_ENCODING_NONE) {
12434         xmlSwitchEncoding(ctxt, enc);
12435     }
12436 
12437     return(ctxt);
12438 }
12439 #endif /* LIBXML_PUSH_ENABLED */
12440 
12441 /**
12442  * xmlHaltParser:
12443  * @ctxt:  an XML parser context
12444  *
12445  * Blocks further parser processing don't override error
12446  * for internal use
12447  */
12448 static void
xmlHaltParser(xmlParserCtxtPtr ctxt)12449 xmlHaltParser(xmlParserCtxtPtr ctxt) {
12450     if (ctxt == NULL)
12451         return;
12452     ctxt->instate = XML_PARSER_EOF;
12453     ctxt->disableSAX = 1;
12454     while (ctxt->inputNr > 1)
12455         xmlFreeInputStream(inputPop(ctxt));
12456     if (ctxt->input != NULL) {
12457         /*
12458 	 * in case there was a specific allocation deallocate before
12459 	 * overriding base
12460 	 */
12461         if (ctxt->input->free != NULL) {
12462 	    ctxt->input->free((xmlChar *) ctxt->input->base);
12463 	    ctxt->input->free = NULL;
12464 	}
12465 	ctxt->input->cur = BAD_CAST"";
12466 	ctxt->input->base = ctxt->input->cur;
12467         ctxt->input->end = ctxt->input->cur;
12468     }
12469 }
12470 
12471 /**
12472  * xmlStopParser:
12473  * @ctxt:  an XML parser context
12474  *
12475  * Blocks further parser processing
12476  */
12477 void
xmlStopParser(xmlParserCtxtPtr ctxt)12478 xmlStopParser(xmlParserCtxtPtr ctxt) {
12479     if (ctxt == NULL)
12480         return;
12481     xmlHaltParser(ctxt);
12482     ctxt->errNo = XML_ERR_USER_STOP;
12483 }
12484 
12485 /**
12486  * xmlCreateIOParserCtxt:
12487  * @sax:  a SAX handler
12488  * @user_data:  The user data returned on SAX callbacks
12489  * @ioread:  an I/O read function
12490  * @ioclose:  an I/O close function
12491  * @ioctx:  an I/O handler
12492  * @enc:  the charset encoding if known
12493  *
12494  * Create a parser context for using the XML parser with an existing
12495  * I/O stream
12496  *
12497  * Returns the new parser context or NULL
12498  */
12499 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)12500 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12501 	xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12502 	void *ioctx, xmlCharEncoding enc) {
12503     xmlParserCtxtPtr ctxt;
12504     xmlParserInputPtr inputStream;
12505     xmlParserInputBufferPtr buf;
12506 
12507     if (ioread == NULL) return(NULL);
12508 
12509     buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12510     if (buf == NULL) {
12511         if (ioclose != NULL)
12512             ioclose(ioctx);
12513         return (NULL);
12514     }
12515 
12516     ctxt = xmlNewParserCtxt();
12517     if (ctxt == NULL) {
12518 	xmlFreeParserInputBuffer(buf);
12519 	return(NULL);
12520     }
12521     if (sax != NULL) {
12522 #ifdef LIBXML_SAX1_ENABLED
12523 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12524 #endif /* LIBXML_SAX1_ENABLED */
12525 	    xmlFree(ctxt->sax);
12526 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12527 	if (ctxt->sax == NULL) {
12528 	    xmlErrMemory(ctxt, NULL);
12529 	    xmlFreeParserCtxt(ctxt);
12530 	    return(NULL);
12531 	}
12532 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12533 	if (sax->initialized == XML_SAX2_MAGIC)
12534 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12535 	else
12536 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12537 	if (user_data != NULL)
12538 	    ctxt->userData = user_data;
12539     }
12540 
12541     inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12542     if (inputStream == NULL) {
12543 	xmlFreeParserCtxt(ctxt);
12544 	return(NULL);
12545     }
12546     inputPush(ctxt, inputStream);
12547 
12548     return(ctxt);
12549 }
12550 
12551 #ifdef LIBXML_VALID_ENABLED
12552 /************************************************************************
12553  *									*
12554  *		Front ends when parsing a DTD				*
12555  *									*
12556  ************************************************************************/
12557 
12558 /**
12559  * xmlIOParseDTD:
12560  * @sax:  the SAX handler block or NULL
12561  * @input:  an Input Buffer
12562  * @enc:  the charset encoding if known
12563  *
12564  * Load and parse a DTD
12565  *
12566  * Returns the resulting xmlDtdPtr or NULL in case of error.
12567  * @input will be freed by the function in any case.
12568  */
12569 
12570 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12571 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12572 	      xmlCharEncoding enc) {
12573     xmlDtdPtr ret = NULL;
12574     xmlParserCtxtPtr ctxt;
12575     xmlParserInputPtr pinput = NULL;
12576     xmlChar start[4];
12577 
12578     if (input == NULL)
12579 	return(NULL);
12580 
12581     ctxt = xmlNewParserCtxt();
12582     if (ctxt == NULL) {
12583         xmlFreeParserInputBuffer(input);
12584 	return(NULL);
12585     }
12586 
12587     /* We are loading a DTD */
12588     ctxt->options |= XML_PARSE_DTDLOAD;
12589 
12590     /*
12591      * Set-up the SAX context
12592      */
12593     if (sax != NULL) {
12594 	if (ctxt->sax != NULL)
12595 	    xmlFree(ctxt->sax);
12596         ctxt->sax = sax;
12597         ctxt->userData = ctxt;
12598     }
12599     xmlDetectSAX2(ctxt);
12600 
12601     /*
12602      * generate a parser input from the I/O handler
12603      */
12604 
12605     pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12606     if (pinput == NULL) {
12607         if (sax != NULL) ctxt->sax = NULL;
12608         xmlFreeParserInputBuffer(input);
12609 	xmlFreeParserCtxt(ctxt);
12610 	return(NULL);
12611     }
12612 
12613     /*
12614      * plug some encoding conversion routines here.
12615      */
12616     if (xmlPushInput(ctxt, pinput) < 0) {
12617         if (sax != NULL) ctxt->sax = NULL;
12618 	xmlFreeParserCtxt(ctxt);
12619 	return(NULL);
12620     }
12621     if (enc != XML_CHAR_ENCODING_NONE) {
12622         xmlSwitchEncoding(ctxt, enc);
12623     }
12624 
12625     pinput->filename = NULL;
12626     pinput->line = 1;
12627     pinput->col = 1;
12628     pinput->base = ctxt->input->cur;
12629     pinput->cur = ctxt->input->cur;
12630     pinput->free = NULL;
12631 
12632     /*
12633      * let's parse that entity knowing it's an external subset.
12634      */
12635     ctxt->inSubset = 2;
12636     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12637     if (ctxt->myDoc == NULL) {
12638 	xmlErrMemory(ctxt, "New Doc failed");
12639 	return(NULL);
12640     }
12641     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12642     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12643 	                               BAD_CAST "none", BAD_CAST "none");
12644 
12645     if ((enc == XML_CHAR_ENCODING_NONE) &&
12646         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12647 	/*
12648 	 * Get the 4 first bytes and decode the charset
12649 	 * if enc != XML_CHAR_ENCODING_NONE
12650 	 * plug some encoding conversion routines.
12651 	 */
12652 	start[0] = RAW;
12653 	start[1] = NXT(1);
12654 	start[2] = NXT(2);
12655 	start[3] = NXT(3);
12656 	enc = xmlDetectCharEncoding(start, 4);
12657 	if (enc != XML_CHAR_ENCODING_NONE) {
12658 	    xmlSwitchEncoding(ctxt, enc);
12659 	}
12660     }
12661 
12662     xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12663 
12664     if (ctxt->myDoc != NULL) {
12665 	if (ctxt->wellFormed) {
12666 	    ret = ctxt->myDoc->extSubset;
12667 	    ctxt->myDoc->extSubset = NULL;
12668 	    if (ret != NULL) {
12669 		xmlNodePtr tmp;
12670 
12671 		ret->doc = NULL;
12672 		tmp = ret->children;
12673 		while (tmp != NULL) {
12674 		    tmp->doc = NULL;
12675 		    tmp = tmp->next;
12676 		}
12677 	    }
12678 	} else {
12679 	    ret = NULL;
12680 	}
12681         xmlFreeDoc(ctxt->myDoc);
12682         ctxt->myDoc = NULL;
12683     }
12684     if (sax != NULL) ctxt->sax = NULL;
12685     xmlFreeParserCtxt(ctxt);
12686 
12687     return(ret);
12688 }
12689 
12690 /**
12691  * xmlSAXParseDTD:
12692  * @sax:  the SAX handler block
12693  * @ExternalID:  a NAME* containing the External ID of the DTD
12694  * @SystemID:  a NAME* containing the URL to the DTD
12695  *
12696  * Load and parse an external subset.
12697  *
12698  * Returns the resulting xmlDtdPtr or NULL in case of error.
12699  */
12700 
12701 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12702 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12703                           const xmlChar *SystemID) {
12704     xmlDtdPtr ret = NULL;
12705     xmlParserCtxtPtr ctxt;
12706     xmlParserInputPtr input = NULL;
12707     xmlCharEncoding enc;
12708     xmlChar* systemIdCanonic;
12709 
12710     if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12711 
12712     ctxt = xmlNewParserCtxt();
12713     if (ctxt == NULL) {
12714 	return(NULL);
12715     }
12716 
12717     /* We are loading a DTD */
12718     ctxt->options |= XML_PARSE_DTDLOAD;
12719 
12720     /*
12721      * Set-up the SAX context
12722      */
12723     if (sax != NULL) {
12724 	if (ctxt->sax != NULL)
12725 	    xmlFree(ctxt->sax);
12726         ctxt->sax = sax;
12727         ctxt->userData = ctxt;
12728     }
12729 
12730     /*
12731      * Canonicalise the system ID
12732      */
12733     systemIdCanonic = xmlCanonicPath(SystemID);
12734     if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12735 	xmlFreeParserCtxt(ctxt);
12736 	return(NULL);
12737     }
12738 
12739     /*
12740      * Ask the Entity resolver to load the damn thing
12741      */
12742 
12743     if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12744 	input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12745 	                                 systemIdCanonic);
12746     if (input == NULL) {
12747         if (sax != NULL) ctxt->sax = NULL;
12748 	xmlFreeParserCtxt(ctxt);
12749 	if (systemIdCanonic != NULL)
12750 	    xmlFree(systemIdCanonic);
12751 	return(NULL);
12752     }
12753 
12754     /*
12755      * plug some encoding conversion routines here.
12756      */
12757     if (xmlPushInput(ctxt, input) < 0) {
12758         if (sax != NULL) ctxt->sax = NULL;
12759 	xmlFreeParserCtxt(ctxt);
12760 	if (systemIdCanonic != NULL)
12761 	    xmlFree(systemIdCanonic);
12762 	return(NULL);
12763     }
12764     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12765 	enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12766 	xmlSwitchEncoding(ctxt, enc);
12767     }
12768 
12769     if (input->filename == NULL)
12770 	input->filename = (char *) systemIdCanonic;
12771     else
12772 	xmlFree(systemIdCanonic);
12773     input->line = 1;
12774     input->col = 1;
12775     input->base = ctxt->input->cur;
12776     input->cur = ctxt->input->cur;
12777     input->free = NULL;
12778 
12779     /*
12780      * let's parse that entity knowing it's an external subset.
12781      */
12782     ctxt->inSubset = 2;
12783     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12784     if (ctxt->myDoc == NULL) {
12785 	xmlErrMemory(ctxt, "New Doc failed");
12786         if (sax != NULL) ctxt->sax = NULL;
12787 	xmlFreeParserCtxt(ctxt);
12788 	return(NULL);
12789     }
12790     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12791     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12792 	                               ExternalID, SystemID);
12793     xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12794 
12795     if (ctxt->myDoc != NULL) {
12796 	if (ctxt->wellFormed) {
12797 	    ret = ctxt->myDoc->extSubset;
12798 	    ctxt->myDoc->extSubset = NULL;
12799 	    if (ret != NULL) {
12800 		xmlNodePtr tmp;
12801 
12802 		ret->doc = NULL;
12803 		tmp = ret->children;
12804 		while (tmp != NULL) {
12805 		    tmp->doc = NULL;
12806 		    tmp = tmp->next;
12807 		}
12808 	    }
12809 	} else {
12810 	    ret = NULL;
12811 	}
12812         xmlFreeDoc(ctxt->myDoc);
12813         ctxt->myDoc = NULL;
12814     }
12815     if (sax != NULL) ctxt->sax = NULL;
12816     xmlFreeParserCtxt(ctxt);
12817 
12818     return(ret);
12819 }
12820 
12821 
12822 /**
12823  * xmlParseDTD:
12824  * @ExternalID:  a NAME* containing the External ID of the DTD
12825  * @SystemID:  a NAME* containing the URL to the DTD
12826  *
12827  * Load and parse an external subset.
12828  *
12829  * Returns the resulting xmlDtdPtr or NULL in case of error.
12830  */
12831 
12832 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)12833 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12834     return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12835 }
12836 #endif /* LIBXML_VALID_ENABLED */
12837 
12838 /************************************************************************
12839  *									*
12840  *		Front ends when parsing an Entity			*
12841  *									*
12842  ************************************************************************/
12843 
12844 /**
12845  * xmlParseCtxtExternalEntity:
12846  * @ctx:  the existing parsing context
12847  * @URL:  the URL for the entity to load
12848  * @ID:  the System ID for the entity to load
12849  * @lst:  the return value for the set of parsed nodes
12850  *
12851  * Parse an external general entity within an existing parsing context
12852  * An external general parsed entity is well-formed if it matches the
12853  * production labeled extParsedEnt.
12854  *
12855  * [78] extParsedEnt ::= TextDecl? content
12856  *
12857  * Returns 0 if the entity is well formed, -1 in case of args problem and
12858  *    the parser error code otherwise
12859  */
12860 
12861 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12862 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12863 	               const xmlChar *ID, xmlNodePtr *lst) {
12864     xmlParserCtxtPtr ctxt;
12865     xmlDocPtr newDoc;
12866     xmlNodePtr newRoot;
12867     xmlSAXHandlerPtr oldsax = NULL;
12868     int ret = 0;
12869     xmlChar start[4];
12870     xmlCharEncoding enc;
12871 
12872     if (ctx == NULL) return(-1);
12873 
12874     if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12875         (ctx->depth > 1024)) {
12876 	return(XML_ERR_ENTITY_LOOP);
12877     }
12878 
12879     if (lst != NULL)
12880         *lst = NULL;
12881     if ((URL == NULL) && (ID == NULL))
12882 	return(-1);
12883     if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12884 	return(-1);
12885 
12886     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12887     if (ctxt == NULL) {
12888 	return(-1);
12889     }
12890 
12891     oldsax = ctxt->sax;
12892     ctxt->sax = ctx->sax;
12893     xmlDetectSAX2(ctxt);
12894     newDoc = xmlNewDoc(BAD_CAST "1.0");
12895     if (newDoc == NULL) {
12896 	xmlFreeParserCtxt(ctxt);
12897 	return(-1);
12898     }
12899     newDoc->properties = XML_DOC_INTERNAL;
12900     if (ctx->myDoc->dict) {
12901 	newDoc->dict = ctx->myDoc->dict;
12902 	xmlDictReference(newDoc->dict);
12903     }
12904     if (ctx->myDoc != NULL) {
12905 	newDoc->intSubset = ctx->myDoc->intSubset;
12906 	newDoc->extSubset = ctx->myDoc->extSubset;
12907     }
12908     if (ctx->myDoc->URL != NULL) {
12909 	newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12910     }
12911     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12912     if (newRoot == NULL) {
12913 	ctxt->sax = oldsax;
12914 	xmlFreeParserCtxt(ctxt);
12915 	newDoc->intSubset = NULL;
12916 	newDoc->extSubset = NULL;
12917         xmlFreeDoc(newDoc);
12918 	return(-1);
12919     }
12920     xmlAddChild((xmlNodePtr) newDoc, newRoot);
12921     nodePush(ctxt, newDoc->children);
12922     if (ctx->myDoc == NULL) {
12923 	ctxt->myDoc = newDoc;
12924     } else {
12925 	ctxt->myDoc = ctx->myDoc;
12926 	newDoc->children->doc = ctx->myDoc;
12927     }
12928 
12929     /*
12930      * Get the 4 first bytes and decode the charset
12931      * if enc != XML_CHAR_ENCODING_NONE
12932      * plug some encoding conversion routines.
12933      */
12934     GROW
12935     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12936 	start[0] = RAW;
12937 	start[1] = NXT(1);
12938 	start[2] = NXT(2);
12939 	start[3] = NXT(3);
12940 	enc = xmlDetectCharEncoding(start, 4);
12941 	if (enc != XML_CHAR_ENCODING_NONE) {
12942 	    xmlSwitchEncoding(ctxt, enc);
12943 	}
12944     }
12945 
12946     /*
12947      * Parse a possible text declaration first
12948      */
12949     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12950 	xmlParseTextDecl(ctxt);
12951 	/*
12952 	 * An XML-1.0 document can't reference an entity not XML-1.0
12953 	 */
12954 	if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12955 	    (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12956 	    xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12957 	                   "Version mismatch between document and entity\n");
12958 	}
12959     }
12960 
12961     /*
12962      * If the user provided its own SAX callbacks then reuse the
12963      * useData callback field, otherwise the expected setup in a
12964      * DOM builder is to have userData == ctxt
12965      */
12966     if (ctx->userData == ctx)
12967         ctxt->userData = ctxt;
12968     else
12969         ctxt->userData = ctx->userData;
12970 
12971     /*
12972      * Doing validity checking on chunk doesn't make sense
12973      */
12974     ctxt->instate = XML_PARSER_CONTENT;
12975     ctxt->validate = ctx->validate;
12976     ctxt->valid = ctx->valid;
12977     ctxt->loadsubset = ctx->loadsubset;
12978     ctxt->depth = ctx->depth + 1;
12979     ctxt->replaceEntities = ctx->replaceEntities;
12980     if (ctxt->validate) {
12981 	ctxt->vctxt.error = ctx->vctxt.error;
12982 	ctxt->vctxt.warning = ctx->vctxt.warning;
12983     } else {
12984 	ctxt->vctxt.error = NULL;
12985 	ctxt->vctxt.warning = NULL;
12986     }
12987     ctxt->vctxt.nodeTab = NULL;
12988     ctxt->vctxt.nodeNr = 0;
12989     ctxt->vctxt.nodeMax = 0;
12990     ctxt->vctxt.node = NULL;
12991     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12992     ctxt->dict = ctx->dict;
12993     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12994     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12995     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12996     ctxt->dictNames = ctx->dictNames;
12997     ctxt->attsDefault = ctx->attsDefault;
12998     ctxt->attsSpecial = ctx->attsSpecial;
12999     ctxt->linenumbers = ctx->linenumbers;
13000 
13001     xmlParseContent(ctxt);
13002 
13003     ctx->validate = ctxt->validate;
13004     ctx->valid = ctxt->valid;
13005     if ((RAW == '<') && (NXT(1) == '/')) {
13006 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13007     } else if (RAW != 0) {
13008 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13009     }
13010     if (ctxt->node != newDoc->children) {
13011 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13012     }
13013 
13014     if (!ctxt->wellFormed) {
13015         if (ctxt->errNo == 0)
13016 	    ret = 1;
13017 	else
13018 	    ret = ctxt->errNo;
13019     } else {
13020 	if (lst != NULL) {
13021 	    xmlNodePtr cur;
13022 
13023 	    /*
13024 	     * Return the newly created nodeset after unlinking it from
13025 	     * they pseudo parent.
13026 	     */
13027 	    cur = newDoc->children->children;
13028 	    *lst = cur;
13029 	    while (cur != NULL) {
13030 		cur->parent = NULL;
13031 		cur = cur->next;
13032 	    }
13033             newDoc->children->children = NULL;
13034 	}
13035 	ret = 0;
13036     }
13037     ctxt->sax = oldsax;
13038     ctxt->dict = NULL;
13039     ctxt->attsDefault = NULL;
13040     ctxt->attsSpecial = NULL;
13041     xmlFreeParserCtxt(ctxt);
13042     newDoc->intSubset = NULL;
13043     newDoc->extSubset = NULL;
13044     xmlFreeDoc(newDoc);
13045 
13046     return(ret);
13047 }
13048 
13049 /**
13050  * xmlParseExternalEntityPrivate:
13051  * @doc:  the document the chunk pertains to
13052  * @oldctxt:  the previous parser context if available
13053  * @sax:  the SAX handler bloc (possibly NULL)
13054  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13055  * @depth:  Used for loop detection, use 0
13056  * @URL:  the URL for the entity to load
13057  * @ID:  the System ID for the entity to load
13058  * @list:  the return value for the set of parsed nodes
13059  *
13060  * Private version of xmlParseExternalEntity()
13061  *
13062  * Returns 0 if the entity is well formed, -1 in case of args problem and
13063  *    the parser error code otherwise
13064  */
13065 
13066 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)13067 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13068 	              xmlSAXHandlerPtr sax,
13069 		      void *user_data, int depth, const xmlChar *URL,
13070 		      const xmlChar *ID, xmlNodePtr *list) {
13071     xmlParserCtxtPtr ctxt;
13072     xmlDocPtr newDoc;
13073     xmlNodePtr newRoot;
13074     xmlSAXHandlerPtr oldsax = NULL;
13075     xmlParserErrors ret = XML_ERR_OK;
13076     xmlChar start[4];
13077     xmlCharEncoding enc;
13078 
13079     if (((depth > 40) &&
13080 	((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13081 	(depth > 1024)) {
13082 	return(XML_ERR_ENTITY_LOOP);
13083     }
13084 
13085     if (list != NULL)
13086         *list = NULL;
13087     if ((URL == NULL) && (ID == NULL))
13088 	return(XML_ERR_INTERNAL_ERROR);
13089     if (doc == NULL)
13090 	return(XML_ERR_INTERNAL_ERROR);
13091 
13092 
13093     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13094     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13095     ctxt->userData = ctxt;
13096     if (oldctxt != NULL) {
13097 	ctxt->_private = oldctxt->_private;
13098 	ctxt->loadsubset = oldctxt->loadsubset;
13099 	ctxt->validate = oldctxt->validate;
13100 	ctxt->external = oldctxt->external;
13101 	ctxt->record_info = oldctxt->record_info;
13102 	ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13103 	ctxt->node_seq.length = oldctxt->node_seq.length;
13104 	ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13105     } else {
13106 	/*
13107 	 * Doing validity checking on chunk without context
13108 	 * doesn't make sense
13109 	 */
13110 	ctxt->_private = NULL;
13111 	ctxt->validate = 0;
13112 	ctxt->external = 2;
13113 	ctxt->loadsubset = 0;
13114     }
13115     if (sax != NULL) {
13116 	oldsax = ctxt->sax;
13117         ctxt->sax = sax;
13118 	if (user_data != NULL)
13119 	    ctxt->userData = user_data;
13120     }
13121     xmlDetectSAX2(ctxt);
13122     newDoc = xmlNewDoc(BAD_CAST "1.0");
13123     if (newDoc == NULL) {
13124 	ctxt->node_seq.maximum = 0;
13125 	ctxt->node_seq.length = 0;
13126 	ctxt->node_seq.buffer = NULL;
13127 	xmlFreeParserCtxt(ctxt);
13128 	return(XML_ERR_INTERNAL_ERROR);
13129     }
13130     newDoc->properties = XML_DOC_INTERNAL;
13131     newDoc->intSubset = doc->intSubset;
13132     newDoc->extSubset = doc->extSubset;
13133     newDoc->dict = doc->dict;
13134     xmlDictReference(newDoc->dict);
13135 
13136     if (doc->URL != NULL) {
13137 	newDoc->URL = xmlStrdup(doc->URL);
13138     }
13139     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13140     if (newRoot == NULL) {
13141 	if (sax != NULL)
13142 	    ctxt->sax = oldsax;
13143 	ctxt->node_seq.maximum = 0;
13144 	ctxt->node_seq.length = 0;
13145 	ctxt->node_seq.buffer = NULL;
13146 	xmlFreeParserCtxt(ctxt);
13147 	newDoc->intSubset = NULL;
13148 	newDoc->extSubset = NULL;
13149         xmlFreeDoc(newDoc);
13150 	return(XML_ERR_INTERNAL_ERROR);
13151     }
13152     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13153     nodePush(ctxt, newDoc->children);
13154     ctxt->myDoc = doc;
13155     newRoot->doc = doc;
13156 
13157     /*
13158      * Get the 4 first bytes and decode the charset
13159      * if enc != XML_CHAR_ENCODING_NONE
13160      * plug some encoding conversion routines.
13161      */
13162     GROW;
13163     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13164 	start[0] = RAW;
13165 	start[1] = NXT(1);
13166 	start[2] = NXT(2);
13167 	start[3] = NXT(3);
13168 	enc = xmlDetectCharEncoding(start, 4);
13169 	if (enc != XML_CHAR_ENCODING_NONE) {
13170 	    xmlSwitchEncoding(ctxt, enc);
13171 	}
13172     }
13173 
13174     /*
13175      * Parse a possible text declaration first
13176      */
13177     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13178 	xmlParseTextDecl(ctxt);
13179     }
13180 
13181     ctxt->instate = XML_PARSER_CONTENT;
13182     ctxt->depth = depth;
13183 
13184     xmlParseContent(ctxt);
13185 
13186     if ((RAW == '<') && (NXT(1) == '/')) {
13187 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13188     } else if (RAW != 0) {
13189 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13190     }
13191     if (ctxt->node != newDoc->children) {
13192 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13193     }
13194 
13195     if (!ctxt->wellFormed) {
13196         if (ctxt->errNo == 0)
13197 	    ret = XML_ERR_INTERNAL_ERROR;
13198 	else
13199 	    ret = (xmlParserErrors)ctxt->errNo;
13200     } else {
13201 	if (list != NULL) {
13202 	    xmlNodePtr cur;
13203 
13204 	    /*
13205 	     * Return the newly created nodeset after unlinking it from
13206 	     * they pseudo parent.
13207 	     */
13208 	    cur = newDoc->children->children;
13209 	    *list = cur;
13210 	    while (cur != NULL) {
13211 		cur->parent = NULL;
13212 		cur = cur->next;
13213 	    }
13214             newDoc->children->children = NULL;
13215 	}
13216 	ret = XML_ERR_OK;
13217     }
13218 
13219     /*
13220      * Record in the parent context the number of entities replacement
13221      * done when parsing that reference.
13222      */
13223     if (oldctxt != NULL)
13224         oldctxt->nbentities += ctxt->nbentities;
13225 
13226     /*
13227      * Also record the size of the entity parsed
13228      */
13229     if (ctxt->input != NULL && oldctxt != NULL) {
13230 	oldctxt->sizeentities += ctxt->input->consumed;
13231 	oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13232     }
13233     /*
13234      * And record the last error if any
13235      */
13236     if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13237         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13238 
13239     if (sax != NULL)
13240 	ctxt->sax = oldsax;
13241     if (oldctxt != NULL) {
13242         oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13243         oldctxt->node_seq.length = ctxt->node_seq.length;
13244         oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13245     }
13246     ctxt->node_seq.maximum = 0;
13247     ctxt->node_seq.length = 0;
13248     ctxt->node_seq.buffer = NULL;
13249     xmlFreeParserCtxt(ctxt);
13250     newDoc->intSubset = NULL;
13251     newDoc->extSubset = NULL;
13252     xmlFreeDoc(newDoc);
13253 
13254     return(ret);
13255 }
13256 
13257 #ifdef LIBXML_SAX1_ENABLED
13258 /**
13259  * xmlParseExternalEntity:
13260  * @doc:  the document the chunk pertains to
13261  * @sax:  the SAX handler bloc (possibly NULL)
13262  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13263  * @depth:  Used for loop detection, use 0
13264  * @URL:  the URL for the entity to load
13265  * @ID:  the System ID for the entity to load
13266  * @lst:  the return value for the set of parsed nodes
13267  *
13268  * Parse an external general entity
13269  * An external general parsed entity is well-formed if it matches the
13270  * production labeled extParsedEnt.
13271  *
13272  * [78] extParsedEnt ::= TextDecl? content
13273  *
13274  * Returns 0 if the entity is well formed, -1 in case of args problem and
13275  *    the parser error code otherwise
13276  */
13277 
13278 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)13279 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13280 	  int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13281     return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13282 		                       ID, lst));
13283 }
13284 
13285 /**
13286  * xmlParseBalancedChunkMemory:
13287  * @doc:  the document the chunk pertains to
13288  * @sax:  the SAX handler bloc (possibly NULL)
13289  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13290  * @depth:  Used for loop detection, use 0
13291  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13292  * @lst:  the return value for the set of parsed nodes
13293  *
13294  * Parse a well-balanced chunk of an XML document
13295  * called by the parser
13296  * The allowed sequence for the Well Balanced Chunk is the one defined by
13297  * the content production in the XML grammar:
13298  *
13299  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13300  *
13301  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13302  *    the parser error code otherwise
13303  */
13304 
13305 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)13306 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13307      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13308     return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13309                                                 depth, string, lst, 0 );
13310 }
13311 #endif /* LIBXML_SAX1_ENABLED */
13312 
13313 /**
13314  * xmlParseBalancedChunkMemoryInternal:
13315  * @oldctxt:  the existing parsing context
13316  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13317  * @user_data:  the user data field for the parser context
13318  * @lst:  the return value for the set of parsed nodes
13319  *
13320  *
13321  * Parse a well-balanced chunk of an XML document
13322  * called by the parser
13323  * The allowed sequence for the Well Balanced Chunk is the one defined by
13324  * the content production in the XML grammar:
13325  *
13326  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13327  *
13328  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13329  * error code otherwise
13330  *
13331  * In case recover is set to 1, the nodelist will not be empty even if
13332  * the parsed chunk is not well balanced.
13333  */
13334 static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,const xmlChar * string,void * user_data,xmlNodePtr * lst)13335 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13336 	const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13337     xmlParserCtxtPtr ctxt;
13338     xmlDocPtr newDoc = NULL;
13339     xmlNodePtr newRoot;
13340     xmlSAXHandlerPtr oldsax = NULL;
13341     xmlNodePtr content = NULL;
13342     xmlNodePtr last = NULL;
13343     int size;
13344     xmlParserErrors ret = XML_ERR_OK;
13345 #ifdef SAX2
13346     int i;
13347 #endif
13348 
13349     if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13350         (oldctxt->depth >  1024)) {
13351 	return(XML_ERR_ENTITY_LOOP);
13352     }
13353 
13354 
13355     if (lst != NULL)
13356         *lst = NULL;
13357     if (string == NULL)
13358         return(XML_ERR_INTERNAL_ERROR);
13359 
13360     size = xmlStrlen(string);
13361 
13362     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13363     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13364     if (user_data != NULL)
13365 	ctxt->userData = user_data;
13366     else
13367 	ctxt->userData = ctxt;
13368     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13369     ctxt->dict = oldctxt->dict;
13370     ctxt->input_id = oldctxt->input_id + 1;
13371     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13372     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13373     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13374 
13375 #ifdef SAX2
13376     /* propagate namespaces down the entity */
13377     for (i = 0;i < oldctxt->nsNr;i += 2) {
13378         nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13379     }
13380 #endif
13381 
13382     oldsax = ctxt->sax;
13383     ctxt->sax = oldctxt->sax;
13384     xmlDetectSAX2(ctxt);
13385     ctxt->replaceEntities = oldctxt->replaceEntities;
13386     ctxt->options = oldctxt->options;
13387 
13388     ctxt->_private = oldctxt->_private;
13389     if (oldctxt->myDoc == NULL) {
13390 	newDoc = xmlNewDoc(BAD_CAST "1.0");
13391 	if (newDoc == NULL) {
13392 	    ctxt->sax = oldsax;
13393 	    ctxt->dict = NULL;
13394 	    xmlFreeParserCtxt(ctxt);
13395 	    return(XML_ERR_INTERNAL_ERROR);
13396 	}
13397 	newDoc->properties = XML_DOC_INTERNAL;
13398 	newDoc->dict = ctxt->dict;
13399 	xmlDictReference(newDoc->dict);
13400 	ctxt->myDoc = newDoc;
13401     } else {
13402 	ctxt->myDoc = oldctxt->myDoc;
13403         content = ctxt->myDoc->children;
13404 	last = ctxt->myDoc->last;
13405     }
13406     newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13407     if (newRoot == NULL) {
13408 	ctxt->sax = oldsax;
13409 	ctxt->dict = NULL;
13410 	xmlFreeParserCtxt(ctxt);
13411 	if (newDoc != NULL) {
13412 	    xmlFreeDoc(newDoc);
13413 	}
13414 	return(XML_ERR_INTERNAL_ERROR);
13415     }
13416     ctxt->myDoc->children = NULL;
13417     ctxt->myDoc->last = NULL;
13418     xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13419     nodePush(ctxt, ctxt->myDoc->children);
13420     ctxt->instate = XML_PARSER_CONTENT;
13421     ctxt->depth = oldctxt->depth + 1;
13422 
13423     ctxt->validate = 0;
13424     ctxt->loadsubset = oldctxt->loadsubset;
13425     if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13426 	/*
13427 	 * ID/IDREF registration will be done in xmlValidateElement below
13428 	 */
13429 	ctxt->loadsubset |= XML_SKIP_IDS;
13430     }
13431     ctxt->dictNames = oldctxt->dictNames;
13432     ctxt->attsDefault = oldctxt->attsDefault;
13433     ctxt->attsSpecial = oldctxt->attsSpecial;
13434 
13435     xmlParseContent(ctxt);
13436     if ((RAW == '<') && (NXT(1) == '/')) {
13437 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13438     } else if (RAW != 0) {
13439 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13440     }
13441     if (ctxt->node != ctxt->myDoc->children) {
13442 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13443     }
13444 
13445     if (!ctxt->wellFormed) {
13446         if (ctxt->errNo == 0)
13447 	    ret = XML_ERR_INTERNAL_ERROR;
13448 	else
13449 	    ret = (xmlParserErrors)ctxt->errNo;
13450     } else {
13451       ret = XML_ERR_OK;
13452     }
13453 
13454     if ((lst != NULL) && (ret == XML_ERR_OK)) {
13455 	xmlNodePtr cur;
13456 
13457 	/*
13458 	 * Return the newly created nodeset after unlinking it from
13459 	 * they pseudo parent.
13460 	 */
13461 	cur = ctxt->myDoc->children->children;
13462 	*lst = cur;
13463 	while (cur != NULL) {
13464 #ifdef LIBXML_VALID_ENABLED
13465 	    if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13466 		(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13467 		(cur->type == XML_ELEMENT_NODE)) {
13468 		oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13469 			oldctxt->myDoc, cur);
13470 	    }
13471 #endif /* LIBXML_VALID_ENABLED */
13472 	    cur->parent = NULL;
13473 	    cur = cur->next;
13474 	}
13475 	ctxt->myDoc->children->children = NULL;
13476     }
13477     if (ctxt->myDoc != NULL) {
13478 	xmlFreeNode(ctxt->myDoc->children);
13479         ctxt->myDoc->children = content;
13480         ctxt->myDoc->last = last;
13481     }
13482 
13483     /*
13484      * Record in the parent context the number of entities replacement
13485      * done when parsing that reference.
13486      */
13487     if (oldctxt != NULL)
13488         oldctxt->nbentities += ctxt->nbentities;
13489 
13490     /*
13491      * Also record the last error if any
13492      */
13493     if (ctxt->lastError.code != XML_ERR_OK)
13494         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13495 
13496     ctxt->sax = oldsax;
13497     ctxt->dict = NULL;
13498     ctxt->attsDefault = NULL;
13499     ctxt->attsSpecial = NULL;
13500     xmlFreeParserCtxt(ctxt);
13501     if (newDoc != NULL) {
13502 	xmlFreeDoc(newDoc);
13503     }
13504 
13505     return(ret);
13506 }
13507 
13508 /**
13509  * xmlParseInNodeContext:
13510  * @node:  the context node
13511  * @data:  the input string
13512  * @datalen:  the input string length in bytes
13513  * @options:  a combination of xmlParserOption
13514  * @lst:  the return value for the set of parsed nodes
13515  *
13516  * Parse a well-balanced chunk of an XML document
13517  * within the context (DTD, namespaces, etc ...) of the given node.
13518  *
13519  * The allowed sequence for the data is a Well Balanced Chunk defined by
13520  * the content production in the XML grammar:
13521  *
13522  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13523  *
13524  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13525  * error code otherwise
13526  */
13527 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)13528 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13529                       int options, xmlNodePtr *lst) {
13530 #ifdef SAX2
13531     xmlParserCtxtPtr ctxt;
13532     xmlDocPtr doc = NULL;
13533     xmlNodePtr fake, cur;
13534     int nsnr = 0;
13535 
13536     xmlParserErrors ret = XML_ERR_OK;
13537 
13538     /*
13539      * check all input parameters, grab the document
13540      */
13541     if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13542         return(XML_ERR_INTERNAL_ERROR);
13543     switch (node->type) {
13544         case XML_ELEMENT_NODE:
13545         case XML_ATTRIBUTE_NODE:
13546         case XML_TEXT_NODE:
13547         case XML_CDATA_SECTION_NODE:
13548         case XML_ENTITY_REF_NODE:
13549         case XML_PI_NODE:
13550         case XML_COMMENT_NODE:
13551         case XML_DOCUMENT_NODE:
13552         case XML_HTML_DOCUMENT_NODE:
13553 	    break;
13554 	default:
13555 	    return(XML_ERR_INTERNAL_ERROR);
13556 
13557     }
13558     while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13559            (node->type != XML_DOCUMENT_NODE) &&
13560 	   (node->type != XML_HTML_DOCUMENT_NODE))
13561 	node = node->parent;
13562     if (node == NULL)
13563 	return(XML_ERR_INTERNAL_ERROR);
13564     if (node->type == XML_ELEMENT_NODE)
13565 	doc = node->doc;
13566     else
13567         doc = (xmlDocPtr) node;
13568     if (doc == NULL)
13569 	return(XML_ERR_INTERNAL_ERROR);
13570 
13571     /*
13572      * allocate a context and set-up everything not related to the
13573      * node position in the tree
13574      */
13575     if (doc->type == XML_DOCUMENT_NODE)
13576 	ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13577 #ifdef LIBXML_HTML_ENABLED
13578     else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13579 	ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13580         /*
13581          * When parsing in context, it makes no sense to add implied
13582          * elements like html/body/etc...
13583          */
13584         options |= HTML_PARSE_NOIMPLIED;
13585     }
13586 #endif
13587     else
13588         return(XML_ERR_INTERNAL_ERROR);
13589 
13590     if (ctxt == NULL)
13591         return(XML_ERR_NO_MEMORY);
13592 
13593     /*
13594      * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13595      * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13596      * we must wait until the last moment to free the original one.
13597      */
13598     if (doc->dict != NULL) {
13599         if (ctxt->dict != NULL)
13600 	    xmlDictFree(ctxt->dict);
13601 	ctxt->dict = doc->dict;
13602     } else
13603         options |= XML_PARSE_NODICT;
13604 
13605     if (doc->encoding != NULL) {
13606         xmlCharEncodingHandlerPtr hdlr;
13607 
13608         if (ctxt->encoding != NULL)
13609 	    xmlFree((xmlChar *) ctxt->encoding);
13610         ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13611 
13612         hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13613         if (hdlr != NULL) {
13614             xmlSwitchToEncoding(ctxt, hdlr);
13615 	} else {
13616             return(XML_ERR_UNSUPPORTED_ENCODING);
13617         }
13618     }
13619 
13620     xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13621     xmlDetectSAX2(ctxt);
13622     ctxt->myDoc = doc;
13623     /* parsing in context, i.e. as within existing content */
13624     ctxt->input_id = 2;
13625     ctxt->instate = XML_PARSER_CONTENT;
13626 
13627     fake = xmlNewComment(NULL);
13628     if (fake == NULL) {
13629         xmlFreeParserCtxt(ctxt);
13630 	return(XML_ERR_NO_MEMORY);
13631     }
13632     xmlAddChild(node, fake);
13633 
13634     if (node->type == XML_ELEMENT_NODE) {
13635 	nodePush(ctxt, node);
13636 	/*
13637 	 * initialize the SAX2 namespaces stack
13638 	 */
13639 	cur = node;
13640 	while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13641 	    xmlNsPtr ns = cur->nsDef;
13642 	    const xmlChar *iprefix, *ihref;
13643 
13644 	    while (ns != NULL) {
13645 		if (ctxt->dict) {
13646 		    iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13647 		    ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13648 		} else {
13649 		    iprefix = ns->prefix;
13650 		    ihref = ns->href;
13651 		}
13652 
13653 	        if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13654 		    nsPush(ctxt, iprefix, ihref);
13655 		    nsnr++;
13656 		}
13657 		ns = ns->next;
13658 	    }
13659 	    cur = cur->parent;
13660 	}
13661     }
13662 
13663     if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13664 	/*
13665 	 * ID/IDREF registration will be done in xmlValidateElement below
13666 	 */
13667 	ctxt->loadsubset |= XML_SKIP_IDS;
13668     }
13669 
13670 #ifdef LIBXML_HTML_ENABLED
13671     if (doc->type == XML_HTML_DOCUMENT_NODE)
13672         __htmlParseContent(ctxt);
13673     else
13674 #endif
13675 	xmlParseContent(ctxt);
13676 
13677     nsPop(ctxt, nsnr);
13678     if ((RAW == '<') && (NXT(1) == '/')) {
13679 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13680     } else if (RAW != 0) {
13681 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13682     }
13683     if ((ctxt->node != NULL) && (ctxt->node != node)) {
13684 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13685 	ctxt->wellFormed = 0;
13686     }
13687 
13688     if (!ctxt->wellFormed) {
13689         if (ctxt->errNo == 0)
13690 	    ret = XML_ERR_INTERNAL_ERROR;
13691 	else
13692 	    ret = (xmlParserErrors)ctxt->errNo;
13693     } else {
13694         ret = XML_ERR_OK;
13695     }
13696 
13697     /*
13698      * Return the newly created nodeset after unlinking it from
13699      * the pseudo sibling.
13700      */
13701 
13702     cur = fake->next;
13703     fake->next = NULL;
13704     node->last = fake;
13705 
13706     if (cur != NULL) {
13707 	cur->prev = NULL;
13708     }
13709 
13710     *lst = cur;
13711 
13712     while (cur != NULL) {
13713 	cur->parent = NULL;
13714 	cur = cur->next;
13715     }
13716 
13717     xmlUnlinkNode(fake);
13718     xmlFreeNode(fake);
13719 
13720 
13721     if (ret != XML_ERR_OK) {
13722         xmlFreeNodeList(*lst);
13723 	*lst = NULL;
13724     }
13725 
13726     if (doc->dict != NULL)
13727         ctxt->dict = NULL;
13728     xmlFreeParserCtxt(ctxt);
13729 
13730     return(ret);
13731 #else /* !SAX2 */
13732     return(XML_ERR_INTERNAL_ERROR);
13733 #endif
13734 }
13735 
13736 #ifdef LIBXML_SAX1_ENABLED
13737 /**
13738  * xmlParseBalancedChunkMemoryRecover:
13739  * @doc:  the document the chunk pertains to
13740  * @sax:  the SAX handler bloc (possibly NULL)
13741  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13742  * @depth:  Used for loop detection, use 0
13743  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13744  * @lst:  the return value for the set of parsed nodes
13745  * @recover: return nodes even if the data is broken (use 0)
13746  *
13747  *
13748  * Parse a well-balanced chunk of an XML document
13749  * called by the parser
13750  * The allowed sequence for the Well Balanced Chunk is the one defined by
13751  * the content production in the XML grammar:
13752  *
13753  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13754  *
13755  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13756  *    the parser error code otherwise
13757  *
13758  * In case recover is set to 1, the nodelist will not be empty even if
13759  * the parsed chunk is not well balanced, assuming the parsing succeeded to
13760  * some extent.
13761  */
13762 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)13763 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13764      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13765      int recover) {
13766     xmlParserCtxtPtr ctxt;
13767     xmlDocPtr newDoc;
13768     xmlSAXHandlerPtr oldsax = NULL;
13769     xmlNodePtr content, newRoot;
13770     int size;
13771     int ret = 0;
13772 
13773     if (depth > 40) {
13774 	return(XML_ERR_ENTITY_LOOP);
13775     }
13776 
13777 
13778     if (lst != NULL)
13779         *lst = NULL;
13780     if (string == NULL)
13781         return(-1);
13782 
13783     size = xmlStrlen(string);
13784 
13785     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13786     if (ctxt == NULL) return(-1);
13787     ctxt->userData = ctxt;
13788     if (sax != NULL) {
13789 	oldsax = ctxt->sax;
13790         ctxt->sax = sax;
13791 	if (user_data != NULL)
13792 	    ctxt->userData = user_data;
13793     }
13794     newDoc = xmlNewDoc(BAD_CAST "1.0");
13795     if (newDoc == NULL) {
13796 	xmlFreeParserCtxt(ctxt);
13797 	return(-1);
13798     }
13799     newDoc->properties = XML_DOC_INTERNAL;
13800     if ((doc != NULL) && (doc->dict != NULL)) {
13801         xmlDictFree(ctxt->dict);
13802 	ctxt->dict = doc->dict;
13803 	xmlDictReference(ctxt->dict);
13804 	ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13805 	ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13806 	ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13807 	ctxt->dictNames = 1;
13808     } else {
13809 	xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13810     }
13811     if (doc != NULL) {
13812 	newDoc->intSubset = doc->intSubset;
13813 	newDoc->extSubset = doc->extSubset;
13814     }
13815     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13816     if (newRoot == NULL) {
13817 	if (sax != NULL)
13818 	    ctxt->sax = oldsax;
13819 	xmlFreeParserCtxt(ctxt);
13820 	newDoc->intSubset = NULL;
13821 	newDoc->extSubset = NULL;
13822         xmlFreeDoc(newDoc);
13823 	return(-1);
13824     }
13825     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13826     nodePush(ctxt, newRoot);
13827     if (doc == NULL) {
13828 	ctxt->myDoc = newDoc;
13829     } else {
13830 	ctxt->myDoc = newDoc;
13831 	newDoc->children->doc = doc;
13832 	/* Ensure that doc has XML spec namespace */
13833 	xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13834 	newDoc->oldNs = doc->oldNs;
13835     }
13836     ctxt->instate = XML_PARSER_CONTENT;
13837     ctxt->input_id = 2;
13838     ctxt->depth = depth;
13839 
13840     /*
13841      * Doing validity checking on chunk doesn't make sense
13842      */
13843     ctxt->validate = 0;
13844     ctxt->loadsubset = 0;
13845     xmlDetectSAX2(ctxt);
13846 
13847     if ( doc != NULL ){
13848         content = doc->children;
13849         doc->children = NULL;
13850         xmlParseContent(ctxt);
13851         doc->children = content;
13852     }
13853     else {
13854         xmlParseContent(ctxt);
13855     }
13856     if ((RAW == '<') && (NXT(1) == '/')) {
13857 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13858     } else if (RAW != 0) {
13859 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13860     }
13861     if (ctxt->node != newDoc->children) {
13862 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13863     }
13864 
13865     if (!ctxt->wellFormed) {
13866         if (ctxt->errNo == 0)
13867 	    ret = 1;
13868 	else
13869 	    ret = ctxt->errNo;
13870     } else {
13871       ret = 0;
13872     }
13873 
13874     if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13875 	xmlNodePtr cur;
13876 
13877 	/*
13878 	 * Return the newly created nodeset after unlinking it from
13879 	 * they pseudo parent.
13880 	 */
13881 	cur = newDoc->children->children;
13882 	*lst = cur;
13883 	while (cur != NULL) {
13884 	    xmlSetTreeDoc(cur, doc);
13885 	    cur->parent = NULL;
13886 	    cur = cur->next;
13887 	}
13888 	newDoc->children->children = NULL;
13889     }
13890 
13891     if (sax != NULL)
13892 	ctxt->sax = oldsax;
13893     xmlFreeParserCtxt(ctxt);
13894     newDoc->intSubset = NULL;
13895     newDoc->extSubset = NULL;
13896     newDoc->oldNs = NULL;
13897     xmlFreeDoc(newDoc);
13898 
13899     return(ret);
13900 }
13901 
13902 /**
13903  * xmlSAXParseEntity:
13904  * @sax:  the SAX handler block
13905  * @filename:  the filename
13906  *
13907  * parse an XML external entity out of context and build a tree.
13908  * It use the given SAX function block to handle the parsing callback.
13909  * If sax is NULL, fallback to the default DOM tree building routines.
13910  *
13911  * [78] extParsedEnt ::= TextDecl? content
13912  *
13913  * This correspond to a "Well Balanced" chunk
13914  *
13915  * Returns the resulting document tree
13916  */
13917 
13918 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)13919 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13920     xmlDocPtr ret;
13921     xmlParserCtxtPtr ctxt;
13922 
13923     ctxt = xmlCreateFileParserCtxt(filename);
13924     if (ctxt == NULL) {
13925 	return(NULL);
13926     }
13927     if (sax != NULL) {
13928 	if (ctxt->sax != NULL)
13929 	    xmlFree(ctxt->sax);
13930         ctxt->sax = sax;
13931         ctxt->userData = NULL;
13932     }
13933 
13934     xmlParseExtParsedEnt(ctxt);
13935 
13936     if (ctxt->wellFormed)
13937 	ret = ctxt->myDoc;
13938     else {
13939         ret = NULL;
13940         xmlFreeDoc(ctxt->myDoc);
13941         ctxt->myDoc = NULL;
13942     }
13943     if (sax != NULL)
13944         ctxt->sax = NULL;
13945     xmlFreeParserCtxt(ctxt);
13946 
13947     return(ret);
13948 }
13949 
13950 /**
13951  * xmlParseEntity:
13952  * @filename:  the filename
13953  *
13954  * parse an XML external entity out of context and build a tree.
13955  *
13956  * [78] extParsedEnt ::= TextDecl? content
13957  *
13958  * This correspond to a "Well Balanced" chunk
13959  *
13960  * Returns the resulting document tree
13961  */
13962 
13963 xmlDocPtr
xmlParseEntity(const char * filename)13964 xmlParseEntity(const char *filename) {
13965     return(xmlSAXParseEntity(NULL, filename));
13966 }
13967 #endif /* LIBXML_SAX1_ENABLED */
13968 
13969 /**
13970  * xmlCreateEntityParserCtxtInternal:
13971  * @URL:  the entity URL
13972  * @ID:  the entity PUBLIC ID
13973  * @base:  a possible base for the target URI
13974  * @pctx:  parser context used to set options on new context
13975  *
13976  * Create a parser context for an external entity
13977  * Automatic support for ZLIB/Compress compressed document is provided
13978  * by default if found at compile-time.
13979  *
13980  * Returns the new parser context or NULL
13981  */
13982 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)13983 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13984 	                  const xmlChar *base, xmlParserCtxtPtr pctx) {
13985     xmlParserCtxtPtr ctxt;
13986     xmlParserInputPtr inputStream;
13987     char *directory = NULL;
13988     xmlChar *uri;
13989 
13990     ctxt = xmlNewParserCtxt();
13991     if (ctxt == NULL) {
13992 	return(NULL);
13993     }
13994 
13995     if (pctx != NULL) {
13996         ctxt->options = pctx->options;
13997         ctxt->_private = pctx->_private;
13998 	/*
13999 	 * this is a subparser of pctx, so the input_id should be
14000 	 * incremented to distinguish from main entity
14001 	 */
14002 	ctxt->input_id = pctx->input_id + 1;
14003     }
14004 
14005     uri = xmlBuildURI(URL, base);
14006 
14007     if (uri == NULL) {
14008 	inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14009 	if (inputStream == NULL) {
14010 	    xmlFreeParserCtxt(ctxt);
14011 	    return(NULL);
14012 	}
14013 
14014 	inputPush(ctxt, inputStream);
14015 
14016 	if ((ctxt->directory == NULL) && (directory == NULL))
14017 	    directory = xmlParserGetDirectory((char *)URL);
14018 	if ((ctxt->directory == NULL) && (directory != NULL))
14019 	    ctxt->directory = directory;
14020     } else {
14021 	inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14022 	if (inputStream == NULL) {
14023 	    xmlFree(uri);
14024 	    xmlFreeParserCtxt(ctxt);
14025 	    return(NULL);
14026 	}
14027 
14028 	inputPush(ctxt, inputStream);
14029 
14030 	if ((ctxt->directory == NULL) && (directory == NULL))
14031 	    directory = xmlParserGetDirectory((char *)uri);
14032 	if ((ctxt->directory == NULL) && (directory != NULL))
14033 	    ctxt->directory = directory;
14034 	xmlFree(uri);
14035     }
14036     return(ctxt);
14037 }
14038 
14039 /**
14040  * xmlCreateEntityParserCtxt:
14041  * @URL:  the entity URL
14042  * @ID:  the entity PUBLIC ID
14043  * @base:  a possible base for the target URI
14044  *
14045  * Create a parser context for an external entity
14046  * Automatic support for ZLIB/Compress compressed document is provided
14047  * by default if found at compile-time.
14048  *
14049  * Returns the new parser context or NULL
14050  */
14051 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)14052 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14053 	                  const xmlChar *base) {
14054     return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14055 
14056 }
14057 
14058 /************************************************************************
14059  *									*
14060  *		Front ends when parsing from a file			*
14061  *									*
14062  ************************************************************************/
14063 
14064 /**
14065  * xmlCreateURLParserCtxt:
14066  * @filename:  the filename or URL
14067  * @options:  a combination of xmlParserOption
14068  *
14069  * Create a parser context for a file or URL content.
14070  * Automatic support for ZLIB/Compress compressed document is provided
14071  * by default if found at compile-time and for file accesses
14072  *
14073  * Returns the new parser context or NULL
14074  */
14075 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)14076 xmlCreateURLParserCtxt(const char *filename, int options)
14077 {
14078     xmlParserCtxtPtr ctxt;
14079     xmlParserInputPtr inputStream;
14080     char *directory = NULL;
14081 
14082     ctxt = xmlNewParserCtxt();
14083     if (ctxt == NULL) {
14084 	xmlErrMemory(NULL, "cannot allocate parser context");
14085 	return(NULL);
14086     }
14087 
14088     if (options)
14089 	xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14090     ctxt->linenumbers = 1;
14091 
14092     inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14093     if (inputStream == NULL) {
14094 	xmlFreeParserCtxt(ctxt);
14095 	return(NULL);
14096     }
14097 
14098     inputPush(ctxt, inputStream);
14099     if ((ctxt->directory == NULL) && (directory == NULL))
14100         directory = xmlParserGetDirectory(filename);
14101     if ((ctxt->directory == NULL) && (directory != NULL))
14102         ctxt->directory = directory;
14103 
14104     return(ctxt);
14105 }
14106 
14107 /**
14108  * xmlCreateFileParserCtxt:
14109  * @filename:  the filename
14110  *
14111  * Create a parser context for a file content.
14112  * Automatic support for ZLIB/Compress compressed document is provided
14113  * by default if found at compile-time.
14114  *
14115  * Returns the new parser context or NULL
14116  */
14117 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)14118 xmlCreateFileParserCtxt(const char *filename)
14119 {
14120     return(xmlCreateURLParserCtxt(filename, 0));
14121 }
14122 
14123 #ifdef LIBXML_SAX1_ENABLED
14124 /**
14125  * xmlSAXParseFileWithData:
14126  * @sax:  the SAX handler block
14127  * @filename:  the filename
14128  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14129  *             documents
14130  * @data:  the userdata
14131  *
14132  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14133  * compressed document is provided by default if found at compile-time.
14134  * It use the given SAX function block to handle the parsing callback.
14135  * If sax is NULL, fallback to the default DOM tree building routines.
14136  *
14137  * User data (void *) is stored within the parser context in the
14138  * context's _private member, so it is available nearly everywhere in libxml
14139  *
14140  * Returns the resulting document tree
14141  */
14142 
14143 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)14144 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14145                         int recovery, void *data) {
14146     xmlDocPtr ret;
14147     xmlParserCtxtPtr ctxt;
14148 
14149     xmlInitParser();
14150 
14151     ctxt = xmlCreateFileParserCtxt(filename);
14152     if (ctxt == NULL) {
14153 	return(NULL);
14154     }
14155     if (sax != NULL) {
14156 	if (ctxt->sax != NULL)
14157 	    xmlFree(ctxt->sax);
14158         ctxt->sax = sax;
14159     }
14160     xmlDetectSAX2(ctxt);
14161     if (data!=NULL) {
14162 	ctxt->_private = data;
14163     }
14164 
14165     if (ctxt->directory == NULL)
14166         ctxt->directory = xmlParserGetDirectory(filename);
14167 
14168     ctxt->recovery = recovery;
14169 
14170     xmlParseDocument(ctxt);
14171 
14172     if ((ctxt->wellFormed) || recovery) {
14173         ret = ctxt->myDoc;
14174 	if (ret != NULL) {
14175 	    if (ctxt->input->buf->compressed > 0)
14176 		ret->compression = 9;
14177 	    else
14178 		ret->compression = ctxt->input->buf->compressed;
14179 	}
14180     }
14181     else {
14182        ret = NULL;
14183        xmlFreeDoc(ctxt->myDoc);
14184        ctxt->myDoc = NULL;
14185     }
14186     if (sax != NULL)
14187         ctxt->sax = NULL;
14188     xmlFreeParserCtxt(ctxt);
14189 
14190     return(ret);
14191 }
14192 
14193 /**
14194  * xmlSAXParseFile:
14195  * @sax:  the SAX handler block
14196  * @filename:  the filename
14197  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14198  *             documents
14199  *
14200  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14201  * compressed document is provided by default if found at compile-time.
14202  * It use the given SAX function block to handle the parsing callback.
14203  * If sax is NULL, fallback to the default DOM tree building routines.
14204  *
14205  * Returns the resulting document tree
14206  */
14207 
14208 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)14209 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14210                           int recovery) {
14211     return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14212 }
14213 
14214 /**
14215  * xmlRecoverDoc:
14216  * @cur:  a pointer to an array of xmlChar
14217  *
14218  * parse an XML in-memory document and build a tree.
14219  * In the case the document is not Well Formed, a attempt to build a
14220  * tree is tried anyway
14221  *
14222  * Returns the resulting document tree or NULL in case of failure
14223  */
14224 
14225 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)14226 xmlRecoverDoc(const xmlChar *cur) {
14227     return(xmlSAXParseDoc(NULL, cur, 1));
14228 }
14229 
14230 /**
14231  * xmlParseFile:
14232  * @filename:  the filename
14233  *
14234  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14235  * compressed document is provided by default if found at compile-time.
14236  *
14237  * Returns the resulting document tree if the file was wellformed,
14238  * NULL otherwise.
14239  */
14240 
14241 xmlDocPtr
xmlParseFile(const char * filename)14242 xmlParseFile(const char *filename) {
14243     return(xmlSAXParseFile(NULL, filename, 0));
14244 }
14245 
14246 /**
14247  * xmlRecoverFile:
14248  * @filename:  the filename
14249  *
14250  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14251  * compressed document is provided by default if found at compile-time.
14252  * In the case the document is not Well Formed, it attempts to build
14253  * a tree anyway
14254  *
14255  * Returns the resulting document tree or NULL in case of failure
14256  */
14257 
14258 xmlDocPtr
xmlRecoverFile(const char * filename)14259 xmlRecoverFile(const char *filename) {
14260     return(xmlSAXParseFile(NULL, filename, 1));
14261 }
14262 
14263 
14264 /**
14265  * xmlSetupParserForBuffer:
14266  * @ctxt:  an XML parser context
14267  * @buffer:  a xmlChar * buffer
14268  * @filename:  a file name
14269  *
14270  * Setup the parser context to parse a new buffer; Clears any prior
14271  * contents from the parser context. The buffer parameter must not be
14272  * NULL, but the filename parameter can be
14273  */
14274 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)14275 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14276                              const char* filename)
14277 {
14278     xmlParserInputPtr input;
14279 
14280     if ((ctxt == NULL) || (buffer == NULL))
14281         return;
14282 
14283     input = xmlNewInputStream(ctxt);
14284     if (input == NULL) {
14285         xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14286         xmlClearParserCtxt(ctxt);
14287         return;
14288     }
14289 
14290     xmlClearParserCtxt(ctxt);
14291     if (filename != NULL)
14292         input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14293     input->base = buffer;
14294     input->cur = buffer;
14295     input->end = &buffer[xmlStrlen(buffer)];
14296     inputPush(ctxt, input);
14297 }
14298 
14299 /**
14300  * xmlSAXUserParseFile:
14301  * @sax:  a SAX handler
14302  * @user_data:  The user data returned on SAX callbacks
14303  * @filename:  a file name
14304  *
14305  * parse an XML file and call the given SAX handler routines.
14306  * Automatic support for ZLIB/Compress compressed document is provided
14307  *
14308  * Returns 0 in case of success or a error number otherwise
14309  */
14310 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)14311 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14312                     const char *filename) {
14313     int ret = 0;
14314     xmlParserCtxtPtr ctxt;
14315 
14316     ctxt = xmlCreateFileParserCtxt(filename);
14317     if (ctxt == NULL) return -1;
14318     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14319 	xmlFree(ctxt->sax);
14320     ctxt->sax = sax;
14321     xmlDetectSAX2(ctxt);
14322 
14323     if (user_data != NULL)
14324 	ctxt->userData = user_data;
14325 
14326     xmlParseDocument(ctxt);
14327 
14328     if (ctxt->wellFormed)
14329 	ret = 0;
14330     else {
14331         if (ctxt->errNo != 0)
14332 	    ret = ctxt->errNo;
14333 	else
14334 	    ret = -1;
14335     }
14336     if (sax != NULL)
14337 	ctxt->sax = NULL;
14338     if (ctxt->myDoc != NULL) {
14339         xmlFreeDoc(ctxt->myDoc);
14340 	ctxt->myDoc = NULL;
14341     }
14342     xmlFreeParserCtxt(ctxt);
14343 
14344     return ret;
14345 }
14346 #endif /* LIBXML_SAX1_ENABLED */
14347 
14348 /************************************************************************
14349  *									*
14350  *		Front ends when parsing from memory			*
14351  *									*
14352  ************************************************************************/
14353 
14354 /**
14355  * xmlCreateMemoryParserCtxt:
14356  * @buffer:  a pointer to a char array
14357  * @size:  the size of the array
14358  *
14359  * Create a parser context for an XML in-memory document.
14360  *
14361  * Returns the new parser context or NULL
14362  */
14363 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)14364 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14365     xmlParserCtxtPtr ctxt;
14366     xmlParserInputPtr input;
14367     xmlParserInputBufferPtr buf;
14368 
14369     if (buffer == NULL)
14370 	return(NULL);
14371     if (size <= 0)
14372 	return(NULL);
14373 
14374     ctxt = xmlNewParserCtxt();
14375     if (ctxt == NULL)
14376 	return(NULL);
14377 
14378     /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14379     buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14380     if (buf == NULL) {
14381 	xmlFreeParserCtxt(ctxt);
14382 	return(NULL);
14383     }
14384 
14385     input = xmlNewInputStream(ctxt);
14386     if (input == NULL) {
14387 	xmlFreeParserInputBuffer(buf);
14388 	xmlFreeParserCtxt(ctxt);
14389 	return(NULL);
14390     }
14391 
14392     input->filename = NULL;
14393     input->buf = buf;
14394     xmlBufResetInput(input->buf->buffer, input);
14395 
14396     inputPush(ctxt, input);
14397     return(ctxt);
14398 }
14399 
14400 #ifdef LIBXML_SAX1_ENABLED
14401 /**
14402  * xmlSAXParseMemoryWithData:
14403  * @sax:  the SAX handler block
14404  * @buffer:  an pointer to a char array
14405  * @size:  the size of the array
14406  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14407  *             documents
14408  * @data:  the userdata
14409  *
14410  * parse an XML in-memory block and use the given SAX function block
14411  * to handle the parsing callback. If sax is NULL, fallback to the default
14412  * DOM tree building routines.
14413  *
14414  * User data (void *) is stored within the parser context in the
14415  * context's _private member, so it is available nearly everywhere in libxml
14416  *
14417  * Returns the resulting document tree
14418  */
14419 
14420 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)14421 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14422 	          int size, int recovery, void *data) {
14423     xmlDocPtr ret;
14424     xmlParserCtxtPtr ctxt;
14425 
14426     xmlInitParser();
14427 
14428     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14429     if (ctxt == NULL) return(NULL);
14430     if (sax != NULL) {
14431 	if (ctxt->sax != NULL)
14432 	    xmlFree(ctxt->sax);
14433         ctxt->sax = sax;
14434     }
14435     xmlDetectSAX2(ctxt);
14436     if (data!=NULL) {
14437 	ctxt->_private=data;
14438     }
14439 
14440     ctxt->recovery = recovery;
14441 
14442     xmlParseDocument(ctxt);
14443 
14444     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14445     else {
14446        ret = NULL;
14447        xmlFreeDoc(ctxt->myDoc);
14448        ctxt->myDoc = NULL;
14449     }
14450     if (sax != NULL)
14451 	ctxt->sax = NULL;
14452     xmlFreeParserCtxt(ctxt);
14453 
14454     return(ret);
14455 }
14456 
14457 /**
14458  * xmlSAXParseMemory:
14459  * @sax:  the SAX handler block
14460  * @buffer:  an pointer to a char array
14461  * @size:  the size of the array
14462  * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14463  *             documents
14464  *
14465  * parse an XML in-memory block and use the given SAX function block
14466  * to handle the parsing callback. If sax is NULL, fallback to the default
14467  * DOM tree building routines.
14468  *
14469  * Returns the resulting document tree
14470  */
14471 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)14472 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14473 	          int size, int recovery) {
14474     return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14475 }
14476 
14477 /**
14478  * xmlParseMemory:
14479  * @buffer:  an pointer to a char array
14480  * @size:  the size of the array
14481  *
14482  * parse an XML in-memory block and build a tree.
14483  *
14484  * Returns the resulting document tree
14485  */
14486 
xmlParseMemory(const char * buffer,int size)14487 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14488    return(xmlSAXParseMemory(NULL, buffer, size, 0));
14489 }
14490 
14491 /**
14492  * xmlRecoverMemory:
14493  * @buffer:  an pointer to a char array
14494  * @size:  the size of the array
14495  *
14496  * parse an XML in-memory block and build a tree.
14497  * In the case the document is not Well Formed, an attempt to
14498  * build a tree is tried anyway
14499  *
14500  * Returns the resulting document tree or NULL in case of error
14501  */
14502 
xmlRecoverMemory(const char * buffer,int size)14503 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14504    return(xmlSAXParseMemory(NULL, buffer, size, 1));
14505 }
14506 
14507 /**
14508  * xmlSAXUserParseMemory:
14509  * @sax:  a SAX handler
14510  * @user_data:  The user data returned on SAX callbacks
14511  * @buffer:  an in-memory XML document input
14512  * @size:  the length of the XML document in bytes
14513  *
14514  * A better SAX parsing routine.
14515  * parse an XML in-memory buffer and call the given SAX handler routines.
14516  *
14517  * Returns 0 in case of success or a error number otherwise
14518  */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)14519 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14520 			  const char *buffer, int size) {
14521     int ret = 0;
14522     xmlParserCtxtPtr ctxt;
14523 
14524     xmlInitParser();
14525 
14526     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14527     if (ctxt == NULL) return -1;
14528     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14529         xmlFree(ctxt->sax);
14530     ctxt->sax = sax;
14531     xmlDetectSAX2(ctxt);
14532 
14533     if (user_data != NULL)
14534 	ctxt->userData = user_data;
14535 
14536     xmlParseDocument(ctxt);
14537 
14538     if (ctxt->wellFormed)
14539 	ret = 0;
14540     else {
14541         if (ctxt->errNo != 0)
14542 	    ret = ctxt->errNo;
14543 	else
14544 	    ret = -1;
14545     }
14546     if (sax != NULL)
14547         ctxt->sax = NULL;
14548     if (ctxt->myDoc != NULL) {
14549         xmlFreeDoc(ctxt->myDoc);
14550 	ctxt->myDoc = NULL;
14551     }
14552     xmlFreeParserCtxt(ctxt);
14553 
14554     return ret;
14555 }
14556 #endif /* LIBXML_SAX1_ENABLED */
14557 
14558 /**
14559  * xmlCreateDocParserCtxt:
14560  * @cur:  a pointer to an array of xmlChar
14561  *
14562  * Creates a parser context for an XML in-memory document.
14563  *
14564  * Returns the new parser context or NULL
14565  */
14566 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)14567 xmlCreateDocParserCtxt(const xmlChar *cur) {
14568     int len;
14569 
14570     if (cur == NULL)
14571 	return(NULL);
14572     len = xmlStrlen(cur);
14573     return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14574 }
14575 
14576 #ifdef LIBXML_SAX1_ENABLED
14577 /**
14578  * xmlSAXParseDoc:
14579  * @sax:  the SAX handler block
14580  * @cur:  a pointer to an array of xmlChar
14581  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14582  *             documents
14583  *
14584  * parse an XML in-memory document and build a tree.
14585  * It use the given SAX function block to handle the parsing callback.
14586  * If sax is NULL, fallback to the default DOM tree building routines.
14587  *
14588  * Returns the resulting document tree
14589  */
14590 
14591 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14592 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14593     xmlDocPtr ret;
14594     xmlParserCtxtPtr ctxt;
14595     xmlSAXHandlerPtr oldsax = NULL;
14596 
14597     if (cur == NULL) return(NULL);
14598 
14599 
14600     ctxt = xmlCreateDocParserCtxt(cur);
14601     if (ctxt == NULL) return(NULL);
14602     if (sax != NULL) {
14603         oldsax = ctxt->sax;
14604         ctxt->sax = sax;
14605         ctxt->userData = NULL;
14606     }
14607     xmlDetectSAX2(ctxt);
14608 
14609     xmlParseDocument(ctxt);
14610     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14611     else {
14612        ret = NULL;
14613        xmlFreeDoc(ctxt->myDoc);
14614        ctxt->myDoc = NULL;
14615     }
14616     if (sax != NULL)
14617 	ctxt->sax = oldsax;
14618     xmlFreeParserCtxt(ctxt);
14619 
14620     return(ret);
14621 }
14622 
14623 /**
14624  * xmlParseDoc:
14625  * @cur:  a pointer to an array of xmlChar
14626  *
14627  * parse an XML in-memory document and build a tree.
14628  *
14629  * Returns the resulting document tree
14630  */
14631 
14632 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14633 xmlParseDoc(const xmlChar *cur) {
14634     return(xmlSAXParseDoc(NULL, cur, 0));
14635 }
14636 #endif /* LIBXML_SAX1_ENABLED */
14637 
14638 #ifdef LIBXML_LEGACY_ENABLED
14639 /************************************************************************
14640  *									*
14641  *	Specific function to keep track of entities references		*
14642  *	and used by the XSLT debugger					*
14643  *									*
14644  ************************************************************************/
14645 
14646 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14647 
14648 /**
14649  * xmlAddEntityReference:
14650  * @ent : A valid entity
14651  * @firstNode : A valid first node for children of entity
14652  * @lastNode : A valid last node of children entity
14653  *
14654  * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14655  */
14656 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14657 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14658                       xmlNodePtr lastNode)
14659 {
14660     if (xmlEntityRefFunc != NULL) {
14661         (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14662     }
14663 }
14664 
14665 
14666 /**
14667  * xmlSetEntityReferenceFunc:
14668  * @func: A valid function
14669  *
14670  * Set the function to call call back when a xml reference has been made
14671  */
14672 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14673 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14674 {
14675     xmlEntityRefFunc = func;
14676 }
14677 #endif /* LIBXML_LEGACY_ENABLED */
14678 
14679 /************************************************************************
14680  *									*
14681  *				Miscellaneous				*
14682  *									*
14683  ************************************************************************/
14684 
14685 #ifdef LIBXML_XPATH_ENABLED
14686 #include <libxml/xpath.h>
14687 #endif
14688 
14689 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14690 static int xmlParserInitialized = 0;
14691 
14692 /**
14693  * xmlInitParser:
14694  *
14695  * Initialization function for the XML parser.
14696  * This is not reentrant. Call once before processing in case of
14697  * use in multithreaded programs.
14698  */
14699 
14700 void
xmlInitParser(void)14701 xmlInitParser(void) {
14702     if (xmlParserInitialized != 0)
14703 	return;
14704 
14705 #ifdef LIBXML_THREAD_ENABLED
14706     __xmlGlobalInitMutexLock();
14707     if (xmlParserInitialized == 0) {
14708 #endif
14709 	xmlInitThreads();
14710 	xmlInitGlobals();
14711 	if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14712 	    (xmlGenericError == NULL))
14713 	    initGenericErrorDefaultFunc(NULL);
14714 	xmlInitMemory();
14715         xmlInitializeDict();
14716 	xmlInitCharEncodingHandlers();
14717 	xmlDefaultSAXHandlerInit();
14718 	xmlRegisterDefaultInputCallbacks();
14719 #ifdef LIBXML_OUTPUT_ENABLED
14720 	xmlRegisterDefaultOutputCallbacks();
14721 #endif /* LIBXML_OUTPUT_ENABLED */
14722 #ifdef LIBXML_HTML_ENABLED
14723 	htmlInitAutoClose();
14724 	htmlDefaultSAXHandlerInit();
14725 #endif
14726 #ifdef LIBXML_XPATH_ENABLED
14727 	xmlXPathInit();
14728 #endif
14729 	xmlParserInitialized = 1;
14730 #ifdef LIBXML_THREAD_ENABLED
14731     }
14732     __xmlGlobalInitMutexUnlock();
14733 #endif
14734 }
14735 
14736 /**
14737  * xmlCleanupParser:
14738  *
14739  * This function name is somewhat misleading. It does not clean up
14740  * parser state, it cleans up memory allocated by the library itself.
14741  * It is a cleanup function for the XML library. It tries to reclaim all
14742  * related global memory allocated for the library processing.
14743  * It doesn't deallocate any document related memory. One should
14744  * call xmlCleanupParser() only when the process has finished using
14745  * the library and all XML/HTML documents built with it.
14746  * See also xmlInitParser() which has the opposite function of preparing
14747  * the library for operations.
14748  *
14749  * WARNING: if your application is multithreaded or has plugin support
14750  *          calling this may crash the application if another thread or
14751  *          a plugin is still using libxml2. It's sometimes very hard to
14752  *          guess if libxml2 is in use in the application, some libraries
14753  *          or plugins may use it without notice. In case of doubt abstain
14754  *          from calling this function or do it just before calling exit()
14755  *          to avoid leak reports from valgrind !
14756  */
14757 
14758 void
xmlCleanupParser(void)14759 xmlCleanupParser(void) {
14760     if (!xmlParserInitialized)
14761 	return;
14762 
14763     xmlCleanupCharEncodingHandlers();
14764 #ifdef LIBXML_CATALOG_ENABLED
14765     xmlCatalogCleanup();
14766 #endif
14767     xmlDictCleanup();
14768     xmlCleanupInputCallbacks();
14769 #ifdef LIBXML_OUTPUT_ENABLED
14770     xmlCleanupOutputCallbacks();
14771 #endif
14772 #ifdef LIBXML_SCHEMAS_ENABLED
14773     xmlSchemaCleanupTypes();
14774     xmlRelaxNGCleanupTypes();
14775 #endif
14776     xmlResetLastError();
14777     xmlCleanupGlobals();
14778     xmlCleanupThreads(); /* must be last if called not from the main thread */
14779     xmlCleanupMemory();
14780     xmlParserInitialized = 0;
14781 }
14782 
14783 /************************************************************************
14784  *									*
14785  *	New set (2.6.0) of simpler and more flexible APIs		*
14786  *									*
14787  ************************************************************************/
14788 
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is used; a NULL @str is ignored.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * a single statement and can be used safely in if/else constructs.
 * Invocations must be followed by a semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14800 
14801 /**
14802  * xmlCtxtReset:
14803  * @ctxt: an XML parser context
14804  *
14805  * Reset a parser context
14806  */
14807 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)14808 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14809 {
14810     xmlParserInputPtr input;
14811     xmlDictPtr dict;
14812 
14813     if (ctxt == NULL)
14814         return;
14815 
14816     dict = ctxt->dict;
14817 
14818     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14819         xmlFreeInputStream(input);
14820     }
14821     ctxt->inputNr = 0;
14822     ctxt->input = NULL;
14823 
14824     ctxt->spaceNr = 0;
14825     if (ctxt->spaceTab != NULL) {
14826 	ctxt->spaceTab[0] = -1;
14827 	ctxt->space = &ctxt->spaceTab[0];
14828     } else {
14829         ctxt->space = NULL;
14830     }
14831 
14832 
14833     ctxt->nodeNr = 0;
14834     ctxt->node = NULL;
14835 
14836     ctxt->nameNr = 0;
14837     ctxt->name = NULL;
14838 
14839     DICT_FREE(ctxt->version);
14840     ctxt->version = NULL;
14841     DICT_FREE(ctxt->encoding);
14842     ctxt->encoding = NULL;
14843     DICT_FREE(ctxt->directory);
14844     ctxt->directory = NULL;
14845     DICT_FREE(ctxt->extSubURI);
14846     ctxt->extSubURI = NULL;
14847     DICT_FREE(ctxt->extSubSystem);
14848     ctxt->extSubSystem = NULL;
14849     if (ctxt->myDoc != NULL)
14850         xmlFreeDoc(ctxt->myDoc);
14851     ctxt->myDoc = NULL;
14852 
14853     ctxt->standalone = -1;
14854     ctxt->hasExternalSubset = 0;
14855     ctxt->hasPErefs = 0;
14856     ctxt->html = 0;
14857     ctxt->external = 0;
14858     ctxt->instate = XML_PARSER_START;
14859     ctxt->token = 0;
14860 
14861     ctxt->wellFormed = 1;
14862     ctxt->nsWellFormed = 1;
14863     ctxt->disableSAX = 0;
14864     ctxt->valid = 1;
14865 #if 0
14866     ctxt->vctxt.userData = ctxt;
14867     ctxt->vctxt.error = xmlParserValidityError;
14868     ctxt->vctxt.warning = xmlParserValidityWarning;
14869 #endif
14870     ctxt->record_info = 0;
14871     ctxt->nbChars = 0;
14872     ctxt->checkIndex = 0;
14873     ctxt->inSubset = 0;
14874     ctxt->errNo = XML_ERR_OK;
14875     ctxt->depth = 0;
14876     ctxt->charset = XML_CHAR_ENCODING_UTF8;
14877     ctxt->catalogs = NULL;
14878     ctxt->nbentities = 0;
14879     ctxt->sizeentities = 0;
14880     ctxt->sizeentcopy = 0;
14881     xmlInitNodeInfoSeq(&ctxt->node_seq);
14882 
14883     if (ctxt->attsDefault != NULL) {
14884         xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14885         ctxt->attsDefault = NULL;
14886     }
14887     if (ctxt->attsSpecial != NULL) {
14888         xmlHashFree(ctxt->attsSpecial, NULL);
14889         ctxt->attsSpecial = NULL;
14890     }
14891 
14892 #ifdef LIBXML_CATALOG_ENABLED
14893     if (ctxt->catalogs != NULL)
14894 	xmlCatalogFreeLocal(ctxt->catalogs);
14895 #endif
14896     if (ctxt->lastError.code != XML_ERR_OK)
14897         xmlResetError(&ctxt->lastError);
14898 }
14899 
14900 /**
14901  * xmlCtxtResetPush:
14902  * @ctxt: an XML parser context
14903  * @chunk:  a pointer to an array of chars
14904  * @size:  number of chars in the array
14905  * @filename:  an optional file name or URI
14906  * @encoding:  the document encoding, or NULL
14907  *
14908  * Reset a push parser context
14909  *
14910  * Returns 0 in case of success and 1 in case of error
14911  */
14912 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)14913 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14914                  int size, const char *filename, const char *encoding)
14915 {
14916     xmlParserInputPtr inputStream;
14917     xmlParserInputBufferPtr buf;
14918     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14919 
14920     if (ctxt == NULL)
14921         return(1);
14922 
14923     if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14924         enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14925 
14926     buf = xmlAllocParserInputBuffer(enc);
14927     if (buf == NULL)
14928         return(1);
14929 
14930     if (ctxt == NULL) {
14931         xmlFreeParserInputBuffer(buf);
14932         return(1);
14933     }
14934 
14935     xmlCtxtReset(ctxt);
14936 
14937     if (ctxt->pushTab == NULL) {
14938         ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14939 	                                    sizeof(xmlChar *));
14940         if (ctxt->pushTab == NULL) {
14941 	    xmlErrMemory(ctxt, NULL);
14942             xmlFreeParserInputBuffer(buf);
14943             return(1);
14944         }
14945     }
14946 
14947     if (filename == NULL) {
14948         ctxt->directory = NULL;
14949     } else {
14950         ctxt->directory = xmlParserGetDirectory(filename);
14951     }
14952 
14953     inputStream = xmlNewInputStream(ctxt);
14954     if (inputStream == NULL) {
14955         xmlFreeParserInputBuffer(buf);
14956         return(1);
14957     }
14958 
14959     if (filename == NULL)
14960         inputStream->filename = NULL;
14961     else
14962         inputStream->filename = (char *)
14963             xmlCanonicPath((const xmlChar *) filename);
14964     inputStream->buf = buf;
14965     xmlBufResetInput(buf->buffer, inputStream);
14966 
14967     inputPush(ctxt, inputStream);
14968 
14969     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14970         (ctxt->input->buf != NULL)) {
14971 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14972         size_t cur = ctxt->input->cur - ctxt->input->base;
14973 
14974         xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14975 
14976         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14977 #ifdef DEBUG_PUSH
14978         xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14979 #endif
14980     }
14981 
14982     if (encoding != NULL) {
14983         xmlCharEncodingHandlerPtr hdlr;
14984 
14985         if (ctxt->encoding != NULL)
14986 	    xmlFree((xmlChar *) ctxt->encoding);
14987         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14988 
14989         hdlr = xmlFindCharEncodingHandler(encoding);
14990         if (hdlr != NULL) {
14991             xmlSwitchToEncoding(ctxt, hdlr);
14992 	} else {
14993 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14994 			      "Unsupported encoding %s\n", BAD_CAST encoding);
14995         }
14996     } else if (enc != XML_CHAR_ENCODING_NONE) {
14997         xmlSwitchEncoding(ctxt, enc);
14998     }
14999 
15000     return(0);
15001 }
15002 
15003 
15004 /**
15005  * xmlCtxtUseOptionsInternal:
15006  * @ctxt: an XML parser context
15007  * @options:  a combination of xmlParserOption
15008  * @encoding:  the user provided encoding to use
15009  *
15010  * Applies the options to the parser context
15011  *
15012  * Returns 0 in case of success, the set of unknown or unimplemented options
15013  *         in case of error.
15014  */
15015 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)15016 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15017 {
15018     if (ctxt == NULL)
15019         return(-1);
15020     if (encoding != NULL) {
15021         if (ctxt->encoding != NULL)
15022 	    xmlFree((xmlChar *) ctxt->encoding);
15023         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15024     }
15025     if (options & XML_PARSE_RECOVER) {
15026         ctxt->recovery = 1;
15027         options -= XML_PARSE_RECOVER;
15028 	ctxt->options |= XML_PARSE_RECOVER;
15029     } else
15030         ctxt->recovery = 0;
15031     if (options & XML_PARSE_DTDLOAD) {
15032         ctxt->loadsubset = XML_DETECT_IDS;
15033         options -= XML_PARSE_DTDLOAD;
15034 	ctxt->options |= XML_PARSE_DTDLOAD;
15035     } else
15036         ctxt->loadsubset = 0;
15037     if (options & XML_PARSE_DTDATTR) {
15038         ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15039         options -= XML_PARSE_DTDATTR;
15040 	ctxt->options |= XML_PARSE_DTDATTR;
15041     }
15042     if (options & XML_PARSE_NOENT) {
15043         ctxt->replaceEntities = 1;
15044         /* ctxt->loadsubset |= XML_DETECT_IDS; */
15045         options -= XML_PARSE_NOENT;
15046 	ctxt->options |= XML_PARSE_NOENT;
15047     } else
15048         ctxt->replaceEntities = 0;
15049     if (options & XML_PARSE_PEDANTIC) {
15050         ctxt->pedantic = 1;
15051         options -= XML_PARSE_PEDANTIC;
15052 	ctxt->options |= XML_PARSE_PEDANTIC;
15053     } else
15054         ctxt->pedantic = 0;
15055     if (options & XML_PARSE_NOBLANKS) {
15056         ctxt->keepBlanks = 0;
15057         ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15058         options -= XML_PARSE_NOBLANKS;
15059 	ctxt->options |= XML_PARSE_NOBLANKS;
15060     } else
15061         ctxt->keepBlanks = 1;
15062     if (options & XML_PARSE_DTDVALID) {
15063         ctxt->validate = 1;
15064         if (options & XML_PARSE_NOWARNING)
15065             ctxt->vctxt.warning = NULL;
15066         if (options & XML_PARSE_NOERROR)
15067             ctxt->vctxt.error = NULL;
15068         options -= XML_PARSE_DTDVALID;
15069 	ctxt->options |= XML_PARSE_DTDVALID;
15070     } else
15071         ctxt->validate = 0;
15072     if (options & XML_PARSE_NOWARNING) {
15073         ctxt->sax->warning = NULL;
15074         options -= XML_PARSE_NOWARNING;
15075     }
15076     if (options & XML_PARSE_NOERROR) {
15077         ctxt->sax->error = NULL;
15078         ctxt->sax->fatalError = NULL;
15079         options -= XML_PARSE_NOERROR;
15080     }
15081 #ifdef LIBXML_SAX1_ENABLED
15082     if (options & XML_PARSE_SAX1) {
15083         ctxt->sax->startElement = xmlSAX2StartElement;
15084         ctxt->sax->endElement = xmlSAX2EndElement;
15085         ctxt->sax->startElementNs = NULL;
15086         ctxt->sax->endElementNs = NULL;
15087         ctxt->sax->initialized = 1;
15088         options -= XML_PARSE_SAX1;
15089 	ctxt->options |= XML_PARSE_SAX1;
15090     }
15091 #endif /* LIBXML_SAX1_ENABLED */
15092     if (options & XML_PARSE_NODICT) {
15093         ctxt->dictNames = 0;
15094         options -= XML_PARSE_NODICT;
15095 	ctxt->options |= XML_PARSE_NODICT;
15096     } else {
15097         ctxt->dictNames = 1;
15098     }
15099     if (options & XML_PARSE_NOCDATA) {
15100         ctxt->sax->cdataBlock = NULL;
15101         options -= XML_PARSE_NOCDATA;
15102 	ctxt->options |= XML_PARSE_NOCDATA;
15103     }
15104     if (options & XML_PARSE_NSCLEAN) {
15105 	ctxt->options |= XML_PARSE_NSCLEAN;
15106         options -= XML_PARSE_NSCLEAN;
15107     }
15108     if (options & XML_PARSE_NONET) {
15109 	ctxt->options |= XML_PARSE_NONET;
15110         options -= XML_PARSE_NONET;
15111     }
15112     if (options & XML_PARSE_COMPACT) {
15113 	ctxt->options |= XML_PARSE_COMPACT;
15114         options -= XML_PARSE_COMPACT;
15115     }
15116     if (options & XML_PARSE_OLD10) {
15117 	ctxt->options |= XML_PARSE_OLD10;
15118         options -= XML_PARSE_OLD10;
15119     }
15120     if (options & XML_PARSE_NOBASEFIX) {
15121 	ctxt->options |= XML_PARSE_NOBASEFIX;
15122         options -= XML_PARSE_NOBASEFIX;
15123     }
15124     if (options & XML_PARSE_HUGE) {
15125 	ctxt->options |= XML_PARSE_HUGE;
15126         options -= XML_PARSE_HUGE;
15127         if (ctxt->dict != NULL)
15128             xmlDictSetLimit(ctxt->dict, 0);
15129     }
15130     if (options & XML_PARSE_OLDSAX) {
15131 	ctxt->options |= XML_PARSE_OLDSAX;
15132         options -= XML_PARSE_OLDSAX;
15133     }
15134     if (options & XML_PARSE_IGNORE_ENC) {
15135 	ctxt->options |= XML_PARSE_IGNORE_ENC;
15136         options -= XML_PARSE_IGNORE_ENC;
15137     }
15138     if (options & XML_PARSE_BIG_LINES) {
15139 	ctxt->options |= XML_PARSE_BIG_LINES;
15140         options -= XML_PARSE_BIG_LINES;
15141     }
15142     ctxt->linenumbers = 1;
15143     return (options);
15144 }
15145 
15146 /**
15147  * xmlCtxtUseOptions:
15148  * @ctxt: an XML parser context
15149  * @options:  a combination of xmlParserOption
15150  *
15151  * Applies the options to the parser context
15152  *
15153  * Returns 0 in case of success, the set of unknown or unimplemented options
15154  *         in case of error.
15155  */
15156 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)15157 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15158 {
15159    return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15160 }
15161 
15162 /**
15163  * xmlDoRead:
15164  * @ctxt:  an XML parser context
15165  * @URL:  the base URL to use for the document
15166  * @encoding:  the document encoding, or NULL
15167  * @options:  a combination of xmlParserOption
15168  * @reuse:  keep the context for reuse
15169  *
15170  * Common front-end for the xmlRead functions
15171  *
15172  * Returns the resulting document tree or NULL
15173  */
15174 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)15175 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15176           int options, int reuse)
15177 {
15178     xmlDocPtr ret;
15179 
15180     xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15181     if (encoding != NULL) {
15182         xmlCharEncodingHandlerPtr hdlr;
15183 
15184 	hdlr = xmlFindCharEncodingHandler(encoding);
15185 	if (hdlr != NULL)
15186 	    xmlSwitchToEncoding(ctxt, hdlr);
15187     }
15188     if ((URL != NULL) && (ctxt->input != NULL) &&
15189         (ctxt->input->filename == NULL))
15190         ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15191     xmlParseDocument(ctxt);
15192     if ((ctxt->wellFormed) || ctxt->recovery)
15193         ret = ctxt->myDoc;
15194     else {
15195         ret = NULL;
15196 	if (ctxt->myDoc != NULL) {
15197 	    xmlFreeDoc(ctxt->myDoc);
15198 	}
15199     }
15200     ctxt->myDoc = NULL;
15201     if (!reuse) {
15202 	xmlFreeParserCtxt(ctxt);
15203     }
15204 
15205     return (ret);
15206 }
15207 
15208 /**
15209  * xmlReadDoc:
15210  * @cur:  a pointer to a zero terminated string
15211  * @URL:  the base URL to use for the document
15212  * @encoding:  the document encoding, or NULL
15213  * @options:  a combination of xmlParserOption
15214  *
15215  * parse an XML in-memory document and build a tree.
15216  *
15217  * Returns the resulting document tree
15218  */
15219 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)15220 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15221 {
15222     xmlParserCtxtPtr ctxt;
15223 
15224     if (cur == NULL)
15225         return (NULL);
15226     xmlInitParser();
15227 
15228     ctxt = xmlCreateDocParserCtxt(cur);
15229     if (ctxt == NULL)
15230         return (NULL);
15231     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15232 }
15233 
15234 /**
15235  * xmlReadFile:
15236  * @filename:  a file or URL
15237  * @encoding:  the document encoding, or NULL
15238  * @options:  a combination of xmlParserOption
15239  *
15240  * parse an XML file from the filesystem or the network.
15241  *
15242  * Returns the resulting document tree
15243  */
15244 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)15245 xmlReadFile(const char *filename, const char *encoding, int options)
15246 {
15247     xmlParserCtxtPtr ctxt;
15248 
15249     xmlInitParser();
15250     ctxt = xmlCreateURLParserCtxt(filename, options);
15251     if (ctxt == NULL)
15252         return (NULL);
15253     return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15254 }
15255 
15256 /**
15257  * xmlReadMemory:
15258  * @buffer:  a pointer to a char array
15259  * @size:  the size of the array
15260  * @URL:  the base URL to use for the document
15261  * @encoding:  the document encoding, or NULL
15262  * @options:  a combination of xmlParserOption
15263  *
15264  * parse an XML in-memory document and build a tree.
15265  *
15266  * Returns the resulting document tree
15267  */
15268 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)15269 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15270 {
15271     xmlParserCtxtPtr ctxt;
15272 
15273     xmlInitParser();
15274     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15275     if (ctxt == NULL)
15276         return (NULL);
15277     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15278 }
15279 
15280 /**
15281  * xmlReadFd:
15282  * @fd:  an open file descriptor
15283  * @URL:  the base URL to use for the document
15284  * @encoding:  the document encoding, or NULL
15285  * @options:  a combination of xmlParserOption
15286  *
15287  * parse an XML from a file descriptor and build a tree.
15288  * NOTE that the file descriptor will not be closed when the
15289  *      reader is closed or reset.
15290  *
15291  * Returns the resulting document tree
15292  */
15293 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)15294 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15295 {
15296     xmlParserCtxtPtr ctxt;
15297     xmlParserInputBufferPtr input;
15298     xmlParserInputPtr stream;
15299 
15300     if (fd < 0)
15301         return (NULL);
15302     xmlInitParser();
15303 
15304     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15305     if (input == NULL)
15306         return (NULL);
15307     input->closecallback = NULL;
15308     ctxt = xmlNewParserCtxt();
15309     if (ctxt == NULL) {
15310         xmlFreeParserInputBuffer(input);
15311         return (NULL);
15312     }
15313     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15314     if (stream == NULL) {
15315         xmlFreeParserInputBuffer(input);
15316 	xmlFreeParserCtxt(ctxt);
15317         return (NULL);
15318     }
15319     inputPush(ctxt, stream);
15320     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15321 }
15322 
15323 /**
15324  * xmlReadIO:
15325  * @ioread:  an I/O read function
15326  * @ioclose:  an I/O close function
15327  * @ioctx:  an I/O handler
15328  * @URL:  the base URL to use for the document
15329  * @encoding:  the document encoding, or NULL
15330  * @options:  a combination of xmlParserOption
15331  *
15332  * parse an XML document from I/O functions and source and build a tree.
15333  *
15334  * Returns the resulting document tree
15335  */
15336 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15337 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15338           void *ioctx, const char *URL, const char *encoding, int options)
15339 {
15340     xmlParserCtxtPtr ctxt;
15341     xmlParserInputBufferPtr input;
15342     xmlParserInputPtr stream;
15343 
15344     if (ioread == NULL)
15345         return (NULL);
15346     xmlInitParser();
15347 
15348     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15349                                          XML_CHAR_ENCODING_NONE);
15350     if (input == NULL) {
15351         if (ioclose != NULL)
15352             ioclose(ioctx);
15353         return (NULL);
15354     }
15355     ctxt = xmlNewParserCtxt();
15356     if (ctxt == NULL) {
15357         xmlFreeParserInputBuffer(input);
15358         return (NULL);
15359     }
15360     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15361     if (stream == NULL) {
15362         xmlFreeParserInputBuffer(input);
15363 	xmlFreeParserCtxt(ctxt);
15364         return (NULL);
15365     }
15366     inputPush(ctxt, stream);
15367     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15368 }
15369 
15370 /**
15371  * xmlCtxtReadDoc:
15372  * @ctxt:  an XML parser context
15373  * @cur:  a pointer to a zero terminated string
15374  * @URL:  the base URL to use for the document
15375  * @encoding:  the document encoding, or NULL
15376  * @options:  a combination of xmlParserOption
15377  *
15378  * parse an XML in-memory document and build a tree.
15379  * This reuses the existing @ctxt parser context
15380  *
15381  * Returns the resulting document tree
15382  */
15383 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)15384 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15385                const char *URL, const char *encoding, int options)
15386 {
15387     xmlParserInputPtr stream;
15388 
15389     if (cur == NULL)
15390         return (NULL);
15391     if (ctxt == NULL)
15392         return (NULL);
15393     xmlInitParser();
15394 
15395     xmlCtxtReset(ctxt);
15396 
15397     stream = xmlNewStringInputStream(ctxt, cur);
15398     if (stream == NULL) {
15399         return (NULL);
15400     }
15401     inputPush(ctxt, stream);
15402     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15403 }
15404 
15405 /**
15406  * xmlCtxtReadFile:
15407  * @ctxt:  an XML parser context
15408  * @filename:  a file or URL
15409  * @encoding:  the document encoding, or NULL
15410  * @options:  a combination of xmlParserOption
15411  *
15412  * parse an XML file from the filesystem or the network.
15413  * This reuses the existing @ctxt parser context
15414  *
15415  * Returns the resulting document tree
15416  */
15417 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)15418 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15419                 const char *encoding, int options)
15420 {
15421     xmlParserInputPtr stream;
15422 
15423     if (filename == NULL)
15424         return (NULL);
15425     if (ctxt == NULL)
15426         return (NULL);
15427     xmlInitParser();
15428 
15429     xmlCtxtReset(ctxt);
15430 
15431     stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15432     if (stream == NULL) {
15433         return (NULL);
15434     }
15435     inputPush(ctxt, stream);
15436     return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15437 }
15438 
15439 /**
15440  * xmlCtxtReadMemory:
15441  * @ctxt:  an XML parser context
15442  * @buffer:  a pointer to a char array
15443  * @size:  the size of the array
15444  * @URL:  the base URL to use for the document
15445  * @encoding:  the document encoding, or NULL
15446  * @options:  a combination of xmlParserOption
15447  *
15448  * parse an XML in-memory document and build a tree.
15449  * This reuses the existing @ctxt parser context
15450  *
15451  * Returns the resulting document tree
15452  */
15453 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)15454 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15455                   const char *URL, const char *encoding, int options)
15456 {
15457     xmlParserInputBufferPtr input;
15458     xmlParserInputPtr stream;
15459 
15460     if (ctxt == NULL)
15461         return (NULL);
15462     if (buffer == NULL)
15463         return (NULL);
15464     xmlInitParser();
15465 
15466     xmlCtxtReset(ctxt);
15467 
15468     input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15469     if (input == NULL) {
15470 	return(NULL);
15471     }
15472 
15473     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15474     if (stream == NULL) {
15475 	xmlFreeParserInputBuffer(input);
15476 	return(NULL);
15477     }
15478 
15479     inputPush(ctxt, stream);
15480     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15481 }
15482 
15483 /**
15484  * xmlCtxtReadFd:
15485  * @ctxt:  an XML parser context
15486  * @fd:  an open file descriptor
15487  * @URL:  the base URL to use for the document
15488  * @encoding:  the document encoding, or NULL
15489  * @options:  a combination of xmlParserOption
15490  *
15491  * parse an XML from a file descriptor and build a tree.
15492  * This reuses the existing @ctxt parser context
15493  * NOTE that the file descriptor will not be closed when the
15494  *      reader is closed or reset.
15495  *
15496  * Returns the resulting document tree
15497  */
15498 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)15499 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15500               const char *URL, const char *encoding, int options)
15501 {
15502     xmlParserInputBufferPtr input;
15503     xmlParserInputPtr stream;
15504 
15505     if (fd < 0)
15506         return (NULL);
15507     if (ctxt == NULL)
15508         return (NULL);
15509     xmlInitParser();
15510 
15511     xmlCtxtReset(ctxt);
15512 
15513 
15514     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15515     if (input == NULL)
15516         return (NULL);
15517     input->closecallback = NULL;
15518     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15519     if (stream == NULL) {
15520         xmlFreeParserInputBuffer(input);
15521         return (NULL);
15522     }
15523     inputPush(ctxt, stream);
15524     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15525 }
15526 
15527 /**
15528  * xmlCtxtReadIO:
15529  * @ctxt:  an XML parser context
15530  * @ioread:  an I/O read function
15531  * @ioclose:  an I/O close function
15532  * @ioctx:  an I/O handler
15533  * @URL:  the base URL to use for the document
15534  * @encoding:  the document encoding, or NULL
15535  * @options:  a combination of xmlParserOption
15536  *
15537  * parse an XML document from I/O functions and source and build a tree.
15538  * This reuses the existing @ctxt parser context
15539  *
15540  * Returns the resulting document tree
15541  */
15542 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15543 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15544               xmlInputCloseCallback ioclose, void *ioctx,
15545 	      const char *URL,
15546               const char *encoding, int options)
15547 {
15548     xmlParserInputBufferPtr input;
15549     xmlParserInputPtr stream;
15550 
15551     if (ioread == NULL)
15552         return (NULL);
15553     if (ctxt == NULL)
15554         return (NULL);
15555     xmlInitParser();
15556 
15557     xmlCtxtReset(ctxt);
15558 
15559     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15560                                          XML_CHAR_ENCODING_NONE);
15561     if (input == NULL) {
15562         if (ioclose != NULL)
15563             ioclose(ioctx);
15564         return (NULL);
15565     }
15566     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15567     if (stream == NULL) {
15568         xmlFreeParserInputBuffer(input);
15569         return (NULL);
15570     }
15571     inputPush(ctxt, stream);
15572     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15573 }
15574 
15575 #define bottom_parser
15576 #include "elfgcchack.h"
15577